Begun parser design + typo fixes + improved lexer modularity

2025-06-18 13:35:07 +10:00 · 2025-06-18 13:35:07 +10:00 · a258802945
commit a258802945
parent 90ca138904
6 changed files with 103 additions and 31 deletions
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -1,6 +1,7 @@
 import os
 import noether/lexer/tok
 import noether/lexer/tokstream
+import noether/parser/parser

 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"
@ -9,10 +10,13 @@ when isMainModule:
    let filename = paramStr(1)
    var tokStream = newTokStream(filename, isFile=true)
    
-    # DumpTok
-    var tok: nlTok
-    while tokStream.nextTok(tok):
-      echo tok
+    # # DumpTok
+    # var tok: nlTok
+    # while tokStream.nextTok(tok):
+    #   echo tok
+
+    # DumpTree
+    discard parse(tokStream)
    
  else:
    echo "usage: nlx filename"
--- a/src/noether/lexer/lstream.nim
+++ b/src/noether/lexer/lstream.nim
@ -2,6 +2,7 @@ import std/streams
 import std/options

 import tok
+export tok

 type
  # Character streaming for the nlTokStream
--- a/src/noether/lexer/tokbuilding.nim
+++ b/src/noether/lexer/tokbuilding.nim
@ -3,13 +3,14 @@ include lstream
 type
  # Provides a stream-like interface for lexing nlToks
  # Internally reliant on the functionality of nlLStream
-  nlTokStream = object
+  nlTokStream* = object
    lstream: nlLStream
    build: nlTok   # the build token
+    currTok*: nlTok # the most recent token handed out by nextTok
    closed: bool # EOF + all tokens built

 # Generates an EOL token for the nlTokStream's state
-proc EOLTok*(tokStream: nlTokStream): nlTok = 
+proc EOLTok(tokStream: nlTokStream): nlTok = 
  result = nlTok(
    tType: nlTokType.EOL,
    lit: "\0",
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  result.resetBuild()
  discard result.lstream.progLine()

+# Short-hand accessor: hands back the `line` held by the stream's nlLStream
+proc currLine*(tokStream: nlTokStream): string =
+  tokStream.lstream.line
+  
 # Reimplements nlLStream.progress() for nlTokStream
 # to account for additional structure (ie the build token)
 proc progChar(tokStream: var nlTokStream): bool =
@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
  if tokStream.closed:
    return false
  while true:
-    var buildTok: Option[nlTok]
+    var flushedTok: Option[nlTok]
    let 
-      canProgress = tokStream.progBuild(buildTok)
-      tokBuilt = buildTok.isSome
+      canProgress = tokStream.progBuild(flushedTok)
+      buildComplete = flushedTok.isSome
    # canProgress & EOF reached => no more tokens to build :)
    # NOTE: reachedEOF and not canProgress => more tokens unwrapping
-    if tokBuilt:
-      tok = buildTok.get()
+    if buildComplete:
+      # return the finished build token, and save it as the current token
+      tok = flushedTok.get()
+      tokStream.currTok = tok
    if canProgress and not tokStream.progChar():
      tokStream.closed = true
-      return tokBuilt
-    elif tokBuilt:
+      return buildComplete
+    elif buildComplete:
      return true
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@ -1,18 +1,19 @@
 from ../lexer/tok import nlTok
-from ../lexer/tokstraem import 
+# from ../lexer/tokstream import 

 type
  # NOTE: by the end of parsing NO nodes should
  # NOTE: have nlNodeType.NONE
-  nlNodeType = enum
+  nlNodeType* = enum
    NONE, # Placeholder Value
    TERM, # Indicates the tree has terminated
    STRL, # String Literal
    CHRL, # Character Literal
-  nlNode {.acyclic.} = ref object of RootObj
-    nType: nlNodeType
-    toks: seq[nlTok] # nodes store the tokens that build them
-    left, right: nlNode
-
-proc parse()
+  nlNode* {.acyclic.} = ref object of RootObj
+    nType*: nlNodeType
+    toks*: seq[nlTok] # nodes store the tokens that build them
+    # left, right: nlNode  (child links are commented out; a node currently only carries its tokens)

+# Convenience wrapper: pushes `tok` onto the end of `node.toks`
+proc addTok*(node: nlNode, tok: nlTok) =
+  add(node.toks, tok)
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -1,20 +1,79 @@
+import strutils
+
+import nodes
 import ../lexer/tokstream

+type
+  nlParseStat = enum
+    OK, # no error; also what `*` yields when its condition is false
+    UNMATCHED, # presumably: an opening quote never found its partner - TODO confirm
+    TOOBIG, # presumably: literal content too large (cf. the note in parse_chrl) - TODO confirm
+
+proc `*`(stat: nlParseStat, b: bool): nlParseStat =
+  ## Gates a status on a condition: `stat` passes through when `b` is true,
+  ## otherwise the outcome is nlParseStat.OK.
+  result = nlParseStat.OK
+  if b:
+    result = stat
+
 # Greed will consume anything except a punishment
-proc greed(tokStream: nlTokStream, toks: var seq[nlTok], punish: str) =
+# Reports whether a token accepted by `satisfy` was reached
+proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
+  ## Appends every token produced by nextTok to `toks`, stopping right after
+  ## the first one `satisfy` accepts (true). Yields false when nextTok
+  ## reports nothing more before that happens.
+  var consumed: nlTok
+  # `result` starts out false; once it flips, nextTok is not called again
+  while not result and tokStream.nextTok(consumed):
+    toks.add(consumed)
+    result = satisfy(consumed)
+
+proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
+  ## Pulls tokens from `tokStream` into `toks` until a token of type
+  ## `satisfy` or an EOL token has been stored (true), or until nextTok
+  ## reports nothing more (false).
+  var consumed: nlTok
+  # `result` starts out false; once it flips, nextTok is not called again
+  while not result and tokStream.nextTok(consumed):
+    toks.add(consumed)
+    result = consumed.tType == satisfy or consumed.tType == nlTokType.EOL
+
+proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
+  ## True when `tok` is of type `tokType` or is an EOL token; this is the
+  ## same stop condition greedEOL applies inline.
+  ## The proc previously had no body, which is a compile error.
+  result = tok.tType == tokType or tok.tType == nlTokType.EOL
  

-proc parse_strl(tokStream: nlTokStream): nlNode =
+proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat = # NOTE(review): no body follows in this commit, so this cannot compile yet; intended behaviour is not shown here - TODO implement or remove
  
  
+proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
+  ## Builds a STRL node seeded with the stream's current token (the opening
+  ## double quote when called from parse), then greedily collects the
+  ## following tokens up to and including the next DQUO or EOL token.
+  ## Returns OK when greedEOL stopped on such a token, UNMATCHED when the
+  ## stream ran out first.
+  node = nlNode(
+    nType: nlNodeType.STRL
+  )
+  node.addTok(tokStream.currTok)
+  # nlParseStat declares OK, UNMATCHED and TOOBIG only; the former
+  # `UNCLOSED` was not a member, so UNMATCHED is used for the failure case
+  result = nlParseStat.UNMATCHED * (not greedEOL(tokStream, node.toks, nlTokType.DQUO))
+
+proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
+  ## Starts a fresh CHRL node seeded with the stream's current token, then
+  ## lets greedEOL collect tokens up to the next SQUO or EOL token.
+  ## The returned bool is greedEOL's verdict (false: the stream ended first).
+  # Two errors are possible here: 1: content too big, 2: never closed
+  node = nlNode(nType: nlNodeType.CHRL)
+  node.toks.add(tokStream.currTok)
+  greedEOL(tokStream, node.toks, nlTokType.SQUO)
+
 # Attempt to form an nlAST from a nlTokStream
-proc parse(tokStream: nlTokStream): nlNode = 
+proc parse*(tokStream: var nlTokStream): nlNode =
+  ## Pulls tokens one at a time and dispatches on the token type:
+  ## DQUO -> parse_strl, SQUO -> parse_chrl, anything else is reported as
+  ## unhandled. Outcomes are echoed; on failure the current line is printed
+  ## with a caret under the opening token's start position.
+  ## NOTE: `result` is never assigned, so the returned nlNode is nil.
  var tok: nlTok
-  while true:
-    case tok.tokType:
+  var node: nlNode
+  while tokStream.nextTok(tok):
+    case tok.tType:
    of nlTokType.DQUO:
      # Attempt to parse string literal
-      parse_strl()
-
-    if not tokStream.nextTok(tok):
-      break
+      # parse_strl reports an nlParseStat, so compare against OK rather
+      # than negating it like a bool
+      if parse_strl(tokStream, node) != nlParseStat.OK:
+        echo "Unmatched Double Quotation! Malformed String Literal"
+        echo tokStream.currLine()
+        echo repeat(" ", tok.startPos), '^'
+      else:
+        echo "Parsed String Literal"
+        echo node[]
+    of nlTokType.SQUO:
+      # Attempt to parse character literal
+      if not parse_chrl(tokStream, node):
+        echo "Unmatched Single Quotation! Malformed Character Literal"
+        echo tokStream.currLine()
+        echo repeat(" ", tok.startPos), '^'
+      else:
+        echo "Parsed Character Literal"
+        echo node[]
+    else:
+      echo "blah blah unhandled case"