From f8697bd66297dcfd3cbff6f492332d0cb1890094 Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 02:09:43 +1000
Subject: [PATCH 01/12] 1 gazillion changes (mostly documenting my insanity
 optimizing + naming)

---
 src/nlx.nim                       |  6 +--
 src/noether/lexer/lstream.nim     | 20 ++++----
 src/noether/lexer/tok.nim         | 28 ++---------
 src/noether/lexer/tokbuilding.nim | 28 +++++------
 src/noether/lexer/tokstream.nim   | 29 ++++++-----
 src/noether/lexer/toktype.nim     | 79 ++++++++++++++++--------------
 src/noether/parser/nodes.nim      | 51 +++++++++++++++-----
 src/noether/parser/parser.nim     | 80 ++++++++++++++-----------------
 src/noether/parser/parseutil.nim  | 68 +++++++++++++++-----------
 9 files changed, 206 insertions(+), 183 deletions(-)

diff --git a/src/nlx.nim b/src/nlx.nim
index 4b0c678..75f59d4 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -3,6 +3,7 @@ import noether/lexer/tok
 import noether/lexer/tokstream
 import noether/parser/parser
 
+{.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
   echo "Noether Lang Extras v0.1.0 - nlx"
 
@@ -11,9 +12,8 @@ when isMainModule:
     var tokStream = newTokStream(filename, isFile=true)
     
     # # DumpTok
-    # var tok: nlTok
-    # while tokStream.nextTok(tok):
-    #   echo tok
+    # while tokStream.progress():
+    #   echo tokStream.currTok
 
     # DumpTree
     discard parse(tokStream)
diff --git a/src/noether/lexer/lstream.nim b/src/noether/lexer/lstream.nim
index 034f48b..44138e0 100644
--- a/src/noether/lexer/lstream.nim
+++ b/src/noether/lexer/lstream.nim
@@ -41,7 +41,7 @@ proc outOfBounds*(lstream: nlLStream): bool =
   result = (lstream.pos > lstream.line.len - 1)
 
 # Progress the lex stream to the next line (if available)
-proc progLine*(lstream: var nlLStream): bool = 
+proc progressLine*(lstream: var nlLStream): bool = 
   if lstream.stream.readLine(lstream.line):
     inc lstream.lineNum
     lstream.pos = Natural 0
@@ -50,17 +50,17 @@ proc progLine*(lstream: var nlLStream): bool =
 
 # Progress the lex stream to the next character in the line
 # forcefully (aka does NOT check if we reached EOL)
-proc forceProgChar*(lstream: var nlLStream) = 
+proc forceProgressChar*(lstream: var nlLStream) = 
   inc lstream.pos
 
-# Progress the lex stream to the next character (if available)
-proc progress*(lstream: var nlLStream): bool =
-  if not lstream.atEOL():
-    lstream.forceProgChar()
-    result = true
-  else:
-    # attempt to progress next line past EOL
-    result = lstream.progLine()
+# # Progress the lex stream to the next character (if available)
+# proc progressChar*(lstream: var nlLStream): bool =
+#   if not lstream.atEOL():
+#     lstream.forceProgressChar()
+#     result = true
+#   else:
+#     # attempt to progress next line past EOL
+#     result = lstream.progressLine()
 
 proc currChar*(lstream: nlLStream): char = 
   result = lstream.line[lstream.pos]
diff --git a/src/noether/lexer/tok.nim b/src/noether/lexer/tok.nim
index fb3067c..7715b8f 100644
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@@ -2,7 +2,7 @@ include toktype
 
 type 
   nlTok* = object
-    tType*: nlTokType
+    tKind*: nlTokKind
     lit*: string
     lineNum*: Natural
     startPos*: Natural
@@ -12,29 +12,11 @@ type
 # all other fields are expected to be filled out later.
 proc emptyTok*(startPos: int): nlTok =
   result = nlTok(
-    tType: nlTokType.NONE,
+    tKind: tkNONE,
     lit: "",
     startPos: Natural startPos,
   )
 
-# Checks if an nlTok has nlTokType.NONE
-proc isTokUntyped*(tType: nlTokType): bool =
-  result = (tType == nlTokType.NONE)
-  
-# Checks if an nlTok has nlTokType.EOL
-proc isTokEOL*(tok: nlTok): bool =
-  result = (tok.tType == nlTokType.EOL)
-
-
-
-# This method is only used to convert null
-# terminator nlToks into line-feed ones.
-# Returns a copy of an nlTok, changing its type
-proc tokTermToLineFeed*(tok: nlTok): nlTok =
-  result = nlTok(
-    tType: nlTokType.LNFD,
-    lit: tok.lit,
-    lineNum: tok.lineNum,
-    startPos: tok.startPos,
-    endPos: tok.endPos,
-  )
+# Checks if an nlTok has tkNONE
+proc isUntyped*(tKind: nlTokKind): bool =
+  result = (tKind == tkNONE)
diff --git a/src/noether/lexer/tokbuilding.nim b/src/noether/lexer/tokbuilding.nim
index 99022ee..46a2222 100644
--- a/src/noether/lexer/tokbuilding.nim
+++ b/src/noether/lexer/tokbuilding.nim
@@ -12,7 +12,7 @@ type
 # Generates an EOL token for the nlTokStream's state
 proc EOLTok(tokStream: nlTokStream): nlTok = 
   result = nlTok(
-    tType: nlTokType.EOL,
+    tKind: tkEOL,
     lit: "\0",
     lineNum: Natural tokStream.lstream.lineNum,
     startPos: Natural tokStream.lstream.pos,
@@ -40,46 +40,46 @@ proc flushBuild(tokStream: var nlTokStream): nlTok =
 
 # Returns whether the build token has a set type yet.
 # This indicates that the build token should inherit
-# the nlTokType of the nlLStream's next character.
+# the nlTokKind of the nlLStream's next character.
 proc isUntypedBuild(tokStream: nlTokStream): bool =
-  result = isTokUntyped(tokStream.build.tType)
+  result = tokStream.build.tKind.isUntyped()
 
-# Check whether an nlTokType is "compatible" with the build token. 
+# Check whether an nlTokKind is "compatible" with the build token. 
 # NOTE: flushBuild() should be called when an incompatible token is discovered.
-proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
-  result = (tType == tokStream.build.tType)
+proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
+  result = (tKind == tokStream.build.tKind)
 
 # Add a character to the nlTokStream's build token.
 # Flushes and returns the build token if "fully built",
 # and a boolean indicating whether the nlTokStream can progress.
-proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
+proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
   # the "pos > EOL" invalid state is used intentionally
   # to indicate all tokens have been built, and return EOL Token
   if tokStream.lstream.outOfBounds():
     buildTok = some(EOLTok(tokStream))
     return true # can progress once more
     
-  let tType = getTokType(tokStream.lstream.currChar())
+  let tKind = getTokType(tokStream.lstream.currChar())
   # untyped build tokens must inherited immediately
   if isUntypedBuild(tokStream):
-    tokStream.build.tType = tType
+    tokStream.build.tKind = tKind
   
   # check if EOL reached
   if tokStream.lstream.atEOL():
       # flush old build token, the new one can be left untyped
-      let compatible = isCompatibleBuild(tokStream, tType)
+      let compatible = isCompatibleBuild(tokStream, tKind)
       result = false # DO NOT PROGRESS
       if compatible:
         # force the lstream into an invalid state by progressing beyond EOL
-        # we can then detect this state on the next progBuild and return
+        # we can then detect this state on the next progressBuild and return
         # an EOL character (very unsafe implementation but it works well)
-        tokStream.lstream.forceProgChar()
+        tokStream.lstream.forceProgressChar()
       buildTok = some(flushBuild(tokStream))
   # check character and build token compatability
-  elif not isCompatibleBuild(tokStream, tType):
+  elif not isCompatibleBuild(tokStream, tKind):
       # flush old build token, the new one inherits type
       buildTok = some(flushBuild(tokStream))
-      tokStream.build.tType = tType
+      tokStream.build.tKind = tKind
       result = true # can progress
   else:
     buildTok = none(nlTok)
diff --git a/src/noether/lexer/tokstream.nim b/src/noether/lexer/tokstream.nim
index c3fb3f2..02a045e 100644
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@@ -9,44 +9,43 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
   # 1. initialise an empty build token 
   # 2. progress to the first line
   result.resetBuild()
-  discard result.lstream.progLine()
+  discard result.lstream.progressLine()
 
 # Defines a short-hand notation for getting the current line
-proc currLine*(tokStream: nlTokStream): string =
+proc line*(tokStream: nlTokStream): string =
   result = tokStream.lstream.line
   
-# Reimplements nlLStream.progress() for nlTokStream
+# Reimplements nlLStream.progressChar for nlTokStream
 # to account for additional structure (ie the build token)
-proc progChar(tokStream: var nlTokStream): bool =
+# NOTE: progressChar progresses to lstream's next char
+proc progressChar(tokStream: var nlTokStream): bool =
   if not tokStream.lstream.atEOL():
-    tokStream.lstream.forceProgChar()
+    tokStream.lstream.forceProgressChar()
     result = true
   else:
     # attempt to progress to next line past EOL
-    result = tokStream.lstream.progLine()
+    result = tokStream.lstream.progressLine()
     tokStream.resetBuild()  
   
-# Generates and sets (by reference) the next token in the stream,
-# via repeatedly calling progBuild() and progChar().
+# Generates and progresses to the next token in the nlTokStream
+# via repeatedly calling progressBuild() and progressChar().
 # Returns a boolean indicating whether EOF has been reached.
-# NOTE: progBuild adds lstream's current char to the build token
-# NOTE: progChar progresses to lstream's next char
-proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
+# NOTE: access the new token via `tokStream.currTok`
+proc progress*(tokStream: var nlTokStream): bool =
   # Return prematurely if already closed
   if tokStream.closed:
     return false
   while true:
     var flushedTok: Option[nlTok]
     let 
-      canProgress = tokStream.progBuild(flushedTok)
+      canProgress = tokStream.progressBuild(flushedTok)
       buildComplete = flushedTok.isSome
     # canProgress & EOF reached => no more tokens to build :)
     # NOTE: reachedEOF and not canProgress => more tokens unwrapping
     if buildComplete:
       # return the finished build token, and save it as the current token
-      tok = flushedTok.get()
-      tokStream.currTok = tok
-    if canProgress and not tokStream.progChar():
+      tokStream.currTok = flushedTok.get()
+    if canProgress and not tokStream.progressChar():
       tokStream.closed = true
       return buildComplete
     elif buildComplete:
diff --git a/src/noether/lexer/toktype.nim b/src/noether/lexer/toktype.nim
index 0f40023..49add5b 100644
--- a/src/noether/lexer/toktype.nim
+++ b/src/noether/lexer/toktype.nim
@@ -1,54 +1,59 @@
 type
-  # nlTokType allows primitive nlToks to be typed,
-  # the nlTokType enum should never be directly
+  # nlTokKind allows primitive nlToks to be typed,
+  # the nlTokKind enum should never be directly
   # accessed. Use the interface in this file instead.
-  nlTokType* = enum
-    NONE, # Placeholder Value
-    EOF,  # End of File
-    EOL,  # End of Line (\0 --> EOL)
-    WORD, # Alphanumeric token
-    SYMB, # Symbolic token
-    LNFD, # \r \n Line-Feed
-    WTSP, # ' ' \t Whitespace
-    LPAR, # ( Left Parenthesis
-    RPAR, # ) Right Parenthesis
-    LBRA, # { Left Brace
-    RBRA, # } Right Brace
-    LSQB, # [ Left Square Bracket
-    RSQB, # ] Right Square Bracket
-    # LANB, # < Left Angle Bracket
-    # RANB, # > Right Angle Bracket
-    SQUO, # ' Single Quotation Marking
-    DQUO, # " Double Quotation Marking
-    GRVA, # ` Grave Accent
-    HASH, # # Number Sign (Hashtag)
+  nlTokKind* = enum
+    tkNONE, # Placeholder Value
+
+    tkEOF,  # End of File
+    tkEOL,  # End of Line (\0 --> EOL)
+
+    tkWORD, # Alphanumeric token
+    tkSYMB, # Symbolic token
+
+    tkLNFD, # \r \n Line-Feed
+    tkWTSP, # ' ' \t Whitespace
+
+    # RESERVED SYMBOLS
+    tkLPAR, # ( Left Parenthesis
+    tkRPAR, # ) Right Parenthesis
+    tkLBRA, # { Left Brace
+    tkRBRA, # } Right Brace
+    tkLSQB, # [ Left Square Bracket
+    tkRSQB, # ] Right Square Bracket
+    # tkLANB, # < Left Angle Bracket
+    # tkRANB, # > Right Angle Bracket
+    tkSQUO, # ' Single Quotation Marking
+    tkDQUO, # " Double Quotation Marking
+    tkGRVA, # ` Grave Accent
+    tkHASH, # # Number Sign (Hashtag)
         
-# Classifies a character to its nlTokType
-proc getTokType*(c: char): nlTokType =
+# Classifies a character to its nlTokKind
+proc getTokType*(c: char): nlTokKind =
   case c:
   of '\0', '\r', '\n':
-    result = nlTokType.EOL
+    result = tkEOL
   of ' ', '\t':
-    result = nlTokType.WTSP
+    result = tkWTSP
   of '(':
-    result = nlTokType.LPAR
+    result = tkLPAR
   of ')':
-    result = nlTokType.RPAR
+    result = tkRPAR
   of '{':
-    result = nlTokType.LBRA
+    result = tkLBRA
   of '}':
-    result = nlTokType.RBRA
+    result = tkRBRA
   of '[':
-    result = nlTokType.LSQB
+    result = tkLSQB
   of ']':
-    result = nlTokType.RSQB
+    result = tkRSQB
   of '\'':
-    result = nlTokType.SQUO
+    result = tkSQUO
   of '\"':
-    result = nlTokType.DQUO
+    result = tkDQUO
   of '`':
-    result = nlTokType.GRVA
+    result = tkGRVA
   of '#':
-    result = nlTokType.HASH
+    result = tkHASH
   else:
-    result = nlTokType.WORD
+    result = tkWORD
diff --git a/src/noether/parser/nodes.nim b/src/noether/parser/nodes.nim
index 23cf742..c31285e 100644
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@@ -1,19 +1,48 @@
+import std/options
 from ../lexer/tok import nlTok
-# from ../lexer/tokstream import 
 
 type
-  # NOTE: by the end of parsing NO nodes should
-  # NOTE: have nlNodeType.NONE
-  nlNodeType* = enum
-    NONE, # Placeholder Value
-    TERM, # Indicates the tree has terminated
-    STRL, # String Literal
-    CHRL, # Character Literal
+  # NOTE: by the end of parsing NO nodes should have nkNone
+  nlNodeKind* = enum
+    nkNone, # Placeholder Value
+
+    nkStrLit, # String Literal
+    nkChrLit, # Character Literal
+
+  # NOTE: always check parent != nil when traversing the tree
   nlNode* {.acyclic.} = ref object of RootObj
-    nType*: nlNodeType
-    toks*: seq[nlTok] # nodes store the tokens that build them
-    # left, right: nlNode
+    nKind*: nlNodeKind
+    toks*: seq[nlTok] # nodes (may) store the tokens that build them
+    parent*: nlNode
+
+  # Purely abstract type that all nlNode objects
+  # with children are expected to inherit from.
+  nlBranchNode* {.acyclic.} = ref object of nlNode
+    child: UncheckedArray[nlNode]
+
+  nlBiNode* {.acyclic.} = ref object of nlBranchNode
+
+proc childCount*(node: nlNode): int {.inline.} = 0
+proc childCount*(node: nlBiNode): int {.inline.} = 2
+
+proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} = 
+  result = none(nlNode)
+proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} = 
+  result = some(node.child[i])
+  
+proc newNode*(nKind: nlNodeKind): nlNode =
+  result = nlNode(
+    nKind: nKind,
+  )
+  
+proc newBiNode*(nKind: nlNodeKind): nlNode =
+  result = nlBiNode(
+    nKind: nKind,
+  ) 
 
 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
+  echo "AM I HERE?"
+  echo node[]
+  echo node.toks
   node.toks.add(tok)
diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index 0598075..4654fb3 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -3,61 +3,55 @@ include parseutil
 
 # NOTE: Matching between two tokens will fill `node` with everything
 # NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(tokStream: var nlTokStream, 
-                node: var nlNode,
-                matchType: nlTokType): nlParseStat =
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
   result = greed(
-    tokStream, 
-    node.toks, 
+    parser,
     satisfyMatch(matchType),
   )
-proc parseMatchLine(tokStream: var nlTokStream, 
-                   node: var nlNode,
-                   matchType: nlTokType): nlParseStat =
-  result = greed(
-    tokStream, 
-    node.toks, 
-    satisfyMatchEOL(matchType),
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greedLine(
+    parser, 
+    satisfyMatch(matchType),
   )
 
-proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
-  node = nlNode(
-    nType: nlNodeType.STRL
-  )
-  node.addTok(tokStream.currTok)
-  result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
+proc parseStrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatch(tkDQUO)
 
-proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
-  node = nlNode(
-    nType: nlNodeType.CHRL
-  )
-  node.addTok(tokStream.currTok)
-  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
-  result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
-
-# Attempt to form an nlAST from a nlTokStream
-proc parse*(tokStream: var nlTokStream): nlNode = 
-  var tok: nlTok
-  var node: nlNode
-  while tokStream.nextTok(tok):
-    case tok.tType:
-    of nlTokType.DQUO:
+proc parseChrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatch(tkSQUO)
+  
+proc parseStmt(parser: var nlParser): nlParseStat = 
+  # initialise build node as none just for the hell of it
+  
+  while parser.stream.progress():
+    echo parser.stream.currTok
+    case parser.stream.currTok.tKind
+    of tkDQUO:
       # Attempt to parse string literal
-      if not parseStrL(tokStream, node):
+      if parser.parseStrLit() != nlParseStat.OK:
         echo "Unmatched Double Quotation! Malformed String Literal"
-        echo tokStream.currLine()
-        echo repeat(" ", tok.startPos), '^'
+        echo parser.stream.line
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
       else:
         echo "Parsed String Literal"
-        echo node[]
-    of nlTokType.SQUO:
+        echo parser.bnode[]
+    of tkSQUO:
       # Attempt to parse string literal
-      if not parseChrL(tokStream, node):
+      if parser.parseChrLit() != nlParseStat.OK:
         echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo tokStream.currLine()
-        echo repeat(" ", tok.startPos), '^'
+        echo parser.stream.line
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
       else:
-        echo "Parsed String Literal"
-        echo node[]
+        echo "Parsed Character Literal"
+        echo parser.bnode[]
     else:
       echo "blah blah unhandled case"
+  result = nlParseStat.OK
+      
+# Attempt to parse nlAST from nlTokStream
+proc parse*(tokStream: var nlTokStream): nlAST =
+  var parser = newParser(tokStream)
+  echo ' '
+  discard parser.parseStmt()
+
+  result = parser.ast
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
index 6fa1243..4d9deb6 100644
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@@ -2,21 +2,41 @@ import nodes
 import ../lexer/tokstream
 
 type
-  # NOTE: Values above __FAIL__ indicate a failed state
-  nlParseStat* = enum
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
+  # NOTE2: nlParseStat is marked pure out of habit that's all
+  nlParseStat* {.pure.} = enum
     OK,
-    __FAIL__,
-    MIDAS, # Greedy search was never satisfied
+    MARKER_FAIL,
     UNMATCHED,
     TOOBIG,
 
+  nlAST* = object
+    root: nlNode
+
+  nlParser* = object
+    stream: nlTokStream
+    ast: nlAST
+    # the "build node" is a reference to the AST node
+    # the parser is currently modifying/building from
+    # NOTE: bnode changes frequently, it is NOT the root
+    bnode: nlNode
+    
+
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
   result = if b: stat else: nlParseStat.OK
 
 proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.__FAIL__)
+  result = (stat >= nlParseStat.MARKER_FAIL)
 
+proc newParser*(tokStream: var nlTokStream): nlParser =
+  let rootNode = newNode(nkNone)
+  result = nlParser(
+    stream: tokStream,
+    ast: rootNode,
+    bnode: rootNode,
+  )
 
+  
 #[ "Greed" refers to something I mentioned in my discussion on
  |  Noether's grammar (in an EBNF-like language). Greed just
  |  means "everything until a condition is satisified".
@@ -25,34 +45,28 @@ proc isFail*(stat: nlParseStat): bool =
   
 # Greed will consume anything until a condition is satisfied
 # Returns false if the greed was never satisfied (OMG!!)
-proc greed(tokStream: var nlTokStream, 
-           toks: var seq[nlTok], 
-           satisfy: proc(tok: nlTok): bool,
-           ): nlParseStat =
-  var tok: nlTok
-  while tokStream.nextTok(tok):
-    toks.add(tok)
-    if satisfy(tok):
+proc greed(parser: var nlParser,
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
+  while parser.stream.progress():
+    echo "im definitely here!"
+    parser.bnode.addTok(parser.stream.currTok)
+    if satisfy(parser.stream.currTok):
       return nlParseStat.OK
   result = nlParseStat.UNMATCHED
 
-proc greedLine(tokStream: var nlTokStream, 
-               toks: var seq[nlTok], 
+proc greedLine(parser: var nlParser,
                satisfy: proc(tok: nlTok): bool): nlParseStat =
-  var tok: nlTok
-  while tokStream.nextTok(tok):
-    toks.add(tok)
-    if satisfy(tok):
-      return true
-  result = 
+  while parser.stream.progress():
+    parser.bnode.addTok(parser.stream.currTok)
+    if satisfy(parser.stream.currTok):
+      return nlParseStat.OK
+    elif parser.stream.currTok.tKind == tkEOL:
+      return nlParseStat.UNMATCHED
+  result = nlParseStat.UNMATCHED
 
 #[ Templates for generating greed satisfying conditions.
  ]#
 
 # Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokType) = 
-  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
-  
-# Satisfied if it finds nlTok of type matchType or EOL reached
-template satisfyMatchEOL(matchType: nlTokType) = 
-  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)
+template satisfyMatch(matchType: nlTokKind): untyped  = 
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))

From 2af3000c2ec80230adbcf74d82ec353cd994037f Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 02:11:52 +1000
Subject: [PATCH 02/12] eeeeekkk typo :(

---
 src/noether/parser/nodes.nim     | 1 -
 src/noether/parser/parseutil.nim | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/noether/parser/nodes.nim b/src/noether/parser/nodes.nim
index c31285e..a50eee0 100644
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@@ -42,7 +42,6 @@ proc newBiNode*(nKind: nlNodeKind): nlNode =
 
 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
-  echo "AM I HERE?"
   echo node[]
   echo node.toks
   node.toks.add(tok)
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
index 4d9deb6..386b03a 100644
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@@ -32,7 +32,9 @@ proc newParser*(tokStream: var nlTokStream): nlParser =
   let rootNode = newNode(nkNone)
   result = nlParser(
     stream: tokStream,
-    ast: rootNode,
+    ast: nlAST(
+      root: rootNode
+    ),
     bnode: rootNode,
   )
 
@@ -48,7 +50,6 @@ proc newParser*(tokStream: var nlTokStream): nlParser =
 proc greed(parser: var nlParser,
            satisfy: proc(tok: nlTok): bool): nlParseStat =
   while parser.stream.progress():
-    echo "im definitely here!"
     parser.bnode.addTok(parser.stream.currTok)
     if satisfy(parser.stream.currTok):
       return nlParseStat.OK

From 4a8f44d23f17a628a96efc438fcc3eaa8cb72b93 Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 02:23:54 +1000
Subject: [PATCH 03/12] Fixed parseStmt called on uninitialized nlParser.ast

Also parseStmt now discards nlTokKind.tkEOL (this shouldn't be left in long term, just a temporary solution)
---
 src/noether.nim               |  2 +-
 src/noether/parser/nodes.nim  |  2 --
 src/noether/parser/parser.nim | 15 +++++++++------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/noether.nim b/src/noether.nim
index 509b123..83c2b1b 100644
--- a/src/noether.nim
+++ b/src/noether.nim
@@ -2,4 +2,4 @@
 # uses this file as the main entry point of the application.
 
 when isMainModule:
-  echo "Noether Lang"
+  echo "Noether Lang v0.1.0"
diff --git a/src/noether/parser/nodes.nim b/src/noether/parser/nodes.nim
index a50eee0..bd737c6 100644
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@@ -42,6 +42,4 @@ proc newBiNode*(nKind: nlNodeKind): nlNode =
 
 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
-  echo node[]
-  echo node.toks
   node.toks.add(tok)
diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index 4654fb3..dcdcc06 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -24,28 +24,31 @@ proc parseStmt(parser: var nlParser): nlParseStat =
   # initialise build node as none just for the hell of it
   
   while parser.stream.progress():
-    echo parser.stream.currTok
+    echo "Current Token: ", parser.stream.currTok
     case parser.stream.currTok.tKind
     of tkDQUO:
       # Attempt to parse string literal
       if parser.parseStrLit() != nlParseStat.OK:
         echo "Unmatched Double Quotation! Malformed String Literal"
         echo parser.stream.line
-        echo repeat(" ", parser.stream.currTok.startPos), '^'
+        echo repeat(" ", parser.stream.currTok.startPos), '^', '\n'
       else:
         echo "Parsed String Literal"
-        echo parser.bnode[]
+        echo parser.bnode[], '\n'
     of tkSQUO:
       # Attempt to parse string literal
       if parser.parseChrLit() != nlParseStat.OK:
         echo "Unmatched Single Quotation! Malformed Character Literal"
         echo parser.stream.line
-        echo repeat(" ", parser.stream.currTok.startPos), '^'
+        echo repeat(" ", parser.stream.currTok.startPos), '^', '\n'
       else:
         echo "Parsed Character Literal"
-        echo parser.bnode[]
+        echo parser.bnode[], '\n'
+    of tkEOL:
+      # TODO: handle this case, don't just discard
+      discard
     else:
-      echo "blah blah unhandled case"
+      echo "blah blah unhandled case\n"
   result = nlParseStat.OK
       
 # Attempt to parse nlAST from nlTokStream

From f8f90fe92daa9330f213ac7148c2a18f0fec9b1f Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 02:24:26 +1000
Subject: [PATCH 04/12] Added ultra simple build script for debug

---
 .gitignore | 4 ++++
 src/ddemo  | 3 +++
 2 files changed, 7 insertions(+)
 create mode 100755 src/ddemo

diff --git a/.gitignore b/.gitignore
index 0a37b21..814ced8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
 __pycache__/
 bin/
+
+# TEMP: used while debugging 
+# (and cause I'm super duper lazy)
+src/nlx 
diff --git a/src/ddemo b/src/ddemo
new file mode 100755
index 0000000..35c7af2
--- /dev/null
+++ b/src/ddemo
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+nim c nlx.nim
+./nlx ../lang/demo/$1

From 8e6c0bbbfc6cd312530e5bea5e2d9ef209e8b28d Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 02:33:13 +1000
Subject: [PATCH 05/12] Fixed StrLit + ChrLit matching beyond EOL, also greed
 excludes satisfier

---
 src/noether/parser/parser.nim    | 8 +++-----
 src/noether/parser/parseutil.nim | 8 +++++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index dcdcc06..7da349f 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -15,16 +15,14 @@ proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
   )
 
 proc parseStrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatch(tkDQUO)
+  result = parser.parseMatchLine(tkDQUO)
 
 proc parseChrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatch(tkSQUO)
+  result = parser.parseMatchLine(tkSQUO)
   
 proc parseStmt(parser: var nlParser): nlParseStat = 
-  # initialise build node as none just for the hell of it
-  
   while parser.stream.progress():
-    echo "Current Token: ", parser.stream.currTok
+    echo "----- Current Token: ", parser.stream.currTok
     case parser.stream.currTok.tKind
     of tkDQUO:
       # Attempt to parse string literal
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
index 386b03a..8b9ef20 100644
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@@ -50,18 +50,20 @@ proc newParser*(tokStream: var nlTokStream): nlParser =
 proc greed(parser: var nlParser,
            satisfy: proc(tok: nlTok): bool): nlParseStat =
   while parser.stream.progress():
-    parser.bnode.addTok(parser.stream.currTok)
     if satisfy(parser.stream.currTok):
       return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.stream.currTok)
   result = nlParseStat.UNMATCHED
 
 proc greedLine(parser: var nlParser,
                satisfy: proc(tok: nlTok): bool): nlParseStat =
   while parser.stream.progress():
-    parser.bnode.addTok(parser.stream.currTok)
     if satisfy(parser.stream.currTok):
       return nlParseStat.OK
-    elif parser.stream.currTok.tKind == tkEOL:
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.stream.currTok)
+    if parser.stream.currTok.tKind == tkEOL:
       return nlParseStat.UNMATCHED
   result = nlParseStat.UNMATCHED
 

From 72a6075123cbb2749a58908766e718ccd2dc0991 Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 03:26:22 +1000
Subject: [PATCH 06/12] nlParser now exposes a subset of the nlTokStream
 interface

---
 src/noether/parser/parser.nim    | 14 +++++++-------
 src/noether/parser/parseutil.nim | 29 ++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index 7da349f..7daf91b 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -21,15 +21,15 @@ proc parseChrLit(parser: var nlParser): nlParseStat =
   result = parser.parseMatchLine(tkSQUO)
   
 proc parseStmt(parser: var nlParser): nlParseStat = 
-  while parser.stream.progress():
-    echo "----- Current Token: ", parser.stream.currTok
-    case parser.stream.currTok.tKind
+  while parser.progressStream():
+    echo "----- Current Token: ", parser.currTok
+    case parser.currTok.tKind
     of tkDQUO:
       # Attempt to parse string literal
       if parser.parseStrLit() != nlParseStat.OK:
         echo "Unmatched Double Quotation! Malformed String Literal"
-        echo parser.stream.line
-        echo repeat(" ", parser.stream.currTok.startPos), '^', '\n'
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
       else:
         echo "Parsed String Literal"
         echo parser.bnode[], '\n'
@@ -37,8 +37,8 @@ proc parseStmt(parser: var nlParser): nlParseStat =
       # Attempt to parse string literal
       if parser.parseChrLit() != nlParseStat.OK:
         echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo parser.stream.line
-        echo repeat(" ", parser.stream.currTok.startPos), '^', '\n'
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
       else:
         echo "Parsed Character Literal"
         echo parser.bnode[], '\n'
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
index 8b9ef20..d531490 100644
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@@ -20,6 +20,9 @@ type
     # the parser is currently modifying/building from
     # NOTE: bnode changes frequently, it is NOT the root
     bnode: nlNode
+    # flag indicating whether the parser is at
+    # the start of a new line (aka checking indentation)
+    inIndent: bool
     
 
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
@@ -38,6 +41,18 @@ proc newParser*(tokStream: var nlTokStream): nlParser =
     bnode: rootNode,
   )
 
+# Exposes a subset of the nlTokStream interface
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
+proc line(parser: var nlParser): string = parser.stream.line
+
+# Extends upon the functionality of nlTokStream.progress()
+proc progressStream*(parser: var nlParser): bool = 
+  result = parser.stream.progress()
+  if result and parser.currTok.tKind == tkEOL:
+    parser.inIndent = true
+  if 
+
+proc setNewLine()
   
 #[ "Greed" refers to something I mentioned in my discussion on
  |  Noether's grammar (in an EBNF-like language). Greed just
@@ -49,21 +64,21 @@ proc newParser*(tokStream: var nlTokStream): nlParser =
 # Returns false if the greed was never satisfied (OMG!!)
 proc greed(parser: var nlParser,
            satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.stream.progress():
-    if satisfy(parser.stream.currTok):
+  while parser.progressStream():
+    if satisfy(parser.currTok):
       return nlParseStat.OK
     # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.stream.currTok)
+    parser.bnode.addTok(parser.currTok)
   result = nlParseStat.UNMATCHED
 
 proc greedLine(parser: var nlParser,
                satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.stream.progress():
-    if satisfy(parser.stream.currTok):
+  while parser.progressStream():
+    if satisfy(parser.currTok):
       return nlParseStat.OK
     # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.stream.currTok)
-    if parser.stream.currTok.tKind == tkEOL:
+    parser.bnode.addTok(parser.currTok)
+    if parser.currTok.tKind == tkEOL:
       return nlParseStat.UNMATCHED
   result = nlParseStat.UNMATCHED
 

From 99db57dcfdf43e118f2812be560399e544acd0fb Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 08:48:31 +1000
Subject: [PATCH 07/12] YALR (Yet Another Lexer Refactor)

---
 lang/demo/single_toks.no                      |   2 +
 src/ddemo                                     |   8 ++
 src/nlx.nim                                   |  27 ++--
 src/noether/lexer/lstream.nim                 |  66 ----------
 src/noether/lexer/tok.nim                     |  29 +++--
 src/noether/lexer/tokbuilder.nim              | 123 ++++++++++++++++++
 src/noether/lexer/tokbuilding.nim             |  86 ------------
 .../lexer/{toktype.nim => tokkind.nim}        |   6 +-
 src/noether/lexer/tokstream.nim               |  71 +++++-----
 src/noether/lib/io.nim                        |   7 +
 10 files changed, 208 insertions(+), 217 deletions(-)
 create mode 100644 lang/demo/single_toks.no
 delete mode 100644 src/noether/lexer/lstream.nim
 create mode 100644 src/noether/lexer/tokbuilder.nim
 delete mode 100644 src/noether/lexer/tokbuilding.nim
 rename src/noether/lexer/{toktype.nim => tokkind.nim} (93%)
 create mode 100644 src/noether/lib/io.nim

diff --git a/lang/demo/single_toks.no b/lang/demo/single_toks.no
new file mode 100644
index 0000000..683090a
--- /dev/null
+++ b/lang/demo/single_toks.no
@@ -0,0 +1,2 @@
+[a]b(#)
+(c)d[e]
diff --git a/src/ddemo b/src/ddemo
index 35c7af2..af30039 100755
--- a/src/ddemo
+++ b/src/ddemo
@@ -1,3 +1,11 @@
 #!/usr/bin/env bash
+set -e
+
+if [ -z "$1" ]; then
+  echo "Usage: ddemo DEMOFILE"
+  echo "Demo files are located in lang/demo"
+  exit 1
+fi
+
 nim c nlx.nim
 ./nlx ../lang/demo/$1
diff --git a/src/nlx.nim b/src/nlx.nim
index 75f59d4..adf95f0 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -1,22 +1,19 @@
 import os
-import noether/lexer/tok
-import noether/lexer/tokstream
-import noether/parser/parser
+import noether/lib/io
+import noether/lexer/[tok, tokstream]
+# import noether/parser/parser
 
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
   echo "Noether Lang Extras v0.1.0 - nlx"
 
-  if paramCount() > 0:
-    let filename = paramStr(1)
-    var tokStream = newTokStream(filename, isFile=true)
-    
-    # # DumpTok
-    # while tokStream.progress():
-    #   echo tokStream.currTok
+  var inStream = if paramCount() > 0: streamFile(paramStr 1)
+                 else: streamString(readAll stdin)
 
-    # DumpTree
-    discard parse(tokStream)
-    
-  else:
-    echo "usage: nlx filename"
+  var stream = newTokStream(inStream)
+  # # DumpTok
+  while stream.progress():
+    echo stream.tok
+
+  # DumpTree
+  # discard parse(tokStream)
diff --git a/src/noether/lexer/lstream.nim b/src/noether/lexer/lstream.nim
deleted file mode 100644
index 44138e0..0000000
--- a/src/noether/lexer/lstream.nim
+++ /dev/null
@@ -1,66 +0,0 @@
-import std/streams
-import std/options
-
-import tok
-export tok
-
-type
-  # Character streaming for the nlTokStream
-  nlLStream = object
-    stream: Stream
-    # row/column positions
-    line*: string 
-    lineNum*: Natural
-    pos*: Natural
-
-proc streamFile*(filename: string): FileStream =
-  result = newFileStream(filename, fmRead)
-
-proc streamString*(str: string): StringStream =
-  result = newStringStream(str)
-
-proc newLStream*(content: string, isFile: bool = false): nlLStream =
-  result = nlLStream(
-    stream: if isFile: streamFile(content) else: streamString(content),
-    line: "",
-    lineNum: Natural 0,
-    pos: Natural 0,
-  )
-
-# Checks whether we've reached EOL
-# NOTE: also checks if we've surpassed it (ie invalid lstream.pos)
-proc atEOL*(lstream: nlLStream): bool = 
-  result = (lstream.pos >= lstream.line.len - 1)
-
-# Checks whether we are EXACTLY at EOL, but not surpassed
-proc exactlyEOL*(lstream: nlLStream): bool =
-  result = (lstream.pos == lstream.line.len - 1)
-
-# Checks whether we have surpassed EOL
-proc outOfBounds*(lstream: nlLStream): bool = 
-  result = (lstream.pos > lstream.line.len - 1)
-
-# Progress the lex stream to the next line (if available)
-proc progressLine*(lstream: var nlLStream): bool = 
-  if lstream.stream.readLine(lstream.line):
-    inc lstream.lineNum
-    lstream.pos = Natural 0
-    return true
-  return false
-
-# Progress the lex stream to the next character in the line
-# forcefully (aka does NOT check if we reached EOL)
-proc forceProgressChar*(lstream: var nlLStream) = 
-  inc lstream.pos
-
-# # Progress the lex stream to the next character (if available)
-# proc progressChar*(lstream: var nlLStream): bool =
-#   if not lstream.atEOL():
-#     lstream.forceProgressChar()
-#     result = true
-#   else:
-#     # attempt to progress next line past EOL
-#     result = lstream.progressLine()
-
-proc currChar*(lstream: nlLStream): char = 
-  result = lstream.line[lstream.pos]
diff --git a/src/noether/lexer/tok.nim b/src/noether/lexer/tok.nim
index 7715b8f..08aba66 100644
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@@ -1,22 +1,25 @@
-include toktype
+include tokkind
 
 type 
-  nlTok* = object
-    tKind*: nlTokKind
-    lit*: string
-    lineNum*: Natural
-    startPos*: Natural
-    endPos*: Natural
+  nlTok* = tuple
+    # NOTE: nlTokBuilder will mutate nlTok.kind
+    kind: nlTokKind
+    lit: string
+    lineNum: int
+    startPos: int
+    endPos: int
 
 # Generates an "empty" nlTok with only a startPos,
 # all other fields are expected to be filled out later.
-proc emptyTok*(startPos: int): nlTok =
-  result = nlTok(
-    tKind: tkNONE,
+proc emptyTok*(startPos: int): nlTok {.inline.} =
+  result = (
+    kind: tkNONE,
     lit: "",
-    startPos: Natural startPos,
+    lineNum: 0,
+    startPos: startPos,
+    endPos: startPos,
   )
 
 # Checks if an nlTok has tkNONE
-proc isUntyped*(tKind: nlTokKind): bool =
-  result = (tKind == tkNONE)
+proc isUntyped*(tok: nlTok): bool {.inline.} =
+  result = (tok.kind == tkNONE)
diff --git a/src/noether/lexer/tokbuilder.nim b/src/noether/lexer/tokbuilder.nim
new file mode 100644
index 0000000..357841a
--- /dev/null
+++ b/src/noether/lexer/tokbuilder.nim
@@ -0,0 +1,123 @@
+import 
+  streams, 
+  options
+
+import tok
+export tok
+
+type
+  # Abstracts the "building process" (lexing) 
+  # of nlTok objects from a given Stream of characters.
+  nlTokBuilder* = object
+    stream: Stream
+    tok: nlTok # the build token
+    # track line number, line content, etc
+    line: string
+    lineNum: int
+    pos: int 
+    # save char and pos and its token type
+    char: char
+    cTKind: nlTokKind
+
+proc atEOL(builder: nlTokBuilder): bool {.inline.} =
+  result = (builder.char == '\n')
+proc atEOF(builder: nlTokBuilder): bool {.inline.} =
+  result = (builder.char == '\0')
+
+# Initialise a new token builder
+proc newBuilder(stream: var Stream): nlTokBuilder =
+  # NOTE: initial builder.char value is arbitrary, 
+  # NOTE: but CANNOT be initialised to the default '\0'
+  result = nlTokBuilder(
+    stream: stream,
+    tok: emptyTok(0),
+    line: "",
+    lineNum: 1,
+    pos: -1,    # after initial readChar this -> 0
+    char: '\0', # use \0 as initial invalid char   
+  )
+
+
+#[ ====================================================== ]
+ | nlTokBuilder Internal Interface for Token Construction ]
+ ]#
+  
+# Reset the build token to be "empty"
+proc resetBuild(builder: var nlTokBuilder) =
+  builder.tok = emptyTok(builder.pos)
+
+# "Finishes" the build token by setting various properties
+proc finishBuild(builder: var nlTokBuilder) =
+  builder.tok.lineNum = builder.lineNum
+  builder.tok.endPos = builder.pos
+  builder.tok.lit = builder.line[builder.tok.startPos ..< builder.line.high]
+
+# Finish, return, and reset the build token
+proc flushBuild(builder: var nlTokBuilder): nlTok = 
+  echo "Flush @", builder.pos
+  finishBuild(builder)
+  result = builder.tok
+  resetBuild(builder)
+
+# Is the build token "compatible" with the current char?
+# NOTE: flushBuild() is called if incompatible
+proc isCompatibleBuild(builder: nlTokBuilder): bool =
+  result = (builder.cTKind == builder.tok.kind)
+
+# Inherit the build token's type from current char
+proc inherit(builder: var nlTokBuilder) = 
+  builder.tok.kind = builder.cTKind
+
+# Add a character to the nlTokBuilder's build token.
+# Flushes and returns the build token if "fully built",
+# and a boolean indicating whether the nlTokBuilder can progress.
+proc appendBuild(builder: var nlTokBuilder, flushed: var Option[nlTok]): bool =    
+  # untyped build tokens inherit type immediately
+  if builder.tok.isUntyped():
+    builder.inherit()
+  
+  # check if EOF reached
+  # if builder.atEOL():
+  #     echo "EOL DETECT 1"
+  #     result = false # DO NOT PROGRESS
+  #     flushed = some(flushBuild(builder))
+  # check character and build token compatability
+  if not isCompatibleBuild(builder):
+      # flush old build token, the new one inherits type
+      flushed = some(flushBuild(builder))
+      builder.inherit()
+      result = true # can progress
+  else:
+    flushed = none(nlTok)
+    result = true # can progress
+
+#[ ========================================== ]
+ | nlTokBuilder Char Stream Reading Interface ]
+ ]#
+      
+# Read the next char in the stream without
+# checking whether it is safe to do so
+proc forceReadChar(builder: var nlTokBuilder) {.inline.} = 
+  echo "read"
+  inc builder.pos
+  builder.char = builder.stream.readChar()
+  builder.cTKind = getTokKind(builder.char)
+  builder.line.add(builder.char)
+
+# Read the next char in the stream
+# NOTE: readChar raises IOError on error, returns \0 on EOF
+proc readChar(builder: var nlTokBuilder): bool =
+  if builder.atEOL():
+    echo "EOL DETECT 2"
+    inc builder.lineNum
+  # sets builder.char to '\0' if EOF
+  builder.forceReadChar()
+  result = builder.atEOF()
+
+# Read until EOL and return the current line
+# NOTE: Does NOT update the builder's state (unsafe)
+# NOTE: ONLY call if a lex/parse error needs displaying
+proc unsafeGetLine(builder: var nlTokBuilder): string =
+  while not builder.atEOL() and builder.readChar():
+    discard
+  result = builder.line
diff --git a/src/noether/lexer/tokbuilding.nim b/src/noether/lexer/tokbuilding.nim
deleted file mode 100644
index 46a2222..0000000
--- a/src/noether/lexer/tokbuilding.nim
+++ /dev/null
@@ -1,86 +0,0 @@
-include lstream
-
-type
-  # Provides a stream-like interface for lexing nlToks
-  # Internally reliant on the functionality of nlLStream
-  nlTokStream* = object
-    lstream: nlLStream
-    build: nlTok   # the build token
-    currTok*: nlTok # the current token
-    closed: bool # EOF + all tokens built
-
-# Generates an EOL token for the nlTokStream's state
-proc EOLTok(tokStream: nlTokStream): nlTok = 
-  result = nlTok(
-    tKind: tkEOL,
-    lit: "\0",
-    lineNum: Natural tokStream.lstream.lineNum,
-    startPos: Natural tokStream.lstream.pos,
-    endPos: Natural tokStream.lstream.pos,
-  )
-
-# Resets the build token to an "empty" nlTok
-proc resetBuild(tokStream: var nlTokStream) =
-  tokStream.build = emptyTok(tokStream.lstream.pos)
-
-# Completes a token generated by emptyTok()
-# based on the nlTokStream's nlLStream's
-# current line and character positions
-proc finishBuild(ts: var nlTokStream) =
-  ts.build.lineNum = Natural ts.lstream.lineNum
-  ts.build.endPos = Natural ts.lstream.pos
-  ts.build.lit = ts.lstream.line[ts.build.startPos ..< ts.build.endPos]
-
-# Returns the nlTokStream's build token and
-# empties the build token's contents.
-proc flushBuild(tokStream: var nlTokStream): nlTok = 
-  finishBuild(tokStream)
-  result = tokStream.build
-  resetBuild(tokStream)
-
-# Returns whether the build token has a set type yet.
-# This indicates that the build token should inherit
-# the nlTokKind of the nlLStream's next character.
-proc isUntypedBuild(tokStream: nlTokStream): bool =
-  result = tokStream.build.tKind.isUntyped()
-
-# Check whether an nlTokKind is "compatible" with the build token. 
-# NOTE: flushBuild() should be called when an incompatible token is discovered.
-proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
-  result = (tKind == tokStream.build.tKind)
-
-# Add a character to the nlTokStream's build token.
-# Flushes and returns the build token if "fully built",
-# and a boolean indicating whether the nlTokStream can progress.
-proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
-  # the "pos > EOL" invalid state is used intentionally
-  # to indicate all tokens have been built, and return EOL Token
-  if tokStream.lstream.outOfBounds():
-    buildTok = some(EOLTok(tokStream))
-    return true # can progress once more
-    
-  let tKind = getTokType(tokStream.lstream.currChar())
-  # untyped build tokens must inherited immediately
-  if isUntypedBuild(tokStream):
-    tokStream.build.tKind = tKind
-  
-  # check if EOL reached
-  if tokStream.lstream.atEOL():
-      # flush old build token, the new one can be left untyped
-      let compatible = isCompatibleBuild(tokStream, tKind)
-      result = false # DO NOT PROGRESS
-      if compatible:
-        # force the lstream into an invalid state by progressing beyond EOL
-        # we can then detect this state on the next progressBuild and return
-        # an EOL character (very unsafe implementation but it works well)
-        tokStream.lstream.forceProgressChar()
-      buildTok = some(flushBuild(tokStream))
-  # check character and build token compatability
-  elif not isCompatibleBuild(tokStream, tKind):
-      # flush old build token, the new one inherits type
-      buildTok = some(flushBuild(tokStream))
-      tokStream.build.tKind = tKind
-      result = true # can progress
-  else:
-    buildTok = none(nlTok)
-    result = true # can progress
diff --git a/src/noether/lexer/toktype.nim b/src/noether/lexer/tokkind.nim
similarity index 93%
rename from src/noether/lexer/toktype.nim
rename to src/noether/lexer/tokkind.nim
index 49add5b..3d1d7b6 100644
--- a/src/noether/lexer/toktype.nim
+++ b/src/noether/lexer/tokkind.nim
@@ -29,9 +29,11 @@ type
     tkHASH, # # Number Sign (Hashtag)
         
 # Classifies a character to its nlTokKind
-proc getTokType*(c: char): nlTokKind =
+proc getTokKind*(c: char): nlTokKind =
   case c:
-  of '\0', '\r', '\n':
+  of '\0':
+    result = tkEOF
+  of '\r', '\n':
     result = tkEOL
   of ' ', '\t':
     result = tkWTSP
diff --git a/src/noether/lexer/tokstream.nim b/src/noether/lexer/tokstream.nim
index 02a045e..309e9bb 100644
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@@ -1,52 +1,53 @@
-include tokbuilding
+include tokbuilder
+
+type
+  # Provides a stream-like interface for lexing.
+  # Implemented as a wrapper for nlTokBuilder.
+  nlTokStream* = object
+    builder: nlTokBuilder
+    tok*: nlTok # the current token
+    isClosed: bool # EOF + all tokens built
 
 # Initialises a new nlTokStream on a string or file
-proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
+proc newTokStream*(stream: var Stream): nlTokStream =
   result = nlTokStream(
-    lstream: newLStream(content, isFile=isFile),
-    closed: false,
+    builder: newBuilder(stream),
+    tok: emptyTok(0),
+    isClosed: false,
   )
-  # 1. initialise an empty build token 
-  # 2. progress to the first line
-  result.resetBuild()
-  discard result.lstream.progressLine()
-
-# Defines a short-hand notation for getting the current line
-proc line*(tokStream: nlTokStream): string =
-  result = tokStream.lstream.line
-  
-# Reimplements nlLStream.progressChar for nlTokStream
-# to account for additional structure (ie the build token)
-# NOTE: progressChar progresses to lstream's next char
-proc progressChar(tokStream: var nlTokStream): bool =
-  if not tokStream.lstream.atEOL():
-    tokStream.lstream.forceProgressChar()
-    result = true
-  else:
-    # attempt to progress to next line past EOL
-    result = tokStream.lstream.progressLine()
-    tokStream.resetBuild()  
   
+# Expose a subset of the nlTokBuilder interface
+proc line*(stream: nlTokStream): string =
+  result = stream.builder.line
+proc atEOL*(stream: nlTokStream): bool = 
+  result = stream.builder.atEOL()
+    
 # Generates and progress the next token in the nlTokStream.
 # via repeatedly calling progressBuild() and progressChar().
 # Returns a boolean indicating whether EOF has been reached.
-# NOTE: access the new token via `tokStream.tok`
-proc progress*(tokStream: var nlTokStream): bool =
+# NOTE: access the new token via `stream.tok`
+proc progress*(stream: var nlTokStream): bool =
   # Return prematurely if already closed
-  if tokStream.closed:
+  if stream.isClosed:
     return false
   while true:
+    # echo "\nProgressing..."
     var flushedTok: Option[nlTok]
     let 
-      canProgress = tokStream.progressBuild(flushedTok)
-      buildComplete = flushedTok.isSome
+      atEOF = stream.builder.readChar()
+      newTokBuilt = flushedTok.isSome
+    discard stream.builder.appendBuild(flushedTok)
+    echo flushedTok
+    echo "atEOF: ", atEOF, "\nnewTokBuilt: ", newTokBuilt
     # canProgress & EOF reached => no more tokens to build :)
     # NOTE: reachedEOF and not canProgress => more tokens unwrapping
-    if buildComplete:
+    if newTokBuilt:
       # return the finished build token, and save it as the current token
-      tokStream.currTok = flushedTok.get()
-    if canProgress and not tokStream.progressChar():
-      tokStream.closed = true
-      return buildComplete
-    elif buildComplete:
+      stream.tok = flushedTok.get()
+    # if canProgress and atEOF:
+    if atEOF:
+      if newTokBuilt:
+        stream.isClosed = true
+      return newTokBuilt
+    elif newTokBuilt:
       return true
diff --git a/src/noether/lib/io.nim b/src/noether/lib/io.nim
new file mode 100644
index 0000000..c7eb0eb
--- /dev/null
+++ b/src/noether/lib/io.nim
@@ -0,0 +1,7 @@
+import std/streams
+
+proc streamFile*(filename: string): Stream {.inline.} = # open `filename` read-only as a Stream
+  result = openFileStream(filename, fmRead) # raises IOError (instead of returning nil) if unopenable
+  
+proc streamString*(str: string): Stream {.inline.} =
+  result = newStringStream(str)

From 07a9bda9ba017f1d60eed2e719b55264bdc90dcd Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 09:11:49 +1000
Subject: [PATCH 08/12] Once again fixed EOL handling...

---
 src/noether/lexer/tokbuilder.nim | 51 +++++++++++---------------------
 src/noether/lexer/tokstream.nim  |  4 +--
 2 files changed, 19 insertions(+), 36 deletions(-)

diff --git a/src/noether/lexer/tokbuilder.nim b/src/noether/lexer/tokbuilder.nim
index 357841a..0d2f212 100644
--- a/src/noether/lexer/tokbuilder.nim
+++ b/src/noether/lexer/tokbuilder.nim
@@ -54,70 +54,55 @@ proc finishBuild(builder: var nlTokBuilder) =
 
 # Finish, return, and reset the build token
 proc flushBuild(builder: var nlTokBuilder): nlTok = 
-  echo "Flush @", builder.pos
   finishBuild(builder)
   result = builder.tok
   resetBuild(builder)
 
-# Is the build token "compatible" with the current char?
-# NOTE: flushBuild() is called if incompatible
-proc isCompatibleBuild(builder: nlTokBuilder): bool =
-  result = (builder.cTKind == builder.tok.kind)
+# Is the build token "compatible" with the current char? (if not then flushbuild)
+# NOTE: This implicitly handles Windows CRLF, Unix LF, & Mac OS CR compatability 
+# NOTE: since atEOL => '\n', but '\r' and '\n' are both tkEOL so they both flush.
+proc isIncompatibleBuild(builder: nlTokBuilder): bool =
+  result = (builder.cTKind != builder.tok.kind or builder.atEOL())
 
 # Inherit the build token's type from current char
 proc inherit(builder: var nlTokBuilder) = 
   builder.tok.kind = builder.cTKind
 
 # Add a character to the nlTokBuilder's build token.
-# Flushes and returns the build token if "fully built",
-# and a boolean indicating whether the nlTokBuilder can progress.
-proc appendBuild(builder: var nlTokBuilder, flushed: var Option[nlTok]): bool =    
+# Flushes and returns the build token if finished.
+proc appendBuild(builder: var nlTokBuilder): Option[nlTok] =    
   # untyped build tokens inherit type immediately
   if builder.tok.isUntyped():
     builder.inherit()
   
-  # check if EOF reached
-  # if builder.atEOL():
-  #     echo "EOL DETECT 1"
-  #     result = false # DO NOT PROGRESS
-  #     flushed = some(flushBuild(builder))
   # check character and build token compatability
-  if not isCompatibleBuild(builder):
+  if isIncompatibleBuild(builder):
       # flush old build token, the new one inherits type
-      flushed = some(flushBuild(builder))
+      result = some(flushBuild(builder))
       builder.inherit()
-      result = true # can progress
   else:
-    flushed = none(nlTok)
-    result = true # can progress
+    result = none(nlTok)
 
 #[ ========================================== ]
  | nlTokBuilder Char Stream Reading Interface ]
  ]#
       
-# Read the next char in the stream without
-# checking whether it is safe to do so
-proc forceReadChar(builder: var nlTokBuilder) {.inline.} = 
-  echo "read"
-  inc builder.pos
+# Read the next char in the stream
+# NOTE: readChar raises IOError on error, returns \0 on EOF
+proc readChar*(builder: var nlTokBuilder): bool =
+  if builder.atEOL():
+    inc builder.lineNum
+  # sets builder.char to '\0' if EOF
   builder.char = builder.stream.readChar()
   builder.cTKind = getTokKind(builder.char)
   builder.line.add(builder.char)
-
-# Read the next char in the stream
-# NOTE: readChar raises IOError on error, returns \0 on EOF
-proc readChar(builder: var nlTokBuilder): bool =
-  if builder.atEOL():
-    echo "EOL DETECT 2"
-    inc builder.lineNum
-  # sets builder.char to '\0' if EOF
-  builder.forceReadChar()
+  inc builder.pos
   result = builder.atEOF()
 
 # Read until EOL and return the current line
 # NOTE: Does NOT update the builder's state (unsafe)
 # NOTE: ONLY call if a lex/parse error needs displaying
-proc unsafeGetLine(builder: var nlTokBuilder): string =
+proc unsafeGetLine*(builder: var nlTokBuilder): string =
   while not builder.atEOL() and builder.readChar():
     discard
   result = builder.line
diff --git a/src/noether/lexer/tokstream.nim b/src/noether/lexer/tokstream.nim
index 309e9bb..e64f777 100644
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@@ -31,12 +31,10 @@ proc progress*(stream: var nlTokStream): bool =
   if stream.isClosed:
     return false
   while true:
-    # echo "\nProgressing..."
-    var flushedTok: Option[nlTok]
     let 
       atEOF = stream.builder.readChar()
+      flushedTok = stream.builder.appendBuild()
       newTokBuilt = flushedTok.isSome
-    discard stream.builder.appendBuild(flushedTok)
     echo flushedTok
     echo "atEOF: ", atEOF, "\nnewTokBuilt: ", newTokBuilt
     # canProgress & EOF reached => no more tokens to build :)

From d7fb1f0c899a189c7a48acbf5cb2c29d3c968a6d Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 09:38:08 +1000
Subject: [PATCH 09/12] Migrate nlTokBuilder + nlTokStream -> nlLexer

---
 src/nlx.nim                      |  12 +--
 src/noether/lexer/tokbuilder.nim | 132 ++++++++++++++++++++-----------
 2 files changed, 90 insertions(+), 54 deletions(-)

diff --git a/src/nlx.nim b/src/nlx.nim
index adf95f0..565aaae 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -1,19 +1,19 @@
 import os
 import noether/lib/io
-import noether/lexer/[tok, tokstream]
+import noether/lexer/tokbuilder
 # import noether/parser/parser
 
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
   echo "Noether Lang Extras v0.1.0 - nlx"
 
-  var inStream = if paramCount() > 0: streamFile(paramStr 1)
-                 else: streamString(readAll stdin)
+  var stream = if paramCount() > 0: streamFile(paramStr 1)
+               else: streamString(readAll stdin)
 
-  var stream = newTokStream(inStream)
+  var lexer = newLexer(stream)
   # # DumpTok
-  while stream.progress():
-    echo stream.tok
+  while lexer.progress():
+    echo lexer.tok
 
   # DumpTree
   # discard parse(tokStream)
diff --git a/src/noether/lexer/tokbuilder.nim b/src/noether/lexer/tokbuilder.nim
index 0d2f212..46e3b00 100644
--- a/src/noether/lexer/tokbuilder.nim
+++ b/src/noether/lexer/tokbuilder.nim
@@ -8,9 +8,11 @@ export tok
 type
   # Abstracts the "building process" (lexing) 
   # of nlTok objects from a given Stream of characters.
-  nlTokBuilder* = object
+  nlLexer* = object
     stream: Stream
-    tok: nlTok # the build token
+    done*: bool
+    tok*: nlTok # new finished token
+    btok: nlTok # the build token
     # track line number, line content, etc
     line: string
     lineNum: int
@@ -19,18 +21,18 @@ type
     char: char
     cTKind: nlTokKind
 
-proc atEOL(builder: nlTokBuilder): bool {.inline.} =
-  result = (builder.char == '\n')
-proc atEOF(builder: nlTokBuilder): bool {.inline.} =
-  result = (builder.char == '\0')
+proc atEOL(lexer: nlLexer): bool {.inline.} =
+  result = (lexer.char == '\n')
+proc atEOF(lexer: nlLexer): bool {.inline.} =
+  result = (lexer.char == '\0')
 
-# Initialise a new token builder
-proc newBuilder(stream: var Stream): nlTokBuilder =
-  # NOTE: initial builder.char value is arbitrary, 
-  # NOTE: but CANNOT be initialised to the default '\0'
-  result = nlTokBuilder(
+# Initialise a new lexer
+proc newLexer*(stream: var Stream): nlLexer =
+  result = nlLexer(
     stream: stream,
+    done: false,
     tok: emptyTok(0),
+    btok: emptyTok(0),
     line: "",
     lineNum: 1,
     pos: -1,    # after initial readChar this -> 0
@@ -39,70 +41,104 @@ proc newBuilder(stream: var Stream): nlTokBuilder =
 
 
 #[ ====================================================== ]
- | nlTokBuilder Internal Interface for Token Construction ]
+ | nlLexer Internal Interface for Token Construction ]
  ]#
   
 # Reset the build token to be "empty"
-proc resetBuild(builder: var nlTokBuilder) =
-  builder.tok = emptyTok(builder.pos)
+proc resetBuild(lexer: var nlLexer) =
+  lexer.btok = emptyTok(lexer.pos)
 
 # "Finishes" the build token by setting various properties
-proc finishBuild(builder: var nlTokBuilder) =
-  builder.tok.lineNum = builder.lineNum
-  builder.tok.endPos = builder.pos
-  builder.tok.lit = builder.line[builder.tok.startPos ..< builder.line.high]
+proc finishBuild(lexer: var nlLexer) =
+  lexer.btok.lineNum = lexer.lineNum
+  lexer.btok.endPos = lexer.pos
+  lexer.btok.lit = lexer.line[lexer.btok.startPos ..< lexer.line.high]
 
 # Finish, return, and reset the build token
-proc flushBuild(builder: var nlTokBuilder): nlTok = 
-  finishBuild(builder)
-  result = builder.tok
-  resetBuild(builder)
+proc flushBuild(lexer: var nlLexer): nlTok = 
+  finishBuild(lexer)
+  result = lexer.btok
+  resetBuild(lexer)
 
 # Is the build token "compatible" with the current char? (if not then flushbuild)
 # NOTE: This implicitly handles Windows CRLF, Unix LF, & Mac OS CR compatability 
 # NOTE: since atEOL => '\n', but '\r' and '\n' are both tkEOL so they both flush.
-proc isIncompatibleBuild(builder: nlTokBuilder): bool =
-  result = (builder.cTKind != builder.tok.kind or builder.atEOL())
+proc isIncompatibleBuild(lexer: nlLexer): bool =
+  result = (lexer.cTKind != lexer.btok.kind or lexer.atEOL())
 
 # Inherit the build token's type from current char
-proc inherit(builder: var nlTokBuilder) = 
-  builder.tok.kind = builder.cTKind
+proc inherit(lexer: var nlLexer) = 
+  lexer.btok.kind = lexer.cTKind
 
-# Add a character to the nlTokBuilder's build token.
+# Add a character to the nlLexer's build token.
 # Flushes and returns the build token if finished.
-proc appendBuild(builder: var nlTokBuilder): Option[nlTok] =    
+proc appendBuild(lexer: var nlLexer): Option[nlTok] =    
   # untyped build tokens inherit type immediately
-  if builder.tok.isUntyped():
-    builder.inherit()
+  if lexer.btok.isUntyped():
+    lexer.inherit()
   
   # check character and build token compatability
-  if isIncompatibleBuild(builder):
+  if isIncompatibleBuild(lexer):
       # flush old build token, the new one inherits type
-      result = some(flushBuild(builder))
-      builder.inherit()
+      result = some(flushBuild(lexer))
+      lexer.inherit()
   else:
     result = none(nlTok)
 
-#[ ========================================== ]
- | nlTokBuilder Char Stream Reading Interface ]
+#[ ========================================= ]
+ | nlLexer Internal Char Streaming Interface ]
  ]#
       
 # Read the next char in the stream
 # NOTE: readChar raises IOError on error, returns \0 on EOF
-proc readChar*(builder: var nlTokBuilder): bool =
-  if builder.atEOL():
-    inc builder.lineNum
-  # sets builder.char to '\0' if EOF
-  builder.char = builder.stream.readChar()
-  builder.cTKind = getTokKind(builder.char)
-  builder.line.add(builder.char)
-  inc builder.pos
-  result = builder.atEOF()
+proc readChar(lexer: var nlLexer): bool =
+  if lexer.atEOL():
+    inc lexer.lineNum
+  # sets lexer.char to '\0' if EOF
+  lexer.char = lexer.stream.readChar()
+  lexer.cTKind = getTokKind(lexer.char)
+  lexer.line.add(lexer.char)
+  inc lexer.pos
+  result = lexer.atEOF()
 
+#[ ========================
+ | nlLexer Public Interface
+ ]#
+  
 # Read until EOL and return the current line
-# NOTE: Does NOT update the builder's state (unsafe)
+# NOTE: Advances the char stream WITHOUT updating the build token (unsafe)
 # NOTE: ONLY call if a lex/parse error needs displaying
-proc unsafeGetLine*(builder: var nlTokBuilder): string =
-  while not builder.atEOL() and builder.readChar():
+proc unsafeGetLine*(lexer: var nlLexer): string =
+  while not lexer.atEOL() and not lexer.readChar(): # readChar() is true at EOF
     discard
-  result = builder.line
+  result = lexer.line
+
+# Lexes and returns the next token in the "token stream"
+# via repeatedly calling readChar() and appendBuild().
+# Returns true when a new token was built, false once no tokens remain.
+# NOTE: access the new token via `lexer.tok`
+proc progress*(lexer: var nlLexer): bool =
+  # Return prematurely if already closed
+  if lexer.done:
+    return false
+  while true:
+    let 
+      atEOF = lexer.readChar()
+      flushedTok = lexer.appendBuild()
+      newTokBuilt = flushedTok.isSome
+    
+    if newTokBuilt:
+      lexer.tok = flushedTok.get()
+    # if canProgress and atEOF:
+    # if atEOF:
+    #   if newTokBuilt:
+    #     stream.isClosed = true
+    #   return newTokBuilt
+    # elif newTokBuilt:
+    #   return true
+    if newTokBuilt:
+      if atEOF:
+        lexer.done = true
+      return true
+    elif atEOF:
+      return false

From f25e66e9ef7270bfaf1e2c6df5fa53467016e379 Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 09:41:15 +1000
Subject: [PATCH 10/12] Garbage collection *so to speak*

---
 src/nlx.nim                                   |  2 +-
 src/noether/lexer/{tokbuilder.nim => lex.nim} |  0
 src/noether/lexer/tokstream.nim               | 51 -------------------
 3 files changed, 1 insertion(+), 52 deletions(-)
 rename src/noether/lexer/{tokbuilder.nim => lex.nim} (100%)
 delete mode 100644 src/noether/lexer/tokstream.nim

diff --git a/src/nlx.nim b/src/nlx.nim
index 565aaae..c7ef1d9 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -1,6 +1,6 @@
 import os
 import noether/lib/io
-import noether/lexer/tokbuilder
+import noether/lexer/lex
 # import noether/parser/parser
 
 {.hint: "Don't forget to drink more water (^_^)".}
diff --git a/src/noether/lexer/tokbuilder.nim b/src/noether/lexer/lex.nim
similarity index 100%
rename from src/noether/lexer/tokbuilder.nim
rename to src/noether/lexer/lex.nim
diff --git a/src/noether/lexer/tokstream.nim b/src/noether/lexer/tokstream.nim
deleted file mode 100644
index e64f777..0000000
--- a/src/noether/lexer/tokstream.nim
+++ /dev/null
@@ -1,51 +0,0 @@
-include tokbuilder
-
-type
-  # Provides a stream-like interface for lexing.
-  # Implemented as a wrapper for nlTokBuilder.
-  nlTokStream* = object
-    builder: nlTokBuilder
-    tok*: nlTok # the current token
-    isClosed: bool # EOF + all tokens built
-
-# Initialises a new nlTokStream on a string or file
-proc newTokStream*(stream: var Stream): nlTokStream =
-  result = nlTokStream(
-    builder: newBuilder(stream),
-    tok: emptyTok(0),
-    isClosed: false,
-  )
-  
-# Expose a subset of the nlTokBuilder interface
-proc line*(stream: nlTokStream): string =
-  result = stream.builder.line
-proc atEOL*(stream: nlTokStream): bool = 
-  result = stream.builder.atEOL()
-    
-# Generates and progress the next token in the nlTokStream.
-# via repeatedly calling progressBuild() and progressChar().
-# Returns a boolean indicating whether EOF has been reached.
-# NOTE: access the new token via `stream.tok`
-proc progress*(stream: var nlTokStream): bool =
-  # Return prematurely if already closed
-  if stream.isClosed:
-    return false
-  while true:
-    let 
-      atEOF = stream.builder.readChar()
-      flushedTok = stream.builder.appendBuild()
-      newTokBuilt = flushedTok.isSome
-    echo flushedTok
-    echo "atEOF: ", atEOF, "\nnewTokBuilt: ", newTokBuilt
-    # canProgress & EOF reached => no more tokens to build :)
-    # NOTE: reachedEOF and not canProgress => more tokens unwrapping
-    if newTokBuilt:
-      # return the finished build token, and save it as the current token
-      stream.tok = flushedTok.get()
-    # if canProgress and atEOF:
-    if atEOF:
-      if newTokBuilt:
-        stream.isClosed = true
-      return newTokBuilt
-    elif newTokBuilt:
-      return true

From 1181ea97434788914cf37951e26f2f930d4a04ca Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 12:51:03 +1000
Subject: [PATCH 11/12] Restructure attempt #087 :(

---
 src/nlx.nim                      |  26 ++++--
 src/noether/lexer/lex.nim        |  44 ++++++++--
 src/noether/lexer/tok.nim        |  30 ++++++-
 src/noether/lexer/tokkind.nim    |  60 --------------
 src/noether/parser/parse.nim     |  58 +++++++++++++
 src/noether/parser/parser.nim    | 134 +++++++++++++++++++------------
 src/noether/parser/parseutil.nim |  90 ---------------------
 7 files changed, 227 insertions(+), 215 deletions(-)
 create mode 100644 src/noether/parser/parse.nim
 delete mode 100644 src/noether/parser/parseutil.nim

diff --git a/src/nlx.nim b/src/nlx.nim
index c7ef1d9..e145943 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -1,19 +1,29 @@
 import os
 import noether/lib/io
 import noether/lexer/lex
-# import noether/parser/parser
+import noether/parser/parse
 
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
   echo "Noether Lang Extras v0.1.0 - nlx"
 
-  var stream = if paramCount() > 0: streamFile(paramStr 1)
+  # really lazy argparse implementation (temporary)
+  let
+    paramC = paramCount() 
+    cmd = if paramC > 2: paramStr 1
+          else: "tok"
+
+  var stream = if paramC > 0: streamFile(paramStr paramC)
                else: streamString(readAll stdin)
 
   var lexer = newLexer(stream)
-  # # DumpTok
-  while lexer.progress():
-    echo lexer.tok
-
-  # DumpTree
-  # discard parse(tokStream)
+  if cmd == "tok":
+    # DumpTok
+    while lexer.progress():
+      echo lexer.tok
+  elif cmd == "tree":
+    discard
+    # DumpTree
+    # discard parse(tokStream)
+  else:
+    echo "Usage: nlx [tok|tree] <demo>\n    demo files are accessible at lang/demo"
diff --git a/src/noether/lexer/lex.nim b/src/noether/lexer/lex.nim
index 46e3b00..8f81b86 100644
--- a/src/noether/lexer/lex.nim
+++ b/src/noether/lexer/lex.nim
@@ -11,15 +11,16 @@ type
   nlLexer* = object
     stream: Stream
     done*: bool
-    tok*: nlTok # new finished token
+    # store current token and upcoming (build) token
+    tok*: nlTok # current token
     btok: nlTok # the build token
+    # save char and pos and its token type
+    char: char
+    cTKind: nlTokKind
     # track line number, line content, etc
     line: string
     lineNum: int
     pos: int 
-    # save char and pos and its token type
-    char: char
-    cTKind: nlTokKind
 
 proc atEOL(lexer: nlLexer): bool {.inline.} =
   result = (lexer.char == '\n')
@@ -37,8 +38,41 @@ proc newLexer*(stream: var Stream): nlLexer =
     lineNum: 1,
     pos: -1,    # after initial readChar this -> 0
     char: '\0', # use \0 as initial invalid char   
+    cTKind: tkNONE,
   )
 
+# Classifies the current character to its nlTokKind
+proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
+  case lexer.char:
+  of '\0':
+    result = tkEOF
+  of '\r', '\n':
+    result = tkEOL
+  of ' ', '\t':
+    result = tkWTSP
+  of '(':
+    result = tkLPAR
+  of ')':
+    result = tkRPAR
+  of '{':
+    result = tkLBRA
+  of '}':
+    result = tkRBRA
+  of '[':
+    result = tkLSQB
+  of ']':
+    result = tkRSQB
+  of '\'':
+    result = tkSQUO
+  of '\"':
+    result = tkDQUO
+  of '`':
+    result = tkGRVA
+  of '#':
+    result = tkHASH
+  else:
+    result = tkWORD
+    
 
 #[ ====================================================== ]
  | nlLexer Internal Interface for Token Construction ]
@@ -96,7 +130,7 @@ proc readChar(lexer: var nlLexer): bool =
     inc lexer.lineNum
   # sets lexer.char to '\0' if EOF
   lexer.char = lexer.stream.readChar()
-  lexer.cTKind = getTokKind(lexer.char)
+  lexer.cTKind = lexer.classifyTok()
   lexer.line.add(lexer.char)
   inc lexer.pos
   result = lexer.atEOF()
diff --git a/src/noether/lexer/tok.nim b/src/noether/lexer/tok.nim
index 08aba66..b19c341 100644
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@@ -1,4 +1,32 @@
-include tokkind
+type
+  # nlTokKind allows primitive nlToks to be typed,
+  # the nlTokKind enum should never be directly
+  # accessed. Use the interface in this file instead.
+  nlTokKind* = enum
+    tkNONE, # Placeholder Value
+
+    tkEOF,  # End of File
+    tkEOL,  # End of Line (\0 --> EOL)
+
+    tkWORD, # Alphanumeric token
+    tkSYMB, # Symbolic token
+
+    tkLNFD, # \r \n Line-Feed
+    tkWTSP, # ' ' \t Whitespace
+
+    # RESERVED SYMBOLS
+    tkLPAR, # ( Left Parenthesis
+    tkRPAR, # ) Right Parenthesis
+    tkLBRA, # { Left Brace
+    tkRBRA, # } Right Brace
+    tkLSQB, # [ Left Square Bracket
+    tkRSQB, # ] Right Square Bracket
+    # tkLANB, # < Left Angle Bracket
+    # tkRANB, # > Right Angle Bracket
+    tkSQUO, # ' Single Quotation Marking
+    tkDQUO, # " Double Quotation Marking
+    tkGRVA, # ` Grave Accent
+    tkHASH, # # Number Sign (Hashtag)
 
 type 
   nlTok* = tuple
diff --git a/src/noether/lexer/tokkind.nim b/src/noether/lexer/tokkind.nim
index 3d1d7b6..8b13789 100644
--- a/src/noether/lexer/tokkind.nim
+++ b/src/noether/lexer/tokkind.nim
@@ -1,61 +1 @@
-type
-  # nlTokKind allows primitive nlToks to be typed,
-  # the nlTokKind enum should never be directly
-  # accessed. Use the interface in this file instead.
-  nlTokKind* = enum
-    tkNONE, # Placeholder Value
 
-    tkEOF,  # End of File
-    tkEOL,  # End of Line (\0 --> EOL)
-
-    tkWORD, # Alphanumeric token
-    tkSYMB, # Symbolic token
-
-    tkLNFD, # \r \n Line-Feed
-    tkWTSP, # ' ' \t Whitespace
-
-    # RESERVED SYMBOLS
-    tkLPAR, # ( Left Parenthesis
-    tkRPAR, # ) Right Parenthesis
-    tkLBRA, # { Left Brace
-    tkRBRA, # } Right Brace
-    tkLSQB, # [ Left Square Bracket
-    tkRSQB, # ] Right Square Bracket
-    # tkLANB, # < Left Angle Bracket
-    # tkRANB, # > Right Angle Bracket
-    tkSQUO, # ' Single Quotation Marking
-    tkDQUO, # " Double Quotation Marking
-    tkGRVA, # ` Grave Accent
-    tkHASH, # # Number Sign (Hashtag)
-        
-# Classifies a character to its nlTokKind
-proc getTokKind*(c: char): nlTokKind =
-  case c:
-  of '\0':
-    result = tkEOF
-  of '\r', '\n':
-    result = tkEOL
-  of ' ', '\t':
-    result = tkWTSP
-  of '(':
-    result = tkLPAR
-  of ')':
-    result = tkRPAR
-  of '{':
-    result = tkLBRA
-  of '}':
-    result = tkRBRA
-  of '[':
-    result = tkLSQB
-  of ']':
-    result = tkRSQB
-  of '\'':
-    result = tkSQUO
-  of '\"':
-    result = tkDQUO
-  of '`':
-    result = tkGRVA
-  of '#':
-    result = tkHASH
-  else:
-    result = tkWORD
diff --git a/src/noether/parser/parse.nim b/src/noether/parser/parse.nim
new file mode 100644
index 0000000..0ecd14b
--- /dev/null
+++ b/src/noether/parser/parse.nim
@@ -0,0 +1,58 @@
+import strutils
+include parser
+
+# NOTE: Matching between two tokens will fill `node` with everything
+# NOTE: between those two tokens EXCLUDING the two tokens themselves.
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greed(
+    parser,
+    satisfyMatch(matchType),
+  )
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greedLine(
+    parser, 
+    satisfyMatch(matchType),
+  )
+
+proc parseStrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkDQUO)
+
+proc parseChrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkSQUO)
+  
+proc parseStmt(parser: var nlParser): nlParseStat = 
+  while parser.progressStream():
+    echo "----- Current Token: ", parser.currTok
+    case parser.currTok.tKind
+    of tkDQUO:
+      # Attempt to parse string literal
+      if parser.parseStrLit() != nlParseStat.OK:
+        echo "Unmatched Double Quotation! Malformed String Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
+      else:
+        echo "Parsed String Literal"
+        echo parser.bnode[], '\n'
+    of tkSQUO:
+      # Attempt to parse character literal
+      if parser.parseChrLit() != nlParseStat.OK:
+        echo "Unmatched Single Quotation! Malformed Character Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
+      else:
+        echo "Parsed Character Literal"
+        echo parser.bnode[], '\n'
+    of tkEOL:
+      # TODO: handle this case, don't just discard
+      discard
+    else:
+      echo "blah blah unhandled case\n"
+  result = nlParseStat.OK
+      
+# Attempt to parse nlAST from nlTokStream
+proc parse*(tokStream: var nlTokStream): nlAST =
+  var parser = newParser(tokStream)
+  echo ' '
+  discard parser.parseStmt()
+
+  result = parser.ast
diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index 7daf91b..7047e6d 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -1,58 +1,90 @@
-import strutils
-include parseutil
+import nodes
+import ../lexer/lex
 
-# NOTE: Matching between two tokens will fill `node` with everything
-# NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greed(
-    parser,
-    satisfyMatch(matchType),
-  )
-proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greedLine(
-    parser, 
-    satisfyMatch(matchType),
+type
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
+  # NOTE2: nlParseStat is marked pure out of habit that's all
+  nlParseStat* {.pure.} = enum
+    OK,
+    MARKER_FAIL,
+    UNMATCHED,
+    TOOBIG,
+
+  nlAST* = object
+    root: nlNode
+
+  nlParser* = object
+    stream: nlTokStream
+    ast: nlAST
+    # the "build node" is a reference to the AST node
+    # the parser is currently modifying/building from
+    # NOTE: bnode changes frequently, it is NOT the root
+    bnode: nlNode
+    # flag indicating whether the parser is at
+    # the start of a new line (aka checking indentation)
+    inIndent: bool
+    
+
+proc `*`(stat: nlParseStat, b: bool): nlParseStat =
+  result = if b: stat else: nlParseStat.OK
+
+proc isFail*(stat: nlParseStat): bool = 
+  result = (stat >= nlParseStat.MARKER_FAIL)
+
+proc newParser*(tokStream: var nlTokStream): nlParser =
+  let rootNode = newNode(nkNone)
+  result = nlParser(
+    stream: tokStream,
+    ast: nlAST(
+      root: rootNode
+    ),
+    bnode: rootNode,
   )
 
-proc parseStrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkDQUO)
+# Exposes a subset of the nlTokStream interface
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
+proc line(parser: var nlParser): string = parser.stream.line
 
-proc parseChrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkSQUO)
+# Extends upon the functionality of nlTokStream.progress()
+proc progressStream*(parser: var nlParser): bool = 
+  result = parser.stream.progress()
+  if result and parser.currTok.tKind == tkEOL:
+    parser.inIndent = true
+  if 
+
+proc setNewLine()
   
-proc parseStmt(parser: var nlParser): nlParseStat = 
+#[ "Greed" refers to something I mentioned in my discussion on
+ |  Noether's grammar (in an EBNF-like language). Greed just
+ |  means "everything until a condition is satisfied".
+ |  That condition should be supplied by a Nim procedural type.
+ ]#
+  
+# Greed will consume anything until a condition is satisfied
+# Returns UNMATCHED if the greed was never satisfied (OMG!!)
+proc greed(parser: var nlParser,
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
   while parser.progressStream():
-    echo "----- Current Token: ", parser.currTok
-    case parser.currTok.tKind
-    of tkDQUO:
-      # Attempt to parse string literal
-      if parser.parseStrLit() != nlParseStat.OK:
-        echo "Unmatched Double Quotation! Malformed String Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed String Literal"
-        echo parser.bnode[], '\n'
-    of tkSQUO:
-      # Attempt to parse string literal
-      if parser.parseChrLit() != nlParseStat.OK:
-        echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed Character Literal"
-        echo parser.bnode[], '\n'
-    of tkEOL:
-      # TODO: handle this case, don't just discard
-      discard
-    else:
-      echo "blah blah unhandled case\n"
-  result = nlParseStat.OK
-      
-# Attempt to parse nlAST from nlTokStream
-proc parse*(tokStream: var nlTokStream): nlAST =
-  var parser = newParser(tokStream)
-  echo ' '
-  discard parser.parseStmt()
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+  result = nlParseStat.UNMATCHED
 
-  result = parser.ast
+proc greedLine(parser: var nlParser,
+               satisfy: proc(tok: nlTok): bool): nlParseStat =
+  while parser.progressStream():
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+    if parser.currTok.tKind == tkEOL:
+      return nlParseStat.UNMATCHED
+  result = nlParseStat.UNMATCHED
+
+#[ Templates for generating greed satisfying conditions.
+ ]#
+
+# Satisfied if it finds nlTok of type matchType
+template satisfyMatch(matchType: nlTokKind): untyped  = 
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
deleted file mode 100644
index d531490..0000000
--- a/src/noether/parser/parseutil.nim
+++ /dev/null
@@ -1,90 +0,0 @@
-import nodes
-import ../lexer/tokstream
-
-type
-  # NOTE1: Values above MARKER_FAIL indicate a failed state
-  # NOTE2: nlParseStat is marked pure out of habit that's all
-  nlParseStat* {.pure.} = enum
-    OK,
-    MARKER_FAIL,
-    UNMATCHED,
-    TOOBIG,
-
-  nlAST* = object
-    root: nlNode
-
-  nlParser* = object
-    stream: nlTokStream
-    ast: nlAST
-    # the "build node" is a reference to the AST node
-    # the parser is currently modifying/building from
-    # NOTE: bnode changes frequently, it is NOT the root
-    bnode: nlNode
-    # flag indicating whether the parser is at
-    # the start of a new line (aka checking indentation)
-    inIndent: bool
-    
-
-proc `*`(stat: nlParseStat, b: bool): nlParseStat =
-  result = if b: stat else: nlParseStat.OK
-
-proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.MARKER_FAIL)
-
-proc newParser*(tokStream: var nlTokStream): nlParser =
-  let rootNode = newNode(nkNone)
-  result = nlParser(
-    stream: tokStream,
-    ast: nlAST(
-      root: rootNode
-    ),
-    bnode: rootNode,
-  )
-
-# Exposes a subset of the nlTokStream interface
-proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
-proc line(parser: var nlParser): string = parser.stream.line
-
-# Extends upon the functionality of nlTokStream.progress()
-proc progressStream*(parser: var nlParser): bool = 
-  result = parser.stream.progress()
-  if result and parser.currTok.tKind == tkEOL:
-    parser.inIndent = true
-  if 
-
-proc setNewLine()
-  
-#[ "Greed" refers to something I mentioned in my discussion on
- |  Noether's grammar (in an EBNF-like language). Greed just
- |  means "everything until a condition is satisified".
- |  That condition should be supplied by a Nim procedural type.
- ]#
-  
-# Greed will consume anything until a condition is satisfied
-# Returns false if the greed was never satisfied (OMG!!)
-proc greed(parser: var nlParser,
-           satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-  result = nlParseStat.UNMATCHED
-
-proc greedLine(parser: var nlParser,
-               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-    if parser.currTok.tKind == tkEOL:
-      return nlParseStat.UNMATCHED
-  result = nlParseStat.UNMATCHED
-
-#[ Templates for generating greed satisfying conditions.
- ]#
-
-# Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokKind): untyped  = 
-  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))

From bab593a86bead14ec44ef119c744a12a5ae02fa1 Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Sun, 6 Jul 2025 21:42:09 +1000
Subject: [PATCH 12/12] Typo fix + start of error handling

---
 py/m.py                    | 2 +-
 src/noether/lib/err.nim    | 1 +
 src/noether/parser/err.nim | 8 ++++++++
 3 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 src/noether/lib/err.nim
 create mode 100644 src/noether/parser/err.nim

diff --git a/py/m.py b/py/m.py
index e2b60c5..9576f4c 100644
--- a/py/m.py
+++ b/py/m.py
@@ -2,7 +2,7 @@
 import sys
 import readline
 
-from noether.math import *
+from noether.lib.math import *
 from noether.cli import *
 
 
diff --git a/src/noether/lib/err.nim b/src/noether/lib/err.nim
new file mode 100644
index 0000000..ec4c848
--- /dev/null
+++ b/src/noether/lib/err.nim
@@ -0,0 +1 @@
+proc echoErrorHeader(): =
diff --git a/src/noether/parser/err.nim b/src/noether/parser/err.nim
new file mode 100644
index 0000000..9cc5a73
--- /dev/null
+++ b/src/noether/parser/err.nim
@@ -0,0 +1,8 @@
+#[ Error codes and messaging directly associated with
+ | nlParser and its procedures is written here.
+ | General error functionality is in src/noether/lib/err.nim
+ ]#
+
+import parser
+
+