From 1181ea97434788914cf37951e26f2f930d4a04ca Mon Sep 17 00:00:00 2001
From: Emile Clark-Boman <eclarkboman@gmail.com>
Date: Thu, 19 Jun 2025 12:51:03 +1000
Subject: [PATCH] Restructure attempt #087 :(

---
 src/nlx.nim                      |  26 ++++--
 src/noether/lexer/lex.nim        |  44 ++++++++--
 src/noether/lexer/tok.nim        |  30 ++++++-
 src/noether/lexer/tokkind.nim    |  60 --------------
 src/noether/parser/parse.nim     |  58 +++++++++++++
 src/noether/parser/parser.nim    | 134 +++++++++++++++++++------------
 src/noether/parser/parseutil.nim |  90 ---------------------
 7 files changed, 227 insertions(+), 215 deletions(-)
 create mode 100644 src/noether/parser/parse.nim
 delete mode 100644 src/noether/parser/parseutil.nim
diff --git a/src/nlx.nim b/src/nlx.nim
index c7ef1d9..e145943 100644
--- a/src/nlx.nim
+++ b/src/nlx.nim
@@ -1,19 +1,29 @@
 import os
 import noether/lib/io
 import noether/lexer/lex
-# import noether/parser/parser
+import noether/parser/parse
 
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
   echo "Noether Lang Extras v0.1.0 - nlx"
 
-  var stream = if paramCount() > 0: streamFile(paramStr 1)
+  # really lazy argparse (temporary): `nlx [tok|tree] <file>`; the command is only read when at least two args are given, otherwise it defaults to "tok"
+  let
+    paramC = paramCount()
+    cmd = if paramC > 1: paramStr 1
+          else: "tok"
+
+  var stream = if paramC > 0: streamFile(paramStr paramC)
                else: streamString(readAll stdin)
 
   var lexer = newLexer(stream)
-  # # DumpTok
-  while lexer.progress():
-    echo lexer.tok
-
-  # DumpTree
-  # discard parse(tokStream)
+  if cmd == "tok":
+    # DumpTok: echo lexer.tok after every successful lexer.progress()
+    while lexer.progress():
+      echo lexer.tok
+  elif cmd == "tree":
+    discard
+    # DumpTree (disabled: this branch currently does nothing)
+    # discard parse(tokStream)
+  else:
+    echo "Usage: nlx [tok|tree] <demo>\n    demo files are accessible at lang/demo"
diff --git a/src/noether/lexer/lex.nim b/src/noether/lexer/lex.nim
index 46e3b00..8f81b86 100644
--- a/src/noether/lexer/lex.nim
+++ b/src/noether/lexer/lex.nim
@@ -11,15 +11,16 @@ type
   nlLexer* = object
     stream: Stream
     done*: bool
-    tok*: nlTok # new finished token
+    # store current token and upcoming (build) token
+    tok*: nlTok # current token
     btok: nlTok # the build token
+    # save char and pos and its token type
+    char: char
+    cTKind: nlTokKind
     # track line number, line content, etc
     line: string
     lineNum: int
     pos: int 
-    # save char and pos and its token type
-    char: char
-    cTKind: nlTokKind
 
 proc atEOL(lexer: nlLexer): bool {.inline.} =
   result = (lexer.char == '\n')
@@ -37,8 +38,41 @@ proc newLexer*(stream: var Stream): nlLexer =
     lineNum: 1,
     pos: -1,    # after initial readChar this -> 0
     char: '\0', # use \0 as initial invalid char   
+    cTKind: tkNONE,
   )
 
+# Classifies the lexer's current character (lexer.char) to its nlTokKind
+proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
+  case lexer.char:
+  of '\0':  # '\0' is the initial char and what readChar stores at EOF
+    result = tkEOF
+  of '\r', '\n':
+    result = tkEOL
+  of ' ', '\t':
+    result = tkWTSP
+  of '(':
+    result = tkLPAR
+  of ')':
+    result = tkRPAR
+  of '{':
+    result = tkLBRA
+  of '}':
+    result = tkRBRA
+  of '[':
+    result = tkLSQB
+  of ']':
+    result = tkRSQB
+  of '\'':
+    result = tkSQUO
+  of '\"':
+    result = tkDQUO
+  of '`':
+    result = tkGRVA
+  of '#':
+    result = tkHASH
+  else:  # every other character falls back to tkWORD
+    result = tkWORD
+    
 
 #[ ====================================================== ]
  | nlLexer Internal Interface for Token Construction ]
@@ -96,7 +130,7 @@ proc readChar(lexer: var nlLexer): bool =
     inc lexer.lineNum
   # sets lexer.char to '\0' if EOF
   lexer.char = lexer.stream.readChar()
-  lexer.cTKind = getTokKind(lexer.char)
+  lexer.cTKind = lexer.classifyTok()
   lexer.line.add(lexer.char)
   inc lexer.pos
   result = lexer.atEOF()
diff --git a/src/noether/lexer/tok.nim b/src/noether/lexer/tok.nim
index 08aba66..b19c341 100644
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@@ -1,4 +1,32 @@
-include tokkind
+type
+  # nlTokKind allows primitive nlToks to be typed,
+  # the nlTokKind enum should never be directly
+  # accessed. Use the interface in this file instead.
+  nlTokKind* = enum
+    tkNONE, # Placeholder Value
+
+    tkEOF,  # End of File
+    tkEOL,  # End of Line (\0 --> EOL) NOTE(review): classifyTok in lex.nim maps '\0' to tkEOF, not tkEOL - confirm
+
+    tkWORD, # Alphanumeric token
+    tkSYMB, # Symbolic token
+
+    tkLNFD, # \r \n Line-Feed NOTE(review): classifyTok in lex.nim maps \r and \n to tkEOL instead - confirm
+    tkWTSP, # ' ' \t Whitespace
+
+    # RESERVED SYMBOLS
+    tkLPAR, # ( Left Parenthesis
+    tkRPAR, # ) Right Parenthesis
+    tkLBRA, # { Left Brace
+    tkRBRA, # } Right Brace
+    tkLSQB, # [ Left Square Bracket
+    tkRSQB, # ] Right Square Bracket
+    # tkLANB, # < Left Angle Bracket
+    # tkRANB, # > Right Angle Bracket
+    tkSQUO, # ' Single Quotation Marking
+    tkDQUO, # " Double Quotation Marking
+    tkGRVA, # ` Grave Accent
+    tkHASH, # # Number Sign (Hashtag)
 
 type 
   nlTok* = tuple
diff --git a/src/noether/lexer/tokkind.nim b/src/noether/lexer/tokkind.nim
index 3d1d7b6..8b13789 100644
--- a/src/noether/lexer/tokkind.nim
+++ b/src/noether/lexer/tokkind.nim
@@ -1,61 +1 @@
-type
-  # nlTokKind allows primitive nlToks to be typed,
-  # the nlTokKind enum should never be directly
-  # accessed. Use the interface in this file instead.
-  nlTokKind* = enum
-    tkNONE, # Placeholder Value
 
-    tkEOF,  # End of File
-    tkEOL,  # End of Line (\0 --> EOL)
-
-    tkWORD, # Alphanumeric token
-    tkSYMB, # Symbolic token
-
-    tkLNFD, # \r \n Line-Feed
-    tkWTSP, # ' ' \t Whitespace
-
-    # RESERVED SYMBOLS
-    tkLPAR, # ( Left Parenthesis
-    tkRPAR, # ) Right Parenthesis
-    tkLBRA, # { Left Brace
-    tkRBRA, # } Right Brace
-    tkLSQB, # [ Left Square Bracket
-    tkRSQB, # ] Right Square Bracket
-    # tkLANB, # < Left Angle Bracket
-    # tkRANB, # > Right Angle Bracket
-    tkSQUO, # ' Single Quotation Marking
-    tkDQUO, # " Double Quotation Marking
-    tkGRVA, # ` Grave Accent
-    tkHASH, # # Number Sign (Hashtag)
-        
-# Classifies a character to its nlTokKind
-proc getTokKind*(c: char): nlTokKind =
-  case c:
-  of '\0':
-    result = tkEOF
-  of '\r', '\n':
-    result = tkEOL
-  of ' ', '\t':
-    result = tkWTSP
-  of '(':
-    result = tkLPAR
-  of ')':
-    result = tkRPAR
-  of '{':
-    result = tkLBRA
-  of '}':
-    result = tkRBRA
-  of '[':
-    result = tkLSQB
-  of ']':
-    result = tkRSQB
-  of '\'':
-    result = tkSQUO
-  of '\"':
-    result = tkDQUO
-  of '`':
-    result = tkGRVA
-  of '#':
-    result = tkHASH
-  else:
-    result = tkWORD
diff --git a/src/noether/parser/parse.nim b/src/noether/parser/parse.nim
new file mode 100644
index 0000000..0ecd14b
--- /dev/null
+++ b/src/noether/parser/parse.nim
@@ -0,0 +1,58 @@
+import strutils
+include parser
+
+# NOTE: Matching between two tokens will fill `parser.bnode` with everything
+# NOTE: between those two tokens EXCLUDING the two tokens themselves.
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greed(  # consume tokens until one of kind matchType is found
+    parser,
+    satisfyMatch(matchType),
+  )
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greedLine(  # like parseMatch, but greedLine returns UNMATCHED once it reaches a tkEOL
+    parser, 
+    satisfyMatch(matchType),
+  )
+
+proc parseStrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkDQUO)  # collect tokens up to the next double quote on this line
+
+proc parseChrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkSQUO)  # collect tokens up to the next single quote on this line
+  
+proc parseStmt(parser: var nlParser): nlParseStat = 
+  while parser.progressStream():  # one token per iteration, until progressStream returns false
+    echo "----- Current Token: ", parser.currTok
+    case parser.currTok.tKind
+    of tkDQUO:
+      # Attempt to parse string literal
+      if parser.parseStrLit() != nlParseStat.OK:
+        echo "Unmatched Double Quotation! Malformed String Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'  # pad by currTok.startPos, then print a caret
+      else:
+        echo "Parsed String Literal"
+        echo parser.bnode[], '\n'
+    of tkSQUO:
+      # Attempt to parse character literal
+      if parser.parseChrLit() != nlParseStat.OK:
+        echo "Unmatched Single Quotation! Malformed Character Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'  # pad by currTok.startPos, then print a caret
+      else:
+        echo "Parsed Character Literal"
+        echo parser.bnode[], '\n'
+    of tkEOL:
+      # TODO: handle this case, don't just discard
+      discard
+    else:
+      echo "blah blah unhandled case\n"
+  result = nlParseStat.OK  # always OK: literal failures above are only echoed, not propagated
+      
+# Attempt to parse nlAST from nlTokStream (parseStmt's status is discarded; parser.ast is always returned)
+proc parse*(tokStream: var nlTokStream): nlAST =
+  var parser = newParser(tokStream)
+  echo ' '
+  discard parser.parseStmt()
+
+  result = parser.ast
diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim
index 7daf91b..7047e6d 100644
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@@ -1,58 +1,90 @@
-import strutils
-include parseutil
+import nodes
+import ../lexer/lex
 
-# NOTE: Matching between two tokens will fill `node` with everything
-# NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greed(
-    parser,
-    satisfyMatch(matchType),
-  )
-proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greedLine(
-    parser, 
-    satisfyMatch(matchType),
+type
+  # NOTE1: Values at or above MARKER_FAIL indicate a failed state (isFail tests `>=`)
+  # NOTE2: nlParseStat is marked pure out of habit that's all
+  nlParseStat* {.pure.} = enum
+    OK,
+    MARKER_FAIL,
+    UNMATCHED,
+    TOOBIG,
+
+  nlAST* = object
+    root: nlNode
+
+  nlParser* = object
+    stream: nlTokStream
+    ast: nlAST
+    # the "build node" is a reference to the AST node
+    # the parser is currently modifying/building from
+    # NOTE: bnode changes frequently, it is NOT the root
+    bnode: nlNode
+    # flag indicating whether the parser is at
+    # the start of a new line (aka checking indentation)
+    inIndent: bool
+    
+
+proc `*`(stat: nlParseStat, b: bool): nlParseStat =
+  result = if b: stat else: nlParseStat.OK  # keep stat only when b is true, otherwise collapse to OK
+
+proc isFail*(stat: nlParseStat): bool = 
+  result = (stat >= nlParseStat.MARKER_FAIL)  # MARKER_FAIL and every value declared after it count as failure
+
+proc newParser*(tokStream: var nlTokStream): nlParser =
+  let rootNode = newNode(nkNone)  # one node used for both ast.root and the initial bnode
+  result = nlParser(
+    stream: tokStream,
+    ast: nlAST(
+      root: rootNode
+    ),
+    bnode: rootNode,
   )
 
-proc parseStrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkDQUO)
+# Exposes a subset of the nlTokStream interface (thin forwarders to parser.stream)
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
+proc line(parser: var nlParser): string = parser.stream.line
 
-proc parseChrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkSQUO)
+# Extends upon nlTokStream.progress(): returns its result and sets inIndent when the new current token is tkEOL
+proc progressStream*(parser: var nlParser): bool =
+  result = parser.stream.progress()
+  if result and parser.currTok.tKind == tkEOL:
+    parser.inIndent = true
+  # TODO: unfinished second check (it was a bare `if` with no condition, which does not parse)
+
+proc setNewLine() = discard # TODO: stub body; a forward declaration with no implementation does not compile
   
-proc parseStmt(parser: var nlParser): nlParseStat = 
+#[ "Greed" refers to something I mentioned in my discussion on
+ |  Noether's grammar (in an EBNF-like language). Greed just
+ |  means "everything until a condition is satisfied".
+ |  That condition should be supplied by a Nim procedural type.
+ ]#
+  
+# Greed will consume anything until a condition is satisfied
+# Returns nlParseStat.UNMATCHED if the greed was never satisfied (OMG!!)
+proc greed(parser: var nlParser,
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
   while parser.progressStream():
-    echo "----- Current Token: ", parser.currTok
-    case parser.currTok.tKind
-    of tkDQUO:
-      # Attempt to parse string literal
-      if parser.parseStrLit() != nlParseStat.OK:
-        echo "Unmatched Double Quotation! Malformed String Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed String Literal"
-        echo parser.bnode[], '\n'
-    of tkSQUO:
-      # Attempt to parse string literal
-      if parser.parseChrLit() != nlParseStat.OK:
-        echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed Character Literal"
-        echo parser.bnode[], '\n'
-    of tkEOL:
-      # TODO: handle this case, don't just discard
-      discard
-    else:
-      echo "blah blah unhandled case\n"
-  result = nlParseStat.OK
-      
-# Attempt to parse nlAST from nlTokStream
-proc parse*(tokStream: var nlTokStream): nlAST =
-  var parser = newParser(tokStream)
-  echo ' '
-  discard parser.parseStmt()
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+  result = nlParseStat.UNMATCHED
 
-  result = parser.ast
+proc greedLine(parser: var nlParser,
+               satisfy: proc(tok: nlTok): bool): nlParseStat =
+  while parser.progressStream():  # same loop as greed, but it also stops at the first tkEOL
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+    if parser.currTok.tKind == tkEOL:  # line ended without a match (the tkEOL was already added to bnode)
+      return nlParseStat.UNMATCHED
+  result = nlParseStat.UNMATCHED  # progressStream returned false before any match
+
+#[ Templates for generating greed satisfying conditions.
+ ]#
+
+# Expands to a proc(tok: nlTok): bool that is satisfied when tok.tKind == matchType
+template satisfyMatch(matchType: nlTokKind): untyped  = 
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim
deleted file mode 100644
index d531490..0000000
--- a/src/noether/parser/parseutil.nim
+++ /dev/null
@@ -1,90 +0,0 @@
-import nodes
-import ../lexer/tokstream
-
-type
-  # NOTE1: Values above MARKER_FAIL indicate a failed state
-  # NOTE2: nlParseStat is marked pure out of habit that's all
-  nlParseStat* {.pure.} = enum
-    OK,
-    MARKER_FAIL,
-    UNMATCHED,
-    TOOBIG,
-
-  nlAST* = object
-    root: nlNode
-
-  nlParser* = object
-    stream: nlTokStream
-    ast: nlAST
-    # the "build node" is a reference to the AST node
-    # the parser is currently modifying/building from
-    # NOTE: bnode changes frequently, it is NOT the root
-    bnode: nlNode
-    # flag indicating whether the parser is at
-    # the start of a new line (aka checking indentation)
-    inIndent: bool
-    
-
-proc `*`(stat: nlParseStat, b: bool): nlParseStat =
-  result = if b: stat else: nlParseStat.OK
-
-proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.MARKER_FAIL)
-
-proc newParser*(tokStream: var nlTokStream): nlParser =
-  let rootNode = newNode(nkNone)
-  result = nlParser(
-    stream: tokStream,
-    ast: nlAST(
-      root: rootNode
-    ),
-    bnode: rootNode,
-  )
-
-# Exposes a subset of the nlTokStream interface
-proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
-proc line(parser: var nlParser): string = parser.stream.line
-
-# Extends upon the functionality of nlTokStream.progress()
-proc progressStream*(parser: var nlParser): bool = 
-  result = parser.stream.progress()
-  if result and parser.currTok.tKind == tkEOL:
-    parser.inIndent = true
-  if 
-
-proc setNewLine()
-  
-#[ "Greed" refers to something I mentioned in my discussion on
- |  Noether's grammar (in an EBNF-like language). Greed just
- |  means "everything until a condition is satisified".
- |  That condition should be supplied by a Nim procedural type.
- ]#
-  
-# Greed will consume anything until a condition is satisfied
-# Returns false if the greed was never satisfied (OMG!!)
-proc greed(parser: var nlParser,
-           satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-  result = nlParseStat.UNMATCHED
-
-proc greedLine(parser: var nlParser,
-               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-    if parser.currTok.tKind == tkEOL:
-      return nlParseStat.UNMATCHED
-  result = nlParseStat.UNMATCHED
-
-#[ Templates for generating greed satisfying conditions.
- ]#
-
-# Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokKind): untyped  = 
-  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))