Restructure attempt #087 :(

2025-06-19 12:51:03 +10:00 · 2025-06-19 12:51:03 +10:00 · 1181ea9743
commit 1181ea9743
parent f25e66e9ef
7 changed files with 227 additions and 215 deletions
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -1,19 +1,29 @@
 import os
 import noether/lib/io
 import noether/lexer/lex
-# import noether/parser/parser
+import noether/parser/parse

 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"

-  var stream = if paramCount() > 0: streamFile(paramStr 1)
+  # really lazy argparse (temporary): `nlx [tok|tree] <file>`, cmd defaults to "tok"
+  let
+    paramC = paramCount()
+    cmd = if paramC > 1: paramStr 1
+          else: "tok"
+
+  var stream = if paramC > 0: streamFile(paramStr paramC)
               else: streamString(readAll stdin)

  var lexer = newLexer(stream)
-  # # DumpTok
-  while lexer.progress():
-    echo lexer.tok
-
-  # DumpTree
-  # discard parse(tokStream)
+  if cmd == "tok":
+    # DumpTok
+    while lexer.progress():
+      echo lexer.tok
+  elif cmd == "tree":
+    discard
+    # DumpTree
+    # discard parse(tokStream)
+  else:
+    echo "Usage: nlx [tok|tree] <demo>\n    demo files are accessible at lang/demo"
--- a/src/noether/lexer/lex.nim
+++ b/src/noether/lexer/lex.nim
@ -11,15 +11,16 @@ type
  nlLexer* = object
    stream: Stream
    done*: bool
-    tok*: nlTok # new finished token
+    # store current token and upcoming (build) token
+    tok*: nlTok # current token
    btok: nlTok # the build token
+    # save char and pos and its token type
+    char: char
+    cTKind: nlTokKind
    # track line number, line content, etc
    line: string
    lineNum: int
    pos: int 
-    # save char and pos and its token type
-    char: char
-    cTKind: nlTokKind

 proc atEOL(lexer: nlLexer): bool {.inline.} =
  result = (lexer.char == '\n')
@ -37,8 +38,41 @@ proc newLexer*(stream: var Stream): nlLexer =
    lineNum: 1,
    pos: -1,    # after initial readChar this -> 0
    char: '\0', # use \0 as initial invalid char   
+    cTKind: tkNONE,
  )

+# Classifies the current character to its nlTokKind
+proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
+  case lexer.char:
+  of '\0':
+    result = tkEOF
+  of '\r', '\n':
+    result = tkEOL
+  of ' ', '\t':
+    result = tkWTSP
+  of '(':
+    result = tkLPAR
+  of ')':
+    result = tkRPAR
+  of '{':
+    result = tkLBRA
+  of '}':
+    result = tkRBRA
+  of '[':
+    result = tkLSQB
+  of ']':
+    result = tkRSQB
+  of '\'':
+    result = tkSQUO
+  of '\"':
+    result = tkDQUO
+  of '`':
+    result = tkGRVA
+  of '#':
+    result = tkHASH
+  else:
+    result = tkWORD
+    

 #[ ====================================================== ]
 | nlLexer Internal Interface for Token Construction ]
@ -96,7 +130,7 @@ proc readChar(lexer: var nlLexer): bool =
    inc lexer.lineNum
  # sets lexer.char to '\0' if EOF
  lexer.char = lexer.stream.readChar()
-  lexer.cTKind = getTokKind(lexer.char)
+  lexer.cTKind = lexer.classifyTok()
  lexer.line.add(lexer.char)
  inc lexer.pos
  result = lexer.atEOF()
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@ -1,4 +1,32 @@
-include tokkind
+type
+  # nlTokKind allows primitive nlToks to be typed,
+  # the nlTokKind enum should never be directly
+  # accessed. Use the interface in this file instead.
+  nlTokKind* = enum
+    tkNONE, # Placeholder Value
+
+    tkEOF,  # End of File
+    tkEOL,  # End of Line (\0 --> EOL)
+
+    tkWORD, # Alphanumeric token
+    tkSYMB, # Symbolic token
+
+    tkLNFD, # \r \n Line-Feed
+    tkWTSP, # ' ' \t Whitespace
+
+    # RESERVED SYMBOLS
+    tkLPAR, # ( Left Parenthesis
+    tkRPAR, # ) Right Parenthesis
+    tkLBRA, # { Left Brace
+    tkRBRA, # } Right Brace
+    tkLSQB, # [ Left Square Bracket
+    tkRSQB, # ] Right Square Bracket
+    # tkLANB, # < Left Angle Bracket
+    # tkRANB, # > Right Angle Bracket
+    tkSQUO, # ' Single Quotation Marking
+    tkDQUO, # " Double Quotation Marking
+    tkGRVA, # ` Grave Accent
+    tkHASH, # # Number Sign (Hashtag)

 type 
  nlTok* = tuple
--- a/src/noether/lexer/tokkind.nim
+++ b/src/noether/lexer/tokkind.nim
@ -1,61 +1 @@
-type
-  # nlTokKind allows primitive nlToks to be typed,
-  # the nlTokKind enum should never be directly
-  # accessed. Use the interface in this file instead.
-  nlTokKind* = enum
-    tkNONE, # Placeholder Value

-    tkEOF,  # End of File
-    tkEOL,  # End of Line (\0 --> EOL)
-
-    tkWORD, # Alphanumeric token
-    tkSYMB, # Symbolic token
-
-    tkLNFD, # \r \n Line-Feed
-    tkWTSP, # ' ' \t Whitespace
-
-    # RESERVED SYMBOLS
-    tkLPAR, # ( Left Parenthesis
-    tkRPAR, # ) Right Parenthesis
-    tkLBRA, # { Left Brace
-    tkRBRA, # } Right Brace
-    tkLSQB, # [ Left Square Bracket
-    tkRSQB, # ] Right Square Bracket
-    # tkLANB, # < Left Angle Bracket
-    # tkRANB, # > Right Angle Bracket
-    tkSQUO, # ' Single Quotation Marking
-    tkDQUO, # " Double Quotation Marking
-    tkGRVA, # ` Grave Accent
-    tkHASH, # # Number Sign (Hashtag)
-        
-# Classifies a character to its nlTokKind
-proc getTokKind*(c: char): nlTokKind =
-  case c:
-  of '\0':
-    result = tkEOF
-  of '\r', '\n':
-    result = tkEOL
-  of ' ', '\t':
-    result = tkWTSP
-  of '(':
-    result = tkLPAR
-  of ')':
-    result = tkRPAR
-  of '{':
-    result = tkLBRA
-  of '}':
-    result = tkRBRA
-  of '[':
-    result = tkLSQB
-  of ']':
-    result = tkRSQB
-  of '\'':
-    result = tkSQUO
-  of '\"':
-    result = tkDQUO
-  of '`':
-    result = tkGRVA
-  of '#':
-    result = tkHASH
-  else:
-    result = tkWORD
--- a/src/noether/parser/parse.nim
+++ b/src/noether/parser/parse.nim
@ -0,0 +1,58 @@
+import strutils
+include parser
+
+# NOTE: Matching between two tokens will fill `node` with everything
+# NOTE: between those two tokens EXCLUDING the two tokens themselves.
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greed(
+    parser,
+    satisfyMatch(matchType),
+  )
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greedLine(
+    parser, 
+    satisfyMatch(matchType),
+  )
+
+proc parseStrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkDQUO)
+
+proc parseChrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatchLine(tkSQUO)
+  
+proc parseStmt(parser: var nlParser): nlParseStat = 
+  while parser.progressStream():
+    echo "----- Current Token: ", parser.currTok
+    case parser.currTok.tKind
+    of tkDQUO:
+      # Attempt to parse string literal
+      if parser.parseStrLit() != nlParseStat.OK:
+        echo "Unmatched Double Quotation! Malformed String Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
+      else:
+        echo "Parsed String Literal"
+        echo parser.bnode[], '\n'
+    of tkSQUO:
+      # Attempt to parse character literal
+      if parser.parseChrLit() != nlParseStat.OK:
+        echo "Unmatched Single Quotation! Malformed Character Literal"
+        echo parser.line
+        echo repeat(" ", parser.currTok.startPos), '^', '\n'
+      else:
+        echo "Parsed Character Literal"
+        echo parser.bnode[], '\n'
+    of tkEOL:
+      # TODO: handle this case, don't just discard
+      discard
+    else:
+      echo "blah blah unhandled case\n"
+  result = nlParseStat.OK
+      
+# Attempt to parse nlAST from nlTokStream
+proc parse*(tokStream: var nlTokStream): nlAST =
+  var parser = newParser(tokStream)
+  echo ' '
+  discard parser.parseStmt()
+
+  result = parser.ast
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -1,58 +1,90 @@
-import strutils
-include parseutil
+import nodes
+import ../lexer/lex

-# NOTE: Matching between two tokens will fill `node` with everything
-# NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greed(
-    parser,
-    satisfyMatch(matchType),
-  )
-proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-  result = greedLine(
-    parser, 
-    satisfyMatch(matchType),
+type
+  # NOTE1: Values at or above MARKER_FAIL indicate a failed state (see isFail)
+  # NOTE2: nlParseStat is marked pure out of habit that's all
+  nlParseStat* {.pure.} = enum
+    OK,
+    MARKER_FAIL,
+    UNMATCHED,
+    TOOBIG,
+
+  nlAST* = object
+    root: nlNode
+
+  nlParser* = object
+    stream: nlTokStream
+    ast: nlAST
+    # the "build node" is a reference to the AST node
+    # the parser is currently modifying/building from
+    # NOTE: bnode changes frequently, it is NOT the root
+    bnode: nlNode
+    # flag indicating whether the parser is at
+    # the start of a new line (aka checking indentation)
+    inIndent: bool
+    
+
+proc `*`(stat: nlParseStat, b: bool): nlParseStat =
+  result = if b: stat else: nlParseStat.OK
+
+proc isFail*(stat: nlParseStat): bool = 
+  result = (stat >= nlParseStat.MARKER_FAIL)
+
+proc newParser*(tokStream: var nlTokStream): nlParser =
+  let rootNode = newNode(nkNone)
+  result = nlParser(
+    stream: tokStream,
+    ast: nlAST(
+      root: rootNode
+    ),
+    bnode: rootNode,
  )

-proc parseStrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkDQUO)
+# Exposes a subset of the nlTokStream interface
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
+proc line(parser: var nlParser): string = parser.stream.line

-proc parseChrLit(parser: var nlParser): nlParseStat =
-  result = parser.parseMatchLine(tkSQUO)
+# Extends nlTokStream.progress(): also sets inIndent when the new token is tkEOL
+proc progressStream*(parser: var nlParser): bool =
+  result = parser.stream.progress()
+  if result and parser.currTok.tKind == tkEOL:
+    parser.inIndent = true
+  # TODO: dangling `if` (a syntax error) commented out; its condition is unwritten
+
+# TODO: proc setNewLine() -- stub commented out until it has a signature and body
  
-proc parseStmt(parser: var nlParser): nlParseStat = 
+#[ "Greed" refers to something I mentioned in my discussion on
+ |  Noether's grammar (in an EBNF-like language). Greed just
+ |  means "everything until a condition is satisfied".
+ |  That condition should be supplied by a Nim procedural type.
+ ]#
+  
+# Greed will consume anything until a condition is satisfied
+# Returns nlParseStat.UNMATCHED if the greed was never satisfied (OMG!!)
+proc greed(parser: var nlParser,
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
  while parser.progressStream():
-    echo "----- Current Token: ", parser.currTok
-    case parser.currTok.tKind
-    of tkDQUO:
-      # Attempt to parse string literal
-      if parser.parseStrLit() != nlParseStat.OK:
-        echo "Unmatched Double Quotation! Malformed String Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed String Literal"
-        echo parser.bnode[], '\n'
-    of tkSQUO:
-      # Attempt to parse string literal
-      if parser.parseChrLit() != nlParseStat.OK:
-        echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo parser.line
-        echo repeat(" ", parser.currTok.startPos), '^', '\n'
-      else:
-        echo "Parsed Character Literal"
-        echo parser.bnode[], '\n'
-    of tkEOL:
-      # TODO: handle this case, don't just discard
-      discard
-    else:
-      echo "blah blah unhandled case\n"
-  result = nlParseStat.OK
-      
-# Attempt to parse nlAST from nlTokStream
-proc parse*(tokStream: var nlTokStream): nlAST =
-  var parser = newParser(tokStream)
-  echo ' '
-  discard parser.parseStmt()
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+  result = nlParseStat.UNMATCHED

-  result = parser.ast
+proc greedLine(parser: var nlParser,
+               satisfy: proc(tok: nlTok): bool): nlParseStat =
+  while parser.progressStream():
+    if satisfy(parser.currTok):
+      return nlParseStat.OK
+    # NOTE: the matched token is currently excluded
+    parser.bnode.addTok(parser.currTok)
+    if parser.currTok.tKind == tkEOL:
+      return nlParseStat.UNMATCHED
+  result = nlParseStat.UNMATCHED
+
+#[ Templates for generating greed satisfying conditions.
+ ]#
+
+# Satisfied if it finds nlTok of type matchType
+template satisfyMatch(matchType: nlTokKind): untyped  = 
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@ -1,90 +0,0 @@
-import nodes
-import ../lexer/tokstream
-
-type
-  # NOTE1: Values above MARKER_FAIL indicate a failed state
-  # NOTE2: nlParseStat is marked pure out of habit that's all
-  nlParseStat* {.pure.} = enum
-    OK,
-    MARKER_FAIL,
-    UNMATCHED,
-    TOOBIG,
-
-  nlAST* = object
-    root: nlNode
-
-  nlParser* = object
-    stream: nlTokStream
-    ast: nlAST
-    # the "build node" is a reference to the AST node
-    # the parser is currently modifying/building from
-    # NOTE: bnode changes frequently, it is NOT the root
-    bnode: nlNode
-    # flag indicating whether the parser is at
-    # the start of a new line (aka checking indentation)
-    inIndent: bool
-    
-
-proc `*`(stat: nlParseStat, b: bool): nlParseStat =
-  result = if b: stat else: nlParseStat.OK
-
-proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.MARKER_FAIL)
-
-proc newParser*(tokStream: var nlTokStream): nlParser =
-  let rootNode = newNode(nkNone)
-  result = nlParser(
-    stream: tokStream,
-    ast: nlAST(
-      root: rootNode
-    ),
-    bnode: rootNode,
-  )
-
-# Exposes a subset of the nlTokStream interface
-proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
-proc line(parser: var nlParser): string = parser.stream.line
-
-# Extends upon the functionality of nlTokStream.progress()
-proc progressStream*(parser: var nlParser): bool = 
-  result = parser.stream.progress()
-  if result and parser.currTok.tKind == tkEOL:
-    parser.inIndent = true
-  if 
-
-proc setNewLine()
-  
-#[ "Greed" refers to something I mentioned in my discussion on
- |  Noether's grammar (in an EBNF-like language). Greed just
- |  means "everything until a condition is satisified".
- |  That condition should be supplied by a Nim procedural type.
- ]#
-  
-# Greed will consume anything until a condition is satisfied
-# Returns false if the greed was never satisfied (OMG!!)
-proc greed(parser: var nlParser,
-           satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-  result = nlParseStat.UNMATCHED
-
-proc greedLine(parser: var nlParser,
-               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  while parser.progressStream():
-    if satisfy(parser.currTok):
-      return nlParseStat.OK
-    # NOTE: the matched token is currently excluded
-    parser.bnode.addTok(parser.currTok)
-    if parser.currTok.tKind == tkEOL:
-      return nlParseStat.UNMATCHED
-  result = nlParseStat.UNMATCHED
-
-#[ Templates for generating greed satisfying conditions.
- ]#
-
-# Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokKind): untyped  = 
-  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))