Restructure attempt #087 :(

2025-06-19 12:51:03 +10:00 · 2025-06-19 12:51:03 +10:00 · 1181ea9743
commit 1181ea9743
parent f25e66e9ef
7 changed files with 227 additions and 215 deletions
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -1,19 +1,29 @@
 import os
 import noether/lib/io
 import noether/lexer/lex
-# import noether/parser/parser
+import noether/parser/parse
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"
-  var stream = if paramCount() > 0: streamFile(paramStr 1)
+  # really lazy argparse implementation (temporary)
  let
    paramC = paramCount()
    # Usage is `nlx [tok|tree] <demo>`: a command word is only present
    # when at least two arguments were given. With zero or one argument
    # fall back to dumping tokens.
    cmd = if paramC > 1: paramStr 1
          else: "tok"
  # The input is always the LAST argument; with no arguments at all the
  # program text is read from stdin instead.
  var stream = if paramC > 0: streamFile(paramStr paramC)
               else: streamString(readAll stdin)
  var lexer = newLexer(stream)
-  # # DumpTok
+  if cmd == "tok":
    # DumpTok
    while lexer.progress():
      echo lexer.tok
-
+  elif cmd == "tree":
    discard
    # DumpTree
    # discard parse(tokStream)
  else:
    echo "Usage: nlx [tok|tree] <demo>\n    demo files are accessible at lang/demo"
--- a/src/noether/lexer/lex.nim
+++ b/src/noether/lexer/lex.nim
@ -11,15 +11,16 @@ type
  nlLexer* = object
    stream: Stream
    done*: bool
-    tok*: nlTok # new finished token
+    # store current token and upcoming (build) token
    tok*: nlTok # current token
    btok: nlTok # the build token
    # save char and pos and its token type
    char: char
    cTKind: nlTokKind
    # track line number, line content, etc
    line: string
    lineNum: int
    pos: int 
    # save char and pos and its token type
    char: char
    cTKind: nlTokKind
 proc atEOL(lexer: nlLexer): bool {.inline.} =
  ## True when the lexer's current character is a newline ('\n').
  lexer.char == '\n'
@ -37,8 +38,41 @@ proc newLexer*(stream: var Stream): nlLexer =
    lineNum: 1,
    pos: -1,    # after initial readChar this -> 0
    char: '\0', # use \0 as initial invalid char   
    cTKind: tkNONE,
  )
 # Maps the lexer's current character onto the nlTokKind it represents
 proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
  ## Inspects only `lexer.char`. Any character without a dedicated
  ## kind listed below is classified as tkWORD.
  result = case lexer.char
    of '\0': tkEOF
    of '\r', '\n': tkEOL
    of ' ', '\t': tkWTSP
    of '(': tkLPAR
    of ')': tkRPAR
    of '{': tkLBRA
    of '}': tkRBRA
    of '[': tkLSQB
    of ']': tkRSQB
    of '\'': tkSQUO
    of '\"': tkDQUO
    of '`': tkGRVA
    of '#': tkHASH
    else: tkWORD
 #[ ====================================================== ]
 | nlLexer Internal Interface for Token Construction ]
@ -96,7 +130,7 @@ proc readChar(lexer: var nlLexer): bool =
    inc lexer.lineNum
  # sets lexer.char to '\0' if EOF
  lexer.char = lexer.stream.readChar()
-  lexer.cTKind = getTokKind(lexer.char)
+  lexer.cTKind = lexer.classifyTok()
  lexer.line.add(lexer.char)
  inc lexer.pos
  result = lexer.atEOF()
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@ -1,4 +1,32 @@
-include tokkind
+type
  # nlTokKind allows primitive nlToks to be typed,
  # the nlTokKind enum should never be directly
  # accessed. Use the interface in this file instead.
  # NOTE(review): the character classifier in the lexer only ever
  # yields tkEOF, tkEOL, tkWTSP, tkWORD and the reserved symbols below.
  nlTokKind* = enum
    tkNONE, # Placeholder Value (the lexer's initial cTKind)
    tkEOF,  # End of File ('\0')
    tkEOL,  # End of Line ('\r' or '\n' in the classifier)
    tkWORD, # Alphanumeric token (classifier fallback for any other char)
    tkSYMB, # Symbolic token -- NOTE(review): not produced by the classifier
    tkLNFD, # \r \n Line-Feed -- NOTE(review): classifier yields tkEOL
            # for these characters instead; confirm this is still needed
    tkWTSP, # ' ' \t Whitespace
    # RESERVED SYMBOLS
    tkLPAR, # ( Left Parenthesis
    tkRPAR, # ) Right Parenthesis
    tkLBRA, # { Left Brace
    tkRBRA, # } Right Brace
    tkLSQB, # [ Left Square Bracket
    tkRSQB, # ] Right Square Bracket
    # tkLANB, # < Left Angle Bracket
    # tkRANB, # > Right Angle Bracket
    tkSQUO, # ' Single Quotation Marking
    tkDQUO, # " Double Quotation Marking
    tkGRVA, # ` Grave Accent
    tkHASH, # # Number Sign (Hashtag)
 type 
  nlTok* = tuple
--- a/src/noether/lexer/tokkind.nim
+++ b/src/noether/lexer/tokkind.nim
@ -1,61 +1 @@
 type
  # nlTokKind allows primitive nlToks to be typed,
  # the nlTokKind enum should never be directly
  # accessed. Use the interface in this file instead.
  # NOTE(review): getTokKind in this file only ever yields tkEOF,
  # tkEOL, tkWTSP, tkWORD and the reserved symbols below.
  nlTokKind* = enum
    tkNONE, # Placeholder Value
    tkEOF,  # End of File ('\0')
    tkEOL,  # End of Line ('\r' or '\n' in getTokKind)
    tkWORD, # Alphanumeric token (getTokKind's fallback for any other char)
    tkSYMB, # Symbolic token -- NOTE(review): not produced by getTokKind
    tkLNFD, # \r \n Line-Feed -- NOTE(review): getTokKind yields tkEOL
            # for these characters instead; confirm this is still needed
    tkWTSP, # ' ' \t Whitespace
    # RESERVED SYMBOLS
    tkLPAR, # ( Left Parenthesis
    tkRPAR, # ) Right Parenthesis
    tkLBRA, # { Left Brace
    tkRBRA, # } Right Brace
    tkLSQB, # [ Left Square Bracket
    tkRSQB, # ] Right Square Bracket
    # tkLANB, # < Left Angle Bracket
    # tkRANB, # > Right Angle Bracket
    tkSQUO, # ' Single Quotation Marking
    tkDQUO, # " Double Quotation Marking
    tkGRVA, # ` Grave Accent
    tkHASH, # # Number Sign (Hashtag)
 # Maps a single character onto the nlTokKind it represents
 proc getTokKind*(c: char): nlTokKind =
  ## Any character without a dedicated kind listed below is
  ## classified as tkWORD.
  result = case c
    of '\0': tkEOF
    of '\r', '\n': tkEOL
    of ' ', '\t': tkWTSP
    of '(': tkLPAR
    of ')': tkRPAR
    of '{': tkLBRA
    of '}': tkRBRA
    of '[': tkLSQB
    of ']': tkRSQB
    of '\'': tkSQUO
    of '\"': tkDQUO
    of '`': tkGRVA
    of '#': tkHASH
    else: tkWORD
--- a/src/noether/parser/parse.nim
+++ b/src/noether/parser/parse.nim
@ -0,0 +1,58 @@
 import strutils
 include parser
 # NOTE: Matching between two tokens will fill `node` with everything
 # NOTE: between those two tokens EXCLUDING the two tokens themselves.
 proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  ## Runs `greed` with a condition that is satisfied by the first token
  ## of kind `matchType`, and hands back greed's status unchanged.
  parser.greed(satisfyMatch(matchType))
 proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  ## Runs `greedLine` with a condition that is satisfied by the first
  ## token of kind `matchType`, and hands back its status unchanged.
  parser.greedLine(satisfyMatch(matchType))
 proc parseStrLit(parser: var nlParser): nlParseStat =
  ## String literal: match up to the next tkDQUO via parseMatchLine.
  parseMatchLine(parser, tkDQUO)
 proc parseChrLit(parser: var nlParser): nlParseStat =
  ## Character literal: match up to the next tkSQUO via parseMatchLine.
  parseMatchLine(parser, tkSQUO)
 proc parseStmt(parser: var nlParser): nlParseStat = 
  ## Steps through the token stream until progressStream() reports no
  ## further progress, echoing each token and attempting to parse a
  ## string or character literal whenever a quote token is met.
  ## Diagnostics are echoed; the returned status is always OK.
  result = nlParseStat.OK
  while parser.progressStream():
    echo "----- Current Token: ", parser.currTok
    case parser.currTok.tKind
    of tkDQUO:
      # Double quote seen: try to read a string literal
      if parser.parseStrLit() == nlParseStat.OK:
        echo "Parsed String Literal"
        echo parser.bnode[], '\n'
      else:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo parser.line
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
    of tkSQUO:
      # Single quote seen: try to read a character literal
      if parser.parseChrLit() == nlParseStat.OK:
        echo "Parsed Character Literal"
        echo parser.bnode[], '\n'
      else:
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo parser.line
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
    of tkEOL:
      # TODO: handle this case, don't just discard
      discard
    else:
      echo "blah blah unhandled case\n"
 # Entry point: build an nlAST out of an nlTokStream
 proc parse*(tokStream: var nlTokStream): nlAST =
  ## Creates a parser over `tokStream`, runs parseStmt on it (its
  ## status is deliberately discarded) and returns the parser's AST.
  var parser = newParser(tokStream)
  echo ' '
  discard parseStmt(parser)
  parser.ast
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -1,58 +1,90 @@
-import strutils
+import nodes
-include parseutil
+import ../lexer/lex
-# NOTE: Matching between two tokens will fill `node` with everything
+type
-# NOTE: between those two tokens EXCLUDING the two tokens themselves.
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
-proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  # NOTE2: nlParseStat is marked pure out of habit that's all
-  result = greed(
+  nlParseStat* {.pure.} = enum
-    parser,
+    OK,
-    satisfyMatch(matchType),
+    MARKER_FAIL,
-  )
+    UNMATCHED,
-proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+    TOOBIG,
-  result = greedLine(
+
-    parser, 
+  nlAST* = object
-    satisfyMatch(matchType),
+    root: nlNode
  # Parser state: the token source, the tree being built, and the
  # node currently under construction.
  nlParser* = object
    # token stream the parser reads from (see progressStream)
    stream: nlTokStream
    # tree under construction; newParser seeds it with an nkNone root
    ast: nlAST
    # the "build node" is a reference to the AST node
    # the parser is currently modifying/building from
    # NOTE: bnode changes frequently, it is NOT the root
    bnode: nlNode
    # flag indicating whether the parser is at
    # the start of a new line (aka checking indentation)
    inIndent: bool
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` holds,
  ## otherwise nlParseStat.OK.
  if b: stat
  else: nlParseStat.OK
 proc isFail*(stat: nlParseStat): bool = 
  ## True for MARKER_FAIL itself and for every status ordered after it.
  nlParseStat.MARKER_FAIL <= stat
 proc newParser*(tokStream: var nlTokStream): nlParser =
  ## Builds a parser over `tokStream`. A fresh nkNone node becomes both
  ## the AST root and the initial build node; `inIndent` is left at its
  ## default value.
  let astRoot = newNode(nkNone)
  result.stream = tokStream
  result.ast = nlAST(root: astRoot)
  result.bnode = astRoot
-proc parseStrLit(parser: var nlParser): nlParseStat =
+# Exposes a subset of the nlTokStream interface
-  result = parser.parseMatchLine(tkDQUO)
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
 proc line(parser: var nlParser): string =
  ## Forwards to the `line` of the parser's underlying token stream.
  result = parser.stream.line
-proc parseChrLit(parser: var nlParser): nlParseStat =
+# Extends upon the functionality of nlTokStream.progress()
-  result = parser.parseMatchLine(tkSQUO)
+proc progressStream*(parser: var nlParser): bool = 
  result = parser.stream.progress()
  if result and parser.currTok.tKind == tkEOL:
    parser.inIndent = true
  if 
-proc parseStmt(parser: var nlParser): nlParseStat = 
+proc setNewLine()
 #[ "Greed" refers to something I mentioned in my discussion on
 |  Noether's grammar (in an EBNF-like language). Greed just
 |  means "everything until a condition is satisfied".
 |  That condition should be supplied by a Nim procedural type.
 ]#
 # Greed will consume anything until a condition is satisfied
 # Returns nlParseStat.UNMATCHED if the greed was never satisfied (OMG!!)
 proc greed(parser: var nlParser,
           satisfy: proc(tok: nlTok): bool): nlParseStat =
  while parser.progressStream():
-    echo "----- Current Token: ", parser.currTok
+    if satisfy(parser.currTok):
-    case parser.currTok.tKind
+      return nlParseStat.OK
-    of tkDQUO:
+    # NOTE: the matched token is currently excluded
-      # Attempt to parse string literal
+    parser.bnode.addTok(parser.currTok)
-      if parser.parseStrLit() != nlParseStat.OK:
+  result = nlParseStat.UNMATCHED
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo parser.line
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
      else:
        echo "Parsed String Literal"
        echo parser.bnode[], '\n'
    of tkSQUO:
      # Attempt to parse character literal
      if parser.parseChrLit() != nlParseStat.OK:
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo parser.line
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
      else:
        echo "Parsed Character Literal"
        echo parser.bnode[], '\n'
    of tkEOL:
      # TODO: handle this case, don't just discard
      discard
    else:
      echo "blah blah unhandled case\n"
  result = nlParseStat.OK
-# Attempt to parse nlAST from nlTokStream
+proc greedLine(parser: var nlParser,
-proc parse*(tokStream: var nlTokStream): nlAST =
+               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  var parser = newParser(tokStream)
+  while parser.progressStream():
-  echo ' '
+    if satisfy(parser.currTok):
-  discard parser.parseStmt()
+      return nlParseStat.OK
    # NOTE: the matched token is currently excluded
    parser.bnode.addTok(parser.currTok)
    if parser.currTok.tKind == tkEOL:
      return nlParseStat.UNMATCHED
  result = nlParseStat.UNMATCHED
-  result = parser.ast
+#[ Templates for generating greed satisfying conditions.
 ]#
 # Satisfied if it finds nlTok of type matchType
 # Expands to an anonymous proc that takes one nlTok and returns
 # whether that token's tKind equals matchType; the result is meant
 # to be passed as the `satisfy` argument of greed / greedLine.
 template satisfyMatch(matchType: nlTokKind): untyped  = 
  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@ -1,90 +0,0 @@
 import nodes
 import ../lexer/tokstream
 type
  # NOTE1: isFail treats MARKER_FAIL and every value declared after it
  #        as a failed state (it compares with >=)
  # NOTE2: nlParseStat is marked pure out of habit that's all
  nlParseStat* {.pure.} = enum
    OK,           # success
    MARKER_FAIL,  # boundary: this value and those below count as failures
    UNMATCHED,    # greed/greedLine never saw a satisfying token
    TOOBIG,       # NOTE(review): not used in the code visible here
  # Wrapper around the root node of the parsed tree
  nlAST* = object
    root: nlNode
  # Parser state: the token source, the tree being built, and the
  # node currently under construction.
  nlParser* = object
    # token stream the parser reads from (see progressStream)
    stream: nlTokStream
    # tree under construction; newParser seeds it with an nkNone root
    ast: nlAST
    # the "build node" is a reference to the AST node
    # the parser is currently modifying/building from
    # NOTE: bnode changes frequently, it is NOT the root
    bnode: nlNode
    # flag indicating whether the parser is at
    # the start of a new line (aka checking indentation)
    inIndent: bool
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` holds,
  ## otherwise nlParseStat.OK.
  if b: stat
  else: nlParseStat.OK
 proc isFail*(stat: nlParseStat): bool = 
  ## True for MARKER_FAIL itself and for every status ordered after it.
  nlParseStat.MARKER_FAIL <= stat
 proc newParser*(tokStream: var nlTokStream): nlParser =
  ## Builds a parser over `tokStream`. A fresh nkNone node becomes both
  ## the AST root and the initial build node; `inIndent` is left at its
  ## default value.
  let astRoot = newNode(nkNone)
  result.stream = tokStream
  result.ast = nlAST(root: astRoot)
  result.bnode = astRoot
 # Thin pass-throughs to a subset of the nlTokStream interface
 proc currTok(parser: var nlParser): nlTok =
  ## Forwards to the `currTok` of the parser's underlying token stream.
  result = parser.stream.currTok
 proc line(parser: var nlParser): string =
  ## Forwards to the `line` of the parser's underlying token stream.
  result = parser.stream.line
 # Extends upon the functionality of nlTokStream.progress()
 proc progressStream*(parser: var nlParser): bool = 
  ## Advances the underlying token stream by one step and returns the
  ## stream's own `progress()` result.
  ##
  ## When the step succeeded and the new current token is a tkEOL,
  ## `inIndent` is raised to flag that the parser is now at the start
  ## of a new line.
  result = parser.stream.progress()
  if result and parser.currTok.tKind == tkEOL:
    parser.inIndent = true
  # TODO: `inIndent` is never cleared here. An unfinished, condition-less
  # `if` used to follow (a syntax error); it was dropped until that
  # logic is actually written.
 proc setNewLine()
 #[ "Greed" refers to something I mentioned in my discussion on
 |  Noether's grammar (in an EBNF-like language). Greed just
 |  means "everything until a condition is satisfied".
 |  That condition should be supplied by a Nim procedural type.
 ]#
 # Greed consumes tokens until the supplied condition is satisfied.
 # Yields OK on a match, UNMATCHED if progressStream() gives up first.
 proc greed(parser: var nlParser,
           satisfy: proc(tok: nlTok): bool): nlParseStat =
  ## Every token that does not satisfy the condition is appended to
  ## the parser's current build node; the satisfying token is not.
  result = nlParseStat.UNMATCHED
  while parser.progressStream():
    if not satisfy(parser.currTok):
      # NOTE: the matched token is currently excluded
      parser.bnode.addTok(parser.currTok)
    else:
      return nlParseStat.OK
 proc greedLine(parser: var nlParser,
                satisfy: proc(tok: nlTok): bool): nlParseStat =
  ## Consumes tokens until `satisfy` accepts one (OK), giving up with
  ## UNMATCHED when a tkEOL token is reached first or progressStream()
  ## reports no further progress. Unsatisfying tokens, including that
  ## tkEOL, are appended to the current build node.
  result = nlParseStat.UNMATCHED
  while parser.progressStream():
    if satisfy(parser.currTok):
      return nlParseStat.OK
    # NOTE: the matched token is currently excluded
    parser.bnode.addTok(parser.currTok)
    if parser.currTok.tKind == tkEOL:
      break
 #[ Templates for generating greed satisfying conditions.
 ]#
 # Satisfied if it finds nlTok of type matchType
 # Expands to an anonymous proc that takes one nlTok and returns
 # whether that token's tKind equals matchType; the result is meant
 # to be passed as the `satisfy` argument of greed / greedLine.
 template satisfyMatch(matchType: nlTokKind): untyped  = 
  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))