1 gazillion changes (mostly documenting my insanity optimizing + naming)

2025-06-19 02:09:43 +10:00 · 2025-06-19 02:09:43 +10:00 · f8697bd662
commit f8697bd662
parent ebef458186
9 changed files with 206 additions and 183 deletions
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -3,6 +3,7 @@ import noether/lexer/tok
 import noether/lexer/tokstream
 import noether/parser/parser

+{.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"

@ -11,9 +12,8 @@ when isMainModule:
    var tokStream = newTokStream(filename, isFile=true)
    
    # # DumpTok
-    # var tok: nlTok
-    # while tokStream.nextTok(tok):
-    #   echo tok
+    # while tokStream.progress():
+    #   echo tokStream.currTok

    # DumpTree
    discard parse(tokStream)
--- a/src/noether/lexer/lstream.nim
+++ b/src/noether/lexer/lstream.nim
@ -41,7 +41,7 @@ proc outOfBounds*(lstream: nlLStream): bool =
  result = (lstream.pos > lstream.line.len - 1)

 # Progress the lex stream to the next line (if available)
-proc progLine*(lstream: var nlLStream): bool = 
+proc progressLine*(lstream: var nlLStream): bool = 
  if lstream.stream.readLine(lstream.line):
    inc lstream.lineNum
    lstream.pos = Natural 0
@ -50,17 +50,17 @@ proc progLine*(lstream: var nlLStream): bool =

 # Progress the lex stream to the next character in the line
 # forcefully (aka does NOT check if we reached EOL)
-proc forceProgChar*(lstream: var nlLStream) = 
+proc forceProgressChar*(lstream: var nlLStream) = 
  inc lstream.pos

-# Progress the lex stream to the next character (if available)
-proc progress*(lstream: var nlLStream): bool =
-  if not lstream.atEOL():
-    lstream.forceProgChar()
-    result = true
-  else:
-    # attempt to progress next line past EOL
-    result = lstream.progLine()
+# # Progress the lex stream to the next character (if available)
+# proc progressChar*(lstream: var nlLStream): bool =
+#   if not lstream.atEOL():
+#     lstream.forceProgressChar()
+#     result = true
+#   else:
+#     # attempt to progress next line past EOL
+#     result = lstream.progressLine()

 proc currChar*(lstream: nlLStream): char = 
  result = lstream.line[lstream.pos]
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@ -2,7 +2,7 @@ include toktype

 type 
  nlTok* = object
-    tType*: nlTokType
+    tKind*: nlTokKind
    lit*: string
    lineNum*: Natural
    startPos*: Natural
@ -12,29 +12,11 @@ type
 # all other fields are expected to be filled out later.
 proc emptyTok*(startPos: int): nlTok =
  result = nlTok(
-    tType: nlTokType.NONE,
+    tKind: tkNONE,
    lit: "",
    startPos: Natural startPos,
  )

-# Checks if an nlTok has nlTokType.NONE
-proc isTokUntyped*(tType: nlTokType): bool =
-  result = (tType == nlTokType.NONE)
-  
-# Checks if an nlTok has nlTokType.EOL
-proc isTokEOL*(tok: nlTok): bool =
-  result = (tok.tType == nlTokType.EOL)
-
-
-
-# This method is only used to convert null
-# terminator nlToks into line-feed ones.
-# Returns a copy of an nlTok, changing its type
-proc tokTermToLineFeed*(tok: nlTok): nlTok =
-  result = nlTok(
-    tType: nlTokType.LNFD,
-    lit: tok.lit,
-    lineNum: tok.lineNum,
-    startPos: tok.startPos,
-    endPos: tok.endPos,
-  )
+# Checks if an nlTok has tkNONE
+proc isUntyped*(tKind: nlTokKind): bool =
+  result = (tKind == tkNONE)
--- a/src/noether/lexer/tokbuilding.nim
+++ b/src/noether/lexer/tokbuilding.nim
@ -12,7 +12,7 @@ type
 # Generates an EOL token for the nlTokStream's state
 proc EOLTok(tokStream: nlTokStream): nlTok = 
  result = nlTok(
-    tType: nlTokType.EOL,
+    tKind: tkEOL,
    lit: "\0",
    lineNum: Natural tokStream.lstream.lineNum,
    startPos: Natural tokStream.lstream.pos,
@ -40,46 +40,46 @@ proc flushBuild(tokStream: var nlTokStream): nlTok =

 # Returns whether the build token has a set type yet.
 # This indicates that the build token should inherit
-# the nlTokType of the nlLStream's next character.
+# the nlTokKind of the nlLStream's next character.
 proc isUntypedBuild(tokStream: nlTokStream): bool =
-  result = isTokUntyped(tokStream.build.tType)
+  result = tokStream.build.tKind.isUntyped()

-# Check whether an nlTokType is "compatible" with the build token. 
+# Check whether an nlTokKind is "compatible" with the build token. 
 # NOTE: flushBuild() should be called when an incompatible token is discovered.
-proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
-  result = (tType == tokStream.build.tType)
+proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
+  result = (tKind == tokStream.build.tKind)

 # Add a character to the nlTokStream's build token.
 # Flushes and returns the build token if "fully built",
 # and a boolean indicating whether the nlTokStream can progress.
-proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
+proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
  # the "pos > EOL" invalid state is used intentionally
  # to indicate all tokens have been built, and return EOL Token
  if tokStream.lstream.outOfBounds():
    buildTok = some(EOLTok(tokStream))
    return true # can progress once more
    
-  let tType = getTokType(tokStream.lstream.currChar())
+  let tKind = getTokType(tokStream.lstream.currChar())
  # untyped build tokens must inherited immediately
  if isUntypedBuild(tokStream):
-    tokStream.build.tType = tType
+    tokStream.build.tKind = tKind
  
  # check if EOL reached
  if tokStream.lstream.atEOL():
      # flush old build token, the new one can be left untyped
-      let compatible = isCompatibleBuild(tokStream, tType)
+      let compatible = isCompatibleBuild(tokStream, tKind)
      result = false # DO NOT PROGRESS
      if compatible:
        # force the lstream into an invalid state by progressing beyond EOL
-        # we can then detect this state on the next progBuild and return
+        # we can then detect this state on the next progressBuild and return
        # an EOL character (very unsafe implementation but it works well)
-        tokStream.lstream.forceProgChar()
+        tokStream.lstream.forceProgressChar()
      buildTok = some(flushBuild(tokStream))
  # check character and build token compatability
-  elif not isCompatibleBuild(tokStream, tType):
+  elif not isCompatibleBuild(tokStream, tKind):
      # flush old build token, the new one inherits type
      buildTok = some(flushBuild(tokStream))
-      tokStream.build.tType = tType
+      tokStream.build.tKind = tKind
      result = true # can progress
  else:
    buildTok = none(nlTok)
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@ -9,44 +9,43 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  # 1. initialise an empty build token 
  # 2. progress to the first line
  result.resetBuild()
-  discard result.lstream.progLine()
+  discard result.lstream.progressLine()

 # Defines a short-hand notation for getting the current line
-proc currLine*(tokStream: nlTokStream): string =
+proc line*(tokStream: nlTokStream): string =
  result = tokStream.lstream.line
  
-# Reimplements nlLStream.progress() for nlTokStream
+# Reimplements nlLStream.progressChar for nlTokStream
 # to account for additional structure (ie the build token)
-proc progChar(tokStream: var nlTokStream): bool =
+# NOTE: progressChar progresses to lstream's next char
+proc progressChar(tokStream: var nlTokStream): bool =
  if not tokStream.lstream.atEOL():
-    tokStream.lstream.forceProgChar()
+    tokStream.lstream.forceProgressChar()
    result = true
  else:
    # attempt to progress to next line past EOL
-    result = tokStream.lstream.progLine()
+    result = tokStream.lstream.progressLine()
    tokStream.resetBuild()  
  
-# Generates and sets (by reference) the next token in the stream,
-# via repeatedly calling progBuild() and progChar().
+# Generates and progress the next token in the nlTokStream.
+# via repeatedly calling progressBuild() and progressChar().
 # Returns a boolean indicating whether EOF has been reached.
-# NOTE: progBuild adds lstream's current char to the build token
-# NOTE: progChar progresses to lstream's next char
-proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
+# NOTE: access the new token via `tokStream.tok`
+proc progress*(tokStream: var nlTokStream): bool =
  # Return prematurely if already closed
  if tokStream.closed:
    return false
  while true:
    var flushedTok: Option[nlTok]
    let 
-      canProgress = tokStream.progBuild(flushedTok)
+      canProgress = tokStream.progressBuild(flushedTok)
      buildComplete = flushedTok.isSome
    # canProgress & EOF reached => no more tokens to build :)
    # NOTE: reachedEOF and not canProgress => more tokens unwrapping
    if buildComplete:
      # return the finished build token, and save it as the current token
-      tok = flushedTok.get()
-      tokStream.currTok = tok
-    if canProgress and not tokStream.progChar():
+      tokStream.currTok = flushedTok.get()
+    if canProgress and not tokStream.progressChar():
      tokStream.closed = true
      return buildComplete
    elif buildComplete:
--- a/src/noether/lexer/toktype.nim
+++ b/src/noether/lexer/toktype.nim
@ -1,54 +1,59 @@
 type
-  # nlTokType allows primitive nlToks to be typed,
-  # the nlTokType enum should never be directly
+  # nlTokKind allows primitive nlToks to be typed,
+  # the nlTokKind enum should never be directly
  # accessed. Use the interface in this file instead.
-  nlTokType* = enum
-    NONE, # Placeholder Value
-    EOF,  # End of File
-    EOL,  # End of Line (\0 --> EOL)
-    WORD, # Alphanumeric token
-    SYMB, # Symbolic token
-    LNFD, # \r \n Line-Feed
-    WTSP, # ' ' \t Whitespace
-    LPAR, # ( Left Parenthesis
-    RPAR, # ) Right Parenthesis
-    LBRA, # { Left Brace
-    RBRA, # } Right Brace
-    LSQB, # [ Left Square Bracket
-    RSQB, # ] Right Square Bracket
-    # LANB, # < Left Angle Bracket
-    # RANB, # > Right Angle Bracket
-    SQUO, # ' Single Quotation Marking
-    DQUO, # " Double Quotation Marking
-    GRVA, # ` Grave Accent
-    HASH, # # Number Sign (Hashtag)
+  nlTokKind* = enum
+    tkNONE, # Placeholder Value
+
+    tkEOF,  # End of File
+    tkEOL,  # End of Line (\0 --> EOL)
+
+    tkWORD, # Alphanumeric token
+    tkSYMB, # Symbolic token
+
+    tkLNFD, # \r \n Line-Feed
+    tkWTSP, # ' ' \t Whitespace
+
+    # RESERVED SYMBOLS
+    tkLPAR, # ( Left Parenthesis
+    tkRPAR, # ) Right Parenthesis
+    tkLBRA, # { Left Brace
+    tkRBRA, # } Right Brace
+    tkLSQB, # [ Left Square Bracket
+    tkRSQB, # ] Right Square Bracket
+    # tkLANB, # < Left Angle Bracket
+    # tkRANB, # > Right Angle Bracket
+    tkSQUO, # ' Single Quotation Marking
+    tkDQUO, # " Double Quotation Marking
+    tkGRVA, # ` Grave Accent
+    tkHASH, # # Number Sign (Hashtag)
        
-# Classifies a character to its nlTokType
-proc getTokType*(c: char): nlTokType =
+# Classifies a character to its nlTokKind
+proc getTokType*(c: char): nlTokKind =
  case c:
  of '\0', '\r', '\n':
-    result = nlTokType.EOL
+    result = tkEOL
  of ' ', '\t':
-    result = nlTokType.WTSP
+    result = tkWTSP
  of '(':
-    result = nlTokType.LPAR
+    result = tkLPAR
  of ')':
-    result = nlTokType.RPAR
+    result = tkRPAR
  of '{':
-    result = nlTokType.LBRA
+    result = tkLBRA
  of '}':
-    result = nlTokType.RBRA
+    result = tkRBRA
  of '[':
-    result = nlTokType.LSQB
+    result = tkLSQB
  of ']':
-    result = nlTokType.RSQB
+    result = tkRSQB
  of '\'':
-    result = nlTokType.SQUO
+    result = tkSQUO
  of '\"':
-    result = nlTokType.DQUO
+    result = tkDQUO
  of '`':
-    result = nlTokType.GRVA
+    result = tkGRVA
  of '#':
-    result = nlTokType.HASH
+    result = tkHASH
  else:
-    result = nlTokType.WORD
+    result = tkWORD
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@ -1,19 +1,48 @@
+import std/options
 from ../lexer/tok import nlTok
-# from ../lexer/tokstream import 

 type
-  # NOTE: by the end of parsing NO nodes should
-  # NOTE: have nlNodeType.NONE
-  nlNodeType* = enum
-    NONE, # Placeholder Value
-    TERM, # Indicates the tree has terminated
-    STRL, # String Literal
-    CHRL, # Character Literal
+  # NOTE: by the end of parsing NO nodes should have nkNone
+  nlNodeKind* = enum
+    nkNone, # Placeholder Value
+
+    nkStrLit, # String Literal
+    nkChrLit, # Character Literal
+
+  # NOTE: always check parent != nil when traversing the tree
  nlNode* {.acyclic.} = ref object of RootObj
-    nType*: nlNodeType
-    toks*: seq[nlTok] # nodes store the tokens that build them
-    # left, right: nlNode
+    nKind*: nlNodeKind
+    toks*: seq[nlTok] # nodes (may) store the tokens that build them
+    parent*: nlNode
+
+  # Purely abstract type that all nlNode objects
+  # with children are expected to inherit from.
+  nlBranchNode* {.acyclic.} = ref object of nlNode
+    child: UncheckedArray[nlNode]
+
+  nlBiNode* {.acyclic.} = ref object of nlBranchNode
+
+proc childCount*(node: nlNode): int {.inline.} = 0
+proc childCount*(node: nlBiNode): int {.inline.} = 2
+
+proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} = 
+  result = none(nlNode)
+proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} = 
+  result = some(node.child[i])
+  
+proc newNode*(nKind: nlNodeKind): nlNode =
+  result = nlNode(
+    nKind: nKind,
+  )
+  
+proc newBiNode*(nKind: nlNodeKind): nlNode =
+  result = nlBiNode(
+    nKind: nKind,
+  ) 

 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
+  echo "AM I HERE?"
+  echo node[]
+  echo node.toks
  node.toks.add(tok)
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -3,61 +3,55 @@ include parseutil

 # NOTE: Matching between two tokens will fill `node` with everything
 # NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(tokStream: var nlTokStream, 
-                node: var nlNode,
-                matchType: nlTokType): nlParseStat =
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  result = greed(
-    tokStream, 
-    node.toks, 
+    parser,
    satisfyMatch(matchType),
  )
-proc parseMatchLine(tokStream: var nlTokStream, 
-                   node: var nlNode,
-                   matchType: nlTokType): nlParseStat =
-  result = greed(
-    tokStream, 
-    node.toks, 
-    satisfyMatchEOL(matchType),
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
+  result = greedLine(
+    parser, 
+    satisfyMatch(matchType),
  )

-proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
-  node = nlNode(
-    nType: nlNodeType.STRL
-  )
-  node.addTok(tokStream.currTok)
-  result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
+proc parseStrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatch(tkDQUO)

-proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
-  node = nlNode(
-    nType: nlNodeType.CHRL
-  )
-  node.addTok(tokStream.currTok)
-  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
-  result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
-
-# Attempt to form an nlAST from a nlTokStream
-proc parse*(tokStream: var nlTokStream): nlNode = 
-  var tok: nlTok
-  var node: nlNode
-  while tokStream.nextTok(tok):
-    case tok.tType:
-    of nlTokType.DQUO:
+proc parseChrLit(parser: var nlParser): nlParseStat =
+  result = parser.parseMatch(tkSQUO)
+  
+proc parseStmt(parser: var nlParser): nlParseStat = 
+  # initialise build node as none just for the hell of it
+  
+  while parser.stream.progress():
+    echo parser.stream.currTok
+    case parser.stream.currTok.tKind
+    of tkDQUO:
      # Attempt to parse string literal
-      if not parseStrL(tokStream, node):
+      if parser.parseStrLit() != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
-        echo tokStream.currLine()
-        echo repeat(" ", tok.startPos), '^'
+        echo parser.stream.line
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
      else:
        echo "Parsed String Literal"
-        echo node[]
-    of nlTokType.SQUO:
+        echo parser.bnode[]
+    of tkSQUO:
      # Attempt to parse string literal
-      if not parseChrL(tokStream, node):
+      if parser.parseChrLit() != nlParseStat.OK:
        echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo tokStream.currLine()
-        echo repeat(" ", tok.startPos), '^'
+        echo parser.stream.line
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
      else:
-        echo "Parsed String Literal"
-        echo node[]
+        echo "Parsed Character Literal"
+        echo parser.bnode[]
    else:
      echo "blah blah unhandled case"
+  result = nlParseStat.OK
+      
+# Attempt to parse nlAST from nlTokStream
+proc parse*(tokStream: var nlTokStream): nlAST =
+  var parser = newParser(tokStream)
+  echo ' '
+  discard parser.parseStmt()
+
+  result = parser.ast
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@ -2,21 +2,41 @@ import nodes
 import ../lexer/tokstream

 type
-  # NOTE: Values above __FAIL__ indicate a failed state
-  nlParseStat* = enum
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
+  # NOTE2: nlParseStat is marked pure out of habit that's all
+  nlParseStat* {.pure.} = enum
    OK,
-    __FAIL__,
-    MIDAS, # Greedy search was never satisfied
+    MARKER_FAIL,
    UNMATCHED,
    TOOBIG,

+  nlAST* = object
+    root: nlNode
+
+  nlParser* = object
+    stream: nlTokStream
+    ast: nlAST
+    # the "build node" is a reference to the AST node
+    # the parser is currently modifying/building from
+    # NOTE: bnode changes frequently, it is NOT the root
+    bnode: nlNode
+    
+
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  result = if b: stat else: nlParseStat.OK

 proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.__FAIL__)
+  result = (stat >= nlParseStat.MARKER_FAIL)

+proc newParser*(tokStream: var nlTokStream): nlParser =
+  let rootNode = newNode(nkNone)
+  result = nlParser(
+    stream: tokStream,
+    ast: rootNode,
+    bnode: rootNode,
+  )

+  
 #[ "Greed" refers to something I mentioned in my discussion on
 |  Noether's grammar (in an EBNF-like language). Greed just
 |  means "everything until a condition is satisified".
@ -25,34 +45,28 @@ proc isFail*(stat: nlParseStat): bool =
  
 # Greed will consume anything until a condition is satisfied
 # Returns false if the greed was never satisfied (OMG!!)
-proc greed(tokStream: var nlTokStream, 
-           toks: var seq[nlTok], 
-           satisfy: proc(tok: nlTok): bool,
-           ): nlParseStat =
-  var tok: nlTok
-  while tokStream.nextTok(tok):
-    toks.add(tok)
-    if satisfy(tok):
+proc greed(parser: var nlParser,
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
+  while parser.stream.progress():
+    echo "im definitely here!"
+    parser.bnode.addTok(parser.stream.currTok)
+    if satisfy(parser.stream.currTok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED

-proc greedLine(tokStream: var nlTokStream, 
-               toks: var seq[nlTok], 
+proc greedLine(parser: var nlParser,
               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  var tok: nlTok
-  while tokStream.nextTok(tok):
-    toks.add(tok)
-    if satisfy(tok):
-      return true
-  result = 
+  while parser.stream.progress():
+    parser.bnode.addTok(parser.stream.currTok)
+    if satisfy(parser.stream.currTok):
+      return nlParseStat.OK
+    elif parser.stream.currTok.tKind == tkEOL:
+      return nlParseStat.UNMATCHED
+  result = nlParseStat.UNMATCHED

 #[ Templates for generating greed satisfying conditions.
 ]#

 # Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokType) = 
-  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
-  
-# Satisfied if it finds nlTok of type matchType or EOL reached
-template satisfyMatchEOL(matchType: nlTokType) = 
-  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)
+template satisfyMatch(matchType: nlTokKind): untyped  = 
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))