1 gazillion changes (mostly documenting my insanity optimizing + naming)

2025-06-19 02:09:43 +10:00 · 2025-06-19 02:09:43 +10:00 · f8697bd662
commit f8697bd662
parent ebef458186
9 changed files with 206 additions and 183 deletions
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -3,6 +3,7 @@ import noether/lexer/tok
 import noether/lexer/tokstream
 import noether/parser/parser
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"
@ -11,9 +12,8 @@ when isMainModule:
    var tokStream = newTokStream(filename, isFile=true)
    # # DumpTok
-    # var tok: nlTok
+    # while tokStream.progress():
-    # while tokStream.nextTok(tok):
+    #   echo tokStream.currTok
    #   echo tok
    # DumpTree
    discard parse(tokStream)
--- a/src/noether/lexer/lstream.nim
+++ b/src/noether/lexer/lstream.nim
@ -41,7 +41,7 @@ proc outOfBounds*(lstream: nlLStream): bool =
  result = (lstream.pos > lstream.line.len - 1)
 # Progress the lex stream to the next line (if available)
-proc progLine*(lstream: var nlLStream): bool = 
+proc progressLine*(lstream: var nlLStream): bool = 
  if lstream.stream.readLine(lstream.line):
    inc lstream.lineNum
    lstream.pos = Natural 0
@ -50,17 +50,17 @@ proc progLine*(lstream: var nlLStream): bool =
 # Progress the lex stream to the next character in the line
 # forcefully (aka does NOT check if we reached EOL)
-proc forceProgChar*(lstream: var nlLStream) = 
+proc forceProgressChar*(lstream: var nlLStream) = 
  inc lstream.pos
-# Progress the lex stream to the next character (if available)
+# # Progress the lex stream to the next character (if available)
-proc progress*(lstream: var nlLStream): bool =
+# proc progressChar*(lstream: var nlLStream): bool =
-  if not lstream.atEOL():
+#   if not lstream.atEOL():
-    lstream.forceProgChar()
+#     lstream.forceProgressChar()
-    result = true
+#     result = true
-  else:
+#   else:
-    # attempt to progress next line past EOL
+#     # attempt to progress next line past EOL
-    result = lstream.progLine()
+#     result = lstream.progressLine()
 proc currChar*(lstream: nlLStream): char = 
  result = lstream.line[lstream.pos]
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@ -2,7 +2,7 @@ include toktype
 type 
  nlTok* = object
-    tType*: nlTokType
+    tKind*: nlTokKind
    lit*: string
    lineNum*: Natural
    startPos*: Natural
@ -12,29 +12,11 @@ type
 # all other fields are expected to be filled out later.
 proc emptyTok*(startPos: int): nlTok =
  result = nlTok(
-    tType: nlTokType.NONE,
+    tKind: tkNONE,
    lit: "",
    startPos: Natural startPos,
  )
-# Checks if an nlTok has nlTokType.NONE
+# Checks if an nlTok has tkNONE
-proc isTokUntyped*(tType: nlTokType): bool =
+proc isUntyped*(tKind: nlTokKind): bool =
-  result = (tType == nlTokType.NONE)
+  result = (tKind == tkNONE)
 # Checks if an nlTok has nlTokType.EOL
 proc isTokEOL*(tok: nlTok): bool =
  result = (tok.tType == nlTokType.EOL)
 # This method is only used to convert null
 # terminator nlToks into line-feed ones.
 # Returns a copy of an nlTok, changing its type
 proc tokTermToLineFeed*(tok: nlTok): nlTok =
  result = nlTok(
    tType: nlTokType.LNFD,
    lit: tok.lit,
    lineNum: tok.lineNum,
    startPos: tok.startPos,
    endPos: tok.endPos,
  )
--- a/src/noether/lexer/tokbuilding.nim
+++ b/src/noether/lexer/tokbuilding.nim
@ -12,7 +12,7 @@ type
 # Generates an EOL token for the nlTokStream's state
 proc EOLTok(tokStream: nlTokStream): nlTok = 
  result = nlTok(
-    tType: nlTokType.EOL,
+    tKind: tkEOL,
    lit: "\0",
    lineNum: Natural tokStream.lstream.lineNum,
    startPos: Natural tokStream.lstream.pos,
@ -40,46 +40,46 @@ proc flushBuild(tokStream: var nlTokStream): nlTok =
 # Returns whether the build token has a set type yet.
 # This indicates that the build token should inherit
-# the nlTokType of the nlLStream's next character.
+# the nlTokKind of the nlLStream's next character.
 proc isUntypedBuild(tokStream: nlTokStream): bool =
-  result = isTokUntyped(tokStream.build.tType)
+  result = tokStream.build.tKind.isUntyped()
-# Check whether an nlTokType is "compatible" with the build token. 
+# Check whether an nlTokKind is "compatible" with the build token. 
 # NOTE: flushBuild() should be called when an incompatible token is discovered.
-proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
+proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
-  result = (tType == tokStream.build.tType)
+  result = (tKind == tokStream.build.tKind)
 # Add a character to the nlTokStream's build token.
 # Flushes and returns the build token if "fully built",
 # and a boolean indicating whether the nlTokStream can progress.
-proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
+proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
  # the "pos > EOL" invalid state is used intentionally
  # to indicate all tokens have been built, and return EOL Token
  if tokStream.lstream.outOfBounds():
    buildTok = some(EOLTok(tokStream))
    return true # can progress once more
-  let tType = getTokType(tokStream.lstream.currChar())
+  let tKind = getTokType(tokStream.lstream.currChar())
  # untyped build tokens must inherited immediately
  if isUntypedBuild(tokStream):
-    tokStream.build.tType = tType
+    tokStream.build.tKind = tKind
  # check if EOL reached
  if tokStream.lstream.atEOL():
      # flush old build token, the new one can be left untyped
-      let compatible = isCompatibleBuild(tokStream, tType)
+      let compatible = isCompatibleBuild(tokStream, tKind)
      result = false # DO NOT PROGRESS
      if compatible:
        # force the lstream into an invalid state by progressing beyond EOL
-        # we can then detect this state on the next progBuild and return
+        # we can then detect this state on the next progressBuild and return
        # an EOL character (very unsafe implementation but it works well)
-        tokStream.lstream.forceProgChar()
+        tokStream.lstream.forceProgressChar()
      buildTok = some(flushBuild(tokStream))
  # check character and build token compatability
-  elif not isCompatibleBuild(tokStream, tType):
+  elif not isCompatibleBuild(tokStream, tKind):
      # flush old build token, the new one inherits type
      buildTok = some(flushBuild(tokStream))
-      tokStream.build.tType = tType
+      tokStream.build.tKind = tKind
      result = true # can progress
  else:
    buildTok = none(nlTok)
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@ -9,44 +9,43 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  # 1. initialise an empty build token 
  # 2. progress to the first line
  result.resetBuild()
-  discard result.lstream.progLine()
+  discard result.lstream.progressLine()
 # Defines a short-hand notation for getting the current line
-proc currLine*(tokStream: nlTokStream): string =
+proc line*(tokStream: nlTokStream): string =
  result = tokStream.lstream.line
-# Reimplements nlLStream.progress() for nlTokStream
+# Reimplements nlLStream.progressChar for nlTokStream
 # to account for additional structure (ie the build token)
-proc progChar(tokStream: var nlTokStream): bool =
+# NOTE: progressChar progresses to lstream's next char
 proc progressChar(tokStream: var nlTokStream): bool =
  if not tokStream.lstream.atEOL():
-    tokStream.lstream.forceProgChar()
+    tokStream.lstream.forceProgressChar()
    result = true
  else:
    # attempt to progress to next line past EOL
-    result = tokStream.lstream.progLine()
+    result = tokStream.lstream.progressLine()
    tokStream.resetBuild()  
-# Generates and sets (by reference) the next token in the stream,
+# Generates and progress the next token in the nlTokStream.
-# via repeatedly calling progBuild() and progChar().
+# via repeatedly calling progressBuild() and progressChar().
 # Returns a boolean indicating whether EOF has been reached.
-# NOTE: progBuild adds lstream's current char to the build token
+# NOTE: access the new token via `tokStream.tok`
-# NOTE: progChar progresses to lstream's next char
+proc progress*(tokStream: var nlTokStream): bool =
 proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
  # Return prematurely if already closed
  if tokStream.closed:
    return false
  while true:
    var flushedTok: Option[nlTok]
    let 
-      canProgress = tokStream.progBuild(flushedTok)
+      canProgress = tokStream.progressBuild(flushedTok)
      buildComplete = flushedTok.isSome
    # canProgress & EOF reached => no more tokens to build :)
    # NOTE: reachedEOF and not canProgress => more tokens unwrapping
    if buildComplete:
      # return the finished build token, and save it as the current token
-      tok = flushedTok.get()
+      tokStream.currTok = flushedTok.get()
-      tokStream.currTok = tok
+    if canProgress and not tokStream.progressChar():
    if canProgress and not tokStream.progChar():
      tokStream.closed = true
      return buildComplete
    elif buildComplete:
--- a/src/noether/lexer/toktype.nim
+++ b/src/noether/lexer/toktype.nim
@ -1,54 +1,59 @@
 type
-  # nlTokType allows primitive nlToks to be typed,
+  # nlTokKind allows primitive nlToks to be typed,
-  # the nlTokType enum should never be directly
+  # the nlTokKind enum should never be directly
  # accessed. Use the interface in this file instead.
-  nlTokType* = enum
+  nlTokKind* = enum
-    NONE, # Placeholder Value
+    tkNONE, # Placeholder Value
-    EOF,  # End of File
+
-    EOL,  # End of Line (\0 --> EOL)
+    tkEOF,  # End of File
-    WORD, # Alphanumeric token
+    tkEOL,  # End of Line (\0 --> EOL)
-    SYMB, # Symbolic token
+
-    LNFD, # \r \n Line-Feed
+    tkWORD, # Alphanumeric token
-    WTSP, # ' ' \t Whitespace
+    tkSYMB, # Symbolic token
-    LPAR, # ( Left Parenthesis
+
-    RPAR, # ) Right Parenthesis
+    tkLNFD, # \r \n Line-Feed
-    LBRA, # { Left Brace
+    tkWTSP, # ' ' \t Whitespace
-    RBRA, # } Right Brace
+
-    LSQB, # [ Left Square Bracket
+    # RESERVED SYMBOLS
-    RSQB, # ] Right Square Bracket
+    tkLPAR, # ( Left Parenthesis
-    # LANB, # < Left Angle Bracket
+    tkRPAR, # ) Right Parenthesis
-    # RANB, # > Right Angle Bracket
+    tkLBRA, # { Left Brace
-    SQUO, # ' Single Quotation Marking
+    tkRBRA, # } Right Brace
-    DQUO, # " Double Quotation Marking
+    tkLSQB, # [ Left Square Bracket
-    GRVA, # ` Grave Accent
+    tkRSQB, # ] Right Square Bracket
-    HASH, # # Number Sign (Hashtag)
+    # tkLANB, # < Left Angle Bracket
    # tkRANB, # > Right Angle Bracket
    tkSQUO, # ' Single Quotation Marking
    tkDQUO, # " Double Quotation Marking
    tkGRVA, # ` Grave Accent
    tkHASH, # # Number Sign (Hashtag)
-# Classifies a character to its nlTokType
+# Classifies a character to its nlTokKind
-proc getTokType*(c: char): nlTokType =
+proc getTokType*(c: char): nlTokKind =
  case c:
  of '\0', '\r', '\n':
-    result = nlTokType.EOL
+    result = tkEOL
  of ' ', '\t':
-    result = nlTokType.WTSP
+    result = tkWTSP
  of '(':
-    result = nlTokType.LPAR
+    result = tkLPAR
  of ')':
-    result = nlTokType.RPAR
+    result = tkRPAR
  of '{':
-    result = nlTokType.LBRA
+    result = tkLBRA
  of '}':
-    result = nlTokType.RBRA
+    result = tkRBRA
  of '[':
-    result = nlTokType.LSQB
+    result = tkLSQB
  of ']':
-    result = nlTokType.RSQB
+    result = tkRSQB
  of '\'':
-    result = nlTokType.SQUO
+    result = tkSQUO
  of '\"':
-    result = nlTokType.DQUO
+    result = tkDQUO
  of '`':
-    result = nlTokType.GRVA
+    result = tkGRVA
  of '#':
-    result = nlTokType.HASH
+    result = tkHASH
  else:
-    result = nlTokType.WORD
+    result = tkWORD
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@ -1,19 +1,48 @@
 import std/options
 from ../lexer/tok import nlTok
 # from ../lexer/tokstream import 
 type
-  # NOTE: by the end of parsing NO nodes should
+  # NOTE: by the end of parsing NO nodes should have nkNone
-  # NOTE: have nlNodeType.NONE
+  nlNodeKind* = enum
-  nlNodeType* = enum
+    nkNone, # Placeholder Value
-    NONE, # Placeholder Value
+
-    TERM, # Indicates the tree has terminated
+    nkStrLit, # String Literal
-    STRL, # String Literal
+    nkChrLit, # Character Literal
-    CHRL, # Character Literal
+
  # NOTE: always check parent != nil when traversing the tree
  nlNode* {.acyclic.} = ref object of RootObj
-    nType*: nlNodeType
+    nKind*: nlNodeKind
-    toks*: seq[nlTok] # nodes store the tokens that build them
+    toks*: seq[nlTok] # nodes (may) store the tokens that build them
-    # left, right: nlNode
+    parent*: nlNode
  # Purely abstract type that all nlNode objects
  # with children are expected to inherit from.
  nlBranchNode* {.acyclic.} = ref object of nlNode
    child: UncheckedArray[nlNode]
  nlBiNode* {.acyclic.} = ref object of nlBranchNode
 proc childCount*(node: nlNode): int {.inline.} = 0
 proc childCount*(node: nlBiNode): int {.inline.} = 2
 proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} = 
  result = none(nlNode)
 proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} = 
  result = some(node.child[i])
 proc newNode*(nKind: nlNodeKind): nlNode =
  result = nlNode(
    nKind: nKind,
  )
 proc newBiNode*(nKind: nlNodeKind): nlNode =
  result = nlBiNode(
    nKind: nKind,
  ) 
 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
  echo "AM I HERE?"
  echo node[]
  echo node.toks
  node.toks.add(tok)
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -3,61 +3,55 @@ include parseutil
 # NOTE: Matching between two tokens will fill `node` with everything
 # NOTE: between those two tokens EXCLUDING the two tokens themselves.
-proc parseMatch(tokStream: var nlTokStream, 
+proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
                node: var nlNode,
                matchType: nlTokType): nlParseStat =
  result = greed(
-    tokStream, 
+    parser,
    node.toks, 
    satisfyMatch(matchType),
  )
-proc parseMatchLine(tokStream: var nlTokStream, 
+proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
-                   node: var nlNode,
+  result = greedLine(
-                   matchType: nlTokType): nlParseStat =
+    parser, 
-  result = greed(
+    satisfyMatch(matchType),
    tokStream, 
    node.toks, 
    satisfyMatchEOL(matchType),
  )
-proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
+proc parseStrLit(parser: var nlParser): nlParseStat =
-  node = nlNode(
+  result = parser.parseMatch(tkDQUO)
    nType: nlNodeType.STRL
  )
  node.addTok(tokStream.currTok)
  result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
-proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
+proc parseChrLit(parser: var nlParser): nlParseStat =
-  node = nlNode(
+  result = parser.parseMatch(tkSQUO)
-    nType: nlNodeType.CHRL
+  
-  )
+proc parseStmt(parser: var nlParser): nlParseStat = 
-  node.addTok(tokStream.currTok)
+  # initialise build node as none just for the hell of it
-  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
+  
-  result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
+  while parser.stream.progress():
-
+    echo parser.stream.currTok
-# Attempt to form an nlAST from a nlTokStream
+    case parser.stream.currTok.tKind
-proc parse*(tokStream: var nlTokStream): nlNode = 
+    of tkDQUO:
  var tok: nlTok
  var node: nlNode
  while tokStream.nextTok(tok):
    case tok.tType:
    of nlTokType.DQUO:
      # Attempt to parse string literal
-      if not parseStrL(tokStream, node):
+      if parser.parseStrLit() != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
-        echo tokStream.currLine()
+        echo parser.stream.line
-        echo repeat(" ", tok.startPos), '^'
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
      else:
        echo "Parsed String Literal"
-        echo node[]
+        echo parser.bnode[]
-    of nlTokType.SQUO:
+    of tkSQUO:
      # Attempt to parse string literal
-      if not parseChrL(tokStream, node):
+      if parser.parseChrLit() != nlParseStat.OK:
        echo "Unmatched Single Quotation! Malformed Character Literal"
-        echo tokStream.currLine()
+        echo parser.stream.line
-        echo repeat(" ", tok.startPos), '^'
+        echo repeat(" ", parser.stream.currTok.startPos), '^'
      else:
-        echo "Parsed String Literal"
+        echo "Parsed Character Literal"
-        echo node[]
+        echo parser.bnode[]
    else:
      echo "blah blah unhandled case"
  result = nlParseStat.OK
 # Attempt to parse nlAST from nlTokStream
 proc parse*(tokStream: var nlTokStream): nlAST =
  var parser = newParser(tokStream)
  echo ' '
  discard parser.parseStmt()
  result = parser.ast
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@ -2,21 +2,41 @@ import nodes
 import ../lexer/tokstream
 type
-  # NOTE: Values above __FAIL__ indicate a failed state
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
-  nlParseStat* = enum
+  # NOTE2: nlParseStat is marked pure out of habit that's all
  nlParseStat* {.pure.} = enum
    OK,
-    __FAIL__,
+    MARKER_FAIL,
    MIDAS, # Greedy search was never satisfied
    UNMATCHED,
    TOOBIG,
  nlAST* = object
    root: nlNode
  nlParser* = object
    stream: nlTokStream
    ast: nlAST
    # the "build node" is a reference to the AST node
    # the parser is currently modifying/building from
    # NOTE: bnode changes frequently, it is NOT the root
    bnode: nlNode
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  result = if b: stat else: nlParseStat.OK
 proc isFail*(stat: nlParseStat): bool = 
-  result = (stat >= nlParseStat.__FAIL__)
+  result = (stat >= nlParseStat.MARKER_FAIL)
 proc newParser*(tokStream: var nlTokStream): nlParser =
  let rootNode = newNode(nkNone)
  result = nlParser(
    stream: tokStream,
    ast: rootNode,
    bnode: rootNode,
  )
 #[ "Greed" refers to something I mentioned in my discussion on
 |  Noether's grammar (in an EBNF-like language). Greed just
 |  means "everything until a condition is satisified".
@ -25,34 +45,28 @@ proc isFail*(stat: nlParseStat): bool =
 # Greed will consume anything until a condition is satisfied
 # Returns false if the greed was never satisfied (OMG!!)
-proc greed(tokStream: var nlTokStream, 
+proc greed(parser: var nlParser,
-           toks: var seq[nlTok], 
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
-           satisfy: proc(tok: nlTok): bool,
+  while parser.stream.progress():
-           ): nlParseStat =
+    echo "im definitely here!"
-  var tok: nlTok
+    parser.bnode.addTok(parser.stream.currTok)
-  while tokStream.nextTok(tok):
+    if satisfy(parser.stream.currTok):
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED
-proc greedLine(tokStream: var nlTokStream, 
+proc greedLine(parser: var nlParser,
               toks: var seq[nlTok], 
               satisfy: proc(tok: nlTok): bool): nlParseStat =
-  var tok: nlTok
+  while parser.stream.progress():
-  while tokStream.nextTok(tok):
+    parser.bnode.addTok(parser.stream.currTok)
-    toks.add(tok)
+    if satisfy(parser.stream.currTok):
-    if satisfy(tok):
+      return nlParseStat.OK
-      return true
+    elif parser.stream.currTok.tKind == tkEOL:
-  result = 
+      return nlParseStat.UNMATCHED
  result = nlParseStat.UNMATCHED
 #[ Templates for generating greed satisfying conditions.
 ]#
 # Satisfied if it finds nlTok of type matchType
-template satisfyMatch(matchType: nlTokType) = 
+template satisfyMatch(matchType: nlTokKind): untyped  = 
-  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
+  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
 # Satisfied if it finds nlTok of type matchType or EOL reached
 template satisfyMatchEOL(matchType: nlTokType) = 
  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)