Typo fix + start of error handling

Restructure attempt #087 :(
Garbage collection *so to speak*
2025-07-06 21:42:09 +10:00 · 2025-06-19 12:51:03 +10:00 · 2025-06-19 09:41:15 +10:00 · 2025-06-19 09:38:08 +10:00 · 2025-06-19 09:11:49 +10:00 · 2025-06-19 08:48:31 +10:00
20 changed files with 460 additions and 434 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,6 @@
 __pycache__/
 bin/
 # TEMP: used while debugging 
 # (and cause I'm super duper lazy)
 src/nlx 
--- a/lang/demo/single_toks.no
+++ b/lang/demo/single_toks.no
@ -0,0 +1,2 @@
 [a]b(#)
 (c)d[e]
--- a/py/m.py
+++ b/py/m.py
@ -2,7 +2,7 @@
 import sys
 import readline
-from noether.math import *
+from noether.lib.math import *
 from noether.cli import *
--- a/src/ddemo
+++ b/src/ddemo
@ -0,0 +1,11 @@
 #!/usr/bin/env bash
 # Build nlx and run it on one demo file.
 # Usage: ddemo DEMOFILE
 # NOTE: the relative paths below (nlx.nim, ../lang/demo) only resolve
 # NOTE: when the script is run from the directory containing nlx.nim.
 set -e
 if [ -z "$1" ]; then
  echo "Usage: ddemo DEMOFILE"
  echo "Demo files are located in lang/demo"
  exit 1
 fi
 nim c nlx.nim
 # Quote the path so demo names with spaces or glob characters are
 # passed to nlx as a single, unexpanded argument.
 ./nlx "../lang/demo/$1"
--- a/src/nlx.nim
+++ b/src/nlx.nim
@ -1,22 +1,29 @@
 import os
-import noether/lexer/tok
+import noether/lib/io
-import noether/lexer/tokstream
+import noether/lexer/lex
-import noether/parser/parser
+import noether/parser/parse
 {.hint: "Don't forget to drink more water (^_^)".}
 when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"
-  if paramCount() > 0:
+  # really lazy argparse implementation (temporary)
-    let filename = paramStr(1)
+  let
-    var tokStream = newTokStream(filename, isFile=true)
+    paramC = paramCount() 
-    
+    cmd = if paramC > 2: paramStr 1
-    # # DumpTok
+          else: "tok"
    # var tok: nlTok
    # while tokStream.nextTok(tok):
    #   echo tok
  var stream = if paramC > 0: streamFile(paramStr paramC)
               else: streamString(readAll stdin)
  var lexer = newLexer(stream)
  if cmd == "tok":
    # DumpTok
    while lexer.progress():
      echo lexer.tok
  elif cmd == "tree":
    discard
    # DumpTree
-    discard parse(tokStream)
+    # discard parse(tokStream)
  else:
-    echo "usage: nlx filename"
+    echo "Usage: nlx [tok|tree] <demo>\n    demo files are accessible at lang/demo"
--- a/src/noether.nim
+++ b/src/noether.nim
@ -2,4 +2,4 @@
 # uses this file as the main entry point of the application.
 when isMainModule:
-  echo "Noether Lang"
+  echo "Noether Lang v0.1.0"
--- a/src/noether/lexer/lex.nim
+++ b/src/noether/lexer/lex.nim
@ -0,0 +1,178 @@
 import 
  streams, 
  options
 import tok
 export tok
 type
  # Abstracts the "building process" (lexing) 
  # of nlTok objects from a given Stream of characters.
  nlLexer* = object
    stream: Stream # character source consumed by readChar()
    done*: bool # once set, progress() returns false immediately
    # store current token and upcoming (build) token
    tok*: nlTok # current token (the last one flushed by progress())
    btok: nlTok # the build token (still accumulating characters)
    # the most recently read character and its token kind
    char: char
    cTKind: nlTokKind
    # track line number, line content, etc
    line: string # readChar() appends every character it reads here
    lineNum: int # starts at 1; bumped by readChar() after a '\n'
    pos: int # index of `char` in `line` (-1 before the first read)
 # True when the most recently read character is a line feed ('\n').
 proc atEOL(lexer: nlLexer): bool {.inline.} =
  lexer.char == '\n'
 # True when the most recently read character is the '\0' EOF marker.
 proc atEOF(lexer: nlLexer): bool {.inline.} =
  lexer.char == '\0'
 # Create a lexer over `stream`, positioned before the first character.
 proc newLexer*(stream: var Stream): nlLexer =
  result.stream = stream
  result.done = false
  # both tokens start out empty and untyped
  result.tok = emptyTok(0)
  result.btok = emptyTok(0)
  # '\0' stands in as an invalid char until the first read
  result.char = '\0'
  result.cTKind = tkNONE
  result.line = ""
  result.lineNum = 1
  # the first readChar() increments this to 0
  result.pos = -1
 # Maps the lexer's current character onto its nlTokKind.
 # Any character without a dedicated kind is treated as part of a word.
 proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
  result = case lexer.char
    of '\0': tkEOF
    of '\r', '\n': tkEOL
    of ' ', '\t': tkWTSP
    of '(': tkLPAR
    of ')': tkRPAR
    of '{': tkLBRA
    of '}': tkRBRA
    of '[': tkLSQB
    of ']': tkRSQB
    of '\'': tkSQUO
    of '\"': tkDQUO
    of '`': tkGRVA
    of '#': tkHASH
    else: tkWORD
 #[ ====================================================== ]
 | nlLexer Internal Interface for Token Construction ]
 ]#
 # Replace the build token with an empty, untyped one that
 # starts at the lexer's current position.
 proc resetBuild(lexer: var nlLexer) =
  let startAt = lexer.pos
  lexer.btok = emptyTok(startAt)
 # "Finishes" the build token by setting various properties
 proc finishBuild(lexer: var nlLexer) =
  let
    first = lexer.btok.startPos
    lastRead = lexer.line.high
  lexer.btok.lineNum = lexer.lineNum
  lexer.btok.endPos = lexer.pos
  # the literal stops just before the most recently read character
  lexer.btok.lit = lexer.line[first .. lastRead - 1]
 # Complete the build token, hand it back to the caller,
 # and start a fresh build token in its place.
 proc flushBuild(lexer: var nlLexer): nlTok = 
  lexer.finishBuild()
  result = lexer.btok
  lexer.resetBuild()
 # Can the current char NOT be added to the build token?
 # (if so the build token should be flushed)
 # NOTE: a '\n' always forces a flush, whatever the build token's kind.
 # NOTE: '\r' is classified as tkEOL too, so it flushes any build token
 # NOTE: of a different kind.
 proc isIncompatibleBuild(lexer: nlLexer): bool =
  let sameKind = (lexer.cTKind == lexer.btok.kind)
  result = lexer.atEOL() or not sameKind
 # Give the build token the kind of the current character.
 proc inherit(lexer: var nlLexer) = 
  let kindOfChar = lexer.cTKind
  lexer.btok.kind = kindOfChar
 # Offer the current character to the build token.
 # Returns the finished token when the character forces a flush,
 # otherwise none.
 proc appendBuild(lexer: var nlLexer): Option[nlTok] =
  # an untyped build token takes on the character's kind straight away
  if lexer.btok.isUntyped():
    lexer.inherit()
  # character still fits the build token: nothing to hand back yet
  if not lexer.isIncompatibleBuild():
    return none(nlTok)
  # flush the old build token; its replacement inherits the new kind
  let finished = lexer.flushBuild()
  lexer.inherit()
  result = some(finished)
 #[ ========================================= ]
 | nlLexer Internal Char Streaming Interface ]
 ]#
 # Pull the next character out of the stream and update the lexer's
 # char, kind, line and position bookkeeping.
 # Returns true when that character is the EOF marker.
 # NOTE: Stream.readChar raises IOError on error, returns \0 on EOF
 proc readChar(lexer: var nlLexer): bool =
  # the character being replaced ended a line, so count the new line
  if lexer.char == '\n':
    lexer.lineNum.inc
  let next = lexer.stream.readChar()
  lexer.char = next
  lexer.cTKind = lexer.classifyTok()
  lexer.line.add(next)
  lexer.pos.inc
  result = (next == '\0')
 #[ ========================
 | nlLexer Public Interface
 ]#
 # Read ahead until the lexer sits on a '\n' (or the stream ends)
 # and return everything accumulated in `lexer.line`.
 # NOTE: characters are consumed without building tokens, so the
 # NOTE: token state is NOT kept consistent (unsafe).
 # NOTE: ONLY call if a lex/parse error needs displaying
 proc unsafeGetLine*(lexer: var nlLexer): string =
  while not lexer.atEOL():
    # readChar() returns true once EOF is hit: stop there rather
    # than reading past the end of the stream forever
    if lexer.readChar():
      break
  result = lexer.line
 # Lexes the next token by repeatedly calling readChar() and
 # appendBuild() until a token is flushed or the input runs out.
 # Returns true if a new token was produced, and false when the
 # input is exhausted or the lexer is already done.
 # NOTE: access the new token via `lexer.tok`
 proc progress*(lexer: var nlLexer): bool =
  # Return prematurely if already closed
  if lexer.done:
    return false
  while true:
    let 
      atEOF = lexer.readChar()
      flushedTok = lexer.appendBuild()
      newTokBuilt = flushedTok.isSome
    if newTokBuilt:
      lexer.tok = flushedTok.get()
    if atEOF:
      # Close the lexer whether or not a token came out, so later calls
      # do not keep reading past EOF (each such read would append
      # another '\0' to `line` and bump `pos`).
      lexer.done = true
    if newTokBuilt or atEOF:
      return newTokBuilt
--- a/src/noether/lexer/lstream.nim
+++ b/src/noether/lexer/lstream.nim
@ -1,66 +0,0 @@
 import std/streams
 import std/options
 import tok
 export tok
 type
  # Character streaming for the nlTokStream
  nlLStream = object
    stream: Stream
    # row/column positions
    line*: string 
    lineNum*: Natural
    pos*: Natural
 proc streamFile*(filename: string): FileStream =
  result = newFileStream(filename, fmRead)
 proc streamString*(str: string): StringStream =
  result = newStringStream(str)
 proc newLStream*(content: string, isFile: bool = false): nlLStream =
  result = nlLStream(
    stream: if isFile: streamFile(content) else: streamString(content),
    line: "",
    lineNum: Natural 0,
    pos: Natural 0,
  )
 # Checks whether we've reached EOL
 # NOTE: also checks if we've surpassed it (ie invalid lstream.pos)
 proc atEOL*(lstream: nlLStream): bool = 
  result = (lstream.pos >= lstream.line.len - 1)
 # Checks whether we are EXACTLY at EOL, but not surpassed
 proc exactlyEOL*(lstream: nlLStream): bool =
  result = (lstream.pos == lstream.line.len - 1)
 # Checks whether we have surpassed EOL
 proc outOfBounds*(lstream: nlLStream): bool = 
  result = (lstream.pos > lstream.line.len - 1)
 # Progress the lex stream to the next line (if available)
 proc progLine*(lstream: var nlLStream): bool = 
  if lstream.stream.readLine(lstream.line):
    inc lstream.lineNum
    lstream.pos = Natural 0
    return true
  return false
 # Progress the lex stream to the next character in the line
 # forcefully (aka does NOT check if we reached EOL)
 proc forceProgChar*(lstream: var nlLStream) = 
  inc lstream.pos
 # Progress the lex stream to the next character (if available)
 proc progress*(lstream: var nlLStream): bool =
  if not lstream.atEOL():
    lstream.forceProgChar()
    result = true
  else:
    # attempt to progress next line past EOL
    result = lstream.progLine()
 proc currChar*(lstream: nlLStream): char = 
  result = lstream.line[lstream.pos]
--- a/src/noether/lexer/tok.nim
+++ b/src/noether/lexer/tok.nim
@ -1,40 +1,53 @@
-include toktype
+type
  # nlTokKind allows primitive nlToks to be typed,
  # the nlTokKind enum should never be directly
  # accessed. Use the interface in this file instead.
  nlTokKind* = enum
    tkNONE, # Placeholder Value
    tkEOF,  # End of File
    tkEOL,  # End of Line (\0 --> EOL)
    tkWORD, # Alphanumeric token
    tkSYMB, # Symbolic token
    tkLNFD, # \r \n Line-Feed
    tkWTSP, # ' ' \t Whitespace
    # RESERVED SYMBOLS
    tkLPAR, # ( Left Parenthesis
    tkRPAR, # ) Right Parenthesis
    tkLBRA, # { Left Brace
    tkRBRA, # } Right Brace
    tkLSQB, # [ Left Square Bracket
    tkRSQB, # ] Right Square Bracket
    # tkLANB, # < Left Angle Bracket
    # tkRANB, # > Right Angle Bracket
    tkSQUO, # ' Single Quotation Marking
    tkDQUO, # " Double Quotation Marking
    tkGRVA, # ` Grave Accent
    tkHASH, # # Number Sign (Hashtag)
 type 
-  nlTok* = object
+  nlTok* = tuple
-    tType*: nlTokType
+    # NOTE: nlTokBuilder will mutate nlTok.kind
-    lit*: string
+    kind: nlTokKind
-    lineNum*: Natural
+    lit: string
-    startPos*: Natural
+    lineNum: int
-    endPos*: Natural
+    startPos: int
    endPos: int
 # Generates an "empty" nlTok with only a startPos,
 # all other fields are expected to be filled out later.
-proc emptyTok*(startPos: int): nlTok =
+proc emptyTok*(startPos: int): nlTok {.inline.} =
-  result = nlTok(
+  result = (
-    tType: nlTokType.NONE,
+    kind: tkNONE,
    lit: "",
-    startPos: Natural startPos,
+    lineNum: 0,
    startPos: startPos,
    endPos: startPos,
  )
-# Checks if an nlTok has nlTokType.NONE
+# Checks if an nlTok has tkNONE
-proc isTokUntyped*(tType: nlTokType): bool =
+proc isUntyped*(tok: nlTok): bool {.inline.} =
-  result = (tType == nlTokType.NONE)
+  result = (tok.kind == tkNONE)
 # Checks if an nlTok has nlTokType.EOL
 proc isTokEOL*(tok: nlTok): bool =
  result = (tok.tType == nlTokType.EOL)
 # This method is only used to convert null
 # terminator nlToks into line-feed ones.
 # Returns a copy of an nlTok, changing its type
 proc tokTermToLineFeed*(tok: nlTok): nlTok =
  result = nlTok(
    tType: nlTokType.LNFD,
    lit: tok.lit,
    lineNum: tok.lineNum,
    startPos: tok.startPos,
    endPos: tok.endPos,
  )
--- a/src/noether/lexer/tokbuilding.nim
+++ b/src/noether/lexer/tokbuilding.nim
@ -1,86 +0,0 @@
 include lstream
 type
  # Provides a stream-like interface for lexing nlToks
  # Internally reliant on the functionality of nlLStream
  nlTokStream* = object
    lstream: nlLStream
    build: nlTok   # the build token
    currTok*: nlTok # the current token
    closed: bool # EOF + all tokens built
 # Generates an EOL token for the nlTokStream's state
 proc EOLTok(tokStream: nlTokStream): nlTok = 
  result = nlTok(
    tType: nlTokType.EOL,
    lit: "\0",
    lineNum: Natural tokStream.lstream.lineNum,
    startPos: Natural tokStream.lstream.pos,
    endPos: Natural tokStream.lstream.pos,
  )
 # Resets the build token to an "empty" nlTok
 proc resetBuild(tokStream: var nlTokStream) =
  tokStream.build = emptyTok(tokStream.lstream.pos)
 # Completes a token generated by emptyTok()
 # based on the nlTokStream's nlLStream's
 # current line and character positions
 proc finishBuild(ts: var nlTokStream) =
  ts.build.lineNum = Natural ts.lstream.lineNum
  ts.build.endPos = Natural ts.lstream.pos
  ts.build.lit = ts.lstream.line[ts.build.startPos ..< ts.build.endPos]
 # Returns the nlTokStream's build token and
 # empties the build token's contents.
 proc flushBuild(tokStream: var nlTokStream): nlTok = 
  finishBuild(tokStream)
  result = tokStream.build
  resetBuild(tokStream)
 # Returns whether the build token has a set type yet.
 # This indicates that the build token should inherit
 # the nlTokType of the nlLStream's next character.
 proc isUntypedBuild(tokStream: nlTokStream): bool =
  result = isTokUntyped(tokStream.build.tType)
 # Check whether an nlTokType is "compatible" with the build token. 
 # NOTE: flushBuild() should be called when an incompatible token is discovered.
 proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
  result = (tType == tokStream.build.tType)
 # Add a character to the nlTokStream's build token.
 # Flushes and returns the build token if "fully built",
 # and a boolean indicating whether the nlTokStream can progress.
 proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
  # the "pos > EOL" invalid state is used intentionally
  # to indicate all tokens have been built, and return EOL Token
  if tokStream.lstream.outOfBounds():
    buildTok = some(EOLTok(tokStream))
    return true # can progress once more
  let tType = getTokType(tokStream.lstream.currChar())
  # untyped build tokens must inherited immediately
  if isUntypedBuild(tokStream):
    tokStream.build.tType = tType
  # check if EOL reached
  if tokStream.lstream.atEOL():
      # flush old build token, the new one can be left untyped
      let compatible = isCompatibleBuild(tokStream, tType)
      result = false # DO NOT PROGRESS
      if compatible:
        # force the lstream into an invalid state by progressing beyond EOL
        # we can then detect this state on the next progBuild and return
        # an EOL character (very unsafe implementation but it works well)
        tokStream.lstream.forceProgChar()
      buildTok = some(flushBuild(tokStream))
  # check character and build token compatability
  elif not isCompatibleBuild(tokStream, tType):
      # flush old build token, the new one inherits type
      buildTok = some(flushBuild(tokStream))
      tokStream.build.tType = tType
      result = true # can progress
  else:
    buildTok = none(nlTok)
    result = true # can progress
--- a/src/noether/lexer/tokkind.nim
+++ b/src/noether/lexer/tokkind.nim
@ -0,0 +1 @@
--- a/src/noether/lexer/tokstream.nim
+++ b/src/noether/lexer/tokstream.nim
@ -1,53 +0,0 @@
 include tokbuilding
 # Initialises a new nlTokStream on a string or file
 proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  result = nlTokStream(
    lstream: newLStream(content, isFile=isFile),
    closed: false,
  )
  # 1. initialise an empty build token 
  # 2. progress to the first line
  result.resetBuild()
  discard result.lstream.progLine()
 # Defines a short-hand notation for getting the current line
 proc currLine*(tokStream: nlTokStream): string =
  result = tokStream.lstream.line
 # Reimplements nlLStream.progress() for nlTokStream
 # to account for additional structure (ie the build token)
 proc progChar(tokStream: var nlTokStream): bool =
  if not tokStream.lstream.atEOL():
    tokStream.lstream.forceProgChar()
    result = true
  else:
    # attempt to progress to next line past EOL
    result = tokStream.lstream.progLine()
    tokStream.resetBuild()  
 # Generates and sets (by reference) the next token in the stream,
 # via repeatedly calling progBuild() and progChar().
 # Returns a boolean indicating whether EOF has been reached.
 # NOTE: progBuild adds lstream's current char to the build token
 # NOTE: progChar progresses to lstream's next char
 proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
  # Return prematurely if already closed
  if tokStream.closed:
    return false
  while true:
    var flushedTok: Option[nlTok]
    let 
      canProgress = tokStream.progBuild(flushedTok)
      buildComplete = flushedTok.isSome
    # canProgress & EOF reached => no more tokens to build :)
    # NOTE: reachedEOF and not canProgress => more tokens unwrapping
    if buildComplete:
      # return the finished build token, and save it as the current token
      tok = flushedTok.get()
      tokStream.currTok = tok
    if canProgress and not tokStream.progChar():
      tokStream.closed = true
      return buildComplete
    elif buildComplete:
      return true
--- a/src/noether/lexer/toktype.nim
+++ b/src/noether/lexer/toktype.nim
@ -1,54 +0,0 @@
 type
  # nlTokType allows primitive nlToks to be typed,
  # the nlTokType enum should never be directly
  # accessed. Use the interface in this file instead.
  nlTokType* = enum
    NONE, # Placeholder Value
    EOF,  # End of File
    EOL,  # End of Line (\0 --> EOL)
    WORD, # Alphanumeric token
    SYMB, # Symbolic token
    LNFD, # \r \n Line-Feed
    WTSP, # ' ' \t Whitespace
    LPAR, # ( Left Parenthesis
    RPAR, # ) Right Parenthesis
    LBRA, # { Left Brace
    RBRA, # } Right Brace
    LSQB, # [ Left Square Bracket
    RSQB, # ] Right Square Bracket
    # LANB, # < Left Angle Bracket
    # RANB, # > Right Angle Bracket
    SQUO, # ' Single Quotation Marking
    DQUO, # " Double Quotation Marking
    GRVA, # ` Grave Accent
    HASH, # # Number Sign (Hashtag)
 # Classifies a character to its nlTokType
 proc getTokType*(c: char): nlTokType =
  case c:
  of '\0', '\r', '\n':
    result = nlTokType.EOL
  of ' ', '\t':
    result = nlTokType.WTSP
  of '(':
    result = nlTokType.LPAR
  of ')':
    result = nlTokType.RPAR
  of '{':
    result = nlTokType.LBRA
  of '}':
    result = nlTokType.RBRA
  of '[':
    result = nlTokType.LSQB
  of ']':
    result = nlTokType.RSQB
  of '\'':
    result = nlTokType.SQUO
  of '\"':
    result = nlTokType.DQUO
  of '`':
    result = nlTokType.GRVA
  of '#':
    result = nlTokType.HASH
  else:
    result = nlTokType.WORD
--- a/src/noether/lib/err.nim
+++ b/src/noether/lib/err.nim
@ -0,0 +1 @@
 # Stub: presumably meant to print the header of an error report.
 # TODO: implement; for now it only exists so the module parses.
 proc echoErrorHeader() =
  discard
--- a/src/noether/lib/io.nim
+++ b/src/noether/lib/io.nim
@ -0,0 +1,7 @@
 import std/streams
 # Open `filename` for reading and return it as a Stream.
 # Raises IOError if the file cannot be opened. newFileStream would
 # return nil instead, which only fails later on the first read.
 proc streamFile*(filename: string): Stream {.inline.} =
  result = openFileStream(filename, fmRead)
 # Wrap an in-memory string in a Stream.
 proc streamString*(str: string): Stream {.inline.} =
  newStringStream(str)
--- a/src/noether/parser/err.nim
+++ b/src/noether/parser/err.nim
@ -0,0 +1,8 @@
 #[ Error codes and messages directly associated with
 | nlParser and its procedures are written here.
 | General error functionality is in src/noether/lib/err.nim
 ]#
 import parser
--- a/src/noether/parser/nodes.nim
+++ b/src/noether/parser/nodes.nim
@ -1,18 +1,44 @@
 import std/options
 from ../lexer/tok import nlTok
 # from ../lexer/tokstream import 
 type
-  # NOTE: by the end of parsing NO nodes should
+  # NOTE: by the end of parsing NO nodes should have nkNone
-  # NOTE: have nlNodeType.NONE
+  nlNodeKind* = enum
-  nlNodeType* = enum
+    nkNone, # Placeholder Value
-    NONE, # Placeholder Value
+
-    TERM, # Indicates the tree has terminated
+    nkStrLit, # String Literal
-    STRL, # String Literal
+    nkChrLit, # Character Literal
-    CHRL, # Character Literal
+
  # NOTE: always check parent != nil when traversing the tree
  nlNode* {.acyclic.} = ref object of RootObj
-    nType*: nlNodeType
+    nKind*: nlNodeKind
-    toks*: seq[nlTok] # nodes store the tokens that build them
+    toks*: seq[nlTok] # nodes (may) store the tokens that build them
-    # left, right: nlNode
+    parent*: nlNode
  # Purely abstract type that all nlNode objects
  # with children are expected to inherit from.
  nlBranchNode* {.acyclic.} = ref object of nlNode
    child: UncheckedArray[nlNode]
  nlBiNode* {.acyclic.} = ref object of nlBranchNode
 proc childCount*(node: nlNode): int {.inline.} = 0
 proc childCount*(node: nlBiNode): int {.inline.} = 2
 proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} = 
  result = none(nlNode)
 proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} = 
  result = some(node.child[i])
 proc newNode*(nKind: nlNodeKind): nlNode =
  result = nlNode(
    nKind: nKind,
  )
 proc newBiNode*(nKind: nlNodeKind): nlNode =
  result = nlBiNode(
    nKind: nKind,
  ) 
 # Short-hand way of appending a token to a node's token sequence
 proc addTok*(node: nlNode, tok: nlTok) =
--- a/src/noether/parser/parse.nim
+++ b/src/noether/parser/parse.nim
@ -0,0 +1,58 @@
 import strutils
 include parser
 # Consume tokens until one of kind `matchType` is found.
 # NOTE: presumably the tokens in between are collected by greed() and
 # NOTE: the matching token itself is excluded (confirm in greed()).
 proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  let untilMatch = satisfyMatch(matchType)
  result = parser.greed(untilMatch)
 # Same as parseMatch, but delegates to greedLine() instead of greed().
 proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  let untilMatch = satisfyMatch(matchType)
  result = parser.greedLine(untilMatch)
 # A string literal runs up to the next double quotation mark.
 proc parseStrLit(parser: var nlParser): nlParseStat =
  parseMatchLine(parser, tkDQUO)
 # A character literal runs up to the next single quotation mark.
 proc parseChrLit(parser: var nlParser): nlParseStat =
  parseMatchLine(parser, tkSQUO)
 # Walks the token stream and tries to parse a string or character
 # literal whenever an opening quotation token is met, echoing the
 # outcome of every attempt. Always returns nlParseStat.OK.
 proc parseStmt(parser: var nlParser): nlParseStat = 
  while parser.progressStream():
    echo "----- Current Token: ", parser.currTok
    # the nlTok tuple names its kind field `kind` (there is no `tKind`)
    case parser.currTok.kind
    of tkDQUO:
      # Attempt to parse string literal
      if parser.parseStrLit() != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo parser.line
        # caret under the current token's start column
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
      else:
        echo "Parsed String Literal"
        echo parser.bnode[], '\n'
    of tkSQUO:
      # Attempt to parse character literal
      if parser.parseChrLit() != nlParseStat.OK:
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo parser.line
        # caret under the current token's start column
        echo repeat(" ", parser.currTok.startPos), '^', '\n'
      else:
        echo "Parsed Character Literal"
        echo parser.bnode[], '\n'
    of tkEOL:
      # TODO: handle this case, don't just discard
      discard
    else:
      echo "blah blah unhandled case\n"
  result = nlParseStat.OK
 # Build a parser over `tokStream`, run parseStmt() on it (ignoring
 # its status), and return the parser's AST.
 proc parse*(tokStream: var nlTokStream): nlAST =
  var parser = newParser(tokStream)
  echo ' '
  discard parseStmt(parser)
  parser.ast
--- a/src/noether/parser/parser.nim
+++ b/src/noether/parser/parser.nim
@ -1,63 +1,90 @@
-import strutils
+import nodes
-include parseutil
+import ../lexer/lex
-# NOTE: Matching between two tokens will fill `node` with everything
+type
-# NOTE: between those two tokens EXCLUDING the two tokens themselves.
+  # NOTE1: Values above MARKER_FAIL indicate a failed state
-proc parseMatch(tokStream: var nlTokStream, 
+  # NOTE2: nlParseStat is marked pure out of habit that's all
-                node: var nlNode,
+  nlParseStat* {.pure.} = enum
-                matchType: nlTokType): nlParseStat =
+    OK,
-  result = greed(
+    MARKER_FAIL,
-    tokStream, 
+    UNMATCHED,
-    node.toks, 
+    TOOBIG,
-    satisfyMatch(matchType),
+
-  )
+  nlAST* = object
-proc parseMatchLine(tokStream: var nlTokStream, 
+    root: nlNode
-                   node: var nlNode,
+
-                   matchType: nlTokType): nlParseStat =
+  nlParser* = object
-  result = greed(
+    stream: nlTokStream
-    tokStream, 
+    ast: nlAST
-    node.toks, 
+    # the "build node" is a reference to the AST node
-    satisfyMatchEOL(matchType),
+    # the parser is currently modifying/building from
    # NOTE: bnode changes frequently, it is NOT the root
    bnode: nlNode
    # flag indicating whether the parser is at
    # the start of a new line (aka checking indentation)
    inIndent: bool
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  result = if b: stat else: nlParseStat.OK
 proc isFail*(stat: nlParseStat): bool = 
  result = (stat >= nlParseStat.MARKER_FAIL)
 proc newParser*(tokStream: var nlTokStream): nlParser =
  let rootNode = newNode(nkNone)
  result = nlParser(
    stream: tokStream,
    ast: nlAST(
      root: rootNode
    ),
    bnode: rootNode,
  )
-proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
+# Exposes a subset of the nlTokStream interface
-  node = nlNode(
+proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
-    nType: nlNodeType.STRL
+proc line(parser: var nlParser): string = parser.stream.line
  )
  node.addTok(tokStream.currTok)
  result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
-proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
+# Extends upon the functionality of nlTokStream.progress()
-  node = nlNode(
+proc progressStream*(parser: var nlParser): bool = 
-    nType: nlNodeType.CHRL
+  result = parser.stream.progress()
-  )
+  if result and parser.currTok.tKind == tkEOL:
-  node.addTok(tokStream.currTok)
+    parser.inIndent = true
-  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
+  if 
  result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
-# Attempt to form an nlAST from a nlTokStream
+proc setNewLine()
-proc parse*(tokStream: var nlTokStream): nlNode = 
+  
-  var tok: nlTok
+#[ "Greed" refers to something I mentioned in my discussion on
-  var node: nlNode
+ |  Noether's grammar (in an EBNF-like language). Greed just
-  while tokStream.nextTok(tok):
+ |  means "everything until a condition is satisified".
-    case tok.tType:
+ |  That condition should be supplied by a Nim procedural type.
-    of nlTokType.DQUO:
+ ]#
-      # Attempt to parse string literal
+  
-      if not parseStrL(tokStream, node):
+# Greed will consume anything until a condition is satisfied
-        echo "Unmatched Double Quotation! Malformed String Literal"
+# Returns false if the greed was never satisfied (OMG!!)
-        echo tokStream.currLine()
+proc greed(parser: var nlParser,
-        echo repeat(" ", tok.startPos), '^'
+           satisfy: proc(tok: nlTok): bool): nlParseStat =
-      else:
+  while parser.progressStream():
-        echo "Parsed String Literal"
+    if satisfy(parser.currTok):
-        echo node[]
+      return nlParseStat.OK
-    of nlTokType.SQUO:
+    # NOTE: the matched token is currently excluded
-      # Attempt to parse string literal
+    parser.bnode.addTok(parser.currTok)
-      if not parseChrL(tokStream, node):
+  result = nlParseStat.UNMATCHED
-        echo "Unmatched Single Quotation! Malformed Character Literal"
+
-        echo tokStream.currLine()
+proc greedLine(parser: var nlParser,
-        echo repeat(" ", tok.startPos), '^'
+               satisfy: proc(tok: nlTok): bool): nlParseStat =
-      else:
+  while parser.progressStream():
-        echo "Parsed String Literal"
+    if satisfy(parser.currTok):
-        echo node[]
+      return nlParseStat.OK
-    else:
+    # NOTE: the matched token is currently excluded
-      echo "blah blah unhandled case"
+    parser.bnode.addTok(parser.currTok)
    if parser.currTok.tKind == tkEOL:
      return nlParseStat.UNMATCHED
  result = nlParseStat.UNMATCHED
 #[ Templates for generating greed satisfying conditions.
 ]#
 # Satisfied if it finds nlTok of type matchType
 template satisfyMatch(matchType: nlTokKind): untyped  = 
  (proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
--- a/src/noether/parser/parseutil.nim
+++ b/src/noether/parser/parseutil.nim
@ -1,58 +0,0 @@
 import nodes
 import ../lexer/tokstream
 type
  # NOTE: Values above __FAIL__ indicate a failed state
  nlParseStat* = enum
    OK,
    __FAIL__,
    MIDAS, # Greedy search was never satisfied
    UNMATCHED,
    TOOBIG,
 proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  result = if b: stat else: nlParseStat.OK
 proc isFail*(stat: nlParseStat): bool = 
  result = (stat >= nlParseStat.__FAIL__)
 #[ "Greed" refers to something I mentioned in my discussion on
 |  Noether's grammar (in an EBNF-like language). Greed just
 |  means "everything until a condition is satisified".
 |  That condition should be supplied by a Nim procedural type.
 ]#
 # Greed will consume anything until a condition is satisfied
 # Returns false if the greed was never satisfied (OMG!!)
 proc greed(tokStream: var nlTokStream, 
           toks: var seq[nlTok], 
           satisfy: proc(tok: nlTok): bool,
           ): nlParseStat =
  var tok: nlTok
  while tokStream.nextTok(tok):
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED
 proc greedLine(tokStream: var nlTokStream, 
               toks: var seq[nlTok], 
               satisfy: proc(tok: nlTok): bool): nlParseStat =
  var tok: nlTok
  while tokStream.nextTok(tok):
    toks.add(tok)
    if satisfy(tok):
      return true
  result = 
 #[ Templates for generating greed satisfying conditions.
 ]#
 # Satisfied if it finds nlTok of type matchType
 template satisfyMatch(matchType: nlTokType) = 
  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
 # Satisfied if it finds nlTok of type matchType or EOL reached
 template satisfyMatchEOL(matchType: nlTokType) = 
  proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)
Author	SHA1	Message	Date
Emile Clark-Boman	bab593a86b	Typo fix + start of error handling	2025-07-06 21:42:09 +10:00
Emile Clark-Boman	1181ea9743	Restructure attempt #087 :(	2025-06-19 12:51:03 +10:00
Emile Clark-Boman	f25e66e9ef	Garbage collection so to speak	2025-06-19 09:41:15 +10:00
Emile Clark-Boman	d7fb1f0c89	Migrate nlTokBuilder + nlTokStream -> nlLexer	2025-06-19 09:38:08 +10:00
Emile Clark-Boman	07a9bda9ba	Once again fixed EOL handling...	2025-06-19 09:11:49 +10:00
Emile Clark-Boman	99db57dcfd	YALR (Yet Another Lexer Refactor)	2025-06-19 08:48:31 +10:00
Emile Clark-Boman	72a6075123	nlParser now exposes a subset of the nlTokStream interface	2025-06-19 03:26:22 +10:00
Emile Clark-Boman	8e6c0bbbfc	Fixed StrLit + ChrLit matching beyond EOL, also greed excludes satisfier	2025-06-19 02:33:13 +10:00
Emile Clark-Boman	f8f90fe92d	Added ultra simple build script for debug	2025-06-19 02:25:52 +10:00
Emile Clark-Boman	4a8f44d23f	Fixed parseStmt called on uninitialized nlParser.ast Also parseStmt now discards nlTokKind.tkEOL (this shouldn't be left in long term, just a temporary solution)	2025-06-19 02:24:04 +10:00
Emile Clark-Boman	2af3000c2e	eeeeekkk typo :(	2025-06-19 02:11:52 +10:00
Emile Clark-Boman	f8697bd662	1 gazillion changes (mostly documenting my insanity optimizing + naming)	2025-06-19 02:09:43 +10:00