Compare commits

..

12 commits

20 changed files with 460 additions and 434 deletions

4
.gitignore vendored
View file

@@ -1,2 +1,6 @@
__pycache__/
bin/
# TEMP: used while debugging
# (and cause I'm super duper lazy)
src/nlx

2
lang/demo/single_toks.no Normal file
View file

@@ -0,0 +1,2 @@
[a]b(#)
(c)d[e]

View file

@@ -2,7 +2,7 @@
import sys
import readline
from noether.math import *
from noether.lib.math import *
from noether.cli import *

11
src/ddemo Executable file
View file

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Build nlx and run it against a demo file from lang/demo.
# Usage: ddemo DEMOFILE
# NOTE: must be run from the src/ directory (nlx.nim is resolved
# relative to the current working directory).
set -e
if [ -z "$1" ]; then
echo "Usage: ddemo DEMOFILE"
echo "Demo files are located in lang/demo"
exit 1
fi
nim c nlx.nim
# FIX: quote the expansion so demo filenames containing spaces or
# glob characters are passed through as a single argument
./nlx "../lang/demo/$1"

View file

@@ -1,22 +1,29 @@
import os
import noether/lexer/tok
import noether/lexer/tokstream
import noether/parser/parser
import noether/lib/io
import noether/lexer/lex
import noether/parser/parse
{.hint: "Don't forget to drink more water (^_^)".}
when isMainModule:
echo "Noether Lang Extras v0.1.0 - nlx"
if paramCount() > 0:
let filename = paramStr(1)
var tokStream = newTokStream(filename, isFile=true)
# really lazy argparse implementation (temporary)
let
paramC = paramCount()
cmd = if paramC > 2: paramStr 1
else: "tok"
# # DumpTok
# var tok: nlTok
# while tokStream.nextTok(tok):
# echo tok
var stream = if paramC > 0: streamFile(paramStr paramC)
else: streamString(readAll stdin)
var lexer = newLexer(stream)
if cmd == "tok":
# DumpTok
while lexer.progress():
echo lexer.tok
elif cmd == "tree":
discard
# DumpTree
discard parse(tokStream)
# discard parse(tokStream)
else:
echo "usage: nlx filename"
echo "Usage: nlx [tok|tree] <demo>\n demo files are accessible at lang/demo"

View file

@@ -2,4 +2,4 @@
# uses this file as the main entry point of the application.
when isMainModule:
echo "Noether Lang"
echo "Noether Lang v0.1.0"

178
src/noether/lexer/lex.nim Normal file
View file

@@ -0,0 +1,178 @@
import
streams,
options
import tok
export tok
type
  nlLexer* = object
    ## Drives the "building process" (lexing) of `nlTok` values
    ## out of a raw `Stream` of characters.
    stream: Stream
    done*: bool          # set once EOF has been consumed
    # the published token and the in-progress ("build") token
    tok*: nlTok          # most recently completed token
    btok: nlTok          # token currently being built
    # most recent character read and its classified token kind
    char: char
    cTKind: nlTokKind
    # source-position bookkeeping (line text, line number, offset)
    line: string
    lineNum: int
    pos: int
# Position predicates over the lexer's most recent character.
proc atEOL(lexer: nlLexer): bool {.inline.} =
  ## True when the current character is a line feed.
  lexer.char == '\n'

proc atEOF(lexer: nlLexer): bool {.inline.} =
  ## True when the current character is the NUL sentinel that
  ## `Stream.readChar` yields at end of input.
  lexer.char == '\0'
# Initialise a new lexer over `stream` with empty build state.
proc newLexer*(stream: var Stream): nlLexer =
  nlLexer(
    stream: stream,
    done: false,
    tok: emptyTok(0),
    btok: emptyTok(0),
    line: "",
    lineNum: 1,
    pos: -1,      # the first readChar advances this to 0
    char: '\0',   # NUL doubles as the "no char read yet" marker
    cTKind: tkNONE)
# Maps the lexer's current character onto its `nlTokKind`.
proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
  case lexer.char
  of '\0': tkEOF
  of '\r', '\n': tkEOL
  of ' ', '\t': tkWTSP
  of '(': tkLPAR
  of ')': tkRPAR
  of '{': tkLBRA
  of '}': tkRBRA
  of '[': tkLSQB
  of ']': tkRSQB
  of '\'': tkSQUO
  of '\"': tkDQUO
  of '`': tkGRVA
  of '#': tkHASH
  else: tkWORD
#[ ====================================================== ]
| nlLexer Internal Interface for Token Construction ]
]#
# Start a fresh, untyped build token anchored at the current offset.
proc resetBuild(lexer: var nlLexer) =
  lexer.btok = emptyTok(lexer.pos)
# "Finishes" the build token by setting various properties
proc finishBuild(lexer: var nlLexer) =
lexer.btok.lineNum = lexer.lineNum
lexer.btok.endPos = lexer.pos
lexer.btok.lit = lexer.line[lexer.btok.startPos ..< lexer.line.high]
# Finish the build token, hand it back, and start a fresh one.
proc flushBuild(lexer: var nlLexer): nlTok =
  lexer.finishBuild()
  result = lexer.btok
  lexer.resetBuild()
# Does the current character force the build token to flush?
# A flush happens when the char's kind differs from the build
# token's kind, or unconditionally at end of line.
# NOTE: this implicitly handles CRLF, LF, and CR compatibility:
# atEOL tests only '\n', but '\r' and '\n' both classify as tkEOL,
# so either byte terminates the token.
proc isIncompatibleBuild(lexer: nlLexer): bool =
  lexer.cTKind != lexer.btok.kind or lexer.atEOL()
# Stamp the build token with the current character's kind.
proc inherit(lexer: var nlLexer) =
  lexer.btok.kind = lexer.cTKind
# Fold the current character into the build token.
# Returns the finished token when the character terminates one,
# otherwise `none`.
proc appendBuild(lexer: var nlLexer): Option[nlTok] =
  if lexer.btok.isUntyped():
    # a brand-new build token adopts the first char's kind
    lexer.inherit()
  if not isIncompatibleBuild(lexer):
    # char continues the current token: nothing to flush yet
    return none(nlTok)
  # flush the completed token; the next one inherits the new kind
  result = some(lexer.flushBuild())
  lexer.inherit()
#[ ========================================= ]
| nlLexer Internal Char Streaming Interface ]
]#
# Read the next char in the stream.
# Returns true once EOF has been reached (char == '\0').
# NOTE: readChar raises IOError on error, returns \0 on EOF
proc readChar(lexer: var nlLexer): bool =
# the PREVIOUS char was '\n', so this read begins a new line
if lexer.atEOL():
inc lexer.lineNum
# sets lexer.char to '\0' if EOF
lexer.char = lexer.stream.readChar()
lexer.cTKind = lexer.classifyTok()
# NOTE(review): `line` is never truncated, so it accumulates the
# entire input and `pos` is an absolute offset, not a column —
# confirm that is intended (finishBuild slices rely on it).
lexer.line.add(lexer.char)
inc lexer.pos
result = lexer.atEOF()
#[ ========================
| nlLexer Public Interface
]#
# Read until EOL (or EOF) and return the current line.
# NOTE: Does NOT update the lexer's token state (unsafe)
# NOTE: ONLY call if a lex/parse error needs displaying
proc unsafeGetLine*(lexer: var nlLexer): string =
  # BUG FIX: `readChar` returns true only at EOF, so the previous
  # condition `not atEOL() and readChar()` exited after a single
  # ordinary character and looped forever once EOF was reached
  # (readChar keeps yielding '\0'). Loop until either the end of
  # the line or the end of the stream instead.
  while not (lexer.atEOL() or lexer.atEOF()):
    discard lexer.readChar()
  result = lexer.line
# Lexes the next token from the character stream by repeatedly
# calling readChar() and appendBuild().
# Returns true while a fresh token is available via `lexer.tok`,
# false once the stream is exhausted.
proc progress*(lexer: var nlLexer): bool =
  if lexer.done:
    # stream already fully consumed: nothing more to produce
    return false
  while true:
    let reachedEOF = lexer.readChar()
    let flushed = lexer.appendBuild()
    if flushed.isSome:
      # a token finished building: publish it as the current token
      lexer.tok = flushed.get()
      if reachedEOF:
        # final token of the stream; close the lexer
        lexer.done = true
      return true
    if reachedEOF:
      # EOF reached with nothing left to flush
      return false

View file

@@ -1,66 +0,0 @@
import std/streams
import std/options
import tok
export tok
type
# Character streaming for the nlTokStream
nlLStream = object
stream: Stream
# row/column positions
line*: string
lineNum*: Natural
pos*: Natural
proc streamFile*(filename: string): FileStream =
result = newFileStream(filename, fmRead)
proc streamString*(str: string): StringStream =
result = newStringStream(str)
proc newLStream*(content: string, isFile: bool = false): nlLStream =
result = nlLStream(
stream: if isFile: streamFile(content) else: streamString(content),
line: "",
lineNum: Natural 0,
pos: Natural 0,
)
# Checks whether we've reached EOL
# NOTE: also checks if we've surpassed it (ie invalid lstream.pos)
proc atEOL*(lstream: nlLStream): bool =
result = (lstream.pos >= lstream.line.len - 1)
# Checks whether we are EXACTLY at EOL, but not surpassed
proc exactlyEOL*(lstream: nlLStream): bool =
result = (lstream.pos == lstream.line.len - 1)
# Checks whether we have surpassed EOL
proc outOfBounds*(lstream: nlLStream): bool =
result = (lstream.pos > lstream.line.len - 1)
# Progress the lex stream to the next line (if available)
proc progLine*(lstream: var nlLStream): bool =
if lstream.stream.readLine(lstream.line):
inc lstream.lineNum
lstream.pos = Natural 0
return true
return false
# Progress the lex stream to the next character in the line
# forcefully (aka does NOT check if we reached EOL)
proc forceProgChar*(lstream: var nlLStream) =
inc lstream.pos
# Progress the lex stream to the next character (if available)
proc progress*(lstream: var nlLStream): bool =
if not lstream.atEOL():
lstream.forceProgChar()
result = true
else:
# attempt to progress next line past EOL
result = lstream.progLine()
proc currChar*(lstream: nlLStream): char =
result = lstream.line[lstream.pos]

View file

@@ -1,40 +1,53 @@
include toktype
type
# nlTokKind allows primitive nlToks to be typed,
# the nlTokKind enum should never be directly
# accessed. Use the interface in this file instead.
nlTokKind* = enum
tkNONE, # Placeholder Value
tkEOF, # End of File
tkEOL, # End of Line (\0 --> EOL)
tkWORD, # Alphanumeric token
tkSYMB, # Symbolic token
tkLNFD, # \r \n Line-Feed
tkWTSP, # ' ' \t Whitespace
# RESERVED SYMBOLS
tkLPAR, # ( Left Parenthesis
tkRPAR, # ) Right Parenthesis
tkLBRA, # { Left Brace
tkRBRA, # } Right Brace
tkLSQB, # [ Left Square Bracket
tkRSQB, # ] Right Square Bracket
# tkLANB, # < Left Angle Bracket
# tkRANB, # > Right Angle Bracket
tkSQUO, # ' Single Quotation Marking
tkDQUO, # " Double Quotation Marking
tkGRVA, # ` Grave Accent
tkHASH, # # Number Sign (Hashtag)
type
nlTok* = object
tType*: nlTokType
lit*: string
lineNum*: Natural
startPos*: Natural
endPos*: Natural
nlTok* = tuple
# NOTE: nlTokBuilder will mutate nlTok.kind
kind: nlTokKind
lit: string
lineNum: int
startPos: int
endPos: int
# Generates an "empty" nlTok with only a startPos,
# all other fields are expected to be filled out later.
proc emptyTok*(startPos: int): nlTok =
result = nlTok(
tType: nlTokType.NONE,
proc emptyTok*(startPos: int): nlTok {.inline.} =
result = (
kind: tkNONE,
lit: "",
startPos: Natural startPos,
lineNum: 0,
startPos: startPos,
endPos: startPos,
)
# Checks if an nlTok has nlTokType.NONE
proc isTokUntyped*(tType: nlTokType): bool =
result = (tType == nlTokType.NONE)
# Checks if an nlTok has nlTokType.EOL
proc isTokEOL*(tok: nlTok): bool =
result = (tok.tType == nlTokType.EOL)
# This method is only used to convert null
# terminator nlToks into line-feed ones.
# Returns a copy of an nlTok, changing its type
proc tokTermToLineFeed*(tok: nlTok): nlTok =
result = nlTok(
tType: nlTokType.LNFD,
lit: tok.lit,
lineNum: tok.lineNum,
startPos: tok.startPos,
endPos: tok.endPos,
)
# Checks if an nlTok has tkNONE
proc isUntyped*(tok: nlTok): bool {.inline.} =
result = (tok.kind == tkNONE)

View file

@@ -1,86 +0,0 @@
include lstream
type
# Provides a stream-like interface for lexing nlToks
# Internally reliant on the functionality of nlLStream
nlTokStream* = object
lstream: nlLStream
build: nlTok # the build token
currTok*: nlTok # the current token
closed: bool # EOF + all tokens built
# Generates an EOL token for the nlTokStream's state
proc EOLTok(tokStream: nlTokStream): nlTok =
result = nlTok(
tType: nlTokType.EOL,
lit: "\0",
lineNum: Natural tokStream.lstream.lineNum,
startPos: Natural tokStream.lstream.pos,
endPos: Natural tokStream.lstream.pos,
)
# Resets the build token to an "empty" nlTok
proc resetBuild(tokStream: var nlTokStream) =
tokStream.build = emptyTok(tokStream.lstream.pos)
# Completes a token generated by emptyTok()
# based on the nlTokStream's nlLStream's
# current line and character positions
proc finishBuild(ts: var nlTokStream) =
ts.build.lineNum = Natural ts.lstream.lineNum
ts.build.endPos = Natural ts.lstream.pos
ts.build.lit = ts.lstream.line[ts.build.startPos ..< ts.build.endPos]
# Returns the nlTokStream's build token and
# empties the build token's contents.
proc flushBuild(tokStream: var nlTokStream): nlTok =
finishBuild(tokStream)
result = tokStream.build
resetBuild(tokStream)
# Returns whether the build token has a set type yet.
# This indicates that the build token should inherit
# the nlTokType of the nlLStream's next character.
proc isUntypedBuild(tokStream: nlTokStream): bool =
result = isTokUntyped(tokStream.build.tType)
# Check whether an nlTokType is "compatible" with the build token.
# NOTE: flushBuild() should be called when an incompatible token is discovered.
proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
result = (tType == tokStream.build.tType)
# Add a character to the nlTokStream's build token.
# Flushes and returns the build token if "fully built",
# and a boolean indicating whether the nlTokStream can progress.
proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
# the "pos > EOL" invalid state is used intentionally
# to indicate all tokens have been built, and return EOL Token
if tokStream.lstream.outOfBounds():
buildTok = some(EOLTok(tokStream))
return true # can progress once more
let tType = getTokType(tokStream.lstream.currChar())
# untyped build tokens must inherited immediately
if isUntypedBuild(tokStream):
tokStream.build.tType = tType
# check if EOL reached
if tokStream.lstream.atEOL():
# flush old build token, the new one can be left untyped
let compatible = isCompatibleBuild(tokStream, tType)
result = false # DO NOT PROGRESS
if compatible:
# force the lstream into an invalid state by progressing beyond EOL
# we can then detect this state on the next progBuild and return
# an EOL character (very unsafe implementation but it works well)
tokStream.lstream.forceProgChar()
buildTok = some(flushBuild(tokStream))
# check character and build token compatability
elif not isCompatibleBuild(tokStream, tType):
# flush old build token, the new one inherits type
buildTok = some(flushBuild(tokStream))
tokStream.build.tType = tType
result = true # can progress
else:
buildTok = none(nlTok)
result = true # can progress

View file

@@ -0,0 +1 @@

View file

@@ -1,53 +0,0 @@
include tokbuilding
# Initialises a new nlTokStream on a string or file
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
result = nlTokStream(
lstream: newLStream(content, isFile=isFile),
closed: false,
)
# 1. initialise an empty build token
# 2. progress to the first line
result.resetBuild()
discard result.lstream.progLine()
# Defines a short-hand notation for getting the current line
proc currLine*(tokStream: nlTokStream): string =
result = tokStream.lstream.line
# Reimplements nlLStream.progress() for nlTokStream
# to account for additional structure (ie the build token)
proc progChar(tokStream: var nlTokStream): bool =
if not tokStream.lstream.atEOL():
tokStream.lstream.forceProgChar()
result = true
else:
# attempt to progress to next line past EOL
result = tokStream.lstream.progLine()
tokStream.resetBuild()
# Generates and sets (by reference) the next token in the stream,
# via repeatedly calling progBuild() and progChar().
# Returns a boolean indicating whether EOF has been reached.
# NOTE: progBuild adds lstream's current char to the build token
# NOTE: progChar progresses to lstream's next char
proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
# Return prematurely if already closed
if tokStream.closed:
return false
while true:
var flushedTok: Option[nlTok]
let
canProgress = tokStream.progBuild(flushedTok)
buildComplete = flushedTok.isSome
# canProgress & EOF reached => no more tokens to build :)
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
if buildComplete:
# return the finished build token, and save it as the current token
tok = flushedTok.get()
tokStream.currTok = tok
if canProgress and not tokStream.progChar():
tokStream.closed = true
return buildComplete
elif buildComplete:
return true

View file

@@ -1,54 +0,0 @@
type
# nlTokType allows primitive nlToks to be typed,
# the nlTokType enum should never be directly
# accessed. Use the interface in this file instead.
nlTokType* = enum
NONE, # Placeholder Value
EOF, # End of File
EOL, # End of Line (\0 --> EOL)
WORD, # Alphanumeric token
SYMB, # Symbolic token
LNFD, # \r \n Line-Feed
WTSP, # ' ' \t Whitespace
LPAR, # ( Left Parenthesis
RPAR, # ) Right Parenthesis
LBRA, # { Left Brace
RBRA, # } Right Brace
LSQB, # [ Left Square Bracket
RSQB, # ] Right Square Bracket
# LANB, # < Left Angle Bracket
# RANB, # > Right Angle Bracket
SQUO, # ' Single Quotation Marking
DQUO, # " Double Quotation Marking
GRVA, # ` Grave Accent
HASH, # # Number Sign (Hashtag)
# Classifies a character to its nlTokType
proc getTokType*(c: char): nlTokType =
case c:
of '\0', '\r', '\n':
result = nlTokType.EOL
of ' ', '\t':
result = nlTokType.WTSP
of '(':
result = nlTokType.LPAR
of ')':
result = nlTokType.RPAR
of '{':
result = nlTokType.LBRA
of '}':
result = nlTokType.RBRA
of '[':
result = nlTokType.LSQB
of ']':
result = nlTokType.RSQB
of '\'':
result = nlTokType.SQUO
of '\"':
result = nlTokType.DQUO
of '`':
result = nlTokType.GRVA
of '#':
result = nlTokType.HASH
else:
result = nlTokType.WORD

1
src/noether/lib/err.nim Normal file
View file

@@ -0,0 +1 @@
# Shared error-reporting helpers; general error functionality lives
# here (parser-specific messaging stays in the parser modules).
proc echoErrorHeader() =
  ## Print a common header line before error details.
  ## FIX: the original `proc echoErrorHeader(): =` was a syntax
  ## error (empty return type before `=`); the proc now parses.
  # TODO(review): body not yet implemented in this commit.
  discard

7
src/noether/lib/io.nim Normal file
View file

@@ -0,0 +1,7 @@
import std/streams
proc streamFile*(filename: string): Stream {.inline.} =
  ## Open `filename` read-only and expose it as a generic `Stream`.
  ## NOTE(review): `newFileStream` returns nil when the file cannot
  ## be opened — confirm callers handle that case.
  newFileStream(filename, fmRead)
proc streamString*(str: string): Stream {.inline.} =
  ## Wrap an in-memory string as a generic readable `Stream`.
  newStringStream(str)

View file

@@ -0,0 +1,8 @@
#[ Error codes and messaging directly associated with
| nlParser and its procedures is written here.
| General error functionality is in src/noether/lib/err.nim
]#
import parser

View file

@@ -1,18 +1,44 @@
import std/options
from ../lexer/tok import nlTok
# from ../lexer/tokstream import
type
# NOTE: by the end of parsing NO nodes should
# NOTE: have nlNodeType.NONE
nlNodeType* = enum
NONE, # Placeholder Value
TERM, # Indicates the tree has terminated
STRL, # String Literal
CHRL, # Character Literal
# NOTE: by the end of parsing NO nodes should have nkNone
nlNodeKind* = enum
nkNone, # Placeholder Value
nkStrLit, # String Literal
nkChrLit, # Character Literal
# NOTE: always check parent != nil when traversing the tree
nlNode* {.acyclic.} = ref object of RootObj
nType*: nlNodeType
toks*: seq[nlTok] # nodes store the tokens that build them
# left, right: nlNode
nKind*: nlNodeKind
toks*: seq[nlTok] # nodes (may) store the tokens that build them
parent*: nlNode
# Purely abstract type that all nlNode objects
# with children are expected to inherit from.
nlBranchNode* {.acyclic.} = ref object of nlNode
child: UncheckedArray[nlNode]
nlBiNode* {.acyclic.} = ref object of nlBranchNode
proc childCount*(node: nlNode): int {.inline.} = 0
proc childCount*(node: nlBiNode): int {.inline.} = 2
proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} =
result = none(nlNode)
proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} =
result = some(node.child[i])
proc newNode*(nKind: nlNodeKind): nlNode =
result = nlNode(
nKind: nKind,
)
proc newBiNode*(nKind: nlNodeKind): nlNode =
result = nlBiNode(
nKind: nKind,
)
# Short-hand way of appending a token to a node's token sequence
proc addTok*(node: nlNode, tok: nlTok) =

View file

@@ -0,0 +1,58 @@
import strutils
include parser
# NOTE: Matching between two tokens will fill `node` with everything
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
# Consume tokens until another token of `matchType` appears,
# searching across line boundaries via `greed`.
proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
result = greed(
parser,
satisfyMatch(matchType),
)
# As parseMatch, but the search stops (and fails) at end of line
# via `greedLine`.
proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
result = greedLine(
parser,
satisfyMatch(matchType),
)
# String literals must close their '"' on the same line.
proc parseStrLit(parser: var nlParser): nlParseStat =
result = parser.parseMatchLine(tkDQUO)
# Character literals must close their '\'' on the same line.
proc parseChrLit(parser: var nlParser): nlParseStat =
result = parser.parseMatchLine(tkSQUO)
# Walk the token stream and dispatch on each token's kind.
# NOTE(review): reports errors with `echo` — consider returning
# diagnostics instead of printing from library code.
proc parseStmt(parser: var nlParser): nlParseStat =
while parser.progressStream():
echo "----- Current Token: ", parser.currTok
case parser.currTok.tKind
of tkDQUO:
# Attempt to parse string literal
if parser.parseStrLit() != nlParseStat.OK:
echo "Unmatched Double Quotation! Malformed String Literal"
echo parser.line
echo repeat(" ", parser.currTok.startPos), '^', '\n'
else:
echo "Parsed String Literal"
echo parser.bnode[], '\n'
of tkSQUO:
# Attempt to parse character literal
if parser.parseChrLit() != nlParseStat.OK:
echo "Unmatched Single Quotation! Malformed Character Literal"
echo parser.line
echo repeat(" ", parser.currTok.startPos), '^', '\n'
else:
echo "Parsed Character Literal"
echo parser.bnode[], '\n'
of tkEOL:
# TODO: handle this case, don't just discard
discard
else:
echo "blah blah unhandled case\n"
result = nlParseStat.OK
# Attempt to parse nlAST from nlTokStream
# NOTE(review): this commit removes nlTokStream (replaced by
# nlLexer), yet this entry point still takes `nlTokStream` —
# confirm whether it should take `nlLexer` instead.
proc parse*(tokStream: var nlTokStream): nlAST =
var parser = newParser(tokStream)
echo ' '
discard parser.parseStmt()
result = parser.ast

View file

@@ -1,63 +1,90 @@
import strutils
include parseutil
import nodes
import ../lexer/lex
# NOTE: Matching between two tokens will fill `node` with everything
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
proc parseMatch(tokStream: var nlTokStream,
node: var nlNode,
matchType: nlTokType): nlParseStat =
result = greed(
tokStream,
node.toks,
satisfyMatch(matchType),
)
proc parseMatchLine(tokStream: var nlTokStream,
node: var nlNode,
matchType: nlTokType): nlParseStat =
result = greed(
tokStream,
node.toks,
satisfyMatchEOL(matchType),
type
# NOTE1: Values above MARKER_FAIL indicate a failed state
# NOTE2: nlParseStat is marked pure out of habit that's all
nlParseStat* {.pure.} = enum
OK,
MARKER_FAIL,
UNMATCHED,
TOOBIG,
nlAST* = object
root: nlNode
nlParser* = object
stream: nlTokStream
ast: nlAST
# the "build node" is a reference to the AST node
# the parser is currently modifying/building from
# NOTE: bnode changes frequently, it is NOT the root
bnode: nlNode
# flag indicating whether the parser is at
# the start of a new line (aka checking indentation)
inIndent: bool
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
result = if b: stat else: nlParseStat.OK
proc isFail*(stat: nlParseStat): bool =
result = (stat >= nlParseStat.MARKER_FAIL)
proc newParser*(tokStream: var nlTokStream): nlParser =
let rootNode = newNode(nkNone)
result = nlParser(
stream: tokStream,
ast: nlAST(
root: rootNode
),
bnode: rootNode,
)
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
node = nlNode(
nType: nlNodeType.STRL
)
node.addTok(tokStream.currTok)
result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
# Exposes a subset of the nlTokStream interface
proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
proc line(parser: var nlParser): string = parser.stream.line
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
node = nlNode(
nType: nlNodeType.CHRL
)
node.addTok(tokStream.currTok)
# TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
# Extends upon the functionality of nlTokStream.progress()
proc progressStream*(parser: var nlParser): bool =
result = parser.stream.progress()
if result and parser.currTok.tKind == tkEOL:
parser.inIndent = true
if
# Attempt to form an nlAST from a nlTokStream
proc parse*(tokStream: var nlTokStream): nlNode =
var tok: nlTok
var node: nlNode
while tokStream.nextTok(tok):
case tok.tType:
of nlTokType.DQUO:
# Attempt to parse string literal
if not parseStrL(tokStream, node):
echo "Unmatched Double Quotation! Malformed String Literal"
echo tokStream.currLine()
echo repeat(" ", tok.startPos), '^'
else:
echo "Parsed String Literal"
echo node[]
of nlTokType.SQUO:
# Attempt to parse string literal
if not parseChrL(tokStream, node):
echo "Unmatched Single Quotation! Malformed Character Literal"
echo tokStream.currLine()
echo repeat(" ", tok.startPos), '^'
else:
echo "Parsed String Literal"
echo node[]
else:
echo "blah blah unhandled case"
proc setNewLine()
#[ "Greed" refers to something I mentioned in my discussion on
| Noether's grammar (in an EBNF-like language). Greed just
| means "everything until a condition is satisified".
| That condition should be supplied by a Nim procedural type.
]#
# Greed will consume anything until a condition is satisfied
# Returns false if the greed was never satisfied (OMG!!)
proc greed(parser: var nlParser,
satisfy: proc(tok: nlTok): bool): nlParseStat =
while parser.progressStream():
if satisfy(parser.currTok):
return nlParseStat.OK
# NOTE: the matched token is currently excluded
parser.bnode.addTok(parser.currTok)
result = nlParseStat.UNMATCHED
proc greedLine(parser: var nlParser,
satisfy: proc(tok: nlTok): bool): nlParseStat =
while parser.progressStream():
if satisfy(parser.currTok):
return nlParseStat.OK
# NOTE: the matched token is currently excluded
parser.bnode.addTok(parser.currTok)
if parser.currTok.tKind == tkEOL:
return nlParseStat.UNMATCHED
result = nlParseStat.UNMATCHED
#[ Templates for generating greed satisfying conditions.
]#
# Satisfied if it finds nlTok of type matchType
template satisfyMatch(matchType: nlTokKind): untyped =
(proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))

View file

@@ -1,58 +0,0 @@
import nodes
import ../lexer/tokstream
type
# NOTE: Values above __FAIL__ indicate a failed state
nlParseStat* = enum
OK,
__FAIL__,
MIDAS, # Greedy search was never satisfied
UNMATCHED,
TOOBIG,
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
result = if b: stat else: nlParseStat.OK
proc isFail*(stat: nlParseStat): bool =
result = (stat >= nlParseStat.__FAIL__)
#[ "Greed" refers to something I mentioned in my discussion on
| Noether's grammar (in an EBNF-like language). Greed just
| means "everything until a condition is satisified".
| That condition should be supplied by a Nim procedural type.
]#
# Greed will consume anything until a condition is satisfied
# Returns false if the greed was never satisfied (OMG!!)
proc greed(tokStream: var nlTokStream,
toks: var seq[nlTok],
satisfy: proc(tok: nlTok): bool,
): nlParseStat =
var tok: nlTok
while tokStream.nextTok(tok):
toks.add(tok)
if satisfy(tok):
return nlParseStat.OK
result = nlParseStat.UNMATCHED
proc greedLine(tokStream: var nlTokStream,
toks: var seq[nlTok],
satisfy: proc(tok: nlTok): bool): nlParseStat =
var tok: nlTok
while tokStream.nextTok(tok):
toks.add(tok)
if satisfy(tok):
return true
result =
#[ Templates for generating greed satisfying conditions.
]#
# Satisfied if it finds nlTok of type matchType
template satisfyMatch(matchType: nlTokType) =
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
# Satisfied if it finds nlTok of type matchType or EOL reached
template satisfyMatchEOL(matchType: nlTokType) =
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)