Begun parser design + typo fixes + improved lexer modularity

This commit is contained in:
Emile Clark-Boman 2025-06-18 13:35:07 +10:00
parent 90ca138904
commit a258802945
6 changed files with 103 additions and 31 deletions

View file

@ -1,6 +1,7 @@
import os import os
import noether/lexer/tok import noether/lexer/tok
import noether/lexer/tokstream import noether/lexer/tokstream
import noether/parser/parser
when isMainModule: when isMainModule:
echo "Noether Lang Extras v0.1.0 - nlx" echo "Noether Lang Extras v0.1.0 - nlx"
@ -9,10 +10,13 @@ when isMainModule:
let filename = paramStr(1) let filename = paramStr(1)
var tokStream = newTokStream(filename, isFile=true) var tokStream = newTokStream(filename, isFile=true)
# DumpTok # # DumpTok
var tok: nlTok # var tok: nlTok
while tokStream.nextTok(tok): # while tokStream.nextTok(tok):
echo tok # echo tok
# DumpTree
discard parse(tokStream)
else: else:
echo "usage: nlx filename" echo "usage: nlx filename"

View file

@ -2,6 +2,7 @@ import std/streams
import std/options import std/options
import tok import tok
export tok
type type
# Character streaming for the nlTokStream # Character streaming for the nlTokStream

View file

@ -3,13 +3,14 @@ include lstream
type
  # Stream-like interface that lexes characters into nlToks.
  # Built on top of nlLStream, which supplies the characters.
  nlTokStream* = object
    lstream: nlLStream # underlying character stream
    build: nlTok       # token currently under construction
    currTok*: nlTok    # most recent token handed out by nextTok
    closed: bool       # set once EOF is reached and all tokens are built
# Generates an EOL token for the nlTokStream's state # Generates an EOL token for the nlTokStream's state
proc EOLTok*(tokStream: nlTokStream): nlTok = proc EOLTok(tokStream: nlTokStream): nlTok =
result = nlTok( result = nlTok(
tType: nlTokType.EOL, tType: nlTokType.EOL,
lit: "\0", lit: "\0",

View file

@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
result.resetBuild() result.resetBuild()
discard result.lstream.progLine() discard result.lstream.progLine()
# Short-hand accessor for the line held by the underlying nlLStream
proc currLine*(tokStream: nlTokStream): string =
  tokStream.lstream.line
# Reimplements nlLStream.progress() for nlTokStream # Reimplements nlLStream.progress() for nlTokStream
# to account for additional structure (ie the build token) # to account for additional structure (ie the build token)
proc progChar(tokStream: var nlTokStream): bool = proc progChar(tokStream: var nlTokStream): bool =
@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
if tokStream.closed: if tokStream.closed:
return false return false
while true: while true:
var buildTok: Option[nlTok] var flushedTok: Option[nlTok]
let let
canProgress = tokStream.progBuild(buildTok) canProgress = tokStream.progBuild(flushedTok)
tokBuilt = buildTok.isSome buildComplete = flushedTok.isSome
# canProgress & EOF reached => no more tokens to build :) # canProgress & EOF reached => no more tokens to build :)
# NOTE: reachedEOF and not canProgress => more tokens unwrapping # NOTE: reachedEOF and not canProgress => more tokens unwrapping
if tokBuilt: if buildComplete:
tok = buildTok.get() # return the finished build token, and save it as the current token
tok = flushedTok.get()
tokStream.currTok = tok
if canProgress and not tokStream.progChar(): if canProgress and not tokStream.progChar():
tokStream.closed = true tokStream.closed = true
return tokBuilt return buildComplete
elif tokBuilt: elif buildComplete:
return true return true

View file

@ -1,18 +1,19 @@
from ../lexer/tok import nlTok from ../lexer/tok import nlTok
from ../lexer/tokstraem import # from ../lexer/tokstream import
type
  # Kinds of node that can appear in the tree.
  # NOTE: once parsing has finished, NO node should
  # NOTE: still carry nlNodeType.NONE
  nlNodeType* = enum
    NONE, # Placeholder Value
    TERM, # Indicates the tree has terminated
    STRL, # String Literal
    CHRL, # Character Literal

  # A single tree node, tagged with its kind
  nlNode* {.acyclic.} = ref object of RootObj
    nType*: nlNodeType
    toks*: seq[nlTok] # the tokens this node was built from
    # left, right: nlNode
# Convenience wrapper: pushes `tok` onto the end of `node.toks`
proc addTok*(node: nlNode, tok: nlTok) =
  add(node.toks, tok)

View file

@ -1,20 +1,79 @@
import strutils
import nodes
import ../lexer/tokstream import ../lexer/tokstream
type
  # Status reported by the individual parse procedures
  nlParseStat = enum
    OK,        # first member, so also the zero/default value
    UNMATCHED,
    TOOBIG,
# "Multiplies" a status by a condition:
# yields `stat` when `b` holds, and nlParseStat.OK otherwise
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  if b: stat
  else: nlParseStat.OK
# Greed consumes tokens from the stream, appending every one of them to
# `toks`, until a consumed token satisfies the given predicate.
# Returns true if such a token was consumed, false if the stream ran dry first.
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
  var consumed: nlTok
  # `result` starts out false and flips once the predicate accepts a token;
  # the short-circuiting `and` stops reading from the stream after that
  while not result and tokStream.nextTok(consumed):
    toks.add(consumed)
    result = satisfy(consumed)
# Consumes tokens from the stream, appending every one of them to `toks`,
# until a token of type `satisfy` OR an EOL token has been consumed.
# Returns true in both of those cases, false if the stream ran dry first.
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
  var consumed: nlTok
  # `result` starts out false; the short-circuiting `and` stops reading
  # from the stream as soon as a stopping token has been seen
  while not result and tokStream.nextTok(consumed):
    toks.add(consumed)
    result = consumed.tType == satisfy or consumed.tType == nlTokType.EOL
# Predicate: true when `tok` is of type `tokType` or is an EOL token.
# NOTE(review): the original declaration had no body; this mirrors the
# stopping condition used by greedEOL - confirm this is the intended meaning.
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
  tok.tType == tokType or tok.tType == nlTokType.EOL
# Declared but not yet implemented.
# The original header ended in `=` with no body, which does not compile.
# The error pragma keeps the declaration while turning any use of it into
# a compile-time error instead of silently doing nothing.
# TODO: implement once the intended matching behaviour is decided.
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat {.error: "prsMatchEOL is not implemented yet".}
# Parses a string literal; the stream's current token is taken to be the
# opening double quote.
# `node` is replaced with a fresh STRL node holding every consumed token.
# Returns nlParseStat.OK if a closing double quote was consumed, and
# nlParseStat.UNMATCHED if the line or the stream ended first.
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  node = nlNode(
    nType: nlNodeType.STRL
  )
  node.addTok(tokStream.currTok)
  # greedEOL also returns true when it stops on an EOL token, so the last
  # consumed token has to be inspected to know the literal really closed.
  # (a true result guarantees at least one token was appended after the
  # opening quote, so toks[^1] is never the opening quote here)
  let closed = greedEOL(tokStream, node.toks, nlTokType.DQUO) and
    node.toks[^1].tType == nlTokType.DQUO
  result = if closed: nlParseStat.OK else: nlParseStat.UNMATCHED
# Parses a character literal; the stream's current token is taken to be
# the opening single quote.
# `node` is replaced with a fresh CHRL node holding every consumed token.
# Returns true only if a closing single quote was consumed.
# TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
# NOTE: only error 2 is detected; the bool result cannot express error 1
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
  node = nlNode(
    nType: nlNodeType.CHRL
  )
  node.addTok(tokStream.currTok)
  # greedEOL also returns true when it stops on an EOL token, so the last
  # consumed token has to be inspected to know the literal really closed
  result = greedEOL(tokStream, node.toks, nlTokType.SQUO) and
    node.toks[^1].tType == nlTokType.SQUO
# Attempt to form an nlAST from a nlTokStream
# Reads tokens until the stream is exhausted and dispatches on the token
# type; for now it only echoes what it parsed (or a diagnostic with a caret
# under the opening quote).
# NOTE(review): `result` is never assigned, so nil is returned for now
proc parse*(tokStream: var nlTokStream): nlNode =
  var tok: nlTok
  var node: nlNode
  while tokStream.nextTok(tok):
    case tok.tType:
    of nlTokType.DQUO:
      # Attempt to parse string literal
      # (parse_strl reports an nlParseStat, so compare against OK
      # rather than negating it like a bool)
      if parse_strl(tokStream, node) != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed String Literal"
        echo node[]
    of nlTokType.SQUO:
      # Attempt to parse character literal
      if not parse_chrl(tokStream, node):
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed Character Literal"
        echo node[]
    else:
      echo "blah blah unhandled case"