Begun parser design + typo fixes + improved lexer modularity

This commit is contained in:
Emile Clark-Boman 2025-06-18 13:35:07 +10:00
parent 90ca138904
commit a258802945
6 changed files with 103 additions and 31 deletions

View file

@ -1,6 +1,7 @@
import os
import noether/lexer/tok
import noether/lexer/tokstream
import noether/parser/parser
when isMainModule:
echo "Noether Lang Extras v0.1.0 - nlx"
@ -9,10 +10,13 @@ when isMainModule:
let filename = paramStr(1)
var tokStream = newTokStream(filename, isFile=true)
# DumpTok
var tok: nlTok
while tokStream.nextTok(tok):
echo tok
# # DumpTok
# var tok: nlTok
# while tokStream.nextTok(tok):
# echo tok
# DumpTree
discard parse(tokStream)
else:
echo "usage: nlx filename"

View file

@ -2,6 +2,7 @@ import std/streams
import std/options
import tok
export tok
type
# Character streaming for the nlTokStream

View file

@ -3,13 +3,14 @@ include lstream
type
# Provides a stream-like interface for lexing nlToks
# Internally reliant on the functionality of nlLStream
nlTokStream = object
nlTokStream* = object
lstream: nlLStream
build: nlTok # the build token
currTok*: nlTok # the current token
closed: bool # EOF + all tokens built
# Generates an EOL token for the nlTokStream's state
proc EOLTok*(tokStream: nlTokStream): nlTok =
proc EOLTok(tokStream: nlTokStream): nlTok =
result = nlTok(
tType: nlTokType.EOL,
lit: "\0",

View file

@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
result.resetBuild()
discard result.lstream.progLine()
# Short-hand accessor for the line currently held by the token stream
proc currLine*(tokStream: nlTokStream): string =
  ## Returns the `line` field of the stream's underlying `lstream`.
  tokStream.lstream.line
# Reimplements nlLStream.progress() for nlTokStream
# to account for additional structure (ie the build token)
proc progChar(tokStream: var nlTokStream): bool =
@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
if tokStream.closed:
return false
while true:
var buildTok: Option[nlTok]
var flushedTok: Option[nlTok]
let
canProgress = tokStream.progBuild(buildTok)
tokBuilt = buildTok.isSome
canProgress = tokStream.progBuild(flushedTok)
buildComplete = flushedTok.isSome
# canProgress & EOF reached => no more tokens to build :)
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
if tokBuilt:
tok = buildTok.get()
if buildComplete:
# return the finished build token, and save it as the current token
tok = flushedTok.get()
tokStream.currTok = tok
if canProgress and not tokStream.progChar():
tokStream.closed = true
return tokBuilt
elif tokBuilt:
return buildComplete
elif buildComplete:
return true

View file

@ -1,18 +1,19 @@
from ../lexer/tok import nlTok
from ../lexer/tokstraem import
# from ../lexer/tokstream import
type
# NOTE: by the end of parsing NO nodes should
# NOTE: have nlNodeType.NONE
nlNodeType = enum
nlNodeType* = enum
NONE, # Placeholder Value
TERM, # Indicates the tree has terminated
STRL, # String Literal
CHRL, # Character Literal
nlNode {.acyclic.} = ref object of RootObj
nType: nlNodeType
toks: seq[nlTok] # nodes store the tokens that build them
left, right: nlNode
proc parse()
nlNode* {.acyclic.} = ref object of RootObj
nType*: nlNodeType
toks*: seq[nlTok] # nodes store the tokens that build them
# left, right: nlNode
# Convenience wrapper: push one token onto the end of a node's token sequence
proc addTok*(node: nlNode, tok: nlTok) =
  ## Appends `tok` to `node.toks`.
  add(node.toks, tok)

View file

@ -1,20 +1,79 @@
import strutils
import nodes
import ../lexer/tokstream
type
  # Status code produced by the literal parsers in this module
  nlParseStat = enum
    OK         # no error
    UNMATCHED  # presumably: opening delimiter never matched - TODO confirm
    TOOBIG     # presumably: literal content too big - not produced yet, confirm
# Conditional status: keeps `stat` when `b` holds, otherwise collapses to OK
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Returns `stat` if `b` is true, and `nlParseStat.OK` if `b` is false.
  if not b:
    return nlParseStat.OK
  stat
# Greed will consume anything except a punishment
proc greed(tokStream: nlTokStream, toks: var seq[nlTok], punish: str) =
# Consumes tokens from the stream, appending each one to `toks`, until the
# `satisfy` predicate accepts a token or the stream has no more tokens.
# Returns true if some consumed token satisfied the predicate, false if the
# stream ran out first. The satisfying token itself is also appended to `toks`.
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
  result = false
  var next: nlTok
  # `and` short-circuits, so no further token is pulled once satisfied
  while not result and tokStream.nextTok(next):
    toks.add(next)
    result = satisfy(next)
# Consumes tokens from the stream, appending each one to `toks`, until a token
# of type `satisfy` or an EOL token is consumed, or the stream has no more
# tokens. Returns true in the first two cases and false when the stream runs
# out first.
# NOTE(review): reaching EOL counts as success here, so callers cannot tell a
# matched delimiter from an end of line - confirm this is intended.
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
  result = false
  var next: nlTok
  # `and` short-circuits, so no further token is pulled once a stop is found
  while not result and tokStream.nextTok(next):
    toks.add(next)
    result = next.tType == satisfy or next.tType == nlTokType.EOL
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
proc parse_strl(tokStream: nlTokStream): nlNode =
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
# Builds a string-literal node (nlNodeType.STRL) starting from the stream's
# current token, then greedily consumes tokens up to a double quote or EOL.
#   tokStream: token stream to consume from
#   node:      out-parameter, overwritten with the freshly built node
# Returns nlParseStat.OK when greedEOL reports success and
# nlParseStat.UNMATCHED when the stream ran out of tokens first.
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  node = nlNode(
    nType: nlNodeType.STRL
  )
  # the token that triggered this parse is stored as part of the literal
  node.addTok(tokStream.currTok)
  let closed = greedEOL(tokStream, node.toks, nlTokType.DQUO)
  # nlParseStat has no UNCLOSED member; UNMATCHED is the declared status
  result = nlParseStat.UNMATCHED * (not closed)
# Builds a character-literal node (nlNodeType.CHRL) starting from the stream's
# current token, then greedily consumes tokens up to a single quote or EOL.
#   tokStream: token stream to consume from
#   node:      out-parameter, overwritten with the freshly built node
# Returns whatever greedEOL reports (false only if the stream ran out).
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
  node = nlNode(nType: nlNodeType.CHRL)
  # the token that triggered this parse is stored as part of the literal
  node.toks.add(tokStream.currTok)
  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
  # (only the boolean from greedEOL is reported for now)
  greedEOL(tokStream, node.toks, nlTokType.SQUO)
# Attempt to form an nlAST from a nlTokStream
# Pulls tokens from `tokStream` until it is exhausted. A double quote starts a
# string-literal parse and a single quote starts a character-literal parse;
# the outcome of each is echoed (diagnostic with the current line and a caret
# under the opening token on failure, the node contents on success). Any other
# token type is reported as unhandled.
# NOTE(review): `result` is never assigned, so the returned nlNode is nil.
proc parse*(tokStream: var nlTokStream): nlNode =
  var tok: nlTok
  var node: nlNode
  while tokStream.nextTok(tok):
    case tok.tType:
    of nlTokType.DQUO:
      # Attempt to parse string literal
      # parse_strl returns an nlParseStat, so compare against OK explicitly
      if parse_strl(tokStream, node) != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed String Literal"
        echo node[]
    of nlTokType.SQUO:
      # Attempt to parse character literal
      if not parse_chrl(tokStream, node):
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed Character Literal"
        echo node[]
    else:
      echo "blah blah unhandled case"