Begun parser design + typo fixes + improved lexer modularity
This commit is contained in:
parent
90ca138904
commit
a258802945
6 changed files with 103 additions and 31 deletions
12
src/nlx.nim
12
src/nlx.nim
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import noether/lexer/tok
|
||||
import noether/lexer/tokstream
|
||||
import noether/parser/parser
|
||||
|
||||
when isMainModule:
|
||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||
|
|
@ -9,10 +10,13 @@ when isMainModule:
|
|||
let filename = paramStr(1)
|
||||
var tokStream = newTokStream(filename, isFile=true)
|
||||
|
||||
# DumpTok
|
||||
var tok: nlTok
|
||||
while tokStream.nextTok(tok):
|
||||
echo tok
|
||||
# # DumpTok
|
||||
# var tok: nlTok
|
||||
# while tokStream.nextTok(tok):
|
||||
# echo tok
|
||||
|
||||
# DumpTree
|
||||
discard parse(tokStream)
|
||||
|
||||
else:
|
||||
echo "usage: nlx filename"
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import std/streams
|
|||
import std/options
|
||||
|
||||
import tok
|
||||
export tok
|
||||
|
||||
type
|
||||
# Character streaming for the nlTokStream
|
||||
|
|
|
|||
|
|
@ -3,13 +3,14 @@ include lstream
|
|||
type
|
||||
# Provides a stream-like interface for lexing nlToks
|
||||
# Internally reliant on the functionality of nlLStream
|
||||
nlTokStream = object
|
||||
nlTokStream* = object
|
||||
lstream: nlLStream
|
||||
build: nlTok # the build token
|
||||
currTok*: nlTok # the current token
|
||||
closed: bool # EOF + all tokens built
|
||||
|
||||
# Generates an EOL token for the nlTokStream's state
|
||||
proc EOLTok*(tokStream: nlTokStream): nlTok =
|
||||
proc EOLTok(tokStream: nlTokStream): nlTok =
|
||||
result = nlTok(
|
||||
tType: nlTokType.EOL,
|
||||
lit: "\0",
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
|
|||
result.resetBuild()
|
||||
discard result.lstream.progLine()
|
||||
|
||||
# Defines a short-hand notation for getting the current line
|
||||
proc currLine*(tokStream: nlTokStream): string =
  ## Short-hand accessor: yields the `line` field of the token stream's
  ## underlying `lstream`.
  tokStream.lstream.line
|
||||
|
||||
# Reimplements nlLStream.progress() for nlTokStream
|
||||
# to account for additional structure (ie the build token)
|
||||
proc progChar(tokStream: var nlTokStream): bool =
|
||||
|
|
@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
|
|||
if tokStream.closed:
|
||||
return false
|
||||
while true:
|
||||
var buildTok: Option[nlTok]
|
||||
var flushedTok: Option[nlTok]
|
||||
let
|
||||
canProgress = tokStream.progBuild(buildTok)
|
||||
tokBuilt = buildTok.isSome
|
||||
canProgress = tokStream.progBuild(flushedTok)
|
||||
buildComplete = flushedTok.isSome
|
||||
# canProgress & EOF reached => no more tokens to build :)
|
||||
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
||||
if tokBuilt:
|
||||
tok = buildTok.get()
|
||||
if buildComplete:
|
||||
# return the finished build token, and save it as the current token
|
||||
tok = flushedTok.get()
|
||||
tokStream.currTok = tok
|
||||
if canProgress and not tokStream.progChar():
|
||||
tokStream.closed = true
|
||||
return tokBuilt
|
||||
elif tokBuilt:
|
||||
return buildComplete
|
||||
elif buildComplete:
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
from ../lexer/tok import nlTok
|
||||
from ../lexer/tokstraem import
|
||||
# from ../lexer/tokstream import
|
||||
|
||||
type
|
||||
# NOTE: by the end of parsing NO nodes should
|
||||
# NOTE: have nlNodeType.NONE
|
||||
nlNodeType = enum
|
||||
nlNodeType* = enum
|
||||
NONE, # Placeholder Value
|
||||
TERM, # Indicates the tree has terminated
|
||||
STRL, # String Literal
|
||||
CHRL, # Character Literal
|
||||
nlNode {.acyclic.} = ref object of RootObj
|
||||
nType: nlNodeType
|
||||
toks: seq[nlTok] # nodes store the tokens that build them
|
||||
left, right: nlNode
|
||||
|
||||
proc parse()
|
||||
nlNode* {.acyclic.} = ref object of RootObj
|
||||
nType*: nlNodeType
|
||||
toks*: seq[nlTok] # nodes store the tokens that build them
|
||||
# left, right: nlNode
|
||||
|
||||
# Short-hand way of appending a token to a node's token sequence
|
||||
proc addTok*(node: nlNode, tok: nlTok) =
  ## Appends `tok` to the end of `node.toks`.
  add(node.toks, tok)
|
||||
|
|
|
|||
|
|
@ -1,20 +1,79 @@
|
|||
import strutils
|
||||
|
||||
import nodes
|
||||
import ../lexer/tokstream
|
||||
|
||||
type
  # Status codes reported by the parse procedures
  nlParseStat = enum
    OK,
    UNMATCHED,
    TOOBIG,

# Conditionally selects a status: `stat * true` keeps `stat`,
# while `stat * false` collapses to `nlParseStat.OK`.
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  if b:
    return stat
  return nlParseStat.OK
|
||||
|
||||
# Greed will consume anything except a punishment
|
||||
proc greed(tokStream: nlTokStream, toks: var seq[nlTok], punish: str) =
|
||||
# Returns a boolean indicating if it succeeded
|
||||
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
  ## Pulls tokens from `tokStream` with `nextTok`, appending every one of
  ## them to `toks`, until `satisfy` accepts the token just appended.
  ##
  ## Returns `true` as soon as `satisfy` returns `true` (the accepted token
  ## is already in `toks`), or `false` once `nextTok` reports that no
  ## further token is available.
  var consumed: nlTok
  while true:
    if not tokStream.nextTok(consumed):
      return false
    toks.add(consumed)
    if satisfy(consumed):
      return true
|
||||
|
||||
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
  ## Pulls tokens from `tokStream` with `nextTok`, appending every one of
  ## them to `toks`, until a token of type `satisfy` or of type
  ## `nlTokType.EOL` has been appended.
  ##
  ## Returns `true` when it stops on either of those token types, and
  ## `false` when `nextTok` runs out of tokens first.
  var consumed: nlTok
  while true:
    if not tokStream.nextTok(consumed):
      return false
    toks.add(consumed)
    let stopHere =
      consumed.tType == satisfy or consumed.tType == nlTokType.EOL
    if stopHere:
      return true
|
||||
|
||||
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
|
||||
|
||||
|
||||
proc parse_strl(tokStream: nlTokStream): nlNode =
|
||||
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
|
||||
|
||||
|
||||
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  ## Builds a string-literal node (`nlNodeType.STRL`) into `node`.
  ##
  ## The node's token list starts with the stream's current token
  ## (`tokStream.currTok`) and is then extended by `greedEOL`, which keeps
  ## consuming until it has appended a `DQUO` or an `EOL` token.
  ##
  ## Returns `nlParseStat.OK` when `greedEOL` succeeds and
  ## `nlParseStat.UNMATCHED` when the stream ends first.
  ##
  ## NOTE(review): `greedEOL` also reports success when it stops on an EOL
  ## token, so a literal cut short by a line end is currently reported as
  ## OK - confirm whether that is intended.
  node = nlNode(
    nType: nlNodeType.STRL
  )
  node.addTok(tokStream.currTok)
  # nlParseStat declares OK, UNMATCHED and TOOBIG only; the previously
  # referenced `UNCLOSED` member does not exist, so UNMATCHED is used.
  let terminated = greedEOL(tokStream, node.toks, nlTokType.DQUO)
  result = nlParseStat.UNMATCHED * (not terminated)
|
||||
|
||||
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
  ## Builds a character-literal node (`nlNodeType.CHRL`) into `node`.
  ##
  ## The node's token list starts with the stream's current token and is
  ## then extended by `greedEOL`, which consumes until it has appended a
  ## `SQUO` or an `EOL` token. The boolean result is whatever `greedEOL`
  ## returns.
  node = nlNode(nType: nlNodeType.CHRL)
  node.toks.add(tokStream.currTok)
  # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
  greedEOL(tokStream, node.toks, nlTokType.SQUO)
|
||||
|
||||
# Attempt to form an nlAST from a nlTokStream
|
||||
proc parse(tokStream: nlTokStream): nlNode =
|
||||
proc parse*(tokStream: var nlTokStream): nlNode =
  ## Attempts to form an nlAST from an nlTokStream.
  ##
  ## Reads tokens with `nextTok` until the stream is exhausted. A double
  ## quote starts a string literal (`parse_strl`), a single quote starts a
  ## character literal (`parse_chrl`); every other token type is reported
  ## as unhandled. Outcomes are currently only echoed.
  ##
  ## NOTE(review): `result` is never assigned, so the returned node is
  ## always nil for now.
  var
    tok: nlTok
    node: nlNode
  while tokStream.nextTok(tok):
    case tok.tType
    of nlTokType.DQUO:
      # Attempt to parse a string literal.
      # parse_strl reports an nlParseStat, so compare against OK rather
      # than negating it like a bool.
      if parse_strl(tokStream, node) != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo tokStream.currLine()
        # `tok` still holds the opening quote; point the caret at it
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed String Literal"
        echo node[]
    of nlTokType.SQUO:
      # Attempt to parse a character literal
      if not parse_chrl(tokStream, node):
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed Character Literal"
        echo node[]
    else:
      echo "blah blah unhandled case"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue