Begun parser design + typo fixes + improved lexer modularity
This commit is contained in:
parent
90ca138904
commit
a258802945
6 changed files with 103 additions and 31 deletions
12
src/nlx.nim
12
src/nlx.nim
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import noether/lexer/tok
|
import noether/lexer/tok
|
||||||
import noether/lexer/tokstream
|
import noether/lexer/tokstream
|
||||||
|
import noether/parser/parser
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||||
|
|
@ -9,10 +10,13 @@ when isMainModule:
|
||||||
let filename = paramStr(1)
|
let filename = paramStr(1)
|
||||||
var tokStream = newTokStream(filename, isFile=true)
|
var tokStream = newTokStream(filename, isFile=true)
|
||||||
|
|
||||||
# DumpTok
|
# # DumpTok
|
||||||
var tok: nlTok
|
# var tok: nlTok
|
||||||
while tokStream.nextTok(tok):
|
# while tokStream.nextTok(tok):
|
||||||
echo tok
|
# echo tok
|
||||||
|
|
||||||
|
# DumpTree
|
||||||
|
discard parse(tokStream)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
echo "usage: nlx filename"
|
echo "usage: nlx filename"
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import std/streams
|
||||||
import std/options
|
import std/options
|
||||||
|
|
||||||
import tok
|
import tok
|
||||||
|
export tok
|
||||||
|
|
||||||
type
|
type
|
||||||
# Character streaming for the nlTokStream
|
# Character streaming for the nlTokStream
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,14 @@ include lstream
|
||||||
type
  nlTokStream* = object
    ## Provides a stream-like interface for lexing nlToks.
    ## Internally reliant on the functionality of nlLStream.
    lstream: nlLStream
    build: nlTok      ## the build token
    currTok*: nlTok   ## the current token
    closed: bool      ## EOF + all tokens built
|
||||||
|
|
||||||
# Generates an EOL token for the nlTokStream's state
|
# Generates an EOL token for the nlTokStream's state
|
||||||
proc EOLTok*(tokStream: nlTokStream): nlTok =
|
proc EOLTok(tokStream: nlTokStream): nlTok =
|
||||||
result = nlTok(
|
result = nlTok(
|
||||||
tType: nlTokType.EOL,
|
tType: nlTokType.EOL,
|
||||||
lit: "\0",
|
lit: "\0",
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
|
||||||
result.resetBuild()
|
result.resetBuild()
|
||||||
discard result.lstream.progLine()
|
discard result.lstream.progLine()
|
||||||
|
|
||||||
|
# Defines a short-hand notation for getting the current line
|
||||||
|
proc currLine*(tokStream: nlTokStream): string =
  ## Short-hand accessor: returns the `line` field of the underlying
  ## nlLStream held by `tokStream`.
  tokStream.lstream.line
|
||||||
|
|
||||||
# Reimplements nlLStream.progress() for nlTokStream
|
# Reimplements nlLStream.progress() for nlTokStream
|
||||||
# to account for additional structure (ie the build token)
|
# to account for additional structure (ie the build token)
|
||||||
proc progChar(tokStream: var nlTokStream): bool =
|
proc progChar(tokStream: var nlTokStream): bool =
|
||||||
|
|
@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
|
||||||
if tokStream.closed:
|
if tokStream.closed:
|
||||||
return false
|
return false
|
||||||
while true:
|
while true:
|
||||||
var buildTok: Option[nlTok]
|
var flushedTok: Option[nlTok]
|
||||||
let
|
let
|
||||||
canProgress = tokStream.progBuild(buildTok)
|
canProgress = tokStream.progBuild(flushedTok)
|
||||||
tokBuilt = buildTok.isSome
|
buildComplete = flushedTok.isSome
|
||||||
# canProgress & EOF reached => no more tokens to build :)
|
# canProgress & EOF reached => no more tokens to build :)
|
||||||
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
||||||
if tokBuilt:
|
if buildComplete:
|
||||||
tok = buildTok.get()
|
# return the finished build token, and save it as the current token
|
||||||
|
tok = flushedTok.get()
|
||||||
|
tokStream.currTok = tok
|
||||||
if canProgress and not tokStream.progChar():
|
if canProgress and not tokStream.progChar():
|
||||||
tokStream.closed = true
|
tokStream.closed = true
|
||||||
return tokBuilt
|
return buildComplete
|
||||||
elif tokBuilt:
|
elif buildComplete:
|
||||||
return true
|
return true
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,19 @@
|
||||||
from ../lexer/tok import nlTok
|
from ../lexer/tok import nlTok
|
||||||
from ../lexer/tokstraem import
|
# from ../lexer/tokstream import
|
||||||
|
|
||||||
type
  nlNodeType* = enum
    ## Kinds of node that can appear in the tree.
    ## NOTE: by the end of parsing NO nodes should have nlNodeType.NONE
    NONE  ## Placeholder Value
    TERM  ## Indicates the tree has terminated
    STRL  ## String Literal
    CHRL  ## Character Literal

  nlNode* {.acyclic.} = ref object of RootObj
    ## A single tree node together with the tokens it was built from.
    nType*: nlNodeType
    toks*: seq[nlTok]  ## nodes store the tokens that build them
    # left, right: nlNode
|
||||||
|
|
||||||
proc parse()
|
# Short-hand way of appending a token to a node's token sequence
|
||||||
|
proc addTok*(node: nlNode, tok: nlTok) =
  ## Appends `tok` to the token sequence stored on `node`.
  add(node.toks, tok)
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,79 @@
|
||||||
|
import strutils
|
||||||
|
|
||||||
|
import nodes
|
||||||
import ../lexer/tokstream
|
import ../lexer/tokstream
|
||||||
|
|
||||||
|
type
  nlParseStat = enum  ## Status reported by the parse helpers.
    OK         ## no error
    UNMATCHED  ## presumably: an opening delimiter was never closed
    TOOBIG     ## presumably: literal content too large - TODO confirm
|
||||||
|
|
||||||
|
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` is true and
  ## `nlParseStat.OK` otherwise.
  if b: stat else: nlParseStat.OK
|
||||||
|
|
||||||
# Greed will consume anything except a punishment
|
# Greed will consume anything except a punishment
|
||||||
proc greed(tokStream: nlTokStream, toks: var seq[nlTok], punish: str) =
|
# Returns a boolean indicating if it succeeded
|
||||||
|
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok],
           satisfy: proc(tok: nlTok): bool): bool =
  ## Pulls tokens from `tokStream`, appending each one to `toks`, until
  ## `satisfy` accepts a token (that token is appended too).
  ## Returns true if a satisfying token was found, false if the stream
  ## stopped yielding tokens first.
  var next: nlTok
  # `result` starts out false; once it flips to true the short-circuit
  # prevents any further token from being pulled off the stream.
  while not result and tokStream.nextTok(next):
    toks.add(next)
    result = satisfy(next)
|
||||||
|
|
||||||
|
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok],
              satisfy: nlTokType): bool =
  ## Pulls tokens from `tokStream`, appending each one to `toks`, until a
  ## token of type `satisfy` or an EOL token is met (that token is
  ## appended too).
  ## Returns true if such a token was found, false if the stream stopped
  ## yielding tokens first.
  var next: nlTok
  # `result` starts out false; once it flips to true the short-circuit
  # prevents any further token from being pulled off the stream.
  while not result and tokStream.nextTok(next):
    toks.add(next)
    result = next.tType in [satisfy, nlTokType.EOL]
|
||||||
|
|
||||||
|
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
  ## True when `tok` is of type `tokType` or is an EOL token.
  ## Same predicate that greedEOL applies to each consumed token.
  tok.tType == tokType or tok.tType == nlTokType.EOL
|
||||||
|
|
||||||
|
|
||||||
proc parse_strl(tokStream: nlTokStream): nlNode =
|
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
|
||||||
|
|
||||||
|
|
||||||
|
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  ## Builds a STRL node in `node`, seeded with the stream's current token
  ## (expected to be the opening double quote), then consumes tokens up
  ## to the closing double quote.
  ##
  ## Returns `nlParseStat.OK` when the literal was closed and
  ## `nlParseStat.UNMATCHED` when the line or the stream ended first.
  node = nlNode(nType: nlNodeType.STRL)
  node.addTok(tokStream.currTok)
  # greedEOL also stops (and reports true) on an EOL token, so the literal
  # only counts as closed when the last consumed token is the quote itself.
  let closed =
    greedEOL(tokStream, node.toks, nlTokType.DQUO) and
    node.toks[^1].tType == nlTokType.DQUO
  result = nlParseStat.UNMATCHED * (not closed)
|
||||||
|
|
||||||
|
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
  ## Builds a CHRL node in `node`, seeded with the stream's current token
  ## (expected to be the opening single quote), then consumes tokens up
  ## to the closing single quote.
  ##
  ## Returns true only when a closing single quote was consumed.
  ## TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed;
  ## the bool return can only signal the second one for now.
  node = nlNode(nType: nlNodeType.CHRL)
  node.addTok(tokStream.currTok)
  # greedEOL also stops (and reports true) on an EOL token, so the literal
  # only counts as closed when the last consumed token is the quote itself.
  result =
    greedEOL(tokStream, node.toks, nlTokType.SQUO) and
    node.toks[^1].tType == nlTokType.SQUO
|
||||||
|
|
||||||
# Attempt to form an nlAST from a nlTokStream
|
# Attempt to form an nlAST from a nlTokStream
|
||||||
proc parse*(tokStream: var nlTokStream): nlNode =
  ## Attempt to form an nlAST from a nlTokStream.
  ##
  ## Walks the stream token by token; on an opening quotation token it
  ## tries to parse the matching literal and echoes either the parsed
  ## node or a diagnostic with the current line and a caret under the
  ## opening token.
  ##
  ## NOTE(review): `result` is never assigned here, so nil is returned.
  var
    tok: nlTok
    node: nlNode
  while tokStream.nextTok(tok):
    case tok.tType:
    of nlTokType.DQUO:
      # Attempt to parse string literal.
      # parse_strl reports an nlParseStat, so compare against OK
      # (`not` is not defined for the enum).
      if parse_strl(tokStream, node) != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed String Literal"
        echo node[]
    of nlTokType.SQUO:
      # Attempt to parse character literal
      if not parse_chrl(tokStream, node):
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo tokStream.currLine()
        echo repeat(" ", tok.startPos), '^'
      else:
        echo "Parsed Character Literal"
        echo node[]
    else:
      echo "blah blah unhandled case"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue