diff --git a/src/nlx.nim b/src/nlx.nim index 2e590ac..4b0c678 100644 --- a/src/nlx.nim +++ b/src/nlx.nim @@ -1,6 +1,7 @@ import os import noether/lexer/tok import noether/lexer/tokstream +import noether/parser/parser when isMainModule: echo "Noether Lang Extras v0.1.0 - nlx" @@ -9,10 +10,13 @@ when isMainModule: let filename = paramStr(1) var tokStream = newTokStream(filename, isFile=true) - # DumpTok - var tok: nlTok - while tokStream.nextTok(tok): - echo tok + # # DumpTok + # var tok: nlTok + # while tokStream.nextTok(tok): + # echo tok + + # DumpTree + discard parse(tokStream) else: echo "usage: nlx filename" diff --git a/src/noether/lexer/lstream.nim b/src/noether/lexer/lstream.nim index b743c6d..034f48b 100644 --- a/src/noether/lexer/lstream.nim +++ b/src/noether/lexer/lstream.nim @@ -2,6 +2,7 @@ import std/streams import std/options import tok +export tok type # Character streaming for the nlTokStream diff --git a/src/noether/lexer/tokbuilding.nim b/src/noether/lexer/tokbuilding.nim index 043ac71..99022ee 100644 --- a/src/noether/lexer/tokbuilding.nim +++ b/src/noether/lexer/tokbuilding.nim @@ -3,13 +3,14 @@ include lstream type # Provides a stream-like interface for lexing nlToks # Internally reliant on the functionality of nlLStream - nlTokStream = object + nlTokStream* = object lstream: nlLStream - build: nlTok # the build token + build: nlTok # the build token + currTok*: nlTok # the current token closed: bool # EOF + all tokens built # Generates an EOL token for the nlTokStream's state -proc EOLTok*(tokStream: nlTokStream): nlTok = +proc EOLTok(tokStream: nlTokStream): nlTok = result = nlTok( tType: nlTokType.EOL, lit: "\0", diff --git a/src/noether/lexer/tokstream.nim b/src/noether/lexer/tokstream.nim index 98f92b7..c3fb3f2 100644 --- a/src/noether/lexer/tokstream.nim +++ b/src/noether/lexer/tokstream.nim @@ -11,6 +11,10 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream = result.resetBuild() discard result.lstream.progLine() +# Defines a short-hand notation for getting the current line +proc currLine*(tokStream: nlTokStream): string = + result = tokStream.lstream.line + # Reimplements nlLStream.progress() for nlTokStream # to account for additional structure (ie the build token) proc progChar(tokStream: var nlTokStream): bool = @@ -32,16 +36,18 @@ proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool = if tokStream.closed: return false while true: - var buildTok: Option[nlTok] + var flushedTok: Option[nlTok] let - canProgress = tokStream.progBuild(buildTok) - tokBuilt = buildTok.isSome + canProgress = tokStream.progBuild(flushedTok) + buildComplete = flushedTok.isSome # canProgress & EOF reached => no more tokens to build :) # NOTE: reachedEOF and not canProgress => more tokens unwrapping - if tokBuilt: - tok = buildTok.get() + if buildComplete: + # return the finished build token, and save it as the current token + tok = flushedTok.get() + tokStream.currTok = tok if canProgress and not tokStream.progChar(): tokStream.closed = true - return tokBuilt - elif tokBuilt: + return buildComplete + elif buildComplete: return true diff --git a/src/noether/parser/nodes.nim b/src/noether/parser/nodes.nim index fdb78e1..23cf742 100644 --- a/src/noether/parser/nodes.nim +++ b/src/noether/parser/nodes.nim @@ -1,18 +1,19 @@ from ../lexer/tok import nlTok -from ../lexer/tokstraem import +# from ../lexer/tokstream import type # NOTE: by the end of parsing NO nodes should # NOTE: have nlNodeType.NONE - nlNodeType = enum + nlNodeType* = enum NONE, # Placeholder Value TERM, # Indicates the tree has terminated STRL, # String Literal CHRL, # Character Literal - nlNode {.acyclic.} = ref object of RootObj - nType: nlNodeType - toks: seq[nlTok] # nodes store the tokens that build them - left, right: nlNode + nlNode* {.acyclic.} = ref object of RootObj + nType*: nlNodeType + toks*: seq[nlTok] # nodes store the tokens that build them + # left, right: nlNode -proc parse() - +# Short-hand way of appending a token to a node's token sequence +proc addTok*(node: nlNode, tok: nlTok) = + node.toks.add(tok) diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim index b1937c9..f83861d 100644 --- a/src/noether/parser/parser.nim +++ b/src/noether/parser/parser.nim @@ -1,20 +1,79 @@ +import strutils + +import nodes import ../lexer/tokstream +type + nlParseStat = enum + OK, + UNMATCHED, + TOOBIG, + +proc `*`(stat: nlParseStat, b: bool): nlParseStat = + result = if b: stat else: nlParseStat.OK + # Greed will consume anything except a punishment -proc greed(tokStream: nlTokStream, toks: var seq[nlTok], punish: str) = +# Returns a boolean indicating if it succeeded +proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool = + var tok: nlTok + while tokStream.nextTok(tok): + toks.add(tok) + if satisfy(tok): + return true + result = false + +proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool = + var tok: nlTok + while tokStream.nextTok(tok): + toks.add(tok) + if tok.tType == satisfy or tok.tType == nlTokType.EOL: + return true + result = false + +proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool = -proc parse_strl(tokStream: nlTokStream): nlNode = +proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat = + +proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat = + node = nlNode( + nType: nlNodeType.STRL + ) + node.addTok(tokStream.currTok) + result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO) + +proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool = + node = nlNode( + nType: nlNodeType.CHRL + ) + node.addTok(tokStream.currTok) + # TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed + result = greedEOL(tokStream, node.toks, nlTokType.SQUO) # Attempt to form an nlAST from a nlTokStream -proc parse(tokStream: nlTokStream): nlNode = +proc parse*(tokStream: var nlTokStream): nlNode = var tok: nlTok - while true: - case tok.tokType: + var node: nlNode + while tokStream.nextTok(tok): + case tok.tType: of nlTokType.DQUO: # Attempt to parse string literal - parse_strl() - - if not tokStream.nextTok(tok): - break + if not parse_strl(tokStream, node): + echo "Unmatched Double Quotation! Malformed String Literal" + echo tokStream.currLine() + echo repeat(" ", tok.startPos), '^' + else: + echo "Parsed String Literal" + echo node[] + of nlTokType.SQUO: + # Attempt to parse string literal + if not parse_chrl(tokStream, node): + echo "Unmatched Single Quotation! Malformed Character Literal" + echo tokStream.currLine() + echo repeat(" ", tok.startPos), '^' + else: + echo "Parsed String Literal" + echo node[] + else: + echo "blah blah unhandled case"