diff --git a/noether.nimble b/noether.nimble index 3f4ffdc..e6c4a12 100644 --- a/noether.nimble +++ b/noether.nimble @@ -9,6 +9,5 @@ installExt = @["nim"] bin = @["noether", "nlx"] backend = "c" - # Dependencies requires "nim >= 2.2.0" diff --git a/src/nlx.nim b/src/nlx.nim index b79cd2e..1c6c446 100644 --- a/src/nlx.nim +++ b/src/nlx.nim @@ -1,5 +1,5 @@ import os -import noether/lex +import noether/lexer/tokstream when isMainModule: echo "Noether Lang Extras v0.1.0 - nlx" diff --git a/src/noether/lstream.nim b/src/noether/lexer/lstream.nim similarity index 98% rename from src/noether/lstream.nim rename to src/noether/lexer/lstream.nim index 5bc79f2..862eb7b 100644 --- a/src/noether/lstream.nim +++ b/src/noether/lexer/lstream.nim @@ -1,7 +1,7 @@ import std/streams import std/options -include tokens +include tok type # Character streaming for the nlTokStream diff --git a/src/noether/tokens.nim b/src/noether/lexer/tok.nim similarity index 86% rename from src/noether/tokens.nim rename to src/noether/lexer/tok.nim index 6e18151..3b2464b 100644 --- a/src/noether/tokens.nim +++ b/src/noether/lexer/tok.nim @@ -23,9 +23,10 @@ type SQUO, # ' Single Quotation Marking DQUO, # " Double Quotation Marking GRVA, # ` Grave Accent + HASH, # # Number Sign (Hashtag) nlTok = object - tokType*: nlTokType + tType*: nlTokType lit*: string line*: Natural startPos*: Natural @@ -33,23 +34,23 @@ type # Generates an "empty" nlTok with only a startPos, # all other fields are expected to be filled out later. 
-# NOTE: tokType initialised to nlTokType.NUL +# NOTE: tType initialised to nlTokType.NONE # NOTE: lit initialised to empty string # NOTE: all other fields are uninitialised proc emptyTok(startPos: int): nlTok = result = nlTok( - tokType: nlTokType.NONE, + tType: nlTokType.NONE, lit: "", startPos: Natural startPos, ) # Checks if an nlTok has nlTokType.NONE -proc isTokUntyped(tokType: nlTokType): bool = - result = (tokType == nlTokType.NONE) +proc isTokUntyped(tType: nlTokType): bool = + result = (tType == nlTokType.NONE) # Checks if an nlTok has nlTokType.TERM -proc isTokTerm(tokType: nlTokType): bool = - result = (tokType == nlTokType.TERM) +proc isTokTerm(tType: nlTokType): bool = + result = (tType == nlTokType.TERM) # This method is only used to convert null # terminator nlToks into line-feed ones. @@ -58,7 +59,7 @@ proc isTokTerm(tokType: nlTokType): bool = # NOTE: strings in a useful but annoying way proc tokTermToLineFeed(tok: nlTok): nlTok = result = nlTok( - tokType: nlTokType.LNFD, + tType: nlTokType.LNFD, lit: tok.lit, line: tok.line, startPos: tok.startPos, @@ -92,5 +93,7 @@ proc getTokType(c: char): nlTokType = result = nlTokType.DQUO of '`': result = nlTokType.GRVA + of '#': + result = nlTokType.HASH else: result = nlTokType.WORD diff --git a/src/noether/lex.nim b/src/noether/lexer/tokstream.nim similarity index 85% rename from src/noether/lex.nim rename to src/noether/lexer/tokstream.nim index 443d3fb..5ae2f65 100644 --- a/src/noether/lex.nim +++ b/src/noether/lexer/tokstream.nim @@ -1,7 +1,5 @@ include lstream -import os # TEMP import - type # Provides a stream-like interface for lexing nlToks # Internally reliant on the functionality of nlLStream @@ -10,7 +8,7 @@ type build: nlTok # the current token we're building # Resets the build token to an "empty" nlTok where -# only tokType, lit, and startPos are initialised. +# only tType, lit, and startPos are initialised. 
proc resetBuild(tokStream: var nlTokStream) = tokStream.build = emptyTok(tokStream.lstream.pos) @@ -20,7 +18,7 @@ proc resetBuild(tokStream: var nlTokStream) = proc finishBuild(tokStream: var nlTokStream) = # if we've reached \0 terminator then forge the start # and end positions to point OUTSIDE the line - let endPos = if isTokTerm(tokStream.build.tokType): + let endPos = if isTokTerm(tokStream.build.tType): inc tokStream.build.startPos; tokStream.build.startPos else: Natural tokStream.lstream.pos @@ -38,30 +36,30 @@ proc flushBuild(tokStream: var nlTokStream): nlTok = # This indicates that the build token should inherit # the nlTokType of the nlLStream's next character. proc isUntypedBuild(tokStream: nlTokStream): bool = - result = isTokUntyped(tokStream.build.tokType) + result = isTokUntyped(tokStream.build.tType) # Check whether an nlTokType is "compatible" with # the build token. flushBuild() should be called # when an incompatible token is discovered. -proc isCompatibleBuild(tokStream: nlTokStream, tokType: nlTokType): bool = - result = (tokType == tokStream.build.tokType) +proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool = + result = (tType == tokStream.build.tType) # Add a character to the nlTokStream's build token. # Returns a bool indicating if a new nlTok has been built # or not. flushBuild should then be called. 
proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] = - let tokType = getTokType(c) + let tType = getTokType(c) # check whether build token should inherit type if isUntypedBuild(tokStream): - tokStream.build.tokType = tokType + tokStream.build.tType = tType # check character and build token compatability - elif not isCompatibleBuild(tokStream, tokType): + elif not isCompatibleBuild(tokStream, tType): # return flushed build token, and reset result = some(flushBuild(tokStream)) # new build token is untyped so inherit type - tokStream.build.tokType = tokType + tokStream.build.tType = tType # check if \0 terminator reached - elif isTokTerm(tokStream.build.tokType): + elif isTokTerm(tokStream.build.tType): # return immediately to avoid concatinating '\0' return some(flushBuild(tokStream)) # else return none to indicate no build was completed @@ -71,7 +69,7 @@ proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] = tokStream.build.lit.add(c) # Generates and returns the next token in the stream, -# result.tokType == nlTokType.NTERM implies line ended +# result.tType == nlTokType.NTERM implies line ended proc nextTok(tokStream: var nlTokStream): nlTok = # try progress to next char, receives none option on failure for optchar in iterChars(tokStream.lstream): @@ -98,7 +96,7 @@ iterator toks*(tokStream: var nlTokStream): nlTok = tok = nextTok(tokStream) # \0 terminator means the line ended OR the file # has ended, so always yield a line-feed just in case - if isTokTerm(tok.tokType): + if isTokTerm(tok.tType): yield tokTermToLineFeed(tok) break yield tok diff --git a/src/noether/parser/arborist.nim b/src/noether/parser/arborist.nim new file mode 100644 index 0000000..42888c8 --- /dev/null +++ b/src/noether/parser/arborist.nim @@ -0,0 +1,7 @@ +# Attempt to form an nlAST from a nlTokStream +proc arborise(tokStream: nlTokStream): nlNode = + for tok in toks(tokStream): + case tok.tokType: + of nlTokType.DQUO: + # Attempt to parse string literal + 
parse_strl() diff --git a/src/noether/parser/nodes.nim b/src/noether/parser/nodes.nim new file mode 100644 index 0000000..fdb78e1 --- /dev/null +++ b/src/noether/parser/nodes.nim @@ -0,0 +1,18 @@ +from ../lexer/tok import nlTok +from ../lexer/tokstream import + +type + # NOTE: by the end of parsing NO nodes should + # NOTE: have nlNodeType.NONE + nlNodeType = enum + NONE, # Placeholder Value + TERM, # Indicates the tree has terminated + STRL, # String Literal + CHRL, # Character Literal + nlNode {.acyclic.} = ref object of RootObj + nType: nlNodeType + toks: seq[nlTok] # nodes store the tokens that build them + left, right: nlNode + +proc parse() +