diff --git a/noether.nimble b/noether.nimble
index 9cdf1f2..ad8b6b4 100644
--- a/noether.nimble
+++ b/noether.nimble
@@ -5,8 +5,8 @@ author = "Emile Clark-Boman"
 description = "Type theoretic imperative and logic language for mathematical programming"
 license = "MIT"
 srcDir = "src"
-installExt = @["nim"]`
-bin = @["noether"]
+installExt = @["nim"]
+bin = @["noether", "nlx"]
 
 
 # Dependencies
diff --git a/src/demo.no b/src/demo.no
new file mode 100644
index 0000000..13c87ee
--- /dev/null
+++ b/src/demo.no
@@ -0,0 +1,2 @@
+hello world
+a + b + c
diff --git a/src/nlx.nim b/src/nlx.nim
new file mode 100644
index 0000000..a32c0b8
--- /dev/null
+++ b/src/nlx.nim
@@ -0,0 +1,13 @@
+import os
+import noether/lex
+
+when isMainModule:
+  echo "Noether Lang - Extras"
+
+  if paramCount() > 0:
+    let filename = paramStr(1)
+    var tokStream = newTokStream(filename, isFile=true)
+    for tok in toks(tokStream):
+      echo tok
+  else:
+    echo "usage: nlx filename"
diff --git a/src/noether.nim b/src/noether.nim
index e0c840d..509b123 100644
--- a/src/noether.nim
+++ b/src/noether.nim
@@ -1,7 +1,5 @@
 # This is just an example to get you started. A typical hybrid package
 # uses this file as the main entry point of the application.
 
-import noether/submodule
-
 when isMainModule:
-  echo(getWelcomeMessage())
+  echo "Noether Lang"
diff --git a/src/noether/lex.nim b/src/noether/lex.nim
new file mode 100644
index 0000000..443d3fb
--- /dev/null
+++ b/src/noether/lex.nim
@@ -0,0 +1,117 @@
+include lstream
+
+import os # TEMP import
+
+type
+  # Provides a stream-like interface for lexing nlToks
+  # Internally reliant on the functionality of nlLStream
+  nlTokStream = object
+    lstream: nlLStream
+    build: nlTok # the current token we're building
+
+# Resets the build token to an "empty" nlTok where
+# only tokType, lit, and startPos are initialised.
+proc resetBuild(tokStream: var nlTokStream) =
+  tokStream.build = emptyTok(tokStream.lstream.pos)
+
+# Completes a token generated by emptyTok()
+# based on the nlTokStream's nlLStream's
+# current line and character positions
+proc finishBuild(tokStream: var nlTokStream) =
+  tokStream.build.line = Natural tokStream.lstream.lineNum
+  # if we've reached \0 terminator then forge the start
+  # and end positions to point OUTSIDE the line
+  if isTokTerm(tokStream.build.tokType):
+    inc tokStream.build.startPos
+    tokStream.build.endPos = tokStream.build.startPos
+  else:
+    tokStream.build.endPos = Natural tokStream.lstream.pos
+
+# Returns the nlTokStream's build token and
+# empties the build token's contents.
+proc flushBuild(tokStream: var nlTokStream): nlTok =
+  finishBuild(tokStream)
+  result = tokStream.build
+  resetBuild(tokStream)
+
+# Returns whether the build token is still untyped.
+# An untyped build token should inherit the nlTokType
+# of the nlLStream's next character.
+proc isUntypedBuild(tokStream: nlTokStream): bool =
+  result = isTokUntyped(tokStream.build.tokType)
+
+# Check whether an nlTokType is "compatible" with
+# the build token. flushBuild() should be called
+# when an incompatible token is discovered.
+proc isCompatibleBuild(tokStream: nlTokStream, tokType: nlTokType): bool =
+  result = (tokType == tokStream.build.tokType)
+
+# Add a character to the nlTokStream's build token.
+# Returns some(nlTok) when the character completed a
+# token (the build has already been flushed and restarted
+# by then), and none(nlTok) otherwise.
+proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] =
+  let tokType = getTokType(c)
+  result = none(nlTok)
+  # check whether build token should inherit type
+  if isUntypedBuild(tokStream):
+    tokStream.build.tokType = tokType
+    # an untyped build meeting \0 means the line is empty, the
+    # terminator is the whole token so it is returned right away;
+    # otherwise the caller would run out of characters tokenless
+    if isTokTerm(tokType):
+      tokStream.build.lit.add(c)
+      return some(flushBuild(tokStream))
+  # check character and build token compatibility
+  elif not isCompatibleBuild(tokStream, tokType):
+    # return flushed build token, and reset
+    result = some(flushBuild(tokStream))
+    # new build token is untyped so inherit type
+    tokStream.build.tokType = tokType
+  # check if \0 terminator reached
+  elif isTokTerm(tokStream.build.tokType):
+    # return immediately to avoid concatenating '\0'
+    return some(flushBuild(tokStream))
+  # ensure character is appended to the build token
+  tokStream.build.lit.add(c)
+
+# Generates and returns the next token in the stream,
+# result.tokType == nlTokType.TERM implies line ended
+proc nextTok(tokStream: var nlTokStream): nlTok =
+  # try progress to next char, receives none option on failure
+  for optchar in iterChars(tokStream.lstream):
+    # unpack the Option[char], none => '\0'
+    let c = optchar.get('\0')
+    let opttok = appendBuild(tokStream, c)
+    if opttok.isSome:
+      return opttok.get
+  # iterChars always ends on none (=> '\0') and appendBuild always
+  # completes a token on '\0', so falling out of the loop is a bug
+  doAssert false, "nextTok: line ended without a terminator token"
+
+# Initialises a new nlTokStream on a string or file
+proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
+  result = nlTokStream(
+    lstream: newLStream(content, isFile=isFile),
+  )
+  resetBuild(result)
+
+# Allow the nlTokStream to be iterated
+iterator toks*(tokStream: var nlTokStream): nlTok =
+  var tok: nlTok
+  while progLine(tokStream.lstream):
+    # progLine rewound the column to 0, restart the build there so
+    # the first token doesn't keep the previous line's end column
+    resetBuild(tokStream)
+    while true:
+      tok = nextTok(tokStream)
+      # \0 terminator means the line ended OR the file
+      # has ended, so always yield a line-feed just in case
+      if isTokTerm(tok.tokType):
+        yield tokTermToLineFeed(tok)
+        break
+      yield tok
+  # we ONLY reach here on EOF
+  # NOTE(review): this re-yields the last terminator token (or a
+  # NOTE(review): default nlTok on empty input), not an EOF-typed one
+  yield tok
diff --git a/src/noether/lexer.nim b/src/noether/lexer.nim
deleted file mode 100644
index d65a0b6..0000000
--- a/src/noether/lexer.nim
+++ /dev/null
@@ -1,29 +0,0 @@
-import std/streams
-
-type
-  nlLexer* = object
-    stream: Stream
-    pos: Natural
-
-proc newLexerFromStream(stream: Stream): nlLexer =
-  result = nlLexer(
-    stream: stream,
-    pos: 0,
-  )
-)
-
-proc newLexer*(content: string, isFile: bool): nlLexer =
-  result = newLexerFromStream(
-    streamFile(content) if isFile else streamString(content)
-  )
-)
-
-proc streamFile(filename: string): FileStream =
-  result = newFileStream(filename, fmRead)
-
-proc streamString(str: string): StringStream =
-  result = newStringStream(str)
-
-
-proc nextToken*(lexer: nlLexer): nlToken =
-  result = newToken[]
diff --git a/src/noether/lstream.nim b/src/noether/lstream.nim
new file mode 100644
index 0000000..5bc79f2
--- /dev/null
+++ b/src/noether/lstream.nim
@@ -0,0 +1,62 @@
+import std/streams
+import std/options
+
+include tokens
+
+type
+  # Character streaming for the nlTokStream
+  nlLStream = object
+    stream: Stream
+    # row/column positions
+    line*: string
+    lineNum*: Natural
+    pos: Natural
+
+# Opens a file for reading as a stream.
+# Raises IOError if the file cannot be opened.
+proc streamFile(filename: string): FileStream =
+  result = newFileStream(filename, fmRead)
+  # newFileStream signals failure with nil, fail loudly here
+  # instead of crashing later on the first read
+  if result.isNil:
+    raise newException(IOError, "cannot open file: " & filename)
+
+# Wraps a string in a stream
+proc streamString(str: string): StringStream =
+  result = newStringStream(str)
+
+# Initialises a new nlLStream on a string, or on the
+# file named by content when isFile is set
+proc newLStream(content: string, isFile: bool = false): nlLStream =
+  # both branches are upcast so the if-expression has one type
+  let stream =
+    if isFile: Stream(streamFile(content))
+    else: Stream(streamString(content))
+  result = nlLStream(
+    stream: stream,
+    line: "",
+    lineNum: Natural 0,
+    pos: Natural 0,
+  )
+
+# Progress the lex stream to the next line (if available)
+proc progLine(lstream: var nlLStream): bool =
+  if lstream.stream.readLine(lstream.line):
+    inc lstream.lineNum
+    lstream.pos = Natural 0
+    return true
+  return false
+
+# Returns the character at the current position
+# NOTE: assumes lstream.pos < lstream.line.len
+proc currChar(lstream: nlLStream): char =
+  result = lstream.line[lstream.pos]
+
+# Yields each remaining character of the current line as
+# some(char), then a single none(char) once the line is spent
+# NOTE: assumes lstream.line does NOT mutate while iterating
+iterator iterChars(lstream: var nlLStream): Option[char] =
+  while lstream.pos < lstream.line.len:
+    inc lstream.pos
+    yield some(lstream.line[lstream.pos - 1])
+  yield none(char)
diff --git a/src/noether/submodule.nim b/src/noether/submodule.nim
deleted file mode 100644
index d8146ba..0000000
--- a/src/noether/submodule.nim
+++ /dev/null
@@ -1,6 +0,0 @@
-# This is just an example to get you started. Users of your hybrid library will
-# import this file by writing ``import srcpkg/submodule``. Feel free to rename or
-# remove this file altogether. You may create additional modules alongside
-# this file as required.
-
-proc getWelcomeMessage*(): string = "Hello, World!"
diff --git a/src/noether/tokens.nim b/src/noether/tokens.nim
new file mode 100644
index 0000000..a4a4bcf
--- /dev/null
+++ b/src/noether/tokens.nim
@@ -0,0 +1,79 @@
+type
+  # nlTokType allows primitive nlToks to be typed,
+  # the nlTokType enum should never be directly
+  # accessed. Use the interface in this file instead.
+  # NOTE: NONE is used as a default value
+  # NOTE: it is very different to TERM!
+  nlTokType = enum
+    NONE, # Placeholder Value
+    EOF, # EOF
+    TERM, # String \0 terminator
+    WORD, # Alphanumeric token
+    SYMB, # Symbolic token
+    LNFD, # Line-Feed
+    WTSP, # Whitespace
+    LPAR, # (
+    RPAR, # )
+    LBRA, # {
+    RBRA, # }
+    LSQB, # [
+    RSQB, # ]
+    LANB, # <
+    RANB, # >
+
+  nlTok = object
+    tokType*: nlTokType
+    lit*: string
+    line*: Natural
+    startPos*: Natural
+    endPos*: Natural
+
+# Generates an "empty" nlTok with only a startPos,
+# all other fields are expected to be filled out later.
+# NOTE: tokType initialised to nlTokType.NONE
+# NOTE: lit initialised to empty string
+# NOTE: all other fields keep their zero default
+proc emptyTok(startPos: int): nlTok =
+  result = nlTok(
+    tokType: nlTokType.NONE,
+    lit: "",
+    startPos: Natural startPos,
+  )
+
+# Checks if an nlTokType is nlTokType.NONE
+proc isTokUntyped(tokType: nlTokType): bool =
+  result = (tokType == nlTokType.NONE)
+
+# Checks if an nlTokType is nlTokType.TERM
+proc isTokTerm(tokType: nlTokType): bool =
+  result = (tokType == nlTokType.TERM)
+
+# This method is only used to convert null
+# terminator nlToks into line-feed ones.
+# Returns a copy of an nlTok, changing its type
+# NOTE: this is necessary because Nim handles
+# NOTE: strings in a useful but annoying way
+proc tokTermToLineFeed(tok: nlTok): nlTok =
+  result = nlTok(
+    tokType: nlTokType.LNFD,
+    lit: tok.lit,
+    line: tok.line,
+    startPos: tok.startPos,
+    endPos: tok.endPos,
+  )
+
+# Classifies a character to its nlTokType
+proc getTokType(c: char): nlTokType =
+  case c:
+  of '\0':
+    result = nlTokType.TERM
+  of '\n':
+    result = nlTokType.LNFD
+  of ' ':
+    result = nlTokType.WTSP
+  of '(':
+    result = nlTokType.LPAR
+  of ')':
+    result = nlTokType.RPAR
+  else:
+    result = nlTokType.WORD