Tokenisation now accessible via the nlTokStream interface
nlTokStream relies on the functionality of nlLStream
This commit is contained in:
parent
4b20f9961b
commit
9109c4d680
9 changed files with 248 additions and 40 deletions
|
|
@ -5,8 +5,8 @@ author = "Emile Clark-Boman"
|
||||||
description = "Type theoretic imperative and logic language for mathematical programming"
|
description = "Type theoretic imperative and logic language for mathematical programming"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
srcDir = "src"
|
srcDir = "src"
|
||||||
installExt = @["nim"]`
|
installExt = @["nim"]
|
||||||
bin = @["noether"]
|
bin = @["noether", "nlx"]
|
||||||
|
|
||||||
|
|
||||||
# Dependencies
|
# Dependencies
|
||||||
|
|
|
||||||
2
src/demo.no
Normal file
2
src/demo.no
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
hello world
|
||||||
|
a + b + c
|
||||||
13
src/nlx.nim
Normal file
13
src/nlx.nim
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
import os
|
||||||
|
import noether/lex
|
||||||
|
|
||||||
|
# Entry point of the `nlx` extras binary: prints a banner, then either
# dumps every token lexed from the file named by the first command-line
# argument, or prints a usage line when no argument was supplied.
when isMainModule:
  echo "Noether Lang - Extras"

  if paramCount() == 0:
    echo "usage: nlx filename"
  else:
    # the first argument is treated as a path to a source file
    var tokStream = newTokStream(paramStr(1), isFile = true)
    for tok in tokStream.toks:
      echo tok
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
# This is just an example to get you started. A typical hybrid package
|
# This is just an example to get you started. A typical hybrid package
|
||||||
# uses this file as the main entry point of the application.
|
# uses this file as the main entry point of the application.
|
||||||
|
|
||||||
import noether/submodule
|
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
echo(getWelcomeMessage())
|
echo "Noether Lang"
|
||||||
|
|
|
||||||
106
src/noether/lex.nim
Normal file
106
src/noether/lex.nim
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
include lstream
|
||||||
|
|
||||||
|
import os # TEMP import
|
||||||
|
|
||||||
|
type
  # Stream-like interface that lexes nlToks.
  # Characters are pulled from the wrapped nlLStream and accumulated
  # into `build` until a complete token can be handed out.
  nlTokStream = object
    lstream: nlLStream # character source
    build: nlTok       # the token currently being built
|
||||||
|
|
||||||
|
# Replaces the build token with a fresh "empty" nlTok whose startPos
# is the nlLStream's current character position (see emptyTok()).
proc resetBuild(tokStream: var nlTokStream) =
  let startAt = tokStream.lstream.pos
  tokStream.build = emptyTok(startAt)
|
||||||
|
|
||||||
|
# Fills in the line and endPos of the build token from the current
# state of the nlTokStream's nlLStream.
proc finishBuild(tokStream: var nlTokStream) =
  tokStream.build.line = Natural tokStream.lstream.lineNum
  if isTokTerm(tokStream.build.tokType):
    # \0 terminator: forge the start and end positions so that
    # both point OUTSIDE the line
    inc tokStream.build.startPos
    tokStream.build.endPos = tokStream.build.startPos
  else:
    tokStream.build.endPos = Natural tokStream.lstream.pos
|
||||||
|
|
||||||
|
# Finalises the build token, hands it back to the caller, and
# leaves a fresh empty build token in its place.
proc flushBuild(tokStream: var nlTokStream): nlTok =
  finishBuild(tokStream)
  let finished = tokStream.build
  resetBuild(tokStream)
  return finished
|
||||||
|
|
||||||
|
# True while the build token has no type yet, i.e. it should
# inherit the nlTokType of the next character that is appended.
proc isUntypedBuild(tokStream: nlTokStream): bool =
  isTokUntyped(tokStream.build.tokType)
|
||||||
|
|
||||||
|
# True when `tokType` equals the type of the build token, meaning a
# character of that type may be appended to it. An incompatible type
# is the signal that flushBuild() should be called.
proc isCompatibleBuild(tokStream: nlTokStream, tokType: nlTokType): bool =
  tokStream.build.tokType == tokType
|
||||||
|
|
||||||
|
# Feeds one character into the build token.
# Returns some(tok) when this character completed a token (the build
# token is flushed and restarted), otherwise none.
proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] =
  let charType = getTokType(c)
  let building = not isUntypedBuild(tokStream)
  result = none(nlTok)

  if building and isCompatibleBuild(tokStream, charType):
    # a second \0 on a terminator build: flush and return
    # immediately so that this '\0' is not concatenated
    if isTokTerm(tokStream.build.tokType):
      return some(flushBuild(tokStream))
  else:
    # an incompatible character completes the token built so far
    if building:
      result = some(flushBuild(tokStream))
    # the (possibly fresh) build token is untyped, so it
    # inherits the type of this character
    tokStream.build.tokType = charType

  # every path that reaches here appends the character
  tokStream.build.lit.add(c)
|
||||||
|
|
||||||
|
# Generates and returns the next token in the stream.
# result.tokType == nlTokType.TERM implies the line ended.
#
# The character iterator is pumped repeatedly until a token is
# completed. A single pass is not always enough: on an empty line the
# build token is still untyped, so the first '\0' only gives the build
# its TERM type and completes nothing. Without the outer loop the proc
# would fall through and return a default (NONE typed) token.
# Each pass feeds at least one '\0', so a token completes within
# two passes.
proc nextTok(tokStream: var nlTokStream): nlTok =
  while true:
    for optchar in iterChars(tokStream.lstream):
      # unpack the Option[char], none => '\0'
      let c = optchar.get('\0')
      let opttok = appendBuild(tokStream, c)
      if opttok.isSome:
        return opttok.get
|
||||||
|
|
||||||
|
# Creates an nlTokStream over `content`, which is either the text to
# lex or, when `isFile` is true, the name of a file to lex.
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  result.lstream = newLStream(content, isFile = isFile)
  # start with an empty build token positioned at the stream start
  resetBuild(result)
|
||||||
|
|
||||||
|
# Iterates over every token of the nlTokStream, line by line.
# Each line is closed with a line-feed token, and one final token is
# yielded once no further line can be read.
iterator toks*(tokStream: var nlTokStream): nlTok =
  var latest: nlTok
  while progLine(tokStream.lstream):
    latest = nextTok(tokStream)
    while not isTokTerm(latest.tokType):
      yield latest
      latest = nextTok(tokStream)
    # \0 terminator means the line ended OR the file has ended,
    # so always yield a line-feed just in case
    yield tokTermToLineFeed(latest)
  # we ONLY reach here on EOF
  yield latest
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
import std/streams
|
|
||||||
|
|
||||||
type
|
|
||||||
nlLexer* = object
|
|
||||||
stream: Stream
|
|
||||||
pos: Natural
|
|
||||||
|
|
||||||
proc newLexerFromStream(stream: Stream): nlLexer =
|
|
||||||
result = nlLexer(
|
|
||||||
stream: stream,
|
|
||||||
pos: 0,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
proc newLexer*(content: string, isFile: bool): nlLexer =
|
|
||||||
result = newLexerFromStream(
|
|
||||||
streamFile(content) if isFile else streamString(content)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
proc streamFile(filename: string): FileStream =
|
|
||||||
result = newFileStream(filename, fmRead)
|
|
||||||
|
|
||||||
proc streamString(str: string): StringStream =
|
|
||||||
result = newStringStream(str)
|
|
||||||
|
|
||||||
|
|
||||||
proc nextToken*(lexer: nlLexer): nlToken =
|
|
||||||
result = newToken[]
|
|
||||||
45
src/noether/lstream.nim
Normal file
45
src/noether/lstream.nim
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
import std/streams
|
||||||
|
import std/options
|
||||||
|
|
||||||
|
include tokens
|
||||||
|
|
||||||
|
type
  # Character streaming for the nlTokStream: wraps a Stream and
  # exposes it one line, and one character of that line, at a time.
  nlLStream = object
    stream: Stream    # underlying file or string stream
    # row/column positions
    line*: string     # text of the line most recently read
    lineNum*: Natural # incremented for every line read
    pos: Natural      # index of the next character within `line`
|
||||||
|
|
||||||
|
# Opens `filename` for reading and returns a stream over its contents.
# Raises IOError when the file cannot be opened. openFileStream is used
# rather than newFileStream because the latter returns nil on failure,
# which would only surface later as a nil dereference on first read.
proc streamFile(filename: string): FileStream =
  result = openFileStream(filename, fmRead)
|
||||||
|
|
||||||
|
# Wraps an in-memory string in a stream.
proc streamString(str: string): StringStream =
  newStringStream(str)
|
||||||
|
|
||||||
|
# Creates an nlLStream over `content`, which is either the text itself
# or, when `isFile` is true, the name of a file to read. No line is
# loaded yet; call progLine() to load the first one.
proc newLStream(content: string, isFile: bool = false): nlLStream =
  var source: Stream
  if isFile:
    source = streamFile(content)
  else:
    source = streamString(content)
  result = nlLStream(
    stream: source,
    line: "",
    lineNum: Natural 0,
    pos: Natural 0,
  )
|
||||||
|
|
||||||
|
# Advances the lex stream to the next line, if one is available.
# Returns false once the underlying stream has no further line.
proc progLine(lstream: var nlLStream): bool =
  result = lstream.stream.readLine(lstream.line)
  if result:
    inc lstream.lineNum
    # restart the column position for the freshly read line
    lstream.pos = Natural 0
|
||||||
|
|
||||||
|
# Returns the character of the current line at the current position.
proc currChar(lstream: nlLStream): char =
  lstream.line[lstream.pos]
|
||||||
|
|
||||||
|
# Yields the remaining characters of the current line, advancing
# lstream.pos past each one before it is yielded, followed by a single
# none(char) marking the end of the line.
# NOTE: assumes lstream.line does NOT mutate while iterating
iterator iterChars(lstream: var nlLStream): Option[char] =
  while lstream.pos < lstream.line.len:
    let c = lstream.line[lstream.pos]
    inc lstream.pos
    yield some(c)
  yield none(char)
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
# This is just an example to get you started. Users of your hybrid library will
|
|
||||||
# import this file by writing ``import srcpkg/submodule``. Feel free to rename or
|
|
||||||
# remove this file altogether. You may create additional modules alongside
|
|
||||||
# this file as required.
|
|
||||||
|
|
||||||
proc getWelcomeMessage*(): string = "Hello, World!"
|
|
||||||
79
src/noether/tokens.nim
Normal file
79
src/noether/tokens.nim
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
type
  # nlTokType allows primitive nlToks to be typed.
  # The enum should never be accessed directly;
  # use the interface in this file instead.
  # NOTE: NONE is used as a default value,
  # NOTE: it is very different to TERM!
  nlTokType = enum
    NONE, # Placeholder Value
    EOF,  # EOF
    TERM, # String \0 terminator
    WORD, # Alphanumeric token
    SYMB, # Symbolic token
    LNFD, # Line-Feed
    WTSP, # Whitespace
    LPAR, # (
    RPAR, # )
    LBRA, # {
    RBRA, # }
    LSQB, # [
    RSQB, # ]
    LANB, # <
    RANB  # >

  # A single lexed token: its type, its literal text, and where
  # it was found (line number plus start/end positions).
  nlTok = object
    tokType*: nlTokType
    lit*: string
    line*: Natural
    startPos*: Natural
    endPos*: Natural
|
||||||
|
|
||||||
|
# Generates an "empty" nlTok that only carries a startPos;
# the remaining fields are expected to be filled in later.
# NOTE: tokType is initialised to nlTokType.NONE
# NOTE: lit is initialised to the empty string
# NOTE: all other fields keep their default values
proc emptyTok(startPos: int): nlTok =
  result.tokType = nlTokType.NONE
  result.lit = ""
  result.startPos = Natural startPos
|
||||||
|
|
||||||
|
# True when the given type is the nlTokType.NONE placeholder.
proc isTokUntyped(tokType: nlTokType): bool =
  tokType == nlTokType.NONE
|
||||||
|
|
||||||
|
# True when the given type is nlTokType.TERM (the \0 terminator).
proc isTokTerm(tokType: nlTokType): bool =
  tokType == nlTokType.TERM
|
||||||
|
|
||||||
|
# Only used to turn a null-terminator nlTok into a line-feed one.
# Returns a copy of `tok` whose type is nlTokType.LNFD; every other
# field (lit, line, startPos, endPos) is carried over unchanged.
proc tokTermToLineFeed(tok: nlTok): nlTok =
  result = tok
  result.tokType = nlTokType.LNFD
|
||||||
|
|
||||||
|
# Classifies a character to its nlTokType.
# Every bracket character that nlTokType declares a dedicated type for
# is mapped to that type, so those enum values are actually produced.
# Any character without a dedicated type is classified as WORD.
proc getTokType(c: char): nlTokType =
  case c
  of '\0': nlTokType.TERM
  of '\n': nlTokType.LNFD
  of ' ': nlTokType.WTSP
  of '(': nlTokType.LPAR
  of ')': nlTokType.RPAR
  of '{': nlTokType.LBRA
  of '}': nlTokType.RBRA
  of '[': nlTokType.LSQB
  of ']': nlTokType.RSQB
  of '<': nlTokType.LANB
  of '>': nlTokType.RANB
  else: nlTokType.WORD
|
||||||
Loading…
Add table
Add a link
Reference in a new issue