Tokenisation now accessible via the nlTokStream interface
nlTokStream relies on the functionality of nlLStream
This commit is contained in:
parent
4b20f9961b
commit
9109c4d680
9 changed files with 248 additions and 40 deletions
|
|
@ -5,8 +5,8 @@ author = "Emile Clark-Boman"
|
|||
description = "Type theoretic imperative and logic language for mathematical programming"
|
||||
license = "MIT"
|
||||
srcDir = "src"
|
||||
installExt = @["nim"]`
|
||||
bin = @["noether"]
|
||||
installExt = @["nim"]
|
||||
bin = @["noether", "nlx"]
|
||||
|
||||
|
||||
# Dependencies
|
||||
|
|
|
|||
2
src/demo.no
Normal file
2
src/demo.no
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
hello world
|
||||
a + b + c
|
||||
13
src/nlx.nim
Normal file
13
src/nlx.nim
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import os
|
||||
import noether/lex
|
||||
|
||||
# Entry point of the `nlx` extras binary: prints a banner, then lexes
# the file named by the first command-line argument and echoes every
# token the token stream produces.
when isMainModule:
  echo "Noether Lang - Extras"

  if paramCount() == 0:
    # no filename was supplied, so only explain how to call the tool
    echo "usage: nlx filename"
  else:
    let path = paramStr(1)
    # has to be `var`: the toks iterator takes the stream as a var param
    var stream = newTokStream(path, isFile = true)
    for tok in toks(stream):
      echo tok
|
||||
|
|
@ -1,7 +1,5 @@
|
|||
# This is just an example to get you started. A typical hybrid package
|
||||
# uses this file as the main entry point of the application.
|
||||
|
||||
import noether/submodule
|
||||
|
||||
when isMainModule:
|
||||
echo(getWelcomeMessage())
|
||||
echo "Noether Lang"
|
||||
|
|
|
|||
106
src/noether/lex.nim
Normal file
106
src/noether/lex.nim
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
include lstream
|
||||
|
||||
import os # TEMP import
|
||||
|
||||
type
  # Provides a stream-like interface for lexing nlToks.
  # Internally reliant on the functionality of nlLStream.
  # Exported so that importers of newTokStream()/toks() can name the
  # type (e.g. in their own proc signatures); its fields stay private.
  nlTokStream* = object
    lstream: nlLStream # character source the tokens are built from
    build: nlTok       # the current token we're building
|
||||
|
||||
# Throws away the current build token and replaces it with a blank
# one from emptyTok(), whose startPos is the character stream's
# current position.
proc resetBuild(tokStream: var nlTokStream) =
  let startAt = tokStream.lstream.pos
  tokStream.build = emptyTok(startAt)
|
||||
|
||||
# Fills in the remaining position fields of the build token
# (which was started by emptyTok()) from the state of the
# nlTokStream's nlLStream: `line` from lineNum, `endPos` from pos.
proc finishBuild(tokStream: var nlTokStream) =
  if isTokTerm(tokStream.build.tokType):
    # \0 terminator: bump startPos by one and reuse it as endPos, so
    # that both positions are meant to point OUTSIDE the line
    inc tokStream.build.startPos
    tokStream.build.endPos = tokStream.build.startPos
  else:
    tokStream.build.endPos = Natural tokStream.lstream.pos
  tokStream.build.line = Natural tokStream.lstream.lineNum
|
||||
|
||||
# Finalises the build token, hands it back to the caller and
# leaves a fresh, empty build token in its place.
proc flushBuild(tokStream: var nlTokStream): nlTok =
  tokStream.finishBuild()
  # copy the finished token out before the build slot is reset
  result = tokStream.build
  tokStream.resetBuild()
|
||||
|
||||
# True while the build token has no type of its own yet
# (its tokType is still the NONE placeholder). appendBuild() uses
# this to let the build token inherit the nlTokType of the next
# character it is given.
proc isUntypedBuild(tokStream: nlTokStream): bool =
  tokStream.build.tokType.isTokUntyped
|
||||
|
||||
# Tells whether a character of type `tokType` may be merged into the
# build token; currently that means the two types are equal.
# flushBuild() should be called once an incompatible type shows up.
proc isCompatibleBuild(tokStream: nlTokStream, tokType: nlTokType): bool =
  tokStream.build.tokType == tokType
|
||||
|
||||
# Feeds one character into the nlTokStream's build token.
# Returns some(tok) when this character completed a token: the
# finished token has already been flushed and a fresh build token
# started. Returns none(nlTok) when the character was only absorbed.
proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] =
  let tokType = getTokType(c)
  # check whether build token should inherit type
  if isUntypedBuild(tokStream):
    tokStream.build.tokType = tokType
    if isTokTerm(tokType):
      # The \0 terminator is the very first thing this build token sees
      # (e.g. an empty line). It is already a complete token, so flush
      # it right away; otherwise no token is reported for this call and
      # nextTok() runs out of characters without anything to return.
      # The literal is kept as a single '\0', the same as a terminator
      # that follows other tokens on a line.
      tokStream.build.lit.add(c)
      return some(flushBuild(tokStream))
    result = none(nlTok)
  # check character and build token compatibility
  elif not isCompatibleBuild(tokStream, tokType):
    # return flushed build token, and reset
    result = some(flushBuild(tokStream))
    # new build token is untyped so inherit type
    tokStream.build.tokType = tokType
  # check if \0 terminator reached
  elif isTokTerm(tokStream.build.tokType):
    # return immediately to avoid concatenating a second '\0'
    return some(flushBuild(tokStream))
  # else return none to indicate no build was completed
  else:
    result = none(nlTok)
  # ensure character is appended to the (possibly new) build token
  tokStream.build.lit.add(c)
|
||||
|
||||
# Builds and returns the next token of the current line.
# A result with tokType == nlTokType.TERM means the line has ended.
proc nextTok(tokStream: var nlTokStream): nlTok =
  # walk the remaining characters; the iterator yields none at line end
  for maybeChar in iterChars(tokStream.lstream):
    # a missing character is treated as the '\0' terminator
    let ch = maybeChar.get('\0')
    let built = appendBuild(tokStream, ch)
    if built.isSome:
      return built.get
  # NOTE: reaching here is intended never to happen; if it does, the
  # NOTE: zero-initialised result (tokType NONE) is what gets returned
|
||||
|
||||
# Creates an nlTokStream over `content`, which is handed unchanged to
# newLStream() together with `isFile` (a filename when isFile is true,
# otherwise the text itself). The build token starts out empty.
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  result.lstream = newLStream(content, isFile = isFile)
  resetBuild(result)
|
||||
|
||||
# Iterates over every token of the nlTokStream, line by line.
# Each line's \0 terminator token is yielded as a line-feed token.
# After the last line the most recently built token is yielded once
# more, unconverted (a zero-initialised nlTok if no line was read).
iterator toks*(tokStream: var nlTokStream): nlTok =
  var last: nlTok
  while tokStream.lstream.progLine():
    var lineDone = false
    while not lineDone:
      last = tokStream.nextTok()
      # \0 terminator means the line ended OR the file has ended,
      # so always yield a line-feed just in case
      lineDone = isTokTerm(last.tokType)
      if lineDone:
        yield tokTermToLineFeed(last)
      else:
        yield last
  # we ONLY reach here on EOF
  yield last
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
import std/streams
|
||||
|
||||
type
|
||||
nlLexer* = object
|
||||
stream: Stream
|
||||
pos: Natural
|
||||
|
||||
proc newLexerFromStream(stream: Stream): nlLexer =
|
||||
result = nlLexer(
|
||||
stream: stream,
|
||||
pos: 0,
|
||||
)
|
||||
)
|
||||
|
||||
proc newLexer*(content: string, isFile: bool): nlLexer =
|
||||
result = newLexerFromStream(
|
||||
streamFile(content) if isFile else streamString(content)
|
||||
)
|
||||
)
|
||||
|
||||
proc streamFile(filename: string): FileStream =
|
||||
result = newFileStream(filename, fmRead)
|
||||
|
||||
proc streamString(str: string): StringStream =
|
||||
result = newStringStream(str)
|
||||
|
||||
|
||||
proc nextToken*(lexer: nlLexer): nlToken =
|
||||
result = newToken[]
|
||||
45
src/noether/lstream.nim
Normal file
45
src/noether/lstream.nim
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import std/streams
|
||||
import std/options
|
||||
|
||||
include tokens
|
||||
|
||||
type
  # Line-buffered character source used by the nlTokStream
  nlLStream = object
    stream: Stream    # underlying input that lines are read from
    line*: string     # text of the line currently being scanned
    lineNum*: Natural # incremented every time a line is read
    pos: Natural      # index into `line` of the next character to yield
|
||||
|
||||
# Opens `filename` for reading and returns it as a FileStream.
# Raises IOError when the file cannot be opened. openFileStream is
# used instead of newFileStream because the latter returns nil on
# failure, which would only surface later as a nil access on first read.
proc streamFile(filename: string): FileStream =
  result = openFileStream(filename, fmRead)
|
||||
|
||||
# Wraps an in-memory string in a StringStream so it can be read
# through the same Stream interface as a file.
proc streamString(str: string): StringStream =
  newStringStream(str)
|
||||
|
||||
# Creates an nlLStream reading either from the file named `content`
# (isFile = true) or from the text of `content` itself. No line is
# loaded yet: line is empty and lineNum/pos start at 0.
proc newLStream(content: string, isFile: bool = false): nlLStream =
  if isFile:
    result.stream = streamFile(content)
  else:
    result.stream = streamString(content)
  result.line = ""
  result.lineNum = Natural 0
  result.pos = Natural 0
|
||||
|
||||
# Advances the lex stream to its next line, if there is one.
# On success the new text is in lstream.line, lineNum is incremented
# and pos is rewound to 0. Returns false when no line could be read.
proc progLine(lstream: var nlLStream): bool =
  result = lstream.stream.readLine(lstream.line)
  if result:
    inc lstream.lineNum
    lstream.pos = Natural 0
|
||||
|
||||
# Returns the character of the current line found at index pos.
proc currChar(lstream: nlLStream): char =
  lstream.line[lstream.pos]
|
||||
|
||||
# Yields the remaining characters of the current line as some(char),
# advancing pos past each one before it is yielded, and finishes with
# a single none(char) once the line is exhausted.
# NOTE: assumes lstream.line does NOT mutate while iterating
iterator iterChars(lstream: var nlLStream): Option[char] =
  while lstream.pos < lstream.line.len:
    let ch = lstream.line[lstream.pos]
    inc lstream.pos
    yield some(ch)
  yield none(char)
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
# This is just an example to get you started. Users of your hybrid library will
|
||||
# import this file by writing ``import srcpkg/submodule``. Feel free to rename or
|
||||
# remove this file altogether. You may create additional modules alongside
|
||||
# this file as required.
|
||||
|
||||
proc getWelcomeMessage*(): string = "Hello, World!"
|
||||
79
src/noether/tokens.nim
Normal file
79
src/noether/tokens.nim
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
type
  # nlTokType allows primitive nlToks to be typed,
  # the nlTokType enum should never be directly
  # accessed. Use the interface in this file instead.
  # NOTE: NONE is used as a default value
  # NOTE: it is very different to TERM!
  nlTokType = enum
    NONE, # Placeholder Value
    EOF,  # EOF
    TERM, # String \0 terminator
    WORD, # Alphanumeric token
    SYMB, # Symbolic token
    LNFD, # Line-Feed
    WTSP, # Whitespace
    LPAR, # (
    RPAR, # )
    LBRA, # {
    RBRA, # }
    LSQB, # [
    RSQB, # ]
    LANB, # <
    RANB, # >

  # A single lexed token. Exported (like its fields) so that code
  # receiving tokens from the lexer can name the type.
  nlTok* = object
    tokType*: nlTokType # classification of the token
    lit*: string        # the characters collected for the token
    line*: Natural      # line number the token was completed on
    startPos*: Natural  # position in the line where the token starts
    endPos*: Natural    # position in the line where the token ends
|
||||
|
||||
# Produces a blank nlTok that only knows where it starts;
# the remaining fields are expected to be filled in later.
# NOTE: tokType is initialised to nlTokType.NONE
# NOTE: lit is initialised to the empty string
# NOTE: line and endPos are left at their zero default
proc emptyTok(startPos: int): nlTok =
  result.tokType = nlTokType.NONE
  result.lit = ""
  result.startPos = Natural startPos
|
||||
|
||||
# True when `tokType` is the nlTokType.NONE placeholder.
proc isTokUntyped(tokType: nlTokType): bool =
  tokType == nlTokType.NONE
|
||||
|
||||
# True when `tokType` is nlTokType.TERM (the \0 terminator).
proc isTokTerm(tokType: nlTokType): bool =
  tokType == nlTokType.TERM
|
||||
|
||||
# Only used to turn null terminator nlToks into line-feed ones.
# Returns a copy of `tok` whose type is nlTokType.LNFD; the literal,
# line and start/end positions are carried over unchanged and the
# argument itself is not modified.
proc tokTermToLineFeed(tok: nlTok): nlTok =
  result = tok
  result.tokType = nlTokType.LNFD
|
||||
|
||||
# Maps a single character to the nlTokType it belongs to.
# Anything without a dedicated branch is classified as WORD.
proc getTokType(c: char): nlTokType =
  case c
  of '\0': nlTokType.TERM
  of '\n': nlTokType.LNFD
  of ' ': nlTokType.WTSP
  of '(': nlTokType.LPAR
  of ')': nlTokType.RPAR
  else: nlTokType.WORD
|
||||
Loading…
Add table
Add a link
Reference in a new issue