Tokenisation now accessible via the nlTokStream interface

nlTokStream relies on the functionality of nlLStream
This commit is contained in:
Emile Clark-Boman 2025-06-17 11:29:31 +10:00
parent 4b20f9961b
commit 9109c4d680
9 changed files with 248 additions and 40 deletions

View file

@ -5,8 +5,8 @@ author = "Emile Clark-Boman"
# Package metadata for the nimble build.
description = "Type theoretic imperative and logic language for mathematical programming"
license = "MIT"
srcDir = "src"
# NOTE(review): installExt and bin are each assigned twice below; the later
# assignment wins. The first installExt line also ends in a stray backtick.
# Presumably the first pair is the superseded version - confirm and drop it.
installExt = @["nim"]`
bin = @["noether"]
installExt = @["nim"]
bin = @["noether", "nlx"]
# Dependencies

2
src/demo.no Normal file
View file

@ -0,0 +1,2 @@
hello world
a + b + c

13
src/nlx.nim Normal file
View file

@ -0,0 +1,13 @@
import os
import noether/lex
# Command-line entry point of the `nlx` binary.
# With a filename argument: lexes that file and echoes every token.
# Without arguments: prints a usage line.
when isMainModule:
  echo "Noether Lang - Extras"
  if paramCount() > 0:
    let filename = paramStr(1)
    # the token stream opens this path as a file stream; stop early with a
    # readable message instead of reading from a stream that failed to open
    if not fileExists(filename):
      quit("nlx: cannot open file: " & filename)
    var tokStream = newTokStream(filename, isFile=true)
    for tok in toks(tokStream):
      echo tok
  else:
    echo "usage: nlx filename"

View file

@ -1,7 +1,5 @@
# This is just an example to get you started. A typical hybrid package
# uses this file as the main entry point of the application.
import noether/submodule
# Program entry point: the body only runs when this file is the main module.
# NOTE(review): getWelcomeMessage is not defined in this file; presumably it
# comes from noether/submodule - confirm that module still exists.
when isMainModule:
echo(getWelcomeMessage())
echo "Noether Lang"

106
src/noether/lex.nim Normal file
View file

@ -0,0 +1,106 @@
include lstream
import os # TEMP import
type
  # Stream-style front end that lexes nlToks.
  # Characters are supplied by the wrapped nlLStream.
  nlTokStream = object
    lstream: nlLStream  # character source
    build: nlTok        # token currently under construction
# Replaces the build token with a fresh emptyTok() whose startPos is
# the lstream's current position.
proc resetBuild(tokStream: var nlTokStream) =
  let startAt = tokStream.lstream.pos
  tokStream.build = emptyTok(startAt)
# Fills in the line and endPos of the build token from the lstream.
# A TERM build is treated specially: its startPos is bumped by one
# and endPos is set to that same value, so both point past the line.
# Any other build ends at the lstream's current position.
proc finishBuild(tokStream: var nlTokStream) =
  tokStream.build.line = Natural tokStream.lstream.lineNum
  if isTokTerm(tokStream.build.tokType):
    inc tokStream.build.startPos
    tokStream.build.endPos = tokStream.build.startPos
  else:
    tokStream.build.endPos = Natural tokStream.lstream.pos
# Finalises the build token, hands it back to the caller and
# leaves a fresh empty build token in its place.
proc flushBuild(tokStream: var nlTokStream): nlTok =
  tokStream.finishBuild()
  result = tokStream.build
  tokStream.resetBuild()
# True while the build token still carries the placeholder type,
# i.e. it has yet to inherit a type from an incoming character.
proc isUntypedBuild(tokStream: nlTokStream): bool =
  tokStream.build.tokType.isTokUntyped
# Checks whether a character of type `tokType` may be appended to the
# build token. flushBuild() should be called when this returns false.
# Types must match, and bracket types are never extended: each bracket
# character is its own token, so "((" lexes as two LPARs and not as a
# single LPAR with literal "((".
proc isCompatibleBuild(tokStream: nlTokStream, tokType: nlTokType): bool =
  const singleCharTypes = {
    nlTokType.LPAR, nlTokType.RPAR,
    nlTokType.LBRA, nlTokType.RBRA,
    nlTokType.LSQB, nlTokType.RSQB,
    nlTokType.LANB, nlTokType.RANB,
  }
  result = tokType == tokStream.build.tokType and
           tokType notin singleCharTypes
# Feeds one character into the build token.
# Returns some(tok) when `c` completes a token (the finished token is
# handed back and a new build is started), or none when `c` was simply
# absorbed into the current build.
proc appendBuild(tokStream: var nlTokStream, c: char): Option[nlTok] =
  let tokType = getTokType(c)
  if isUntypedBuild(tokStream):
    # a fresh build inherits the type of its first character
    tokStream.build.tokType = tokType
  elif not isCompatibleBuild(tokStream, tokType):
    # `c` starts a different token: flush the finished one
    var finished = flushBuild(tokStream)
    if not isTokTerm(tokType):
      # iterChars advances lstream.pos *before* yielding a real
      # character, so at this point pos is one past `c`. Pull both the
      # end of the finished token and the start of the new one back so
      # that the finished token ends where `c` begins.
      # (The '\0' end-of-line marker is yielded without advancing pos.)
      dec finished.endPos
      dec tokStream.build.startPos
    result = some(finished)
    # the new build is untyped, so it inherits the type of `c`
    tokStream.build.tokType = tokType
  elif isTokTerm(tokStream.build.tokType):
    # a second terminator: return at once so no further '\0' is added
    return some(flushBuild(tokStream))
  else:
    # nothing was completed by this character
    result = none(nlTok)
  # every path that did not return early stores the character
  tokStream.build.lit.add(c)
# Generates and returns the next token in the stream.
# result.tokType == nlTokType.TERM implies the current line has ended.
proc nextTok(tokStream: var nlTokStream): nlTok =
  # One pass over iterChars can finish without producing a token: on an
  # empty line the only character seen is the '\0' marker, which merely
  # types the build as TERM. Running the loop again feeds a second '\0'
  # that flushes it, so a default (NONE) token is never returned.
  while true:
    for optchar in iterChars(tokStream.lstream):
      # unpack the Option[char], none => '\0'
      let c = optchar.get('\0')
      let opttok = appendBuild(tokStream, c)
      if opttok.isSome:
        return opttok.get
# Builds an nlTokStream over `content`. `content` is handed to
# newLStream together with `isFile`; the build token starts out empty.
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
  result.lstream = newLStream(content, isFile = isFile)
  result.resetBuild()
# Iterates over every token of the stream, line by line.
# Each line is closed by an LNFD token (converted from the TERM token
# that nextTok produces at the end of a line). Once no line is left,
# the last token obtained is yielded one more time, unconverted.
iterator toks*(tokStream: var nlTokStream): nlTok =
  var tok: nlTok
  while progLine(tokStream.lstream):
    # progLine rewound lstream.pos to 0, but the (empty) build token was
    # created at the end of the previous line and still carries that
    # position as its startPos; recreate it for the new line
    resetBuild(tokStream)
    while true:
      tok = nextTok(tokStream)
      # \0 terminator means the line ended OR the file
      # has ended, so always yield a line-feed just in case
      if isTokTerm(tok.tokType):
        yield tokTermToLineFeed(tok)
        break
      yield tok
  # we ONLY reach here on EOF
  yield tok

View file

@ -1,29 +0,0 @@
import std/streams
type
  # Lexer state: an input Stream plus a Natural `pos` field.
  nlLexer* = object
    stream: Stream
    pos: Natural
# Wraps an existing Stream in an nlLexer whose pos starts at 0.
proc newLexerFromStream(stream: Stream): nlLexer =
  result = nlLexer(
    stream: stream,
    pos: 0,
  )
# Opens `filename` for reading as a FileStream.
proc streamFile(filename: string): FileStream =
  result = newFileStream(filename, fmRead)

# Wraps an in-memory string in a StringStream.
proc streamString(str: string): StringStream =
  result = newStringStream(str)

# Creates an nlLexer over `content`, which is treated as a file name
# when `isFile` is true and as the text itself otherwise.
# The stream helpers are declared above because Nim requires a proc
# to be declared before its first use.
proc newLexer*(content: string, isFile: bool): nlLexer =
  if isFile:
    result = newLexerFromStream(streamFile(content))
  else:
    result = newLexerFromStream(streamString(content))
# Presumably meant to produce the next token from `lexer` - the body
# does not read `lexer` at all yet.
# NOTE(review): neither `nlToken` nor `newToken` is declared in the
# visible part of this file; confirm where they are meant to come from.
proc nextToken*(lexer: nlLexer): nlToken =
result = newToken[]

45
src/noether/lstream.nim Normal file
View file

@ -0,0 +1,45 @@
import std/streams
import std/options
include tokens
type
  # Line-oriented character source used by the nlTokStream
  nlLStream = object
    stream: Stream      # underlying input stream
    line*: string       # text of the line most recently read
    lineNum*: Natural   # bumped by progLine on every successful read
    pos: Natural        # index into `line`, advanced by iterChars
# Opens `filename` for reading and returns it as a FileStream.
# Raises IOError when the file cannot be opened; openFileStream is used
# because newFileStream would silently return nil in that case and the
# failure would only surface at the first read.
proc streamFile(filename: string): FileStream =
  result = openFileStream(filename, fmRead)
# Wraps an in-memory string in a StringStream.
proc streamString(str: string): StringStream =
  newStringStream(str)
# Creates an nlLStream over `content`: a file stream when `isFile` is
# set (content is then the file name), a string stream otherwise.
# No line is loaded yet; line number and position both start at 0.
proc newLStream(content: string, isFile: bool = false): nlLStream =
  var source: Stream
  if isFile:
    source = streamFile(content)
  else:
    source = streamString(content)
  result = nlLStream(
    stream: source,
    line: "",
    lineNum: Natural(0),
    pos: Natural(0),
  )
# Loads the next line of the underlying stream into `line`.
# On success the line number is incremented, the position is rewound
# to 0 and true is returned; false means no further line was read.
proc progLine(lstream: var nlLStream): bool =
  result = lstream.stream.readLine(lstream.line)
  if result:
    inc lstream.lineNum
    lstream.pos = Natural 0
# Character of the current line at the current position.
# No check is made here that pos lies inside the line.
proc currChar(lstream: nlLStream): char =
  lstream.line[lstream.pos]
# Yields the remaining characters of the current line as some(c),
# advancing pos past each one before it is yielded, then a single
# none(char) once the line is exhausted (pos is left untouched then).
# NOTE: assumes lstream.line does NOT mutate while iterating
iterator iterChars(lstream: var nlLStream): Option[char] =
  while lstream.pos < lstream.line.len:
    let here = lstream.pos
    inc lstream.pos
    yield some(lstream.line[here])
  yield none(char)

View file

@ -1,6 +0,0 @@
# This is just an example to get you started. Users of your hybrid library will
# import this file by writing ``import srcpkg/submodule``. Feel free to rename or
# remove this file altogether. You may create additional modules alongside
# this file as required.
# Returns the fixed greeting text.
proc getWelcomeMessage*(): string =
  result = "Hello, World!"

79
src/noether/tokens.nim Normal file
View file

@ -0,0 +1,79 @@
type
  # Category tag carried by every primitive nlTok.
  # The enum should never be accessed directly;
  # use the interface in this file instead.
  # NOTE: NONE is only the default "not yet typed" value
  # NOTE: and is very different to TERM!
  nlTokType = enum
    NONE,   # Placeholder Value
    EOF,    # EOF
    TERM,   # String \0 terminator
    WORD,   # Alphanumeric token
    SYMB,   # Symbolic token
    LNFD,   # Line-Feed
    WTSP,   # Whitespace
    LPAR,   # (
    RPAR,   # )
    LBRA,   # {
    RBRA,   # }
    LSQB,   # [
    RSQB,   # ]
    LANB,   # <
    RANB,   # >

  # A single lexed token: its type, its literal text and where it
  # was found (line number plus start/end positions).
  nlTok = object
    tokType*: nlTokType
    lit*: string
    line*: Natural
    startPos*: Natural
    endPos*: Natural
# Creates an "empty" nlTok that only knows where it starts;
# the remaining fields are expected to be filled in later.
# NOTE: tokType is set to nlTokType.NONE
# NOTE: lit is set to the empty string
# NOTE: line and endPos are not assigned here
proc emptyTok(startPos: int): nlTok =
  result.tokType = nlTokType.NONE
  result.lit = ""
  result.startPos = Natural startPos
# True when `tokType` is the placeholder nlTokType.NONE
proc isTokUntyped(tokType: nlTokType): bool =
  tokType == nlTokType.NONE
# True when `tokType` is the terminator type nlTokType.TERM
proc isTokTerm(tokType: nlTokType): bool =
  tokType == nlTokType.TERM
# Returns a copy of `tok` retyped as a line-feed (LNFD) token;
# literal, line and positions are carried over unchanged.
# Only meant for turning null-terminator nlToks into line-feeds.
proc tokTermToLineFeed(tok: nlTok): nlTok =
  result = tok
  result.tokType = nlTokType.LNFD
# Classifies a character to its nlTokType.
# Every bracket type declared in nlTokType is recognised, and a tab
# counts as whitespace just like a space. Anything not listed is WORD.
# NOTE(review): SYMB and EOF are never produced here, so operator
# characters such as '+' currently classify as WORD.
proc getTokType(c: char): nlTokType =
  case c
  of '\0': result = nlTokType.TERM
  of '\n': result = nlTokType.LNFD
  of ' ', '\t': result = nlTokType.WTSP
  of '(': result = nlTokType.LPAR
  of ')': result = nlTokType.RPAR
  of '{': result = nlTokType.LBRA
  of '}': result = nlTokType.RBRA
  of '[': result = nlTokType.LSQB
  of ']': result = nlTokType.RSQB
  of '<': result = nlTokType.LANB
  of '>': result = nlTokType.RANB
  else: result = nlTokType.WORD