YALR (Yet Another Lexer Refactor)
This commit is contained in:
parent
72a6075123
commit
99db57dcfd
10 changed files with 208 additions and 217 deletions
2
lang/demo/single_toks.no
Normal file
2
lang/demo/single_toks.no
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
[a]b(#)
|
||||
(c)d[e]
|
||||
|
|
@ -1,3 +1,11 @@
|
|||
#!/usr/bin/env bash
# Build nlx and run it on a demo file from lang/demo.
# Usage: ddemo DEMOFILE
set -e

# The demo file name is required as the first argument.
if [ -z "$1" ]; then
  echo "Usage: ddemo DEMOFILE"
  echo "Demo files are located in lang/demo"
  exit 1
fi

nim c nlx.nim
# Quote the path so a demo name containing spaces or glob characters
# reaches nlx as a single argument.
./nlx "../lang/demo/$1"
|
||||
|
|
|
|||
21
src/nlx.nim
21
src/nlx.nim
|
|
@ -1,22 +1,19 @@
|
|||
import os
|
||||
import noether/lexer/tok
|
||||
import noether/lexer/tokstream
|
||||
import noether/parser/parser
|
||||
import noether/lib/io
|
||||
import noether/lexer/[tok, tokstream]
|
||||
# import noether/parser/parser
|
||||
|
||||
{.hint: "Don't forget to drink more water (^_^)".}
|
||||
when isMainModule:
|
||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||
|
||||
if paramCount() > 0:
|
||||
let filename = paramStr(1)
|
||||
var tokStream = newTokStream(filename, isFile=true)
|
||||
var inStream = if paramCount() > 0: streamFile(paramStr 1)
|
||||
else: streamString(readAll stdin)
|
||||
|
||||
var stream = newTokStream(inStream)
|
||||
# # DumpTok
|
||||
# while tokStream.progress():
|
||||
# echo tokStream.currTok
|
||||
while stream.progress():
|
||||
echo stream.tok
|
||||
|
||||
# DumpTree
|
||||
discard parse(tokStream)
|
||||
|
||||
else:
|
||||
echo "usage: nlx filename"
|
||||
# discard parse(tokStream)
|
||||
|
|
|
|||
|
|
@ -1,66 +0,0 @@
|
|||
import std/streams
|
||||
import std/options
|
||||
|
||||
import tok
|
||||
export tok
|
||||
|
||||
type
  # Character streaming for the nlTokStream
  nlLStream = object
    stream: Stream
    # row/column tracking: current line text, its number, and the
    # position within it
    line*: string
    lineNum*, pos*: Natural
|
||||
|
||||
# Open `filename` for reading as a FileStream.
# NOTE: std/streams documents newFileStream as returning nil when the
# NOTE: file cannot be opened, so the result may be nil.
proc streamFile*(filename: string): FileStream =
  newFileStream(filename, fmRead)
|
||||
|
||||
# Wrap an in-memory string as a StringStream
proc streamString*(str: string): StringStream =
  newStringStream(str)
|
||||
|
||||
# Create an nlLStream with an empty line buffer and zeroed positions.
# `content` is passed to streamFile() when `isFile` is true,
# otherwise to streamString().
proc newLStream*(content: string, isFile: bool = false): nlLStream =
  nlLStream(
    stream: if isFile: streamFile(content) else: streamString(content),
    line: "",
    lineNum: 0.Natural,
    pos: 0.Natural,
  )
|
||||
|
||||
# Reports whether the position is on the last character of the line.
# NOTE: also true once the position has gone past it (ie invalid lstream.pos)
proc atEOL*(lstream: nlLStream): bool =
  lstream.line.high <= lstream.pos
|
||||
|
||||
# Reports whether the position is EXACTLY on the last character
# of the line, and has not gone past it
proc exactlyEOL*(lstream: nlLStream): bool =
  lstream.line.high == lstream.pos
|
||||
|
||||
# Reports whether the position has gone past the last character of the line
proc outOfBounds*(lstream: nlLStream): bool =
  lstream.pos >= lstream.line.len
|
||||
|
||||
# Move the lex stream on to the next line (if available).
# Returns false, leaving lineNum and pos untouched, when readLine
# reports that no further line could be read.
proc progressLine*(lstream: var nlLStream): bool =
  result = lstream.stream.readLine(lstream.line)
  if result:
    inc lstream.lineNum
    lstream.pos = Natural 0
|
||||
|
||||
# Step the lex stream to the next character in the line
# unconditionally (there is NO check for having reached EOL)
proc forceProgressChar*(lstream: var nlLStream) =
  inc(lstream.pos)
|
||||
|
||||
# # Progress the lex stream to the next character (if available)
|
||||
# proc progressChar*(lstream: var nlLStream): bool =
|
||||
# if not lstream.atEOL():
|
||||
# lstream.forceProgressChar()
|
||||
# result = true
|
||||
# else:
|
||||
# # attempt to progress next line past EOL
|
||||
# result = lstream.progressLine()
|
||||
|
||||
# The character of the current line at the current position
proc currChar*(lstream: nlLStream): char =
  lstream.line[lstream.pos]
|
||||
|
|
@ -1,22 +1,25 @@
|
|||
include toktype
|
||||
include tokkind
|
||||
|
||||
type
|
||||
nlTok* = object
|
||||
tKind*: nlTokKind
|
||||
lit*: string
|
||||
lineNum*: Natural
|
||||
startPos*: Natural
|
||||
endPos*: Natural
|
||||
nlTok* = tuple
|
||||
# NOTE: nlTokBuilder will mutate nlTok.kind
|
||||
kind: nlTokKind
|
||||
lit: string
|
||||
lineNum: int
|
||||
startPos: int
|
||||
endPos: int
|
||||
|
||||
# Generates an "empty" nlTok with only a startPos,
|
||||
# all other fields are expected to be filled out later.
|
||||
proc emptyTok*(startPos: int): nlTok =
|
||||
result = nlTok(
|
||||
tKind: tkNONE,
|
||||
proc emptyTok*(startPos: int): nlTok {.inline.} =
|
||||
result = (
|
||||
kind: tkNONE,
|
||||
lit: "",
|
||||
startPos: Natural startPos,
|
||||
lineNum: 0,
|
||||
startPos: startPos,
|
||||
endPos: startPos,
|
||||
)
|
||||
|
||||
# Checks if an nlTok has tkNONE
|
||||
proc isUntyped*(tKind: nlTokKind): bool =
|
||||
result = (tKind == tkNONE)
|
||||
proc isUntyped*(tok: nlTok): bool {.inline.} =
|
||||
result = (tok.kind == tkNONE)
|
||||
|
|
|
|||
123
src/noether/lexer/tokbuilder.nim
Normal file
123
src/noether/lexer/tokbuilder.nim
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import
|
||||
streams,
|
||||
options
|
||||
|
||||
import tok
|
||||
export tok
|
||||
|
||||
type
  # Abstracts the "building process" (lexing)
  # of nlTok objects from a given Stream of characters.
  nlTokBuilder* = object
    stream: Stream
    tok: nlTok # the build token
    # line tracking: content, number, and position
    line: string
    lineNum, pos: int
    # the last char read and the token kind it maps to
    char: char
    cTKind: nlTokKind
|
||||
|
||||
# Is the builder's current char a newline?
proc atEOL(builder: nlTokBuilder): bool {.inline.} =
  builder.char == '\n'
|
||||
# Is the builder's current char the '\0' marker?
proc atEOF(builder: nlTokBuilder): bool {.inline.} =
  builder.char == '\0'
|
||||
|
||||
# Initialise a new token builder reading from `stream`.
# It starts on line 1 with pos = -1, so that pos becomes 0
# after the initial read.
# NOTE(review): char starts as '\0', the same value atEOF() tests for;
# NOTE(review): an earlier note said it must not start as '\0' - confirm
# NOTE(review): which of the two is intended.
proc newBuilder(stream: var Stream): nlTokBuilder =
  nlTokBuilder(
    stream: stream,
    tok: emptyTok(0),
    line: "",
    lineNum: 1,
    pos: -1,
    char: '\0',
  )
|
||||
|
||||
|
||||
#[ ====================================================== ]
|
||||
| nlTokBuilder Internal Interface for Token Construction ]
|
||||
]#
|
||||
|
||||
# Swap the build token for an "empty" one that starts at the current pos
proc resetBuild(builder: var nlTokBuilder) =
  let fresh = emptyTok(builder.pos)
  builder.tok = fresh
|
||||
|
||||
# "Finishes" the build token by setting various properties
|
||||
proc finishBuild(builder: var nlTokBuilder) =
  # stamp the token with the builder's line number and position
  builder.tok.lineNum = builder.lineNum
  builder.tok.endPos = builder.pos
  # the literal runs from the token's start up to, but excluding,
  # the last character held in builder.line
  let
    first = builder.tok.startPos
    stop = builder.line.high
  builder.tok.lit = builder.line[first ..< stop]
|
||||
|
||||
# Complete the build token, hand it back, then start a fresh one
proc flushBuild(builder: var nlTokBuilder): nlTok =
  echo "Flush @", builder.pos
  builder.finishBuild()
  result = builder.tok
  builder.resetBuild()
|
||||
|
||||
# Does the current char's kind match the build token's kind?
# NOTE: flushBuild() is called if incompatible
proc isCompatibleBuild(builder: nlTokBuilder): bool =
  builder.tok.kind == builder.cTKind
|
||||
|
||||
# Give the build token the kind of the current char
proc inherit(builder: var nlTokBuilder) =
  let charKind = builder.cTKind
  builder.tok.kind = charKind
|
||||
|
||||
# Feed the current character into the nlTokBuilder's build token.
# `flushed` receives the finished token when the character's kind does
# not match the build token's kind, and none otherwise.
# The return value signals whether the nlTokBuilder can progress.
proc appendBuild(builder: var nlTokBuilder, flushed: var Option[nlTok]): bool =
  # every path through this proc currently allows progress
  result = true

  # untyped build tokens inherit type immediately
  if builder.tok.isUntyped():
    builder.inherit()

  # check character and build token compatibility
  if isCompatibleBuild(builder):
    flushed = none(nlTok)
  else:
    # flush old build token, the new one inherits type
    flushed = some(flushBuild(builder))
    builder.inherit()
|
||||
|
||||
#[ ========================================== ]
|
||||
| nlTokBuilder Char Stream Reading Interface ]
|
||||
]#
|
||||
|
||||
# Pull the next char from the stream with no safety checks:
# bumps pos, stores the char and its token kind,
# and appends the char to builder.line
proc forceReadChar(builder: var nlTokBuilder) {.inline.} =
  echo "read"
  builder.pos += 1
  let next = builder.stream.readChar()
  builder.char = next
  builder.cTKind = getTokKind(next)
  builder.line.add(next)
|
||||
|
||||
# Read the next char in the stream; the result is true once the
# char just read is the EOF marker.
# NOTE: readChar raises IOError on error, returns \0 on EOF
proc readChar(builder: var nlTokBuilder): bool =
  # the char read previously was a newline, so count a new line
  if builder.atEOL():
    echo "EOL DETECT 2"
    builder.lineNum += 1
  # sets builder.char to '\0' if EOF
  builder.forceReadChar()
  builder.atEOF()
|
||||
|
||||
# Read until EOL (or EOF) and return builder.line.
# NOTE: advances the builder's read state (pos, char, line) but does
# NOTE: not touch the build token (unsafe)
# NOTE: ONLY call if a lex/parse error needs displaying
proc unsafeGetLine(builder: var nlTokBuilder): string =
  # readChar() returns true once EOF has been reached, so reading must
  # continue only while it returns false. Looping on a true result would
  # stop after a single char mid-stream and never terminate at EOF.
  while not builder.atEOL() and not builder.readChar():
    discard
  result = builder.line
|
||||
|
|
@ -1,86 +0,0 @@
|
|||
include lstream
|
||||
|
||||
type
  # Stream-like interface for lexing nlToks,
  # built on the functionality of nlLStream
  nlTokStream* = object
    lstream: nlLStream
    build: nlTok     # the build token
    currTok*: nlTok  # the current token
    closed: bool     # EOF + all tokens built
|
||||
|
||||
# Build an EOL token located at the nlTokStream's current
# line number and position
proc EOLTok(tokStream: nlTokStream): nlTok =
  let
    row = Natural tokStream.lstream.lineNum
    col = Natural tokStream.lstream.pos
  nlTok(tKind: tkEOL, lit: "\0", lineNum: row, startPos: col, endPos: col)
|
||||
|
||||
# Swap the build token for an "empty" nlTok that starts
# at the lstream's current position
proc resetBuild(tokStream: var nlTokStream) =
  let fresh = emptyTok(tokStream.lstream.pos)
  tokStream.build = fresh
|
||||
|
||||
# Fill in the remaining fields of a token made by emptyTok()
# from the nlLStream's current line number, position, and line text
proc finishBuild(ts: var nlTokStream) =
  ts.build.lineNum = Natural ts.lstream.lineNum
  ts.build.endPos = Natural ts.lstream.pos
  # the literal spans startPos up to, but excluding, endPos
  let
    first = ts.build.startPos
    stop = ts.build.endPos
  ts.build.lit = ts.lstream.line[first ..< stop]
|
||||
|
||||
# Complete and return the nlTokStream's build token,
# leaving a fresh empty build token in its place.
proc flushBuild(tokStream: var nlTokStream): nlTok =
  tokStream.finishBuild()
  result = tokStream.build
  tokStream.resetBuild()
|
||||
|
||||
# Reports whether the build token is still untyped.
# An untyped build token should inherit the nlTokKind
# of the nlLStream's next character.
proc isUntypedBuild(tokStream: nlTokStream): bool =
  isUntyped(tokStream.build.tKind)
|
||||
|
||||
# Does `tKind` match the kind of the build token?
# NOTE: flushBuild() should be called when an incompatible token is discovered.
proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
  tokStream.build.tKind == tKind
|
||||
|
||||
# Feed the lstream's current character into the nlTokStream's build token.
# `buildTok` receives a token whenever one is flushed (or the EOL token),
# and none otherwise. The return value signals whether the nlTokStream
# can progress.
proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
  # the "pos > EOL" invalid state is used intentionally
  # to indicate all tokens have been built, and return EOL Token
  if tokStream.lstream.outOfBounds():
    buildTok = some(EOLTok(tokStream))
    return true # can progress once more

  let charKind = getTokType(tokStream.lstream.currChar())
  # untyped build tokens must inherit their type immediately
  if isUntypedBuild(tokStream):
    tokStream.build.tKind = charKind

  let compatible = isCompatibleBuild(tokStream, charKind)
  if tokStream.lstream.atEOL():
    # flush old build token, the new one can be left untyped
    result = false # DO NOT PROGRESS
    if compatible:
      # force the lstream into an invalid state by progressing beyond EOL
      # we can then detect this state on the next progressBuild and return
      # an EOL character (very unsafe implementation but it works well)
      tokStream.lstream.forceProgressChar()
    buildTok = some(flushBuild(tokStream))
  elif compatible:
    buildTok = none(nlTok)
    result = true # can progress
  else:
    # flush old build token, the new one inherits type
    buildTok = some(flushBuild(tokStream))
    tokStream.build.tKind = charKind
    result = true # can progress
|
||||
|
|
@ -29,9 +29,11 @@ type
|
|||
tkHASH, # # Number Sign (Hashtag)
|
||||
|
||||
# Classifies a character to its nlTokKind
|
||||
proc getTokType*(c: char): nlTokKind =
|
||||
proc getTokKind*(c: char): nlTokKind =
|
||||
case c:
|
||||
of '\0', '\r', '\n':
|
||||
of '\0':
|
||||
result = tkEOF
|
||||
of '\r', '\n':
|
||||
result = tkEOL
|
||||
of ' ', '\t':
|
||||
result = tkWTSP
|
||||
|
|
@ -1,52 +1,53 @@
|
|||
include tokbuilding
|
||||
include tokbuilder
|
||||
|
||||
type
|
||||
# Provides a stream-like interface for lexing.
|
||||
# Implemented as a wrapper for nlTokBuilder.
|
||||
nlTokStream* = object
|
||||
builder: nlTokBuilder
|
||||
tok*: nlTok # the current token
|
||||
isClosed: bool # EOF + all tokens built
|
||||
|
||||
# Initialises a new nlTokStream on a string or file
|
||||
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
|
||||
proc newTokStream*(stream: var Stream): nlTokStream =
|
||||
result = nlTokStream(
|
||||
lstream: newLStream(content, isFile=isFile),
|
||||
closed: false,
|
||||
builder: newBuilder(stream),
|
||||
tok: emptyTok(0),
|
||||
isClosed: false,
|
||||
)
|
||||
# 1. initialise an empty build token
|
||||
# 2. progress to the first line
|
||||
result.resetBuild()
|
||||
discard result.lstream.progressLine()
|
||||
|
||||
# Defines a short-hand notation for getting the current line
|
||||
proc line*(tokStream: nlTokStream): string =
|
||||
result = tokStream.lstream.line
|
||||
|
||||
# Reimplements nlLStream.progressChar for nlTokStream
|
||||
# to account for additional structure (ie the build token)
|
||||
# NOTE: progressChar progresses to lstream's next char
|
||||
proc progressChar(tokStream: var nlTokStream): bool =
|
||||
if not tokStream.lstream.atEOL():
|
||||
tokStream.lstream.forceProgressChar()
|
||||
result = true
|
||||
else:
|
||||
# attempt to progress to next line past EOL
|
||||
result = tokStream.lstream.progressLine()
|
||||
tokStream.resetBuild()
|
||||
# Expose a subset of the nlTokBuilder interface
|
||||
proc line*(stream: nlTokStream): string =
|
||||
result = stream.builder.line
|
||||
proc atEOL*(stream: nlTokStream): bool =
|
||||
result = stream.builder.atEOL()
|
||||
|
||||
# Generates the next token in the nlTokStream and progresses to it.
|
||||
# via repeatedly calling progressBuild() and progressChar().
|
||||
# Returns a boolean indicating whether EOF has been reached.
|
||||
# NOTE: access the new token via `tokStream.tok`
|
||||
proc progress*(tokStream: var nlTokStream): bool =
|
||||
# NOTE: access the new token via `stream.tok`
|
||||
proc progress*(stream: var nlTokStream): bool =
|
||||
# Return prematurely if already closed
|
||||
if tokStream.closed:
|
||||
if stream.isClosed:
|
||||
return false
|
||||
while true:
|
||||
# echo "\nProgressing..."
|
||||
var flushedTok: Option[nlTok]
|
||||
let
|
||||
canProgress = tokStream.progressBuild(flushedTok)
|
||||
buildComplete = flushedTok.isSome
|
||||
atEOF = stream.builder.readChar()
|
||||
newTokBuilt = flushedTok.isSome
|
||||
discard stream.builder.appendBuild(flushedTok)
|
||||
echo flushedTok
|
||||
echo "atEOF: ", atEOF, "\nnewTokBuilt: ", newTokBuilt
|
||||
# canProgress & EOF reached => no more tokens to build :)
|
||||
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
||||
if buildComplete:
|
||||
if newTokBuilt:
|
||||
# return the finished build token, and save it as the current token
|
||||
tokStream.currTok = flushedTok.get()
|
||||
if canProgress and not tokStream.progressChar():
|
||||
tokStream.closed = true
|
||||
return buildComplete
|
||||
elif buildComplete:
|
||||
stream.tok = flushedTok.get()
|
||||
# if canProgress and atEOF:
|
||||
if atEOF:
|
||||
if newTokBuilt:
|
||||
stream.isClosed = true
|
||||
return newTokBuilt
|
||||
elif newTokBuilt:
|
||||
return true
|
||||
|
|
|
|||
7
src/noether/lib/io.nim
Normal file
7
src/noether/lib/io.nim
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import std/streams
|
||||
|
||||
# Open `filename` for reading and return it as a generic Stream.
# NOTE: std/streams documents newFileStream as returning nil when the
# NOTE: file cannot be opened, so the result may be nil.
proc streamFile*(filename: string): Stream {.inline.} =
  newFileStream(filename, fmRead)
|
||||
|
||||
# Wrap an in-memory string and return it as a generic Stream
proc streamString*(str: string): Stream {.inline.} =
  newStringStream(str)
|
||||
Loading…
Add table
Add a link
Reference in a new issue