Migrate nlTokBuilder + nlTokStream -> nlLexer
This commit is contained in:
parent
07a9bda9ba
commit
d7fb1f0c89
2 changed files with 90 additions and 54 deletions
12
src/nlx.nim
12
src/nlx.nim
|
|
@ -1,19 +1,19 @@
|
|||
import os
|
||||
import noether/lib/io
|
||||
import noether/lexer/[tok, tokstream]
|
||||
import noether/lexer/tokbuilder
|
||||
# import noether/parser/parser
|
||||
|
||||
{.hint: "Don't forget to drink more water (^_^)".}
|
||||
when isMainModule:
  echo "Noether Lang Extras v0.1.0 - nlx"

  # Source text comes from the file named by the first command-line
  # argument when one is given, otherwise from everything on stdin.
  var stream =
    if paramCount() > 0: streamFile(paramStr(1))
    else: streamString(stdin.readAll())

  var lexer = newLexer(stream)

  # DumpTok: print each token for as long as the lexer produces one
  while lexer.progress():
    echo lexer.tok

  # DumpTree
  # discard parse(tokStream)
|
||||
|
|
|
|||
|
|
@ -8,9 +8,11 @@ export tok
|
|||
type
|
||||
# Abstracts the "building process" (lexing)
|
||||
# of nlTok objects from a given Stream of characters.
|
||||
nlTokBuilder* = object
|
||||
nlLexer* = object
|
||||
stream: Stream
|
||||
tok: nlTok # the build token
|
||||
done*: bool
|
||||
tok*: nlTok # new finished token
|
||||
btok: nlTok # the build token
|
||||
# track line number, line content, etc
|
||||
line: string
|
||||
lineNum: int
|
||||
|
|
@ -19,18 +21,18 @@ type
|
|||
char: char
|
||||
cTKind: nlTokKind
|
||||
|
||||
proc atEOL(builder: nlTokBuilder): bool {.inline.} =
|
||||
result = (builder.char == '\n')
|
||||
proc atEOF(builder: nlTokBuilder): bool {.inline.} =
|
||||
result = (builder.char == '\0')
|
||||
proc atEOL(lexer: nlLexer): bool {.inline.} =
  ## True when the lexer's current character is a line feed ('\n').
  lexer.char == '\n'
|
||||
proc atEOF(lexer: nlLexer): bool {.inline.} =
  ## True when the lexer's current character is NUL ('\0'), the value
  ## used to mark the end of the input.
  lexer.char == '\0'
|
||||
|
||||
# Initialise a new token builder
|
||||
proc newBuilder(stream: var Stream): nlTokBuilder =
|
||||
# NOTE: initial builder.char value is arbitrary,
|
||||
# NOTE: but CANNOT be initialised to the default '\0'
|
||||
result = nlTokBuilder(
|
||||
# Initialise a new lexer
|
||||
proc newLexer*(stream: var Stream): nlLexer =
|
||||
result = nlLexer(
|
||||
stream: stream,
|
||||
done: false,
|
||||
tok: emptyTok(0),
|
||||
btok: emptyTok(0),
|
||||
line: "",
|
||||
lineNum: 1,
|
||||
pos: -1, # after initial readChar this -> 0
|
||||
|
|
@ -39,70 +41,104 @@ proc newBuilder(stream: var Stream): nlTokBuilder =
|
|||
|
||||
|
||||
#[ ====================================================== ]
|
||||
| nlTokBuilder Internal Interface for Token Construction ]
|
||||
| nlLexer Internal Interface for Token Construction ]
|
||||
]#
|
||||
|
||||
# Reset the build token to be "empty"
|
||||
proc resetBuild(builder: var nlTokBuilder) =
|
||||
builder.tok = emptyTok(builder.pos)
|
||||
proc resetBuild(lexer: var nlLexer) =
  ## Discards the current build token and replaces it with the result of
  ## emptyTok() for the lexer's current position.
  let fresh = emptyTok(lexer.pos)
  lexer.btok = fresh
|
||||
|
||||
# "Finishes" the build token by setting various properties
|
||||
proc finishBuild(builder: var nlTokBuilder) =
|
||||
builder.tok.lineNum = builder.lineNum
|
||||
builder.tok.endPos = builder.pos
|
||||
builder.tok.lit = builder.line[builder.tok.startPos ..< builder.line.high]
|
||||
proc finishBuild(lexer: var nlLexer) =
  ## Fills in the build token's line number, end position and literal
  ## from the lexer's current state.
  let
    first = lexer.btok.startPos
    # exclusive upper bound: the last char of `line` is left out of the
    # literal (presumably the char that ended the token - TODO confirm)
    stop = lexer.line.high

  lexer.btok.lit = lexer.line[first ..< stop]
  lexer.btok.endPos = lexer.pos
  lexer.btok.lineNum = lexer.lineNum
|
||||
|
||||
# Finish, return, and reset the build token
|
||||
proc flushBuild(builder: var nlTokBuilder): nlTok =
|
||||
finishBuild(builder)
|
||||
result = builder.tok
|
||||
resetBuild(builder)
|
||||
proc flushBuild(lexer: var nlLexer): nlTok =
  ## Finishes the build token, returns it, and leaves a fresh empty
  ## build token in its place.
  lexer.finishBuild()
  result = lexer.btok  # copy out before the reset below replaces it
  lexer.resetBuild()
|
||||
|
||||
# Is the build token "compatible" with the current char? (if not then flushbuild)
|
||||
# NOTE: This implicitly handles Windows CRLF, Unix LF, & Mac OS CR compatibility
|
||||
# NOTE: since atEOL => '\n', but '\r' and '\n' are both tkEOL so they both flush.
|
||||
proc isIncompatibleBuild(builder: nlTokBuilder): bool =
|
||||
result = (builder.cTKind != builder.tok.kind or builder.atEOL())
|
||||
proc isIncompatibleBuild(lexer: nlLexer): bool =
  ## True when the current char cannot extend the build token: either
  ## the current char is '\n', or its token kind differs from the build
  ## token's kind.
  lexer.atEOL() or lexer.btok.kind != lexer.cTKind
|
||||
|
||||
# Inherit the build token's type from current char
|
||||
proc inherit(builder: var nlTokBuilder) =
|
||||
builder.tok.kind = builder.cTKind
|
||||
proc inherit(lexer: var nlLexer) =
  ## Sets the build token's kind to the token kind of the current char.
  let currentKind = lexer.cTKind
  lexer.btok.kind = currentKind
|
||||
|
||||
# Add a character to the nlTokBuilder's build token.
|
||||
# Add a character to the nlLexer's build token.
|
||||
# Flushes and returns the build token if finished.
|
||||
proc appendBuild(builder: var nlTokBuilder): Option[nlTok] =
|
||||
proc appendBuild(lexer: var nlLexer): Option[nlTok] =
  ## Offers the current char to the build token.
  ##
  ## Returns some(token) when the current char is incompatible with the
  ## build token and so forces it to be flushed; otherwise none(nlTok).
  # an untyped build token takes its kind from the current char first
  if isUntyped(lexer.btok):
    inherit(lexer)

  # compatible char: the build token keeps growing, nothing to hand back
  if not lexer.isIncompatibleBuild():
    return none(nlTok)

  # flush the finished token; the fresh build token then takes its kind
  # from the char that caused the flush
  result = some(lexer.flushBuild())
  inherit(lexer)
|
||||
|
||||
#[ ========================================== ]
|
||||
| nlTokBuilder Char Stream Reading Interface ]
|
||||
#[ ========================================= ]
|
||||
| nlLexer Internal Char Streaming Interface ]
|
||||
]#
|
||||
|
||||
# Read the next char in the stream
|
||||
# NOTE: readChar raises IOError on error, returns \0 on EOF
|
||||
proc readChar*(builder: var nlTokBuilder): bool =
|
||||
if builder.atEOL():
|
||||
inc builder.lineNum
|
||||
# sets builder.char to '\0' if EOF
|
||||
builder.char = builder.stream.readChar()
|
||||
builder.cTKind = getTokKind(builder.char)
|
||||
builder.line.add(builder.char)
|
||||
inc builder.pos
|
||||
result = builder.atEOF()
|
||||
proc readChar(lexer: var nlLexer): bool =
  ## Advances the lexer by one character and returns true when that
  ## character is the EOF marker ('\0').
  # the previously read char ended a line, so this read starts the next
  if lexer.char == '\n':
    inc lexer.lineNum

  # the stream yields '\0' once it is exhausted
  let next = lexer.stream.readChar()
  lexer.char = next
  lexer.cTKind = getTokKind(next)
  lexer.line.add(next)
  inc lexer.pos

  next == '\0'
|
||||
|
||||
#[ ========================
|
||||
| nlLexer Public Interface
|
||||
]#
|
||||
|
||||
# Read until EOL and return the current line
|
||||
# NOTE: Does NOT update the builder's state (unsafe)
|
||||
# NOTE: Does NOT update the lexer's state (unsafe)
|
||||
# NOTE: ONLY call if a lex/parse error needs displaying
|
||||
proc unsafeGetLine*(builder: var nlTokBuilder): string =
|
||||
while not builder.atEOL() and builder.readChar():
|
||||
proc unsafeGetLine*(lexer: var nlLexer): string =
  ## Reads ahead until the current char is '\n' or the stream reaches
  ## EOF, then returns the lexer's accumulated `line` buffer.
  ##
  ## Intended only for displaying a lex/parse error: the read-ahead
  ## consumes characters from the underlying stream.
  # readChar() returns true at EOF, so reading may continue only while
  # it returns false. The previous condition was inverted: it stopped
  # after one char mid-line and looped forever once EOF was reached.
  while not lexer.atEOL() and not lexer.readChar():
    discard
  result = lexer.line
|
||||
|
||||
# Lexes and returns the next token in the "token stream"
|
||||
# via repeatedly calling readChar() and appendBuild().
|
||||
# Returns true if a new token was lexed, false once no more tokens remain.
|
||||
# NOTE: access the new token via `lexer.tok`
|
||||
proc progress*(lexer: var nlLexer): bool =
  ## Lexes the next token by repeatedly calling readChar() and
  ## appendBuild(), storing the finished token in `lexer.tok`.
  ##
  ## Returns true when a new token was produced, and false when the
  ## input ended without one or the lexer was already done.
  if lexer.done:
    return false

  while true:
    let
      atEOF = lexer.readChar()
      flushedTok = lexer.appendBuild()

    if flushedTok.isSome:
      lexer.tok = flushedTok.get()
      # a token flushed by the EOF char is the last one available
      lexer.done = atEOF
      return true

    if atEOF:
      # EOF with nothing flushed: mark the lexer done so later calls
      # return at once instead of reading past the end of the stream
      lexer.done = true
      return false
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue