Compare commits
12 commits
ebef458186
...
bab593a86b
| Author | SHA1 | Date | |
|---|---|---|---|
| bab593a86b | |||
| 1181ea9743 | |||
| f25e66e9ef | |||
| d7fb1f0c89 | |||
| 07a9bda9ba | |||
| 99db57dcfd | |||
| 72a6075123 | |||
| 8e6c0bbbfc | |||
| f8f90fe92d | |||
| 4a8f44d23f | |||
| 2af3000c2e | |||
| f8697bd662 |
20 changed files with 460 additions and 434 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -1,2 +1,6 @@
|
|||
__pycache__/
|
||||
bin/
|
||||
|
||||
# TEMP: used while debugging
|
||||
# (and cause I'm super duper lazy)
|
||||
src/nlx
|
||||
|
|
|
|||
2
lang/demo/single_toks.no
Normal file
2
lang/demo/single_toks.no
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
[a]b(#)
|
||||
(c)d[e]
|
||||
2
py/m.py
2
py/m.py
|
|
@ -2,7 +2,7 @@
|
|||
import sys
|
||||
import readline
|
||||
|
||||
from noether.math import *
|
||||
from noether.lib.math import *
|
||||
from noether.cli import *
|
||||
|
||||
|
||||
|
|
|
|||
11
src/ddemo
Executable file
11
src/ddemo
Executable file
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash
# Build nlx and run it against a single demo file.
# Usage: ddemo DEMOFILE
# NOTE: nlx.nim and ../lang/demo are resolved relative to the current
# NOTE: directory, so this is meant to be invoked from inside src/.
set -e

if [ -z "$1" ]; then
  echo "Usage: ddemo DEMOFILE"
  echo "Demo files are located in lang/demo"
  exit 1
fi

nim c nlx.nim
# Quote the argument so names containing spaces or glob characters reach
# nlx as one literal path instead of being split/expanded by the shell.
./nlx "../lang/demo/$1"
|
||||
35
src/nlx.nim
35
src/nlx.nim
|
|
@ -1,22 +1,29 @@
|
|||
import os
|
||||
import noether/lexer/tok
|
||||
import noether/lexer/tokstream
|
||||
import noether/parser/parser
|
||||
import noether/lib/io
|
||||
import noether/lexer/lex
|
||||
import noether/parser/parse
|
||||
|
||||
{.hint: "Don't forget to drink more water (^_^)".}
|
||||
when isMainModule:
|
||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||
|
||||
if paramCount() > 0:
|
||||
let filename = paramStr(1)
|
||||
var tokStream = newTokStream(filename, isFile=true)
|
||||
|
||||
# # DumpTok
|
||||
# var tok: nlTok
|
||||
# while tokStream.nextTok(tok):
|
||||
# echo tok
|
||||
# really lazy argparse implementation (temporary)
|
||||
let
|
||||
paramC = paramCount()
|
||||
cmd = if paramC > 2: paramStr 1
|
||||
else: "tok"
|
||||
|
||||
var stream = if paramC > 0: streamFile(paramStr paramC)
|
||||
else: streamString(readAll stdin)
|
||||
|
||||
var lexer = newLexer(stream)
|
||||
if cmd == "tok":
|
||||
# DumpTok
|
||||
while lexer.progress():
|
||||
echo lexer.tok
|
||||
elif cmd == "tree":
|
||||
discard
|
||||
# DumpTree
|
||||
discard parse(tokStream)
|
||||
|
||||
# discard parse(tokStream)
|
||||
else:
|
||||
echo "usage: nlx filename"
|
||||
echo "Usage: nlx [tok|tree] <demo>\n demo files are accessible at lang/demo"
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@
|
|||
# uses this file as the main entry point of the application.
|
||||
|
||||
when isMainModule:
|
||||
echo "Noether Lang"
|
||||
echo "Noether Lang v0.1.0"
|
||||
|
|
|
|||
178
src/noether/lexer/lex.nim
Normal file
178
src/noether/lexer/lex.nim
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
import
|
||||
streams,
|
||||
options
|
||||
|
||||
import tok
|
||||
export tok
|
||||
|
||||
type
  # Lexer state: abstracts the "building process" (lexing) of
  # nlTok objects from a Stream of characters.
  nlLexer* = object
    stream: Stream       # character source being lexed
    done*: bool          # set by progress() when the last token was returned
    # token bookkeeping
    tok*: nlTok          # most recently completed (current) token
    btok: nlTok          # the build token, still being assembled
    # most recently read character and its classification
    char: char
    cTKind: nlTokKind
    # text / position tracking
    line: string         # characters read so far (readChar only ever appends)
    lineNum: int         # bumped by readChar when it reads past a '\n'
    pos: int             # index of `char` within `line`
|
||||
|
||||
# True when the lexer's current character is a line feed ('\n').
proc atEOL(lexer: nlLexer): bool {.inline.} =
  lexer.char == '\n'
|
||||
# True when the lexer's current character is the '\0' end-of-file marker.
proc atEOF(lexer: nlLexer): bool {.inline.} =
  lexer.char == '\0'
|
||||
|
||||
# Initialise a new lexer over the given character stream.
# The lexer starts *before* the first character: `pos` is -1 and `char`
# is '\0' until the first readChar() call.
# NOTE: Stream is a ref type, so it is accepted by value; the lexer shares
# NOTE: the caller's stream and the caller need not keep it in a `var`.
proc newLexer*(stream: Stream): nlLexer =
  result = nlLexer(
    stream: stream,
    done: false,
    tok: emptyTok(0),
    btok: emptyTok(0),
    line: "",
    lineNum: 1,
    pos: -1,        # after initial readChar this -> 0
    char: '\0',     # use \0 as initial invalid char
    cTKind: tkNONE,
  )
|
||||
|
||||
# Maps the lexer's current character onto its nlTokKind.
# Characters without a dedicated kind are classified as tkWORD.
proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
  case lexer.char
  of '\0': tkEOF
  of '\r', '\n': tkEOL
  of ' ', '\t': tkWTSP
  of '(': tkLPAR
  of ')': tkRPAR
  of '{': tkLBRA
  of '}': tkRBRA
  of '[': tkLSQB
  of ']': tkRSQB
  of '\'': tkSQUO
  of '\"': tkDQUO
  of '`': tkGRVA
  of '#': tkHASH
  else: tkWORD
|
||||
|
||||
|
||||
#[ ====================================================== ]
|
||||
| nlLexer Internal Interface for Token Construction ]
|
||||
]#
|
||||
|
||||
# Replaces the build token with an "empty" token that
# starts at the lexer's current position.
proc resetBuild(lexer: var nlLexer) =
  let startPos = lexer.pos
  lexer.btok = emptyTok(startPos)
|
||||
|
||||
# "Finishes" the build token by setting various properties
|
||||
proc finishBuild(lexer: var nlLexer) =
  # Called once the first character *past* the token has been read, so
  # `lexer.pos` indexes that character and acts as the exclusive end.
  let
    startPos = lexer.btok.startPos
    endPos = lexer.pos
    # readChar() bumps lineNum as soon as it reads past a '\n', which
    # happens before the token holding that '\n' gets flushed. Such a
    # token still belongs to the line it terminates, so undo the bump.
    endsInNewline = endPos > startPos and lexer.line[endPos - 1] == '\n'
  lexer.btok.lineNum =
    if endsInNewline: lexer.lineNum - 1
    else: lexer.lineNum
  lexer.btok.endPos = endPos
  # `pos` always equals `line.high` (both advance once per readChar),
  # so this is the same slice as before, expressed through endPos.
  lexer.btok.lit = lexer.line[startPos ..< endPos]
|
||||
|
||||
# Completes the build token, returns it to the caller,
# and leaves a fresh (empty) build token in its place.
proc flushBuild(lexer: var nlLexer): nlTok =
  lexer.finishBuild()
  result = lexer.btok
  lexer.resetBuild()
|
||||
|
||||
# Decides whether the build token must be flushed before the current char
# can be handled: either the char's kind differs from the build token's
# kind, or the char is '\n'.
# NOTE: '\r' and '\n' both classify as tkEOL, but only '\n' counts as
# NOTE: atEOL(), so a '\n' forces a flush even if the build token is
# NOTE: already a tkEOL token.
proc isIncompatibleBuild(lexer: nlLexer): bool =
  if lexer.atEOL():
    return true
  lexer.cTKind != lexer.btok.kind
|
||||
|
||||
# Gives the build token the kind of the lexer's current character.
proc inherit(lexer: var nlLexer) =
  let kind = lexer.cTKind
  lexer.btok.kind = kind
|
||||
|
||||
# Feed the lexer's current character to the build token.
# Returns the finished token when the character ends the token that was
# being built (the new build token then takes the character's kind),
# otherwise returns none.
proc appendBuild(lexer: var nlLexer): Option[nlTok] =
  # An untyped build token has not consumed any character yet, so there is
  # nothing to flush: it simply adopts the current character's kind.
  # (Without this early exit a stream that starts with '\n' flushed an
  # empty-literal tkEOL token, because '\n' always reads as incompatible.)
  if lexer.btok.isUntyped():
    lexer.inherit()
    return none(nlTok)

  # check character and build token compatibility
  if isIncompatibleBuild(lexer):
    # flush old build token, the new one inherits type
    result = some(flushBuild(lexer))
    lexer.inherit()
  else:
    result = none(nlTok)
|
||||
|
||||
#[ ========================================= ]
|
||||
| nlLexer Internal Char Streaming Interface ]
|
||||
]#
|
||||
|
||||
# Advance the lexer by one character of the underlying stream.
# Returns true when that character is the '\0' EOF marker.
# NOTE: Stream.readChar raises IOError on error, returns \0 on EOF
proc readChar(lexer: var nlLexer): bool =
  # the previous character ended a line, so we are now on the next one
  if lexer.char == '\n':
    lexer.lineNum.inc
  let next = lexer.stream.readChar()
  lexer.char = next
  lexer.cTKind = lexer.classifyTok()
  lexer.line.add(next)
  lexer.pos.inc
  next == '\0'
|
||||
|
||||
#[ ========================
|
||||
| nlLexer Public Interface
|
||||
]#
|
||||
|
||||
# Reads ahead until the lexer sits on a '\n' (or EOF is hit) and returns
# the text accumulated in `lexer.line`.
# NOTE: the characters are consumed through readChar() only, so the
# NOTE: build token is NOT updated for them (unsafe).
# NOTE: ONLY call if a lex/parse error needs displaying
proc unsafeGetLine*(lexer: var nlLexer): string =
  # readChar() returns true once EOF is reached, so keep reading only while
  # it returns false. (The former `and lexer.readChar()` was inverted: it
  # stopped after one ordinary character and looped forever at EOF.)
  while not lexer.atEOL() and not lexer.readChar():
    discard
  result = lexer.line
|
||||
|
||||
# Lexes the next token of the "token stream" by repeatedly
# calling readChar() and appendBuild() until a token is flushed.
# Returns true when a new token was stored in `lexer.tok`, and false
# when EOF was reached without completing another token (or when the
# lexer was already marked done).
proc progress*(lexer: var nlLexer): bool =
  # Nothing left to lex once the final token has been handed out
  if lexer.done:
    return false
  while true:
    let
      atEOF = lexer.readChar()
      flushedTok = lexer.appendBuild()
    if flushedTok.isSome:
      lexer.tok = flushedTok.get()
      # the token completed by the EOF character is the last one
      if atEOF:
        lexer.done = true
      return true
    if atEOF:
      return false
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
import std/streams
|
||||
import std/options
|
||||
|
||||
import tok
|
||||
export tok
|
||||
|
||||
type
|
||||
# Character streaming for the nlTokStream
|
||||
nlLStream = object
|
||||
stream: Stream
|
||||
# row/column positions
|
||||
line*: string
|
||||
lineNum*: Natural
|
||||
pos*: Natural
|
||||
|
||||
proc streamFile*(filename: string): FileStream =
|
||||
result = newFileStream(filename, fmRead)
|
||||
|
||||
proc streamString*(str: string): StringStream =
|
||||
result = newStringStream(str)
|
||||
|
||||
proc newLStream*(content: string, isFile: bool = false): nlLStream =
|
||||
result = nlLStream(
|
||||
stream: if isFile: streamFile(content) else: streamString(content),
|
||||
line: "",
|
||||
lineNum: Natural 0,
|
||||
pos: Natural 0,
|
||||
)
|
||||
|
||||
# Checks whether we've reached EOL
|
||||
# NOTE: also checks if we've surpassed it (ie invalid lstream.pos)
|
||||
proc atEOL*(lstream: nlLStream): bool =
|
||||
result = (lstream.pos >= lstream.line.len - 1)
|
||||
|
||||
# Checks whether we are EXACTLY at EOL, but not surpassed
|
||||
proc exactlyEOL*(lstream: nlLStream): bool =
|
||||
result = (lstream.pos == lstream.line.len - 1)
|
||||
|
||||
# Checks whether we have surpassed EOL
|
||||
proc outOfBounds*(lstream: nlLStream): bool =
|
||||
result = (lstream.pos > lstream.line.len - 1)
|
||||
|
||||
# Progress the lex stream to the next line (if available)
|
||||
proc progLine*(lstream: var nlLStream): bool =
|
||||
if lstream.stream.readLine(lstream.line):
|
||||
inc lstream.lineNum
|
||||
lstream.pos = Natural 0
|
||||
return true
|
||||
return false
|
||||
|
||||
# Progress the lex stream to the next character in the line
|
||||
# forcefully (aka does NOT check if we reached EOL)
|
||||
proc forceProgChar*(lstream: var nlLStream) =
|
||||
inc lstream.pos
|
||||
|
||||
# Progress the lex stream to the next character (if available)
|
||||
proc progress*(lstream: var nlLStream): bool =
|
||||
if not lstream.atEOL():
|
||||
lstream.forceProgChar()
|
||||
result = true
|
||||
else:
|
||||
# attempt to progress next line past EOL
|
||||
result = lstream.progLine()
|
||||
|
||||
proc currChar*(lstream: nlLStream): char =
|
||||
result = lstream.line[lstream.pos]
|
||||
|
|
@ -1,40 +1,53 @@
|
|||
include toktype
|
||||
type
|
||||
# nlTokKind allows primitive nlToks to be typed,
|
||||
# the nlTokKind enum should never be directly
|
||||
# accessed. Use the interface in this file instead.
|
||||
nlTokKind* = enum
|
||||
tkNONE, # Placeholder Value
|
||||
|
||||
tkEOF, # End of File
|
||||
tkEOL, # End of Line (\0 --> EOL)
|
||||
|
||||
tkWORD, # Alphanumeric token
|
||||
tkSYMB, # Symbolic token
|
||||
|
||||
tkLNFD, # \r \n Line-Feed
|
||||
tkWTSP, # ' ' \t Whitespace
|
||||
|
||||
# RESERVED SYMBOLS
|
||||
tkLPAR, # ( Left Parenthesis
|
||||
tkRPAR, # ) Right Parenthesis
|
||||
tkLBRA, # { Left Brace
|
||||
tkRBRA, # } Right Brace
|
||||
tkLSQB, # [ Left Square Bracket
|
||||
tkRSQB, # ] Right Square Bracket
|
||||
# tkLANB, # < Left Angle Bracket
|
||||
# tkRANB, # > Right Angle Bracket
|
||||
tkSQUO, # ' Single Quotation Marking
|
||||
tkDQUO, # " Double Quotation Marking
|
||||
tkGRVA, # ` Grave Accent
|
||||
tkHASH, # # Number Sign (Hashtag)
|
||||
|
||||
type
|
||||
nlTok* = object
|
||||
tType*: nlTokType
|
||||
lit*: string
|
||||
lineNum*: Natural
|
||||
startPos*: Natural
|
||||
endPos*: Natural
|
||||
nlTok* = tuple
|
||||
# NOTE: nlTokBuilder will mutate nlTok.kind
|
||||
kind: nlTokKind
|
||||
lit: string
|
||||
lineNum: int
|
||||
startPos: int
|
||||
endPos: int
|
||||
|
||||
# Generates an "empty" nlTok with only a startPos,
|
||||
# all other fields are expected to be filled out later.
|
||||
proc emptyTok*(startPos: int): nlTok =
|
||||
result = nlTok(
|
||||
tType: nlTokType.NONE,
|
||||
proc emptyTok*(startPos: int): nlTok {.inline.} =
|
||||
result = (
|
||||
kind: tkNONE,
|
||||
lit: "",
|
||||
startPos: Natural startPos,
|
||||
lineNum: 0,
|
||||
startPos: startPos,
|
||||
endPos: startPos,
|
||||
)
|
||||
|
||||
# Checks if an nlTok has nlTokType.NONE
|
||||
proc isTokUntyped*(tType: nlTokType): bool =
|
||||
result = (tType == nlTokType.NONE)
|
||||
|
||||
# Checks if an nlTok has nlTokType.EOL
|
||||
proc isTokEOL*(tok: nlTok): bool =
|
||||
result = (tok.tType == nlTokType.EOL)
|
||||
|
||||
|
||||
|
||||
# This method is only used to convert null
|
||||
# terminator nlToks into line-feed ones.
|
||||
# Returns a copy of an nlTok, changing its type
|
||||
proc tokTermToLineFeed*(tok: nlTok): nlTok =
|
||||
result = nlTok(
|
||||
tType: nlTokType.LNFD,
|
||||
lit: tok.lit,
|
||||
lineNum: tok.lineNum,
|
||||
startPos: tok.startPos,
|
||||
endPos: tok.endPos,
|
||||
)
|
||||
# True while the token still carries the tkNONE placeholder kind.
proc isUntyped*(tok: nlTok): bool {.inline.} =
  tok.kind == tkNONE
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
include lstream
|
||||
|
||||
type
|
||||
# Provides a stream-like interface for lexing nlToks
|
||||
# Internally reliant on the functionality of nlLStream
|
||||
nlTokStream* = object
|
||||
lstream: nlLStream
|
||||
build: nlTok # the build token
|
||||
currTok*: nlTok # the current token
|
||||
closed: bool # EOF + all tokens built
|
||||
|
||||
# Generates an EOL token for the nlTokStream's state
|
||||
proc EOLTok(tokStream: nlTokStream): nlTok =
|
||||
result = nlTok(
|
||||
tType: nlTokType.EOL,
|
||||
lit: "\0",
|
||||
lineNum: Natural tokStream.lstream.lineNum,
|
||||
startPos: Natural tokStream.lstream.pos,
|
||||
endPos: Natural tokStream.lstream.pos,
|
||||
)
|
||||
|
||||
# Resets the build token to an "empty" nlTok
|
||||
proc resetBuild(tokStream: var nlTokStream) =
|
||||
tokStream.build = emptyTok(tokStream.lstream.pos)
|
||||
|
||||
# Completes a token generated by emptyTok()
|
||||
# based on the nlTokStream's nlLStream's
|
||||
# current line and character positions
|
||||
proc finishBuild(ts: var nlTokStream) =
|
||||
ts.build.lineNum = Natural ts.lstream.lineNum
|
||||
ts.build.endPos = Natural ts.lstream.pos
|
||||
ts.build.lit = ts.lstream.line[ts.build.startPos ..< ts.build.endPos]
|
||||
|
||||
# Returns the nlTokStream's build token and
|
||||
# empties the build token's contents.
|
||||
proc flushBuild(tokStream: var nlTokStream): nlTok =
|
||||
finishBuild(tokStream)
|
||||
result = tokStream.build
|
||||
resetBuild(tokStream)
|
||||
|
||||
# Returns whether the build token has a set type yet.
|
||||
# This indicates that the build token should inherit
|
||||
# the nlTokType of the nlLStream's next character.
|
||||
proc isUntypedBuild(tokStream: nlTokStream): bool =
|
||||
result = isTokUntyped(tokStream.build.tType)
|
||||
|
||||
# Check whether an nlTokType is "compatible" with the build token.
|
||||
# NOTE: flushBuild() should be called when an incompatible token is discovered.
|
||||
proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
|
||||
result = (tType == tokStream.build.tType)
|
||||
|
||||
# Add a character to the nlTokStream's build token.
|
||||
# Flushes and returns the build token if "fully built",
|
||||
# and a boolean indicating whether the nlTokStream can progress.
|
||||
proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
|
||||
# the "pos > EOL" invalid state is used intentionally
|
||||
# to indicate all tokens have been built, and return EOL Token
|
||||
if tokStream.lstream.outOfBounds():
|
||||
buildTok = some(EOLTok(tokStream))
|
||||
return true # can progress once more
|
||||
|
||||
let tType = getTokType(tokStream.lstream.currChar())
|
||||
# untyped build tokens must inherited immediately
|
||||
if isUntypedBuild(tokStream):
|
||||
tokStream.build.tType = tType
|
||||
|
||||
# check if EOL reached
|
||||
if tokStream.lstream.atEOL():
|
||||
# flush old build token, the new one can be left untyped
|
||||
let compatible = isCompatibleBuild(tokStream, tType)
|
||||
result = false # DO NOT PROGRESS
|
||||
if compatible:
|
||||
# force the lstream into an invalid state by progressing beyond EOL
|
||||
# we can then detect this state on the next progBuild and return
|
||||
# an EOL character (very unsafe implementation but it works well)
|
||||
tokStream.lstream.forceProgChar()
|
||||
buildTok = some(flushBuild(tokStream))
|
||||
# check character and build token compatability
|
||||
elif not isCompatibleBuild(tokStream, tType):
|
||||
# flush old build token, the new one inherits type
|
||||
buildTok = some(flushBuild(tokStream))
|
||||
tokStream.build.tType = tType
|
||||
result = true # can progress
|
||||
else:
|
||||
buildTok = none(nlTok)
|
||||
result = true # can progress
|
||||
1
src/noether/lexer/tokkind.nim
Normal file
1
src/noether/lexer/tokkind.nim
Normal file
|
|
@ -0,0 +1 @@
|
|||
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
include tokbuilding
|
||||
|
||||
# Initialises a new nlTokStream on a string or file
|
||||
proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
|
||||
result = nlTokStream(
|
||||
lstream: newLStream(content, isFile=isFile),
|
||||
closed: false,
|
||||
)
|
||||
# 1. initialise an empty build token
|
||||
# 2. progress to the first line
|
||||
result.resetBuild()
|
||||
discard result.lstream.progLine()
|
||||
|
||||
# Defines a short-hand notation for getting the current line
|
||||
proc currLine*(tokStream: nlTokStream): string =
|
||||
result = tokStream.lstream.line
|
||||
|
||||
# Reimplements nlLStream.progress() for nlTokStream
|
||||
# to account for additional structure (ie the build token)
|
||||
proc progChar(tokStream: var nlTokStream): bool =
|
||||
if not tokStream.lstream.atEOL():
|
||||
tokStream.lstream.forceProgChar()
|
||||
result = true
|
||||
else:
|
||||
# attempt to progress to next line past EOL
|
||||
result = tokStream.lstream.progLine()
|
||||
tokStream.resetBuild()
|
||||
|
||||
# Generates and sets (by reference) the next token in the stream,
|
||||
# via repeatedly calling progBuild() and progChar().
|
||||
# Returns a boolean indicating whether EOF has been reached.
|
||||
# NOTE: progBuild adds lstream's current char to the build token
|
||||
# NOTE: progChar progresses to lstream's next char
|
||||
proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
|
||||
# Return prematurely if already closed
|
||||
if tokStream.closed:
|
||||
return false
|
||||
while true:
|
||||
var flushedTok: Option[nlTok]
|
||||
let
|
||||
canProgress = tokStream.progBuild(flushedTok)
|
||||
buildComplete = flushedTok.isSome
|
||||
# canProgress & EOF reached => no more tokens to build :)
|
||||
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
||||
if buildComplete:
|
||||
# return the finished build token, and save it as the current token
|
||||
tok = flushedTok.get()
|
||||
tokStream.currTok = tok
|
||||
if canProgress and not tokStream.progChar():
|
||||
tokStream.closed = true
|
||||
return buildComplete
|
||||
elif buildComplete:
|
||||
return true
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
type
|
||||
# nlTokType allows primitive nlToks to be typed,
|
||||
# the nlTokType enum should never be directly
|
||||
# accessed. Use the interface in this file instead.
|
||||
nlTokType* = enum
|
||||
NONE, # Placeholder Value
|
||||
EOF, # End of File
|
||||
EOL, # End of Line (\0 --> EOL)
|
||||
WORD, # Alphanumeric token
|
||||
SYMB, # Symbolic token
|
||||
LNFD, # \r \n Line-Feed
|
||||
WTSP, # ' ' \t Whitespace
|
||||
LPAR, # ( Left Parenthesis
|
||||
RPAR, # ) Right Parenthesis
|
||||
LBRA, # { Left Brace
|
||||
RBRA, # } Right Brace
|
||||
LSQB, # [ Left Square Bracket
|
||||
RSQB, # ] Right Square Bracket
|
||||
# LANB, # < Left Angle Bracket
|
||||
# RANB, # > Right Angle Bracket
|
||||
SQUO, # ' Single Quotation Marking
|
||||
DQUO, # " Double Quotation Marking
|
||||
GRVA, # ` Grave Accent
|
||||
HASH, # # Number Sign (Hashtag)
|
||||
|
||||
# Classifies a character to its nlTokType
|
||||
proc getTokType*(c: char): nlTokType =
|
||||
case c:
|
||||
of '\0', '\r', '\n':
|
||||
result = nlTokType.EOL
|
||||
of ' ', '\t':
|
||||
result = nlTokType.WTSP
|
||||
of '(':
|
||||
result = nlTokType.LPAR
|
||||
of ')':
|
||||
result = nlTokType.RPAR
|
||||
of '{':
|
||||
result = nlTokType.LBRA
|
||||
of '}':
|
||||
result = nlTokType.RBRA
|
||||
of '[':
|
||||
result = nlTokType.LSQB
|
||||
of ']':
|
||||
result = nlTokType.RSQB
|
||||
of '\'':
|
||||
result = nlTokType.SQUO
|
||||
of '\"':
|
||||
result = nlTokType.DQUO
|
||||
of '`':
|
||||
result = nlTokType.GRVA
|
||||
of '#':
|
||||
result = nlTokType.HASH
|
||||
else:
|
||||
result = nlTokType.WORD
|
||||
1
src/noether/lib/err.nim
Normal file
1
src/noether/lib/err.nim
Normal file
|
|
@ -0,0 +1 @@
|
|||
# Placeholder for printing an error header.
# TODO: not implemented yet; the original declaration had a stray ':'
# TODO: and no body, which does not parse.
proc echoErrorHeader() =
  discard
|
||||
7
src/noether/lib/io.nim
Normal file
7
src/noether/lib/io.nim
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import std/streams
|
||||
|
||||
# Opens `filename` for reading and returns it as a Stream.
# Raises IOError if the file cannot be opened. (newFileStream would
# return nil instead, which only fails later on the first read.)
proc streamFile*(filename: string): Stream {.inline.} =
  result = openFileStream(filename, fmRead)

# Wraps an in-memory string in a Stream.
proc streamString*(str: string): Stream {.inline.} =
  result = newStringStream(str)
|
||||
8
src/noether/parser/err.nim
Normal file
8
src/noether/parser/err.nim
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
#[ Error codes and messaging directly associated with
|
||||
| nlParser and its procedures is written here.
|
||||
| General error functionality is in src/noether/lib/err.nim
|
||||
]#
|
||||
|
||||
import parser
|
||||
|
||||
|
||||
|
|
@ -1,18 +1,44 @@
|
|||
import std/options
|
||||
from ../lexer/tok import nlTok
|
||||
# from ../lexer/tokstream import
|
||||
|
||||
type
|
||||
# NOTE: by the end of parsing NO nodes should
|
||||
# NOTE: have nlNodeType.NONE
|
||||
nlNodeType* = enum
|
||||
NONE, # Placeholder Value
|
||||
TERM, # Indicates the tree has terminated
|
||||
STRL, # String Literal
|
||||
CHRL, # Character Literal
|
||||
# NOTE: by the end of parsing NO nodes should have nkNone
|
||||
nlNodeKind* = enum
|
||||
nkNone, # Placeholder Value
|
||||
|
||||
nkStrLit, # String Literal
|
||||
nkChrLit, # Character Literal
|
||||
|
||||
# NOTE: always check parent != nil when traversing the tree
|
||||
nlNode* {.acyclic.} = ref object of RootObj
|
||||
nType*: nlNodeType
|
||||
toks*: seq[nlTok] # nodes store the tokens that build them
|
||||
# left, right: nlNode
|
||||
nKind*: nlNodeKind
|
||||
toks*: seq[nlTok] # nodes (may) store the tokens that build them
|
||||
parent*: nlNode
|
||||
|
||||
# Purely abstract type that all nlNode objects
|
||||
# with children are expected to inherit from.
|
||||
nlBranchNode* {.acyclic.} = ref object of nlNode
|
||||
child: UncheckedArray[nlNode]
|
||||
|
||||
nlBiNode* {.acyclic.} = ref object of nlBranchNode
|
||||
|
||||
# Number of children by node type: plain nodes have none,
# binary nodes have two.
proc childCount*(node: nlNode): int {.inline.} =
  result = 0
proc childCount*(node: nlBiNode): int {.inline.} =
  result = 2
|
||||
|
||||
# Plain nodes have no children, so there is never one to return.
proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} =
  none(nlNode)

# Returns the i-th child of a branch node.
# NOTE(review): `child` is an UncheckedArray, so `i` is not
# NOTE(review): bounds-checked here.
proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} =
  some(node.child[i])
|
||||
|
||||
# Creates a plain node of the given kind; all other fields keep defaults.
proc newNode*(nKind: nlNodeKind): nlNode =
  nlNode(nKind: nKind)
|
||||
|
||||
# Creates a binary node of the given kind, returned as its nlNode base type.
proc newBiNode*(nKind: nlNodeKind): nlNode =
  nlBiNode(nKind: nKind)
|
||||
|
||||
# Short-hand way of appending a token to a node's token sequence
|
||||
proc addTok*(node: nlNode, tok: nlTok) =
|
||||
|
|
|
|||
58
src/noether/parser/parse.nim
Normal file
58
src/noether/parser/parse.nim
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import strutils
|
||||
include parser
|
||||
|
||||
# Greedily consumes tokens until one of kind `matchType` is found.
# NOTE: Everything between the two matching tokens ends up on the build
# NOTE: node, EXCLUDING the matching tokens themselves.
proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  let satisfied = satisfyMatch(matchType)
  greed(parser, satisfied)
|
||||
# Same as parseMatch, but delegates to greedLine instead of greed.
proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
  let satisfied = satisfyMatch(matchType)
  greedLine(parser, satisfied)
|
||||
|
||||
# Parses up to the closing double quotation mark on the current line.
proc parseStrLit(parser: var nlParser): nlParseStat =
  parseMatchLine(parser, tkDQUO)
|
||||
|
||||
# Parses up to the closing single quotation mark on the current line.
proc parseChrLit(parser: var nlParser): nlParseStat =
  parseMatchLine(parser, tkSQUO)
|
||||
|
||||
# Walks the token stream until it is exhausted, attempting to parse a
# string or character literal whenever a quotation token is met and
# echoing the outcome. Always returns nlParseStat.OK.
proc parseStmt(parser: var nlParser): nlParseStat =
  while parser.progressStream():
    # Remember the token that opened this statement: the literal parsers
    # advance the stream, so parser.currTok no longer refers to the
    # opening quote by the time an error is reported.
    let startTok = parser.currTok
    echo "----- Current Token: ", startTok
    # NOTE: nlTok's kind field is named `kind` (not `tKind`)
    case startTok.kind
    of tkDQUO:
      # Attempt to parse string literal
      if parser.parseStrLit() != nlParseStat.OK:
        echo "Unmatched Double Quotation! Malformed String Literal"
        echo parser.line
        echo repeat(" ", startTok.startPos), '^', '\n'
      else:
        echo "Parsed String Literal"
        echo parser.bnode[], '\n'
    of tkSQUO:
      # Attempt to parse character literal
      if parser.parseChrLit() != nlParseStat.OK:
        echo "Unmatched Single Quotation! Malformed Character Literal"
        echo parser.line
        echo repeat(" ", startTok.startPos), '^', '\n'
      else:
        echo "Parsed Character Literal"
        echo parser.bnode[], '\n'
    of tkEOL:
      # TODO: handle this case, don't just discard
      discard
    else:
      echo "blah blah unhandled case\n"
  result = nlParseStat.OK
|
||||
|
||||
# Builds a parser over `tokStream`, runs statement parsing on it
# (discarding the status) and returns the parser's AST.
proc parse*(tokStream: var nlTokStream): nlAST =
  var parser = newParser(tokStream)
  echo ' '
  discard parseStmt(parser)
  parser.ast
|
||||
|
|
@ -1,63 +1,90 @@
|
|||
import strutils
|
||||
include parseutil
|
||||
import nodes
|
||||
import ../lexer/lex
|
||||
|
||||
# NOTE: Matching between two tokens will fill `node` with everything
|
||||
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||
proc parseMatch(tokStream: var nlTokStream,
|
||||
node: var nlNode,
|
||||
matchType: nlTokType): nlParseStat =
|
||||
result = greed(
|
||||
tokStream,
|
||||
node.toks,
|
||||
satisfyMatch(matchType),
|
||||
)
|
||||
proc parseMatchLine(tokStream: var nlTokStream,
|
||||
node: var nlNode,
|
||||
matchType: nlTokType): nlParseStat =
|
||||
result = greed(
|
||||
tokStream,
|
||||
node.toks,
|
||||
satisfyMatchEOL(matchType),
|
||||
type
|
||||
# NOTE1: Values above MARKER_FAIL indicate a failed state
|
||||
# NOTE2: nlParseStat is marked pure out of habit that's all
|
||||
nlParseStat* {.pure.} = enum
|
||||
OK,
|
||||
MARKER_FAIL,
|
||||
UNMATCHED,
|
||||
TOOBIG,
|
||||
|
||||
nlAST* = object
|
||||
root: nlNode
|
||||
|
||||
nlParser* = object
|
||||
stream: nlTokStream
|
||||
ast: nlAST
|
||||
# the "build node" is a reference to the AST node
|
||||
# the parser is currently modifying/building from
|
||||
# NOTE: bnode changes frequently, it is NOT the root
|
||||
bnode: nlNode
|
||||
# flag indicating whether the parser is at
|
||||
# the start of a new line (aka checking indentation)
|
||||
inIndent: bool
|
||||
|
||||
|
||||
# "Multiplies" a status by a condition: keeps `stat` when `b` holds,
# otherwise collapses to nlParseStat.OK.
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  if b: stat
  else: nlParseStat.OK
|
||||
|
||||
# True for MARKER_FAIL and every status declared after it.
proc isFail*(stat: nlParseStat): bool =
  not (stat < nlParseStat.MARKER_FAIL)
|
||||
|
||||
# Creates a parser over `tokStream` whose AST consists of a single
# nkNone root node; that root is also the initial build node.
proc newParser*(tokStream: var nlTokStream): nlParser =
  let root = newNode(nkNone)
  result.stream = tokStream
  result.ast = nlAST(root: root)
  result.bnode = root
|
||||
|
||||
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
|
||||
node = nlNode(
|
||||
nType: nlNodeType.STRL
|
||||
)
|
||||
node.addTok(tokStream.currTok)
|
||||
result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
|
||||
# Thin accessors forwarding to the parser's underlying token stream
proc currTok(parser: var nlParser): nlTok =
  result = parser.stream.currTok
proc line(parser: var nlParser): string =
  result = parser.stream.line
|
||||
|
||||
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
|
||||
node = nlNode(
|
||||
nType: nlNodeType.CHRL
|
||||
)
|
||||
node.addTok(tokStream.currTok)
|
||||
# TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
|
||||
result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
|
||||
# Extends upon the functionality of nlTokStream.progress()
|
||||
proc progressStream*(parser: var nlParser): bool =
|
||||
result = parser.stream.progress()
|
||||
if result and parser.currTok.tKind == tkEOL:
|
||||
parser.inIndent = true
|
||||
if
|
||||
|
||||
# Attempt to form an nlAST from a nlTokStream
|
||||
proc parse*(tokStream: var nlTokStream): nlNode =
|
||||
var tok: nlTok
|
||||
var node: nlNode
|
||||
while tokStream.nextTok(tok):
|
||||
case tok.tType:
|
||||
of nlTokType.DQUO:
|
||||
# Attempt to parse string literal
|
||||
if not parseStrL(tokStream, node):
|
||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||
echo tokStream.currLine()
|
||||
echo repeat(" ", tok.startPos), '^'
|
||||
else:
|
||||
echo "Parsed String Literal"
|
||||
echo node[]
|
||||
of nlTokType.SQUO:
|
||||
# Attempt to parse string literal
|
||||
if not parseChrL(tokStream, node):
|
||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||
echo tokStream.currLine()
|
||||
echo repeat(" ", tok.startPos), '^'
|
||||
else:
|
||||
echo "Parsed String Literal"
|
||||
echo node[]
|
||||
else:
|
||||
echo "blah blah unhandled case"
|
||||
proc setNewLine()
|
||||
|
||||
#[ "Greed" refers to something I mentioned in my discussion on
|
||||
| Noether's grammar (in an EBNF-like language). Greed just
|
||||
| means "everything until a condition is satisfied".
|
||||
| That condition should be supplied by a Nim procedural type.
|
||||
]#
|
||||
|
||||
# Greed consumes tokens until `satisfy` accepts one.
# Returns nlParseStat.OK on a match, or nlParseStat.UNMATCHED when the
# stream runs dry first. Every non-matching token is appended to the
# build node.
proc greed(parser: var nlParser,
           satisfy: proc(tok: nlTok): bool): nlParseStat =
  result = nlParseStat.UNMATCHED
  while parser.progressStream():
    let tok = parser.currTok
    if satisfy(tok):
      return nlParseStat.OK
    # NOTE: the matched token is currently excluded
    parser.bnode.addTok(tok)
|
||||
|
||||
# Like greed, but additionally gives up at the end of the current line.
# Returns nlParseStat.OK when `satisfy` accepts a token, and
# nlParseStat.UNMATCHED when a tkEOL token or the end of the stream is
# reached first. Non-matching tokens (including the tkEOL one) are
# appended to the build node.
proc greedLine(parser: var nlParser,
               satisfy: proc(tok: nlTok): bool): nlParseStat =
  while parser.progressStream():
    let tok = parser.currTok
    if satisfy(tok):
      return nlParseStat.OK
    # NOTE: the matched token is currently excluded
    parser.bnode.addTok(tok)
    # nlTok's kind field is named `kind` (there is no `tKind` field)
    if tok.kind == tkEOL:
      return nlParseStat.UNMATCHED
  result = nlParseStat.UNMATCHED
|
||||
|
||||
#[ Templates for generating greed satisfying conditions.
|
||||
]#
|
||||
|
||||
# Expands to an anonymous proc that is satisfied by any nlTok whose
# kind equals matchType.
# NOTE: the comparison uses nlTok's `kind` field (there is no `tKind`).
template satisfyMatch(matchType: nlTokKind): untyped =
  (proc(tok {.inject.}: nlTok): bool = (tok.kind == matchType))
|
||||
|
|
|
|||
|
|
@ -1,58 +0,0 @@
|
|||
import nodes
|
||||
import ../lexer/tokstream
|
||||
|
||||
type
|
||||
# NOTE: Values above __FAIL__ indicate a failed state
|
||||
nlParseStat* = enum
|
||||
OK,
|
||||
__FAIL__,
|
||||
MIDAS, # Greedy search was never satisfied
|
||||
UNMATCHED,
|
||||
TOOBIG,
|
||||
|
||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
||||
result = if b: stat else: nlParseStat.OK
|
||||
|
||||
proc isFail*(stat: nlParseStat): bool =
|
||||
result = (stat >= nlParseStat.__FAIL__)
|
||||
|
||||
|
||||
#[ "Greed" refers to something I mentioned in my discussion on
|
||||
| Noether's grammar (in an EBNF-like language). Greed just
|
||||
| means "everything until a condition is satisified".
|
||||
| That condition should be supplied by a Nim procedural type.
|
||||
]#
|
||||
|
||||
# Greed will consume anything until a condition is satisfied
|
||||
# Returns false if the greed was never satisfied (OMG!!)
|
||||
proc greed(tokStream: var nlTokStream,
|
||||
toks: var seq[nlTok],
|
||||
satisfy: proc(tok: nlTok): bool,
|
||||
): nlParseStat =
|
||||
var tok: nlTok
|
||||
while tokStream.nextTok(tok):
|
||||
toks.add(tok)
|
||||
if satisfy(tok):
|
||||
return nlParseStat.OK
|
||||
result = nlParseStat.UNMATCHED
|
||||
|
||||
proc greedLine(tokStream: var nlTokStream,
|
||||
toks: var seq[nlTok],
|
||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||
var tok: nlTok
|
||||
while tokStream.nextTok(tok):
|
||||
toks.add(tok)
|
||||
if satisfy(tok):
|
||||
return true
|
||||
result =
|
||||
|
||||
#[ Templates for generating greed satisfying conditions.
|
||||
]#
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType
|
||||
template satisfyMatch(matchType: nlTokType) =
|
||||
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType or EOL reached
|
||||
template satisfyMatchEOL(matchType: nlTokType) =
|
||||
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)
|
||||
Loading…
Add table
Add a link
Reference in a new issue