Restructure attempt #087 :(
This commit is contained in:
parent
f25e66e9ef
commit
1181ea9743
7 changed files with 227 additions and 215 deletions
26
src/nlx.nim
26
src/nlx.nim
|
|
@ -1,19 +1,29 @@
|
|||
import os
|
||||
import noether/lib/io
|
||||
import noether/lexer/lex
|
||||
# import noether/parser/parser
|
||||
import noether/parser/parse
|
||||
|
||||
{.hint: "Don't forget to drink more water (^_^)".}
|
||||
when isMainModule:
|
||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||
|
||||
var stream = if paramCount() > 0: streamFile(paramStr 1)
|
||||
# really lazy argparse implementation (temporary)
|
||||
let
|
||||
paramC = paramCount()
|
||||
cmd = if paramC > 2: paramStr 1
|
||||
else: "tok"
|
||||
|
||||
var stream = if paramC > 0: streamFile(paramStr paramC)
|
||||
else: streamString(readAll stdin)
|
||||
|
||||
var lexer = newLexer(stream)
|
||||
# # DumpTok
|
||||
while lexer.progress():
|
||||
echo lexer.tok
|
||||
|
||||
# DumpTree
|
||||
# discard parse(tokStream)
|
||||
if cmd == "tok":
|
||||
# DumpTok
|
||||
while lexer.progress():
|
||||
echo lexer.tok
|
||||
elif cmd == "tree":
|
||||
discard
|
||||
# DumpTree
|
||||
# discard parse(tokStream)
|
||||
else:
|
||||
echo "Usage: nlx [tok|tree] <demo>\n demo files are accessible at lang/demo"
|
||||
|
|
|
|||
|
|
@ -11,15 +11,16 @@ type
|
|||
nlLexer* = object
|
||||
stream: Stream
|
||||
done*: bool
|
||||
tok*: nlTok # new finished token
|
||||
# store current token and upcoming (build) token
|
||||
tok*: nlTok # current token
|
||||
btok: nlTok # the build token
|
||||
# save char and pos and its token type
|
||||
char: char
|
||||
cTKind: nlTokKind
|
||||
# track line number, line content, etc
|
||||
line: string
|
||||
lineNum: int
|
||||
pos: int
|
||||
# save char and pos and its token type
|
||||
char: char
|
||||
cTKind: nlTokKind
|
||||
|
||||
proc atEOL(lexer: nlLexer): bool {.inline.} =
|
||||
result = (lexer.char == '\n')
|
||||
|
|
@ -37,8 +38,41 @@ proc newLexer*(stream: var Stream): nlLexer =
|
|||
lineNum: 1,
|
||||
pos: -1, # after initial readChar this -> 0
|
||||
char: '\0', # use \0 as initial invalid char
|
||||
cTKind: tkNONE,
|
||||
)
|
||||
|
||||
# Classifies the current character to its nlTokKind
|
||||
proc classifyTok*(lexer: nlLexer): nlTokKind {.inline.} =
|
||||
case lexer.char:
|
||||
of '\0':
|
||||
result = tkEOF
|
||||
of '\r', '\n':
|
||||
result = tkEOL
|
||||
of ' ', '\t':
|
||||
result = tkWTSP
|
||||
of '(':
|
||||
result = tkLPAR
|
||||
of ')':
|
||||
result = tkRPAR
|
||||
of '{':
|
||||
result = tkLBRA
|
||||
of '}':
|
||||
result = tkRBRA
|
||||
of '[':
|
||||
result = tkLSQB
|
||||
of ']':
|
||||
result = tkRSQB
|
||||
of '\'':
|
||||
result = tkSQUO
|
||||
of '\"':
|
||||
result = tkDQUO
|
||||
of '`':
|
||||
result = tkGRVA
|
||||
of '#':
|
||||
result = tkHASH
|
||||
else:
|
||||
result = tkWORD
|
||||
|
||||
|
||||
#[ ====================================================== ]
|
||||
| nlLexer Internal Interface for Token Construction ]
|
||||
|
|
@ -96,7 +130,7 @@ proc readChar(lexer: var nlLexer): bool =
|
|||
inc lexer.lineNum
|
||||
# sets lexer.char to '\0' if EOF
|
||||
lexer.char = lexer.stream.readChar()
|
||||
lexer.cTKind = getTokKind(lexer.char)
|
||||
lexer.cTKind = lexer.classifyTok()
|
||||
lexer.line.add(lexer.char)
|
||||
inc lexer.pos
|
||||
result = lexer.atEOF()
|
||||
|
|
|
|||
|
|
@ -1,4 +1,32 @@
|
|||
include tokkind
|
||||
type
|
||||
# nlTokKind allows primitive nlToks to be typed,
|
||||
# the nlTokKind enum should never be directly
|
||||
# accessed. Use the interface in this file instead.
|
||||
nlTokKind* = enum
|
||||
tkNONE, # Placeholder Value
|
||||
|
||||
tkEOF, # End of File
|
||||
tkEOL, # End of Line (\0 --> EOL)
|
||||
|
||||
tkWORD, # Alphanumeric token
|
||||
tkSYMB, # Symbolic token
|
||||
|
||||
tkLNFD, # \r \n Line-Feed
|
||||
tkWTSP, # ' ' \t Whitespace
|
||||
|
||||
# RESERVED SYMBOLS
|
||||
tkLPAR, # ( Left Parenthesis
|
||||
tkRPAR, # ) Right Parenthesis
|
||||
tkLBRA, # { Left Brace
|
||||
tkRBRA, # } Right Brace
|
||||
tkLSQB, # [ Left Square Bracket
|
||||
tkRSQB, # ] Right Square Bracket
|
||||
# tkLANB, # < Left Angle Bracket
|
||||
# tkRANB, # > Right Angle Bracket
|
||||
tkSQUO, # ' Single Quotation Marking
|
||||
tkDQUO, # " Double Quotation Marking
|
||||
tkGRVA, # ` Grave Accent
|
||||
tkHASH, # # Number Sign (Hashtag)
|
||||
|
||||
type
|
||||
nlTok* = tuple
|
||||
|
|
|
|||
|
|
@ -1,61 +1 @@
|
|||
type
|
||||
# nlTokKind allows primitive nlToks to be typed,
|
||||
# the nlTokKind enum should never be directly
|
||||
# accessed. Use the interface in this file instead.
|
||||
nlTokKind* = enum
|
||||
tkNONE, # Placeholder Value
|
||||
|
||||
tkEOF, # End of File
|
||||
tkEOL, # End of Line (\0 --> EOL)
|
||||
|
||||
tkWORD, # Alphanumeric token
|
||||
tkSYMB, # Symbolic token
|
||||
|
||||
tkLNFD, # \r \n Line-Feed
|
||||
tkWTSP, # ' ' \t Whitespace
|
||||
|
||||
# RESERVED SYMBOLS
|
||||
tkLPAR, # ( Left Parenthesis
|
||||
tkRPAR, # ) Right Parenthesis
|
||||
tkLBRA, # { Left Brace
|
||||
tkRBRA, # } Right Brace
|
||||
tkLSQB, # [ Left Square Bracket
|
||||
tkRSQB, # ] Right Square Bracket
|
||||
# tkLANB, # < Left Angle Bracket
|
||||
# tkRANB, # > Right Angle Bracket
|
||||
tkSQUO, # ' Single Quotation Marking
|
||||
tkDQUO, # " Double Quotation Marking
|
||||
tkGRVA, # ` Grave Accent
|
||||
tkHASH, # # Number Sign (Hashtag)
|
||||
|
||||
# Classifies a character to its nlTokKind
|
||||
proc getTokKind*(c: char): nlTokKind =
|
||||
case c:
|
||||
of '\0':
|
||||
result = tkEOF
|
||||
of '\r', '\n':
|
||||
result = tkEOL
|
||||
of ' ', '\t':
|
||||
result = tkWTSP
|
||||
of '(':
|
||||
result = tkLPAR
|
||||
of ')':
|
||||
result = tkRPAR
|
||||
of '{':
|
||||
result = tkLBRA
|
||||
of '}':
|
||||
result = tkRBRA
|
||||
of '[':
|
||||
result = tkLSQB
|
||||
of ']':
|
||||
result = tkRSQB
|
||||
of '\'':
|
||||
result = tkSQUO
|
||||
of '\"':
|
||||
result = tkDQUO
|
||||
of '`':
|
||||
result = tkGRVA
|
||||
of '#':
|
||||
result = tkHASH
|
||||
else:
|
||||
result = tkWORD
|
||||
|
|
|
|||
58
src/noether/parser/parse.nim
Normal file
58
src/noether/parser/parse.nim
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import strutils
|
||||
include parser
|
||||
|
||||
# NOTE: Matching between two tokens will fill `node` with everything
|
||||
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||
proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||
result = greed(
|
||||
parser,
|
||||
satisfyMatch(matchType),
|
||||
)
|
||||
proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||
result = greedLine(
|
||||
parser,
|
||||
satisfyMatch(matchType),
|
||||
)
|
||||
|
||||
proc parseStrLit(parser: var nlParser): nlParseStat =
|
||||
result = parser.parseMatchLine(tkDQUO)
|
||||
|
||||
proc parseChrLit(parser: var nlParser): nlParseStat =
|
||||
result = parser.parseMatchLine(tkSQUO)
|
||||
|
||||
proc parseStmt(parser: var nlParser): nlParseStat =
|
||||
while parser.progressStream():
|
||||
echo "----- Current Token: ", parser.currTok
|
||||
case parser.currTok.tKind
|
||||
of tkDQUO:
|
||||
# Attempt to parse string literal
|
||||
if parser.parseStrLit() != nlParseStat.OK:
|
||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||
echo parser.line
|
||||
echo repeat(" ", parser.currTok.startPos), '^', '\n'
|
||||
else:
|
||||
echo "Parsed String Literal"
|
||||
echo parser.bnode[], '\n'
|
||||
of tkSQUO:
|
||||
# Attempt to parse character literal
|
||||
if parser.parseChrLit() != nlParseStat.OK:
|
||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||
echo parser.line
|
||||
echo repeat(" ", parser.currTok.startPos), '^', '\n'
|
||||
else:
|
||||
echo "Parsed Character Literal"
|
||||
echo parser.bnode[], '\n'
|
||||
of tkEOL:
|
||||
# TODO: handle this case, don't just discard
|
||||
discard
|
||||
else:
|
||||
echo "blah blah unhandled case\n"
|
||||
result = nlParseStat.OK
|
||||
|
||||
# Attempt to parse nlAST from nlTokStream
|
||||
proc parse*(tokStream: var nlTokStream): nlAST =
|
||||
var parser = newParser(tokStream)
|
||||
echo ' '
|
||||
discard parser.parseStmt()
|
||||
|
||||
result = parser.ast
|
||||
|
|
@ -1,58 +1,90 @@
|
|||
import strutils
|
||||
include parseutil
|
||||
import nodes
|
||||
import ../lexer/lex
|
||||
|
||||
# NOTE: Matching between two tokens will fill `node` with everything
|
||||
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||
proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||
result = greed(
|
||||
parser,
|
||||
satisfyMatch(matchType),
|
||||
)
|
||||
proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||
result = greedLine(
|
||||
parser,
|
||||
satisfyMatch(matchType),
|
||||
type
|
||||
# NOTE1: Values above MARKER_FAIL indicate a failed state
|
||||
# NOTE2: nlParseStat is marked pure out of habit that's all
|
||||
nlParseStat* {.pure.} = enum
|
||||
OK,
|
||||
MARKER_FAIL,
|
||||
UNMATCHED,
|
||||
TOOBIG,
|
||||
|
||||
nlAST* = object
|
||||
root: nlNode
|
||||
|
||||
nlParser* = object
|
||||
stream: nlTokStream
|
||||
ast: nlAST
|
||||
# the "build node" is a reference to the AST node
|
||||
# the parser is currently modifying/building from
|
||||
# NOTE: bnode changes frequently, it is NOT the root
|
||||
bnode: nlNode
|
||||
# flag indicating whether the parser is at
|
||||
# the start of a new line (aka checking indentation)
|
||||
inIndent: bool
|
||||
|
||||
|
||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
||||
result = if b: stat else: nlParseStat.OK
|
||||
|
||||
proc isFail*(stat: nlParseStat): bool =
|
||||
result = (stat >= nlParseStat.MARKER_FAIL)
|
||||
|
||||
proc newParser*(tokStream: var nlTokStream): nlParser =
|
||||
let rootNode = newNode(nkNone)
|
||||
result = nlParser(
|
||||
stream: tokStream,
|
||||
ast: nlAST(
|
||||
root: rootNode
|
||||
),
|
||||
bnode: rootNode,
|
||||
)
|
||||
|
||||
proc parseStrLit(parser: var nlParser): nlParseStat =
|
||||
result = parser.parseMatchLine(tkDQUO)
|
||||
# Exposes a subset of the nlTokStream interface
|
||||
proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
|
||||
proc line(parser: var nlParser): string = parser.stream.line
|
||||
|
||||
proc parseChrLit(parser: var nlParser): nlParseStat =
|
||||
result = parser.parseMatchLine(tkSQUO)
|
||||
# Extends upon the functionality of nlTokStream.progress()
|
||||
proc progressStream*(parser: var nlParser): bool =
|
||||
result = parser.stream.progress()
|
||||
if result and parser.currTok.tKind == tkEOL:
|
||||
parser.inIndent = true
|
||||
if
|
||||
|
||||
proc setNewLine()
|
||||
|
||||
proc parseStmt(parser: var nlParser): nlParseStat =
|
||||
#[ "Greed" refers to something I mentioned in my discussion on
|
||||
| Noether's grammar (in an EBNF-like language). Greed just
|
||||
| means "everything until a condition is satisfied".
|
||||
| That condition should be supplied by a Nim procedural type.
|
||||
]#
|
||||
|
||||
# Greed will consume anything until a condition is satisfied
|
||||
# Returns nlParseStat.UNMATCHED if the greed was never satisfied (OMG!!)
|
||||
proc greed(parser: var nlParser,
|
||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||
while parser.progressStream():
|
||||
echo "----- Current Token: ", parser.currTok
|
||||
case parser.currTok.tKind
|
||||
of tkDQUO:
|
||||
# Attempt to parse string literal
|
||||
if parser.parseStrLit() != nlParseStat.OK:
|
||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||
echo parser.line
|
||||
echo repeat(" ", parser.currTok.startPos), '^', '\n'
|
||||
else:
|
||||
echo "Parsed String Literal"
|
||||
echo parser.bnode[], '\n'
|
||||
of tkSQUO:
|
||||
# Attempt to parse character literal
|
||||
if parser.parseChrLit() != nlParseStat.OK:
|
||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||
echo parser.line
|
||||
echo repeat(" ", parser.currTok.startPos), '^', '\n'
|
||||
else:
|
||||
echo "Parsed Character Literal"
|
||||
echo parser.bnode[], '\n'
|
||||
of tkEOL:
|
||||
# TODO: handle this case, don't just discard
|
||||
discard
|
||||
else:
|
||||
echo "blah blah unhandled case\n"
|
||||
result = nlParseStat.OK
|
||||
|
||||
# Attempt to parse nlAST from nlTokStream
|
||||
proc parse*(tokStream: var nlTokStream): nlAST =
|
||||
var parser = newParser(tokStream)
|
||||
echo ' '
|
||||
discard parser.parseStmt()
|
||||
if satisfy(parser.currTok):
|
||||
return nlParseStat.OK
|
||||
# NOTE: the matched token is currently excluded
|
||||
parser.bnode.addTok(parser.currTok)
|
||||
result = nlParseStat.UNMATCHED
|
||||
|
||||
result = parser.ast
|
||||
proc greedLine(parser: var nlParser,
|
||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||
while parser.progressStream():
|
||||
if satisfy(parser.currTok):
|
||||
return nlParseStat.OK
|
||||
# NOTE: the matched token is currently excluded
|
||||
parser.bnode.addTok(parser.currTok)
|
||||
if parser.currTok.tKind == tkEOL:
|
||||
return nlParseStat.UNMATCHED
|
||||
result = nlParseStat.UNMATCHED
|
||||
|
||||
#[ Templates for generating greed satisfying conditions.
|
||||
]#
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType
|
||||
template satisfyMatch(matchType: nlTokKind): untyped =
|
||||
(proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
|
||||
|
|
|
|||
|
|
@ -1,90 +0,0 @@
|
|||
import nodes
|
||||
import ../lexer/tokstream
|
||||
|
||||
type
|
||||
# NOTE1: Values above MARKER_FAIL indicate a failed state
|
||||
# NOTE2: nlParseStat is marked pure out of habit that's all
|
||||
nlParseStat* {.pure.} = enum
|
||||
OK,
|
||||
MARKER_FAIL,
|
||||
UNMATCHED,
|
||||
TOOBIG,
|
||||
|
||||
nlAST* = object
|
||||
root: nlNode
|
||||
|
||||
nlParser* = object
|
||||
stream: nlTokStream
|
||||
ast: nlAST
|
||||
# the "build node" is a reference to the AST node
|
||||
# the parser is currently modifying/building from
|
||||
# NOTE: bnode changes frequently, it is NOT the root
|
||||
bnode: nlNode
|
||||
# flag indicating whether the parser is at
|
||||
# the start of a new line (aka checking indentation)
|
||||
inIndent: bool
|
||||
|
||||
|
||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
||||
result = if b: stat else: nlParseStat.OK
|
||||
|
||||
proc isFail*(stat: nlParseStat): bool =
|
||||
result = (stat >= nlParseStat.MARKER_FAIL)
|
||||
|
||||
proc newParser*(tokStream: var nlTokStream): nlParser =
|
||||
let rootNode = newNode(nkNone)
|
||||
result = nlParser(
|
||||
stream: tokStream,
|
||||
ast: nlAST(
|
||||
root: rootNode
|
||||
),
|
||||
bnode: rootNode,
|
||||
)
|
||||
|
||||
# Exposes a subset of the nlTokStream interface
|
||||
proc currTok(parser: var nlParser): nlTok = parser.stream.currTok
|
||||
proc line(parser: var nlParser): string = parser.stream.line
|
||||
|
||||
# Extends upon the functionality of nlTokStream.progress()
|
||||
proc progressStream*(parser: var nlParser): bool =
|
||||
result = parser.stream.progress()
|
||||
if result and parser.currTok.tKind == tkEOL:
|
||||
parser.inIndent = true
|
||||
if
|
||||
|
||||
proc setNewLine()
|
||||
|
||||
#[ "Greed" refers to something I mentioned in my discussion on
|
||||
| Noether's grammar (in an EBNF-like language). Greed just
|
||||
| means "everything until a condition is satisfied".
|
||||
| That condition should be supplied by a Nim procedural type.
|
||||
]#
|
||||
|
||||
# Greed will consume anything until a condition is satisfied
|
||||
# Returns nlParseStat.UNMATCHED if the greed was never satisfied (OMG!!)
|
||||
proc greed(parser: var nlParser,
|
||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||
while parser.progressStream():
|
||||
if satisfy(parser.currTok):
|
||||
return nlParseStat.OK
|
||||
# NOTE: the matched token is currently excluded
|
||||
parser.bnode.addTok(parser.currTok)
|
||||
result = nlParseStat.UNMATCHED
|
||||
|
||||
proc greedLine(parser: var nlParser,
|
||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||
while parser.progressStream():
|
||||
if satisfy(parser.currTok):
|
||||
return nlParseStat.OK
|
||||
# NOTE: the matched token is currently excluded
|
||||
parser.bnode.addTok(parser.currTok)
|
||||
if parser.currTok.tKind == tkEOL:
|
||||
return nlParseStat.UNMATCHED
|
||||
result = nlParseStat.UNMATCHED
|
||||
|
||||
#[ Templates for generating greed satisfying conditions.
|
||||
]#
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType
|
||||
template satisfyMatch(matchType: nlTokKind): untyped =
|
||||
(proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
|
||||
Loading…
Add table
Add a link
Reference in a new issue