1 gazillion changes (mostly documenting my insanity optimizing + naming)
This commit is contained in:
parent
ebef458186
commit
f8697bd662
9 changed files with 206 additions and 183 deletions
|
|
@ -3,6 +3,7 @@ import noether/lexer/tok
|
||||||
import noether/lexer/tokstream
|
import noether/lexer/tokstream
|
||||||
import noether/parser/parser
|
import noether/parser/parser
|
||||||
|
|
||||||
|
{.hint: "Don't forget to drink more water (^_^)".}
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
echo "Noether Lang Extras v0.1.0 - nlx"
|
echo "Noether Lang Extras v0.1.0 - nlx"
|
||||||
|
|
||||||
|
|
@ -11,9 +12,8 @@ when isMainModule:
|
||||||
var tokStream = newTokStream(filename, isFile=true)
|
var tokStream = newTokStream(filename, isFile=true)
|
||||||
|
|
||||||
# # DumpTok
|
# # DumpTok
|
||||||
# var tok: nlTok
|
# while tokStream.progress():
|
||||||
# while tokStream.nextTok(tok):
|
# echo tokStream.currTok
|
||||||
# echo tok
|
|
||||||
|
|
||||||
# DumpTree
|
# DumpTree
|
||||||
discard parse(tokStream)
|
discard parse(tokStream)
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ proc outOfBounds*(lstream: nlLStream): bool =
|
||||||
result = (lstream.pos > lstream.line.len - 1)
|
result = (lstream.pos > lstream.line.len - 1)
|
||||||
|
|
||||||
# Progress the lex stream to the next line (if available)
|
# Progress the lex stream to the next line (if available)
|
||||||
proc progLine*(lstream: var nlLStream): bool =
|
proc progressLine*(lstream: var nlLStream): bool =
|
||||||
if lstream.stream.readLine(lstream.line):
|
if lstream.stream.readLine(lstream.line):
|
||||||
inc lstream.lineNum
|
inc lstream.lineNum
|
||||||
lstream.pos = Natural 0
|
lstream.pos = Natural 0
|
||||||
|
|
@ -50,17 +50,17 @@ proc progLine*(lstream: var nlLStream): bool =
|
||||||
|
|
||||||
# Progress the lex stream to the next character in the line
|
# Progress the lex stream to the next character in the line
|
||||||
# forcefully (aka does NOT check if we reached EOL)
|
# forcefully (aka does NOT check if we reached EOL)
|
||||||
proc forceProgChar*(lstream: var nlLStream) =
|
proc forceProgressChar*(lstream: var nlLStream) =
|
||||||
inc lstream.pos
|
inc lstream.pos
|
||||||
|
|
||||||
# Progress the lex stream to the next character (if available)
|
# # Progress the lex stream to the next character (if available)
|
||||||
proc progress*(lstream: var nlLStream): bool =
|
# proc progressChar*(lstream: var nlLStream): bool =
|
||||||
if not lstream.atEOL():
|
# if not lstream.atEOL():
|
||||||
lstream.forceProgChar()
|
# lstream.forceProgressChar()
|
||||||
result = true
|
# result = true
|
||||||
else:
|
# else:
|
||||||
# attempt to progress next line past EOL
|
# # attempt to progress next line past EOL
|
||||||
result = lstream.progLine()
|
# result = lstream.progressLine()
|
||||||
|
|
||||||
proc currChar*(lstream: nlLStream): char =
|
proc currChar*(lstream: nlLStream): char =
|
||||||
result = lstream.line[lstream.pos]
|
result = lstream.line[lstream.pos]
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ include toktype
|
||||||
|
|
||||||
type
|
type
|
||||||
nlTok* = object
|
nlTok* = object
|
||||||
tType*: nlTokType
|
tKind*: nlTokKind
|
||||||
lit*: string
|
lit*: string
|
||||||
lineNum*: Natural
|
lineNum*: Natural
|
||||||
startPos*: Natural
|
startPos*: Natural
|
||||||
|
|
@ -12,29 +12,11 @@ type
|
||||||
# all other fields are expected to be filled out later.
|
# all other fields are expected to be filled out later.
|
||||||
proc emptyTok*(startPos: int): nlTok =
|
proc emptyTok*(startPos: int): nlTok =
|
||||||
result = nlTok(
|
result = nlTok(
|
||||||
tType: nlTokType.NONE,
|
tKind: tkNONE,
|
||||||
lit: "",
|
lit: "",
|
||||||
startPos: Natural startPos,
|
startPos: Natural startPos,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Checks if an nlTok has nlTokType.NONE
|
# Checks if an nlTok has tkNONE
|
||||||
proc isTokUntyped*(tType: nlTokType): bool =
|
proc isUntyped*(tKind: nlTokKind): bool =
|
||||||
result = (tType == nlTokType.NONE)
|
result = (tKind == tkNONE)
|
||||||
|
|
||||||
# Checks if an nlTok has nlTokType.EOL
|
|
||||||
proc isTokEOL*(tok: nlTok): bool =
|
|
||||||
result = (tok.tType == nlTokType.EOL)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# This method is only used to convert null
|
|
||||||
# terminator nlToks into line-feed ones.
|
|
||||||
# Returns a copy of an nlTok, changing its type
|
|
||||||
proc tokTermToLineFeed*(tok: nlTok): nlTok =
|
|
||||||
result = nlTok(
|
|
||||||
tType: nlTokType.LNFD,
|
|
||||||
lit: tok.lit,
|
|
||||||
lineNum: tok.lineNum,
|
|
||||||
startPos: tok.startPos,
|
|
||||||
endPos: tok.endPos,
|
|
||||||
)
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ type
|
||||||
# Generates an EOL token for the nlTokStream's state
|
# Generates an EOL token for the nlTokStream's state
|
||||||
proc EOLTok(tokStream: nlTokStream): nlTok =
|
proc EOLTok(tokStream: nlTokStream): nlTok =
|
||||||
result = nlTok(
|
result = nlTok(
|
||||||
tType: nlTokType.EOL,
|
tKind: tkEOL,
|
||||||
lit: "\0",
|
lit: "\0",
|
||||||
lineNum: Natural tokStream.lstream.lineNum,
|
lineNum: Natural tokStream.lstream.lineNum,
|
||||||
startPos: Natural tokStream.lstream.pos,
|
startPos: Natural tokStream.lstream.pos,
|
||||||
|
|
@ -40,46 +40,46 @@ proc flushBuild(tokStream: var nlTokStream): nlTok =
|
||||||
|
|
||||||
# Returns whether the build token has a set type yet.
|
# Returns whether the build token has a set type yet.
|
||||||
# This indicates that the build token should inherit
|
# This indicates that the build token should inherit
|
||||||
# the nlTokType of the nlLStream's next character.
|
# the nlTokKind of the nlLStream's next character.
|
||||||
proc isUntypedBuild(tokStream: nlTokStream): bool =
|
proc isUntypedBuild(tokStream: nlTokStream): bool =
|
||||||
result = isTokUntyped(tokStream.build.tType)
|
result = tokStream.build.tKind.isUntyped()
|
||||||
|
|
||||||
# Check whether an nlTokType is "compatible" with the build token.
|
# Check whether an nlTokKind is "compatible" with the build token.
|
||||||
# NOTE: flushBuild() should be called when an incompatible token is discovered.
|
# NOTE: flushBuild() should be called when an incompatible token is discovered.
|
||||||
proc isCompatibleBuild(tokStream: nlTokStream, tType: nlTokType): bool =
|
proc isCompatibleBuild(tokStream: nlTokStream, tKind: nlTokKind): bool =
|
||||||
result = (tType == tokStream.build.tType)
|
result = (tKind == tokStream.build.tKind)
|
||||||
|
|
||||||
# Add a character to the nlTokStream's build token.
|
# Add a character to the nlTokStream's build token.
|
||||||
# Flushes and returns the build token if "fully built",
|
# Flushes and returns the build token if "fully built",
|
||||||
# and a boolean indicating whether the nlTokStream can progress.
|
# and a boolean indicating whether the nlTokStream can progress.
|
||||||
proc progBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
|
proc progressBuild(tokStream: var nlTokStream, buildTok: var Option[nlTok]): bool =
|
||||||
# the "pos > EOL" invalid state is used intentionally
|
# the "pos > EOL" invalid state is used intentionally
|
||||||
# to indicate all tokens have been built, and return EOL Token
|
# to indicate all tokens have been built, and return EOL Token
|
||||||
if tokStream.lstream.outOfBounds():
|
if tokStream.lstream.outOfBounds():
|
||||||
buildTok = some(EOLTok(tokStream))
|
buildTok = some(EOLTok(tokStream))
|
||||||
return true # can progress once more
|
return true # can progress once more
|
||||||
|
|
||||||
let tType = getTokType(tokStream.lstream.currChar())
|
let tKind = getTokType(tokStream.lstream.currChar())
|
||||||
# untyped build tokens must inherited immediately
|
# untyped build tokens must inherited immediately
|
||||||
if isUntypedBuild(tokStream):
|
if isUntypedBuild(tokStream):
|
||||||
tokStream.build.tType = tType
|
tokStream.build.tKind = tKind
|
||||||
|
|
||||||
# check if EOL reached
|
# check if EOL reached
|
||||||
if tokStream.lstream.atEOL():
|
if tokStream.lstream.atEOL():
|
||||||
# flush old build token, the new one can be left untyped
|
# flush old build token, the new one can be left untyped
|
||||||
let compatible = isCompatibleBuild(tokStream, tType)
|
let compatible = isCompatibleBuild(tokStream, tKind)
|
||||||
result = false # DO NOT PROGRESS
|
result = false # DO NOT PROGRESS
|
||||||
if compatible:
|
if compatible:
|
||||||
# force the lstream into an invalid state by progressing beyond EOL
|
# force the lstream into an invalid state by progressing beyond EOL
|
||||||
# we can then detect this state on the next progBuild and return
|
# we can then detect this state on the next progressBuild and return
|
||||||
# an EOL character (very unsafe implementation but it works well)
|
# an EOL character (very unsafe implementation but it works well)
|
||||||
tokStream.lstream.forceProgChar()
|
tokStream.lstream.forceProgressChar()
|
||||||
buildTok = some(flushBuild(tokStream))
|
buildTok = some(flushBuild(tokStream))
|
||||||
# check character and build token compatability
|
# check character and build token compatability
|
||||||
elif not isCompatibleBuild(tokStream, tType):
|
elif not isCompatibleBuild(tokStream, tKind):
|
||||||
# flush old build token, the new one inherits type
|
# flush old build token, the new one inherits type
|
||||||
buildTok = some(flushBuild(tokStream))
|
buildTok = some(flushBuild(tokStream))
|
||||||
tokStream.build.tType = tType
|
tokStream.build.tKind = tKind
|
||||||
result = true # can progress
|
result = true # can progress
|
||||||
else:
|
else:
|
||||||
buildTok = none(nlTok)
|
buildTok = none(nlTok)
|
||||||
|
|
|
||||||
|
|
@ -9,44 +9,43 @@ proc newTokStream*(content: string, isFile: bool = false): nlTokStream =
|
||||||
# 1. initialise an empty build token
|
# 1. initialise an empty build token
|
||||||
# 2. progress to the first line
|
# 2. progress to the first line
|
||||||
result.resetBuild()
|
result.resetBuild()
|
||||||
discard result.lstream.progLine()
|
discard result.lstream.progressLine()
|
||||||
|
|
||||||
# Defines a short-hand notation for getting the current line
|
# Defines a short-hand notation for getting the current line
|
||||||
proc currLine*(tokStream: nlTokStream): string =
|
proc line*(tokStream: nlTokStream): string =
|
||||||
result = tokStream.lstream.line
|
result = tokStream.lstream.line
|
||||||
|
|
||||||
# Reimplements nlLStream.progress() for nlTokStream
|
# Reimplements nlLStream.progressChar for nlTokStream
|
||||||
# to account for additional structure (ie the build token)
|
# to account for additional structure (ie the build token)
|
||||||
proc progChar(tokStream: var nlTokStream): bool =
|
# NOTE: progressChar progresses to lstream's next char
|
||||||
|
proc progressChar(tokStream: var nlTokStream): bool =
|
||||||
if not tokStream.lstream.atEOL():
|
if not tokStream.lstream.atEOL():
|
||||||
tokStream.lstream.forceProgChar()
|
tokStream.lstream.forceProgressChar()
|
||||||
result = true
|
result = true
|
||||||
else:
|
else:
|
||||||
# attempt to progress to next line past EOL
|
# attempt to progress to next line past EOL
|
||||||
result = tokStream.lstream.progLine()
|
result = tokStream.lstream.progressLine()
|
||||||
tokStream.resetBuild()
|
tokStream.resetBuild()
|
||||||
|
|
||||||
# Generates and sets (by reference) the next token in the stream,
|
# Generates and progress the next token in the nlTokStream.
|
||||||
# via repeatedly calling progBuild() and progChar().
|
# via repeatedly calling progressBuild() and progressChar().
|
||||||
# Returns a boolean indicating whether EOF has been reached.
|
# Returns a boolean indicating whether EOF has been reached.
|
||||||
# NOTE: progBuild adds lstream's current char to the build token
|
# NOTE: access the new token via `tokStream.tok`
|
||||||
# NOTE: progChar progresses to lstream's next char
|
proc progress*(tokStream: var nlTokStream): bool =
|
||||||
proc nextTok*(tokStream: var nlTokStream, tok: var nlTok): bool =
|
|
||||||
# Return prematurely if already closed
|
# Return prematurely if already closed
|
||||||
if tokStream.closed:
|
if tokStream.closed:
|
||||||
return false
|
return false
|
||||||
while true:
|
while true:
|
||||||
var flushedTok: Option[nlTok]
|
var flushedTok: Option[nlTok]
|
||||||
let
|
let
|
||||||
canProgress = tokStream.progBuild(flushedTok)
|
canProgress = tokStream.progressBuild(flushedTok)
|
||||||
buildComplete = flushedTok.isSome
|
buildComplete = flushedTok.isSome
|
||||||
# canProgress & EOF reached => no more tokens to build :)
|
# canProgress & EOF reached => no more tokens to build :)
|
||||||
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
# NOTE: reachedEOF and not canProgress => more tokens unwrapping
|
||||||
if buildComplete:
|
if buildComplete:
|
||||||
# return the finished build token, and save it as the current token
|
# return the finished build token, and save it as the current token
|
||||||
tok = flushedTok.get()
|
tokStream.currTok = flushedTok.get()
|
||||||
tokStream.currTok = tok
|
if canProgress and not tokStream.progressChar():
|
||||||
if canProgress and not tokStream.progChar():
|
|
||||||
tokStream.closed = true
|
tokStream.closed = true
|
||||||
return buildComplete
|
return buildComplete
|
||||||
elif buildComplete:
|
elif buildComplete:
|
||||||
|
|
|
||||||
|
|
@ -1,54 +1,59 @@
|
||||||
type
|
type
|
||||||
# nlTokType allows primitive nlToks to be typed,
|
# nlTokKind allows primitive nlToks to be typed,
|
||||||
# the nlTokType enum should never be directly
|
# the nlTokKind enum should never be directly
|
||||||
# accessed. Use the interface in this file instead.
|
# accessed. Use the interface in this file instead.
|
||||||
nlTokType* = enum
|
nlTokKind* = enum
|
||||||
NONE, # Placeholder Value
|
tkNONE, # Placeholder Value
|
||||||
EOF, # End of File
|
|
||||||
EOL, # End of Line (\0 --> EOL)
|
tkEOF, # End of File
|
||||||
WORD, # Alphanumeric token
|
tkEOL, # End of Line (\0 --> EOL)
|
||||||
SYMB, # Symbolic token
|
|
||||||
LNFD, # \r \n Line-Feed
|
tkWORD, # Alphanumeric token
|
||||||
WTSP, # ' ' \t Whitespace
|
tkSYMB, # Symbolic token
|
||||||
LPAR, # ( Left Parenthesis
|
|
||||||
RPAR, # ) Right Parenthesis
|
tkLNFD, # \r \n Line-Feed
|
||||||
LBRA, # { Left Brace
|
tkWTSP, # ' ' \t Whitespace
|
||||||
RBRA, # } Right Brace
|
|
||||||
LSQB, # [ Left Square Bracket
|
# RESERVED SYMBOLS
|
||||||
RSQB, # ] Right Square Bracket
|
tkLPAR, # ( Left Parenthesis
|
||||||
# LANB, # < Left Angle Bracket
|
tkRPAR, # ) Right Parenthesis
|
||||||
# RANB, # > Right Angle Bracket
|
tkLBRA, # { Left Brace
|
||||||
SQUO, # ' Single Quotation Marking
|
tkRBRA, # } Right Brace
|
||||||
DQUO, # " Double Quotation Marking
|
tkLSQB, # [ Left Square Bracket
|
||||||
GRVA, # ` Grave Accent
|
tkRSQB, # ] Right Square Bracket
|
||||||
HASH, # # Number Sign (Hashtag)
|
# tkLANB, # < Left Angle Bracket
|
||||||
|
# tkRANB, # > Right Angle Bracket
|
||||||
|
tkSQUO, # ' Single Quotation Marking
|
||||||
|
tkDQUO, # " Double Quotation Marking
|
||||||
|
tkGRVA, # ` Grave Accent
|
||||||
|
tkHASH, # # Number Sign (Hashtag)
|
||||||
|
|
||||||
# Classifies a character to its nlTokType
|
# Classifies a character to its nlTokKind
|
||||||
proc getTokType*(c: char): nlTokType =
|
proc getTokType*(c: char): nlTokKind =
|
||||||
case c:
|
case c:
|
||||||
of '\0', '\r', '\n':
|
of '\0', '\r', '\n':
|
||||||
result = nlTokType.EOL
|
result = tkEOL
|
||||||
of ' ', '\t':
|
of ' ', '\t':
|
||||||
result = nlTokType.WTSP
|
result = tkWTSP
|
||||||
of '(':
|
of '(':
|
||||||
result = nlTokType.LPAR
|
result = tkLPAR
|
||||||
of ')':
|
of ')':
|
||||||
result = nlTokType.RPAR
|
result = tkRPAR
|
||||||
of '{':
|
of '{':
|
||||||
result = nlTokType.LBRA
|
result = tkLBRA
|
||||||
of '}':
|
of '}':
|
||||||
result = nlTokType.RBRA
|
result = tkRBRA
|
||||||
of '[':
|
of '[':
|
||||||
result = nlTokType.LSQB
|
result = tkLSQB
|
||||||
of ']':
|
of ']':
|
||||||
result = nlTokType.RSQB
|
result = tkRSQB
|
||||||
of '\'':
|
of '\'':
|
||||||
result = nlTokType.SQUO
|
result = tkSQUO
|
||||||
of '\"':
|
of '\"':
|
||||||
result = nlTokType.DQUO
|
result = tkDQUO
|
||||||
of '`':
|
of '`':
|
||||||
result = nlTokType.GRVA
|
result = tkGRVA
|
||||||
of '#':
|
of '#':
|
||||||
result = nlTokType.HASH
|
result = tkHASH
|
||||||
else:
|
else:
|
||||||
result = nlTokType.WORD
|
result = tkWORD
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,48 @@
|
||||||
|
import std/options
|
||||||
from ../lexer/tok import nlTok
|
from ../lexer/tok import nlTok
|
||||||
# from ../lexer/tokstream import
|
|
||||||
|
|
||||||
type
|
type
|
||||||
# NOTE: by the end of parsing NO nodes should
|
# NOTE: by the end of parsing NO nodes should have nkNone
|
||||||
# NOTE: have nlNodeType.NONE
|
nlNodeKind* = enum
|
||||||
nlNodeType* = enum
|
nkNone, # Placeholder Value
|
||||||
NONE, # Placeholder Value
|
|
||||||
TERM, # Indicates the tree has terminated
|
nkStrLit, # String Literal
|
||||||
STRL, # String Literal
|
nkChrLit, # Character Literal
|
||||||
CHRL, # Character Literal
|
|
||||||
|
# NOTE: always check parent != nil when traversing the tree
|
||||||
nlNode* {.acyclic.} = ref object of RootObj
|
nlNode* {.acyclic.} = ref object of RootObj
|
||||||
nType*: nlNodeType
|
nKind*: nlNodeKind
|
||||||
toks*: seq[nlTok] # nodes store the tokens that build them
|
toks*: seq[nlTok] # nodes (may) store the tokens that build them
|
||||||
# left, right: nlNode
|
parent*: nlNode
|
||||||
|
|
||||||
|
# Purely abstract type that all nlNode objects
|
||||||
|
# with children are expected to inherit from.
|
||||||
|
nlBranchNode* {.acyclic.} = ref object of nlNode
|
||||||
|
child: UncheckedArray[nlNode]
|
||||||
|
|
||||||
|
nlBiNode* {.acyclic.} = ref object of nlBranchNode
|
||||||
|
|
||||||
|
proc childCount*(node: nlNode): int {.inline.} = 0
|
||||||
|
proc childCount*(node: nlBiNode): int {.inline.} = 2
|
||||||
|
|
||||||
|
proc getChild*(node: nlNode, i: int): Option[nlNode] {.inline.} =
|
||||||
|
result = none(nlNode)
|
||||||
|
proc getChild*(node: nlBranchNode, i: int): Option[nlNode] {.inline.} =
|
||||||
|
result = some(node.child[i])
|
||||||
|
|
||||||
|
proc newNode*(nKind: nlNodeKind): nlNode =
|
||||||
|
result = nlNode(
|
||||||
|
nKind: nKind,
|
||||||
|
)
|
||||||
|
|
||||||
|
proc newBiNode*(nKind: nlNodeKind): nlNode =
|
||||||
|
result = nlBiNode(
|
||||||
|
nKind: nKind,
|
||||||
|
)
|
||||||
|
|
||||||
# Short-hand way of appending a token to a node's token sequence
|
# Short-hand way of appending a token to a node's token sequence
|
||||||
proc addTok*(node: nlNode, tok: nlTok) =
|
proc addTok*(node: nlNode, tok: nlTok) =
|
||||||
|
echo "AM I HERE?"
|
||||||
|
echo node[]
|
||||||
|
echo node.toks
|
||||||
node.toks.add(tok)
|
node.toks.add(tok)
|
||||||
|
|
|
||||||
|
|
@ -3,61 +3,55 @@ include parseutil
|
||||||
|
|
||||||
# NOTE: Matching between two tokens will fill `node` with everything
|
# NOTE: Matching between two tokens will fill `node` with everything
|
||||||
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||||
proc parseMatch(tokStream: var nlTokStream,
|
proc parseMatch(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||||
node: var nlNode,
|
|
||||||
matchType: nlTokType): nlParseStat =
|
|
||||||
result = greed(
|
result = greed(
|
||||||
tokStream,
|
parser,
|
||||||
node.toks,
|
|
||||||
satisfyMatch(matchType),
|
satisfyMatch(matchType),
|
||||||
)
|
)
|
||||||
proc parseMatchLine(tokStream: var nlTokStream,
|
proc parseMatchLine(parser: var nlParser, matchType: nlTokKind): nlParseStat =
|
||||||
node: var nlNode,
|
result = greedLine(
|
||||||
matchType: nlTokType): nlParseStat =
|
parser,
|
||||||
result = greed(
|
satisfyMatch(matchType),
|
||||||
tokStream,
|
|
||||||
node.toks,
|
|
||||||
satisfyMatchEOL(matchType),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
|
proc parseStrLit(parser: var nlParser): nlParseStat =
|
||||||
node = nlNode(
|
result = parser.parseMatch(tkDQUO)
|
||||||
nType: nlNodeType.STRL
|
|
||||||
)
|
|
||||||
node.addTok(tokStream.currTok)
|
|
||||||
result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
|
|
||||||
|
|
||||||
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
|
proc parseChrLit(parser: var nlParser): nlParseStat =
|
||||||
node = nlNode(
|
result = parser.parseMatch(tkSQUO)
|
||||||
nType: nlNodeType.CHRL
|
|
||||||
)
|
proc parseStmt(parser: var nlParser): nlParseStat =
|
||||||
node.addTok(tokStream.currTok)
|
# initialise build node as none just for the hell of it
|
||||||
# TWO ERRORS ARE POSSIBLE, 1: content too big, 2: never closed
|
|
||||||
result = greedEOL(tokStream, node.toks, nlTokType.SQUO)
|
while parser.stream.progress():
|
||||||
|
echo parser.stream.currTok
|
||||||
# Attempt to form an nlAST from a nlTokStream
|
case parser.stream.currTok.tKind
|
||||||
proc parse*(tokStream: var nlTokStream): nlNode =
|
of tkDQUO:
|
||||||
var tok: nlTok
|
|
||||||
var node: nlNode
|
|
||||||
while tokStream.nextTok(tok):
|
|
||||||
case tok.tType:
|
|
||||||
of nlTokType.DQUO:
|
|
||||||
# Attempt to parse string literal
|
# Attempt to parse string literal
|
||||||
if not parseStrL(tokStream, node):
|
if parser.parseStrLit() != nlParseStat.OK:
|
||||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||||
echo tokStream.currLine()
|
echo parser.stream.line
|
||||||
echo repeat(" ", tok.startPos), '^'
|
echo repeat(" ", parser.stream.currTok.startPos), '^'
|
||||||
else:
|
else:
|
||||||
echo "Parsed String Literal"
|
echo "Parsed String Literal"
|
||||||
echo node[]
|
echo parser.bnode[]
|
||||||
of nlTokType.SQUO:
|
of tkSQUO:
|
||||||
# Attempt to parse string literal
|
# Attempt to parse string literal
|
||||||
if not parseChrL(tokStream, node):
|
if parser.parseChrLit() != nlParseStat.OK:
|
||||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||||
echo tokStream.currLine()
|
echo parser.stream.line
|
||||||
echo repeat(" ", tok.startPos), '^'
|
echo repeat(" ", parser.stream.currTok.startPos), '^'
|
||||||
else:
|
else:
|
||||||
echo "Parsed String Literal"
|
echo "Parsed Character Literal"
|
||||||
echo node[]
|
echo parser.bnode[]
|
||||||
else:
|
else:
|
||||||
echo "blah blah unhandled case"
|
echo "blah blah unhandled case"
|
||||||
|
result = nlParseStat.OK
|
||||||
|
|
||||||
|
# Attempt to parse nlAST from nlTokStream
|
||||||
|
proc parse*(tokStream: var nlTokStream): nlAST =
|
||||||
|
var parser = newParser(tokStream)
|
||||||
|
echo ' '
|
||||||
|
discard parser.parseStmt()
|
||||||
|
|
||||||
|
result = parser.ast
|
||||||
|
|
|
||||||
|
|
@ -2,21 +2,41 @@ import nodes
|
||||||
import ../lexer/tokstream
|
import ../lexer/tokstream
|
||||||
|
|
||||||
type
|
type
|
||||||
# NOTE: Values above __FAIL__ indicate a failed state
|
# NOTE1: Values above MARKER_FAIL indicate a failed state
|
||||||
nlParseStat* = enum
|
# NOTE2: nlParseStat is marked pure out of habit that's all
|
||||||
|
nlParseStat* {.pure.} = enum
|
||||||
OK,
|
OK,
|
||||||
__FAIL__,
|
MARKER_FAIL,
|
||||||
MIDAS, # Greedy search was never satisfied
|
|
||||||
UNMATCHED,
|
UNMATCHED,
|
||||||
TOOBIG,
|
TOOBIG,
|
||||||
|
|
||||||
|
nlAST* = object
|
||||||
|
root: nlNode
|
||||||
|
|
||||||
|
nlParser* = object
|
||||||
|
stream: nlTokStream
|
||||||
|
ast: nlAST
|
||||||
|
# the "build node" is a reference to the AST node
|
||||||
|
# the parser is currently modifying/building from
|
||||||
|
# NOTE: bnode changes frequently, it is NOT the root
|
||||||
|
bnode: nlNode
|
||||||
|
|
||||||
|
|
||||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
||||||
result = if b: stat else: nlParseStat.OK
|
result = if b: stat else: nlParseStat.OK
|
||||||
|
|
||||||
proc isFail*(stat: nlParseStat): bool =
|
proc isFail*(stat: nlParseStat): bool =
|
||||||
result = (stat >= nlParseStat.__FAIL__)
|
result = (stat >= nlParseStat.MARKER_FAIL)
|
||||||
|
|
||||||
|
proc newParser*(tokStream: var nlTokStream): nlParser =
|
||||||
|
let rootNode = newNode(nkNone)
|
||||||
|
result = nlParser(
|
||||||
|
stream: tokStream,
|
||||||
|
ast: rootNode,
|
||||||
|
bnode: rootNode,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
#[ "Greed" refers to something I mentioned in my discussion on
|
#[ "Greed" refers to something I mentioned in my discussion on
|
||||||
| Noether's grammar (in an EBNF-like language). Greed just
|
| Noether's grammar (in an EBNF-like language). Greed just
|
||||||
| means "everything until a condition is satisified".
|
| means "everything until a condition is satisified".
|
||||||
|
|
@ -25,34 +45,28 @@ proc isFail*(stat: nlParseStat): bool =
|
||||||
|
|
||||||
# Greed will consume anything until a condition is satisfied
|
# Greed will consume anything until a condition is satisfied
|
||||||
# Returns false if the greed was never satisfied (OMG!!)
|
# Returns false if the greed was never satisfied (OMG!!)
|
||||||
proc greed(tokStream: var nlTokStream,
|
proc greed(parser: var nlParser,
|
||||||
toks: var seq[nlTok],
|
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||||
satisfy: proc(tok: nlTok): bool,
|
while parser.stream.progress():
|
||||||
): nlParseStat =
|
echo "im definitely here!"
|
||||||
var tok: nlTok
|
parser.bnode.addTok(parser.stream.currTok)
|
||||||
while tokStream.nextTok(tok):
|
if satisfy(parser.stream.currTok):
|
||||||
toks.add(tok)
|
|
||||||
if satisfy(tok):
|
|
||||||
return nlParseStat.OK
|
return nlParseStat.OK
|
||||||
result = nlParseStat.UNMATCHED
|
result = nlParseStat.UNMATCHED
|
||||||
|
|
||||||
proc greedLine(tokStream: var nlTokStream,
|
proc greedLine(parser: var nlParser,
|
||||||
toks: var seq[nlTok],
|
|
||||||
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
satisfy: proc(tok: nlTok): bool): nlParseStat =
|
||||||
var tok: nlTok
|
while parser.stream.progress():
|
||||||
while tokStream.nextTok(tok):
|
parser.bnode.addTok(parser.stream.currTok)
|
||||||
toks.add(tok)
|
if satisfy(parser.stream.currTok):
|
||||||
if satisfy(tok):
|
return nlParseStat.OK
|
||||||
return true
|
elif parser.stream.currTok.tKind == tkEOL:
|
||||||
result =
|
return nlParseStat.UNMATCHED
|
||||||
|
result = nlParseStat.UNMATCHED
|
||||||
|
|
||||||
#[ Templates for generating greed satisfying conditions.
|
#[ Templates for generating greed satisfying conditions.
|
||||||
]#
|
]#
|
||||||
|
|
||||||
# Satisfied if it finds nlTok of type matchType
|
# Satisfied if it finds nlTok of type matchType
|
||||||
template satisfyMatch(matchType: nlTokType) =
|
template satisfyMatch(matchType: nlTokKind): untyped =
|
||||||
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType)
|
(proc(tok {.inject.}: nlTok): bool = (tok.tKind == matchType))
|
||||||
|
|
||||||
# Satisfied if it finds nlTok of type matchType or EOL reached
|
|
||||||
template satisfyMatchEOL(matchType: nlTokType) =
|
|
||||||
proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue