Implementing greedy matching via template macros (not in a working state)
This commit is contained in:
parent
a258802945
commit
ebef458186
3 changed files with 83 additions and 39 deletions
|
|
@ -1,3 +1,5 @@
|
|||
Todo:
|
||||
- [ ] Not currently sure how the lexer will interpret non-latin characters (make sure it handles all unicode)
|
||||
- [ ] The lexer currently only handles a limited number of escape codes / whitespace characters
|
||||
|
||||
- [ ] Mark most lexer procedures with the {.inline.} pragma (I thought this was active by default)
|
||||
|
|
|
|||
|
|
@ -1,49 +1,33 @@
|
|||
import strutils
|
||||
include parseutil
|
||||
|
||||
import nodes
|
||||
import ../lexer/tokstream
|
||||
# NOTE: Matching between two tokens will fill `node` with everything
|
||||
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||
proc parseMatch(tokStream: var nlTokStream,
                node: var nlNode,
                matchType: nlTokType): nlParseStat =
  ## Greedily consumes tokens from `tokStream` into `node.toks`, stopping at
  ## the first token whose type equals `matchType`.
  ## Returns whatever status `greed` reports.
  let isMatch = satisfyMatch(matchType)
  greed(tokStream, node.toks, isMatch)
|
||||
proc parseMatchLine(tokStream: var nlTokStream,
                    node: var nlNode,
                    matchType: nlTokType): nlParseStat =
  ## Greedily consumes tokens from `tokStream` into `node.toks`, stopping at
  ## the first token that is either of type `matchType` or an EOL token.
  ## Returns whatever status `greed` reports.
  let isMatchOrEOL = satisfyMatchEOL(matchType)
  greed(tokStream, node.toks, isMatchOrEOL)
|
||||
|
||||
type
  # Status codes returned by the parsing procedures in this file.
  nlParseStat = enum
    OK
    UNMATCHED
    TOOBIG
|
||||
|
||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` is true and
  ## `nlParseStat.OK` when it is false.
  if b: stat
  else: nlParseStat.OK
|
||||
|
||||
# Pulls tokens from `tokStream` one at a time, appending each to `toks`,
# until `satisfy` accepts a token (that token is appended too).
# Returns true if `satisfy` accepted a token, false if `nextTok` stopped
# yielding tokens first.
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
  var current: nlTok
  while tokStream.nextTok(current):
    toks.add(current)
    if satisfy(current):
      result = true
      break
|
||||
|
||||
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
  ## Pulls tokens from `tokStream`, appending each to `toks`, and stops at the
  ## first token whose type is `satisfy` or `nlTokType.EOL` (that token is
  ## appended as well). Returns true when such a token was found, false when
  ## `nextTok` stopped yielding tokens first.
  var current: nlTok
  while tokStream.nextTok(current):
    toks.add(current)
    let kind = current.tType
    if kind == satisfy or kind == nlTokType.EOL:
      result = true
      break
|
||||
|
||||
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
  ## True when `tok` is of type `tokType` or is an EOL token.
  ## NOTE(review): the original was a header with no body; this predicate is
  ## taken from the name and from the condition used by `greedEOL` -- confirm.
  result = tok.tType == tokType or tok.tType == nlTokType.EOL
|
||||
|
||||
|
||||
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
|
||||
|
||||
|
||||
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  ## Parses a string literal into `node`.
  ##
  ## `node` is reset to a fresh STRL node, the stream's current token is
  ## added to it, and tokens are then consumed greedily into `node.toks`
  ## until a DQUO or EOL token is reached.
  ##
  ## Returns `nlParseStat.UNMATCHED` when the greedy search fails and
  ## `nlParseStat.OK` otherwise. (`parse_strl` and `parseStrL` are the same
  ## identifier in Nim, so a single definition serves both spellings.)
  ##
  ## NOTE(review): `greedEOL` also reports success when it stops on EOL, so a
  ## string left open at the end of a line is presumably still reported as
  ## OK -- confirm whether that is intended.
  node = nlNode(nType: nlNodeType.STRL)
  node.addTok(tokStream.currTok)
  let closed = greedEOL(tokStream, node.toks, nlTokType.DQUO)
  # `UNCLOSED` is not a member of nlParseStat; UNMATCHED is the declared
  # status for a missing closing token.
  result = nlParseStat.UNMATCHED * not closed
|
||||
|
||||
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
|
||||
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
|
||||
node = nlNode(
|
||||
nType: nlNodeType.CHRL
|
||||
)
|
||||
|
|
@ -59,7 +43,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
|
|||
case tok.tType:
|
||||
of nlTokType.DQUO:
|
||||
# Attempt to parse string literal
|
||||
if not parse_strl(tokStream, node):
|
||||
if not parseStrL(tokStream, node):
|
||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||
echo tokStream.currLine()
|
||||
echo repeat(" ", tok.startPos), '^'
|
||||
|
|
@ -68,7 +52,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
|
|||
echo node[]
|
||||
of nlTokType.SQUO:
|
||||
# Attempt to parse string literal
|
||||
if not parse_chrl(tokStream, node):
|
||||
if not parseChrL(tokStream, node):
|
||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||
echo tokStream.currLine()
|
||||
echo repeat(" ", tok.startPos), '^'
|
||||
|
|
|
|||
58
src/noether/parser/parseutil.nim
Normal file
58
src/noether/parser/parseutil.nim
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import nodes
|
||||
import ../lexer/tokstream
|
||||
|
||||
type
  # NOTE: FAIL and every value declared after it indicate a failed state.
  # (Nim identifiers may not start or end with an underscore nor contain
  # two in a row, so the marker cannot be spelled `__FAIL__`.)
  nlParseStat* = enum
    OK
    FAIL
    MIDAS      # Greedy search was never satisfied
    UNMATCHED
    TOOBIG

proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` is true and
  ## `nlParseStat.OK` when it is false.
  result = if b: stat else: nlParseStat.OK

proc isFail*(stat: nlParseStat): bool =
  ## True when `stat` is `FAIL` or any status declared after it.
  result = (stat >= nlParseStat.FAIL)
|
||||
|
||||
|
||||
#[ "Greed" refers to something I mentioned in my discussion on
|
||||
| Noether's grammar (in an EBNF-like language). Greed just
|
||||
| means "everything until a condition is satisfied".
|
||||
| That condition should be supplied by a Nim procedural type.
|
||||
]#
|
||||
|
||||
# Greed consumes tokens until a condition is satisfied.
# Every token read is appended to `toks`, including the satisfying one.
# Returns OK once `satisfy` accepts a token, or UNMATCHED if `nextTok`
# stops yielding tokens before that happens.
# NOTE(review): MIDAS is described as "greedy search was never satisfied"
# but UNMATCHED is what is returned here -- confirm which is intended.
proc greed(tokStream: var nlTokStream,
           toks: var seq[nlTok],
           satisfy: proc(tok: nlTok): bool): nlParseStat =
  result = nlParseStat.UNMATCHED
  var current: nlTok
  while tokStream.nextTok(current):
    toks.add(current)
    if satisfy(current):
      result = nlParseStat.OK
      break
|
||||
|
||||
proc greedLine(tokStream: var nlTokStream,
               toks: var seq[nlTok],
               satisfy: proc(tok: nlTok): bool): nlParseStat =
  ## Consumes tokens from `tokStream`, appending each to `toks`, until
  ## `satisfy` accepts one. Returns OK in that case and UNMATCHED when
  ## `nextTok` stops yielding tokens first.
  ##
  ## NOTE(review): the original body was unfinished (`return true` in a proc
  ## returning nlParseStat, and an empty `result =`). It is completed here to
  ## mirror `greed`; any line-specific stopping rule still has to be supplied
  ## through `satisfy` -- confirm the intended behaviour.
  var tok: nlTok
  while tokStream.nextTok(tok):
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED
|
||||
|
||||
#[ Templates for generating greed satisfying conditions.
|
||||
]#
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType.
# Expands to an anonymous proc usable as a `satisfy` condition.
# The template is declared `untyped` because it yields a value (a template
# without a return type is void), and the proc carries no {.inline.} pragma
# so that it may capture `matchType` as a closure when needed.
template satisfyMatch(matchType: nlTokType): untyped =
  (proc(tok: nlTok): bool = tok.tType == matchType)
|
||||
|
||||
# Satisfied if it finds nlTok of type matchType or EOL reached.
# Expands to an anonymous proc usable as a `satisfy` condition.
# The template is declared `untyped` because it yields a value (a template
# without a return type is void), and the proc carries no {.inline.} pragma
# so that it may capture `matchType` as a closure when needed.
template satisfyMatchEOL(matchType: nlTokType): untyped =
  (proc(tok: nlTok): bool =
    tok.tType == matchType or tok.tType == nlTokType.EOL)
|
||||
Loading…
Add table
Add a link
Reference in a new issue