Implementing greedy matching via template macros (not in a working state)

This commit is contained in:
Emile Clark-Boman 2025-06-18 19:04:33 +10:00
parent a258802945
commit ebef458186
3 changed files with 83 additions and 39 deletions

View file

@ -1,3 +1,5 @@
Todo:
- [ ] Not currently sure how the lexer will interpret non-latin characters (make sure it handles all unicode)
- [ ] The lexer currently only handles a limited number of escape codes / whitespace characters
- [ ] Mark most lexer procedures with the {.inline.} pragma (I thought this was active by default)

View file

@ -1,49 +1,33 @@
import strutils
include parseutil
import nodes
import ../lexer/tokstream
# NOTE: Matching between two tokens will fill `node` with everything
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
proc parseMatch(tokStream: var nlTokStream,
                node: var nlNode,
                matchType: nlTokType): nlParseStat =
  ## Feeds tokens from `tokStream` into `node.toks` through `greed`,
  ## stopping on the predicate built by `satisfyMatch(matchType)`.
  ## The status produced by `greed` is handed back unchanged.
  return greed(tokStream, node.toks, satisfyMatch(matchType))
proc parseMatchLine(tokStream: var nlTokStream,
                    node: var nlNode,
                    matchType: nlTokType): nlParseStat =
  ## Feeds tokens from `tokStream` into `node.toks` through `greed`,
  ## stopping on the predicate built by `satisfyMatchEOL(matchType)`.
  ## The status produced by `greed` is handed back unchanged.
  return greed(tokStream, node.toks, satisfyMatchEOL(matchType))
type
# Status codes returned by the parse procedures in this file.
nlParseStat = enum
# OK is also the value the `*` operator on nlParseStat yields for a false flag.
OK,
UNMATCHED,
TOOBIG,
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` is true and
  ## `nlParseStat.OK` when it is false.
  if not b:
    return nlParseStat.OK
  result = stat
# Greed pulls tokens with `nextTok`, appending each one to `toks`
# (including the token that finally satisfies the predicate).
# Returns true as soon as `satisfy` accepts a token, and false if
# `nextTok` stops yielding tokens first.
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
  var current: nlTok
  # `result` starts out false, which is the answer when the loop runs dry.
  while tokStream.nextTok(current):
    toks.add(current)
    if satisfy(current):
      result = true
      break
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
  ## Pulls tokens with `nextTok`, appending each one to `toks`, until a
  ## token whose type is `satisfy` or `nlTokType.EOL` has been appended.
  ## Returns true in that case, and false if `nextTok` stops yielding
  ## tokens first.
  var current: nlTok
  # `result` starts out false, which is the answer when the loop runs dry.
  while tokStream.nextTok(current):
    toks.add(current)
    let kind = current.tType
    if kind == satisfy or kind == nlTokType.EOL:
      result = true
      break
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
  ## Starts a fresh STRL node, records the stream's current token on it,
  ## then greedily collects tokens with `greedEOL` until a DQUO or EOL
  ## token has been consumed.
  ## Returns `nlParseStat.UNMATCHED` when `greedEOL` reports failure and
  ## `nlParseStat.OK` otherwise.
  node = nlNode(nType: nlNodeType.STRL)
  node.addTok(tokStream.currTok)
  let closed = greedEOL(tokStream, node.toks, nlTokType.DQUO)
  # `nlParseStat` declares no UNCLOSED member, so the previous expression
  # could not compile. UNMATCHED is presumably the intended status (the
  # caller reports an "Unmatched Double Quotation") -- confirm.
  result = nlParseStat.UNMATCHED * (not closed)
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
node = nlNode(
nType: nlNodeType.CHRL
)
@ -59,7 +43,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
case tok.tType:
of nlTokType.DQUO:
# Attempt to parse string literal
if not parse_strl(tokStream, node):
if not parseStrL(tokStream, node):
echo "Unmatched Double Quotation! Malformed String Literal"
echo tokStream.currLine()
echo repeat(" ", tok.startPos), '^'
@ -68,7 +52,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
echo node[]
of nlTokType.SQUO:
# Attempt to parse string literal
if not parse_chrl(tokStream, node):
if not parseChrL(tokStream, node):
echo "Unmatched Single Quotation! Malformed Character Literal"
echo tokStream.currLine()
echo repeat(" ", tok.startPos), '^'

View file

@ -0,0 +1,58 @@
import nodes
import ../lexer/tokstream
type
  # NOTE: Values at or above FAIL indicate a failed state.
  # The marker was previously spelled with leading, doubled and trailing
  # underscores, which Nim's lexer rejects as an identifier.
  nlParseStat* = enum
    OK,
    FAIL,      # Marker only: the failure values start here
    MIDAS,     # Greedy search was never satisfied
    UNMATCHED,
    TOOBIG,

proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  ## Gates a status on a flag: yields `stat` when `b` is true and
  ## `nlParseStat.OK` when it is false.
  result = if b: stat else: nlParseStat.OK

proc isFail*(stat: nlParseStat): bool =
  ## True when `stat` is the FAIL marker or any value declared after it.
  result = (stat >= nlParseStat.FAIL)
#[ "Greed" refers to something I mentioned in my discussion on
| Noether's grammar (in an EBNF-like language). Greed just
| means "everything until a condition is satisfied".
| That condition should be supplied by a Nim procedural type.
]#
# Greed consumes tokens, appending each one to `toks` (including the
# token that finally satisfies the condition), until `satisfy` accepts one.
# Returns OK once satisfied, or UNMATCHED if `nextTok` stops yielding
# tokens before that happens.
# NOTE(review): MIDAS is documented as "greedy search was never
# satisfied", yet UNMATCHED is what gets returned here -- confirm which
# one is intended.
proc greed(tokStream: var nlTokStream,
           toks: var seq[nlTok],
           satisfy: proc(tok: nlTok): bool,
          ): nlParseStat =
  # Assume failure up front; only a satisfied predicate overturns it.
  result = nlParseStat.UNMATCHED
  var current: nlTok
  while tokStream.nextTok(current):
    toks.add(current)
    if satisfy(current):
      result = nlParseStat.OK
      break
proc greedLine(tokStream: var nlTokStream,
               toks: var seq[nlTok],
               satisfy: proc(tok: nlTok): bool): nlParseStat =
  ## Consumes tokens with `nextTok`, appending each one to `toks`, until
  ## `satisfy` accepts a token.
  ## Returns `nlParseStat.OK` once satisfied, or `nlParseStat.UNMATCHED`
  ## if `nextTok` stops yielding tokens first (the same statuses `greed`
  ## uses).
  ##
  ## The earlier body returned the bool `true` from a proc typed as
  ## `nlParseStat` and ended in an unfinished `result =`, so it could
  ## not compile.
  ## NOTE(review): nothing here is line-specific; any end-of-line stop
  ## has to come from the predicate (e.g. one built by
  ## `satisfyMatchEOL`) -- confirm that is the intended design.
  var tok: nlTok
  while tokStream.nextTok(tok):
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED
#[ Templates for generating greed satisfying conditions.
]#
# Satisfied if it finds nlTok of type matchType
template satisfyMatch(matchType: nlTokType): untyped =
  ## Expands to an anonymous predicate that accepts a token whose
  ## `tType` equals `matchType`.
  ##
  ## The `untyped` return type is required: a template without a return
  ## type is a statement and cannot hand the lambda back to the caller.
  ## No `{.inline.}` pragma on the lambda: it is passed to a `satisfy`
  ## parameter of a plain procedural type, and procedural types are only
  ## compatible when their calling conventions agree. It may also need to
  ## capture `matchType` when that is a runtime value.
  (proc(tok: nlTok): bool = tok.tType == matchType)
# Satisfied if it finds nlTok of type matchType or EOL reached
template satisfyMatchEOL(matchType: nlTokType): untyped =
  ## Expands to an anonymous predicate that accepts a token whose
  ## `tType` equals `matchType` or `nlTokType.EOL`.
  ##
  ## The `untyped` return type is required: a template without a return
  ## type is a statement and cannot hand the lambda back to the caller.
  ## No `{.inline.}` pragma on the lambda: it is passed to a `satisfy`
  ## parameter of a plain procedural type, and procedural types are only
  ## compatible when their calling conventions agree. It may also need to
  ## capture `matchType` when that is a runtime value.
  (proc(tok: nlTok): bool =
    tok.tType == matchType or tok.tType == nlTokType.EOL)