Implementing greedy matching via template macros (not in a working state)
This commit is contained in:
parent
a258802945
commit
ebef458186
3 changed files with 83 additions and 39 deletions
|
|
@ -1,3 +1,5 @@
|
||||||
Todo:
|
Todo:
|
||||||
- [ ] Not currently sure how the lexer will interpret non-latin characters (make sure it handles all unicode)
|
- [ ] Not currently sure how the lexer will interpret non-latin characters (make sure it handles all unicode)
|
||||||
- [ ] The lexer currently only handles a limited number of escape codes / whitespace characters
|
- [ ] The lexer currently only handles a limited number of escape codes / whitespace characters
|
||||||
|
|
||||||
|
- [ ] Mark most lexer procedures with the {.inline.} pragma (I thought this was active by default)
|
||||||
|
|
|
||||||
|
|
@ -1,49 +1,33 @@
|
||||||
import strutils
|
import strutils
|
||||||
|
include parseutil
|
||||||
|
|
||||||
import nodes
|
# NOTE: Matching between two tokens will fill `node` with everything
|
||||||
import ../lexer/tokstream
|
# NOTE: between those two tokens EXCLUDING the two tokens themselves.
|
||||||
|
proc parseMatch(tokStream: var nlTokStream,
                node: var nlNode,
                matchType: nlTokType): nlParseStat =
  # Consume tokens into `node.toks` until a token of type `matchType`
  # is seen; the status reported by `greed` is passed back unchanged.
  let isClosingTok = satisfyMatch(matchType)
  greed(tokStream, node.toks, isClosingTok)
|
||||||
|
proc parseMatchLine(tokStream: var nlTokStream,
                    node: var nlNode,
                    matchType: nlTokType): nlParseStat =
  # Same as a plain match, but the predicate is also satisfied by an
  # EOL token, so consumption stops at the end of the line at the latest.
  # The status reported by `greed` is passed back unchanged.
  let isClosingTokOrEOL = satisfyMatchEOL(matchType)
  greed(tokStream, node.toks, isClosingTokOrEOL)
|
||||||
|
|
||||||
type
|
proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
|
||||||
nlParseStat = enum
|
|
||||||
OK,
|
|
||||||
UNMATCHED,
|
|
||||||
TOOBIG,
|
|
||||||
|
|
||||||
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
|
|
||||||
result = if b: stat else: nlParseStat.OK
|
|
||||||
|
|
||||||
# Greed will consume anything except a punishment
|
|
||||||
# Returns a boolean indicating if it succeeded
|
|
||||||
proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool =
|
|
||||||
var tok: nlTok
|
|
||||||
while tokStream.nextTok(tok):
|
|
||||||
toks.add(tok)
|
|
||||||
if satisfy(tok):
|
|
||||||
return true
|
|
||||||
result = false
|
|
||||||
|
|
||||||
proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool =
|
|
||||||
var tok: nlTok
|
|
||||||
while tokStream.nextTok(tok):
|
|
||||||
toks.add(tok)
|
|
||||||
if tok.tType == satisfy or tok.tType == nlTokType.EOL:
|
|
||||||
return true
|
|
||||||
result = false
|
|
||||||
|
|
||||||
proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool =
|
|
||||||
|
|
||||||
|
|
||||||
proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat =
|
|
||||||
|
|
||||||
|
|
||||||
proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat =
|
|
||||||
node = nlNode(
|
node = nlNode(
|
||||||
nType: nlNodeType.STRL
|
nType: nlNodeType.STRL
|
||||||
)
|
)
|
||||||
node.addTok(tokStream.currTok)
|
node.addTok(tokStream.currTok)
|
||||||
result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
|
result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO)
|
||||||
|
|
||||||
proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool =
|
proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool =
|
||||||
node = nlNode(
|
node = nlNode(
|
||||||
nType: nlNodeType.CHRL
|
nType: nlNodeType.CHRL
|
||||||
)
|
)
|
||||||
|
|
@ -59,7 +43,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
|
||||||
case tok.tType:
|
case tok.tType:
|
||||||
of nlTokType.DQUO:
|
of nlTokType.DQUO:
|
||||||
# Attempt to parse string literal
|
# Attempt to parse string literal
|
||||||
if not parse_strl(tokStream, node):
|
if not parseStrL(tokStream, node):
|
||||||
echo "Unmatched Double Quotation! Malformed String Literal"
|
echo "Unmatched Double Quotation! Malformed String Literal"
|
||||||
echo tokStream.currLine()
|
echo tokStream.currLine()
|
||||||
echo repeat(" ", tok.startPos), '^'
|
echo repeat(" ", tok.startPos), '^'
|
||||||
|
|
@ -68,7 +52,7 @@ proc parse*(tokStream: var nlTokStream): nlNode =
|
||||||
echo node[]
|
echo node[]
|
||||||
of nlTokType.SQUO:
|
of nlTokType.SQUO:
|
||||||
# Attempt to parse string literal
|
# Attempt to parse string literal
|
||||||
if not parse_chrl(tokStream, node):
|
if not parseChrL(tokStream, node):
|
||||||
echo "Unmatched Single Quotation! Malformed Character Literal"
|
echo "Unmatched Single Quotation! Malformed Character Literal"
|
||||||
echo tokStream.currLine()
|
echo tokStream.currLine()
|
||||||
echo repeat(" ", tok.startPos), '^'
|
echo repeat(" ", tok.startPos), '^'
|
||||||
|
|
|
||||||
58
src/noether/parser/parseutil.nim
Normal file
58
src/noether/parser/parseutil.nim
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
import nodes
|
||||||
|
import ../lexer/tokstream
|
||||||
|
|
||||||
|
type
  # Status codes returned by the parsing helpers.
  # NOTE: `FAIL` is a sentinel; every value at or above it indicates a
  # failed state (see `isFail`). Keep successful states above it in this
  # list and failure states below it.
  # (The sentinel was previously spelled with leading/trailing double
  # underscores, which is not a legal Nim identifier.)
  nlParseStat* = enum
    OK
    FAIL
    MIDAS      # Greedy search was never satisfied
    UNMATCHED
    TOOBIG

# Conditional status: yields `stat` when `b` is true, otherwise OK.
# Lets callers write e.g. `SOMEFAILURE * conditionThatMeansFailure`.
proc `*`(stat: nlParseStat, b: bool): nlParseStat =
  result = if b: stat else: nlParseStat.OK

# Returns true when `stat` is the FAIL sentinel or any status declared
# after it in the enum.
proc isFail*(stat: nlParseStat): bool =
  result = (stat >= nlParseStat.FAIL)
|
||||||
|
|
||||||
|
|
||||||
|
#[ "Greed" refers to something I mentioned in my discussion on
|
||||||
|
| Noether's grammar (in an EBNF-like language). Greed just
|
||||||
|
| means "everything until a condition is satisfied".
|
||||||
|
| That condition should be supplied by a Nim procedural type.
|
||||||
|
]#
|
||||||
|
|
||||||
|
# Greed consumes tokens from `tokStream` until a condition is satisfied.
# Every consumed token is appended to `toks`, including the token that
# satisfies the condition.
# Returns nlParseStat.OK as soon as `satisfy` accepts a token, or
# nlParseStat.UNMATCHED if `nextTok` stops yielding tokens before the
# greed was ever satisfied (OMG!!)
proc greed(tokStream: var nlTokStream,
           toks: var seq[nlTok],
           satisfy: proc(tok: nlTok): bool): nlParseStat =
  var tok: nlTok
  while tokStream.nextTok(tok):
    # Record the token first so the satisfying token is kept as well.
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
  result = nlParseStat.UNMATCHED
|
||||||
|
|
||||||
|
# Line-bounded variant of `greed`: consumes tokens from `tokStream`,
# appending each one to `toks`, until `satisfy` accepts a token or the
# current line ends.
# Returns nlParseStat.OK when `satisfy` accepts a token (the predicate is
# checked before the end-of-line test, so a predicate that accepts EOL
# still yields OK). Returns nlParseStat.UNMATCHED when an EOL token is
# reached, or `nextTok` stops yielding tokens, without the predicate
# being satisfied.
# NOTE(review): the previous body returned a bool and left `result`
# unassigned, so it could not compile; treating an unsatisfied EOL as
# UNMATCHED is the assumed intent - confirm against the callers.
proc greedLine(tokStream: var nlTokStream,
               toks: var seq[nlTok],
               satisfy: proc(tok: nlTok): bool): nlParseStat =
  var tok: nlTok
  while tokStream.nextTok(tok):
    toks.add(tok)
    if satisfy(tok):
      return nlParseStat.OK
    if tok.tType == nlTokType.EOL:
      # End of line reached without satisfying the predicate.
      break
  result = nlParseStat.UNMATCHED
|
||||||
|
|
||||||
|
#[ Templates for generating greed satisfying conditions.
|
||||||
|
]#
|
||||||
|
|
||||||
|
# Satisfied if it finds nlTok of type matchType.
# Expands to an anonymous predicate proc suitable for passing as the
# `satisfy` argument of the greed procedures.
# The template needs an explicit `untyped` return type so the expansion
# can be used as an expression. The predicate uses the default calling
# convention (no `inline` pragma) so that it stays compatible with a
# `proc(tok: nlTok): bool` parameter.
template satisfyMatch(matchType: nlTokType): untyped =
  (proc(tok: nlTok): bool = tok.tType == matchType)
|
||||||
|
|
||||||
|
# Satisfied if it finds nlTok of type matchType or EOL reached.
# Expands to an anonymous predicate proc suitable for passing as the
# `satisfy` argument of the greed procedures.
# The template needs an explicit `untyped` return type so the expansion
# can be used as an expression. The predicate uses the default calling
# convention (no `inline` pragma) so that it stays compatible with a
# `proc(tok: nlTok): bool` parameter.
template satisfyMatchEOL(matchType: nlTokType): untyped =
  (proc(tok: nlTok): bool =
    tok.tType == matchType or tok.tType == nlTokType.EOL)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue