From ebef45818626b0ee8a87cd1d7920cbd458295c1a Mon Sep 17 00:00:00 2001 From: Emile Clark-Boman Date: Wed, 18 Jun 2025 19:04:33 +1000 Subject: [PATCH] Implementing greedy matching via template macros (not in a working state) --- lang/NOTES | 2 ++ src/noether/parser/parser.nim | 62 ++++++++++++-------------------- src/noether/parser/parseutil.nim | 58 ++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 39 deletions(-) create mode 100644 src/noether/parser/parseutil.nim diff --git a/lang/NOTES b/lang/NOTES index 0e8e5c9..df47f66 100644 --- a/lang/NOTES +++ b/lang/NOTES @@ -1,3 +1,5 @@ Todo: - [ ] Not currently sure how the lexer will interpret non-latin characters (make sure it handles all unicode) - [ ] The lexer currently only handles a limited number of escape codes / whitespace characters + +- [ ] Mark most lexer procedures with the {.inline.} pragma (I thought this was active by default) diff --git a/src/noether/parser/parser.nim b/src/noether/parser/parser.nim index f83861d..0598075 100644 --- a/src/noether/parser/parser.nim +++ b/src/noether/parser/parser.nim @@ -1,49 +1,33 @@ import strutils +include parseutil -import nodes -import ../lexer/tokstream +# NOTE: Matching between two tokens will fill `node` with everything +# NOTE: between those two tokens EXCLUDING the two tokens themselves. 
+proc parseMatch(tokStream: var nlTokStream, + node: var nlNode, + matchType: nlTokType): nlParseStat = + result = greed( + tokStream, + node.toks, + satisfyMatch(matchType), + ) +proc parseMatchLine(tokStream: var nlTokStream, + node: var nlNode, + matchType: nlTokType): nlParseStat = + result = greed( + tokStream, + node.toks, + satisfyMatchEOL(matchType), + ) -type - nlParseStat = enum - OK, - UNMATCHED, - TOOBIG, - -proc `*`(stat: nlParseStat, b: bool): nlParseStat = - result = if b: stat else: nlParseStat.OK - -# Greed will consume anything except a punishment -# Returns a boolean indicating if it succeeded -proc greed(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: proc(tok: nlTok): bool): bool = - var tok: nlTok - while tokStream.nextTok(tok): - toks.add(tok) - if satisfy(tok): - return true - result = false - -proc greedEOL(tokStream: var nlTokStream, toks: var seq[nlTok], satisfy: nlTokType): bool = - var tok: nlTok - while tokStream.nextTok(tok): - toks.add(tok) - if tok.tType == satisfy or tok.tType == nlTokType.EOL: - return true - result = false - -proc satisfyTypeOrEOL(tokType: nlTokType, tok: nlTok): bool = - - -proc prsMatchEOL(tokStream: var nlTokStream, toks: var seq[nlTok]): nlParseStat = - - -proc parse_strl(tokStream: var nlTokStream, node: var nlNode): nlParseStat = +proc parseStrL(tokStream: var nlTokStream, node: var nlNode): nlParseStat = node = nlNode( nType: nlNodeType.STRL ) node.addTok(tokStream.currTok) result = nlParseStat.UNCLOSED * not greedEOL(tokStream, node.toks, nlTokType.DQUO) -proc parse_chrl(tokStream: var nlTokStream, node: var nlNode): bool = +proc parseChrL(tokStream: var nlTokStream, node: var nlNode): bool = node = nlNode( nType: nlNodeType.CHRL ) @@ -59,7 +43,7 @@ proc parse*(tokStream: var nlTokStream): nlNode = case tok.tType: of nlTokType.DQUO: # Attempt to parse string literal - if not parse_strl(tokStream, node): + if not parseStrL(tokStream, node): echo "Unmatched Double Quotation! 
Malformed String Literal" echo tokStream.currLine() echo repeat(" ", tok.startPos), '^' @@ -68,7 +52,7 @@ proc parse*(tokStream: var nlTokStream): nlNode = echo node[] of nlTokType.SQUO: # Attempt to parse string literal - if not parse_chrl(tokStream, node): + if not parseChrL(tokStream, node): echo "Unmatched Single Quotation! Malformed Character Literal" echo tokStream.currLine() echo repeat(" ", tok.startPos), '^' diff --git a/src/noether/parser/parseutil.nim b/src/noether/parser/parseutil.nim new file mode 100644 index 0000000..6fa1243 --- /dev/null +++ b/src/noether/parser/parseutil.nim @@ -0,0 +1,58 @@ +import nodes +import ../lexer/tokstream + +type + # NOTE: Values above __FAIL__ indicate a failed state + nlParseStat* = enum + OK, + __FAIL__, + MIDAS, # Greedy search was never satisfied + UNMATCHED, + TOOBIG, + +proc `*`(stat: nlParseStat, b: bool): nlParseStat = + result = if b: stat else: nlParseStat.OK + +proc isFail*(stat: nlParseStat): bool = + result = (stat >= nlParseStat.__FAIL__) + + +#[ "Greed" refers to something I mentioned in my discussion on + | Noether's grammar (in an EBNF-like language). Greed just + | means "everything until a condition is satisfied". + | That condition should be supplied by a Nim procedural type. + ]# + +# Greed will consume anything until a condition is satisfied +# Returns UNMATCHED if the greed was never satisfied (OMG!!) +proc greed(tokStream: var nlTokStream, + toks: var seq[nlTok], + satisfy: proc(tok: nlTok): bool, + ): nlParseStat = + var tok: nlTok + while tokStream.nextTok(tok): + toks.add(tok) + if satisfy(tok): + return nlParseStat.OK + result = nlParseStat.UNMATCHED + +proc greedLine(tokStream: var nlTokStream, + toks: var seq[nlTok], + satisfy: proc(tok: nlTok): bool): nlParseStat = + var tok: nlTok + while tokStream.nextTok(tok): + toks.add(tok) + if satisfy(tok): + return true + result = + +#[ Templates for generating greed satisfying conditions. 
+ ]# + +# Satisfied if it finds nlTok of type matchType +template satisfyMatch(matchType: nlTokType) = + proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType) + +# Satisfied if it finds nlTok of type matchType or EOL reached +template satisfyMatchEOL(matchType: nlTokType) = + proc(tok: nlTok): bool {.inline.} = (tok.tType == matchType or tok.tType == nlTokType.EOL)