diff --git a/src/backends/ll.c b/src/backends/ll.c index cc5369c..7861e9c 100644 --- a/src/backends/ll.c +++ b/src/backends/ll.c @@ -2,15 +2,153 @@ #include "../internal.h" #include "../parsers/parser_internal.h" +#define H_LL_END_TOKEN 256 -int h_ll_compile(HAllocator* mm__, const HParser* parser, const void* params) { - return -1; // TODO + +/* Grammar representation and analysis */ + +typedef struct HCFGrammar_ { + HHashSet *nts; // HCFChoices, each representing the alternative + // productions for one nonterminal + HHashSet *geneps; // set of NTs that can generate the empty string + HHashTable *first; // "first" sets of the grammar's nonterminals + HArena *arena; +} HCFGrammar; + +bool h_eq_pointer(const void *p, const void *q) { return (p==q); } +HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; } + +HCFGrammar *h_grammar_new(HAllocator *mm__) +{ + HCFGrammar *g = h_new(HCFGrammar, 1); + assert(g != NULL); + + g->arena = h_new_arena(mm__, 0); // default blocksize + g->nts = h_hashset_new(g->arena, h_eq_pointer, h_hash_ptr); + g->geneps = NULL; + g->first = NULL; + + return g; } -HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) { +void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol); // helper + +/* Convert 'parser' into CFG representation by desugaring and compiling the set + * of nonterminals. + * A NULL return means we are unable to represent the parser as a CFG. + */ +HCFGrammar *h_grammar(HAllocator* mm__, const HParser *parser) +{ + // convert parser to CFG form ("desugar"). + HCFChoice *desugared = h_desugar(mm__, parser); + if(desugared == NULL) + return NULL; // -> backend not suitable for this parser + + HCFGrammar *g = h_grammar_new(mm__); + + // recursively traverse the desugared form and collect all HCFChoices that + // represent a nonterminal. + h_grammar_collect(g, desugared); + + if(h_hashset_empty(g->nts)) { + // desugared is a single nonterminal. wrap it. + // XXX is this even necessary? + } + + return g; +} + +/* Add all nonterminals reachable from symbol to grammar. */ +void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol) +{ + HCFSequence **s; // for the rhs (sentential form) of a production + HCFChoice **x; // for a symbol in s + + if(h_hashset_present(grammar->nts, symbol)) + return; // already visited, get out + + switch(symbol->type) { + case HCF_CHAR: + case HCF_END: + break; // it's a terminal symbol, nothing to do + + case HCF_CHARSET: + h_hashset_put(grammar->nts, symbol); + break; // this type has only terminal children + + case HCF_CHOICE: + h_hashset_put(grammar->nts, symbol); + // each element s of symbol->seq (HCFSequence) represents the RHS of + // a production. call self on all symbols (HCFChoice) in s. + for(s = symbol->seq; *s != NULL; s++) { + for(x = (*s)->items; *x != NULL; x++) { + h_grammar_collect(grammar, *x); + } + } + break; + + default: + assert_message(0, "unknown HCFChoice type"); + } +} + +// first_set(grammar?, production) +// follow_set + + + +/* LL parse table and associated data */ + +typedef struct HLLTable_ { + unsigned int **arr; // Nonterminals numbered from 1, 0 = error. +} HLLTable; + +typedef struct HLLData_ { + HCFGrammar *grammar; + HLLTable *table; +} HLLData; + +#if 0 +/* Interface to look up an entry in the parse table. */ +unsigned int h_ll_lookup(const HLLTable *table, unsigned int nonterminal, uint8_t token) +{ + assert(nonterminal > 0); + return table->arr[n*257+token]; +} +#endif + +// XXX predict_set + +int h_ll_compile(HAllocator* mm__, const HParser* parser, const void* params) +{ + // Convert parser to a CFG. This can fail as indicated by a NULL return. + HCFGrammar *grammar = h_grammar(mm__, parser); + if(grammar == NULL) + return -1; // -> Backend unsuitable for this parser. + + // TODO: eliminate common prefixes + // TODO: eliminate left recursion + // TODO: avoid conflicts by splitting occurances? + + // XXX generate table and store in parser->data. + // XXX any other data structures needed? + + return -1; // XXX 0 on success +} + + + +/* LL driver */ + +HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) +{ + // get table from parser->data. + // run driver. return NULL; // TODO } + + HParserBackendVTable h__ll_backend_vtable = { .compile = h_ll_compile, .parse = h_ll_parse