add grammar representation functions and WIP LL scaffold
This commit is contained in:
parent
88091b602f
commit
da75a7ae9a
1 changed files with 141 additions and 3 deletions
|
|
@ -2,15 +2,153 @@
|
|||
#include "../internal.h"
|
||||
#include "../parsers/parser_internal.h"
|
||||
|
||||
#define H_LL_END_TOKEN 256
|
||||
|
||||
int h_ll_compile(HAllocator* mm__, const HParser* parser, const void* params) {
|
||||
return -1; // TODO
|
||||
|
||||
/* Grammar representation and analysis */
|
||||
|
||||
typedef struct HCFGrammar_ {
|
||||
HHashSet *nts; // HCFChoices, each representing the alternative
|
||||
// productions for one nonterminal
|
||||
HHashSet *geneps; // set of NTs that can generate the empty string
|
||||
HHashTable *first; // "first" sets of the grammar's nonterminals
|
||||
HArena *arena;
|
||||
} HCFGrammar;
|
||||
|
||||
bool h_eq_pointer(const void *p, const void *q) { return (p==q); }
|
||||
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
|
||||
|
||||
HCFGrammar *h_grammar_new(HAllocator *mm__)
|
||||
{
|
||||
HCFGrammar *g = h_new(HCFGrammar, 1);
|
||||
assert(g != NULL);
|
||||
|
||||
g->arena = h_new_arena(mm__, 0); // default blocksize
|
||||
g->nts = h_hashset_new(g->arena, h_eq_pointer, h_hash_ptr);
|
||||
g->geneps = NULL;
|
||||
g->first = NULL;
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
|
||||
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol); // helper
|
||||
|
||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||
* of nonterminals.
|
||||
* A NULL return means we are unable to represent the parser as a CFG.
|
||||
*/
|
||||
HCFGrammar *h_grammar(HAllocator* mm__, const HParser *parser)
|
||||
{
|
||||
// convert parser to CFG form ("desugar").
|
||||
HCFChoice *desugared = h_desugar(mm__, parser);
|
||||
if(desugared == NULL)
|
||||
return NULL; // -> backend not suitable for this parser
|
||||
|
||||
HCFGrammar *g = h_grammar_new(mm__);
|
||||
|
||||
// recursively traverse the desugared form and collect all HCFChoices that
|
||||
// represent a nonterminal.
|
||||
h_grammar_collect(g, desugared);
|
||||
|
||||
if(h_hashset_empty(g->nts)) {
|
||||
// desugared is a single nonterminal. wrap it.
|
||||
// XXX is this even necessary?
|
||||
}
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
/* Add all nonterminals reachable from symbol to grammar. */
|
||||
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
|
||||
{
|
||||
HCFSequence **s; // for the rhs (sentential form) of a production
|
||||
HCFChoice **x; // for a symbol in s
|
||||
|
||||
if(h_hashset_present(grammar->nts, symbol))
|
||||
return; // already visited, get out
|
||||
|
||||
switch(symbol->type) {
|
||||
case HCF_CHAR:
|
||||
case HCF_END:
|
||||
break; // it's a terminal symbol, nothing to do
|
||||
|
||||
case HCF_CHARSET:
|
||||
h_hashset_put(grammar->nts, symbol);
|
||||
break; // this type has only terminal children
|
||||
|
||||
case HCF_CHOICE:
|
||||
h_hashset_put(grammar->nts, symbol);
|
||||
// each element s of symbol->seq (HCFSequence) represents the RHS of
|
||||
// a production. call self on all symbols (HCFChoice) in s.
|
||||
for(s = symbol->seq; *s != NULL; s++) {
|
||||
for(x = (*s)->items; *x != NULL; x++) {
|
||||
h_grammar_collect(grammar, *x);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert_message(0, "unknown HCFChoice type");
|
||||
}
|
||||
}
|
||||
|
||||
// first_set(grammar?, production)
|
||||
// follow_set
|
||||
|
||||
|
||||
|
||||
/* LL parse table and associated data */
|
||||
|
||||
typedef struct HLLTable_ {
|
||||
unsigned int **arr; // Nonterminals numbered from 1, 0 = error.
|
||||
} HLLTable;
|
||||
|
||||
typedef struct HLLData_ {
|
||||
HCFGrammar *grammar;
|
||||
HLLTable *table;
|
||||
} HLLData;
|
||||
|
||||
#if 0
|
||||
/* Interface to look up an entry in the parse table. */
|
||||
unsigned int h_ll_lookup(const HLLTable *table, unsigned int nonterminal, uint8_t token)
|
||||
{
|
||||
assert(nonterminal > 0);
|
||||
return table->arr[n*257+token];
|
||||
}
|
||||
#endif
|
||||
|
||||
// XXX predict_set
|
||||
|
||||
int h_ll_compile(HAllocator* mm__, const HParser* parser, const void* params)
|
||||
{
|
||||
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
||||
HCFGrammar *grammar = h_grammar(mm__, parser);
|
||||
if(grammar == NULL)
|
||||
return -1; // -> Backend unsuitable for this parser.
|
||||
|
||||
// TODO: eliminate common prefixes
|
||||
// TODO: eliminate left recursion
|
||||
// TODO: avoid conflicts by splitting occurances?
|
||||
|
||||
// XXX generate table and store in parser->data.
|
||||
// XXX any other data structures needed?
|
||||
|
||||
return -1; // XXX 0 on success
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LL driver */
|
||||
|
||||
HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state)
|
||||
{
|
||||
// get table from parser->data.
|
||||
// run driver.
|
||||
return NULL; // TODO
|
||||
}
|
||||
|
||||
|
||||
|
||||
HParserBackendVTable h__ll_backend_vtable = {
|
||||
.compile = h_ll_compile,
|
||||
.parse = h_ll_parse
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue