add (untested) "derives epsilon" analysis

This commit is contained in:
Sven M. Hallberg 2013-04-27 23:20:13 +02:00
parent da75a7ae9a
commit 3ff6de5d8a

View file

@ -15,7 +15,7 @@ typedef struct HCFGrammar_ {
HArena *arena; HArena *arena;
} HCFGrammar; } HCFGrammar;
bool h_eq_pointer(const void *p, const void *q) { return (p==q); } bool h_eq_ptr(const void *p, const void *q) { return (p==q); }
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; } HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
HCFGrammar *h_grammar_new(HAllocator *mm__) HCFGrammar *h_grammar_new(HAllocator *mm__)
@ -24,14 +24,16 @@ HCFGrammar *h_grammar_new(HAllocator *mm__)
assert(g != NULL); assert(g != NULL);
g->arena = h_new_arena(mm__, 0); // default blocksize g->arena = h_new_arena(mm__, 0); // default blocksize
g->nts = h_hashset_new(g->arena, h_eq_pointer, h_hash_ptr); g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
g->geneps = NULL; g->geneps = NULL;
g->first = NULL; g->first = NULL;
return g; return g;
} }
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol); // helper // helpers
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
static void collect_geneps(HCFGrammar *grammar);
/* Convert 'parser' into CFG representation by desugaring and compiling the set /* Convert 'parser' into CFG representation by desugaring and compiling the set
* of nonterminals. * of nonterminals.
@ -48,18 +50,20 @@ HCFGrammar *h_grammar(HAllocator* mm__, const HParser *parser)
// recursively traverse the desugared form and collect all HCFChoices that // recursively traverse the desugared form and collect all HCFChoices that
// represent a nonterminal. // represent a nonterminal.
h_grammar_collect(g, desugared); collect_nts(g, desugared);
if(h_hashset_empty(g->nts)) { if(h_hashset_empty(g->nts)) {
// desugared is a single nonterminal. wrap it. // desugared is a single nonterminal. wrap it.
// XXX is this even necessary? // XXX is this even necessary?
} }
// XXX call collect_geneps here?
return g; return g;
} }
/* Add all nonterminals reachable from symbol to grammar. */ /* Add all nonterminals reachable from symbol to grammar. */
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol) static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
{ {
HCFSequence **s; // for the rhs (sentential form) of a production HCFSequence **s; // for the rhs (sentential form) of a production
HCFChoice **x; // for a symbol in s HCFChoice **x; // for a symbol in s
@ -82,7 +86,7 @@ void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
// a production. call self on all symbols (HCFChoice) in s. // a production. call self on all symbols (HCFChoice) in s.
for(s = symbol->seq; *s != NULL; s++) { for(s = symbol->seq; *s != NULL; s++) {
for(x = (*s)->items; *x != NULL; x++) { for(x = (*s)->items; *x != NULL; x++) {
h_grammar_collect(grammar, *x); collect_nts(grammar, *x);
} }
} }
break; break;
@ -92,6 +96,72 @@ void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
} }
} }
/* Does the given symbol derive the empty string (under g)? */
bool h_symbol_derives_epsilon(HCFGrammar *g, HCFChoice *symbol)
{
if(g->geneps == NULL)
collect_geneps(g);
assert(g->geneps != NULL);
switch(symbol->type) {
case HCF_END: // the end token doesn't count as empty
case HCF_CHAR:
case HCF_CHARSET:
return false;
default: // HCF_CHOICE
return h_hashset_present(g->geneps, symbol);
}
}
/* Does the sentential form given by s derive the empty string? */
bool h_sequence_derives_epsilon(HCFGrammar *g, HCFSequence *s)
{
// return true iff all symbols in s derive epsilon
HCFChoice **x;
for(x = s->items; *x; x++) {
if(!h_symbol_derives_epsilon(g, *x))
return false;
}
return true;
}
/* Populate the geneps member of g; no-op if called multiple times. */
static void collect_geneps(HCFGrammar *g)
{
if(g->geneps == NULL)
return;
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
assert(g->geneps != NULL);
// iterate over the grammar's symbols, the elements of g->nts.
// add any we can identify as deriving epsilon to g->geneps.
// repeat until g->geneps no longer changes.
size_t prevused = g->nts->used;
do {
size_t i;
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
HCFChoice *symbol = hte->key;
// only nonterminals can derive epsilon.
if(symbol->type != HCF_CHOICE)
continue;
// this NT derives epsilon iff any of its productions does.
HCFSequence **p;
for(p = symbol->seq; *p != NULL; p++) {
if(h_sequence_derives_epsilon(g, *p)) {
h_hashset_put(g->nts, symbol);
break;
}
}
}
}
} while(g->nts->used != prevused);
}
// first_set(grammar?, production) // first_set(grammar?, production)
// follow_set // follow_set