add (untested) "derives epsilon" analysis
This commit is contained in:
parent
da75a7ae9a
commit
3ff6de5d8a
1 changed files with 76 additions and 6 deletions
|
|
@ -15,7 +15,7 @@ typedef struct HCFGrammar_ {
|
||||||
HArena *arena;
|
HArena *arena;
|
||||||
} HCFGrammar;
|
} HCFGrammar;
|
||||||
|
|
||||||
bool h_eq_pointer(const void *p, const void *q) { return (p==q); }
|
bool h_eq_ptr(const void *p, const void *q) { return (p==q); }
|
||||||
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
|
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
|
||||||
|
|
||||||
HCFGrammar *h_grammar_new(HAllocator *mm__)
|
HCFGrammar *h_grammar_new(HAllocator *mm__)
|
||||||
|
|
@ -24,14 +24,16 @@ HCFGrammar *h_grammar_new(HAllocator *mm__)
|
||||||
assert(g != NULL);
|
assert(g != NULL);
|
||||||
|
|
||||||
g->arena = h_new_arena(mm__, 0); // default blocksize
|
g->arena = h_new_arena(mm__, 0); // default blocksize
|
||||||
g->nts = h_hashset_new(g->arena, h_eq_pointer, h_hash_ptr);
|
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
g->geneps = NULL;
|
g->geneps = NULL;
|
||||||
g->first = NULL;
|
g->first = NULL;
|
||||||
|
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol); // helper
|
// helpers
|
||||||
|
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
|
||||||
|
static void collect_geneps(HCFGrammar *grammar);
|
||||||
|
|
||||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||||
* of nonterminals.
|
* of nonterminals.
|
||||||
|
|
@ -48,18 +50,20 @@ HCFGrammar *h_grammar(HAllocator* mm__, const HParser *parser)
|
||||||
|
|
||||||
// recursively traverse the desugared form and collect all HCFChoices that
|
// recursively traverse the desugared form and collect all HCFChoices that
|
||||||
// represent a nonterminal.
|
// represent a nonterminal.
|
||||||
h_grammar_collect(g, desugared);
|
collect_nts(g, desugared);
|
||||||
|
|
||||||
if(h_hashset_empty(g->nts)) {
|
if(h_hashset_empty(g->nts)) {
|
||||||
// desugared is a single nonterminal. wrap it.
|
// desugared is a single nonterminal. wrap it.
|
||||||
// XXX is this even necessary?
|
// XXX is this even necessary?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XXX call collect_geneps here?
|
||||||
|
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add all nonterminals reachable from symbol to grammar. */
|
/* Add all nonterminals reachable from symbol to grammar. */
|
||||||
void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
|
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
{
|
{
|
||||||
HCFSequence **s; // for the rhs (sentential form) of a production
|
HCFSequence **s; // for the rhs (sentential form) of a production
|
||||||
HCFChoice **x; // for a symbol in s
|
HCFChoice **x; // for a symbol in s
|
||||||
|
|
@ -82,7 +86,7 @@ void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
// a production. call self on all symbols (HCFChoice) in s.
|
// a production. call self on all symbols (HCFChoice) in s.
|
||||||
for(s = symbol->seq; *s != NULL; s++) {
|
for(s = symbol->seq; *s != NULL; s++) {
|
||||||
for(x = (*s)->items; *x != NULL; x++) {
|
for(x = (*s)->items; *x != NULL; x++) {
|
||||||
h_grammar_collect(grammar, *x);
|
collect_nts(grammar, *x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -92,6 +96,72 @@ void h_grammar_collect(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Does the given symbol derive the empty string (under g)? */
|
||||||
|
bool h_symbol_derives_epsilon(HCFGrammar *g, HCFChoice *symbol)
|
||||||
|
{
|
||||||
|
if(g->geneps == NULL)
|
||||||
|
collect_geneps(g);
|
||||||
|
assert(g->geneps != NULL);
|
||||||
|
|
||||||
|
switch(symbol->type) {
|
||||||
|
case HCF_END: // the end token doesn't count as empty
|
||||||
|
case HCF_CHAR:
|
||||||
|
case HCF_CHARSET:
|
||||||
|
return false;
|
||||||
|
default: // HCF_CHOICE
|
||||||
|
return h_hashset_present(g->geneps, symbol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Does the sentential form given by s derive the empty string? */
|
||||||
|
bool h_sequence_derives_epsilon(HCFGrammar *g, HCFSequence *s)
|
||||||
|
{
|
||||||
|
// return true iff all symbols in s derive epsilon
|
||||||
|
HCFChoice **x;
|
||||||
|
for(x = s->items; *x; x++) {
|
||||||
|
if(!h_symbol_derives_epsilon(g, *x))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Populate the geneps member of g; no-op if called multiple times. */
|
||||||
|
static void collect_geneps(HCFGrammar *g)
|
||||||
|
{
|
||||||
|
if(g->geneps == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
assert(g->geneps != NULL);
|
||||||
|
|
||||||
|
// iterate over the grammar's symbols, the elements of g->nts.
|
||||||
|
// add any we can identify as deriving epsilon to g->geneps.
|
||||||
|
// repeat until g->geneps no longer changes.
|
||||||
|
size_t prevused = g->nts->used;
|
||||||
|
do {
|
||||||
|
size_t i;
|
||||||
|
HHashTableEntry *hte;
|
||||||
|
for(i=0; i < g->nts->capacity; i++) {
|
||||||
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
|
HCFChoice *symbol = hte->key;
|
||||||
|
|
||||||
|
// only nonterminals can derive epsilon.
|
||||||
|
if(symbol->type != HCF_CHOICE)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// this NT derives epsilon iff any of its productions does.
|
||||||
|
HCFSequence **p;
|
||||||
|
for(p = symbol->seq; *p != NULL; p++) {
|
||||||
|
if(h_sequence_derives_epsilon(g, *p)) {
|
||||||
|
h_hashset_put(g->nts, symbol);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while(g->nts->used != prevused);
|
||||||
|
}
|
||||||
|
|
||||||
// first_set(grammar?, production)
|
// first_set(grammar?, production)
|
||||||
// follow_set
|
// follow_set
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue