add LALR conflict resolution (untested)
This commit is contained in:
parent
ddfd3796a7
commit
fd297b636c
3 changed files with 138 additions and 38 deletions
|
|
@ -52,6 +52,13 @@ typedef struct HLRTable_ {
|
||||||
HAllocator *mm__;
|
HAllocator *mm__;
|
||||||
} HLRTable;
|
} HLRTable;
|
||||||
|
|
||||||
|
typedef struct HLREnhGrammar_ {
|
||||||
|
HCFGrammar *grammar; // enhanced grammar
|
||||||
|
HHashTable *tmap; // maps transitions to enhanced-grammar symbols
|
||||||
|
HHashTable *smap; // maps enhanced-grammar symbols to transitions
|
||||||
|
HArena *arena;
|
||||||
|
} HLREnhGrammar;
|
||||||
|
|
||||||
|
|
||||||
// compare symbols - terminals by value, others by pointer
|
// compare symbols - terminals by value, others by pointer
|
||||||
static bool eq_symbol(const void *p, const void *q)
|
static bool eq_symbol(const void *p, const void *q)
|
||||||
|
|
@ -410,14 +417,10 @@ static HLRAction *reduce_action(HArena *arena, const HLRItem *item)
|
||||||
return action;
|
return action;
|
||||||
}
|
}
|
||||||
|
|
||||||
HLRTable *h_lr0_table(HCFGrammar *g)
|
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
|
||||||
{
|
{
|
||||||
HAllocator *mm__ = g->mm__;
|
HAllocator *mm__ = g->mm__;
|
||||||
|
|
||||||
// construct LR(0) DFA
|
|
||||||
HLRDFA *dfa = h_lr0_dfa(g);
|
|
||||||
if(!dfa) return NULL;
|
|
||||||
|
|
||||||
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
||||||
HArena *arena = table->arena;
|
HArena *arena = table->arena;
|
||||||
|
|
||||||
|
|
@ -475,13 +478,13 @@ static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
|
||||||
return action->nextstate;
|
return action->nextstate;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
|
static HCFChoice *transform_symbol(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
size_t x, HCFChoice *B, size_t z);
|
size_t x, HCFChoice *B, size_t z);
|
||||||
|
|
||||||
static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
|
static HCFChoice *transform_productions(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
size_t x, HCFChoice *xAy)
|
size_t x, HCFChoice *xAy)
|
||||||
{
|
{
|
||||||
HArena *arena = map->arena;
|
HArena *arena = eg->arena;
|
||||||
|
|
||||||
HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq)
|
HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq)
|
||||||
* sizeof(HCFSequence *));
|
* sizeof(HCFSequence *));
|
||||||
|
|
@ -494,7 +497,7 @@ static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
|
||||||
HCFChoice **xBz = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *));
|
HCFChoice **xBz = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *));
|
||||||
for(; *B; B++, xBz++) {
|
for(; *B; B++, xBz++) {
|
||||||
size_t z = follow_transition(table, x, *B);
|
size_t z = follow_transition(table, x, *B);
|
||||||
*xBz = transform_symbol(table, map, x, *B, z);
|
*xBz = transform_symbol(table, eg, x, *B, z);
|
||||||
x=z;
|
x=z;
|
||||||
}
|
}
|
||||||
*xBz = NULL;
|
*xBz = NULL;
|
||||||
|
|
@ -518,21 +521,22 @@ static inline HLRTransition *transition(HArena *arena,
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
|
static HCFChoice *transform_symbol(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
size_t x, HCFChoice *B, size_t z)
|
size_t x, HCFChoice *B, size_t z)
|
||||||
{
|
{
|
||||||
HArena *arena = map->arena;
|
HArena *arena = eg->arena;
|
||||||
|
|
||||||
// look up the transition in map, create symbol if not found
|
// look up the transition in map, create symbol if not found
|
||||||
HLRTransition *x_B_z = transition(arena, x, B, z);
|
HLRTransition *x_B_z = transition(arena, x, B, z);
|
||||||
HCFChoice *xBz = h_hashtable_get(map, x_B_z);
|
HCFChoice *xBz = h_hashtable_get(eg->tmap, x_B_z);
|
||||||
if(!xBz) {
|
if(!xBz) {
|
||||||
HCFChoice *xBz = h_arena_malloc(arena, sizeof(HCFChoice));
|
HCFChoice *xBz = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
*xBz = *B;
|
*xBz = *B;
|
||||||
h_hashtable_put(map, x_B_z, xBz);
|
h_hashtable_put(eg->tmap, x_B_z, xBz);
|
||||||
|
h_hashtable_put(eg->smap, xBz, x_B_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
return transform_productions(table, map, x, xBz);
|
return transform_productions(table, eg, x, xBz);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool eq_transition(const void *p, const void *q)
|
static bool eq_transition(const void *p, const void *q)
|
||||||
|
|
@ -547,39 +551,71 @@ static HHashValue hash_transition(const void *p)
|
||||||
return (h_hash_ptr(t->symbol) + t->from + t->to); // XXX ?
|
return (h_hash_ptr(t->symbol) + t->from + t->to); // XXX ?
|
||||||
}
|
}
|
||||||
|
|
||||||
static HHashTable *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
|
static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
|
||||||
{
|
{
|
||||||
|
HAllocator *mm__ = g->mm__;
|
||||||
HArena *arena = g->arena; // XXX ?
|
HArena *arena = g->arena; // XXX ?
|
||||||
HHashTable *map = h_hashtable_new(arena, eq_transition, hash_transition);
|
|
||||||
|
HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
|
||||||
|
eg->tmap = h_hashtable_new(arena, eq_transition, hash_transition);
|
||||||
|
eg->smap = h_hashtable_new(arena, eq_transition, hash_transition);
|
||||||
|
eg->arena = arena;
|
||||||
|
|
||||||
// copy the start symbol over
|
// copy the start symbol over
|
||||||
HCFChoice *start = h_arena_malloc(arena, sizeof(HCFChoice));
|
HCFChoice *start = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
*start = *(g->start);
|
*start = *(g->start);
|
||||||
h_hashtable_put(map, g->start, start);
|
|
||||||
|
|
||||||
transform_productions(tbl, map, 0, start);
|
transform_productions(tbl, eg, 0, start);
|
||||||
|
|
||||||
return map;
|
eg->grammar = h_cfgrammar_(mm__, start);
|
||||||
|
return eg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* LALR table generation */
|
/* LALR table generation */
|
||||||
|
|
||||||
// Tell whether any conflicts have been recorded for the table, i.e. whether
// its list of inadequate states (table->inadeq) is non-empty.
static inline bool has_conflicts(HLRTable *table)
{
  return !h_slist_empty(table->inadeq);
}
|
||||||
|
|
||||||
|
// place a new entry in tbl; records conflicts in tbl->inadeq
|
||||||
|
// returns 0 on success, -1 on conflict
|
||||||
|
// ignores forall entries
|
||||||
|
int h_lrtable_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action)
|
||||||
|
{
|
||||||
|
HLRAction *prev = h_hashtable_get(tbl->rows[state], x);
|
||||||
|
if(prev && prev != action) {
|
||||||
|
// conflict
|
||||||
|
h_slist_push(tbl->inadeq, (void *)(uintptr_t)state);
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
h_hashtable_put(tbl->rows[state], x, action);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check whether a sequence of enhanced-grammar symbols (p) matches the given
|
||||||
|
// (original-grammar) production rhs and terminates in the given end state.
|
||||||
|
bool match_production(HLREnhGrammar *eg, HCFChoice **p,
|
||||||
|
HCFChoice **rhs, size_t endstate)
|
||||||
|
{
|
||||||
|
HLRTransition *t;
|
||||||
|
for(; *p && *rhs; p++, rhs++) {
|
||||||
|
t = h_hashtable_get(eg->smap, *p);
|
||||||
|
assert(t != NULL);
|
||||||
|
if(!eq_symbol(t->symbol, *rhs))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (*p == *rhs // both NULL
|
||||||
|
&& t->to == endstate);
|
||||||
|
}
|
||||||
|
|
||||||
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
{
|
{
|
||||||
// generate CFG from parser
|
// generate CFG from parser
|
||||||
|
// construct LR(0) DFA
|
||||||
// build LR(0) table
|
// build LR(0) table
|
||||||
// if necessary, resolve conflicts "by conversion to SLR"
|
// if necessary, resolve conflicts "by conversion to SLR"
|
||||||
|
|
||||||
|
|
@ -587,21 +623,79 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
if(g == NULL) // backend not suitable (language not context-free)
|
if(g == NULL) // backend not suitable (language not context-free)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
HLRTable *table = h_lr0_table(g);
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
if(table == NULL) // this should normally not happen
|
if(dfa == NULL) { // this should normally not happen
|
||||||
|
h_cfgrammar_free(g);
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
HLRTable *table = h_lr0_table(g, dfa);
|
||||||
|
if(table == NULL) { // this should normally not happen
|
||||||
|
h_cfgrammar_free(g);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if(has_conflicts(table)) {
|
if(has_conflicts(table)) {
|
||||||
HHashTable *map = enhance_grammar(g, table);
|
HArena *arena = table->arena;
|
||||||
if(map == NULL) // this should normally not happen
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// XXX resolve conflicts
|
HLREnhGrammar *eg = enhance_grammar(g, table);
|
||||||
// iterate over dfa's transitions where 'from' state is inadequate
|
if(eg == NULL) { // this should normally not happen
|
||||||
// look up enhanced symbol corr. to the transition
|
h_cfgrammar_free(g);
|
||||||
// for each terminal in follow set of enh. symbol:
|
h_lrtable_free(table);
|
||||||
// put reduce action into table cell (state, terminal)
|
return -1;
|
||||||
// conflict if already occupied
|
}
|
||||||
|
|
||||||
|
// go through the inadequate states; replace inadeq with a new list
|
||||||
|
HSlist *inadeq = table->inadeq;
|
||||||
|
table->inadeq = h_slist_new(arena);
|
||||||
|
|
||||||
|
for(HSlistNode *x=inadeq->head; x; x=x->next) {
|
||||||
|
size_t state = (uintptr_t)x->elem;
|
||||||
|
|
||||||
|
// clear old forall entry, it's being replaced by more fine-grained ones
|
||||||
|
table->forall[state] = NULL;
|
||||||
|
|
||||||
|
// go through each reducible item of state
|
||||||
|
H_FOREACH_KEY(dfa->states[state], HLRItem *item)
|
||||||
|
if(item->mark < item->len)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// action to place in the table cells indicated by lookahead
|
||||||
|
HLRAction *action = reduce_action(arena, item);
|
||||||
|
|
||||||
|
// find all LR(0)-enhanced productions matching item
|
||||||
|
H_FOREACH(eg->smap, HCFChoice *lhs, HLRTransition *t)
|
||||||
|
if(t->symbol != item->lhs)
|
||||||
|
continue;
|
||||||
|
for(HCFSequence **p=lhs->seq; *p; p++) {
|
||||||
|
HCFChoice **rhs = (*p)->items;
|
||||||
|
if(!match_production(eg, rhs, item->rhs, state))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// the left-hand symbol's follow set is this production's
|
||||||
|
// contribution to the lookahead
|
||||||
|
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
|
||||||
|
assert(fs != NULL);
|
||||||
|
|
||||||
|
// for each lookahead symbol, put action into table cell
|
||||||
|
if(fs->end_branch) {
|
||||||
|
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
|
terminal->type = HCF_END;
|
||||||
|
h_lrtable_put(table, state, terminal, action);
|
||||||
|
}
|
||||||
|
H_FOREACH(fs->char_branches, void *key, HStringMap *m)
|
||||||
|
if(!m->epsilon_branch)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
|
terminal->type = HCF_CHAR;
|
||||||
|
terminal->chr = key_char((HCharKey)key);
|
||||||
|
|
||||||
|
h_lrtable_put(table, state, terminal, action);
|
||||||
|
H_END_FOREACH // lookahead character
|
||||||
|
} H_END_FOREACH // enhanced production
|
||||||
|
H_END_FOREACH // reducible item
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
h_cfgrammar_free(g);
|
h_cfgrammar_free(g);
|
||||||
|
|
@ -924,7 +1018,7 @@ int test_lalr(void)
|
||||||
fprintf(stderr, "h_lalr_dfa failed\n");
|
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||||
|
|
||||||
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||||
HLRTable *table0 = h_lr0_table(g);
|
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||||
if(table0)
|
if(table0)
|
||||||
h_pprint_lrtable(stdout, g, table0, 0);
|
h_pprint_lrtable(stdout, g, table0, 0);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,11 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
|
||||||
if(desugared == NULL)
|
if(desugared == NULL)
|
||||||
return NULL; // -> backend not suitable for this parser
|
return NULL; // -> backend not suitable for this parser
|
||||||
|
|
||||||
|
return h_cfgrammar_(mm__, desugared);
|
||||||
|
}
|
||||||
|
|
||||||
|
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
|
||||||
|
{
|
||||||
HCFGrammar *g = h_cfgrammar_new(mm__);
|
HCFGrammar *g = h_cfgrammar_new(mm__);
|
||||||
|
|
||||||
// recursively traverse the desugared form and collect all HCFChoices that
|
// recursively traverse the desugared form and collect all HCFChoices that
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_
|
||||||
* A NULL return means we are unable to represent the parser as a CFG.
|
* A NULL return means we are unable to represent the parser as a CFG.
|
||||||
*/
|
*/
|
||||||
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
|
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
|
||||||
|
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *start);
|
||||||
|
|
||||||
HCFGrammar *h_cfgrammar_new(HAllocator *mm__);
|
HCFGrammar *h_cfgrammar_new(HAllocator *mm__);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue