split LR table representation by key type (terminals/nonterminals)
This commit is contained in:
parent
d67e12a825
commit
853e1fba46
7 changed files with 156 additions and 68 deletions
|
|
@ -119,11 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = {
|
|||
|
||||
|
||||
// XXX TODO
|
||||
// - split tables into
|
||||
// - one mapping input bytes to actions (shift or reduce or conflict)
|
||||
// - one mapping reduced-to lhs nonterminals to shift states
|
||||
// - can there still be conflicts here?
|
||||
// - use HStringMap to represent lookahead sets and the "piggyback" table
|
||||
// - implement engine merging
|
||||
// - triggered when two enter the same state
|
||||
// - old stacks (/engines?) saved
|
||||
|
|
|
|||
|
|
@ -13,9 +13,23 @@ static inline size_t seqsize(void *p_)
|
|||
return n+1;
|
||||
}
|
||||
|
||||
// Fetch the table entry stored for `symbol` in row `state`, choosing the
// per-row map to consult from symbol->type.
//
// table:  LR table holding the per-row tmap (terminals) and ntmap
//         (nonterminals) arrays.
// state:  row index; used unchecked as an index into both arrays, so the
//         caller is responsible for keeping it in range.
// symbol: grammar symbol to look up; only its type and, for HCF_CHAR, its
//         chr field are read directly here.
//
// Returns whatever the selected map yields for the symbol.
// NOTE(review): presumably NULL when no entry exists (follow_transition
// asserts on that) -- confirm against the map implementations.
static HLRAction *
|
||||
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
|
||||
{
|
||||
switch(symbol->type) {
|
||||
case HCF_END:
|
||||
// end-of-input is kept in the string map's dedicated end_branch slot
return table->tmap[state]->end_branch;
|
||||
case HCF_CHAR:
|
||||
// single-character terminal: query the string map with a key of length 1
// NOTE(review): meaning of the trailing `false` flag is not visible here
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
|
||||
default:
|
||||
// nonterminal case
// (every type other than HCF_END/HCF_CHAR ends up here, keyed by symbol)
|
||||
return h_hashtable_get(table->ntmap[state], symbol);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
|
||||
{
|
||||
HLRAction *action = h_hashtable_get(table->rows[x], A);
|
||||
HLRAction *action = lrtable_lookup(table, x, A);
|
||||
assert(action != NULL);
|
||||
assert(action->type == HLR_SHIFT);
|
||||
return action->nextstate;
|
||||
|
|
@ -130,21 +144,48 @@ static inline bool has_conflicts(HLRTable *table)
|
|||
return !h_slist_empty(table->inadeq);
|
||||
}
|
||||
|
||||
// place a new terminal entry in tbl; records conflicts in tbl->inadeq
|
||||
// for each lookahead symbol (fs), put action into tmap
|
||||
// returns 0 on success, -1 on conflict
|
||||
// ignores forall entries
|
||||
static int terminal_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action)
|
||||
static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
|
||||
{
|
||||
HLRAction *prev = h_hashtable_get(tbl->rows[state], x);
|
||||
if(prev && prev != action) {
|
||||
// conflict
|
||||
action = h_lr_conflict(tbl->arena, prev, action);
|
||||
h_hashtable_put(tbl->rows[state], x, action);
|
||||
return -1;
|
||||
} else {
|
||||
h_hashtable_put(tbl->rows[state], x, action);
|
||||
return 0;
|
||||
int ret = 0;
|
||||
|
||||
if(fs->epsilon_branch) {
|
||||
HLRAction *prev = tmap->epsilon_branch;
|
||||
if(prev && prev != action) {
|
||||
// conflict
|
||||
tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
|
||||
ret = -1;
|
||||
} else {
|
||||
tmap->epsilon_branch = action;
|
||||
}
|
||||
}
|
||||
|
||||
if(fs->end_branch) {
|
||||
HLRAction *prev = tmap->end_branch;
|
||||
if(prev && prev != action) {
|
||||
// conflict
|
||||
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
|
||||
ret = -1;
|
||||
} else {
|
||||
tmap->end_branch = action;
|
||||
}
|
||||
}
|
||||
|
||||
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
|
||||
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
|
||||
|
||||
if(!tmap_) {
|
||||
tmap_ = h_stringmap_new(tmap->arena);
|
||||
h_hashtable_put(tmap->char_branches, key, tmap_);
|
||||
}
|
||||
|
||||
if(terminals_put(tmap_, fs_, action) < 0)
|
||||
ret = -1;
|
||||
H_END_FOREACH
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// check whether a sequence of enhanced-grammar symbols (p) matches the given
|
||||
|
|
@ -254,23 +295,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
|||
assert(!h_stringmap_empty(fs));
|
||||
|
||||
// for each lookahead symbol, put action into table cell
|
||||
if(fs->end_branch) {
|
||||
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||
terminal->type = HCF_END;
|
||||
if(terminal_put(table, state, terminal, action) < 0)
|
||||
inadeq = true;
|
||||
}
|
||||
H_FOREACH(fs->char_branches, void *key, HStringMap *m)
|
||||
if(!m->epsilon_branch)
|
||||
continue;
|
||||
|
||||
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||
terminal->type = HCF_CHAR;
|
||||
terminal->chr = key_char((HCharKey)key);
|
||||
|
||||
if(terminal_put(table, state, terminal, action) < 0)
|
||||
inadeq = true;
|
||||
H_END_FOREACH // lookahead character
|
||||
if(terminals_put(table->tmap[state], fs, action) < 0)
|
||||
inadeq = true;
|
||||
} H_END_FOREACH // enhanced production
|
||||
H_END_FOREACH // reducible item
|
||||
|
||||
|
|
@ -306,6 +332,8 @@ HParserBackendVTable h__lalr_backend_vtable = {
|
|||
// dummy!
|
||||
int test_lalr(void)
|
||||
{
|
||||
HAllocator *mm__ = &system_allocator;
|
||||
|
||||
/*
|
||||
E -> E '-' T
|
||||
| T
|
||||
|
|
@ -321,7 +349,7 @@ int test_lalr(void)
|
|||
HParser *p = E;
|
||||
|
||||
printf("\n==== G R A M M A R ====\n");
|
||||
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
||||
HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, p));
|
||||
if(g == NULL) {
|
||||
fprintf(stderr, "h_cfgrammar failed\n");
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include "../parsers/parser_internal.h"
|
||||
#include "lr.h"
|
||||
|
||||
|
|
@ -118,14 +119,16 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
|||
|
||||
HLRTable *ret = h_new(HLRTable, 1);
|
||||
ret->nrows = nrows;
|
||||
ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
|
||||
ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
|
||||
ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
|
||||
ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
|
||||
ret->inadeq = h_slist_new(arena);
|
||||
ret->arena = arena;
|
||||
ret->mm__ = mm__;
|
||||
|
||||
for(size_t i=0; i<nrows; i++) {
|
||||
ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
|
||||
ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
|
||||
ret->tmap[i] = h_stringmap_new(arena);
|
||||
ret->forall[i] = NULL;
|
||||
}
|
||||
|
||||
|
|
@ -186,6 +189,12 @@ HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
|
|||
return action;
|
||||
}
|
||||
|
||||
// Report whether row `i` of `table` holds no entries at all: true only when
// both the nonterminal map (ntmap[i]) and the terminal map (tmap[i]) are
// empty. The row's forall[] shortcut is not consulted, and `i` is not
// range-checked here.
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
|
||||
{
|
||||
return (h_hashtable_empty(table->ntmap[i])
|
||||
&& h_stringmap_empty(table->tmap[i]));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LR driver */
|
||||
|
|
@ -214,10 +223,14 @@ terminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
|
|||
|
||||
assert(state < table->nrows);
|
||||
if(table->forall[state]) {
|
||||
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict
|
||||
assert(h_lrtable_row_empty(table, state)); // that would be a conflict
|
||||
return table->forall[state];
|
||||
} else {
|
||||
return h_hashtable_get(table->rows[state], symbol);
|
||||
// XXX use the lookahead stream directly here (cf. llk)
|
||||
if(symbol->type == HCF_END)
|
||||
return table->tmap[state]->end_branch;
|
||||
else
|
||||
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -228,12 +241,9 @@ nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
|
|||
size_t state = engine->state;
|
||||
|
||||
assert(state < table->nrows);
|
||||
if(table->forall[state]) {
|
||||
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict
|
||||
return table->forall[state];
|
||||
} else {
|
||||
return h_hashtable_get(table->rows[state], symbol);
|
||||
}
|
||||
assert(!table->forall[state]); // contains only reduce entries
|
||||
// we are only looking for shifts
|
||||
return h_hashtable_get(table->ntmap[state], symbol);
|
||||
}
|
||||
|
||||
const HLRAction *h_lrengine_action(const HLREngine *engine)
|
||||
|
|
@ -500,6 +510,19 @@ void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
|
|||
}
|
||||
}
|
||||
|
||||
// Value-printing callback in the shape h_pprint_stringmap is given by
// pprint_lrtable_terminals: interprets `val` as an HLRAction and `env` as the
// HCFGrammar, then forwards both to pprint_lraction, writing to `file`.
static void valprint_lraction(FILE *file, void *env, void *val)
|
||||
{
|
||||
// untyped callback arguments are reinterpreted (and const-qualified) here
const HLRAction *action = val;
|
||||
const HCFGrammar *grammar = env;
|
||||
pprint_lraction(file, grammar, action);
|
||||
}
|
||||
|
||||
// Print the terminal (string map) part of one table row to `file` by
// delegating to h_pprint_stringmap, with valprint_lraction rendering each
// stored action and the grammar `g` passed through as its environment.
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
|
||||
const HStringMap *map)
|
||||
{
|
||||
// the cast drops const only to fit the untyped env argument;
// valprint_lraction treats the grammar as const again
// NOTE(review): ' ' is presumably the separator between entries -- confirm
h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map);
|
||||
}
|
||||
|
||||
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||
unsigned int indent)
|
||||
{
|
||||
|
|
@ -507,18 +530,19 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
|||
for(unsigned int j=0; j<indent; j++) fputc(' ', f);
|
||||
fprintf(f, "%4lu:", i);
|
||||
if(table->forall[i]) {
|
||||
fputs(" - ", f);
|
||||
fputc(' ', f);
|
||||
pprint_lraction(f, g, table->forall[i]);
|
||||
fputs(" -", f);
|
||||
if(!h_hashtable_empty(table->rows[i]))
|
||||
if(!h_lrtable_row_empty(table, i))
|
||||
fputs(" !!", f);
|
||||
}
|
||||
H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action)
|
||||
H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
|
||||
fputc(' ', f); // separator
|
||||
h_pprint_symbol(f, g, symbol);
|
||||
fputc(':', f);
|
||||
pprint_lraction(f, g, action);
|
||||
H_END_FOREACH
|
||||
fputc(' ', f); // separator
|
||||
pprint_lrtable_terminals(f, g, table->tmap[i]);
|
||||
fputc('\n', f);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,8 +48,9 @@ typedef struct HLRAction_ {
|
|||
} HLRAction;
|
||||
|
||||
typedef struct HLRTable_ {
|
||||
size_t nrows;
|
||||
HHashTable **rows; // map symbols to HLRActions
|
||||
size_t nrows; // dimension of the pointer arrays below
|
||||
HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
|
||||
HStringMap **tmap; // map lookahead strings to HLRActions, per row
|
||||
HLRAction **forall; // shortcut to set an action for an entire row
|
||||
HCFChoice *start; // start symbol
|
||||
HSlist *inadeq; // indices of any inadequate states
|
||||
|
|
@ -110,6 +111,7 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
|||
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
|
||||
HLRAction *h_shift_action(HArena *arena, size_t nextstate);
|
||||
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
|
||||
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
|
||||
|
||||
bool h_eq_symbol(const void *p, const void *q);
|
||||
bool h_eq_lr_itemset(const void *p, const void *q);
|
||||
|
|
|
|||
|
|
@ -166,7 +166,18 @@ void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
|
|||
size_t nextstate)
|
||||
{
|
||||
HLRAction *action = h_shift_action(table->arena, nextstate);
|
||||
h_hashtable_put(table->rows[state], symbol, action);
|
||||
|
||||
switch(symbol->type) {
|
||||
case HCF_END:
|
||||
h_stringmap_put_end(table->tmap[state], action);
|
||||
break;
|
||||
case HCF_CHAR:
|
||||
h_stringmap_put_char(table->tmap[state], symbol->chr, action);
|
||||
break;
|
||||
default:
|
||||
// nonterminal case
|
||||
h_hashtable_put(table->ntmap[state], symbol, action);
|
||||
}
|
||||
}
|
||||
|
||||
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
|
||||
|
|
@ -210,7 +221,8 @@ HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
|
|||
|
||||
// check for shift/reduce conflict with other entries
|
||||
// NOTE: these are not recorded as HLR_CONFLICTs at this point
|
||||
if(!h_hashtable_empty(table->rows[i]))
|
||||
|
||||
if(!h_lrtable_row_empty(table, i))
|
||||
inadeq = true;
|
||||
}
|
||||
H_END_FOREACH
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue