split LR table representation by key type (terminals/nonterminals)

This commit is contained in:
Sven M. Hallberg 2013-06-21 22:55:04 +02:00
parent d67e12a825
commit 853e1fba46
7 changed files with 156 additions and 68 deletions

View file

@ -119,11 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = {
// XXX TODO // XXX TODO
// - split tables into
// - one mapping input bytes to actions (shift or reduce or conflict)
// - one mapping reduced-to lhs nonterminals to shift states
// - can there still be conflicts here?
// - use HStringMap to represent lookahead sets and the "piggyback" table
// - implement engine merging // - implement engine merging
// - triggered when two enter the same state // - triggered when two enter the same state
// - old stacks (/engines?) saved // - old stacks (/engines?) saved

View file

@ -13,9 +13,23 @@ static inline size_t seqsize(void *p_)
return n+1; return n+1;
} }
static HLRAction *
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
{
switch(symbol->type) {
case HCF_END:
return table->tmap[state]->end_branch;
case HCF_CHAR:
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
default:
// nonterminal case
return h_hashtable_get(table->ntmap[state], symbol);
}
}
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A) static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{ {
HLRAction *action = h_hashtable_get(table->rows[x], A); HLRAction *action = lrtable_lookup(table, x, A);
assert(action != NULL); assert(action != NULL);
assert(action->type == HLR_SHIFT); assert(action->type == HLR_SHIFT);
return action->nextstate; return action->nextstate;
@ -130,23 +144,50 @@ static inline bool has_conflicts(HLRTable *table)
return !h_slist_empty(table->inadeq); return !h_slist_empty(table->inadeq);
} }
// place a new terminal entry in tbl; records conflicts in tbl->inadeq // for each lookahead symbol (fs), put action into tmap
// returns 0 on success, -1 on conflict // returns 0 on success, -1 on conflict
// ignores forall entries // ignores forall entries
static int terminal_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action) static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
{ {
HLRAction *prev = h_hashtable_get(tbl->rows[state], x); int ret = 0;
if(fs->epsilon_branch) {
HLRAction *prev = tmap->epsilon_branch;
if(prev && prev != action) { if(prev && prev != action) {
// conflict // conflict
action = h_lr_conflict(tbl->arena, prev, action); tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
h_hashtable_put(tbl->rows[state], x, action); ret = -1;
return -1;
} else { } else {
h_hashtable_put(tbl->rows[state], x, action); tmap->epsilon_branch = action;
return 0;
} }
} }
if(fs->end_branch) {
HLRAction *prev = tmap->end_branch;
if(prev && prev != action) {
// conflict
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
} else {
tmap->end_branch = action;
}
}
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
if(!tmap_) {
tmap_ = h_stringmap_new(tmap->arena);
h_hashtable_put(tmap->char_branches, key, tmap_);
}
if(terminals_put(tmap_, fs_, action) < 0)
ret = -1;
H_END_FOREACH
return ret;
}
// check whether a sequence of enhanced-grammar symbols (p) matches the given // check whether a sequence of enhanced-grammar symbols (p) matches the given
// (original-grammar) production rhs and terminates in the given end state. // (original-grammar) production rhs and terminates in the given end state.
static bool match_production(HLREnhGrammar *eg, HCFChoice **p, static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
@ -254,23 +295,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
assert(!h_stringmap_empty(fs)); assert(!h_stringmap_empty(fs));
// for each lookahead symbol, put action into table cell // for each lookahead symbol, put action into table cell
if(fs->end_branch) { if(terminals_put(table->tmap[state], fs, action) < 0)
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_END;
if(terminal_put(table, state, terminal, action) < 0)
inadeq = true; inadeq = true;
}
H_FOREACH(fs->char_branches, void *key, HStringMap *m)
if(!m->epsilon_branch)
continue;
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_CHAR;
terminal->chr = key_char((HCharKey)key);
if(terminal_put(table, state, terminal, action) < 0)
inadeq = true;
H_END_FOREACH // lookahead character
} H_END_FOREACH // enhanced production } H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item H_END_FOREACH // reducible item
@ -306,6 +332,8 @@ HParserBackendVTable h__lalr_backend_vtable = {
// dummy! // dummy!
int test_lalr(void) int test_lalr(void)
{ {
HAllocator *mm__ = &system_allocator;
/* /*
E -> E '-' T E -> E '-' T
| T | T
@ -321,7 +349,7 @@ int test_lalr(void)
HParser *p = E; HParser *p = E;
printf("\n==== G R A M M A R ====\n"); printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar(&system_allocator, p); HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, p));
if(g == NULL) { if(g == NULL) {
fprintf(stderr, "h_cfgrammar failed\n"); fprintf(stderr, "h_cfgrammar failed\n");
return 1; return 1;

View file

@ -1,4 +1,5 @@
#include <assert.h> #include <assert.h>
#include <ctype.h>
#include "../parsers/parser_internal.h" #include "../parsers/parser_internal.h"
#include "lr.h" #include "lr.h"
@ -118,14 +119,16 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
HLRTable *ret = h_new(HLRTable, 1); HLRTable *ret = h_new(HLRTable, 1);
ret->nrows = nrows; ret->nrows = nrows;
ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *)); ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
ret->inadeq = h_slist_new(arena); ret->inadeq = h_slist_new(arena);
ret->arena = arena; ret->arena = arena;
ret->mm__ = mm__; ret->mm__ = mm__;
for(size_t i=0; i<nrows; i++) { for(size_t i=0; i<nrows; i++) {
ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
ret->tmap[i] = h_stringmap_new(arena);
ret->forall[i] = NULL; ret->forall[i] = NULL;
} }
@ -186,6 +189,12 @@ HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
return action; return action;
} }
/* Report whether row `i` of `table` has no entries at all, i.e. both its
 * nonterminal map (ntmap) and its terminal map (tmap) are empty.
 * The forall shortcut for the row is not consulted here.
 */
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
{
  // a nonterminal entry alone already makes the row non-empty
  if(!h_hashtable_empty(table->ntmap[i])) {
    return false;
  }

  return h_stringmap_empty(table->tmap[i]);
}
/* LR driver */ /* LR driver */
@ -214,10 +223,14 @@ terminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
assert(state < table->nrows); assert(state < table->nrows);
if(table->forall[state]) { if(table->forall[state]) {
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict assert(h_lrtable_row_empty(table, state)); // that would be a conflict
return table->forall[state]; return table->forall[state];
} else { } else {
return h_hashtable_get(table->rows[state], symbol); // XXX use the lookahead stream directly here (cf. llk)
if(symbol->type == HCF_END)
return table->tmap[state]->end_branch;
else
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
} }
} }
@ -228,12 +241,9 @@ nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
size_t state = engine->state; size_t state = engine->state;
assert(state < table->nrows); assert(state < table->nrows);
if(table->forall[state]) { assert(!table->forall[state]); // contains only reduce entries
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict // we are only looking for shifts
return table->forall[state]; return h_hashtable_get(table->ntmap[state], symbol);
} else {
return h_hashtable_get(table->rows[state], symbol);
}
} }
const HLRAction *h_lrengine_action(const HLREngine *engine) const HLRAction *h_lrengine_action(const HLREngine *engine)
@ -500,6 +510,19 @@ void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
} }
} }
/* Value-printing callback in the shape expected by h_pprint_stringmap:
 * `env` carries the grammar and `val` the LR action to print; both are
 * forwarded to pprint_lraction.
 */
static void valprint_lraction(FILE *file, void *env, void *val)
{
  pprint_lraction(file, (const HCFGrammar *)env, (const HLRAction *)val);
}
/* Print the terminal part of one table row: every entry of `map` is written
 * via h_pprint_stringmap with ' ' as separator and valprint_lraction as the
 * value printer, with the grammar `g` passed along as its environment.
 */
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
                                     const HStringMap *map)
{
  // the callback environment is an untyped, non-const pointer
  void *env = (void *)g;

  h_pprint_stringmap(file, ' ', valprint_lraction, env, map);
}
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
unsigned int indent) unsigned int indent)
{ {
@ -507,18 +530,19 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
for(unsigned int j=0; j<indent; j++) fputc(' ', f); for(unsigned int j=0; j<indent; j++) fputc(' ', f);
fprintf(f, "%4lu:", i); fprintf(f, "%4lu:", i);
if(table->forall[i]) { if(table->forall[i]) {
fputs(" - ", f); fputc(' ', f);
pprint_lraction(f, g, table->forall[i]); pprint_lraction(f, g, table->forall[i]);
fputs(" -", f); if(!h_lrtable_row_empty(table, i))
if(!h_hashtable_empty(table->rows[i]))
fputs(" !!", f); fputs(" !!", f);
} }
H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action) H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
fputc(' ', f); // separator fputc(' ', f); // separator
h_pprint_symbol(f, g, symbol); h_pprint_symbol(f, g, symbol);
fputc(':', f); fputc(':', f);
pprint_lraction(f, g, action); pprint_lraction(f, g, action);
H_END_FOREACH H_END_FOREACH
fputc(' ', f); // separator
pprint_lrtable_terminals(f, g, table->tmap[i]);
fputc('\n', f); fputc('\n', f);
} }

View file

@ -48,8 +48,9 @@ typedef struct HLRAction_ {
} HLRAction; } HLRAction;
typedef struct HLRTable_ { typedef struct HLRTable_ {
size_t nrows; size_t nrows; // dimension of the pointer arrays below
HHashTable **rows; // map symbols to HLRActions HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
HStringMap **tmap; // map lookahead strings to HLRActions, per row
HLRAction **forall; // shortcut to set an action for an entire row HLRAction **forall; // shortcut to set an action for an entire row
HCFChoice *start; // start symbol HCFChoice *start; // start symbol
HSlist *inadeq; // indices of any inadequate states HSlist *inadeq; // indices of any inadequate states
@ -110,6 +111,7 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item); HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
HLRAction *h_shift_action(HArena *arena, size_t nextstate); HLRAction *h_shift_action(HArena *arena, size_t nextstate);
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new); HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
bool h_eq_symbol(const void *p, const void *q); bool h_eq_symbol(const void *p, const void *q);
bool h_eq_lr_itemset(const void *p, const void *q); bool h_eq_lr_itemset(const void *p, const void *q);

View file

@ -166,7 +166,18 @@ void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
size_t nextstate) size_t nextstate)
{ {
HLRAction *action = h_shift_action(table->arena, nextstate); HLRAction *action = h_shift_action(table->arena, nextstate);
h_hashtable_put(table->rows[state], symbol, action);
switch(symbol->type) {
case HCF_END:
h_stringmap_put_end(table->tmap[state], action);
break;
case HCF_CHAR:
h_stringmap_put_char(table->tmap[state], symbol->chr, action);
break;
default:
// nonterminal case
h_hashtable_put(table->ntmap[state], symbol, action);
}
} }
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa) HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
@ -210,7 +221,8 @@ HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
// check for shift/reduce conflict with other entries // check for shift/reduce conflict with other entries
// NOTE: these are not recorded as HLR_CONFLICTs at this point // NOTE: these are not recorded as HLR_CONFLICTs at this point
if(!h_hashtable_empty(table->rows[i]))
if(!h_lrtable_row_empty(table, i))
inadeq = true; inadeq = true;
} }
H_END_FOREACH H_END_FOREACH

View file

@ -813,27 +813,43 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
#define BUFSIZE 512 #define BUFSIZE 512
static bool static bool
pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
const HStringMap *set) void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map)
{ {
assert(n < BUFSIZE-4); assert(n < BUFSIZE-4);
if(set->epsilon_branch) { if(map->epsilon_branch) {
if(!first) fputc(',', file); first=false; if(!first) fputc(sep, file); first=false;
if(n==0) if(n==0) {
fputs("''", file); fputs("\"\"", file);
else } else {
fputs("\"", file);
fwrite(prefix, 1, n, file); fwrite(prefix, 1, n, file);
fputs("\"", file);
} }
if(set->end_branch) { if(valprint) {
if(!first) fputc(',', file); first=false; fputc(':', file);
fwrite(prefix, 1, n, file); valprint(file, env, map->epsilon_branch);
fputc('$', file); }
} }
// iterate over set->char_branches if(map->end_branch) {
HHashTable *ht = set->char_branches; if(!first) fputs(",\"", file); first=false;
if(n>0) fputs("\"\"", file);
fwrite(prefix, 1, n, file);
if(n>0) fputs("\"\"", file);
fputs("$", file);
if(valprint) {
fputc(':', file);
valprint(file, env, map->end_branch);
}
}
// iterate over map->char_branches
HHashTable *ht = map->char_branches;
size_t i; size_t i;
HHashTableEntry *hte; HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) { for(i=0; i < ht->capacity; i++) {
@ -859,20 +875,28 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
n_ += sprintf(prefix+n_, "\\x%.2X", c); n_ += sprintf(prefix+n_, "\\x%.2X", c);
} }
first = pprint_stringset_elems(file, first, prefix, n_, ends); first = pprint_stringmap_elems(file, first, prefix, n_,
sep, valprint, env, ends);
} }
} }
return first; return first;
} }
/* Print the entries of `map` to `file`.
 *
 * file:     output stream
 * sep:      separator character handed to the element printer
 * valprint: optional callback for printing an entry's value; may be NULL
 * env:      opaque pointer passed through to valprint
 * map:      the string map to print
 *
 * Starts the recursive element printer with an empty key prefix (n = 0) in a
 * BUFSIZE-byte scratch buffer and with `first` set.
 */
void h_pprint_stringmap(FILE *file, char sep,
                        void (*valprint)(FILE *f, void *env, void *val), void *env,
                        const HStringMap *map)
{
  char prefix[BUFSIZE];

  // the "first element" flag returned by the helper is of no use here
  (void)pprint_stringmap_elems(file, true, prefix, 0, sep, valprint, env, map);
}
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent) void h_pprint_stringset(FILE *file, const HStringMap *set, int indent)
{ {
int j; int j;
for(j=0; j<indent; j++) fputc(' ', file); for(j=0; j<indent; j++) fputc(' ', file);
char buf[BUFSIZE];
fputc('{', file); fputc('{', file);
pprint_stringset_elems(file, true, buf, 0, set); h_pprint_stringmap(file, ',', NULL, NULL, set);
fputs("}\n", file); fputs("}\n", file);
} }

View file

@ -97,4 +97,7 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x); void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent); void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent); void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
// Print the entries of a string map to `file`.
//  sep      - separator character handed to the element printer
//  valprint - optional callback that prints an entry's value; may be NULL
//             (h_pprint_stringset passes NULL to print keys only)
//  env      - opaque pointer passed through to valprint
void h_pprint_stringmap(FILE *file, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map);
void h_pprint_char(FILE *file, char c); void h_pprint_char(FILE *file, char c);