split LR table representation by key type (terminals/nonterminals)

This commit is contained in:
Sven M. Hallberg 2013-06-21 22:55:04 +02:00
parent d67e12a825
commit 853e1fba46
7 changed files with 156 additions and 68 deletions

View file

@ -119,11 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = {
// XXX TODO
// - split tables into
// - one mapping input bytes to actions (shift or reduce or conflict)
// - one mapping reduced-to lhs nonterminals to shift states
// - can there still be conflicts here?
// - use HStringMap to represent lookahead sets and the "piggyback" table
// - implement engine merging
// - triggered when two enter the same state
// - old stacks (/engines?) saved

View file

@ -13,9 +13,23 @@ static inline size_t seqsize(void *p_)
return n+1;
}
/*
 * Fetch the action stored for `symbol` in row `state` of `table`.
 *
 * The end-of-input symbol and single characters are looked up in the row's
 * terminal string map (tmap); every other symbol type is looked up in the
 * row's nonterminal hash table (ntmap).
 * Returns whatever the underlying map yields for that key.
 */
static HLRAction *
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
{
  // end of input: dedicated slot at the root of the terminal map
  if(symbol->type == HCF_END)
    return table->tmap[state]->end_branch;

  // single character: a one-byte key in the terminal map
  if(symbol->type == HCF_CHAR)
    return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);

  // anything else is handled as a nonterminal
  return h_hashtable_get(table->ntmap[state], symbol);
}
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
HLRAction *action = h_hashtable_get(table->rows[x], A);
HLRAction *action = lrtable_lookup(table, x, A);
assert(action != NULL);
assert(action->type == HLR_SHIFT);
return action->nextstate;
@ -130,23 +144,50 @@ static inline bool has_conflicts(HLRTable *table)
return !h_slist_empty(table->inadeq);
}
// NOTE(review): this listing appears to interleave the removed lines of the
// old terminal_put() (which works on tbl->rows[state]) with the added lines of
// terminals_put() (which works on an HStringMap). The comments added below
// describe the terminals_put() lines -- confirm against the actual file.
//
// place a new terminal entry in tbl; records conflicts in tbl->inadeq
// for each lookahead symbol (fs), put action into tmap
// returns 0 on success, -1 on conflict
// ignores forall entries
static int terminal_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action)
static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
{
HLRAction *prev = h_hashtable_get(tbl->rows[state], x);
// ret stays 0 unless a conflict is seen here or in a recursive call
int ret = 0;
// fs has an entry ending at this position: mirror it in tmap's epsilon slot
if(fs->epsilon_branch) {
HLRAction *prev = tmap->epsilon_branch;
if(prev && prev != action) {
// conflict
action = h_lr_conflict(tbl->arena, prev, action);
h_hashtable_put(tbl->rows[state], x, action);
return -1;
// a different action already occupies the slot: store the result of
// h_lr_conflict() on both and remember the failure
tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
} else {
h_hashtable_put(tbl->rows[state], x, action);
return 0;
// slot is empty or already holds this very action
tmap->epsilon_branch = action;
}
}
// same treatment for the end-of-input slot
if(fs->end_branch) {
HLRAction *prev = tmap->end_branch;
if(prev && prev != action) {
// conflict
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
} else {
tmap->end_branch = action;
}
}
// descend into every character branch of fs, creating the matching
// sub-map in tmap when it does not exist yet
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
if(!tmap_) {
tmap_ = h_stringmap_new(tmap->arena);
h_hashtable_put(tmap->char_branches, key, tmap_);
}
// a conflict anywhere below makes the whole call report -1
if(terminals_put(tmap_, fs_, action) < 0)
ret = -1;
H_END_FOREACH
return ret;
}
// check whether a sequence of enhanced-grammar symbols (p) matches the given
// (original-grammar) production rhs and terminates in the given end state.
static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
@ -254,23 +295,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
assert(!h_stringmap_empty(fs));
// for each lookahead symbol, put action into table cell
if(fs->end_branch) {
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_END;
if(terminal_put(table, state, terminal, action) < 0)
if(terminals_put(table->tmap[state], fs, action) < 0)
inadeq = true;
}
H_FOREACH(fs->char_branches, void *key, HStringMap *m)
if(!m->epsilon_branch)
continue;
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_CHAR;
terminal->chr = key_char((HCharKey)key);
if(terminal_put(table, state, terminal, action) < 0)
inadeq = true;
H_END_FOREACH // lookahead character
} H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item
@ -306,6 +332,8 @@ HParserBackendVTable h__lalr_backend_vtable = {
// dummy!
int test_lalr(void)
{
HAllocator *mm__ = &system_allocator;
/*
E -> E '-' T
| T
@ -321,7 +349,7 @@ int test_lalr(void)
HParser *p = E;
printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, p));
if(g == NULL) {
fprintf(stderr, "h_cfgrammar failed\n");
return 1;

View file

@ -1,4 +1,5 @@
#include <assert.h>
#include <ctype.h>
#include "../parsers/parser_internal.h"
#include "lr.h"
@ -118,14 +119,16 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
HLRTable *ret = h_new(HLRTable, 1);
ret->nrows = nrows;
ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
ret->inadeq = h_slist_new(arena);
ret->arena = arena;
ret->mm__ = mm__;
for(size_t i=0; i<nrows; i++) {
ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
ret->tmap[i] = h_stringmap_new(arena);
ret->forall[i] = NULL;
}
@ -186,6 +189,12 @@ HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
return action;
}
/*
 * Tell whether row `i` of `table` has no entries at all, i.e. both its
 * nonterminal map (ntmap) and its terminal map (tmap) are empty.
 * The row's `forall` shortcut is not consulted.
 */
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
{
  // any nonterminal entry makes the row non-empty; no need to look further
  if(!h_hashtable_empty(table->ntmap[i]))
    return false;

  return h_stringmap_empty(table->tmap[i]);
}
/* LR driver */
@ -214,10 +223,14 @@ terminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
assert(state < table->nrows);
if(table->forall[state]) {
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict
assert(h_lrtable_row_empty(table, state)); // that would be a conflict
return table->forall[state];
} else {
return h_hashtable_get(table->rows[state], symbol);
// XXX use the lookahead stream directly here (cf. llk)
if(symbol->type == HCF_END)
return table->tmap[state]->end_branch;
else
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
}
}
@ -228,12 +241,9 @@ nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
size_t state = engine->state;
assert(state < table->nrows);
if(table->forall[state]) {
assert(h_hashtable_empty(table->rows[state])); // that would be a conflict
return table->forall[state];
} else {
return h_hashtable_get(table->rows[state], symbol);
}
assert(!table->forall[state]); // contains only reduce entries
// we are only looking for shifts
return h_hashtable_get(table->ntmap[state], symbol);
}
const HLRAction *h_lrengine_action(const HLREngine *engine)
@ -500,6 +510,19 @@ void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
}
}
/*
 * Value-printing callback with the (file, env, val) signature expected by
 * h_pprint_stringmap(): prints the LR action `val` via pprint_lraction().
 * `env` carries the grammar that pprint_lraction() needs.
 */
static void valprint_lraction(FILE *file, void *env, void *val)
{
  pprint_lraction(file, (const HCFGrammar *)env, (const HLRAction *)val);
}
/*
 * Print the entries of a terminal map, separated by spaces, each followed by
 * its LR action (rendered through valprint_lraction()).
 */
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
                                     const HStringMap *map)
{
  // the grammar travels through the callback's untyped env pointer
  void *env = (void *)g;

  h_pprint_stringmap(file, ' ', valprint_lraction, env, map);
}
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
unsigned int indent)
{
@ -507,18 +530,19 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
for(unsigned int j=0; j<indent; j++) fputc(' ', f);
fprintf(f, "%4lu:", i);
if(table->forall[i]) {
fputs(" - ", f);
fputc(' ', f);
pprint_lraction(f, g, table->forall[i]);
fputs(" -", f);
if(!h_hashtable_empty(table->rows[i]))
if(!h_lrtable_row_empty(table, i))
fputs(" !!", f);
}
H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action)
H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
fputc(' ', f); // separator
h_pprint_symbol(f, g, symbol);
fputc(':', f);
pprint_lraction(f, g, action);
H_END_FOREACH
fputc(' ', f); // separator
pprint_lrtable_terminals(f, g, table->tmap[i]);
fputc('\n', f);
}

View file

@ -48,8 +48,9 @@ typedef struct HLRAction_ {
} HLRAction;
typedef struct HLRTable_ {
size_t nrows;
HHashTable **rows; // map symbols to HLRActions
size_t nrows; // dimension of the pointer arrays below
HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
HStringMap **tmap; // map lookahead strings to HLRActions, per row
HLRAction **forall; // shortcut to set an action for an entire row
HCFChoice *start; // start symbol
HSlist *inadeq; // indices of any inadequate states
@ -110,6 +111,7 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
HLRAction *h_shift_action(HArena *arena, size_t nextstate);
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
bool h_eq_symbol(const void *p, const void *q);
bool h_eq_lr_itemset(const void *p, const void *q);

View file

@ -166,7 +166,18 @@ void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
size_t nextstate)
{
HLRAction *action = h_shift_action(table->arena, nextstate);
h_hashtable_put(table->rows[state], symbol, action);
switch(symbol->type) {
case HCF_END:
h_stringmap_put_end(table->tmap[state], action);
break;
case HCF_CHAR:
h_stringmap_put_char(table->tmap[state], symbol->chr, action);
break;
default:
// nonterminal case
h_hashtable_put(table->ntmap[state], symbol, action);
}
}
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
@ -210,7 +221,8 @@ HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
// check for shift/reduce conflict with other entries
// NOTE: these are not recorded as HLR_CONFLICTs at this point
if(!h_hashtable_empty(table->rows[i]))
if(!h_lrtable_row_empty(table, i))
inadeq = true;
}
H_END_FOREACH

View file

@ -813,27 +813,43 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
#define BUFSIZE 512
static bool
pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
const HStringMap *set)
pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map)
{
assert(n < BUFSIZE-4);
if(set->epsilon_branch) {
if(!first) fputc(',', file); first=false;
if(n==0)
fputs("''", file);
else
if(map->epsilon_branch) {
if(!first) fputc(sep, file); first=false;
if(n==0) {
fputs("\"\"", file);
} else {
fputs("\"", file);
fwrite(prefix, 1, n, file);
fputs("\"", file);
}
if(set->end_branch) {
if(!first) fputc(',', file); first=false;
fwrite(prefix, 1, n, file);
fputc('$', file);
if(valprint) {
fputc(':', file);
valprint(file, env, map->epsilon_branch);
}
}
// iterate over set->char_branches
HHashTable *ht = set->char_branches;
if(map->end_branch) {
if(!first) fputs(",\"", file); first=false;
if(n>0) fputs("\"\"", file);
fwrite(prefix, 1, n, file);
if(n>0) fputs("\"\"", file);
fputs("$", file);
if(valprint) {
fputc(':', file);
valprint(file, env, map->end_branch);
}
}
// iterate over map->char_branches
HHashTable *ht = map->char_branches;
size_t i;
HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) {
@ -859,20 +875,28 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
n_ += sprintf(prefix+n_, "\\x%.2X", c);
}
first = pprint_stringset_elems(file, first, prefix, n_, ends);
first = pprint_stringmap_elems(file, first, prefix, n_,
sep, valprint, env, ends);
}
}
return first;
}
/*
 * Print all entries of `map` to `file`, separated by `sep`.
 *
 * If `valprint` is non-NULL it is called as valprint(file, env, value) to
 * render the value stored with each entry; `env` is passed through untouched.
 * This is the entry point for the recursive pprint_stringmap_elems(), started
 * with an empty key prefix.
 */
void h_pprint_stringmap(FILE *file, char sep,
                        void (*valprint)(FILE *f, void *env, void *val), void *env,
                        const HStringMap *map)
{
  char prefix[BUFSIZE];       // scratch buffer for the key prefix
  const bool first = true;    // nothing has been printed yet
  const size_t depth = 0;     // prefix is empty at the root

  pprint_stringmap_elems(file, first, prefix, depth, sep, valprint, env, map);
}
// Print `set` on a line of its own: `indent` spaces, then the elements
// enclosed in braces, then a newline. Elements are separated by ',' and no
// value printer is supplied (NULL), so only the keys are shown.
// NOTE(review): this listing appears to interleave removed and added diff
// lines; the `buf` declaration and the pprint_stringset_elems() call look like
// the pre-change side that the h_pprint_stringmap() call replaces -- confirm
// against the actual file.
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent)
{
int j;
// leading indentation
for(j=0; j<indent; j++) fputc(' ', file);
char buf[BUFSIZE];
fputc('{', file);
pprint_stringset_elems(file, true, buf, 0, set);
h_pprint_stringmap(file, ',', NULL, NULL, set);
fputs("}\n", file);
}

View file

@ -97,4 +97,7 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
void h_pprint_stringmap(FILE *file, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map);
void h_pprint_char(FILE *file, char c);