Merge pull request #14 from pesco/GLR

GLR
This commit is contained in:
Meredith L. Patterson 2013-06-22 10:35:44 -07:00
commit 623ca6e4e8
14 changed files with 1428 additions and 780 deletions

View file

@ -27,9 +27,10 @@ PARSERS := \
BACKENDS := \
packrat \
regex \
llk \
lalr \
regex
glr
HAMMER_PARTS := \
bitreader.o \
@ -43,6 +44,8 @@ HAMMER_PARTS := \
benchmark.o \
cfgrammar.o \
glue.o \
backends/lr.o \
backends/lr0.o \
$(PARSERS:%=parsers/%.o) \
$(BACKENDS:%=backends/%.o)

294
src/backends/glr.c Normal file
View file

@ -0,0 +1,294 @@
#include <assert.h>
#include "lr.h"
static bool glr_step(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action);
/* GLR compilation (LALR w/o failing on conflict) */
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  // GLR reuses the LALR table construction unchanged.
  const int status = h_lalr_compile(mm__, parser, params);

  // Anything other than "-1 with a table left behind" is passed through.
  if(status != -1 || !parser->backend_data)
    return status;

  // The table exists and merely contains conflicts; the GLR driver forks on
  // those at parse time, so this counts as success.
  return 0;
}
// Release the backend data attached to 'parser'.
// Delegates to h_lalr_free; h_glr_compile builds its table via h_lalr_compile.
void h_glr_free(HParser *parser)
{
h_lalr_free(parser);
}
/* Merging engines (when they converge on the same state) */
static HLREngine *lrengine_merge(HLREngine *old, HLREngine *new)
{
  HLREngine *combined = h_arena_malloc(old->arena, sizeof(HLREngine));

  // only engines in the same state, reading the same input, may be merged
  assert(new->state == old->state);
  assert(new->input.input == old->input.input);

  // start from a copy of 'old', then remember both ancestors and give the
  // merged engine a fresh, empty stack of its own
  *combined = *old;
  combined->merged[0] = old;
  combined->merged[1] = new;
  combined->stack = h_slist_new(old->arena);

  return combined;
}
static HSlist *demerge_stack(HSlistNode *bottom, HSlist *stack)
{
  HArena *arena = stack->arena;
  HSlist *copy = h_slist_new(arena);

  // 'link' always addresses the pointer that must receive the next node,
  // so the copy keeps the top-to-bottom order of 'stack'
  HSlistNode **link = &copy->head;
  HSlistNode *src = stack->head;
  while(src != NULL) {
    HSlistNode *dup = h_arena_malloc(arena, sizeof(HSlistNode));
    dup->elem = src->elem;
    dup->next = NULL;

    *link = dup;
    link = &dup->next;
    src = src->next;
  }

  // hang the ancestor's stack below the copied nodes
  *link = bottom;

  return copy;
}
static inline HLREngine *respawn(HLREngine *eng, HSlist *stack)
{
  // Overwriting eng->stack in place is acceptable: once an engine has been
  // merged it is not used for anything else.
  HSlistNode *ancestor_top = eng->stack->head;

  // new stack = copy of 'stack' placed on top of the ancestor's own nodes
  eng->stack = demerge_stack(ancestor_top, stack);

  return eng;
}
// Make sure 'engine' has at least 'depth' nodes of its own stack available.
// If it is a merged engine and its stack runs out first, both ancestors are
// respawned with the merged engine's stack copied on top of theirs. One
// ancestor is stepped with 'action' right away (via glr_step, which may add it
// to 'engines' or store a result), the other is returned to the caller.
// Engines that are not merged are returned unchanged.
// NOTE(review): glr_step passes action->production.length as 'depth', while
// the loop below advances one HSlistNode per iteration -- confirm how many
// nodes the engine stack holds per grammar symbol.
static HLREngine *
demerge(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action, size_t depth)
{
// no-op on engines that are not merged
if(!engine->merged[0])
return engine;
HSlistNode *p = engine->stack->head;
for(size_t i=0; i<depth; i++) {
// if stack hits bottom, respawn ancestors
if(p == NULL) {
HLREngine *a = respawn(engine->merged[0], engine->stack);
HLREngine *b = respawn(engine->merged[1], engine->stack);
// continue demerge until final depth reached
// (the ancestors may themselves be merged engines, hence the recursion)
a = demerge(result, engines, a, action, depth-i);
b = demerge(result, engines, b, action, depth-i);
// step and stow one ancestor...
glr_step(result, engines, a, action);
// ...and return the other
return b;
}
p = p->next;
}
return engine; // there is enough stack before the merge point
}
/* Forking engines (on conflicts) */
HLREngine *fork_engine(const HLREngine *engine)
{
HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine));
eng2->table = engine->table;
eng2->state = engine->state;
eng2->input = engine->input;
// shallow-copy the stack
// this works because h_slist_push and h_slist_drop never modify
// the underlying structure of HSlistNodes, only the head pointer.
// in fact, this gives us prefix sharing for free.
eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist));
*eng2->stack = *engine->stack;
eng2->arena = engine->arena;
eng2->tarena = engine->tarena;
return eng2;
}
static const HLRAction *
handle_conflict(HParseResult **result, HSlist *engines,
                const HLREngine *engine, const HSlist *branches)
{
  const HSlistNode *first = branches->head;

  // a conflict must offer at least two alternatives
  assert(first);
  assert(first->next);    // consistency check only

  // every alternative after the first gets an engine of its own, which is
  // stepped once with that alternative and (possibly) stored in 'engines'
  HSlistNode *alt = first->next;
  while(alt) {
    HLRAction *act = alt->elem;
    HLREngine *forked = fork_engine(engine);
    glr_step(result, engines, forked, act);
    alt = alt->next;
  }

  // the first alternative is left for the original engine
  return first->elem;
}
/* GLR driver */
// Advance one engine by one action.
//   result  - receives the parse result when an engine stops in HLR_SUCCESS
//   engines - list collecting the engines that keep running after this step
//   engine  - the engine to advance
//   action  - the action to perform (may be NULL)
// Returns the value of h_lrengine_step, i.e. whether the engine keeps running.
static bool glr_step(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action)
{
// handle forks and demerges (~> spawn engines)
if(action) {
if(action->type == HLR_CONFLICT) {
// fork engine on conflicts
// NOTE(review): the action returned here is not passed through demerge()
// even if it is a reduce -- confirm this is intended.
action = handle_conflict(result, engines, engine, action->branches);
} else if(action->type == HLR_REDUCE) {
// demerge/respawn as needed
size_t depth = action->production.length;
engine = demerge(result, engines, engine, action, depth);
}
}
bool run = h_lrengine_step(engine, action);
if(run) {
// store engine in the list, merge if necessary
// (at most one engine per state is kept in 'engines')
HSlistNode *x;
for(x=engines->head; x; x=x->next) {
HLREngine *eng = x->elem;
if(eng->state == engine->state) {
x->elem = lrengine_merge(eng, engine);
break;
}
}
if(!x) // no merge happened
h_slist_push(engines, engine);
} else if(engine->state == HLR_SUCCESS) {
// save the result
*result = h_lrengine_result(engine);
}
return run;
}
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // holds the results
  HArena *tarena = h_new_arena(mm__, 0);    // scratch space, deleted below

  // two engine lists: 'current' is drained during a round while 'next'
  // collects the engines that survive it; they trade places after each round
  HSlist *current = h_slist_new(tarena);
  HSlist *next    = h_slist_new(tarena);

  // the parse starts with a single engine
  h_slist_push(current, h_lrengine_new(arena, tarena, table, stream));

  HParseResult *result = NULL;
  for(;;) {
    if(result != NULL || h_slist_empty(current))
      break;

    assert(h_slist_empty(next));

    // give every engine of this round one step
    do {
      HLREngine *engine = h_slist_pop(current);
      const HLRAction *action = h_lrengine_action(engine);
      glr_step(&result, next, engine, action);
    } while(!h_slist_empty(current));

    // trade places
    HSlist *drained = current;
    current = next;
    next = drained;
  }

  // without a result nothing refers to the result arena any more
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);

  return result;
}
// Backend entry points of the GLR backend: compile, parse and free.
HParserBackendVTable h__glr_backend_vtable = {
.compile = h_glr_compile,
.parse = h_glr_parse,
.free = h_glr_free
};
// dummy!
// Manual smoke test: builds an ambiguous expression grammar, prints the
// grammar, the LR(0) DFA, the LR(0) table and the table produced for PB_GLR,
// then parses "d+d+d" and prints the result.
// Returns 0 on completion, 1 if the grammar could not be built and 2 if the
// parser does not compile with the GLR backend.
int test_glr(void)
{
  HAllocator *mm__ = &system_allocator;

  /*
     E -> E '+' E
        | 'd'
  */

  HParser *d = h_ch('d');
  HParser *E = h_indirect();
  HParser *E_ = h_choice(h_sequence(E, h_ch('+'), E, NULL), d, NULL);
  h_bind_indirect(E, E_);
  HParser *p = E;

  printf("\n==== G R A M M A R ====\n");
  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
  if(g == NULL) {
    fprintf(stderr, "h_cfgrammar failed\n");
    return 1;
  }
  h_pprint_grammar(stdout, g, 0);

  printf("\n==== D F A ====\n");
  HLRDFA *dfa = h_lr0_dfa(g);
  if(dfa)
    h_pprint_lrdfa(stdout, g, dfa, 0);
  else
    fprintf(stderr, "h_lr0_dfa failed\n");    // name the function actually called

  printf("\n==== L R ( 0 ) T A B L E ====\n");
  // without a DFA there is nothing to build the table from
  HLRTable *table0 = dfa ? h_lr0_table(g, dfa) : NULL;
  if(table0) {
    h_pprint_lrtable(stdout, g, table0, 0);
    // only release a table that was actually created
    h_lrtable_free(table0);
  } else {
    fprintf(stderr, "h_lr0_table failed\n");
  }

  printf("\n==== L A L R T A B L E ====\n");
  if(h_compile(p, PB_GLR, NULL)) {
    fprintf(stderr, "does not compile\n");
    return 2;
  }
  h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);

  printf("\n==== P A R S E R E S U L T ====\n");
  HParseResult *res = h_parse(p, (uint8_t *)"d+d+d", 5);
  if(res)
    h_pprint(stdout, res->ast, 0, 2);
  else
    printf("no parse\n");

  return 0;
}

View file

@ -1,427 +1,6 @@
#include <assert.h>
#include "../internal.h"
#include "../cfgrammar.h"
#include "../parsers/parser_internal.h"
#include "contextfree.h"
/* Data structures */
typedef HHashSet HLRState; // states are sets of LRItems
// The LR DFA as produced by h_lr0_dfa: an array of states (item sets) plus a
// list of HLRTransition records connecting them.
typedef struct HLRDFA_ {
size_t nstates;
const HLRState **states; // array of size nstates
HSlist *transitions;
} HLRDFA;
// One edge of the DFA: state 'from' --symbol--> state 'to'.
typedef struct HLRTransition_ {
size_t from; // index into 'states' array
const HCFChoice *symbol;
size_t to; // index into 'states' array
} HLRTransition;
// An LR item: the production lhs -> rhs together with a mark position.
// h_lritem_new asserts mark <= len; mark == len means the mark is at the end.
typedef struct HLRItem_ {
HCFChoice *lhs;
HCFChoice **rhs; // NULL-terminated
size_t len; // number of elements in rhs
size_t mark;
} HLRItem;
// A parse table entry: either a shift to another state or a reduction by a
// production. 'type' selects which member of the anonymous union is valid.
typedef struct HLRAction_ {
enum {HLR_SHIFT, HLR_REDUCE} type;
union {
size_t nextstate; // used with SHIFT
struct {
HCFChoice *lhs; // symbol carrying semantic actions etc.
size_t length; // # of symbols in rhs
#ifndef NDEBUG
HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse
#endif
} production; // used with REDUCE
};
} HLRAction;
// The LR parse table: one row per DFA state.
// Created by h_lrtable_new and released by h_lrtable_free; rows, forall and
// inadeq are allocated from 'arena', the struct itself from 'mm__'.
typedef struct HLRTable_ {
size_t nrows;
HHashTable **rows; // map symbols to HLRActions
HLRAction **forall; // shortcut to set an action for an entire row
HCFChoice *start; // start symbol
HSlist *inadeq; // indices of any inadequate states
HArena *arena;
HAllocator *mm__;
} HLRTable;
// Bookkeeping for the "enhanced grammar", whose symbols correspond to
// transitions of the LR DFA (see tmap/smap below).
typedef struct HLREnhGrammar_ {
HCFGrammar *grammar; // enhanced grammar
HHashTable *tmap; // maps transitions to enhanced-grammar symbols
HHashTable *smap; // maps enhanced-grammar symbols to transitions
HHashTable *corr; // maps symbols to sets of corresponding e. symbols
HArena *arena;
} HLREnhGrammar;
// XXX move to internal.h or something
// XXX replace other hashtable iterations with this
// Hash table iteration macros.
// H_FOREACH_ opens a block that visits every entry of HT: all 'capacity'
// buckets and, per bucket, the chain linked through 'next'. Entries whose key
// is NULL are skipped. The block MUST be closed with H_END_FOREACH, which
// supplies the three closing braces.
#define H_FOREACH_(HT) { \
const HHashTable *ht__ = HT; \
for(size_t i__=0; i__ < ht__->capacity; i__++) { \
for(HHashTableEntry *hte__ = &ht__->contents[i__]; \
hte__; \
hte__ = hte__->next) { \
if(hte__->key == NULL) continue;
// Like H_FOREACH_, additionally declaring the key. KEYVAR is a declaration
// such as "HLRItem *item"; it is prefixed with 'const'.
#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT) \
const KEYVAR = hte__->key;
// Like H_FOREACH_KEY, additionally declaring the value; VALVAR is a
// declaration such as "HLRState *neighbor".
#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR) \
VALVAR = hte__->value;
// Closes a block opened by any of the macros above.
#define H_END_FOREACH \
} \
} \
}
// compare symbols - terminals by value, others by pointer
static bool eq_symbol(const void *p, const void *q)
{
  const HCFChoice *lhs = p;
  const HCFChoice *rhs = q;

  // identical pointers are always equal (the only way nonterminals match)
  if(lhs == rhs)
    return true;

  // all end-of-input symbols are the same terminal
  if(lhs->type == HCF_END)
    return rhs->type == HCF_END;

  // character terminals are compared by the character they stand for
  if(lhs->type == HCF_CHAR)
    return rhs->type == HCF_CHAR && lhs->chr == rhs->chr;

  return false;
}
// hash symbols - terminals by value, others by pointer
static HHashValue hash_symbol(const void *p)
{
  const HCFChoice *sym = p;

  // must agree with eq_symbol: terminals hash by value, the rest by address
  switch(sym->type) {
  case HCF_END:
    return 0;
  case HCF_CHAR:
    return sym->chr * 33;
  default:
    return h_hash_ptr(p);
  }
}
// compare LALR items by value
static bool eq_lalr_item(const void *p, const void *q)
{
  const HLRItem *a = p;
  const HLRItem *b = q;

  // cheap checks first: same left-hand side, same mark, same rhs length
  const bool same_shape = eq_symbol(a->lhs, b->lhs)
                          && a->mark == b->mark
                          && a->len == b->len;
  if(!same_shape)
    return false;

  // then the right-hand sides, symbol by symbol
  size_t i = 0;
  while(i < a->len) {
    if(!eq_symbol(a->rhs[i], b->rhs[i]))
      return false;
    i++;
  }

  return true;
}
// compare LALR item sets (DFA states)
// Equality on item sets; delegates to h_hashset_equal.
// Used (with hash_lalr_itemset) for the set of states in h_lr0_dfa.
static inline bool eq_lalr_itemset(const void *p, const void *q)
{
return h_hashset_equal(p, q);
}
// hash LALR items
static inline HHashValue hash_lalr_item(const void *p)
{
  const HLRItem *item = p;

  // sum of the symbol hashes (lhs, then every rhs symbol up to the NULL
  // terminator) plus the mark position
  HHashValue sum = 0;
  sum += hash_symbol(item->lhs);
  for(size_t k = 0; item->rhs[k] != NULL; k++)
    sum += hash_symbol(item->rhs[k]);
  sum += item->mark;

  return sum;
}
// hash LALR item sets (DFA states) - hash the elements and sum
// Hash of an item set: the sum of hash_lalr_item over all its elements.
// Being a plain sum, the value does not depend on iteration order.
static HHashValue hash_lalr_itemset(const void *p)
{
HHashValue hash = 0;
H_FOREACH_KEY((const HHashSet *)p, HLRItem *item)
hash += hash_lalr_item(item);
H_END_FOREACH
return hash;
}
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
{
  HLRItem *item = h_arena_malloc(a, sizeof(HLRItem));

  // the right-hand side is NULL-terminated; count its symbols
  size_t n = 0;
  while(rhs[n] != NULL)
    n++;

  // the mark sits before one of the symbols or at the very end
  assert(mark <= n);

  item->lhs  = lhs;
  item->rhs  = rhs;
  item->len  = n;
  item->mark = mark;

  return item;
}
// Create an empty DFA state: a hash set of HLRItems, compared and hashed by
// value via eq_lalr_item / hash_lalr_item.
static inline HLRState *h_lrstate_new(HArena *arena)
{
return h_hashset_new(arena, eq_lalr_item, hash_lalr_item);
}
// Allocate an empty parse table with 'nrows' rows.
// The table struct itself comes from 'mm__'; everything it points to lives in
// a private arena that h_lrtable_free deletes. Every row starts out as an
// empty symbol->action map without a "forall" action, the list of inadequate
// states starts out empty, and no start symbol is set yet (callers such as
// h_lr0_table fill it in).
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
{
  HArena *arena = h_new_arena(mm__, 0); // default blocksize
  assert(arena != NULL);

  HLRTable *ret = h_new(HLRTable, 1);
  ret->nrows = nrows;
  ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
  ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
  ret->start = NULL;    // do not leave this field indeterminate
  ret->inadeq = h_slist_new(arena);
  ret->arena = arena;
  ret->mm__ = mm__;

  for(size_t i=0; i<nrows; i++) {
    ret->rows[i] = h_hashtable_new(arena, eq_symbol, hash_symbol);
    ret->forall[i] = NULL;
  }

  return ret;
}
// Release a table created by h_lrtable_new: first its arena (rows, actions,
// list of inadequate states), then the table struct itself.
// Passing NULL is allowed and does nothing, like free(NULL).
void h_lrtable_free(HLRTable *table)
{
  if(table == NULL)
    return;

  // fetch the allocator before the table goes away; h_free uses 'mm__'
  HAllocator *mm__ = table->mm__;
  h_delete_arena(table->arena);
  h_free(table);
}
/* Constructing the characteristic automaton (handle recognizer) */
static HLRItem *advance_mark(HArena *arena, const HLRItem *item)
{
  // there must be a symbol after the mark to step over
  assert(item->rhs[item->mark] != NULL);

  // same production, mark moved one position to the right
  HLRItem *advanced = h_arena_malloc(arena, sizeof(HLRItem));
  *advanced = *item;
  advanced->mark = item->mark + 1;

  return advanced;
}
// Expand the item set 'items' in place to its closure: for every item whose
// mark stands before a HCF_CHOICE or HCF_CHARSET symbol, add the initial items
// (mark 0) of that symbol's productions, and repeat until nothing new appears.
static void expand_to_closure(HCFGrammar *g, HHashSet *items)
{
HAllocator *mm__ = g->mm__;
HArena *arena = g->arena;
HSlist *work = h_slist_new(arena);
// initialize work list with items
H_FOREACH_KEY(items, HLRItem *item)
h_slist_push(work, (void *)item);
H_END_FOREACH
while(!h_slist_empty(work)) {
const HLRItem *item = h_slist_pop(work);
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
// if there is a non-terminal after the mark, follow it
// NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) {
// add items corresponding to the productions of sym
if(sym->type == HCF_CHOICE) {
for(HCFSequence **p=sym->seq; *p; p++) {
HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0);
// only items not seen before need to be processed further
if(!h_hashset_present(items, it)) {
h_hashset_put(items, it);
h_slist_push(work, it);
}
}
} else { // HCF_CHARSET
// a charset is treated as a choice of one-character productions
for(unsigned int i=0; i<256; i++) {
if(charset_isset(sym->charset, i)) {
// XXX allocate these single-character symbols statically somewhere
// NOTE(review): these two h_new allocations use mm__ rather than the
// arena and are not released in the code visible here; only type and
// chr of the new symbol are assigned -- confirm intended lifetime.
HCFChoice **rhs = h_new(HCFChoice *, 2);
rhs[0] = h_new(HCFChoice, 1);
rhs[0]->type = HCF_CHAR;
rhs[0]->chr = i;
rhs[1] = NULL;
HLRItem *it = h_lritem_new(arena, sym, rhs, 0);
h_hashset_put(items, it);
// single-character item needs no further work
}
}
// if sym is a non-terminal, we need a reshape on it
// this seems as good a place as any to set it
sym->reshape = h_act_first;
}
}
}
}
// Build the LR(0) DFA of grammar 'g'.
// States are closed item sets, numbered in order of discovery starting with
// the start state at index 0. All allocations come from g->arena.
// Returns the DFA: an array of states plus a list of HLRTransition records.
HLRDFA *h_lr0_dfa(HCFGrammar *g)
{
HArena *arena = g->arena;
// 'states' is used as a hash table: it maps item sets to their indices
HHashSet *states = h_hashset_new(arena, eq_lalr_itemset, hash_lalr_itemset);
// maps itemsets to assigned array indices
HSlist *transitions = h_slist_new(arena);
// list of states that need to be processed
// to save lookups, we push two elements per state, the itemset and its
// assigned index.
HSlist *work = h_slist_new(arena);
// make initial state (kernel)
HLRState *start = h_lrstate_new(arena);
assert(g->start->type == HCF_CHOICE);
for(HCFSequence **p=g->start->seq; *p; p++)
h_hashset_put(start, h_lritem_new(arena, g->start, (*p)->items, 0));
expand_to_closure(g, start);
// the start state gets index 0
h_hashtable_put(states, start, 0);
h_slist_push(work, start);
h_slist_push(work, 0);
// while work to do (on some state)
// determine edge symbols
// for each edge symbol:
// advance respective items -> destination state (kernel)
// compute closure
// if destination is a new state:
// add it to state set
// add transition to it
// add it to the work list
while(!h_slist_empty(work)) {
// pop in reverse order of the pushes: index first, then the item set
size_t state_idx = (uintptr_t)h_slist_pop(work);
HLRState *state = h_slist_pop(work);
// maps edge symbols to neighbor states (item sets) of s
HHashTable *neighbors = h_hashtable_new(arena, eq_symbol, hash_symbol);
// iterate over state (closure) and generate neighboring sets
H_FOREACH_KEY(state, HLRItem *item)
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
if(sym != NULL) { // mark was not at the end
// find or create prospective neighbor set
HLRState *neighbor = h_hashtable_get(neighbors, sym);
if(neighbor == NULL) {
neighbor = h_lrstate_new(arena);
h_hashtable_put(neighbors, sym, neighbor);
}
// ...and add the advanced item to it
h_hashset_put(neighbor, advance_mark(arena, item));
}
H_END_FOREACH
// merge expanded neighbor sets into the set of existing states
H_FOREACH(neighbors, HCFChoice *symbol, HLRState *neighbor)
expand_to_closure(g, neighbor);
// look up existing state, allocate new if not found
size_t neighbor_idx;
if(!h_hashset_present(states, neighbor)) {
// new state: its index is the number of states known so far
neighbor_idx = states->used;
h_hashtable_put(states, neighbor, (void *)(uintptr_t)neighbor_idx);
h_slist_push(work, neighbor);
h_slist_push(work, (void *)(uintptr_t)neighbor_idx);
} else {
neighbor_idx = (uintptr_t)h_hashtable_get(states, neighbor);
}
// add transition "state --symbol--> neighbor"
HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
t->from = state_idx;
t->to = neighbor_idx;
t->symbol = symbol;
h_slist_push(transitions, t);
H_END_FOREACH
} // end while(work)
// fill DFA struct
HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA));
dfa->nstates = states->used;
dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *));
// place every state at the array position given by its assigned index
H_FOREACH(states, HLRState *state, void *v)
size_t idx = (uintptr_t)v;
dfa->states[idx] = state;
H_END_FOREACH
dfa->transitions = transitions;
return dfa;
}
/* LR(0) table generation */
static HLRAction *shift_action(HArena *arena, size_t nextstate)
{
  // a shift only needs to know the state to continue in
  HLRAction *act = h_arena_malloc(arena, sizeof(HLRAction));

  act->type = HLR_SHIFT;
  act->nextstate = nextstate;

  return act;
}
static HLRAction *reduce_action(HArena *arena, const HLRItem *item)
{
  // a reduction by the production that 'item' belongs to
  HLRAction *act = h_arena_malloc(arena, sizeof(HLRAction));

  act->type = HLR_REDUCE;
  act->production.length = item->len;
  act->production.lhs = item->lhs;
#ifndef NDEBUG
  // the rhs is kept in debug builds only
  act->production.rhs = item->rhs;
#endif

  return act;
}
// Build the LR(0) parse table for grammar 'g' from its DFA.
// Every transition becomes a shift entry; every state containing an item with
// the mark at the end gets a reduce action for its entire row ('forall').
// States where such a reduce collides with another reduce or with shift
// entries are recorded in table->inadeq.
// The caller owns the returned table (see h_lrtable_free).
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
{
HAllocator *mm__ = g->mm__;
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
HArena *arena = table->arena;
// remember start symbol
table->start = g->start;
// add shift entries
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
// for each transition x-A->y, add "shift, goto y" to table entry (x,A)
HLRTransition *t = x->elem;
HLRAction *action = shift_action(arena, t->to);
h_hashtable_put(table->rows[t->from], t->symbol, action);
}
// add reduce entries, record inadequate states
for(size_t i=0; i<dfa->nstates; i++) {
// find reducible items in state
H_FOREACH_KEY(dfa->states[i], HLRItem *item)
if(item->mark == item->len) { // mark at the end
// check for conflicts
// XXX store more informative stuff in the inadeq records?
// NB: a state is pushed once per conflicting item, so the same index can
// appear in 'inadeq' more than once
if(table->forall[i]) {
// reduce/reduce conflict with a previous item
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
} else if(!h_hashtable_empty(table->rows[i])) {
// shift/reduce conflict with one of the row's entries
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
}
// set reduce action for the entire row
// (on a reduce/reduce conflict this replaces the earlier action)
table->forall[i] = reduce_action(arena, item);
}
H_END_FOREACH
}
return table;
}
#include "lr.h"
@ -434,9 +13,23 @@ static inline size_t seqsize(void *p_)
return n+1;
}
static HLRAction *
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
{
  // end of input has a dedicated branch in the row's terminal map
  if(symbol->type == HCF_END)
    return table->tmap[state]->end_branch;

  // a character is looked up as a string of length one
  if(symbol->type == HCF_CHAR)
    return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);

  // everything else is a nonterminal
  return h_hashtable_get(table->ntmap[state], symbol);
}
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
HLRAction *action = h_hashtable_get(table->rows[x], A);
HLRAction *action = lrtable_lookup(table, x, A);
assert(action != NULL);
assert(action->type == HLR_SHIFT);
return action->nextstate;
@ -489,19 +82,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
xAy->seq = seq;
}
static bool eq_transition(const void *p, const void *q)
{
  const HLRTransition *lhs = p;
  const HLRTransition *rhs = q;

  // transitions are equal if they connect the same two states...
  if(lhs->from != rhs->from)
    return false;
  if(lhs->to != rhs->to)
    return false;

  // ...over the same symbol
  return eq_symbol(lhs->symbol, rhs->symbol);
}
// Hash a transition: symbol hash plus both state indices.
// Uses hash_symbol for the symbol, matching the eq_symbol comparison in
// eq_transition.
static HHashValue hash_transition(const void *p)
{
const HLRTransition *t = p;
return (hash_symbol(t->symbol) + t->from + t->to); // XXX ?
}
HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
{
HArena *arena = eg->arena;
HCFChoice *esym = h_arena_malloc(arena, sizeof(HCFChoice));
@ -509,13 +90,14 @@ HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
HHashSet *cs = h_hashtable_get(eg->corr, sym);
if(!cs) {
cs = h_hashset_new(arena, eq_symbol, hash_symbol);
cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
h_hashtable_put(eg->corr, sym, cs);
}
h_hashset_put(cs, esym);
return esym;
}
static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa,
const HLRTable *table)
{
@ -523,9 +105,9 @@ static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa,
HArena *arena = g->arena;
HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
eg->tmap = h_hashtable_new(arena, eq_transition, hash_transition);
eg->tmap = h_hashtable_new(arena, h_eq_transition, h_hash_transition);
eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
eg->corr = h_hashtable_new(arena, eq_symbol, hash_symbol);
eg->corr = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
// XXX must use h_eq/hash_ptr for symbols! so enhanced CHARs are different
eg->arena = arena;
@ -562,32 +144,60 @@ static inline bool has_conflicts(HLRTable *table)
return !h_slist_empty(table->inadeq);
}
// place a new entry in tbl; records conflicts in tbl->inadeq
// for each lookahead symbol (fs), put action into tmap
// returns 0 on success, -1 on conflict
// ignores forall entries
int h_lrtable_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action)
static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
{
HLRAction *prev = h_hashtable_get(tbl->rows[state], x);
if(prev && prev != action) {
// conflict
h_slist_push(tbl->inadeq, (void *)(uintptr_t)state);
return -1;
} else {
h_hashtable_put(tbl->rows[state], x, action);
return 0;
int ret = 0;
if(fs->epsilon_branch) {
HLRAction *prev = tmap->epsilon_branch;
if(prev && prev != action) {
// conflict
tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
} else {
tmap->epsilon_branch = action;
}
}
if(fs->end_branch) {
HLRAction *prev = tmap->end_branch;
if(prev && prev != action) {
// conflict
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
} else {
tmap->end_branch = action;
}
}
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
if(!tmap_) {
tmap_ = h_stringmap_new(tmap->arena);
h_hashtable_put(tmap->char_branches, key, tmap_);
}
if(terminals_put(tmap_, fs_, action) < 0)
ret = -1;
H_END_FOREACH
return ret;
}
// check whether a sequence of enhanced-grammar symbols (p) matches the given
// (original-grammar) production rhs and terminates in the given end state.
bool match_production(HLREnhGrammar *eg, HCFChoice **p,
HCFChoice **rhs, size_t endstate)
static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
HCFChoice **rhs, size_t endstate)
{
size_t state = endstate; // initialized to end in case of empty rhs
for(; *p && *rhs; p++, rhs++) {
HLRTransition *t = h_hashtable_get(eg->smap, *p);
assert(t != NULL);
if(!eq_symbol(t->symbol, *rhs))
if(!h_eq_symbol(t->symbol, *rhs))
return false;
state = t->to;
}
@ -597,7 +207,7 @@ bool match_production(HLREnhGrammar *eg, HCFChoice **p,
// desugar parser with a fresh start symbol
// this guarantees that the start symbol will not occur in any productions
static HCFChoice *augment(HAllocator *mm__, HParser *parser)
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
{
HCFChoice *augmented = h_new(HCFChoice, 1);
@ -621,7 +231,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR"
HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, parser));
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free)
return -1;
@ -653,6 +263,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
for(HSlistNode *x=inadeq->head; x; x=x->next) {
size_t state = (uintptr_t)x->elem;
bool inadeq = false;
// clear old forall entry, it's being replaced by more fine-grained ones
table->forall[state] = NULL;
@ -663,7 +274,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
continue;
// action to place in the table cells indicated by lookahead
HLRAction *action = reduce_action(arena, item);
HLRAction *action = h_reduce_action(arena, item);
// find all LR(0)-enhanced productions matching item
HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
@ -684,23 +295,13 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
assert(!h_stringmap_empty(fs));
// for each lookahead symbol, put action into table cell
if(fs->end_branch) {
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_END;
h_lrtable_put(table, state, terminal, action);
}
H_FOREACH(fs->char_branches, void *key, HStringMap *m)
if(!m->epsilon_branch)
continue;
HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
terminal->type = HCF_CHAR;
terminal->chr = key_char((HCharKey)key);
h_lrtable_put(table, state, terminal, action);
H_END_FOREACH // lookahead character
if(terminals_put(table->tmap[state], fs, action) < 0)
inadeq = true;
} H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item
if(inadeq)
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
}
}
@ -719,273 +320,6 @@ void h_lalr_free(HParser *parser)
/* LR driver */
const HLRAction *
h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
{
  assert(state < table->nrows);

  const HLRAction *row_action = table->forall[state];

  // no row-wide action: consult the per-symbol entries
  if(row_action == NULL)
    return h_hashtable_get(table->rows[state], symbol);

  // a row-wide action next to per-symbol entries would be a conflict
  assert(h_hashtable_empty(table->rows[state]));
  return row_action;
}
// Table-driven LR parse of 'stream' using the table stored in
// parser->backend_data.
// Returns the parse result (allocated in its own arena) or NULL if there is no
// table or the input does not parse.
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
HLRTable *table = parser->backend_data;
if(!table)
return NULL;
HArena *arena = h_new_arena(mm__, 0); // will hold the results
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
HSlist *left = h_slist_new(tarena); // left stack; reductions happen here
HSlist *right = h_slist_new(tarena); // right stack; input appears here
// stack layout:
// on the left stack, we put pairs: (saved state, semantic value)
// on the right stack, we put pairs: (symbol, semantic value)
// run while the recognizer finds handles in the input
size_t state = 0;
while(1) {
// make sure there is input on the right stack
if(h_slist_empty(right)) {
// XXX use statically-allocated terminal symbols
HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
HParsedToken *v;
uint8_t c = h_read_bits(stream, 8, false);
if(stream->overrun) { // end of input
x->type = HCF_END;
v = NULL;
} else {
x->type = HCF_CHAR;
x->chr = c;
// NOTE(review): only token_type and uint are assigned for this token;
// index and bit_offset are not set here -- confirm that is acceptable.
v = h_arena_malloc(arena, sizeof(HParsedToken));
v->token_type = TT_UINT;
v->uint = c;
}
// value first, then symbol, so that the symbol ends up on top
h_slist_push(right, v);
h_slist_push(right, x);
}
// peek at input symbol on the right side
HCFChoice *symbol = right->head->elem;
// table lookup
const HLRAction *action = h_lr_lookup(table, state, symbol);
if(action == NULL)
break; // no handle recognizable in input, terminate parsing
if(action->type == HLR_SHIFT) {
h_slist_push(left, (void *)(uintptr_t)state);
h_slist_pop(right); // symbol (discard)
h_slist_push(left, h_slist_pop(right)); // semantic value
state = action->nextstate;
} else {
assert(action->type == HLR_REDUCE);
size_t len = action->production.length;
// NB: this 'symbol' (the production's lhs) shadows the lookahead symbol
HCFChoice *symbol = action->production.lhs;
// semantic value of the reduction result
HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken));
value->token_type = TT_SEQUENCE;
value->seq = h_carray_new_sized(arena, len);
// pull values off the left stack, rewinding state accordingly
HParsedToken *v = NULL;
for(size_t i=0; i<len; i++) {
v = h_slist_pop(left);
state = (uintptr_t)h_slist_pop(left);
// collect values in result sequence
// (values come off the stack right-to-left, hence the reversed index)
value->seq->elements[len-1-i] = v;
value->seq->used++;
}
if(v) {
// result position equals position of left-most symbol
value->index = v->index;
value->bit_offset = v->bit_offset;
} else {
// XXX how to get the position in this case?
}
// perform token reshape if indicated
if(symbol->reshape)
value = (HParsedToken *)symbol->reshape(make_result(arena, value));
// call validation and semantic action, if present
if(symbol->pred && !symbol->pred(make_result(tarena, value)))
break; // validation failed -> no parse
if(symbol->action)
value = (HParsedToken *)symbol->action(make_result(arena, value));
// push result (value, symbol) onto the right stack
h_slist_push(right, value);
h_slist_push(right, symbol);
}
}
// parsing was successful iff the start symbol is on top of the right stack
HParseResult *result = NULL;
if(h_slist_pop(right) == table->start) {
// next on the right stack is the start symbol's semantic value
assert(!h_slist_empty(right));
HParsedToken *tok = h_slist_pop(right);
result = make_result(arena, tok);
} else {
// no parse: nothing refers to the result arena any more
h_delete_arena(arena);
result = NULL;
}
h_delete_arena(tarena);
return result;
}
/* Pretty-printers */
// Print an LR item to 'f' as "lhs -> rhs", with a '.' at the mark position.
// Runs of consecutive character terminals are printed as one quoted string; a
// run is interrupted at the mark so that the '.' can be placed.
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
{
h_pprint_symbol(f, g, item->lhs);
fputs(" ->", f);
HCFChoice **x = item->rhs;
HCFChoice **mark = item->rhs + item->mark;
if(*x == NULL) {
// empty right-hand side: only the mark is printed
fputc('.', f);
} else {
while(*x) {
// the separator before a symbol is the mark itself or a blank
if(x == mark)
fputc('.', f);
else
fputc(' ', f);
if((*x)->type == HCF_CHAR) {
// condense character strings
fputc('"', f);
h_pprint_char(f, (*x)->chr);
for(x++; *x; x++) {
if(x == mark)
break;
if((*x)->type != HCF_CHAR)
break;
h_pprint_char(f, (*x)->chr);
}
fputc('"', f);
} else {
h_pprint_symbol(f, g, *x);
x++;
}
}
// mark at the very end of the right-hand side
if(x == mark)
fputs(".", f);
}
}
// Print all items of a DFA state, one per line.
// The first item is printed without indentation (the caller is expected to
// have written a prefix already); all following items are indented by
// 'indent' blanks.
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
const HLRState *state, unsigned int indent)
{
bool first = true;
H_FOREACH_KEY(state, HLRItem *item)
if(!first)
for(unsigned int i=0; i<indent; i++) fputc(' ', f);
first = false;
h_pprint_lritem(f, g, item);
fputc('\n', f);
H_END_FOREACH
}
// Print a transition as "-symbol->N", where N is the index of the target
// state. The source state is not printed.
static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t)
{
  fputs("-", f);
  h_pprint_symbol(f, g, t->symbol);
  // t->to is a size_t, which requires %zu (not %lu)
  fprintf(f, "->%zu", t->to);
}
// Print the DFA to 'f': for every state its index, its items and then one
// line per outgoing transition.
// 'indent' is added to the width of the printed state index to obtain the
// indentation of the state's items.
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
                    const HLRDFA *dfa, unsigned int indent)
{
  for(size_t i=0; i<dfa->nstates; i++) {
    // i is a size_t, which requires %zu (not %lu)
    int written = fprintf(f, "%4zu: ", i);
    // a failed fprintf returns a negative value; do not let that wrap around
    // in the unsigned indentation arithmetic
    unsigned int indent2 = indent + (written > 0 ? (unsigned int)written : 0);
    h_pprint_lrstate(f, g, dfa->states[i], indent2);

    for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
      const HLRTransition *t = x->elem;
      if(t->from == i) {
        // transitions are indented two columns less than the items;
        // written as j+2 < indent2 so that a small indent2 cannot underflow
        for(unsigned int j=0; j+2<indent2; j++) fputc(' ', f);
        pprint_transition(f, g, t);
        fputc('\n', f);
      }
    }
  }
}
// Print a table action: "sN" for a shift to state N, "r(lhs -> rhs)" for a
// reduction. Without the rhs (NDEBUG builds) the length of the production is
// printed in its place.
void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
{
  if(action->type == HLR_SHIFT) {
    // nextstate is a size_t, which requires %zu (not %lu)
    fprintf(f, "s%zu", action->nextstate);
  } else {
    fputs("r(", f);
    h_pprint_symbol(f, g, action->production.lhs);
    fputs(" -> ", f);
#ifdef NDEBUG
    // if we can't print the production, at least print its length
    fprintf(f, "[%zu]", action->production.length);
#else
    HCFSequence seq = {action->production.rhs};
    h_pprint_sequence(f, g, &seq);
#endif
    fputc(')', f);
  }
}
// Print the parse table to 'f', one line per state, each line indented by
// 'indent' blanks: the state index, the row-wide action (if any) and then all
// per-symbol entries. A row that has both is marked with "!!", and its
// entries are abbreviated to the kinds of the two conflicting actions.
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
                      unsigned int indent)
{
  for(size_t i=0; i<table->nrows; i++) {
    for(unsigned int j=0; j<indent; j++) fputc(' ', f);
    // i is a size_t, which requires %zu (not %lu)
    fprintf(f, "%4zu:", i);

    if(table->forall[i]) {
      fputs(" - ", f);
      pprint_lraction(f, g, table->forall[i]);
      fputs(" -", f);
      if(!h_hashtable_empty(table->rows[i]))
        fputs(" !!", f);
    }
    H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action)
      fputc(' ', f); // separator
      h_pprint_symbol(f, g, symbol);
      fputc(':', f);
      if(table->forall[i]) {
        fputc(action->type == HLR_SHIFT? 's' : 'r', f);
        fputc('/', f);
        fputc(table->forall[i]->type == HLR_SHIFT? 's' : 'r', f);
      } else {
        pprint_lraction(f, g, action);
      }
    H_END_FOREACH
    fputc('\n', f);
  }

#if 0
  fputs("inadeq=", f);
  for(HSlistNode *x=table->inadeq->head; x; x=x->next) {
    fprintf(f, "%zu ", (size_t)(uintptr_t)x->elem);
  }
  fputc('\n', f);
#endif
}
HParserBackendVTable h__lalr_backend_vtable = {
.compile = h_lalr_compile,
.parse = h_lr_parse,
@ -998,6 +332,8 @@ HParserBackendVTable h__lalr_backend_vtable = {
// dummy!
int test_lalr(void)
{
HAllocator *mm__ = &system_allocator;
/*
E -> E '-' T
| T
@ -1013,7 +349,7 @@ int test_lalr(void)
HParser *p = E;
printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
if(g == NULL) {
fprintf(stderr, "h_cfgrammar failed\n");
return 1;

View file

@ -21,7 +21,7 @@ typedef struct HLLkTable_ {
/* Interface to look up an entry in the parse table. */
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
HInputStream lookahead)
const HInputStream *stream)
{
const HStringMap *row = h_hashtable_get(table->rows, x);
assert(row != NULL); // the table should have one row for each nonterminal
@ -29,28 +29,7 @@ const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
assert(!row->epsilon_branch); // would match without looking at the input
// XXX cases where this could be useful?
const HStringMap *m = row;
while(m) {
if(m->epsilon_branch) { // input matched
// assert: another lookahead would not bring a more specific match.
// this is for the table generator to ensure.
return m->epsilon_branch;
}
// note the lookahead stream is passed by value, i.e. a copy.
// reading bits from it does not consume them from the real input.
uint8_t c = h_read_bits(&lookahead, 8, false);
if(lookahead.overrun) { // end of input
// XXX assumption of byte-wise grammar and input
return m->end_branch;
}
// no match yet, descend
m = h_stringmap_get_char(m, c);
}
return NULL;
return h_stringmap_get_lookahead(row, *stream);
}
/* Allocate a new parse table. */
@ -321,7 +300,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
seq = h_carray_new(arena);
// look up applicable production in parse table
const HCFSequence *p = h_llk_lookup(table, x, *stream);
const HCFSequence *p = h_llk_lookup(table, x, stream);
if(p == NULL)
goto no_parse;

538
src/backends/lr.c Normal file
View file

@ -0,0 +1,538 @@
#include <assert.h>
#include <ctype.h>
#include "../parsers/parser_internal.h"
#include "lr.h"
/* Comparison and hashing functions */
// Symbol equality. The same object is always equal to itself. Beyond that,
// two end-of-input symbols are equal, and two character terminals are equal
// when they carry the same character. Everything else compares by pointer.
bool h_eq_symbol(const void *p, const void *q)
{
  const HCFChoice *lhs = p;
  const HCFChoice *rhs = q;

  if(lhs == rhs)
    return true;
  if(lhs->type == HCF_END && rhs->type == HCF_END)
    return true;
  if(lhs->type == HCF_CHAR && rhs->type == HCF_CHAR)
    return lhs->chr == rhs->chr;
  return false;
}
// Symbol hash, consistent with h_eq_symbol: end-of-input hashes to 0,
// character terminals hash on their character value, and all remaining
// symbols hash on their address.
HHashValue h_hash_symbol(const void *p)
{
  const HCFChoice *sym = p;

  switch(sym->type) {
  case HCF_END:
    return 0;
  case HCF_CHAR:
    return sym->chr * 33;
  default:
    return h_hash_ptr(p);
  }
}
// Value comparison of two LR items: they are equal when left-hand side,
// mark position, length and every right-hand-side symbol agree
// (symbols are compared with h_eq_symbol).
static bool eq_lr_item(const void *p, const void *q)
{
  const HLRItem *a = p;
  const HLRItem *b = q;

  if(!h_eq_symbol(a->lhs, b->lhs) || a->mark != b->mark || a->len != b->len)
    return false;

  // lengths match; walk both right-hand sides until the first difference
  size_t i = 0;
  while(i < a->len && h_eq_symbol(a->rhs[i], b->rhs[i]))
    i++;

  return i == a->len;
}
// Hash an LR item: the sum of the symbol hashes of the left-hand side and of
// every right-hand-side symbol (up to the NULL terminator), plus the mark.
static inline HHashValue hash_lr_item(const void *p)
{
  const HLRItem *item = p;

  HHashValue sum = 0;
  sum += h_hash_symbol(item->lhs);
  sum += item->mark;
  for(HCFChoice **sym = item->rhs; *sym != NULL; sym++)
    sum += h_hash_symbol(*sym);

  return sum;
}
// compare item sets (DFA states)
// Thin wrapper that delegates to h_hashset_equal(). It has the generic
// (const void *, const void *) signature so it can be handed to
// h_hashset_new() as the equality callback, as h_lr0_dfa() does.
bool h_eq_lr_itemset(const void *p, const void *q)
{
return h_hashset_equal(p, q);
}
// hash LR item sets (DFA states) - hash the elements and sum
// The sum is independent of the order in which the set is iterated, so two
// sets holding the same items produce the same hash value.
// p is treated as a const HHashSet * whose keys are HLRItem pointers.
HHashValue h_hash_lr_itemset(const void *p)
{
HHashValue hash = 0;
// visit every key stored in the set
H_FOREACH_KEY((const HHashSet *)p, HLRItem *item)
hash += hash_lr_item(item);
H_END_FOREACH
return hash;
}
// Transition equality: same source state, same target state and equal edge
// symbol (per h_eq_symbol).
bool h_eq_transition(const void *p, const void *q)
{
  const HLRTransition *a = p;
  const HLRTransition *b = q;

  if(a->from != b->from)
    return false;
  if(a->to != b->to)
    return false;
  return h_eq_symbol(a->symbol, b->symbol);
}
// Transition hash: the edge symbol's hash plus both state indices.
// XXX the quality of this combination has not been evaluated
HHashValue h_hash_transition(const void *p)
{
  const HLRTransition *tr = p;
  HHashValue symhash = h_hash_symbol(tr->symbol);

  return (symhash + tr->from + tr->to);
}
/* Constructors */
// Allocate an LR item "lhs -> rhs" with the mark in front of rhs[mark].
//
// a    - arena the item is allocated from
// lhs  - left-hand-side symbol
// rhs  - NULL-terminated array of right-hand-side symbols; stored by
//        reference, not copied
// mark - position of the mark; must not exceed the number of rhs symbols
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
{
  HLRItem *item = h_arena_malloc(a, sizeof(HLRItem));

  // determine the length of rhs by scanning for the terminator
  size_t n = 0;
  while(rhs[n] != NULL)
    n++;
  assert(mark <= n);

  item->mark = mark;
  item->len = n;
  item->rhs = rhs;
  item->lhs = lhs;

  return item;
}
// Create an empty LR state, i.e. a hash set of HLRItem pointers allocated
// from 'arena'. Items in the set are compared and hashed by value
// (eq_lr_item / hash_lr_item), not by address.
HLRState *h_lrstate_new(HArena *arena)
{
return h_hashset_new(arena, eq_lr_item, hash_lr_item);
}
// Allocate an empty LR parse table with 'nrows' rows (one per DFA state).
//
// The table struct itself comes from the allocator mm__; everything it points
// to lives in a private arena that h_lrtable_free() releases. Every row
// starts with an empty nonterminal map, an empty terminal map and no
// row-wide ("forall") action. The start symbol is set to NULL and is meant to
// be filled in by the table generator.
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
{
  HArena *arena = h_new_arena(mm__, 0); // default blocksize
  assert(arena != NULL);

  HLRTable *ret = h_new(HLRTable, 1);
  ret->nrows = nrows;
  ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
  ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
  ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
  ret->start = NULL;    // was left uninitialized; set by the table generator
  ret->inadeq = h_slist_new(arena);
  ret->arena = arena;
  ret->mm__ = mm__;

  for(size_t i=0; i<nrows; i++) {
    ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
    ret->tmap[i] = h_stringmap_new(arena);
    ret->forall[i] = NULL;
  }

  return ret;
}
// Release a table created by h_lrtable_new(): first the arena holding the
// rows and actions, then the table struct itself.
void h_lrtable_free(HLRTable *table)
{
// NOTE(review): the local must be named mm__ - h_free presumably is a macro
// that picks the allocator up by that name; confirm against its definition.
// It is read before the struct is freed.
HAllocator *mm__ = table->mm__;
h_delete_arena(table->arena);
h_free(table);
}
// Allocate a shift action from 'arena' that moves the parser to 'nextstate'.
HLRAction *h_shift_action(HArena *arena, size_t nextstate)
{
  HLRAction *shift = h_arena_malloc(arena, sizeof(HLRAction));

  shift->nextstate = nextstate;
  shift->type = HLR_SHIFT;

  return shift;
}
// Allocate a reduce action from 'arena' for the production described by
// 'item'. Only the left-hand side and the length of the right-hand side are
// recorded; the right-hand-side symbols are kept in debug builds only.
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item)
{
  HLRAction *reduce = h_arena_malloc(arena, sizeof(HLRAction));

  reduce->type = HLR_REDUCE;
  reduce->production.length = item->len;
  reduce->production.lhs = item->lhs;
#ifndef NDEBUG
  reduce->production.rhs = item->rhs;
#endif

  return reduce;
}
// Combine 'action' and 'new' into a conflict action.
// If 'action' already is an HLR_CONFLICT, 'new' is added to its branches
// (unless that very pointer is already listed) and 'action' is returned.
// Otherwise a fresh HLR_CONFLICT holding both is allocated from 'arena'.
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
{
  if(action->type == HLR_CONFLICT) {
    // nothing to do when 'new' is among the branches already
    for(HSlistNode *node = action->branches->head; node != NULL; node = node->next) {
      if(node->elem == new)
        return action;
    }
    h_slist_push(action->branches, new);
    return action;
  }

  // first conflict on this entry: wrap the existing action
  HLRAction *conflict = h_arena_malloc(arena, sizeof(HLRAction));
  conflict->type = HLR_CONFLICT;
  conflict->branches = h_slist_new(arena);
  h_slist_push(conflict->branches, action);
  h_slist_push(conflict->branches, new);

  return conflict;
}
// True if row i holds neither nonterminal nor terminal entries.
// The row-wide "forall" action is not taken into account.
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
{
  if(!h_hashtable_empty(table->ntmap[i]))
    return false;

  return h_stringmap_empty(table->tmap[i]);
}
/* LR driver */
// Create an LR engine in its initial configuration: state 0, empty stack,
// no merged ancestors, and a private copy of the input stream.
//
// arena  - receives the parse results
// tarena - temporary arena; the engine and its stack are allocated here
// table  - parse table to run
// stream - input; copied by value, the caller's stream is not advanced
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
                          const HInputStream *stream)
{
  HLREngine *eng = h_arena_malloc(tarena, sizeof(HLREngine));

  eng->arena = arena;
  eng->tarena = tarena;
  eng->table = table;
  eng->input = *stream;
  eng->state = 0;
  eng->stack = h_slist_new(tarena);
  eng->merged[0] = NULL;
  eng->merged[1] = NULL;

  return eng;
}
// Find the action for the engine's current state given the upcoming input.
// A row-wide "forall" action takes precedence; otherwise the row's terminal
// map is consulted with 'stream' as lookahead. Returns NULL if no entry
// matches.
static const HLRAction *
terminal_lookup(const HLREngine *engine, const HInputStream *stream)
{
  const HLRTable *tbl = engine->table;
  size_t row = engine->state;
  assert(row < tbl->nrows);

  const HLRAction *rowwide = tbl->forall[row];
  if(rowwide == NULL)
    return h_stringmap_get_lookahead(tbl->tmap[row], *stream);

  // any other entry in this row would be a conflict
  assert(h_lrtable_row_empty(tbl, row));
  return rowwide;
}
// Find the action entered for nonterminal 'symbol' in the engine's current
// state; these entries are the shifts performed after a reduction.
// The row must not carry a "forall" action, since such rows contain only
// reduce entries. Returns NULL if there is no entry.
static const HLRAction *
nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
{
  const HLRTable *tbl = engine->table;
  size_t row = engine->state;

  assert(row < tbl->nrows);
  assert(!tbl->forall[row]);

  return h_hashtable_get(tbl->ntmap[row], symbol);
}
// Determine the next action for 'engine' from its current state and its own
// (private) input position. Does not modify the engine; the result is meant
// to be passed to h_lrengine_step(). Returns NULL if the table has no entry
// for the upcoming input.
const HLRAction *h_lrengine_action(const HLREngine *engine)
{
return terminal_lookup(engine, &engine->input);
}
// Read one byte from the engine's input and wrap it in a TT_UINT token
// allocated from the result arena. Returns NULL at the end of input.
//
// The token records the stream position at which the byte started; the
// reduction code in h_lrengine_step() copies index/bit_offset from the
// left-most value of a production, so they must not be left uninitialized.
// NOTE(review): relies on HInputStream having 'index' and 'bit_offset'
// members; its definition is outside this file - confirm.
static HParsedToken *consume_input(HLREngine *engine)
{
  // snapshot the position before the read advances the stream
  const HInputStream start = engine->input;

  uint8_t c = h_read_bits(&engine->input, 8, false);
  if(engine->input.overrun)     // end of input
    return NULL;

  HParsedToken *v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
  v->token_type = TT_UINT;
  v->uint = c;
  v->index = start.index;
  v->bit_offset = start.bit_offset;

  return v;
}
// Run the LR engine for one round, performing 'action'.
//
// action - the action to perform, as returned by h_lrengine_action();
//          must be a shift or a reduce (conflicts are for the caller to
//          resolve), or NULL if no table entry matched.
//
// Returns true if the engine can continue, false when it is finished:
//  - action was NULL (no handle recognizable in the input),
//  - a validation predicate rejected a reduction,
//  - no shift exists for the reduced symbol (parse error), or
//  - the accepting state HLR_SUCCESS has been reached.
// Use h_lrengine_result() afterwards to tell success from failure.
//
// The stack holds (saved state, semantic value) pairs, value on top.
bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
{
  // short-hand names
  HSlist *stack = engine->stack;
  HArena *arena = engine->arena;
  HArena *tarena = engine->tarena;

  if(action == NULL)
    return false;       // no handle recognizable in input, terminate

  assert(action->type == HLR_SHIFT || action->type == HLR_REDUCE);

  if(action->type == HLR_REDUCE) {
    size_t len = action->production.length;
    HCFChoice *symbol = action->production.lhs;

    // semantic value of the reduction result
    HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken));
    value->token_type = TT_SEQUENCE;
    value->seq = h_carray_new_sized(arena, len);

    // pull values off the stack, rewinding state accordingly
    HParsedToken *v = NULL;
    for(size_t i=0; i<len; i++) {
      v = h_slist_drop(stack);
      engine->state = (uintptr_t)h_slist_drop(stack);

      // collect values in result sequence
      value->seq->elements[len-1-i] = v;
      value->seq->used++;
    }
    if(v) {
      // result position equals position of left-most symbol
      value->index = v->index;
      value->bit_offset = v->bit_offset;
    } else {
      // empty production or no left-most value: fall back to the current
      // input position rather than leaving the fields uninitialized.
      // NOTE(review): relies on HInputStream having 'index' and 'bit_offset'
      // members; its definition is outside this file - confirm.
      value->index = engine->input.index;
      value->bit_offset = engine->input.bit_offset;
    }

    // perform token reshape if indicated
    if(symbol->reshape)
      value = (HParsedToken *)symbol->reshape(make_result(arena, value));

    // call validation and semantic action, if present
    if(symbol->pred && !symbol->pred(make_result(tarena, value)))
      return false;     // validation failed -> no parse; terminate
    if(symbol->action)
      value = (HParsedToken *)symbol->action(make_result(arena, value));

    // this is LR, building a right-most derivation bottom-up, so no reduce can
    // follow a reduce. we can also assume no conflict follows for GLR if we
    // use LALR tables, because only terminal symbols (lookahead) get reduces.
    const HLRAction *shift = nonterminal_lookup(engine, symbol);
    if(shift == NULL)
      return false;     // parse error
    assert(shift->type == HLR_SHIFT);

    // piggy-back the shift right here, never touching the input
    h_slist_push(stack, (void *)(uintptr_t)engine->state);
    h_slist_push(stack, value);
    engine->state = shift->nextstate;

    // check for success
    if(engine->state == HLR_SUCCESS) {
      assert(symbol == engine->table->start);
      return false;
    }
  } else {
    assert(action->type == HLR_SHIFT);
    // value is NULL when the shift consumes the end of input
    HParsedToken *value = consume_input(engine);
    h_slist_push(stack, (void *)(uintptr_t)engine->state);
    h_slist_push(stack, value);
    engine->state = action->nextstate;
  }

  return true;
}
// Produce the parse result of a finished engine.
// The parse succeeded iff the engine reached the end state HLR_SUCCESS; the
// start symbol's semantic value is then on top of the stack and is wrapped
// in a result allocated from the engine's result arena. Returns NULL on
// failure.
HParseResult *h_lrengine_result(HLREngine *engine)
{
  if(engine->state != HLR_SUCCESS)
    return NULL;

  assert(!h_slist_empty(engine->stack));
  HParsedToken *top = engine->stack->head->elem;

  return make_result(engine->arena, top);
}
// Parse 'stream' with the LR table stored in parser->backend_data.
// Returns the parse result, or NULL if the parser has no table or the input
// does not parse. The temporary arena is always released; the result arena
// is released only when there is no result to hand out.
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
  HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);

  // iterate engine to completion
  for(;;) {
    const HLRAction *next = h_lrengine_action(engine);
    if(!h_lrengine_step(engine, next))
      break;
  }

  HParseResult *result = h_lrengine_result(engine);
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);

  return result;
}
/* Pretty-printers */
// Print an LR item as "<lhs> -> <rhs>" with a '.' at the mark position.
// Symbols are separated by a space, except that the mark takes the place of
// the separator in front of the symbol it precedes. Runs of consecutive
// character terminals are condensed into one double-quoted string.
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
{
h_pprint_symbol(f, g, item->lhs);
fputs(" ->", f);
HCFChoice **x = item->rhs; // cursor over the right-hand side
HCFChoice **mark = item->rhs + item->mark; // position the mark precedes
if(*x == NULL) {
// empty right-hand side: only the mark is printed
fputc('.', f);
} else {
while(*x) {
// separator in front of the symbol: the mark, or a space
if(x == mark)
fputc('.', f);
else
fputc(' ', f);
if((*x)->type == HCF_CHAR) {
// condense character strings
fputc('"', f);
h_pprint_char(f, (*x)->chr);
// extend the string until the mark, a non-character symbol, or the end
for(x++; *x; x++) {
if(x == mark)
break;
if((*x)->type != HCF_CHAR)
break;
h_pprint_char(f, (*x)->chr);
}
fputc('"', f);
} else {
h_pprint_symbol(f, g, *x);
x++;
}
}
// x now points at the terminator; print a mark sitting at the very end
if(x == mark)
fputs(".", f);
}
}
// Print the items of an LR state, one per line.
// The first item is printed at the current output position; every following
// item is preceded by 'indent' spaces so that it lines up underneath.
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
                      const HLRState *state, unsigned int indent)
{
  bool pad = false;     // becomes true once the first item has been printed

  H_FOREACH_KEY(state, HLRItem *item)
    if(pad) {
      unsigned int col = 0;
      while(col < indent) {
        fputc(' ', f);
        col++;
      }
    }
    pad = true;
    h_pprint_lritem(f, g, item);
    fputc('\n', f);
  H_END_FOREACH
}
// Print a DFA transition as "-<symbol>-><target state>".
// The source state is not printed; h_pprint_lrdfa() lists transitions
// underneath the state they originate from.
static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t)
{
  fputs("-", f);
  h_pprint_symbol(f, g, t->symbol);
  fprintf(f, "->%zu", t->to);   // t->to is a size_t
}
// Print an LR DFA: for every state its number, its item set, and then the
// transitions leaving it.
//
// 'indent' is the column at which the caller has positioned the output; item
// lines after the first are padded to line up with the first item, and
// transitions are printed two columns to the left of that.
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
                    const HLRDFA *dfa, unsigned int indent)
{
  for(size_t i=0; i<dfa->nstates; i++) {
    // the width of the printed state label adds to the indentation
    unsigned int indent2 = indent + fprintf(f, "%4zu: ", i);
    h_pprint_lrstate(f, g, dfa->states[i], indent2);

    // list the transitions that start in state i
    for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
      const HLRTransition *t = x->elem;
      if(t->from == i) {
        // separate counter name; the original shadowed the state index 'i'
        for(unsigned int j=0; j<indent2-2; j++) fputc(' ', f);
        pprint_transition(f, g, t);
        fputc('\n', f);
      }
    }
  }
}
// Print a single parse-table action.
//   shift:    "s<nextstate>", or "s~" for the accepting state HLR_SUCCESS
//   reduce:   "r(<lhs> -> <rhs>)"; when built with NDEBUG the right-hand side
//             is not stored, so only its length is printed, as "[<length>]"
//   conflict: "!" followed by the branches, separated by '/'
//
// f      - output stream
// g      - grammar used to print symbol names
// action - the action to print
void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
{
  switch(action->type) {
  case HLR_SHIFT:
    if(action->nextstate == HLR_SUCCESS)
      fputs("s~", f);
    else
      fprintf(f, "s%zu", action->nextstate);    // nextstate is a size_t
    break;
  case HLR_REDUCE: {
    // braces give the debug-only 'seq' declaration its own scope, so the
    // later case labels do not jump into the scope of an initialized variable
    fputs("r(", f);
    h_pprint_symbol(f, g, action->production.lhs);
    fputs(" -> ", f);
#ifdef NDEBUG
    // if we can't print the production, at least print its length
    fprintf(f, "[%zu]", action->production.length);
#else
    HCFSequence seq = {action->production.rhs};
    h_pprint_sequence(f, g, &seq);
#endif
    fputc(')', f);
    break;
  }
  case HLR_CONFLICT:
    fputc('!', f);
    for(HSlistNode *x=action->branches->head; x; x=x->next) {
      HLRAction *branch = x->elem;
      assert(branch->type != HLR_CONFLICT); // no nesting
      pprint_lraction(f, g, branch);

      if(x->next) fputc('/', f);            // separator
    }
    break;
  default:
    assert_message(0, "not reached");
  }
}
// Value-printing callback for h_pprint_stringmap(): 'val' is the HLRAction
// stored in the map, 'env' the grammar needed to print its symbols.
static void valprint_lraction(FILE *file, void *env, void *val)
{
  const HCFGrammar *g = env;
  const HLRAction *act = val;

  pprint_lraction(file, g, act);
}
// Print the terminal (lookahead) entries of one table row: the entries of
// 'map' are separated by a space and each stored action is printed via
// valprint_lraction, with the grammar passed along as its environment.
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
const HStringMap *map)
{
// the cast drops const only to fit the callback's void *env parameter
h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map);
}
// Print an LR parse table, one line per row (state).
//
// Each line starts with 'indent' spaces and the row number, followed by:
//  - the row-wide ("forall") action, if any, with "!!" appended when the row
//    also holds other entries (i.e. a conflict),
//  - the nonterminal entries as "<symbol>:<action>",
//  - the terminal (lookahead) entries.
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
                      unsigned int indent)
{
  for(size_t i=0; i<table->nrows; i++) {
    for(unsigned int j=0; j<indent; j++) fputc(' ', f);
    fprintf(f, "%4zu:", i);   // i is a size_t; "%lu" is not portable for it
    if(table->forall[i]) {
      fputc(' ', f);
      pprint_lraction(f, g, table->forall[i]);
      if(!h_lrtable_row_empty(table, i))
        fputs(" !!", f);
    }
    H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
      fputc(' ', f);    // separator
      h_pprint_symbol(f, g, symbol);
      fputc(':', f);
      pprint_lraction(f, g, action);
    H_END_FOREACH
    fputc(' ', f);    // separator
    pprint_lrtable_terminals(f, g, table->tmap[i]);
    fputc('\n', f);
  }

#if 0
  fputs("inadeq=", f);
  for(HSlistNode *x=table->inadeq->head; x; x=x->next) {
    fprintf(f, "%zu ", (size_t)(uintptr_t)x->elem);
  }
  fputc('\n', f);
#endif
}

147
src/backends/lr.h Normal file
View file

@ -0,0 +1,147 @@
#ifndef HAMMER_BACKENDS_LR__H
#define HAMMER_BACKENDS_LR__H
#include "../hammer.h"
#include "../cfgrammar.h"
#include "../internal.h"
typedef HHashSet HLRState; // states are sets of LRItems
// The characteristic automaton (handle recognizer) of a grammar: its states
// (item sets) and the transitions between them. Built by h_lr0_dfa().
typedef struct HLRDFA_ {
size_t nstates;
const HLRState **states; // array of size nstates
HSlist *transitions; // list of HLRTransition pointers
} HLRDFA;
// One edge "from --symbol--> to" of the DFA.
typedef struct HLRTransition_ {
size_t from; // index into 'states' array
const HCFChoice *symbol; // grammar symbol labelling the edge
size_t to; // index into 'states' array
} HLRTransition;
// An LR item: the production "lhs -> rhs" with a mark in front of rhs[mark].
// mark == len means the mark is at the end, i.e. the item is reducible.
typedef struct HLRItem_ {
HCFChoice *lhs;
HCFChoice **rhs; // NULL-terminated
size_t len; // number of elements in rhs
size_t mark; // position of the mark, 0 <= mark <= len
} HLRItem;
// A parse table entry. 'type' selects which member of the anonymous union is
// valid. Conflicts are flat: the branches of an HLR_CONFLICT are shifts and
// reduces, never further conflicts (see h_lr_conflict).
typedef struct HLRAction_ {
enum {HLR_SHIFT, HLR_REDUCE, HLR_CONFLICT} type;
union {
// used with HLR_SHIFT
size_t nextstate; // table row to go to, or HLR_SUCCESS
// used with HLR_REDUCE
struct {
HCFChoice *lhs; // symbol carrying semantic actions etc.
size_t length; // # of symbols in rhs
#ifndef NDEBUG
HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse
#endif
} production;
// used with HLR_CONFLICT
HSlist *branches; // list of possible HLRActions
};
} HLRAction;
// An LR parse table with one row per DFA state. Created by h_lrtable_new(),
// released by h_lrtable_free().
typedef struct HLRTable_ {
size_t nrows; // dimension of the pointer arrays below
HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
HStringMap **tmap; // map lookahead strings to HLRActions, per row
HLRAction **forall; // shortcut to set an action for an entire row
HCFChoice *start; // start symbol
HSlist *inadeq; // indices of any inadequate states
HArena *arena; // owns the rows, maps and actions
HAllocator *mm__; // allocator the table struct itself came from
} HLRTable;
// The "enhanced grammar" together with the maps relating its symbols to DFA
// transitions and to the symbols of the original grammar.
// NOTE(review): not used by any code visible in this file; presumably
// consumed by the LALR table generator - confirm there.
typedef struct HLREnhGrammar_ {
HCFGrammar *grammar; // enhanced grammar
HHashTable *tmap; // maps transitions to enhanced-grammar symbols
HHashTable *smap; // maps enhanced-grammar symbols to transitions
HHashTable *corr; // maps symbols to sets of corresponding e. symbols
HArena *arena;
} HLREnhGrammar;
// The run-time state of one LR parser instance; see h_lrengine_new() and
// h_lrengine_step().
typedef struct HLREngine_ {
const HLRTable *table;
size_t state; // current table row, or HLR_SUCCESS
HSlist *stack; // holds pairs: (saved state, semantic value)
HInputStream input; // private copy of the input stream
struct HLREngine_ *merged[2]; // ancestors merged into this engine
HArena *arena; // will hold the results
HArena *tarena; // tmp, deleted after parse
} HLREngine;
// Pseudo state number marking the accepting state; it is the largest size_t
// value and therefore never a valid row index of a table.
#define HLR_SUCCESS ((size_t)~0) // parser end state
// XXX move to internal.h or something
// XXX replace other hashtable iterations with this
//
// Iteration over the entries of a hash table (or hash set).
//
//   H_FOREACH_KEY(table, KeyType *key)
//     ...statements using key...
//   H_END_FOREACH
//
//   H_FOREACH(table, KeyType *key, ValType *val)
//     ...statements using key and val...
//   H_END_FOREACH
//
// KEYVAR and VALVAR are declarations; KEYVAR is declared const. Entries whose
// key is NULL are skipped. The opening macros open three nested blocks that
// H_END_FOREACH closes, so the two must always be paired. The body sits
// inside the inner for loop: 'continue' moves on to the next entry, 'break'
// only leaves the current bucket's chain. The names ht__, i__ and hte__ are
// used internally and must not be used by the body.
#define H_FOREACH_(HT) { \
const HHashTable *ht__ = HT; \
for(size_t i__=0; i__ < ht__->capacity; i__++) { \
for(HHashTableEntry *hte__ = &ht__->contents[i__]; \
hte__; \
hte__ = hte__->next) { \
if(hte__->key == NULL) continue;
#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT) \
const KEYVAR = hte__->key;
#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR) \
VALVAR = hte__->value;
#define H_END_FOREACH \
} \
} \
}
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark);
HLRState *h_lrstate_new(HArena *arena);
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows);
void h_lrtable_free(HLRTable *table);
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
const HInputStream *stream);
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
HLRAction *h_shift_action(HArena *arena, size_t nextstate);
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
bool h_eq_symbol(const void *p, const void *q);
bool h_eq_lr_itemset(const void *p, const void *q);
bool h_eq_transition(const void *p, const void *q);
HHashValue h_hash_symbol(const void *p);
HHashValue h_hash_lr_itemset(const void *p);
HHashValue h_hash_transition(const void *p);
HLRDFA *h_lr0_dfa(HCFGrammar *g);
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa);
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser);
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
void h_lalr_free(HParser *parser);
const HLRAction *h_lrengine_action(const HLREngine *engine);
bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
HParseResult *h_lrengine_result(HLREngine *engine);
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item);
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
const HLRState *state, unsigned int indent);
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
const HLRDFA *dfa, unsigned int indent);
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
unsigned int indent);
#endif

233
src/backends/lr0.c Normal file
View file

@ -0,0 +1,233 @@
#include <assert.h>
#include "lr.h"
/* Constructing the characteristic automaton (handle recognizer) */
// Return a copy of 'item', allocated from 'arena', whose mark has been moved
// one symbol to the right. The mark must not already be at the end.
static HLRItem *advance_mark(HArena *arena, const HLRItem *item)
{
  assert(item->rhs[item->mark] != NULL);

  HLRItem *next = h_arena_malloc(arena, sizeof(HLRItem));
  next->lhs = item->lhs;
  next->rhs = item->rhs;
  next->len = item->len;
  next->mark = item->mark + 1;

  return next;
}
// Expand the item set 'items' in place to its closure: for every item with a
// nonterminal directly after the mark, add the items for all productions of
// that nonterminal (mark at position 0), repeating until nothing new turns
// up. Character sets are treated as nonterminals with one single-character
// production per member.
// Side effect: every HCF_CHARSET symbol encountered gets its 'reshape' set
// to h_act_first.
static void expand_to_closure(HCFGrammar *g, HHashSet *items)
{
// NOTE(review): mm__ is presumably picked up implicitly by the h_new macro
// used below - confirm against its definition.
HAllocator *mm__ = g->mm__;
HArena *arena = g->arena;
HSlist *work = h_slist_new(arena);
// initialize work list with items
H_FOREACH_KEY(items, HLRItem *item)
h_slist_push(work, (void *)item);
H_END_FOREACH
while(!h_slist_empty(work)) {
const HLRItem *item = h_slist_pop(work);
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
// if there is a non-terminal after the mark, follow it
// NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) {
// add items corresponding to the productions of sym
if(sym->type == HCF_CHOICE) {
for(HCFSequence **p=sym->seq; *p; p++) {
HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0);
// only genuinely new items need to be expanded further
if(!h_hashset_present(items, it)) {
h_hashset_put(items, it);
h_slist_push(work, it);
}
}
} else { // HCF_CHARSET
for(unsigned int i=0; i<256; i++) {
if(charset_isset(sym->charset, i)) {
// XXX allocate these single-character symbols statically somewhere
// NOTE(review): rhs and rhs[0] come from h_new rather than the arena and
// nothing visible here frees them; only 'type' and 'chr' of the new symbol
// are initialized.
HCFChoice **rhs = h_new(HCFChoice *, 2);
rhs[0] = h_new(HCFChoice, 1);
rhs[0]->type = HCF_CHAR;
rhs[0]->chr = i;
rhs[1] = NULL;
HLRItem *it = h_lritem_new(arena, sym, rhs, 0);
h_hashset_put(items, it);
// single-character item needs no further work
}
}
// if sym is a non-terminal, we need a reshape on it
// this seems as good a place as any to set it
sym->reshape = h_act_first;
}
}
}
}
// Construct the LR(0) characteristic automaton of grammar 'g'.
// State 0 is the closure of the items for the start symbol's productions;
// further states are numbered in the order they are discovered. Everything,
// including the returned HLRDFA, is allocated from the grammar's arena.
// The start symbol of 'g' must be of type HCF_CHOICE.
HLRDFA *h_lr0_dfa(HCFGrammar *g)
{
HArena *arena = g->arena;
// 'states' maps itemsets to assigned array indices
HHashSet *states = h_hashset_new(arena, h_eq_lr_itemset, h_hash_lr_itemset);
// maps itemsets to assigned array indices
HSlist *transitions = h_slist_new(arena);
// list of states that need to be processed
// to save lookups, we push two elements per state, the itemset and its
// assigned index.
HSlist *work = h_slist_new(arena);
// make initial state (kernel)
HLRState *start = h_lrstate_new(arena);
assert(g->start->type == HCF_CHOICE);
for(HCFSequence **p=g->start->seq; *p; p++)
h_hashset_put(start, h_lritem_new(arena, g->start, (*p)->items, 0));
expand_to_closure(g, start);
// the initial state gets index 0
h_hashtable_put(states, start, 0);
h_slist_push(work, start);
h_slist_push(work, 0);
// while work to do (on some state)
// determine edge symbols
// for each edge symbol:
// advance respective items -> destination state (kernel)
// compute closure
// if destination is a new state:
// add it to state set
// add transition to it
// add it to the work list
while(!h_slist_empty(work)) {
// pairs were pushed itemset first, index second, so they pop in reverse
size_t state_idx = (uintptr_t)h_slist_pop(work);
HLRState *state = h_slist_pop(work);
// maps edge symbols to neighbor states (item sets) of s
HHashTable *neighbors = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
// iterate over state (closure) and generate neighboring sets
H_FOREACH_KEY(state, HLRItem *item)
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
if(sym != NULL) { // mark was not at the end
// find or create prospective neighbor set
HLRState *neighbor = h_hashtable_get(neighbors, sym);
if(neighbor == NULL) {
neighbor = h_lrstate_new(arena);
h_hashtable_put(neighbors, sym, neighbor);
}
// ...and add the advanced item to it
h_hashset_put(neighbor, advance_mark(arena, item));
}
H_END_FOREACH
// merge expanded neighbor sets into the set of existing states
H_FOREACH(neighbors, HCFChoice *symbol, HLRState *neighbor)
expand_to_closure(g, neighbor);
// look up existing state, allocate new if not found
size_t neighbor_idx;
if(!h_hashset_present(states, neighbor)) {
// the next free index equals the number of states known so far
neighbor_idx = states->used;
h_hashtable_put(states, neighbor, (void *)(uintptr_t)neighbor_idx);
h_slist_push(work, neighbor);
h_slist_push(work, (void *)(uintptr_t)neighbor_idx);
} else {
neighbor_idx = (uintptr_t)h_hashtable_get(states, neighbor);
}
// add transition "state --symbol--> neighbor"
HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
t->from = state_idx;
t->to = neighbor_idx;
t->symbol = symbol;
h_slist_push(transitions, t);
H_END_FOREACH
} // end while(work)
// fill DFA struct
HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA));
dfa->nstates = states->used;
dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *));
// place every item set at the array position given by its assigned index
H_FOREACH(states, HLRState *state, void *v)
size_t idx = (uintptr_t)v;
dfa->states[idx] = state;
H_END_FOREACH
dfa->transitions = transitions;
return dfa;
}
/* LR(0) table generation */
// Enter "shift, go to nextstate" into row 'state' of the table for 'symbol'.
// End-of-input and character symbols go into the row's terminal map, all
// other symbols into its nonterminal map.
static inline
void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
               size_t nextstate)
{
  HLRAction *shift = h_shift_action(table->arena, nextstate);

  if(symbol->type == HCF_END) {
    h_stringmap_put_end(table->tmap[state], shift);
  } else if(symbol->type == HCF_CHAR) {
    h_stringmap_put_char(table->tmap[state], symbol->chr, shift);
  } else {
    // nonterminal case
    h_hashtable_put(table->ntmap[state], symbol, shift);
  }
}
// Generate the LR(0) parse table for grammar 'g' from its automaton 'dfa'.
// Shift entries are derived from the DFA transitions; a state containing a
// reducible item gets the reduction as its row-wide ("forall") action.
// States with a reduce/reduce or shift/reduce conflict are recorded in the
// table's 'inadeq' list. The table is allocated with the grammar's allocator
// and is to be released with h_lrtable_free().
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
{
HAllocator *mm__ = g->mm__;
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
HArena *arena = table->arena;
// remember start symbol
table->start = g->start;
// shift to the accepting end state for the start symbol
put_shift(table, 0, g->start, HLR_SUCCESS);
// add shift entries
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
// for each transition x-A->y, add "shift, goto y" to table entry (x,A)
HLRTransition *t = x->elem;
put_shift(table, t->from, t->symbol, t->to);
}
// add reduce entries, record inadequate states
for(size_t i=0; i<dfa->nstates; i++) {
bool inadeq = false;
// find reducible items in state
H_FOREACH_KEY(dfa->states[i], HLRItem *item)
if(item->mark == item->len) { // mark at the end
HLRAction *reduce = h_reduce_action(arena, item);
// check for reduce/reduce conflict on forall
if(table->forall[i]) {
// combine with the reduction(s) already entered for this row
reduce = h_lr_conflict(arena, table->forall[i], reduce);
inadeq = true;
}
table->forall[i] = reduce;
// check for shift/reduce conflict with other entries
// NOTE: these are not recorded as HLR_CONFLICTs at this point
if(!h_lrtable_row_empty(table, i))
inadeq = true;
}
H_END_FOREACH
// each inadequate state is listed once, by its row index
if(inadeq)
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
}
return table;
}

View file

@ -321,6 +321,31 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en
return m->epsilon_branch;
}
// Look up the value stored in 'm' for the input that 'lookahead' is about to
// deliver. The map is descended byte by byte; the first node that holds an
// epsilon value ends the search. If the input runs out first, the end-of-input
// value of the node reached is returned. Returns NULL if nothing matches.
//
// The stream is received by value, so reading from it here does not consume
// anything from the caller's stream.
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
{
  const HStringMap *node = m;

  while(node != NULL) {
    // input matched. it is up to the table generator to ensure that more
    // lookahead would not yield a more specific match. (LLk)
    if(node->epsilon_branch)
      return node->epsilon_branch;

    uint8_t c = h_read_bits(&lookahead, 8, false);

    // end of input
    // XXX assumption of byte-wise grammar and input
    if(lookahead.overrun)
      return node->end_branch;

    // no match yet, descend
    node = h_stringmap_get_char(node, c);
  }

  return NULL;
}
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{
return (h_stringmap_get(m, str, n, end) != NULL);
@ -813,27 +838,43 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
#define BUFSIZE 512
static bool
pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
const HStringMap *set)
pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map)
{
assert(n < BUFSIZE-4);
if(set->epsilon_branch) {
if(!first) fputc(',', file); first=false;
if(n==0)
fputs("''", file);
else
if(map->epsilon_branch) {
if(!first) fputc(sep, file); first=false;
if(n==0) {
fputs("\"\"", file);
} else {
fputs("\"", file);
fwrite(prefix, 1, n, file);
fputs("\"", file);
}
if(valprint) {
fputc(':', file);
valprint(file, env, map->epsilon_branch);
}
}
if(set->end_branch) {
if(!first) fputc(',', file); first=false;
if(map->end_branch) {
if(!first) fputs(",\"", file); first=false;
if(n>0) fputs("\"\"", file);
fwrite(prefix, 1, n, file);
fputc('$', file);
if(n>0) fputs("\"\"", file);
fputs("$", file);
if(valprint) {
fputc(':', file);
valprint(file, env, map->end_branch);
}
}
// iterate over set->char_branches
HHashTable *ht = set->char_branches;
// iterate over map->char_branches
HHashTable *ht = map->char_branches;
size_t i;
HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) {
@ -859,20 +900,28 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
n_ += sprintf(prefix+n_, "\\x%.2X", c);
}
first = pprint_stringset_elems(file, first, prefix, n_, ends);
first = pprint_stringmap_elems(file, first, prefix, n_,
sep, valprint, env, ends);
}
}
return first;
}
// Print the entries of a string map.
//
// file     - output stream
// sep      - separator character passed on to the element printer
// valprint - callback passed on to print the stored values together with
//            'env'; may be NULL (h_pprint_stringset does so)
// env      - opaque pointer handed to valprint
// map      - the map to print
void h_pprint_stringmap(FILE *file, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map)
{
// scratch space for the key prefix; passed with a length of 0
char buf[BUFSIZE];
pprint_stringmap_elems(file, true, buf, 0, sep, valprint, env, map);
}
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent)
{
int j;
for(j=0; j<indent; j++) fputc(' ', file);
char buf[BUFSIZE];
fputc('{', file);
pprint_stringset_elems(file, true, buf, 0, set);
h_pprint_stringmap(file, ',', NULL, NULL, set);
fputs("}\n", file);
}

View file

@ -47,6 +47,7 @@ void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v);
void h_stringmap_update(HStringMap *m, const HStringMap *n);
void h_stringmap_replace(HStringMap *m, void *old, void *new);
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end);
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);
@ -97,4 +98,7 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
void h_pprint_stringmap(FILE *file, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map);
void h_pprint_char(FILE *file, char c);

View file

@ -62,6 +62,16 @@ HSlist* h_slist_copy(HSlist *slist) {
return ret;
}
// Remove the first element of the list and return it, or NULL if the list is
// empty. In contrast to h_slist_pop, the list node itself is left alone, so
// other lists sharing it remain intact.
void* h_slist_drop(HSlist *slist) {
  HSlistNode *top = slist->head;

  if (top == NULL)
    return NULL;

  slist->head = top->next;
  return top->elem;
}
void* h_slist_pop(HSlist *slist) {
HSlistNode *head = slist->head;
if (!head)

View file

@ -31,6 +31,7 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
&h__regex_backend_vtable,
&h__llk_backend_vtable,
&h__lalr_backend_vtable,
&h__glr_backend_vtable,
};

View file

@ -37,8 +37,8 @@ typedef enum HParserBackend_ {
PB_REGULAR,
PB_LLk,
PB_LALR,
PB_GLR, // Not Implemented
PB_MAX = PB_LALR
PB_GLR,
PB_MAX = PB_GLR
} HParserBackend;
typedef enum HTokenType_ {

View file

@ -220,6 +220,7 @@ struct HBitWriter_ {
extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable;
extern HParserBackendVTable h__lalr_backend_vtable;
extern HParserBackendVTable h__glr_backend_vtable;
// }}}
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
@ -247,6 +248,7 @@ void h_carray_append(HCountedArray *array, void* item);
HSlist* h_slist_new(HArena *arena);
HSlist* h_slist_copy(HSlist *slist);
void* h_slist_pop(HSlist *slist);
void* h_slist_drop(HSlist *slist);
void h_slist_push(HSlist *slist, void* item);
bool h_slist_find(HSlist *slist, const void* item);
HSlist* h_slist_remove_all(HSlist *slist, const void* item);

View file

@ -428,6 +428,19 @@ static void test_rightrec(gconstpointer backend) {
g_check_parse_ok(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))");
}
// Exercise an ambiguous grammar:  E -> E '+' E | 'd'
// The result is flattened, so every possible parse of an input yields the
// same token list. An input ending in a dangling '+' must be rejected.
static void test_ambiguous(gconstpointer backend) {
  const HParserBackend be_ = (HParserBackend)GPOINTER_TO_INT(backend);

  HParser *digit_ = h_ch('d');
  HParser *plus_ = h_ch('+');
  HParser *sum_ = h_indirect();
  h_bind_indirect(sum_, h_choice(h_sequence(sum_, plus_, sum_, NULL), digit_, NULL));
  HParser *flat_ = h_action(sum_, h_act_flatten);

  g_check_parse_ok(flat_, be_, "d", 1, "(u0x64)");
  g_check_parse_ok(flat_, be_, "d+d", 3, "(u0x64 u0x2b u0x64)");
  g_check_parse_ok(flat_, be_, "d+d+d", 5, "(u0x64 u0x2b u0x64 u0x2b u0x64)");
  g_check_parse_failed(flat_, be_, "d+", 2);
}
void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@ -585,4 +598,43 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore);
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
// Register the parser tests against the GLR backend. Each test receives
// PB_GLR packed into its data pointer via GINT_TO_POINTER.
g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
g_test_add_data_func("/core/parser/glr/ch_range", GINT_TO_POINTER(PB_GLR), test_ch_range);
g_test_add_data_func("/core/parser/glr/int64", GINT_TO_POINTER(PB_GLR), test_int64);
g_test_add_data_func("/core/parser/glr/int32", GINT_TO_POINTER(PB_GLR), test_int32);
g_test_add_data_func("/core/parser/glr/int16", GINT_TO_POINTER(PB_GLR), test_int16);
g_test_add_data_func("/core/parser/glr/int8", GINT_TO_POINTER(PB_GLR), test_int8);
g_test_add_data_func("/core/parser/glr/uint64", GINT_TO_POINTER(PB_GLR), test_uint64);
g_test_add_data_func("/core/parser/glr/uint32", GINT_TO_POINTER(PB_GLR), test_uint32);
g_test_add_data_func("/core/parser/glr/uint16", GINT_TO_POINTER(PB_GLR), test_uint16);
g_test_add_data_func("/core/parser/glr/uint8", GINT_TO_POINTER(PB_GLR), test_uint8);
g_test_add_data_func("/core/parser/glr/int_range", GINT_TO_POINTER(PB_GLR), test_int_range);
// The float64/float32 registrations are compiled out for this backend.
// NOTE(review): the reason is not visible here -- confirm before enabling.
#if 0
g_test_add_data_func("/core/parser/glr/float64", GINT_TO_POINTER(PB_GLR), test_float64);
g_test_add_data_func("/core/parser/glr/float32", GINT_TO_POINTER(PB_GLR), test_float32);
#endif
g_test_add_data_func("/core/parser/glr/whitespace", GINT_TO_POINTER(PB_GLR), test_whitespace);
g_test_add_data_func("/core/parser/glr/left", GINT_TO_POINTER(PB_GLR), test_left);
g_test_add_data_func("/core/parser/glr/right", GINT_TO_POINTER(PB_GLR), test_right);
g_test_add_data_func("/core/parser/glr/middle", GINT_TO_POINTER(PB_GLR), test_middle);
g_test_add_data_func("/core/parser/glr/action", GINT_TO_POINTER(PB_GLR), test_action);
g_test_add_data_func("/core/parser/glr/in", GINT_TO_POINTER(PB_GLR), test_in);
g_test_add_data_func("/core/parser/glr/not_in", GINT_TO_POINTER(PB_GLR), test_not_in);
g_test_add_data_func("/core/parser/glr/end_p", GINT_TO_POINTER(PB_GLR), test_end_p);
g_test_add_data_func("/core/parser/glr/nothing_p", GINT_TO_POINTER(PB_GLR), test_nothing_p);
g_test_add_data_func("/core/parser/glr/sequence", GINT_TO_POINTER(PB_GLR), test_sequence);
g_test_add_data_func("/core/parser/glr/choice", GINT_TO_POINTER(PB_GLR), test_choice);
g_test_add_data_func("/core/parser/glr/many", GINT_TO_POINTER(PB_GLR), test_many);
g_test_add_data_func("/core/parser/glr/many1", GINT_TO_POINTER(PB_GLR), test_many1);
g_test_add_data_func("/core/parser/glr/optional", GINT_TO_POINTER(PB_GLR), test_optional);
g_test_add_data_func("/core/parser/glr/sepBy", GINT_TO_POINTER(PB_GLR), test_sepBy);
g_test_add_data_func("/core/parser/glr/sepBy1", GINT_TO_POINTER(PB_GLR), test_sepBy1);
g_test_add_data_func("/core/parser/glr/epsilon_p", GINT_TO_POINTER(PB_GLR), test_epsilon_p);
g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool);
g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore);
g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec);
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
// Ambiguous-grammar test (test_ambiguous).
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
}