Added new build system
This commit is contained in:
commit
b0f567c090
27 changed files with 2255 additions and 217 deletions
|
|
@ -11,6 +11,8 @@ struct HCFStack_ {
|
|||
int count;
|
||||
int cap;
|
||||
HCFChoice *last_completed; // Last completed choice.
|
||||
// XXX is last_completed still needed?
|
||||
HCFChoice *prealloc; // If not NULL, will be used for the outermost choice.
|
||||
};
|
||||
|
||||
#ifndef UNUSED
|
||||
|
|
@ -25,11 +27,13 @@ static HCFStack* h_cfstack_new(HAllocator *mm__) {
|
|||
stack->count = 0;
|
||||
stack->cap = 4;
|
||||
stack->stack = h_new(HCFChoice*, stack->cap);
|
||||
stack->prealloc = NULL;
|
||||
return stack;
|
||||
}
|
||||
|
||||
// Release everything owned by a choice stack: the array of stacked choices,
// the preallocated choice (if it is still set), and the stack object itself.
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) UNUSED;
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) {
  // the two members are independent allocations; the container goes last
  h_free(stk__->stack);
  h_free(stk__->prealloc);
  h_free(stk__);
}
|
||||
|
|
@ -56,7 +60,9 @@ static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFCh
|
|||
}
|
||||
|
||||
static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) {
|
||||
HCFChoice *ret = h_new(HCFChoice, 1);
|
||||
HCFChoice *ret = stk__->prealloc? stk__->prealloc : h_new(HCFChoice, 1);
|
||||
stk__->prealloc = NULL;
|
||||
|
||||
ret->reshape = NULL;
|
||||
ret->action = NULL;
|
||||
ret->pred = NULL;
|
||||
|
|
|
|||
294
src/backends/glr.c
Normal file
294
src/backends/glr.c
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
#include <assert.h>
|
||||
#include "lr.h"
|
||||
|
||||
static bool glr_step(HParseResult **result, HSlist *engines,
|
||||
HLREngine *engine, const HLRAction *action);
|
||||
|
||||
|
||||
/* GLR compilation (LALR w/o failing on conflict) */
|
||||
|
||||
// Compile `parser` for the GLR backend by delegating to the LALR compiler.
// Returns whatever h_lalr_compile returns, except that a -1 accompanied by a
// table in parser->backend_data is turned into 0.
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  int status = h_lalr_compile(mm__, parser, params);

  // anything other than "failed, but a table exists" is passed through as-is
  if(status != -1 || !parser->backend_data)
    return status;

  // table is there, just has conflicts? nevermind, that's okay.
  return 0;
}
|
||||
|
||||
// Free the GLR backend data of `parser`; GLR shares its table with LALR, so
// this simply forwards to h_lalr_free.
void h_glr_free(HParser *parser)
{
  h_lalr_free(parser);
}
|
||||
|
||||
|
||||
/* Merging engines (when they converge on the same state) */
|
||||
|
||||
// Combine two engines that have reached the same state on the same input into
// a new engine. The result is a copy of `old` with a fresh, empty stack and
// both originals recorded in merged[] so they can be revived later.
static HLREngine *lrengine_merge(HLREngine *old, HLREngine *new)
{
  // merging only makes sense for engines that agree on state and input
  assert(old->state == new->state);
  assert(old->input.input == new->input.input);

  HArena *arena = old->arena;
  HLREngine *combined = h_arena_malloc(arena, sizeof(HLREngine));

  *combined = *old;
  combined->merged[0] = old;
  combined->merged[1] = new;
  combined->stack = h_slist_new(arena);

  return combined;
}
|
||||
|
||||
// Build a new list consisting of a node-by-node copy of `stack` (top first)
// followed by the existing node chain starting at `bottom`. Neither input is
// modified; new nodes come from stack->arena.
static HSlist *demerge_stack(HSlistNode *bottom, HSlist *stack)
{
  HArena *arena = stack->arena;
  HSlist *copy = h_slist_new(arena);

  // `tail` always points at the link that the next copied node goes into
  HSlistNode **tail = &copy->head;
  HSlistNode *src = stack->head;
  while(src != NULL) {
    HSlistNode *dup = h_arena_malloc(arena, sizeof(HSlistNode));
    dup->elem = src->elem;
    dup->next = NULL;

    *tail = dup;
    tail = &dup->next;
    src = src->next;
  }

  // the copied part continues into the ancestor stack
  *tail = bottom;

  return copy;
}
|
||||
|
||||
// Revive a previously merged engine: its stack becomes a copy of `stack`
// placed on top of the stack it had when it was merged.
static inline HLREngine *respawn(HLREngine *eng, HSlist *stack)
{
  // NB: updating eng in place is fine because an engine is not used for
  // anything after it is merged.
  HSlistNode *ancestor_top = eng->stack->head;
  eng->stack = demerge_stack(ancestor_top, stack);
  return eng;
}
|
||||
|
||||
static HLREngine *
|
||||
demerge(HParseResult **result, HSlist *engines,
|
||||
HLREngine *engine, const HLRAction *action, size_t depth)
|
||||
{
|
||||
// no-op on engines that are not merged
|
||||
if(!engine->merged[0])
|
||||
return engine;
|
||||
|
||||
HSlistNode *p = engine->stack->head;
|
||||
for(size_t i=0; i<depth; i++) {
|
||||
// if stack hits bottom, respawn ancestors
|
||||
if(p == NULL) {
|
||||
HLREngine *a = respawn(engine->merged[0], engine->stack);
|
||||
HLREngine *b = respawn(engine->merged[1], engine->stack);
|
||||
|
||||
// continue demerge until final depth reached
|
||||
a = demerge(result, engines, a, action, depth-i);
|
||||
b = demerge(result, engines, b, action, depth-i);
|
||||
|
||||
// step and stow one ancestor...
|
||||
glr_step(result, engines, a, action);
|
||||
|
||||
// ...and return the other
|
||||
return b;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
|
||||
return engine; // there is enough stack before the merge point
|
||||
}
|
||||
|
||||
|
||||
/* Forking engines (on conflicts) */
|
||||
|
||||
HLREngine *fork_engine(const HLREngine *engine)
|
||||
{
|
||||
HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine));
|
||||
eng2->table = engine->table;
|
||||
eng2->state = engine->state;
|
||||
eng2->input = engine->input;
|
||||
|
||||
// shallow-copy the stack
|
||||
// this works because h_slist_push and h_slist_drop never modify
|
||||
// the underlying structure of HSlistNodes, only the head pointer.
|
||||
// in fact, this gives us prefix sharing for free.
|
||||
eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist));
|
||||
*eng2->stack = *engine->stack;
|
||||
|
||||
eng2->arena = engine->arena;
|
||||
eng2->tarena = engine->tarena;
|
||||
return eng2;
|
||||
}
|
||||
|
||||
static const HLRAction *
|
||||
handle_conflict(HParseResult **result, HSlist *engines,
|
||||
const HLREngine *engine, const HSlist *branches)
|
||||
{
|
||||
// there should be at least two conflicting actions
|
||||
assert(branches->head);
|
||||
assert(branches->head->next); // this is just a consistency check
|
||||
|
||||
// fork a new engine for all but the first action
|
||||
for(HSlistNode *x=branches->head->next; x; x=x->next) {
|
||||
HLRAction *act = x->elem;
|
||||
HLREngine *eng = fork_engine(engine);
|
||||
|
||||
// perform one step and add to engines
|
||||
glr_step(result, engines, eng, act);
|
||||
}
|
||||
|
||||
// return first action for use with original engine
|
||||
return branches->head->elem;
|
||||
}
|
||||
|
||||
|
||||
/* GLR driver */
|
||||
|
||||
static bool glr_step(HParseResult **result, HSlist *engines,
|
||||
HLREngine *engine, const HLRAction *action)
|
||||
{
|
||||
// handle forks and demerges (~> spawn engines)
|
||||
if(action) {
|
||||
if(action->type == HLR_CONFLICT) {
|
||||
// fork engine on conflicts
|
||||
action = handle_conflict(result, engines, engine, action->branches);
|
||||
} else if(action->type == HLR_REDUCE) {
|
||||
// demerge/respawn as needed
|
||||
size_t depth = action->production.length;
|
||||
engine = demerge(result, engines, engine, action, depth);
|
||||
}
|
||||
}
|
||||
|
||||
bool run = h_lrengine_step(engine, action);
|
||||
|
||||
if(run) {
|
||||
// store engine in the list, merge if necessary
|
||||
HSlistNode *x;
|
||||
for(x=engines->head; x; x=x->next) {
|
||||
HLREngine *eng = x->elem;
|
||||
if(eng->state == engine->state) {
|
||||
x->elem = lrengine_merge(eng, engine);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!x) // no merge happened
|
||||
h_slist_push(engines, engine);
|
||||
} else if(engine->state == HLR_SUCCESS) {
|
||||
// save the result
|
||||
*result = h_lrengine_result(engine);
|
||||
}
|
||||
|
||||
return run;
|
||||
}
|
||||
|
||||
// Run the GLR driver over `stream` using the table in parser->backend_data.
// Returns the parse result, or NULL if there is no table or no engine
// succeeded.
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse

  // two engine lists (one engine per state): the engines being stepped in
  // this round and the ones collected for the next round
  HSlist *current = h_slist_new(tarena);
  HSlist *upcoming = h_slist_new(tarena);

  // create initial engine
  h_slist_push(current, h_lrengine_new(arena, tarena, table, stream));

  HParseResult *result = NULL;
  for(;;) {
    if(result != NULL || h_slist_empty(current))
      break;

    assert(h_slist_empty(upcoming));

    // step all engines
    while(!h_slist_empty(current)) {
      HLREngine *engine = h_slist_pop(current);
      const HLRAction *action = h_lrengine_action(engine);
      glr_step(&result, upcoming, engine, action);
    }

    // swap the lists
    HSlist *spare = current;
    current = upcoming;
    upcoming = spare;
  }

  // the result arena is only kept when there is a result to go with it
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);
  return result;
}
|
||||
|
||||
|
||||
|
||||
HParserBackendVTable h__glr_backend_vtable = {
|
||||
.compile = h_glr_compile,
|
||||
.parse = h_glr_parse,
|
||||
.free = h_glr_free
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
// dummy!
|
||||
int test_glr(void)
|
||||
{
|
||||
HAllocator *mm__ = &system_allocator;
|
||||
|
||||
/*
|
||||
E -> E '+' E
|
||||
| 'd'
|
||||
*/
|
||||
|
||||
HParser *d = h_ch('d');
|
||||
HParser *E = h_indirect();
|
||||
HParser *E_ = h_choice(h_sequence(E, h_ch('+'), E, NULL), d, NULL);
|
||||
h_bind_indirect(E, E_);
|
||||
HParser *p = E;
|
||||
|
||||
printf("\n==== G R A M M A R ====\n");
|
||||
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
|
||||
if(g == NULL) {
|
||||
fprintf(stderr, "h_cfgrammar failed\n");
|
||||
return 1;
|
||||
}
|
||||
h_pprint_grammar(stdout, g, 0);
|
||||
|
||||
printf("\n==== D F A ====\n");
|
||||
HLRDFA *dfa = h_lr0_dfa(g);
|
||||
if(dfa)
|
||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||
else
|
||||
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||
|
||||
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||
if(table0)
|
||||
h_pprint_lrtable(stdout, g, table0, 0);
|
||||
else
|
||||
fprintf(stderr, "h_lr0_table failed\n");
|
||||
h_lrtable_free(table0);
|
||||
|
||||
printf("\n==== L A L R T A B L E ====\n");
|
||||
if(h_compile(p, PB_GLR, NULL)) {
|
||||
fprintf(stderr, "does not compile\n");
|
||||
return 2;
|
||||
}
|
||||
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
||||
|
||||
printf("\n==== P A R S E R E S U L T ====\n");
|
||||
HParseResult *res = h_parse(p, (uint8_t *)"d+d+d", 5);
|
||||
if(res)
|
||||
h_pprint(stdout, res->ast, 0, 2);
|
||||
else
|
||||
printf("no parse\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
389
src/backends/lalr.c
Normal file
389
src/backends/lalr.c
Normal file
|
|
@ -0,0 +1,389 @@
|
|||
#include <assert.h>
|
||||
#include "contextfree.h"
|
||||
#include "lr.h"
|
||||
|
||||
|
||||
|
||||
/* LALR-via-SLR grammar transformation */
|
||||
|
||||
// Number of slots in a NULL-terminated array of pointers, counting the
// terminating NULL itself.
static inline size_t seqsize(void *p_)
{
  void **seq = p_;
  size_t len = 0;

  while(seq[len] != NULL)
    len++;

  return len + 1;   // +1 for the terminator
}
|
||||
|
||||
static HLRAction *
|
||||
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
|
||||
{
|
||||
switch(symbol->type) {
|
||||
case HCF_END:
|
||||
return table->tmap[state]->end_branch;
|
||||
case HCF_CHAR:
|
||||
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
|
||||
default:
|
||||
// nonterminal case
|
||||
return h_hashtable_get(table->ntmap[state], symbol);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the state reached from state `x` by shifting symbol `A`.
// The table must contain a shift entry for that pair.
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
  HLRAction *action = lrtable_lookup(table, x, A);

  assert(action != NULL);
  assert(action->type == HLR_SHIFT);

  size_t target = action->nextstate;
  return target;
}
|
||||
|
||||
// Allocate a transition record "x --A--> y" in `arena`.
static inline HLRTransition *transition(HArena *arena,
                                        size_t x, const HCFChoice *A, size_t y)
{
  HLRTransition *edge = h_arena_malloc(arena, sizeof(HLRTransition));

  edge->symbol = A;
  edge->from = x;
  edge->to = y;

  return edge;
}
|
||||
|
||||
// no-op on terminal symbols
|
||||
// Replace the productions of the enhanced symbol `xAy` (which starts in
// state `x`) by productions over enhanced symbols: each right-hand side is
// traced through the table from state `x`, and every symbol is replaced by
// the enhanced symbol registered in eg->tmap for the transition it takes.
// Does nothing unless xAy is of type HCF_CHOICE.
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
                                  size_t x, HCFChoice *xAy)
{
  if(xAy->type != HCF_CHOICE)
    return;
  // XXX CHARSET?

  HArena *arena = eg->arena;

  // new NULL-terminated production list, same length as the old one
  HCFSequence **newseq = h_arena_malloc(arena, seqsize(xAy->seq)
                                               * sizeof(HCFSequence *));
  size_t n = 0;
  for(HCFSequence **old = xAy->seq; *old; old++) {
    // trace rhs starting in state x and following the transitions
    // xAy -> ... iBj ...
    HCFChoice **rhs = (*old)->items;
    HCFChoice **items = h_arena_malloc(arena, seqsize(rhs) * sizeof(HCFChoice *));

    size_t state = x;
    size_t k = 0;
    for(; rhs[k]; k++) {
      size_t next = follow_transition(table, state, rhs[k]);
      HLRTransition *key = transition(arena, state, rhs[k], next);

      items[k] = h_hashtable_get(eg->tmap, key);
      assert(items[k] != NULL);

      state = next;
    }
    items[k] = NULL;

    newseq[n] = h_arena_malloc(arena, sizeof(HCFSequence));
    newseq[n]->items = items;
    n++;
  }
  newseq[n] = NULL;

  xAy->seq = newseq;
}
|
||||
|
||||
// Create a copy of `sym` in the enhanced grammar's arena and record it in
// eg->corr as one of the enhanced symbols corresponding to `sym`.
static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
{
  HArena *arena = eg->arena;

  HCFChoice *copy = h_arena_malloc(arena, sizeof(HCFChoice));
  *copy = *sym;

  // fetch the set of enhanced symbols for sym, creating it on first use
  HHashSet *twins = h_hashtable_get(eg->corr, sym);
  if(twins == NULL) {
    twins = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
    h_hashtable_put(eg->corr, sym, twins);
  }
  h_hashset_put(twins, copy);

  return copy;
}
|
||||
|
||||
static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa,
|
||||
const HLRTable *table)
|
||||
{
|
||||
HAllocator *mm__ = g->mm__;
|
||||
HArena *arena = g->arena;
|
||||
|
||||
HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
|
||||
eg->tmap = h_hashtable_new(arena, h_eq_transition, h_hash_transition);
|
||||
eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
||||
eg->corr = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
|
||||
// XXX must use h_eq/hash_ptr for symbols! so enhanced CHARs are different
|
||||
eg->arena = arena;
|
||||
|
||||
// establish mapping between transitions and symbols
|
||||
for(HSlistNode *x=dfa->transitions->head; x; x=x->next) {
|
||||
HLRTransition *t = x->elem;
|
||||
|
||||
assert(!h_hashtable_present(eg->tmap, t));
|
||||
|
||||
HCFChoice *sym = new_enhanced_symbol(eg, t->symbol);
|
||||
h_hashtable_put(eg->tmap, t, sym);
|
||||
h_hashtable_put(eg->smap, sym, t);
|
||||
}
|
||||
|
||||
// transform the productions
|
||||
H_FOREACH(eg->tmap, HLRTransition *t, HCFChoice *sym)
|
||||
transform_productions(table, eg, t->from, sym);
|
||||
H_END_FOREACH
|
||||
|
||||
// add the start symbol
|
||||
HCFChoice *start = new_enhanced_symbol(eg, g->start);
|
||||
transform_productions(table, eg, 0, start);
|
||||
|
||||
eg->grammar = h_cfgrammar_(mm__, start);
|
||||
return eg;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LALR table generation */
|
||||
|
||||
// A table has conflicts iff its list of inadequate states is non-empty.
static inline bool has_conflicts(HLRTable *table)
{
  if(h_slist_empty(table->inadeq))
    return false;
  return true;
}
|
||||
|
||||
// for each lookahead symbol (fs), put action into tmap
|
||||
// returns 0 on success, -1 on conflict
|
||||
// ignores forall entries
|
||||
// Enter `action` into `tmap` under every string present in `fs`.
// A cell that already holds a different action is replaced by a conflict
// entry built from both.
// Returns 0 on success, -1 if at least one conflict was recorded.
static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
{
  int status = 0;

  if(fs->epsilon_branch) {
    HLRAction *old = tmap->epsilon_branch;
    if(old == NULL || old == action) {
      tmap->epsilon_branch = action;
    } else {
      // conflict
      tmap->epsilon_branch = h_lr_conflict(tmap->arena, old, action);
      status = -1;
    }
  }

  if(fs->end_branch) {
    HLRAction *old = tmap->end_branch;
    if(old == NULL || old == action) {
      tmap->end_branch = action;
    } else {
      // conflict
      tmap->end_branch = h_lr_conflict(tmap->arena, old, action);
      status = -1;
    }
  }

  // recurse into the per-character branches, creating missing ones in tmap
  H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
    HStringMap *sub = h_hashtable_get(tmap->char_branches, key);

    if(sub == NULL) {
      sub = h_stringmap_new(tmap->arena);
      h_hashtable_put(tmap->char_branches, key, sub);
    }

    if(terminals_put(sub, fs_, action) < 0)
      status = -1;
  H_END_FOREACH

  return status;
}
|
||||
|
||||
// check whether a sequence of enhanced-grammar symbols (p) matches the given
|
||||
// (original-grammar) production rhs and terminates in the given end state.
|
||||
// Does the enhanced-symbol sequence `p` spell out the original production
// right-hand side `rhs` and finish in `endstate`? Both arrays are
// NULL-terminated; an empty sequence counts as finishing in `endstate`.
static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
                             HCFChoice **rhs, size_t endstate)
{
  size_t state = endstate;    // initialized to end in case of empty rhs

  while(*p && *rhs) {
    HLRTransition *t = h_hashtable_get(eg->smap, *p);
    assert(t != NULL);

    if(!h_eq_symbol(t->symbol, *rhs))
      return false;

    state = t->to;
    p++;
    rhs++;
  }

  // both sequences must have ended together (both NULL)
  if(*p != *rhs)
    return false;

  return state == endstate;
}
|
||||
|
||||
// desugar parser with a fresh start symbol
|
||||
// this guarantees that the start symbol will not occur in any productions
|
||||
// Returns a newly allocated choice with a single production consisting of the
// desugared `parser`, reshaped with h_act_first.
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
|
||||
{
|
||||
// the fresh start symbol, allocated up front so it can be returned below
HCFChoice *augmented = h_new(HCFChoice, 1);
|
||||
|
||||
HCFStack *stk__ = h_cfstack_new(mm__);
|
||||
// hand the symbol to the stack so that the outermost choice is built in it
stk__->prealloc = augmented;
|
||||
HCFS_BEGIN_CHOICE() {
|
||||
HCFS_BEGIN_SEQ() {
|
||||
HCFS_DESUGAR(parser);
|
||||
} HCFS_END_SEQ();
|
||||
HCFS_THIS_CHOICE->reshape = h_act_first;
|
||||
} HCFS_END_CHOICE();
|
||||
// NOTE(review): presumably prealloc has been consumed (reset to NULL) by the
// choice above, so this does not free `augmented` -- confirm
h_cfstack_free(mm__, stk__);
|
||||
|
||||
return augmented;
|
||||
}
|
||||
|
||||
// Compile `parser` into an LALR table and store it in parser->backend_data.
//
// Steps:
//   - generate (augmented) CFG from parser
//   - construct LR(0) DFA
//   - build LR(0) table
//   - if necessary, resolve conflicts "by conversion to SLR"
//
// Returns 0 on success and -1 on failure. Note that when conflicts remain
// after resolution, -1 is returned but the table is still stored in
// parser->backend_data.
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
  if(g == NULL)     // backend not suitable (language not context-free)
    return -1;

  HLRDFA *dfa = h_lr0_dfa(g);
  if(dfa == NULL) {     // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  HLRTable *table = h_lr0_table(g, dfa);
  if(table == NULL) {   // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  if(has_conflicts(table)) {
    HArena *arena = table->arena;

    HLREnhGrammar *eg = enhance_grammar(g, dfa, table);
    // the enhanced grammar is needed for the follow sets below, so a missing
    // one is treated like any other construction failure
    if(eg == NULL || eg->grammar == NULL) { // this should normally not happen
      h_cfgrammar_free(g);
      h_lrtable_free(table);
      return -1;
    }

    // go through the inadequate states; replace inadeq with a new list
    HSlist *old_inadeq = table->inadeq;
    table->inadeq = h_slist_new(arena);

    for(HSlistNode *x=old_inadeq->head; x; x=x->next) {
      size_t state = (uintptr_t)x->elem;
      bool still_inadeq = false;

      // clear old forall entry, it's being replaced by more fine-grained ones
      table->forall[state] = NULL;

      // go through each reducible item of state
      H_FOREACH_KEY(dfa->states[state], HLRItem *item)
        if(item->mark < item->len)
          continue;

        // action to place in the table cells indicated by lookahead
        HLRAction *action = h_reduce_action(arena, item);

        // find all LR(0)-enhanced productions matching item
        HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
        assert(lhss != NULL);
        H_FOREACH_KEY(lhss, HCFChoice *lhs)
          assert(lhs->type == HCF_CHOICE);  // XXX could be CHARSET?

          for(HCFSequence **p=lhs->seq; *p; p++) {
            HCFChoice **rhs = (*p)->items;
            if(!match_production(eg, rhs, item->rhs, state))
              continue;

            // the left-hand symbol's follow set is this production's
            // contribution to the lookahead
            const HStringMap *fs = h_follow(1, eg->grammar, lhs);
            assert(fs != NULL);
            assert(fs->epsilon_branch == NULL);
            assert(!h_stringmap_empty(fs));

            // for each lookahead symbol, put action into table cell
            if(terminals_put(table->tmap[state], fs, action) < 0)
              still_inadeq = true;
          }
        H_END_FOREACH // enhanced production
      H_END_FOREACH  // reducible item

      if(still_inadeq)
        h_slist_push(table->inadeq, (void *)(uintptr_t)state);
    }

    // the enhanced grammar was only needed for its follow sets; terminals_put
    // stores the actions in the table's own maps, so it can be released now
    h_cfgrammar_free(eg->grammar);
  }

  h_cfgrammar_free(g);
  parser->backend_data = table;
  return has_conflicts(table)? -1 : 0;
}
|
||||
|
||||
// Free the LR table stored in `parser` and reset the parser to the packrat
// backend with no backend data.
void h_lalr_free(HParser *parser)
{
  h_lrtable_free(parser->backend_data);

  parser->backend = PB_PACKRAT;
  parser->backend_data = NULL;
}
|
||||
|
||||
|
||||
|
||||
HParserBackendVTable h__lalr_backend_vtable = {
|
||||
.compile = h_lalr_compile,
|
||||
.parse = h_lr_parse,
|
||||
.free = h_lalr_free
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
// dummy!
|
||||
int test_lalr(void)
|
||||
{
|
||||
HAllocator *mm__ = &system_allocator;
|
||||
|
||||
/*
|
||||
E -> E '-' T
|
||||
| T
|
||||
T -> '(' E ')'
|
||||
| 'n' -- also try [0-9] for the charset paths
|
||||
*/
|
||||
|
||||
HParser *n = h_ch('n');
|
||||
HParser *E = h_indirect();
|
||||
HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
|
||||
HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
|
||||
h_bind_indirect(E, E_);
|
||||
HParser *p = E;
|
||||
|
||||
printf("\n==== G R A M M A R ====\n");
|
||||
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
|
||||
if(g == NULL) {
|
||||
fprintf(stderr, "h_cfgrammar failed\n");
|
||||
return 1;
|
||||
}
|
||||
h_pprint_grammar(stdout, g, 0);
|
||||
|
||||
printf("\n==== D F A ====\n");
|
||||
HLRDFA *dfa = h_lr0_dfa(g);
|
||||
if(dfa)
|
||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||
else
|
||||
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||
|
||||
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||
if(table0)
|
||||
h_pprint_lrtable(stdout, g, table0, 0);
|
||||
else
|
||||
fprintf(stderr, "h_lr0_table failed\n");
|
||||
h_lrtable_free(table0);
|
||||
|
||||
printf("\n==== L A L R T A B L E ====\n");
|
||||
if(h_compile(p, PB_LALR, NULL)) {
|
||||
fprintf(stderr, "does not compile\n");
|
||||
return 2;
|
||||
}
|
||||
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
||||
|
||||
printf("\n==== P A R S E R E S U L T ====\n");
|
||||
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
||||
if(res)
|
||||
h_pprint(stdout, res->ast, 0, 2);
|
||||
else
|
||||
printf("no parse\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -3,13 +3,13 @@
|
|||
#include "../cfgrammar.h"
|
||||
#include "../parsers/parser_internal.h"
|
||||
|
||||
// XXX despite the names, this is all LL(1) right now. TODO
|
||||
static const size_t DEFAULT_KMAX = 1;
|
||||
|
||||
|
||||
/* Generating the LL(k) parse table */
|
||||
|
||||
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
|
||||
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
|
||||
/* Maps each nonterminal (HCFChoice) of the grammar to a HStringMap that
|
||||
* maps lookahead strings to productions (HCFSequence).
|
||||
*/
|
||||
typedef struct HLLkTable_ {
|
||||
HHashTable *rows;
|
||||
|
|
@ -19,29 +19,17 @@ typedef struct HLLkTable_ {
|
|||
} HLLkTable;
|
||||
|
||||
|
||||
// XXX adaptation to LL(1), to be removed
|
||||
typedef HCharKey HCFToken;
|
||||
static const HCFToken end_token = 0x200;
|
||||
#define char_token char_key
|
||||
|
||||
/* Interface to look up an entry in the parse table. */
|
||||
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
|
||||
HInputStream lookahead)
|
||||
const HInputStream *stream)
|
||||
{
|
||||
// note the lookahead stream is passed by value, i.e. a copy.
|
||||
// reading bits from it does not consume them from the real input.
|
||||
HCFToken tok;
|
||||
uint8_t c = h_read_bits(&lookahead, 8, false);
|
||||
if(lookahead.overrun)
|
||||
tok = end_token;
|
||||
else
|
||||
tok = char_token(c);
|
||||
|
||||
const HHashTable *row = h_hashtable_get(table->rows, x);
|
||||
const HStringMap *row = h_hashtable_get(table->rows, x);
|
||||
assert(row != NULL); // the table should have one row for each nonterminal
|
||||
|
||||
const HCFSequence *production = h_hashtable_get(row, (void *)tok);
|
||||
return production;
|
||||
assert(!row->epsilon_branch); // would match without looking at the input
|
||||
// XXX cases where this could be useful?
|
||||
|
||||
return h_stringmap_get_lookahead(row, *stream);
|
||||
}
|
||||
|
||||
/* Allocate a new parse table. */
|
||||
|
|
@ -72,58 +60,131 @@ void h_llktable_free(HLLkTable *table)
|
|||
h_free(table);
|
||||
}
|
||||
|
||||
/* Compute the predict set of production "A -> rhs". */
|
||||
HHashSet *h_predict(HCFGrammar *g, const HCFChoice *A, const HCFSequence *rhs)
|
||||
void *const CONFLICT = (void *)(uintptr_t)(-1);
|
||||
|
||||
// helper for stringmap_merge
|
||||
static void *combine_entries(HHashSet *workset, void *dst, const void *src)
|
||||
{
|
||||
// predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs
|
||||
// predict(A -> rhs) = first(rhs) otherwise
|
||||
const HCFStringMap *first_rhs = h_first_seq(1, g, rhs->items);
|
||||
const HCFStringMap *follow_A = h_follow(1, g, A);
|
||||
HHashSet *ret = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||
assert(dst != NULL);
|
||||
assert(src != NULL);
|
||||
|
||||
h_hashset_put_all(ret, first_rhs->char_branches);
|
||||
if(first_rhs->end_branch)
|
||||
h_hashset_put(ret, (void *)end_token);
|
||||
|
||||
if(h_derives_epsilon_seq(g, rhs->items)) {
|
||||
h_hashset_put_all(ret, follow_A->char_branches);
|
||||
if(follow_A->end_branch)
|
||||
h_hashset_put(ret, (void *)end_token);
|
||||
if(dst == CONFLICT) { // previous conflict
|
||||
h_hashset_put(workset, src);
|
||||
} else if(dst != src) { // new conflict
|
||||
h_hashset_put(workset, dst);
|
||||
h_hashset_put(workset, src);
|
||||
dst = CONFLICT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return dst;
|
||||
}
|
||||
|
||||
/* Generate entries for the production "A -> rhs" in the given table row. */
|
||||
static
|
||||
int fill_table_row(HCFGrammar *g, HHashTable *row,
|
||||
const HCFChoice *A, HCFSequence *rhs)
|
||||
// add the mappings of src to dst, marking conflicts and adding the conflicting
|
||||
// values to workset.
|
||||
// note: reuses parts of src to build dst!
|
||||
static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src)
|
||||
{
|
||||
// iterate over predict(A -> rhs)
|
||||
HHashSet *pred = h_predict(g, A, rhs);
|
||||
|
||||
size_t i;
|
||||
HHashTableEntry *hte;
|
||||
for(i=0; i < pred->capacity; i++) {
|
||||
for(hte = &pred->contents[i]; hte; hte = hte->next) {
|
||||
if(hte->key == NULL)
|
||||
continue;
|
||||
HCFToken x = (uintptr_t)hte->key;
|
||||
|
||||
if(h_hashtable_present(row, (void *)x))
|
||||
return -1; // table would be ambiguous
|
||||
|
||||
h_hashtable_put(row, (void *)x, rhs);
|
||||
}
|
||||
if(src->epsilon_branch) {
|
||||
if(dst->epsilon_branch)
|
||||
dst->epsilon_branch =
|
||||
combine_entries(workset, dst->epsilon_branch, src->epsilon_branch);
|
||||
else
|
||||
dst->epsilon_branch = src->epsilon_branch;
|
||||
} else {
|
||||
// if there is a non-conflicting value on the left (dst) side, it means
|
||||
// that prediction is already unambiguous. we can drop the right (src)
|
||||
// side we were going to extend with.
|
||||
if(dst->epsilon_branch && dst->epsilon_branch != CONFLICT)
|
||||
return;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if(src->end_branch) {
|
||||
if(dst->end_branch)
|
||||
dst->end_branch =
|
||||
combine_entries(workset, dst->end_branch, src->end_branch);
|
||||
else
|
||||
dst->end_branch = src->end_branch;
|
||||
}
|
||||
|
||||
// iterate over src->char_branches
|
||||
const HHashTable *ht = src->char_branches;
|
||||
for(size_t i=0; i < ht->capacity; i++) {
|
||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||
if(hte->key == NULL)
|
||||
continue;
|
||||
|
||||
HCharKey c = (HCharKey)hte->key;
|
||||
HStringMap *src_ = hte->value;
|
||||
|
||||
if(src_) {
|
||||
HStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c);
|
||||
if(dst_)
|
||||
stringmap_merge(workset, dst_, src_);
|
||||
else
|
||||
h_hashtable_put(dst->char_branches, (void *)c, src_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate entries for the productions of A in the given table row. */
|
||||
static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row,
|
||||
const HCFChoice *A)
|
||||
{
|
||||
HHashSet *workset;
|
||||
|
||||
// initialize working set to the productions of A
|
||||
workset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||
for(HCFSequence **s = A->seq; *s; s++)
|
||||
h_hashset_put(workset, *s);
|
||||
|
||||
// run until workset exhausted or kmax hit
|
||||
size_t k;
|
||||
for(k=1; k<=kmax; k++) {
|
||||
// allocate a fresh workset for the next round
|
||||
HHashSet *nextset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||
|
||||
// iterate over the productions in workset...
|
||||
const HHashTable *ht = workset;
|
||||
for(size_t i=0; i < ht->capacity; i++) {
|
||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||
if(hte->key == NULL)
|
||||
continue;
|
||||
|
||||
HCFSequence *rhs = (void *)hte->key;
|
||||
assert(rhs != NULL);
|
||||
assert(rhs != CONFLICT); // just to be sure there's no mixup
|
||||
|
||||
// calculate predict set; let values map to rhs
|
||||
HStringMap *pred = h_predict(k, g, A, rhs);
|
||||
h_stringmap_replace(pred, NULL, rhs);
|
||||
|
||||
// merge predict set into the row
|
||||
// accumulates conflicts in new workset
|
||||
stringmap_merge(nextset, row, pred);
|
||||
}
|
||||
}
|
||||
|
||||
// switch to the updated workset
|
||||
h_hashset_free(workset);
|
||||
workset = nextset;
|
||||
|
||||
// if the workset is empty, row is without conflict; we're done
|
||||
if(h_hashset_empty(workset))
|
||||
break;
|
||||
|
||||
// clear conflict markers for next iteration
|
||||
h_stringmap_replace(row, CONFLICT, NULL);
|
||||
}
|
||||
|
||||
h_hashset_free(workset);
|
||||
return (k>kmax)? -1 : 0;
|
||||
}
|
||||
|
||||
/* Generate the LL(k) parse table from the given grammar.
|
||||
* Returns -1 on error, 0 on success.
|
||||
*/
|
||||
static int fill_table(HCFGrammar *g, HLLkTable *table)
|
||||
static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table)
|
||||
{
|
||||
table->start = g->start;
|
||||
|
||||
|
|
@ -138,18 +199,14 @@ static int fill_table(HCFGrammar *g, HLLkTable *table)
|
|||
assert(a->type == HCF_CHOICE);
|
||||
|
||||
// create table row for this nonterminal
|
||||
HHashTable *row = h_hashtable_new(table->arena, h_eq_ptr, h_hash_ptr);
|
||||
HStringMap *row = h_stringmap_new(table->arena);
|
||||
h_hashtable_put(table->rows, a, row);
|
||||
|
||||
// iterate over a's productions
|
||||
HCFSequence **s;
|
||||
for(s = a->seq; *s; s++) {
|
||||
// record this production in row as appropriate
|
||||
// this can signal an ambiguity conflict.
|
||||
if(fill_table_row(kmax, g, row, a) < 0) {
|
||||
// unresolvable conflicts in row
|
||||
// NB we don't worry about deallocating anything, h_llk_compile will
|
||||
// delete the whole arena for us.
|
||||
if(fill_table_row(g, row, a, *s) < 0)
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -159,6 +216,9 @@ static int fill_table(HCFGrammar *g, HLLkTable *table)
|
|||
|
||||
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||
{
|
||||
size_t kmax = params? (uintptr_t)params : DEFAULT_KMAX;
|
||||
assert(kmax>0);
|
||||
|
||||
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
||||
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
||||
if(grammar == NULL)
|
||||
|
|
@ -170,7 +230,7 @@ int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
|||
|
||||
// generate table and store in parser->backend_data.
|
||||
HLLkTable *table = h_llktable_new(mm__);
|
||||
if(fill_table(grammar, table) < 0) {
|
||||
if(fill_table(kmax, grammar, table) < 0) {
|
||||
// the table was ambiguous
|
||||
h_cfgrammar_free(grammar);
|
||||
h_llktable_free(table);
|
||||
|
|
@ -240,10 +300,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
seq = h_carray_new(arena);
|
||||
|
||||
// look up applicable production in parse table
|
||||
const HCFSequence *p = h_llk_lookup(table, x, *stream);
|
||||
const HCFSequence *p = h_llk_lookup(table, x, stream);
|
||||
if(p == NULL)
|
||||
goto no_parse;
|
||||
|
||||
// an infinite loop case that shouldn't happen
|
||||
assert(!p->items[0] || p->items[0] != x);
|
||||
|
||||
// push production's rhs onto the stack (in reverse order)
|
||||
HCFChoice **s;
|
||||
for(s = p->items; *s; s++);
|
||||
|
|
@ -255,10 +318,12 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
|
||||
// the top of stack is such that there will be a result...
|
||||
HParsedToken *tok; // will hold result token
|
||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
tok->index = stream->index;
|
||||
tok->bit_offset = stream->bit_offset;
|
||||
if(x == mark) {
|
||||
// hit stack frame boundary...
|
||||
// wrap the accumulated parse result, this sequence is finished
|
||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
tok->token_type = TT_SEQUENCE;
|
||||
tok->seq = seq;
|
||||
|
||||
|
|
@ -277,13 +342,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
case HCF_END:
|
||||
if(!stream->overrun)
|
||||
goto no_parse;
|
||||
h_arena_free(arena, tok);
|
||||
tok = NULL;
|
||||
break;
|
||||
|
||||
case HCF_CHAR:
|
||||
if(input != x->chr)
|
||||
goto no_parse;
|
||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
tok->token_type = TT_UINT;
|
||||
tok->uint = x->chr;
|
||||
break;
|
||||
|
|
@ -293,7 +358,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
goto no_parse;
|
||||
if(!charset_isset(x->charset, input))
|
||||
goto no_parse;
|
||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
tok->token_type = TT_UINT;
|
||||
tok->uint = input;
|
||||
break;
|
||||
|
|
@ -306,8 +370,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
|
||||
// 'tok' has been parsed; process it
|
||||
|
||||
// XXX set tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
||||
|
||||
// perform token reshape if indicated
|
||||
if(x->reshape)
|
||||
tok = (HParsedToken *)x->reshape(make_result(arena, tok));
|
||||
|
|
@ -328,10 +390,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
|||
h_delete_arena(tarena);
|
||||
return make_result(arena, seq->elements[0]);
|
||||
|
||||
no_parse:
|
||||
h_delete_arena(tarena);
|
||||
h_delete_arena(arena);
|
||||
return NULL;
|
||||
no_parse:
|
||||
h_delete_arena(tarena);
|
||||
h_delete_arena(arena);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -357,9 +419,11 @@ int test_llk(void)
|
|||
Y -> y -- for k=3 use "yy"
|
||||
*/
|
||||
|
||||
HParser *c = h_many(h_ch('x'));
|
||||
HParser *q = h_sequence(c, h_ch('y'), NULL);
|
||||
HParser *p = h_choice(q, h_end_p(), NULL);
|
||||
HParser *X = h_optional(h_ch('x'));
|
||||
HParser *Y = h_sequence(h_ch('y'), h_ch('y'), NULL);
|
||||
HParser *A = h_sequence(X, Y, h_ch('a'), NULL);
|
||||
HParser *B = h_sequence(Y, h_ch('b'), NULL);
|
||||
HParser *p = h_choice(A, B, NULL);
|
||||
|
||||
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
||||
|
||||
|
|
@ -372,13 +436,16 @@ int test_llk(void)
|
|||
printf("derive epsilon: ");
|
||||
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
||||
printf("first(A) = ");
|
||||
h_pprint_stringset(stdout, g, h_first(2, g, g->start), 0);
|
||||
printf("follow(C) = ");
|
||||
h_pprint_stringset(stdout, g, h_follow(2, g, h_desugar(&system_allocator, NULL, c)), 0);
|
||||
h_pprint_stringset(stdout, h_first(3, g, g->start), 0);
|
||||
// printf("follow(C) = ");
|
||||
// h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, NULL, c)), 0);
|
||||
|
||||
h_compile(p, PB_LLk, NULL);
|
||||
if(h_compile(p, PB_LLk, (void *)3)) {
|
||||
fprintf(stderr, "does not compile\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
HParseResult *res = h_parse(p, (uint8_t *)"xxy", 3);
|
||||
HParseResult *res = h_parse(p, (uint8_t *)"xyya", 4);
|
||||
if(res)
|
||||
h_pprint(stdout, res->ast, 0, 2);
|
||||
else
|
||||
|
|
|
|||
538
src/backends/lr.c
Normal file
538
src/backends/lr.c
Normal file
|
|
@ -0,0 +1,538 @@
|
|||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include "../parsers/parser_internal.h"
|
||||
#include "lr.h"
|
||||
|
||||
|
||||
|
||||
/* Comparison and hashing functions */
|
||||
|
||||
// compare symbols - terminals by value, others by pointer
|
||||
bool h_eq_symbol(const void *p, const void *q)
|
||||
{
|
||||
const HCFChoice *x=p, *y=q;
|
||||
return (x==y
|
||||
|| (x->type==HCF_END && y->type==HCF_END)
|
||||
|| (x->type==HCF_CHAR && y->type==HCF_CHAR && x->chr==y->chr));
|
||||
}
|
||||
|
||||
// hash symbols - terminals by value, others by pointer
|
||||
HHashValue h_hash_symbol(const void *p)
|
||||
{
|
||||
const HCFChoice *x=p;
|
||||
if(x->type == HCF_END)
|
||||
return 0;
|
||||
else if(x->type == HCF_CHAR)
|
||||
return x->chr * 33;
|
||||
else
|
||||
return h_hash_ptr(p);
|
||||
}
|
||||
|
||||
// compare LR items by value
|
||||
static bool eq_lr_item(const void *p, const void *q)
|
||||
{
|
||||
const HLRItem *a=p, *b=q;
|
||||
|
||||
if(!h_eq_symbol(a->lhs, b->lhs)) return false;
|
||||
if(a->mark != b->mark) return false;
|
||||
if(a->len != b->len) return false;
|
||||
|
||||
for(size_t i=0; i<a->len; i++)
|
||||
if(!h_eq_symbol(a->rhs[i], b->rhs[i])) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// hash LALR items
|
||||
static inline HHashValue hash_lr_item(const void *p)
|
||||
{
|
||||
const HLRItem *x = p;
|
||||
HHashValue hash = 0;
|
||||
|
||||
hash += h_hash_symbol(x->lhs);
|
||||
for(HCFChoice **p=x->rhs; *p; p++)
|
||||
hash += h_hash_symbol(*p);
|
||||
hash += x->mark;
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
// Equality on item sets (DFA states); delegates to set equality on the
// underlying hash sets.
bool h_eq_lr_itemset(const void *p, const void *q)
{
  bool same = h_hashset_equal(p, q);
  return same;
}
|
||||
|
||||
// hash LR item sets (DFA states) - hash the elements and sum
|
||||
HHashValue h_hash_lr_itemset(const void *p)
|
||||
{
|
||||
HHashValue hash = 0;
|
||||
|
||||
H_FOREACH_KEY((const HHashSet *)p, HLRItem *item)
|
||||
hash += hash_lr_item(item);
|
||||
H_END_FOREACH
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool h_eq_transition(const void *p, const void *q)
|
||||
{
|
||||
const HLRTransition *a=p, *b=q;
|
||||
return (a->from == b->from && a->to == b->to && h_eq_symbol(a->symbol, b->symbol));
|
||||
}
|
||||
|
||||
HHashValue h_hash_transition(const void *p)
|
||||
{
|
||||
const HLRTransition *t = p;
|
||||
return (h_hash_symbol(t->symbol) + t->from + t->to); // XXX ?
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Constructors */
|
||||
|
||||
// Allocate an LR item "lhs -> rhs" with the mark at position 'mark'.
// 'rhs' must be NULL-terminated; the array is referenced, not copied.
// 'mark' may be at most the number of rhs symbols (checked by assert).
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
{
  HLRItem *item = h_arena_malloc(a, sizeof(HLRItem));

  // count rhs symbols up to the terminating NULL
  size_t count = 0;
  while(rhs[count] != NULL)
    count++;
  assert(mark <= count);

  item->lhs = lhs;
  item->rhs = rhs;
  item->len = count;
  item->mark = mark;

  return item;
}
|
||||
|
||||
// Create an empty LR state: a hash set of LR items compared and hashed by
// value (eq_lr_item / hash_lr_item).
HLRState *h_lrstate_new(HArena *arena)
{
  HLRState *state = h_hashset_new(arena, eq_lr_item, hash_lr_item);
  return state;
}
|
||||
|
||||
// Allocate an LR parse table with 'nrows' rows (one per automaton state).
//
// The table struct itself comes from the allocator 'mm__'; everything it
// points to lives in a fresh arena owned by the table. Each row starts with an
// empty nonterminal map, an empty terminal (lookahead) map and no row-wide
// action. The start symbol is initialized to NULL and must be set by the
// caller. Release with h_lrtable_free().
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
{
  HArena *arena = h_new_arena(mm__, 0);    // default blocksize
  assert(arena != NULL);

  HLRTable *ret = h_new(HLRTable, 1);
  ret->nrows = nrows;
  ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
  ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
  ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
  ret->start = NULL;    // was left uninitialized; filled in by the table builder
  ret->inadeq = h_slist_new(arena);
  ret->arena = arena;
  ret->mm__ = mm__;

  for(size_t i=0; i<nrows; i++) {
    ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
    ret->tmap[i] = h_stringmap_new(arena);
    ret->forall[i] = NULL;
  }

  return ret;
}
|
||||
|
||||
// Destroy a table created by h_lrtable_new: drop its arena, then release the
// table struct through the allocator recorded in the table.
void h_lrtable_free(HLRTable *table)
{
  // the name 'mm__' is presumably what h_free() expands against
  HAllocator *mm__ = table->mm__;
  HArena *contents = table->arena;

  h_delete_arena(contents);
  h_free(table);
}
|
||||
|
||||
// Allocate a shift action that moves the parser to state 'nextstate'.
HLRAction *h_shift_action(HArena *arena, size_t nextstate)
{
  HLRAction *shift = h_arena_malloc(arena, sizeof(HLRAction));

  shift->nextstate = nextstate;
  shift->type = HLR_SHIFT;

  return shift;
}
|
||||
|
||||
// Allocate a reduce action for the production described by 'item'.
// Records the left-hand side and the rhs length; the rhs symbols themselves
// are only kept in builds without NDEBUG.
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item)
{
  HLRAction *reduce = h_arena_malloc(arena, sizeof(HLRAction));

  reduce->type = HLR_REDUCE;
  reduce->production.length = item->len;
  reduce->production.lhs = item->lhs;
#ifndef NDEBUG
  reduce->production.rhs = item->rhs;
#endif

  return reduce;
}
|
||||
|
||||
// adds 'new' to the branches of 'action'
|
||||
// returns a 'action' if it is already of type HLR_CONFLICT
|
||||
// allocates a new HLRAction otherwise
|
||||
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
|
||||
{
|
||||
if(action->type != HLR_CONFLICT) {
|
||||
HLRAction *old = action;
|
||||
action = h_arena_malloc(arena, sizeof(HLRAction));
|
||||
action->type = HLR_CONFLICT;
|
||||
action->branches = h_slist_new(arena);
|
||||
h_slist_push(action->branches, old);
|
||||
h_slist_push(action->branches, new);
|
||||
} else {
|
||||
// check if 'new' is already among branches
|
||||
HSlistNode *x;
|
||||
for(x=action->branches->head; x; x=x->next) {
|
||||
if(x->elem == new)
|
||||
break;
|
||||
}
|
||||
// add 'new' if it is not already in list
|
||||
if(x == NULL)
|
||||
h_slist_push(action->branches, new);
|
||||
}
|
||||
|
||||
return action;
|
||||
}
|
||||
|
||||
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
|
||||
{
|
||||
return (h_hashtable_empty(table->ntmap[i])
|
||||
&& h_stringmap_empty(table->tmap[i]));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LR driver */
|
||||
|
||||
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||
const HInputStream *stream)
|
||||
{
|
||||
HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
|
||||
|
||||
engine->table = table;
|
||||
engine->state = 0;
|
||||
engine->stack = h_slist_new(tarena);
|
||||
engine->input = *stream;
|
||||
engine->merged[0] = NULL;
|
||||
engine->merged[1] = NULL;
|
||||
engine->arena = arena;
|
||||
engine->tarena = tarena;
|
||||
|
||||
return engine;
|
||||
}
|
||||
|
||||
static const HLRAction *
|
||||
terminal_lookup(const HLREngine *engine, const HInputStream *stream)
|
||||
{
|
||||
const HLRTable *table = engine->table;
|
||||
size_t state = engine->state;
|
||||
|
||||
assert(state < table->nrows);
|
||||
if(table->forall[state]) {
|
||||
assert(h_lrtable_row_empty(table, state)); // that would be a conflict
|
||||
return table->forall[state];
|
||||
} else {
|
||||
return h_stringmap_get_lookahead(table->tmap[state], *stream);
|
||||
}
|
||||
}
|
||||
|
||||
static const HLRAction *
|
||||
nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
|
||||
{
|
||||
const HLRTable *table = engine->table;
|
||||
size_t state = engine->state;
|
||||
|
||||
assert(state < table->nrows);
|
||||
assert(!table->forall[state]); // contains only reduce entries
|
||||
// we are only looking for shifts
|
||||
return h_hashtable_get(table->ntmap[state], symbol);
|
||||
}
|
||||
|
||||
const HLRAction *h_lrengine_action(const HLREngine *engine)
|
||||
{
|
||||
return terminal_lookup(engine, &engine->input);
|
||||
}
|
||||
|
||||
// Read one byte from the engine's input and wrap it in a TT_UINT token
// allocated from the result arena.
//
// Returns NULL when the read overruns the input (end of input).
// The token's index/bit_offset are set to the stream position before the
// read, as the LL(k) backend does; h_lrengine_step copies these fields from
// the left-most value of a reduction, so they must not be left unset.
static HParsedToken *consume_input(HLREngine *engine)
{
  // snapshot the position first; reading advances the stream
  const HInputStream before = engine->input;

  uint8_t c = h_read_bits(&engine->input, 8, false);

  if(engine->input.overrun)
    return NULL;    // end of input

  HParsedToken *v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
  v->token_type = TT_UINT;
  v->uint = c;
  v->index = before.index;
  v->bit_offset = before.bit_offset;

  return v;
}
|
||||
|
||||
// run LR parser for one round; returns false when finished
|
||||
bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
|
||||
{
|
||||
// short-hand names
|
||||
HSlist *stack = engine->stack;
|
||||
HArena *arena = engine->arena;
|
||||
HArena *tarena = engine->tarena;
|
||||
|
||||
if(action == NULL)
|
||||
return false; // no handle recognizable in input, terminate
|
||||
|
||||
assert(action->type == HLR_SHIFT || action->type == HLR_REDUCE);
|
||||
|
||||
if(action->type == HLR_REDUCE) {
|
||||
size_t len = action->production.length;
|
||||
HCFChoice *symbol = action->production.lhs;
|
||||
|
||||
// semantic value of the reduction result
|
||||
HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
value->token_type = TT_SEQUENCE;
|
||||
value->seq = h_carray_new_sized(arena, len);
|
||||
|
||||
// pull values off the stack, rewinding state accordingly
|
||||
HParsedToken *v = NULL;
|
||||
for(size_t i=0; i<len; i++) {
|
||||
v = h_slist_drop(stack);
|
||||
engine->state = (uintptr_t)h_slist_drop(stack);
|
||||
|
||||
// collect values in result sequence
|
||||
value->seq->elements[len-1-i] = v;
|
||||
value->seq->used++;
|
||||
}
|
||||
if(v) {
|
||||
// result position equals position of left-most symbol
|
||||
value->index = v->index;
|
||||
value->bit_offset = v->bit_offset;
|
||||
} else {
|
||||
// XXX how to get the position in this case?
|
||||
}
|
||||
|
||||
// perform token reshape if indicated
|
||||
if(symbol->reshape)
|
||||
value = (HParsedToken *)symbol->reshape(make_result(arena, value));
|
||||
|
||||
// call validation and semantic action, if present
|
||||
if(symbol->pred && !symbol->pred(make_result(tarena, value)))
|
||||
return false; // validation failed -> no parse; terminate
|
||||
if(symbol->action)
|
||||
value = (HParsedToken *)symbol->action(make_result(arena, value));
|
||||
|
||||
// this is LR, building a right-most derivation bottom-up, so no reduce can
|
||||
// follow a reduce. we can also assume no conflict follows for GLR if we
|
||||
// use LALR tables, because only terminal symbols (lookahead) get reduces.
|
||||
const HLRAction *shift = nonterminal_lookup(engine, symbol);
|
||||
if(shift == NULL)
|
||||
return false; // parse error
|
||||
assert(shift->type == HLR_SHIFT);
|
||||
|
||||
// piggy-back the shift right here, never touching the input
|
||||
h_slist_push(stack, (void *)(uintptr_t)engine->state);
|
||||
h_slist_push(stack, value);
|
||||
engine->state = shift->nextstate;
|
||||
|
||||
// check for success
|
||||
if(engine->state == HLR_SUCCESS) {
|
||||
assert(symbol == engine->table->start);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
assert(action->type == HLR_SHIFT);
|
||||
HParsedToken *value = consume_input(engine);
|
||||
h_slist_push(stack, (void *)(uintptr_t)engine->state);
|
||||
h_slist_push(stack, value);
|
||||
engine->state = action->nextstate;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Produce the parse result of a finished engine.
// Returns NULL unless the engine reached the end state HLR_SUCCESS; otherwise
// wraps the value on top of the stack (the start symbol's semantic value).
HParseResult *h_lrengine_result(HLREngine *engine)
{
  // parsing was successful iff the engine reaches the end state
  if(engine->state != HLR_SUCCESS)
    return NULL;

  assert(!h_slist_empty(engine->stack));
  HParsedToken *top = engine->stack->head->elem;

  return make_result(engine->arena, top);
}
|
||||
|
||||
// Parse 'stream' with the LR table stored in parser->backend_data.
// Returns NULL if the parser has no table or the input does not parse.
// The temporary arena is always deleted; the result arena is deleted only
// when there is no result to return.
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(!table)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
  HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);

  // iterate engine to completion
  for(;;) {
    const HLRAction *next = h_lrengine_action(engine);
    if(!h_lrengine_step(engine, next))
      break;
  }

  HParseResult *result = h_lrengine_result(engine);
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);

  return result;
}
|
||||
|
||||
|
||||
|
||||
/* Pretty-printers */
|
||||
|
||||
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
|
||||
{
|
||||
h_pprint_symbol(f, g, item->lhs);
|
||||
fputs(" ->", f);
|
||||
|
||||
HCFChoice **x = item->rhs;
|
||||
HCFChoice **mark = item->rhs + item->mark;
|
||||
if(*x == NULL) {
|
||||
fputc('.', f);
|
||||
} else {
|
||||
while(*x) {
|
||||
if(x == mark)
|
||||
fputc('.', f);
|
||||
else
|
||||
fputc(' ', f);
|
||||
|
||||
if((*x)->type == HCF_CHAR) {
|
||||
// condense character strings
|
||||
fputc('"', f);
|
||||
h_pprint_char(f, (*x)->chr);
|
||||
for(x++; *x; x++) {
|
||||
if(x == mark)
|
||||
break;
|
||||
if((*x)->type != HCF_CHAR)
|
||||
break;
|
||||
h_pprint_char(f, (*x)->chr);
|
||||
}
|
||||
fputc('"', f);
|
||||
} else {
|
||||
h_pprint_symbol(f, g, *x);
|
||||
x++;
|
||||
}
|
||||
}
|
||||
if(x == mark)
|
||||
fputs(".", f);
|
||||
}
|
||||
}
|
||||
|
||||
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
|
||||
const HLRState *state, unsigned int indent)
|
||||
{
|
||||
bool first = true;
|
||||
H_FOREACH_KEY(state, HLRItem *item)
|
||||
if(!first)
|
||||
for(unsigned int i=0; i<indent; i++) fputc(' ', f);
|
||||
first = false;
|
||||
h_pprint_lritem(f, g, item);
|
||||
fputc('\n', f);
|
||||
H_END_FOREACH
|
||||
}
|
||||
|
||||
static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t)
|
||||
{
|
||||
fputs("-", f);
|
||||
h_pprint_symbol(f, g, t->symbol);
|
||||
fprintf(f, "->%lu", t->to);
|
||||
}
|
||||
|
||||
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
|
||||
const HLRDFA *dfa, unsigned int indent)
|
||||
{
|
||||
for(size_t i=0; i<dfa->nstates; i++) {
|
||||
unsigned int indent2 = indent + fprintf(f, "%4lu: ", i);
|
||||
h_pprint_lrstate(f, g, dfa->states[i], indent2);
|
||||
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
||||
const HLRTransition *t = x->elem;
|
||||
if(t->from == i) {
|
||||
for(unsigned int i=0; i<indent2-2; i++) fputc(' ', f);
|
||||
pprint_transition(f, g, t);
|
||||
fputc('\n', f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
|
||||
{
|
||||
switch(action->type) {
|
||||
case HLR_SHIFT:
|
||||
if(action->nextstate == HLR_SUCCESS)
|
||||
fputs("s~", f);
|
||||
else
|
||||
fprintf(f, "s%lu", action->nextstate);
|
||||
break;
|
||||
case HLR_REDUCE:
|
||||
fputs("r(", f);
|
||||
h_pprint_symbol(f, g, action->production.lhs);
|
||||
fputs(" -> ", f);
|
||||
#ifdef NDEBUG
|
||||
// if we can't print the production, at least print its length
|
||||
fprintf(f, "[%lu]", action->production.length);
|
||||
#else
|
||||
HCFSequence seq = {action->production.rhs};
|
||||
h_pprint_sequence(f, g, &seq);
|
||||
#endif
|
||||
fputc(')', f);
|
||||
break;
|
||||
case HLR_CONFLICT:
|
||||
fputc('!', f);
|
||||
for(HSlistNode *x=action->branches->head; x; x=x->next) {
|
||||
HLRAction *branch = x->elem;
|
||||
assert(branch->type != HLR_CONFLICT); // no nesting
|
||||
pprint_lraction(f, g, branch);
|
||||
if(x->next) fputc('/', f); // separator
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert_message(0, "not reached");
|
||||
}
|
||||
}
|
||||
|
||||
static void valprint_lraction(FILE *file, void *env, void *val)
|
||||
{
|
||||
const HLRAction *action = val;
|
||||
const HCFGrammar *grammar = env;
|
||||
pprint_lraction(file, grammar, action);
|
||||
}
|
||||
|
||||
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
|
||||
const HStringMap *map)
|
||||
{
|
||||
h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map);
|
||||
}
|
||||
|
||||
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||
unsigned int indent)
|
||||
{
|
||||
for(size_t i=0; i<table->nrows; i++) {
|
||||
for(unsigned int j=0; j<indent; j++) fputc(' ', f);
|
||||
fprintf(f, "%4lu:", i);
|
||||
if(table->forall[i]) {
|
||||
fputc(' ', f);
|
||||
pprint_lraction(f, g, table->forall[i]);
|
||||
if(!h_lrtable_row_empty(table, i))
|
||||
fputs(" !!", f);
|
||||
}
|
||||
H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
|
||||
fputc(' ', f); // separator
|
||||
h_pprint_symbol(f, g, symbol);
|
||||
fputc(':', f);
|
||||
pprint_lraction(f, g, action);
|
||||
H_END_FOREACH
|
||||
fputc(' ', f); // separator
|
||||
pprint_lrtable_terminals(f, g, table->tmap[i]);
|
||||
fputc('\n', f);
|
||||
}
|
||||
|
||||
#if 0
|
||||
fputs("inadeq=", f);
|
||||
for(HSlistNode *x=table->inadeq->head; x; x=x->next) {
|
||||
fprintf(f, "%lu ", (uintptr_t)x->elem);
|
||||
}
|
||||
fputc('\n', f);
|
||||
#endif
|
||||
}
|
||||
147
src/backends/lr.h
Normal file
147
src/backends/lr.h
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
#ifndef HAMMER_BACKENDS_LR__H
|
||||
#define HAMMER_BACKENDS_LR__H
|
||||
|
||||
#include "../hammer.h"
|
||||
#include "../cfgrammar.h"
|
||||
#include "../internal.h"
|
||||
|
||||
|
||||
typedef HHashSet HLRState; // states are sets of LRItems
|
||||
|
||||
typedef struct HLRDFA_ {
|
||||
size_t nstates;
|
||||
const HLRState **states; // array of size nstates
|
||||
HSlist *transitions;
|
||||
} HLRDFA;
|
||||
|
||||
typedef struct HLRTransition_ {
|
||||
size_t from; // index into 'states' array
|
||||
const HCFChoice *symbol;
|
||||
size_t to; // index into 'states' array
|
||||
} HLRTransition;
|
||||
|
||||
typedef struct HLRItem_ {
|
||||
HCFChoice *lhs;
|
||||
HCFChoice **rhs; // NULL-terminated
|
||||
size_t len; // number of elements in rhs
|
||||
size_t mark;
|
||||
} HLRItem;
|
||||
|
||||
typedef struct HLRAction_ {
|
||||
enum {HLR_SHIFT, HLR_REDUCE, HLR_CONFLICT} type;
|
||||
union {
|
||||
// used with HLR_SHIFT
|
||||
size_t nextstate;
|
||||
|
||||
// used with HLR_REDUCE
|
||||
struct {
|
||||
HCFChoice *lhs; // symbol carrying semantic actions etc.
|
||||
size_t length; // # of symbols in rhs
|
||||
#ifndef NDEBUG
|
||||
HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse
|
||||
#endif
|
||||
} production;
|
||||
|
||||
// used with HLR_CONFLICT
|
||||
HSlist *branches; // list of possible HLRActions
|
||||
};
|
||||
} HLRAction;
|
||||
|
||||
typedef struct HLRTable_ {
|
||||
size_t nrows; // dimension of the pointer arrays below
|
||||
HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
|
||||
HStringMap **tmap; // map lookahead strings to HLRActions, per row
|
||||
HLRAction **forall; // shortcut to set an action for an entire row
|
||||
HCFChoice *start; // start symbol
|
||||
HSlist *inadeq; // indices of any inadequate states
|
||||
HArena *arena;
|
||||
HAllocator *mm__;
|
||||
} HLRTable;
|
||||
|
||||
typedef struct HLREnhGrammar_ {
|
||||
HCFGrammar *grammar; // enhanced grammar
|
||||
HHashTable *tmap; // maps transitions to enhanced-grammar symbols
|
||||
HHashTable *smap; // maps enhanced-grammar symbols to transitions
|
||||
HHashTable *corr; // maps symbols to sets of corresponding e. symbols
|
||||
HArena *arena;
|
||||
} HLREnhGrammar;
|
||||
|
||||
typedef struct HLREngine_ {
|
||||
const HLRTable *table;
|
||||
size_t state;
|
||||
|
||||
HSlist *stack; // holds pairs: (saved state, semantic value)
|
||||
HInputStream input;
|
||||
|
||||
struct HLREngine_ *merged[2]; // ancestors merged into this engine
|
||||
|
||||
HArena *arena; // will hold the results
|
||||
HArena *tarena; // tmp, deleted after parse
|
||||
} HLREngine;
|
||||
|
||||
#define HLR_SUCCESS ((size_t)~0)  // parser end state


// Hash table iteration macros.
//
//   H_FOREACH_KEY(table, Type *key)          ...body... H_END_FOREACH
//   H_FOREACH(table, KType *key, VType val)  ...body... H_END_FOREACH
//
// The key variable is declared const-qualified. Buckets whose key is NULL are
// skipped. The body runs inside nested for loops, so 'continue' moves on to
// the next entry while 'break' only leaves the current bucket chain.
// Every H_FOREACH* must be closed with H_END_FOREACH.
//
// XXX move to internal.h or something
// XXX replace other hashtable iterations with this
#define H_FOREACH_(HT) {                                                    \
    const HHashTable *ht__ = HT;                                            \
    for(size_t i__=0; i__ < ht__->capacity; i__++) {                        \
      for(HHashTableEntry *hte__ = &ht__->contents[i__];                    \
          hte__;                                                            \
          hte__ = hte__->next) {                                            \
        if(hte__->key == NULL) continue;

#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT)                            \
        const KEYVAR = hte__->key;

#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR)             \
        VALVAR = hte__->value;

#define H_END_FOREACH                                                       \
      }                                                                     \
    }                                                                       \
  }
|
||||
|
||||
|
||||
|
||||
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark);
|
||||
HLRState *h_lrstate_new(HArena *arena);
|
||||
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows);
|
||||
void h_lrtable_free(HLRTable *table);
|
||||
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||
const HInputStream *stream);
|
||||
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
|
||||
HLRAction *h_shift_action(HArena *arena, size_t nextstate);
|
||||
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
|
||||
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
|
||||
|
||||
bool h_eq_symbol(const void *p, const void *q);
|
||||
bool h_eq_lr_itemset(const void *p, const void *q);
|
||||
bool h_eq_transition(const void *p, const void *q);
|
||||
HHashValue h_hash_symbol(const void *p);
|
||||
HHashValue h_hash_lr_itemset(const void *p);
|
||||
HHashValue h_hash_transition(const void *p);
|
||||
|
||||
HLRDFA *h_lr0_dfa(HCFGrammar *g);
|
||||
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa);
|
||||
|
||||
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser);
|
||||
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
|
||||
void h_lalr_free(HParser *parser);
|
||||
|
||||
const HLRAction *h_lrengine_action(const HLREngine *engine);
|
||||
bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
|
||||
HParseResult *h_lrengine_result(HLREngine *engine);
|
||||
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||
|
||||
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item);
|
||||
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
|
||||
const HLRState *state, unsigned int indent);
|
||||
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
|
||||
const HLRDFA *dfa, unsigned int indent);
|
||||
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||
unsigned int indent);
|
||||
|
||||
#endif
|
||||
233
src/backends/lr0.c
Normal file
233
src/backends/lr0.c
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
#include <assert.h>
|
||||
#include "lr.h"
|
||||
|
||||
|
||||
|
||||
/* Constructing the characteristic automaton (handle recognizer) */
|
||||
|
||||
// Return a copy of 'item' (allocated from 'arena') with the mark moved one
// symbol to the right. The mark must not already be at the end.
static HLRItem *advance_mark(HArena *arena, const HLRItem *item)
{
  assert(item->rhs[item->mark] != NULL);

  HLRItem *next = h_arena_malloc(arena, sizeof(HLRItem));
  *next = *item;
  next->mark = item->mark + 1;

  return next;
}
|
||||
|
||||
// Expand the item set 'items' in place to its closure: for every item whose
// mark stands before a nonterminal, add the initial items (mark at 0) of that
// nonterminal's productions, repeating until nothing new appears.
//
// Character sets count as nonterminals here, with one single-character
// production per member; such symbols also get h_act_first as their reshape.
static void expand_to_closure(HCFGrammar *g, HHashSet *items)
{
  HAllocator *mm__ = g->mm__;
  HArena *arena = g->arena;
  HSlist *pending = h_slist_new(arena);

  // seed the work list with the items already present
  H_FOREACH_KEY(items, HLRItem *item)
    h_slist_push(pending, (void *)item);
  H_END_FOREACH

  while(!h_slist_empty(pending)) {
    const HLRItem *item = h_slist_pop(pending);
    HCFChoice *sym = item->rhs[item->mark];   // symbol after mark

    if(sym == NULL)
      continue;       // mark at the end, nothing to follow

    // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
    switch(sym->type) {
    case HCF_CHOICE:
      // one initial item per production of sym
      for(HCFSequence **prod=sym->seq; *prod; prod++) {
        HLRItem *fresh = h_lritem_new(arena, sym, (*prod)->items, 0);
        if(h_hashset_present(items, fresh))
          continue;
        h_hashset_put(items, fresh);
        h_slist_push(pending, fresh);
      }
      break;

    case HCF_CHARSET:
      for(unsigned int ch=0; ch<256; ch++) {
        if(!charset_isset(sym->charset, ch))
          continue;

        // XXX allocate these single-character symbols statically somewhere
        HCFChoice **rhs = h_new(HCFChoice *, 2);
        rhs[0] = h_new(HCFChoice, 1);
        rhs[0]->type = HCF_CHAR;
        rhs[0]->chr = ch;
        rhs[1] = NULL;
        HLRItem *fresh = h_lritem_new(arena, sym, rhs, 0);
        h_hashset_put(items, fresh);
        // single-character item needs no further work
      }
      // if sym is a non-terminal, we need a reshape on it
      // this seems as good a place as any to set it
      sym->reshape = h_act_first;
      break;

    default:
      break;          // terminal after the mark
    }
  }
}
|
||||
|
||||
// Build the LR(0) automaton for grammar 'g'.
//
// Starting from the closure of the start symbol's initial items (state 0),
// each state on the work list is processed: its items are grouped by the
// symbol after the mark, each group is advanced past that symbol and closed,
// and the resulting item set is either matched to an existing state or
// registered as a new one. A transition is recorded either way.
// Everything is allocated from the grammar's arena.
HLRDFA *h_lr0_dfa(HCFGrammar *g)
{
  HArena *arena = g->arena;

  // known states; maps itemsets to assigned array indices
  HHashSet *states = h_hashset_new(arena, h_eq_lr_itemset, h_hash_lr_itemset);
  HSlist *transitions = h_slist_new(arena);

  // states still to be processed. to save lookups, two elements are pushed per
  // state: the itemset and its assigned index.
  HSlist *work = h_slist_new(arena);

  // make initial state (kernel), close it, register it as state 0
  HLRState *start = h_lrstate_new(arena);
  assert(g->start->type == HCF_CHOICE);
  for(HCFSequence **prod=g->start->seq; *prod; prod++) {
    HLRItem *initial = h_lritem_new(arena, g->start, (*prod)->items, 0);
    h_hashset_put(start, initial);
  }
  expand_to_closure(g, start);
  h_hashtable_put(states, start, NULL);   // index 0
  h_slist_push(work, start);
  h_slist_push(work, NULL);               // index 0

  while(!h_slist_empty(work)) {
    // the index was pushed last, so it comes off first
    size_t state_idx = (uintptr_t)h_slist_pop(work);
    HLRState *state = h_slist_pop(work);

    // maps edge symbols to neighbor states (item sets) of 'state'
    HHashTable *neighbors = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);

    // group the advanced items of this state by the symbol they step over
    H_FOREACH_KEY(state, HLRItem *item)
      HCFChoice *edge = item->rhs[item->mark];  // symbol after mark
      if(edge == NULL)
        continue;     // mark at the end: no outgoing edge from this item

      // find or create prospective neighbor set
      HLRState *target = h_hashtable_get(neighbors, edge);
      if(target == NULL) {
        target = h_lrstate_new(arena);
        h_hashtable_put(neighbors, edge, target);
      }

      // ...and add the advanced item to it
      h_hashset_put(target, advance_mark(arena, item));
    H_END_FOREACH

    // merge expanded neighbor sets into the set of existing states
    H_FOREACH(neighbors, HCFChoice *edge, HLRState *target)
      expand_to_closure(g, target);

      // look up existing state, allocate new if not found
      size_t target_idx;
      if(h_hashset_present(states, target)) {
        target_idx = (uintptr_t)h_hashtable_get(states, target);
      } else {
        target_idx = states->used;
        h_hashtable_put(states, target, (void *)(uintptr_t)target_idx);
        h_slist_push(work, target);
        h_slist_push(work, (void *)(uintptr_t)target_idx);
      }

      // add transition "state --edge--> target"
      HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
      t->from = state_idx;
      t->to = target_idx;
      t->symbol = edge;
      h_slist_push(transitions, t);
    H_END_FOREACH
  } // end while(work)

  // fill DFA struct: lay the states out by their assigned index
  HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA));
  dfa->nstates = states->used;
  dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *));
  H_FOREACH(states, HLRState *itemset, void *slot)
    size_t idx = (uintptr_t)slot;
    dfa->states[idx] = itemset;
  H_END_FOREACH
  dfa->transitions = transitions;

  return dfa;
}
|
||||
|
||||
|
||||
|
||||
/* LR(0) table generation */
|
||||
|
||||
static inline
|
||||
void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
|
||||
size_t nextstate)
|
||||
{
|
||||
HLRAction *action = h_shift_action(table->arena, nextstate);
|
||||
|
||||
switch(symbol->type) {
|
||||
case HCF_END:
|
||||
h_stringmap_put_end(table->tmap[state], action);
|
||||
break;
|
||||
case HCF_CHAR:
|
||||
h_stringmap_put_char(table->tmap[state], symbol->chr, action);
|
||||
break;
|
||||
default:
|
||||
// nonterminal case
|
||||
h_hashtable_put(table->ntmap[state], symbol, action);
|
||||
}
|
||||
}
|
||||
|
||||
/* Build an LR(0) parse table for grammar `g` from its automaton `dfa`.
 *
 * Steps, in order:
 *   1. allocate a table with one row per DFA state and remember g->start;
 *   2. in row 0, enter a shift on the start symbol to HLR_SUCCESS;
 *   3. for every DFA transition, enter a shift from its source state on its
 *      symbol to its destination state;
 *   4. for every state, enter a reduce action in the row's `forall` slot for
 *      each item whose mark is at the end, and push the state's index onto
 *      table->inadeq if a conflict was noticed along the way.
 *
 * Returns the newly created table; reduce and conflict actions are allocated
 * from the table's own arena.
 */
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
{
  HAllocator *mm__ = g->mm__;
  HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
  HArena *arena = table->arena;

  // keep the grammar's start symbol with the table
  table->start = g->start;

  // the start symbol, seen in state 0, leads to the accepting end state
  put_shift(table, 0, g->start, HLR_SUCCESS);

  // shift entries: each transition x-A->y yields "shift, goto y" at (x,A)
  HSlistNode *node = dfa->transitions->head;
  while(node) {
    HLRTransition *edge = node->elem;
    put_shift(table, edge->from, edge->symbol, edge->to);
    node = node->next;
  }

  // reduce entries, plus bookkeeping of inadequate states
  for(size_t state_idx = 0; state_idx < dfa->nstates; state_idx++) {
    bool inadequate = false;

    // look for reducible items (mark at the very end) in this state
    H_FOREACH_KEY(dfa->states[state_idx], HLRItem *item)
      if(item->mark == item->len) {
        HLRAction *entry = h_reduce_action(arena, item);

        // a reduce already sitting in forall means a reduce/reduce conflict
        if(table->forall[state_idx]) {
          entry = h_lr_conflict(arena, table->forall[state_idx], entry);
          inadequate = true;
        }
        table->forall[state_idx] = entry;

        // any other entry in this row means a shift/reduce conflict
        // NOTE: such conflicts are only flagged here; they are not turned
        // into HLR_CONFLICT actions at this point
        if(!h_lrtable_row_empty(table, state_idx))
          inadequate = true;
      }
    H_END_FOREACH

    if(inadequate)
      h_slist_push(table->inadeq, (void *)(uintptr_t)state_idx);
  }

  return table;
}
|
||||
|
|
@ -3,14 +3,6 @@
|
|||
#include "../internal.h"
|
||||
#include "../parsers/parser_internal.h"
|
||||
|
||||
/* Multiplicative byte hash (the "times 33" scheme): starts from 5381 and,
 * for each of the `len` bytes at `buf` in order, computes
 * hash = hash * 33 + byte. Returns 5381 when len is 0. */
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t pos = 0; pos < len; pos++) {
    acc = acc * 33 + buf[pos];
  }
  return acc;
}
|
||||
|
||||
// short-hand for constructing HCachedResult's
|
||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||
|
|
@ -214,7 +206,7 @@ void h_packrat_free(HParser *parser) {
|
|||
}
|
||||
|
||||
static uint32_t cache_key_hash(const void* key) {
|
||||
return djbhash(key, sizeof(HParserCacheKey));
|
||||
return h_djbhash(key, sizeof(HParserCacheKey));
|
||||
}
|
||||
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue