hammer/src/backends/lr.c

533 lines
14 KiB
C
Raw Normal View History

#include <assert.h>
#include "../parsers/parser_internal.h"
#include "lr.h"
/* Comparison and hashing functions */
// Symbol equality for use as a hash table comparator.
// Identical pointers always match. Otherwise two end-of-input symbols match
// each other, and two character terminals match when they carry the same
// character. Any other pair of distinct pointers is unequal.
bool h_eq_symbol(const void *p, const void *q)
{
  const HCFChoice *left = p;
  const HCFChoice *right = q;

  if(left == right)
    return true;

  if(left->type == HCF_END)
    return right->type == HCF_END;

  if(left->type == HCF_CHAR)
    return right->type == HCF_CHAR && left->chr == right->chr;

  return false;
}
// Hash function matching h_eq_symbol: terminals hash by value, everything
// else by address.
//   HCF_END  -> 0
//   HCF_CHAR -> character value times 33
//   others   -> h_hash_ptr of the symbol pointer
HHashValue h_hash_symbol(const void *p)
{
  const HCFChoice *sym = p;

  switch(sym->type) {
  case HCF_END:
    return 0;
  case HCF_CHAR:
    return sym->chr * 33;
  default:
    return h_hash_ptr(p);
  }
}
// Value comparison of two LR items: they are equal when the left-hand sides
// match (per h_eq_symbol), the mark positions and lengths agree, and every
// right-hand-side symbol matches position by position.
static bool eq_lr_item(const void *p, const void *q)
{
  const HLRItem *first = p;
  const HLRItem *second = q;

  if(!h_eq_symbol(first->lhs, second->lhs)
     || first->mark != second->mark
     || first->len != second->len)
    return false;

  size_t k = 0;
  while(k < first->len) {
    if(!h_eq_symbol(first->rhs[k], second->rhs[k]))
      return false;
    k++;
  }

  return true;
}
// Hash an LR item: sum of the symbol hashes of the left-hand side and of
// every right-hand-side symbol (walking rhs up to its NULL terminator), plus
// the mark position.
static inline HHashValue hash_lr_item(const void *p)
{
  const HLRItem *item = p;
  HHashValue sum = 0;

  sum += h_hash_symbol(item->lhs);

  HCFChoice **sym = item->rhs;
  while(*sym) {
    sum += h_hash_symbol(*sym);
    sym++;
  }

  sum += item->mark;
  return sum;
}
// Equality of two item sets (DFA states); delegates to h_hashset_equal.
bool h_eq_lr_itemset(const void *p, const void *q)
{
  const bool same_items = h_hashset_equal(p, q);
  return same_items;
}
// hash LR item sets (DFA states) - hash the elements and sum
HHashValue h_hash_lr_itemset(const void *p)
{
HHashValue hash = 0;
H_FOREACH_KEY((const HHashSet *)p, HLRItem *item)
hash += hash_lr_item(item);
H_END_FOREACH
return hash;
}
// Two transitions are equal when source state, target state and the labelling
// symbol (per h_eq_symbol) all agree.
bool h_eq_transition(const void *p, const void *q)
{
  const HLRTransition *left = p;
  const HLRTransition *right = q;

  if(left->from != right->from)
    return false;
  if(left->to != right->to)
    return false;
  return h_eq_symbol(left->symbol, right->symbol);
}
// Hash a transition as the symbol hash plus the source and target state
// numbers. (The original author flagged this combination as provisional.)
HHashValue h_hash_transition(const void *p)
{
  const HLRTransition *trans = p;
  const HHashValue symbol_hash = h_hash_symbol(trans->symbol);

  return (symbol_hash + trans->from + trans->to);  // XXX ?
}
/* Constructors */
// Allocate an LR item from arena 'a'.
//   lhs  - left-hand-side symbol of the production
//   rhs  - NULL-terminated array of right-hand-side symbols; the pointer is
//          stored as-is, not copied
//   mark - position of the dot; must not exceed the number of rhs symbols
// The item's length is computed here by counting rhs up to the terminator.
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
{
  HLRItem *item = h_arena_malloc(a, sizeof(HLRItem));

  size_t len = 0;
  while(rhs[len] != NULL)
    len++;
  assert(mark <= len);

  item->lhs = lhs;
  item->rhs = rhs;
  item->len = len;
  item->mark = mark;

  return item;
}
// Create an empty LR state: a hash set of LR items, allocated from 'arena',
// that compares and hashes its members by value.
HLRState *h_lrstate_new(HArena *arena)
{
  HLRState *state = h_hashset_new(arena, eq_lr_item, hash_lr_item);
  return state;
}
// Create an LR parse table with 'nrows' states.
//
// The table header is obtained through the allocator 'mm__'; everything else
// (row array, default-action array, per-row hash tables, the list of
// inadequate states) lives in a fresh arena owned by the table. Release the
// table with h_lrtable_free().
//
// Every row starts out as an empty symbol->action hash table with no default
// ('forall') action; the start symbol is initialised to NULL so that it never
// holds an indeterminate value before the table generator sets it.
//
// Returns NULL if the table header cannot be allocated.
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
{
  HArena *arena = h_new_arena(mm__, 0);    // default blocksize
  assert(arena != NULL);

  HLRTable *ret = h_new(HLRTable, 1);
  if(ret == NULL) {
    // don't leak the arena when the header allocation fails
    h_delete_arena(arena);
    return NULL;
  }

  ret->nrows = nrows;
  ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
  ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
  ret->inadeq = h_slist_new(arena);
  ret->start = NULL;
  ret->arena = arena;
  ret->mm__ = mm__;

  for(size_t i=0; i<nrows; i++) {
    ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
    ret->forall[i] = NULL;
  }

  return ret;
}
// Destroy a table created by h_lrtable_new(): deletes the table's arena and
// then releases the table header itself.
// Passing NULL is a no-op, mirroring free(NULL).
void h_lrtable_free(HLRTable *table)
{
  if(table == NULL)
    return;

  // NOTE(review): 'mm__' is not referenced by name below; presumably h_free
  // is a macro that picks it up implicitly -- keep the variable name.
  HAllocator *mm__ = table->mm__;
  h_delete_arena(table->arena);
  h_free(table);
}
// Allocate a shift action from 'arena' that moves the parser to state
// 'nextstate'.
HLRAction *h_shift_action(HArena *arena, size_t nextstate)
{
  HLRAction *shift = h_arena_malloc(arena, sizeof *shift);

  shift->nextstate = nextstate;
  shift->type = HLR_SHIFT;

  return shift;
}
// Allocate a reduce action from 'arena' for the production described by
// 'item': it records the production's left-hand side and length. The
// right-hand side pointer is additionally kept in builds without NDEBUG.
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item)
{
  HLRAction *reduce = h_arena_malloc(arena, sizeof *reduce);

  reduce->type = HLR_REDUCE;
  reduce->production.length = item->len;
  reduce->production.lhs = item->lhs;
#ifndef NDEBUG
  reduce->production.rhs = item->rhs;
#endif

  return reduce;
}
// Adds 'new' to the branches of 'action'.
// If 'action' is already of type HLR_CONFLICT it is extended in place (unless
// 'new' is already one of its branches, compared by pointer) and returned.
// Otherwise a new HLR_CONFLICT action is allocated from 'arena' with 'action'
// and 'new' as its two branches, and that new action is returned.
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
{
  if(action->type != HLR_CONFLICT) {
    // wrap the existing action together with the new one
    HLRAction *old = action;
    action = h_arena_malloc(arena, sizeof(HLRAction));
    action->type = HLR_CONFLICT;
    action->branches = h_slist_new(arena);
    h_slist_push(action->branches, old);
    h_slist_push(action->branches, new);
    return action;
  }

  // already a conflict: nothing to do if 'new' is among the branches
  for(HSlistNode *x=action->branches->head; x; x=x->next) {
    if(x->elem == new)
      return action;
  }

  h_slist_push(action->branches, new);
  return action;
}
/* LR driver */
// Create an LR engine for 'table' reading from 'stream'.
// The engine itself and its stack are allocated from 'tarena'; 'arena' is
// remembered for allocating results. The input stream is copied by value, so
// the caller's stream object is not advanced by the engine. The engine starts
// in state 0 with its 'run' flag set.
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
                          const HInputStream *stream)
{
  HLREngine *eng = h_arena_malloc(tarena, sizeof *eng);

  eng->arena = arena;
  eng->tarena = tarena;
  eng->table = table;
  eng->input = *stream;
  eng->stack = h_slist_new(tarena);
  eng->state = 0;
  eng->run = true;

  return eng;
}
// Look up the action for 'symbol' in the engine's current state.
// A state's default ('forall') action, when present, applies to every symbol
// and takes precedence; such a state must have no per-symbol entries.
// Otherwise the state's row is consulted (the result of h_hashtable_get).
static const HLRAction *
terminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
{
  const HLRTable *table = engine->table;
  size_t state = engine->state;

  assert(state < table->nrows);

  if(!table->forall[state])
    return h_hashtable_get(table->rows[state], symbol);

  assert(h_hashtable_empty(table->rows[state]));  // that would be a conflict
  return table->forall[state];
}
// Look up the action to take on nonterminal 'symbol' in the engine's current
// state. A state's default ('forall') action, when present, takes precedence
// and the state must then have no per-symbol entries; otherwise the state's
// row is consulted (the result of h_hashtable_get).
static const HLRAction *
nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
{
  const HLRTable *table = engine->table;
  size_t state = engine->state;

  assert(state < table->nrows);

  if(!table->forall[state])
    return h_hashtable_get(table->rows[state], symbol);

  assert(h_hashtable_empty(table->rows[state]));  // that would be a conflict
  return table->forall[state];
}
const HLRAction *h_lrengine_action(const HLREngine *engine)
{
HArena *tarena = engine->tarena;
// XXX use statically-allocated terminal symbols
HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
HInputStream lookahead = engine->input;
uint8_t c = h_read_bits(&lookahead, 8, false);
if(lookahead.overrun) { // end of input
x->type = HCF_END;
} else {
x->type = HCF_CHAR;
x->chr = c;
}
return terminal_lookup(engine, x);
}
// Consume eight bits from the engine's input stream and wrap them in a token.
// Returns NULL if the read overran the input; otherwise a TT_UINT token,
// allocated from the result arena, holding the value read.
// NOTE(review): the token's index/bit_offset are not set here although the
// reduce step copies them from the left-most value -- confirm whether the
// arena zero-fills or whether positions should be recorded here.
static HParsedToken *consume_input(HLREngine *engine)
{
  uint8_t c = h_read_bits(&engine->input, 8, false);

  if(engine->input.overrun)     // end of input
    return NULL;

  HParsedToken *v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
  v->token_type = TT_UINT;
  v->uint = c;

  return v;
}
// run LR parser for one round; returns false when finished
//
// 'action' is the table entry selected for the current state and lookahead
// (NULL if there is none). It must be a plain shift or reduce; conflicts are
// not handled here.
//
// The stack holds alternating entries: a state number (cast to a pointer)
// followed by the semantic value pushed on top of it.
//
// shift:  consume one input symbol, push the current state and the symbol's
//         value, and move to the action's target state.
// reduce: pop as many state/value pairs as the production is long, collecting
//         the values into a sequence token; apply the left-hand side's
//         reshape, validation and semantic action if present; then take the
//         shift on the left-hand side symbol immediately.
//
// Returns false when there is no action, validation fails, no shift exists
// for the reduced symbol, or the start symbol has been reduced; true
// otherwise.
static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
{
  // short-hand names
  HSlist *stack = engine->stack;
  HArena *arena = engine->arena;
  HArena *tarena = engine->tarena;

  if(action == NULL)
    return false;   // no handle recognizable in input, terminate

  assert(action->type == HLR_SHIFT || action->type == HLR_REDUCE);

  if(action->type == HLR_REDUCE) {
    size_t len = action->production.length;
    HCFChoice *symbol = action->production.lhs;

    // semantic value of the reduction result
    HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken));
    value->token_type = TT_SEQUENCE;
    value->seq = h_carray_new_sized(arena, len);

    // pull values off the stack, rewinding state accordingly
    HParsedToken *v = NULL;
    for(size_t i=0; i<len; i++) {
      v = h_slist_drop(stack);
      engine->state = (uintptr_t)h_slist_drop(stack);

      // collect values in result sequence (popped right-to-left)
      value->seq->elements[len-1-i] = v;
      value->seq->used++;
    }
    if(v) {
      // result position equals position of left-most symbol
      value->index = v->index;
      value->bit_offset = v->bit_offset;
    } else {
      // XXX how to get the position in this case?
      // NOTE(review): index/bit_offset of 'value' are left unset on this
      // path (empty production, or left-most value is NULL).
    }

    // perform token reshape if indicated
    if(symbol->reshape)
      value = (HParsedToken *)symbol->reshape(make_result(arena, value));

    // call validation and semantic action, if present
    if(symbol->pred && !symbol->pred(make_result(tarena, value)))
      return false;     // validation failed -> no parse; terminate
    if(symbol->action)
      value = (HParsedToken *)symbol->action(make_result(arena, value));

    // this is LR, building a right-most derivation bottom-up, so no reduce can
    // follow a reduce. we can also assume no conflict follows for GLR if we
    // use LALR tables, because only terminal symbols (lookahead) get reduces.
    const HLRAction *shift = nonterminal_lookup(engine, symbol);
    if(shift == NULL)
      return false;     // parse error
    assert(shift->type == HLR_SHIFT);

    // piggy-back the shift right here, never touching the input
    h_slist_push(stack, (void *)(uintptr_t)engine->state);
    h_slist_push(stack, value);
    engine->state = shift->nextstate;

    if(symbol == engine->table->start)
      return false;     // reduced to start symbol; accept!
  } else {
    assert(action->type == HLR_SHIFT);
    HParsedToken *value = consume_input(engine);
    h_slist_push(stack, (void *)(uintptr_t)engine->state);
    h_slist_push(stack, value);
    engine->state = action->nextstate;
  }

  return true;
}
// Perform one parser step with 'action' and record in engine->run whether
// the engine should keep going (false once the step reports it is finished).
void h_lrengine_step(HLREngine *engine, const HLRAction *action)
{
  const bool keep_going = h_lrengine_step_(engine, action);
  engine->run = keep_going;
}
// Produce the parse result once the engine has stopped.
// The parse counts as successful when the engine is in state 0 with a
// non-empty stack; the value on top of the stack is then wrapped in a result
// allocated from the engine's result arena. Returns NULL otherwise.
HParseResult *h_lrengine_result(HLREngine *engine)
{
  if(engine->state != 0 || h_slist_empty(engine->stack))
    return NULL;

  // on top of the stack is the start symbol's semantic value
  HParsedToken *top = engine->stack->head->elem;
  return make_result(engine->arena, top);
}
// Parse 'stream' with the LR table stored in parser->backend_data.
// Returns NULL if the parser has no table or the input does not parse.
// Two arenas are created from 'mm__': one holding the results, which is
// deleted here only when there is no result, and a temporary one that is
// always deleted before returning.
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
  HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);

  // iterate engine to completion
  while(engine->run) {
    const HLRAction *next = h_lrengine_action(engine);
    h_lrengine_step(engine, next);
  }

  // fetch the result before the temporary arena (holding the engine) goes away
  HParseResult *result = h_lrengine_result(engine);
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);

  return result;
}
/* Pretty-printers */
// Print an LR item to 'f' as "lhs -> symbols" with a '.' at the mark.
// Each symbol (or run of characters) is preceded by a space, or by the dot if
// the mark sits right before it. Consecutive character terminals are
// condensed into one double-quoted string; a run is split at the mark so the
// dot stays visible. An item with an empty right-hand side prints just the dot.
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
{
  h_pprint_symbol(f, g, item->lhs);
  fputs(" ->", f);

  HCFChoice **cur = item->rhs;
  HCFChoice **dot = item->rhs + item->mark;

  // empty production
  if(*cur == NULL) {
    fputc('.', f);
    return;
  }

  while(*cur != NULL) {
    fputc(cur == dot ? '.' : ' ', f);

    if((*cur)->type != HCF_CHAR) {
      h_pprint_symbol(f, g, *cur);
      cur++;
      continue;
    }

    // condense character strings
    fputc('"', f);
    h_pprint_char(f, (*cur)->chr);
    cur++;
    while(*cur != NULL && cur != dot && (*cur)->type == HCF_CHAR) {
      h_pprint_char(f, (*cur)->chr);
      cur++;
    }
    fputc('"', f);
  }

  // mark at the very end of the production
  if(cur == dot)
    fputs(".", f);
}
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
const HLRState *state, unsigned int indent)
{
bool first = true;
H_FOREACH_KEY(state, HLRItem *item)
if(!first)
for(unsigned int i=0; i<indent; i++) fputc(' ', f);
first = false;
h_pprint_lritem(f, g, item);
fputc('\n', f);
H_END_FOREACH
}
// Print a transition to 'f' as "-symbol->target".
// The target state is printed with %zu (after conversion to size_t) so the
// format matches the argument on platforms where size_t is not unsigned long.
static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t)
{
  fputs("-", f);
  h_pprint_symbol(f, g, t->symbol);
  fprintf(f, "->%zu", (size_t)t->to);
}
// Print the LR DFA to 'f': for every state its number and item set, followed
// by one line per outgoing transition.
// Item lines after the first are aligned under the first one; transition
// lines are indented two columns less than the items.
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
                    const HLRDFA *dfa, unsigned int indent)
{
  for(size_t i=0; i<dfa->nstates; i++) {
    // width of the "NNNN: " prefix determines the item indentation;
    // a failed write (negative return) counts as zero width
    int printed = fprintf(f, "%4zu: ", i);
    unsigned int indent2 = indent + (printed > 0 ? (unsigned int)printed : 0u);
    h_pprint_lrstate(f, g, dfa->states[i], indent2);

    // guard the subtraction against unsigned wrap-around
    unsigned int pad = (indent2 >= 2) ? indent2 - 2 : 0;

    for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
      const HLRTransition *t = x->elem;
      if(t->from != i)
        continue;

      for(unsigned int col=0; col<pad; col++)
        fputc(' ', f);
      pprint_transition(f, g, t);
      fputc('\n', f);
    }
  }
}
void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
{
2013-06-19 18:21:53 +02:00
switch(action->type) {
case HLR_SHIFT:
fprintf(f, "s%lu", action->nextstate);
2013-06-19 18:21:53 +02:00
break;
case HLR_REDUCE:
fputs("r(", f);
h_pprint_symbol(f, g, action->production.lhs);
fputs(" -> ", f);
#ifdef NDEBUG
// if we can't print the production, at least print its length
fprintf(f, "[%lu]", action->production.length);
#else
HCFSequence seq = {action->production.rhs};
h_pprint_sequence(f, g, &seq);
#endif
fputc(')', f);
2013-06-19 18:21:53 +02:00
break;
case HLR_CONFLICT:
fputc('!', f);
for(HSlistNode *x=action->branches->head; x; x=x->next) {
HLRAction *branch = x->elem;
assert(branch->type != HLR_CONFLICT); // no nesting
pprint_lraction(f, g, branch);
if(x->next) fputc('/', f); // separator
}
break;
default:
assert_message(0, "not reached");
}
}
// Print the LR table to 'f', one line per state, each preceded by 'indent'
// spaces and the state number.
// A state's default ('forall') action is shown as " - <action> -", followed
// by " !!" if the state also has per-symbol entries. The per-symbol entries
// follow as " <symbol>:<action>".
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
                      unsigned int indent)
{
  for(size_t i=0; i<table->nrows; i++) {
    for(unsigned int j=0; j<indent; j++) fputc(' ', f);
    fprintf(f, "%4zu:", i);
    if(table->forall[i]) {
      fputs(" - ", f);
      pprint_lraction(f, g, table->forall[i]);
      fputs(" -", f);
      if(!h_hashtable_empty(table->rows[i]))
        fputs(" !!", f);
    }
    H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action)
      fputc(' ', f);    // separator
      h_pprint_symbol(f, g, symbol);
      fputc(':', f);
      pprint_lraction(f, g, action);
    H_END_FOREACH
    fputc('\n', f);
  }

#if 0
  fputs("inadeq=", f);
  for(HSlistNode *x=table->inadeq->head; x; x=x->next) {
    fprintf(f, "%lu ", (uintptr_t)x->elem);
  }
  fputc('\n', f);
#endif
}