eliminate the right stack; work with the HInputStream directly

This commit is contained in:
Sven M. Hallberg 2013-06-21 20:11:19 +02:00
parent 1e59e461fa
commit c32cf709b2
3 changed files with 54 additions and 60 deletions

View file

@ -29,15 +29,14 @@ HLREngine *fork_engine(const HLREngine *engine)
HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine)); HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine));
eng2->table = engine->table; eng2->table = engine->table;
eng2->state = engine->state; eng2->state = engine->state;
eng2->input = engine->input;
// shallow-copy the stacks // shallow-copy the stack
// this works because h_slist_push and h_slist_pop never modify // this works because h_slist_push and h_slist_pop never modify
// the underlying structure of HSlistNodes, only the head pointer. // the underlying structure of HSlistNodes, only the head pointer.
// in fact, this gives us prefix sharing for free. // in fact, this gives us prefix sharing for free.
eng2->left = h_arena_malloc(engine->tarena, sizeof(HSlist)); eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist));
eng2->right = h_arena_malloc(engine->tarena, sizeof(HSlist)); *eng2->stack = *engine->stack;
*eng2->left = *engine->left;
*eng2->right = *engine->right;
eng2->arena = engine->arena; eng2->arena = engine->arena;
eng2->tarena = engine->tarena; eng2->tarena = engine->tarena;
@ -54,7 +53,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
HSlist *engines = h_slist_new(tarena); HSlist *engines = h_slist_new(tarena);
h_slist_push(engines, h_lrengine_new(arena, tarena, table)); h_slist_push(engines, h_lrengine_new(arena, tarena, table, stream));
HParseResult *result = NULL; HParseResult *result = NULL;
while(result == NULL && !h_slist_empty(engines)) { while(result == NULL && !h_slist_empty(engines)) {
@ -75,7 +74,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
continue; continue;
} }
const HLRAction *action = h_lrengine_action(engine, stream); const HLRAction *action = h_lrengine_action(engine);
// fork engine on conflicts // fork engine on conflicts
if(action && action->type == HLR_CONFLICT) { if(action && action->type == HLR_CONFLICT) {
@ -120,8 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = {
// XXX TODO // XXX TODO
// - eliminate right stack by always doing a shift after reduce
// (shift should always follow reduce because rightmost)
// - split tables into // - split tables into
// - one mapping input bytes to actions (shift or reduce or conflict) // - one mapping input bytes to actions (shift or reduce or conflict)
// - one mapping reduced-to lhs nonterminals to shift states // - one mapping reduced-to lhs nonterminals to shift states

View file

@ -202,65 +202,64 @@ h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
} }
} }
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table) HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
const HInputStream *stream)
{ {
HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
engine->table = table; engine->table = table;
engine->state = 0; engine->state = 0;
engine->run = true; engine->run = true;
engine->left = h_slist_new(tarena); engine->stack = h_slist_new(tarena);
engine->right = h_slist_new(tarena); engine->input = *stream;
engine->arena = arena; engine->arena = arena;
engine->tarena = tarena; engine->tarena = tarena;
return engine; return engine;
} }
const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream) const HLRAction *h_lrengine_action(const HLREngine *engine)
{ {
HSlist *right = engine->right;
HArena *arena = engine->arena;
HArena *tarena = engine->tarena; HArena *tarena = engine->tarena;
// make sure there is input on the right stack
if(h_slist_empty(right)) {
// XXX use statically-allocated terminal symbols // XXX use statically-allocated terminal symbols
HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
HParsedToken *v;
uint8_t c = h_read_bits(stream, 8, false); HInputStream lookahead = engine->input;
uint8_t c = h_read_bits(&lookahead, 8, false);
if(stream->overrun) { // end of input if(lookahead.overrun) { // end of input
x->type = HCF_END; x->type = HCF_END;
v = NULL;
} else { } else {
x->type = HCF_CHAR; x->type = HCF_CHAR;
x->chr = c; x->chr = c;
v = h_arena_malloc(arena, sizeof(HParsedToken)); }
return h_lr_lookup(engine->table, engine->state, x);
}
static HParsedToken *consume_input(HLREngine *engine)
{
HParsedToken *v;
uint8_t c = h_read_bits(&engine->input, 8, false);
if(engine->input.overrun) { // end of input
v = NULL;
} else {
v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
v->token_type = TT_UINT; v->token_type = TT_UINT;
v->uint = c; v->uint = c;
} }
h_slist_push(right, v); return v;
h_slist_push(right, x);
}
// peek at input symbol on the right side
HCFChoice *symbol = right->head->elem;
// table lookup
const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol);
return action;
} }
// run LR parser for one round; returns false when finished // run LR parser for one round; returns false when finished
static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action) static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
{ {
// short-hand names // short-hand names
HSlist *left = engine->left; HSlist *stack = engine->stack;
HSlist *right = engine->right;
HArena *arena = engine->arena; HArena *arena = engine->arena;
HArena *tarena = engine->tarena; HArena *tarena = engine->tarena;
@ -278,11 +277,11 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
value->token_type = TT_SEQUENCE; value->token_type = TT_SEQUENCE;
value->seq = h_carray_new_sized(arena, len); value->seq = h_carray_new_sized(arena, len);
// pull values off the left stack, rewinding state accordingly // pull values off the stack, rewinding state accordingly
HParsedToken *v = NULL; HParsedToken *v = NULL;
for(size_t i=0; i<len; i++) { for(size_t i=0; i<len; i++) {
v = h_slist_drop(left); v = h_slist_drop(stack);
engine->state = (uintptr_t)h_slist_drop(left); engine->state = (uintptr_t)h_slist_drop(stack);
// collect values in result sequence // collect values in result sequence
value->seq->elements[len-1-i] = v; value->seq->elements[len-1-i] = v;
@ -315,17 +314,17 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
assert(shift->type == HLR_SHIFT); assert(shift->type == HLR_SHIFT);
// piggy-back the shift right here, never touching the input // piggy-back the shift right here, never touching the input
h_slist_push(left, (void *)(uintptr_t)engine->state); h_slist_push(stack, (void *)(uintptr_t)engine->state);
h_slist_push(left, value); h_slist_push(stack, value);
engine->state = shift->nextstate; engine->state = shift->nextstate;
if(symbol == engine->table->start) if(symbol == engine->table->start)
return false; // reduced to start symbol; accept! return false; // reduced to start symbol; accept!
} else { } else {
assert(action->type == HLR_SHIFT); assert(action->type == HLR_SHIFT);
h_slist_push(left, (void *)(uintptr_t)engine->state); HParsedToken *value = consume_input(engine);
h_slist_drop(right); // symbol (discard) h_slist_push(stack, (void *)(uintptr_t)engine->state);
h_slist_push(left, h_slist_drop(right)); // semantic value h_slist_push(stack, value);
engine->state = action->nextstate; engine->state = action->nextstate;
} }
@ -341,9 +340,9 @@ void h_lrengine_step(HLREngine *engine, const HLRAction *action)
HParseResult *h_lrengine_result(HLREngine *engine) HParseResult *h_lrengine_result(HLREngine *engine)
{ {
// parsing was successful iff after a shift the engine is back in state 0 // parsing was successful iff after a shift the engine is back in state 0
if(engine->state == 0 && !h_slist_empty(engine->left)) { if(engine->state == 0 && !h_slist_empty(engine->stack)) {
// on top of the stack is the start symbol's semantic value // on top of the stack is the start symbol's semantic value
HParsedToken *tok = engine->left->head->elem; HParsedToken *tok = engine->stack->head->elem;
return make_result(engine->arena, tok); return make_result(engine->arena, tok);
} else { } else {
return NULL; return NULL;
@ -358,11 +357,11 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
HArena *arena = h_new_arena(mm__, 0); // will hold the results HArena *arena = h_new_arena(mm__, 0); // will hold the results
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
HLREngine *engine = h_lrengine_new(arena, tarena, table); HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);
// iterate engine to completion // iterate engine to completion
while(engine->run) while(engine->run)
h_lrengine_step(engine, h_lrengine_action(engine, stream)); h_lrengine_step(engine, h_lrengine_action(engine));
HParseResult *result = h_lrengine_result(engine); HParseResult *result = h_lrengine_result(engine);
if(!result) if(!result)

View file

@ -70,11 +70,8 @@ typedef struct HLREngine_ {
size_t state; size_t state;
bool run; bool run;
// stack layout: HSlist *stack; // holds pairs: (saved state, semantic value)
// on the left stack, we put pairs: (saved state, semantic value) HInputStream input;
// on the right stack, we put pairs: (symbol, semantic value)
HSlist *left; // left stack; reductions happen here
HSlist *right; // right stack; input appears here
HArena *arena; // will hold the results HArena *arena; // will hold the results
HArena *tarena; // tmp, deleted after parse HArena *tarena; // tmp, deleted after parse
@ -108,7 +105,8 @@ HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark);
HLRState *h_lrstate_new(HArena *arena); HLRState *h_lrstate_new(HArena *arena);
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows); HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows);
void h_lrtable_free(HLRTable *table); void h_lrtable_free(HLRTable *table);
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table); HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
const HInputStream *stream);
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item); HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
HLRAction *h_shift_action(HArena *arena, size_t nextstate); HLRAction *h_shift_action(HArena *arena, size_t nextstate);
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new); HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
@ -128,7 +126,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
void h_lalr_free(HParser *parser); void h_lalr_free(HParser *parser);
const HLRAction *h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol); const HLRAction *h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol);
const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream); const HLRAction *h_lrengine_action(const HLREngine *engine);
void h_lrengine_step(HLREngine *engine, const HLRAction *action); void h_lrengine_step(HLREngine *engine, const HLRAction *action);
HParseResult *h_lrengine_result(HLREngine *engine); HParseResult *h_lrengine_result(HLREngine *engine);
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);