#include #include #include "../parsers/parser_internal.h" #include "lr.h" /* Comparison and hashing functions */ // compare symbols - terminals by value, others by pointer bool h_eq_symbol(const void *p, const void *q) { const HCFChoice *x=p, *y=q; return (x==y || (x->type==HCF_END && y->type==HCF_END) || (x->type==HCF_CHAR && y->type==HCF_CHAR && x->chr==y->chr)); } // hash symbols - terminals by value, others by pointer HHashValue h_hash_symbol(const void *p) { const HCFChoice *x=p; if(x->type == HCF_END) return 0; else if(x->type == HCF_CHAR) return x->chr * 33; else return h_hash_ptr(p); } // compare LR items by value static bool eq_lr_item(const void *p, const void *q) { const HLRItem *a=p, *b=q; if(!h_eq_symbol(a->lhs, b->lhs)) return false; if(a->mark != b->mark) return false; if(a->len != b->len) return false; for(size_t i=0; ilen; i++) if(!h_eq_symbol(a->rhs[i], b->rhs[i])) return false; return true; } // hash LALR items static inline HHashValue hash_lr_item(const void *p) { const HLRItem *x = p; HHashValue hash = 0; hash += h_hash_symbol(x->lhs); for(HCFChoice **p=x->rhs; *p; p++) hash += h_hash_symbol(*p); hash += x->mark; return hash; } // compare item sets (DFA states) bool h_eq_lr_itemset(const void *p, const void *q) { return h_hashset_equal(p, q); } // hash LR item sets (DFA states) - hash the elements and sum HHashValue h_hash_lr_itemset(const void *p) { HHashValue hash = 0; H_FOREACH_KEY((const HHashSet *)p, HLRItem *item) hash += hash_lr_item(item); H_END_FOREACH return hash; } bool h_eq_transition(const void *p, const void *q) { const HLRTransition *a=p, *b=q; return (a->from == b->from && a->to == b->to && h_eq_symbol(a->symbol, b->symbol)); } HHashValue h_hash_transition(const void *p) { const HLRTransition *t = p; return (h_hash_symbol(t->symbol) + t->from + t->to); // XXX ? } /* Constructors */ HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark) { HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem)); size_t len = 0; for(HCFChoice **p=rhs; *p; p++) len++; assert(mark <= len); ret->lhs = lhs; ret->rhs = rhs; ret->len = len; ret->mark = mark; return ret; } HLRState *h_lrstate_new(HArena *arena) { return h_hashset_new(arena, eq_lr_item, hash_lr_item); } HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows) { HArena *arena = h_new_arena(mm__, 0); // default blocksize assert(arena != NULL); HLRTable *ret = h_new(HLRTable, 1); ret->nrows = nrows; ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *)); ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *)); ret->inadeq = h_slist_new(arena); ret->arena = arena; ret->mm__ = mm__; for(size_t i=0; intmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); ret->tmap[i] = h_stringmap_new(arena); ret->forall[i] = NULL; } return ret; } void h_lrtable_free(HLRTable *table) { HAllocator *mm__ = table->mm__; h_delete_arena(table->arena); h_free(table); } HLRAction *h_shift_action(HArena *arena, size_t nextstate) { HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); action->type = HLR_SHIFT; action->nextstate = nextstate; return action; } HLRAction *h_reduce_action(HArena *arena, const HLRItem *item) { HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); action->type = HLR_REDUCE; action->production.lhs = item->lhs; action->production.length = item->len; #ifndef NDEBUG action->production.rhs = item->rhs; #endif return action; } // adds 'new' to the branches of 'action' // returns a 'action' if it is already of type HLR_CONFLICT // allocates a new HLRAction otherwise HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new) { if(action->type != HLR_CONFLICT) { HLRAction *old = action; action = h_arena_malloc(arena, sizeof(HLRAction)); action->type = HLR_CONFLICT; action->branches = h_slist_new(arena); h_slist_push(action->branches, old); h_slist_push(action->branches, new); } else { // check if 'new' is already among branches HSlistNode *x; for(x=action->branches->head; x; x=x->next) { if(x->elem == new) break; } // add 'new' if it is not already in list if(x == NULL) h_slist_push(action->branches, new); } return action; } bool h_lrtable_row_empty(const HLRTable *table, size_t i) { return (h_hashtable_empty(table->ntmap[i]) && h_stringmap_empty(table->tmap[i])); } /* LR driver */ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, const HInputStream *stream) { HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); engine->table = table; engine->state = 0; engine->run = true; engine->stack = h_slist_new(tarena); engine->input = *stream; engine->merged = NULL; engine->arena = arena; engine->tarena = tarena; return engine; } static const HLRAction * terminal_lookup(const HLREngine *engine, const HInputStream *stream) { const HLRTable *table = engine->table; size_t state = engine->state; assert(state < table->nrows); if(table->forall[state]) { assert(h_lrtable_row_empty(table, state)); // that would be a conflict return table->forall[state]; } else { return h_stringmap_get_lookahead(table->tmap[state], *stream); } } static const HLRAction * nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol) { const HLRTable *table = engine->table; size_t state = engine->state; assert(state < table->nrows); assert(!table->forall[state]); // contains only reduce entries // we are only looking for shifts return h_hashtable_get(table->ntmap[state], symbol); } const HLRAction *h_lrengine_action(const HLREngine *engine) { return terminal_lookup(engine, &engine->input); } static HParsedToken *consume_input(HLREngine *engine) { HParsedToken *v; uint8_t c = h_read_bits(&engine->input, 8, false); if(engine->input.overrun) { // end of input v = NULL; } else { v = h_arena_malloc(engine->arena, sizeof(HParsedToken)); v->token_type = TT_UINT; v->uint = c; } return v; } // run LR parser for one round; returns false when finished static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action) { // short-hand names HSlist *stack = engine->stack; HArena *arena = engine->arena; HArena *tarena = engine->tarena; if(action == NULL) return false; // no handle recognizable in input, terminate assert(action->type == HLR_SHIFT || action->type == HLR_REDUCE); if(action->type == HLR_REDUCE) { size_t len = action->production.length; HCFChoice *symbol = action->production.lhs; // semantic value of the reduction result HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken)); value->token_type = TT_SEQUENCE; value->seq = h_carray_new_sized(arena, len); // pull values off the stack, rewinding state accordingly HParsedToken *v = NULL; for(size_t i=0; istate = (uintptr_t)h_slist_drop(stack); // collect values in result sequence value->seq->elements[len-1-i] = v; value->seq->used++; } if(v) { // result position equals position of left-most symbol value->index = v->index; value->bit_offset = v->bit_offset; } else { // XXX how to get the position in this case? } // perform token reshape if indicated if(symbol->reshape) value = (HParsedToken *)symbol->reshape(make_result(arena, value)); // call validation and semantic action, if present if(symbol->pred && !symbol->pred(make_result(tarena, value))) return false; // validation failed -> no parse; terminate if(symbol->action) value = (HParsedToken *)symbol->action(make_result(arena, value)); // this is LR, building a right-most derivation bottom-up, so no reduce can // follow a reduce. we can also assume no conflict follows for GLR if we // use LALR tables, because only terminal symbols (lookahead) get reduces. const HLRAction *shift = nonterminal_lookup(engine, symbol); if(shift == NULL) return false; // parse error assert(shift->type == HLR_SHIFT); // piggy-back the shift right here, never touching the input h_slist_push(stack, (void *)(uintptr_t)engine->state); h_slist_push(stack, value); engine->state = shift->nextstate; if(symbol == engine->table->start) return false; // reduced to start symbol; accept! } else { assert(action->type == HLR_SHIFT); HParsedToken *value = consume_input(engine); h_slist_push(stack, (void *)(uintptr_t)engine->state); h_slist_push(stack, value); engine->state = action->nextstate; } return true; } // run LR parser for one round; sets engine->run void h_lrengine_step(HLREngine *engine, const HLRAction *action) { engine->run = h_lrengine_step_(engine, action); } HParseResult *h_lrengine_result(HLREngine *engine) { // parsing was successful iff after a shift the engine is back in state 0 if(engine->state == 0 && !h_slist_empty(engine->stack)) { // on top of the stack is the start symbol's semantic value HParsedToken *tok = engine->stack->head->elem; return make_result(engine->arena, tok); } else { return NULL; } } HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) { HLRTable *table = parser->backend_data; if(!table) return NULL; HArena *arena = h_new_arena(mm__, 0); // will hold the results HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse HLREngine *engine = h_lrengine_new(arena, tarena, table, stream); // iterate engine to completion while(engine->run) h_lrengine_step(engine, h_lrengine_action(engine)); HParseResult *result = h_lrengine_result(engine); if(!result) h_delete_arena(arena); h_delete_arena(tarena); return result; } /* Pretty-printers */ void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item) { h_pprint_symbol(f, g, item->lhs); fputs(" ->", f); HCFChoice **x = item->rhs; HCFChoice **mark = item->rhs + item->mark; if(*x == NULL) { fputc('.', f); } else { while(*x) { if(x == mark) fputc('.', f); else fputc(' ', f); if((*x)->type == HCF_CHAR) { // condense character strings fputc('"', f); h_pprint_char(f, (*x)->chr); for(x++; *x; x++) { if(x == mark) break; if((*x)->type != HCF_CHAR) break; h_pprint_char(f, (*x)->chr); } fputc('"', f); } else { h_pprint_symbol(f, g, *x); x++; } } if(x == mark) fputs(".", f); } } void h_pprint_lrstate(FILE *f, const HCFGrammar *g, const HLRState *state, unsigned int indent) { bool first = true; H_FOREACH_KEY(state, HLRItem *item) if(!first) for(unsigned int i=0; isymbol); fprintf(f, "->%lu", t->to); } void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, const HLRDFA *dfa, unsigned int indent) { for(size_t i=0; instates; i++) { unsigned int indent2 = indent + fprintf(f, "%4lu: ", i); h_pprint_lrstate(f, g, dfa->states[i], indent2); for(HSlistNode *x = dfa->transitions->head; x; x = x->next) { const HLRTransition *t = x->elem; if(t->from == i) { for(unsigned int i=0; itype) { case HLR_SHIFT: fprintf(f, "s%lu", action->nextstate); break; case HLR_REDUCE: fputs("r(", f); h_pprint_symbol(f, g, action->production.lhs); fputs(" -> ", f); #ifdef NDEBUG // if we can't print the production, at least print its length fprintf(f, "[%lu]", action->production.length); #else HCFSequence seq = {action->production.rhs}; h_pprint_sequence(f, g, &seq); #endif fputc(')', f); break; case HLR_CONFLICT: fputc('!', f); for(HSlistNode *x=action->branches->head; x; x=x->next) { HLRAction *branch = x->elem; assert(branch->type != HLR_CONFLICT); // no nesting pprint_lraction(f, g, branch); if(x->next) fputc('/', f); // separator } break; default: assert_message(0, "not reached"); } } static void valprint_lraction(FILE *file, void *env, void *val) { const HLRAction *action = val; const HCFGrammar *grammar = env; pprint_lraction(file, grammar, action); } static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g, const HStringMap *map) { h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map); } void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, unsigned int indent) { for(size_t i=0; inrows; i++) { for(unsigned int j=0; jforall[i]) { fputc(' ', f); pprint_lraction(f, g, table->forall[i]); if(!h_lrtable_row_empty(table, i)) fputs(" !!", f); } H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action) fputc(' ', f); // separator h_pprint_symbol(f, g, symbol); fputc(':', f); pprint_lraction(f, g, action); H_END_FOREACH fputc(' ', f); // separator pprint_lrtable_terminals(f, g, table->tmap[i]); fputc('\n', f); } #if 0 fputs("inadeq=", f); for(HSlistNode *x=table->inadeq->head; x; x=x->next) { fprintf(f, "%lu ", (uintptr_t)x->elem); } fputc('\n', f); #endif }