wip end-of-day commit - almost there (untested)
This commit is contained in:
parent
3bb26162c3
commit
dabe4b07a9
1 changed files with 246 additions and 82 deletions
|
|
@ -4,7 +4,8 @@
|
|||
#include "../parsers/parser_internal.h"
|
||||
|
||||
|
||||
/* Constructing the characteristic automaton (handle recognizer) */
|
||||
|
||||
/* Data structures */
|
||||
|
||||
// - states are hashsets containing LRItems
|
||||
// - LRItems contain an optional lookahead set (HStringMap)
|
||||
|
|
@ -19,8 +20,9 @@ typedef struct HLRDFA_ {
|
|||
} HLRDFA;
|
||||
|
||||
typedef struct HLRTransition_ {
|
||||
size_t from, to; // indices into 'states' array
|
||||
size_t from; // index into 'states' array
|
||||
const HCFChoice *symbol;
|
||||
size_t to; // index into 'states' array
|
||||
} HLRTransition;
|
||||
|
||||
typedef struct HLRItem_ {
|
||||
|
|
@ -31,6 +33,32 @@ typedef struct HLRItem_ {
|
|||
HStringMap *lookahead; // optional
|
||||
} HLRItem;
|
||||
|
||||
typedef struct HLRAction_ {
|
||||
enum {HLR_SHIFT, HLR_REDUCE} type;
|
||||
union {
|
||||
size_t nextstate; // used with SHIFT
|
||||
struct {
|
||||
HCFChoice *lhs; // symbol carrying semantic actions etc.
|
||||
size_t length; // # of symbols in rhs
|
||||
// NB: the rhs symbols are not needed for the parse
|
||||
} production; // used with REDUCE
|
||||
};
|
||||
} HLRAction;
|
||||
|
||||
typedef struct HLRTable_ {
|
||||
size_t nrows;
|
||||
HHashTable **rows; // map symbols to HLRActions
|
||||
HLRAction **forall; // shortcut to set an action for an entire row
|
||||
HCFChoice *start; // start symbol
|
||||
HSlist *inadeq; // indices of any inadequate states
|
||||
HArena *arena;
|
||||
HAllocator *mm__;
|
||||
} HLRTable;
|
||||
|
||||
|
||||
|
||||
/* Constructing the characteristic automaton (handle recognizer) */
|
||||
|
||||
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
|
||||
{
|
||||
HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem));
|
||||
|
|
@ -157,7 +185,7 @@ static HHashSet *closure(HCFGrammar *g, const HHashSet *items)
|
|||
return ret;
|
||||
}
|
||||
|
||||
HLRDFA *h_lalr_dfa(HCFGrammar *g)
|
||||
HLRDFA *h_lr0_dfa(HCFGrammar *g)
|
||||
{
|
||||
HArena *arena = g->arena;
|
||||
|
||||
|
|
@ -275,26 +303,24 @@ HLRDFA *h_lalr_dfa(HCFGrammar *g)
|
|||
|
||||
|
||||
|
||||
/* LALR table generation */
|
||||
/* LR(0) table generation */
|
||||
|
||||
typedef struct HLRAction_ {
|
||||
enum {HLR_SHIFT, HLR_REDUCE} type;
|
||||
union {
|
||||
size_t nextstate; // used with shift
|
||||
struct {
|
||||
HCFChoice *lhs;
|
||||
HCFChoice **rhs;
|
||||
} production; // used with reduce
|
||||
};
|
||||
} HLRAction;
|
||||
// XXX replace other hashtable iterations with this
|
||||
// XXX move to internal.h or something
|
||||
#define H_FOREACH_(HT) do { \
|
||||
const HHashTable *ht = HT; \
|
||||
for(size_t i=0; i < ht->capacity; i++) { \
|
||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { \
|
||||
if(hte->key == NULL) continue;
|
||||
|
||||
typedef struct HLRTable_ {
|
||||
size_t nrows;
|
||||
HHashTable **rows; // map symbols to HLRActions
|
||||
HCFChoice *start; // start symbol
|
||||
HArena *arena;
|
||||
HAllocator *mm__;
|
||||
} HLRTable;
|
||||
#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_(HT) \
|
||||
const KEYVAR = hte->key; \
|
||||
VALVAR = hte->value;
|
||||
|
||||
#define H_END_FOREACH \
|
||||
} \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
||||
{
|
||||
|
|
@ -313,91 +339,229 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static HCFGrammar *transform_grammar(const HCFGrammar *g, const HLRTable *table,
|
||||
const HLRDFA *dfa, HHashTable **syms)
|
||||
void h_lrtable_free(HLRTable *table)
|
||||
{
|
||||
HCFGrammar *gt = h_cfgrammar_new(g->mm__);
|
||||
HArena *arena = gt->arena;
|
||||
HAllocator *mm__ = table->mm__;
|
||||
h_delete_arena(table->arena);
|
||||
h_free(table);
|
||||
}
|
||||
|
||||
// old grammar symbol ->
|
||||
//HHashTable *map = h_hashtable_new(
|
||||
/* Allocate a SHIFT action from `arena` whose target state is `nextstate`. */
static HLRAction *shift_action(HArena *arena, size_t nextstate)
{
  HLRAction *shift = h_arena_malloc(arena, sizeof *shift);

  shift->nextstate = nextstate;
  shift->type = HLR_SHIFT;

  return shift;
}
|
||||
|
||||
/*
 * Allocate a REDUCE action from `arena` for a production with left-hand
 * side `lhs` and `rhslen` symbols on its right-hand side.
 */
static HLRAction *reduce_action(HArena *arena, HCFChoice *lhs, size_t rhslen)
{
  HLRAction *reduce = h_arena_malloc(arena, sizeof *reduce);

  reduce->type = HLR_REDUCE;
  reduce->production.length = rhslen;
  reduce->production.lhs = lhs;

  return reduce;
}
|
||||
|
||||
HLRTable *h_lr0_table(HCFGrammar *g)
|
||||
{
|
||||
HAllocator *mm__ = g->mm__;
|
||||
|
||||
// construct LR(0) DFA
|
||||
HLRDFA *dfa = h_lr0_dfa(g);
|
||||
if(!dfa) return NULL;
|
||||
|
||||
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
||||
HArena *arena = table->arena;
|
||||
|
||||
// add shift entries
|
||||
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
||||
// for each transition x-A->y, add "shift, goto y" to table entry (x,A)
|
||||
HLRTransition *t = x->elem;
|
||||
|
||||
HLRAction *action = shift_action(arena, t->to);
|
||||
h_hashtable_put(table->rows[t->from], t->symbol, action);
|
||||
}
|
||||
|
||||
// add reduce entries, record inadequate states
|
||||
for(size_t i=0; i<dfa->nstates; i++) {
|
||||
const HLRState *state = dfa->states[i];
|
||||
|
||||
syms[i] = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
||||
|
||||
|
||||
}
|
||||
|
||||
// iterate over g->nts
|
||||
const HHashTable *ht = g->nts;
|
||||
for(size_t i=0; i < ht->capacity; i++) {
|
||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||
if(hte->key == NULL)
|
||||
continue;
|
||||
|
||||
const HCFChoice *A = hte->key;
|
||||
|
||||
// iterate over the productions of A
|
||||
for(HCFSequence **p=A->seq; *p; p++) {
|
||||
// find all transitions marked by A
|
||||
// yields xAy -> rhs'
|
||||
// trace rhs starting in state x and following the transitions
|
||||
// find reducible items in state
|
||||
H_FOREACH(dfa->states[i], HLRItem *item, void *v_)
|
||||
if(item->mark == item->len) { // mark at the end
|
||||
// XXX store more informative stuff in the inadeq records?
|
||||
if(table->forall[i]) {
|
||||
// reduce/reduce conflict with a previous item
|
||||
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
|
||||
} else if(!h_hashtable_empty(table->rows[i])) {
|
||||
// shift/reduce conflict with one of the row's entries
|
||||
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
|
||||
} else {
|
||||
// set reduce action for the entire row
|
||||
table->forall[i] = reduce_action(arena, item->lhs, item->len);
|
||||
}
|
||||
}
|
||||
}
|
||||
H_END_FOREACH
|
||||
}
|
||||
|
||||
return gt;
|
||||
return table;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LALR-via-SLR grammar transformation */
|
||||
|
||||
/*
 * Count the slots of a NULL-terminated array of pointers, including the
 * slot occupied by the terminating NULL.
 */
static inline size_t seqsize(void *p_)
{
  void **cursor = p_;
  size_t slots = 1;   // the terminator takes up a slot as well

  while(*cursor != NULL) {
    slots++;
    cursor++;
  }

  return slots;
}
|
||||
|
||||
/*
 * Return the state reached from state `x` on symbol `A` according to
 * `table`. The table entry must exist and must be a shift; both
 * conditions are asserted.
 */
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
  HHashTable *row = table->rows[x];
  HLRAction *action = h_hashtable_get(row, A);

  assert(action != NULL);
  assert(action->type == HLR_SHIFT);

  return action->nextstate;
}
|
||||
|
||||
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
|
||||
size_t x, HCFChoice *B, size_t z);
|
||||
|
||||
/*
 * Replace the productions of `xAy` with copies whose right-hand-side
 * symbols are the transformed (transition-encoding) symbols.
 *
 * Every production is traced through `table` starting in state `x`:
 * for each rhs symbol B the shift entry (state, B) gives the successor
 * state z, and B is replaced by transform_symbol(table, map, state, B, z).
 *
 * All new arrays and sequences are allocated from map->arena. `xAy` is
 * modified in place and returned.
 *
 * NOTE(review): xAy->seq is read unconditionally; presumably callers must
 * only pass symbols that actually carry productions -- confirm for symbols
 * reached through transform_symbol.
 */
static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
                                        size_t x, HCFChoice *xAy)
{
  HArena *arena = map->arena;

  HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq)
                                            * sizeof(HCFSequence *));
  HCFSequence **p, **q;
  for(p=xAy->seq, q=seq; *p; p++, q++) {
    // trace rhs starting in state x and following the transitions
    // xAy -> xBz ...

    // each alternative starts over from x; tracking the position in a
    // local keeps one production from shifting the origin of the next
    size_t state = x;

    HCFChoice **B = (*p)->items;
    // keep the base pointer: the cursor below ends up on the terminator
    HCFChoice **items = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *));
    HCFChoice **xBz = items;
    for(; *B; B++, xBz++) {
      size_t z = follow_transition(table, state, *B);
      *xBz = transform_symbol(table, map, state, *B, z);
      state = z;
    }
    *xBz = NULL;

    *q = h_arena_malloc(arena, sizeof(HCFSequence));
    (*q)->items = items;
  }
  *q = NULL;
  xAy->seq = seq;

  return xAy; // pass-through
}
|
||||
|
||||
/* Allocate a transition record x --A--> y from `arena`. */
static inline HLRTransition *transition(HArena *arena,
                                        size_t x, const HCFChoice *A, size_t y)
{
  HLRTransition *edge = h_arena_malloc(arena, sizeof *edge);

  edge->symbol = A;
  edge->from = x;
  edge->to = y;

  return edge;
}
|
||||
|
||||
/*
 * Return the transformed symbol that stands for the transition x --B--> z.
 *
 * `map` associates transitions with their transformed symbols. If the
 * transition is already present, the existing symbol is returned as is.
 * Otherwise a copy of `B` is allocated from map->arena, registered in
 * `map`, and its productions are rewritten via transform_productions.
 *
 * The new symbol is registered before its productions are rewritten, so a
 * recursive reference to the same transition finds the entry and returns
 * instead of recursing without bound.
 */
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
                                   size_t x, HCFChoice *B, size_t z)
{
  HArena *arena = map->arena;

  // look up the transition in map, create symbol if not found
  HLRTransition *x_B_z = transition(arena, x, B, z);
  HCFChoice *xBz = h_hashtable_get(map, x_B_z);
  if(xBz)
    return xBz;   // already created; its productions are handled elsewhere

  // assign to the outer variable; declaring a new one here would leave
  // the result NULL
  xBz = h_arena_malloc(arena, sizeof(HCFChoice));
  *xBz = *B;
  h_hashtable_put(map, x_B_z, xBz);

  return transform_productions(table, map, x, xBz);
}
|
||||
|
||||
static bool eq_transition(const void *p, const void *q)
|
||||
{
|
||||
const HLRTransition *a=p, *b=q;
|
||||
return (a->from == b->from && a->to == b->to && a->symbol == b->symbol);
|
||||
}
|
||||
|
||||
static HHashValue hash_transition(const void *p)
|
||||
{
|
||||
const HLRTransition *t = p;
|
||||
return (h_hash_ptr(t->symbol) + t->from + t->to); // XXX ?
|
||||
}
|
||||
|
||||
static HHashTable *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
|
||||
{
|
||||
HArena *arena = g->arena; // XXX ?
|
||||
HHashTable *map = h_hashtable_new(arena, eq_transition, hash_transition);
|
||||
|
||||
// copy the start symbol over
|
||||
HCFChoice *start = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||
*start = *(g->start);
|
||||
h_hashtable_put(map, g->start, start);
|
||||
|
||||
transform_productions(tbl, map, 0, start);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* LALR table generation */
|
||||
|
||||
/*
 * Report whether `state` has been recorded as inadequate in `table`.
 *
 * table->inadeq holds state indices stored as (void *)(uintptr_t) values;
 * the list is searched linearly for `state`.
 */
bool is_inadequate(HLRTable *table, size_t state)
{
  for(HSlistNode *x = table->inadeq->head; x; x = x->next) {
    if((uintptr_t)x->elem == (uintptr_t)state)
      return true;
  }
  return false;
}
|
||||
|
||||
/* True if at least one inadequate state has been recorded in `table`. */
bool has_conflicts(HLRTable *table)
{
  if(h_slist_empty(table->inadeq))
    return false;
  return true;
}
|
||||
|
||||
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||
{
|
||||
// generate CFG from parser
|
||||
// construct LR(0) DFA
|
||||
// build parse table, shift-entries only
|
||||
// for each transition a--S-->b, add "shift, goto b" to table entry (a,S)
|
||||
// determine lookahead "by conversion to SLR"
|
||||
// transform grammar to encode transitions in symbols
|
||||
// -> lookahead for an item is the transformed left-hand side's follow set
|
||||
// finish table; for each state:
|
||||
// add reduce entries for its accepting items
|
||||
// in case of conflict, add lookahead info
|
||||
// build LR(0) table
|
||||
// if necessary, resolve conflicts "by conversion to SLR"
|
||||
|
||||
HCFGrammar *g = h_cfgrammar(mm__, parser);
|
||||
if(g == NULL) // backend not suitable (language not context-free)
|
||||
return -1;
|
||||
|
||||
HLRDFA *dfa = h_lalr_dfa(g);
|
||||
if(dfa == NULL) // this should actually not happen
|
||||
HLRTable *table = h_lr0_table(g);
|
||||
if(table == NULL) // this should normally not happen
|
||||
return -1;
|
||||
|
||||
// create table with shift actions
|
||||
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
||||
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
||||
HLRTransition *t = x->elem;
|
||||
HLRAction *action = h_arena_malloc(table->arena, sizeof(HLRAction));
|
||||
action->type = HLR_SHIFT;
|
||||
action->nextstate = t->to;
|
||||
h_hashtable_put(table->rows[t->from], t->symbol, action);
|
||||
if(has_conflicts(table)) {
|
||||
HHashTable *map = enhance_grammar(g, table);
|
||||
if(map == NULL) // this should normally not happen
|
||||
return -1;
|
||||
|
||||
// XXX resolve conflicts
|
||||
// iterate over dfa's transitions where 'from' state is inadequate
|
||||
// look up enhanced symbol corr. to the transition
|
||||
// for each terminal in follow set of enh. symbol:
|
||||
// put reduce action into table cell (state, terminal)
|
||||
// conflict if already occupied
|
||||
}
|
||||
|
||||
// mapping (state,item)-pairs to the symbols of the new grammar
|
||||
HHashTable **syms = h_arena_malloc(g->arena, dfa->nstates * sizeof(HHashTable *));
|
||||
// XXX use a different arena for this (and other things)
|
||||
|
||||
HCFGrammar *gt = transform_grammar(g, table, dfa, syms);
|
||||
if(gt == NULL) // this should actually not happen
|
||||
return -1;
|
||||
|
||||
// XXX fill in reduce actions
|
||||
|
||||
return 0;
|
||||
h_cfgrammar_free(g);
|
||||
parser->backend_data = table;
|
||||
return has_conflicts(table)? -1 : 0;
|
||||
}
|
||||
|
||||
/*
 * Release the LR table stored in parser->backend_data, clear the pointer,
 * and set the parser's backend to PB_PACKRAT.
 */
void h_lalr_free(HParser *parser)
{
  h_lrtable_free(parser->backend_data);

  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
|
||||
|
|
@ -538,7 +702,7 @@ int test_lalr(void)
|
|||
h_pprint_grammar(stdout, g, 0);
|
||||
|
||||
printf("\n==== D F A ====\n");
|
||||
HLRDFA *dfa = h_lalr_dfa(g);
|
||||
HLRDFA *dfa = h_lr0_dfa(g);
|
||||
if(dfa)
|
||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||
else
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue