wip end-of-day commit - almost there (untested)
This commit is contained in:
parent
3bb26162c3
commit
dabe4b07a9
1 changed files with 246 additions and 82 deletions
|
|
@ -4,7 +4,8 @@
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
|
|
||||||
/* Constructing the characteristic automaton (handle recognizer) */
|
|
||||||
|
/* Data structures */
|
||||||
|
|
||||||
// - states are hashsets containing LRItems
|
// - states are hashsets containing LRItems
|
||||||
// - LRItems contain an optional lookahead set (HStringMap)
|
// - LRItems contain an optional lookahead set (HStringMap)
|
||||||
|
|
@ -19,8 +20,9 @@ typedef struct HLRDFA_ {
|
||||||
} HLRDFA;
|
} HLRDFA;
|
||||||
|
|
||||||
typedef struct HLRTransition_ {
|
typedef struct HLRTransition_ {
|
||||||
size_t from, to; // indices into 'states' array
|
size_t from; // index into 'states' array
|
||||||
const HCFChoice *symbol;
|
const HCFChoice *symbol;
|
||||||
|
size_t to; // index into 'states' array
|
||||||
} HLRTransition;
|
} HLRTransition;
|
||||||
|
|
||||||
typedef struct HLRItem_ {
|
typedef struct HLRItem_ {
|
||||||
|
|
@ -31,6 +33,32 @@ typedef struct HLRItem_ {
|
||||||
HStringMap *lookahead; // optional
|
HStringMap *lookahead; // optional
|
||||||
} HLRItem;
|
} HLRItem;
|
||||||
|
|
||||||
|
typedef struct HLRAction_ {
|
||||||
|
enum {HLR_SHIFT, HLR_REDUCE} type;
|
||||||
|
union {
|
||||||
|
size_t nextstate; // used with SHIFT
|
||||||
|
struct {
|
||||||
|
HCFChoice *lhs; // symbol carrying semantic actions etc.
|
||||||
|
size_t length; // # of symbols in rhs
|
||||||
|
// NB: the rhs symbols are not needed for the parse
|
||||||
|
} production; // used with REDUCE
|
||||||
|
};
|
||||||
|
} HLRAction;
|
||||||
|
|
||||||
|
typedef struct HLRTable_ {
|
||||||
|
size_t nrows;
|
||||||
|
HHashTable **rows; // map symbols to HLRActions
|
||||||
|
HLRAction **forall; // shortcut to set an action for an entire row
|
||||||
|
HCFChoice *start; // start symbol
|
||||||
|
HSlist *inadeq; // indices of any inadequate states
|
||||||
|
HArena *arena;
|
||||||
|
HAllocator *mm__;
|
||||||
|
} HLRTable;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Constructing the characteristic automaton (handle recognizer) */
|
||||||
|
|
||||||
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
|
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
|
||||||
{
|
{
|
||||||
HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem));
|
HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem));
|
||||||
|
|
@ -157,7 +185,7 @@ static HHashSet *closure(HCFGrammar *g, const HHashSet *items)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
HLRDFA *h_lalr_dfa(HCFGrammar *g)
|
HLRDFA *h_lr0_dfa(HCFGrammar *g)
|
||||||
{
|
{
|
||||||
HArena *arena = g->arena;
|
HArena *arena = g->arena;
|
||||||
|
|
||||||
|
|
@ -275,26 +303,24 @@ HLRDFA *h_lalr_dfa(HCFGrammar *g)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* LALR table generation */
|
/* LR(0) table generation */
|
||||||
|
|
||||||
typedef struct HLRAction_ {
|
// XXX replace other hashtable iterations with this
|
||||||
enum {HLR_SHIFT, HLR_REDUCE} type;
|
// XXX move to internal.h or something
|
||||||
union {
|
#define H_FOREACH_(HT) do { \
|
||||||
size_t nextstate; // used with shift
|
const HHashTable *ht = HT; \
|
||||||
struct {
|
for(size_t i=0; i < ht->capacity; i++) { \
|
||||||
HCFChoice *lhs;
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { \
|
||||||
HCFChoice **rhs;
|
if(hte->key == NULL) continue;
|
||||||
} production; // used with reduce
|
|
||||||
};
|
|
||||||
} HLRAction;
|
|
||||||
|
|
||||||
typedef struct HLRTable_ {
|
#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_(HT) \
|
||||||
size_t nrows;
|
const KEYVAR = hte->key; \
|
||||||
HHashTable **rows; // map symbols to HLRActions
|
VALVAR = hte->value;
|
||||||
HCFChoice *start; // start symbol
|
|
||||||
HArena *arena;
|
#define H_END_FOREACH \
|
||||||
HAllocator *mm__;
|
} \
|
||||||
} HLRTable;
|
} \
|
||||||
|
} while(0);
|
||||||
|
|
||||||
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
||||||
{
|
{
|
||||||
|
|
@ -313,91 +339,229 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HCFGrammar *transform_grammar(const HCFGrammar *g, const HLRTable *table,
|
void h_lrtable_free(HLRTable *table)
|
||||||
const HLRDFA *dfa, HHashTable **syms)
|
|
||||||
{
|
{
|
||||||
HCFGrammar *gt = h_cfgrammar_new(g->mm__);
|
HAllocator *mm__ = table->mm__;
|
||||||
HArena *arena = gt->arena;
|
h_delete_arena(table->arena);
|
||||||
|
h_free(table);
|
||||||
|
}
|
||||||
|
|
||||||
// old grammar symbol ->
|
static HLRAction *shift_action(HArena *arena, size_t nextstate)
|
||||||
//HHashTable *map = h_hashtable_new(
|
{
|
||||||
|
HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction));
|
||||||
|
action->type = HLR_SHIFT;
|
||||||
|
action->nextstate = nextstate;
|
||||||
|
return action;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HLRAction *reduce_action(HArena *arena, HCFChoice *lhs, size_t rhslen)
|
||||||
|
{
|
||||||
|
HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction));
|
||||||
|
action->type = HLR_REDUCE;
|
||||||
|
action->production.lhs = lhs;
|
||||||
|
action->production.length = rhslen;
|
||||||
|
return action;
|
||||||
|
}
|
||||||
|
|
||||||
|
HLRTable *h_lr0_table(HCFGrammar *g)
|
||||||
|
{
|
||||||
|
HAllocator *mm__ = g->mm__;
|
||||||
|
|
||||||
|
// construct LR(0) DFA
|
||||||
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
|
if(!dfa) return NULL;
|
||||||
|
|
||||||
|
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
||||||
|
HArena *arena = table->arena;
|
||||||
|
|
||||||
|
// add shift entries
|
||||||
|
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
||||||
|
// for each transition x-A->y, add "shift, goto y" to table entry (x,A)
|
||||||
|
HLRTransition *t = x->elem;
|
||||||
|
|
||||||
|
HLRAction *action = shift_action(arena, t->to);
|
||||||
|
h_hashtable_put(table->rows[t->from], t->symbol, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add reduce entries, record inadequate states
|
||||||
for(size_t i=0; i<dfa->nstates; i++) {
|
for(size_t i=0; i<dfa->nstates; i++) {
|
||||||
const HLRState *state = dfa->states[i];
|
// find reducible items in state
|
||||||
|
H_FOREACH(dfa->states[i], HLRItem *item, void *v_)
|
||||||
syms[i] = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
if(item->mark == item->len) { // mark at the end
|
||||||
|
// XXX store more informative stuff in the inadeq records?
|
||||||
|
if(table->forall[i]) {
|
||||||
}
|
// reduce/reduce conflict with a previous item
|
||||||
|
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
|
||||||
// iterate over g->nts
|
} else if(!h_hashtable_empty(table->rows[i])) {
|
||||||
const HHashTable *ht = g->nts;
|
// shift/reduce conflict with one of the row's entries
|
||||||
for(size_t i=0; i < ht->capacity; i++) {
|
h_slist_push(table->inadeq, (void *)(uintptr_t)i);
|
||||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
} else {
|
||||||
if(hte->key == NULL)
|
// set reduce action for the entire row
|
||||||
continue;
|
table->forall[i] = reduce_action(arena, item->lhs, item->len);
|
||||||
|
}
|
||||||
const HCFChoice *A = hte->key;
|
|
||||||
|
|
||||||
// iterate over the productions of A
|
|
||||||
for(HCFSequence **p=A->seq; *p; p++) {
|
|
||||||
// find all transitions marked by A
|
|
||||||
// yields xAy -> rhs'
|
|
||||||
// trace rhs starting in state x and following the transitions
|
|
||||||
}
|
}
|
||||||
}
|
H_END_FOREACH
|
||||||
}
|
}
|
||||||
|
|
||||||
return gt;
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LALR-via-SLR grammar transformation */
|
||||||
|
|
||||||
|
static inline size_t seqsize(void *p_)
|
||||||
|
{
|
||||||
|
size_t n=0;
|
||||||
|
for(void **p=p_; *p; p++) n++;
|
||||||
|
return n+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
|
||||||
|
{
|
||||||
|
HLRAction *action = h_hashtable_get(table->rows[x], A);
|
||||||
|
assert(action != NULL);
|
||||||
|
assert(action->type == HLR_SHIFT);
|
||||||
|
return action->nextstate;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
|
||||||
|
size_t x, HCFChoice *B, size_t z);
|
||||||
|
|
||||||
|
static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
|
||||||
|
size_t x, HCFChoice *xAy)
|
||||||
|
{
|
||||||
|
HArena *arena = map->arena;
|
||||||
|
|
||||||
|
HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq)
|
||||||
|
* sizeof(HCFSequence *));
|
||||||
|
HCFSequence **p, **q;
|
||||||
|
for(p=xAy->seq, q=seq; *p; p++, q++) {
|
||||||
|
// trace rhs starting in state x and following the transitions
|
||||||
|
// xAy -> xBz ...
|
||||||
|
|
||||||
|
HCFChoice **B = (*p)->items;
|
||||||
|
HCFChoice **xBz = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *));
|
||||||
|
for(; *B; B++, xBz++) {
|
||||||
|
size_t z = follow_transition(table, x, *B);
|
||||||
|
*xBz = transform_symbol(table, map, x, *B, z);
|
||||||
|
x=z;
|
||||||
|
}
|
||||||
|
*xBz = NULL;
|
||||||
|
|
||||||
|
*q = h_arena_malloc(arena, sizeof(HCFSequence));
|
||||||
|
(*q)->items = xBz;
|
||||||
|
}
|
||||||
|
*q = NULL;
|
||||||
|
xAy->seq = seq;
|
||||||
|
|
||||||
|
return xAy; // pass-through
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline HLRTransition *transition(HArena *arena,
|
||||||
|
size_t x, const HCFChoice *A, size_t y)
|
||||||
|
{
|
||||||
|
HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
|
||||||
|
t->from = x;
|
||||||
|
t->symbol = A;
|
||||||
|
t->to = y;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
|
||||||
|
size_t x, HCFChoice *B, size_t z)
|
||||||
|
{
|
||||||
|
HArena *arena = map->arena;
|
||||||
|
|
||||||
|
// look up the transition in map, create symbol if not found
|
||||||
|
HLRTransition *x_B_z = transition(arena, x, B, z);
|
||||||
|
HCFChoice *xBz = h_hashtable_get(map, x_B_z);
|
||||||
|
if(!xBz) {
|
||||||
|
HCFChoice *xBz = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
|
*xBz = *B;
|
||||||
|
h_hashtable_put(map, x_B_z, xBz);
|
||||||
|
}
|
||||||
|
|
||||||
|
return transform_productions(table, map, x, xBz);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool eq_transition(const void *p, const void *q)
|
||||||
|
{
|
||||||
|
const HLRTransition *a=p, *b=q;
|
||||||
|
return (a->from == b->from && a->to == b->to && a->symbol == b->symbol);
|
||||||
|
}
|
||||||
|
|
||||||
|
static HHashValue hash_transition(const void *p)
|
||||||
|
{
|
||||||
|
const HLRTransition *t = p;
|
||||||
|
return (h_hash_ptr(t->symbol) + t->from + t->to); // XXX ?
|
||||||
|
}
|
||||||
|
|
||||||
|
static HHashTable *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
|
||||||
|
{
|
||||||
|
HArena *arena = g->arena; // XXX ?
|
||||||
|
HHashTable *map = h_hashtable_new(arena, eq_transition, hash_transition);
|
||||||
|
|
||||||
|
// copy the start symbol over
|
||||||
|
HCFChoice *start = h_arena_malloc(arena, sizeof(HCFChoice));
|
||||||
|
*start = *(g->start);
|
||||||
|
h_hashtable_put(map, g->start, start);
|
||||||
|
|
||||||
|
transform_productions(tbl, map, 0, start);
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LALR table generation */
|
||||||
|
|
||||||
|
bool is_inadequate(HLRTable *table, size_t state)
|
||||||
|
{
|
||||||
|
// XXX
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_conflicts(HLRTable *table)
|
||||||
|
{
|
||||||
|
return !h_slist_empty(table->inadeq);
|
||||||
}
|
}
|
||||||
|
|
||||||
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
{
|
{
|
||||||
// generate CFG from parser
|
// generate CFG from parser
|
||||||
// construct LR(0) DFA
|
// build LR(0) table
|
||||||
// build parse table, shift-entries only
|
// if necessary, resolve conflicts "by conversion to SLR"
|
||||||
// for each transition a--S-->b, add "shift, goto b" to table entry (a,S)
|
|
||||||
// determine lookahead "by conversion to SLR"
|
|
||||||
// transform grammar to encode transitions in symbols
|
|
||||||
// -> lookahead for an item is the transformed left-hand side's follow set
|
|
||||||
// finish table; for each state:
|
|
||||||
// add reduce entries for its accepting items
|
|
||||||
// in case of conflict, add lookahead info
|
|
||||||
|
|
||||||
HCFGrammar *g = h_cfgrammar(mm__, parser);
|
HCFGrammar *g = h_cfgrammar(mm__, parser);
|
||||||
if(g == NULL) // backend not suitable (language not context-free)
|
if(g == NULL) // backend not suitable (language not context-free)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
HLRDFA *dfa = h_lalr_dfa(g);
|
HLRTable *table = h_lr0_table(g);
|
||||||
if(dfa == NULL) // this should actually not happen
|
if(table == NULL) // this should normally not happen
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
// create table with shift actions
|
if(has_conflicts(table)) {
|
||||||
HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
|
HHashTable *map = enhance_grammar(g, table);
|
||||||
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
if(map == NULL) // this should normally not happen
|
||||||
HLRTransition *t = x->elem;
|
return -1;
|
||||||
HLRAction *action = h_arena_malloc(table->arena, sizeof(HLRAction));
|
|
||||||
action->type = HLR_SHIFT;
|
// XXX resolve conflicts
|
||||||
action->nextstate = t->to;
|
// iterate over dfa's transitions where 'from' state is inadequate
|
||||||
h_hashtable_put(table->rows[t->from], t->symbol, action);
|
// look up enhanced symbol corr. to the transition
|
||||||
|
// for each terminal in follow set of enh. symbol:
|
||||||
|
// put reduce action into table cell (state, terminal)
|
||||||
|
// conflict if already occupied
|
||||||
}
|
}
|
||||||
|
|
||||||
// mapping (state,item)-pairs to the symbols of the new grammar
|
h_cfgrammar_free(g);
|
||||||
HHashTable **syms = h_arena_malloc(g->arena, dfa->nstates * sizeof(HHashTable *));
|
parser->backend_data = table;
|
||||||
// XXX use a different arena for this (and other things)
|
return has_conflicts(table)? -1 : 0;
|
||||||
|
|
||||||
HCFGrammar *gt = transform_grammar(g, table, dfa, syms);
|
|
||||||
if(gt == NULL) // this should actually not happen
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// XXX fill in reduce actions
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_lalr_free(HParser *parser)
|
void h_lalr_free(HParser *parser)
|
||||||
{
|
{
|
||||||
// XXX free data structures
|
HLRTable *table = parser->backend_data;
|
||||||
|
h_lrtable_free(table);
|
||||||
parser->backend_data = NULL;
|
parser->backend_data = NULL;
|
||||||
parser->backend = PB_PACKRAT;
|
parser->backend = PB_PACKRAT;
|
||||||
}
|
}
|
||||||
|
|
@ -538,7 +702,7 @@ int test_lalr(void)
|
||||||
h_pprint_grammar(stdout, g, 0);
|
h_pprint_grammar(stdout, g, 0);
|
||||||
|
|
||||||
printf("\n==== D F A ====\n");
|
printf("\n==== D F A ====\n");
|
||||||
HLRDFA *dfa = h_lalr_dfa(g);
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
if(dfa)
|
if(dfa)
|
||||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue