scratch commit of LALR preparations (breaks compile)

This commit is contained in:
Sven M. Hallberg 2013-06-06 13:05:48 +02:00
parent e7a388d1c7
commit 167e187151

View file

@ -4,6 +4,125 @@
#include "../parsers/parser_internal.h"
// PLAN:
// data structures:
// - LR table is an array of hashtables that map grammar symbols (HCFChoice)
// to LRActions.
// build LR(0) DFA
// extend with lookahead information by either:
// - reworking algorithm to propagate lookahead ("simple LALR generation")
// - follow sets of enhanced grammar ("conversion to SLR")
/* Constructing the characteristic automaton (handle recognizer) */
// - DFA is a hashset containing states (mapped to numbers)
// - states are hashsets containing LRItems
// - LRItems contain an optional lookahead set (HStringMap)
// - states (hashsets) get hash and comparison functions that ignore the lookahead
typedef struct HLRDFA_ {
HHashSet *states;
HSlist *transitions;
} HLRDFA;
typedef struct HLRTransition_ {
HLRState *from;
HCFChoice *symbol;
HLRState *to;
} HLRTransition;
typedef struct HLRItem_ {
HCFChoice *lhs;
HCFChoice **rhs;
size_t len; // number of elements in rhs
size_t mark;
HStringMap *lookahead; // optional
} HLRItem;
// compare LALR items - ignores lookahead
static bool eq_lalr_item(const void *p, const void *q)
{
const HLRItem *a=p, *b=q;
if(a->lhs != b->lhs) return false;
if(a->mark != b->mark) return false;
if(a->len != b->len) return false;
for(size_t i=0; i<a->len; i++)
if(a->rhs[i] != b->rhs[i]) return false;
return true;
}
// compare LALR item sets (DFA states)
static inline bool eq_lalr_itemset(const void *p, const void *q)
{
return h_hashset_equal(p, q);
}
// hash LALR items
static inline HHashValue hash_lalr_item(const HLRItem *x)
{
return (h_hash_ptr(x->lhs)
+ h_djbhash((uint8_t *)x->rhs, x->len*sizeof(HCFChoice *))
+ x->mark); // XXX is it okay to just add mark?
}
// hash LALR item sets (DFA states) - hash the elements and sum
static HHashValue hash_lalr_itemset(const void *p)
{
HHashValue hash = 0;
const HHashTable *ht = p;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
continue;
hash += hash_lalr_item(hte->key);
}
}
return hash;
}
static HHashSet *closure(const HHashSet *items);
HLRDFA *h_lalr_dfa(HCFGrammar *g)
{
HHashSet *states = h_hashset_new(g->arena, eq_lalr_itemset, hash_lalr_itemset);
// make initial state (kernel)
// while work to do (on some state)
// compute closure
// determine edge symbols
// for each edge symbol:
// advance respective items -> destination state (kernel)
// if destination is a new state:
// add it to state set
// add transition to it
// add it to the work list
}
/* LALR table generation */
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
// generate grammar
// construct dfa / determine lookahead
// extract table
// create an array of hashtables, one per state
// for each transition a--S-->b:
// add "shift, goto b" to table entry (a,S)
// for each state:
// add reduce entries for its accepting items
return -1;
}
void h_lalr_free(HParser *parser)
{
@ -13,13 +132,6 @@ void h_lalr_free(HParser *parser)
}
/* LALR table generation */
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
return -1;
}
/* LR driver */