Refactor and bug hunt: unsuccessfully trying to find out why
h_first(2, ...) won't give me length-2 strings.
This commit is contained in:
parent
289c57aef5
commit
2c8d76d7ea
3 changed files with 27 additions and 56 deletions
|
|
@ -369,12 +369,12 @@ int test_llk(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
h_pprint_grammar(stdout, g, 0);
|
h_pprint_grammar(stdout, g, 0);
|
||||||
printf("generate epsilon: ");
|
printf("derive epsilon: ");
|
||||||
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
||||||
printf("first(A) = ");
|
printf("first(A) = ");
|
||||||
h_pprint_stringset(stdout, g, h_first(1, g, g->start), 0);
|
h_pprint_stringset(stdout, g, h_first(2, g, g->start), 0);
|
||||||
printf("follow(C) = ");
|
printf("follow(C) = ");
|
||||||
h_pprint_stringset(stdout, g, h_follow(1, g, h_desugar(&system_allocator, c)), 0);
|
h_pprint_stringset(stdout, g, h_follow(2, g, h_desugar(&system_allocator, c)), 0);
|
||||||
|
|
||||||
h_compile(p, PB_LLk, NULL);
|
h_compile(p, PB_LLk, NULL);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -131,8 +131,8 @@ static void ensure_k(HCFGrammar *g, size_t k)
|
||||||
if(g->kmax > 0) {
|
if(g->kmax > 0) {
|
||||||
// we are resizing, copy the old tables over
|
// we are resizing, copy the old tables over
|
||||||
for(size_t i=0; i<=g->kmax; i++) {
|
for(size_t i=0; i<=g->kmax; i++) {
|
||||||
first[i] = g->first[0];
|
first[i] = g->first[i];
|
||||||
follow[i] = g->follow[0];
|
follow[i] = g->follow[i];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// we are initializing, allocate the first (in fact, dummy) tables
|
// we are initializing, allocate the first (in fact, dummy) tables
|
||||||
|
|
@ -151,9 +151,12 @@ static void ensure_k(HCFGrammar *g, size_t k)
|
||||||
g->kmax = k;
|
g->kmax = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
|
bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
|
||||||
{
|
{
|
||||||
|
// XXX this can now also be implemented in terms of h_first:
|
||||||
|
// h_stringmap_present_epsilon(h_first(1, g, symbol))
|
||||||
|
// then the geneps structure and associated functions would be unneeded
|
||||||
|
|
||||||
assert(g->geneps != NULL);
|
assert(g->geneps != NULL);
|
||||||
|
|
||||||
switch(symbol->type) {
|
switch(symbol->type) {
|
||||||
|
|
@ -274,6 +277,11 @@ bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bo
|
||||||
return (h_stringmap_get(m, str, n, end) != NULL);
|
return (h_stringmap_get(m, str, n, end) != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool h_stringmap_present_epsilon(const HCFStringMap *m)
|
||||||
|
{
|
||||||
|
return (m->epsilon_branch != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
{
|
{
|
||||||
|
|
@ -323,11 +331,17 @@ const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// helpers for h_first_seq, definitions below
|
// helpers for h_first_seq, definitions below
|
||||||
static void first_extend(HCFGrammar *g, HCFStringMap *ret,
|
|
||||||
size_t k, const HCFStringMap *as, HCFChoice **tail);
|
|
||||||
static bool is_singleton_epsilon(const HCFStringMap *m);
|
static bool is_singleton_epsilon(const HCFStringMap *m);
|
||||||
static bool any_string_shorter(size_t k, const HCFStringMap *m);
|
static bool any_string_shorter(size_t k, const HCFStringMap *m);
|
||||||
|
|
||||||
|
// pointer to functions like h_first_seq
|
||||||
|
typedef const HCFStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
|
||||||
|
|
||||||
|
// helper for h_first_seq and h_follow
|
||||||
|
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
||||||
|
size_t k, const HCFStringMap *as,
|
||||||
|
StringSetFun f, HCFChoice **tail);
|
||||||
|
|
||||||
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
// shortcut: the first set of the empty sequence, for any k, is {""}
|
// shortcut: the first set of the empty sequence, for any k, is {""}
|
||||||
|
|
@ -353,48 +367,11 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
HCFStringMap *ret = h_stringmap_new(g->arena);
|
HCFStringMap *ret = h_stringmap_new(g->arena);
|
||||||
|
|
||||||
// extend the elements of first_k(X) up to length k from tail
|
// extend the elements of first_k(X) up to length k from tail
|
||||||
first_extend(g, ret, k, first_x, tail);
|
stringset_extend(g, ret, k, first_x, h_first_seq, tail);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the set { a b | a <- as, b <- first_l(tail), l=k-|a| } to ret
|
|
||||||
static void first_extend(HCFGrammar *g, HCFStringMap *ret,
|
|
||||||
size_t k, const HCFStringMap *as, HCFChoice **tail)
|
|
||||||
{
|
|
||||||
if(as->epsilon_branch) {
|
|
||||||
// for a="", add first_k(tail) to ret
|
|
||||||
h_stringmap_update(ret, h_first_seq(k, g, tail));
|
|
||||||
}
|
|
||||||
|
|
||||||
if(as->end_branch) {
|
|
||||||
// for a="$", nothing can follow; just add "$" to ret
|
|
||||||
// NB: formally, "$" is considered to be of length k
|
|
||||||
h_stringmap_put_end(ret, INSET);
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterate over as->char_branches
|
|
||||||
const HHashTable *ht = as->char_branches;
|
|
||||||
for(size_t i=0; i < ht->capacity; i++) {
|
|
||||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
|
||||||
if(hte->key == NULL)
|
|
||||||
continue;
|
|
||||||
uint8_t c = key_char((HCharKey)hte->key);
|
|
||||||
|
|
||||||
// follow the branch to find the set { a' | t a' <- as }
|
|
||||||
HCFStringMap *as_ = (HCFStringMap *)hte->value;
|
|
||||||
|
|
||||||
// now the elements of ret that begin with t are given by
|
|
||||||
// t { a b | a <- as_, b <- first_l(tail), l=k-|a|-1 }
|
|
||||||
// so we can use recursion over k
|
|
||||||
HCFStringMap *ret_ = h_stringmap_new(g->arena);
|
|
||||||
h_stringmap_put_char(ret, c, ret_);
|
|
||||||
|
|
||||||
first_extend(g, ret_, k-1, as_, tail);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool is_singleton_epsilon(const HCFStringMap *m)
|
static bool is_singleton_epsilon(const HCFStringMap *m)
|
||||||
{
|
{
|
||||||
return ( m->epsilon_branch
|
return ( m->epsilon_branch
|
||||||
|
|
@ -429,15 +406,8 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
|
||||||
|
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
||||||
|
|
||||||
// pointer to functions like h_first_seq
|
|
||||||
typedef const HCFStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice const* const*);
|
|
||||||
|
|
||||||
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
|
||||||
size_t k, const HCFStringMap *as,
|
|
||||||
StringSetFun f, HCFChoice const * const *tail);
|
|
||||||
|
|
||||||
// h_follow adapted to the signature of StringSetFun
|
// h_follow adapted to the signature of StringSetFun
|
||||||
static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice const* const*s)
|
static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
return h_follow(k, g, *s);
|
return h_follow(k, g, *s);
|
||||||
}
|
}
|
||||||
|
|
@ -479,7 +449,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if(hte->key == NULL)
|
||||||
continue;
|
continue;
|
||||||
HCFChoice const * const a = hte->key; // production's left-hand symbol
|
HCFChoice *a = (void *)hte->key; // production's left-hand symbol
|
||||||
assert(a->type == HCF_CHOICE);
|
assert(a->type == HCF_CHOICE);
|
||||||
|
|
||||||
// iterate over the productions for A
|
// iterate over the productions for A
|
||||||
|
|
@ -509,7 +479,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
||||||
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
||||||
size_t k, const HCFStringMap *as,
|
size_t k, const HCFStringMap *as,
|
||||||
StringSetFun f, HCFChoice const * const *tail)
|
StringSetFun f, HCFChoice **tail)
|
||||||
{
|
{
|
||||||
if(as->epsilon_branch) {
|
if(as->epsilon_branch) {
|
||||||
// for a="", add f_k(tail) to ret
|
// for a="", add f_k(tail) to ret
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,7 @@ void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v);
|
||||||
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n);
|
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n);
|
||||||
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
||||||
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
||||||
|
bool h_stringmap_present_epsilon(const HCFStringMap *m);
|
||||||
|
|
||||||
|
|
||||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue