More LL(k) fixes (Merge branch 'LL' of https://github.com/pesco/hammer)
This commit is contained in:
commit
c8dba54bb4
4 changed files with 90 additions and 61 deletions
|
|
@ -81,28 +81,6 @@ void h_llktable_free(HLLkTable *table)
|
||||||
h_free(table);
|
h_free(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute the predict_k set of production "A -> rhs".
|
|
||||||
* Always returns a newly-allocated HCFStringMap.
* This revision only handles k == 1; the assert at the top of the body enforces it.
|
|
||||||
*/
|
|
||||||
HCFStringMap *h_predict(size_t k, HCFGrammar *g,
|
|
||||||
const HCFChoice *A, const HCFSequence *rhs)
|
|
||||||
{
|
|
||||||
assert(k==1); // XXX larger k is not supported by this revision
|
|
||||||
HCFStringMap *ret = h_stringmap_new(g->arena); // result map, created from the grammar's arena
|
|
||||||
|
|
||||||
// predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs
|
|
||||||
// predict(A -> rhs) = first(rhs) otherwise
|
|
||||||
|
|
||||||
h_stringmap_update(ret, h_first_seq(k, g, rhs->items)); // start from first_k(rhs)
|
|
||||||
if(h_derives_epsilon_seq(g, rhs->items)) // rhs can derive the empty string
|
|
||||||
h_stringmap_update(ret, h_follow(k, g, A)); // ... so follow_k(A) is added as well
|
|
||||||
|
|
||||||
// make sure there are only strings of length _exactly_ k
|
|
||||||
ret->epsilon_branch = NULL; // only the top-level node is cleared, which matches the k == 1 restriction
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void *const CONFLICT = (void *)(uintptr_t)(-1);
|
void *const CONFLICT = (void *)(uintptr_t)(-1);
|
||||||
|
|
||||||
// helper for stringmap_merge
|
// helper for stringmap_merge
|
||||||
|
|
@ -113,7 +91,7 @@ static void *combine_entries(HHashSet *workset, void *dst, const void *src)
|
||||||
|
|
||||||
if(dst == CONFLICT) { // previous conflict
|
if(dst == CONFLICT) { // previous conflict
|
||||||
h_hashset_put(workset, src);
|
h_hashset_put(workset, src);
|
||||||
} else if(dst == src) { // new conflict
|
} else if(dst != src) { // new conflict
|
||||||
h_hashset_put(workset, dst);
|
h_hashset_put(workset, dst);
|
||||||
h_hashset_put(workset, src);
|
h_hashset_put(workset, src);
|
||||||
dst = CONFLICT;
|
dst = CONFLICT;
|
||||||
|
|
@ -133,6 +111,12 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap *
|
||||||
combine_entries(workset, dst->epsilon_branch, src->epsilon_branch);
|
combine_entries(workset, dst->epsilon_branch, src->epsilon_branch);
|
||||||
else
|
else
|
||||||
dst->epsilon_branch = src->epsilon_branch;
|
dst->epsilon_branch = src->epsilon_branch;
|
||||||
|
} else {
|
||||||
|
// if there is a non-conflicting value on the left (dst) side, it means
|
||||||
|
// that prediction is already unambiguous. we can drop the right (src)
|
||||||
|
// side we were going to extend with.
|
||||||
|
if(dst->epsilon_branch && dst->epsilon_branch != CONFLICT)
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(src->end_branch) {
|
if(src->end_branch) {
|
||||||
|
|
@ -164,9 +148,6 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap *
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
|
|
||||||
void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
|
|
||||||
|
|
||||||
/* Generate entries for the production "A" in the given table row. */
|
/* Generate entries for the production "A" in the given table row. */
|
||||||
static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row,
|
static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row,
|
||||||
const HCFChoice *A)
|
const HCFChoice *A)
|
||||||
|
|
@ -181,6 +162,8 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row,
|
||||||
// run until workset exhausted or kmax hit
|
// run until workset exhausted or kmax hit
|
||||||
size_t k;
|
size_t k;
|
||||||
for(k=1; k<=kmax; k++) {
|
for(k=1; k<=kmax; k++) {
|
||||||
|
printf("k=%lu\n", k); // XXX debug
|
||||||
|
|
||||||
// allocate a fresh workset for the next round
|
// allocate a fresh workset for the next round
|
||||||
HHashSet *nextset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
HHashSet *nextset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
|
||||||
|
|
@ -199,29 +182,30 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row,
|
||||||
HCFStringMap *pred = h_predict(k, g, A, rhs);
|
HCFStringMap *pred = h_predict(k, g, A, rhs);
|
||||||
h_stringmap_replace(pred, NULL, rhs);
|
h_stringmap_replace(pred, NULL, rhs);
|
||||||
|
|
||||||
|
// XXX debug
|
||||||
|
printf("predict(");
|
||||||
|
h_pprint_sequence(stdout, g, rhs);
|
||||||
|
printf(") = ");
|
||||||
|
h_pprint_stringset(stdout, pred, 0);
|
||||||
|
|
||||||
// merge predict set into the row
|
// merge predict set into the row
|
||||||
// accumulates conflicts in new workset
|
// accumulates conflicts in new workset
|
||||||
stringmap_merge(nextset, row, pred);
|
stringmap_merge(nextset, row, pred);
|
||||||
|
|
||||||
// XXX debug
|
|
||||||
if(A == g->start) {
|
|
||||||
printf("predict(");
|
|
||||||
pprint_sequence(stdout, g, rhs);
|
|
||||||
printf(" ) = ");
|
|
||||||
h_pprint_stringset(stdout, g, pred, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// XXX debug
|
// XXX debug
|
||||||
if(A == g->start) {
|
|
||||||
printf("row(");
|
printf("row(");
|
||||||
pprint_symbol(stdout, g, A);
|
h_pprint_symbol(stdout, g, A);
|
||||||
printf(") = ");
|
printf(") = ");
|
||||||
h_pprint_stringset(stdout, g, row, 0);
|
h_pprint_stringset(stdout, row, 0);
|
||||||
|
if(h_stringmap_get(row, (uint8_t *)"a", 1, false)) {
|
||||||
|
printf(" a -> ");
|
||||||
|
h_pprint_sequence(stdout, g, h_stringmap_get(row, (uint8_t *)"a", 1, false));
|
||||||
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// switch to the updated workset
|
// switch to the updated workset
|
||||||
h_hashtable_free(workset);
|
h_hashset_free(workset);
|
||||||
workset = nextset;
|
workset = nextset;
|
||||||
|
|
||||||
// if the workset is empty, row is without conflict; we're done
|
// if the workset is empty, row is without conflict; we're done
|
||||||
|
|
@ -473,9 +457,9 @@ int test_llk(void)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HParser *X = h_optional(h_ch('x'));
|
HParser *X = h_optional(h_ch('x'));
|
||||||
HParser *Y = h_sequence(h_ch('y'), NULL);
|
//HParser *Y = h_epsilon_p(); //h_sequence(h_ch('y'), NULL);
|
||||||
HParser *A = h_sequence(X, Y, h_ch('a'), NULL);
|
HParser *A = h_sequence(X, h_ch('a'), NULL);
|
||||||
HParser *B = h_sequence(Y, h_ch('b'), NULL);
|
HParser *B = h_sequence(h_ch('b'), NULL);
|
||||||
HParser *p = h_choice(A, B, NULL);
|
HParser *p = h_choice(A, B, NULL);
|
||||||
|
|
||||||
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
||||||
|
|
@ -489,16 +473,16 @@ int test_llk(void)
|
||||||
printf("derive epsilon: ");
|
printf("derive epsilon: ");
|
||||||
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
||||||
printf("first(A) = ");
|
printf("first(A) = ");
|
||||||
h_pprint_stringset(stdout, g, h_first(3, g, g->start), 0);
|
h_pprint_stringset(stdout, h_first(3, g, g->start), 0);
|
||||||
//printf("follow(C) = ");
|
//printf("follow(C) = ");
|
||||||
//h_pprint_stringset(stdout, g, h_follow(3, g, h_desugar(&system_allocator, c)), 0);
|
//h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, c)), 0);
|
||||||
|
|
||||||
if(h_compile(p, PB_LLk, NULL)) {
|
if(h_compile(p, PB_LLk, (void *)2)) {
|
||||||
fprintf(stderr, "does not compile\n");
|
fprintf(stderr, "does not compile\n");
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
HParseResult *res = h_parse(p, (uint8_t *)"xa", 2);
|
HParseResult *res = h_parse(p, (uint8_t *)"ab", 2);
|
||||||
if(res)
|
if(res)
|
||||||
h_pprint(stdout, res->ast, 0, 2);
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -437,7 +437,23 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
// helper for h_predict
//
// Clears the epsilon_branch of m and of every node reachable from it through
// char_branches at a depth below k. h_predict calls this to "make sure there
// are only strings of length _exactly_ k".
//   k == 0: m is left untouched.
//   k == 1: only m's own epsilon_branch is cleared.
// NOTE(review): presumably a set epsilon_branch marks a string ending at that
// node; end_branch is not touched here -- confirm that this is intended.
|
||||||
|
static void remove_all_shorter(size_t k, HCFStringMap *m)
|
||||||
|
{
|
||||||
|
if(k==0) return; // no string can be shorter than length 0
|
||||||
|
m->epsilon_branch = NULL;
|
||||||
|
if(k==1) return; // children would be visited with k-1 == 0, which is a no-op
|
||||||
|
|
||||||
|
// iterate over m->char_branches
|
||||||
|
const HHashTable *ht = m->char_branches;
|
||||||
|
for(size_t i=0; i < ht->capacity; i++) { // every bucket of the table
|
||||||
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { // every entry chained in bucket i
|
||||||
|
if(hte->key == NULL) // presumably an unused slot -- TODO confirm
|
||||||
|
continue;
|
||||||
|
remove_all_shorter(k-1, hte->value); // recursion into subtree
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// h_follow adapted to the signature of StringSetFun
|
// h_follow adapted to the signature of StringSetFun
|
||||||
static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
|
static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
|
|
@ -507,6 +523,23 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Compute the predict_k set of production "A -> rhs".
 * The result is a map freshly created with h_stringmap_new(g->arena) and is
 * returned to the caller.
 */
HCFStringMap *h_predict(size_t k, HCFGrammar *g,
|
||||||
|
const HCFChoice *A, const HCFSequence *rhs)
|
||||||
|
{
|
||||||
|
HCFStringMap *ret = h_stringmap_new(g->arena);
|
||||||
|
|
||||||
|
// predict_k(A -> rhs) =
|
||||||
|
// { ab | a <- first_k(rhs), b <- follow_k(A), |ab|=k }
|
||||||
|
|
||||||
|
const HCFStringMap *first_rhs = h_first_seq(k, g, rhs->items); // first_k of the right-hand side
|
||||||
|
stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A); // &A is cast to fit h_follow_'s HCFChoice ** parameter
|
||||||
|
|
||||||
|
// make sure there are only strings of length _exactly_ k
|
||||||
|
remove_all_shorter(k, ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
||||||
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
||||||
size_t k, const HCFStringMap *as,
|
size_t k, const HCFStringMap *as,
|
||||||
|
|
@ -624,7 +657,7 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
||||||
{
|
{
|
||||||
switch(x->type) {
|
switch(x->type) {
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
|
|
@ -643,31 +676,36 @@ void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
||||||
{
|
{
|
||||||
HCFChoice **x = seq->items;
|
HCFChoice **x = seq->items;
|
||||||
|
|
||||||
if(*x == NULL) { // the empty sequence
|
if(*x == NULL) { // the empty sequence
|
||||||
fputs(" \"\"", f);
|
fputs("\"\"", f);
|
||||||
} else {
|
} else {
|
||||||
while(*x) {
|
while(*x) {
|
||||||
fputc(' ', f); // separator
|
if(x != seq->items) fputc(' ', f); // internal separator
|
||||||
|
|
||||||
if((*x)->type == HCF_CHAR) {
|
if((*x)->type == HCF_CHAR) {
|
||||||
// condense character strings
|
// condense character strings
|
||||||
x = pprint_string(f, x);
|
x = pprint_string(f, x);
|
||||||
} else {
|
} else {
|
||||||
pprint_symbol(f, g, *x);
|
h_pprint_symbol(f, g, *x);
|
||||||
x++;
|
x++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// adds some separators expected below
|
||||||
|
static void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
||||||
|
{
|
||||||
|
fputc(' ', f);
|
||||||
|
h_pprint_sequence(f, g, seq);
|
||||||
fputc('\n', f);
|
fputc('\n', f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
||||||
void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
|
||||||
int indent, int len)
|
int indent, int len)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -738,7 +776,7 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
|
||||||
|
|
||||||
a = hte->key; // production's left-hand symbol
|
a = hte->key; // production's left-hand symbol
|
||||||
|
|
||||||
pprint_symbol(file, g, a);
|
h_pprint_symbol(file, g, a);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -801,7 +839,7 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
|
||||||
return first;
|
return first;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent)
|
void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
for(j=0; j<indent; j++) fputc(' ', file);
|
for(j=0; j<indent; j++) fputc(' ', file);
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool
|
||||||
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
||||||
bool h_stringmap_present_epsilon(const HCFStringMap *m);
|
bool h_stringmap_present_epsilon(const HCFStringMap *m);
|
||||||
|
|
||||||
static inline void *h_stringmap_get_char(const HCFStringMap *m, const uint8_t c)
|
static inline HCFStringMap *h_stringmap_get_char(const HCFStringMap *m, const uint8_t c)
|
||||||
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
|
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -80,8 +80,16 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s);
|
||||||
/* Compute follow_k set of symbol x. Memoized. */
|
/* Compute follow_k set of symbol x. Memoized. */
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
||||||
|
|
||||||
|
/* Compute the predict_k set of production "A -> rhs".
|
||||||
|
* Always returns a newly-allocated HCFStringMap.
|
||||||
|
*/
|
||||||
|
HCFStringMap *h_predict(size_t k, HCFGrammar *g,
|
||||||
|
const HCFChoice *A, const HCFSequence *rhs);
|
||||||
|
|
||||||
|
|
||||||
/* Pretty-printers for grammars and associated data. */
|
/* Pretty-printers for grammars and associated data. */
|
||||||
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
|
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
|
||||||
|
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
|
||||||
|
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
|
||||||
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
|
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
|
||||||
void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent);
|
void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent);
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,6 @@ static bool opt_isValidCF(void *env) {
|
||||||
static HParsedToken* reshape_optional(const HParseResult *p) {
|
static HParsedToken* reshape_optional(const HParseResult *p) {
|
||||||
assert(p->ast);
|
assert(p->ast);
|
||||||
assert(p->ast->token_type == TT_SEQUENCE);
|
assert(p->ast->token_type == TT_SEQUENCE);
|
||||||
assert(p->ast->seq->used > 0);
|
|
||||||
|
|
||||||
HParsedToken *res = p->ast->seq->elements[0];
|
HParsedToken *res = p->ast->seq->elements[0];
|
||||||
if(res)
|
if(res)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue