Merge branch 'fix-lalr_bugs' into iterative

This commit is contained in:
Sven M. Hallberg 2015-09-16 22:21:53 +02:00
commit e385e0e5de
5 changed files with 79 additions and 14 deletions

View file

@ -52,7 +52,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
if (xAy->type != HCF_CHOICE) {
return;
}
// XXX CHARSET?
// NB: nothing to do on quasi-terminal CHARSET which carries no list of rhs's
HArena *arena = eg->arena;
@ -286,14 +286,28 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
assert(lhss != NULL);
H_FOREACH_KEY(lhss, HCFChoice *lhs)
assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET?
assert(lhs->type == HCF_CHOICE || lhs->type == HCF_CHARSET);
for(HCFSequence **p=lhs->seq; *p; p++) {
HCFChoice **rhs = (*p)->items;
if(!match_production(eg, rhs, item->rhs, state)) {
continue;
}
bool match = false;
if(lhs->type == HCF_CHOICE) {
for(HCFSequence **p=lhs->seq; *p; p++) {
HCFChoice **rhs = (*p)->items;
if(match_production(eg, rhs, item->rhs, state)) {
match = true;
break;
}
}
} else { // HCF_CHARSET
assert(item->rhs[0] != NULL);
assert(item->rhs[1] == NULL);
assert(item->rhs[0]->type == HCF_CHAR);
HLRTransition *t = h_hashtable_get(eg->smap, lhs);
assert(t != NULL);
match = (t->to == state
&& charset_isset(lhs->charset, item->rhs[0]->chr));
}
if(match) {
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
@ -304,7 +318,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// for each lookahead symbol, put action into table cell
if(terminals_put(table->tmap[state], fs, action) < 0)
inadeq = true;
} H_END_FOREACH // enhanced production
}
H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item
if(inadeq) {

View file

@ -677,7 +677,7 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
}
void h_pprint_char(FILE *f, char c)
void h_pprint_char(FILE *f, uint8_t c)
{
switch(c) {
case '"': fputs("\\\"", f); break;
@ -690,12 +690,12 @@ void h_pprint_char(FILE *f, char c)
if (isprint((int)c)) {
fputc(c, f);
} else {
fprintf(f, "\\x%.2X", c);
fprintf(f, "\\x%.2X", (unsigned int)c);
}
}
}
static void pprint_charset_char(FILE *f, char c)
static void pprint_charset_char(FILE *f, uint8_t c)
{
switch(c) {
case '"': fputc(c, f); break;

View file

@ -105,4 +105,4 @@ void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
void h_pprint_stringmap(FILE *file, char sep,
void (*valprint)(FILE *f, void *env, void *val), void *env,
const HStringMap *map);
void h_pprint_char(FILE *file, char c);
void h_pprint_char(FILE *file, uint8_t c);

View file

@ -59,6 +59,32 @@ static bool many_isValidCF(void *env) {
repeat->sep->vtable->isValidCF(repeat->sep->env)));
}
// Reshape callback: turn the right-nested result (_ x (_ y (_ z ()))) into
// the flat sequence (x y z), where the '_' are optional leading elements
// that get dropped (presumably the separator of the repetition -- confirm
// against the desugaring that installs this callback).
//
// Every node visited must be a TT_SEQUENCE that is either empty (end of the
// chain) or holds 2 or 3 elements: [optional '_',] item, rest-of-chain.
//
// p    - parse result whose 'ast' is the head of the nested chain; the new
//        token is allocated from p->arena and inherits the head's index and
//        bit_offset. NOTE(review): p->ast is dereferenced unconditionally
//        when the position is copied, so it is assumed to be non-NULL.
// user - unused.
//
// Returns a freshly allocated TT_SEQUENCE token holding the collected items.
static HParsedToken *reshape_many(const HParseResult *p, void *user)
{
  (void)user;

  HCountedArray *seq = h_carray_new(p->arena);

  const HParsedToken *tok = p->ast;
  while(tok) {
    assert(tok->token_type == TT_SEQUENCE);

    size_t n = tok->seq->used;
    if(n == 0) {
      break;            // empty sequence terminates the chain
    }

    // n-2 and n-1 are used as indices below; with n == 1 the unsigned
    // subtraction would wrap around and read far out of bounds.
    assert(n >= 2 && n <= 3);

    h_carray_append(seq, tok->seq->elements[n-2]);  // the item itself
    tok = tok->seq->elements[n-1];                  // rest of the chain
  }

  HParsedToken *res = a_new_(p->arena, HParsedToken, 1);
  res->token_type = TT_SEQUENCE;
  res->seq = seq;
  res->index = p->ast->index;
  res->bit_offset = p->ast->bit_offset;

  return res;
}
static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
// TODO: refactor this.
HRepeat *repeat = (HRepeat*)env;
@ -93,7 +119,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_BEGIN_CHOICE() { // Mar
HCFS_BEGIN_SEQ() {
if (repeat->sep != NULL) {
HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep));
HCFS_DESUGAR(repeat->sep);
}
//stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry.
HCFS_DESUGAR(repeat->p);
@ -108,7 +134,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
//HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p()));
} HCFS_END_SEQ();
}
HCFS_THIS_CHOICE->reshape = h_act_flatten;
HCFS_THIS_CHOICE->reshape = reshape_many;
} HCFS_END_CHOICE();
}

View file

@ -118,9 +118,33 @@ static void test_llk_zero_end(void) {
g_check_parse_failed(aze, be, "a", 1);
}
// Regression test for LALR compilation of a grammar in which an item's
// left-hand side is an HCF_CHARSET.
static void test_lalr_charset_lhs(void) {
  // This parser would fail to compile because of an unhandled case in trying
  // to resolve a conflict where an item's left-hand-side was an HCF_CHARSET.
  HParser *a_or_byte = h_choice(h_ch('A'), h_uint8(), NULL);

  g_check_parse_match(a_or_byte, PB_LALR, "A",1, "u0x41");
  g_check_parse_match(a_or_byte, PB_LALR, "B",1, "u0x42");
}
// Regression test for the result shape of h_many over a sequence on the
// LL(k), LALR and GLR backends.
static void test_cfg_many_seq(void) {
  // Each repetition must stay its own nested sequence; a faulty reshape on
  // h_many made these parse as (u0x41 u0x42 u0x41 u0x42) instead.
  HParser *ab_pairs = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL));

  g_check_parse_match(ab_pairs, PB_LLk, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
  g_check_parse_match(ab_pairs, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
  g_check_parse_match(ab_pairs, PB_GLR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
}
// Register every regression test case under the "/core/regression/" path
// prefix, in the order listed in the table below.
void register_regression_tests(void) {
  static const struct {
    const char *path;
    void (*func)(void);
  } cases[] = {
    { "/core/regression/bug118",           test_bug118 },
    { "/core/regression/seq_index_path",   test_seq_index_path },
    { "/core/regression/read_bits_48",     test_read_bits_48 },
    { "/core/regression/llk_zero_end",     test_llk_zero_end },
    { "/core/regression/lalr_charset_lhs", test_lalr_charset_lhs },
    { "/core/regression/cfg_many_seq",     test_cfg_many_seq },
  };

  for(size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
    g_test_add_func(cases[i].path, cases[i].func);
  }
}