Merge remote-tracking branch 'upstream/master' into fix-wrong_bit_length
This commit is contained in:
commit
89e675bd2a
5 changed files with 92 additions and 39 deletions
|
|
@ -91,7 +91,7 @@ static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
|
||||||
|
|
||||||
HHashSet *cs = h_hashtable_get(eg->corr, sym);
|
HHashSet *cs = h_hashtable_get(eg->corr, sym);
|
||||||
if (!cs) {
|
if (!cs) {
|
||||||
cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
|
cs = h_hashset_new(arena, h_eq_ptr, h_hash_ptr);
|
||||||
h_hashtable_put(eg->corr, sym, cs);
|
h_hashtable_put(eg->corr, sym, cs);
|
||||||
}
|
}
|
||||||
h_hashset_put(cs, esym);
|
h_hashset_put(cs, esym);
|
||||||
|
|
@ -208,6 +208,46 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
|
||||||
&& state == endstate);
|
&& state == endstate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// variant of match_production where the production lhs is a charset
|
||||||
|
// [..x..] -> x
|
||||||
|
static bool match_charset_production(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
|
const HCFChoice *lhs, HCFChoice *rhs,
|
||||||
|
size_t endstate)
|
||||||
|
{
|
||||||
|
assert(lhs->type == HCF_CHARSET);
|
||||||
|
assert(rhs->type == HCF_CHAR);
|
||||||
|
|
||||||
|
if(!charset_isset(lhs->charset, rhs->chr))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// determine the enhanced-grammar right-hand side and check end state
|
||||||
|
HLRTransition *t = h_hashtable_get(eg->smap, lhs);
|
||||||
|
assert(t != NULL);
|
||||||
|
return (follow_transition(table, t->from, rhs) == endstate);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check whether any production for sym (enhanced-grammar) matches the given
|
||||||
|
// (original-grammar) rhs and terminates in the given end state.
|
||||||
|
static bool match_any_production(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
|
const HCFChoice *sym, HCFChoice **rhs,
|
||||||
|
size_t endstate)
|
||||||
|
{
|
||||||
|
assert(sym->type == HCF_CHOICE || sym->type == HCF_CHARSET);
|
||||||
|
|
||||||
|
if(sym->type == HCF_CHOICE) {
|
||||||
|
for(HCFSequence **p=sym->seq; *p; p++) {
|
||||||
|
if(match_production(eg, (*p)->items, rhs, endstate))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else { // HCF_CHARSET
|
||||||
|
assert(rhs[0] != NULL);
|
||||||
|
assert(rhs[1] == NULL);
|
||||||
|
return match_charset_production(table, eg, sym, rhs[0], endstate);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// desugar parser with a fresh start symbol
|
// desugar parser with a fresh start symbol
|
||||||
// this guarantees that the start symbol will not occur in any productions
|
// this guarantees that the start symbol will not occur in any productions
|
||||||
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
|
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
|
||||||
|
|
@ -286,28 +326,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
|
HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
|
||||||
assert(lhss != NULL);
|
assert(lhss != NULL);
|
||||||
H_FOREACH_KEY(lhss, HCFChoice *lhs)
|
H_FOREACH_KEY(lhss, HCFChoice *lhs)
|
||||||
assert(lhs->type == HCF_CHOICE || lhs->type == HCF_CHARSET);
|
if(match_any_production(table, eg, lhs, item->rhs, state)) {
|
||||||
|
|
||||||
bool match = false;
|
|
||||||
if(lhs->type == HCF_CHOICE) {
|
|
||||||
for(HCFSequence **p=lhs->seq; *p; p++) {
|
|
||||||
HCFChoice **rhs = (*p)->items;
|
|
||||||
if(match_production(eg, rhs, item->rhs, state)) {
|
|
||||||
match = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // HCF_CHARSET
|
|
||||||
assert(item->rhs[0] != NULL);
|
|
||||||
assert(item->rhs[1] == NULL);
|
|
||||||
assert(item->rhs[0]->type == HCF_CHAR);
|
|
||||||
HLRTransition *t = h_hashtable_get(eg->smap, lhs);
|
|
||||||
assert(t != NULL);
|
|
||||||
match = (t->to == state
|
|
||||||
&& charset_isset(lhs->charset, item->rhs[0]->chr));
|
|
||||||
}
|
|
||||||
|
|
||||||
if(match) {
|
|
||||||
// the left-hand symbol's follow set is this production's
|
// the left-hand symbol's follow set is this production's
|
||||||
// contribution to the lookahead
|
// contribution to the lookahead
|
||||||
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
|
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@ HLRAction *h_reduce_action(HArena *arena, const HLRItem *item)
|
||||||
}
|
}
|
||||||
|
|
||||||
// adds 'new' to the branches of 'action'
|
// adds 'new' to the branches of 'action'
|
||||||
// returns a 'action' if it is already of type HLR_CONFLICT
|
// returns 'action' if it is already of type HLR_CONFLICT
|
||||||
// allocates a new HLRAction otherwise
|
// allocates a new HLRAction otherwise
|
||||||
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
|
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -30,9 +30,9 @@ static void expand_to_closure(HCFGrammar *g, HHashSet *items)
|
||||||
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
|
HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
|
||||||
|
|
||||||
// if there is a non-terminal after the mark, follow it
|
// if there is a non-terminal after the mark, follow it
|
||||||
|
// and add items corresponding to the productions of sym
|
||||||
// NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
|
// NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
|
||||||
if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) {
|
if(sym != NULL) {
|
||||||
// add items corresponding to the productions of sym
|
|
||||||
if(sym->type == HCF_CHOICE) {
|
if(sym->type == HCF_CHOICE) {
|
||||||
for(HCFSequence **p=sym->seq; *p; p++) {
|
for(HCFSequence **p=sym->seq; *p; p++) {
|
||||||
HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0);
|
HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0);
|
||||||
|
|
@ -41,7 +41,7 @@ static void expand_to_closure(HCFGrammar *g, HHashSet *items)
|
||||||
h_slist_push(work, it);
|
h_slist_push(work, it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // HCF_CHARSET
|
} else if(sym->type == HCF_CHARSET) {
|
||||||
for(unsigned int i=0; i<256; i++) {
|
for(unsigned int i=0; i<256; i++) {
|
||||||
if(charset_isset(sym->charset, i)) {
|
if(charset_isset(sym->charset, i)) {
|
||||||
// XXX allocate these single-character symbols statically somewhere
|
// XXX allocate these single-character symbols statically somewhere
|
||||||
|
|
@ -93,8 +93,8 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
|
||||||
// compute closure
|
// compute closure
|
||||||
// if destination is a new state:
|
// if destination is a new state:
|
||||||
// add it to state set
|
// add it to state set
|
||||||
// add transition to it
|
|
||||||
// add it to the work list
|
// add it to the work list
|
||||||
|
// add transition to it
|
||||||
|
|
||||||
while(!h_slist_empty(work)) {
|
while(!h_slist_empty(work)) {
|
||||||
size_t state_idx = (uintptr_t)h_slist_pop(work);
|
size_t state_idx = (uintptr_t)h_slist_pop(work);
|
||||||
|
|
|
||||||
|
|
@ -155,20 +155,20 @@ static inline void h_sarray_clear(HSArray *arr) {
|
||||||
typedef unsigned int *HCharset;
|
typedef unsigned int *HCharset;
|
||||||
|
|
||||||
static inline HCharset new_charset(HAllocator* mm__) {
|
static inline HCharset new_charset(HAllocator* mm__) {
|
||||||
HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int));
|
HCharset cs = h_new(unsigned int, 256 / (sizeof(unsigned int) * 8));
|
||||||
memset(cs, 0, 256);
|
memset(cs, 0, 32); // 32 bytes = 256 bits
|
||||||
return cs;
|
return cs;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
||||||
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
|
return !!(cs[pos / (sizeof(*cs)*8)] & (1 << (pos % (sizeof(*cs)*8))));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
||||||
cs[pos / sizeof(*cs)] =
|
cs[pos / (sizeof(*cs)*8)] =
|
||||||
val
|
val
|
||||||
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
|
? cs[pos / (sizeof(*cs)*8)] | (1 << (pos % (sizeof(*cs)*8)))
|
||||||
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
|
: cs[pos / (sizeof(*cs)*8)] & ~(1 << (pos % (sizeof(*cs)*8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef unsigned int HHashValue;
|
typedef unsigned int HHashValue;
|
||||||
|
|
|
||||||
|
|
@ -141,13 +141,24 @@ static void test_wrong_bit_length(void) {
|
||||||
static void test_lalr_charset_lhs(void) {
|
static void test_lalr_charset_lhs(void) {
|
||||||
HParserBackend be = PB_LALR;
|
HParserBackend be = PB_LALR;
|
||||||
|
|
||||||
HParser *p = h_choice(h_ch('A'), h_uint8(), NULL);
|
HParser *p = h_many(h_choice(h_sequence(h_ch('A'), h_ch('B'), NULL),
|
||||||
|
h_in((uint8_t*)"AB",2), NULL));
|
||||||
|
|
||||||
// the above would fail to compile because of an unhandled case in trying
|
// the above would abort because of an unhandled case in trying to resolve
|
||||||
// to resolve a conflict where an item's left-hand-side was an HCF_CHARSET.
|
// a conflict where an item's left-hand-side was an HCF_CHARSET.
|
||||||
|
// however, the compile should fail - the conflict cannot be resolved.
|
||||||
|
|
||||||
g_check_parse_match(p, be, "A",1, "u0x41");
|
if(h_compile(p, be, NULL) == 0) {
|
||||||
g_check_parse_match(p, be, "B",1, "u0x42");
|
g_test_message("LALR compile didn't detect ambiguous grammar");
|
||||||
|
|
||||||
|
// it says it compiled it - well, then it should parse it!
|
||||||
|
// (this helps us see what it thinks it should be doing.)
|
||||||
|
g_check_parse_match(p, be, "AA",2, "(u0x41 u0x41)");
|
||||||
|
g_check_parse_match(p, be, "AB",2, "((u0x41 u0x42))");
|
||||||
|
|
||||||
|
g_test_fail();
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_cfg_many_seq(void) {
|
static void test_cfg_many_seq(void) {
|
||||||
|
|
@ -160,6 +171,28 @@ static void test_cfg_many_seq(void) {
|
||||||
// reshape on h_many.
|
// reshape on h_many.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint8_t test_charset_bits__buf[256];
|
||||||
|
static void *test_charset_bits__alloc(HAllocator *allocator, size_t size)
|
||||||
|
{
|
||||||
|
g_check_cmp_uint64(size, ==, 256/8);
|
||||||
|
assert(size <= 256);
|
||||||
|
return test_charset_bits__buf;
|
||||||
|
}
|
||||||
|
static void test_charset_bits(void) {
|
||||||
|
// charset would allocate 256 bytes instead of 256 bits (= 32 bytes)
|
||||||
|
|
||||||
|
HAllocator alloc = {
|
||||||
|
.alloc = test_charset_bits__alloc,
|
||||||
|
.realloc = NULL,
|
||||||
|
.free = NULL,
|
||||||
|
};
|
||||||
|
test_charset_bits__buf[32] = 0xAB;
|
||||||
|
HCharset cs = new_charset(&alloc);
|
||||||
|
for(size_t i=0; i<32; i++)
|
||||||
|
g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0);
|
||||||
|
g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB);
|
||||||
|
}
|
||||||
|
|
||||||
void register_regression_tests(void) {
|
void register_regression_tests(void) {
|
||||||
g_test_add_func("/core/regression/bug118", test_bug118);
|
g_test_add_func("/core/regression/bug118", test_bug118);
|
||||||
g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
|
g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
|
||||||
|
|
@ -168,4 +201,5 @@ void register_regression_tests(void) {
|
||||||
g_test_add_func("/core/regression/wrong_bit_length", test_wrong_bit_length);
|
g_test_add_func("/core/regression/wrong_bit_length", test_wrong_bit_length);
|
||||||
g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs);
|
g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs);
|
||||||
g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq);
|
g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq);
|
||||||
|
g_test_add_func("/core/regression/charset_bits", test_charset_bits);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue