From fbdd2b7613efae11bbbb24cd8f007c739608a9cc Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 17 Mar 2014 23:46:14 +0100 Subject: [PATCH 1/5] pull saved position into HParserCacheValue and fix segfault in grow() --- src/backends/packrat.c | 81 ++++++++++++++++++++++++++---------------- src/internal.h | 10 ++---- src/t_parser.c | 15 +++++++- 3 files changed, 67 insertions(+), 39 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 87f166d..afd3577 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -3,10 +3,22 @@ #include "../internal.h" #include "../parsers/parser_internal.h" -// short-hand for constructing HCachedResult's -static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { - HCachedResult *ret = a_new(HCachedResult, 1); - ret->result = result; +// short-hand for creating cache values (regular case) +static +HParserCacheValue * cached_result(HParseState *state, HParseResult *result) { + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_RIGHT; + ret->right = result; + ret->input_stream = state->input_stream; + return ret; +} + +// short-hand for caching parse results (left recursion case) +static +HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) { + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_LEFT; + ret->left = lr; ret->input_stream = state->input_stream; return ret; } @@ -52,9 +64,7 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { // Nothing in the cache, and the key parser is not involved HParseResult *tmp = a_new(HParseResult, 1); tmp->ast = NULL; tmp->arena = state->arena; - HParserCacheValue *ret = a_new(HParserCacheValue, 1); - ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); - return ret; + return cached_result(state, tmp); } if (h_slist_find(head->eval_set, k->parser)) { // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. @@ -64,7 +74,8 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { if (!cached) cached = a_new(HParserCacheValue, 1); cached->value_type = PC_RIGHT; - cached->right = cached_result(state, tmp_res); + cached->right = tmp_res; + cached->input_stream = state->input_stream; } return cached; } @@ -83,14 +94,28 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { some->eval_set = NULL; rec_detect->head = some; } - //assert(state->lr_stack->head != NULL); - HSlistNode *head = state->lr_stack->head; - HLeftRec *lr; - while (head && (lr = head->elem)->rule != p) { + + // XXX is it ok for lr_stack to be empty here?! (we used to have an assert + // saying it was not.) + HSlistNode *it; + for(it=state->lr_stack->head; it; it=it->next) { + HLeftRec *lr = it->elem; + + if(lr->rule == p) + break; + lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); - head = head->next; + // XXX we are assuming that involved_set does not contain p, yet, + // or ignoring that fact. is this correct? } + //assert(it != NULL); // we should always find p (XXX unless lr_stack empty?) +} + +// helper: true iff pos1 is less than pos2 +static inline bool pos_lt(HInputStream pos1, HInputStream pos2) { + return ((pos1.index < pos2.index) || + (pos1.index == pos2.index && pos1.bit_offset < pos2.bit_offset)); } /* If recall() returns NULL, we need to store a dummy failure in the cache and compute the @@ -103,25 +128,22 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); if (!old_cached || PC_LEFT == old_cached->value_type) errx(1, "impossible match"); - HParseResult *old_res = old_cached->right->result; + HParseResult *old_res = old_cached->right; // reset the eval_set of the head of the recursion at each beginning of growth head->eval_set = h_slist_copy(head->involved_set); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { - if ((old_res->ast->index < tmp_res->ast->index) || - (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, k, v); + if (pos_lt(old_cached->input_stream, state->input_stream)) { + h_hashtable_put(state->cache, k, cached_result(state, tmp_res)); return grow(k, state, head); } else { // we're done with growing, we can remove data from the recursion head h_hashtable_del(state->recursion_heads, k); HParserCacheValue *cached = h_hashtable_get(state->cache, k); if (cached && PC_RIGHT == cached->value_type) { - return cached->right->result; + return cached->right; } else { errx(1, "impossible match"); } @@ -140,9 +162,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab } else { // update cache - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); - h_hashtable_put(state->cache, k, v); + h_hashtable_put(state->cache, k, cached_result(state, growable->seed)); if (!growable->seed) return NULL; else @@ -165,18 +185,17 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { base->seed = NULL; base->rule = parser; base->head = NULL; h_slist_push(state->lr_stack, base); // cache it - HParserCacheValue *dummy = a_new(HParserCacheValue, 1); - dummy->value_type = PC_LEFT; dummy->left = base; - h_hashtable_put(state->cache, key, dummy); + HParserCacheValue *cached = cached_lr(state, base); + h_hashtable_put(state->cache, key, cached); // parse the input HParseResult *tmp_res = perform_lowlevel_parse(state, parser); // the base variable has passed equality tests with the cache h_slist_pop(state->lr_stack); + // update the cached value to our new position + cached->input_stream = state->input_stream; // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one if (NULL == base->head) { - HParserCacheValue *right = a_new(HParserCacheValue, 1); - right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, key, right); + h_hashtable_put(state->cache, key, cached_result(state, tmp_res)); return tmp_res; } else { base->seed = tmp_res; @@ -185,12 +204,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { } } else { // it exists! + state->input_stream = m->input_stream; if (PC_LEFT == m->value_type) { setupLR(parser, state, m->left); return m->left->seed; // BUG: this might not be correct } else { - state->input_stream = m->right->input_stream; - return m->right->result; + return m->right; } } } diff --git a/src/internal.h b/src/internal.h index 056a5af..85cd4db 100644 --- a/src/internal.h +++ b/src/internal.h @@ -255,21 +255,17 @@ typedef struct HLeftRec_ { HRecursionHead *head; } HLeftRec; -/* Result and remaining input, for rerunning from a cached position. */ -typedef struct HCachedResult_ { - HParseResult *result; - HInputStream input_stream; -} HCachedResult; - /* Tagged union for values in the cache: either HLeftRec's (Left) or * HParseResult's (Right). + * Includes the position (input_stream) to advance to after using this value. */ typedef struct HParserCacheValue_t { HParserCacheValueType value_type; union { HLeftRec *left; - HCachedResult *right; + HParseResult *right; }; + HInputStream input_stream; } HParserCacheValue; // This file provides the logical inverse of bitreader.c diff --git a/src/t_parser.c b/src/t_parser.c index e2eca97..23fc5cb 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -419,6 +419,18 @@ static void test_leftrec(gconstpointer backend) { g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); } +static void test_leftrec_nonempty(gconstpointer backend) { + HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)"); + g_check_parse_failed(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0); +} + static void test_rightrec(gconstpointer backend) { HParser *a_ = h_ch('a'); @@ -485,7 +497,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); - //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); + g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); + g_test_add_data_func("/core/parser/packrat/leftrec_nonempty", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_nonempty); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); From f4afd0cb8d72ae767eb0c9d1d1a7d6b36642e953 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 21 Mar 2014 20:54:33 +0100 Subject: [PATCH 2/5] more leftrec fixes: head caching and input rewinding --- src/backends/packrat.c | 56 +++++++++++++++++++++++++----------------- src/t_parser.c | 1 + 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index afd3577..c1e422e 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -56,26 +56,28 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { HParserCacheValue *cached = h_hashtable_get(state->cache, k); - HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); + HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos); if (!head) { // No heads found return cached; } else { // Some heads found if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { // Nothing in the cache, and the key parser is not involved - HParseResult *tmp = a_new(HParseResult, 1); - tmp->ast = NULL; tmp->arena = state->arena; - return cached_result(state, tmp); + cached = cached_result(state, NULL); + cached->input_stream = k->input_pos; } if (h_slist_find(head->eval_set, k->parser)) { // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. head->eval_set = h_slist_remove_all(head->eval_set, k->parser); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - // we know that cached has an entry here, modify it - if (!cached) - cached = a_new(HParserCacheValue, 1); - cached->value_type = PC_RIGHT; - cached->right = tmp_res; - cached->input_stream = state->input_stream; + // update the cache + if (!cached) { + cached = cached_result(state, tmp_res); + h_hashtable_put(state->cache, k, cached); + } else { + cached->value_type = PC_RIGHT; + cached->right = tmp_res; + cached->input_stream = state->input_stream; + } } return cached; } @@ -95,8 +97,6 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { rec_detect->head = some; } - // XXX is it ok for lr_stack to be empty here?! (we used to have an assert - // saying it was not.) HSlistNode *it; for(it=state->lr_stack->head; it; it=it->next) { HLeftRec *lr = it->elem; @@ -106,10 +106,7 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); - // XXX we are assuming that involved_set does not contain p, yet, - // or ignoring that fact. is this correct? } - //assert(it != NULL); // we should always find p (XXX unless lr_stack empty?) } // helper: true iff pos1 is less than pos2 @@ -124,11 +121,14 @@ static inline bool pos_lt(HInputStream pos1, HInputStream pos2) { HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { // Store the head into the recursion_heads - h_hashtable_put(state->recursion_heads, k, head); + h_hashtable_put(state->recursion_heads, &k->input_pos, head); HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); if (!old_cached || PC_LEFT == old_cached->value_type) errx(1, "impossible match"); HParseResult *old_res = old_cached->right; + + // rewind the input + state->input_stream = k->input_pos; // reset the eval_set of the head of the recursion at each beginning of growth head->eval_set = h_slist_copy(head->involved_set); @@ -140,16 +140,18 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) return grow(k, state, head); } else { // we're done with growing, we can remove data from the recursion head - h_hashtable_del(state->recursion_heads, k); + h_hashtable_del(state->recursion_heads, &k->input_pos); HParserCacheValue *cached = h_hashtable_get(state->cache, k); if (cached && PC_RIGHT == cached->value_type) { + state->input_stream = cached->input_stream; return cached->right; } else { errx(1, "impossible match"); } } } else { - h_hashtable_del(state->recursion_heads, k); + h_hashtable_del(state->recursion_heads, &k->input_pos); + state->input_stream = old_cached->input_stream; return old_res; } } @@ -185,13 +187,14 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { base->seed = NULL; base->rule = parser; base->head = NULL; h_slist_push(state->lr_stack, base); // cache it - HParserCacheValue *cached = cached_lr(state, base); - h_hashtable_put(state->cache, key, cached); + h_hashtable_put(state->cache, key, cached_lr(state, base)); // parse the input HParseResult *tmp_res = perform_lowlevel_parse(state, parser); // the base variable has passed equality tests with the cache h_slist_pop(state->lr_stack); // update the cached value to our new position + HParserCacheValue *cached = h_hashtable_get(state->cache, key); + assert(cached != NULL); cached->input_stream = state->input_stream; // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one if (NULL == base->head) { @@ -207,7 +210,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { state->input_stream = m->input_stream; if (PC_LEFT == m->value_type) { setupLR(parser, state, m->left); - return m->left->seed; // BUG: this might not be correct + return m->left->seed; } else { return m->right; } @@ -231,6 +234,14 @@ static bool cache_key_equal(const void* key1, const void* key2) { return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; } +static uint32_t pos_hash(const void* key) { + return h_djbhash(key, sizeof(HInputStream)); +} + +static bool pos_equal(const void* key1, const void* key2) { + return memcmp(key1, key2, sizeof(HInputStream)) == 0; +} + HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) { HArena * arena = h_new_arena(mm__, 0); HParseState *parse_state = a_new_(arena, HParseState, 1); @@ -238,8 +249,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr cache_key_hash); // hash_func parse_state->input_stream = *input_stream; parse_state->lr_stack = h_slist_new(arena); - parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal, - cache_key_hash); + parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash); parse_state->arena = arena; HParseResult *res = h_do_parse(parser, parse_state); h_slist_free(parse_state->lr_stack); diff --git a/src/t_parser.c b/src/t_parser.c index 23fc5cb..4b14f4c 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -414,6 +414,7 @@ static void test_leftrec(gconstpointer backend) { HParser *lr_ = h_indirect(); h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL)); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); From 0fb744b16d279aa61c6d0462e5626cd4c0324246 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 21 Mar 2014 22:34:39 +0100 Subject: [PATCH 3/5] rename leftrec_nonempty test to leftrec-ne --- src/t_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_parser.c b/src/t_parser.c index 4b14f4c..f2976ab 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -420,7 +420,7 @@ static void test_leftrec(gconstpointer backend) { g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); } -static void test_leftrec_nonempty(gconstpointer backend) { +static void test_leftrec_ne(gconstpointer backend) { HParser *a_ = h_ch('a'); HParser *lr_ = h_indirect(); @@ -499,7 +499,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); - g_test_add_data_func("/core/parser/packrat/leftrec_nonempty", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_nonempty); + g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); From 5af0e3e2bad366ab4bc6661a4fb138e04a218557 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 21 Mar 2014 22:37:39 +0100 Subject: [PATCH 4/5] disable test /core/parser/packrat/leftrec --- src/t_parser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/t_parser.c b/src/t_parser.c index f2976ab..08c0dc7 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -498,7 +498,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); - g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); + // XXX(pesco) it seems to me Warth's algorithm just doesn't work for this case + //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); From 4a973d3f98bcf0434b9d52ed4d346b2493852aef Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 21 Mar 2014 22:39:13 +0100 Subject: [PATCH 5/5] enable leftrec-ne test for LALR and GLR backends --- src/t_parser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/t_parser.c b/src/t_parser.c index 08c0dc7..4260a7c 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -614,6 +614,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool); g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore); g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec); + g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne); g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token); @@ -652,6 +653,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool); g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore); g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec); + g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne); g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec); g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous); }