pull saved position into HParserCacheValue and fix segfault in grow()

This commit is contained in:
Sven M. Hallberg 2014-03-17 23:46:14 +01:00
parent 5ee7982596
commit fbdd2b7613
3 changed files with 67 additions and 39 deletions

View file

@ -3,10 +3,22 @@
#include "../internal.h" #include "../internal.h"
#include "../parsers/parser_internal.h" #include "../parsers/parser_internal.h"
// short-hand for constructing HCachedResult's // short-hand for creating cache values (regular case)
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { static
HCachedResult *ret = a_new(HCachedResult, 1); HParserCacheValue * cached_result(HParseState *state, HParseResult *result) {
ret->result = result; HParserCacheValue *ret = a_new(HParserCacheValue, 1);
ret->value_type = PC_RIGHT;
ret->right = result;
ret->input_stream = state->input_stream;
return ret;
}
// short-hand for caching parse results (left recursion case)
static
HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
HParserCacheValue *ret = a_new(HParserCacheValue, 1);
ret->value_type = PC_LEFT;
ret->left = lr;
ret->input_stream = state->input_stream; ret->input_stream = state->input_stream;
return ret; return ret;
} }
@ -52,9 +64,7 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
// Nothing in the cache, and the key parser is not involved // Nothing in the cache, and the key parser is not involved
HParseResult *tmp = a_new(HParseResult, 1); HParseResult *tmp = a_new(HParseResult, 1);
tmp->ast = NULL; tmp->arena = state->arena; tmp->ast = NULL; tmp->arena = state->arena;
HParserCacheValue *ret = a_new(HParserCacheValue, 1); return cached_result(state, tmp);
ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
return ret;
} }
if (h_slist_find(head->eval_set, k->parser)) { if (h_slist_find(head->eval_set, k->parser)) {
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
@ -64,7 +74,8 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
if (!cached) if (!cached)
cached = a_new(HParserCacheValue, 1); cached = a_new(HParserCacheValue, 1);
cached->value_type = PC_RIGHT; cached->value_type = PC_RIGHT;
cached->right = cached_result(state, tmp_res); cached->right = tmp_res;
cached->input_stream = state->input_stream;
} }
return cached; return cached;
} }
@ -83,14 +94,28 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
some->eval_set = NULL; some->eval_set = NULL;
rec_detect->head = some; rec_detect->head = some;
} }
//assert(state->lr_stack->head != NULL);
HSlistNode *head = state->lr_stack->head; // XXX is it ok for lr_stack to be empty here?! (we used to have an assert
HLeftRec *lr; // saying it was not.)
while (head && (lr = head->elem)->rule != p) { HSlistNode *it;
for(it=state->lr_stack->head; it; it=it->next) {
HLeftRec *lr = it->elem;
if(lr->rule == p)
break;
lr->head = rec_detect->head; lr->head = rec_detect->head;
h_slist_push(lr->head->involved_set, (void*)lr->rule); h_slist_push(lr->head->involved_set, (void*)lr->rule);
head = head->next; // XXX we are assuming that involved_set does not contain p, yet,
// or ignoring that fact. is this correct?
} }
//assert(it != NULL); // we should always find p (XXX unless lr_stack empty?)
}
// helper: true iff pos1 is less than pos2
static inline bool pos_lt(HInputStream pos1, HInputStream pos2) {
return ((pos1.index < pos2.index) ||
(pos1.index == pos2.index && pos1.bit_offset < pos2.bit_offset));
} }
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the /* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
@ -103,25 +128,22 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head)
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
if (!old_cached || PC_LEFT == old_cached->value_type) if (!old_cached || PC_LEFT == old_cached->value_type)
errx(1, "impossible match"); errx(1, "impossible match");
HParseResult *old_res = old_cached->right->result; HParseResult *old_res = old_cached->right;
// reset the eval_set of the head of the recursion at each beginning of growth // reset the eval_set of the head of the recursion at each beginning of growth
head->eval_set = h_slist_copy(head->involved_set); head->eval_set = h_slist_copy(head->involved_set);
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
if (tmp_res) { if (tmp_res) {
if ((old_res->ast->index < tmp_res->ast->index) || if (pos_lt(old_cached->input_stream, state->input_stream)) {
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { h_hashtable_put(state->cache, k, cached_result(state, tmp_res));
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, k, v);
return grow(k, state, head); return grow(k, state, head);
} else { } else {
// we're done with growing, we can remove data from the recursion head // we're done with growing, we can remove data from the recursion head
h_hashtable_del(state->recursion_heads, k); h_hashtable_del(state->recursion_heads, k);
HParserCacheValue *cached = h_hashtable_get(state->cache, k); HParserCacheValue *cached = h_hashtable_get(state->cache, k);
if (cached && PC_RIGHT == cached->value_type) { if (cached && PC_RIGHT == cached->value_type) {
return cached->right->result; return cached->right;
} else { } else {
errx(1, "impossible match"); errx(1, "impossible match");
} }
@ -140,9 +162,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab
} }
else { else {
// update cache // update cache
HParserCacheValue *v = a_new(HParserCacheValue, 1); h_hashtable_put(state->cache, k, cached_result(state, growable->seed));
v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
h_hashtable_put(state->cache, k, v);
if (!growable->seed) if (!growable->seed)
return NULL; return NULL;
else else
@ -165,18 +185,17 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
base->seed = NULL; base->rule = parser; base->head = NULL; base->seed = NULL; base->rule = parser; base->head = NULL;
h_slist_push(state->lr_stack, base); h_slist_push(state->lr_stack, base);
// cache it // cache it
HParserCacheValue *dummy = a_new(HParserCacheValue, 1); HParserCacheValue *cached = cached_lr(state, base);
dummy->value_type = PC_LEFT; dummy->left = base; h_hashtable_put(state->cache, key, cached);
h_hashtable_put(state->cache, key, dummy);
// parse the input // parse the input
HParseResult *tmp_res = perform_lowlevel_parse(state, parser); HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
// the base variable has passed equality tests with the cache // the base variable has passed equality tests with the cache
h_slist_pop(state->lr_stack); h_slist_pop(state->lr_stack);
// update the cached value to our new position
cached->input_stream = state->input_stream;
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
if (NULL == base->head) { if (NULL == base->head) {
HParserCacheValue *right = a_new(HParserCacheValue, 1); h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, key, right);
return tmp_res; return tmp_res;
} else { } else {
base->seed = tmp_res; base->seed = tmp_res;
@ -185,12 +204,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
} }
} else { } else {
// it exists! // it exists!
state->input_stream = m->input_stream;
if (PC_LEFT == m->value_type) { if (PC_LEFT == m->value_type) {
setupLR(parser, state, m->left); setupLR(parser, state, m->left);
return m->left->seed; // BUG: this might not be correct return m->left->seed; // BUG: this might not be correct
} else { } else {
state->input_stream = m->right->input_stream; return m->right;
return m->right->result;
} }
} }
} }

View file

@ -255,21 +255,17 @@ typedef struct HLeftRec_ {
HRecursionHead *head; HRecursionHead *head;
} HLeftRec; } HLeftRec;
/* Result and remaining input, for rerunning from a cached position. */
typedef struct HCachedResult_ {
HParseResult *result;
HInputStream input_stream;
} HCachedResult;
/* Tagged union for values in the cache: either HLeftRec's (Left) or /* Tagged union for values in the cache: either HLeftRec's (Left) or
* HParseResult's (Right). * HParseResult's (Right).
* Includes the position (input_stream) to advance to after using this value.
*/ */
typedef struct HParserCacheValue_t { typedef struct HParserCacheValue_t {
HParserCacheValueType value_type; HParserCacheValueType value_type;
union { union {
HLeftRec *left; HLeftRec *left;
HCachedResult *right; HParseResult *right;
}; };
HInputStream input_stream;
} HParserCacheValue; } HParserCacheValue;
// This file provides the logical inverse of bitreader.c // This file provides the logical inverse of bitreader.c

View file

@ -419,6 +419,18 @@ static void test_leftrec(gconstpointer backend) {
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
} }
static void test_leftrec_nonempty(gconstpointer backend) {
HParser *a_ = h_ch('a');
HParser *lr_ = h_indirect();
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
g_check_parse_failed(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0);
}
static void test_rightrec(gconstpointer backend) { static void test_rightrec(gconstpointer backend) {
HParser *a_ = h_ch('a'); HParser *a_ = h_ch('a');
@ -485,7 +497,8 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and);
g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
//g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
g_test_add_data_func("/core/parser/packrat/leftrec_nonempty", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_nonempty);
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);