more leftrec fixes: head caching and input rewinding
This commit is contained in:
parent
fbdd2b7613
commit
f4afd0cb8d
2 changed files with 34 additions and 23 deletions
|
|
@ -56,26 +56,28 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
|
||||||
|
|
||||||
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
|
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
|
||||||
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
|
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
|
||||||
HRecursionHead *head = h_hashtable_get(state->recursion_heads, k);
|
HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos);
|
||||||
if (!head) { // No heads found
|
if (!head) { // No heads found
|
||||||
return cached;
|
return cached;
|
||||||
} else { // Some heads found
|
} else { // Some heads found
|
||||||
if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
|
if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
|
||||||
// Nothing in the cache, and the key parser is not involved
|
// Nothing in the cache, and the key parser is not involved
|
||||||
HParseResult *tmp = a_new(HParseResult, 1);
|
cached = cached_result(state, NULL);
|
||||||
tmp->ast = NULL; tmp->arena = state->arena;
|
cached->input_stream = k->input_pos;
|
||||||
return cached_result(state, tmp);
|
|
||||||
}
|
}
|
||||||
if (h_slist_find(head->eval_set, k->parser)) {
|
if (h_slist_find(head->eval_set, k->parser)) {
|
||||||
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
|
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
|
||||||
head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
|
head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
||||||
// we know that cached has an entry here, modify it
|
// update the cache
|
||||||
if (!cached)
|
if (!cached) {
|
||||||
cached = a_new(HParserCacheValue, 1);
|
cached = cached_result(state, tmp_res);
|
||||||
cached->value_type = PC_RIGHT;
|
h_hashtable_put(state->cache, k, cached);
|
||||||
cached->right = tmp_res;
|
} else {
|
||||||
cached->input_stream = state->input_stream;
|
cached->value_type = PC_RIGHT;
|
||||||
|
cached->right = tmp_res;
|
||||||
|
cached->input_stream = state->input_stream;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return cached;
|
return cached;
|
||||||
}
|
}
|
||||||
|
|
@ -95,8 +97,6 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
||||||
rec_detect->head = some;
|
rec_detect->head = some;
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX is it ok for lr_stack to be empty here?! (we used to have an assert
|
|
||||||
// saying it was not.)
|
|
||||||
HSlistNode *it;
|
HSlistNode *it;
|
||||||
for(it=state->lr_stack->head; it; it=it->next) {
|
for(it=state->lr_stack->head; it; it=it->next) {
|
||||||
HLeftRec *lr = it->elem;
|
HLeftRec *lr = it->elem;
|
||||||
|
|
@ -106,10 +106,7 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
||||||
|
|
||||||
lr->head = rec_detect->head;
|
lr->head = rec_detect->head;
|
||||||
h_slist_push(lr->head->involved_set, (void*)lr->rule);
|
h_slist_push(lr->head->involved_set, (void*)lr->rule);
|
||||||
// XXX we are assuming that involved_set does not contain p, yet,
|
|
||||||
// or ignoring that fact. is this correct?
|
|
||||||
}
|
}
|
||||||
//assert(it != NULL); // we should always find p (XXX unless lr_stack empty?)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper: true iff pos1 is less than pos2
|
// helper: true iff pos1 is less than pos2
|
||||||
|
|
@ -124,11 +121,14 @@ static inline bool pos_lt(HInputStream pos1, HInputStream pos2) {
|
||||||
|
|
||||||
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
|
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
|
||||||
// Store the head into the recursion_heads
|
// Store the head into the recursion_heads
|
||||||
h_hashtable_put(state->recursion_heads, k, head);
|
h_hashtable_put(state->recursion_heads, &k->input_pos, head);
|
||||||
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
|
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
|
||||||
if (!old_cached || PC_LEFT == old_cached->value_type)
|
if (!old_cached || PC_LEFT == old_cached->value_type)
|
||||||
errx(1, "impossible match");
|
errx(1, "impossible match");
|
||||||
HParseResult *old_res = old_cached->right;
|
HParseResult *old_res = old_cached->right;
|
||||||
|
|
||||||
|
// rewind the input
|
||||||
|
state->input_stream = k->input_pos;
|
||||||
|
|
||||||
// reset the eval_set of the head of the recursion at each beginning of growth
|
// reset the eval_set of the head of the recursion at each beginning of growth
|
||||||
head->eval_set = h_slist_copy(head->involved_set);
|
head->eval_set = h_slist_copy(head->involved_set);
|
||||||
|
|
@ -140,16 +140,18 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head)
|
||||||
return grow(k, state, head);
|
return grow(k, state, head);
|
||||||
} else {
|
} else {
|
||||||
// we're done with growing, we can remove data from the recursion head
|
// we're done with growing, we can remove data from the recursion head
|
||||||
h_hashtable_del(state->recursion_heads, k);
|
h_hashtable_del(state->recursion_heads, &k->input_pos);
|
||||||
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
|
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
|
||||||
if (cached && PC_RIGHT == cached->value_type) {
|
if (cached && PC_RIGHT == cached->value_type) {
|
||||||
|
state->input_stream = cached->input_stream;
|
||||||
return cached->right;
|
return cached->right;
|
||||||
} else {
|
} else {
|
||||||
errx(1, "impossible match");
|
errx(1, "impossible match");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
h_hashtable_del(state->recursion_heads, k);
|
h_hashtable_del(state->recursion_heads, &k->input_pos);
|
||||||
|
state->input_stream = old_cached->input_stream;
|
||||||
return old_res;
|
return old_res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -185,13 +187,14 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
||||||
base->seed = NULL; base->rule = parser; base->head = NULL;
|
base->seed = NULL; base->rule = parser; base->head = NULL;
|
||||||
h_slist_push(state->lr_stack, base);
|
h_slist_push(state->lr_stack, base);
|
||||||
// cache it
|
// cache it
|
||||||
HParserCacheValue *cached = cached_lr(state, base);
|
h_hashtable_put(state->cache, key, cached_lr(state, base));
|
||||||
h_hashtable_put(state->cache, key, cached);
|
|
||||||
// parse the input
|
// parse the input
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
|
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
|
||||||
// the base variable has passed equality tests with the cache
|
// the base variable has passed equality tests with the cache
|
||||||
h_slist_pop(state->lr_stack);
|
h_slist_pop(state->lr_stack);
|
||||||
// update the cached value to our new position
|
// update the cached value to our new position
|
||||||
|
HParserCacheValue *cached = h_hashtable_get(state->cache, key);
|
||||||
|
assert(cached != NULL);
|
||||||
cached->input_stream = state->input_stream;
|
cached->input_stream = state->input_stream;
|
||||||
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
|
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
|
||||||
if (NULL == base->head) {
|
if (NULL == base->head) {
|
||||||
|
|
@ -207,7 +210,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
||||||
state->input_stream = m->input_stream;
|
state->input_stream = m->input_stream;
|
||||||
if (PC_LEFT == m->value_type) {
|
if (PC_LEFT == m->value_type) {
|
||||||
setupLR(parser, state, m->left);
|
setupLR(parser, state, m->left);
|
||||||
return m->left->seed; // BUG: this might not be correct
|
return m->left->seed;
|
||||||
} else {
|
} else {
|
||||||
return m->right;
|
return m->right;
|
||||||
}
|
}
|
||||||
|
|
@ -231,6 +234,14 @@ static bool cache_key_equal(const void* key1, const void* key2) {
|
||||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t pos_hash(const void* key) {
|
||||||
|
return h_djbhash(key, sizeof(HInputStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool pos_equal(const void* key1, const void* key2) {
|
||||||
|
return memcmp(key1, key2, sizeof(HInputStream)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
|
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
|
||||||
HArena * arena = h_new_arena(mm__, 0);
|
HArena * arena = h_new_arena(mm__, 0);
|
||||||
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
||||||
|
|
@ -238,8 +249,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
|
||||||
cache_key_hash); // hash_func
|
cache_key_hash); // hash_func
|
||||||
parse_state->input_stream = *input_stream;
|
parse_state->input_stream = *input_stream;
|
||||||
parse_state->lr_stack = h_slist_new(arena);
|
parse_state->lr_stack = h_slist_new(arena);
|
||||||
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
|
parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash);
|
||||||
cache_key_hash);
|
|
||||||
parse_state->arena = arena;
|
parse_state->arena = arena;
|
||||||
HParseResult *res = h_do_parse(parser, parse_state);
|
HParseResult *res = h_do_parse(parser, parse_state);
|
||||||
h_slist_free(parse_state->lr_stack);
|
h_slist_free(parse_state->lr_stack);
|
||||||
|
|
|
||||||
|
|
@ -414,6 +414,7 @@ static void test_leftrec(gconstpointer backend) {
|
||||||
HParser *lr_ = h_indirect();
|
HParser *lr_ = h_indirect();
|
||||||
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL));
|
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL));
|
||||||
|
|
||||||
|
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL");
|
||||||
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
|
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
|
||||||
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)");
|
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)");
|
||||||
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
|
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue