Merge pull request #147 from pesco/iterative

Report correct token position from LR backends
This commit is contained in:
Meredith L. Patterson 2015-09-24 16:31:11 +02:00
commit 690985dddd
3 changed files with 64 additions and 8 deletions

View file

@ -438,13 +438,12 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// the top of stack is such that there will be a result...
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->index = stream->pos + stream->index;
tok->bit_offset = stream->bit_offset;
if(x == MARK) {
// hit stack frame boundary...
// wrap the accumulated parse result, this sequence is finished
tok->token_type = TT_SEQUENCE;
tok->seq = seq;
// XXX would have to set token pos but we've forgotten pos of seq
// recover original nonterminal and result sequence
x = h_slist_pop(stack);
@ -454,6 +453,9 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
else {
// x is a terminal or simple charset; match against input
tok->index = stream->pos + stream->index;
tok->bit_offset = stream->bit_offset;
// consume the input token
uint8_t input = h_read_bits(stream, 8, false);
@ -500,8 +502,16 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// 'tok' has been parsed; process it
// perform token reshape if indicated
if(x->reshape)
tok = (HParsedToken *)x->reshape(make_result(arena, tok), x->user_data);
if(x->reshape) {
HParsedToken *t = x->reshape(make_result(arena, tok), x->user_data);
if(t) {
t->index = tok->index;
t->bit_offset = tok->bit_offset;
} else {
h_arena_free(arena, tok);
}
tok = t;
}
// call validation and semantic action, if present
if(x->pred && !x->pred(make_result(tarena, tok), x->user_data))

View file

@ -267,6 +267,8 @@ static HParsedToken *consume_input(HLREngine *engine)
v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
v->token_type = TT_UINT;
v->uint = c;
v->index = engine->input.pos + engine->input.index - 1;
v->bit_offset = engine->input.bit_offset;
}
return v;
@ -309,18 +311,28 @@ bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
value->index = v->index;
value->bit_offset = v->bit_offset;
} else {
// XXX how to get the position in this case?
// result position is current input position XXX ?
value->index = engine->input.pos + engine->input.index;
value->bit_offset = engine->input.bit_offset;
}
// perform token reshape if indicated
if(symbol->reshape)
value = (HParsedToken *)symbol->reshape(make_result(arena, value), symbol->user_data);
if(symbol->reshape) {
v = symbol->reshape(make_result(arena, value), symbol->user_data);
if(v) {
v->index = value->index;
v->bit_offset = value->bit_offset;
} else {
h_arena_free(arena, value);
}
value = v;
}
// call validation and semantic action, if present
if(symbol->pred && !symbol->pred(make_result(tarena, value), symbol->user_data))
return false; // validation failed -> no parse; terminate
if(symbol->action)
value = (HParsedToken *)symbol->action(make_result(arena, value), symbol->user_data);
value = symbol->action(make_result(arena, value), symbol->user_data);
// this is LR, building a right-most derivation bottom-up, so no reduce can
// follow a reduce. we can also assume no conflict follows for GLR if we

View file

@ -1,6 +1,7 @@
#include <glib.h>
#include <string.h>
#include "hammer.h"
#include "glue.h"
#include "internal.h"
#include "test_suite.h"
#include "parsers/parser_internal.h"
@ -552,6 +553,34 @@ static void test_result_length(gconstpointer backend) {
g_check_cmp_int64(r->bit_length, ==, 24);
}
// Check that the tokens of a parsed sequence record the input position at
// which they start. The backend to compile with arrives through the test data
// pointer. "foobar" is parsed as the sequence "foo" "bar"; the first token is
// expected at index 0 and the second at index 3, each with bit offset 0.
static void test_token_position(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
  HParser *p = h_sequence(h_token((uint8_t*)"foo",3),
                          h_token((uint8_t*)"bar",3), NULL);

  if(h_compile(p, be, NULL) != 0) {
    g_test_message("Compile failed");
    g_test_fail();
    return;
  }

  HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6);
  if(!r) {
    g_test_message("Parse failed");
    g_test_fail();
    return;
  }

  // A missing AST is reported as an ordinary test failure, like the compile
  // and parse failures above, rather than through assert(): an assertion
  // would abort the whole test run, and is compiled out under NDEBUG, which
  // would let the token lookups below operate on a NULL AST.
  if(r->ast == NULL) {
    g_test_message("Parse produced no AST");
    g_test_fail();
    return;
  }

  HParsedToken *foo = H_INDEX_TOKEN(r->ast, 0);
  HParsedToken *bar = H_INDEX_TOKEN(r->ast, 1);

  g_check_cmp_uint64(foo->index, ==, 0);
  g_check_cmp_uint64(foo->bit_offset, ==, 0);
  g_check_cmp_uint64(bar->index, ==, 3);
  g_check_cmp_uint64(bar->bit_offset, ==, 0);
}
static void test_ambiguous(gconstpointer backend) {
HParser *d_ = h_ch('d');
HParser *p_ = h_ch('+');
@ -763,6 +792,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length);
//g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
@ -802,6 +832,7 @@ void register_parser_tests(void) {
//g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec);
g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length);
//g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position);
g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative);
g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead);
g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length);
@ -843,6 +874,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length);
g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position);
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
@ -883,6 +915,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length);
g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position);
g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative);
g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead);
g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length);
@ -927,4 +960,5 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
}