Merge pull request #147 from pesco/iterative
Report correct token position from LR backends
This commit is contained in:
commit
690985dddd
3 changed files with 64 additions and 8 deletions
|
|
@ -438,13 +438,12 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
|||
|
||||
// the top of stack is such that there will be a result...
|
||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
tok->index = stream->pos + stream->index;
|
||||
tok->bit_offset = stream->bit_offset;
|
||||
if(x == MARK) {
|
||||
// hit stack frame boundary...
|
||||
// wrap the accumulated parse result, this sequence is finished
|
||||
tok->token_type = TT_SEQUENCE;
|
||||
tok->seq = seq;
|
||||
// XXX would have to set token pos but we've forgotten pos of seq
|
||||
|
||||
// recover original nonterminal and result sequence
|
||||
x = h_slist_pop(stack);
|
||||
|
|
@ -454,6 +453,9 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
|||
else {
|
||||
// x is a terminal or simple charset; match against input
|
||||
|
||||
tok->index = stream->pos + stream->index;
|
||||
tok->bit_offset = stream->bit_offset;
|
||||
|
||||
// consume the input token
|
||||
uint8_t input = h_read_bits(stream, 8, false);
|
||||
|
||||
|
|
@ -500,8 +502,16 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
|||
// 'tok' has been parsed; process it
|
||||
|
||||
// perform token reshape if indicated
|
||||
if(x->reshape)
|
||||
tok = (HParsedToken *)x->reshape(make_result(arena, tok), x->user_data);
|
||||
if(x->reshape) {
|
||||
HParsedToken *t = x->reshape(make_result(arena, tok), x->user_data);
|
||||
if(t) {
|
||||
t->index = tok->index;
|
||||
t->bit_offset = tok->bit_offset;
|
||||
} else {
|
||||
h_arena_free(arena, tok);
|
||||
}
|
||||
tok = t;
|
||||
}
|
||||
|
||||
// call validation and semantic action, if present
|
||||
if(x->pred && !x->pred(make_result(tarena, tok), x->user_data))
|
||||
|
|
|
|||
|
|
@ -267,6 +267,8 @@ static HParsedToken *consume_input(HLREngine *engine)
|
|||
v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
|
||||
v->token_type = TT_UINT;
|
||||
v->uint = c;
|
||||
v->index = engine->input.pos + engine->input.index - 1;
|
||||
v->bit_offset = engine->input.bit_offset;
|
||||
}
|
||||
|
||||
return v;
|
||||
|
|
@ -309,18 +311,28 @@ bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
|
|||
value->index = v->index;
|
||||
value->bit_offset = v->bit_offset;
|
||||
} else {
|
||||
// XXX how to get the position in this case?
|
||||
// result position is current input position XXX ?
|
||||
value->index = engine->input.pos + engine->input.index;
|
||||
value->bit_offset = engine->input.bit_offset;
|
||||
}
|
||||
|
||||
// perform token reshape if indicated
|
||||
if(symbol->reshape)
|
||||
value = (HParsedToken *)symbol->reshape(make_result(arena, value), symbol->user_data);
|
||||
if(symbol->reshape) {
|
||||
v = symbol->reshape(make_result(arena, value), symbol->user_data);
|
||||
if(v) {
|
||||
v->index = value->index;
|
||||
v->bit_offset = value->bit_offset;
|
||||
} else {
|
||||
h_arena_free(arena, value);
|
||||
}
|
||||
value = v;
|
||||
}
|
||||
|
||||
// call validation and semantic action, if present
|
||||
if(symbol->pred && !symbol->pred(make_result(tarena, value), symbol->user_data))
|
||||
return false; // validation failed -> no parse; terminate
|
||||
if(symbol->action)
|
||||
value = (HParsedToken *)symbol->action(make_result(arena, value), symbol->user_data);
|
||||
value = symbol->action(make_result(arena, value), symbol->user_data);
|
||||
|
||||
// this is LR, building a right-most derivation bottom-up, so no reduce can
|
||||
// follow a reduce. we can also assume no conflict follows for GLR if we
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include "hammer.h"
|
||||
#include "glue.h"
|
||||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
#include "parsers/parser_internal.h"
|
||||
|
|
@ -552,6 +553,34 @@ static void test_result_length(gconstpointer backend) {
|
|||
g_check_cmp_int64(r->bit_length, ==, 24);
|
||||
}
|
||||
|
||||
// Test that each token in a parsed sequence records where it started in the
// input: parses "foobar" with the sequence "foo" "bar" and checks the
// index/bit_offset stored on the two resulting tokens.
//
// backend: the HParserBackend to compile the parser with, passed as an
//          integer packed into a pointer (decoded with GPOINTER_TO_INT).
static void test_token_position(gconstpointer backend) {
|
||||
HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
|
||||
// grammar under test: the literal "foo" followed by the literal "bar"
HParser *p = h_sequence(h_token((uint8_t*)"foo",3),
|
||||
h_token((uint8_t*)"bar",3), NULL);
|
||||
|
||||
// a non-zero return from h_compile is treated as failure: mark the test
// failed and stop, since there is nothing to parse with
if(h_compile(p, be, NULL) != 0) {
|
||||
g_test_message("Compile failed");
|
||||
g_test_fail();
|
||||
return;
|
||||
}
|
||||
|
||||
// the input is exactly the two tokens back to back (6 bytes)
HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6);
|
||||
// a NULL result is reported as a parse failure
if(!r) {
|
||||
g_test_message("Parse failed");
|
||||
g_test_fail();
|
||||
return;
|
||||
}
|
||||
|
||||
// the result must carry an AST before it is indexed below
assert(r->ast != NULL);
|
||||
// elements 0 and 1 of the result; presumably the tokens for "foo" and "bar"
// in sequence order (H_INDEX_TOKEN is defined elsewhere -- confirm)
HParsedToken *foo = H_INDEX_TOKEN(r->ast, 0);
|
||||
HParsedToken *bar = H_INDEX_TOKEN(r->ast, 1);
|
||||
|
||||
// "foo" is expected at the very start of the input
g_check_cmp_uint64(foo->index, ==, 0);
|
||||
g_check_cmp_uint64(foo->bit_offset, ==, 0);
|
||||
// "bar" is expected at index 3, i.e. right after the 3-byte "foo" token
g_check_cmp_uint64(bar->index, ==, 3);
|
||||
g_check_cmp_uint64(bar->bit_offset, ==, 0);
|
||||
}
|
||||
|
||||
static void test_ambiguous(gconstpointer backend) {
|
||||
HParser *d_ = h_ch('d');
|
||||
HParser *p_ = h_ch('+');
|
||||
|
|
@ -763,6 +792,7 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
|
||||
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
|
||||
g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length);
|
||||
//g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position);
|
||||
|
||||
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
||||
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
||||
|
|
@ -802,6 +832,7 @@ void register_parser_tests(void) {
|
|||
//g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
|
||||
g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec);
|
||||
g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length);
|
||||
//g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position);
|
||||
g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative);
|
||||
g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead);
|
||||
g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length);
|
||||
|
|
@ -843,6 +874,7 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
|
||||
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
|
||||
g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length);
|
||||
g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position);
|
||||
|
||||
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
|
||||
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
|
||||
|
|
@ -883,6 +915,7 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
|
||||
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
|
||||
g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length);
|
||||
g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position);
|
||||
g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative);
|
||||
g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead);
|
||||
g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length);
|
||||
|
|
@ -927,4 +960,5 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
|
||||
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
|
||||
g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
|
||||
g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue