Merge pull request #141 from pesco/iterative
Iterative (chunked) input processing
This commit is contained in:
commit
cb93c3b4ec
11 changed files with 668 additions and 80 deletions
|
|
@ -346,7 +346,10 @@ void h_lalr_free(HParser *parser)
|
||||||
HParserBackendVTable h__lalr_backend_vtable = {
|
HParserBackendVTable h__lalr_backend_vtable = {
|
||||||
.compile = h_lalr_compile,
|
.compile = h_lalr_compile,
|
||||||
.parse = h_lr_parse,
|
.parse = h_lr_parse,
|
||||||
.free = h_lalr_free
|
.free = h_lalr_free,
|
||||||
|
.parse_start = h_lr_parse_start,
|
||||||
|
.parse_chunk = h_lr_parse_chunk,
|
||||||
|
.parse_finish = h_lr_parse_finish
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -355,8 +358,6 @@ HParserBackendVTable h__lalr_backend_vtable = {
|
||||||
// dummy!
|
// dummy!
|
||||||
int test_lalr(void)
|
int test_lalr(void)
|
||||||
{
|
{
|
||||||
HAllocator *mm__ = &system_allocator;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
E -> E '-' T
|
E -> E '-' T
|
||||||
| T
|
| T
|
||||||
|
|
@ -371,44 +372,24 @@ int test_lalr(void)
|
||||||
h_bind_indirect(E, E_);
|
h_bind_indirect(E, E_);
|
||||||
HParser *p = E;
|
HParser *p = E;
|
||||||
|
|
||||||
printf("\n==== G R A M M A R ====\n");
|
HCFGrammar *g = h_pprint_lr_info(stdout, p);
|
||||||
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
|
if(!g)
|
||||||
if (g == NULL) {
|
|
||||||
fprintf(stderr, "h_cfgrammar failed\n");
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
|
||||||
h_pprint_grammar(stdout, g, 0);
|
|
||||||
|
|
||||||
printf("\n==== D F A ====\n");
|
fprintf(stdout, "\n==== L A L R T A B L E ====\n");
|
||||||
HLRDFA *dfa = h_lr0_dfa(g);
|
|
||||||
if (dfa) {
|
|
||||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "h_lalr_dfa failed\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
|
||||||
HLRTable *table0 = h_lr0_table(g, dfa);
|
|
||||||
if (table0) {
|
|
||||||
h_pprint_lrtable(stdout, g, table0, 0);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "h_lr0_table failed\n");
|
|
||||||
}
|
|
||||||
h_lrtable_free(table0);
|
|
||||||
|
|
||||||
printf("\n==== L A L R T A B L E ====\n");
|
|
||||||
if (h_compile(p, PB_LALR, NULL)) {
|
if (h_compile(p, PB_LALR, NULL)) {
|
||||||
fprintf(stderr, "does not compile\n");
|
fprintf(stdout, "does not compile\n");
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
||||||
|
|
||||||
printf("\n==== P A R S E R E S U L T ====\n");
|
fprintf(stdout, "\n==== P A R S E R E S U L T ====\n");
|
||||||
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
||||||
if (res) {
|
if (res) {
|
||||||
h_pprint(stdout, res->ast, 0, 2);
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
} else {
|
} else {
|
||||||
printf("no parse\n");
|
fprintf(stdout, "no parse\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ static const size_t DEFAULT_KMAX = 1;
|
||||||
* maps lookahead strings to productions (HCFSequence).
|
* maps lookahead strings to productions (HCFSequence).
|
||||||
*/
|
*/
|
||||||
typedef struct HLLkTable_ {
|
typedef struct HLLkTable_ {
|
||||||
|
size_t kmax;
|
||||||
HHashTable *rows;
|
HHashTable *rows;
|
||||||
HCFChoice *start; // start symbol
|
HCFChoice *start; // start symbol
|
||||||
HArena *arena;
|
HArena *arena;
|
||||||
|
|
@ -188,6 +189,7 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row,
|
||||||
*/
|
*/
|
||||||
static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table)
|
static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table)
|
||||||
{
|
{
|
||||||
|
table->kmax = kmax;
|
||||||
table->start = g->start;
|
table->start = g->start;
|
||||||
|
|
||||||
// iterate over g->nts
|
// iterate over g->nts
|
||||||
|
|
@ -259,56 +261,172 @@ void h_llk_free(HParser *parser)
|
||||||
|
|
||||||
/* LL(k) driver */
|
/* LL(k) driver */
|
||||||
|
|
||||||
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
|
typedef struct {
|
||||||
|
HArena *arena; // will hold the results
|
||||||
|
HArena *tarena; // tmp, deleted after parse
|
||||||
|
HSlist *stack;
|
||||||
|
HCountedArray *seq; // accumulates current parse result
|
||||||
|
|
||||||
|
uint8_t *buf; // for lookahead across chunk boundaries
|
||||||
|
// allocated to size 2*kmax
|
||||||
|
// new chunk starts at index kmax
|
||||||
|
// ( 0 ... kmax ... 2*kmax-1 )
|
||||||
|
// \_old_/\______new_______/
|
||||||
|
HInputStream win; // win.length is set to 0 when not in use
|
||||||
|
} HLLkState;
|
||||||
|
|
||||||
|
// To build the parse tree, the symbol stack is divided into frames, one per
// production right-hand side. Because only left-most derivations are
// produced, this linearization is unique.
//
// MARK is the frame delimiter. It carries no data; it only provides a unique
// address (its own) that can be told apart from every real stack entry.
//
// A nonterminal is not simply popped and forgotten: it is put back onto the
// stack below the mark, so that the driver knows which validations and
// semantic actions to run on the corresponding result. The value accumulated
// so far for the surrounding production is stored below the mark as well.
static const void *const MARK = &MARK;  // stack frame delimiter
|
||||||
|
|
||||||
|
/* Allocate and initialize the driver state for a new LL(k) parse.
 *
 * mm__    allocator used for the state object and for both arenas
 * parser  compiled parser; its backend_data must be the LL(k) table
 *
 * Returns the fresh state with the start symbol as the only stack entry.
 */
static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
{
  const HLLkTable *table = parser->backend_data;
  assert(table != NULL);

  HLLkState *state = h_new(HLLkState, 1);

  // result arena first, then the temporary arena
  state->arena  = h_new_arena(mm__, 0);
  state->tarena = h_new_arena(mm__, 0);

  // the stack and the lookahead buffer are temporary, the results are not
  state->stack = h_slist_new(state->tarena);
  state->seq   = h_carray_new(state->arena);
  state->buf   = h_arena_malloc(state->tarena, 2 * table->kmax);

  // the lookahead window reads from buf; length 0 marks it as unused
  state->win.input  = state->buf;
  state->win.length = 0;

  // initialize with the start symbol on the stack.
  h_slist_push(state->stack, table->start);

  return state;
}
|
||||||
|
|
||||||
|
// helper: add new input to the lookahead window
|
||||||
|
static void append_win(size_t kmax, HLLkState *s, HInputStream *stream)
|
||||||
|
{
|
||||||
|
assert(stream->bit_offset == 0);
|
||||||
|
assert(s->win.input == s->buf);
|
||||||
|
assert(s->win.length == kmax);
|
||||||
|
assert(s->win.index < kmax);
|
||||||
|
|
||||||
|
size_t n = stream->length - stream->index; // bytes to copy
|
||||||
|
if(n > kmax)
|
||||||
|
n = kmax;
|
||||||
|
|
||||||
|
memcpy(s->buf + kmax, stream->input + stream->index, n);
|
||||||
|
s->win.length += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper: save old input to the lookahead window
|
||||||
|
static void save_win(size_t kmax, HLLkState *s, HInputStream *stream)
|
||||||
|
{
|
||||||
|
assert(stream->bit_offset == 0);
|
||||||
|
|
||||||
|
size_t len = stream->length - stream->index;
|
||||||
|
assert(len < kmax);
|
||||||
|
|
||||||
|
if(len == 0) {
|
||||||
|
// stream empty? nothing to do.
|
||||||
|
return;
|
||||||
|
} else if(s->win.length > 0) {
|
||||||
|
// window active? should contain all of stream.
|
||||||
|
assert(s->win.length == kmax + len);
|
||||||
|
assert(s->win.index <= kmax);
|
||||||
|
|
||||||
|
// shift contents down:
|
||||||
|
//
|
||||||
|
// (0 kmax )
|
||||||
|
// ... \_old_/\_new_/ ...
|
||||||
|
//
|
||||||
|
// (0 kmax )
|
||||||
|
// ... \_old_/\_new_/ ...
|
||||||
|
//
|
||||||
|
s->win.pos += len; // position of the window shifts up
|
||||||
|
len = s->win.length - s->win.index;
|
||||||
|
assert(len <= kmax);
|
||||||
|
memmove(s->buf + kmax - len, s->buf + s->win.index, len);
|
||||||
|
} else {
|
||||||
|
// window not active? save stream to window.
|
||||||
|
// buffer starts kmax bytes below chunk boundary
|
||||||
|
s->win.pos = stream->pos - kmax;
|
||||||
|
memcpy(s->buf + kmax - len, stream->input + stream->index, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// metadata
|
||||||
|
s->win = *stream;
|
||||||
|
s->win.input = s->buf;
|
||||||
|
s->win.index = kmax - len;
|
||||||
|
s->win.length = kmax;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns partial result or NULL (no parse)
|
||||||
|
static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
|
HInputStream* chunk)
|
||||||
|
{
|
||||||
|
HParsedToken *tok = NULL; // will hold result token
|
||||||
|
HCFChoice *x = NULL; // current symbol (from top of stack)
|
||||||
|
HInputStream *stream;
|
||||||
|
|
||||||
|
assert(chunk->index == 0);
|
||||||
|
assert(chunk->bit_offset == 0);
|
||||||
|
|
||||||
|
const HLLkTable *table = parser->backend_data;
|
||||||
|
assert(table != NULL);
|
||||||
|
|
||||||
|
HArena *arena = s->arena;
|
||||||
|
HArena *tarena = s->tarena;
|
||||||
|
HSlist *stack = s->stack;
|
||||||
|
HCountedArray *seq = s->seq;
|
||||||
|
size_t kmax = table->kmax;
|
||||||
|
|
||||||
|
if(!seq)
|
||||||
|
return NULL; // parse already failed
|
||||||
|
|
||||||
|
if(s->win.length > 0) {
|
||||||
|
append_win(kmax, s, chunk);
|
||||||
|
stream = &s->win;
|
||||||
|
} else {
|
||||||
|
stream = chunk;
|
||||||
|
}
|
||||||
|
|
||||||
// when we empty the stack, the parse is complete.
|
// when we empty the stack, the parse is complete.
|
||||||
while(!h_slist_empty(stack)) {
|
while(!h_slist_empty(stack)) {
|
||||||
|
tok = NULL;
|
||||||
|
|
||||||
// pop top of stack for inspection
|
// pop top of stack for inspection
|
||||||
HCFChoice *x = h_slist_pop(stack);
|
x = h_slist_pop(stack);
|
||||||
assert(x != NULL);
|
assert(x != NULL);
|
||||||
|
|
||||||
if(x != mark && x->type == HCF_CHOICE) {
|
if(x != MARK && x->type == HCF_CHOICE) {
|
||||||
// x is a nonterminal; apply the appropriate production and continue
|
// x is a nonterminal; apply the appropriate production and continue
|
||||||
|
|
||||||
// push stack frame
|
|
||||||
h_slist_push(stack, seq); // save current partial value
|
|
||||||
h_slist_push(stack, x); // save the nonterminal
|
|
||||||
h_slist_push(stack, mark); // frame delimiter
|
|
||||||
|
|
||||||
// open a fresh result sequence
|
|
||||||
seq = h_carray_new(arena);
|
|
||||||
|
|
||||||
// look up applicable production in parse table
|
// look up applicable production in parse table
|
||||||
const HCFSequence *p = h_llk_lookup(table, x, stream);
|
const HCFSequence *p = h_llk_lookup(table, x, stream);
|
||||||
if(p == NULL)
|
if(p == NULL)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
|
if(p == NEED_INPUT) {
|
||||||
|
save_win(kmax, s, chunk);
|
||||||
|
goto need_input;
|
||||||
|
}
|
||||||
|
|
||||||
// an infinite loop case that shouldn't happen
|
// an infinite loop case that shouldn't happen
|
||||||
assert(!p->items[0] || p->items[0] != x);
|
assert(!p->items[0] || p->items[0] != x);
|
||||||
|
|
||||||
|
// push stack frame
|
||||||
|
h_slist_push(stack, seq); // save current partial value
|
||||||
|
h_slist_push(stack, x); // save the nonterminal
|
||||||
|
h_slist_push(stack, (void *)MARK); // frame delimiter
|
||||||
|
|
||||||
|
// open a fresh result sequence
|
||||||
|
seq = h_carray_new(arena);
|
||||||
|
|
||||||
// push production's rhs onto the stack (in reverse order)
|
// push production's rhs onto the stack (in reverse order)
|
||||||
HCFChoice **s;
|
HCFChoice **s;
|
||||||
for(s = p->items; *s; s++);
|
for(s = p->items; *s; s++);
|
||||||
|
|
@ -319,11 +437,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
}
|
}
|
||||||
|
|
||||||
// the top of stack is such that there will be a result...
|
// the top of stack is such that there will be a result...
|
||||||
HParsedToken *tok; // will hold result token
|
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->index = stream->index;
|
tok->index = stream->pos + stream->index;
|
||||||
tok->bit_offset = stream->bit_offset;
|
tok->bit_offset = stream->bit_offset;
|
||||||
if(x == mark) {
|
if(x == MARK) {
|
||||||
// hit stack frame boundary...
|
// hit stack frame boundary...
|
||||||
// wrap the accumulated parse result, this sequence is finished
|
// wrap the accumulated parse result, this sequence is finished
|
||||||
tok->token_type = TT_SEQUENCE;
|
tok->token_type = TT_SEQUENCE;
|
||||||
|
|
@ -340,17 +457,25 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
// consume the input token
|
// consume the input token
|
||||||
uint8_t input = h_read_bits(stream, 8, false);
|
uint8_t input = h_read_bits(stream, 8, false);
|
||||||
|
|
||||||
|
// when old chunk consumed from window, switch to new chunk
|
||||||
|
if(s->win.length > 0 && s->win.index >= kmax) {
|
||||||
|
s->win.length = 0; // disable the window
|
||||||
|
stream = chunk;
|
||||||
|
}
|
||||||
|
|
||||||
switch(x->type) {
|
switch(x->type) {
|
||||||
case HCF_END:
|
case HCF_END:
|
||||||
if(!stream->overrun)
|
if(!stream->overrun)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
|
if(!stream->last_chunk)
|
||||||
|
goto need_input;
|
||||||
h_arena_free(arena, tok);
|
h_arena_free(arena, tok);
|
||||||
tok = NULL;
|
tok = NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
if(stream->overrun)
|
if(stream->overrun)
|
||||||
goto no_parse;
|
goto need_input;
|
||||||
if(input != x->chr)
|
if(input != x->chr)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
|
|
@ -359,7 +484,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
|
|
||||||
case HCF_CHARSET:
|
case HCF_CHARSET:
|
||||||
if(stream->overrun)
|
if(stream->overrun)
|
||||||
goto no_parse;
|
goto need_input;
|
||||||
if(!charset_isset(x->charset, input))
|
if(!charset_isset(x->charset, input))
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
|
|
@ -388,24 +513,82 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
h_carray_append(seq, tok);
|
h_carray_append(seq, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// success
|
||||||
// since we started with a single nonterminal on the stack, seq should
|
// since we started with a single nonterminal on the stack, seq should
|
||||||
// contain exactly the parse result.
|
// contain exactly the parse result.
|
||||||
assert(seq->used == 1);
|
assert(seq->used == 1);
|
||||||
h_delete_arena(tarena);
|
return seq;
|
||||||
return make_result(arena, seq->elements[0]);
|
|
||||||
|
|
||||||
no_parse:
|
no_parse:
|
||||||
h_delete_arena(tarena);
|
|
||||||
h_delete_arena(arena);
|
h_delete_arena(arena);
|
||||||
|
s->arena = NULL;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
need_input:
|
||||||
|
if(stream->last_chunk)
|
||||||
|
goto no_parse;
|
||||||
|
if(tok)
|
||||||
|
h_arena_free(arena, tok); // no result, yet
|
||||||
|
h_slist_push(stack, x); // try this symbol again next time
|
||||||
|
return seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Turn the final driver state into a parse result and release the state.
 *
 * Returns NULL when s->seq is NULL; otherwise s->seq must hold exactly one
 * element, which is wrapped with make_result(). The temporary arena and the
 * state object itself are freed in either case.
 */
static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
{
  HParseResult *result = NULL;
  HCountedArray *seq = s->seq;

  if(seq != NULL) {
    assert(seq->used == 1);
    result = make_result(s->arena, seq->elements[0]);
  }

  h_delete_arena(s->tarena);
  h_free(s);    // h_free takes its allocator from mm__

  return result;
}
|
||||||
|
|
||||||
|
/* One-shot LL(k) parse, built from the start/chunk/finish steps.
 *
 * 'stream' must be marked as the last chunk. On success the result's
 * bit_length is set from the final position of 'stream'; NULL is returned
 * when there is no parse.
 */
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLLkState *state = llk_parse_start_(mm__, parser);

  // the entire input is handed over as a single, final chunk
  assert(stream->last_chunk);
  state->seq = llk_parse_chunk_(state, parser, stream);

  HParseResult *result = llk_parse_finish_(mm__, state);
  if(result == NULL)
    return NULL;

  result->bit_length = stream->index * 8 + stream->bit_offset;
  return result;
}
|
||||||
|
|
||||||
|
/* Backend hook: create the LL(k) driver state for a suspended parser and
 * store it in s->backend_state. */
void h_llk_parse_start(HSuspendedParser *s)
{
  HLLkState *state = llk_parse_start_(s->mm__, s->parser);
  s->backend_state = state;
}
|
||||||
|
|
||||||
|
/* Backend hook: feed one chunk of input to a suspended LL(k) parse.
 *
 * Returns true when the parse is done: either it has failed (the chunk
 * driver returned NULL) or the symbol stack is empty.
 */
bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
{
  HLLkState *state = s->backend_state;

  state->seq = llk_parse_chunk_(state, s->parser, input);

  if(state->seq == NULL)
    return true;    // no parse

  return h_slist_empty(state->stack);
}
|
||||||
|
|
||||||
|
/* Backend hook: extract the result of a suspended LL(k) parse and free the
 * backend state. Returns NULL when there is no parse. */
HParseResult *h_llk_parse_finish(HSuspendedParser *s)
{
  HLLkState *state = s->backend_state;
  return llk_parse_finish_(s->mm__, state);
}
|
||||||
|
|
||||||
|
|
||||||
HParserBackendVTable h__llk_backend_vtable = {
|
HParserBackendVTable h__llk_backend_vtable = {
|
||||||
.compile = h_llk_compile,
|
.compile = h_llk_compile,
|
||||||
.parse = h_llk_parse,
|
.parse = h_llk_parse,
|
||||||
.free = h_llk_free
|
.free = h_llk_free,
|
||||||
|
|
||||||
|
.parse_start = h_llk_parse_start,
|
||||||
|
.parse_chunk = h_llk_parse_chunk,
|
||||||
|
.parse_finish = h_llk_parse_finish
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -199,15 +199,14 @@ bool h_lrtable_row_empty(const HLRTable *table, size_t i)
|
||||||
|
|
||||||
/* LR driver */
|
/* LR driver */
|
||||||
|
|
||||||
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
static
|
||||||
const HInputStream *stream)
|
HLREngine *h_lrengine_new_(HArena *arena, HArena *tarena, const HLRTable *table)
|
||||||
{
|
{
|
||||||
HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
|
HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
|
||||||
|
|
||||||
engine->table = table;
|
engine->table = table;
|
||||||
engine->state = 0;
|
engine->state = 0;
|
||||||
engine->stack = h_slist_new(tarena);
|
engine->stack = h_slist_new(tarena);
|
||||||
engine->input = *stream;
|
|
||||||
engine->merged[0] = NULL;
|
engine->merged[0] = NULL;
|
||||||
engine->merged[1] = NULL;
|
engine->merged[1] = NULL;
|
||||||
engine->arena = arena;
|
engine->arena = arena;
|
||||||
|
|
@ -216,6 +215,14 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||||
return engine;
|
return engine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||||
|
const HInputStream *stream)
|
||||||
|
{
|
||||||
|
HLREngine *engine = h_lrengine_new_(arena, tarena, table);
|
||||||
|
engine->input = *stream;
|
||||||
|
return engine;
|
||||||
|
}
|
||||||
|
|
||||||
static const HLRAction *
|
static const HLRAction *
|
||||||
terminal_lookup(const HLREngine *engine, const HInputStream *stream)
|
terminal_lookup(const HLREngine *engine, const HInputStream *stream)
|
||||||
{
|
{
|
||||||
|
|
@ -351,7 +358,9 @@ HParseResult *h_lrengine_result(HLREngine *engine)
|
||||||
// on top of the stack is the start symbol's semantic value
|
// on top of the stack is the start symbol's semantic value
|
||||||
assert(!h_slist_empty(engine->stack));
|
assert(!h_slist_empty(engine->stack));
|
||||||
HParsedToken *tok = engine->stack->head->elem;
|
HParsedToken *tok = engine->stack->head->elem;
|
||||||
return make_result(engine->arena, tok);
|
HParseResult *res = make_result(engine->arena, tok);
|
||||||
|
res->bit_length = (engine->input.pos + engine->input.index) * 8;
|
||||||
|
return res;
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -377,7 +386,53 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Backend hook: set up an LR engine for a suspended parser.
 *
 * Creates the result arena and the temporary arena, builds an engine without
 * input, and stores it in s->backend_state.
 */
void h_lr_parse_start(HSuspendedParser *s)
{
  HLRTable *table = s->parser->backend_data;
  assert(table != NULL);

  HArena *results = h_new_arena(s->mm__, 0);  // will hold the results
  HArena *scratch = h_new_arena(s->mm__, 0);  // tmp, deleted after parse

  s->backend_state = h_lrengine_new_(results, scratch, table);
}
|
||||||
|
|
||||||
|
/* Backend hook: run the LR engine on one chunk of input.
 *
 * The engine works on a copy of '*stream'; the updated copy is written back
 * before returning. Returns true when the engine has stopped running, false
 * when it is waiting for more input.
 */
bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream)
{
  HLREngine *engine = s->backend_state;
  engine->input = *stream;

  bool running = true;
  do {
    // check input against table to determine which action to take
    const HLRAction *action = h_lrengine_action(engine);

    if(action == NEED_INPUT) {
      // XXX assume lookahead 1
      assert(engine->input.length - engine->input.index == 0);
      break;
    }

    // execute action
    running = h_lrengine_step(engine, action);

    // ran past the end of a chunk that is not the last one: wait for more
    if(engine->input.overrun && !engine->input.last_chunk)
      break;
  } while(running);

  *stream = engine->input;

  return !running;    // done if engine no longer running
}
|
||||||
|
|
||||||
|
/* Backend hook: extract the result of a suspended LR parse and clean up.
 *
 * The result arena is deleted only when there is no result. The temporary
 * arena is always deleted.
 */
HParseResult *h_lr_parse_finish(HSuspendedParser *s)
{
  HLREngine *engine = s->backend_state;
  HParseResult *result = h_lrengine_result(engine);

  // without a result nothing refers to the result arena any more
  if(result == NULL)
    h_delete_arena(engine->arena);

  h_delete_arena(engine->tarena);

  return result;
}
|
||||||
|
|
||||||
/* Pretty-printers */
|
/* Pretty-printers */
|
||||||
|
|
||||||
|
|
@ -536,3 +591,35 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||||
fputc('\n', f);
|
fputc('\n', f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Print debugging information about the LR construction for parser 'p'.
 *
 * Writes three sections to 'f': the grammar, the LR(0) DFA and the LR(0)
 * table. Failures are reported as messages on 'f'.
 *
 * Returns the grammar built from 'p', or NULL if it could not be built. The
 * grammar is returned even when the DFA or the table could not be
 * constructed.
 */
HCFGrammar *h_pprint_lr_info(FILE *f, HParser *p)
{
  HAllocator *mm__ = &system_allocator;

  fprintf(f, "\n==== G R A M M A R ====\n");
  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
  if (g == NULL) {
    fprintf(f, "h_cfgrammar failed\n");
    return NULL;
  }
  h_pprint_grammar(f, g, 0);

  fprintf(f, "\n==== D F A ====\n");
  HLRDFA *dfa = h_lr0_dfa(g);
  if (dfa) {
    h_pprint_lrdfa(f, g, dfa, 0);
  } else {
    // report the function that was actually called
    fprintf(f, "h_lr0_dfa failed\n");
  }

  fprintf(f, "\n==== L R ( 0 ) T A B L E ====\n");
  // without a DFA there is nothing to build the table from; do not hand a
  // NULL DFA to h_lr0_table
  HLRTable *table0 = dfa ? h_lr0_table(g, dfa) : NULL;
  if (table0) {
    h_pprint_lrtable(f, g, table0, 0);
    h_lrtable_free(table0);   // only free a table that was actually built
  } else {
    fprintf(f, "h_lr0_table failed\n");
  }

  return g;
}
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,9 @@ const HLRAction *h_lrengine_action(const HLREngine *engine);
|
||||||
bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
|
bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
|
||||||
HParseResult *h_lrengine_result(HLREngine *engine);
|
HParseResult *h_lrengine_result(HLREngine *engine);
|
||||||
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||||
|
void h_lr_parse_start(HSuspendedParser *s);
|
||||||
|
bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream);
|
||||||
|
HParseResult *h_lr_parse_finish(HSuspendedParser *s);
|
||||||
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||||
|
|
||||||
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item);
|
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item);
|
||||||
|
|
@ -143,5 +146,6 @@ void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
|
||||||
const HLRDFA *dfa, unsigned int indent);
|
const HLRDFA *dfa, unsigned int indent);
|
||||||
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||||
unsigned int indent);
|
unsigned int indent);
|
||||||
|
HCFGrammar *h_pprint_lr_info(FILE *f, HParser *p);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -349,6 +349,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en
|
||||||
return m->epsilon_branch;
|
return m->epsilon_branch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A NULL result means no parse. NEED_INPUT means lookahead is too short.
|
||||||
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
|
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
|
||||||
{
|
{
|
||||||
while(m) {
|
while(m) {
|
||||||
|
|
@ -362,9 +363,13 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
|
||||||
// reading bits from it does not consume them from the real input.
|
// reading bits from it does not consume them from the real input.
|
||||||
uint8_t c = h_read_bits(&lookahead, 8, false);
|
uint8_t c = h_read_bits(&lookahead, 8, false);
|
||||||
|
|
||||||
if (lookahead.overrun) { // end of input
|
if (lookahead.overrun) { // end of chunk
|
||||||
// XXX assumption of byte-wise grammar and input
|
if (lookahead.last_chunk) { // end of input
|
||||||
return m->end_branch;
|
// XXX assumption of byte-wise grammar and input
|
||||||
|
return m->end_branch;
|
||||||
|
} else {
|
||||||
|
return NEED_INPUT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// no match yet, descend
|
// no match yet, descend
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,9 @@ bool h_stringmap_empty(const HStringMap *m);
|
||||||
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
|
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
|
||||||
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
|
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
|
||||||
|
|
||||||
|
// dummy return value used by h_stringmap_get_lookahead when out of input
|
||||||
|
#define NEED_INPUT ((void *)-1)
|
||||||
|
|
||||||
|
|
||||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||||
* of nonterminals.
|
* of nonterminals.
|
||||||
|
|
|
||||||
96
src/hammer.c
96
src/hammer.c
|
|
@ -43,6 +43,7 @@ typedef struct {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN)
|
||||||
|
|
||||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
return h_parse__m(&system_allocator, parser, input, length);
|
return h_parse__m(&system_allocator, parser, input, length);
|
||||||
|
|
@ -50,12 +51,14 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length
|
||||||
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
// Set up a parse state...
|
// Set up a parse state...
|
||||||
HInputStream input_stream = {
|
HInputStream input_stream = {
|
||||||
|
.pos = 0,
|
||||||
.index = 0,
|
.index = 0,
|
||||||
.bit_offset = 0,
|
.bit_offset = 0,
|
||||||
.overrun = 0,
|
.overrun = 0,
|
||||||
.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
|
.endianness = DEFAULT_ENDIANNESS,
|
||||||
.length = length,
|
.length = length,
|
||||||
.input = input
|
.input = input,
|
||||||
|
.last_chunk = true
|
||||||
};
|
};
|
||||||
|
|
||||||
return backends[parser->backend]->parse(mm__, parser, &input_stream);
|
return backends[parser->backend]->parse(mm__, parser, &input_stream);
|
||||||
|
|
@ -96,3 +99,92 @@ int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, cons
|
||||||
parser->backend = backend;
|
parser->backend = backend;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
HSuspendedParser* h_parse_start(const HParser* parser) {
|
||||||
|
return h_parse_start__m(&system_allocator, parser);
|
||||||
|
}
|
||||||
|
/* Begin an iterative parse with an explicit allocator.
 *
 * Returns NULL if the parser's backend has no parse_start hook or if the
 * suspended-parser object cannot be allocated. Otherwise returns a suspended
 * parser positioned at the start of the input, after the backend has run its
 * own initialization on it.
 */
HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
  // iterative parsing is only available if the backend provides the hook
  if(backends[parser->backend]->parse_start == NULL)
    return NULL;

  // allocate and init suspended state
  HSuspendedParser *sp = h_new(HSuspendedParser, 1);
  if(sp == NULL)
    return NULL;

  sp->mm__          = mm__;
  sp->parser        = parser;
  sp->backend_state = NULL;
  sp->done          = false;

  // nothing consumed yet, default endianness
  sp->pos        = 0;
  sp->bit_offset = 0;
  sp->endianness = DEFAULT_ENDIANNESS;

  // backend-specific initialization
  // should allocate sp->backend_state
  backends[parser->backend]->parse_start(sp);

  return sp;
}
|
||||||
|
|
||||||
|
/* Feed one chunk of input to a suspended parser.
 *
 * input/length describe the chunk. Returns true if the parser is done and
 * needs no more input. Calling this on a parser that is already done is a
 * no-op that returns true.
 */
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
  assert(backends[s->parser->backend]->parse_chunk != NULL);

  // no-op if parser is already done
  if(s->done)
    return true;

  // wrap the chunk in a stream that starts at the current overall position;
  // it is never marked as the last chunk here
  HInputStream chunk = {
    .input      = input,
    .length     = length,
    .pos        = s->pos,
    .index      = 0,
    .bit_offset = 0,
    .overrun    = 0,
    .endianness = s->endianness,
    .last_chunk = false
  };

  // process chunk
  s->done = backends[s->parser->backend]->parse_chunk(s, &chunk);

  // carry the stream state over to the next call
  s->endianness = chunk.endianness;
  s->pos       += chunk.index;
  s->bit_offset = chunk.bit_offset;

  return s->done;
}
|
||||||
|
|
||||||
|
/* Finish an iterative parse and return its result.
 *
 * If the parser is not done yet, the end of input is signalled to the
 * backend with an empty final chunk. The result, if any, gets its bit_length
 * set from the suspended parser's position. The suspended parser object is
 * freed; 's' must not be used afterwards.
 */
HParseResult* h_parse_finish(HSuspendedParser* s) {
  assert(backends[s->parser->backend]->parse_chunk != NULL);
  assert(backends[s->parser->backend]->parse_finish != NULL);

  HAllocator *mm__ = s->mm__;   // used by h_free below

  // signal end of input if parser is not already done
  if(!s->done) {
    HInputStream eof = {
      .input      = NULL,
      .length     = 0,
      .pos        = s->pos,
      .index      = 0,
      .bit_offset = 0,
      .overrun    = 0,
      .endianness = s->endianness,
      .last_chunk = true
    };

    s->done = backends[s->parser->backend]->parse_chunk(s, &eof);
    assert(s->done);
  }

  // extract result
  HParseResult *result = backends[s->parser->backend]->parse_finish(s);
  if(result != NULL)
    result->bit_length = s->pos * 8 + s->bit_offset;

  // NB: backend should have freed backend_state
  h_free(s);

  return result;
}
|
||||||
|
|
|
||||||
23
src/hammer.h
23
src/hammer.h
|
|
@ -140,6 +140,8 @@ typedef struct HParser_ {
|
||||||
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
||||||
} HParser;
|
} HParser;
|
||||||
|
|
||||||
|
typedef struct HSuspendedParser_ HSuspendedParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Type of an action to apply to an AST, used in the action() parser.
|
* Type of an action to apply to an AST, used in the action() parser.
|
||||||
* It can be any (user-defined) function that takes a HParseResult*
|
* It can be any (user-defined) function that takes a HParseResult*
|
||||||
|
|
@ -265,6 +267,27 @@ typedef struct HBenchmarkResults_ {
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length);
|
HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a parser for iteratively consuming an input stream in chunks.
|
||||||
|
* This is only supported by some backends.
|
||||||
|
*
|
||||||
|
* Result is NULL if not supported by the backend.
|
||||||
|
*/
|
||||||
|
HAMMER_FN_DECL(HSuspendedParser*, h_parse_start, const HParser* parser);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run a suspended parser (as returned by h_parse_start) on a chunk of input.
|
||||||
|
*
|
||||||
|
* Returns true if the parser is done (needs no more input).
|
||||||
|
*/
|
||||||
|
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finish an iterative parse. Signals the end of input to the backend and
|
||||||
|
* returns the parse result.
|
||||||
|
*/
|
||||||
|
HParseResult* h_parse_finish(HSuspendedParser* s);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a string, returns a parser that parses that string value.
|
* Given a string, returns a parser that parses that string value.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -72,13 +72,15 @@ typedef struct HCFStack_ HCFStack;
|
||||||
typedef struct HInputStream_ {
|
typedef struct HInputStream_ {
|
||||||
// This should be considered to be a really big value type.
|
// This should be considered to be a really big value type.
|
||||||
const uint8_t *input;
|
const uint8_t *input;
|
||||||
|
size_t pos; // position of this chunk in a multi-chunk stream
|
||||||
size_t index;
|
size_t index;
|
||||||
size_t length;
|
size_t length;
|
||||||
char bit_offset;
|
char bit_offset;
|
||||||
char margin; // The number of bits on the end that is being read
|
char margin; // The number of bits on the end that is being read
|
||||||
// towards that should be ignored.
|
// towards that should be ignored.
|
||||||
char endianness;
|
char endianness;
|
||||||
char overrun;
|
bool overrun;
|
||||||
|
bool last_chunk;
|
||||||
} HInputStream;
|
} HInputStream;
|
||||||
|
|
||||||
typedef struct HSlistNode_ {
|
typedef struct HSlistNode_ {
|
||||||
|
|
@ -210,10 +212,32 @@ struct HParseState_ {
|
||||||
HSlist *symbol_table; // its contents are HHashTables
|
HSlist *symbol_table; // its contents are HHashTables
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct HSuspendedParser_ {
|
||||||
|
HAllocator *mm__;
|
||||||
|
const HParser *parser;
|
||||||
|
void *backend_state;
|
||||||
|
bool done;
|
||||||
|
|
||||||
|
// input stream state
|
||||||
|
size_t pos;
|
||||||
|
uint8_t bit_offset;
|
||||||
|
uint8_t endianness;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct HParserBackendVTable_ {
|
typedef struct HParserBackendVTable_ {
|
||||||
int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
|
int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
|
||||||
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* stream);
|
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* stream);
|
||||||
void (*free)(HParser* parser);
|
void (*free)(HParser* parser);
|
||||||
|
|
||||||
|
void (*parse_start)(HSuspendedParser *s);
|
||||||
|
// parse_start should allocate s->backend_state.
|
||||||
|
bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
|
||||||
|
// if parser is done, return true. otherwise:
|
||||||
|
// parse_chunk MUST consume all input, integrating it into s->backend_state.
|
||||||
|
// parse_chunk will not be called again after it reports done.
|
||||||
|
HParseResult *(*parse_finish)(HSuspendedParser *s);
|
||||||
|
// parse_finish must free s->backend_state.
|
||||||
|
// parse_finish will not be called before parse_chunk reports done.
|
||||||
} HParserBackendVTable;
|
} HParserBackendVTable;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
122
src/t_parser.c
122
src/t_parser.c
|
|
@ -443,6 +443,115 @@ static void test_rightrec(gconstpointer backend) {
|
||||||
g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))");
|
g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Chunk-wise parsing: each check feeds the input as two chunks and compares
 * the outcome (printed AST, or failure) for four small grammars.
 */
static void test_iterative(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  // a single token split across the chunk boundary
  HParser *token = h_token((uint8_t*)"foobar", 6);
  g_check_parse_chunks_match(token, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_match(token, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_failed(token, be, "fou",3, "bar",3);
  g_check_parse_chunks_failed(token, be, "foo",3, "par",3);
  g_check_parse_chunks_failed(token, be, "foo",3, "baz",3);

  // a sequence whose middle element straddles the boundary
  HParser *seq = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL);
  g_check_parse_chunks_match(seq, be, "foo",3, "bar",3, "(u0x66 <6f.6f.62.61> u0x72)");
  g_check_parse_chunks_match(seq, be, "foo",3, "barbaz",6, "(u0x66 <6f.6f.62.61> u0x72)");
  g_check_parse_chunks_failed(seq, be, "fou",3, "bar",3);
  g_check_parse_chunks_failed(seq, be, "foo",3, "par",3);
  g_check_parse_chunks_failed(seq, be, "foo",3, "baz",3);

  // a choice between two tokens, including empty first/second chunks
  HParser *alt = h_choice(h_token((uint8_t*)"foobar", 6),
                          h_token((uint8_t*)"phupar", 6), NULL);
  g_check_parse_chunks_match(alt, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_match(alt, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_match(alt, be, "phu",3, "par",3, "<70.68.75.70.61.72>");
  g_check_parse_chunks_failed(alt, be, "fou",3, "bar",3);
  g_check_parse_chunks_failed(alt, be, "foo",3, "baz",3);
  g_check_parse_chunks_match(alt, be, "foobar",6, "",0, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_match(alt, be, "",0, "foobar",6, "<66.6f.6f.62.61.72>");
  g_check_parse_chunks_failed(alt, be, "foo",3, "",0);
  g_check_parse_chunks_failed(alt, be, "",0, "foo",3);

  // a choice nested inside a sequence
  HParser *tail = h_choice(h_token((uint8_t*)"oo", 2),
                           h_token((uint8_t*)"uu", 2), NULL);
  HParser *nested = h_sequence(h_ch('f'), tail, NULL);
  g_check_parse_chunks_match(nested, be, "f",1, "oo",2, "(u0x66 <6f.6f>)");
  g_check_parse_chunks_match(nested, be, "f",1, "uu",2, "(u0x66 <75.75>)");
  g_check_parse_chunks_failed(nested, be, "g",1, "oo",2);
  g_check_parse_chunks_failed(nested, be, "f",1, "ou",2);
  g_check_parse_chunks_failed(nested, be, "f",1, "uo",2);
}
|
||||||
|
|
||||||
|
/*
 * Chunk-wise parsing of a grammar that needs two tokens of lookahead.
 * The parser is compiled once, explicitly, with (void *)2 as the backend
 * parameter, so the checks use the underscore macro variants that do not
 * compile the parser again.
 */
static void test_iterative_lookahead(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  // needs 2 lookahead
  HParser *tail = h_choice(h_token((uint8_t*)"oo", 2),
                           h_token((uint8_t*)"ou", 2), NULL);
  HParser *grammar = h_sequence(h_ch('f'), tail, NULL);

  int compile_rc = h_compile(grammar, be, (void *)2);
  if(compile_rc != 0) {
    g_test_message("Compile failed");
    g_test_fail();
    return;
  }

  // partial chunk consumed
  g_check_parse_chunks_match_(grammar, "fo",2, "o",1, "(u0x66 <6f.6f>)");
  g_check_parse_chunks_match_(grammar, "fo",2, "u",1, "(u0x66 <6f.75>)");
  g_check_parse_chunks_failed_(grammar, "go",2, "o",1);
  g_check_parse_chunks_failed_(grammar, "fa",2, "u",1);
  g_check_parse_chunks_failed_(grammar, "fo",2, "b",1);
}
|
||||||
|
|
||||||
|
/*
 * The bit_length of a chunk-wise parse result must count only the input the
 * parser actually matched ("foobar", 6 bytes = 48 bits), even though the
 * final chunk carries trailing bytes ("baz") beyond the match.
 */
static void test_iterative_result_length(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
  HParser *p = h_token((uint8_t*)"foobar", 6);

  if(h_compile(p, be, NULL) != 0) {
    g_test_message("Compile failed");
    g_test_fail();
    return;
  }

  HSuspendedParser *s = h_parse_start(p);
  if(!s) {
    g_test_message("Chunked parsing not available");
    g_test_fail();
    return;
  }

  // the token is spread over three chunks; the last one has extra input
  h_parse_chunk(s, (uint8_t*)"foo", 3);
  h_parse_chunk(s, (uint8_t*)"ba", 2);
  h_parse_chunk(s, (uint8_t*)"rbaz", 4);

  HParseResult *r = h_parse_finish(s);
  if(!r) {
    g_test_message("Parse failed");
    g_test_fail();
    return;
  }

  g_check_cmp_int64(r->bit_length, ==, 48);

  // release the result the same way the chunk-match check macro does,
  // so the test does not leak the parse arena
  h_delete_arena(r->arena);
}
|
||||||
|
|
||||||
|
/*
 * The bit_length of a one-shot parse result must count only the matched
 * prefix ("foo", 3 bytes = 24 bits) of a longer input ("foobar").
 */
static void test_result_length(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
  HParser *p = h_token((uint8_t*)"foo", 3);

  if(h_compile(p, be, NULL) != 0) {
    g_test_message("Compile failed");
    g_test_fail();
    return;
  }

  HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6);
  if(!r) {
    g_test_message("Parse failed");
    g_test_fail();
    return;
  }

  g_check_cmp_int64(r->bit_length, ==, 24);

  // release the result the same way the chunk-match check macro does,
  // so the test does not leak the parse arena
  h_delete_arena(r->arena);
}
|
||||||
|
|
||||||
static void test_ambiguous(gconstpointer backend) {
|
static void test_ambiguous(gconstpointer backend) {
|
||||||
HParser *d_ = h_ch('d');
|
HParser *d_ = h_ch('d');
|
||||||
HParser *p_ = h_ch('+');
|
HParser *p_ = h_ch('+');
|
||||||
|
|
@ -653,6 +762,7 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
|
g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
|
||||||
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
|
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
|
||||||
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
|
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
|
||||||
|
g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
||||||
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
||||||
|
|
@ -691,6 +801,10 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore);
|
g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore);
|
||||||
//g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
|
//g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
|
||||||
g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec);
|
g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec);
|
||||||
|
g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length);
|
||||||
|
g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative);
|
||||||
|
g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead);
|
||||||
|
g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
|
g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
|
||||||
g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
|
g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
|
||||||
|
|
@ -703,8 +817,8 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32);
|
g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32);
|
||||||
g_test_add_data_func("/core/parser/regex/uint16", GINT_TO_POINTER(PB_REGULAR), test_uint16);
|
g_test_add_data_func("/core/parser/regex/uint16", GINT_TO_POINTER(PB_REGULAR), test_uint16);
|
||||||
g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8);
|
g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8);
|
||||||
g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range);
|
|
||||||
#if 0
|
#if 0
|
||||||
|
g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range);
|
||||||
g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64);
|
g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64);
|
||||||
g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32);
|
g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -728,6 +842,7 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p);
|
g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p);
|
||||||
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
|
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
|
||||||
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
|
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
|
||||||
|
g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
|
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
|
||||||
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
|
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
|
||||||
|
|
@ -767,6 +882,10 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
|
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
|
||||||
g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
|
g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
|
||||||
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
|
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
|
g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
|
||||||
g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
|
g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
|
||||||
|
|
@ -807,4 +926,5 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne);
|
g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne);
|
||||||
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
|
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
|
||||||
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
|
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
|
||||||
|
g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,8 @@
|
||||||
#define g_check_parse_failed(parser, backend, input, inp_len) do { \
|
#define g_check_parse_failed(parser, backend, input, inp_len) do { \
|
||||||
int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \
|
int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \
|
||||||
if(skip != 0) { \
|
if(skip != 0) { \
|
||||||
g_test_message("Backend not applicable, skipping test"); \
|
g_test_message("Compile failed"); \
|
||||||
|
g_test_fail(); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
const HParseResult *result = h_parse(parser, (const uint8_t*)input, inp_len); \
|
const HParseResult *result = h_parse(parser, (const uint8_t*)input, inp_len); \
|
||||||
|
|
@ -103,7 +104,8 @@
|
||||||
#define g_check_parse_ok(parser, backend, input, inp_len) do { \
|
#define g_check_parse_ok(parser, backend, input, inp_len) do { \
|
||||||
int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \
|
int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \
|
||||||
if(skip) { \
|
if(skip) { \
|
||||||
g_test_message("Backend not applicable, skipping test"); \
|
g_test_message("Compile failed"); \
|
||||||
|
g_test_fail(); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \
|
HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \
|
||||||
|
|
@ -124,7 +126,8 @@
|
||||||
#define g_check_parse_match(parser, backend, input, inp_len, result) do { \
|
#define g_check_parse_match(parser, backend, input, inp_len, result) do { \
|
||||||
int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \
|
int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \
|
||||||
if(skip) { \
|
if(skip) { \
|
||||||
g_test_message("Backend not applicable, skipping test"); \
|
g_test_message("Compile failed"); \
|
||||||
|
g_test_fail(); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \
|
HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \
|
||||||
|
|
@ -145,6 +148,69 @@
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/*
 * Compile `parser` for `backend`, then check that parsing the two given
 * chunks fails. A compile error marks the test as failed and skips the
 * parse check. Macro arguments are parenthesized before being cast so that
 * compound expressions are cast as a whole.
 */
#define g_check_parse_chunks_failed(parser, backend, chunk1, c1_len, chunk2, c2_len) do { \
    int skip = h_compile((HParser *)(parser), (HParserBackend)(backend), NULL); \
    if(skip) {                                                          \
      g_test_message("Compile failed");                                 \
      g_test_fail();                                                    \
      break;                                                            \
    }                                                                   \
    g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len); \
  } while(0)
|
||||||
|
|
||||||
|
/*
 * Check that an already-compiled `parser` fails on input delivered as two
 * chunks. The test is marked as failed if chunk-wise parsing is unavailable
 * (h_parse_start returns NULL) or if the parse unexpectedly succeeds.
 * Macro arguments are parenthesized before being cast so that compound
 * expressions are cast as a whole.
 */
#define g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len) do { \
    HSuspendedParser *s = h_parse_start(parser);                        \
    if(!s) {                                                            \
      g_test_message("Chunk-wise parsing not available");               \
      g_test_fail();                                                    \
      break;                                                            \
    }                                                                   \
    h_parse_chunk(s, (const uint8_t*)(chunk1), (c1_len));               \
    h_parse_chunk(s, (const uint8_t*)(chunk2), (c2_len));               \
    const HParseResult *res = h_parse_finish(s);                        \
    if (NULL != res) {                                                  \
      g_test_message("Check failed: shouldn't have succeeded, but did"); \
      g_test_fail();                                                    \
    }                                                                   \
  } while(0)
|
||||||
|
|
||||||
|
/*
 * Compile `parser` for `backend`, then check that parsing the two given
 * chunks succeeds and that the printed AST equals `result`. A compile error
 * marks the test as failed and skips the parse check. Macro arguments are
 * parenthesized before being cast so that compound expressions are cast as
 * a whole.
 */
#define g_check_parse_chunks_match(parser, backend, chunk1, c1_len, chunk2, c2_len, result) do { \
    int skip = h_compile((HParser *)(parser), (HParserBackend)(backend), NULL); \
    if(skip) {                                                          \
      g_test_message("Compile failed");                                 \
      g_test_fail();                                                    \
      break;                                                            \
    }                                                                   \
    g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result); \
  } while(0)
|
||||||
|
|
||||||
|
/*
 * Check that an already-compiled `parser` accepts input delivered as two
 * chunks and that the printed AST equals `result`. The test is marked as
 * failed if chunk-wise parsing is unavailable (h_parse_start returns NULL)
 * or if the parse fails. On success, arena usage statistics are logged and
 * the result's arena is deleted. Macro arguments are parenthesized before
 * being cast so that compound expressions are cast as a whole.
 */
#define g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result) do { \
    HSuspendedParser *s = h_parse_start(parser);                        \
    if(!s) {                                                            \
      g_test_message("Chunk-wise parsing not available");               \
      g_test_fail();                                                    \
      break;                                                            \
    }                                                                   \
    h_parse_chunk(s, (const uint8_t*)(chunk1), (c1_len));               \
    h_parse_chunk(s, (const uint8_t*)(chunk2), (c2_len));               \
    HParseResult *res = h_parse_finish(s);                              \
    if (!res) {                                                         \
      g_test_message("Parse failed on line %d", __LINE__);              \
      g_test_fail();                                                    \
    } else {                                                            \
      char* cres = h_write_result_unamb(res->ast);                      \
      g_check_string(cres, ==, result);                                 \
      (&system_allocator)->free(&system_allocator, cres);               \
      HArenaStats stats;                                                \
      h_allocator_stats(res->arena, &stats);                            \
      g_test_message("Parse used %zd bytes, wasted %zd bytes. "         \
                     "Inefficiency: %5f%%",                             \
                     stats.used, stats.wasted,                          \
                     stats.wasted * 100. / (stats.used+stats.wasted));  \
      h_delete_arena(res->arena);                                       \
    }                                                                   \
  } while(0)
|
||||||
|
|
||||||
#define g_check_hashtable_present(table, key) do { \
|
#define g_check_hashtable_present(table, key) do { \
|
||||||
if(!h_hashtable_present(table, key)) { \
|
if(!h_hashtable_present(table, key)) { \
|
||||||
g_test_message("Check failed: key should have been in table, but wasn't"); \
|
g_test_message("Check failed: key should have been in table, but wasn't"); \
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue