move chunk position into HInputStream and simplify internal chunk API

This commit is contained in:
Sven M. Hallberg 2015-09-16 23:25:36 +02:00
parent e385e0e5de
commit 42d35fb883
3 changed files with 57 additions and 33 deletions

View file

@ -266,7 +266,6 @@ typedef struct {
HArena *tarena; // tmp, deleted after parse
HSlist *stack;
HCountedArray *seq; // accumulates current parse result
size_t index; // input position in bytes
uint8_t *buf; // for lookahead across chunk boundaries
// allocated to size 2*kmax
@ -298,7 +297,6 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
s->tarena = h_new_arena(mm__, 0);
s->stack = h_slist_new(s->tarena);
s->seq = h_carray_new(s->arena);
s->index = 0;
s->buf = h_arena_malloc(s->tarena, 2 * table->kmax);
s->win.input = s->buf;
@ -350,13 +348,14 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream)
// (0 kmax )
// ... \_old_/\_new_/ ...
//
s->index += len; // position of the window shifts up
s->win.pos += len; // position of the window shifts up
len = s->win.length - s->win.index;
assert(len <= kmax);
memmove(s->buf + kmax - len, s->buf + s->win.index, len);
} else {
// window not active? save stream to window.
s->index -= kmax; // window starts kmax bytes below next chunk
// buffer starts kmax bytes below chunk boundary
s->win.pos = stream->pos - kmax;
memcpy(s->buf + kmax - len, stream->input + stream->index, len);
}
@ -439,7 +438,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// the top of stack is such that there will be a result...
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->index = s->index + stream->index;
tok->index = stream->pos + stream->index;
tok->bit_offset = stream->bit_offset;
if(x == MARK) {
// hit stack frame boundary...
@ -461,7 +460,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// when old chunk consumed from window, switch to new chunk
if(s->win.length > 0 && s->win.index >= kmax) {
s->win.length = 0; // disable the window
s->index += kmax; // new chunk starts kmax bytes above the window
stream = chunk;
}
@ -519,13 +517,11 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// since we started with a single nonterminal on the stack, seq should
// contain exactly the parse result.
assert(seq->used == 1);
s->index += stream->index;
return seq;
no_parse:
h_delete_arena(arena);
s->arena = NULL;
s->index += stream->index;
return NULL;
need_input:
@ -534,7 +530,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
if(tok)
h_arena_free(arena, tok); // no result, yet
h_slist_push(stack, x); // try this symbol again next time
s->index += stream->index;
return seq;
}
@ -545,7 +540,6 @@ static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
if(s->seq) {
assert(s->seq->used == 1);
res = make_result(s->arena, s->seq->elements[0]);
res->bit_length = s->index*8;
}
h_delete_arena(s->tarena);
@ -560,7 +554,11 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
assert(stream->last_chunk);
s->seq = llk_parse_chunk_(s, parser, stream);
return llk_parse_finish_(mm__, s);
HParseResult *res = llk_parse_finish_(mm__, s);
if(res)
res->bit_length = stream->index * 8 + stream->bit_offset;
return res;
}
void h_llk_parse_start(HSuspendedParser *s)
@ -568,29 +566,17 @@ void h_llk_parse_start(HSuspendedParser *s)
s->backend_state = llk_parse_start_(s->mm__, s->parser);
}
void h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
{
HLLkState *state = s->backend_state;
state->seq = llk_parse_chunk_(state, s->parser, input);
return (state->seq == NULL || h_slist_empty(state->stack));
}
HParseResult *h_llk_parse_finish(HSuspendedParser *s)
{
HLLkState *state = s->backend_state;
HInputStream empty = {
.index = 0,
.bit_offset = 0,
.overrun = 0,
.endianness = s->endianness,
.length = 0,
.input = NULL,
.last_chunk = true
};
// signal end of input (no-op parse already done)
state->seq = llk_parse_chunk_(state, s->parser, &empty);
return llk_parse_finish_(s->mm__, s->backend_state);
}

View file

@ -51,6 +51,7 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
// Set up a parse state...
HInputStream input_stream = {
.pos = 0,
.index = 0,
.bit_offset = 0,
.overrun = 0,
@ -114,6 +115,9 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
s->mm__ = mm__;
s->parser = parser;
s->backend_state = NULL;
s->done = false;
s->pos = 0;
s->bit_offset = 0;
s->endianness = DEFAULT_ENDIANNESS;
// backend-specific initialization
@ -126,8 +130,13 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
assert(backends[s->parser->backend]->parse_chunk != NULL);
// no-op if parser is already done
if(s->done)
return true;
// input
HInputStream input_stream = {
.pos = s->pos,
.index = 0,
.bit_offset = 0,
.overrun = 0,
@ -138,18 +147,42 @@ bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
};
// process chunk
backends[s->parser->backend]->parse_chunk(s, &input_stream);
s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream);
s->endianness = input_stream.endianness;
s->pos += input_stream.index;
s->bit_offset = input_stream.bit_offset;
return !input_stream.overrun; // parser wants no more input? done.
return s->done;
}
HParseResult* h_parse_finish(HSuspendedParser* s) {
assert(backends[s->parser->backend]->parse_chunk != NULL);
assert(backends[s->parser->backend]->parse_finish != NULL);
HAllocator *mm__ = s->mm__;
// signal end of input if parser is not already done
if(!s->done) {
HInputStream empty = {
.pos = s->pos,
.index = 0,
.bit_offset = 0,
.overrun = 0,
.endianness = s->endianness,
.length = 0,
.input = NULL,
.last_chunk = true
};
s->done = backends[s->parser->backend]->parse_chunk(s, &empty);
assert(s->done);
}
// extract result
HParseResult *r = backends[s->parser->backend]->parse_finish(s);
if(r)
r->bit_length = s->pos * 8 + s->bit_offset;
// NB: backend should have freed backend_state
h_free(s);

View file

@ -72,6 +72,7 @@ typedef struct HCFStack_ HCFStack;
typedef struct HInputStream_ {
// This should be considered to be a really big value type.
const uint8_t *input;
size_t pos; // position of this chunk in a multi-chunk stream
size_t index;
size_t length;
char bit_offset;
@ -215,8 +216,11 @@ struct HSuspendedParser_ {
HAllocator *mm__;
const HParser *parser;
void *backend_state;
bool done;
// the only part of HInputStream that carries across chunks
// input stream state
size_t pos;
uint8_t bit_offset;
uint8_t endianness;
};
@ -227,12 +231,13 @@ typedef struct HParserBackendVTable_ {
void (*parse_start)(HSuspendedParser *s);
// parse_start should allocate s->backend_state.
void (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
// when parse_chunk leaves input.overrun unset, parse is done. else:
bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
// if parser is done, return true. otherwise:
// parse_chunk MUST consume all input, integrating it into s->backend_state.
// calling parse_chunk again after parse is done should have no effect.
// parse_chunk will not be called again after it reports done.
HParseResult *(*parse_finish)(HSuspendedParser *s);
// parse_finish must free s->backend_state.
// parse_finish will not be called before parse_chunk reports done.
} HParserBackendVTable;