move chunk position into HInputStream and simplify internal chunk API

This commit is contained in:
Sven M. Hallberg 2015-09-16 23:25:36 +02:00
parent e385e0e5de
commit 42d35fb883
3 changed files with 57 additions and 33 deletions

View file

@@ -266,7 +266,6 @@ typedef struct {
HArena *tarena; // tmp, deleted after parse HArena *tarena; // tmp, deleted after parse
HSlist *stack; HSlist *stack;
HCountedArray *seq; // accumulates current parse result HCountedArray *seq; // accumulates current parse result
size_t index; // input position in bytes
uint8_t *buf; // for lookahead across chunk boundaries uint8_t *buf; // for lookahead across chunk boundaries
// allocated to size 2*kmax // allocated to size 2*kmax
@@ -298,7 +297,6 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
s->tarena = h_new_arena(mm__, 0); s->tarena = h_new_arena(mm__, 0);
s->stack = h_slist_new(s->tarena); s->stack = h_slist_new(s->tarena);
s->seq = h_carray_new(s->arena); s->seq = h_carray_new(s->arena);
s->index = 0;
s->buf = h_arena_malloc(s->tarena, 2 * table->kmax); s->buf = h_arena_malloc(s->tarena, 2 * table->kmax);
s->win.input = s->buf; s->win.input = s->buf;
@@ -350,13 +348,14 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream)
// (0 kmax ) // (0 kmax )
// ... \_old_/\_new_/ ... // ... \_old_/\_new_/ ...
// //
s->index += len; // position of the window shifts up s->win.pos += len; // position of the window shifts up
len = s->win.length - s->win.index; len = s->win.length - s->win.index;
assert(len <= kmax); assert(len <= kmax);
memmove(s->buf + kmax - len, s->buf + s->win.index, len); memmove(s->buf + kmax - len, s->buf + s->win.index, len);
} else { } else {
// window not active? save stream to window. // window not active? save stream to window.
s->index -= kmax; // window starts kmax bytes below next chunk // buffer starts kmax bytes below chunk boundary
s->win.pos = stream->pos - kmax;
memcpy(s->buf + kmax - len, stream->input + stream->index, len); memcpy(s->buf + kmax - len, stream->input + stream->index, len);
} }
@@ -439,7 +438,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// the top of stack is such that there will be a result... // the top of stack is such that there will be a result...
tok = h_arena_malloc(arena, sizeof(HParsedToken)); tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->index = s->index + stream->index; tok->index = stream->pos + stream->index;
tok->bit_offset = stream->bit_offset; tok->bit_offset = stream->bit_offset;
if(x == MARK) { if(x == MARK) {
// hit stack frame boundary... // hit stack frame boundary...
@@ -461,7 +460,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// when old chunk consumed from window, switch to new chunk // when old chunk consumed from window, switch to new chunk
if(s->win.length > 0 && s->win.index >= kmax) { if(s->win.length > 0 && s->win.index >= kmax) {
s->win.length = 0; // disable the window s->win.length = 0; // disable the window
s->index += kmax; // new chunk starts kmax bytes above the window
stream = chunk; stream = chunk;
} }
@@ -519,13 +517,11 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
// since we started with a single nonterminal on the stack, seq should // since we started with a single nonterminal on the stack, seq should
// contain exactly the parse result. // contain exactly the parse result.
assert(seq->used == 1); assert(seq->used == 1);
s->index += stream->index;
return seq; return seq;
no_parse: no_parse:
h_delete_arena(arena); h_delete_arena(arena);
s->arena = NULL; s->arena = NULL;
s->index += stream->index;
return NULL; return NULL;
need_input: need_input:
@@ -534,7 +530,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
if(tok) if(tok)
h_arena_free(arena, tok); // no result, yet h_arena_free(arena, tok); // no result, yet
h_slist_push(stack, x); // try this symbol again next time h_slist_push(stack, x); // try this symbol again next time
s->index += stream->index;
return seq; return seq;
} }
@@ -545,7 +540,6 @@ static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
if(s->seq) { if(s->seq) {
assert(s->seq->used == 1); assert(s->seq->used == 1);
res = make_result(s->arena, s->seq->elements[0]); res = make_result(s->arena, s->seq->elements[0]);
res->bit_length = s->index*8;
} }
h_delete_arena(s->tarena); h_delete_arena(s->tarena);
@@ -560,7 +554,11 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
assert(stream->last_chunk); assert(stream->last_chunk);
s->seq = llk_parse_chunk_(s, parser, stream); s->seq = llk_parse_chunk_(s, parser, stream);
return llk_parse_finish_(mm__, s); HParseResult *res = llk_parse_finish_(mm__, s);
if(res)
res->bit_length = stream->index * 8 + stream->bit_offset;
return res;
} }
void h_llk_parse_start(HSuspendedParser *s) void h_llk_parse_start(HSuspendedParser *s)
@@ -568,29 +566,17 @@ void h_llk_parse_start(HSuspendedParser *s)
s->backend_state = llk_parse_start_(s->mm__, s->parser); s->backend_state = llk_parse_start_(s->mm__, s->parser);
} }
void h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input) bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
{ {
HLLkState *state = s->backend_state; HLLkState *state = s->backend_state;
state->seq = llk_parse_chunk_(state, s->parser, input); state->seq = llk_parse_chunk_(state, s->parser, input);
return (state->seq == NULL || h_slist_empty(state->stack));
} }
HParseResult *h_llk_parse_finish(HSuspendedParser *s) HParseResult *h_llk_parse_finish(HSuspendedParser *s)
{ {
HLLkState *state = s->backend_state;
HInputStream empty = {
.index = 0,
.bit_offset = 0,
.overrun = 0,
.endianness = s->endianness,
.length = 0,
.input = NULL,
.last_chunk = true
};
// signal end of input (no-op parse already done)
state->seq = llk_parse_chunk_(state, s->parser, &empty);
return llk_parse_finish_(s->mm__, s->backend_state); return llk_parse_finish_(s->mm__, s->backend_state);
} }

View file

@@ -51,6 +51,7 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
// Set up a parse state... // Set up a parse state...
HInputStream input_stream = { HInputStream input_stream = {
.pos = 0,
.index = 0, .index = 0,
.bit_offset = 0, .bit_offset = 0,
.overrun = 0, .overrun = 0,
@@ -114,6 +115,9 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
s->mm__ = mm__; s->mm__ = mm__;
s->parser = parser; s->parser = parser;
s->backend_state = NULL; s->backend_state = NULL;
s->done = false;
s->pos = 0;
s->bit_offset = 0;
s->endianness = DEFAULT_ENDIANNESS; s->endianness = DEFAULT_ENDIANNESS;
// backend-specific initialization // backend-specific initialization
@@ -126,8 +130,13 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) { bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
assert(backends[s->parser->backend]->parse_chunk != NULL); assert(backends[s->parser->backend]->parse_chunk != NULL);
// no-op if parser is already done
if(s->done)
return true;
// input // input
HInputStream input_stream = { HInputStream input_stream = {
.pos = s->pos,
.index = 0, .index = 0,
.bit_offset = 0, .bit_offset = 0,
.overrun = 0, .overrun = 0,
@@ -138,18 +147,42 @@ bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
}; };
// process chunk // process chunk
backends[s->parser->backend]->parse_chunk(s, &input_stream); s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream);
s->endianness = input_stream.endianness; s->endianness = input_stream.endianness;
s->pos += input_stream.index;
s->bit_offset = input_stream.bit_offset;
return !input_stream.overrun; // parser wants no more input? done. return s->done;
} }
HParseResult* h_parse_finish(HSuspendedParser* s) { HParseResult* h_parse_finish(HSuspendedParser* s) {
assert(backends[s->parser->backend]->parse_chunk != NULL);
assert(backends[s->parser->backend]->parse_finish != NULL); assert(backends[s->parser->backend]->parse_finish != NULL);
HAllocator *mm__ = s->mm__; HAllocator *mm__ = s->mm__;
// signal end of input if parser is not already done
if(!s->done) {
HInputStream empty = {
.pos = s->pos,
.index = 0,
.bit_offset = 0,
.overrun = 0,
.endianness = s->endianness,
.length = 0,
.input = NULL,
.last_chunk = true
};
s->done = backends[s->parser->backend]->parse_chunk(s, &empty);
assert(s->done);
}
// extract result
HParseResult *r = backends[s->parser->backend]->parse_finish(s); HParseResult *r = backends[s->parser->backend]->parse_finish(s);
if(r)
r->bit_length = s->pos * 8 + s->bit_offset;
// NB: backend should have freed backend_state // NB: backend should have freed backend_state
h_free(s); h_free(s);

View file

@@ -72,6 +72,7 @@ typedef struct HCFStack_ HCFStack;
typedef struct HInputStream_ { typedef struct HInputStream_ {
// This should be considered to be a really big value type. // This should be considered to be a really big value type.
const uint8_t *input; const uint8_t *input;
size_t pos; // position of this chunk in a multi-chunk stream
size_t index; size_t index;
size_t length; size_t length;
char bit_offset; char bit_offset;
@@ -215,8 +216,11 @@ struct HSuspendedParser_ {
HAllocator *mm__; HAllocator *mm__;
const HParser *parser; const HParser *parser;
void *backend_state; void *backend_state;
bool done;
// the only part of HInputStream that carries across chunks // input stream state
size_t pos;
uint8_t bit_offset;
uint8_t endianness; uint8_t endianness;
}; };
@@ -227,12 +231,13 @@ typedef struct HParserBackendVTable_ {
void (*parse_start)(HSuspendedParser *s); void (*parse_start)(HSuspendedParser *s);
// parse_start should allocate s->backend_state. // parse_start should allocate s->backend_state.
void (*parse_chunk)(HSuspendedParser *s, HInputStream *input); bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
// when parse_chunk leaves input.overrun unset, parse is done. else: // if parser is done, return true. otherwise:
// parse_chunk MUST consume all input, integrating it into s->backend_state. // parse_chunk MUST consume all input, integrating it into s->backend_state.
// calling parse_chunk again after parse is done should have no effect. // parse_chunk will not be called again after it reports done.
HParseResult *(*parse_finish)(HSuspendedParser *s); HParseResult *(*parse_finish)(HSuspendedParser *s);
// parse_finish must free s->backend_state. // parse_finish must free s->backend_state.
// parse_finish will not be called before parse_chunk reports done.
} HParserBackendVTable; } HParserBackendVTable;