move chunk position into HInputStream and simplify internal chunk API
This commit is contained in:
parent
e385e0e5de
commit
42d35fb883
3 changed files with 57 additions and 33 deletions
|
|
@ -266,7 +266,6 @@ typedef struct {
|
||||||
HArena *tarena; // tmp, deleted after parse
|
HArena *tarena; // tmp, deleted after parse
|
||||||
HSlist *stack;
|
HSlist *stack;
|
||||||
HCountedArray *seq; // accumulates current parse result
|
HCountedArray *seq; // accumulates current parse result
|
||||||
size_t index; // input position in bytes
|
|
||||||
|
|
||||||
uint8_t *buf; // for lookahead across chunk boundaries
|
uint8_t *buf; // for lookahead across chunk boundaries
|
||||||
// allocated to size 2*kmax
|
// allocated to size 2*kmax
|
||||||
|
|
@ -298,7 +297,6 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
|
||||||
s->tarena = h_new_arena(mm__, 0);
|
s->tarena = h_new_arena(mm__, 0);
|
||||||
s->stack = h_slist_new(s->tarena);
|
s->stack = h_slist_new(s->tarena);
|
||||||
s->seq = h_carray_new(s->arena);
|
s->seq = h_carray_new(s->arena);
|
||||||
s->index = 0;
|
|
||||||
s->buf = h_arena_malloc(s->tarena, 2 * table->kmax);
|
s->buf = h_arena_malloc(s->tarena, 2 * table->kmax);
|
||||||
|
|
||||||
s->win.input = s->buf;
|
s->win.input = s->buf;
|
||||||
|
|
@ -350,13 +348,14 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream)
|
||||||
// (0 kmax )
|
// (0 kmax )
|
||||||
// ... \_old_/\_new_/ ...
|
// ... \_old_/\_new_/ ...
|
||||||
//
|
//
|
||||||
s->index += len; // position of the window shifts up
|
s->win.pos += len; // position of the window shifts up
|
||||||
len = s->win.length - s->win.index;
|
len = s->win.length - s->win.index;
|
||||||
assert(len <= kmax);
|
assert(len <= kmax);
|
||||||
memmove(s->buf + kmax - len, s->buf + s->win.index, len);
|
memmove(s->buf + kmax - len, s->buf + s->win.index, len);
|
||||||
} else {
|
} else {
|
||||||
// window not active? save stream to window.
|
// window not active? save stream to window.
|
||||||
s->index -= kmax; // window starts kmax bytes below next chunk
|
// buffer starts kmax bytes below chunk boundary
|
||||||
|
s->win.pos = stream->pos - kmax;
|
||||||
memcpy(s->buf + kmax - len, stream->input + stream->index, len);
|
memcpy(s->buf + kmax - len, stream->input + stream->index, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -439,7 +438,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
|
|
||||||
// the top of stack is such that there will be a result...
|
// the top of stack is such that there will be a result...
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->index = s->index + stream->index;
|
tok->index = stream->pos + stream->index;
|
||||||
tok->bit_offset = stream->bit_offset;
|
tok->bit_offset = stream->bit_offset;
|
||||||
if(x == MARK) {
|
if(x == MARK) {
|
||||||
// hit stack frame boundary...
|
// hit stack frame boundary...
|
||||||
|
|
@ -461,7 +460,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
// when old chunk consumed from window, switch to new chunk
|
// when old chunk consumed from window, switch to new chunk
|
||||||
if(s->win.length > 0 && s->win.index >= kmax) {
|
if(s->win.length > 0 && s->win.index >= kmax) {
|
||||||
s->win.length = 0; // disable the window
|
s->win.length = 0; // disable the window
|
||||||
s->index += kmax; // new chunk starts kmax bytes above the window
|
|
||||||
stream = chunk;
|
stream = chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -519,13 +517,11 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
// since we started with a single nonterminal on the stack, seq should
|
// since we started with a single nonterminal on the stack, seq should
|
||||||
// contain exactly the parse result.
|
// contain exactly the parse result.
|
||||||
assert(seq->used == 1);
|
assert(seq->used == 1);
|
||||||
s->index += stream->index;
|
|
||||||
return seq;
|
return seq;
|
||||||
|
|
||||||
no_parse:
|
no_parse:
|
||||||
h_delete_arena(arena);
|
h_delete_arena(arena);
|
||||||
s->arena = NULL;
|
s->arena = NULL;
|
||||||
s->index += stream->index;
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
need_input:
|
need_input:
|
||||||
|
|
@ -534,7 +530,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
if(tok)
|
if(tok)
|
||||||
h_arena_free(arena, tok); // no result, yet
|
h_arena_free(arena, tok); // no result, yet
|
||||||
h_slist_push(stack, x); // try this symbol again next time
|
h_slist_push(stack, x); // try this symbol again next time
|
||||||
s->index += stream->index;
|
|
||||||
return seq;
|
return seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -545,7 +540,6 @@ static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
|
||||||
if(s->seq) {
|
if(s->seq) {
|
||||||
assert(s->seq->used == 1);
|
assert(s->seq->used == 1);
|
||||||
res = make_result(s->arena, s->seq->elements[0]);
|
res = make_result(s->arena, s->seq->elements[0]);
|
||||||
res->bit_length = s->index*8;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
h_delete_arena(s->tarena);
|
h_delete_arena(s->tarena);
|
||||||
|
|
@ -560,7 +554,11 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
assert(stream->last_chunk);
|
assert(stream->last_chunk);
|
||||||
s->seq = llk_parse_chunk_(s, parser, stream);
|
s->seq = llk_parse_chunk_(s, parser, stream);
|
||||||
|
|
||||||
return llk_parse_finish_(mm__, s);
|
HParseResult *res = llk_parse_finish_(mm__, s);
|
||||||
|
if(res)
|
||||||
|
res->bit_length = stream->index * 8 + stream->bit_offset;
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_llk_parse_start(HSuspendedParser *s)
|
void h_llk_parse_start(HSuspendedParser *s)
|
||||||
|
|
@ -568,29 +566,17 @@ void h_llk_parse_start(HSuspendedParser *s)
|
||||||
s->backend_state = llk_parse_start_(s->mm__, s->parser);
|
s->backend_state = llk_parse_start_(s->mm__, s->parser);
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
|
bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input)
|
||||||
{
|
{
|
||||||
HLLkState *state = s->backend_state;
|
HLLkState *state = s->backend_state;
|
||||||
|
|
||||||
state->seq = llk_parse_chunk_(state, s->parser, input);
|
state->seq = llk_parse_chunk_(state, s->parser, input);
|
||||||
|
|
||||||
|
return (state->seq == NULL || h_slist_empty(state->stack));
|
||||||
}
|
}
|
||||||
|
|
||||||
HParseResult *h_llk_parse_finish(HSuspendedParser *s)
|
HParseResult *h_llk_parse_finish(HSuspendedParser *s)
|
||||||
{
|
{
|
||||||
HLLkState *state = s->backend_state;
|
|
||||||
HInputStream empty = {
|
|
||||||
.index = 0,
|
|
||||||
.bit_offset = 0,
|
|
||||||
.overrun = 0,
|
|
||||||
.endianness = s->endianness,
|
|
||||||
.length = 0,
|
|
||||||
.input = NULL,
|
|
||||||
.last_chunk = true
|
|
||||||
};
|
|
||||||
|
|
||||||
// signal end of input (no-op parse already done)
|
|
||||||
state->seq = llk_parse_chunk_(state, s->parser, &empty);
|
|
||||||
|
|
||||||
return llk_parse_finish_(s->mm__, s->backend_state);
|
return llk_parse_finish_(s->mm__, s->backend_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
39
src/hammer.c
39
src/hammer.c
|
|
@ -51,6 +51,7 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length
|
||||||
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
// Set up a parse state...
|
// Set up a parse state...
|
||||||
HInputStream input_stream = {
|
HInputStream input_stream = {
|
||||||
|
.pos = 0,
|
||||||
.index = 0,
|
.index = 0,
|
||||||
.bit_offset = 0,
|
.bit_offset = 0,
|
||||||
.overrun = 0,
|
.overrun = 0,
|
||||||
|
|
@ -114,6 +115,9 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
|
||||||
s->mm__ = mm__;
|
s->mm__ = mm__;
|
||||||
s->parser = parser;
|
s->parser = parser;
|
||||||
s->backend_state = NULL;
|
s->backend_state = NULL;
|
||||||
|
s->done = false;
|
||||||
|
s->pos = 0;
|
||||||
|
s->bit_offset = 0;
|
||||||
s->endianness = DEFAULT_ENDIANNESS;
|
s->endianness = DEFAULT_ENDIANNESS;
|
||||||
|
|
||||||
// backend-specific initialization
|
// backend-specific initialization
|
||||||
|
|
@ -126,8 +130,13 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) {
|
||||||
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
|
bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
|
||||||
assert(backends[s->parser->backend]->parse_chunk != NULL);
|
assert(backends[s->parser->backend]->parse_chunk != NULL);
|
||||||
|
|
||||||
|
// no-op if parser is already done
|
||||||
|
if(s->done)
|
||||||
|
return true;
|
||||||
|
|
||||||
// input
|
// input
|
||||||
HInputStream input_stream = {
|
HInputStream input_stream = {
|
||||||
|
.pos = s->pos,
|
||||||
.index = 0,
|
.index = 0,
|
||||||
.bit_offset = 0,
|
.bit_offset = 0,
|
||||||
.overrun = 0,
|
.overrun = 0,
|
||||||
|
|
@ -138,19 +147,43 @@ bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) {
|
||||||
};
|
};
|
||||||
|
|
||||||
// process chunk
|
// process chunk
|
||||||
backends[s->parser->backend]->parse_chunk(s, &input_stream);
|
s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream);
|
||||||
s->endianness = input_stream.endianness;
|
s->endianness = input_stream.endianness;
|
||||||
|
s->pos += input_stream.index;
|
||||||
|
s->bit_offset = input_stream.bit_offset;
|
||||||
|
|
||||||
return !input_stream.overrun; // parser wants no more input? done.
|
return s->done;
|
||||||
}
|
}
|
||||||
|
|
||||||
HParseResult* h_parse_finish(HSuspendedParser* s) {
|
HParseResult* h_parse_finish(HSuspendedParser* s) {
|
||||||
|
assert(backends[s->parser->backend]->parse_chunk != NULL);
|
||||||
assert(backends[s->parser->backend]->parse_finish != NULL);
|
assert(backends[s->parser->backend]->parse_finish != NULL);
|
||||||
|
|
||||||
HAllocator *mm__ = s->mm__;
|
HAllocator *mm__ = s->mm__;
|
||||||
|
|
||||||
|
// signal end of input if parser is not already done
|
||||||
|
if(!s->done) {
|
||||||
|
HInputStream empty = {
|
||||||
|
.pos = s->pos,
|
||||||
|
.index = 0,
|
||||||
|
.bit_offset = 0,
|
||||||
|
.overrun = 0,
|
||||||
|
.endianness = s->endianness,
|
||||||
|
.length = 0,
|
||||||
|
.input = NULL,
|
||||||
|
.last_chunk = true
|
||||||
|
};
|
||||||
|
|
||||||
|
s->done = backends[s->parser->backend]->parse_chunk(s, &empty);
|
||||||
|
assert(s->done);
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract result
|
||||||
HParseResult *r = backends[s->parser->backend]->parse_finish(s);
|
HParseResult *r = backends[s->parser->backend]->parse_finish(s);
|
||||||
// NB: backend should have freed backend_state
|
if(r)
|
||||||
|
r->bit_length = s->pos * 8 + s->bit_offset;
|
||||||
|
|
||||||
|
// NB: backend should have freed backend_state
|
||||||
h_free(s);
|
h_free(s);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,7 @@ typedef struct HCFStack_ HCFStack;
|
||||||
typedef struct HInputStream_ {
|
typedef struct HInputStream_ {
|
||||||
// This should be considered to be a really big value type.
|
// This should be considered to be a really big value type.
|
||||||
const uint8_t *input;
|
const uint8_t *input;
|
||||||
|
size_t pos; // position of this chunk in a multi-chunk stream
|
||||||
size_t index;
|
size_t index;
|
||||||
size_t length;
|
size_t length;
|
||||||
char bit_offset;
|
char bit_offset;
|
||||||
|
|
@ -215,8 +216,11 @@ struct HSuspendedParser_ {
|
||||||
HAllocator *mm__;
|
HAllocator *mm__;
|
||||||
const HParser *parser;
|
const HParser *parser;
|
||||||
void *backend_state;
|
void *backend_state;
|
||||||
|
bool done;
|
||||||
|
|
||||||
// the only part of HInputStream that carries across chunks
|
// input stream state
|
||||||
|
size_t pos;
|
||||||
|
uint8_t bit_offset;
|
||||||
uint8_t endianness;
|
uint8_t endianness;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -227,12 +231,13 @@ typedef struct HParserBackendVTable_ {
|
||||||
|
|
||||||
void (*parse_start)(HSuspendedParser *s);
|
void (*parse_start)(HSuspendedParser *s);
|
||||||
// parse_start should allocate s->backend_state.
|
// parse_start should allocate s->backend_state.
|
||||||
void (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
|
bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
|
||||||
// when parse_chunk leaves input.overrun unset, parse is done. else:
|
// if parser is done, return true. otherwise:
|
||||||
// parse_chunk MUST consume all input, integrating it into s->backend_state.
|
// parse_chunk MUST consume all input, integrating it into s->backend_state.
|
||||||
// calling parse_chunk again after parse is done should have no effect.
|
// parse_chunk will not be called again after it reports done.
|
||||||
HParseResult *(*parse_finish)(HSuspendedParser *s);
|
HParseResult *(*parse_finish)(HSuspendedParser *s);
|
||||||
// parse_finish must free s->backend_state.
|
// parse_finish must free s->backend_state.
|
||||||
|
// parse_finish will not be called before parse_chunk reports done.
|
||||||
} HParserBackendVTable;
|
} HParserBackendVTable;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue