split h_llk_parse into start/chunk/finish internally
This commit is contained in:
parent
2845a9391e
commit
f1d6d0bc5e
2 changed files with 92 additions and 31 deletions
|
|
@ -259,15 +259,12 @@ void h_llk_free(HParser *parser)
|
||||||
|
|
||||||
/* LL(k) driver */
|
/* LL(k) driver */
|
||||||
|
|
||||||
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
|
// State of one LL(k) parse. It is created by llk_parse_start_, read by
// llk_parse_chunk_ and released by llk_parse_finish_.
typedef struct {
|
||||||
{
|
HArena *arena; // will hold the results
|
||||||
const HLLkTable *table = parser->backend_data;
|
HArena *tarena; // tmp, deleted after parse
|
||||||
assert(table != NULL);
|
// symbol stack of the driver; allocated from tarena
HSlist *stack;
|
||||||
|
HCountedArray *seq; // accumulates current parse result
|
||||||
HArena *arena = h_new_arena(mm__, 0); // will hold the results
|
} HLLkState;
|
||||||
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
|
|
||||||
HSlist *stack = h_slist_new(tarena);
|
|
||||||
HCountedArray *seq = h_carray_new(arena); // accumulates current parse result
|
|
||||||
|
|
||||||
// in order to construct the parse tree, we delimit the symbol stack into
|
// in order to construct the parse tree, we delimit the symbol stack into
|
||||||
// frames corresponding to production right-hand sides. since only left-most
|
// frames corresponding to production right-hand sides. since only left-most
|
||||||
|
|
@ -279,24 +276,57 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
// execute on their corresponding result.
|
// execute on their corresponding result.
|
||||||
// also on the stack below the mark, we store the previously accumulated
|
// also on the stack below the mark, we store the previously accumulated
|
||||||
// value for the surrounding production.
|
// value for the surrounding production.
|
||||||
void *mark = h_arena_malloc(tarena, 1);
|
// dummy exists only to give MARK a distinct address; its value is never read
// in the code visible here.
static int dummy;
|
||||||
|
// The driver compares popped stack entries against MARK by pointer identity.
static void *MARK = &dummy; // stack frame delimiter
|
||||||
|
|
||||||
|
// Create the state for a new LL(k) parse.
//
// mm__   - allocator handed to h_new_arena for both arenas
// parser - its backend_data must be a non-NULL HLLkTable (asserted)
//
// Returns a fresh HLLkState whose stack holds only the grammar's start symbol
// and whose result sequence is empty. llk_parse_finish_ frees this state.
static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
|
||||||
|
{
|
||||||
|
const HLLkTable *table = parser->backend_data;
|
||||||
|
assert(table != NULL);
|
||||||
|
|
||||||
|
// NOTE(review): h_new presumably allocates through mm__ - confirm against its definition
HLLkState *s = h_new(HLLkState, 1);
|
||||||
|
s->arena = h_new_arena(mm__, 0);
|
||||||
|
s->tarena = h_new_arena(mm__, 0);
|
||||||
|
// the stack is allocated from the temporary arena, the result sequence from
// the result arena
s->stack = h_slist_new(s->tarena);
|
||||||
|
s->seq = h_carray_new(s->arena);
|
||||||
|
|
||||||
// initialize with the start symbol on the stack.
|
// initialize with the start symbol on the stack.
|
||||||
h_slist_push(stack, table->start);
|
h_slist_push(s->stack, table->start);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns partial result or NULL
|
||||||
|
static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
|
||||||
|
HInputStream* stream, bool last_chunk)
|
||||||
|
{
|
||||||
|
HParsedToken *tok = NULL; // will hold result token
|
||||||
|
HCFChoice *x = NULL; // current symbol (from top of stack)
|
||||||
|
|
||||||
|
const HLLkTable *table = parser->backend_data;
|
||||||
|
assert(table != NULL);
|
||||||
|
|
||||||
|
HArena *arena = s->arena;
|
||||||
|
HArena *tarena = s->tarena;
|
||||||
|
HSlist *stack = s->stack;
|
||||||
|
HCountedArray *seq = s->seq;
|
||||||
|
|
||||||
|
if(!seq)
|
||||||
|
return NULL; // parse already failed
|
||||||
|
|
||||||
// when we empty the stack, the parse is complete.
|
// when we empty the stack, the parse is complete.
|
||||||
while(!h_slist_empty(stack)) {
|
while(!h_slist_empty(stack)) {
|
||||||
// pop top of stack for inspection
|
// pop top of stack for inspection
|
||||||
HCFChoice *x = h_slist_pop(stack);
|
x = h_slist_pop(stack);
|
||||||
assert(x != NULL);
|
assert(x != NULL);
|
||||||
|
|
||||||
if(x != mark && x->type == HCF_CHOICE) {
|
if(x != MARK && x->type == HCF_CHOICE) {
|
||||||
// x is a nonterminal; apply the appropriate production and continue
|
// x is a nonterminal; apply the appropriate production and continue
|
||||||
|
|
||||||
// push stack frame
|
// push stack frame
|
||||||
h_slist_push(stack, seq); // save current partial value
|
h_slist_push(stack, seq); // save current partial value
|
||||||
h_slist_push(stack, x); // save the nonterminal
|
h_slist_push(stack, x); // save the nonterminal
|
||||||
h_slist_push(stack, mark); // frame delimiter
|
h_slist_push(stack, MARK); // frame delimiter
|
||||||
|
|
||||||
// open a fresh result sequence
|
// open a fresh result sequence
|
||||||
seq = h_carray_new(arena);
|
seq = h_carray_new(arena);
|
||||||
|
|
@ -319,11 +349,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
}
|
}
|
||||||
|
|
||||||
// the top of stack is such that there will be a result...
|
// the top of stack is such that there will be a result...
|
||||||
HParsedToken *tok; // will hold result token
|
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->index = stream->index;
|
tok->index = stream->index;
|
||||||
tok->bit_offset = stream->bit_offset;
|
tok->bit_offset = stream->bit_offset;
|
||||||
if(x == mark) {
|
if(x == MARK) {
|
||||||
// hit stack frame boundary...
|
// hit stack frame boundary...
|
||||||
// wrap the accumulated parse result, this sequence is finished
|
// wrap the accumulated parse result, this sequence is finished
|
||||||
tok->token_type = TT_SEQUENCE;
|
tok->token_type = TT_SEQUENCE;
|
||||||
|
|
@ -344,13 +373,15 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
case HCF_END:
|
case HCF_END:
|
||||||
if(!stream->overrun)
|
if(!stream->overrun)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
|
if(!last_chunk)
|
||||||
|
goto need_input;
|
||||||
h_arena_free(arena, tok);
|
h_arena_free(arena, tok);
|
||||||
tok = NULL;
|
tok = NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
if(stream->overrun)
|
if(stream->overrun)
|
||||||
goto no_parse;
|
goto need_input;
|
||||||
if(input != x->chr)
|
if(input != x->chr)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
|
|
@ -359,7 +390,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
|
|
||||||
case HCF_CHARSET:
|
case HCF_CHARSET:
|
||||||
if(stream->overrun)
|
if(stream->overrun)
|
||||||
goto no_parse;
|
goto need_input;
|
||||||
if(!charset_isset(x->charset, input))
|
if(!charset_isset(x->charset, input))
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
|
|
@ -388,16 +419,46 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
h_carray_append(seq, tok);
|
h_carray_append(seq, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// success
|
||||||
// since we started with a single nonterminal on the stack, seq should
|
// since we started with a single nonterminal on the stack, seq should
|
||||||
// contain exactly the parse result.
|
// contain exactly the parse result.
|
||||||
assert(seq->used == 1);
|
assert(seq->used == 1);
|
||||||
h_delete_arena(tarena);
|
return seq;
|
||||||
return make_result(arena, seq->elements[0]);
|
|
||||||
|
|
||||||
no_parse:
|
no_parse:
|
||||||
h_delete_arena(tarena);
|
|
||||||
h_delete_arena(arena);
|
h_delete_arena(arena);
|
||||||
|
s->arena = NULL;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
need_input:
|
||||||
|
if(last_chunk)
|
||||||
|
goto no_parse;
|
||||||
|
h_arena_free(arena, tok); // no result, yet
|
||||||
|
h_slist_push(stack, x); // try this symbol again next time
|
||||||
|
return seq;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finish a parse: build the final result, if any, and release the state.
//
// mm__ - not referenced by name in this body
//        NOTE(review): h_free presumably uses it implicitly - confirm
// s    - state from llk_parse_start_; must not be used after this call
//
// Returns the result built by make_result from the single element of s->seq,
// or NULL when s->seq is NULL.
static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
|
||||||
|
{
|
||||||
|
HParseResult *res = NULL;
|
||||||
|
|
||||||
|
if(s->seq) {
|
||||||
|
// the final sequence is expected to hold exactly one element
assert(s->seq->used == 1);
|
||||||
|
res = make_result(s->arena, s->seq->elements[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// only the temporary arena is deleted here; s->arena is passed to
// make_result above and is not deleted in this function
h_delete_arena(s->tarena);
|
||||||
|
h_free(s);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse `stream` in one call by running start, a single chunk, and finish.
//
// Returns whatever llk_parse_finish_ returns: a parse result, or NULL when
// llk_parse_chunk_ returned NULL.
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
|
||||||
|
{
|
||||||
|
HLLkState *s = llk_parse_start_(mm__, parser);
|
||||||
|
|
||||||
|
// the whole stream is passed as the one and final chunk; the return value is
// stored back into the state for llk_parse_finish_ to read
s->seq = llk_parse_chunk_(s, parser, stream, true /* last chunk */);
|
||||||
|
|
||||||
|
return llk_parse_finish_(mm__, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -225,13 +225,13 @@ typedef struct HParserBackendVTable_ {
|
||||||
void (*free)(HParser* parser);
|
void (*free)(HParser* parser);
|
||||||
|
|
||||||
void (*parse_start)(HSuspendedParser *s);
|
void (*parse_start)(HSuspendedParser *s);
|
||||||
// parse_start should allocate backend_state.
|
// parse_start should allocate s->backend_state.
|
||||||
void (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
|
void (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
|
||||||
// when parse_chunk leaves input.overrun unset, parse is done. else:
|
// when parse_chunk leaves input.overrun unset, parse is done. else:
|
||||||
// parse_chunk MUST consume all input, integrating it into backend_state.
|
// parse_chunk MUST consume all input, integrating it into s->backend_state.
|
||||||
// calling parse_chunk again after parse is done should have no effect.
|
// calling parse_chunk again after parse is done should have no effect.
|
||||||
HParseResult *(*parse_finish)(HSuspendedParser *s);
|
HParseResult *(*parse_finish)(HSuspendedParser *s);
|
||||||
// parse_finish must free backend_state.
|
// parse_finish must free s->backend_state.
|
||||||
} HParserBackendVTable;
|
} HParserBackendVTable;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue