2013-05-24 15:07:47 +02:00
|
|
|
#include <assert.h>
|
2012-05-26 16:00:43 +02:00
|
|
|
#include "parser_internal.h"
|
|
|
|
|
|
|
|
|
|
// TODO: split this up.
|
|
|
|
|
typedef struct {
|
|
|
|
|
const HParser *p, *sep;
|
|
|
|
|
size_t count;
|
|
|
|
|
bool min_p;
|
|
|
|
|
} HRepeat;
|
|
|
|
|
|
|
|
|
|
// Packrat implementation of repetition.
//
// env   - an HRepeat describing element parser, optional separator and count.
// state - current parse state; its input stream is advanced past every
//         element that was accepted.
//
// Returns a TT_SEQUENCE result holding the ASTs of the parsed elements
// (elements whose result has no AST are counted but not stored), or NULL
// when fewer than `count` elements could be parsed.
static HParseResult *parse_many(void* env, HParseState *state) {
  HRepeat *rep = (HRepeat*) env;

  // Initial capacity: the requested count, defaulting to 4 and capped at
  // 1024 so that a huge count does not allocate before anything is parsed.
  size_t capacity = rep->count;
  if (capacity == 0)
    capacity = 4;
  else if (capacity > 1024)
    capacity = 1024;
  HCountedArray *items = h_carray_new_sized(state->arena, capacity);

  size_t parsed = 0;
  bool failed = false;
  HInputStream saved;  // input position at the start of the current iteration

  while (rep->min_p || parsed < rep->count) {
    saved = state->input_stream;

    // A separator is only expected between elements, never before the first.
    if (parsed > 0 && rep->sep != NULL && h_do_parse(rep->sep, state) == NULL) {
      failed = true;
      break;
    }

    HParseResult *item = h_do_parse(rep->p, state);
    if (item == NULL) {
      failed = true;
      break;
    }
    if (item->ast != NULL)
      h_carray_append(items, (void*)item->ast);
    parsed++;
  }

  // A failed iteration is undone by rewinding to where it started; the
  // elements parsed before it still count towards the result.
  bool too_few = parsed < rep->count;
  if (failed || too_few)
    state->input_stream = saved;
  if (too_few)
    return NULL;

  HParsedToken *tok = a_new(HParsedToken, 1);
  tok->token_type = TT_SEQUENCE;
  tok->seq = items;
  return make_result(state->arena, tok);
}
|
|
|
|
|
|
2012-12-18 18:10:40 -05:00
|
|
|
static bool many_isValidRegular(void *env) {
|
|
|
|
|
HRepeat *repeat = (HRepeat*)env;
|
|
|
|
|
return (repeat->p->vtable->isValidRegular(repeat->p->env) &&
|
2013-05-24 15:07:47 +02:00
|
|
|
(repeat->sep == NULL ||
|
|
|
|
|
repeat->sep->vtable->isValidRegular(repeat->sep->env)));
|
2012-12-18 18:10:40 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool many_isValidCF(void *env) {
|
|
|
|
|
HRepeat *repeat = (HRepeat*)env;
|
|
|
|
|
return (repeat->p->vtable->isValidCF(repeat->p->env) &&
|
2013-05-24 15:07:47 +02:00
|
|
|
(repeat->sep == NULL ||
|
|
|
|
|
repeat->sep->vtable->isValidCF(repeat->sep->env)));
|
2012-12-18 18:10:40 -05:00
|
|
|
}
|
|
|
|
|
|
2015-09-16 20:06:24 +02:00
|
|
|
// turn (_ x (_ y (_ z ()))) into (x y z) where '_' are optional
|
|
|
|
|
static HParsedToken *reshape_many(const HParseResult *p, void *user)
|
|
|
|
|
{
|
|
|
|
|
HCountedArray *seq = h_carray_new(p->arena);
|
|
|
|
|
|
|
|
|
|
const HParsedToken *tok = p->ast;
|
|
|
|
|
while(tok) {
|
|
|
|
|
assert(tok->token_type == TT_SEQUENCE);
|
|
|
|
|
if(tok->seq->used > 0) {
|
|
|
|
|
size_t n = tok->seq->used;
|
|
|
|
|
assert(n <= 3);
|
|
|
|
|
h_carray_append(seq, tok->seq->elements[n-2]);
|
|
|
|
|
tok = tok->seq->elements[n-1];
|
|
|
|
|
} else {
|
|
|
|
|
tok = NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HParsedToken *res = a_new_(p->arena, HParsedToken, 1);
|
|
|
|
|
res->token_type = TT_SEQUENCE;
|
|
|
|
|
res->seq = seq;
|
|
|
|
|
res->index = p->ast->index;
|
|
|
|
|
res->bit_offset = p->ast->bit_offset;
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-25 03:35:42 +02:00
|
|
|
// Desugar a repetition parser into context-free productions on stk__.
//
// mm__  - allocator (also used implicitly by the HCFS_* macros).
// stk__ - desugaring stack (also used implicitly by the HCFS_* macros).
// env   - the HRepeat being desugared.
static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
  // TODO: refactor this.
  HRepeat *rep = (HRepeat*)env;

  if (!rep->min_p) {
    // Exact-count repetition is not expected to reach this function; the
    // expansion below is only executed when assertions are compiled out.
    assert(!"Unreachable");
    HCFS_BEGIN_CHOICE() {
      HCFS_BEGIN_SEQ() {
        for (size_t i = 0; i < rep->count; i++) {
          if (rep->sep != NULL && i != 0)
            HCFS_DESUGAR(rep->sep); // Should be ignored.
          HCFS_DESUGAR(rep->p);
        }
      } HCFS_END_SEQ();
    } HCFS_END_CHOICE();
    return;
  }

  // Only minimum counts of 0 and 1 can be expressed by the grammar below.
  if (rep->count > 1) {
    assert_message(0, "'h_repeat_n' is not context-free, can't be desugared");
    return;
  }

  /* many(A) =>
         Ma  -> A Mar
             -> \epsilon (but not if many1/sepBy1 is used)
         Mar -> Sep A Mar
             -> \epsilon
  */

  HCFS_BEGIN_CHOICE() {
    // Ma -> A Mar
    // NOTE(review): this sequence is closed with a plain brace and has no
    // HCFS_END_SEQ(), unlike every other sequence here -- confirm that this
    // is intentional.
    HCFS_BEGIN_SEQ() {
      HCFS_DESUGAR(rep->p);
      HCFS_BEGIN_CHOICE() { // Mar
        // Mar -> Sep A Mar
        HCFS_BEGIN_SEQ() {
          if (rep->sep != NULL) {
            HCFS_DESUGAR(rep->sep);
          }
          //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry.
          HCFS_DESUGAR(rep->p);
          HCFS_APPEND(HCFS_THIS_CHOICE);
        } HCFS_END_SEQ();
        // Mar -> \epsilon
        HCFS_BEGIN_SEQ() {
        } HCFS_END_SEQ();
      } HCFS_END_CHOICE(); // Mar
    }
    // Ma -> \epsilon, only when zero elements are acceptable.
    if (rep->count == 0) {
      HCFS_BEGIN_SEQ() {
        //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p()));
      } HCFS_END_SEQ();
    }
    // Flatten the nested result back into a plain sequence.
    HCFS_THIS_CHOICE->reshape = reshape_many;
  } HCFS_END_CHOICE();
}
|
|
|
|
|
|
2013-04-22 18:06:17 -07:00
|
|
|
static bool many_ctrvm(HRVMProg *prog, void *env) {
|
|
|
|
|
HRepeat *repeat = (HRepeat*)env;
|
|
|
|
|
uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
|
2013-05-24 15:07:47 +02:00
|
|
|
// TODO: implement min & max properly. Right now, it's always
|
|
|
|
|
// max==inf, min={0,1}
|
|
|
|
|
|
|
|
|
|
// Structure:
|
|
|
|
|
// Min == 0:
|
|
|
|
|
// FORK end // if Min == 0
|
|
|
|
|
// GOTO mid
|
|
|
|
|
// nxt: <SEP>
|
|
|
|
|
// mid: <ELEM>
|
|
|
|
|
// FORK nxt
|
|
|
|
|
// end:
|
|
|
|
|
|
|
|
|
|
if (repeat->min_p) {
|
2013-04-22 18:06:17 -07:00
|
|
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
2013-05-24 15:07:47 +02:00
|
|
|
assert(repeat->count < 2); // TODO: The other cases should be supported later.
|
2013-06-24 21:23:28 +02:00
|
|
|
uint16_t end_fork = 0xFFFF; // Shut up GCC
|
2013-05-24 15:07:47 +02:00
|
|
|
if (repeat->count == 0)
|
|
|
|
|
end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF);
|
|
|
|
|
uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF);
|
|
|
|
|
uint16_t nxt = h_rvm_get_ip(prog);
|
|
|
|
|
if (repeat->sep != NULL) {
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
|
|
|
|
if (!h_compile_regex(prog, repeat->sep))
|
|
|
|
|
return false;
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
|
|
|
|
|
}
|
|
|
|
|
h_rvm_patch_arg(prog, goto_mid, h_rvm_get_ip(prog));
|
|
|
|
|
if (!h_compile_regex(prog, repeat->p))
|
2013-04-26 20:36:54 -07:00
|
|
|
return false;
|
2013-05-24 15:07:47 +02:00
|
|
|
h_rvm_insert_insn(prog, RVM_FORK, nxt);
|
2013-06-24 21:23:28 +02:00
|
|
|
if (repeat->count == 0)
|
|
|
|
|
h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
|
2013-05-24 15:07:47 +02:00
|
|
|
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
|
|
|
|
for (size_t i = 0; i < repeat->count; i++) {
|
|
|
|
|
if (repeat->sep != NULL && i != 0) {
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
|
|
|
|
if (!h_compile_regex(prog, repeat->sep))
|
|
|
|
|
return false;
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
|
|
|
|
|
}
|
|
|
|
|
if (!h_compile_regex(prog, repeat->p))
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
|
|
|
|
|
return true;
|
2013-04-26 20:36:54 -07:00
|
|
|
}
|
2013-04-22 18:06:17 -07:00
|
|
|
}
|
|
|
|
|
|
2012-05-26 16:00:43 +02:00
|
|
|
static const HParserVtable many_vt = {
|
|
|
|
|
.parse = parse_many,
|
2012-12-18 18:10:40 -05:00
|
|
|
.isValidRegular = many_isValidRegular,
|
|
|
|
|
.isValidCF = many_isValidCF,
|
2013-02-03 02:18:19 -05:00
|
|
|
.desugar = desugar_many,
|
2013-04-22 18:06:17 -07:00
|
|
|
.compile_to_rvm = many_ctrvm,
|
2015-10-03 17:54:11 +02:00
|
|
|
.higher = true,
|
2012-05-26 16:00:43 +02:00
|
|
|
};
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_many(const HParser* p) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_many__m(&system_allocator, p);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// Zero or more repetitions of p, with no separator.
// The parser and its environment are allocated through mm__.
HParser* h_many__m(HAllocator* mm__, const HParser* p) {
  HRepeat *env = h_new(HRepeat, 1);
  *env = (HRepeat){ .p = p, .sep = NULL, .count = 0, .min_p = true };
  return h_new_parser(mm__, &many_vt, env);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_many1(const HParser* p) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_many1__m(&system_allocator, p);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// One or more repetitions of p, with no separator.
// The parser and its environment are allocated through mm__.
HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
  HRepeat *env = h_new(HRepeat, 1);
  *env = (HRepeat){ .p = p, .sep = NULL, .count = 1, .min_p = true };
  return h_new_parser(mm__, &many_vt, env);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_repeat_n(const HParser* p, const size_t n) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_repeat_n__m(&system_allocator, p, n);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// Exactly n repetitions of p, with no separator (min_p is false, so the
// count is exact rather than a lower bound).
// The parser and its environment are allocated through mm__.
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
  HRepeat *env = h_new(HRepeat, 1);
  *env = (HRepeat){ .p = p, .sep = NULL, .count = n, .min_p = false };
  return h_new_parser(mm__, &many_vt, env);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_sepBy__m(&system_allocator, p, sep);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// Zero or more repetitions of p, with sep parsed between consecutive
// elements. The parser and its environment are allocated through mm__.
HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
  HRepeat *env = h_new(HRepeat, 1);
  *env = (HRepeat){ .p = p, .sep = sep, .count = 0, .min_p = true };
  return h_new_parser(mm__, &many_vt, env);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_sepBy1__m(&system_allocator, p, sep);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// One or more repetitions of p, with sep parsed between consecutive
// elements. The parser and its environment are allocated through mm__.
HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
  HRepeat *env = h_new(HRepeat, 1);
  *env = (HRepeat){ .p = p, .sep = sep, .count = 1, .min_p = true };
  return h_new_parser(mm__, &many_vt, env);
}
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
const HParser *length;
|
|
|
|
|
const HParser *value;
|
|
|
|
|
} HLenVal;
|
|
|
|
|
|
|
|
|
|
// Parse a length with lv->length, then exactly that many lv->value elements.
//
// env   - the HLenVal holding the two parsers.
// state - current parse state.
//
// Returns NULL if the length parser fails, otherwise whatever parse_many
// returns for an exact repetition of `value`. A length parser that does not
// produce a TT_UINT token is reported through h_platform_errx.
static HParseResult* parse_length_value(void *env, HParseState *state) {
  HLenVal *lv = (HLenVal*)env;

  HParseResult *len = h_do_parse(lv->length, state);
  if (!len)
    return NULL;

  // A successful result can carry no AST at all; report that as the same
  // misuse instead of dereferencing a NULL token.
  if (len->ast == NULL || len->ast->token_type != TT_UINT)
    h_platform_errx(1, "Length parser must return an unsigned integer");

  // TODO: allocate this using public functions
  HRepeat repeat = {
    .p = lv->value,
    .sep = NULL,
    .count = len->ast->uint,
    .min_p = false  // the parsed length is an exact count, not a minimum
  };
  return parse_many(&repeat, state);
}
|
|
|
|
|
|
|
|
|
|
static const HParserVtable length_value_vt = {
|
|
|
|
|
.parse = parse_length_value,
|
2012-12-18 18:10:40 -05:00
|
|
|
.isValidRegular = h_false,
|
|
|
|
|
.isValidCF = h_false,
|
2012-05-26 16:00:43 +02:00
|
|
|
};
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_length_value(const HParser* length, const HParser* value) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_length_value__m(&system_allocator, length, value);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// Build a parser that runs `length` and then repeats `value` as many times
// as the parsed length says. The parser and its environment are allocated
// through mm__.
HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
  HLenVal *env = h_new(HLenVal, 1);
  *env = (HLenVal){ .length = length, .value = value };
  return h_new_parser(mm__, &length_value_vt, env);
}
|