/*
 * Merge note -- conflicts:
 *   src/Makefile src/backends/packrat.c src/compile.c src/hammer.h
 *   src/internal.h src/parsers/action.c src/parsers/and.c
 *   src/parsers/attr_bool.c src/parsers/bits.c src/parsers/butnot.c
 *   src/parsers/ch.c src/parsers/charset.c src/parsers/choice.c
 *   src/parsers/difference.c src/parsers/end.c src/parsers/epsilon.c
 *   src/parsers/ignore.c src/parsers/ignoreseq.c src/parsers/indirect.c
 *   src/parsers/int_range.c src/parsers/many.c src/parsers/not.c
 *   src/parsers/nothing.c src/parsers/optional.c src/parsers/sequence.c
 *   src/parsers/token.c src/parsers/unimplemented.c
 *   src/parsers/whitespace.c src/parsers/xor.c
 *
 * (247 lines, 7 KiB, C)
 */
#include "parser_internal.h"
|
|
|
|
// TODO: split this up.
|
|
typedef struct {
|
|
const HParser *p, *sep;
|
|
size_t count;
|
|
bool min_p;
|
|
} HRepeat;
|
|
|
|
/*
 * Parse routine behind h_many, h_many1, h_repeat_n, h_sepBy and h_sepBy1
 * (and called directly by parse_length_value).
 *
 * env is an HRepeat. The element parser env->p is applied repeatedly, with
 * env->sep applied before every element except the first.
 *   - min_p == true : count is a lower bound; keep going until an element
 *                     or separator fails.
 *   - min_p == false: count is exact; stop once count elements were parsed.
 *
 * On success returns a TT_SEQUENCE token holding the ASTs of the elements
 * that produced one (elements whose ast is NULL are counted but not stored;
 * separator results are discarded). The input consumed by a failed trailing
 * separator/element attempt is rolled back. Returns NULL when fewer than
 * count elements could be parsed.
 */
static HParseResult *parse_many(void* env, HParseState *state) {
  HRepeat *env_ = (HRepeat*) env;

  // count can come straight from parsed input (parse_length_value), so use
  // it only as a bounded capacity hint. The array is expected to grow on
  // append, as it already has to for unbounded h_many (initial capacity 4).
  size_t capacity = (env_->count > 0 ? env_->count : 4);
  if (capacity > 1024)
    capacity = 1024;
  HCountedArray *seq = h_carray_new_sized(state->arena, capacity);

  size_t count = 0;
  // Initialised up front so no path can ever restore an indeterminate value.
  HInputStream bak = state->input_stream;

  while (env_->min_p || env_->count > count) {
    // Remember where this iteration started so a partial "sep elem" match
    // can be undone.
    bak = state->input_stream;
    // sep is treated as optional, consistent with many_ctrvm.
    if (count > 0 && env_->sep != NULL) {
      HParseResult *sep = h_do_parse(env_->sep, state);
      if (!sep)
        goto err0;
    }
    HParseResult *elem = h_do_parse(env_->p, state);
    if (!elem)
      goto err0;
    if (elem->ast)
      h_carray_append(seq, (void*)elem->ast);
    count++;
  }
  // Defensive: the loop only exits normally once count >= env_->count.
  if (count < env_->count)
    goto err;
 succ:
  ; // necessary for the label to be here...
  HParsedToken *res = a_new(HParsedToken, 1);
  res->token_type = TT_SEQUENCE;
  res->seq = seq;
  return make_result(state->arena, res);
 err0:
  // An element/separator failed: that is still a success if enough elements
  // were already collected; just drop the failed attempt's input.
  if (count >= env_->count) {
    state->input_stream = bak;
    goto succ;
  }
 err:
  state->input_stream = bak;
  return NULL;
}
|
|
|
|
static bool many_isValidRegular(void *env) {
|
|
HRepeat *repeat = (HRepeat*)env;
|
|
return (repeat->p->vtable->isValidRegular(repeat->p->env) &&
|
|
repeat->sep->vtable->isValidRegular(repeat->sep->env));
|
|
}
|
|
|
|
static bool many_isValidCF(void *env) {
|
|
HRepeat *repeat = (HRepeat*)env;
|
|
return (repeat->p->vtable->isValidCF(repeat->p->env) &&
|
|
repeat->sep->vtable->isValidCF(repeat->sep->env));
|
|
}
|
|
|
|
/*
 * Desugar a many/many1/sepBy/sepBy1 parser into grammar rules.
 *
 * env is an HRepeat; allocations go through mm__. Returns the head rule, or
 * NULL (after assert_message) for repetitions this grammar cannot express.
 */
static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
  HRepeat *repeat = (HRepeat*)env;
  // The rules below only express "at least 0" or "at least 1" elements.
  // An exact count (min_p == false, i.e. h_repeat_n) must be rejected for
  // every n: testing count alone would let h_repeat_n(p, 0) and
  // h_repeat_n(p, 1) through and desugar them as unbounded repetitions.
  if (!repeat->min_p || repeat->count > 1) {
    assert_message(0, "'h_repeat_n' is not context-free, can't be desugared");
    return NULL;
  }

  /* many(A) =>
         Ma  -> A Mar
             -> \epsilon (but not if many1/sepBy1 is used)
         Mar -> Sep A Mar
             -> \epsilon
  */

  HCFChoice *a = h_desugar(mm__, repeat->p);
  HCFChoice *ma = h_new(HCFChoice, 1);
  HCFChoice *mar = h_new(HCFChoice, 1);
  HCFChoice *eps = desugar_epsilon(mm__, NULL);

  /* create first subrule */
  ma->type = HCF_CHOICE;
  ma->seq = h_new(HCFSequence*, 3); /* enough for 2 productions */
  ma->seq[0] = h_new(HCFSequence, 1);
  ma->seq[0]->items = h_new(HCFChoice*, 3);
  ma->seq[0]->items[0] = a;
  ma->seq[0]->items[1] = mar;
  ma->seq[0]->items[2] = NULL;
  ma->seq[1] = NULL;

  /* if not many1/sepBy1, attach epsilon */
  if (repeat->count == 0) {
    ma->seq[1] = h_new(HCFSequence, 1);
    ma->seq[1]->items = h_new(HCFChoice*, 2);
    ma->seq[1]->items[0] = eps;
    ma->seq[1]->items[1] = NULL;
    ma->seq[2] = NULL;
  }

  /* create second subrule */
  mar->type = HCF_CHOICE;
  mar->seq = h_new(HCFSequence*, 3);
  mar->seq[0] = h_new(HCFSequence, 1);
  mar->seq[0]->items = h_new(HCFChoice*, 4);
  size_t n = 0;
  // The separator is optional (as in many_ctrvm); without one the rule is
  // simply Mar -> A Mar.
  if (repeat->sep != NULL)
    mar->seq[0]->items[n++] = h_desugar(mm__, repeat->sep);
  mar->seq[0]->items[n++] = a;
  mar->seq[0]->items[n++] = mar; // woo recursion!
  mar->seq[0]->items[n] = NULL;
  mar->seq[1] = h_new(HCFSequence, 1);
  mar->seq[1]->items = h_new(HCFChoice*, 2);
  mar->seq[1]->items[0] = eps;
  mar->seq[1]->items[1] = NULL;
  mar->seq[2] = NULL;

  return ma;
}
|
|
|
|
/*
 * Emit the regex-VM program for a repetition.
 *
 * Emitted shape:
 *     PUSH
 *   L: FORK  -> end
 *     <code for p>
 *     [ PUSH, <code for sep>, ACTION clear_to_mark ]   (only if sep != NULL)
 *     GOTO  L
 *   end:
 *     ACTION make_sequence
 *
 * Returns false as soon as compiling p or sep fails, true otherwise.
 */
static bool many_ctrvm(HRVMProg *prog, void *env) {
  HRepeat *rep = (HRepeat*)env;

  // FIXME: Implement clear_to_mark
  uint16_t clear_action = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);

  h_rvm_insert_insn(prog, RVM_PUSH, 0);

  // TODO: implement min and max properly. Right now, it's always min==0, max==inf
  uint16_t fork_at = h_rvm_insert_insn(prog, RVM_FORK, 0);

  if (!h_compile_regex(prog, rep->p)) {
    return false;
  }

  if (rep->sep != NULL) {
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    if (!h_compile_regex(prog, rep->sep)) {
      return false;
    }
    h_rvm_insert_insn(prog, RVM_ACTION, clear_action);
  }

  // Loop back to the fork, then point the fork's argument at the
  // instruction following the loop.
  h_rvm_insert_insn(prog, RVM_GOTO, fork_at);
  h_rvm_patch_arg(prog, fork_at, h_rvm_get_ip(prog));

  uint16_t make_seq = h_rvm_create_action(prog, h_svm_action_make_sequence, NULL);
  h_rvm_insert_insn(prog, RVM_ACTION, make_seq);
  return true;
}
|
|
|
|
static const HParserVtable many_vt = {
|
|
.parse = parse_many,
|
|
.isValidRegular = many_isValidRegular,
|
|
.isValidCF = many_isValidCF,
|
|
.desugar = desugar_many,
|
|
.compile_to_rvm = many_ctrvm,
|
|
};
|
|
|
|
HParser* h_many(const HParser* p) {
|
|
return h_many__m(&system_allocator, p);
|
|
}
|
|
/*
 * Zero or more repetitions of p, allocated through mm__.
 * The separator is an epsilon parser; count 0 with min_p set means
 * "at least zero".
 */
HParser* h_many__m(HAllocator* mm__, const HParser* p) {
  HRepeat *repeat = h_new(HRepeat, 1);
  *repeat = (HRepeat){
    .p     = p,
    .sep   = h_epsilon_p__m(mm__),
    .count = 0,
    .min_p = true,
  };
  return h_new_parser(mm__, &many_vt, repeat);
}
|
|
|
|
HParser* h_many1(const HParser* p) {
|
|
return h_many1__m(&system_allocator, p);
|
|
}
|
|
/*
 * One or more repetitions of p, allocated through mm__.
 * The separator is an epsilon parser; count 1 with min_p set means
 * "at least one".
 */
HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
  HRepeat *repeat = h_new(HRepeat, 1);
  *repeat = (HRepeat){
    .p     = p,
    .sep   = h_epsilon_p__m(mm__),
    .count = 1,
    .min_p = true,
  };
  return h_new_parser(mm__, &many_vt, repeat);
}
|
|
|
|
HParser* h_repeat_n(const HParser* p, const size_t n) {
|
|
return h_repeat_n__m(&system_allocator, p, n);
|
|
}
|
|
/*
 * Exactly n repetitions of p, allocated through mm__.
 * The separator is an epsilon parser; min_p is cleared so that count is
 * treated as an exact count rather than a minimum.
 */
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
  HRepeat *repeat = h_new(HRepeat, 1);
  *repeat = (HRepeat){
    .p     = p,
    .sep   = h_epsilon_p__m(mm__),
    .count = n,
    .min_p = false,
  };
  return h_new_parser(mm__, &many_vt, repeat);
}
|
|
|
|
HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
|
return h_sepBy__m(&system_allocator, p, sep);
|
|
}
|
|
/*
 * Zero or more repetitions of p separated by sep, allocated through mm__.
 * count 0 with min_p set means "at least zero".
 */
HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
  HRepeat *repeat = h_new(HRepeat, 1);
  *repeat = (HRepeat){
    .p     = p,
    .sep   = sep,
    .count = 0,
    .min_p = true,
  };
  return h_new_parser(mm__, &many_vt, repeat);
}
|
|
|
|
HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
|
return h_sepBy1__m(&system_allocator, p, sep);
|
|
}
|
|
/*
 * One or more repetitions of p separated by sep, allocated through mm__.
 * count 1 with min_p set means "at least one".
 */
HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
  HRepeat *repeat = h_new(HRepeat, 1);
  *repeat = (HRepeat){
    .p     = p,
    .sep   = sep,
    .count = 1,
    .min_p = true,
  };
  return h_new_parser(mm__, &many_vt, repeat);
}
|
|
|
|
typedef struct {
|
|
const HParser *length;
|
|
const HParser *value;
|
|
} HLenVal;
|
|
|
|
/*
 * Parse a length-prefixed run of values.
 *
 * env is an HLenVal. The length parser runs first; its unsigned-integer
 * result is then used as an exact repeat count for the value parser, by
 * handing a temporary HRepeat to parse_many.
 *
 * Returns NULL if the length parser fails, otherwise whatever parse_many
 * returns. Terminates the process via errx() when the length parser
 * succeeds without yielding a TT_UINT token.
 */
static HParseResult* parse_length_value(void *env, HParseState *state) {
  HLenVal *lv = (HLenVal*)env;
  HParseResult *len = h_do_parse(lv->length, state);
  if (!len)
    return NULL;
  // A successful result may carry no AST at all (parse_many checks for the
  // same thing on its elements), so test the pointer before reading the
  // token type instead of dereferencing NULL.
  if (!len->ast || len->ast->token_type != TT_UINT)
    errx(1, "Length parser must return an unsigned integer");
  // TODO: allocate this using public functions
  // NOTE(review): h_epsilon_p() is invoked on every parse and nothing here
  // releases its result; if it allocates (its __m sibling takes an
  // allocator), this accumulates one parser per call -- confirm.
  // NOTE(review): the parsed uint is assigned to a size_t count; confirm
  // no narrowing can occur on the supported targets.
  HRepeat repeat = {
    .p = lv->value,
    .sep = h_epsilon_p(),
    .count = len->ast->uint,
    .min_p = false
  };
  return parse_many(&repeat, state);
}
|
|
|
|
/*
 * h_length_value cannot be expressed as grammar rules: always raises the
 * assertion message below and yields NULL. Both arguments are unused.
 */
static HCFChoice* desugar_length_value(HAllocator *mm__, void *env) {
  (void)mm__;
  (void)env;
  assert_message(0, "'h_length_value' is not context-free, can't be desugared");
  return NULL;
}
|
|
|
|
static const HParserVtable length_value_vt = {
|
|
.parse = parse_length_value,
|
|
.isValidRegular = h_false,
|
|
.isValidCF = h_false,
|
|
.desugar = desugar_length_value,
|
|
};
|
|
|
|
HParser* h_length_value(const HParser* length, const HParser* value) {
|
|
return h_length_value__m(&system_allocator, length, value);
|
|
}
|
|
/*
 * Build a length-value parser through mm__: stores the two sub-parsers in a
 * freshly allocated HLenVal and wraps it with length_value_vt.
 */
HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
  HLenVal *lv = h_new(HLenVal, 1);
  *lv = (HLenVal){
    .length = length,
    .value  = value,
  };
  return h_new_parser(mm__, &length_value_vt, lv);
}
|