Merge remote-tracking branch 'tq/master' into LL such that it compiles

Conflicts:
	src/Makefile
	src/backends/packrat.c
	src/compile.c
	src/hammer.h
	src/internal.h
	src/parsers/action.c
	src/parsers/and.c
	src/parsers/attr_bool.c
	src/parsers/bits.c
	src/parsers/butnot.c
	src/parsers/ch.c
	src/parsers/charset.c
	src/parsers/choice.c
	src/parsers/difference.c
	src/parsers/end.c
	src/parsers/epsilon.c
	src/parsers/ignore.c
	src/parsers/ignoreseq.c
	src/parsers/indirect.c
	src/parsers/int_range.c
	src/parsers/many.c
	src/parsers/not.c
	src/parsers/nothing.c
	src/parsers/optional.c
	src/parsers/sequence.c
	src/parsers/token.c
	src/parsers/unimplemented.c
	src/parsers/whitespace.c
	src/parsers/xor.c
This commit is contained in:
Sven M. Hallberg 2013-05-11 19:04:59 +02:00
commit c64a4e435e
46 changed files with 1289 additions and 263 deletions

View file

@ -12,7 +12,7 @@ static HParseResult* parse_action(void *env, HParseState *state) {
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
if(tmp) {
const HParsedToken *tok = a->action(tmp);
return make_result(state, (HParsedToken*)tok);
return make_result(state->arena, (HParsedToken*)tok);
} else
return NULL;
} else // either the parser's missing or the action's missing
@ -44,18 +44,24 @@ static bool action_isValidCF(void *env) {
return a->p->vtable->isValidCF(a->p->env);
}
/*
 * Regex-VM compile hook for the action combinator.
 *
 * No instructions are emitted for the action itself: compilation is
 * handed to the wrapped parser's own compile_to_rvm hook and that hook's
 * result is returned unchanged.
 */
static bool action_ctrvm(HRVMProg *prog, void* env) {
  const HParseAction *wrapper = (const HParseAction*)env;
  const HParser *inner = wrapper->p;
  return inner->vtable->compile_to_rvm(prog, inner->env);
}
// Callback table for the action combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable action_vt = {
.parse = parse_action,
.isValidRegular = action_isValidRegular,
.isValidCF = action_isValidCF,
.desugar = desugar_action,
.compile_to_rvm = action_ctrvm,
};
const HParser* h_action(const HParser* p, const HAction a) {
HParser* h_action(const HParser* p, const HAction a) {
return h_action__m(&system_allocator, p, a);
}
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParseAction *env = h_new(HParseAction, 1);
env->p = p;
env->action = a;

View file

@ -5,7 +5,7 @@ static HParseResult *parse_and(void* env, HParseState* state) {
HParseResult *res = h_do_parse((HParser*)env, state);
state->input_stream = bak;
if (res)
return make_result(state, NULL);
return make_result(state->arena, NULL);
return NULL;
}
@ -22,13 +22,14 @@ static const HParserVtable and_vt = {
revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */
.desugar = desugar_and,
.compile_to_rvm = h_not_regular,
};
const HParser* h_and(const HParser* p) {
HParser* h_and(const HParser* p) {
return h_and__m(&system_allocator, p);
}
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
HParser* h_and__m(HAllocator* mm__, const HParser* p) {
// zero-width positive lookahead
return h_new_parser(mm__, &and_vt, (void *)p);
}

View file

@ -47,18 +47,24 @@ static HCFChoice* desugar_ab(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for attr_bool.
 *
 * Only the wrapped parser is compiled; the predicate stored alongside it
 * is not consulted here. Returns whatever h_compile_regex reports.
 */
static bool ab_ctrvm(HRVMProg *prog, void *env) {
  const HAttrBool *attr = env;
  bool compiled = h_compile_regex(prog, attr->p);
  return compiled;
}
// Callback table for the attr_bool combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable attr_bool_vt = {
.parse = parse_attr_bool,
.isValidRegular = ab_isValidRegular,
.isValidCF = ab_isValidCF,
.desugar = desugar_ab,
.compile_to_rvm = ab_ctrvm,
};
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
HParser* h_attr_bool(const HParser* p, HPredicate pred) {
return h_attr_bool__m(&system_allocator, p, pred);
}
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HAttrBool *env = h_new(HAttrBool, 1);
env->p = p;
env->pred = pred;

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
struct bits_env {
@ -13,7 +14,7 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
result->sint = h_read_bits(&state->input_stream, env_->length, true);
else
result->uint = h_read_bits(&state->input_stream, env_->length, false);
return make_result(state, result);
return make_result(state->arena, result);
}
static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
@ -41,16 +42,43 @@ static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
return ret;
}
/*
 * SVM action: turn the captured byte string on top of the stack into an
 * integer token.
 *
 * The bytes are folded most-significant-first into a 64-bit accumulator.
 * For a signed parser (env->signedp) whose capture is narrower than 64
 * bits, the value is sign-extended from the top bit of the captured width;
 * previously e.g. a one-byte 0xFF came out as 255 rather than -1.
 *
 * The token is rewritten in place: its type becomes TT_SINT or TT_UINT and
 * the value is stored through the `uint` member.
 *
 * NOTE: a signed result is stored via the unsigned member and read back as
 * signed, which relies on int64_t being the two's-complement view of
 * uint64_t.
 *
 * Always returns true.
 */
static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
  struct bits_env *env_ = env;
  HParsedToken *top = ctx->stack[ctx->stack_count-1];
  assert(top->token_type == TT_BYTES);

  // Read the byte-string fields before writing `uint`: the original notes
  // that the value members share storage.
  size_t nbytes = top->bytes.len;
  uint64_t res = 0;
  for (size_t i = 0; i < nbytes; i++)
    res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.

  // Sign-extend from the captured width. Skipped for empty captures (no
  // sign bit) and for full-width values (nothing to extend).
  if (env_->signedp && nbytes > 0 && nbytes < sizeof(res)) {
    uint64_t sign_bit = (uint64_t)1 << (nbytes * 8 - 1);
    res = (res ^ sign_bit) - sign_bit;
  }

  top->uint = res; // possibly reinterpreted as signed through the union
  top->token_type = (env_->signedp ? TT_SINT : TT_UINT);
  return true;
}
/*
 * Regex-VM compile hook for fixed-width integer parsers.
 *
 * Emits PUSH, then one MATCH 0xFF00 / STEP pair per whole byte of the
 * configured bit length, then CAPTURE and an ACTION that runs
 * h_svm_action_bits with this parser's environment.
 *
 * The loop bound used to be written `(i < env_->length)/8`, which divides
 * the 0/1 comparison result by 8 and is therefore always zero: no byte was
 * ever matched. The bound is now length/8.
 *
 * Always returns true.
 */
static bool bits_ctrvm(HRVMProg *prog, void* env) {
  struct bits_env *env_ = (struct bits_env*)env;
  // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
  const size_t nbytes = env_->length / 8;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  for (size_t i = 0; i < nbytes; ++i) {
    h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_bits, env));
  return true;
}
// Callback table for the bits parser. Both validity checks use h_true;
// compilation to the regex VM goes through bits_ctrvm.
static const HParserVtable bits_vt = {
.parse = parse_bits,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_bits,
.compile_to_rvm = bits_ctrvm,
};
const HParser* h_bits(size_t len, bool sign) {
HParser* h_bits(size_t len, bool sign) {
return h_bits__m(&system_allocator, len, sign);
}
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
struct bits_env *env = h_new(struct bits_env, 1);
env->length = len;
env->signedp = sign;
@ -58,10 +86,10 @@ const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
}
#define SIZED_BITS(name_pre, len, signedp) \
const HParser* h_##name_pre##len () { \
HParser* h_##name_pre##len () { \
return h_bits__m(&system_allocator, len, signedp); \
} \
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
return h_bits__m(mm__, len, signedp); \
}
SIZED_BITS(int, 8, true)

View file

@ -43,14 +43,15 @@ static HCFChoice* desugar_butnot(HAllocator *mm__, void *env) {
static const HParserVtable butnot_vt = {
.parse = parse_butnot,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_butnot,
.compile_to_rvm = h_not_regular,
};
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
HParser* h_butnot(const HParser* p1, const HParser* p2) {
return h_butnot__m(&system_allocator, p1, p2);
}
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;

View file

@ -6,7 +6,7 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
if (c == r) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = r;
return make_result(state, tok);
return make_result(state->arena, tok);
} else {
return NULL;
}
@ -20,16 +20,25 @@ static HCFChoice* desugar_ch(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for the single-character parser.
 *
 * The character is carried in the env pointer itself (h_ch__m stores it
 * via uintptr_t). Emits a MATCH whose 16-bit argument carries the
 * character in both bytes, followed by a STEP.
 *
 * The argument was previously built as `c & c << 8`, which is zero for
 * every 8-bit c; the two bytes must be OR-ed together.
 *
 * Always returns true.
 */
static bool ch_ctrvm(HRVMProg *prog, void* env) {
  uint8_t c = (uint8_t)(uintptr_t)env;
  // TODO: Does this capture anything?
  h_rvm_insert_insn(prog, RVM_MATCH, c | (c << 8));
  h_rvm_insert_insn(prog, RVM_STEP, 0);
  return true;
}
// Callback table for the single-character parser. Both validity checks use
// h_true; compilation to the regex VM goes through ch_ctrvm.
static const HParserVtable ch_vt = {
.parse = parse_ch,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_ch,
.compile_to_rvm = ch_ctrvm,
};
const HParser* h_ch(const uint8_t c) {
HParser* h_ch(const uint8_t c) {
return h_ch__m(&system_allocator, c);
}
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
return h_new_parser(mm__, &ch_vt, (void *)(uintptr_t)c);
}

View file

@ -9,7 +9,7 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
if (charset_isset(cs, in)) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = in;
return make_result(state, tok);
return make_result(state->arena, tok);
} else
return NULL;
}
@ -22,17 +22,38 @@ static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
return ret;
}
// FUTURE: this is horribly inefficient
/*
 * Regex-VM compile hook for character-set parsers.
 *
 * For every byte value in the set this emits FORK / MATCH / GOTO, with the
 * FORK patched to land just past the GOTO. A single STEP is emitted after
 * all alternatives, and every GOTO emitted in this range is patched to
 * jump to it.
 *
 * The MATCH argument was previously `i & i << 8`, which never encodes the
 * byte value in both halves of the argument; the two halves must be OR-ed
 * together.
 *
 * NOTE(review): the thread that falls through every FORK also reaches the
 * trailing STEP without having matched anything - confirm whether an
 * explicit failing instruction is needed before it.
 *
 * Always returns true.
 */
static bool cs_ctrvm(HRVMProg *prog, void *env) {
  HCharset cs = (HCharset)env;
  uint16_t start = h_rvm_get_ip(prog);
  for (size_t i=0; i<256; ++i) {
    if (charset_isset(cs, i)) {
      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
      h_rvm_insert_insn(prog, RVM_MATCH, i | (i << 8));
      h_rvm_insert_insn(prog, RVM_GOTO, 0);
      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
    }
  }
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  // Retarget every placeholder GOTO emitted above at the shared STEP.
  for (size_t i=start; i<jump; ++i) {
    if (RVM_GOTO == prog->insns[i].op)
      h_rvm_patch_arg(prog, i, jump);
  }
  return true;
}
// Callback table for character-set parsers. Both validity checks use
// h_true; compilation to the regex VM goes through cs_ctrvm.
static const HParserVtable charset_vt = {
.parse = parse_charset,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_charset,
.compile_to_rvm = cs_ctrvm,
};
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
return h_ch_range__m(&system_allocator, lower, upper);
}
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HCharset cs = new_charset(mm__);
for (int i = 0; i < 256; i++)
charset_set(cs, i, (lower <= i) && (i <= upper));
@ -40,7 +61,7 @@ const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_
}
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
static HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
HCharset cs = new_charset(mm__);
for (size_t i = 0; i < 256; i++)
charset_set(cs, i, 1-val);
@ -50,19 +71,19 @@ static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, s
return h_new_parser(mm__, &charset_vt, cs);
}
const HParser* h_in(const uint8_t *options, size_t count) {
HParser* h_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 1);
}
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 1);
}
const HParser* h_not_in(const uint8_t *options, size_t count) {
HParser* h_not_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 0);
}
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 0);
}

View file

@ -54,34 +54,53 @@ static HCFChoice* desugar_choice(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for the choice combinator.
 *
 * For each alternative: emit a FORK, compile the alternative, emit a
 * placeholder GOTO, and patch the FORK to land just past that GOTO. After
 * the last alternative one more instruction is emitted and every
 * placeholder GOTO is patched to jump to it.
 *
 * Fixes relative to the previous version:
 *  - the GOTO-patching loop started at `start` (an instruction pointer)
 *    instead of 0, so it indexed gotos[] with the wrong values and skipped
 *    entries whenever the program was not empty;
 *  - h_compile_regex was handed the alternative's env instead of the
 *    alternative parser itself, unlike the other callers in this code base;
 *  - the gotos[] array is never declared with zero length.
 *
 * NOTE(review): the join instruction is still RVM_STEP, as before, which
 * looks copied from the charset compiler - confirm whether a choice should
 * really consume an extra input position here.
 *
 * Returns false as soon as one alternative fails to compile, true otherwise.
 */
static bool choice_ctrvm(HRVMProg *prog, void* env) {
  HSequence *s = (HSequence*)env;
  uint16_t gotos[s->len > 0 ? s->len : 1];
  for (size_t i=0; i<s->len; ++i) {
    uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
    if (!h_compile_regex(prog, s->p_array[i]))
      return false;
    gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
    h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  }
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  for (size_t i=0; i<s->len; ++i) {
    h_rvm_patch_arg(prog, gotos[i], jump);
  }
  return true;
}
// Callback table for the choice combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable choice_vt = {
.parse = parse_choice,
.isValidRegular = choice_isValidRegular,
.isValidCF = choice_isValidCF,
.desugar = desugar_choice,
.compile_to_rvm = choice_ctrvm,
};
const HParser* h_choice(const HParser* p, ...) {
HParser* h_choice(const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_choice__mv(&system_allocator, p, ap);
HParser* ret = h_choice__mv(&system_allocator, p, ap);
va_end(ap);
return ret;
}
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_choice__mv(mm__, p, ap);
HParser* ret = h_choice__mv(mm__, p, ap);
va_end(ap);
return ret;
}
const HParser* h_choice__v(const HParser* p, va_list ap) {
HParser* h_choice__v(const HParser* p, va_list ap) {
return h_choice__mv(&system_allocator, p, ap);
}
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
va_list ap;
size_t len = 0;
HSequence *s = h_new(HSequence, 1);

View file

@ -42,14 +42,15 @@ static HCFChoice* desugar_difference(HAllocator *mm__, void *env) {
static HParserVtable difference_vt = {
.parse = parse_difference,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_difference,
.compile_to_rvm = h_not_regular,
};
const HParser* h_difference(const HParser* p1, const HParser* p2) {
HParser* h_difference(const HParser* p1, const HParser* p2) {
return h_difference__m(&system_allocator, p1, p2);
}
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;

View file

@ -17,17 +17,23 @@ static HCFChoice* desugar_end(HAllocator *mm__, void *env) {
return &ret;
}
/*
 * Regex-VM compile hook for the end-of-input parser: appends one RVM_EOF
 * instruction (argument 0) to the program. The env argument is unused.
 * Always returns true.
 */
static bool end_ctrvm(HRVMProg *prog, void *env) {
  (void)env;
  h_rvm_insert_insn(prog, RVM_EOF, 0);
  return true;
}
// Callback table for the end-of-input parser. Both validity checks use
// h_true; compilation to the regex VM goes through end_ctrvm.
static const HParserVtable end_vt = {
.parse = parse_end,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_end,
.compile_to_rvm = end_ctrvm,
};
const HParser* h_end_p() {
HParser* h_end_p() {
return h_end_p__m(&system_allocator);
}
const HParser* h_end_p__m(HAllocator* mm__) {
HParser* h_end_p__m(HAllocator* mm__) {
return h_new_parser(mm__, &end_vt, NULL);
}

View file

@ -8,21 +8,23 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
return res;
}
/*
 * Regex-VM compile hook for the epsilon parser: emits no instructions and
 * reports success. Neither argument is used.
 */
static bool epsilon_ctrvm(HRVMProg *prog, void* env) {
  (void)prog;
  (void)env;
  return true;
}
// Callback table for the epsilon parser. Both validity checks use h_true;
// compilation to the regex VM goes through epsilon_ctrvm.
static const HParserVtable epsilon_vt = {
.parse = parse_epsilon,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_epsilon,
.compile_to_rvm = epsilon_ctrvm,
};
static HParser epsilon_p = {
.vtable = &epsilon_vt,
.env = NULL
};
const HParser* h_epsilon_p() {
return &epsilon_p;
HParser* h_epsilon_p() {
return h_epsilon_p__m(&system_allocator);
}
const HParser* h_epsilon_p__m(HAllocator* mm__) {
return &epsilon_p;
HParser* h_epsilon_p__m(HAllocator* mm__) {
HParser *epsilon_p = h_new(HParser, 1);
epsilon_p->vtable = &epsilon_vt;
return epsilon_p;
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
static HParseResult* parse_ignore(void* env, HParseState* state) {
@ -25,16 +26,30 @@ static HCFChoice* desugar_ignore(HAllocator *mm__, void *env) {
return (h_desugar(mm__, p));
}
/*
 * SVM action: discard the topmost entry of the context's stack by
 * decrementing stack_count. Asserts that the stack is non-empty first.
 * The arena and arg parameters are unused. Always returns true.
 */
static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) {
  (void)arena;
  (void)arg;
  assert(ctx->stack_count > 0);
  ctx->stack_count -= 1;
  return true;
}
/*
 * Regex-VM compile hook for the ignore combinator.
 *
 * Compiles the wrapped parser, then appends an ACTION that pops the
 * wrapped parser's result off the SVM stack (h_svm_action_pop).
 *
 * Fixes relative to the previous version:
 *  - a failure to compile the wrapped parser is now propagated instead of
 *    being ignored;
 *  - h_compile_regex receives the wrapped parser itself rather than its
 *    env, matching the other callers that pass an HParser pointer.
 *
 * Returns false if the wrapped parser cannot be compiled, true otherwise.
 */
static bool ignore_ctrvm(HRVMProg *prog, void *env) {
  HParser *p = (HParser*)env;
  if (!h_compile_regex(prog, p))
    return false;
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
  return true;
}
// Callback table for the ignore combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable ignore_vt = {
.parse = parse_ignore,
.isValidRegular = ignore_isValidRegular,
.isValidCF = ignore_isValidCF,
.desugar = desugar_ignore,
.compile_to_rvm = ignore_ctrvm,
};
const HParser* h_ignore(const HParser* p) {
HParser* h_ignore(const HParser* p) {
return h_ignore__m(&system_allocator, p);
}
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &ignore_vt, (void *)p);
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
@ -5,7 +6,7 @@
// general case: parse sequence, pick one result
//
typedef struct {
typedef struct HIgnoreSeq_ {
const HParser **parsers;
size_t len; // how many parsers in 'ps'
size_t which; // whose result to return
@ -61,11 +62,40 @@ static bool is_isValidCF(void *env) {
return true;
}
/*
 * SVM action for ignoreseq: collapse the per-element stack frames left by
 * is_ctrvm into the single result of element `which`.
 *
 * is_ctrvm pushes a mark before each element, so the stack holds, per
 * element, a TT_MARK optionally followed by that element's token. Frames
 * are unwound from the last element to the first; the token of the
 * selected element is remembered and pushed back once all frames are gone.
 *
 * Fixes relative to the previous version:
 *  - the "is the top a mark?" test read stack[stack_count], one slot past
 *    the top of the stack, instead of stack[stack_count-1];
 *  - `save` was left uninitialized (and then pushed) when the selected
 *    element produced no token; now nothing is pushed in that case;
 *  - unwinding asserts that it never runs off the bottom of the stack.
 *
 * Always returns true.
 */
static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
  HIgnoreSeq *seq = (HIgnoreSeq*)env;
  HParsedToken *save = NULL;
  // We can assume that each subitem generated at most one item on the
  // stack.
  assert(seq->len >= 1);
  for (size_t i = seq->len; i-- > 0; ) {
    assert(ctx->stack_count > 0);
    HParsedToken *top = ctx->stack[ctx->stack_count-1];
    if (i == seq->which && top->token_type != TT_MARK)
      save = top;
    // skip over everything up to and including the mark.
    while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)
      assert(ctx->stack_count > 0);
  }
  if (save != NULL)
    ctx->stack[ctx->stack_count++] = save;
  return true;
}
/*
 * Regex-VM compile hook for ignoreseq (h_left / h_right / h_middle).
 *
 * Emits a PUSH before each element and compiles the element, then appends
 * an ACTION running h_svm_action_ignoreseq with this parser's env.
 *
 * h_compile_regex now receives each element parser itself; it used to be
 * given the element's env, unlike the other callers that pass an HParser
 * pointer.
 *
 * Returns false as soon as one element fails to compile, true otherwise.
 */
static bool is_ctrvm(HRVMProg *prog, void* env) {
  HIgnoreSeq *seq = (HIgnoreSeq*)env;
  for (size_t i=0; i<seq->len; ++i) {
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    if (!h_compile_regex(prog, seq->parsers[i]))
      return false;
  }
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
  return true;
}
// Callback table for ignoreseq parsers: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable ignoreseq_vt = {
.parse = parse_ignoreseq,
.isValidRegular = is_isValidRegular,
.isValidCF = is_isValidCF,
.desugar = desugar_ignoreseq,
.compile_to_rvm = is_ctrvm,
};
@ -73,7 +103,7 @@ static const HParserVtable ignoreseq_vt = {
// API frontends
//
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
static HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 2);
seq->parsers[0] = p;
@ -84,25 +114,25 @@ static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const H
return h_new_parser(mm__, &ignoreseq_vt, seq);
}
const HParser* h_left(const HParser* p, const HParser* q) {
HParser* h_left(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 0);
}
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 0);
}
const HParser* h_right(const HParser* p, const HParser* q) {
HParser* h_right(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 1);
}
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 1);
}
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
return h_middle__m(&system_allocator, p, x, q);
}
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 3);
seq->parsers[0] = p;

View file

@ -21,6 +21,7 @@ static const HParserVtable indirect_vt = {
.isValidRegular = h_false,
.isValidCF = indirect_isValidCF,
.desugar = desugar_indirect,
.compile_to_rvm = h_not_regular,
};
void h_bind_indirect(HParser* indirect, const HParser* inner) {

View file

@ -121,17 +121,39 @@ static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) {
return gen_int_range(mm__, r->lower, r->upper, bytes);
}
/*
 * SVM action: check that the integer token on top of the stack lies within
 * the inclusive [lower, upper] range stored in env (an HRange).
 *
 * Signed tokens are compared against the bounds as stored. Unsigned tokens
 * are compared against the bounds converted to uint64_t. Any other token
 * type fails the check. The stack is not modified.
 *
 * Returns true when the value is within range, false otherwise.
 */
bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
  const HRange *range = (const HRange*)env;
  const HParsedToken *tok = ctx->stack[ctx->stack_count-1];

  if (tok->token_type == TT_SINT) {
    if (!(tok->sint >= range->lower))
      return false;
    return tok->sint <= range->upper;
  }

  if (tok->token_type == TT_UINT) {
    if (!(tok->uint >= (uint64_t)range->lower))
      return false;
    return tok->uint <= (uint64_t)range->upper;
  }

  return false;
}
/*
 * Regex-VM compile hook for int_range.
 *
 * Compiles the wrapped integer parser, then appends an ACTION that runs
 * h_svm_action_validate_int_range with this parser's env.
 *
 * Fixes relative to the previous version:
 *  - the function returned false unconditionally, so compilation was
 *    reported as failed even after all instructions had been emitted
 *    (while int_range_vt declares the parser regular);
 *  - a failure to compile the wrapped parser is now propagated instead of
 *    being ignored.
 *
 * Returns false if the wrapped parser cannot be compiled, true otherwise.
 */
static bool ir_ctrvm(HRVMProg *prog, void *env) {
  HRange *r_env = (HRange*)env;
  if (!h_compile_regex(prog, r_env->p))
    return false;
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
  return true;
}
// Callback table for the int_range combinator. Both validity checks use
// h_true; compilation to the regex VM goes through ir_ctrvm.
static const HParserVtable int_range_vt = {
.parse = parse_int_range,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_int_range,
.compile_to_rvm = ir_ctrvm,
};
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
return h_int_range__m(&system_allocator, p, lower, upper);
}
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
// p must be an integer parser, which means it's using parse_bits
// TODO: re-add this check
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");

View file

@ -33,7 +33,7 @@ static HParseResult *parse_many(void* env, HParseState *state) {
HParsedToken *res = a_new(HParsedToken, 1);
res->token_type = TT_SEQUENCE;
res->seq = seq;
return make_result(state, res);
return make_result(state->arena, res);
err0:
if (count >= env_->count) {
state->input_stream = bak;
@ -112,17 +112,40 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
return ma;
}
/*
 * Regex-VM compile hook for the repetition combinators.
 *
 * Emitted layout:
 *   PUSH
 *   loop: FORK exit
 *         <element>
 *         [PUSH, <separator>, ACTION clear_to_mark]   (only if sep != NULL)
 *         GOTO loop
 *   exit: ACTION make_sequence
 *
 * Returns false if the element or the separator fails to compile, true
 * otherwise.
 */
static bool many_ctrvm(HRVMProg *prog, void *env) {
  HRepeat *rep = (HRepeat*)env;
  // FIXME: Implement clear_to_mark
  uint16_t drop_sep = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // TODO: implement min and max properly. Right now, it's always min==0, max==inf
  uint16_t loop_head = h_rvm_insert_insn(prog, RVM_FORK, 0);
  if (!h_compile_regex(prog, rep->p))
    return false;
  if (rep->sep) {
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    if (!h_compile_regex(prog, rep->sep))
      return false;
    h_rvm_insert_insn(prog, RVM_ACTION, drop_sep);
  }
  h_rvm_insert_insn(prog, RVM_GOTO, loop_head);

  // The FORK's alternative branch leaves the loop right after the GOTO.
  uint16_t loop_exit = h_rvm_get_ip(prog);
  h_rvm_patch_arg(prog, loop_head, loop_exit);

  uint16_t build_seq = h_rvm_create_action(prog, h_svm_action_make_sequence, NULL);
  h_rvm_insert_insn(prog, RVM_ACTION, build_seq);
  return true;
}
// Callback table for the repetition combinators built on HRepeat: parse
// routine, regular and context-free validity checks, desugaring, and
// compilation to the regex VM.
static const HParserVtable many_vt = {
.parse = parse_many,
.isValidRegular = many_isValidRegular,
.isValidCF = many_isValidCF,
.desugar = desugar_many,
.compile_to_rvm = many_ctrvm,
};
const HParser* h_many(const HParser* p) {
HParser* h_many(const HParser* p) {
return h_many__m(&system_allocator, p);
}
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -131,10 +154,10 @@ const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_many1(const HParser* p) {
HParser* h_many1(const HParser* p) {
return h_many1__m(&system_allocator, p);
}
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -143,10 +166,10 @@ const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_repeat_n(const HParser* p, const size_t n) {
HParser* h_repeat_n(const HParser* p, const size_t n) {
return h_repeat_n__m(&system_allocator, p, n);
}
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -155,10 +178,10 @@ const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n)
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
HParser* h_sepBy(const HParser* p, const HParser* sep) {
return h_sepBy__m(&system_allocator, p, sep);
}
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
@ -167,10 +190,10 @@ const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
HParser* h_sepBy1(const HParser* p, const HParser* sep) {
return h_sepBy1__m(&system_allocator, p, sep);
}
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
@ -213,10 +236,10 @@ static const HParserVtable length_value_vt = {
.desugar = desugar_length_value,
};
const HParser* h_length_value(const HParser* length, const HParser* value) {
HParser* h_length_value(const HParser* length, const HParser* value) {
return h_length_value__m(&system_allocator, length, value);
}
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HLenVal *env = h_new(HLenVal, 1);
env->length = length;
env->value = value;

View file

@ -6,7 +6,7 @@ static HParseResult* parse_not(void* env, HParseState* state) {
return NULL;
else {
state->input_stream = bak;
return make_result(state, NULL);
return make_result(state->arena, NULL);
}
}
@ -20,11 +20,12 @@ static const HParserVtable not_vt = {
.isValidRegular = h_false, /* see and.c for why */
.isValidCF = h_false, /* also see and.c for why */
.desugar = desugar_not,
.compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this.
};
const HParser* h_not(const HParser* p) {
HParser* h_not(const HParser* p) {
return h_not__m(&system_allocator, p);
}
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
HParser* h_not__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &not_vt, (void *)p);
}

View file

@ -1,6 +1,5 @@
#include "parser_internal.h"
static HParseResult* parse_nothing() {
// not a mistake, this parser always fails
return NULL;
@ -15,16 +14,23 @@ static HCFChoice *desugar_nothing(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for the always-failing parser.
 *
 * Emits two consecutive MATCH instructions, with arguments 0x0000 and
 * 0xFFFF, and no STEP between them - presumably so that no single input
 * byte can satisfy both (TODO confirm against the VM's MATCH semantics).
 * The env argument is unused. Always returns true.
 */
static bool nothing_ctrvm(HRVMProg *prog, void* env) {
  static const uint16_t match_args[] = { 0x0000, 0xFFFF };
  (void)env;
  for (size_t k = 0; k < sizeof(match_args) / sizeof(match_args[0]); ++k)
    h_rvm_insert_insn(prog, RVM_MATCH, match_args[k]);
  return true;
}
// Callback table for the always-failing parser. Both validity checks use
// h_true; compilation to the regex VM goes through nothing_ctrvm.
static const HParserVtable nothing_vt = {
.parse = parse_nothing,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_nothing,
.compile_to_rvm = nothing_ctrvm,
};
const HParser* h_nothing_p() {
HParser* h_nothing_p() {
return h_nothing_p__m(&system_allocator);
}
const HParser* h_nothing_p__m(HAllocator* mm__) {
HParser* h_nothing_p__m(HAllocator* mm__) {
return h_new_parser(mm__, &nothing_vt, NULL);
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
static HParseResult* parse_optional(void* env, HParseState* state) {
@ -8,7 +9,7 @@ static HParseResult* parse_optional(void* env, HParseState* state) {
state->input_stream = bak;
HParsedToken *ast = a_new(HParsedToken, 1);
ast->token_type = TT_NONE;
return make_result(state, ast);
return make_result(state->arena, ast);
}
static bool opt_isValidRegular(void *env) {
@ -26,17 +27,40 @@ static HCFChoice* desugar_optional(HAllocator *mm__, void *env) {
return h_desugar(mm__, p);
}
/*
 * SVM action for the optional combinator.
 *
 * If the top of the stack is still the mark pushed by opt_ctrvm, the
 * mark token is retyped in place to TT_NONE. Otherwise the top token is
 * popped, the entry beneath it is asserted to be the mark, and that entry
 * is overwritten with the popped token.
 *
 * The arena and env parameters are unused. Always returns true.
 */
static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) {
  HParsedToken *top = ctx->stack[ctx->stack_count-1];

  if (top->token_type == TT_MARK) {
    top->token_type = TT_NONE;
    return true;
  }

  ctx->stack_count--;
  assert(ctx->stack[ctx->stack_count-1]->token_type == TT_MARK);
  ctx->stack[ctx->stack_count-1] = top;
  return true;
}
/*
 * Regex-VM compile hook for the optional combinator.
 *
 * Emits PUSH, then a FORK whose alternative branch skips the wrapped
 * parser, then the wrapped parser, then an ACTION running
 * h_svm_action_optional on both paths.
 *
 * h_compile_regex now receives the wrapped parser itself; it used to be
 * given the wrapped parser's env, unlike the other callers that pass an
 * HParser pointer.
 *
 * Returns false if the wrapped parser cannot be compiled, true otherwise.
 */
static bool opt_ctrvm(HRVMProg *prog, void* env) {
  HParser *p = (HParser*) env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
  if (!h_compile_regex(prog, p))
    return false;
  h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL));
  return true;
}
// Callback table for the optional combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable optional_vt = {
.parse = parse_optional,
.isValidRegular = opt_isValidRegular,
.isValidCF = opt_isValidCF,
.desugar = desugar_optional,
.compile_to_rvm = opt_ctrvm,
};
const HParser* h_optional(const HParser* p) {
HParser* h_optional(const HParser* p) {
return h_optional__m(&system_allocator, p);
}
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
// TODO: re-add this
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
return h_new_parser(mm__, &optional_vt, (void *)p);

View file

@ -2,15 +2,16 @@
#define HAMMER_PARSE_INTERNAL__H
#include "../hammer.h"
#include "../internal.h"
#include "../backends/regex.h"
#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
#define a_new(typ, count) a_new_(state->arena, typ, count)
// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out.
static inline HParseResult* make_result(HParseState *state, HParsedToken *tok) {
HParseResult *ret = a_new(HParseResult, 1);
static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
ret->ast = tok;
ret->arena = state->arena;
ret->arena = arena;
return ret;
}
@ -23,9 +24,6 @@ static inline size_t token_length(HParseResult *pr) {
}
}
/* Predicate that ignores its environment and always reports true. */
static inline bool h_true(void *env) {
  (void)env;
  return true;
}
/* Predicate that ignores its environment and always reports false. */
static inline bool h_false(void *env) {
  (void)env;
  return false;
}
/* Epsilon rules happen during desugaring. This handles them. */
static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) {
static HCFChoice *res_seq_l[] = {NULL};

View file

@ -21,7 +21,7 @@ static HParseResult* parse_sequence(void *env, HParseState *state) {
}
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_SEQUENCE; tok->seq = seq;
return make_result(state, tok);
return make_result(state->arena, tok);
}
static bool sequence_isValidRegular(void *env) {
@ -59,34 +59,46 @@ static HCFChoice* desugar_sequence(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for the sequence combinator.
 *
 * Emits a PUSH, compiles every element in order through its own
 * compile_to_rvm hook, then appends an ACTION running
 * h_svm_action_make_sequence.
 *
 * Returns false as soon as one element fails to compile, true otherwise.
 */
static bool sequence_ctrvm(HRVMProg *prog, void *env) {
  const HSequence *seq = (const HSequence*)env;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);

  size_t idx = 0;
  while (idx < seq->len) {
    const HParser *child = seq->p_array[idx];
    if (!child->vtable->compile_to_rvm(prog, child->env))
      return false;
    idx++;
  }

  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
  return true;
}
// Callback table for the sequence combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable sequence_vt = {
.parse = parse_sequence,
.isValidRegular = sequence_isValidRegular,
.isValidCF = sequence_isValidCF,
.desugar = desugar_sequence,
.compile_to_rvm = sequence_ctrvm,
};
const HParser* h_sequence(const HParser* p, ...) {
HParser* h_sequence(const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
HParser* ret = h_sequence__mv(&system_allocator, p, ap);
va_end(ap);
return ret;
}
const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(mm__, p, ap);
HParser* ret = h_sequence__mv(mm__, p, ap);
va_end(ap);
return ret;
}
const HParser* h_sequence__v(const HParser* p, va_list ap) {
HParser* h_sequence__v(const HParser* p, va_list ap) {
return h_sequence__mv(&system_allocator, p, ap);
}
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
va_list ap;
size_t len = 0;
const HParser *arg;

View file

@ -15,9 +15,10 @@ static HParseResult* parse_token(void *env, HParseState *state) {
}
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
return make_result(state, tok);
return make_result(state->arena, tok);
}
static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
HToken *tok = (HToken*)env;
HCFSequence *seq = h_new(HCFSequence, 1);
@ -37,17 +38,29 @@ static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
return ret;
}
/*
 * Regex-VM compile hook for literal-token parsers.
 *
 * Emits PUSH, then one MATCH / STEP pair per byte of the token, then
 * CAPTURE. Each MATCH argument carries the byte in both halves of the
 * 16-bit argument.
 *
 * Fixes relative to the previous version:
 *  - the argument was built as `b & b << 8`, which is zero for every
 *    8-bit b; the two halves must be OR-ed together;
 *  - the loop index is now size_t, avoiding a signed/unsigned comparison
 *    against the token length.
 *
 * Always returns true.
 */
static bool token_ctrvm(HRVMProg *prog, void *env) {
  HToken *t = (HToken*)env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  for (size_t i=0; i<t->len; ++i) {
    h_rvm_insert_insn(prog, RVM_MATCH, t->str[i] | (t->str[i] << 8));
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  return true;
}
// Callback table for literal-token parsers (not file-local, unlike the
// other parser tables). Both validity checks use h_true; compilation to the
// regex VM goes through token_ctrvm.
const HParserVtable token_vt = {
.parse = parse_token,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_token,
.compile_to_rvm = token_ctrvm,
};
const HParser* h_token(const uint8_t *str, const size_t len) {
HParser* h_token(const uint8_t *str, const size_t len) {
return h_token__m(&system_allocator, str, len);
}
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
HToken *t = h_new(HToken, 1);
t->str = (uint8_t*)str, t->len = len;
return h_new_parser(mm__, &token_vt, t);

View file

@ -22,6 +22,7 @@ static const HParserVtable unimplemented_vt = {
.isValidRegular = h_false,
.isValidCF = h_false,
.desugar = desugar_unimplemented,
.compile_to_rvm = h_not_regular,
};
static HParser unimplemented = {

View file

@ -49,16 +49,32 @@ static bool ws_isValidCF(void *env) {
return p->vtable->isValidCF(p->env);
}
/*
 * Regex-VM compile hook for the whitespace combinator.
 *
 * Emits a loop that skips leading whitespace: for each of the six
 * whitespace characters a FORK / MATCH / STEP / GOTO start group, with the
 * FORK patched to fall through to the next group. The wrapped parser is
 * compiled after the loop.
 *
 * Fixes relative to the previous version:
 *  - a STEP now follows each MATCH before jumping back to the start, as in
 *    the other compilers here; without it the loop re-tested the same
 *    input position;
 *  - h_compile_regex receives the wrapped parser itself rather than its
 *    env, matching the other callers that pass an HParser pointer.
 *
 * Returns the result of compiling the wrapped parser.
 */
static bool ws_ctrvm(HRVMProg *prog, void *env) {
  HParser *p = (HParser*)env;
  static const uint8_t SPACE_CHRS[] = {' ', '\f', '\n', '\r', '\t', '\v'};
  uint16_t start = h_rvm_get_ip(prog);

  for (size_t i = 0; i < sizeof(SPACE_CHRS) / sizeof(SPACE_CHRS[0]); i++) {
    uint16_t next = h_rvm_insert_insn(prog, RVM_FORK, 0);
    h_rvm_insert_insn(prog, RVM_MATCH, (SPACE_CHRS[i] << 8) | (SPACE_CHRS[i]));
    h_rvm_insert_insn(prog, RVM_STEP, 0);
    h_rvm_insert_insn(prog, RVM_GOTO, start);
    h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
  }
  return h_compile_regex(prog, p);
}
// Callback table for the whitespace combinator: parse routine, regular and
// context-free validity checks, desugaring, and compilation to the regex VM.
static const HParserVtable whitespace_vt = {
.parse = parse_whitespace,
.isValidRegular = ws_isValidRegular,
.isValidCF = ws_isValidCF,
.desugar = desugar_whitespace,
.compile_to_rvm = ws_ctrvm,
};
const HParser* h_whitespace(const HParser* p) {
HParser* h_whitespace(const HParser* p) {
return h_whitespace__m(&system_allocator, p);
}
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &whitespace_vt, (void *)p);
}

View file

@ -39,14 +39,15 @@ static HCFChoice* desugar_xor(HAllocator *mm__, void *env) {
static const HParserVtable xor_vt = {
.parse = parse_xor,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_xor,
.compile_to_rvm = h_not_regular,
};
const HParser* h_xor(const HParser* p1, const HParser* p2) {
HParser* h_xor(const HParser* p1, const HParser* p2) {
return h_xor__m(&system_allocator, p1, p2);
}
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;