Merge remote-tracking branch 'mlp/regex'

This commit is contained in:
Dan Hirsch 2013-04-22 21:07:18 -04:00
commit 204147a3d2
26 changed files with 218 additions and 2 deletions

View file

@ -93,3 +93,8 @@ bool h_true(void* env) {
(void)env; (void)env;
return true; return true;
} }
// compile_to_rvm hook that emits no instructions and always reports
// failure. Both arguments are ignored.
bool h_not_regular(HRVMProg *prog, void *env) {
  (void)prog;
  (void)env;
  return false;
}

View file

@ -225,6 +225,7 @@ void h_hashtable_free(HHashTable* ht);
bool h_false(void*); bool h_false(void*);
bool h_true(void*); bool h_true(void*);
bool h_not_regular(HRVMProg*, void*);
#if 0 #if 0
#include <stdlib.h> #include <stdlib.h>

View file

@ -29,10 +29,16 @@ static bool action_isValidCF(void *env) {
return a->p->vtable->isValidCF(a->p->env); return a->p->vtable->isValidCF(a->p->env);
} }
// compile_to_rvm hook for h_action: delegates straight to the wrapped
// parser's own compile_to_rvm, handing it that parser's env.
// Returns whatever the wrapped parser's hook returns.
static bool action_ctrvm(HRVMProg *prog, void* env) {
  HParseAction *action = (HParseAction*)env;
  const HParser *inner = action->p;
  return inner->vtable->compile_to_rvm(prog, inner->env);
}
static const HParserVtable action_vt = { static const HParserVtable action_vt = {
.parse = parse_action, .parse = parse_action,
.isValidRegular = action_isValidRegular, .isValidRegular = action_isValidRegular,
.isValidCF = action_isValidCF, .isValidCF = action_isValidCF,
.compile_to_rvm = action_ctrvm,
}; };
const HParser* h_action(const HParser* p, const HAction a) { const HParser* h_action(const HParser* p, const HAction a) {

View file

@ -16,6 +16,7 @@ static const HParserVtable and_vt = {
to get right, so we're leaving it for a future to get right, so we're leaving it for a future
revision. --mlp, 18/12/12 */ revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */ .isValidCF = h_false, /* despite TODO above, this remains false. */
.compile_to_rvm = h_not_regular,
}; };

View file

@ -27,10 +27,16 @@ static bool ab_isValidCF(void *env) {
return ab->p->vtable->isValidCF(ab->p->env); return ab->p->vtable->isValidCF(ab->p->env);
} }
// compile_to_rvm hook for the attr_bool parser: compiles the wrapped
// parser via h_compile_regex and forwards its result unchanged.
static bool ab_ctrvm(HRVMProg *prog, void *env) {
  HAttrBool *attr = (HAttrBool*)env;
  bool compiled = h_compile_regex(prog, attr->p);
  return compiled;
}
static const HParserVtable attr_bool_vt = { static const HParserVtable attr_bool_vt = {
.parse = parse_attr_bool, .parse = parse_attr_bool,
.isValidRegular = ab_isValidRegular, .isValidRegular = ab_isValidRegular,
.isValidCF = ab_isValidCF, .isValidCF = ab_isValidCF,
.compile_to_rvm = ab_ctrvm,
}; };

View file

@ -16,11 +16,24 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
return make_result(state, result); return make_result(state, result);
} }
// compile_to_rvm hook for h_bits: emits PUSH, then one MATCH/STEP pair
// per whole byte of the requested bit length, then CAPTURE.
// Always returns true.
static bool bits_ctrvm(HRVMProg *prog, void* env) {
  struct bits_env *env_ = (struct bits_env*)env;
  // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
  size_t nbytes = env_->length / 8;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // The original condition was "(i < env_->length)/8", which divides the
  // boolean result by 8 and is therefore always 0: no bytes were matched.
  for (size_t i = 0; i < nbytes; ++i) {
    // 0xFF00: presumably "any byte" (low bound 0x00, high bound 0xFF) --
    // confirm against the RVM_MATCH implementation.
    h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  return true;
}
static const HParserVtable bits_vt = { static const HParserVtable bits_vt = {
.parse = parse_bits, .parse = parse_bits,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = bits_ctrvm,
}; };
const HParser* h_bits(size_t len, bool sign) { const HParser* h_bits(size_t len, bool sign) {
return h_bits__m(&system_allocator, len, sign); return h_bits__m(&system_allocator, len, sign);
} }

View file

@ -45,6 +45,7 @@ static const HParserVtable butnot_vt = {
.parse = parse_butnot, .parse = parse_butnot,
.isValidRegular = h_false, .isValidRegular = h_false,
.isValidCF = bn_isValidCF, .isValidCF = bn_isValidCF,
.compile_to_rvm = h_not_regular,
}; };
const HParser* h_butnot(const HParser* p1, const HParser* p2) { const HParser* h_butnot(const HParser* p1, const HParser* p2) {

View file

@ -12,10 +12,18 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
} }
} }
// compile_to_rvm hook for h_ch: emits a MATCH for the single byte stored
// in env, followed by a STEP. Always returns true.
static bool ch_ctrvm(HRVMProg *prog, void* env) {
  uint8_t c = (uint8_t)(unsigned long)(env);
  // Pack c into both bytes of the argument (presumably an inclusive
  // [low byte, high byte] range -- confirm against the RVM_MATCH
  // implementation). The original "c & c << 8" parsed as "c & (c << 8)",
  // which is always 0 for an 8-bit value.
  h_rvm_insert_insn(prog, RVM_MATCH, (uint16_t)(c | (c << 8)));
  h_rvm_insert_insn(prog, RVM_STEP, 0);
  return true;
}
static const HParserVtable ch_vt = { static const HParserVtable ch_vt = {
.parse = parse_ch, .parse = parse_ch,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = ch_ctrvm,
}; };
const HParser* h_ch(const uint8_t c) { const HParser* h_ch(const uint8_t c) {

View file

@ -32,10 +32,31 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
return NULL; return NULL;
} }
// FUTURE: this is horribly inefficient
// compile_to_rvm hook for charset parsers: for every byte value in the
// set, emits FORK / MATCH / GOTO, then a final STEP that all the GOTOs
// are patched to jump to. Always returns true.
static bool cs_ctrvm(HRVMProg *prog, void *env) {
  HCharset cs = (HCharset)env;
  uint16_t start = h_rvm_get_ip(prog);
  for (size_t i=0; i<256; ++i) {
    if (charset_isset(cs, i)) {
      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
      // Pack the byte into both halves of the argument. The original
      // "i & i << 8" parsed as "i & (i << 8)", which is 0 for every i < 256.
      h_rvm_insert_insn(prog, RVM_MATCH, (uint16_t)(i | (i << 8)));
      h_rvm_insert_insn(prog, RVM_GOTO, 0);
      // The FORK target is the instruction following this alternative.
      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
    }
  }
  // NOTE(review): the path that falls off the last alternative also lands
  // on this STEP; confirm whether a never-matching MATCH is needed first.
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  // Point every GOTO emitted above at the shared STEP.
  for (size_t i=start; i<jump; ++i) {
    if (RVM_GOTO == prog->insns[i].op)
      h_rvm_patch_arg(prog, i, jump);
  }
  return true;
}
static const HParserVtable charset_vt = { static const HParserVtable charset_vt = {
.parse = parse_charset, .parse = parse_charset,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = cs_ctrvm,
}; };
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {

View file

@ -39,10 +39,29 @@ static bool choice_isValidCF(void *env) {
return true; return true;
} }
// compile_to_rvm hook for h_choice: for each alternative emits FORK,
// the alternative's own code, and a GOTO; afterwards emits a STEP and
// patches every GOTO to jump to it.
// Returns false as soon as any alternative fails to compile.
static bool choice_ctrvm(HRVMProg *prog, void* env) {
  HSequence *s = (HSequence*)env;
  uint16_t gotos[s->len];
  for (size_t i=0; i<s->len; ++i) {
    uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
    // Compile the alternative parser itself (not its env), as the other
    // h_compile_regex call sites that pass an HParser* do.
    if (!h_compile_regex(prog, s->p_array[i]))
      return false;
    gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
    // The FORK target is the start of the next alternative.
    h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  }
  // NOTE(review): confirm that a STEP is the intended join instruction here.
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  // gotos[] is indexed by alternative, so walk it from 0. The original
  // started at the program's instruction pointer, which skipped entries
  // (or all of them) whenever the program was not empty.
  for (size_t i=0; i<s->len; ++i) {
    h_rvm_patch_arg(prog, gotos[i], jump);
  }
  return true;
}
static const HParserVtable choice_vt = { static const HParserVtable choice_vt = {
.parse = parse_choice, .parse = parse_choice,
.isValidRegular = choice_isValidRegular, .isValidRegular = choice_isValidRegular,
.isValidCF = choice_isValidCF, .isValidCF = choice_isValidCF,
.compile_to_rvm = choice_ctrvm,
}; };
const HParser* h_choice(const HParser* p, ...) { const HParser* h_choice(const HParser* p, ...) {

View file

@ -44,6 +44,7 @@ static HParserVtable difference_vt = {
.parse = parse_difference, .parse = parse_difference,
.isValidRegular = h_false, .isValidRegular = h_false,
.isValidCF = diff_isValidCF, .isValidCF = diff_isValidCF,
.compile_to_rvm = h_not_regular,
}; };
const HParser* h_difference(const HParser* p1, const HParser* p2) { const HParser* h_difference(const HParser* p1, const HParser* p2) {

View file

@ -10,10 +10,16 @@ static HParseResult* parse_end(void *env, HParseState *state) {
} }
} }
// compile_to_rvm hook for h_end_p: emits a single RVM_EOF instruction.
// env is unused. Always returns true.
static bool end_ctrvm(HRVMProg *prog, void *env) {
  (void)env;
  h_rvm_insert_insn(prog, RVM_EOF, 0);
  return true;
}
static const HParserVtable end_vt = { static const HParserVtable end_vt = {
.parse = parse_end, .parse = parse_end,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = end_ctrvm,
}; };
const HParser* h_end_p() { const HParser* h_end_p() {

View file

@ -8,10 +8,15 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
return res; return res;
} }
// compile_to_rvm hook for the epsilon parser: emits no instructions and
// reports success. Neither argument is used.
static bool epsilon_ctrvm(HRVMProg *prog, void* env) {
  (void)prog;
  (void)env;
  return true;
}
static const HParserVtable epsilon_vt = { static const HParserVtable epsilon_vt = {
.parse = parse_epsilon, .parse = parse_epsilon,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = epsilon_ctrvm,
}; };
static const HParser epsilon_p = { static const HParser epsilon_p = {

View file

@ -1,4 +1,5 @@
#include "parser_internal.h" #include "parser_internal.h"
#include "backends/regex_actions.h"
static HParseResult* parse_ignore(void* env, HParseState* state) { static HParseResult* parse_ignore(void* env, HParseState* state) {
HParseResult *res0 = h_do_parse((HParser*)env, state); HParseResult *res0 = h_do_parse((HParser*)env, state);
@ -20,10 +21,18 @@ static bool ignore_isValidCF(void *env) {
return (p->vtable->isValidCF(p->env)); return (p->vtable->isValidCF(p->env));
} }
// compile_to_rvm hook for h_ignore: compiles the wrapped parser, then
// emits an ACTION running h_svm_action_pop.
// Returns false if the wrapped parser cannot be compiled.
static bool ignore_ctrvm(HRVMProg *prog, void *env) {
  HParser *p = (HParser*)env;
  // env is the wrapped parser itself, so compile p rather than p->env,
  // and propagate a failure instead of silently emitting the ACTION.
  if (!h_compile_regex(prog, p))
    return false;
  // h_rvm_create_action takes (prog, action, env) at its other call
  // sites; the pop action needs no env.
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
  return true;
}
static const HParserVtable ignore_vt = { static const HParserVtable ignore_vt = {
.parse = parse_ignore, .parse = parse_ignore,
.isValidRegular = ignore_isValidRegular, .isValidRegular = ignore_isValidRegular,
.isValidCF = ignore_isValidCF, .isValidCF = ignore_isValidCF,
.compile_to_rvm = ignore_ctrvm,
}; };
const HParser* h_ignore(const HParser* p) { const HParser* h_ignore(const HParser* p) {

View file

@ -5,7 +5,7 @@
// general case: parse sequence, pick one result // general case: parse sequence, pick one result
// //
typedef struct { typedef struct HIgnoreSeq_ {
const HParser **parsers; const HParser **parsers;
size_t len; // how many parsers in 'ps' size_t len; // how many parsers in 'ps'
size_t which; // whose result to return size_t which; // whose result to return
@ -44,10 +44,37 @@ static bool is_isValidCF(void *env) {
return true; return true;
} }
// SVM action for ignoreseq: walks the sub-results from last to first,
// remembers the token produced by sub-parser `which` (if it produced
// one), pops everything down to and including each sub-parser's mark,
// and finally pushes the remembered token (NULL if there was none).
// Always returns true.
static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
  (void)arena;
  HIgnoreSeq *seq = (HIgnoreSeq*)env;
  // Initialized so the final push is well-defined even when the selected
  // sub-parser left nothing above its mark.
  HParsedToken *save = NULL;
  // We can assume that each subitem generated at most one item on the
  // stack.
  for (size_t i = seq->len; i-- > 0; ) {
    // The top of the stack is at stack_count-1; the original tested
    // stack[stack_count], one slot past the top.
    if (i == seq->which && ctx->stack[ctx->stack_count-1]->token_type != TT_MARK)
      save = ctx->stack[ctx->stack_count-1];
    // skip over everything up to and including the mark.
    while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)
      ;
  }
  ctx->stack[ctx->stack_count++] = save;
  // The original fell off the end of a bool function without returning.
  return true;
}
// compile_to_rvm hook for ignoreseq: for each sub-parser emits a PUSH
// followed by that sub-parser's code, then a single ACTION running
// h_svm_action_ignoreseq with this parser's env.
// Returns false as soon as any sub-parser fails to compile.
static bool is_ctrvm(HRVMProg *prog, void* env) {
  HIgnoreSeq *seq = (HIgnoreSeq*)env;
  for (size_t i=0; i<seq->len; ++i) {
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    // Compile the sub-parser itself (not its env), as the other
    // h_compile_regex call sites that pass an HParser* do.
    if (!h_compile_regex(prog, seq->parsers[i]))
      return false;
  }
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
  return true;
}
static const HParserVtable ignoreseq_vt = { static const HParserVtable ignoreseq_vt = {
.parse = parse_ignoreseq, .parse = parse_ignoreseq,
.isValidRegular = is_isValidRegular, .isValidRegular = is_isValidRegular,
.isValidCF = is_isValidCF, .isValidCF = is_isValidCF,
.compile_to_rvm = is_ctrvm,
}; };

View file

@ -14,6 +14,7 @@ static const HParserVtable indirect_vt = {
.parse = parse_indirect, .parse = parse_indirect,
.isValidRegular = h_false, .isValidRegular = h_false,
.isValidCF = indirect_isValidCF, .isValidCF = indirect_isValidCF,
.compile_to_rvm = h_not_regular,
}; };
void h_bind_indirect(HParser* indirect, const HParser* inner) { void h_bind_indirect(HParser* indirect, const HParser* inner) {

View file

@ -28,10 +28,31 @@ static HParseResult* parse_int_range(void *env, HParseState *state) {
} }
} }
// SVM action for h_int_range: checks that the token on top of the stack
// is an integer lying within [lower, upper] of the HRange in env.
// Returns true when in range; false when out of range or when the token
// is neither TT_SINT nor TT_UINT.
bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
  (void)arena;
  // The original cast was spelled "(*HRange)env", which does not parse.
  HRange *r_env = (HRange*)env;
  HParsedToken *head = ctx->stack[ctx->stack_count-1];
  switch (head->token_type) {
  case TT_SINT:
    return head->sint >= r_env->lower && head->sint <= r_env->upper;
  case TT_UINT:
    // Bounds are converted to uint64_t for the unsigned comparison.
    return head->uint >= (uint64_t)r_env->lower && head->uint <= (uint64_t)r_env->upper;
  default:
    return false;
  }
}
// compile_to_rvm hook for h_int_range: compiles the wrapped parser, then
// emits an ACTION running h_svm_action_validate_int_range with this
// parser's env.
// Returns false if the wrapped parser cannot be compiled, true otherwise.
static bool ir_ctrvm(HRVMProg *prog, void *env) {
  // The original cast was spelled "(*HRange)env", which does not parse.
  HRange *r_env = (HRange*)env;
  if (!h_compile_regex(prog, r_env->p))
    return false;
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
  // NOTE(review): the original returned false here even after emitting the
  // full program; report success like the other compile hooks do. Confirm
  // the range-validation action is ready to be enabled.
  return true;
}
static const HParserVtable int_range_vt = { static const HParserVtable int_range_vt = {
.parse = parse_int_range, .parse = parse_int_range,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = ir_ctrvm,
}; };
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {

View file

@ -56,10 +56,29 @@ static bool many_isValidCF(void *env) {
repeat->sep->vtable->isValidCF(repeat->sep->env)); repeat->sep->vtable->isValidCF(repeat->sep->env));
} }
// compile_to_rvm hook for the repeat parsers: emits
//   PUSH; FORK end; <p>; <sep>; ACTION clear_to_mark; GOTO fork; end: ACTION make_sequence
// Returns false if either the element parser or the separator fails to
// compile.
static bool many_ctrvm(HRVMProg *prog, void *env) {
  HRepeat *repeat = (HRepeat*)env;
  // FIXME: Implement clear_to_mark
  uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
  if (!h_compile_regex(prog, repeat->p))
    return false;
  if (!h_compile_regex(prog, repeat->sep))
    return false;
  h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
  // Loop back to the FORK, whose argument is patched to the exit point.
  h_rvm_insert_insn(prog, RVM_GOTO, insn);
  h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  // Register the action first and pass its id, as done for clear_to_mark;
  // the original passed the function pointer and NULL directly to
  // h_rvm_insert_insn as a third and fourth argument.
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
  return true;
}
static const HParserVtable many_vt = { static const HParserVtable many_vt = {
.parse = parse_many, .parse = parse_many,
.isValidRegular = many_isValidRegular, .isValidRegular = many_isValidRegular,
.isValidCF = many_isValidCF, .isValidCF = many_isValidCF,
.compile_to_rvm = many_ctrvm,
}; };
const HParser* h_many(const HParser* p) { const HParser* h_many(const HParser* p) {

View file

@ -14,6 +14,7 @@ static const HParserVtable not_vt = {
.parse = parse_not, .parse = parse_not,
.isValidRegular = h_false, /* see and.c for why */ .isValidRegular = h_false, /* see and.c for why */
.isValidCF = h_false, /* also see and.c for why */ .isValidCF = h_false, /* also see and.c for why */
.compile_to_rvm = h_not_regular,
}; };
const HParser* h_not(const HParser* p) { const HParser* h_not(const HParser* p) {

View file

@ -1,15 +1,20 @@
#include "parser_internal.h" #include "parser_internal.h"
static HParseResult* parse_nothing() { static HParseResult* parse_nothing() {
// not a mistake, this parser always fails // not a mistake, this parser always fails
return NULL; return NULL;
} }
// compile_to_rvm hook for h_nothing_p: emits one RVM_MATCH with argument
// 0x00FF. env is unused. Always returns true.
static bool nothing_ctrvm(HRVMProg *prog, void* env) {
  (void)env;
  // 0x00FF: presumably an empty byte range (low bound 0xFF, high bound
  // 0x00) so the match can never succeed -- confirm against the
  // RVM_MATCH implementation.
  h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
  return true;
}
static const HParserVtable nothing_vt = { static const HParserVtable nothing_vt = {
.parse = parse_nothing, .parse = parse_nothing,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = nothing_ctrvm,
}; };
const HParser* h_nothing_p() { const HParser* h_nothing_p() {

View file

@ -21,10 +21,20 @@ static bool opt_isValidCF(void *env) {
return p->vtable->isValidCF(p->env); return p->vtable->isValidCF(p->env);
} }
// compile_to_rvm hook for h_optional: emits a FORK, then the wrapped
// parser's code, and patches the FORK to target the instruction after it.
// Returns false if the wrapped parser cannot be compiled.
static bool opt_ctrvm(HRVMProg *prog, void* env) {
  uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
  HParser *p = (HParser*) env;
  // env is the wrapped parser itself, so compile p rather than p->env.
  if (!h_compile_regex(prog, p))
    return false;
  h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  return true;
}
static const HParserVtable optional_vt = { static const HParserVtable optional_vt = {
.parse = parse_optional, .parse = parse_optional,
.isValidRegular = opt_isValidRegular, .isValidRegular = opt_isValidRegular,
.isValidCF = opt_isValidCF, .isValidCF = opt_isValidCF,
.compile_to_rvm = opt_ctrvm,
}; };
const HParser* h_optional(const HParser* p) { const HParser* h_optional(const HParser* p) {

View file

@ -42,10 +42,20 @@ static bool sequence_isValidCF(void *env) {
return true; return true;
} }
// compile_to_rvm hook for h_sequence: invokes each child parser's own
// compile_to_rvm in order, passing that child's env.
// Stops and returns false at the first child that fails; returns true
// once every child has compiled.
static bool sequence_ctrvm(HRVMProg *prog, void *env) {
  HSequence *seq = (HSequence*)env;
  size_t idx = 0;
  while (idx < seq->len) {
    const HParser *child = seq->p_array[idx++];
    if (!child->vtable->compile_to_rvm(prog, child->env))
      return false;
  }
  return true;
}
static const HParserVtable sequence_vt = { static const HParserVtable sequence_vt = {
.parse = parse_sequence, .parse = parse_sequence,
.isValidRegular = sequence_isValidRegular, .isValidRegular = sequence_isValidRegular,
.isValidCF = sequence_isValidCF, .isValidCF = sequence_isValidCF,
.compile_to_rvm = sequence_ctrvm,
}; };
const HParser* h_sequence(const HParser* p, ...) { const HParser* h_sequence(const HParser* p, ...) {

View file

@ -20,10 +20,22 @@ static HParseResult* parse_token(void *env, HParseState *state) {
return make_result(state, tok); return make_result(state, tok);
} }
// compile_to_rvm hook for h_token: emits PUSH, then one MATCH/STEP pair
// per byte of the token string, then CAPTURE. Always returns true.
static bool token_ctrvm(HRVMProg *prog, void *env) {
  HToken *t = (HToken*)env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // size_t index: avoids comparing a signed int against the length.
  for (size_t i=0; i<t->len; ++i) {
    // Pack the byte into both halves of the argument (presumably an
    // inclusive [low byte, high byte] range -- confirm against the
    // RVM_MATCH implementation). The original used '&', which parsed as
    // "b & (b << 8)" and is always 0 for a byte.
    h_rvm_insert_insn(prog, RVM_MATCH, (uint16_t)(t->str[i] | (t->str[i] << 8)));
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  return true;
}
const HParserVtable token_vt = { const HParserVtable token_vt = {
.parse = parse_token, .parse = parse_token,
.isValidRegular = h_true, .isValidRegular = h_true,
.isValidCF = h_true, .isValidCF = h_true,
.compile_to_rvm = token_ctrvm,
}; };
const HParser* h_token(const uint8_t *str, const size_t len) { const HParser* h_token(const uint8_t *str, const size_t len) {

View file

@ -16,6 +16,7 @@ static const HParserVtable unimplemented_vt = {
.parse = parse_unimplemented, .parse = parse_unimplemented,
.isValidRegular = h_false, .isValidRegular = h_false,
.isValidCF = h_false, .isValidCF = h_false,
.compile_to_rvm = h_not_regular,
}; };
static HParser unimplemented = { static HParser unimplemented = {

View file

@ -24,10 +24,16 @@ static bool ws_isValidCF(void *env) {
return p->vtable->isValidCF(p->env); return p->vtable->isValidCF(p->env);
} }
// compile_to_rvm hook for h_whitespace: compiles the wrapped parser and
// forwards the result. No instructions for skipping whitespace are
// emitted here.
static bool ws_ctrvm(HRVMProg *prog, void *env) {
  HParser *p = (HParser*)env;
  // env is the wrapped parser itself, so compile p rather than p->env.
  return h_compile_regex(prog, p);
}
static const HParserVtable whitespace_vt = { static const HParserVtable whitespace_vt = {
.parse = parse_whitespace, .parse = parse_whitespace,
.isValidRegular = ws_isValidRegular, .isValidRegular = ws_isValidRegular,
.isValidCF = ws_isValidCF, .isValidCF = ws_isValidCF,
.compile_to_rvm = ws_ctrvm,
}; };
const HParser* h_whitespace(const HParser* p) { const HParser* h_whitespace(const HParser* p) {

View file

@ -41,6 +41,7 @@ static const HParserVtable xor_vt = {
.parse = parse_xor, .parse = parse_xor,
.isValidRegular = h_false, .isValidRegular = h_false,
.isValidCF = xor_isValidCF, .isValidCF = xor_isValidCF,
.compile_to_rvm = h_not_regular,
}; };
const HParser* h_xor(const HParser* p1, const HParser* p2) { const HParser* h_xor(const HParser* p1, const HParser* p2) {