Got more regex tests passing

This commit is contained in:
Dan Hirsch 2013-05-24 02:50:05 +02:00
parent 0600440b7c
commit de38f7bce8
5 changed files with 54 additions and 18 deletions

View file

@ -98,7 +98,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
if (!heads_p[ip_s]) if (!heads_p[ip_s])
continue; continue;
THREAD.ip = ip_s; THREAD.ip = ip_s;
THREAD.trace = heads_p[ip_s];
uint8_t hi, lo; uint8_t hi, lo;
uint16_t arg; uint16_t arg;
while(ipq_top > 0) { while(ipq_top > 0) {
@ -112,8 +112,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
ret_trace = THREAD.trace; ret_trace = THREAD.trace;
goto run_trace; goto run_trace;
case RVM_MATCH: case RVM_MATCH:
// Doesn't actually validate the "must be followed by MATCH
// or STEP. It should. Preproc perhaps?
hi = (arg >> 8) & 0xff; hi = (arg >> 8) & 0xff;
lo = arg & 0xff; lo = arg & 0xff;
THREAD.ip++; THREAD.ip++;
@ -171,7 +169,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
run_trace: run_trace:
// Invert the direction of the trace linked list. // Invert the direction of the trace linked list.
ret_trace = invert_trace(ret_trace); ret_trace = invert_trace(ret_trace);
HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len); HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
// ret is in its own arena // ret is in its own arena
@ -234,9 +231,13 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
tmp_res->bytes.len = cur->input_pos - tmp_res->index; tmp_res->bytes.len = cur->input_pos - tmp_res->index;
break; break;
case SVM_ACCEPT: case SVM_ACCEPT:
assert(ctx.stack_count == 1); assert(ctx.stack_count <= 1);
HParseResult *res = a_new(HParseResult, 1); HParseResult *res = a_new(HParseResult, 1);
if (ctx.stack_count == 1) {
res->ast = ctx.stack[0]; res->ast = ctx.stack[0];
} else {
res->ast = NULL;
}
res->bit_length = cur->input_pos * 8; res->bit_length = cur->input_pos * 8;
res->arena = arena; res->arena = arena;
return res; return res;

View file

@ -61,7 +61,7 @@ void dump_rvm_prog(HRVMProg *prog) {
uint8_t low, high; uint8_t low, high;
low = insn->arg & 0xff; low = insn->arg & 0xff;
high = (insn->arg >> 8) & 0xff; high = (insn->arg >> 8) & 0xff;
if (high > low) if (high < low)
printf("NONE\n"); printf("NONE\n");
else { else {
if (low >= 0x32 && low <= 0x7e) if (low >= 0x32 && low <= 0x7e)
@ -81,3 +81,20 @@ void dump_rvm_prog(HRVMProg *prog) {
} }
} }
} }
/*
 * Print a human-readable listing of an SVM trace to stdout.
 *
 * Walks the linked list starting at `trace` (following ->next until NULL)
 * and prints one line per entry: the entry's input position, then the
 * opcode name looked up in svm_op_names. For SVM_ACTION entries the line
 * also carries the symbol string that getsym() returns for the action
 * registered at index `arg` in prog->actions.
 *
 * prog  - program whose `actions` table is indexed by an entry's `arg`.
 * trace - first entry to print; NULL prints nothing.
 *
 * NOTE(review): input_pos is printed with %zd; confirm that matches its
 * declared type.
 */
void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) {
  while (trace != NULL) {
    printf("@%04zd %-10s", trace->input_pos, svm_op_names[trace->opcode]);
    if (trace->opcode == SVM_ACTION) {
      // TODO: somehow format the argument to action
      char *action_name = getsym(prog->actions[trace->arg].action);
      printf("%s\n", action_name);
      // The string from getsym() is released here, right after printing.
      free(action_name);
    } else {
      // Every other opcode has nothing extra to show; just end the line.
      printf("\n");
    }
    trace = trace->next;
  }
}

View file

@ -41,15 +41,27 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
h_rvm_insert_insn(prog, RVM_PUSH, 0); h_rvm_insert_insn(prog, RVM_PUSH, 0);
uint16_t start = h_rvm_get_ip(prog); uint16_t start = h_rvm_get_ip(prog);
for (size_t i=0; i<256; ++i) {
// TODO: merge ranges. uint8_t range_start = 0;
if (charset_isset(cs, i)) { bool collecting = false;
for (size_t i=0; i<257; ++i) {
// Position 256 is only there so that every included character has
// a non-included character after it.
if (i < 256 && charset_isset(cs, i)) {
if (!collecting) {
collecting = true;
range_start = i;
}
} else {
if (collecting) {
collecting = false;
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
h_rvm_insert_insn(prog, RVM_MATCH, i | i << 8); h_rvm_insert_insn(prog, RVM_MATCH, range_start | i << 8);
h_rvm_insert_insn(prog, RVM_GOTO, 0); h_rvm_insert_insn(prog, RVM_GOTO, 0);
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
} }
} }
}
h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF); h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0); uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
for (size_t i=start; i<jump; ++i) { for (size_t i=start; i<jump; ++i) {

View file

@ -97,7 +97,7 @@ static bool is_ctrvm(HRVMProg *prog, void* env) {
HIgnoreSeq *seq = (HIgnoreSeq*)env; HIgnoreSeq *seq = (HIgnoreSeq*)env;
for (size_t i=0; i<seq->len; ++i) { for (size_t i=0; i<seq->len; ++i) {
h_rvm_insert_insn(prog, RVM_PUSH, 0); h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, seq->parsers[i]->env)) if (!h_compile_regex(prog, seq->parsers[i]))
return false; return false;
} }
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));

View file

@ -67,9 +67,15 @@ static bool ws_ctrvm(HRVMProg *prog, void *env) {
uint16_t start = h_rvm_get_ip(prog); uint16_t start = h_rvm_get_ip(prog);
uint16_t next; uint16_t next;
for (int i = 0; i < 6; i++) { uint16_t ranges[2] = {
0x0d09,
0x2020,
};
for (int i = 0; i < 2; i++) {
next = h_rvm_insert_insn(prog, RVM_FORK, 0); next = h_rvm_insert_insn(prog, RVM_FORK, 0);
h_rvm_insert_insn(prog, RVM_MATCH, (SPACE_CHRS[i] << 8) | (SPACE_CHRS[i])); h_rvm_insert_insn(prog, RVM_MATCH, ranges[i]);
h_rvm_insert_insn(prog, RVM_STEP, 0);
h_rvm_insert_insn(prog, RVM_GOTO, start); h_rvm_insert_insn(prog, RVM_GOTO, start);
h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
} }