Got more regex tests passing
This commit is contained in:
parent
0600440b7c
commit
de38f7bce8
5 changed files with 54 additions and 18 deletions
|
|
@ -98,7 +98,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
if (!heads_p[ip_s])
|
if (!heads_p[ip_s])
|
||||||
continue;
|
continue;
|
||||||
THREAD.ip = ip_s;
|
THREAD.ip = ip_s;
|
||||||
|
THREAD.trace = heads_p[ip_s];
|
||||||
uint8_t hi, lo;
|
uint8_t hi, lo;
|
||||||
uint16_t arg;
|
uint16_t arg;
|
||||||
while(ipq_top > 0) {
|
while(ipq_top > 0) {
|
||||||
|
|
@ -112,8 +112,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
ret_trace = THREAD.trace;
|
ret_trace = THREAD.trace;
|
||||||
goto run_trace;
|
goto run_trace;
|
||||||
case RVM_MATCH:
|
case RVM_MATCH:
|
||||||
// Doesn't actually validate the "must be followed by MATCH
|
|
||||||
// or STEP. It should. Preproc perhaps?
|
|
||||||
hi = (arg >> 8) & 0xff;
|
hi = (arg >> 8) & 0xff;
|
||||||
lo = arg & 0xff;
|
lo = arg & 0xff;
|
||||||
THREAD.ip++;
|
THREAD.ip++;
|
||||||
|
|
@ -171,7 +169,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
run_trace:
|
run_trace:
|
||||||
// Invert the direction of the trace linked list.
|
// Invert the direction of the trace linked list.
|
||||||
|
|
||||||
|
|
||||||
ret_trace = invert_trace(ret_trace);
|
ret_trace = invert_trace(ret_trace);
|
||||||
HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
|
HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
|
||||||
// ret is in its own arena
|
// ret is in its own arena
|
||||||
|
|
@ -234,9 +231,13 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
|
||||||
tmp_res->bytes.len = cur->input_pos - tmp_res->index;
|
tmp_res->bytes.len = cur->input_pos - tmp_res->index;
|
||||||
break;
|
break;
|
||||||
case SVM_ACCEPT:
|
case SVM_ACCEPT:
|
||||||
assert(ctx.stack_count == 1);
|
assert(ctx.stack_count <= 1);
|
||||||
HParseResult *res = a_new(HParseResult, 1);
|
HParseResult *res = a_new(HParseResult, 1);
|
||||||
|
if (ctx.stack_count == 1) {
|
||||||
res->ast = ctx.stack[0];
|
res->ast = ctx.stack[0];
|
||||||
|
} else {
|
||||||
|
res->ast = NULL;
|
||||||
|
}
|
||||||
res->bit_length = cur->input_pos * 8;
|
res->bit_length = cur->input_pos * 8;
|
||||||
res->arena = arena;
|
res->arena = arena;
|
||||||
return res;
|
return res;
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ void dump_rvm_prog(HRVMProg *prog) {
|
||||||
uint8_t low, high;
|
uint8_t low, high;
|
||||||
low = insn->arg & 0xff;
|
low = insn->arg & 0xff;
|
||||||
high = (insn->arg >> 8) & 0xff;
|
high = (insn->arg >> 8) & 0xff;
|
||||||
if (high > low)
|
if (high < low)
|
||||||
printf("NONE\n");
|
printf("NONE\n");
|
||||||
else {
|
else {
|
||||||
if (low >= 0x32 && low <= 0x7e)
|
if (low >= 0x32 && low <= 0x7e)
|
||||||
|
|
@ -81,3 +81,20 @@ void dump_rvm_prog(HRVMProg *prog) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Debug helper: print a listing of an SVM trace to stdout, one line per entry.
//
// prog  - program whose `actions` table is indexed by `trace->arg` when an
//         entry's opcode is SVM_ACTION.
// trace - head of a list linked through `->next`; the walk stops at NULL, so a
//         NULL trace prints nothing.
//
// Each line starts with "@" and the entry's input_pos zero-padded to 4 digits,
// followed by the opcode name from svm_op_names left-justified in 10 columns.
void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) {
|
||||||
|
char* symref;
|
||||||
|
for (; trace != NULL; trace = trace->next) {
|
||||||
|
// NOTE(review): %zd expects a signed size type; confirm it matches the
// declared type of input_pos.
printf("@%04zd %-10s", trace->input_pos, svm_op_names[trace->opcode]);
|
||||||
|
switch (trace->opcode) {
|
||||||
|
case SVM_ACTION:
|
||||||
|
// Look up a printable name for the action callback selected by this entry.
// Presumably getsym returns a heap-allocated string, since the result is
// handed to free() below -- confirm against its definition.
symref = getsym(prog->actions[trace->arg].action);
|
||||||
|
// TODO: somehow format the argument to action
|
||||||
|
printf("%s\n", symref);
|
||||||
|
free(symref);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Every other opcode prints no operand; just end the line.
printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,15 +41,27 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
|
||||||
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
|
||||||
uint16_t start = h_rvm_get_ip(prog);
|
uint16_t start = h_rvm_get_ip(prog);
|
||||||
for (size_t i=0; i<256; ++i) {
|
|
||||||
// TODO: merge ranges.
|
uint8_t range_start = 0;
|
||||||
if (charset_isset(cs, i)) {
|
bool collecting = false;
|
||||||
|
for (size_t i=0; i<257; ++i) {
|
||||||
|
// Position 256 is only there so that every included character has
|
||||||
|
// a non-included character after it.
|
||||||
|
if (i < 256 && charset_isset(cs, i)) {
|
||||||
|
if (!collecting) {
|
||||||
|
collecting = true;
|
||||||
|
range_start = i;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (collecting) {
|
||||||
|
collecting = false;
|
||||||
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
h_rvm_insert_insn(prog, RVM_MATCH, i | i << 8);
|
h_rvm_insert_insn(prog, RVM_MATCH, range_start | i << 8);
|
||||||
h_rvm_insert_insn(prog, RVM_GOTO, 0);
|
h_rvm_insert_insn(prog, RVM_GOTO, 0);
|
||||||
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
|
h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
|
||||||
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
|
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
for (size_t i=start; i<jump; ++i) {
|
for (size_t i=start; i<jump; ++i) {
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@ static bool is_ctrvm(HRVMProg *prog, void* env) {
|
||||||
HIgnoreSeq *seq = (HIgnoreSeq*)env;
|
HIgnoreSeq *seq = (HIgnoreSeq*)env;
|
||||||
for (size_t i=0; i<seq->len; ++i) {
|
for (size_t i=0; i<seq->len; ++i) {
|
||||||
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
if (!h_compile_regex(prog, seq->parsers[i]->env))
|
if (!h_compile_regex(prog, seq->parsers[i]))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
|
||||||
|
|
|
||||||
|
|
@ -67,9 +67,15 @@ static bool ws_ctrvm(HRVMProg *prog, void *env) {
|
||||||
uint16_t start = h_rvm_get_ip(prog);
|
uint16_t start = h_rvm_get_ip(prog);
|
||||||
uint16_t next;
|
uint16_t next;
|
||||||
|
|
||||||
for (int i = 0; i < 6; i++) {
|
uint16_t ranges[2] = {
|
||||||
|
0x0d09,
|
||||||
|
0x2020,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
next = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
next = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
h_rvm_insert_insn(prog, RVM_MATCH, (SPACE_CHRS[i] << 8) | (SPACE_CHRS[i]));
|
h_rvm_insert_insn(prog, RVM_MATCH, ranges[i]);
|
||||||
|
h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
h_rvm_insert_insn(prog, RVM_GOTO, start);
|
h_rvm_insert_insn(prog, RVM_GOTO, start);
|
||||||
h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
|
h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue