Got a lot of regex test cases working
This commit is contained in:
parent
f37a13ef41
commit
0600440b7c
11 changed files with 148 additions and 14 deletions
|
|
@ -1,3 +1,4 @@
|
|||
#define _GNU_SOURCE
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "../internal.h"
|
||||
|
|
@ -13,6 +14,7 @@ typedef enum HSVMOp_ {
|
|||
SVM_ACTION, // Same meaning as RVM_ACTION
|
||||
SVM_CAPTURE, // Same meaning as RVM_CAPTURE
|
||||
SVM_ACCEPT,
|
||||
SVM_OPCOUNT
|
||||
} HSVMOp;
|
||||
|
||||
typedef struct HRVMTrace_ {
|
||||
|
|
@ -42,8 +44,8 @@ HRVMTrace *invert_trace(HRVMTrace *trace) {
|
|||
trace->next = last;
|
||||
last = trace;
|
||||
trace = next;
|
||||
} while (trace->next);
|
||||
return trace;
|
||||
} while (trace);
|
||||
return last;
|
||||
}
|
||||
|
||||
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
|
||||
|
|
@ -151,7 +153,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
|||
case RVM_STEP:
|
||||
// save thread
|
||||
live_threads++;
|
||||
heads_n[THREAD.ip++] = THREAD.trace;
|
||||
heads_n[++THREAD.ip] = THREAD.trace;
|
||||
ipq_top--;
|
||||
goto next_insn;
|
||||
}
|
||||
|
|
@ -221,15 +223,15 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
|
|||
case SVM_CAPTURE:
|
||||
// Top of stack must be a mark
|
||||
// This replaces said mark in-place with a TT_BYTES.
|
||||
assert(ctx.stack[ctx.stack_count]->token_type == TT_MARK);
|
||||
assert(ctx.stack[ctx.stack_count-1]->token_type == TT_MARK);
|
||||
|
||||
tmp_res = ctx.stack[ctx.stack_count];
|
||||
tmp_res = ctx.stack[ctx.stack_count-1];
|
||||
tmp_res->token_type = TT_BYTES;
|
||||
// TODO: Will need to copy if bit_offset is nonzero
|
||||
assert(tmp_res->bit_offset == 0);
|
||||
|
||||
tmp_res->bytes.token = input + tmp_res->index;
|
||||
tmp_res->bytes.len = cur->input_pos - tmp_res->index + 1; // inclusive
|
||||
tmp_res->bytes.len = cur->input_pos - tmp_res->index;
|
||||
break;
|
||||
case SVM_ACCEPT:
|
||||
assert(ctx.stack_count == 1);
|
||||
|
|
@ -351,6 +353,7 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params
|
|||
h_free(prog);
|
||||
return 2;
|
||||
}
|
||||
h_rvm_insert_insn(prog, RVM_ACCEPT, 0);
|
||||
parser->backend_data = prog;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -364,3 +367,7 @@ HParserBackendVTable h__regex_backend_vtable = {
|
|||
.parse = h_regex_parse,
|
||||
.free = h_regex_free
|
||||
};
|
||||
|
||||
#ifndef NDEBUG
|
||||
#include "regex_debug.c"
|
||||
#endif
|
||||
|
|
|
|||
83
src/backends/regex_debug.c
Normal file
83
src/backends/regex_debug.c
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
// Intended to be #included from the regex backend source (regex.c), not compiled on its own
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
|
||||
|
||||
|
||||
// This is some spectacularly non-portable code... but whee!
|
||||
#include <dlfcn.h>
|
||||
/**
 * Build a printable label for an address.
 *
 * The result is one of:
 *   - "name"        when addr is exactly the start of a symbol dladdr() knows,
 *   - "name+0xOFF"  when addr lies inside such a symbol,
 *   - the "%p" rendering of addr when no symbol name is available.
 *
 * Returns a heap-allocated string that the caller must free(), or NULL if
 * the string could not be allocated.
 */
char* getsym(void* addr) {
  Dl_info dli;
  char* retstr = NULL;

  if (dladdr(addr, &dli) != 0 && dli.dli_sname != NULL) {
    // Exact hit on the symbol's start address: the bare name is enough.
    if (dli.dli_saddr == addr)
      return strdup(dli.dli_sname);

    // Go through char* so the subtraction is standard C (void* arithmetic
    // is a GNU extension) and convert explicitly to match %lx.
    unsigned long offset = (unsigned long)((char*)addr - (char*)dli.dli_saddr);
    if (asprintf(&retstr, "%s+0x%lx", dli.dli_sname, offset) < 0)
      return NULL;  // retstr is undefined after a failed asprintf
  } else {
    if (asprintf(&retstr, "%p", addr) < 0)
      return NULL;  // retstr is undefined after a failed asprintf
  }

  return retstr;
}
|
||||
|
||||
// Printable mnemonics for RVM instructions; dump_rvm_prog() looks an entry up
// by the instruction's op value.
// NOTE(review): the order presumably mirrors the RVM opcode enum declared
// elsewhere -- confirm whenever an opcode is added or reordered.
const char* rvm_op_names[RVM_OPCOUNT] = {
  "ACCEPT", "GOTO",    "FORK",
  "PUSH",   "ACTION",  "CAPTURE",
  "EOF",    "MATCH",   "STEP"
};
|
||||
|
||||
// Printable mnemonics for SVM instructions, sized by SVM_OPCOUNT.
// NOTE(review): presumably indexed by HSVMOp value; the trailing
// ACTION/CAPTURE/ACCEPT entries match the visible end of that enum, the
// leading two should be confirmed against its declaration.
const char* svm_op_names[SVM_OPCOUNT] = {
  "PUSH", "NOP",
  "ACTION", "CAPTURE", "ACCEPT"
};
|
||||
|
||||
// Print one byte of a MATCH range as two hex digits, followed by the
// character itself in quotes when it is printable ASCII (0x20..0x7e).
static void dump_rvm_byte(uint8_t byte) {
  if (byte >= 0x20 && byte <= 0x7e)
    printf("%02hhx ('%c')", byte, byte);
  else
    printf("%02hhx", byte);
}

/**
 * Write a listing of an RVM program to stdout, one instruction per line:
 * the instruction index, the opcode mnemonic, and an opcode-specific operand.
 *
 *   GOTO / FORK : the raw argument
 *   ACTION      : the symbol name of the action function (via getsym)
 *   MATCH       : the byte range "low - high" taken from the low and high
 *                 bytes of the argument, or NONE when the range is empty
 *   others      : no operand
 */
void dump_rvm_prog(HRVMProg *prog) {
  for (unsigned int i = 0; i < prog->length; i++) {
    HRVMInsn *insn = &prog->insns[i];

    // Guard the table lookup so a corrupt opcode cannot index out of bounds.
    const char *opname = "???";
    if ((unsigned int)insn->op < (unsigned int)RVM_OPCOUNT)
      opname = rvm_op_names[insn->op];

    printf("%4u %-10s", i, opname);

    switch (insn->op) {
    case RVM_GOTO:
    case RVM_FORK:
      printf("%hd\n", insn->arg);
      break;

    case RVM_ACTION: {
      char *symref = getsym(prog->actions[insn->arg].action);
      // TODO: somehow format the argument to action
      // getsym may fail to allocate; never hand NULL to %s.
      printf("%s\n", symref != NULL ? symref : "(unknown)");
      free(symref);
      break;
    }

    case RVM_MATCH: {
      uint8_t low = insn->arg & 0xff;
      uint8_t high = (insn->arg >> 8) & 0xff;

      // The range is empty only when its upper bound lies below its lower
      // bound; low == high is a single-byte match.
      if (high < low) {
        printf("NONE\n");
      } else {
        dump_rvm_byte(low);
        printf(" - ");
        dump_rvm_byte(high);
        printf("\n");
      }
      break;
    }

    default:
      printf("\n");
      break;
    }
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue