Merge tq's regex backend fixes (branch 'master' of https://github.com/thequux/hammer)

This commit is contained in:
Meredith L. Patterson 2013-05-23 17:51:56 -07:00
commit 6cc6e7b572
12 changed files with 199 additions and 29 deletions

View file

@ -1,3 +1,4 @@
#define _GNU_SOURCE
#include <string.h>
#include <assert.h>
#include "../internal.h"
@ -13,6 +14,7 @@ typedef enum HSVMOp_ {
SVM_ACTION, // Same meaning as RVM_ACTION
SVM_CAPTURE, // Same meaning as RVM_CAPTURE
SVM_ACCEPT,
SVM_OPCOUNT
} HSVMOp;
typedef struct HRVMTrace_ {
@ -42,8 +44,8 @@ HRVMTrace *invert_trace(HRVMTrace *trace) {
trace->next = last;
last = trace;
trace = next;
} while (trace->next);
return trace;
} while (trace);
return last;
}
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
@ -96,7 +98,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
if (!heads_p[ip_s])
continue;
THREAD.ip = ip_s;
THREAD.trace = heads_p[ip_s];
uint8_t hi, lo;
uint16_t arg;
while(ipq_top > 0) {
@ -110,8 +112,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
ret_trace = THREAD.trace;
goto run_trace;
case RVM_MATCH:
// Doesn't actually validate the "must be followed by MATCH
// or STEP. It should. Preproc perhaps?
hi = (arg >> 8) & 0xff;
lo = arg & 0xff;
THREAD.ip++;
@ -151,7 +151,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
case RVM_STEP:
// save thread
live_threads++;
heads_n[THREAD.ip++] = THREAD.trace;
heads_n[++THREAD.ip] = THREAD.trace;
ipq_top--;
goto next_insn;
}
@ -169,7 +169,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
run_trace:
// Invert the direction of the trace linked list.
ret_trace = invert_trace(ret_trace);
HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
// ret is in its own arena
@ -221,20 +220,24 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
case SVM_CAPTURE:
// Top of stack must be a mark
// This replaces said mark in-place with a TT_BYTES.
assert(ctx.stack[ctx.stack_count]->token_type == TT_MARK);
assert(ctx.stack[ctx.stack_count-1]->token_type == TT_MARK);
tmp_res = ctx.stack[ctx.stack_count];
tmp_res = ctx.stack[ctx.stack_count-1];
tmp_res->token_type = TT_BYTES;
// TODO: Will need to copy if bit_offset is nonzero
assert(tmp_res->bit_offset == 0);
tmp_res->bytes.token = input + tmp_res->index;
tmp_res->bytes.len = cur->input_pos - tmp_res->index + 1; // inclusive
tmp_res->bytes.len = cur->input_pos - tmp_res->index;
break;
case SVM_ACCEPT:
assert(ctx.stack_count == 1);
HParseResult *res = a_new(HParseResult, 1);
res->ast = ctx.stack[0];
assert(ctx.stack_count <= 1);
HParseResult *res = a_new(HParseResult, 1);
if (ctx.stack_count == 1) {
res->ast = ctx.stack[0];
} else {
res->ast = NULL;
}
res->bit_length = cur->input_pos * 8;
res->arena = arena;
return res;
@ -351,6 +354,7 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params
h_free(prog);
return 2;
}
h_rvm_insert_insn(prog, RVM_ACCEPT, 0);
parser->backend_data = prog;
return 0;
}
@ -364,3 +368,7 @@ HParserBackendVTable h__regex_backend_vtable = {
.parse = h_regex_parse,
.free = h_regex_free
};
#ifndef NDEBUG
#include "regex_debug.c"
#endif

100
src/backends/regex_debug.c Normal file
View file

@ -0,0 +1,100 @@
// Intended to be included from regex_debug.c
#define _GNU_SOURCE
#include <stdio.h>
#include <malloc.h>
// This is some spectacularly non-portable code... but whee!
#include <dlfcn.h>
char* getsym(void* addr) {
Dl_info dli;
char* retstr;
if (dladdr(addr, &dli) != 0 && dli.dli_sname != NULL) {
if (dli.dli_saddr == addr)
return strdup(dli.dli_sname);
else
asprintf(&retstr, "%s+0x%lx", dli.dli_sname, addr - dli.dli_saddr);
} else
asprintf(&retstr, "%p", addr);
return retstr;
}
const char* rvm_op_names[RVM_OPCOUNT] = {
"ACCEPT",
"GOTO",
"FORK",
"PUSH",
"ACTION",
"CAPTURE",
"EOF",
"MATCH",
"STEP"
};
const char* svm_op_names[SVM_OPCOUNT] = {
"PUSH",
"NOP",
"ACTION",
"CAPTURE",
"ACCEPT"
};
void dump_rvm_prog(HRVMProg *prog) {
char* symref;
for (unsigned int i = 0; i < prog->length; i++) {
HRVMInsn *insn = &prog->insns[i];
printf("%4d %-10s", i, rvm_op_names[insn->op]);
switch (insn->op) {
case RVM_GOTO:
case RVM_FORK:
printf("%hd\n", insn->arg);
break;
case RVM_ACTION:
symref = getsym(prog->actions[insn->arg].action);
// TODO: somehow format the argument to action
printf("%s\n", symref);
free(symref);
break;
case RVM_MATCH: {
uint8_t low, high;
low = insn->arg & 0xff;
high = (insn->arg >> 8) & 0xff;
if (high < low)
printf("NONE\n");
else {
if (low >= 0x32 && low <= 0x7e)
printf("%02hhx ('%c')", low, low);
else
printf("%02hhx", low);
if (high >= 0x32 && high <= 0x7e)
printf(" - %02hhx ('%c')\n", high, high);
else
printf(" - %02hhx\n", high);
}
break;
}
default:
printf("\n");
}
}
}
void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) {
char* symref;
for (; trace != NULL; trace = trace->next) {
printf("@%04zd %-10s", trace->input_pos, svm_op_names[trace->opcode]);
switch (trace->opcode) {
case SVM_ACTION:
symref = getsym(prog->actions[trace->arg].action);
// TODO: somehow format the argument to action
printf("%s\n", symref);
free(symref);
break;
default:
printf("\n");
}
}
}