2013-03-09 21:42:49 -08:00
|
|
|
#include <string.h>
|
|
|
|
|
#include <assert.h>
|
2013-01-13 17:01:10 +01:00
|
|
|
#include "../internal.h"
|
|
|
|
|
#include "../parsers/parser_internal.h"
|
2013-03-09 21:42:49 -08:00
|
|
|
#include "regex.h"
|
2013-01-13 17:01:10 +01:00
|
|
|
|
|
|
|
|
#undef a_new
|
2013-03-09 21:42:49 -08:00
|
|
|
#define a_new(typ, count) a_new_(arena, typ, count)
|
2013-01-13 17:01:10 +01:00
|
|
|
/*
 * Stack VM
 *
 * Opcodes recorded in an HRVMTrace while the regex VM runs, and later
 * replayed in order by run_trace() to build the parse result.
 */
typedef enum HSVMOp_ {
  SVM_PUSH,     /* Push a mark. There is no VM insn to push an object. */
  SVM_NOP,      /* Starts the chain (and possibly used elsewhere); no effect. */
  SVM_ACTION,   /* Same meaning as RVM_ACTION. */
  SVM_CAPTURE,  /* Same meaning as RVM_CAPTURE. */
  SVM_ACCEPT,   /* Terminates the replay and yields the result. */
} HSVMOp;
|
|
|
|
|
|
|
|
|
|
/*
 * One recorded stack-VM operation.
 *
 * Trace nodes form a singly linked list. While the regex VM is running the
 * list is reverse-threaded (each new node points at the one recorded before
 * it); a post-processing step inverts all the pointers before replay.
 */
typedef struct HRVMTrace_ {
  struct HRVMTrace_ *next;  /* Neighbouring node; direction depends on phase (see above). */
  size_t input_pos;         /* Input offset at which the operation was recorded. */
  uint16_t arg;             /* Operand of the operation. */
  uint8_t opcode;           /* Operation code; assigned from HSVMOp values in this file. */
} HRVMTrace;
|
|
|
|
|
|
|
|
|
|
typedef struct HRVMThread_ {
|
|
|
|
|
HRVMTrace *trace;
|
|
|
|
|
uint16_t ip;
|
|
|
|
|
} HRVMThread;
|
2013-02-20 02:25:42 -05:00
|
|
|
|
2013-03-09 21:42:49 -08:00
|
|
|
// Forward declaration: replays an already-inverted trace (oldest node first)
// and returns the parse result, or NULL if no SVM_ACCEPT node is reached.
// Defined further down in this file.
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len);
|
|
|
|
|
|
|
|
|
|
/**
 * Reverse a singly linked trace list in place.
 *
 * @param trace  Head of the list to reverse; may be NULL.
 * @return       The new head (the node that used to be last), or NULL when
 *               the input list is empty. A single-node list is returned
 *               unchanged.
 *
 * Every node, including the final one, has its `next` pointer rewritten, so
 * the returned head links back through the complete list.
 */
HRVMTrace *invert_trace(HRVMTrace *trace) {
  HRVMTrace *reversed = NULL;

  while (trace) {
    HRVMTrace *following = trace->next;
    trace->next = reversed;  // re-point this node at the part already reversed
    reversed = trace;
    trace = following;
  }
  return reversed;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Run a compiled regex-VM program over an input buffer.
 *
 * All threads are advanced in lock-step, one input byte per outer-loop
 * iteration. Each thread records the stack-VM operations it would perform
 * as a reverse-threaded HRVMTrace list. The first thread to reach
 * RVM_ACCEPT wins: its trace is inverted and replayed by run_trace().
 *
 * @param mm__   Allocator used for the scratch arena and passed to run_trace().
 * @param prog   Program to execute. This function assumes prog->length >= 1
 *               (slot 0 of the thread table is written unconditionally).
 * @param input  Input bytes.
 * @param len    Number of bytes in input.
 * @return       The HParseResult* produced by run_trace() (returned as
 *               void*), or NULL if no thread reaches RVM_ACCEPT.
 *
 * The scratch arena created here is deleted before returning on both paths.
 */
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
  HArena *arena = h_new_arena(mm__, 0);
  // heads_p: threads to resume at the current offset, indexed by instruction.
  // heads_n: threads saved by RVM_STEP for the next offset.
  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
    **heads_n = a_new(HRVMTrace*, prog->length);

  HRVMTrace *ret_trace = NULL;

  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
  size_t ipq_top;

  // THREAD is the thread on top of the ip_queue stack.
#define THREAD ip_queue[ipq_top-1]
  // Prepend a new trace node to the current thread's (reverse-threaded) trace.
#define PUSH_SVM(op_, arg_) do {                 \
    HRVMTrace *nt = a_new(HRVMTrace, 1);         \
    nt->arg = (arg_);                            \
    nt->opcode = (op_);                          \
    nt->next = THREAD.trace;                     \
    nt->input_pos = off;                         \
    THREAD.trace = nt;                           \
  } while(0)

  // Initial thread: starts at instruction 0 with a lone NOP trace node.
  // NOTE(review): the original comment says a_new zeroes its memory, which
  // the untouched slots of heads_n rely on -- confirm against a_new_.
  heads_n[0] = a_new(HRVMTrace, 1); // zeroing
  heads_n[0]->opcode = SVM_NOP;

  size_t off = 0;
  int live_threads = 1;
  for (off = 0; off <= len; off++) {
    // One extra iteration runs at off == len with a dummy byte so that
    // RVM_EOF / RVM_ACCEPT can execute after the last input byte.
    uint8_t ch = ((off == len) ? 0 : input[off]);
    size_t ip_s;
    /* scope */ {
      // Threads saved during the previous offset become the current set.
      HRVMTrace **heads_t;
      heads_t = heads_n;
      heads_n = heads_p;
      heads_p = heads_t;
      memset(heads_n, 0, prog->length * sizeof(*heads_n));
    }
    memset(insn_seen, 0, prog->length); // no insns seen yet
    if (!live_threads)
      goto match_fail;
    live_threads = 0;
    for (ip_s = 0; ip_s < prog->length; ip_s++) {
      uint8_t hi, lo;
      uint16_t arg;

      // TODO: Write this as a threaded VM
      if (!heads_p[ip_s])
        continue;

      ipq_top = 1;
      THREAD.ip = ip_s;
      // Resume with the trace that was saved for this instruction slot.
      THREAD.trace = heads_p[ip_s];

      while(ipq_top > 0) {
        if (insn_seen[THREAD.ip] == 1) {
          // Another thread already executed this instruction at this
          // offset; drop this thread. Without the pop the loop would spin
          // forever on the same queue entry.
          ipq_top--;
          continue;
        }
        insn_seen[THREAD.ip] = 1;
        arg = prog->insns[THREAD.ip].arg;
        switch(prog->insns[THREAD.ip].op) {
        case RVM_ACCEPT:
          PUSH_SVM(SVM_ACCEPT, 0);
          ret_trace = THREAD.trace;
          goto run_trace;
        case RVM_MATCH:
          // Doesn't actually validate the "must be followed by MATCH
          // or STEP. It should. Preproc perhaps?
          hi = (arg >> 8) & 0xff;
          lo = arg & 0xff;
          THREAD.ip++;
          if (ch < lo || ch > hi)
            ipq_top--; // terminate thread: byte is outside [lo, hi]
          goto next_insn;
        case RVM_GOTO:
          THREAD.ip = arg;
          goto next_insn;
        case RVM_FORK:
          THREAD.ip++;
          if (!insn_seen[arg]) {
            // Mark the fall-through instruction as queued, then push a
            // child thread that runs from the fork target first.
            insn_seen[THREAD.ip] = 2;
            HRVMTrace* tr = THREAD.trace;
            ipq_top++;
            THREAD.ip = arg;
            THREAD.trace = tr;
          }
          goto next_insn;
        case RVM_PUSH:
          PUSH_SVM(SVM_PUSH, 0);
          THREAD.ip++;
          goto next_insn;
        case RVM_ACTION:
          PUSH_SVM(SVM_ACTION, arg);
          THREAD.ip++;
          goto next_insn;
        case RVM_CAPTURE:
          PUSH_SVM(SVM_CAPTURE, 0);
          THREAD.ip++;
          goto next_insn;
        case RVM_EOF:
          THREAD.ip++;
          if (off != len)
            ipq_top--; // Terminate thread
          goto next_insn;
        case RVM_STEP:
          // Save the thread so it resumes at the instruction *after* the
          // STEP once the next input byte is current.
          THREAD.ip++;
          if (THREAD.ip < prog->length) {
            live_threads++;
            heads_n[THREAD.ip] = THREAD.trace;
          }
          ipq_top--;
          goto next_insn;
        default:
          // Unknown opcode: this thread cannot make progress.
          ipq_top--;
          goto next_insn;
        }
      next_insn:
        ;
      }
    }
  }
  // No accept was reached.
 match_fail:
  h_delete_arena(arena);
  return NULL;

 run_trace:
  // Invert the direction of the trace linked list.
  ret_trace = invert_trace(ret_trace);
  // NOTE(review): run_trace() takes `int len`, so len is narrowed here.
  HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
  // ret is in its own arena
  h_delete_arena(arena);
  return ret;
}
|
2013-03-09 21:42:49 -08:00
|
|
|
#undef PUSH_SVM
|
|
|
|
|
#undef THREAD
|
2013-02-20 02:25:42 -05:00
|
|
|
|
2013-03-09 21:42:49 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Grow the stack-VM value stack so that `addl` more entries can be pushed.
 *
 * @param mm__  Allocator whose realloc is used to grow the stack.
 * @param ctx   Stack-VM context; ctx->stack and ctx->stack_capacity are
 *              updated when growth is needed.
 * @param addl  Number of additional entries the caller is about to push.
 *
 * Capacity is doubled repeatedly until it strictly exceeds
 * stack_count + addl, so one call is enough for any `addl`. Nothing happens
 * when the current capacity already exceeds that sum.
 */
void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) {
  size_t needed = ctx->stack_count + addl;
  size_t new_capacity = ctx->stack_capacity;

  if (needed < new_capacity)
    return;

  if (new_capacity == 0)
    new_capacity = 1;  // doubling zero would never grow
  while (needed >= new_capacity)
    new_capacity *= 2;

  void *grown = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * new_capacity);
  // TODO: report realloc failure to the caller; the void return type leaves
  // no channel for it, so for now fail loudly in debug builds.
  assert(grown != NULL);
  ctx->stack = grown;
  ctx->stack_capacity = new_capacity;
}
|
2013-03-09 17:25:25 -08:00
|
|
|
|
2013-03-09 21:42:49 -08:00
|
|
|
/**
 * Replay a recorded trace on the stack VM and build the parse result.
 *
 * @param mm__       Allocator for the result arena and the temporary stack.
 * @param orig_prog  Program the trace came from; only its action table is used.
 * @param trace      Head of the trace, already inverted so that the oldest
 *                   operation comes first.
 * @param input      Input buffer; captured byte tokens point into it.
 * @param len        Length of input (currently unused).
 * @return           A result allocated in its own new arena (stored in
 *                   res->arena) when an SVM_ACCEPT node is reached. NULL if
 *                   an action reports failure or the trace ends without
 *                   SVM_ACCEPT; the arena is deleted in that case.
 *
 * The temporary value stack is released on every return path.
 */
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len) {
  // orig_prog is only used for the action table
  HSVMContext ctx;
  HArena *arena = h_new_arena(mm__, 0);
  ctx.stack_count = 0;
  ctx.stack_capacity = 16;
  ctx.stack = h_new(HParsedToken*, ctx.stack_capacity);

  (void)len;

  HParsedToken *tmp_res;
  HRVMTrace *cur;
  for (cur = trace; cur; cur = cur->next) {
    switch (cur->opcode) {
    case SVM_PUSH:
      // Push a mark remembering where in the input this capture starts.
      svm_stack_ensure_cap(mm__, &ctx, 1);
      tmp_res = a_new(HParsedToken, 1);
      tmp_res->token_type = TT_MARK;
      tmp_res->index = cur->input_pos;
      tmp_res->bit_offset = 0;
      ctx.stack[ctx.stack_count++] = tmp_res;
      break;
    case SVM_NOP:
      break;
    case SVM_ACTION:
      // Action should modify stack appropriately
      if (!orig_prog->actions[cur->arg].fn(arena, &ctx, orig_prog->actions[cur->arg].env)) {
        // action failed: abandon the whole replay
        goto fail;
      }
      break;
    case SVM_CAPTURE:
      // Top of stack must be a mark
      // This replaces said mark in-place with a TT_BYTES.
      assert(ctx.stack_count > 0);
      tmp_res = ctx.stack[ctx.stack_count - 1];  // top is at count-1, not count
      assert(tmp_res->token_type == TT_MARK);

      tmp_res->token_type = TT_BYTES;
      // TODO: Will need to copy if bit_offset is nonzero
      assert(tmp_res->bit_offset == 0);

      tmp_res->bytes.token = input + tmp_res->index;
      // Both positions are input offsets taken when the op was recorded, so
      // the end offset is already one past the last consumed byte
      // (half-open range); no +1 is needed.
      tmp_res->bytes.len = cur->input_pos - tmp_res->index;
      break;
    case SVM_ACCEPT: {
      assert(ctx.stack_count == 1);
      HParseResult *res = a_new(HParseResult, 1);
      res->ast = ctx.stack[0];
      res->bit_length = cur->input_pos * 8;
      res->arena = arena;
      // Tokens live in the arena; only the pointer array itself is freed.
      mm__->free(mm__, ctx.stack);
      return res;
    }
    default:
      // Unknown opcode: ignored, same as before a default case existed.
      break;
    }
  }

 fail:
  mm__->free(mm__, ctx.stack);
  h_delete_arena(arena);
  return NULL;
}
|
2013-03-09 21:42:49 -08:00
|
|
|
|
|
|
|
|
// TODO: Implement the primitive actions
|