Got a lot of regex test cases working

This commit is contained in:
Dan Hirsch 2013-05-23 23:26:22 +02:00
parent f37a13ef41
commit 0600440b7c
11 changed files with 148 additions and 14 deletions

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include <string.h>
#include "../internal.h"
#include "parser_internal.h"
@ -22,23 +23,42 @@ static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
return ret;
}
static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) {
// BUG: relies un undefined behaviour: int64_t is a signed uint64_t; not necessarily true on 32-bit
HParsedToken *top = ctx->stack[ctx->stack_count-1];
assert(top->token_type == TT_BYTES);
uint64_t res = 0;
for (size_t i = 0; i < top->bytes.len; i++)
res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.
top->uint = res; // possibly cast to signed through union
top->token_type = TT_UINT;
return true;
}
// FUTURE: this is horribly inefficient
static bool cs_ctrvm(HRVMProg *prog, void *env) {
HCharset cs = (HCharset)env;
h_rvm_insert_insn(prog, RVM_PUSH, 0);
uint16_t start = h_rvm_get_ip(prog);
for (size_t i=0; i<256; ++i) {
// TODO: merge ranges.
if (charset_isset(cs, i)) {
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
h_rvm_insert_insn(prog, RVM_MATCH, i & i << 8);
h_rvm_insert_insn(prog, RVM_MATCH, i | i << 8);
h_rvm_insert_insn(prog, RVM_GOTO, 0);
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
}
}
h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
for (size_t i=start; i<jump; ++i) {
if (RVM_GOTO == prog->insns[i].op)
h_rvm_patch_arg(prog, i, jump);
}
h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ch, env));
return true;
}