Got a lot of regex test cases working
This commit is contained in:
parent
f37a13ef41
commit
0600440b7c
11 changed files with 148 additions and 14 deletions
|
|
@ -97,6 +97,8 @@ static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
|
|||
uint64_t res = 0;
|
||||
for (size_t i = 0; i < top->bytes.len; i++)
|
||||
res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.
|
||||
uint64_t msb = (env_->signedp ? 1LL:0) << (top->bytes.len * 8 - 1);
|
||||
res = (res ^ msb) - msb;
|
||||
top->uint = res; // possibly cast to signed through union
|
||||
top->token_type = (env_->signedp ? TT_SINT : TT_UINT);
|
||||
return true;
|
||||
|
|
@ -105,7 +107,7 @@ static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
|
|||
static bool bits_ctrvm(HRVMProg *prog, void* env) {
|
||||
struct bits_env *env_ = (struct bits_env*)env;
|
||||
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||
for (size_t i=0; (i < env_->length)/8; ++i) { // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
|
||||
for (size_t i=0; i < (env_->length/8); ++i) { // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
|
||||
h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
|
||||
h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <assert.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
static HParseResult* parse_ch(void* env, HParseState *state) {
|
||||
|
|
@ -20,11 +21,26 @@ static HCFChoice* desugar_ch(HAllocator *mm__, void *env) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * SVM action for the single-character parser.
 *
 * Takes the token on top of the SVM stack, which must be a TT_BYTES token,
 * and rewrites it in place as a TT_UINT token. The captured bytes are folded
 * most-significant-byte first into a 64-bit unsigned value; bytes beyond the
 * eighth shift the earlier ones out of the accumulator.
 *
 * arena and env are not used. Always returns true.
 */
static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) {
  HParsedToken *tok = ctx->stack[ctx->stack_count - 1];
  assert(tok->token_type == TT_BYTES);

  // Snapshot the byte span before `uint` is written: the store below goes
  // through a different member of the same token.
  const size_t nbytes = tok->bytes.len;
  uint64_t value = 0;
  size_t idx = 0;
  while (idx < nbytes) {
    // TODO: Handle other endiannesses.
    value <<= 8;
    value |= tok->bytes.token[idx];
    ++idx;
  }

  tok->uint = value;
  tok->token_type = TT_UINT;
  return true;
}
|
||||
|
||||
/*
 * Compile the single-character parser into regex-VM instructions.
 *
 * env carries the character itself, stored in the pointer value; only its low
 * 8 bits are used. The emitted sequence is:
 *
 *   PUSH     - presumably marks the start of the capture; confirm against the VM
 *   MATCH    - operand has the character in both the low and the high byte
 *   STEP
 *   CAPTURE
 *   ACTION   - runs h_svm_action_ch with env
 *
 * Exactly one MATCH is emitted. The previous form also emitted
 * MATCH (c & c << 8) ahead of the PUSH; since << binds tighter than &, that
 * operand is always 0 for an 8-bit c, so it was a spurious extra instruction.
 *
 * Always returns true.
 */
static bool ch_ctrvm(HRVMProg *prog, void* env) {
  // uintptr_t is the integer type intended for pointer round-trips; only the
  // low byte of the value is kept.
  uint8_t c = (uint8_t)(uintptr_t)env;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // NOTE(review): operand looks like (upper bound << 8) | lower bound, judging
  // by the other MATCH operands in this backend - confirm against the VM.
  h_rvm_insert_insn(prog, RVM_MATCH, (c << 8) | c);
  h_rvm_insert_insn(prog, RVM_STEP, 0);
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ch, env));
  return true;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "../internal.h"
|
||||
#include "parser_internal.h"
|
||||
|
|
@ -22,23 +23,42 @@ static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * SVM action for the charset parser.
 *
 * Converts the TT_BYTES token on top of the SVM stack into a TT_UINT token in
 * place. The bytes are combined most-significant-byte first into a uint64_t;
 * with more than eight bytes the leading ones are shifted out.
 *
 * arena and env are not used. Always returns true.
 */
static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) {
  HParsedToken *tok = ctx->stack[ctx->stack_count - 1];
  assert(tok->token_type == TT_BYTES);

  // Read the length once up front; `uint` is only stored after the last byte
  // has been consumed.
  const size_t nbytes = tok->bytes.len;
  uint64_t acc = 0;
  size_t pos = 0;
  while (pos < nbytes) {
    // TODO: Handle other endiannesses.
    acc <<= 8;
    acc |= tok->bytes.token[pos];
    ++pos;
  }

  tok->uint = acc;
  tok->token_type = TT_UINT;
  return true;
}
|
||||
|
||||
// FUTURE: this is horribly inefficient
|
||||
/*
 * Compile a charset parser into regex-VM instructions.
 *
 * env is the HCharset itself. After an initial PUSH, one alternative is
 * emitted for every byte value present in the set:
 *
 *   FORK  next     - argument patched to the instruction after this alternative
 *   MATCH i..i     - operand has i in both the low and the high byte
 *   GOTO  jump     - argument patched later to the shared STEP
 *
 * The alternatives are followed by MATCH 0x00FF (presumably a range that can
 * never match, i.e. the "no member matched" case - confirm against the VM),
 * then the shared STEP, CAPTURE and an ACTION running h_svm_action_ch.
 *
 * Each alternative contains exactly one MATCH. The previous form also emitted
 * MATCH (i & i << 8) first; since << binds tighter than &, that operand is
 * always 0 for i < 256, which put a second, unrelated match in front of the
 * real one in every alternative.
 *
 * FUTURE: this is horribly inefficient (up to 256 alternatives).
 *
 * Always returns true.
 */
static bool cs_ctrvm(HRVMProg *prog, void *env) {
  HCharset cs = (HCharset)env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);

  // First instruction of the alternatives; the GOTO fix-up scan starts here.
  uint16_t start = h_rvm_get_ip(prog);
  for (size_t i = 0; i < 256; ++i) {
    // TODO: merge ranges.
    if (charset_isset(cs, i)) {
      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
      h_rvm_insert_insn(prog, RVM_MATCH, (i << 8) | i);
      h_rvm_insert_insn(prog, RVM_GOTO, 0);
      // The FORK's argument becomes the instruction following this alternative.
      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
    }
  }
  h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF);
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);

  // Point every GOTO emitted above at the shared STEP.
  for (size_t i = start; i < jump; ++i) {
    if (RVM_GOTO == prog->insns[i].op)
      h_rvm_patch_arg(prog, i, jump);
  }

  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ch, env));
  return true;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ static bool token_ctrvm(HRVMProg *prog, void *env) {
|
|||
HToken *t = (HToken*)env;
|
||||
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||
for (int i=0; i<t->len; ++i) {
|
||||
h_rvm_insert_insn(prog, RVM_MATCH, t->str[i] & t->str[i] << 8);
|
||||
h_rvm_insert_insn(prog, RVM_MATCH, t->str[i] | t->str[i] << 8);
|
||||
h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||
}
|
||||
h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ static bool ws_ctrvm(HRVMProg *prog, void *env) {
|
|||
h_rvm_insert_insn(prog, RVM_GOTO, start);
|
||||
h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
|
||||
}
|
||||
return h_compile_regex(prog, p->env);
|
||||
return h_compile_regex(prog, p);
|
||||
}
|
||||
|
||||
static const HParserVtable whitespace_vt = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue