Finished up the regex backend

This commit is contained in:
Dan Hirsch 2013-04-26 20:36:54 -07:00
parent 204147a3d2
commit 13088c9d7a
39 changed files with 481 additions and 250 deletions

View file

@ -1,7 +1,16 @@
#include <assert.h>
#include <string.h>
#include "../internal.h"
#include "../parsers/parser_internal.h"
// Bernstein-style ("djb") hash over a byte buffer: the accumulator starts at
// 5381 and, for every byte in order, becomes accumulator * 33 + byte.
// Arithmetic is unsigned 32-bit, so it wraps on overflow.
// An empty buffer (len == 0) hashes to 5381.
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    // (acc << 5) + acc is acc * 33 in wrapping 32-bit arithmetic.
    acc = (acc << 5) + acc + buf[i];
  }
  return acc;
}
// short-hand for constructing HCachedResult's
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
HCachedResult *ret = a_new(HCachedResult, 1);
@ -190,12 +199,39 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
}
}
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) {
// "Compile" entry point of the packrat backend. It only records PB_PACKRAT
// as the parser's backend; mm__ and params are accepted but not used.
// Always returns 0.
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
parser->backend = PB_PACKRAT;
return 0; // No compilation necessary, and everything should work
// out of the box.
}
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
// Hash callback handed to h_hashtable_new: runs djbhash over the
// sizeof(HParserCacheKey) bytes that key points at.
// NOTE(review): this hashes the raw object representation, so it presumably
// relies on HParserCacheKey having no uninitialized padding bytes -- confirm
// against the struct definition.
static uint32_t cache_key_hash(const void* key) {
return djbhash(key, sizeof(HParserCacheKey));
}
// Equality callback handed to h_hashtable_new: two keys are equal when their
// sizeof(HParserCacheKey) bytes compare identical under memcmp.
// NOTE(review): a bytewise comparison also compares any padding bytes in
// HParserCacheKey -- confirm the struct has none, or that keys are zeroed.
static bool cache_key_equal(const void* key1, const void* key2) {
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
}
// Parse entry point of the packrat backend.
//
// Creates a new arena from mm__, builds an HParseState inside it (memo cache,
// a copy of *input_stream, the left-recursion stack and the recursion-heads
// table), runs h_do_parse, then frees the helper structures again.
//
// Returns whatever h_do_parse returned. When that is NULL the arena is
// deleted here; otherwise the arena is left alive.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
HArena * arena = h_new_arena(mm__, 0);
HParseState *parse_state = a_new_(arena, HParseState, 1);
// Both hashtables are keyed with the bytewise HParserCacheKey callbacks.
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
cache_key_hash); // hash_func
// The input stream is copied by value into the parse state.
parse_state->input_stream = *input_stream;
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
h_hashtable_free(parse_state->cache);
// Only a failed parse releases the arena here.
// NOTE(review): presumably a successful result lives in this arena and is
// released later by whoever frees the result -- confirm.
if (!res)
h_delete_arena(parse_state->arena);
return res;
// NOTE(review): the statement below follows an unconditional return and can
// never run; it looks like the pre-change body line carried along by the
// diff rendering.
return h_do_parse(parser, parse_state);
}

View file

@ -245,10 +245,6 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
return NULL;
}
// Compiles parser into prog by delegating to the parser's own
// compile_to_rvm vtable hook, which receives prog and parser->env.
// The hook's result is returned as a bool.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
return parser->vtable->compile_to_rvm(prog, parser->env);
}
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
for (uint16_t i = 0; i < prog->action_count; i++) {
if (prog->actions[i].action == action_func && prog->actions[i].env == env)
@ -293,4 +289,78 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
prog->insns[ip].arg = new_val;
}
// TODO: Implement the primitive actions
// Counts how many stack entries sit above the topmost TT_MARK token.
//
// The stack is scanned from the top (stack[stack_count - 1]) downwards.
// Returns the number of entries above the first mark found (0 when the mark
// is the top entry). Returns ctx->stack_count when the stack holds no mark
// at all; a real count is always smaller than stack_count, so callers can
// tell the two cases apart.
size_t h_svm_count_to_mark(HSVMContext *ctx) {
  // The bound must be stack_count, not stack_count - 1: the last iteration
  // (ctm == stack_count - 1) is the one that inspects stack[0], so a mark at
  // the bottom of the stack is still found. Using stack_count also keeps the
  // loop from running at all on an empty stack, where stack_count - 1 would
  // wrap around to SIZE_MAX.
  for (size_t ctm = 0; ctm < ctx->stack_count; ctm++) {
    if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
      return ctm;
  }
  return ctx->stack_count;
}
// TODO: Implement the primitive actions
// SVM action: folds everything above the topmost mark into one sequence.
//
// The mark token itself is reused as the result: its token_type is switched
// from TT_MARK to TT_SEQUENCE and its seq is pointed at a new counted array
// (allocated from arena) holding the entries that were above it, in stack
// order. Those entries are then popped, leaving the sequence token on top.
//
// Asserts that a mark exists on the stack. env is not used.
// Always returns true.
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
  const size_t count = h_svm_count_to_mark(ctx);
  assert (count < ctx->stack_count);

  const size_t mark_idx = ctx->stack_count - 1 - count;
  HParsedToken *mark = ctx->stack[mark_idx];
  assert (mark->token_type == TT_MARK);

  // Index and bit offset stay as they were on the mark; only type and
  // payload change.
  mark->token_type = TT_SEQUENCE;
  HCountedArray *items = h_carray_new_sized(arena, count);
  mark->seq = items;

  // NOTE(review): nothing here records how many elements were filled in; if
  // HCountedArray tracks a fill count separately from its capacity it may
  // need to be set -- confirm against h_carray_new_sized.
  size_t i = 0;
  while (i < count) {
    items->elements[i] = ctx->stack[mark_idx + 1 + i];
    i++;
  }

  ctx->stack_count -= count;
  return true;
}
// SVM action: pops entries off the stack until a TT_MARK token has been
// popped (the mark itself is removed too).
//
// Returns true as soon as a mark was popped. Returns false if the stack ran
// empty without meeting one; in that case every entry has been popped.
// arena and env are not used.
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
  for (;;) {
    if (ctx->stack_count == 0)
      return false; // no mark found.
    ctx->stack_count--;
    if (ctx->stack[ctx->stack_count]->token_type == TT_MARK)
      return true;
  }
}
// Glue regex backend to rest of system
// Compiles parser into prog by delegating to the parser's own
// compile_to_rvm vtable hook, which receives prog and parser->env.
// The hook's result is returned as a bool.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
return parser->vtable->compile_to_rvm(prog, parser->env);
}
// Free entry point of the regex backend: releases the compiled HRVMProg kept
// in parser->backend_data (its instruction table, its action table and the
// program struct itself), then clears backend_data and switches the parser
// back to PB_PACKRAT.
// Expects backend_data to hold a program; it is dereferenced unconditionally.
static void h_regex_free(HParser *parser) {
  HRVMProg *compiled = (HRVMProg*)parser->backend_data;
  // Release through the allocator the program was created with. The local is
  // named mm__ because h_free presumably picks the allocator up by that name.
  HAllocator *mm__ = compiled->allocator;

  // Detach the program from the parser before releasing its storage.
  parser->backend = PB_PACKRAT;
  parser->backend_data = NULL;

  h_free(compiled->actions);
  h_free(compiled->insns);
  h_free(compiled);
}
// Compile entry point of the regex backend.
//
// Checks that the parser is regular (via its isValidRegular vtable hook),
// allocates an HRVMProg from mm__ and asks h_compile_regex to fill it in.
// On success the program is stored in parser->backend_data.
// params is not used.
//
// Returns 0 on success, 1 when the parser is not a valid regular parser, and
// 2 when compiling to the regex VM failed (the partial program is freed and
// parser->backend_data is left untouched).
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
  if (!parser->vtable->isValidRegular(parser->env))
    return 1;

  HRVMProg *prog = h_new(HRVMProg, 1);
  // Start from an explicitly empty program. The code generator reads the
  // counters while emitting (h_rvm_create_action loops up to action_count),
  // and the failure path below frees insns/actions, so none of these fields
  // may be left holding whatever the allocator handed back.
  prog->allocator = mm__;
  prog->length = 0;
  prog->action_count = 0;
  prog->insns = NULL;
  prog->actions = NULL;

  if (!h_compile_regex(prog, parser)) {
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
    return 2;
  }
  parser->backend_data = prog;
  return 0;
}
// Parse entry point of the regex backend: runs the HRVMProg stored in
// parser->backend_data over the buffer described by input_stream->input and
// input_stream->length, passing mm__ through, and returns what
// h_rvm_run__m returns.
static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
return h_rvm_run__m(mm__, (HRVMProg*)parser->backend_data, input_stream->input, input_stream->length);
}
// Backend vtable for the regex backend: wires its compile, parse and free
// entry points into the generic backend interface.
HParserBackendVTable h__regex_backend_vtable = {
.compile = h_regex_compile,
.parse = h_regex_parse,
.free = h_regex_free
};

View file

@ -10,9 +10,12 @@ typedef enum HRVMOp_ {
RVM_ACCEPT, // [a]
RVM_GOTO, // [c] parameter is an offset into the instruction table
RVM_FORK, // [c] parameter is an offset into the instruction table
RVM_PUSH, // [a] No arguments, just pushes a mark onto the stack
RVM_PUSH, // [a] No arguments, just pushes a mark (pointer to some
// character in the input string) onto the stack
RVM_ACTION, // [a] argument is an action ID
RVM_CAPTURE, // [a] Capture the last string, and push it on the stack. No arg.
RVM_CAPTURE, // [a] Capture the last string (up to the current
// position, non-inclusive), and push it on the
// stack. No arg.
RVM_EOF, // [m] Succeeds only if at EOF.
RVM_MATCH, // [m] The high byte of the parameter is an upper bound
// and the low byte is a lower bound, both
@ -31,7 +34,7 @@ typedef struct HRVMInsn_{
// Stack state that the SVM action functions operate on.
typedef struct HSVMContext_ {
// Array of token pointers; entries 0 .. stack_count-1 are live.
HParsedToken **stack;
size_t stack_count;
size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
// NOTE(review): presumably the number of slots allocated for stack -- confirm.
size_t stack_capacity;
} HSVMContext;
@ -43,13 +46,13 @@ typedef struct HSVMAction_ {
void* env;
} HSVMAction;
// A compiled regex-VM program: an instruction table plus the table of
// actions those instructions refer to.
typedef struct HRVMProg_ {
struct HRVMProg_ {
// Allocator recorded when the program is created; the regex backend frees
// insns, actions and the program itself through it.
HAllocator *allocator;
// NOTE(review): presumably the number of entries used in insns -- confirm.
size_t length;
// Number of entries in actions (h_rvm_create_action scans 0..action_count-1).
size_t action_count;
HRVMInsn *insns;
HSVMAction *actions;
} HRVMProg;
};
// Returns true IFF the provided parser could be compiled.
bool h_compile_regex(HRVMProg *prog, const HParser* parser);
@ -68,4 +71,10 @@ uint16_t h_rvm_get_ip(HRVMProg *prog);
// correct target is known.
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val);
// Common SVM action funcs...
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env);
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env);
extern HParserBackendVTable h__regex_backend_vtable;
#endif