Finished up the regex backend
This commit is contained in:
parent
204147a3d2
commit
13088c9d7a
39 changed files with 481 additions and 250 deletions
|
|
@ -1,7 +1,16 @@
|
|||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "../internal.h"
|
||||
#include "../parsers/parser_internal.h"
|
||||
|
||||
// Hashes `len` bytes starting at `buf`: the accumulator starts at 5381 and,
// for every byte, is multiplied by 33 and has the byte added. Arithmetic is
// unsigned 32-bit, so it wraps on overflow. A zero-length buffer yields 5381.
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    acc = acc * 33u + buf[i];
  }
  return acc;
}
|
||||
|
||||
// Shorthand for constructing HCachedResult values
|
||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||
|
|
@ -190,12 +199,39 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
|||
}
|
||||
}
|
||||
|
||||
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) {
|
||||
// "Compiles" a parser for the packrat backend. The packrat backend needs no
// compilation step, so this only tags the parser with PB_PACKRAT.
// `mm__` and `params` are unused. Always returns 0.
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
  parser->backend = PB_PACKRAT;

  // Nothing to build: everything should work out of the box.
  return 0;
}
|
||||
|
||||
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
|
||||
static uint32_t cache_key_hash(const void* key) {
|
||||
return djbhash(key, sizeof(HParserCacheKey));
|
||||
}
|
||||
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||
}
|
||||
|
||||
// Runs a packrat parse of `input_stream` with `parser`.
//
// A fresh arena is created from `mm__` and a parse state is built inside it:
// the memoization cache, a copy of the input stream, the left-recursion stack
// and the recursion-heads table. After h_do_parse() returns, the bookkeeping
// structures are released. The arena itself is deleted only when the parse
// failed (NULL result).
// NOTE(review): on success the arena is left alive, presumably because the
// returned result lives in it -- confirm against the HParseResult contract.
//
// Returns the result of h_do_parse(), or NULL when the parse failed.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HArena * arena = h_new_arena(mm__, 0);
  HParseState *parse_state = a_new_(arena, HParseState, 1);

  parse_state->arena = arena;
  parse_state->input_stream = *input_stream;
  parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
                                       cache_key_hash);        // hash_func
  parse_state->lr_stack = h_slist_new(arena);
  parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
                                                 cache_key_hash);

  HParseResult *res = h_do_parse(parser, parse_state);

  // tear down the parse state
  h_slist_free(parse_state->lr_stack);
  h_hashtable_free(parse_state->recursion_heads);
  h_hashtable_free(parse_state->cache);
  if (!res) {
    h_delete_arena(parse_state->arena);
  }

  return res;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -245,10 +245,6 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// Asks the parser's vtable to emit its regex-VM program into `prog`.
// Returns whatever compile_to_rvm reports.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
  const bool compiled = parser->vtable->compile_to_rvm(prog, parser->env);
  return compiled;
}
|
||||
|
||||
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
|
||||
for (uint16_t i = 0; i < prog->action_count; i++) {
|
||||
if (prog->actions[i].action == action_func && prog->actions[i].env == env)
|
||||
|
|
@ -293,4 +289,78 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
|
|||
prog->insns[ip].arg = new_val;
|
||||
}
|
||||
|
||||
// TODO: Implement the primitive actions
|
||||
// Counts the tokens sitting above the topmost TT_MARK on the SVM stack.
//
// The stack is scanned from the top down across every slot, including the
// bottom one (index 0). Returns the number of tokens above the mark (0 when
// the mark is on top of the stack), or ctx->stack_count when no mark is
// present -- which also covers an empty stack.
size_t h_svm_count_to_mark(HSVMContext *ctx) {
  // Bound by stack_count (not stack_count-1): the bottom slot must be checked
  // too, and the bound must not wrap around when the stack is empty.
  for (size_t ctm = 0; ctm < ctx->stack_count; ctm++) {
    if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
      return ctm;
  }
  return ctx->stack_count;
}
|
||||
|
||||
// TODO: Implement the primitive actions
|
||||
// SVM action: folds every token above the topmost mark into one sequence.
//
// The mark token itself is reused as the result: its type is changed to
// TT_SEQUENCE and it receives a counted array holding the folded tokens in
// stack order (deepest first). The folded tokens are then popped, leaving the
// new sequence token on top of the stack. `env` is unused.
//
// Asserts that a mark exists on the stack. Always returns true.
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
  size_t n_items = h_svm_count_to_mark(ctx);
  // h_svm_count_to_mark returns stack_count when there is no mark.
  assert (n_items < ctx->stack_count);
  HParsedToken *res = ctx->stack[ctx->stack_count - 1 - n_items];
  assert (res->token_type == TT_MARK);
  res->token_type = TT_SEQUENCE;

  HCountedArray *ret_carray = h_carray_new_sized(arena, n_items);
  res->seq = ret_carray;
  // res index and bit offset are the same as the mark.
  for (size_t i = 0; i < n_items; i++) {
    ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i];
  }
  // The elements were stored directly rather than appended, so the array's
  // element count has to be recorded by hand; otherwise the sequence would
  // appear empty to anything that reads `used`.
  ret_carray->used = n_items;
  ctx->stack_count -= n_items;
  return true;
}
|
||||
|
||||
// SVM action: pops tokens off the stack up to and including the topmost
// TT_MARK. Returns true once a mark has been popped; returns false if the
// stack runs empty without finding one (the stack is left empty in that
// case). `arena` and `env` are unused.
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
  while (ctx->stack_count != 0) {
    ctx->stack_count -= 1;
    const HParsedToken *popped = ctx->stack[ctx->stack_count];
    if (popped->token_type == TT_MARK) {
      return true;
    }
  }
  return false; // no mark found.
}
|
||||
|
||||
// Glue regex backend to rest of system
|
||||
|
||||
// Asks the parser's vtable to emit its regex-VM program into `prog`.
// Returns whatever compile_to_rvm reports.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
  const bool compiled = parser->vtable->compile_to_rvm(prog, parser->env);
  return compiled;
}
|
||||
|
||||
// Backend `free` hook: releases the compiled regex-VM program stored in
// parser->backend_data (its instruction table, its action table, and the
// program object itself) using the allocator recorded in the program, then
// clears backend_data and resets the parser's backend to PB_PACKRAT.
static void h_regex_free(HParser *parser) {
  HRVMProg *prog = (HRVMProg*)parser->backend_data;
  // NOTE(review): `mm__` is not referenced directly below; presumably the
  // h_free macro picks it up from scope -- confirm.
  HAllocator *mm__ = prog->allocator;

  // Detach the program from the parser before releasing it.
  parser->backend = PB_PACKRAT;
  parser->backend_data = NULL;

  h_free(prog->actions);
  h_free(prog->insns);
  h_free(prog);
}
|
||||
|
||||
// Backend `compile` hook: compiles `parser` into a regex-VM program and
// stores it in parser->backend_data. `params` is unused.
//
// Returns:
//   0 on success,
//   1 if the parser's vtable reports that it is not a valid regular parser,
//   2 if emitting the program failed (everything allocated so far is freed).
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
  if (!parser->vtable->isValidRegular(parser->env))
    return 1;

  HRVMProg *prog = h_new(HRVMProg, 1);
  // Start from a well-defined empty program. Without this the counters and
  // table pointers hold whatever the allocator returned, and both the
  // compiler and the failure path below (which frees insns/actions) would
  // operate on garbage.
  prog->length = 0;
  prog->action_count = 0;
  prog->insns = NULL;
  prog->actions = NULL;
  prog->allocator = mm__;

  if (!h_compile_regex(prog, parser)) {
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
    return 2;
  }
  parser->backend_data = prog;
  return 0;
}
|
||||
|
||||
// Backend `parse` hook: runs the compiled regex-VM program held in
// parser->backend_data over the bytes of `input_stream` and returns the
// result of h_rvm_run__m.
static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HRVMProg *prog = (HRVMProg*)parser->backend_data;
  return h_rvm_run__m(mm__, prog, input_stream->input, input_stream->length);
}
|
||||
|
||||
HParserBackendVTable h__regex_backend_vtable = {
|
||||
.compile = h_regex_compile,
|
||||
.parse = h_regex_parse,
|
||||
.free = h_regex_free
|
||||
};
|
||||
|
|
|
|||
|
|
@ -10,9 +10,12 @@ typedef enum HRVMOp_ {
|
|||
RVM_ACCEPT, // [a]
|
||||
RVM_GOTO, // [c] parameter is an offset into the instruction table
|
||||
RVM_FORK, // [c] parameter is an offset into the instruction table
|
||||
RVM_PUSH, // [a] No arguments, just pushes a mark onto the stack
|
||||
RVM_PUSH, // [a] No arguments, just pushes a mark (pointer to some
|
||||
// character in the input string) onto the stack
|
||||
RVM_ACTION, // [a] argument is an action ID
|
||||
RVM_CAPTURE, // [a] Capture the last string, and push it on the stack. No arg.
|
||||
RVM_CAPTURE, // [a] Capture the last string (up to the current
|
||||
// position, non-inclusive), and push it on the
|
||||
// stack. No arg.
|
||||
RVM_EOF, // [m] Succeeds only if at EOF.
|
||||
RVM_MATCH, // [m] The high byte of the parameter is an upper bound
|
||||
// and the low byte is a lower bound, both
|
||||
|
|
@ -31,7 +34,7 @@ typedef struct HRVMInsn_{
|
|||
|
||||
typedef struct HSVMContext_ {
|
||||
HParsedToken **stack;
|
||||
size_t stack_count;
|
||||
size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
|
||||
size_t stack_capacity;
|
||||
} HSVMContext;
|
||||
|
||||
|
|
@ -43,13 +46,13 @@ typedef struct HSVMAction_ {
|
|||
void* env;
|
||||
} HSVMAction;
|
||||
|
||||
typedef struct HRVMProg_ {
|
||||
struct HRVMProg_ {
|
||||
HAllocator *allocator;
|
||||
size_t length;
|
||||
size_t action_count;
|
||||
HRVMInsn *insns;
|
||||
HSVMAction *actions;
|
||||
} HRVMProg;
|
||||
};
|
||||
|
||||
// Returns true IFF the provided parser could be compiled.
|
||||
bool h_compile_regex(HRVMProg *prog, const HParser* parser);
|
||||
|
|
@ -68,4 +71,10 @@ uint16_t h_rvm_get_ip(HRVMProg *prog);
|
|||
// correct target is known.
|
||||
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val);
|
||||
|
||||
// Common SVM action funcs...
|
||||
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env);
|
||||
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env);
|
||||
|
||||
extern HParserBackendVTable h__regex_backend_vtable;
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue