From 92f2eecf6c8ef971edba87753154ec38f612eace Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 27 May 2013 08:31:15 +0200 Subject: [PATCH 1/4] Remove unneeded desugaring code. --- src/desugar.c | 2 +- src/parsers/unimplemented.c | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/desugar.c b/src/desugar.c index ce87ca3..a613644 100644 --- a/src/desugar.c +++ b/src/desugar.c @@ -8,7 +8,7 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) { if (nstk__ == NULL) { nstk__ = h_cfstack_new(mm__); } - // we're going to do something naughty and cast away the const to memoize + assert(parser->vtable->desugar != NULL); parser->vtable->desugar(mm__, nstk__, parser->env); ((HParser *)parser)->desugared = nstk__->last_completed; if (stk__ == NULL) diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 18255ac..7c3c667 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -12,16 +12,10 @@ static HParseResult* parse_unimplemented(void* env, HParseState *state) { return &result; } -static HCFChoice* desugar_unimplemented(HAllocator *mm__, HCFStack *stk__, void *env) { - assert_message(0, "'h_unimplemented' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable unimplemented_vt = { .parse = parse_unimplemented, .isValidRegular = h_false, .isValidCF = h_false, - .desugar = desugar_unimplemented, .compile_to_rvm = h_not_regular, }; From ce74cf79396d3afdf726dd603d3c678373750e29 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 24 Jun 2013 21:23:28 +0200 Subject: [PATCH 2/4] Refactored regex backend to use a sparse thread list --- src/backends/regex.c | 30 ++++++++-------------- src/datastructures.c | 16 ++++++++++++ src/internal.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ src/parsers/many.c | 5 ++-- 4 files changed, 90 insertions(+), 21 deletions(-) diff --git a/src/backends/regex.c b/src/backends/regex.c index 6f069be..a3c073c 100644 
--- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -50,8 +50,8 @@ HRVMTrace *invert_trace(HRVMTrace *trace) { void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) { HArena *arena = h_new_arena(mm__, 0); - HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length), - **heads_n = a_new(HRVMTrace*, prog->length); + HSArray *heads_n = h_sarray_new(mm__, prog->length), // Both of these contain HRVMTrace*'s + *heads_p = h_sarray_new(mm__, prog->length); HRVMTrace *ret_trace = NULL; @@ -59,10 +59,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ HRVMThread *ip_queue = a_new(HRVMThread, prog->length); size_t ipq_top; - - - - #define THREAD ip_queue[ipq_top-1] #define PUSH_SVM(op_, arg_) do { \ HRVMTrace *nt = a_new(HRVMTrace, 1); \ @@ -72,34 +68,30 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ nt->input_pos = off; \ THREAD.trace = nt; \ } while(0) - - heads_n[0] = a_new(HRVMTrace, 1); // zeroing - heads_n[0]->opcode = SVM_NOP; + ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread + size_t off = 0; - int live_threads = 1; + int live_threads = 1; // May be redundant for (off = 0; off <= len; off++) { uint8_t ch = ((off == len) ? 0 : input[off]); - size_t ip_s; // BUG: there was an unused variable ip. Not sure if - // I intended to use it somewhere. 
/* scope */ { - HRVMTrace **heads_t; + HSArray *heads_t; heads_t = heads_n; heads_n = heads_p; heads_p = heads_t; - memset(heads_n, 0, prog->length * sizeof(*heads_n)); + h_sarray_clear(heads_n); } memset(insn_seen, 0, prog->length); // no insns seen yet if (!live_threads) goto match_fail; live_threads = 0; - for (ip_s = 0; ip_s < prog->length; ip_s++) { + HRVMTrace *tr_head; + H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) { ipq_top = 1; // TODO: Write this as a threaded VM - if (!heads_p[ip_s]) - continue; THREAD.ip = ip_s; - THREAD.trace = heads_p[ip_s]; + THREAD.trace = tr_head; uint8_t hi, lo; uint16_t arg; while(ipq_top > 0) { @@ -155,7 +147,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ case RVM_STEP: // save thread live_threads++; - heads_n[++THREAD.ip] = THREAD.trace; + h_sarray_set(heads_n, ++THREAD.ip, THREAD.trace); ipq_top--; goto next_insn; } diff --git a/src/datastructures.c b/src/datastructures.c index 99b4ca5..1ddd620 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -284,3 +284,19 @@ HHashValue h_hash_ptr(const void *p) { // XXX just djbhash it return (uintptr_t)p >> 4; } + +HSArray *h_sarray_new(HAllocator *mm__, size_t size) { + HSArray *ret = h_new(HSArray, 1); + ret->capacity = size; + ret->used = 0; + ret->nodes = h_new(HSArrayNode, size); // Does not actually need to be initialized. + ret->mm__ = mm__; + // TODO: Add the valgrind hooks to mark this initialized. 
+ return ret; +} + +void h_sarray_free(HSArray *arr) { + HAllocator *mm__ = arr->mm__; + h_free(arr->nodes); + h_free(arr); +} diff --git a/src/internal.h b/src/internal.h index d0fb53a..b7fe621 100644 --- a/src/internal.h +++ b/src/internal.h @@ -17,6 +17,7 @@ #ifndef HAMMER_INTERNAL__H #define HAMMER_INTERNAL__H +#include #include #include #include "hammer.h" @@ -72,6 +73,65 @@ typedef struct HSlist_ { struct HArena_ *arena; } HSlist; +// {{{ HSArray + +typedef struct HSArrayNode_ { + size_t elem; + size_t index; + void* content; +} HSArrayNode; + +typedef struct HSArray_ { + // Sparse array + // Element n is valid iff arr->nodes[n].index < arr.used && arr.nodes[arr.nodes[n].index].elem == n + HSArrayNode *nodes; // content for node at index n is stored at position n. + size_t capacity; + size_t used; + HAllocator *mm__; +} HSArray; + +HSArray *h_sarray_new(HAllocator *mm__, size_t size); +void h_sarray_free(HSArray *arr); +static inline bool h_sarray_isset(HSArray *arr, size_t n) { + assert(n < arr->capacity); + return (arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n); +} +static inline void* h_sarray_get(HSArray *arr, size_t n) { + assert(n < arr->capacity); + if (h_sarray_isset(arr, n)) + return arr->nodes[n].content; + return NULL; +} + +static inline void* h_sarray_set(HSArray *arr, size_t n, void* val) { + assert(n < arr->capacity); + arr->nodes[n].content = val; + if (h_sarray_isset(arr, n)) + return val; + arr->nodes[arr->used].elem = n; + arr->nodes[n].index = arr->used++; + return val; +} + +static inline void h_sarray_clear(HSArray *arr) { + arr->used = 0; +} + +#define H__APPEND2(a,b) a##b +#define H__APPEND(a,b) H__APPEND2(a,b) +#define H__INTVAR(pfx) H__APPEND(intvar__##pfx##__,__COUNTER__) +// Loop helpers: visit every set entry exactly once, in insertion order; idx/var are loaded in the loop condition, so an empty array runs zero iterations and reads no node. H_SARRAY_FOREACH_K_ is the keys-only form. +#define H_SARRAY_FOREACH_KV_(var,idx,arr,intvar) \ + for (size_t intvar = 0, idx = 0; \ + intvar < (arr)->used && ((idx = (arr)->nodes[intvar].elem), (var = (arr)->nodes[idx].content), 1); \ 
+ intvar++) +#define H_SARRAY_FOREACH_K_(idx,arr,intvar) for (size_t intvar = 0, idx = 0; intvar < (arr)->used && ((idx = (arr)->nodes[intvar].elem), 1); intvar++) +#define H_SARRAY_FOREACH_KV(var,index,arr) H_SARRAY_FOREACH_KV_(var,index,arr,H__INTVAR(idx)) +#define H_SARRAY_FOREACH_V(var,arr) H_SARRAY_FOREACH_KV_(var,H__INTVAR(elem),arr,H__INTVAR(idx)) +#define H_SARRAY_FOREACH_K(index,arr) H_SARRAY_FOREACH_K_(index,arr,H__INTVAR(idx)) + +// }}} + typedef unsigned int *HCharset; static inline HCharset new_charset(HAllocator* mm__) { diff --git a/src/parsers/many.c b/src/parsers/many.c index a095940..1e3b022 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -130,7 +130,7 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (repeat->min_p) { h_rvm_insert_insn(prog, RVM_PUSH, 0); assert(repeat->count < 2); // TODO: The other cases should be supported later. - uint16_t end_fork; + uint16_t end_fork = 0xFFFF; // Shut up GCC if (repeat->count == 0) end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF); uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF); @@ -145,7 +145,8 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (!h_compile_regex(prog, repeat->p)) return false; h_rvm_insert_insn(prog, RVM_FORK, nxt); - h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); + if (repeat->count == 0) + h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); return true; From e9a7c0b83d18bb8c03b7a5255783be6dddc30d05 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 24 Jun 2013 21:26:07 +0200 Subject: [PATCH 3/4] Added new SConscript-based build system; not yet done porting --- SConstruct | 30 +++++++++++++++++++++++ examples/SConscript | 9 +++++++ src/SConscript | 59 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 SConstruct create mode 100644 examples/SConscript create mode 100644 src/SConscript diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000..10bcdec 
--- /dev/null +++ b/SConstruct @@ -0,0 +1,30 @@ + +env = Environment() + +env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes") +env['MODE'] = 'shared' + +AddOption("--variant", + dest="variant", + nargs=1, type="choice", + choices=["debug", "opt"], + default="debug", + action="store", + help="Build variant (debug or opt)") + +env['BUILDDIR'] = 'build/$VARIANT' + +dbg = env.Clone(VARIANT='debug') +dbg.Append(CCFLAGS=['-g']) + +opt = env.Clone(VARIANT='opt') +opt.Append(CCFLAGS="-O3") + +if GetOption("variant") == 'debug': + env = dbg +else: + env = opt +Export('env') + +env.SConscript(["src/SConscript"], variant_dir='build/$VARIANT/src') +env.SConscript(["examples/SConscript"], variant_dir='build/$VARIANT/examples') diff --git a/examples/SConscript b/examples/SConscript new file mode 100644 index 0000000..94f32ac --- /dev/null +++ b/examples/SConscript @@ -0,0 +1,9 @@ +Import('env') + +example = env.Clone() +example.Append(LIBS="hammer", LIBPATH="../src") + +example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) +example.Program('base64', 'base64.c') +example.Program('base64_sem1', 'base64_sem1.c') +example.Program('base64_sem2', 'base64_sem2.c') diff --git a/src/SConscript b/src/SConscript new file mode 100644 index 0000000..70868a4 --- /dev/null +++ b/src/SConscript @@ -0,0 +1,59 @@ +Import('env') + +parsers = ['parsers/%s.c'%s for s in + ['action', + 'and', + 'attr_bool', + 'bits', + 'butnot', + 'ch', + 'charset', + 'choice', + 'difference', + 'end', + 'epsilon', + 'ignore', + 'ignoreseq', + 'indirect', + 'int_range', + 'many', + 'not', + 'nothing', + 'optional', + 'sequence', + 'token', + 'unimplemented', + 'whitespace', + 'xor']] + +backends = ['backends/%s.c' % s for s in + ['packrat', 'llk', 'regex']] + +misc_hammer_parts = [ + 'allocator.c', + 'benchmark.c', + 'bitreader.c', + 'bitwriter.c', + 'cfgrammar.c', + 'datastructures.c', + 'desugar.c', + 'glue.c', + 'hammer.c', + 'pprint.c', + 'system_allocator.c'] + +tests = 
['t_benchmark.c', + 't_bitreader.c', + 't_bitwriter.c', + 't_parser.c', + 't_grammar.c', + 't_misc.c'] + +libhammer = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) +libhammer = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) + +testenv = env.Clone() +testenv.ParseConfig('pkg-config --cflags --libs glib-2.0') +testenv.Append(LIBS=['hammer'], LIBPATH=['.']) +testenv.Program('test_suite', tests + ['test_suite.c']) + From aed1de5ce5df812311a9df5d1b9d84cbf1dfa3eb Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 24 Jun 2013 21:46:23 +0200 Subject: [PATCH 4/4] Applied a bugfix from my local branch --- src/parsers/many.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parsers/many.c b/src/parsers/many.c index a095940..1e3b022 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -130,7 +130,7 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (repeat->min_p) { h_rvm_insert_insn(prog, RVM_PUSH, 0); assert(repeat->count < 2); // TODO: The other cases should be supported later. - uint16_t end_fork; + uint16_t end_fork = 0xFFFF; // Shut up GCC if (repeat->count == 0) end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF); uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF); @@ -145,7 +145,8 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (!h_compile_regex(prog, repeat->p)) return false; h_rvm_insert_insn(prog, RVM_FORK, nxt); - h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); + if (repeat->count == 0) + h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); return true;