diff --git a/Makefile b/Makefile index 6c8f386..09aa037 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,8 @@ SUBDIRS = src examples jni include config.mk +TOPLEVEL=. +include common.mk CONFIG_VARS= INCLUDE_TESTS diff --git a/README.md b/README.md index 492950d..c1c1293 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Features Installing ========== ### Prerequisites -* make +* SCons * a JDK ### Optional Dependencies @@ -36,11 +36,15 @@ Installing * glib-2.0 (>= 2.29) (for `make test`) * glib-2.0-dev (for `make test`) -To install, type `make`. To run the built-in test suite, type `make test`. +To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`. -If jni.h and jni_md.h aren't already somewhere on your include path, prepend `C_INCLUDE_PATH=/path/to/jdk/include` to that. +If jni.h and jni_md.h aren't already somewhere on your include path, prepend +`C_INCLUDE_PATH=/path/to/jdk/include` to the `scons` command. -There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`. +There is currently no `install` target; to make Hammer available system-wide, +copy `libhammer.a` and `libhammer.so` from `build/opt/src` to `/usr/lib/` (or +`/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to +`/usr/include/`. 
Usage ===== diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000..c652b80 --- /dev/null +++ b/SConstruct @@ -0,0 +1,32 @@ + +env = Environment() + +env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes") +env['MODE'] = 'shared' + +AddOption("--variant", + dest="variant", + nargs=1, type="choice", + choices=["debug", "opt"], + default="opt", + action="store", + help="Build variant (debug or opt)") + +env['BUILDDIR'] = 'build/$VARIANT' + +dbg = env.Clone(VARIANT='debug') +dbg.Append(CCFLAGS=['-g']) + +opt = env.Clone(VARIANT='opt') +opt.Append(CCFLAGS="-O3") + +if GetOption("variant") == 'debug': + env = dbg +else: + env = opt +Export('env') + +env.SConscript(["src/SConscript"], variant_dir='build/$VARIANT/src') +env.SConscript(["examples/SConscript"], variant_dir='build/$VARIANT/examples') + +env.Command('test', 'build/$VARIANT/src/test_suite', 'env LD_LIBRARY_PATH=build/$VARIANT/src $SOURCE') \ No newline at end of file diff --git a/common.mk b/common.mk index 2673495..e98d3a2 100644 --- a/common.mk +++ b/common.mk @@ -1,3 +1,7 @@ +ifneq ($(REALLY_USE_OBSOLETE_BUILD_SYSTEM),yes) +$(error This is the old build system. 
Use "scons" to build, or use $(MAKE) REALLY_USE_OBSOLETE_BUILD_SYSTEM=yes) +endif + # Check to make sure variables are properly set ifeq ($(TOPLEVEL),) $(error $$TOPLEVEL is unset) diff --git a/examples/SConscript b/examples/SConscript new file mode 100644 index 0000000..94f32ac --- /dev/null +++ b/examples/SConscript @@ -0,0 +1,9 @@ +Import('env') + +example = env.Clone() +example.Append(LIBS="hammer", LIBPATH="../src") + +example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) +example.Program('base64', 'base64.c') +example.Program('base64_sem1', 'base64_sem1.c') +example.Program('base64_sem2', 'base64_sem2.c') diff --git a/examples/rr.c b/examples/rr.c index 2ba8534..dd25063 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -181,7 +181,7 @@ HParser* init_rdata(uint16_t type) { parsers[16] = txt; // All parsers must consume their input exactly. - for(uint16_t i; ilength), - **heads_n = a_new(HRVMTrace*, prog->length); + HSArray *heads_n = h_sarray_new(mm__, prog->length), // Both of these contain HRVMTrace*'s + *heads_p = h_sarray_new(mm__, prog->length); HRVMTrace *ret_trace = NULL; @@ -59,10 +59,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ HRVMThread *ip_queue = a_new(HRVMThread, prog->length); size_t ipq_top; - - - - #define THREAD ip_queue[ipq_top-1] #define PUSH_SVM(op_, arg_) do { \ HRVMTrace *nt = a_new(HRVMTrace, 1); \ @@ -72,34 +68,30 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ nt->input_pos = off; \ THREAD.trace = nt; \ } while(0) - - heads_n[0] = a_new(HRVMTrace, 1); // zeroing - heads_n[0]->opcode = SVM_NOP; + ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread + size_t off = 0; - int live_threads = 1; + int live_threads = 1; // May be redundant for (off = 0; off <= len; off++) { uint8_t ch = ((off == len) ? 0 : input[off]); - size_t ip_s; // BUG: there was an unused variable ip. 
Not sure if - // I intended to use it somewhere. /* scope */ { - HRVMTrace **heads_t; + HSArray *heads_t; heads_t = heads_n; heads_n = heads_p; heads_p = heads_t; - memset(heads_n, 0, prog->length * sizeof(*heads_n)); + h_sarray_clear(heads_n); } memset(insn_seen, 0, prog->length); // no insns seen yet if (!live_threads) goto match_fail; live_threads = 0; - for (ip_s = 0; ip_s < prog->length; ip_s++) { + HRVMTrace *tr_head; + H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) { ipq_top = 1; // TODO: Write this as a threaded VM - if (!heads_p[ip_s]) - continue; THREAD.ip = ip_s; - THREAD.trace = heads_p[ip_s]; + THREAD.trace = tr_head; uint8_t hi, lo; uint16_t arg; while(ipq_top > 0) { @@ -155,7 +147,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ case RVM_STEP: // save thread live_threads++; - heads_n[++THREAD.ip] = THREAD.trace; + h_sarray_set(heads_n, ++THREAD.ip, THREAD.trace); ipq_top--; goto next_insn; } diff --git a/src/datastructures.c b/src/datastructures.c index 94bc901..45a7eba 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -355,3 +355,19 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) { } return hash; } + +HSArray *h_sarray_new(HAllocator *mm__, size_t size) { + HSArray *ret = h_new(HSArray, 1); + ret->capacity = size; + ret->used = 0; + ret->nodes = h_new(HSArrayNode, size); // Does not actually need to be initialized. + ret->mm__ = mm__; + // TODO: Add the valgrind hooks to mark this initialized. 
+ return ret; +} + +void h_sarray_free(HSArray *arr) { + HAllocator *mm__ = arr->mm__; + h_free(arr->nodes); + h_free(arr); +} diff --git a/src/desugar.c b/src/desugar.c index 46176ea..5ef8f9b 100644 --- a/src/desugar.c +++ b/src/desugar.c @@ -11,6 +11,7 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) { if(nstk__->prealloc == NULL) nstk__->prealloc = h_new(HCFChoice, 1); // we're going to do something naughty and cast away the const to memoize + assert(parser->vtable->desugar != NULL); ((HParser *)parser)->desugared = nstk__->prealloc; parser->vtable->desugar(mm__, nstk__, parser->env); if (stk__ == NULL) diff --git a/src/internal.h b/src/internal.h index a897e9f..02ee748 100644 --- a/src/internal.h +++ b/src/internal.h @@ -17,6 +17,7 @@ #ifndef HAMMER_INTERNAL__H #define HAMMER_INTERNAL__H +#include #include #include #include "hammer.h" @@ -72,6 +73,65 @@ typedef struct HSlist_ { struct HArena_ *arena; } HSlist; +// {{{ HSArray + +typedef struct HSArrayNode_ { + size_t elem; + size_t index; + void* content; +} HSArrayNode; + +typedef struct HSArray_ { + // Sparse array + // Element n is valid iff arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n + HSArrayNode *nodes; // content for node at index n is stored at position n. 
+ size_t capacity; + size_t used; + HAllocator *mm__; +} HSArray; + +HSArray *h_sarray_new(HAllocator *mm__, size_t size); +void h_sarray_free(HSArray *arr); +static inline bool h_sarray_isset(HSArray *arr, size_t n) { + assert(n < arr->capacity); + return (arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n); +} +static inline void* h_sarray_get(HSArray *arr, size_t n) { + assert(n < arr->capacity); + if (h_sarray_isset(arr, n)) + return arr->nodes[n].content; + return NULL; +} +// Store val under key n (registering n as in-use on first store) and return val. +static inline void* h_sarray_set(HSArray *arr, size_t n, void* val) { + assert(n < arr->capacity); + arr->nodes[n].content = val; + if (h_sarray_isset(arr, n)) + return val; + arr->nodes[arr->used].elem = n; + arr->nodes[n].index = arr->used++; + return val; +} +// Drop every entry by resetting the in-use count; node storage is not touched. +static inline void h_sarray_clear(HSArray *arr) { + arr->used = 0; +} + +#define H__APPEND2(a,b) a##b +#define H__APPEND(a,b) H__APPEND2(a,b) +#define H__INTVAR(pfx) H__APPEND(intvar__##pfx##__,__COUNTER__) +// Visit each set entry exactly once, in the order keys were first stored; idx and var are loaded only after the bounds check, so an empty array is never dereferenced. +#define H_SARRAY_FOREACH_KV_(var,idx,arr,intvar) \ + for (size_t intvar = 0, idx = 0; \ + intvar < (arr)->used && (idx = (arr)->nodes[intvar].elem, var = (arr)->nodes[idx].content, 1); \ + intvar = intvar + 1) +#define H_SARRAY_FOREACH_K_(idx,arr,intvar) for (size_t intvar = 0, idx = 0; intvar < (arr)->used && (idx = (arr)->nodes[intvar].elem, 1); intvar = intvar + 1) +#define H_SARRAY_FOREACH_KV(var,index,arr) H_SARRAY_FOREACH_KV_(var,index,arr,H__INTVAR(idx)) +#define H_SARRAY_FOREACH_V(var,arr) H_SARRAY_FOREACH_KV_(var,H__INTVAR(elem),arr,H__INTVAR(idx)) +#define H_SARRAY_FOREACH_K(index,arr) H_SARRAY_FOREACH_K_(index,arr,H__INTVAR(idx)) + +// }}} + typedef unsigned int *HCharset; static inline HCharset new_charset(HAllocator* mm__) { diff --git a/src/parsers/many.c b/src/parsers/many.c index a095940..1e3b022 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -130,7 +130,7 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (repeat->min_p) { h_rvm_insert_insn(prog, RVM_PUSH, 0); assert(repeat->count < 2); // TODO: The other cases 
should be supported later. - uint16_t end_fork; + uint16_t end_fork = 0xFFFF; // Shut up GCC if (repeat->count == 0) end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF); uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF); @@ -145,7 +145,8 @@ static bool many_ctrvm(HRVMProg *prog, void *env) { if (!h_compile_regex(prog, repeat->p)) return false; h_rvm_insert_insn(prog, RVM_FORK, nxt); - h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); + if (repeat->count == 0) + h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); return true;