From 70fa455c0301435256e8bae10af85b6ccedfb3de Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 30 Nov 2014 19:18:09 -0500 Subject: [PATCH 1/5] Fix compilation when PKG_CONFIG_PATH is not in environ --- SConstruct | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index 4cf48a3..a8f7ce8 100644 --- a/SConstruct +++ b/SConstruct @@ -14,7 +14,11 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') -env = Environment(ENV = {'PATH' : os.environ['PATH'], 'PKG_CONFIG_PATH' : os.environ['PKG_CONFIG_PATH']}, +envvars = {'PATH' : os.environ['PATH']} +if 'PKG_CONFIG_PATH' in os.environ: + envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH'] + +env = Environment(ENV = envvars, variables = vars, tools=tools, toolpath=['tools']) From b1078c3d88a32c92517bc9379750c31f68228c9d Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 7 Dec 2014 02:23:01 -0500 Subject: [PATCH 2/5] llvm doesn't care about your size They apparently removed alloc_size at some point (it was a no-op beforehand), causing the attribute to throw an error when clang compiles anything including allocator.h. --- src/allocator.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/allocator.h b/src/allocator.h index 803d89f..4a48693 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -33,11 +33,22 @@ typedef struct HAllocator_ { typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... -#ifndef SWIG -void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); + +#if defined __llvm__ +# if __has_attribute(malloc) +# define ATTR_MALLOC(n) __attribute__((malloc)) +# else +# define ATTR_MALLOC(n) +# endif +#elif defined SWIG +# define ATTR_MALLOC(n) +#elif defined __GNUC__ +# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(2))) #else -void* h_arena_malloc(HArena *arena, size_t count); +# define ATTR_MALLOC(n) #endif + +void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); From 5abe74f890825047a6117320d7dbe3590bfd5534 Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sat, 3 Jan 2015 16:35:56 -0500 Subject: [PATCH 3/5] Retab as though tabstop were 8 Tabs after the first non-tab character are crazymaking. I picked 8 because it wasn't 7 and caused the backslashes to line up on H_ACT_APPLY. --- src/glue.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/glue.h b/src/glue.h index 1fe6ce4..18e25ec 100644 --- a/src/glue.h +++ b/src/glue.h @@ -66,13 +66,13 @@ h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL) #define H_AVRULE(rule, def) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL) -#define H_ADRULE(rule, def, data) HParser *rule = \ +#define H_ADRULE(rule, def, data) HParser *rule = \ h_action(def, act_ ## rule, data) -#define H_VDRULE(rule, def, data) HParser *rule = \ +#define H_VDRULE(rule, def, data) HParser *rule = \ h_attr_bool(def, validate_ ## rule, data) -#define H_VADRULE(rule, def, data) HParser *rule = \ +#define H_VADRULE(rule, def, data) HParser *rule = \ h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data) -#define H_AVDRULE(rule, def, data) HParser *rule = \ +#define H_AVDRULE(rule, def, data) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data) @@ -109,8 +109,8 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); // Define 'myaction' as a specialization of 'paction' by supplying the leading // parameters. #define H_ACT_APPLY(myaction, paction, ...) \ - HParsedToken *myaction(const HParseResult *p, void* user_data) { \ - return paction(__VA_ARGS__, p, user_data); \ + HParsedToken *myaction(const HParseResult *p, void* user_data) { \ + return paction(__VA_ARGS__, p, user_data); \ } From 2dad0c48b41408ef6e1a23cec9552f3bc36bbcef Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sat, 3 Jan 2015 16:42:45 -0500 Subject: [PATCH 4/5] H_VALIDATE_APPLY macro I've found this especially useful in combination with my own _attr_uint_const for things like flags and type specifiers. It's possible that its usefulness might be diminished significantly if there were a built-in bitfield constant parser -- that certainly would eliminate all of my current uses of it -- but it still seems nicely symmetric with H_ACT_APPLY. --- src/glue.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/glue.h b/src/glue.h index 18e25ec..6c1c56c 100644 --- a/src/glue.h +++ b/src/glue.h @@ -11,7 +11,8 @@ // // A few standard semantic actions are defined below. The H_ACT_APPLY macro // allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. +// a generic action to fixed paramters. H_VALIDATE_APPLY is similar for +// h_atter_bool. // // The definition of more complex semantic actions will usually consist of // extracting data from the given parse tree and constructing a token of custom @@ -113,6 +114,12 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); return paction(__VA_ARGS__, p, user_data); \ } +// Similar, but for validations. +#define H_VALIDATE_APPLY(myvalidation, pvalidation, ...) \ + bool myvalidation(HParseResult* p, void* user_data) { \ + return pvalidation(__VA_ARGS__, p, user_data); \ + } + // // Working with HParsedTokens From af73181cf4fbd9b6ff269bea40a0f0f7c3c62113 Mon Sep 17 00:00:00 2001 From: TQ Hirsch Date: Sun, 4 Jan 2015 04:00:09 +0100 Subject: [PATCH 5/5] Fix #118 NEWS: * Switching endianness mid-byte no longer potentially re-reads bytes. * bit_offset now consistently refers to the number of bits already read. * HParsedTokens now have a bit_length field; this is a size_t. This may be removed for memory reasons. The bit writer has not yet been updated to match; the result of switching bit writer endianness in the middle of a byte remains undefined. --- src/SConscript | 3 ++- src/backends/packrat.c | 12 ++++++----- src/bitreader.c | 37 +++++++++++++++++----------------- src/hammer.c | 2 +- src/hammer.h | 1 + src/internal.h | 5 +++++ src/parsers/endianness.c | 16 +++------------ src/parsers/parser_internal.h | 1 + src/t_bitreader.c | 15 +++++++------- src/t_bitwriter.c | 2 +- src/t_regression.c | 38 +++++++++++++++++++++++++++++++++++ src/test_suite.c | 2 ++ 12 files changed, 86 insertions(+), 48 deletions(-) create mode 100644 src/t_regression.c diff --git a/src/SConscript b/src/SConscript index 38ace12..386a9a2 100644 --- a/src/SConscript +++ b/src/SConscript @@ -69,7 +69,8 @@ ctests = ['t_benchmark.c', 't_bitwriter.c', 't_parser.c', 't_grammar.c', - 't_misc.c'] + 't_misc.c', + 't_regression.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c1e422e..33082c6 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (tmp_res->bit_length == 0) { // Don't modify if forwarding. + tmp_res->bit_length = bit_length; + } + if (tmp_res->ast && tmp_res->ast->bit_length != 0) { + ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; + } } else tmp_res->bit_length = 0; } diff --git a/src/bitreader.c b/src/bitreader.c index df8c4c3..3627df5 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. // not in danger of overflowing, so add in bits // add in number of bits... - if (state->endianness & BIT_BIG_ENDIAN) - bits_left = (bits_left << 3) - 8 + state->bit_offset; - else - bits_left = (bits_left << 3) - state->bit_offset; + bits_left = (bits_left << 3) - state->bit_offset - state->margin; if (bits_left < count) { if (state->endianness & BYTE_BIG_ENDIAN) final_shift = count - bits_left; @@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { final_shift = 0; } - if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { + if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) { // fast path if (state->endianness & BYTE_BIG_ENDIAN) { while (count > 0) { @@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int segment, segment_len; // Read a segment... if (state->endianness & BIT_BIG_ENDIAN) { - if (count >= state->bit_offset) { - segment_len = state->bit_offset; - state->bit_offset = 8; - segment = state->input[state->index] & ((1 << segment_len) - 1); - state->index++; - } else { - segment_len = count; - state->bit_offset -= count; - segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); - } - } else { // BIT_LITTLE_ENDIAN - if (count + state->bit_offset >= 8) { - segment_len = 8 - state->bit_offset; - segment = (state->input[state->index] >> state->bit_offset); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1); state->index++; state->bit_offset = 0; + state->margin = 0; + } else { + segment_len = count; + state->bit_offset += count; + segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1); + } + } else { // BIT_LITTLE_ENDIAN + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); + state->index++; + state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); diff --git a/src/hammer.c b/src/hammer.c index 2456bdc..6bb9ebb 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* // Set up a parse state... HInputStream input_stream = { .index = 0, - .bit_offset = 8, + .bit_offset = 0, .overrun = 0, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .length = length, diff --git a/src/hammer.h b/src/hammer.h index b0ce75d..1c02b05 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -99,6 +99,7 @@ typedef struct HParsedToken_ { HTokenData token_data; #endif size_t index; + size_t bit_length; char bit_offset; } HParsedToken; diff --git a/src/internal.h b/src/internal.h index 6c721eb..0c4d4dc 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct HInputStream_ { size_t index; size_t length; char bit_offset; + char margin; // The number of bits on the end that is being read + // towards that should be ignored. char endianness; char overrun; } HInputStream; @@ -295,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable; // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. int64_t h_read_bits(HInputStream* state, int count, char signed_p); +static inline size_t h_input_stream_pos(HInputStream* state) { + return state->index * 8 + state->bit_offset + state->margin; +} // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index 091e4c0..e3f53ab 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input) { assert(input->bit_offset <= 8); - if((input->bit_offset % 8) != 0) { - // switching bit order in the middle of a byte - // we leave bit_offset untouched. this means that something like - // le(bits(5)),le(bits(3)) - // is equivalent to - // le(bits(5),bits(3)) . - // on the other hand, - // le(bits(5)),be(bits(5)) - // will read the same 5 bits twice and discard the top 3. - } else { - // flip offset (0 <-> 8) - input->bit_offset = 8 - input->bit_offset; - } + char tmp = input->bit_offset; + input->bit_offset = input->margin; + input->margin = tmp; } static HParseResult *parse_endianness(void *env, HParseState *state) diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ec97dd1..9a3b6de 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse return ret; } diff --git a/src/t_bitreader.c b/src/t_bitreader.c index 40a7bb9..65235c1 100644 --- a/src/t_bitreader.c +++ b/src/t_bitreader.c @@ -4,14 +4,14 @@ #include "internal.h" #include "test_suite.h" -#define MK_INPUT_STREAM(buf,len,endianness_) \ +#define MK_INPUT_STREAM(buf,len,endianness_) \ { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \ - .endianness = endianness_ \ - } + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } static void test_bitreader_ints(void) { @@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) { g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); } - void register_bitreader_tests(void) { g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/le", test_bitreader_le); diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c index 747c86f..6b9b705 100644 --- a/src/t_bitwriter.c +++ b/src/t_bitwriter.c @@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) { .input = buf, .index = 0, .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, + .bit_offset = 0, .endianness = flags, .overrun = 0 }; diff --git a/src/t_regression.c b/src/t_regression.c new file mode 100644 index 0000000..e74f16b --- /dev/null +++ b/src/t_regression.c @@ -0,0 +1,38 @@ +#include +#include +#include "glue.h" +#include "hammer.h" +#include "test_suite.h" + +static void test_bug118(void) { + // https://github.com/UpstandingHackers/hammer/issues/118 + // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 + + HParseResult* p; + const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; + +#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) + H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); + H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); +#undef MY_ENDIAN + + H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); + + H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); + H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); + + H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); + H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); + + + p = h_parse(parser_weird, input, 6); + g_check_cmp_int32(p->bit_length, ==, 44); + h_parse_result_free(p); + p = h_parse(parser_ok, input, 6); + g_check_cmp_int32(p->bit_length, ==, 40); + h_parse_result_free(p); +} + +void register_regression_tests(void) { + g_test_add_func("/core/regression/bug118", test_bug118); +} diff --git a/src/test_suite.c b/src/test_suite.c index 81f86b2..cba18e8 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -25,6 +25,7 @@ extern void register_parser_tests(); extern void register_grammar_tests(); extern void register_misc_tests(); extern void register_benchmark_tests(); +extern void register_regression_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -35,6 +36,7 @@ int main(int argc, char** argv) { register_parser_tests(); register_grammar_tests(); register_misc_tests(); + register_regression_tests(); if (g_test_slow() || g_test_perf()) register_benchmark_tests();