NEWS:
* Switching endianness mid-byte no longer potentially re-reads bytes.
* bit_offset now consistently refers to the number of bits already
  read.
* HParsedTokens now have a bit_length field; this is a size_t.  This
  may be removed for memory reasons.

The bit writer has not yet been updated to match; the result of
switching bit writer endianness in the middle of a byte remains
undefined.
This commit is contained in:
TQ Hirsch 2015-01-04 04:00:09 +01:00
parent 58af99ae40
commit af73181cf4
12 changed files with 86 additions and 48 deletions

View file

@ -69,7 +69,8 @@ ctests = ['t_benchmark.c',
't_bitwriter.c', 't_bitwriter.c',
't_parser.c', 't_parser.c',
't_grammar.c', 't_grammar.c',
't_misc.c'] 't_misc.c',
't_regression.c']
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)

View file

@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
if (tmp_res) { if (tmp_res) {
tmp_res->arena = state->arena; tmp_res->arena = state->arena;
if (!state->input_stream.overrun) { if (!state->input_stream.overrun) {
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
if (state->input_stream.endianness & BIT_BIG_ENDIAN) if (tmp_res->bit_length == 0) { // Don't modify if forwarding.
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; tmp_res->bit_length = bit_length;
else }
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; if (tmp_res->ast && tmp_res->ast->bit_length != 0) {
((HParsedToken*)(tmp_res->ast))->bit_length = bit_length;
}
} else } else
tmp_res->bit_length = 0; tmp_res->bit_length = 0;
} }

View file

@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem.
// not in danger of overflowing, so add in bits // not in danger of overflowing, so add in bits
// add in number of bits... // add in number of bits...
if (state->endianness & BIT_BIG_ENDIAN) bits_left = (bits_left << 3) - state->bit_offset - state->margin;
bits_left = (bits_left << 3) - 8 + state->bit_offset;
else
bits_left = (bits_left << 3) - state->bit_offset;
if (bits_left < count) { if (bits_left < count) {
if (state->endianness & BYTE_BIG_ENDIAN) if (state->endianness & BYTE_BIG_ENDIAN)
final_shift = count - bits_left; final_shift = count - bits_left;
@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
final_shift = 0; final_shift = 0;
} }
if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) {
// fast path // fast path
if (state->endianness & BYTE_BIG_ENDIAN) { if (state->endianness & BYTE_BIG_ENDIAN) {
while (count > 0) { while (count > 0) {
@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
int segment, segment_len; int segment, segment_len;
// Read a segment... // Read a segment...
if (state->endianness & BIT_BIG_ENDIAN) { if (state->endianness & BIT_BIG_ENDIAN) {
if (count >= state->bit_offset) { if (count + state->bit_offset + state->margin >= 8) {
segment_len = state->bit_offset; segment_len = 8 - state->bit_offset - state->margin;
state->bit_offset = 8; segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1);
segment = state->input[state->index] & ((1 << segment_len) - 1);
state->index++;
} else {
segment_len = count;
state->bit_offset -= count;
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);
}
} else { // BIT_LITTLE_ENDIAN
if (count + state->bit_offset >= 8) {
segment_len = 8 - state->bit_offset;
segment = (state->input[state->index] >> state->bit_offset);
state->index++; state->index++;
state->bit_offset = 0; state->bit_offset = 0;
state->margin = 0;
} else {
segment_len = count;
state->bit_offset += count;
segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1);
}
} else { // BIT_LITTLE_ENDIAN
if (count + state->bit_offset + state->margin >= 8) {
segment_len = 8 - state->bit_offset - state->margin;
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);
state->index++;
state->bit_offset = 0;
state->margin = 0;
} else { } else {
segment_len = count; segment_len = count;
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);

View file

@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t*
// Set up a parse state... // Set up a parse state...
HInputStream input_stream = { HInputStream input_stream = {
.index = 0, .index = 0,
.bit_offset = 8, .bit_offset = 0,
.overrun = 0, .overrun = 0,
.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
.length = length, .length = length,

View file

@ -99,6 +99,7 @@ typedef struct HParsedToken_ {
HTokenData token_data; HTokenData token_data;
#endif #endif
size_t index; size_t index;
size_t bit_length;
char bit_offset; char bit_offset;
} HParsedToken; } HParsedToken;

View file

@ -70,6 +70,8 @@ typedef struct HInputStream_ {
size_t index; size_t index;
size_t length; size_t length;
char bit_offset; char bit_offset;
char margin; // The number of bits on the end that is being read
// towards that should be ignored.
char endianness; char endianness;
char overrun; char overrun;
} HInputStream; } HInputStream;
@ -295,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable;
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported. // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
int64_t h_read_bits(HInputStream* state, int count, char signed_p); int64_t h_read_bits(HInputStream* state, int count, char signed_p);
static inline size_t h_input_stream_pos(HInputStream* state) {
return state->index * 8 + state->bit_offset + state->margin;
}
// need to decide if we want to make this public. // need to decide if we want to make this public.
HParseResult* h_do_parse(const HParser* parser, HParseState *state); HParseResult* h_do_parse(const HParser* parser, HParseState *state);
void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);

View file

@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input)
{ {
assert(input->bit_offset <= 8); assert(input->bit_offset <= 8);
if((input->bit_offset % 8) != 0) { char tmp = input->bit_offset;
// switching bit order in the middle of a byte input->bit_offset = input->margin;
// we leave bit_offset untouched. this means that something like input->margin = tmp;
// le(bits(5)),le(bits(3))
// is equivalent to
// le(bits(5),bits(3)) .
// on the other hand,
// le(bits(5)),be(bits(5))
// will read the same 5 bits twice and discard the top 3.
} else {
// flip offset (0 <-> 8)
input->bit_offset = 8 - input->bit_offset;
}
} }
static HParseResult *parse_endianness(void *env, HParseState *state) static HParseResult *parse_endianness(void *env, HParseState *state)

View file

@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
ret->ast = tok; ret->ast = tok;
ret->arena = arena; ret->arena = arena;
ret->bit_length = 0; // This way it gets overridden in h_do_parse
return ret; return ret;
} }

View file

@ -9,7 +9,7 @@
.input = (uint8_t*)buf, \ .input = (uint8_t*)buf, \
.length = len, \ .length = len, \
.index = 0, \ .index = 0, \
.bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \ .bit_offset = 0, \
.endianness = endianness_ \ .endianness = endianness_ \
} }
@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) {
g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3);
} }
void register_bitreader_tests(void) { void register_bitreader_tests(void) {
g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/be", test_bitreader_be);
g_test_add_func("/core/bitreader/le", test_bitreader_le); g_test_add_func("/core/bitreader/le", test_bitreader_le);

View file

@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
.input = buf, .input = buf,
.index = 0, .index = 0,
.length = len, .length = len,
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, .bit_offset = 0,
.endianness = flags, .endianness = flags,
.overrun = 0 .overrun = 0
}; };

38
src/t_regression.c Normal file
View file

@ -0,0 +1,38 @@
#include <glib.h>
#include <stdint.h>
#include "glue.h"
#include "hammer.h"
#include "test_suite.h"
static void test_bug118(void) {
// https://github.com/UpstandingHackers/hammer/issues/118
// Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5
HParseResult* p;
const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A";
#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN)
H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false)));
H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false)));
#undef MY_ENDIAN
H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL));
H_RULE(header_ok, h_sequence(nibble, nibble, NULL));
H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL));
H_RULE(parser_ok, h_sequence(header_ok, samples, NULL));
H_RULE(parser_weird, h_sequence(header_weird, samples, NULL));
p = h_parse(parser_weird, input, 6);
g_check_cmp_int32(p->bit_length, ==, 44);
h_parse_result_free(p);
p = h_parse(parser_ok, input, 6);
g_check_cmp_int32(p->bit_length, ==, 40);
h_parse_result_free(p);
}
void register_regression_tests(void) {
g_test_add_func("/core/regression/bug118", test_bug118);
}

View file

@ -25,6 +25,7 @@ extern void register_parser_tests();
extern void register_grammar_tests(); extern void register_grammar_tests();
extern void register_misc_tests(); extern void register_misc_tests();
extern void register_benchmark_tests(); extern void register_benchmark_tests();
extern void register_regression_tests();
int main(int argc, char** argv) { int main(int argc, char** argv) {
g_test_init(&argc, &argv, NULL); g_test_init(&argc, &argv, NULL);
@ -35,6 +36,7 @@ int main(int argc, char** argv) {
register_parser_tests(); register_parser_tests();
register_grammar_tests(); register_grammar_tests();
register_misc_tests(); register_misc_tests();
register_regression_tests();
if (g_test_slow() || g_test_perf()) if (g_test_slow() || g_test_perf())
register_benchmark_tests(); register_benchmark_tests();