add h_with_endianness()

This commit is contained in:
Sven M. Hallberg 2014-05-07 19:24:26 +02:00
parent 4f188340be
commit 5f920b29f8
4 changed files with 123 additions and 0 deletions

View file

@ -29,6 +29,7 @@ parsers = ['parsers/%s.c'%s for s in
'choice',
'difference',
'end',
'endianness',
'epsilon',
'ignore',
'ignoreseq',

View file

@ -611,6 +611,16 @@ HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
*/
HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
/**
* This parser runs its argument parser with the given endianness setting.
* The setting that was in effect before is restored once the argument
* parser returns, whether or not it succeeded.
*
* The value of 'endianness' should be a bitwise OR of the constants
* BYTE_BIG_ENDIAN/BYTE_LITTLE_ENDIAN and BIT_BIG_ENDIAN/BIT_LITTLE_ENDIAN.
*
* Result token type: p's result type.
*/
HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p);
/**
* Free the memory allocated to an HParseResult when it is no longer needed.
*/

72
src/parsers/endianness.c Normal file
View file

@ -0,0 +1,72 @@
#include "parser_internal.h"
// Environment of the endianness combinator: filled in by
// h_with_endianness__m() and read back by parse_endianness().
typedef struct {
const HParser *p;  // parser to run under the overridden setting
char endianness;   // requested setting; compared against the input stream's 'endianness' field
} HParseEndianness;
/*
 * Helper: adjust the stream's bit offset for a change of bit order.
 *
 * On a byte boundary the offset is mirrored (0 <-> 8).
 *
 * In the middle of a byte the offset is deliberately left untouched.
 * This means that something like
 *     le(bits(5)),le(bits(3))
 * is equivalent to
 *     le(bits(5),bits(3)) .
 * On the other hand,
 *     le(bits(5)),be(bits(5))
 * will read the same 5 bits twice and discard the top 3.
 */
static void switch_bit_order(HInputStream *input)
{
    assert(input->bit_offset <= 8);

    // switching bit order in the middle of a byte: nothing to adjust
    if ((input->bit_offset % 8) != 0) {
        return;
    }

    // byte boundary: flip offset (0 <-> 8)
    input->bit_offset = 8 - input->bit_offset;
}
/*
 * Parse callback of the endianness combinator.
 *
 * env is the HParseEndianness set up by h_with_endianness__m(). The wrapped
 * parser is run with the input stream's endianness set to the requested
 * value; afterwards the stream's previous setting is put back, regardless of
 * the outcome. Returns whatever h_do_parse() produced for the wrapped parser.
 */
static HParseResult *parse_endianness(void *env, HParseState *state)
{
    HParseEndianness *e = env;
    HInputStream *stream = &state->input_stream;

    // the bits set in 'delta' are exactly the flags that have to change
    char delta = stream->endianness ^ e->endianness;

    if (delta == 0) {
        // all the same, nothing to do
        return h_do_parse(e->p, state);
    }

    const int bit_order_changes = delta & BIT_BIG_ENDIAN;

    // enter: fix up the bit offset first, then toggle the differing flags
    if (bit_order_changes) {
        switch_bit_order(stream);
    }
    stream->endianness ^= delta;

    HParseResult *res = h_do_parse(e->p, state);

    // leave: XOR-ing with the same mask again restores the old flags
    stream->endianness ^= delta;
    if (bit_order_changes) {
        switch_bit_order(stream);
    }

    return res;
}
// Vtable of the endianness combinator. Parsing goes through
// parse_endianness(); the regular / context-free validity hooks are h_false,
// no desugaring function is provided, and RVM compilation is h_not_regular.
// NOTE(review): presumably this limits the combinator to backends that call
// .parse directly (the accompanying test is only registered for packrat) --
// confirm against the other backends.
static const HParserVtable endianness_vt = {
.parse = parse_endianness,
.isValidRegular = h_false,
.isValidCF = h_false,
.desugar = NULL,
.compile_to_rvm = h_not_regular,
};
// Convenience constructor: same as h_with_endianness__m(), with
// system_allocator passed as the allocator.
//
// endianness: setting under which p is to be run
// p:          the parser to wrap
// Returns the result of h_with_endianness__m().
HParser* h_with_endianness(char endianness, const HParser *p)
{
return h_with_endianness__m(&system_allocator, endianness, p);
}
/*
 * Allocator-aware constructor of the endianness combinator.
 *
 * mm__:       allocator handed on to h_new_parser()
 * endianness: setting under which p is to be run
 * p:          the parser to wrap
 *
 * Returns the parser created by h_new_parser() from endianness_vt and a
 * freshly allocated HParseEndianness environment.
 */
HParser* h_with_endianness__m(HAllocator *mm__, char endianness, const HParser *p)
{
    // NOTE(review): h_new() takes no allocator argument; presumably it picks
    // up 'mm__' from the enclosing scope -- confirm in its definition.
    HParseEndianness *config = h_new(HParseEndianness, 1);

    *config = (HParseEndianness){
        .p = p,
        .endianness = endianness,
    };

    return h_new_parser(mm__, &endianness_vt, config);
}

View file

@ -456,6 +456,45 @@ static void test_ambiguous(gconstpointer backend) {
g_check_parse_failed(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+", 2);
}
// Exercises h_with_endianness() for all four byte-order/bit-order
// combinations, wrapping a 32-bit and a 5-bit unsigned read, each run
// against the 4-byte input "abcd" (0x61 0x62 0x63 0x64).
//
// Reading the expected values: 'a' is 0x61 = 0b01100001, so its top five
// bits are 0b01100 = 0xc and its bottom five bits are 0b00001 = 0x1.
// In the expectations below the 32-bit result varies only with the byte-order
// flag and the 5-bit result only with the bit-order flag.
static void test_endianness(gconstpointer backend) {
HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
// the unwrapped parsers
HParser *u32_ = h_uint32();
HParser *u5_ = h_bits(5, false);
// settings are named <byte order><bit order>: b = big-endian, l = little-endian
char bb = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
char bl = BYTE_BIG_ENDIAN | BIT_LITTLE_ENDIAN;
char lb = BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN;
char ll = BYTE_LITTLE_ENDIAN | BIT_LITTLE_ENDIAN;
// one wrapped parser per (setting, width) pair
HParser *bb_u32_ = h_with_endianness(bb, u32_);
HParser *bb_u5_ = h_with_endianness(bb, u5_);
HParser *ll_u32_ = h_with_endianness(ll, u32_);
HParser *ll_u5_ = h_with_endianness(ll, u5_);
HParser *bl_u32_ = h_with_endianness(bl, u32_);
HParser *bl_u5_ = h_with_endianness(bl, u5_);
HParser *lb_u32_ = h_with_endianness(lb, u32_);
HParser *lb_u5_ = h_with_endianness(lb, u5_);
// default: big-endian
g_check_parse_match(u32_, be, "abcd", 4, "u0x61626364");
g_check_parse_match(u5_, be, "abcd", 4, "u0xc"); // 0x6 << 1
// both big-endian: expected to match the unwrapped parsers above
g_check_parse_match(bb_u32_, be, "abcd", 4, "u0x61626364");
g_check_parse_match(bb_u5_, be, "abcd", 4, "u0xc"); // 0x6 << 1
// both little-endian: bytes reversed; the five bits are the low ones of 'a'
g_check_parse_match(ll_u32_, be, "abcd", 4, "u0x64636261");
g_check_parse_match(ll_u5_, be, "abcd", 4, "u0x1");
// mixed cases
g_check_parse_match(bl_u32_, be, "abcd", 4, "u0x61626364");
g_check_parse_match(bl_u5_, be, "abcd", 4, "u0x1");
g_check_parse_match(lb_u32_, be, "abcd", 4, "u0x64636261");
g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc");
}
void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@ -502,6 +541,7 @@ void register_parser_tests(void) {
//g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);