diff --git a/src/hammer.c b/src/hammer.c index e4addba..5542e80 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -20,6 +20,7 @@ #include #include #include +#include parse_state_t* from(parse_state_t *ps, const size_t index) { parse_state_t *ret = g_new(parse_state_t, 1); @@ -146,7 +147,25 @@ typedef struct { uint8_t upper; } range_t; -const parser_t* whitespace(const parser_t* p) { return NULL; } +static parse_result_t* parse_whitespace(void* env, parse_state_t *state) { + char c; + input_stream_t bak; + do { + bak = state->input_stream; + c = read_bits(&state->input_stream, 8, false); + if (state->input_stream.overrun) + return NULL; + } while (isspace(c)); + state->input_stream = bak; + return do_parse((parser_t*)env, state); +} + +const parser_t* whitespace(const parser_t* p) { + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_whitespace; + ret->env = (void*)p; + return ret; +} //const parser_t* action(const parser_t* p, /* fptr to action on AST */) { return NULL; } const parser_t* left_factor_action(const parser_t* p) { return NULL; } @@ -178,7 +197,7 @@ const parser_t* not_in(const uint8_t *options, int count) { for (int i = 0; i < 256; i++) charset_set(cs, i, 1); for (int i = 0; i < count; i++) - charset_set(cs, i, 0); + charset_set(cs, options[i], 0); ret->fn = parse_charset; ret->env = (void*)cs; return (const parser_t*)ret; @@ -225,7 +244,8 @@ static parse_result_t* parse_sequence(void *env, parse_state_t *state) { if (NULL == tmp) { return NULL; } else { - g_sequence_append(seq, tmp); + if (tmp->ast) + g_sequence_append(seq, (void*)tmp->ast); } } parsed_token_t *tok = g_new(parsed_token_t, 1); @@ -622,22 +642,12 @@ static void test_float32(void) { static void test_whitespace(void) { - uint8_t test1[1] = { 'a' }; - uint8_t test2[2] = { ' ', 'a' }; - uint8_t test3[3] = { ' ', ' ', 'a' }; - uint8_t test4[2] = { '\t', 'a' }; - uint8_t test5[2] = { '_', 'a' }; const parser_t *whitespace_ = whitespace(ch('a')); - parse_result_t *ret1 = parse(whitespace_, test1, 1); - parse_result_t *ret2 = parse(whitespace_, test2, 2); - parse_result_t *ret3 = parse(whitespace_, test3, 3); - parse_result_t *ret4 = parse(whitespace_, test4, 2); - parse_result_t *ret5 = parse(whitespace_, test5, 2); - g_check_cmpint(ret1->ast->uint, ==, 'a'); - g_check_cmpint(ret2->ast->uint, ==, 'a'); - g_check_cmpint(ret3->ast->uint, ==, 'a'); - g_check_cmpint(ret4->ast->uint, ==, 'a'); - g_check_failed(ret5); + g_check_parse_ok(whitespace_, "a", 1, "s0x61"); + g_check_parse_ok(whitespace_, " a", 2, "s0x61"); + g_check_parse_ok(whitespace_, " a", 3, "s0x61"); + g_check_parse_ok(whitespace_, "\ta", 2, "s0x61"); + g_check_parse_failed(whitespace_, "_a", 2); } static void test_action(void) { @@ -650,23 +660,16 @@ static void test_left_factor_action(void) { static void test_not_in(void) { uint8_t options[3] = { 'a', 'b', 'c' }; - uint8_t test1[1] = { 'd' }; - uint8_t test2[1] = { 'a' }; const parser_t *not_in_ = not_in(options, 3); - parse_result_t *ret1 = parse(not_in_, test1, 1); - parse_result_t *ret2 = parse(not_in_, test2, 1); - g_check_cmpint(ret1->ast->uint, ==, 'd'); - g_check_failed(ret2); + g_check_parse_ok(not_in_, "d", 1, "s0x64"); + g_check_parse_failed(not_in_, "a", 1); + } static void test_end_p(void) { - uint8_t test1[1] = { 'a' }; - uint8_t test2[2] = { 'a', 'a' }; const parser_t *end_p_ = sequence(ch('a'), end_p(), NULL); - parse_result_t *ret1 = parse(end_p_, test1, 1); - parse_result_t *ret2 = parse(end_p_, test2, 2); - g_check_cmpint(ret1->ast->uint, ==, 'a'); - g_check_failed(ret2); + g_check_parse_ok(end_p_, "a", 1, "(s0x61)"); + g_check_parse_failed(end_p_, "aa", 2); } static void test_nothing_p(void) { @@ -680,12 +683,12 @@ static void test_sequence(void) { const parser_t *sequence_1 = sequence(ch('a'), ch('b'), NULL); const parser_t *sequence_2 = sequence(ch('a'), whitespace(ch('b')), NULL); - g_check_parse_ok(sequence_1, "ab", 2, "(<41> <42>)"); + g_check_parse_ok(sequence_1, "ab", 2, "(s0x61 s0x62)"); g_check_parse_failed(sequence_1, "a", 1); g_check_parse_failed(sequence_1, "b", 1); - g_check_parse_ok(sequence_2, "ab", 2, "(<41> <42>)"); - g_check_parse_ok(sequence_2, "a b", 3, "(<41> <42>)"); - g_check_parse_ok(sequence_2, "a b", 4, "(<41> <42>)"); + g_check_parse_ok(sequence_2, "ab", 2, "(s0x61 s0x62)"); + g_check_parse_ok(sequence_2, "a b", 3, "(s0x61 s0x62)"); + g_check_parse_ok(sequence_2, "a b", 4, "(s0x61 s0x62)"); //g_check_cmpseq(ret1->ast-> } diff --git a/src/pprint.c b/src/pprint.c index 6aad7b3..cf39c0e 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -99,7 +99,7 @@ static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) { append_buf(buf, tmpbuf, len); break; case TT_UINT: - len = asprintf(&tmpbuf, "s%#lx\n", tok->uint); + len = asprintf(&tmpbuf, "s%#lx", tok->uint); append_buf(buf, tmpbuf, len); break; case TT_SEQUENCE: { @@ -117,8 +117,10 @@ static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) { unamb_sub(subtok, buf); } append_buf_c(buf, ')'); - } // TODO: implement this + } + break; default: + fprintf(stderr, "Unexpected token type %d\n", tok->token_type); g_assert_not_reached(); } } diff --git a/src/test_suite.h b/src/test_suite.h index 3c01d3f..47c5ac4 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -54,7 +54,7 @@ #define g_check_parse_ok(parser, input, inp_len, result) { \ parse_result_t *res = parse(parser, (const uint8_t*)input, inp_len); \ if (!res) { \ - g_test_message("Parse failed"); \ + g_test_message("Parse failed on line %d", __LINE__); \ g_test_fail(); \ } else { \ char* cres = write_result_unamb(res->ast); \