diff --git a/Makefile b/Makefile index 27ee4d3..08ce4e1 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ # and kick off a recursive make # Also, "make src/all" turns into "make -C src all" -SUBDIRS = src +SUBDIRS = src examples .DEFAULT_GOAL := all diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..46f30a0 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,16 @@ + +OUTPUTS := dns.o \ + dns + +TOPLEVEL := ../ + +include ../common.mk + + +all: dns + +dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +dns: dns.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +dns.o: ../src/hammer.h diff --git a/examples/dns.c b/examples/dns.c index 027d675..ec8f712 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -1,16 +1,51 @@ -#include "../hammer.h" +#include "../src/hammer.h" + +#define false 0 +#define true 1 bool is_zero(parse_result_t *p) { + if (TT_UINT != p->ast->token_type) + return false; return (0 == p->ast->uint); } -bool validate_dns(parse_result_t *p) { - +/** + * A label can't be more than 63 characters. + */ +bool validate_label(parse_result_t *p) { + if (TT_SEQUENCE != p->ast->token_type) + return false; + return (64 > p->ast->seq->used); } -int main(int argc, char **argv) { +/** + * Every DNS message should have QDCOUNT entries in the question + * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. + */ +bool validate_dns(parse_result_t *p) { + if (TT_SEQUENCE != p->ast->token_type) + return false; + // The header holds the counts as its last 4 elements. + parsed_token_t **elems = p->ast->seq->elements[0]->seq->elements; + size_t qd = elems[8]->uint; + size_t an = elems[9]->uint; + size_t ns = elems[10]->uint; + size_t ar = elems[11]->uint; + parsed_token_t *questions = p->ast->seq->elements[1]; + if (questions->seq->used != qd) + return false; + parsed_token_t *rrs = p->ast->seq->elements[2]; + if (an+ns+ar != rrs->seq->used) + return false; + return true; +} - const parser_t dns_header = sequence(bits(16, false), // ID +parser_t* init_parser() { + static parser_t *dns_message = NULL; + if (dns_message) + return dns_message; + + const parser_t *dns_header = sequence(bits(16, false), // ID bits(1, false), // QR bits(4, false), // opcode bits(1, false), // AA @@ -25,28 +60,44 @@ int main(int argc, char **argv) { uint16(), // ARCOUNT NULL); - const parser_t *dns_question = sequence(length_value(uint8(), uint8()), // QNAME - uint16(), // QTYPE - uint16(), // QCLASS + const parser_t *type = int_range(uint16(), 1, 16); + + const parser_t *qtype = choice(type, + int_range(uint16(), 252, 255), + NULL); + + const parser_t *class = int_range(uint16(), 1, 4); + + const parser_t *qclass = choice(class, + int_range(uint16(), 255, 255), + NULL); + + const parser_t *dns_question = sequence(sequence(many1(length_value(uint8(), + uint8())), + ch('\x00'), + NULL), // QNAME + qtype, // QTYPE + qclass, // QCLASS NULL); - const parser_t *letter = choice(range('a', 'z'), - range('A', 'Z'), + const parser_t *letter = choice(ch_range('a', 'z'), + ch_range('A', 'Z'), NULL); const parser_t *let_dig = choice(letter, - range('0', '9'), + ch_range('0', '9'), NULL); const parser_t *ldh_str = many1(choice(let_dig, ch('-'), NULL)); - const parser_t *label = sequence(letter, - optional(sequence(optional(ldh_str), - let_dig, - NULL)), - NULL); + const parser_t *label = attr_bool(sequence(letter, + optional(sequence(optional(ldh_str), + let_dig, + NULL)), + NULL), + validate_label); /** * You could write it like this ... @@ -62,7 +113,7 @@ int main(int argc, char **argv) { * ... but this is easier and equivalent */ - parser_t *subdomain = sepBy1(label, ch('.')); + const parser_t *subdomain = sepBy1(label, ch('.')); const parser_t *domain = choice(subdomain, ch(' '), @@ -72,14 +123,16 @@ int main(int argc, char **argv) { uint16(), // TYPE uint16(), // CLASS uint32(), // TTL - length_value(uint16(), uint8()) // RDLENGTH+RDATA + length_value(uint16(), uint8()), // RDLENGTH+RDATA NULL); - const parser_t *dns_message = attr_bool(sequence(dns_header, - dns_question, - many(dns_rr), - end_p(), - NULL), - validate_dns); + dns_message = (parser_t*)attr_bool(sequence(dns_header, + many(dns_question), + many(dns_rr), + end_p(), + NULL), + validate_dns); + + return dns_message; } diff --git a/src/hammer.c b/src/hammer.c index 1e34e53..19d2db2 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -19,10 +19,11 @@ #include "internal.h" #include "allocator.h" #include -#include -#include #include #include +#include +#include +#include #define a_new_(arena, typ, count) ((typ*)arena_malloc((arena), sizeof(typ)*(count))) #define a_new(typ, count) a_new_(state->arena, typ, count) @@ -321,11 +322,6 @@ const parser_t* ch(const uint8_t c) { return (const parser_t*)ret; } -typedef struct { - uint8_t lower; - uint8_t upper; -} range_t; - static parse_result_t* parse_whitespace(void* env, parse_state_t *state) { char c; input_stream_t bak; @@ -384,7 +380,7 @@ static parse_result_t* parse_charset(void *env, parse_state_t *state) { return NULL; } -const parser_t* range(const uint8_t lower, const uint8_t upper) { +const parser_t* ch_range(const uint8_t lower, const uint8_t upper) { parser_t *ret = g_new(parser_t, 1); charset cs = new_charset(); for (int i = 0; i < 256; i++) @@ -393,6 +389,74 @@ const parser_t* range(const uint8_t lower, const uint8_t upper) { return (const parser_t*)ret; } +typedef struct { + const parser_t *p; + int64_t lower; + int64_t upper; +} range_t; + +static parse_result_t* parse_int_range(void *env, parse_state_t *state) { + range_t *r_env = (range_t*)env; + parse_result_t *ret = do_parse(r_env->p, state); + if (!ret || !ret->ast) + return NULL; + switch(ret->ast->token_type) { + case TT_SINT: + if (r_env->lower <= ret->ast->sint && r_env->upper >= ret->ast->sint) + return ret; + else + return NULL; + case TT_UINT: + if ((uint64_t)r_env->lower <= ret->ast->uint && (uint64_t)r_env->upper >= ret->ast->uint) + return ret; + else + return NULL; + default: + return NULL; + } +} + +const parser_t* int_range(const parser_t *p, const int64_t lower, const int64_t upper) { + struct bits_env *b_env = p->env; + // p must be an integer parser, which means it's using parse_bits + assert_message(p->fn == parse_bits, "int_range requires an integer parser"); + // if it's a uint parser, it can't be uint64 + assert_message(!(b_env->signedp) ? (b_env->length < 64) : true, "int_range can't use a uint64 parser"); + // and regardless, the bounds need to fit in the parser in question + switch(b_env->length) { + case 32: + if (b_env->signedp) + assert_message(lower >= INT_MIN && upper <= INT_MAX, "bounds for 32-bit signed integer exceeded"); + else + assert_message(lower >= 0 && upper <= UINT_MAX, "bounds for 32-bit unsigned integer exceeded"); + break; + case 16: + if (b_env->signedp) + assert_message(lower >= SHRT_MIN && upper <= SHRT_MAX, "bounds for 16-bit signed integer exceeded"); + else + assert_message(lower >= 0 && upper <= USHRT_MAX, "bounds for 16-bit unsigned integer exceeded"); + break; + case 8: + if (b_env->signedp) + assert_message(lower >= SCHAR_MIN && upper <= SCHAR_MAX, "bounds for 8-bit signed integer exceeded"); + else + assert_message(lower >= 0 && upper <= UCHAR_MAX, "bounds for 8-bit unsigned integer exceeded"); + break; + default: + // how'd that happen? if we got here, this parser is broken. + return NULL; + } + + range_t *r_env = g_new(range_t, 1); + r_env->p = p; + r_env->lower = lower; + r_env->upper = upper; + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_int_range; + ret->env = (void*)r_env; + return ret; +} + const parser_t* not_in(const uint8_t *options, int count) { parser_t *ret = g_new(parser_t, 1); charset cs = new_charset(); @@ -528,13 +592,6 @@ typedef struct { } two_parsers_t; // return token size in bits... -size_t accumulate_size(parse_result_t *pr) { - if (pr) { - return pr->bit_length; - } // no else, if the AST is null then acc doesn't change - return 0; -} - size_t token_length(parse_result_t *pr) { if (pr) { return pr->bit_length; @@ -825,7 +882,7 @@ typedef struct { static parse_result_t* parse_attr_bool(void *env, parse_state_t *state) { attr_bool_t *a = (attr_bool_t*)env; parse_result_t *res = do_parse(a->p, state); - if (res) { + if (res && res->ast) { if (a->pred(res)) return res; else @@ -965,8 +1022,8 @@ static void test_ch(void) { g_check_parse_failed(ch_, "\xa3", 1); } -static void test_range(void) { - const parser_t *range_ = range('a', 'c'); +static void test_ch_range(void) { + const parser_t *range_ = ch_range('a', 'c'); g_check_parse_ok(range_, "b", 1, "u0x62"); g_check_parse_failed(range_, "d", 1); @@ -1030,6 +1087,13 @@ static void test_uint8(void) { } //@MARK_END +static void test_int_range(void) { + const parser_t *int_range_ = int_range(uint8(), 3, 10); + + g_check_parse_ok(int_range_, "\x05", 1, "u0x5"); + g_check_parse_failed(int_range_, "\xb", 1); +} + #if 0 static void test_float64(void) { const parser_t *float64_ = float64(); @@ -1148,7 +1212,7 @@ static void test_choice(void) { static void test_butnot(void) { const parser_t *butnot_1 = butnot(ch('a'), token((const uint8_t*)"ab", 2)); - const parser_t *butnot_2 = butnot(range('0', '9'), ch('6')); + const parser_t *butnot_2 = butnot(ch_range('0', '9'), ch('6')); g_check_parse_ok(butnot_1, "a", 1, "u0x61"); g_check_parse_failed(butnot_1, "ab", 2); @@ -1164,7 +1228,7 @@ static void test_difference(void) { } static void test_xor(void) { - const parser_t *xor_ = xor(range('0', '6'), range('5', '9')); + const parser_t *xor_ = xor(ch_range('0', '6'), ch_range('5', '9')); g_check_parse_ok(xor_, "0", 1, "u0x30"); g_check_parse_ok(xor_, "9", 1, "u0x39"); @@ -1264,7 +1328,7 @@ static void test_not(void) { void register_parser_tests(void) { g_test_add_func("/core/parser/token", test_token); g_test_add_func("/core/parser/ch", test_ch); - g_test_add_func("/core/parser/range", test_range); + g_test_add_func("/core/parser/ch_range", test_ch_range); g_test_add_func("/core/parser/int64", test_int64); g_test_add_func("/core/parser/int32", test_int32); g_test_add_func("/core/parser/int16", test_int16); @@ -1273,6 +1337,7 @@ void register_parser_tests(void) { g_test_add_func("/core/parser/uint32", test_uint32); g_test_add_func("/core/parser/uint16", test_uint16); g_test_add_func("/core/parser/uint8", test_uint8); + g_test_add_func("/core/parser/int_range", test_int_range); #if 0 g_test_add_func("/core/parser/float64", test_float64); g_test_add_func("/core/parser/float32", test_float32); diff --git a/src/hammer.h b/src/hammer.h index 85b2da4..b3bd412 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -41,11 +41,13 @@ typedef enum token_type { TT_MAX } token_type_t; +typedef struct parsed_token parsed_token_t; + typedef struct counted_array { size_t capacity; size_t used; arena_t arena; - void **elements; + parsed_token_t **elements; } counted_array_t; typedef struct parsed_token { @@ -130,7 +132,14 @@ const parser_t* ch(const uint8_t c); * * Result token type: TT_UINT */ -const parser_t* range(const uint8_t lower, const uint8_t upper); +const parser_t* ch_range(const uint8_t lower, const uint8_t upper); + +/** + * Given an integer parser, p, and two integer bounds, lower and upper, + * returns a parser that parses an integral value within the range + * [lower, upper] (inclusive). + */ +const parser_t* int_range(const parser_t *p, const int64_t lower, const int64_t upper); /** * Returns a parser that parses the specified number of bits. sign == @@ -360,8 +369,13 @@ const parser_t* length_value(const parser_t* length, const parser_t* value); * This parser attaches a predicate function, which returns true or * false, to a parser. The function is evaluated over the parser's * result. + * * The parse only succeeds if the attribute function returns true. * + * attr_bool will check whether p's result exists and whether p's + * result AST exists; you do not need to check for this in your + * predicate function. + * * Result token type: p's result type if pred succeeded, NULL otherwise. */ const parser_t* attr_bool(const parser_t* p, predicate_t pred);