From a68b93c61873dcee0e67bf895a38c09760b0ad4e Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Sat, 2 Jun 2012 03:35:25 +0200 Subject: [PATCH 001/125] adding pesco's base64 example --- .gitignore | 1 + examples/Makefile | 14 +++++++++--- examples/base64.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 examples/base64.c diff --git a/.gitignore b/.gitignore index af536ad..2370b3b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ src/test_suite lib/hush examples/dns +examples/base64 TAGS diff --git a/examples/Makefile b/examples/Makefile index dc4a0f3..d2c76cf 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,13 +1,15 @@ OUTPUTS := dns.o \ - dns + dns \ + base64.o \ + base64 TOPLEVEL := ../ include ../common.mk -all: dns +all: dns base64 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o @@ -17,4 +19,10 @@ dns.o: ../src/hammer.h dns_common.h rr.o: ../src/hammer.h rr.h dns_common.h -dns_common.o: ../src/hammer.h dns_common.h \ No newline at end of file +dns_common.o: ../src/hammer.h dns_common.h + +base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64: base64.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64.o: ../src/hammer.h diff --git a/examples/base64.c b/examples/base64.c new file mode 100644 index 0000000..beb2484 --- /dev/null +++ b/examples/base64.c @@ -0,0 +1,54 @@ +#include "../src/hammer.h" + +const HParser* document = NULL; + +void init_parser(void) +{ + // CORE + const HParser *digit = h_ch_range(0x30, 0x39); + const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); + + // AUX. 
+ const HParser *plus = h_ch('+'); + const HParser *slash = h_ch('/'); + const HParser *equals = h_ch('='); + + const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); + const HParser *bsfdig_4bit = h_choice( + h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'), + h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), + h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); + const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); + const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); + const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); + const HParser *base64 = h_choice(base64_2, base64_1, NULL); + // why does this parse "A=="?! + // why does this parse "aaA=" but not "aA=="?! + + document = base64; +} + + +#include + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParseResult *result; + + init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(document, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} From 69ba1917837c88423e8538f7b0b6e4161773fe83 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 8 Oct 2012 17:11:47 +0200 Subject: [PATCH 002/125] Added singly-linked list --- src/allocator.c | 6 +++++- src/allocator.h | 1 + src/datastructures.c | 33 +++++++++++++++++++++++++++++++++ src/internal.h | 15 +++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/allocator.c b/src/allocator.c index f53b84b..59c6e50 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -86,7 +86,11 @@ void* h_arena_malloc(HArena *arena, size_t size) { return link->rest; } } - + +void h_arena_free(HArena *arena, void* ptr) { + // To be used 
later... +} + void h_delete_arena(HArena *arena) { struct arena_link *link = arena->head; while (link) { diff --git a/src/allocator.h b/src/allocator.h index 3ec9ae9..3af22d0 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -23,6 +23,7 @@ typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(size_t block_size); // pass 0 for default... void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); +void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); typedef struct { diff --git a/src/datastructures.c b/src/datastructures.c index 004f7e7..e7552e5 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -30,3 +30,36 @@ void h_carray_append(HCountedArray *array, void* item) { } array->elements[array->used++] = item; } + +// HSlist + +HSlist* h_slist_new(HArena *arena) { + HSlist *ret = h_arena_malloc(arena, sizeof(HSlist)); + ret->head = NULL; + ret->arena = arena; + return ret; +} + +void* h_slist_pop(HSlist *slist) { + HSlistNode *head = slist->head; + if (!head) + return NULL; + void* ret = head->elem; + slist->head = head->next; + h_arena_free(slist->arena, head); + return ret; +} + +void h_slist_push(HSlist *slist, void* item) { + HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + hnode->elem = item; + hnode->next = slist->head; + // write memory barrier here. 
+ slist->head = hnode; +} + +void h_slist_free(HSlist *slist) { + while (slist->head != NULL) + h_slist_pop(slist); + h_arena_free(slist->arena, slist); +} diff --git a/src/internal.h b/src/internal.h index cc35a8b..b59f824 100644 --- a/src/internal.h +++ b/src/internal.h @@ -154,6 +154,21 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); +typedef struct HSlistNode_ { + void* elem; + struct HSlistNode_ *next; +} HSlistNode; + +typedef struct HSlist_ { + HSlistNode *head; + struct HArena_ *arena; +} HSlist; + + +HSlist* h_slist_new(HArena *arena); +void* h_slist_pop(HSlist *slist); +void h_slist_push(HSlist *slist, void* item); +void h_slist_free(HSlist *slist); #if 0 #include From ef6af551d1972edcfe53329841ebe313d1eecb2a Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 8 Oct 2012 18:16:18 +0200 Subject: [PATCH 003/125] Replaced GQueue and GSlist with HSlist. 
--- src/datastructures.c | 36 +++++++++++++++++++++++++++++++++++- src/hammer.c | 20 ++++++++++---------- src/internal.h | 30 ++++++++++++++++-------------- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/datastructures.c b/src/datastructures.c index e7552e5..5758f5d 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -15,6 +15,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) { ret->elements = h_arena_malloc(arena, sizeof(void*) * size); return ret; } + HCountedArray *h_carray_new(HArena * arena) { return h_carray_new_sized(arena, 4); } @@ -32,7 +33,6 @@ void h_carray_append(HCountedArray *array, void* item) { } // HSlist - HSlist* h_slist_new(HArena *arena) { HSlist *ret = h_arena_malloc(arena, sizeof(HSlist)); ret->head = NULL; @@ -58,8 +58,42 @@ void h_slist_push(HSlist *slist, void* item) { slist->head = hnode; } +bool h_slist_find(HSlist *slist, const void* item) { + assert (item != NULL); + HSlistNode *head = slist->head; + while (head != NULL) { + if (head->elem == item) + return true; + head = head->next; + } + return false; +} + +HSlist* h_slist_remove_all(HSlist *slist, const void* item) { + assert (item != NULL); + HSlistNode *node = slist->head; + HSlistNode *prev = NULL; + while (node != NULL) { + if (node->elem == item) { + HSlistNode *next = node->next; + if (prev) + prev->next = next; + else + slist->head = next; + // FIXME free the removed node! this leaks. 
+ node = next; + } + else { + prev = node; + node = prev->next; + } + } + return slist; +} + void h_slist_free(HSlist *slist) { while (slist->head != NULL) h_slist_pop(slist); h_arena_free(slist->arena, slist); } + diff --git a/src/hammer.c b/src/hammer.c index d1a3334..0946044 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -79,7 +79,7 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { if (!head) { // No heads found return cached; } else { // Some heads found - if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) { + if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { // Nothing in the cache, and the key parser is not involved HParseResult *tmp = a_new(HParseResult, 1); tmp->ast = NULL; tmp->arena = state->arena; @@ -87,9 +87,9 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); return ret; } - if (g_slist_find(head->eval_set, k->parser)) { + if (h_slist_find(head->eval_set, k->parser)) { // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
- head->eval_set = g_slist_remove_all(head->eval_set, k->parser); + head->eval_set = h_slist_remove_all(head->eval_set, k->parser); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); // we know that cached has an entry here, modify it if (!cached) @@ -112,11 +112,11 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; rec_detect->head = some; } - size_t i = 0; - HLeftRec *lr = g_queue_peek_nth(state->lr_stack, i); + assert(state->lr_stack->head != NULL); + HLeftRec *lr = state->lr_stack->head->elem; while (lr && lr->rule != p) { lr->head = rec_detect->head; - lr->head->involved_set = g_slist_prepend(lr->head->involved_set, (gpointer)lr->rule); + h_slist_push(lr->head->involved_set, (gpointer)lr->rule); } } @@ -190,7 +190,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { // It doesn't exist, so create a dummy result to cache HLeftRec *base = a_new(HLeftRec, 1); base->seed = NULL; base->rule = parser; base->head = NULL; - g_queue_push_head(state->lr_stack, base); + h_slist_push(state->lr_stack, base); // cache it HParserCacheValue *dummy = a_new(HParserCacheValue, 1); dummy->value_type = PC_LEFT; dummy->left = base; @@ -198,7 +198,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { // parse the input HParseResult *tmp_res = perform_lowlevel_parse(state, parser); // the base variable has passed equality tests with the cache - g_queue_pop_head(state->lr_stack); + h_slist_pop(state->lr_stack); // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one if (NULL == base->head) { HParserCacheValue *right = a_new(HParserCacheValue, 1); @@ -250,12 +250,12 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length parse_state->input_stream.overrun = 0; parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN; 
parse_state->input_stream.length = length; - parse_state->lr_stack = g_queue_new(); + parse_state->lr_stack = h_slist_new(arena); parse_state->recursion_heads = g_hash_table_new(cache_key_hash, cache_key_equal); parse_state->arena = arena; HParseResult *res = h_do_parse(parser, parse_state); - g_queue_free(parse_state->lr_stack); + h_slist_free(parse_state->lr_stack); g_hash_table_destroy(parse_state->recursion_heads); // tear down the parse state g_hash_table_destroy(parse_state->cache); diff --git a/src/internal.h b/src/internal.h index b59f824..bf2d661 100644 --- a/src/internal.h +++ b/src/internal.h @@ -42,6 +42,17 @@ typedef struct HInputStream_ { char overrun; } HInputStream; +typedef struct HSlistNode_ { + void* elem; + struct HSlistNode_ *next; +} HSlistNode; + +typedef struct HSlist_ { + HSlistNode *head; + struct HArena_ *arena; +} HSlist; + + /* The state of the parser. * * Members: @@ -57,7 +68,7 @@ struct HParseState_ { GHashTable *cache; HInputStream input_stream; HArena * arena; - GQueue *lr_stack; + HSlist *lr_stack; GHashTable *recursion_heads; }; @@ -90,8 +101,8 @@ typedef enum HParserCacheValueType_ { */ typedef struct HRecursionHead_ { const HParser *head_parser; - GSList *involved_set; - GSList *eval_set; + HSlist *involved_set; + HSlist *eval_set; } HRecursionHead; @@ -154,20 +165,11 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); -typedef struct HSlistNode_ { - void* elem; - struct HSlistNode_ *next; -} HSlistNode; - -typedef struct HSlist_ { - HSlistNode *head; - struct HArena_ *arena; -} HSlist; - - HSlist* h_slist_new(HArena *arena); void* h_slist_pop(HSlist *slist); void h_slist_push(HSlist *slist, void* item); +bool h_slist_find(HSlist *slist, const void* item); +HSlist* h_slist_remove_all(HSlist *slist, const void* item); void h_slist_free(HSlist *slist); #if 0 From 12035d66a1dab59ae3b5178674f2481f67ccfc4a Mon Sep 17 
00:00:00 2001 From: Dan Hirsch Date: Mon, 8 Oct 2012 19:20:36 +0200 Subject: [PATCH 004/125] Added hashtable implementation --- src/datastructures.c | 114 +++++++++++++++++++++++++++++++++++++++++++ src/internal.h | 27 ++++++++++ 2 files changed, 141 insertions(+) diff --git a/src/datastructures.c b/src/datastructures.c index e7552e5..00db744 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -63,3 +63,117 @@ void h_slist_free(HSlist *slist) { h_slist_pop(slist); h_arena_free(slist->arena, slist); } + +//////////////////////////////////////////////////////////////// + +HHashTable* h_hashtable_new(HArena *arena) { + HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable*)); + ht->hashFunc = hashFunc; + ht->equalFunc = equalFunc; + ht->capacity = 64; // to start; should be tuned later... + ht->used = 0; + ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity); + memset(ht->contents, sizeof(HHashTableEntry) * ht->capacity); + return ht; +} + +void* h_hashtable_get(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) + return hte->value; + } + return NULL; +} + +void h_hashtable_put(HHashTable* ht, void* key, void* value) { + // # Start with a rebalancing + h_hashtable_ensure_capacity(ht, ht->used + 1); + + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + hte = &ht->contents[hashval & (ht->capacity - 1)]; + if (hte->key != NULL) { + do { + if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) + goto insert_here; + } while (hte->next); + // Add a new link... 
+ assert (hte->next == NULL); + hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry)); + hte = hte->next; + hte->next = NULL; + } + + insert_here: + hte->key = key; + hte->value = value; + hte->hashval = hashval; +} + +int h_hashtable_present(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) + return true; + } + return false; +} +void h_hashtable_del(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) { + // FIXME: Leaks keys and values. + HHashTableEntry* hten = hte->next; + if (hten != NULL) { + *hte = *hten; + h_arena_free(ht->arena, hten); + } else { + hte->key = hte->value = NULL; + hte->hashval = 0; + } + return; + } + } +} +void h_hashtable_free(HHashTable* ht) { + for (i = 0; i < ht->capacity; i++) { + HHashTableEntry *hten, *hte = &ht->contents[i]; + // FIXME: Free key and value + hte = hte->next; + while (hte != NULL) { + // FIXME: leaks keys and values. 
+ hten = hte->next; + h_arena_free(ht->arena, hte); + hte = hten; + } + } + h_arena_free(ht->arena, ht->contents); +} diff --git a/src/internal.h b/src/internal.h index b59f824..cde861f 100644 --- a/src/internal.h +++ b/src/internal.h @@ -170,6 +170,33 @@ void* h_slist_pop(HSlist *slist); void h_slist_push(HSlist *slist, void* item); void h_slist_free(HSlist *slist); +typedef unsigned int HHashValue; +typedef HHashValue (*HHashFunc)(const void* key); +typedef bool (*HEqualFunc)(const void* key1, const void* key2); + +typedef struct HHashTableEntry_ { + struct HHashTableEntry_ *next; + void* key; + void* value; + HHashValue hashval; +} HHashTableEntry; + +typedef struct HHashTable_ { + HHashTableEntry *contents; + HHashFunc hashFunc; + HEqualFunc equalFunc; + int capacity; + int used; +} HHashTable; + +HHashTable* h_hashtable_new(HArena *arena, ); +void* h_hashtable_get(HHashTable* ht, void* key); +void h_hashtable_put(HHashTable* ht, void* key, void* value); +int h_hashtable_present(HHashTable* ht, void* key); +void h_hashtable_del(HHashTable* ht, void* key); +void h_hashtable_free(HHashTable* ht); + + #if 0 #include #define arena_malloc(a, s) malloc(s) From b44d00ed335239dc6c919dcae81decc2a7666ccb Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 8 Oct 2012 21:12:56 +0200 Subject: [PATCH 005/125] Replaced GHashTable with HHashTable ... which has some problem causing it to segfault. 
--- src/datastructures.c | 11 ++++++----- src/hammer.c | 34 +++++++++++++++++----------------- src/internal.h | 44 ++++++++++++++++++++++---------------------- 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/src/datastructures.c b/src/datastructures.c index 0cd5d0e..756ba8f 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -3,6 +3,7 @@ #include "allocator.h" #include #include +#include // {{{ counted arrays @@ -97,14 +98,14 @@ void h_slist_free(HSlist *slist) { h_arena_free(slist->arena, slist); } -HHashTable* h_hashtable_new(HArena *arena) { +HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) { HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable*)); ht->hashFunc = hashFunc; ht->equalFunc = equalFunc; ht->capacity = 64; // to start; should be tuned later... ht->used = 0; ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity); - memset(ht->contents, sizeof(HHashTableEntry) * ht->capacity); + memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity); return ht; } @@ -127,14 +128,14 @@ void* h_hashtable_get(HHashTable* ht, void* key) { void h_hashtable_put(HHashTable* ht, void* key, void* value) { // # Start with a rebalancing - h_hashtable_ensure_capacity(ht, ht->used + 1); + //h_hashtable_ensure_capacity(ht, ht->used + 1); HHashValue hashval = ht->hashFunc(key); #ifdef CONSISTENCY_CHECK assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 #endif - hte = &ht->contents[hashval & (ht->capacity - 1)]; + HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; if (hte->key != NULL) { do { if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) @@ -195,7 +196,7 @@ void h_hashtable_del(HHashTable* ht, void* key) { } } void h_hashtable_free(HHashTable* ht) { - for (i = 0; i < ht->capacity; i++) { + for (size_t i = 0; i < ht->capacity; i++) { HHashTableEntry *hten, *hte = &ht->contents[i]; // FIXME: Free key and value hte = hte->next; diff 
--git a/src/hammer.c b/src/hammer.c index 0946044..3eab2d7 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -74,8 +74,8 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa } HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { - HParserCacheValue *cached = g_hash_table_lookup(state->cache, k); - HRecursionHead *head = g_hash_table_lookup(state->recursion_heads, k); + HParserCacheValue *cached = h_hashtable_get(state->cache, k); + HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); if (!head) { // No heads found return cached; } else { // Some heads found @@ -126,8 +126,8 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { // Store the head into the recursion_heads - g_hash_table_replace(state->recursion_heads, k, head); - HParserCacheValue *old_cached = g_hash_table_lookup(state->cache, k); + h_hashtable_put(state->recursion_heads, k, head); + HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); if (!old_cached || PC_LEFT == old_cached->value_type) errx(1, "impossible match"); HParseResult *old_res = old_cached->right->result; @@ -141,12 +141,12 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { HParserCacheValue *v = a_new(HParserCacheValue, 1); v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); - g_hash_table_replace(state->cache, k, v); + h_hashtable_put(state->cache, k, v); return grow(k, state, head); } else { // we're done with growing, we can remove data from the recursion head - g_hash_table_remove(state->recursion_heads, k); - HParserCacheValue *cached = g_hash_table_lookup(state->cache, k); + h_hashtable_del(state->recursion_heads, k); + HParserCacheValue *cached = h_hashtable_get(state->cache, k); if (cached && PC_RIGHT == 
cached->value_type) { return cached->right->result; } else { @@ -154,7 +154,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) } } } else { - g_hash_table_remove(state->recursion_heads, k); + h_hashtable_del(state->recursion_heads, k); return old_res; } } @@ -169,7 +169,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab // update cache HParserCacheValue *v = a_new(HParserCacheValue, 1); v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); - g_hash_table_replace(state->cache, k, v); + h_hashtable_put(state->cache, k, v); if (!growable->seed) return NULL; else @@ -194,7 +194,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { // cache it HParserCacheValue *dummy = a_new(HParserCacheValue, 1); dummy->value_type = PC_LEFT; dummy->left = base; - g_hash_table_replace(state->cache, key, dummy); + h_hashtable_put(state->cache, key, dummy); // parse the input HParseResult *tmp_res = perform_lowlevel_parse(state, parser); // the base variable has passed equality tests with the cache @@ -203,7 +203,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { if (NULL == base->head) { HParserCacheValue *right = a_new(HParserCacheValue, 1); right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); - g_hash_table_replace(state->cache, key, right); + h_hashtable_put(state->cache, key, right); return tmp_res; } else { base->seed = tmp_res; @@ -242,8 +242,8 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length // Set up a parse state... 
HArena * arena = h_new_arena(0); HParseState *parse_state = a_new_(arena, HParseState, 1); - parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func - cache_key_equal);// key_equal_func + parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func + cache_key_hash); // hash_func parse_state->input_stream.input = input; parse_state->input_stream.index = 0; parse_state->input_stream.bit_offset = 8; // bit big endian @@ -251,14 +251,14 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN; parse_state->input_stream.length = length; parse_state->lr_stack = h_slist_new(arena); - parse_state->recursion_heads = g_hash_table_new(cache_key_hash, - cache_key_equal); + parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal, + cache_key_hash); parse_state->arena = arena; HParseResult *res = h_do_parse(parser, parse_state); h_slist_free(parse_state->lr_stack); - g_hash_table_destroy(parse_state->recursion_heads); + h_hashtable_free(parse_state->recursion_heads); // tear down the parse state - g_hash_table_destroy(parse_state->cache); + h_hashtable_free(parse_state->cache); if (!res) h_delete_arena(parse_state->arena); diff --git a/src/internal.h b/src/internal.h index 1636c86..6c68d8b 100644 --- a/src/internal.h +++ b/src/internal.h @@ -52,6 +52,25 @@ typedef struct HSlist_ { struct HArena_ *arena; } HSlist; +typedef unsigned int HHashValue; +typedef HHashValue (*HHashFunc)(const void* key); +typedef bool (*HEqualFunc)(const void* key1, const void* key2); + +typedef struct HHashTableEntry_ { + struct HHashTableEntry_ *next; + void* key; + void* value; + HHashValue hashval; +} HHashTableEntry; + +typedef struct HHashTable_ { + HHashTableEntry *contents; + HHashFunc hashFunc; + HEqualFunc equalFunc; + size_t capacity; + size_t used; + HArena *arena; +} HHashTable; /* The state of the parser. 
* @@ -65,11 +84,11 @@ typedef struct HSlist_ { */ struct HParseState_ { - GHashTable *cache; + HHashTable *cache; HInputStream input_stream; HArena * arena; HSlist *lr_stack; - GHashTable *recursion_heads; + HHashTable *recursion_heads; }; /* The (location, parser) tuple used to key the cache. @@ -172,26 +191,7 @@ bool h_slist_find(HSlist *slist, const void* item); HSlist* h_slist_remove_all(HSlist *slist, const void* item); void h_slist_free(HSlist *slist); -typedef unsigned int HHashValue; -typedef HHashValue (*HHashFunc)(const void* key); -typedef bool (*HEqualFunc)(const void* key1, const void* key2); - -typedef struct HHashTableEntry_ { - struct HHashTableEntry_ *next; - void* key; - void* value; - HHashValue hashval; -} HHashTableEntry; - -typedef struct HHashTable_ { - HHashTableEntry *contents; - HHashFunc hashFunc; - HEqualFunc equalFunc; - int capacity; - int used; -} HHashTable; - -HHashTable* h_hashtable_new(HArena *arena, ); +HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc); void* h_hashtable_get(HHashTable* ht, void* key); void h_hashtable_put(HHashTable* ht, void* key, void* value); int h_hashtable_present(HHashTable* ht, void* key); From 6101b8c43a68c2e2210806e8d02eef2cd2c66ec0 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Mon, 8 Oct 2012 22:06:33 +0200 Subject: [PATCH 006/125] Hash table now works. 
--- src/datastructures.c | 22 +++++++++++++++++----- src/internal.h | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/datastructures.c b/src/datastructures.c index 756ba8f..e94532e 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -99,13 +99,20 @@ void h_slist_free(HSlist *slist) { } HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) { - HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable*)); + HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable)); ht->hashFunc = hashFunc; ht->equalFunc = equalFunc; ht->capacity = 64; // to start; should be tuned later... ht->used = 0; + ht->arena = arena; ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity); - memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity); + for (size_t i = 0; i < ht->capacity; i++) { + ht->contents[i].key = NULL; + ht->contents[i].value = NULL; + ht->contents[i].next = NULL; + ht->contents[i].hashval = 0; + } + //memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity); return ht; } @@ -115,7 +122,8 @@ void* h_hashtable_get(HHashTable* ht, void* key) { assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 #endif - for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + HHashTableEntry *hte = NULL; + for (hte = &ht->contents[hashval & (ht->capacity - 1)]; hte != NULL; hte = hte->next) { if (hte->hashval != hashval) @@ -140,13 +148,17 @@ void h_hashtable_put(HHashTable* ht, void* key, void* value) { do { if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) goto insert_here; - } while (hte->next); + if (hte->next != NULL) + hte = hte->next; + } while (hte->next != NULL); // Add a new link... 
assert (hte->next == NULL); hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry)); hte = hte->next; hte->next = NULL; - } + ht->used++; + } else + ht->used++; insert_here: hte->key = key; diff --git a/src/internal.h b/src/internal.h index 6c68d8b..e8a3f91 100644 --- a/src/internal.h +++ b/src/internal.h @@ -201,7 +201,7 @@ void h_hashtable_free(HHashTable* ht); #if 0 #include -#define arena_malloc(a, s) malloc(s) +#define h_arena_malloc(a, s) malloc(s) #endif #endif // #ifndef HAMMER_INTERNAL__H From d5729efa1fd665ad884c1f31c787919f56ae8a0b Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Wed, 10 Oct 2012 15:58:03 +0200 Subject: [PATCH 007/125] Replaced glib memory allocation --- HACKING | 12 ++++ src/Makefile | 4 +- src/allocator.c | 26 +++++--- src/allocator.h | 8 ++- src/bitwriter.c | 20 +++--- src/hammer.c | 7 +- src/hammer.h | 123 ++++++++++++++++++++++-------------- src/internal.h | 38 +++++------ src/parsers/action.c | 10 ++- src/parsers/and.c | 6 +- src/parsers/attr_bool.c | 10 ++- src/parsers/bits.c | 14 ++-- src/parsers/butnot.c | 9 ++- src/parsers/ch.c | 8 ++- src/parsers/charset.c | 44 +++++++++++-- src/parsers/choice.c | 30 +++++++-- src/parsers/difference.c | 9 ++- src/parsers/end.c | 11 +++- src/parsers/epsilon.c | 3 + src/parsers/ignore.c | 5 +- src/parsers/ignoreseq.c | 28 +++++--- src/parsers/indirect.c | 5 +- src/parsers/int_range.c | 7 +- src/parsers/many.c | 48 +++++++++----- src/parsers/not.c | 5 +- src/parsers/nothing.c | 7 +- src/parsers/optional.c | 5 +- src/parsers/sequence.c | 32 ++++++++-- src/parsers/token.c | 9 ++- src/parsers/unimplemented.c | 3 + src/parsers/whitespace.c | 5 +- src/parsers/xor.c | 9 ++- src/system_allocator.c | 20 ++++++ 33 files changed, 414 insertions(+), 166 deletions(-) create mode 100644 HACKING create mode 100644 src/system_allocator.c diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..869c327 --- /dev/null +++ b/HACKING @@ -0,0 +1,12 @@ +Privileged arguments +==================== + +As 
a matter of convenience, there are several identifiers that +internal macros use. Chances are that if you use these names for other +things, you're gonna have a bad time. + +In particular, these names, and the macros that use them, are: +- state: + Used by a_new and company. Should be an HParseState* +- mm__: + Used by h_new and h_free. Should be an HAllocator* diff --git a/src/Makefile b/src/Makefile index faae8a7..e4d70d1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -32,6 +32,8 @@ OUTPUTS := bitreader.o \ pprint.o \ allocator.o \ datastructures.o \ + system_allocator.o \ + test_suite.o \ test_suite \ $(PARSERS:%=parsers/%.o) @@ -45,7 +47,7 @@ all: libhammer.a test_suite test_suite: test_suite.o libhammer.a $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o \ +libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \ $(PARSERS:%=parsers/%.o) bitreader.o: test_suite.h diff --git a/src/allocator.c b/src/allocator.c index 59c6e50..e345c87 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -15,11 +15,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#include +#include #include #include -#include "allocator.h" +#include "hammer.h" +#include "internal.h" + struct arena_link { // TODO: @@ -36,22 +38,25 @@ struct arena_link { struct HArena_ { struct arena_link *head; + struct HAllocator_ *mm__; size_t block_size; size_t used; size_t wasted; }; -HArena *h_new_arena(size_t block_size) { +HArena *h_new_arena(HAllocator* mm__, size_t block_size) { if (block_size == 0) block_size = 4096; - struct HArena_ *ret = g_new(struct HArena_, 1); - struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + block_size); + struct HArena_ *ret = h_new(struct HArena_, 1); + struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size); + memset(link, 0, sizeof(struct arena_link) + block_size); link->free = block_size; link->used = 0; link->next = NULL; ret->head = link; ret->block_size = block_size; ret->used = 0; + ret->mm__ = mm__; ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size; return ret; } @@ -70,13 +75,15 @@ void* h_arena_malloc(HArena *arena, size_t size) { // This involves some annoying casting... arena->used += size; arena->wasted += sizeof(struct arena_link*); - void* link = g_malloc(size + sizeof(struct arena_link*)); + void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*)); + memset(link, 0, size + sizeof(struct arena_link*)); *(struct arena_link**)link = arena->head->next; arena->head->next = (struct arena_link*)link; return (void*)(((uint8_t*)link) + sizeof(struct arena_link*)); } else { // we just need to allocate an ordinary new block. 
- struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size); + struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size); + memset(link, 0, sizeof(struct arena_link) + arena->block_size); link->free = arena->block_size - size; link->used = size; link->next = arena->head; @@ -92,16 +99,17 @@ void h_arena_free(HArena *arena, void* ptr) { } void h_delete_arena(HArena *arena) { + HAllocator *mm__ = arena->mm__; struct arena_link *link = arena->head; while (link) { struct arena_link *next = link->next; // Even in the case of a special block, without the full arena // header, this is correct, because the next pointer is the first // in the structure. - g_free(link); + h_free(link); link = next; } - g_free(arena); + h_free(arena); } void h_allocator_stats(HArena *arena, HArenaStats *stats) { diff --git a/src/allocator.h b/src/allocator.h index 3af22d0..e83cae7 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -19,9 +19,15 @@ #define HAMMER_ALLOCATOR__H__ #include +typedef struct HAllocator_ { + void* (*alloc)(struct HAllocator_* allocator, size_t size); + void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size); + void (*free)(struct HAllocator_* allocator, void* ptr); +} HAllocator; + typedef struct HArena_ HArena ; // hidden implementation -HArena *h_new_arena(size_t block_size); // pass 0 for default... +HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. 
void h_delete_arena(HArena *arena); diff --git a/src/bitwriter.c b/src/bitwriter.c index 9a72e22..e716f09 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -7,6 +7,7 @@ // This file provides the logical inverse of bitreader.c struct HBitWriter_ { uint8_t* buf; + HAllocator *mm__; size_t index; size_t capacity; char bit_offset; // unlike in bit_reader, this is always the number @@ -16,10 +17,12 @@ struct HBitWriter_ { }; // h_bit_writer_ -HBitWriter *h_bit_writer_new() { - HBitWriter *writer = g_new0(HBitWriter, 1); - writer->buf = g_malloc0(writer->capacity = 8); - +HBitWriter *h_bit_writer_new(HAllocator* mm__) { + HBitWriter *writer = h_new(HBitWriter, 1); + memset(writer, 0, sizeof(*writer)); + writer->buf = mm__->alloc(mm__, writer->capacity = 8); + memset(writer->buf, 0, writer->capacity); + writer->mm__ = mm__; writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN; return writer; @@ -41,7 +44,7 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) { int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 
1 : 0); size_t old_capacity = w->capacity; while (w->index + nbytes >= w->capacity) { - w->buf = g_realloc(w->buf, w->capacity *= 2); + w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2); } if (old_capacity != w->capacity) @@ -100,8 +103,9 @@ const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len) { } void h_bit_writer_free(HBitWriter* w) { - g_free(w->buf); - g_free(w); + HAllocator *mm__ = w->mm__; + h_free(w->buf); + h_free(w); } #ifdef INCLUDE_TESTS @@ -114,7 +118,7 @@ typedef struct { void run_bitwriter_test(bitwriter_test_elem data[], char flags) { size_t len; const uint8_t *buf; - HBitWriter *w = h_bit_writer_new(); + HBitWriter *w = h_bit_writer_new(&system_allocator); int i; w->flags = flags; for (i = 0; data[i].nbits; i++) { diff --git a/src/hammer.c b/src/hammer.c index 3eab2d7..52881ec 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -238,9 +238,12 @@ static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) { } -HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { +HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { + return h_parse__m(&system_allocator, parser, input, length); +} +HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { // Set up a parse state... - HArena * arena = h_new_arena(0); + HArena * arena = h_new_arena(mm__, 0); HParseState *parse_state = a_new_(arena, HParseState, 1); parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func cache_key_hash); // hash_func diff --git a/src/hammer.h b/src/hammer.h index a219dfe..2c5a5cf 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -113,18 +113,47 @@ typedef struct HParser_ { void *env; } HParser; +// {{{ Preprocessor definitions +#define HAMMER_FN_DECL_NOARG(rtype_t, name) \ + rtype_t name(void); \ + rtype_t name##__m(HAllocator* mm__) + +#define HAMMER_FN_DECL(rtype_t, name, ...) 
\ + rtype_t name(__VA_ARGS__); \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) + +#define HAMMER_FN_DECL_ATTR(attr, rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__) attr; \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr + +#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__, ...); \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \ + rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ + rtype_t name##__v(__VA_ARGS__, va_list ap) + +// Note: this drops the attributes on the floor for the __v versions +#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__, ...) attr; \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \ + rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ + rtype_t name##__v(__VA_ARGS__, va_list ap) + +// }}} + + /** * Top-level function to call a parser that has been built over some * piece of input (of known size). */ -HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length); +HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length); /** * Given a string, returns a parser that parses that string value. 
* * Result token type: TT_BYTES */ -const HParser* h_token(const uint8_t *str, const size_t len); +HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len); /** * Given a single character, returns a parser that parses that @@ -132,7 +161,7 @@ const HParser* h_token(const uint8_t *str, const size_t len); * * Result token type: TT_UINT */ -const HParser* h_ch(const uint8_t c); +HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c); /** * Given two single-character bounds, lower and upper, returns a parser @@ -141,14 +170,14 @@ const HParser* h_ch(const uint8_t c); * * Result token type: TT_UINT */ -const HParser* h_ch_range(const uint8_t lower, const uint8_t upper); +HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper); /** * Given an integer parser, p, and two integer bounds, lower and upper, * returns a parser that parses an integral value within the range * [lower, upper] (inclusive). */ -const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper); +HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper); /** * Returns a parser that parses the specified number of bits. sign == @@ -156,63 +185,63 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t * * Result token type: TT_SINT if sign == true, TT_UINT if sign == false */ -const HParser* h_bits(size_t len, bool sign); +HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign); /** * Returns a parser that parses a signed 8-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int64(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int64); /** * Returns a parser that parses a signed 4-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int32(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int32); /** * Returns a parser that parses a signed 2-byte integer value. 
* * Result token type: TT_SINT */ -const HParser* h_int16(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int16); /** * Returns a parser that parses a signed 1-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int8(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int8); /** * Returns a parser that parses an unsigned 8-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint64(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint64); /** * Returns a parser that parses an unsigned 4-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint32(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint32); /** * Returns a parser that parses an unsigned 2-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint16(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint16); /** * Returns a parser that parses an unsigned 1-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint8(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint8); /** * Given another parser, p, returns a parser that skips any whitespace @@ -220,7 +249,7 @@ const HParser* h_uint8(); * * Result token type: p's result type */ -const HParser* h_whitespace(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p); /** * Given two parsers, p and q, returns a parser that parses them in @@ -228,7 +257,7 @@ const HParser* h_whitespace(const HParser* p); * * Result token type: p's result type */ -const HParser* h_left(const HParser* p, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q); /** * Given two parsers, p and q, returns a parser that parses them in @@ -236,7 +265,7 @@ const HParser* h_left(const HParser* p, const HParser* q); * * Result token type: q's result type */ -const HParser* h_right(const HParser* p, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q); /** * Given three parsers, p, x, and q, returns a parser that parses them in 
@@ -244,7 +273,7 @@ const HParser* h_right(const HParser* p, const HParser* q); * * Result token type: x's result type */ -const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q); /** * Given another parser, p, and a function f, returns a parser that @@ -252,21 +281,21 @@ const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q); * * Result token type: any */ -const HParser* h_action(const HParser* p, const HAction a); +HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a); /** * Parse a single character in the given charset. * * Result token type: TT_UINT */ -const HParser* h_in(const uint8_t *charset, size_t length); +HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length); /** * Parse a single character *NOT* in the given charset. * * Result token type: TT_UINT */ -const HParser* h_not_in(const uint8_t *charset, size_t length); +HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length); /** * A no-argument parser that succeeds if there is no more input to @@ -274,14 +303,14 @@ const HParser* h_not_in(const uint8_t *charset, size_t length); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_end_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_end_p); /** * This parser always fails. * * Result token type: NULL. Always. */ -const HParser* h_nothing_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p); /** * Given a null-terminated list of parsers, apply each parser in order. @@ -289,7 +318,7 @@ const HParser* h_nothing_p(); * * Result token type: TT_SEQUENCE */ -const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel)); +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p); /** * Given an array of parsers, p_array, apply each parser in order. 
The @@ -298,7 +327,7 @@ const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel)); * * Result token type: The type of the first successful parser's result. */ -const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel)); +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p); /** * Given two parsers, p1 and p2, this parser succeeds in the following @@ -308,7 +337,7 @@ const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel)); * * Result token type: p1's result type. */ -const HParser* h_butnot(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2); /** * Given two parsers, p1 and p2, this parser succeeds in the following @@ -318,7 +347,7 @@ const HParser* h_butnot(const HParser* p1, const HParser* p2); * * Result token type: p1's result type. */ -const HParser* h_difference(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2); /** * Given two parsers, p1 and p2, this parser succeeds if *either* p1 or @@ -326,7 +355,7 @@ const HParser* h_difference(const HParser* p1, const HParser* p2); * * Result token type: The type of the result of whichever parser succeeded. 
*/ -const HParser* h_xor(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2); /** * Given a parser, p, this parser succeeds for zero or more repetitions @@ -334,7 +363,7 @@ const HParser* h_xor(const HParser* p1, const HParser* p2); * * Result token type: TT_SEQUENCE */ -const HParser* h_many(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_many, const HParser* p); /** * Given a parser, p, this parser succeeds for one or more repetitions @@ -342,7 +371,7 @@ const HParser* h_many(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_many1(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p); /** * Given a parser, p, this parser succeeds for exactly N repetitions @@ -350,7 +379,7 @@ const HParser* h_many1(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_repeat_n(const HParser* p, const size_t n); +HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n); /** * Given a parser, p, this parser succeeds with the value p parsed or @@ -358,7 +387,7 @@ const HParser* h_repeat_n(const HParser* p, const size_t n); * * Result token type: If p succeeded, the type of its result; if not, TT_NONE. */ -const HParser* h_optional(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p); /** * Given a parser, p, this parser succeeds if p succeeds, but doesn't @@ -366,7 +395,7 @@ const HParser* h_optional(const HParser* p); * * Result token type: None. The HParseResult exists but its AST is NULL. 
*/ -const HParser* h_ignore(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p); /** * Given a parser, p, and a parser for a separator, sep, this parser @@ -377,7 +406,7 @@ const HParser* h_ignore(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_sepBy(const HParser* p, const HParser* sep); +HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep); /** * Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. @@ -385,14 +414,14 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep); * * Result token type: TT_SEQUENCE */ -const HParser* h_sepBy1(const HParser* p, const HParser* sep); +HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep); /** * This parser always returns a zero length match, i.e., empty string. * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_epsilon_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p); /** * This parser applies its first argument to read an unsigned integer @@ -403,7 +432,7 @@ const HParser* h_epsilon_p(); * * Result token type: TT_SEQUENCE */ -const HParser* h_length_value(const HParser* length, const HParser* value); +HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value); /** * This parser attaches a predicate function, which returns true or @@ -418,7 +447,7 @@ const HParser* h_length_value(const HParser* length, const HParser* value); * * Result token type: p's result type if pred succeeded, NULL otherwise. 
*/ -const HParser* h_attr_bool(const HParser* p, HPredicate pred); +HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred); /** * The 'and' parser asserts that a conditional syntax is satisfied, @@ -435,7 +464,7 @@ const HParser* h_attr_bool(const HParser* p, HPredicate pred); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_and(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_and, const HParser* p); /** * The 'not' parser asserts that a conditional syntax is *not* @@ -455,7 +484,7 @@ const HParser* h_and(const HParser* p); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_not(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_not, const HParser* p); /** * Create a parser that just calls out to another, as yet unknown, @@ -466,35 +495,35 @@ const HParser* h_not(const HParser* p); * Result token type: the type of whatever parser is bound to it with * bind_indirect(). */ -HParser *h_indirect(); +HAMMER_FN_DECL_NOARG(HParser*, h_indirect); /** * Set the inner parser of an indirect. See comments on indirect for * details. */ -void h_bind_indirect(HParser* indirect, const HParser* inner); +HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner); /** * Free the memory allocated to an HParseResult when it is no longer needed. */ -void h_parse_result_free(HParseResult *result); +HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result); // Some debugging aids /** * Format token into a compact unambiguous form. Useful for parser test cases. * Caller is responsible for freeing the result. */ -char* h_write_result_unamb(const HParsedToken* tok); +HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok); /** * Format token to the given output stream. Indent starting at * [indent] spaces, with [delta] spaces between levels. 
*/ -void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta); +HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta); /** * TODO: Document me */ -HBitWriter *h_bit_writer_new(void); +HBitWriter *h_bit_writer_new(HAllocator* mm__); /** * TODO: Document me @@ -506,7 +535,7 @@ void h_bit_writer_put(HBitWriter* w, unsigned long long data, size_t nbits); * Must not free [w] until you're done with the result. * [len] is in bytes. */ -const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len); +const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len); /** * TODO: Document me diff --git a/src/internal.h b/src/internal.h index e8a3f91..b7f56b6 100644 --- a/src/internal.h +++ b/src/internal.h @@ -29,9 +29,28 @@ errx(1, "Assertion failed (programmer error): %s", message); \ } while(0) #endif + +#define HAMMER_FN_IMPL_NOARGS(rtype_t, name) \ + rtype_t name(void) { \ + return name##__m(system_allocator); \ + } \ + rtype_t name##__m(HAllocator* mm__) +// Functions with arguments are difficult to forward cleanly. Alas, we will need to forward them manually. + +#define h_new(type, count) ((type*)(mm__->alloc(mm__, sizeof(type)*(count)))) +#define h_free(addr) (mm__->free(mm__, (addr))) + #define false 0 #define true 1 +// This is going to be generally useful. +static inline void h_generic_free(HAllocator *allocator, void* ptr) { + allocator->free(allocator, ptr); +} + +HAllocator system_allocator; + + typedef struct HInputStream_ { // This should be considered to be a really big value type. 
const uint8_t *input; @@ -155,24 +174,6 @@ typedef struct HParserCacheValue_t { }; } HParserCacheValue; -typedef unsigned int *HCharset; - -static inline HCharset new_charset() { - HCharset cs = g_new0(unsigned int, 256 / sizeof(unsigned int)); - return cs; -} - -static inline int charset_isset(HCharset cs, uint8_t pos) { - return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); -} - -static inline void charset_set(HCharset cs, uint8_t pos, int val) { - cs[pos / sizeof(*cs)] = - val - ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) - : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); -} - // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. long long h_read_bits(HInputStream* state, int count, char signed_p); @@ -198,7 +199,6 @@ int h_hashtable_present(HHashTable* ht, void* key); void h_hashtable_del(HHashTable* ht, void* key); void h_hashtable_free(HHashTable* ht); - #if 0 #include #define h_arena_malloc(a, s) malloc(s) diff --git a/src/parsers/action.c b/src/parsers/action.c index 479a840..0f1686f 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -23,10 +23,14 @@ static const HParserVtable action_vt = { .parse = parse_action, }; -const HParser* h_action(const HParser* p, const HAction a) { - HParser *res = g_new(HParser, 1); +const HParser* h_action(const HParser* p, const HAction a) { + return h_action__m(&system_allocator, p, a); +} + +const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) { + HParser *res = h_new(HParser, 1); res->vtable = &action_vt; - HParseAction *env = g_new(HParseAction, 1); + HParseAction *env = h_new(HParseAction, 1); env->p = p; env->action = a; res->env = (void*)env; diff --git a/src/parsers/and.c b/src/parsers/and.c index fb117fb..532cbba 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -13,9 +13,13 @@ static const HParserVtable and_vt = { .parse = parse_and, }; + const HParser* h_and(const HParser* p) { + return 
h_and__m(&system_allocator, p); +} +const HParser* h_and__m(HAllocator* mm__, const HParser* p) { // zero-width postive lookahead - HParser *res = g_new(HParser, 1); + HParser *res = h_new(HParser, 1); res->env = (void*)p; res->vtable = &and_vt; return res; diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index bf9e6dc..6fa36f8 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -21,10 +21,14 @@ static const HParserVtable attr_bool_vt = { .parse = parse_attr_bool, }; -const HParser* h_attr_bool(const HParser* p, HPredicate pred) { - HParser *res = g_new(HParser, 1); + +const HParser* h_attr_bool(const HParser* p, HPredicate pred) { + return h_attr_bool__m(&system_allocator, p, pred); +} +const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) { + HParser *res = h_new(HParser, 1); res->vtable = &attr_bool_vt; - HAttrBool *env = g_new(HAttrBool, 1); + HAttrBool *env = h_new(HAttrBool, 1); env->p = p; env->pred = pred; res->env = (void*)env; diff --git a/src/parsers/bits.c b/src/parsers/bits.c index 32b7a55..196e33b 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -20,18 +20,24 @@ static const HParserVtable bits_vt = { .parse = parse_bits, }; const HParser* h_bits(size_t len, bool sign) { - struct bits_env *env = g_new(struct bits_env, 1); + return h_bits__m(&system_allocator, len, sign); +} +const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) { + struct bits_env *env = h_new(struct bits_env, 1); env->length = len; env->signedp = sign; - HParser *res = g_new(HParser, 1); + HParser *res = h_new(HParser, 1); res->vtable = &bits_vt; res->env = env; return res; } #define SIZED_BITS(name_pre, len, signedp) \ - const HParser* h_##name_pre##len () { \ - return h_bits(len, signedp); \ + const HParser* h_##name_pre##len () { \ + return h_bits__m(&system_allocator, len, signedp); \ + } \ + const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \ + return h_bits__m(mm__, len, signedp); \ } 
SIZED_BITS(int, 8, true) SIZED_BITS(int, 16, true) diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index 5026d79..422e0e1 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -39,10 +39,13 @@ static const HParserVtable butnot_vt = { .parse = parse_butnot, }; -const HParser* h_butnot(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_butnot(const HParser* p1, const HParser* p2) { + return h_butnot__m(&system_allocator, p1, p2); +} +const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &butnot_vt; ret->env = (void*)env; return ret; } diff --git a/src/parsers/ch.c b/src/parsers/ch.c index fbfa57e..bb3073f 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -15,8 +15,12 @@ static HParseResult* parse_ch(void* env, HParseState *state) { static const HParserVtable ch_vt = { .parse = parse_ch, }; -const HParser* h_ch(const uint8_t c) { - HParser *ret = g_new(HParser, 1); + +const HParser* h_ch(const uint8_t c) { + return h_ch__m(&system_allocator, c); +} +const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) { + HParser *ret = h_new(HParser, 1); ret->vtable = &ch_vt; ret->env = GUINT_TO_POINTER(c); return (const HParser*)ret; diff --git a/src/parsers/charset.c b/src/parsers/charset.c index b9642fc..f11715f 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -1,5 +1,24 @@ +#include #include "parser_internal.h" +typedef unsigned int *HCharset; + +static inline HCharset new_charset(HAllocator* mm__) { + HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int)); + memset(cs, 0, 256); + return cs; +} + +static inline int charset_isset(HCharset cs, uint8_t pos) { + return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); +} + +static inline void charset_set(HCharset cs, uint8_t pos, int val) 
{ + cs[pos / sizeof(*cs)] = + val + ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) + : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); +} static HParseResult* parse_charset(void *env, HParseState *state) { uint8_t in = h_read_bits(&state->input_stream, 8, false); @@ -18,8 +37,11 @@ static const HParserVtable charset_vt = { }; const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); + return h_ch_range__m(&system_allocator, lower, upper); +} +const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) { + HParser *ret = h_new(HParser, 1); + HCharset cs = new_charset(mm__); for (int i = 0; i < 256; i++) charset_set(cs, i, (lower <= i) && (i <= upper)); ret->vtable = &charset_vt; @@ -28,9 +50,9 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { } -const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); +static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) { + HParser *ret = h_new(HParser, 1); + HCharset cs = new_charset(mm__); for (size_t i = 0; i < 256; i++) charset_set(cs, i, 1-val); for (size_t i = 0; i < count; i++) @@ -42,10 +64,18 @@ const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) { } const HParser* h_in(const uint8_t *options, size_t count) { - return h_in_or_not(options, count, 1); + return h_in_or_not__m(&system_allocator, options, count, 1); +} + +const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { + return h_in_or_not__m(mm__, options, count, 1); } const HParser* h_not_in(const uint8_t *options, size_t count) { - return h_in_or_not(options, count, 0); + return h_in_or_not__m(&system_allocator, options, count, 0); +} + +const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { + return 
h_in_or_not__m(mm__, options, count, 0); } diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 082a2e1..2430352 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -25,20 +25,40 @@ static const HParserVtable choice_vt = { }; const HParser* h_choice(const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_choice__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_choice__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_choice__v(const HParser* p, va_list ap) { + return h_choice__mv(&system_allocator, p, ap); +} + +const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) { va_list ap; size_t len = 0; - HSequence *s = g_new(HSequence, 1); + HSequence *s = h_new(HSequence, 1); const HParser *arg; - va_start(ap, p); + va_copy(ap, ap_); do { len++; arg = va_arg(ap, const HParser *); } while (arg); va_end(ap); - s->p_array = g_new(const HParser *, len); + s->p_array = h_new(const HParser *, len); - va_start(ap, p); + va_copy(ap, ap_); s->p_array[0] = p; for (size_t i = 1; i < len; i++) { s->p_array[i] = va_arg(ap, const HParser *); @@ -46,7 +66,7 @@ const HParser* h_choice(const HParser* p, ...) 
{ va_end(ap); s->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &choice_vt; ret->env = (void*)s; return ret; } diff --git a/src/parsers/difference.c b/src/parsers/difference.c index 7f167a0..c6cc69c 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -38,10 +38,13 @@ static HParserVtable difference_vt = { .parse = parse_difference, }; -const HParser* h_difference(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_difference(const HParser* p1, const HParser* p2) { + return h_difference__m(&system_allocator, p1, p2); +} +const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &difference_vt; ret->env = (void*)env; return ret; } diff --git a/src/parsers/end.c b/src/parsers/end.c index 8e427bd..0f0d1c7 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -14,8 +14,13 @@ static const HParserVtable end_vt = { .parse = parse_end, }; -const HParser* h_end_p() { - HParser *ret = g_new(HParser, 1); - ret->vtable = &end_vt; ret->env = NULL; +const HParser* h_end_p() { + return h_end_p__m(&system_allocator); +} + +const HParser* h_end_p__m(HAllocator* mm__) { + HParser *ret = h_new(HParser, 1); + ret->vtable = &end_vt; + ret->env = NULL; return (const HParser*)ret; } diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index dc6d7a6..ae959cd 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -20,3 +20,6 @@ static const HParser epsilon_p = { const HParser* h_epsilon_p() { return &epsilon_p; } +const HParser* h_epsilon_p__m(HAllocator* mm__) { + return &epsilon_p; +} diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 5972548..03cdc10 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -15,7 +15,10 @@ static const 
HParserVtable ignore_vt = { }; const HParser* h_ignore(const HParser* p) { - HParser* ret = g_new(HParser, 1); + return h_ignore__m(&system_allocator, p); +} +const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) { + HParser* ret = h_new(HParser, 1); ret->vtable = &ignore_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index 8aac2c8..228f635 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -35,38 +35,48 @@ static const HParserVtable ignoreseq_vt = { // API frontends // -static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) { - HIgnoreSeq *seq = g_new(HIgnoreSeq, 1); - seq->parsers = g_new(const HParser*, 2); +static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) { + HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); + seq->parsers = h_new(const HParser*, 2); seq->parsers[0] = p; seq->parsers[1] = q; seq->count = 2; seq->which = which; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &ignoreseq_vt; ret->env = (void*)seq; return ret; } const HParser* h_left(const HParser* p, const HParser* q) { - return h_leftright(p, q, 0); + return h_leftright__m(&system_allocator, p, q, 0); +} +const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) { + return h_leftright__m(mm__, p, q, 0); } const HParser* h_right(const HParser* p, const HParser* q) { - return h_leftright(p, q, 1); + return h_leftright__m(&system_allocator, p, q, 1); +} +const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) { + return h_leftright__m(mm__, p, q, 1); } + const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) { - HIgnoreSeq *seq = g_new(HIgnoreSeq, 1); - seq->parsers = g_new(const HParser*, 3); + return h_middle__m(&system_allocator, p, x, q); +} +const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* 
q) { + HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); + seq->parsers = h_new(const HParser*, 3); seq->parsers[0] = p; seq->parsers[1] = x; seq->parsers[2] = q; seq->count = 3; seq->which = 1; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &ignoreseq_vt; ret->env = (void*)seq; return ret; diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 758116d..96e2a65 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -13,7 +13,10 @@ void h_bind_indirect(HParser* indirect, const HParser* inner) { } HParser* h_indirect() { - HParser *res = g_new(HParser, 1); + return h_indirect__m(&system_allocator); +} +HParser* h_indirect__m(HAllocator* mm__) { + HParser *res = h_new(HParser, 1); res->vtable = &indirect_vt; res->env = NULL; return res; diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index 9fb1c7e..2a1db63 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -33,6 +33,9 @@ static const HParserVtable int_range_vt = { }; const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { + return h_int_range__m(&system_allocator, p, lower, upper); +} +const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) { // p must be an integer parser, which means it's using parse_bits // TODO: re-add this check //assert_message(p->vtable == &bits_vt, "int_range requires an integer parser"); @@ -40,11 +43,11 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t // and regardless, the bounds need to fit in the parser in question // TODO: check this as well. 
- HRange *r_env = g_new(HRange, 1); + HRange *r_env = h_new(HRange, 1); r_env->p = p; r_env->lower = lower; r_env->upper = upper; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &int_range_vt; ret->env = (void*)r_env; return ret; diff --git a/src/parsers/many.c b/src/parsers/many.c index f18be26..6cb818b 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -49,10 +49,13 @@ static const HParserVtable many_vt = { }; const HParser* h_many(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_many__m(&system_allocator, p); +} +const HParser* h_many__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = 0; env->min_p = true; res->vtable = &many_vt; @@ -61,10 +64,13 @@ const HParser* h_many(const HParser* p) { } const HParser* h_many1(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_many1__m(&system_allocator, p); +} +const HParser* h_many1__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = 1; env->min_p = true; res->vtable = &many_vt; @@ -73,10 +79,13 @@ const HParser* h_many1(const HParser* p) { } const HParser* h_repeat_n(const HParser* p, const size_t n) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_repeat_n__m(&system_allocator, p, n); +} +const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = n; env->min_p = false; res->vtable = &many_vt; @@ -85,8 +94,11 @@ const HParser* h_repeat_n(const HParser* p, const 
size_t n) { } const HParser* h_sepBy(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_sepBy__m(&system_allocator, p, sep); +} +const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; env->sep = sep; env->count = 0; @@ -97,8 +109,11 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep) { } const HParser* h_sepBy1(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_sepBy1__m(&system_allocator, p, sep); +} +const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; env->sep = sep; env->count = 1; @@ -135,9 +150,12 @@ static const HParserVtable length_value_vt = { }; const HParser* h_length_value(const HParser* length, const HParser* value) { - HParser *res = g_new(HParser, 1); + return h_length_value__m(&system_allocator, length, value); +} +const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) { + HParser *res = h_new(HParser, 1); res->vtable = &length_value_vt; - HLenVal *env = g_new(HLenVal, 1); + HLenVal *env = h_new(HLenVal, 1); env->length = length; env->value = value; res->env = (void*)env; diff --git a/src/parsers/not.c b/src/parsers/not.c index 1c46b6d..73cf129 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -15,7 +15,10 @@ static const HParserVtable not_vt = { }; const HParser* h_not(const HParser* p) { - HParser *res = g_new(HParser, 1); + return h_not__m(&system_allocator, p); +} +const HParser* h_not__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); res->vtable = &not_vt; res->env = (void*)p; return res; diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c index 9f81c02..120c8ef 100644 --- 
a/src/parsers/nothing.c +++ b/src/parsers/nothing.c @@ -10,8 +10,11 @@ static const HParserVtable nothing_vt = { .parse = parse_nothing, }; -const HParser* h_nothing_p() { - HParser *ret = g_new(HParser, 1); +const HParser* h_nothing_p() { + return h_nothing_p__m(&system_allocator); +} +const HParser* h_nothing_p__m(HAllocator* mm__) { + HParser *ret = h_new(HParser, 1); ret->vtable = &nothing_vt; ret->env = NULL; return (const HParser*)ret; } diff --git a/src/parsers/optional.c b/src/parsers/optional.c index c084576..9625fa0 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -16,9 +16,12 @@ static const HParserVtable optional_vt = { }; const HParser* h_optional(const HParser* p) { + return h_optional__m(&system_allocator, p); +} +const HParser* h_optional__m(HAllocator* mm__, const HParser* p) { // TODO: re-add this //assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round."); - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &optional_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 54196da..dece4f0 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -27,20 +27,40 @@ static const HParserVtable sequence_vt = { .parse = parse_sequence, }; -const HParser* h_sequence(const HParser *p, ...) { +const HParser* h_sequence(const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_sequence__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) 
{ + va_list ap; + va_start(ap, p); + const HParser* ret = h_sequence__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_sequence__v(const HParser* p, va_list ap) { + return h_sequence__mv(&system_allocator, p, ap); +} + +const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) { va_list ap; size_t len = 0; const HParser *arg; - va_start(ap, p); + va_copy(ap, ap_); do { len++; arg = va_arg(ap, const HParser *); } while (arg); va_end(ap); - HSequence *s = g_new(HSequence, 1); - s->p_array = g_new(const HParser *, len); + HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(const HParser *, len); - va_start(ap, p); + va_copy(ap, ap_); s->p_array[0] = p; for (size_t i = 1; i < len; i++) { s->p_array[i] = va_arg(ap, const HParser *); @@ -48,7 +68,7 @@ const HParser* h_sequence(const HParser *p, ...) { va_end(ap); s->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &sequence_vt; ret->env = (void*)s; return ret; } diff --git a/src/parsers/token.c b/src/parsers/token.c index b3be207..e57c71d 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -24,10 +24,13 @@ const const HParserVtable token_vt = { .parse = parse_token, }; -const HParser* h_token(const uint8_t *str, const size_t len) { - HToken *t = g_new(HToken, 1); +const HParser* h_token(const uint8_t *str, const size_t len) { + return h_token__m(&system_allocator, str, len); +} +const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) { + HToken *t = h_new(HToken, 1); t->str = (uint8_t*)str, t->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &token_vt; ret->env = t; return (const HParser*)ret; diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 99d153b..35ee391 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -24,3 +24,6 @@ static HParser unimplemented = { const HParser* h_unimplemented() { 
return &unimplemented; } +const HParser* h_unimplemented__m(HAllocator* mm__) { + return &unimplemented; +} diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 4d2ec17..8e009a4 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -19,7 +19,10 @@ static const HParserVtable whitespace_vt = { }; const HParser* h_whitespace(const HParser* p) { - HParser *ret = g_new(HParser, 1); + return h_whitespace__m(&system_allocator, p); +} +const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) { + HParser *ret = h_new(HParser, 1); ret->vtable = &whitespace_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/xor.c b/src/parsers/xor.c index 9ffd51e..a11ad4a 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -35,10 +35,13 @@ static const HParserVtable xor_vt = { .parse = parse_xor, }; -const HParser* h_xor(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_xor(const HParser* p1, const HParser* p2) { + return h_xor__m(&system_allocator, p1, p2); +} +const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &xor_vt; ret->env = (void*)env; return ret; } diff --git a/src/system_allocator.c b/src/system_allocator.c new file mode 100644 index 0000000..26e2273 --- /dev/null +++ b/src/system_allocator.c @@ -0,0 +1,20 @@ +#include +#include "internal.h" + +static void* system_alloc(HAllocator *allocator, size_t size) { + return malloc(size); +} + +static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) { + return realloc(ptr, size); +} + +static void system_free(HAllocator *allocator, void* ptr) { + free(ptr); +} + +HAllocator system_allocator = { + .alloc = system_alloc, + .realloc = system_realloc, + .free = system_free, +}; From 158b2b3ba66271f0087da13585cbf80913c66eb7 Mon 
Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Wed, 10 Oct 2012 16:24:12 +0200 Subject: [PATCH 008/125] Removed all glib functions from everything other than the test suite. --- src/bitreader.c | 2 ++ src/bitwriter.c | 4 ++++ src/hammer.c | 12 +++++++----- src/hammer.h | 1 - src/internal.h | 1 - src/parsers/ch.c | 4 ++-- src/parsers/choice.c | 1 + src/parsers/sequence.c | 1 + src/pprint.c | 15 ++++++++++----- src/test_suite.c | 1 + 10 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/bitreader.c b/src/bitreader.c index bb93377..b0018f2 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -111,6 +111,8 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) { #ifdef INCLUDE_TESTS +#include + #define MK_INPUT_STREAM(buf,len,endianness_) \ { \ .input = (uint8_t*)buf, \ diff --git a/src/bitwriter.c b/src/bitwriter.c index e716f09..956f2ea 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -4,6 +4,9 @@ #include "internal.h" #include "test_suite.h" +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + // This file provides the logical inverse of bitreader.c struct HBitWriter_ { uint8_t* buf; @@ -109,6 +112,7 @@ void h_bit_writer_free(HBitWriter* w) { } #ifdef INCLUDE_TESTS +#include // TESTS BELOW HERE typedef struct { unsigned long long data; diff --git a/src/hammer.c b/src/hammer.c index 52881ec..d378268 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -26,8 +26,8 @@ #include "allocator.h" #include "parsers/parser_internal.h" -static guint djbhash(const uint8_t *buf, size_t len) { - guint hash = 5381; +static uint32_t djbhash(const uint8_t *buf, size_t len) { + uint32_t hash = 5381; while (len--) { hash = hash * 33 + *buf++; } @@ -116,7 +116,7 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { HLeftRec *lr = state->lr_stack->head->elem; while (lr && lr->rule != p) { lr->head = rec_detect->head; - h_slist_push(lr->head->involved_set, (gpointer)lr->rule); + 
h_slist_push(lr->head->involved_set, (void*)lr->rule); } } @@ -230,10 +230,10 @@ typedef struct { } HTwoParsers; -static guint cache_key_hash(gconstpointer key) { +static uint32_t cache_key_hash(const void* key) { return djbhash(key, sizeof(HParserCacheKey)); } -static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) { +static bool cache_key_equal(const void* key1, const void* key2) { return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; } @@ -274,7 +274,9 @@ void h_parse_result_free(HParseResult *result) { #ifdef INCLUDE_TESTS +#include #include "test_suite.h" + static void test_token(void) { const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3); diff --git a/src/hammer.h b/src/hammer.h index 2c5a5cf..1c1a6ce 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -17,7 +17,6 @@ #ifndef HAMMER_HAMMER__H #define HAMMER_HAMMER__H -#include #include #include #include "allocator.h" diff --git a/src/internal.h b/src/internal.h index b7f56b6..a24cc0e 100644 --- a/src/internal.h +++ b/src/internal.h @@ -17,7 +17,6 @@ #ifndef HAMMER_INTERNAL__H #define HAMMER_INTERNAL__H -#include #include #include "hammer.h" diff --git a/src/parsers/ch.c b/src/parsers/ch.c index bb3073f..032731e 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -1,7 +1,7 @@ #include "parser_internal.h" static HParseResult* parse_ch(void* env, HParseState *state) { - uint8_t c = (uint8_t)GPOINTER_TO_UINT(env); + uint8_t c = (uint8_t)(unsigned long)(env); uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false); if (c == r) { HParsedToken *tok = a_new(HParsedToken, 1); @@ -22,6 +22,6 @@ const HParser* h_ch(const uint8_t c) { const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) { HParser *ret = h_new(HParser, 1); ret->vtable = &ch_vt; - ret->env = GUINT_TO_POINTER(c); + ret->env = (void*)(unsigned long)(c); return (const HParser*)ret; } diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 2430352..73dedde 100644 --- a/src/parsers/choice.c +++ 
b/src/parsers/choice.c @@ -1,3 +1,4 @@ +#include #include "parser_internal.h" typedef struct { diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index dece4f0..21ae31d 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -1,3 +1,4 @@ +#include #include "parser_internal.h" typedef struct { diff --git a/src/pprint.c b/src/pprint.c index 8dc5852..3a8df82 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -17,9 +17,9 @@ #define _GNU_SOURCE #include -#include #include #include "hammer.h" +#include "internal.h" #include typedef struct pp_state { @@ -69,20 +69,21 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*sUSER\n", indent, ""); break; default: - g_assert_not_reached(); + assert_message(0, "Should not reach here."); } } struct result_buf { char* output; + HAllocator *mm__; size_t len; size_t capacity; }; static inline void ensure_capacity(struct result_buf *buf, int amt) { while (buf->len + amt >= buf->capacity) - buf->output = g_realloc(buf->output, buf->capacity *= 2); + buf->output = buf->mm__->realloc(buf->mm__, buf->output, buf->capacity *= 2); } static inline void append_buf(struct result_buf *buf, const char* input, int len) { @@ -149,15 +150,19 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { break; default: fprintf(stderr, "Unexpected token type %d\n", tok->token_type); - g_assert_not_reached(); + assert_message(0, "Should not reach here."); } } char* h_write_result_unamb(const HParsedToken* tok) { + return h_write_result_unamb__m(&system_allocator, tok); +} +char* h_write_result_unamb__m(HAllocator* mm__, const HParsedToken* tok) { struct result_buf buf = { - .output = g_malloc0(16), + .output = mm__->alloc(mm__, 16), .len = 0, + .mm__ = mm__, .capacity = 16 }; unamb_sub(tok, &buf); diff --git a/src/test_suite.c b/src/test_suite.c index af22c7e..16b3ef7 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -15,6 +15,7 @@ * Foundation, Inc., 51 Franklin 
Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include "hammer.h" #include "test_suite.h" From 8eececcac8ee2f2b076b240f32c926af9c07cfd3 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Wed, 10 Oct 2012 16:45:10 +0200 Subject: [PATCH 009/125] Now builds without glib --- Makefile | 7 +++++++ common.mk | 30 +++++++++++++++++++++--------- config.mk | 1 + examples/Makefile | 3 +++ src/Makefile | 12 ++++++++---- 5 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 config.mk diff --git a/Makefile b/Makefile index 08ce4e1..bd383a2 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ SUBDIRS = src examples +include config.mk + +CONFIG_VARS= INCLUDE_TESTS + .DEFAULT_GOAL := all %: @@ -25,3 +29,6 @@ $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir)))) TAGS: $(shell find * -name "*.c") etags $^ + +config: + @printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) ) diff --git a/common.mk b/common.mk index 4f70745..972466e 100644 --- a/common.mk +++ b/common.mk @@ -1,17 +1,29 @@ -CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -LDFLAGS := $(shell pkg-config --libs glib-2.0) -CC ?= gcc -$(info CC=$(CC)) -# Set V=1 for verbose mode... -V ?= 0 -CFLAGS += -DINCLUDE_TESTS $(EXTRA_CFLAGS) -HUSH = $(TOPLEVEL)/lib/hush - # Check to make sure variables are properly set ifeq ($(TOPLEVEL),) $(error $$TOPLEVEL is unset) endif +include $(TOPLEVEL)/config.mk + +TEST_CFLAGS := $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS +TEST_LDFLAGS := $(shell pkg-config --libs glib-2.0) + +CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes +LDFLAGS := + +ifneq ($(INCLUDE_TESTS),0) +CFLAGS += $(TEST_CFLAGS) +LDFLAGS += $(TEST_LDFLAGS) +endif + +CC ?= gcc +$(info CC=$(CC)) +# Set V=1 for verbose mode... 
+V ?= 0 +CFLAGS += $(EXTRA_CFLAGS) +HUSH = $(TOPLEVEL)/lib/hush + + ifsilent = $(if $(findstring 0, $(V)),$(1),) hush = $(call ifsilent,$(HUSH) $(1)) #.SUFFIXES: diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..54248f9 --- /dev/null +++ b/config.mk @@ -0,0 +1 @@ +INCLUDE_TESTS = 1 diff --git a/examples/Makefile b/examples/Makefile index d2c76cf..6a054ca 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -7,6 +7,9 @@ OUTPUTS := dns.o \ TOPLEVEL := ../ include ../common.mk +CFLAGS += $(pkg-config --cflags glib-2.0) +LDFLAGS += $(pkg-config --libs glib-2.0) + all: dns base64 diff --git a/src/Makefile b/src/Makefile index e4d70d1..de340e7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -42,10 +42,7 @@ TOPLEVEL := ../ include ../common.mk -all: libhammer.a test_suite - -test_suite: test_suite.o libhammer.a - $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) +all: libhammer.a libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \ $(PARSERS:%=parsers/%.o) @@ -53,5 +50,12 @@ libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwrite bitreader.o: test_suite.h hammer.o: hammer.h +ifneq ($(INCLUDE_TESTS),0) +all: test_suite + test: test_suite ./test_suite -v + +test_suite: test_suite.o libhammer.a + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) +endif From d0d9a94fd0c5b3518fff9cfbd135f4fc6e3e9221 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Fri, 2 Nov 2012 17:16:53 -0400 Subject: [PATCH 010/125] Added benchmark skeleton, and at least stubs for all of the necessary functions --- HACKING | 39 +++++++++++++++++++ NOTES | 1 - TODO | 3 ++ src/Makefile | 35 +++++++++++------ src/backends/packrat.c | 15 +++++++ src/benchmark.c | 88 ++++++++++++++++++++++++++++++++++++++++++ src/compile.c | 15 +++++++ src/hammer.h | 32 +++++++++++++++ src/internal.h | 10 +++++ src/t_benchmark.c | 14 +++++++ 10 files changed, 239 insertions(+), 13 deletions(-) create mode 100644 TODO 
create mode 100644 src/backends/packrat.c create mode 100644 src/benchmark.c create mode 100644 src/compile.c create mode 100644 src/t_benchmark.c diff --git a/HACKING b/HACKING index 869c327..acee9c2 100644 --- a/HACKING +++ b/HACKING @@ -10,3 +10,42 @@ In particular, these names, and the macros that use them, are: Used by a_new and company. Should be an HParseState* - mm__: Used by h_new and h_free. Should be an HAllocator* + +Function suffixes +================= + +Many functions come in several variants, to handle receiving optional +parameters or parameters in multiple different forms. For example, +often, you have a global memory manager that is used for an entire +program. In this case, you can leave off the memory manager arguments +off, letting them be implicit instead. Further, it is often convenient +to pass an array or va_list to a function instead of listing the +arguments inline (eg, for wrapping a function, generating the +arguments programattically, or writing bindings for another language. + +Because we have found that most variants fall into a fairly small set +of forms, and to minimize the amount of API calls that users need to +remember, there is a consistent naming scheme for these function +variants: the function name is followed by two underscores and a set +of single-character "flags" indicating what optional features that +particular variant has (in alphabetical order, of course): + + __a: takes variadic arguments as a void*[] + __m: takes a memory manager as the first argument, to override the system memory manager. + __v: Takes the variadic argument list as a va_list + + +Memory managers +=============== + +If the __m function variants are used or system_allocator is +overridden, there come some difficult questions to answer, +particularly regarding the behavior when multiple memory managers are +combined. 
As a general rule of thumb (exceptions will be explicitly +documented), assume that + + If you have a function f, which is passed a memory manager m and + returns a value r, any function that uses r as a parameter must + also be told to use m as a memory manager. + +In other words, don't let the (memory manager) streams cross. \ No newline at end of file diff --git a/NOTES b/NOTES index 84b8c46..77d899d 100644 --- a/NOTES +++ b/NOTES @@ -35,4 +35,3 @@ what the comments say. TODO: implement datastructure linearization func TODO: implement free func for parsers -TODO: Remove glib dependency (i.e., GQueue and GHashtable) \ No newline at end of file diff --git a/TODO b/TODO new file mode 100644 index 0000000..10ad174 --- /dev/null +++ b/TODO @@ -0,0 +1,3 @@ +- Make h_action functions be called only after parse is complete. +- Allow alternative input streams (eg, zlib, base64) + - Bonus points if layered... \ No newline at end of file diff --git a/src/Makefile b/src/Makefile index de340e7..21b7ac5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -25,17 +25,29 @@ PARSERS := \ attr_bool \ indirect -OUTPUTS := bitreader.o \ - hammer.o \ - bitwriter.o \ - libhammer.a \ - pprint.o \ - allocator.o \ - datastructures.o \ - system_allocator.o \ +BACKENDS := \ + packrat + +HAMMER_PARTS := \ + bitreader.o \ + hammer.o \ + bitwriter.o \ + pprint.o \ + allocator.o \ + datastructures.o \ + system_allocator.o \ + benchmark.o \ + compile.o \ + $(PARSERS:%=parsers/%.o) \ + $(BACKENDS:%=backends/%.o) + +TESTS := t_benchmark.o + +OUTPUTS := libhammer.a \ test_suite.o \ test_suite \ - $(PARSERS:%=parsers/%.o) + $(HAMMER_PARTS) \ + $(TESTS) TOPLEVEL := ../ @@ -44,8 +56,7 @@ include ../common.mk all: libhammer.a -libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \ - $(PARSERS:%=parsers/%.o) +libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h @@ -56,6 +67,6 @@ all: test_suite test: test_suite ./test_suite -v 
-test_suite: test_suite.o libhammer.a +test_suite: test_suite.o $(TESTS) libhammer.a $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) endif diff --git a/src/backends/packrat.c b/src/backends/packrat.c new file mode 100644 index 0000000..b884b73 --- /dev/null +++ b/src/backends/packrat.c @@ -0,0 +1,15 @@ +#include "../internal.h" + +int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) { + return 0; // No compilation necessary, and everything should work + // out of the box. +} + +HParseResult *h_packrat_parse(HAllocator* mm__, HParser* parser, HParseState* parse_state) { + return NULL; // TODO: fill this in. +} + +HParserBackendVTable h__packrat_backend_vtable = { + .compile = h_packrat_compile, + .parse = h_packrat_parse +}; diff --git a/src/benchmark.c b/src/benchmark.c new file mode 100644 index 0000000..62d89d9 --- /dev/null +++ b/src/benchmark.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include "hammer.h" + +/* + Usage: + Create your parser (i.e., HParser*), and then call + + HBenchmarkResults* results = h_benchmark(parser, testcases); + + Then, you can format a report with: + + h_benchmark_report(stdout, results); + + or just generate code to make the parser run as fast as possible with: + + h_benchmark_dump_optimized_code(stdout, results); + +*/ + + +HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) { + // For now, just output the results to stderr + HParserTestcase* tc = testcases; + HParserBackend backend = PB_MIN; + + for (backend = PB_MIN; backend < PB_MAX; backend++) { + fprintf(stderr, "Compiling for backend %d ... ", backend); + // Step 1: Compile grammar for given parser... + if (h_compile(parser, PB_MIN, NULL) == -1) { + // backend inappropriate for grammar... + fprintf(stderr, "failed\n"); + continue; + } + int tc_failed = 0; + // Step 1: verify all test cases. 
+ for (tc = testcases; tc->input != NULL; tc++) { + HParseResult *res = h_parse(parser, tc->input, tc->length); + char* res_unamb; + if (res != NULL) { + res_unamb = h_write_result_unamb(res->ast); + } else + res_unamb = NULL; + if ((res_unamb == NULL && tc->output_unambiguous == NULL) + || (strcmp(res_unamb, tc->output_unambiguous) != 0)) { + // test case failed... + fprintf(stderr, "failed\n"); + // We want to run all testcases, for purposes of generating a + // report. (eg, if users are trying to fix a grammar for a + // faster backend) + tc_failed++; + } + h_parse_result_free(res); + } + + if (tc_failed > 0) { + // Can't use this parser; skip to the next + fprintf(stderr, "Backend failed testcases; skipping benchmark\n"); + continue; + } + + for (tc = testcases; tc->input != NULL; tc++) { + // The goal is to run each testcase for at least 50ms each + // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer) + int count = 1, cur; + struct timespec ts_start, ts_end; + long long time_diff; + do { + count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway. 
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start); + for (cur = 0; cur < count; cur++) { + h_parse_result_free(h_parse(parser, tc->input, tc->length)); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end); + + // time_diff is in ns + time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); + } while (time_diff < 100000000); + fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases), time_diff / count); + } + } + return NULL; +} + +void h_benchmark_report(FILE* stream, HBenchmarkResults* result) { + // TODO: fill in this function +} diff --git a/src/compile.c b/src/compile.c new file mode 100644 index 0000000..e24839d --- /dev/null +++ b/src/compile.c @@ -0,0 +1,15 @@ +// This file contains functions related to managing multiple parse backends +#include "hammer.h" +#include "internal.h" + +static HParserBackendVTable *backends[PB_MAX] = { + &h__packrat_backend_vtable, +}; + +int h_compile(HParser* parser, HParserBackend backend, const void* params) { + return h_compile__m(&system_allocator, parser, backend, params); +} + +int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) { + return backends[backend]->compile(mm__, parser, params); +} diff --git a/src/hammer.h b/src/hammer.h index 1c1a6ce..15ff6ec 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -30,6 +30,12 @@ typedef int bool; typedef struct HParseState_ HParseState; +typedef enum HParserBackend_ { + PB_MIN = 0, + PB_PACKRAT = PB_MIN, // PB_MIN is always the default. 
+ PB_MAX +} HParserBackend; + typedef enum HTokenType_ { TT_NONE, TT_BYTES, @@ -112,6 +118,17 @@ typedef struct HParser_ { void *env; } HParser; +// {{{ Stuff for benchmarking +typedef struct HParserTestcase_ { + unsigned char* input; + size_t length; + char* output_unambiguous; +} HParserTestcase; + +typedef struct HBenchmarkResults_ { +} HBenchmarkResults; +// }}} + // {{{ Preprocessor definitions #define HAMMER_FN_DECL_NOARG(rtype_t, name) \ rtype_t name(void); \ @@ -519,6 +536,15 @@ HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok); */ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta); +/** + * Build parse tables for the given parser backend. See the + * documentation for the parser backend in question for information + * about the [params] parameter, or just pass in NULL for the defaults. + * + * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise. + */ +HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params); + /** * TODO: Document me */ @@ -541,4 +567,10 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len); */ void h_bit_writer_free(HBitWriter* w); +// {{{ Benchmark functions +HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases); +void h_benchmark_report(FILE* stream, HBenchmarkResults* results); +void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); +// }}} + #endif // #ifndef HAMMER_HAMMER__H diff --git a/src/internal.h b/src/internal.h index a24cc0e..8a8f972 100644 --- a/src/internal.h +++ b/src/internal.h @@ -109,6 +109,12 @@ struct HParseState_ { HHashTable *recursion_heads; }; +typedef struct HParserBackendVTable_ { + int (*compile)(HAllocator *mm__, HParser* parser, const void* params); + HParseResult* (*parse)(HAllocator *mm__, HParser* parser, HParseState* parse_state); +} HParserBackendVTable; + + /* The (location, parser) tuple used to key the cache. 
*/ @@ -173,6 +179,10 @@ typedef struct HParserCacheValue_t { }; } HParserCacheValue; +// Backends {{{ +extern HParserBackendVTable h__packrat_backend_vtable; +// }}} + // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. long long h_read_bits(HInputStream* state, int count, char signed_p); diff --git a/src/t_benchmark.c b/src/t_benchmark.c new file mode 100644 index 0000000..ad682b8 --- /dev/null +++ b/src/t_benchmark.c @@ -0,0 +1,14 @@ +// At this point, this is just a compile/link test. +#include "hammer.h" + +HParserTestcase testcases[] = { + {NULL, 0, NULL} +}; + +void test_benchmark_1() { + HParser *parser = NULL; // TODO: fill this in. + + HBenchmarkResults *res = h_benchmark(parser, testcases); + h_benchmark_report(stderr, res); + +} From 21ec962d7626c36e1689e6fe33df6ce7ec5e5c22 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 5 Nov 2012 23:08:18 -0500 Subject: [PATCH 011/125] Working on benchmarking test. A lot of things needed to be const and weren't. 
--- src/Makefile | 3 +++ src/backends/packrat.c | 4 ++-- src/benchmark.c | 2 +- src/compile.c | 4 ++-- src/hammer.h | 4 ++-- src/internal.h | 4 ++-- src/t_benchmark.c | 7 +++++-- 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Makefile b/src/Makefile index 21b7ac5..8cca6d3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -64,6 +64,9 @@ hammer.o: hammer.h ifneq ($(INCLUDE_TESTS),0) all: test_suite +benchmark: t_benchmark.o libhammer.a + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + test: test_suite ./test_suite -v diff --git a/src/backends/packrat.c b/src/backends/packrat.c index b884b73..e300d1f 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -1,11 +1,11 @@ #include "../internal.h" -int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) { +int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) { return 0; // No compilation necessary, and everything should work // out of the box. } -HParseResult *h_packrat_parse(HAllocator* mm__, HParser* parser, HParseState* parse_state) { +HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) { return NULL; // TODO: fill this in. 
} diff --git a/src/benchmark.c b/src/benchmark.c index 62d89d9..3bb6373 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -20,7 +20,7 @@ */ -HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) { +HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) { // For now, just output the results to stderr HParserTestcase* tc = testcases; HParserBackend backend = PB_MIN; diff --git a/src/compile.c b/src/compile.c index e24839d..e151cfb 100644 --- a/src/compile.c +++ b/src/compile.c @@ -6,10 +6,10 @@ static HParserBackendVTable *backends[PB_MAX] = { &h__packrat_backend_vtable, }; -int h_compile(HParser* parser, HParserBackend backend, const void* params) { +int h_compile(const HParser* parser, HParserBackend backend, const void* params) { return h_compile__m(&system_allocator, parser, backend, params); } -int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) { +int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) { return backends[backend]->compile(mm__, parser, params); } diff --git a/src/hammer.h b/src/hammer.h index 15ff6ec..bf03492 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -543,7 +543,7 @@ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent * * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise. 
*/ -HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params); +HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params); /** * TODO: Document me @@ -568,7 +568,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len); void h_bit_writer_free(HBitWriter* w); // {{{ Benchmark functions -HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases); +HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases); void h_benchmark_report(FILE* stream, HBenchmarkResults* results); void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); // }}} diff --git a/src/internal.h b/src/internal.h index 8a8f972..68b7d97 100644 --- a/src/internal.h +++ b/src/internal.h @@ -110,8 +110,8 @@ struct HParseState_ { }; typedef struct HParserBackendVTable_ { - int (*compile)(HAllocator *mm__, HParser* parser, const void* params); - HParseResult* (*parse)(HAllocator *mm__, HParser* parser, HParseState* parse_state); + int (*compile)(HAllocator *mm__, const HParser* parser, const void* params); + HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state); } HParserBackendVTable; diff --git a/src/t_benchmark.c b/src/t_benchmark.c index ad682b8..1859dac 100644 --- a/src/t_benchmark.c +++ b/src/t_benchmark.c @@ -2,11 +2,14 @@ #include "hammer.h" HParserTestcase testcases[] = { - {NULL, 0, NULL} + {(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"}, + {(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"}, + {(unsigned char*)"1,3", 3, "(u0x31 u0x33)"}, + {(unsigned char*)"3", 1, "(u0x33)"} }; void test_benchmark_1() { - HParser *parser = NULL; // TODO: fill this in. 
+ const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); HBenchmarkResults *res = h_benchmark(parser, testcases); h_benchmark_report(stderr, res); From 771de6f98bf8be628eb11a4be8a6c57701b3df6b Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Tue, 6 Nov 2012 00:20:00 -0500 Subject: [PATCH 012/125] Benchmark test case array needed to be terminated with {NULL,0,NULL}. Noted that in comments. --- src/Makefile | 2 +- src/benchmark.c | 3 ++- src/t_benchmark.c | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Makefile b/src/Makefile index 8cca6d3..f24f23b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -70,6 +70,6 @@ benchmark: t_benchmark.o libhammer.a test: test_suite ./test_suite -v -test_suite: test_suite.o $(TESTS) libhammer.a +test_suite: test_suite.o libhammer.a $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) endif diff --git a/src/benchmark.c b/src/benchmark.c index 3bb6373..5d193ca 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -5,7 +5,8 @@ /* Usage: - Create your parser (i.e., HParser*), and then call + Create your parser (i.e., const HParser*), and an array of test cases + (i.e., HParserTestcase[], terminated by { NULL, 0, NULL }) and then call HBenchmarkResults* results = h_benchmark(parser, testcases); diff --git a/src/t_benchmark.c b/src/t_benchmark.c index 1859dac..4101829 100644 --- a/src/t_benchmark.c +++ b/src/t_benchmark.c @@ -5,7 +5,8 @@ HParserTestcase testcases[] = { {(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"}, {(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"}, {(unsigned char*)"1,3", 3, "(u0x31 u0x33)"}, - {(unsigned char*)"3", 1, "(u0x33)"} + {(unsigned char*)"3", 1, "(u0x33)"}, + { NULL, 0, NULL } }; void test_benchmark_1() { @@ -13,5 +14,9 @@ void test_benchmark_1() { HBenchmarkResults *res = h_benchmark(parser, testcases); h_benchmark_report(stderr, res); - +} + +int main(int argc, char **argv) { + test_benchmark_1(); + return 0; } From 
c24044230b89967fa46f077e04226a429737cac9 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Thu, 8 Nov 2012 01:36:19 -0500 Subject: [PATCH 013/125] Benchmark is actually broken right now, for mysterious double-free related reasons. I will dig into this tomorrow. --- src/benchmark.c | 22 ++++++++++++++++++++++ src/hammer.h | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/benchmark.c b/src/benchmark.c index 5d193ca..bf7f207 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -20,23 +20,39 @@ */ +#define false 0 +#define true 1 + +#include HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) { // For now, just output the results to stderr HParserTestcase* tc = testcases; HParserBackend backend = PB_MIN; + HBenchmarkResults *ret = (HBenchmarkResults*)malloc(sizeof(HBenchmarkResults*)); + ret->len = PB_MAX-PB_MIN; + ret->results = (HBackendResults*)malloc(ret->len * sizeof(HBackendResults*)); for (backend = PB_MIN; backend < PB_MAX; backend++) { fprintf(stderr, "Compiling for backend %d ... ", backend); + ret->results[backend].backend = backend; // Step 1: Compile grammar for given parser... if (h_compile(parser, PB_MIN, NULL) == -1) { // backend inappropriate for grammar... fprintf(stderr, "failed\n"); + ret->results[backend].compile_success = false; + ret->results[backend].n_testcases = 0; + ret->results[backend].failed_testcases = 0; + ret->results[backend].cases = NULL; continue; } + ret->results[backend].compile_success = true; int tc_failed = 0; // Step 1: verify all test cases. + ret->results[backend].n_testcases = 0; + ret->results[backend].failed_testcases = 0; for (tc = testcases; tc->input != NULL; tc++) { + ret->results[backend].n_testcases++; HParseResult *res = h_parse(parser, tc->input, tc->length); char* res_unamb; if (res != NULL) { @@ -51,6 +67,7 @@ HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases // report. 
(eg, if users are trying to fix a grammar for a // faster backend) tc_failed++; + ret->results[backend].failed_testcases++; } h_parse_result_free(res); } @@ -61,6 +78,9 @@ HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases continue; } + ret->results[backend].cases = (HCaseResult*)malloc(ret->results[backend].n_testcases * sizeof(HCaseResult*)); + size_t cur_case = 0; + for (tc = testcases; tc->input != NULL; tc++) { // The goal is to run each testcase for at least 50ms each // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer) @@ -78,7 +98,9 @@ HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases // time_diff is in ns time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); } while (time_diff < 100000000); + ret->results[backend].cases[cur_case].parse_time = (time_diff / count); fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases), time_diff / count); + cur_case++; } } return NULL; diff --git a/src/hammer.h b/src/hammer.h index bf03492..256644c 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -125,7 +125,25 @@ typedef struct HParserTestcase_ { char* output_unambiguous; } HParserTestcase; +typedef struct HCaseResult_ { + bool success; + union { + const char* actual_results; // on failure, filled in with the results of h_write_result_unamb + size_t parse_time; // on success, filled in with time for a single parse, in nsec + }; +} HCaseResult; + +typedef struct HBackendResults_ { + HParserBackend backend; + bool compile_success; + size_t n_testcases; + size_t failed_testcases; // actually a count... + HCaseResult *cases; +} HBackendResults; + typedef struct HBenchmarkResults_ { + size_t len; + HBackendResults *results; } HBenchmarkResults; // }}} From 6893987cbc0d6fbe5b187a1d152b07fc046d2665 Mon Sep 17 00:00:00 2001 From: "Meredith L. 
Patterson" Date: Fri, 9 Nov 2012 01:50:07 -0500 Subject: [PATCH 014/125] h_benchmark and h_benchmark_report work. Need tidying. --- src/benchmark.c | 27 +++++++++++++++------------ src/hammer.h | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/benchmark.c b/src/benchmark.c index bf7f207..577a380 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -2,6 +2,7 @@ #include #include #include "hammer.h" +#include "internal.h" /* Usage: @@ -20,21 +21,19 @@ */ -#define false 0 -#define true 1 - -#include - HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) { + return h_benchmark__m(&system_allocator, parser, testcases); +} + +HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) { // For now, just output the results to stderr HParserTestcase* tc = testcases; HParserBackend backend = PB_MIN; - HBenchmarkResults *ret = (HBenchmarkResults*)malloc(sizeof(HBenchmarkResults*)); + HBenchmarkResults *ret = h_new(HBenchmarkResults, 1); ret->len = PB_MAX-PB_MIN; - ret->results = (HBackendResults*)malloc(ret->len * sizeof(HBackendResults*)); + ret->results = h_new(HBackendResults, ret->len); for (backend = PB_MIN; backend < PB_MAX; backend++) { - fprintf(stderr, "Compiling for backend %d ... ", backend); ret->results[backend].backend = backend; // Step 1: Compile grammar for given parser... 
if (h_compile(parser, PB_MIN, NULL) == -1) { @@ -78,7 +77,7 @@ HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases continue; } - ret->results[backend].cases = (HCaseResult*)malloc(ret->results[backend].n_testcases * sizeof(HCaseResult*)); + ret->results[backend].cases = h_new(HCaseResult, ret->results[backend].n_testcases); size_t cur_case = 0; for (tc = testcases; tc->input != NULL; tc++) { @@ -99,13 +98,17 @@ HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); } while (time_diff < 100000000); ret->results[backend].cases[cur_case].parse_time = (time_diff / count); - fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases), time_diff / count); cur_case++; } } - return NULL; + return ret; } void h_benchmark_report(FILE* stream, HBenchmarkResults* result) { - // TODO: fill in this function + for (size_t i=0; i<result->len; ++i) { + fprintf(stream, "Backend %ld ... \n", i); + for (size_t j=0; j<result->results[i].n_testcases; ++j) { + fprintf(stream, "Case %ld: %ld ns/parse\n", j, result->results[i].cases[j].parse_time); + } + } } diff --git a/src/hammer.h b/src/hammer.h index 256644c..79e3bd9 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -586,7 +586,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len); void h_bit_writer_free(HBitWriter* w); // {{{ Benchmark functions -HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases); +HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases); void h_benchmark_report(FILE* stream, HBenchmarkResults* results); void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); // }}} From 6ae2e1fed41cc4b0141daf8371e7608116003be8 Mon Sep 17 00:00:00 2001 From: "Meredith L.
Patterson" Date: Sat, 10 Nov 2012 23:34:08 -0500 Subject: [PATCH 015/125] Changed malloc.h to stdlib.h for OSX compatibility. --- src/test_suite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test_suite.h b/src/test_suite.h index 68cf83a..24932bb 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -17,7 +17,7 @@ #ifndef HAMMER_TEST_SUITE__H #define HAMMER_TEST_SUITE__H -#include <malloc.h> +#include <stdlib.h> // Equivalent to g_assert_*, but not using g_assert... #define g_check_inttype(fmt, typ, n1, op, n2) do { \ From e96969a6f0e5b5d73790d5c78ba269034d76be69 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Sun, 11 Nov 2012 02:28:07 -0500 Subject: [PATCH 016/125] Finally wrote a damn README --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..4c4268c --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables. + +Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask. + +Hammer currently builds under Linux and OSX. (Windows is coming.)
+ +Features +======== +* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle +* Thread-safe, reentrant +* Benchmarking for parsing backends -- determine empirically which backend will be most time/space-efficient for your grammar +* Parsing backends: + * Packrat parsing + * LL(k) (not yet implemented) + * GLR (not yet implemented) + * LALR(8) (not yet implemented) + * Regular expressions (not yet implemented) +* Language bindings: (not yet implemented) + * C++ + * Java + * Python + * Ruby + * Perl + * Go + * PHP + * .NET + +Installing +========== +Prerequisites +------------- +* pkg-config +* glib-2.0 (for the test suite; everything else will build without glib) +* make + +To install, type `make`. To run the built-in test suite, type `make test`. + +There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`. + +Usage +===== +Just `#include <hammer.h>` and link with `-lhammer`. + +Examples +======== +The `examples/` directory contains some simple examples, currently including: +* base64 +* DNS From c05559f04b15782459c09f480c4db8ac5625faf4 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Sun, 11 Nov 2012 02:29:34 -0500 Subject: [PATCH 017/125] made prereq header a little smaller --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 4c4268c..bb435dd 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,7 @@ Features Installing ========== -Prerequisites -------------- +### Prerequisites * pkg-config * glib-2.0 (for the test suite; everything else will build without glib) * make From 4ee72f7308393d68d0477b6cf15dbcacb1d01ea8 Mon Sep 17 00:00:00 2001 From: "Meredith L.
Patterson" Date: Sun, 11 Nov 2012 11:10:39 -0500 Subject: [PATCH 018/125] double 'const' decl was causing clang to error --- src/parsers/token.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsers/token.c b/src/parsers/token.c index e57c71d..13bafbb 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -20,7 +20,7 @@ static HParseResult* parse_token(void *env, HParseState *state) { return make_result(state, tok); } -const const HParserVtable token_vt = { +const HParserVtable token_vt = { .parse = parse_token, }; From e9b49cf24c6842f1db9dfb8b62ba2d9be181b00d Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Sun, 11 Nov 2012 12:39:23 -0500 Subject: [PATCH 019/125] gcc 4.4 requires stdarg in hammer.h --- src/hammer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hammer.h b/src/hammer.h index 79e3bd9..0791769 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -17,6 +17,7 @@ #ifndef HAMMER_HAMMER__H #define HAMMER_HAMMER__H +#include <stdarg.h> #include #include #include "allocator.h" From 236ec733a148f224d4581a97a1c72f304f52ff77 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Tue, 13 Nov 2012 21:50:48 -0500 Subject: [PATCH 020/125] Reorganizing makefile requirements --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bb435dd..34d3333 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ Hammer is a parsing library. Like many modern parsing libraries, it provides a p Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask. -Hammer currently builds under Linux and OSX. (Windows is coming.) +Hammer currently builds under Linux. (Windows and OSX are coming.)
Features ======== * Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle * Thread-safe, reentrant -* Benchmarking for parsing backends -- determine empirically which backend will be most time/space-efficient for your grammar +* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar * Parsing backends: * Packrat parsing * LL(k) (not yet implemented) @@ -28,10 +28,14 @@ Features Installing ========== ### Prerequisites -* pkg-config -* glib-2.0 (for the test suite; everything else will build without glib) * make +### Optional Dependencies +* doxygen (for `make doc`) +* pkg-config (for `make test`) +* glib-2.0 (for `make test`) +* glib-2.0-dev (for `make test`) + To install, type `make`. To run the built-in test suite, type `make test`. There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`. From de8db18db4a3c3cc4a59878f09fac61a4964f5f1 Mon Sep 17 00:00:00 2001 From: "Meredith L. 
Patterson" Date: Tue, 13 Nov 2012 22:29:25 -0500 Subject: [PATCH 021/125] Refactored tests; make just builds library, make test builds/runs tests --- common.mk | 9 +- config.mk | 2 +- src/Makefile | 19 +-- src/bitreader.c | 69 -------- src/bitwriter.c | 121 -------------- src/hammer.c | 403 --------------------------------------------- src/internal.h | 14 ++ src/t_benchmark.c | 10 +- src/t_bitreader.c | 67 ++++++++ src/t_bitwriter.c | 108 +++++++++++++ src/t_parser.c | 404 ++++++++++++++++++++++++++++++++++++++++++++++ src/test_suite.c | 2 + 12 files changed, 613 insertions(+), 615 deletions(-) create mode 100644 src/t_bitreader.c create mode 100644 src/t_bitwriter.c create mode 100644 src/t_parser.c diff --git a/common.mk b/common.mk index 972466e..a57429d 100644 --- a/common.mk +++ b/common.mk @@ -5,17 +5,12 @@ endif include $(TOPLEVEL)/config.mk -TEST_CFLAGS := $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS -TEST_LDFLAGS := $(shell pkg-config --libs glib-2.0) +TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS +TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes LDFLAGS := -ifneq ($(INCLUDE_TESTS),0) -CFLAGS += $(TEST_CFLAGS) -LDFLAGS += $(TEST_LDFLAGS) -endif - CC ?= gcc $(info CC=$(CC)) # Set V=1 for verbose mode... 
diff --git a/config.mk b/config.mk index 54248f9..5161bda 100644 --- a/config.mk +++ b/config.mk @@ -1 +1 @@ -INCLUDE_TESTS = 1 +INCLUDE_TESTS = 0 diff --git a/src/Makefile b/src/Makefile index f24f23b..128de05 100644 --- a/src/Makefile +++ b/src/Makefile @@ -41,7 +41,11 @@ HAMMER_PARTS := \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) -TESTS := t_benchmark.o +TESTS := t_benchmark.o \ + t_bitreader.o \ + t_bitwriter.o \ + t_parser.o \ + test_suite.o OUTPUTS := libhammer.a \ test_suite.o \ @@ -53,6 +57,8 @@ TOPLEVEL := ../ include ../common.mk +$(TESTS): CFLAGS += $(TEST_CFLAGS) +$(TESTS): LDFLAGS += $(TEST_LDFLAGS) all: libhammer.a @@ -61,15 +67,10 @@ libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h -ifneq ($(INCLUDE_TESTS),0) -all: test_suite - -benchmark: t_benchmark.o libhammer.a - $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) +all: libhammer.a test: test_suite ./test_suite -v -test_suite: test_suite.o libhammer.a - $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -endif +test_suite: $(TESTS) libhammer.a + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS) diff --git a/src/bitreader.c b/src/bitreader.c index b0018f2..4971076 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -108,72 +108,3 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) { out <<= final_shift; return (out ^ msb) - msb; // perform sign extension } - -#ifdef INCLUDE_TESTS - -#include - -#define MK_INPUT_STREAM(buf,len,endianness_) \ - { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 
8 : 0), \ - .endianness = endianness_ \ - } - - -static void test_bitreader_ints(void) { - HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000); -} - -static void test_bitreader_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03); - g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); -} -static void test_bitreader_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02); - g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); -} - -static void test_largebits_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); -} - -static void test_largebits_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); -} - -static void test_offset_largebits_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A); -} - -static void test_offset_largebits_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3); -} - - -void register_bitreader_tests(void) { - g_test_add_func("/core/bitreader/be", test_bitreader_be); - 
g_test_add_func("/core/bitreader/le", test_bitreader_le); - g_test_add_func("/core/bitreader/largebits-be", test_largebits_be); - g_test_add_func("/core/bitreader/largebits-le", test_largebits_le); - g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be); - g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le); - g_test_add_func("/core/bitreader/ints", test_bitreader_ints); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/bitwriter.c b/src/bitwriter.c index 956f2ea..9374a88 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -7,18 +7,6 @@ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) -// This file provides the logical inverse of bitreader.c -struct HBitWriter_ { - uint8_t* buf; - HAllocator *mm__; - size_t index; - size_t capacity; - char bit_offset; // unlike in bit_reader, this is always the number - // of used bits in the current byte. i.e., 0 always - // means that 8 bits are available for use. - char flags; -}; - // h_bit_writer_ HBitWriter *h_bit_writer_new(HAllocator* mm__) { HBitWriter *writer = h_new(HBitWriter, 1); @@ -110,112 +98,3 @@ void h_bit_writer_free(HBitWriter* w) { h_free(w->buf); h_free(w); } - -#ifdef INCLUDE_TESTS -#include -// TESTS BELOW HERE -typedef struct { - unsigned long long data; - size_t nbits; -} bitwriter_test_elem; // should end with {0,0} - -void run_bitwriter_test(bitwriter_test_elem data[], char flags) { - size_t len; - const uint8_t *buf; - HBitWriter *w = h_bit_writer_new(&system_allocator); - int i; - w->flags = flags; - for (i = 0; data[i].nbits; i++) { - h_bit_writer_put(w, data[i].data, data[i].nbits); - } - - buf = h_bit_writer_get_buffer(w, &len); - HInputStream input = { - .input = buf, - .index = 0, - .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 
8 : 0, - .endianness = flags, - .overrun = 0 - }; - - for (i = 0; data[i].nbits; i++) { - g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data); - } -} - -static void test_bitwriter_ints(void) { - bitwriter_test_elem data[] = { - { -0x200000000, 64 }, - { 0,0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_bitwriter_be(void) { - bitwriter_test_elem data[] = { - { 0x03, 3 }, - { 0x52, 8 }, - { 0x1A, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_bitwriter_le(void) { - bitwriter_test_elem data[] = { - { 0x02, 3 }, - { 0x4D, 8 }, - { 0x0B, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -static void test_largebits_be(void) { - bitwriter_test_elem data[] = { - { 0x352, 11 }, - { 0x1A, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_largebits_le(void) { - bitwriter_test_elem data[] = { - { 0x26A, 11 }, - { 0x0B, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -static void test_offset_largebits_be(void) { - bitwriter_test_elem data[] = { - { 0xD, 5 }, - { 0x25A, 11 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_offset_largebits_le(void) { - bitwriter_test_elem data[] = { - { 0xA, 5 }, - { 0x2D3, 11 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -void register_bitwriter_tests(void) { - g_test_add_func("/core/bitwriter/be", test_bitwriter_be); - g_test_add_func("/core/bitwriter/le", test_bitwriter_le); - g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be); - g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le); - g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be); - 
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le); - g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/hammer.c b/src/hammer.c index d378268..14db6a8 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -272,407 +272,4 @@ void h_parse_result_free(HParseResult *result) { h_delete_arena(result->arena); } -#ifdef INCLUDE_TESTS -#include -#include "test_suite.h" - -static void test_token(void) { - const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3); - - g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>"); - g_check_parse_failed(token_, "95", 2); -} - -static void test_ch(void) { - const HParser *ch_ = h_ch(0xa2); - - g_check_parse_ok(ch_, "\xa2", 1, "u0xa2"); - g_check_parse_failed(ch_, "\xa3", 1); -} - -static void test_ch_range(void) { - const HParser *range_ = h_ch_range('a', 'c'); - - g_check_parse_ok(range_, "b", 1, "u0x62"); - g_check_parse_failed(range_, "d", 1); -} - -//@MARK_START -static void test_int64(void) { - const HParser *int64_ = h_int64(); - - g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000"); - g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7); -} - -static void test_int32(void) { - const HParser *int32_ = h_int32(); - - g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000"); - g_check_parse_failed(int32_, "\xff\xfe\x00", 3); -} - -static void test_int16(void) { - const HParser *int16_ = h_int16(); - - g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200"); - g_check_parse_failed(int16_, "\xfe", 1); -} - -static void test_int8(void) { - const HParser *int8_ = h_int8(); - - g_check_parse_ok(int8_, "\x88", 1, "s-0x78"); - g_check_parse_failed(int8_, "", 0); -} - -static void test_uint64(void) { - const HParser *uint64_ = h_uint64(); - - g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000"); - g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7); -} - -static void 
test_uint32(void) { - const HParser *uint32_ = h_uint32(); - - g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000"); - g_check_parse_failed(uint32_, "\x00\x02\x00", 3); -} - -static void test_uint16(void) { - const HParser *uint16_ = h_uint16(); - - g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200"); - g_check_parse_failed(uint16_, "\x02", 1); -} - -static void test_uint8(void) { - const HParser *uint8_ = h_uint8(); - - g_check_parse_ok(uint8_, "\x78", 1, "u0x78"); - g_check_parse_failed(uint8_, "", 0); -} -//@MARK_END - -static void test_int_range(void) { - const HParser *int_range_ = h_int_range(h_uint8(), 3, 10); - - g_check_parse_ok(int_range_, "\x05", 1, "u0x5"); - g_check_parse_failed(int_range_, "\xb", 1); -} - -#if 0 -static void test_float64(void) { - const HParser *float64_ = h_float64(); - - g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0); - g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7); -} - -static void test_float32(void) { - const HParser *float32_ = h_float32(); - - g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0); - g_check_parse_failed(float32_, "\x3f\x80\x00"); -} -#endif - - -static void test_whitespace(void) { - const HParser *whitespace_ = h_whitespace(h_ch('a')); - - g_check_parse_ok(whitespace_, "a", 1, "u0x61"); - g_check_parse_ok(whitespace_, " a", 2, "u0x61"); - g_check_parse_ok(whitespace_, " a", 3, "u0x61"); - g_check_parse_ok(whitespace_, "\ta", 2, "u0x61"); - g_check_parse_failed(whitespace_, "_a", 2); -} - -static void test_left(void) { - const HParser *left_ = h_left(h_ch('a'), h_ch(' ')); - - g_check_parse_ok(left_, "a ", 2, "u0x61"); - g_check_parse_failed(left_, "a", 1); - g_check_parse_failed(left_, " ", 1); - g_check_parse_failed(left_, "ab", 2); -} - -static void test_right(void) { - const HParser *right_ = h_right(h_ch(' '), h_ch('a')); - - g_check_parse_ok(right_, " a", 2, "u0x61"); - g_check_parse_failed(right_, "a", 1); - g_check_parse_failed(right_, " ", 1); - 
g_check_parse_failed(right_, "ba", 2); -} - -static void test_middle(void) { - const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' ')); - - g_check_parse_ok(middle_, " a ", 3, "u0x61"); - g_check_parse_failed(middle_, "a", 1); - g_check_parse_failed(middle_, " ", 1); - g_check_parse_failed(middle_, " a", 2); - g_check_parse_failed(middle_, "a ", 2); - g_check_parse_failed(middle_, " b ", 3); - g_check_parse_failed(middle_, "ba ", 3); - g_check_parse_failed(middle_, " ab", 3); -} - -#include <ctype.h> - -const HParsedToken* upcase(const HParseResult *p) { - switch(p->ast->token_type) { - case TT_SEQUENCE: - { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); - HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used); - ret->token_type = TT_SEQUENCE; - for (size_t i=0; i<p->ast->seq->used; ++i) { - if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { - HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); - tmp->token_type = TT_UINT; - tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); - h_carray_append(seq, tmp); - } else { - h_carray_append(seq, p->ast->seq->elements[i]); - } - } - ret->seq = seq; - return (const HParsedToken*)ret; - } - case TT_UINT: - { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); - ret->token_type = TT_UINT; - ret->uint = toupper(p->ast->uint); - return (const HParsedToken*)ret; - } - default: - return p->ast; - } -} - -static void test_action(void) { - const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'), - h_ch('A'), - NULL), - h_choice(h_ch('b'), - h_ch('B'), - NULL), - NULL), - upcase); - - g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)"); - g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)"); - g_check_parse_failed(action_, "XX", 2); -} - -static void test_in(void) { - uint8_t options[3] = { 'a', 'b', 'c' }; - const HParser *in_ = h_in(options, 3); - g_check_parse_ok(in_, "b", 1, "u0x62"); - g_check_parse_failed(in_, "d", 1); - -} - -static void
test_not_in(void) { - uint8_t options[3] = { 'a', 'b', 'c' }; - const HParser *not_in_ = h_not_in(options, 3); - g_check_parse_ok(not_in_, "d", 1, "u0x64"); - g_check_parse_failed(not_in_, "a", 1); - -} - -static void test_end_p(void) { - const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL); - g_check_parse_ok(end_p_, "a", 1, "(u0x61)"); - g_check_parse_failed(end_p_, "aa", 2); -} - -static void test_nothing_p(void) { - const HParser *nothing_p_ = h_nothing_p(); - g_check_parse_failed(nothing_p_, "a", 1); -} - -static void test_sequence(void) { - const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL); - const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL); - - g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_failed(sequence_1, "a", 1); - g_check_parse_failed(sequence_1, "b", 1); - g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)"); - g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)"); -} - -static void test_choice(void) { - const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL); - - g_check_parse_ok(choice_, "a", 1, "u0x61"); - g_check_parse_ok(choice_, "b", 1, "u0x62"); - g_check_parse_failed(choice_, "c", 1); -} - -static void test_butnot(void) { - const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2)); - const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6')); - - g_check_parse_ok(butnot_1, "a", 1, "u0x61"); - g_check_parse_failed(butnot_1, "ab", 2); - g_check_parse_ok(butnot_1, "aa", 2, "u0x61"); - g_check_parse_failed(butnot_2, "6", 1); -} - -static void test_difference(void) { - const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a')); - - g_check_parse_ok(difference_, "ab", 2, "<61.62>"); - g_check_parse_failed(difference_, "a", 1); -} - -static void test_xor(void) { - const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9')); - - 
g_check_parse_ok(xor_, "0", 1, "u0x30"); - g_check_parse_ok(xor_, "9", 1, "u0x39"); - g_check_parse_failed(xor_, "5", 1); - g_check_parse_failed(xor_, "a", 1); -} - -static void test_many(void) { - const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL)); - g_check_parse_ok(many_, "adef", 4, "(u0x61)"); - g_check_parse_ok(many_, "bdef", 4, "(u0x62)"); - g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); - g_check_parse_ok(many_, "daabbabadef", 11, "()"); -} - -static void test_many1(void) { - const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL)); - - g_check_parse_ok(many1_, "adef", 4, "(u0x61)"); - g_check_parse_ok(many1_, "bdef", 4, "(u0x62)"); - g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); - g_check_parse_failed(many1_, "daabbabadef", 11); -} - -static void test_repeat_n(void) { - const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2); - - g_check_parse_failed(repeat_n_, "adef", 4); - g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)"); - g_check_parse_failed(repeat_n_, "dabdef", 6); -} - -static void test_optional(void) { - const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL); - - g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)"); - g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)"); - g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)"); - g_check_parse_failed(optional_, "aed", 3); - g_check_parse_failed(optional_, "ab", 2); - g_check_parse_failed(optional_, "ac", 2); -} - -static void test_ignore(void) { - const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL); - - g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)"); - g_check_parse_failed(ignore_, "ac", 2); -} - -static void test_sepBy1(void) { - const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); - - 
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)"); - g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)"); - g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)"); - g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)"); -} - -static void test_epsilon_p(void) { - const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL); - const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL); - const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL); - - g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)"); - g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)"); -} - -static void test_attr_bool(void) { - -} - -static void test_and(void) { - const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL); - const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL); - const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL); - - g_check_parse_ok(and_1, "0", 1, "(u0x30)"); - g_check_parse_failed(and_2, "0", 1); - g_check_parse_ok(and_3, "12", 2, "(u0x31)"); -} - -static void test_not(void) { - const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL); - const HParser *not_2 = h_sequence(h_ch('a'), - h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL), - h_token((const uint8_t*)"++", 2), - NULL), h_ch('b'), NULL); - - g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)"); - g_check_parse_failed(not_1, "a++b", 4); - g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)"); - g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); -} - -void register_parser_tests(void) { - g_test_add_func("/core/parser/token", test_token); - g_test_add_func("/core/parser/ch", test_ch); - g_test_add_func("/core/parser/ch_range", test_ch_range); - g_test_add_func("/core/parser/int64", test_int64); - g_test_add_func("/core/parser/int32", test_int32); - 
g_test_add_func("/core/parser/int16", test_int16); - g_test_add_func("/core/parser/int8", test_int8); - g_test_add_func("/core/parser/uint64", test_uint64); - g_test_add_func("/core/parser/uint32", test_uint32); - g_test_add_func("/core/parser/uint16", test_uint16); - g_test_add_func("/core/parser/uint8", test_uint8); - g_test_add_func("/core/parser/int_range", test_int_range); -#if 0 - g_test_add_func("/core/parser/float64", test_float64); - g_test_add_func("/core/parser/float32", test_float32); -#endif - g_test_add_func("/core/parser/whitespace", test_whitespace); - g_test_add_func("/core/parser/left", test_left); - g_test_add_func("/core/parser/right", test_right); - g_test_add_func("/core/parser/middle", test_middle); - g_test_add_func("/core/parser/action", test_action); - g_test_add_func("/core/parser/in", test_in); - g_test_add_func("/core/parser/not_in", test_not_in); - g_test_add_func("/core/parser/end_p", test_end_p); - g_test_add_func("/core/parser/nothing_p", test_nothing_p); - g_test_add_func("/core/parser/sequence", test_sequence); - g_test_add_func("/core/parser/choice", test_choice); - g_test_add_func("/core/parser/butnot", test_butnot); - g_test_add_func("/core/parser/difference", test_difference); - g_test_add_func("/core/parser/xor", test_xor); - g_test_add_func("/core/parser/many", test_many); - g_test_add_func("/core/parser/many1", test_many1); - g_test_add_func("/core/parser/repeat_n", test_repeat_n); - g_test_add_func("/core/parser/optional", test_optional); - g_test_add_func("/core/parser/sepBy1", test_sepBy1); - g_test_add_func("/core/parser/epsilon_p", test_epsilon_p); - g_test_add_func("/core/parser/attr_bool", test_attr_bool); - g_test_add_func("/core/parser/and", test_and); - g_test_add_func("/core/parser/not", test_not); - g_test_add_func("/core/parser/ignore", test_ignore); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/internal.h b/src/internal.h index 68b7d97..269bc4f 100644 --- a/src/internal.h +++ b/src/internal.h @@ -179,6 
+179,20 @@ typedef struct HParserCacheValue_t { }; } HParserCacheValue; +// This file provides the logical inverse of bitreader.c +struct HBitWriter_ { + uint8_t* buf; + HAllocator *mm__; + size_t index; + size_t capacity; + char bit_offset; // unlike in bit_reader, this is always the number + // of used bits in the current byte. i.e., 0 always + // means that 8 bits are available for use. + char flags; +}; + +// }}} + // Backends {{{ extern HParserBackendVTable h__packrat_backend_vtable; // }}} diff --git a/src/t_benchmark.c b/src/t_benchmark.c index 4101829..60d22c5 100644 --- a/src/t_benchmark.c +++ b/src/t_benchmark.c @@ -1,5 +1,6 @@ -// At this point, this is just a compile/link test. +#include #include "hammer.h" +#include "test_suite.h" HParserTestcase testcases[] = { {(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"}, @@ -9,14 +10,13 @@ HParserTestcase testcases[] = { { NULL, 0, NULL } }; -void test_benchmark_1() { +static void test_benchmark_1() { const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); HBenchmarkResults *res = h_benchmark(parser, testcases); h_benchmark_report(stderr, res); } -int main(int argc, char **argv) { - test_benchmark_1(); - return 0; +void register_benchmark_tests(void) { + g_test_add_func("/core/benchmark/1", test_benchmark_1); } diff --git a/src/t_bitreader.c b/src/t_bitreader.c new file mode 100644 index 0000000..84e1057 --- /dev/null +++ b/src/t_bitreader.c @@ -0,0 +1,67 @@ +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" + +#define MK_INPUT_STREAM(buf,len,endianness_) \ + { \ + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 
8 : 0), \ + .endianness = endianness_ \ + } + + +static void test_bitreader_ints(void) { + HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000); +} + +static void test_bitreader_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03); + g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); +} +static void test_bitreader_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02); + g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); +} + +static void test_largebits_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); +} + +static void test_largebits_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); +} + +static void test_offset_largebits_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A); +} + +static void test_offset_largebits_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3); +} + + +void register_bitreader_tests(void) { + g_test_add_func("/core/bitreader/be", test_bitreader_be); + 
g_test_add_func("/core/bitreader/le", test_bitreader_le); + g_test_add_func("/core/bitreader/largebits-be", test_largebits_be); + g_test_add_func("/core/bitreader/largebits-le", test_largebits_le); + g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be); + g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le); + g_test_add_func("/core/bitreader/ints", test_bitreader_ints); +} diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c new file mode 100644 index 0000000..d38c53c --- /dev/null +++ b/src/t_bitwriter.c @@ -0,0 +1,108 @@ +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" + +typedef struct { + unsigned long long data; + size_t nbits; +} bitwriter_test_elem; // should end with {0,0} + +void run_bitwriter_test(bitwriter_test_elem data[], char flags) { + size_t len; + const uint8_t *buf; + HBitWriter *w = h_bit_writer_new(&system_allocator); + int i; + w->flags = flags; + for (i = 0; data[i].nbits; i++) { + h_bit_writer_put(w, data[i].data, data[i].nbits); + } + + buf = h_bit_writer_get_buffer(w, &len); + HInputStream input = { + .input = buf, + .index = 0, + .length = len, + .bit_offset = (flags & BIT_BIG_ENDIAN) ? 
8 : 0, + .endianness = flags, + .overrun = 0 + }; + + for (i = 0; data[i].nbits; i++) { + g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data); + } +} + +static void test_bitwriter_ints(void) { + bitwriter_test_elem data[] = { + { -0x200000000, 64 }, + { 0,0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_bitwriter_be(void) { + bitwriter_test_elem data[] = { + { 0x03, 3 }, + { 0x52, 8 }, + { 0x1A, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_bitwriter_le(void) { + bitwriter_test_elem data[] = { + { 0x02, 3 }, + { 0x4D, 8 }, + { 0x0B, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +static void test_largebits_be(void) { + bitwriter_test_elem data[] = { + { 0x352, 11 }, + { 0x1A, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_largebits_le(void) { + bitwriter_test_elem data[] = { + { 0x26A, 11 }, + { 0x0B, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +static void test_offset_largebits_be(void) { + bitwriter_test_elem data[] = { + { 0xD, 5 }, + { 0x25A, 11 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_offset_largebits_le(void) { + bitwriter_test_elem data[] = { + { 0xA, 5 }, + { 0x2D3, 11 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +void register_bitwriter_tests(void) { + g_test_add_func("/core/bitwriter/be", test_bitwriter_be); + g_test_add_func("/core/bitwriter/le", test_bitwriter_le); + g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be); + g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le); + g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be); + 
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le); + g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints); +} diff --git a/src/t_parser.c b/src/t_parser.c new file mode 100644 index 0000000..ff05e71 --- /dev/null +++ b/src/t_parser.c @@ -0,0 +1,404 @@ +#include +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" +#include "parsers/parser_internal.h" + +static void test_token(void) { + const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3); + + g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>"); + g_check_parse_failed(token_, "95", 2); +} + +static void test_ch(void) { + const HParser *ch_ = h_ch(0xa2); + + g_check_parse_ok(ch_, "\xa2", 1, "u0xa2"); + g_check_parse_failed(ch_, "\xa3", 1); +} + +static void test_ch_range(void) { + const HParser *range_ = h_ch_range('a', 'c'); + + g_check_parse_ok(range_, "b", 1, "u0x62"); + g_check_parse_failed(range_, "d", 1); +} + +//@MARK_START +static void test_int64(void) { + const HParser *int64_ = h_int64(); + + g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000"); + g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7); +} + +static void test_int32(void) { + const HParser *int32_ = h_int32(); + + g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000"); + g_check_parse_failed(int32_, "\xff\xfe\x00", 3); +} + +static void test_int16(void) { + const HParser *int16_ = h_int16(); + + g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200"); + g_check_parse_failed(int16_, "\xfe", 1); +} + +static void test_int8(void) { + const HParser *int8_ = h_int8(); + + g_check_parse_ok(int8_, "\x88", 1, "s-0x78"); + g_check_parse_failed(int8_, "", 0); +} + +static void test_uint64(void) { + const HParser *uint64_ = h_uint64(); + + g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000"); + g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7); +} + +static void test_uint32(void) { + const HParser 
*uint32_ = h_uint32(); + + g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000"); + g_check_parse_failed(uint32_, "\x00\x02\x00", 3); +} + +static void test_uint16(void) { + const HParser *uint16_ = h_uint16(); + + g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200"); + g_check_parse_failed(uint16_, "\x02", 1); +} + +static void test_uint8(void) { + const HParser *uint8_ = h_uint8(); + + g_check_parse_ok(uint8_, "\x78", 1, "u0x78"); + g_check_parse_failed(uint8_, "", 0); +} +//@MARK_END + +static void test_int_range(void) { + const HParser *int_range_ = h_int_range(h_uint8(), 3, 10); + + g_check_parse_ok(int_range_, "\x05", 1, "u0x5"); + g_check_parse_failed(int_range_, "\xb", 1); +} + +#if 0 +static void test_float64(void) { + const HParser *float64_ = h_float64(); + + g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0); + g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7); +} + +static void test_float32(void) { + const HParser *float32_ = h_float32(); + + g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0); + g_check_parse_failed(float32_, "\x3f\x80\x00"); +} +#endif + + +static void test_whitespace(void) { + const HParser *whitespace_ = h_whitespace(h_ch('a')); + + g_check_parse_ok(whitespace_, "a", 1, "u0x61"); + g_check_parse_ok(whitespace_, " a", 2, "u0x61"); + g_check_parse_ok(whitespace_, " a", 3, "u0x61"); + g_check_parse_ok(whitespace_, "\ta", 2, "u0x61"); + g_check_parse_failed(whitespace_, "_a", 2); +} + +static void test_left(void) { + const HParser *left_ = h_left(h_ch('a'), h_ch(' ')); + + g_check_parse_ok(left_, "a ", 2, "u0x61"); + g_check_parse_failed(left_, "a", 1); + g_check_parse_failed(left_, " ", 1); + g_check_parse_failed(left_, "ab", 2); +} + +static void test_right(void) { + const HParser *right_ = h_right(h_ch(' '), h_ch('a')); + + g_check_parse_ok(right_, " a", 2, "u0x61"); + g_check_parse_failed(right_, "a", 1); + g_check_parse_failed(right_, " ", 1); + g_check_parse_failed(right_, "ba", 2); +} 
+ +static void test_middle(void) { + const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' ')); + + g_check_parse_ok(middle_, " a ", 3, "u0x61"); + g_check_parse_failed(middle_, "a", 1); + g_check_parse_failed(middle_, " ", 1); + g_check_parse_failed(middle_, " a", 2); + g_check_parse_failed(middle_, "a ", 2); + g_check_parse_failed(middle_, " b ", 3); + g_check_parse_failed(middle_, "ba ", 3); + g_check_parse_failed(middle_, " ab", 3); +} + +#include <ctype.h> + +const HParsedToken* upcase(const HParseResult *p) { + switch(p->ast->token_type) { + case TT_SEQUENCE: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used); + ret->token_type = TT_SEQUENCE; + for (size_t i=0; i<p->ast->seq->used; ++i) { + if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { + HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); + tmp->token_type = TT_UINT; + tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); + h_carray_append(seq, tmp); + } else { + h_carray_append(seq, p->ast->seq->elements[i]); + } + } + ret->seq = seq; + return (const HParsedToken*)ret; + } + case TT_UINT: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + ret->token_type = TT_UINT; + ret->uint = toupper(p->ast->uint); + return (const HParsedToken*)ret; + } + default: + return p->ast; + } +} + +static void test_action(void) { + const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'), + h_ch('A'), + NULL), + h_choice(h_ch('b'), + h_ch('B'), + NULL), + NULL), + upcase); + + g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)"); + g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)"); + g_check_parse_failed(action_, "XX", 2); +} + +static void test_in(void) { + uint8_t options[3] = { 'a', 'b', 'c' }; + const HParser *in_ = h_in(options, 3); + g_check_parse_ok(in_, "b", 1, "u0x62"); + g_check_parse_failed(in_, "d", 1); + +} + +static void test_not_in(void) { + uint8_t options[3] = { 'a', 'b', 
'c' }; + const HParser *not_in_ = h_not_in(options, 3); + g_check_parse_ok(not_in_, "d", 1, "u0x64"); + g_check_parse_failed(not_in_, "a", 1); + +} + +static void test_end_p(void) { + const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL); + g_check_parse_ok(end_p_, "a", 1, "(u0x61)"); + g_check_parse_failed(end_p_, "aa", 2); +} + +static void test_nothing_p(void) { + const HParser *nothing_p_ = h_nothing_p(); + g_check_parse_failed(nothing_p_, "a", 1); +} + +static void test_sequence(void) { + const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL); + const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL); + + g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_failed(sequence_1, "a", 1); + g_check_parse_failed(sequence_1, "b", 1); + g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)"); + g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)"); +} + +static void test_choice(void) { + const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL); + + g_check_parse_ok(choice_, "a", 1, "u0x61"); + g_check_parse_ok(choice_, "b", 1, "u0x62"); + g_check_parse_failed(choice_, "c", 1); +} + +static void test_butnot(void) { + const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2)); + const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6')); + + g_check_parse_ok(butnot_1, "a", 1, "u0x61"); + g_check_parse_failed(butnot_1, "ab", 2); + g_check_parse_ok(butnot_1, "aa", 2, "u0x61"); + g_check_parse_failed(butnot_2, "6", 1); +} + +static void test_difference(void) { + const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a')); + + g_check_parse_ok(difference_, "ab", 2, "<61.62>"); + g_check_parse_failed(difference_, "a", 1); +} + +static void test_xor(void) { + const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9')); + + g_check_parse_ok(xor_, "0", 1, "u0x30"); + 
g_check_parse_ok(xor_, "9", 1, "u0x39"); + g_check_parse_failed(xor_, "5", 1); + g_check_parse_failed(xor_, "a", 1); +} + +static void test_many(void) { + const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL)); + g_check_parse_ok(many_, "adef", 4, "(u0x61)"); + g_check_parse_ok(many_, "bdef", 4, "(u0x62)"); + g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); + g_check_parse_ok(many_, "daabbabadef", 11, "()"); +} + +static void test_many1(void) { + const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL)); + + g_check_parse_ok(many1_, "adef", 4, "(u0x61)"); + g_check_parse_ok(many1_, "bdef", 4, "(u0x62)"); + g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); + g_check_parse_failed(many1_, "daabbabadef", 11); +} + +static void test_repeat_n(void) { + const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2); + + g_check_parse_failed(repeat_n_, "adef", 4); + g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)"); + g_check_parse_failed(repeat_n_, "dabdef", 6); +} + +static void test_optional(void) { + const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL); + + g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)"); + g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)"); + g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)"); + g_check_parse_failed(optional_, "aed", 3); + g_check_parse_failed(optional_, "ab", 2); + g_check_parse_failed(optional_, "ac", 2); +} + +static void test_ignore(void) { + const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL); + + g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)"); + g_check_parse_failed(ignore_, "ac", 2); +} + +static void test_sepBy1(void) { + const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); + + g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 
u0x32 u0x33)"); + g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)"); + g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)"); + g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)"); +} + +static void test_epsilon_p(void) { + const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL); + const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL); + const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL); + + g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)"); + g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)"); +} + +static void test_attr_bool(void) { + +} + +static void test_and(void) { + const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL); + const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL); + const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL); + + g_check_parse_ok(and_1, "0", 1, "(u0x30)"); + g_check_parse_failed(and_2, "0", 1); + g_check_parse_ok(and_3, "12", 2, "(u0x31)"); +} + +static void test_not(void) { + const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL); + const HParser *not_2 = h_sequence(h_ch('a'), + h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL), + h_token((const uint8_t*)"++", 2), + NULL), h_ch('b'), NULL); + + g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)"); + g_check_parse_failed(not_1, "a++b", 4); + g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)"); + g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); +} + +void register_parser_tests(void) { + g_test_add_func("/core/parser/token", test_token); + g_test_add_func("/core/parser/ch", test_ch); + g_test_add_func("/core/parser/ch_range", test_ch_range); + g_test_add_func("/core/parser/int64", test_int64); + g_test_add_func("/core/parser/int32", test_int32); + g_test_add_func("/core/parser/int16", test_int16); + 
g_test_add_func("/core/parser/int8", test_int8); + g_test_add_func("/core/parser/uint64", test_uint64); + g_test_add_func("/core/parser/uint32", test_uint32); + g_test_add_func("/core/parser/uint16", test_uint16); + g_test_add_func("/core/parser/uint8", test_uint8); + g_test_add_func("/core/parser/int_range", test_int_range); +#if 0 + g_test_add_func("/core/parser/float64", test_float64); + g_test_add_func("/core/parser/float32", test_float32); +#endif + g_test_add_func("/core/parser/whitespace", test_whitespace); + g_test_add_func("/core/parser/left", test_left); + g_test_add_func("/core/parser/right", test_right); + g_test_add_func("/core/parser/middle", test_middle); + g_test_add_func("/core/parser/action", test_action); + g_test_add_func("/core/parser/in", test_in); + g_test_add_func("/core/parser/not_in", test_not_in); + g_test_add_func("/core/parser/end_p", test_end_p); + g_test_add_func("/core/parser/nothing_p", test_nothing_p); + g_test_add_func("/core/parser/sequence", test_sequence); + g_test_add_func("/core/parser/choice", test_choice); + g_test_add_func("/core/parser/butnot", test_butnot); + g_test_add_func("/core/parser/difference", test_difference); + g_test_add_func("/core/parser/xor", test_xor); + g_test_add_func("/core/parser/many", test_many); + g_test_add_func("/core/parser/many1", test_many1); + g_test_add_func("/core/parser/repeat_n", test_repeat_n); + g_test_add_func("/core/parser/optional", test_optional); + g_test_add_func("/core/parser/sepBy1", test_sepBy1); + g_test_add_func("/core/parser/epsilon_p", test_epsilon_p); + g_test_add_func("/core/parser/attr_bool", test_attr_bool); + g_test_add_func("/core/parser/and", test_and); + g_test_add_func("/core/parser/not", test_not); + g_test_add_func("/core/parser/ignore", test_ignore); +} diff --git a/src/test_suite.c b/src/test_suite.c index 16b3ef7..8d2913a 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -22,6 +22,7 @@ extern void register_bitreader_tests(); extern void 
register_bitwriter_tests(); extern void register_parser_tests(); +extern void register_benchmark_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -30,6 +31,7 @@ int main(int argc, char** argv) { register_bitreader_tests(); register_bitwriter_tests(); register_parser_tests(); + register_benchmark_tests(); g_test_run(); } From fe757bde2d502ae38041e8bd6515ca62ef7ad555 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Tue, 13 Nov 2012 22:42:11 -0500 Subject: [PATCH 022/125] Refactoring done. Added glib minimum version to README. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 34d3333..7e8f4d7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Installing ### Optional Dependencies * doxygen (for `make doc`) * pkg-config (for `make test`) -* glib-2.0 (for `make test`) +* glib-2.0 (>= 2.29) (for `make test`) * glib-2.0-dev (for `make test`) To install, type `make`. To run the built-in test suite, type `make test`. From 49ea7864cc54aff2c520a046502a27769aca21c8 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Tue, 13 Nov 2012 22:42:25 -0500 Subject: [PATCH 023/125] Tweaked docs --- Doxyfile | 1826 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ HACKING | 5 +- 2 files changed, 1829 insertions(+), 2 deletions(-) create mode 100644 Doxyfile diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..16f15ca --- /dev/null +++ b/Doxyfile @@ -0,0 +1,1826 @@ +# Doxyfile 1.8.1.2 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. + +PROJECT_NAME = Hammer + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Binary parser combinators in C" + +# With the PROJECT_LOGO tag one can specify a logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. 
+ +OUTPUT_DIRECTORY = /home/thequux/Projects/hammer/docs + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. 
Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. 
+ +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibilities issues. + +MARKDOWN_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. 
+ +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). 
+ +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE).
The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appear multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. 
When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. 
If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. 
This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). 
Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members.
If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = /home/thequux/Projects/hammer/src + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. 
Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.d \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.for \ + *.vhd \ + *.vhdl + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input.
+ +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file.
Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation.
+ +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C, C++ and Fortran comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. 
Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# style sheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. 
For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of +# entries shown in the various tree structured indices initially; the user +# can expand and collapse entries dynamically later on. Doxygen will expand +# the tree to such a level that at most the specified number of entries are +# visible (unless a fully collapsed tree already exceeds this amount). +# So setting the number of entries 1 will produce a full collapsed tree by +# default. 0 is a special value representing an infinite number of entries +# and will result in a full expanded tree by default. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. 
Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory.
+ +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. 
For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. For more information please see +# http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it.
Since the tabs have the same information as the +# navigation tree you can set this option to YES if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider setting DISABLE_INDEX to YES when enabling this option. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs.
Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want formulas to look prettier in the HTML +# output. When enabled you may also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = YES + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled.
For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. 
This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML.
+ +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. 
Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. 
If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. 
+ +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. 
+ +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: +# TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. 
The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. 
+ +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. If there are many fields or methods and many nodes the +# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# manageable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files.
+ +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). 
+ +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted.
Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/HACKING b/HACKING index acee9c2..7bffb4c 100644 --- a/HACKING +++ b/HACKING @@ -30,7 +30,7 @@ variants: the function name is followed by two underscores and a set of single-character "flags" indicating what optional features that particular variant has (in alphabetical order, of course): - __a: takes variadic arguments as a void*[] + __a: takes variadic arguments as a void*[] (not implemented yet, but will be soon). __m: takes a memory manager as the first argument, to override the system memory manager.
__v: Takes the variadic argument list as a va_list @@ -48,4 +48,5 @@ documented), assume that returns a value r, any function that uses r as a parameter must also be told to use m as a memory manager. -In other words, don't let the (memory manager) streams cross. \ No newline at end of file +In other words, don't let the (memory manager) streams cross. + From 1e35c3c63aeeccae392ead7bbb905640686045d5 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Tue, 13 Nov 2012 22:51:31 -0500 Subject: [PATCH 024/125] Fixed build error on OSX... I think. --- src/hammer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hammer.c b/src/hammer.c index 14db6a8..2333a79 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include From 4e6cfe93750bf8698fcc6e03082628c4365216b6 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Tue, 13 Nov 2012 22:54:10 -0500 Subject: [PATCH 025/125] doc isn't actually a target yet --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 7e8f4d7..d88a153 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,6 @@ Installing * make ### Optional Dependencies -* doxygen (for `make doc`) * pkg-config (for `make test`) * glib-2.0 (>= 2.29) (for `make test`) * glib-2.0-dev (for `make test`) From fa6497b9f0cecc725d763d6591dbab49598a92a4 Mon Sep 17 00:00:00 2001 From: "Meredith L. 
Patterson" Date: Wed, 14 Nov 2012 00:02:24 -0500 Subject: [PATCH 026/125] Refactored packrat-parsing code into packrat.c --- src/backends/packrat.c | 192 ++++++++++++++++++++++++++++++++++++++++- src/hammer.c | 188 ---------------------------------------- 2 files changed, 191 insertions(+), 189 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index e300d1f..d05129d 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -1,4 +1,194 @@ +#include #include "../internal.h" +#include "../parsers/parser_internal.h" + +// short-hand for constructing HCachedResult's +static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { + HCachedResult *ret = a_new(HCachedResult, 1); + ret->result = result; + ret->input_stream = state->input_stream; + return ret; +} + +// Really library-internal tool to perform an uncached parse, and handle any common error-handling. +static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) { + // TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when. + HParseResult *tmp_res; + if (parser) { + HInputStream bak = state->input_stream; + tmp_res = parser->vtable->parse(parser->env, state); + if (tmp_res) { + tmp_res->arena = state->arena; + if (!state->input_stream.overrun) { + tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); + if (state->input_stream.endianness & BIT_BIG_ENDIAN) + tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; + else + tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + } else + tmp_res->bit_length = 0; + } + } else + tmp_res = NULL; + if (state->input_stream.overrun) + return NULL; // overrun is always failure. 
+#ifdef CONSISTENCY_CHECK + if (!tmp_res) { + state->input_stream = INVALID; + state->input_stream.input = key->input_pos.input; + } +#endif + return tmp_res; +} + +HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { + HParserCacheValue *cached = h_hashtable_get(state->cache, k); + HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); + if (!head) { // No heads found + return cached; + } else { // Some heads found + if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { + // Nothing in the cache, and the key parser is not involved + HParseResult *tmp = a_new(HParseResult, 1); + tmp->ast = NULL; tmp->arena = state->arena; + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); + return ret; + } + if (h_slist_find(head->eval_set, k->parser)) { + // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. + head->eval_set = h_slist_remove_all(head->eval_set, k->parser); + HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); + // we know that cached has an entry here, modify it + if (!cached) + cached = a_new(HParserCacheValue, 1); + cached->value_type = PC_RIGHT; + cached->right = cached_result(state, tmp_res); + } + return cached; + } +} + +/* Setting up the left recursion. We have the LR for the rule head; + * we modify the involved_sets of all LRs in the stack, until we + * see the current parser again. 
+ */ + +void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { + if (!rec_detect->head) { + HRecursionHead *some = a_new(HRecursionHead, 1); + some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; + rec_detect->head = some; + } + assert(state->lr_stack->head != NULL); + HLeftRec *lr = state->lr_stack->head->elem; + while (lr && lr->rule != p) { + lr->head = rec_detect->head; + h_slist_push(lr->head->involved_set, (void*)lr->rule); + } +} + +/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the + * future parse. + */ + +HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { + // Store the head into the recursion_heads + h_hashtable_put(state->recursion_heads, k, head); + HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); + if (!old_cached || PC_LEFT == old_cached->value_type) + errx(1, "impossible match"); + HParseResult *old_res = old_cached->right->result; + + // reset the eval_set of the head of the recursion at each beginning of growth + head->eval_set = head->involved_set; + HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); + + if (tmp_res) { + if ((old_res->ast->index < tmp_res->ast->index) || + (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { + HParserCacheValue *v = a_new(HParserCacheValue, 1); + v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); + h_hashtable_put(state->cache, k, v); + return grow(k, state, head); + } else { + // we're done with growing, we can remove data from the recursion head + h_hashtable_del(state->recursion_heads, k); + HParserCacheValue *cached = h_hashtable_get(state->cache, k); + if (cached && PC_RIGHT == cached->value_type) { + return cached->right->result; + } else { + errx(1, "impossible match"); + } + } + } else { + h_hashtable_del(state->recursion_heads, k); + return old_res; + } +} + +HParseResult* 
lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) { + if (growable->head) { + if (growable->head->head_parser != k->parser) { + // not the head rule, so not growing + return growable->seed; + } + else { + // update cache + HParserCacheValue *v = a_new(HParserCacheValue, 1); + v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); + h_hashtable_put(state->cache, k, v); + if (!growable->seed) + return NULL; + else + return grow(k, state, growable->head); + } + } else { + errx(1, "lrAnswer with no head"); + } +} + +/* Warth's recursion. Hi Alessandro! */ +HParseResult* h_do_parse(const HParser* parser, HParseState *state) { + HParserCacheKey *key = a_new(HParserCacheKey, 1); + key->input_pos = state->input_stream; key->parser = parser; + HParserCacheValue *m = recall(key, state); + // check to see if there is already a result for this object... + if (!m) { + // It doesn't exist, so create a dummy result to cache + HLeftRec *base = a_new(HLeftRec, 1); + base->seed = NULL; base->rule = parser; base->head = NULL; + h_slist_push(state->lr_stack, base); + // cache it + HParserCacheValue *dummy = a_new(HParserCacheValue, 1); + dummy->value_type = PC_LEFT; dummy->left = base; + h_hashtable_put(state->cache, key, dummy); + // parse the input + HParseResult *tmp_res = perform_lowlevel_parse(state, parser); + // the base variable has passed equality tests with the cache + h_slist_pop(state->lr_stack); + // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one + if (NULL == base->head) { + HParserCacheValue *right = a_new(HParserCacheValue, 1); + right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); + h_hashtable_put(state->cache, key, right); + return tmp_res; + } else { + base->seed = tmp_res; + HParseResult *res = lr_answer(key, state, base); + return res; + } + } else { + // it exists! 
+ if (PC_LEFT == m->value_type) { + setupLR(parser, state, m->left); + return m->left->seed; // BUG: this might not be correct + } else { + state->input_stream = m->right->input_stream; + return m->right->result; + } + } +} int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) { return 0; // No compilation necessary, and everything should work @@ -6,7 +196,7 @@ int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* param } HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) { - return NULL; // TODO: fill this in. + return h_do_parse(parser, parse_state); } HParserBackendVTable h__packrat_backend_vtable = { diff --git a/src/hammer.c b/src/hammer.c index 2333a79..c33f6c8 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -34,194 +34,6 @@ static uint32_t djbhash(const uint8_t *buf, size_t len) { return hash; } -// short-hand for constructing HCachedResult's -static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { - HCachedResult *ret = a_new(HCachedResult, 1); - ret->result = result; - ret->input_stream = state->input_stream; - return ret; -} - -// Really library-internal tool to perform an uncached parse, and handle any common error-handling. -static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) { - // TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when. 
- HParseResult *tmp_res; - if (parser) { - HInputStream bak = state->input_stream; - tmp_res = parser->vtable->parse(parser->env, state); - if (tmp_res) { - tmp_res->arena = state->arena; - if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; - } else - tmp_res->bit_length = 0; - } - } else - tmp_res = NULL; - if (state->input_stream.overrun) - return NULL; // overrun is always failure. -#ifdef CONSISTENCY_CHECK - if (!tmp_res) { - state->input_stream = INVALID; - state->input_stream.input = key->input_pos.input; - } -#endif - return tmp_res; -} - -HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { - HParserCacheValue *cached = h_hashtable_get(state->cache, k); - HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); - if (!head) { // No heads found - return cached; - } else { // Some heads found - if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { - // Nothing in the cache, and the key parser is not involved - HParseResult *tmp = a_new(HParseResult, 1); - tmp->ast = NULL; tmp->arena = state->arena; - HParserCacheValue *ret = a_new(HParserCacheValue, 1); - ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); - return ret; - } - if (h_slist_find(head->eval_set, k->parser)) { - // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
- head->eval_set = h_slist_remove_all(head->eval_set, k->parser); - HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - // we know that cached has an entry here, modify it - if (!cached) - cached = a_new(HParserCacheValue, 1); - cached->value_type = PC_RIGHT; - cached->right = cached_result(state, tmp_res); - } - return cached; - } -} - -/* Setting up the left recursion. We have the LR for the rule head; - * we modify the involved_sets of all LRs in the stack, until we - * see the current parser again. - */ - -void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { - if (!rec_detect->head) { - HRecursionHead *some = a_new(HRecursionHead, 1); - some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; - rec_detect->head = some; - } - assert(state->lr_stack->head != NULL); - HLeftRec *lr = state->lr_stack->head->elem; - while (lr && lr->rule != p) { - lr->head = rec_detect->head; - h_slist_push(lr->head->involved_set, (void*)lr->rule); - } -} - -/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the - * future parse. 
- */ - -HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { - // Store the head into the recursion_heads - h_hashtable_put(state->recursion_heads, k, head); - HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); - if (!old_cached || PC_LEFT == old_cached->value_type) - errx(1, "impossible match"); - HParseResult *old_res = old_cached->right->result; - - // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; - HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - - if (tmp_res) { - if ((old_res->ast->index < tmp_res->ast->index) || - (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, k, v); - return grow(k, state, head); - } else { - // we're done with growing, we can remove data from the recursion head - h_hashtable_del(state->recursion_heads, k); - HParserCacheValue *cached = h_hashtable_get(state->cache, k); - if (cached && PC_RIGHT == cached->value_type) { - return cached->right->result; - } else { - errx(1, "impossible match"); - } - } - } else { - h_hashtable_del(state->recursion_heads, k); - return old_res; - } -} - -HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) { - if (growable->head) { - if (growable->head->head_parser != k->parser) { - // not the head rule, so not growing - return growable->seed; - } - else { - // update cache - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); - h_hashtable_put(state->cache, k, v); - if (!growable->seed) - return NULL; - else - return grow(k, state, growable->head); - } - } else { - errx(1, "lrAnswer with no head"); - } -} - -/* Warth's recursion. Hi Alessandro! 
*/ -HParseResult* h_do_parse(const HParser* parser, HParseState *state) { - HParserCacheKey *key = a_new(HParserCacheKey, 1); - key->input_pos = state->input_stream; key->parser = parser; - HParserCacheValue *m = recall(key, state); - // check to see if there is already a result for this object... - if (!m) { - // It doesn't exist, so create a dummy result to cache - HLeftRec *base = a_new(HLeftRec, 1); - base->seed = NULL; base->rule = parser; base->head = NULL; - h_slist_push(state->lr_stack, base); - // cache it - HParserCacheValue *dummy = a_new(HParserCacheValue, 1); - dummy->value_type = PC_LEFT; dummy->left = base; - h_hashtable_put(state->cache, key, dummy); - // parse the input - HParseResult *tmp_res = perform_lowlevel_parse(state, parser); - // the base variable has passed equality tests with the cache - h_slist_pop(state->lr_stack); - // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one - if (NULL == base->head) { - HParserCacheValue *right = a_new(HParserCacheValue, 1); - right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, key, right); - return tmp_res; - } else { - base->seed = tmp_res; - HParseResult *res = lr_answer(key, state, base); - return res; - } - } else { - // it exists! - if (PC_LEFT == m->value_type) { - setupLR(parser, state, m->left); - return m->left->seed; // BUG: this might not be correct - } else { - state->input_stream = m->right->input_stream; - return m->right->result; - } - } -} - /* Helper function, since these lines appear in every parser */ typedef struct { From 8163b3981d2767d15ef136756d82bca1cbd0336c Mon Sep 17 00:00:00 2001 From: "Meredith L. 
Patterson" Date: Wed, 14 Nov 2012 14:05:25 -0500 Subject: [PATCH 027/125] Changed other instances of malloc.h to stdlib.h for OSX compat --- src/datastructures.c | 2 +- src/internal.h | 2 +- src/pprint.c | 2 +- src/system_allocator.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datastructures.c b/src/datastructures.c index e94532e..b1e4f75 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -2,7 +2,7 @@ #include "hammer.h" #include "allocator.h" #include -#include +#include #include // {{{ counted arrays diff --git a/src/internal.h b/src/internal.h index 269bc4f..67ecb22 100644 --- a/src/internal.h +++ b/src/internal.h @@ -223,7 +223,7 @@ void h_hashtable_del(HHashTable* ht, void* key); void h_hashtable_free(HHashTable* ht); #if 0 -#include +#include #define h_arena_malloc(a, s) malloc(s) #endif diff --git a/src/pprint.c b/src/pprint.c index 3a8df82..1ffe764 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -20,7 +20,7 @@ #include #include "hammer.h" #include "internal.h" -#include +#include typedef struct pp_state { int delta; diff --git a/src/system_allocator.c b/src/system_allocator.c index 26e2273..7248fd2 100644 --- a/src/system_allocator.c +++ b/src/system_allocator.c @@ -1,4 +1,4 @@ -#include +#include #include "internal.h" static void* system_alloc(HAllocator *allocator, size_t size) { From 4d0ec07c1a9b34f5e11df598877c36fca1516164 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Mon, 7 Jan 2013 23:44:26 +0100 Subject: [PATCH 028/125] make TT_USER > TT_ERR, remove TT_MAX --- src/Makefile | 1 + src/hammer.h | 3 +-- src/t_misc.c | 16 ++++++++++++++++ src/test_suite.c | 2 ++ 4 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 src/t_misc.c diff --git a/src/Makefile b/src/Makefile index 128de05..47e136d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,6 +45,7 @@ TESTS := t_benchmark.o \ t_bitreader.o \ t_bitwriter.o \ t_parser.o \ + t_misc.o \ test_suite.o OUTPUTS := libhammer.a \ diff --git a/src/hammer.h b/src/hammer.h index 0791769..5a0c625 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -43,9 +43,8 @@ typedef enum HTokenType_ { TT_SINT, TT_UINT, TT_SEQUENCE, - TT_USER = 64, TT_ERR, - TT_MAX + TT_USER = 64 } HTokenType; typedef struct HCountedArray_ { diff --git a/src/t_misc.c b/src/t_misc.c new file mode 100644 index 0000000..5c08a2e --- /dev/null +++ b/src/t_misc.c @@ -0,0 +1,16 @@ +#include +#include "test_suite.h" +#include "hammer.h" + +static void test_tt_user(void) { + g_check_cmpint(TT_USER, >, TT_NONE); + g_check_cmpint(TT_USER, >, TT_BYTES); + g_check_cmpint(TT_USER, >, TT_SINT); + g_check_cmpint(TT_USER, >, TT_UINT); + g_check_cmpint(TT_USER, >, TT_SEQUENCE); + g_check_cmpint(TT_USER, >, TT_ERR); +} + +void register_misc_tests(void) { + g_test_add_func("/core/misc/tt_user", test_tt_user); +} diff --git a/src/test_suite.c b/src/test_suite.c index 8d2913a..e01d020 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -22,6 +22,7 @@ extern void register_bitreader_tests(); extern void register_bitwriter_tests(); extern void register_parser_tests(); +extern void register_misc_tests(); extern void register_benchmark_tests(); int main(int argc, char** argv) { @@ -31,6 +32,7 @@ int main(int argc, char** argv) { register_bitreader_tests(); register_bitwriter_tests(); register_parser_tests(); + register_misc_tests(); register_benchmark_tests(); g_test_run(); From bc486aa8401a0e5b23c0c3cd1c253ca6a3480e3b 
Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 8 Jan 2013 00:19:23 +0100 Subject: [PATCH 029/125] fix base64 example to parse more than the 2- and 1-byte special cases --- examples/base64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/base64.c b/examples/base64.c index beb2484..8ebe77b 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -19,9 +19,13 @@ void init_parser(void) h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); + const HParser *base64_3 = h_repeat_n(bsfdig, 4); const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); - const HParser *base64 = h_choice(base64_2, base64_1, NULL); + const HParser *base64 = h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL); // why does this parse "A=="?! // why does this parse "aaA=" but not "aA=="?! From 87b5e668c327d55781b8960df59a0c5a177b56ee Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 8 Jan 2013 00:20:34 +0100 Subject: [PATCH 030/125] remove comments about bugs that no longer exist --- examples/base64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/base64.c b/examples/base64.c index 8ebe77b..7692532 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -26,8 +26,6 @@ void init_parser(void) h_optional(h_choice(base64_2, base64_1, NULL)), NULL); - // why does this parse "A=="?! - // why does this parse "aaA=" but not "aA=="?! document = base64; } From 177281289c45c8021fd256190f1be009feabd2b9 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Tue, 8 Jan 2013 00:24:13 +0100 Subject: [PATCH 031/125] use h_in for restricted base64 digit cases --- examples/base64.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/base64.c b/examples/base64.c index 7692532..6c4db9e 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -14,11 +14,8 @@ void init_parser(void) const HParser *equals = h_ch('='); const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); - const HParser *bsfdig_4bit = h_choice( - h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'), - h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), - h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); - const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); + const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); + const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); const HParser *base64_3 = h_repeat_n(bsfdig, 4); const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); From 2bc03e4e9ffa8f6f2ee7f34cf4d993bde29c9074 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 9 Jan 2013 14:48:02 +0100 Subject: [PATCH 032/125] allow h_whitespace(p) to succeed on end of input if p succeeds --- src/parsers/whitespace.c | 2 +- src/t_parser.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 8e009a4..45c7dcb 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -8,7 +8,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { bak = state->input_stream; c = h_read_bits(&state->input_stream, 8, false); if (state->input_stream.overrun) - return NULL; + break; } while (isspace(c)); state->input_stream = bak; return h_do_parse((HParser*)env, state); diff --git a/src/t_parser.c b/src/t_parser.c index ff05e71..b1f9b63 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -110,12 +110,17 @@ static void test_float32(void) { static void test_whitespace(void) { const HParser *whitespace_ = h_whitespace(h_ch('a')); + const HParser *whitespace_end = h_whitespace(h_end_p()); g_check_parse_ok(whitespace_, "a", 1, "u0x61"); g_check_parse_ok(whitespace_, " a", 2, "u0x61"); g_check_parse_ok(whitespace_, " a", 3, "u0x61"); g_check_parse_ok(whitespace_, "\ta", 2, "u0x61"); g_check_parse_failed(whitespace_, "_a", 2); + + g_check_parse_ok(whitespace_end, "", 0, "NULL"); + g_check_parse_ok(whitespace_end, " ", 2, "NULL"); + g_check_parse_failed(whitespace_end, " x", 3); } static void test_left(void) { From 0497cdb08b034dc30255ebd41316d0d5132df2d6 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 00:08:07 +0100 Subject: [PATCH 033/125] add trivial left-recursion test case --- src/t_parser.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/t_parser.c b/src/t_parser.c index b1f9b63..6bbc409 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -365,6 +365,17 @@ static void test_not(void) { g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); } +static void test_leftrec(void) { + const HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_ok(lr_, "a", 1, "(u0x61)"); + g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)"); + g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)"); +} + void register_parser_tests(void) { g_test_add_func("/core/parser/token", test_token); g_test_add_func("/core/parser/ch", test_ch); @@ -406,4 +417,5 @@ void register_parser_tests(void) { g_test_add_func("/core/parser/and", test_and); g_test_add_func("/core/parser/not", test_not); g_test_add_func("/core/parser/ignore", test_ignore); + g_test_add_func("/core/parser/leftrec", test_leftrec); } From ce88a3f49d82a2c2e03e7061baa43eea33dea617 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 00:39:15 +0100 Subject: [PATCH 034/125] initialize involved_set to an empty HList, not NULL --- src/backends/packrat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index d05129d..25a0966 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -77,7 +77,9 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { if (!rec_detect->head) { HRecursionHead *some = a_new(HRecursionHead, 1); - some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; + some->head_parser = p; + some->involved_set = h_slist_new(state->arena); + some->eval_set = NULL; rec_detect->head = some; } assert(state->lr_stack->head != NULL); @@ -101,7 +103,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParseResult *old_res = old_cached->right->result; // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; + head->eval_set = head->involved_set; // BUG: this must be a copy HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { From dffe47bf5775f35bc13cdd9bea856ac3e9cbdfbb Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 00:40:19 +0100 Subject: [PATCH 035/125] initialize eval_set to a proper copy of involved_set --- src/backends/packrat.c | 2 +- src/datastructures.c | 20 ++++++++++++++++++++ src/internal.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 25a0966..c67c3e4 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -103,7 +103,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParseResult *old_res = old_cached->right->result; // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; // BUG: this must be a copy + head->eval_set = h_slist_copy(head->involved_set); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { diff --git a/src/datastructures.c b/src/datastructures.c index b1e4f75..3d94804 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -41,6 +41,26 @@ HSlist* h_slist_new(HArena *arena) { return ret; } +HSlist* h_slist_copy(HSlist *slist) { + HSlist *ret = h_slist_new(slist->arena); + HSlistNode *head = slist->head; + HSlistNode *tail; + if (head != NULL) { + h_slist_push(ret, head->elem); + tail = ret->head; + head = head->next; + } + while (head != NULL) { + // append head item to tail in a new node + HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + node->elem = head->elem; + node->next = NULL; + tail = tail->next = node; + head = head->next; + } + return ret; +} + void* h_slist_pop(HSlist *slist) { HSlistNode *head = slist->head; if (!head) diff --git a/src/internal.h b/src/internal.h index 67ecb22..0dcf857 100644 --- a/src/internal.h +++ b/src/internal.h @@ -209,6 +209,7 @@ HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); HSlist* h_slist_new(HArena *arena); +HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void 
h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); From 63dbf83b4de91ae22c6fe5c0b08b1831bc7c86fd Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 01:04:04 +0100 Subject: [PATCH 036/125] fix a loop in setupLR --- src/backends/packrat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c67c3e4..cc2a9db 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -83,10 +83,12 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { rec_detect->head = some; } assert(state->lr_stack->head != NULL); - HLeftRec *lr = state->lr_stack->head->elem; - while (lr && lr->rule != p) { + HSlistNode *head = state->lr_stack->head; + HLeftRec *lr; + while (head && (lr = head->elem)->rule != p) { lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); + head = head->next; } } From 4e3084739e301a1412f59a1991d64b502e159c17 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 01:09:04 +0100 Subject: [PATCH 037/125] fix expected value of first leftrec test case --- src/t_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_parser.c b/src/t_parser.c index 6bbc409..daca1a3 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -371,7 +371,7 @@ static void test_leftrec(void) { HParser *lr_ = h_indirect(); h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); - g_check_parse_ok(lr_, "a", 1, "(u0x61)"); + g_check_parse_ok(lr_, "a", 1, "u0x61"); g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)"); g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)"); } From 445913610da0734630b0affd512e1d6024b27284 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 01:25:37 +0100 Subject: [PATCH 038/125] parse whole input and allow surrounding whitespace in base64 example --- examples/base64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64.c b/examples/base64.c index 6c4db9e..cdb088e 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -24,7 +24,7 @@ void init_parser(void) base64_1, NULL)), NULL); - document = base64; + document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); } From 2af0f9c47cdff7cdbc86fd4f22974974e08d4111 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 22:33:58 +0100 Subject: [PATCH 039/125] start a variant of the base64 example with semantic actions --- examples/Makefile | 12 ++-- examples/base64_sem1.c | 142 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 examples/base64_sem1.c diff --git a/examples/Makefile b/examples/Makefile index 6a054ca..a3be0ce 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -2,7 +2,9 @@ OUTPUTS := dns.o \ dns \ base64.o \ - base64 + base64 \ + base64_sem1.o \ + base64_sem1 TOPLEVEL := ../ @@ -12,7 +14,7 @@ LDFLAGS += $(pkg-config --libs glib-2.0) -all: dns base64 +all: dns base64 base64_sem1 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o @@ -24,8 +26,8 @@ rr.o: ../src/hammer.h rr.h dns_common.h dns_common.o: ../src/hammer.h dns_common.h -base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -base64: base64.o +base64%: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64%: base64%.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -base64.o: ../src/hammer.h +base64%.o: ../src/hammer.h diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c new file mode 100644 index 0000000..83efc64 --- /dev/null +++ b/examples/base64_sem1.c @@ -0,0 +1,142 @@ +#include "../src/hammer.h" +#include <assert.h> + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser
*rule = h_action(def, act_ ## rule) + + +/// +// Semantic action helpers. +// These might be candidates for inclusion in the library. +/// + +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_<rulename>. +/// + +const HParsedToken *act_bsfdig(const HParseResult *p) +{ + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + + assert(p->ast->token_type == TT_UINT); + uint8_t c = p->ast->uint; + + res->token_type = TT_UINT; + if(c >= 0x40 && c <= 0x5A) // A-Z + res->uint = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + res->uint = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + res->uint = c - 0x30 + 52; + else if(c == '+') + res->uint = 62; + else if(c == '/') + res->uint = 63; + + return res; +} + +#define act_bsfdig_4bit act_bsfdig +#define act_bsfdig_2bit act_bsfdig + +#define act_equals act_ignore +#define act_ws act_ignore + +#define act_document act_index0 + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. 
+ H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_ARULE(equals, h_ch('=')); + + H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_RULE (base64_3, h_repeat_n(bsfdig, 4)); + H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_RULE (base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include <stdio.h> + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} From 54bd5a4a3832a2f4c85cc34799022f61d0ddcf86 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 23:44:28 +0100 Subject: [PATCH 040/125] add semantic actions for base64_3, base64_2, base64_1 --- examples/base64_sem1.c | 84 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 83efc64..7839435 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -1,4 +1,5 @@ #include "../src/hammer.h" +#include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include <assert.h> @@ -77,6 +78,81 @@ const HParsedToken *act_bsfdig(const HParseResult *p) #define act_document act_index0 +// helper +void carray_append_uint(HCountedArray *array, uint8_t value) +{ + HParsedToken *item = h_arena_malloc(array->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = value; + h_carray_append(array, item); +} + +const HParsedToken *act_base64_3(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + x <<= 6; x |= digits[2]->uint; + x <<= 6; x |= digits[3]->uint; + + carray_append_uint(res->seq, (x >> 16) & 0xFF); + carray_append_uint(res->seq, (x >> 8) & 0xFF); + carray_append_uint(res->seq, x & 0xFF); + + return res; +} + +const HParsedToken *act_base64_2(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + x <<= 6; x |= digits[2]->uint; + + carray_append_uint(res->seq, (x >> 10) & 0xFF); + carray_append_uint(res->seq, (x >> 2) & 0xFF); + + return res; +} + +const HParsedToken *act_base64_1(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + + carray_append_uint(res->seq, (x >> 4) & 0xFF); + + return res; +} + +#if 0 +const HParsedToken *act_base64(const HParseResult *p) +{ + // 
XXX + // concatenate base64_3 blocks + // append trailing base64_2 or _1 block +} +#endif + /// // Set up the parser with the grammar to be recognized. @@ -97,10 +173,10 @@ const HParser *init_parser(void) H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); - H_RULE (base64_3, h_repeat_n(bsfdig, 4)); - H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); - H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); - H_RULE (base64, h_sequence(h_many(base64_3), + H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); + H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_RULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); From 97b13672ced335dd7a8c4c926a150ddffb63d401 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 00:41:34 +0100 Subject: [PATCH 041/125] generalize act_base64_n --- examples/base64_sem1.c | 83 ++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 51 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 7839435..6dbb2fd 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -78,70 +78,51 @@ const HParsedToken *act_bsfdig(const HParseResult *p) #define act_document act_index0 -// helper -void carray_append_uint(HCountedArray *array, uint8_t value) +// General-form action to turn a block of base64 digits into bytes. 
+const HParsedToken *act_base64_n(int n, const HParseResult *p) { - HParsedToken *item = h_arena_malloc(array->arena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = value; - h_carray_append(array, item); + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, n); + + HParsedToken **digits = p->ast->seq->elements; + + uint32_t x = 0; + int bits = 0; + for(int i=0; i<n+1; i++) { + x <<= 6; x |= digits[i]->uint; + bits += 6; + } + x >>= bits%8; // align, i.e. cut off extra bits + + for(int i=0; i<n; i++) { + HParsedToken *item = h_arena_malloc(p->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = x & 0xFF; + + res->seq->elements[n-1-i] = item; // output the last byte and + x >>= 8; // discard it + } + res->seq->used = n; + + return res; } const HParsedToken *act_base64_3(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - x <<= 6; x |= digits[2]->uint; - x <<= 6; x |= digits[3]->uint; - - carray_append_uint(res->seq, (x >> 16) & 0xFF); - carray_append_uint(res->seq, (x >> 8) & 0xFF); - carray_append_uint(res->seq, x & 0xFF); - - return res; + return act_base64_n(3, p); } const HParsedToken *act_base64_2(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - x <<= 6; x |= digits[2]->uint; - - carray_append_uint(res->seq, (x >> 10) & 0xFF); - carray_append_uint(res->seq, (x >> 2) & 0xFF); - - return res; + return 
act_base64_n(2, p); } const HParsedToken *act_base64_1(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - - carray_append_uint(res->seq, (x >> 4) & 0xFF); - - return res; + return act_base64_n(1, p); } #if 0 From c62079516d8441f937599d036231c8cfabd4e89a Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 00:57:34 +0100 Subject: [PATCH 042/125] add semantic action for base64 rule --- examples/base64_sem1.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 6dbb2fd..5e1673a 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -125,14 +125,37 @@ const HParsedToken *act_base64_1(const HParseResult *p) return act_base64_n(1, p); } -#if 0 +// Helper to concatenate two arrays. 
+void carray_concat(HCountedArray *a, const HCountedArray *b) +{ + for(size_t i=0; i<b->used; i++) + h_carray_append(a, b->elements[i]); +} + const HParsedToken *act_base64(const HParseResult *p) { - // XXX + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new(p->arena); + // concatenate base64_3 blocks - // append trailing base64_2 or _1 block + HCountedArray *seq = p->ast->seq->elements[0]->seq; + for(size_t i=0; i<seq->used; i++) { + assert(seq->elements[i]->token_type == TT_SEQUENCE); + carray_concat(res->seq, seq->elements[i]->seq); + } + + // append one trailing base64_2 or _1 block + const HParsedToken *tok = p->ast->seq->elements[1]; + if(tok->token_type == TT_SEQUENCE) + carray_concat(res->seq, tok->seq); + + return res; } -#endif /// @@ -157,7 +180,7 @@ const HParser *init_parser(void) H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); - H_RULE(base64, h_sequence(h_many(base64_3), + H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); From ad3c2032dc86c4929f72154cd2121e8259397946 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 01:10:16 +0100 Subject: [PATCH 043/125] add a note about weird bug --- examples/base64_sem1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 5e1673a..8638bb3 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -188,6 +188,10 @@ const HParser *init_parser(void) H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + // BUG sometimes inputs that should just don't parse. 
+ // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using less actions seemed to make it less likely. + return document; } From 32dfae0af3db9eedfae6d97b02677cbd3e9e97db Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 13 Jan 2013 18:19:59 +0100 Subject: [PATCH 044/125] add a variant of the base64 example with coarse-grained semantic actions --- examples/Makefile | 6 +- examples/base64_sem2.c | 207 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 examples/base64_sem2.c diff --git a/examples/Makefile b/examples/Makefile index a3be0ce..98797f3 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -4,7 +4,9 @@ OUTPUTS := dns.o \ base64.o \ base64 \ base64_sem1.o \ - base64_sem1 + base64_sem1 \ + base64_sem2.o \ + base64_sem2 TOPLEVEL := ../ @@ -14,7 +16,7 @@ LDFLAGS += $(pkg-config --libs glib-2.0) -all: dns base64 base64_sem1 +all: dns base64 base64_sem1 base64_sem2 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c new file mode 100644 index 0000000..957ac48 --- /dev/null +++ b/examples/base64_sem2.c @@ -0,0 +1,207 @@ +#include "../src/hammer.h" +#include "../src/internal.h" // for h_carray functions (XXX ?!) +#include <assert.h> + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) + + +/// +// Semantic action helpers. +// These might be candidates for inclusion in the library. +/// + +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. 
+const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_<rulename>. +/// + +// helper: return the numeric value of a parsed base64 digit +uint8_t bsfdig_value(const HParsedToken *p) +{ + uint8_t value = 0; + + if(p && p->token_type == TT_UINT) { + uint8_t c = p->uint; + if(c >= 0x40 && c <= 0x5A) // A-Z + value = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + value = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + value = c - 0x30 + 52; + else if(c == '+') + value = 62; + else if(c == '/') + value = 63; + } + + return value; +} + +// helper: append a byte value to a sequence +void seq_append_byte(HCountedArray *a, uint8_t b) +{ + HParsedToken *item = h_arena_malloc(a->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = b; + h_carray_append(a, item); +} + +const HParsedToken *act_base64(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + // grab b64_3 block sequence + // grab and analyze b64 end block (_2 or _1) + const HParsedToken *b64_3 = p->ast->seq->elements[0]; + const HParsedToken *b64_2 = p->ast->seq->elements[1]; + const HParsedToken *b64_1 = p->ast->seq->elements[1]; + + if(b64_2->token_type != TT_SEQUENCE) + b64_1 = b64_2 = NULL; + else if(b64_2->seq->elements[2]->uint == '=') + b64_2 = NULL; + else + b64_1 = NULL; + + // allocate result sequence + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = 
TT_SEQUENCE; + res->seq = h_carray_new(p->arena); + + // concatenate base64_3 blocks + for(size_t i=0; i<b64_3->seq->used; i++) { + assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE); + HParsedToken **digits = b64_3->seq->elements[i]->seq->elements; + + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + x <<= 6; x |= bsfdig_value(digits[3]); + seq_append_byte(res->seq, (x >> 16) & 0xFF); + seq_append_byte(res->seq, (x >> 8) & 0xFF); + seq_append_byte(res->seq, x & 0xFF); + } + + // append one trailing base64_2 or _1 block + if(b64_2) { + HParsedToken **digits = b64_2->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + seq_append_byte(res->seq, (x >> 10) & 0xFF); + seq_append_byte(res->seq, (x >> 2) & 0xFF); + } else if(b64_1) { + HParsedToken **digits = b64_1->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + seq_append_byte(res->seq, (x >> 4) & 0xFF); + } + + return res; +} + +#define act_ws act_ignore +#define act_document act_index0 + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. 
+ H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_RULE (equals, h_ch('=')); + + H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_RULE (base64_3, h_repeat_n(bsfdig, 4)); + H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_ARULE(base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + // BUG sometimes inputs that should just don't parse. + // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using less actions seemed to make it less likely. + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include <stdio.h> + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} From 1998ae243aab922aa3d520d98e607ac853b3bc53 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 13 Jan 2013 18:47:03 +0100 Subject: [PATCH 045/125] add introductory comments to base64 examples --- examples/base64.c | 10 ++++++++++ examples/base64_sem1.c | 14 ++++++++++++++ examples/base64_sem2.c | 15 +++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/examples/base64.c b/examples/base64.c index cdb088e..ee142e3 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -1,3 +1,13 @@ +// Example parser: Base64, syntax only. +// +// Demonstrates how to construct a Hammer parser that recognizes valid Base64 +// sequences. +// +// Note that no semantic evaluation of the sequence is performed, i.e. the +// byte sequence being represented is not returned, or determined. See +// base64_sem1.c and base64_sem2.c for examples how to attach appropriate +// semantic actions to the grammar. + #include "../src/hammer.h" const HParser* document = NULL; diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8638bb3..92f0b3f 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -1,3 +1,17 @@ +// Example parser: Base64, with fine-grained semantic actions +// +// Demonstrates how to attach semantic actions to grammar rules and piece by +// piece transform the parse tree into the desired semantic representation, +// in this case a sequence of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// (a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses fine-grained semantic actions that +// transform the parse tree in small steps in a bottom-up fashion. Compare +// base64_sem2.c for an alternative approach using a single top-level action. + #include "../src/hammer.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 957ac48..c57555e 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -1,3 +1,18 @@ +// Example parser: Base64, with fine-grained semantic actions +// +// Demonstrates how to attach semantic actions to a grammar and transform the +// parse tree into the desired semantic representation, in this case a sequence +// of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// (a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses coarse-grained semantic actions, +// transforming the entire parse tree in one big step. Compare base64_sem1.c +// for an alternative approach using a fine-grained piece-by-piece +// transformation. + #include "../src/hammer.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include From 619725e5bb2e3962235ccaa0f543e710b5f88f55 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 13 Jan 2013 18:56:16 +0100 Subject: [PATCH 046/125] typo --- examples/base64_sem2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index c57555e..11b0660 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -6,7 +6,7 @@ // // Note how the grammar is defined by using the macros H_RULE and H_ARULE. // Those rules using ARULE get an attached action which must be declared (as -// (a function of type HAction) with a standard name based on the rule name. +// a function of type HAction) with a standard name based on the rule name. // // This variant of the example uses coarse-grained semantic actions, // transforming the entire parse tree in one big step. Compare base64_sem1.c From fab8705828b5e13ac15ba87c8a7ff995cf7063be Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 13 Jan 2013 18:56:45 +0100 Subject: [PATCH 047/125] typo --- examples/base64_sem1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 92f0b3f..8de31db 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -6,7 +6,7 @@ // // Note how the grammar is defined by using the macros H_RULE and H_ARULE. // Those rules using ARULE get an attached action which must be declared (as -// (a function of type HAction) with a standard name based on the rule name. +// a function of type HAction) with a standard name based on the rule name. // // This variant of the example uses fine-grained semantic actions that // transform the parse tree in small steps in a bottom-up fashion. Compare From 40ab87b4fbb5c08696c13cc178573cd8774c08eb Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 14 Jan 2013 20:35:20 +0100 Subject: [PATCH 048/125] add user-defined token types for dns parts --- examples/dns.c | 2 +- examples/dns.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/examples/dns.c b/examples/dns.c index 54d9c7e..676855e 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -286,7 +286,7 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { const HParsedToken* pack_dns_struct(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_USER; + ret->token_type = TT_DNS_MESSAGE; dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t)); diff --git a/examples/dns.h b/examples/dns.h index 151c46e..a26374e 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -1,5 +1,13 @@ #include "../src/hammer.h" +enum DNSTokenType_ { + TT_DNS_MESSAGE = TT_USER, + TT_DNS_HEADER, + TT_DNS_QNAME, + TT_DNS_QUESTION, + TT_DNS_RR +}; + struct dns_header { uint16_t id; bool qr, aa, tc, rd, ra; @@ -9,6 +17,7 @@ struct dns_header { size_t authority_count; size_t 
additional_count; }; + struct dns_qname { size_t qlen; struct { @@ -16,11 +25,13 @@ struct dns_qname { uint8_t *label; } *labels; }; + struct dns_question { struct dns_qname qname; uint16_t qtype; uint16_t qclass; }; + struct dns_rr { char* name; uint16_t type; From 2ff51ab9c36dfc0fc723c38d26009048413e9e37 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 14 Jan 2013 21:25:46 +0100 Subject: [PATCH 049/125] rework dns example syntax into H_RULE style --- examples/dns.c | 119 +++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 58 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 676855e..010c462 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -10,6 +10,10 @@ #define false 0 #define true 1 +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) + + bool is_zero(HParseResult *p) { if (TT_UINT != p->ast->token_type) return false; @@ -283,7 +287,7 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { } } -const HParsedToken* pack_dns_struct(const HParseResult *p) { +const HParsedToken* act_dns_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); ret->token_type = TT_DNS_MESSAGE; @@ -360,66 +364,65 @@ const HParsedToken* pack_dns_struct(const HParseResult *p) { return ret; } +// The action equivalent of h_ignore. 
+const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +#define act_dns_hdzero act_ignore + const HParser* init_parser() { - static HParser *dns_message = NULL; - if (dns_message) - return dns_message; + static const HParser *ret = NULL; + if (ret) + return ret; - const HParser *domain = init_domain(); + H_RULE (domain, init_domain()); + H_ARULE(dns_hdzero, h_attr_bool(h_bits(3, false), is_zero)); + H_RULE (dns_header, h_sequence(h_bits(16, false), // ID + h_bits(1, false), // QR + h_bits(4, false), // opcode + h_bits(1, false), // AA + h_bits(1, false), // TC + h_bits(1, false), // RD + h_bits(1, false), // RA + dns_hdzero, // Z + h_bits(4, false), // RCODE + h_uint16(), // QDCOUNT + h_uint16(), // ANCOUNT + h_uint16(), // NSCOUNT + h_uint16(), // ARCOUNT + NULL)); + H_RULE (type, h_int_range(h_uint16(), 1, 16)); + H_RULE (qtype, h_choice(type, + h_int_range(h_uint16(), 252, 255), + NULL)); + H_RULE (class, h_int_range(h_uint16(), 1, 4)); + H_RULE (qclass, h_choice(class, + h_int_range(h_uint16(), 255, 255), + NULL)); + H_RULE (dns_question, h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255), + h_uint8())), + h_ch('\x00'), + NULL), // QNAME + qtype, // QTYPE + qclass, // QCLASS + NULL)); + H_RULE (dns_rr, h_sequence(domain, // NAME + type, // TYPE + class, // CLASS + h_uint32(), // TTL + h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA + NULL)); + H_ARULE(dns_message, h_attr_bool(h_sequence(dns_header, + h_many(dns_question), + h_many(dns_rr), + h_end_p(), + NULL), + validate_dns)); - const HParser *dns_header = h_sequence(h_bits(16, false), // ID - h_bits(1, false), // QR - h_bits(4, false), // opcode - h_bits(1, false), // AA - h_bits(1, false), // TC - h_bits(1, false), // RD - h_bits(1, false), // RA - h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z - h_bits(4, false), // RCODE - h_uint16(), // QDCOUNT - h_uint16(), // ANCOUNT - h_uint16(), // NSCOUNT - h_uint16(), // ARCOUNT - NULL); - - const 
HParser *type = h_int_range(h_uint16(), 1, 16); - - const HParser *qtype = h_choice(type, - h_int_range(h_uint16(), 252, 255), - NULL); - - const HParser *class = h_int_range(h_uint16(), 1, 4); - - const HParser *qclass = h_choice(class, - h_int_range(h_uint16(), 255, 255), - NULL); - - const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255), - h_uint8())), - h_ch('\x00'), - NULL), // QNAME - qtype, // QTYPE - qclass, // QCLASS - NULL); - - - const HParser *dns_rr = h_sequence(domain, // NAME - type, // TYPE - class, // CLASS - h_uint32(), // TTL - h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA - NULL); - - - dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header, - h_many(dns_question), - h_many(dns_rr), - h_end_p(), - NULL), - validate_dns), - pack_dns_struct); - - return dns_message; + ret = dns_message; + return ret; } int start_listening() { From 2db9cd23c81e8c24a771675b27a63ee71848acbb Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Tue, 15 Jan 2013 00:19:43 +0100 Subject: [PATCH 050/125] grammar beautification --- examples/dns.c | 88 +++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 010c462..08e4288 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -287,7 +287,7 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { } } -const HParsedToken* act_dns_message(const HParseResult *p) { +const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); ret->token_type = TT_DNS_MESSAGE; @@ -370,58 +370,58 @@ const HParsedToken *act_ignore(const HParseResult *p) return NULL; } -#define act_dns_hdzero act_ignore +#define act_hdzero act_ignore const HParser* init_parser() { static const HParser *ret = NULL; if (ret) return ret; - H_RULE (domain, init_domain()); - H_ARULE(dns_hdzero, h_attr_bool(h_bits(3, false), is_zero)); - H_RULE (dns_header, h_sequence(h_bits(16, false), // ID - h_bits(1, false), // QR - h_bits(4, false), // opcode - h_bits(1, false), // AA - h_bits(1, false), // TC - h_bits(1, false), // RD - h_bits(1, false), // RA - dns_hdzero, // Z - h_bits(4, false), // RCODE - h_uint16(), // QDCOUNT - h_uint16(), // ANCOUNT - h_uint16(), // NSCOUNT - h_uint16(), // ARCOUNT - NULL)); - H_RULE (type, h_int_range(h_uint16(), 1, 16)); - H_RULE (qtype, h_choice(type, - h_int_range(h_uint16(), 252, 255), + H_RULE (domain, init_domain()); + H_ARULE(hdzero, h_attr_bool(h_bits(3, false), is_zero)); + H_RULE (header, h_sequence(h_bits(16, false), // ID + h_bits(1, false), // QR + h_bits(4, false), // opcode + h_bits(1, false), // AA + h_bits(1, false), // TC + h_bits(1, false), // RD + h_bits(1, false), // RA + hdzero, // Z + h_bits(4, false), // RCODE + h_uint16(), // QDCOUNT + h_uint16(), // ANCOUNT + h_uint16(), // NSCOUNT + h_uint16(), // ARCOUNT NULL)); - H_RULE (class, 
h_int_range(h_uint16(), 1, 4)); - H_RULE (qclass, h_choice(class, - h_int_range(h_uint16(), 255, 255), + H_RULE (type, h_int_range(h_uint16(), 1, 16)); + H_RULE (qtype, h_choice(type, + h_int_range(h_uint16(), 252, 255), + NULL)); + H_RULE (class, h_int_range(h_uint16(), 1, 4)); + H_RULE (qclass, h_choice(class, + h_int_range(h_uint16(), 255, 255), + NULL)); + H_RULE (len, h_int_range(h_uint8(), 1, 255)); + H_RULE (label, h_length_value(len, h_uint8())); + H_RULE (qname, h_sequence(h_many1(label), + h_ch('\x00'), NULL)); - H_RULE (dns_question, h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255), - h_uint8())), - h_ch('\x00'), - NULL), // QNAME - qtype, // QTYPE - qclass, // QCLASS - NULL)); - H_RULE (dns_rr, h_sequence(domain, // NAME - type, // TYPE - class, // CLASS - h_uint32(), // TTL - h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA - NULL)); - H_ARULE(dns_message, h_attr_bool(h_sequence(dns_header, - h_many(dns_question), - h_many(dns_rr), - h_end_p(), - NULL), - validate_dns)); + H_RULE (question, h_sequence(qname, qtype, qclass, NULL)); + H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); + H_RULE (rr, h_sequence(domain, // NAME + type, // TYPE + class, // CLASS + h_uint32(), // TTL + rdata, // RDLENGTH+RDATA + NULL)); + H_ARULE(message, h_attr_bool(h_sequence(header, + h_many(question), + h_many(rr), + h_end_p(), + NULL), + validate_dns)); - ret = dns_message; + ret = message; return ret; } From 21df49cc151b8cd17db866e9d4cf843ef194f316 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Tue, 15 Jan 2013 01:24:47 +0100 Subject: [PATCH 051/125] split out act_header --- examples/dns.c | 77 +++++++++++++++++++++++++++++--------------------- examples/dns.h | 14 ++++----- src/pprint.c | 6 +++- 3 files changed, 57 insertions(+), 40 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 08e4288..5c59626 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "../src/hammer.h" #include "dns_common.h" #include "dns.h" @@ -27,12 +28,12 @@ bool is_zero(HParseResult *p) { bool validate_dns(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; - // The header holds the counts as its last 4 elements. - HParsedToken **elems = p->ast->seq->elements[0]->seq->elements; - size_t qd = elems[8]->uint; - size_t an = elems[9]->uint; - size_t ns = elems[10]->uint; - size_t ar = elems[11]->uint; + assert(p->ast->seq->elements[0]->token_type == (HTokenType)TT_dns_header); + dns_header_t *header = (dns_header_t *)p->ast->seq->elements[0]->user; + size_t qd = header->question_count; + size_t an = header->answer_count; + size_t ns = header->authority_count; + size_t ar = header->additional_count; HParsedToken *questions = p->ast->seq->elements[1]; if (questions->seq->used != qd) return false; @@ -287,34 +288,46 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { } } +const HParsedToken* act_header(const HParseResult *p) { + HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); + ret->token_type = TT_dns_header; + ret->user = h_arena_malloc(p->arena, sizeof(dns_header_t)); + + HParsedToken **fields = p->ast->seq->elements; + dns_header_t header_ = { + .id = fields[0]->uint, + .qr = fields[1]->uint, + .opcode = fields[2]->uint, + .aa = fields[3]->uint, + .tc = fields[4]->uint, + .rd = fields[5]->uint, + .ra = fields[6]->uint, + .rcode = fields[7]->uint, + .question_count = fields[8]->uint, + .answer_count = fields[9]->uint, + .authority_count = fields[10]->uint, + 
.additional_count = fields[11]->uint + }; + *(dns_header_t *)ret->user = header_; + + return ret; +} + const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_DNS_MESSAGE; + ret->token_type = TT_dns_message; dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t)); - HParsedToken *hdr = p->ast->seq->elements[0]; - struct dns_header header = { - .id = hdr->seq->elements[0]->uint, - .qr = hdr->seq->elements[1]->uint, - .opcode = hdr->seq->elements[2]->uint, - .aa = hdr->seq->elements[3]->uint, - .tc = hdr->seq->elements[4]->uint, - .rd = hdr->seq->elements[5]->uint, - .ra = hdr->seq->elements[6]->uint, - .rcode = hdr->seq->elements[7]->uint, - .question_count = hdr->seq->elements[8]->uint, - .answer_count = hdr->seq->elements[9]->uint, - .authority_count = hdr->seq->elements[10]->uint, - .additional_count = hdr->seq->elements[11]->uint - }; - msg->header = header; + assert(p->ast->seq->elements[0]->token_type == (HTokenType)TT_dns_header); + dns_header_t *header = (dns_header_t *)p->ast->seq->elements[0]->user; + msg->header = *header; HParsedToken *qs = p->ast->seq->elements[1]; struct dns_question *questions = h_arena_malloc(p->arena, - sizeof(struct dns_question)*(header.question_count)); - for (size_t i=0; iquestion_count)); + for (size_t i=0; iquestion_count; ++i) { // QNAME is a sequence of labels. In the parser, it's defined as // sequence(many1(length_value(...)), ch('\x00'), NULL). 
questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]); @@ -325,8 +338,8 @@ const HParsedToken* act_message(const HParseResult *p) { HParsedToken *rrs = p->ast->seq->elements[2]; struct dns_rr *answers = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.answer_count)); - for (size_t i=0; ianswer_count)); + for (size_t i=0; ianswer_count; ++i) { answers[i].name = get_domain(rrs[i].seq->elements[0]); answers[i].type = rrs[i].seq->elements[1]->uint; answers[i].class = rrs[i].seq->elements[2]->uint; @@ -337,8 +350,8 @@ const HParsedToken* act_message(const HParseResult *p) { msg->answers = answers; struct dns_rr *authority = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.authority_count)); - for (size_t i=0, j=header.answer_count; iauthority_count)); + for (size_t i=0, j=header->answer_count; iauthority_count; ++i, ++j) { authority[i].name = get_domain(rrs[j].seq->elements[0]); authority[i].type = rrs[j].seq->elements[1]->uint; authority[i].class = rrs[j].seq->elements[2]->uint; @@ -349,8 +362,8 @@ const HParsedToken* act_message(const HParseResult *p) { msg->authority = authority; struct dns_rr *additional = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.additional_count)); - for (size_t i=0, j=header.answer_count+header.authority_count; iadditional_count)); + for (size_t i=0, j=header->answer_count+header->authority_count; iadditional_count; ++i, ++j) { additional[i].name = get_domain(rrs[j].seq->elements[0]); additional[i].type = rrs[j].seq->elements[1]->uint; additional[i].class = rrs[j].seq->elements[2]->uint; @@ -379,7 +392,7 @@ const HParser* init_parser() { H_RULE (domain, init_domain()); H_ARULE(hdzero, h_attr_bool(h_bits(3, false), is_zero)); - H_RULE (header, h_sequence(h_bits(16, false), // ID + H_ARULE(header, h_sequence(h_bits(16, false), // ID h_bits(1, false), // QR h_bits(4, false), // opcode h_bits(1, false), // AA diff --git a/examples/dns.h b/examples/dns.h index a26374e..6f9fb13 100644 --- a/examples/dns.h 
+++ b/examples/dns.h @@ -1,14 +1,14 @@ #include "../src/hammer.h" enum DNSTokenType_ { - TT_DNS_MESSAGE = TT_USER, - TT_DNS_HEADER, - TT_DNS_QNAME, - TT_DNS_QUESTION, - TT_DNS_RR + TT_dns_message = TT_USER, + TT_dns_header, + TT_dns_qname, + TT_dns_question, + TT_dns_rr }; -struct dns_header { +typedef struct dns_header { uint16_t id; bool qr, aa, tc, rd, ra; char opcode, rcode; @@ -16,7 +16,7 @@ struct dns_header { size_t answer_count; size_t authority_count; size_t additional_count; -}; +} dns_header_t; struct dns_qname { size_t qlen; diff --git a/src/pprint.c b/src/pprint.c index 1ffe764..d8b22e2 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -69,7 +69,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*sUSER\n", indent, ""); break; default: - assert_message(0, "Should not reach here."); + if(tok->token_type > TT_USER) { + fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER); + } else { + assert_message(0, "Should not reach here."); + } } } From acfc903a153b878b4b682be6c98be701b5a151c4 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 15 Jan 2013 13:54:30 +0100 Subject: [PATCH 052/125] start pulling pack_dns_struct apart --- examples/dns.c | 98 ++++++++++++++++++++++++++++++++++---------------- examples/dns.h | 37 ++++++++++--------- 2 files changed, 88 insertions(+), 47 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 5c59626..d25e825 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -43,27 +43,6 @@ bool validate_dns(HParseResult *p) { return true; } -struct dns_qname get_qname(const HParsedToken *t) { - // The qname parser parses at least 1 length-value pair, then a NULL. - // So, t->seq->elements[0] is a sequence of at least 1 such pair, - // and t->seq->elements[1] is the null. 
- const HParsedToken *labels = t->seq->elements[0]; - struct dns_qname ret = { - .qlen = labels->seq->used, - .labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used) - }; - // i is which label we're on - for (size_t i=0; iseq->used; ++i) { - ret.labels[i].len = labels->seq->elements[i]->seq->used; - ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1); - // j is which char of the label we're on - for (size_t j=0; jseq->elements[i]->seq->elements[j]->uint; - ret.labels[i].label[ret.labels[i].len] = 0; - } - return ret; -} - char* get_domain(const HParsedToken *t) { switch(t->token_type) { case TT_UINT: @@ -313,6 +292,43 @@ const HParsedToken* act_header(const HParseResult *p) { return ret; } +const HParsedToken* act_label(const HParseResult *p) { + HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); + ret->token_type = TT_dns_label; + ret->user = h_arena_malloc(p->arena, sizeof(dns_label_t)); + dns_label_t *r = (dns_label_t *)ret->user; + + r->len = p->ast->seq->used; + r->label = h_arena_malloc(p->arena, r->len + 1); + for (size_t i=0; ilen; ++i) + r->label[i] = p->ast->seq->elements[i]->uint; + r->label[r->len] = 0; + + return ret; +} + +const HParsedToken* act_question(const HParseResult *p) { + HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); + ret->token_type = TT_dns_question; + ret->user = h_arena_malloc(p->arena, sizeof(dns_question_t)); + + dns_question_t *q = (dns_question_t *)ret->user; + HParsedToken **fields = p->ast->seq->elements; + + // QNAME is a sequence of labels. Pack them into an array. 
+ q->qname.qlen = fields[0]->seq->used; + q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); + for(size_t i=0; iseq->used; i++) { + assert(fields[0]->seq->elements[i]->token_type == (HTokenType)TT_dns_label); + q->qname.labels[i] = *(dns_label_t *)fields[0]->seq->elements[i]->user; + } + + q->qtype = fields[1]->uint; + q->qclass = fields[2]->uint; + + return ret; +} + const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); @@ -326,13 +342,10 @@ const HParsedToken* act_message(const HParseResult *p) { HParsedToken *qs = p->ast->seq->elements[1]; struct dns_question *questions = h_arena_malloc(p->arena, - sizeof(struct dns_question)*(header->question_count)); + sizeof(struct dns_question)*(header->question_count)); for (size_t i=0; iquestion_count; ++i) { - // QNAME is a sequence of labels. In the parser, it's defined as - // sequence(many1(length_value(...)), ch('\x00'), NULL). - questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]); - questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint; - questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint; + assert(qs->seq->elements[i]->token_type == (HTokenType)TT_dns_question); + questions[i] = *(dns_question_t *)qs->seq->elements[i]->user; } msg->questions = questions; @@ -383,7 +396,32 @@ const HParsedToken *act_ignore(const HParseResult *p) return NULL; } +// Helper to build HAction's that pick one index out of a sequence. 
+const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + #define act_hdzero act_ignore +#define act_qname act_index0 const HParser* init_parser() { static const HParser *ret = NULL; @@ -415,11 +453,11 @@ const HParser* init_parser() { h_int_range(h_uint16(), 255, 255), NULL)); H_RULE (len, h_int_range(h_uint8(), 1, 255)); - H_RULE (label, h_length_value(len, h_uint8())); - H_RULE (qname, h_sequence(h_many1(label), + H_ARULE(label, h_length_value(len, h_uint8())); + H_ARULE(qname, h_sequence(h_many1(label), h_ch('\x00'), NULL)); - H_RULE (question, h_sequence(qname, qtype, qclass, NULL)); + H_ARULE(question, h_sequence(qname, qtype, qclass, NULL)); H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); H_RULE (rr, h_sequence(domain, // NAME type, // TYPE diff --git a/examples/dns.h b/examples/dns.h index 6f9fb13..81bed55 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -3,6 +3,7 @@ enum DNSTokenType_ { TT_dns_message = TT_USER, TT_dns_header, + TT_dns_label, TT_dns_qname, TT_dns_question, TT_dns_rr @@ -18,21 +19,23 @@ typedef struct dns_header { size_t additional_count; } dns_header_t; -struct dns_qname { - size_t qlen; - struct { - size_t len; - uint8_t *label; - } *labels; -}; +typedef struct dns_label { + size_t len; + uint8_t *label; +} dns_label_t; -struct dns_question { - struct dns_qname qname; +typedef struct dns_qname { + size_t qlen; + dns_label_t *labels; +} dns_qname_t; + +typedef struct dns_question { + dns_qname_t qname; uint16_t qtype; uint16_t qclass; -}; +} dns_question_t; -struct dns_rr { +typedef struct dns_rr { char* name; uint16_t type; uint16_t class; @@ -81,12 +84,12 @@ struct 
dns_rr { uint8_t* bit_map; } wks; }; -}; +} dns_rr_t; typedef struct dns_message { - struct dns_header header; - struct dns_question *questions; - struct dns_rr *answers; - struct dns_rr *authority; - struct dns_rr *additional; + dns_header_t header; + dns_question_t *questions; + dns_rr_t *answers; + dns_rr_t *authority; + dns_rr_t *additional; } dns_message_t; From b5db3177db043e8baf2a76cf40f8ad6e31540981 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 15 Jan 2013 17:50:18 +0100 Subject: [PATCH 053/125] normalize allocation of result in act_message wrt other actions --- examples/dns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index d25e825..6db8bf8 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -331,10 +331,11 @@ const HParsedToken* act_question(const HParseResult *p) { const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); + HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); ret->token_type = TT_dns_message; - - dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t)); + ret->user = h_arena_malloc(p->arena, sizeof(dns_message_t)); + dns_message_t *msg = ret->user; assert(p->ast->seq->elements[0]->token_type == (HTokenType)TT_dns_header); dns_header_t *header = (dns_header_t *)p->ast->seq->elements[0]->user; @@ -386,7 +387,6 @@ const HParsedToken* act_message(const HParseResult *p) { } msg->additional = additional; - ret->user = (void*)msg; return ret; } From 342e39eb61bf583d582938ceaad3949c9b0665f5 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Tue, 15 Jan 2013 20:02:53 +0100 Subject: [PATCH 054/125] add helpers for easier token construction and access to sequence elements --- examples/dns.c | 154 +++++++++++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 57 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 6db8bf8..01442c8 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -11,9 +11,79 @@ #define false 0 #define true 1 + +/// +// API Additions +/// + #define H_RULE(rule, def) const HParser *rule = def #define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + +HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) { + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + ret->user = value; + return ret; +} + +#define H_MAKE(TYP) \ + ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP ## _t))) + +#define H_MAKE_TOKEN(TYP, VAL) \ + h_make_token(p->arena, TT_ ## TYP, VAL) + +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { + assert(i < a->used); + return a->elements[i]; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) { + HParsedToken *elem = h_seq_index(p, i); + assert(elem->token_type == 
(HTokenType)type); + return elem->user; +} + +#define H_SEQ_INDEX(TYP, SEQ, IDX) \ + ((TYP ## _t *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) + +#define H_FIELD(TYP, IDX) \ + H_SEQ_INDEX(TYP, p->ast, IDX) + bool is_zero(HParseResult *p) { if (TT_UINT != p->ast->token_type) @@ -21,6 +91,11 @@ bool is_zero(HParseResult *p) { return (0 == p->ast->uint); } + +/// +// Semantic Actions +/// + /** * Every DNS message should have QDCOUNT entries in the question * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. @@ -268,10 +343,6 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { } const HParsedToken* act_header(const HParseResult *p) { - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_dns_header; - ret->user = h_arena_malloc(p->arena, sizeof(dns_header_t)); - HParsedToken **fields = p->ast->seq->elements; dns_header_t header_ = { .id = fields[0]->uint, @@ -287,16 +358,15 @@ const HParsedToken* act_header(const HParseResult *p) { .authority_count = fields[10]->uint, .additional_count = fields[11]->uint }; - *(dns_header_t *)ret->user = header_; - return ret; + dns_header_t *header = H_MAKE(dns_header); + *header = header_; + + return H_MAKE_TOKEN(dns_header, header); } const HParsedToken* act_label(const HParseResult *p) { - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_dns_label; - ret->user = h_arena_malloc(p->arena, sizeof(dns_label_t)); - dns_label_t *r = (dns_label_t *)ret->user; + dns_label_t *r = H_MAKE(dns_label); r->len = p->ast->seq->used; r->label = h_arena_malloc(p->arena, r->len + 1); @@ -304,41 +374,31 @@ const HParsedToken* act_label(const HParseResult *p) { r->label[i] = p->ast->seq->elements[i]->uint; r->label[r->len] = 0; - return ret; + return H_MAKE_TOKEN(dns_label, r); } const HParsedToken* act_question(const HParseResult *p) { - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_dns_question; - ret->user = 
h_arena_malloc(p->arena, sizeof(dns_question_t)); - - dns_question_t *q = (dns_question_t *)ret->user; + dns_question_t *q = H_MAKE(dns_question); HParsedToken **fields = p->ast->seq->elements; // QNAME is a sequence of labels. Pack them into an array. q->qname.qlen = fields[0]->seq->used; q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); for(size_t i=0; iseq->used; i++) { - assert(fields[0]->seq->elements[i]->token_type == (HTokenType)TT_dns_label); - q->qname.labels[i] = *(dns_label_t *)fields[0]->seq->elements[i]->user; + q->qname.labels[i] = *H_SEQ_INDEX(dns_label, fields[0], i); } q->qtype = fields[1]->uint; q->qclass = fields[2]->uint; - return ret; + return H_MAKE_TOKEN(dns_question, q); } const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); + dns_message_t *msg = H_MAKE(dns_message); - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_dns_message; - ret->user = h_arena_malloc(p->arena, sizeof(dns_message_t)); - dns_message_t *msg = ret->user; - - assert(p->ast->seq->elements[0]->token_type == (HTokenType)TT_dns_header); - dns_header_t *header = (dns_header_t *)p->ast->seq->elements[0]->user; + dns_header_t *header = H_FIELD(dns_header, 0); msg->header = *header; HParsedToken *qs = p->ast->seq->elements[1]; @@ -387,42 +447,17 @@ const HParsedToken* act_message(const HParseResult *p) { } msg->additional = additional; - return ret; -} - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. 
-const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); + return H_MAKE_TOKEN(dns_message, msg); } #define act_hdzero act_ignore #define act_qname act_index0 + +/// +// Parser / Grammar +/// + const HParser* init_parser() { static const HParser *ret = NULL; if (ret) @@ -476,6 +511,11 @@ const HParser* init_parser() { return ret; } + +/// +// Program Logic for a Dummy DNS Server +/// + int start_listening() { // return: fd int sock; From 83f494432133243fc8908a74af4fec6fa91a2721 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 15 Jan 2013 22:12:13 +0100 Subject: [PATCH 055/125] separate parsing and packing of rdata --- examples/dns.c | 234 +++++++++++++++++-------------------------------- 1 file changed, 82 insertions(+), 152 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 01442c8..e38a2b4 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -167,176 +167,106 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { for (size_t i=0; iused; ++i) data[i] = rdata->elements[i]->uint; + // Mapping numeric RR types (as indices) to parsers + const HParser *parsers[] = { + NULL, // there is no type 0 + init_a(), // 1 + init_ns(), + init_md(), + init_mf(), + init_cname(), // 5 + init_soa(), + init_mb(), + init_mg(), + init_mr(), + init_null(), // 10 + init_wks(), + init_ptr(), + init_hinfo(), + init_minfo(), + init_mx(), // 15 + init_txt() + }; + + // Parse rdata if possible. 
+ const HParseResult *r = NULL; + if (rr.type < sizeof(parsers)) { + const HParser *p = parsers[rr.type]; + if (p) + r = h_parse(p, (const uint8_t*)data, rdata->used); + } + // If the RR doesn't parse, set its type to 0. + if (!r) + rr.type = 0; + + // Pack the parsed rdata into rr. switch(rr.type) { case 1: // A - { - const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.a = r->ast->seq->elements[0]->uint; - break; - } + rr.a = r->ast->seq->elements[0]->uint; + break; case 2: // NS - { - const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ns = get_domain(r->ast->seq->elements[0]); - break; - } + rr.ns = get_domain(r->ast->seq->elements[0]); + break; case 3: // MD - { - const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } + rr.md = get_domain(r->ast->seq->elements[0]); + break; case 4: // MF - { - const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } + rr.md = get_domain(r->ast->seq->elements[0]); + break; case 5: // CNAME - { - const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.cname = get_domain(r->ast->seq->elements[0]); - break; - } + rr.cname = get_domain(r->ast->seq->elements[0]); + break; case 6: // SOA - { - const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.soa.mname = get_domain(r->ast->seq->elements[0]); - rr.soa.rname = get_domain(r->ast->seq->elements[1]); - rr.soa.serial = r->ast->seq->elements[2]->uint; - rr.soa.refresh = r->ast->seq->elements[3]->uint; - rr.soa.retry = r->ast->seq->elements[4]->uint; - rr.soa.expire = r->ast->seq->elements[5]->uint; - 
rr.soa.minimum = r->ast->seq->elements[6]->uint; - } - break; - } + rr.soa.mname = get_domain(r->ast->seq->elements[0]); + rr.soa.rname = get_domain(r->ast->seq->elements[1]); + rr.soa.serial = r->ast->seq->elements[2]->uint; + rr.soa.refresh = r->ast->seq->elements[3]->uint; + rr.soa.retry = r->ast->seq->elements[4]->uint; + rr.soa.expire = r->ast->seq->elements[5]->uint; + rr.soa.minimum = r->ast->seq->elements[6]->uint; + break; case 7: // MB - { - const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mb = get_domain(r->ast->seq->elements[0]); - break; - } + rr.mb = get_domain(r->ast->seq->elements[0]); + break; case 8: // MG - { - const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mg = get_domain(r->ast->seq->elements[0]); - break; - } + rr.mg = get_domain(r->ast->seq->elements[0]); + break; case 9: // MR - { - const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mr = get_domain(r->ast->seq->elements[0]); - break; - } + rr.mr = get_domain(r->ast->seq->elements[0]); + break; case 10: // NULL - { - const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used); - for (size_t i=0; iast->seq->used; ++i) - rr.null[i] = r->ast->seq->elements[i]->uint; - } - break; - } + rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used); + for (size_t i=0; iast->seq->used; ++i) + rr.null[i] = r->ast->seq->elements[i]->uint; + break; case 11: // WKS - { - const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.wks.address = r->ast->seq->elements[0]->uint; - rr.wks.protocol = r->ast->seq->elements[1]->uint; - rr.wks.len = r->ast->seq->elements[2]->seq->used; - rr.wks.bit_map = 
h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used); - for (size_t i=0; iast->seq->elements[2]->seq->elements[i]->uint; - } - break; - } + rr.wks.address = r->ast->seq->elements[0]->uint; + rr.wks.protocol = r->ast->seq->elements[1]->uint; + rr.wks.len = r->ast->seq->elements[2]->seq->used; + rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used); + for (size_t i=0; iast->seq->elements[2]->seq->elements[i]->uint; + break; case 12: // PTR - { - const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ptr = get_domain(r->ast->seq->elements[0]); - break; - } + rr.ptr = get_domain(r->ast->seq->elements[0]); + break; case 13: // HINFO - { - const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq); - rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq); - } - break; - } + rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq); + rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq); + break; case 14: // MINFO - { - const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]); - rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]); - } - break; - } + rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]); + rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]); + break; case 15: // MX - { - const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.mx.preference = r->ast->seq->elements[0]->uint; - rr.mx.exchange = get_domain(r->ast->seq->elements[1]); - } - break; - } + rr.mx.preference = r->ast->seq->elements[0]->uint; + rr.mx.exchange = get_domain(r->ast->seq->elements[1]); + break; case 16: // TXT - { - const HParseResult *r = h_parse(init_txt(), (const 
uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.txt.count = r->ast->seq->elements[0]->seq->used; - rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq); - } - break; - } + rr.txt.count = r->ast->seq->elements[0]->seq->used; + rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq); + break; default: break; } From 9c10a75f3d7e7da1f5908f09f3bbb498eb189ba6 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Tue, 15 Jan 2013 22:18:33 +0100 Subject: [PATCH 056/125] use H_SEQ_INDEX to access a sequence --- examples/dns.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index e38a2b4..d786d59 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -335,8 +335,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_question *questions = h_arena_malloc(p->arena, sizeof(struct dns_question)*(header->question_count)); for (size_t i=0; iquestion_count; ++i) { - assert(qs->seq->elements[i]->token_type == (HTokenType)TT_dns_question); - questions[i] = *(dns_question_t *)qs->seq->elements[i]->user; + questions[i] = *H_SEQ_INDEX(dns_question, qs, i); } msg->questions = questions; From 4b30ebdb772ec288ffaee89fb1269cc75c332b69 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Tue, 15 Jan 2013 22:34:15 +0100 Subject: [PATCH 057/125] pull out struct types of complex rdata fields --- examples/dns.h | 94 ++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/examples/dns.h b/examples/dns.h index 81bed55..672e3b7 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -35,6 +35,43 @@ typedef struct dns_question { uint16_t qclass; } dns_question_t; +typedef struct { + uint8_t* cpu; + uint8_t* os; +} dns_rr_hinfo_t; + +typedef struct { + char* rmailbx; + char* emailbx; +} dns_rr_minfo_t; + +typedef struct { + uint16_t preference; + char* exchange; +} dns_rr_mx_t; + +typedef struct { + char* mname; + char* rname; + uint32_t serial; + uint32_t refresh; + uint32_t retry; + uint32_t expire; + uint32_t minimum; +} dns_rr_soa_t; + +typedef struct { + size_t count; + uint8_t** txt_data; +} dns_rr_txt_t; + +typedef struct { + uint32_t address; + uint8_t protocol; + size_t len; + uint8_t* bit_map; +} dns_rr_wks_t; + typedef struct dns_rr { char* name; uint16_t type; @@ -42,47 +79,22 @@ typedef struct dns_rr { uint32_t ttl; // cmos is also acceptable. 
uint16_t rdlength; union { - char* cname; - struct { - uint8_t* cpu; - uint8_t* os; - } hinfo; - char* mb; - char* md; - char* mf; - char* mg; - struct { - char* rmailbx; - char* emailbx; - } minfo; - char* mr; - struct { - uint16_t preference; - char* exchange; - } mx; - uint8_t* null; - char* ns; - char* ptr; - struct { - char* mname; - char* rname; - uint32_t serial; - uint32_t refresh; - uint32_t retry; - uint32_t expire; - uint32_t minimum; - } soa; - struct { - size_t count; - uint8_t** txt_data; - } txt; - uint32_t a; - struct { - uint32_t address; - uint8_t protocol; - size_t len; - uint8_t* bit_map; - } wks; + uint32_t a; + char* ns; + char* md; + char* mf; + char* cname; + dns_rr_soa_t soa; + char* mb; + char* mg; + char* mr; + uint8_t* null; + dns_rr_wks_t wks; + char* ptr; + dns_rr_hinfo_t hinfo; + dns_rr_minfo_t minfo; + dns_rr_mx_t mx; + dns_rr_txt_t txt; }; } dns_rr_t; From 341254137052b4a33e151bde3c2036d8e2a63c42 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 15:05:04 +0100 Subject: [PATCH 058/125] move api additions to glue.[ch] --- examples/Makefile | 5 ++- examples/dns.c | 76 +------------------------------------------ examples/dns_common.c | 1 + examples/dns_common.h | 1 + examples/glue.c | 65 ++++++++++++++++++++++++++++++++++++ examples/glue.h | 36 ++++++++++++++++++++ 6 files changed, 106 insertions(+), 78 deletions(-) create mode 100644 examples/glue.c create mode 100644 examples/glue.h diff --git a/examples/Makefile b/examples/Makefile index 6a054ca..786af44 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -15,14 +15,13 @@ LDFLAGS += $(pkg-config --libs glib-2.0) all: dns base64 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -dns: dns.o rr.o dns_common.o +dns: dns.o rr.o dns_common.o glue.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) dns.o: ../src/hammer.h dns_common.h - rr.o: ../src/hammer.h rr.h dns_common.h - dns_common.o: ../src/hammer.h dns_common.h +glue.o: ../src/hammer.h glue.h base64: 
LDFLAGS:=-L../src -lhammer $(LDFLAGS) base64: base64.o diff --git a/examples/dns.c b/examples/dns.c index d786d59..b513a51 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -13,89 +13,15 @@ /// -// API Additions +// Semantic Actions and Validations /// -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - -HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) { - HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); - ret->token_type = type; - ret->user = value; - return ret; -} - -#define H_MAKE(TYP) \ - ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP ## _t))) - -#define H_MAKE_TOKEN(TYP, VAL) \ - h_make_token(p->arena, TT_ ## TYP, VAL) - -HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { - assert(i < a->used); - return a->elements[i]; -} - -HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { - assert(p->token_type == TT_SEQUENCE); - return h_carray_index(p->seq, i); -} - -void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) { - HParsedToken *elem = h_seq_index(p, i); - assert(elem->token_type == (HTokenType)type); - return elem->user; -} - -#define H_SEQ_INDEX(TYP, SEQ, IDX) \ - ((TYP ## _t *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) - -#define H_FIELD(TYP, IDX) \ - H_SEQ_INDEX(TYP, p->ast, IDX) - - bool 
is_zero(HParseResult *p) { if (TT_UINT != p->ast->token_type) return false; return (0 == p->ast->uint); } - -/// -// Semantic Actions -/// - /** * Every DNS message should have QDCOUNT entries in the question * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. diff --git a/examples/dns_common.c b/examples/dns_common.c index 3d349f1..5bd2374 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -13,6 +13,7 @@ bool validate_label(HParseResult *p) { return (64 > p->ast->seq->used); } + const HParser* init_domain() { static const HParser *domain = NULL; if (domain) diff --git a/examples/dns_common.h b/examples/dns_common.h index 41d73f0..2d796f8 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -2,6 +2,7 @@ #define HAMMER_DNS_COMMON__H #include "../src/hammer.h" +#include "glue.h" const HParser* init_domain(); const HParser* init_character_string(); diff --git a/examples/glue.c b/examples/glue.c new file mode 100644 index 0000000..a438b17 --- /dev/null +++ b/examples/glue.c @@ -0,0 +1,65 @@ +#include "glue.h" + + +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. 
+const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + +HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) +{ + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + ret->user = value; + return ret; +} + +#define H_MAKE(TYP) \ + ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP ## _t))) + +#define H_MAKE_TOKEN(TYP, VAL) \ + h_make_token(p->arena, TT_ ## TYP, VAL) + +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) +{ + assert(i < a->used); + return a->elements[i]; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +{ + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) +{ + HParsedToken *elem = h_seq_index(p, i); + assert(elem->token_type == (HTokenType)type); + return elem->user; +} diff --git a/examples/glue.h b/examples/glue.h new file mode 100644 index 0000000..d8776b3 --- /dev/null +++ b/examples/glue.h @@ -0,0 +1,36 @@ +#ifndef HAMMER_EXAMPLES_GLUE__H +#define HAMMER_EXAMPLES_GLUE__H + +#include +#include "../src/hammer.h" + +/// +// API Additions +/// + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) + +const HParsedToken *act_ignore(const HParseResult *p); +const HParsedToken *act_index(int i, const HParseResult *p); +const HParsedToken *act_index0(const HParseResult *p); + +HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); + +#define H_MAKE(TYP) \ + ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP 
## _t))) + +#define H_MAKE_TOKEN(TYP, VAL) \ + h_make_token(p->arena, TT_ ## TYP, VAL) + +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); +void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); + +#define H_SEQ_INDEX(TYP, SEQ, IDX) \ + ((TYP ## _t *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) + +#define H_FIELD(TYP, IDX) \ + H_SEQ_INDEX(TYP, p->ast, IDX) + +#endif From a38d2e4ab0cc1fd3c274af80216412669d6f3101 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 15:06:51 +0100 Subject: [PATCH 059/125] replace another field access with a glue macro --- examples/dns.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index b513a51..fb2b148 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -2,7 +2,6 @@ #include #include #include -#include #include "../src/hammer.h" #include "dns_common.h" #include "dns.h" @@ -29,8 +28,7 @@ bool is_zero(HParseResult *p) { bool validate_dns(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; - assert(p->ast->seq->elements[0]->token_type == (HTokenType)TT_dns_header); - dns_header_t *header = (dns_header_t *)p->ast->seq->elements[0]->user; + dns_header_t *header = H_FIELD(dns_header, 0); size_t qd = header->question_count; size_t an = header->answer_count; size_t ns = header->authority_count; From aa295f1536d195f9a030026626c1c215ebf3b6a3 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 16 Jan 2013 15:40:10 +0100 Subject: [PATCH 060/125] rewrite dns domain grammar using H_RULE --- examples/dns_common.c | 58 ++++++++++++------------------------------- 1 file changed, 16 insertions(+), 42 deletions(-) diff --git a/examples/dns_common.c b/examples/dns_common.c index 5bd2374..0b07cf8 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -15,50 +15,24 @@ bool validate_label(HParseResult *p) { const HParser* init_domain() { - static const HParser *domain = NULL; - if (domain) - return domain; + static const HParser *ret = NULL; + if (ret) + return ret; - const HParser *letter = h_choice(h_ch_range('a', 'z'), - h_ch_range('A', 'Z'), - NULL); + H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); + H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); + H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); + H_RULE (label, h_attr_bool(h_sequence(letter, + h_optional(h_sequence(h_optional(ldh_str), + let_dig, + NULL)), + NULL), + validate_label)); + H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); + H_RULE (domain, h_choice(subdomain, h_ch(' '), NULL)); - const HParser *let_dig = h_choice(letter, - h_ch_range('0', '9'), - NULL); - - const HParser *ldh_str = h_many1(h_choice(let_dig, - h_ch('-'), - NULL)); - - const HParser *label = h_attr_bool(h_sequence(letter, - h_optional(h_sequence(h_optional(ldh_str), - let_dig, - NULL)), - NULL), - validate_label); - - /** - * You could write it like this ... - * HParser *indirect_subdomain = h_indirect(); - * const HParser *subdomain = h_choice(label, - * h_sequence(indirect_subdomain, - * h_ch('.'), - * label, - * NULL), - * NULL); - * h_bind_indirect(indirect_subdomain, subdomain); - * - * ... 
but this is easier and equivalent - */ - - const HParser *subdomain = h_sepBy1(label, h_ch('.')); - - domain = h_choice(subdomain, - h_ch(' '), - NULL); - - return domain; + ret = domain; + return ret; } const HParser* init_character_string() { From 16b1e02baaaa0e14f2bc048466e0870043ab21ef Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 16:03:20 +0100 Subject: [PATCH 061/125] add an assertion to h_seq_index --- examples/glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/glue.c b/examples/glue.c index a438b17..dd6d1b2 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -53,6 +53,7 @@ HParsedToken *h_carray_index(const HCountedArray *a, size_t i) HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { + assert(p != NULL); assert(p->token_type == TT_SEQUENCE); return h_carray_index(p->seq, i); } From 0083031d6f3fcdf0a1ad84cdeb899c11dd02860b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 16:08:30 +0100 Subject: [PATCH 062/125] move get_domain logic into an action on the domain parser --- examples/dns.c | 27 +++------------------------ examples/dns.h | 5 ++++- examples/dns_common.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index fb2b148..5904616 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -43,30 +43,9 @@ bool validate_dns(HParseResult *p) { } char* get_domain(const HParsedToken *t) { - switch(t->token_type) { - case TT_UINT: - return " "; - case TT_SEQUENCE: - { - // Sequence of subdomains separated by "." - // Each subdomain is a label, which can be no more than 63 chars. 
- char *ret = h_arena_malloc(t->seq->arena, 64*t->seq->used); - size_t count = 0; - for (size_t i=0; i<t->seq->used; ++i) { - HParsedToken *tmp = t->seq->elements[i]; - for (size_t j=0; j<tmp->seq->used; ++j) { - ret[count] = tmp->seq->elements[i]->uint; - ++count; - } - ret[count] = '.'; - ++count; - } - ret[count-1] = '\x00'; - return ret; - } - default: - return NULL; - } + assert(t != NULL); + assert(t->token_type == (HTokenType)TT_dns_domain); + return t->user; } uint8_t* get_cs(const HCountedArray *arr) { diff --git a/examples/dns.h b/examples/dns.h index 672e3b7..913b186 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -6,7 +6,8 @@ enum DNSTokenType_ { TT_dns_label, TT_dns_qname, TT_dns_question, - TT_dns_rr + TT_dns_rr, + TT_dns_domain }; typedef struct dns_header { @@ -98,6 +99,8 @@ typedef struct dns_rr { }; } dns_rr_t; +typedef char *dns_domain_t; + typedef struct dns_message { dns_header_t header; dns_question_t *questions; diff --git a/examples/dns_common.c b/examples/dns_common.c index 0b07cf8..d25abe2 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -1,5 +1,6 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #define false 0 #define true 1 @@ -13,6 +14,32 @@ bool validate_label(HParseResult *p) { return (64 > p->ast->seq->used); } +const HParsedToken* act_domain(const HParseResult *p) { + switch(p->ast->token_type) { + case TT_UINT: + return H_MAKE_TOKEN(dns_domain, " "); + case TT_SEQUENCE: + { + // Sequence of subdomains separated by "." + // Each subdomain is a label, which can be no more than 63 chars. 
+ char *ret = h_arena_malloc(p->arena, 64*p->ast->seq->used); + size_t count = 0; + for (size_t i=0; i<p->ast->seq->used; ++i) { + HParsedToken *tmp = p->ast->seq->elements[i]; + for (size_t j=0; j<tmp->seq->used; ++j) { + ret[count] = tmp->seq->elements[i]->uint; + ++count; + } + ret[count] = '.'; + ++count; + } + ret[count-1] = '\x00'; + return H_MAKE_TOKEN(dns_domain, ret); + } + default: + return NULL; + } +} const HParser* init_domain() { static const HParser *ret = NULL; @@ -29,7 +56,7 @@ const HParser* init_domain() { NULL), validate_label)); H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); - H_RULE (domain, h_choice(subdomain, h_ch(' '), NULL)); + H_ARULE(domain, h_choice(subdomain, h_ch(' '), NULL)); ret = domain; return ret; From e54c5236ff6d5c34e19939d2844331a2a511adf4 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 16:31:46 +0100 Subject: [PATCH 063/125] replace get_domain calls in set_rr with H_FIELD --- examples/dns.c | 74 +++++++++++++++++++++---------------------- examples/dns_common.c | 43 +++++++++++++++---------- 2 files changed, 64 insertions(+), 53 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 5904616..6acf5af 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -92,83 +92,83 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { }; // Parse rdata if possible. - const HParseResult *r = NULL; + const HParseResult *p = NULL; if (rr.type < sizeof(parsers)) { - const HParser *p = parsers[rr.type]; - if (p) - r = h_parse(p, (const uint8_t*)data, rdata->used); + const HParser *parser = parsers[rr.type]; + if (parser) + p = h_parse(parser, (const uint8_t*)data, rdata->used); } // If the RR doesn't parse, set its type to 0. - if (!r) + if (!p) rr.type = 0; // Pack the parsed rdata into rr. 
switch(rr.type) { case 1: // A - rr.a = r->ast->seq->elements[0]->uint; + rr.a = p->ast->seq->elements[0]->uint; break; case 2: // NS - rr.ns = get_domain(r->ast->seq->elements[0]); + rr.ns = *H_FIELD(dns_domain, 0); break; case 3: // MD - rr.md = get_domain(r->ast->seq->elements[0]); + rr.md = *H_FIELD(dns_domain, 0); break; case 4: // MF - rr.md = get_domain(r->ast->seq->elements[0]); + rr.md = *H_FIELD(dns_domain, 0); break; case 5: // CNAME - rr.cname = get_domain(r->ast->seq->elements[0]); + rr.cname = *H_FIELD(dns_domain, 0); break; case 6: // SOA - rr.soa.mname = get_domain(r->ast->seq->elements[0]); - rr.soa.rname = get_domain(r->ast->seq->elements[1]); - rr.soa.serial = r->ast->seq->elements[2]->uint; - rr.soa.refresh = r->ast->seq->elements[3]->uint; - rr.soa.retry = r->ast->seq->elements[4]->uint; - rr.soa.expire = r->ast->seq->elements[5]->uint; - rr.soa.minimum = r->ast->seq->elements[6]->uint; + rr.soa.mname = *H_FIELD(dns_domain, 0); + rr.soa.rname = *H_FIELD(dns_domain, 1); + rr.soa.serial = p->ast->seq->elements[2]->uint; + rr.soa.refresh = p->ast->seq->elements[3]->uint; + rr.soa.retry = p->ast->seq->elements[4]->uint; + rr.soa.expire = p->ast->seq->elements[5]->uint; + rr.soa.minimum = p->ast->seq->elements[6]->uint; break; case 7: // MB - rr.mb = get_domain(r->ast->seq->elements[0]); + rr.mb = *H_FIELD(dns_domain, 0); break; case 8: // MG - rr.mg = get_domain(r->ast->seq->elements[0]); + rr.mg = *H_FIELD(dns_domain, 0); break; case 9: // MR - rr.mr = get_domain(r->ast->seq->elements[0]); + rr.mr = *H_FIELD(dns_domain, 0); break; case 10: // NULL - rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used); - for (size_t i=0; iast->seq->used; ++i) - rr.null[i] = r->ast->seq->elements[i]->uint; + rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->used); + for (size_t i=0; iast->seq->used; ++i) + rr.null[i] = p->ast->seq->elements[i]->uint; break; case 11: // WKS - rr.wks.address = r->ast->seq->elements[0]->uint; - 
rr.wks.protocol = r->ast->seq->elements[1]->uint; - rr.wks.len = r->ast->seq->elements[2]->seq->used; - rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used); + rr.wks.address = p->ast->seq->elements[0]->uint; + rr.wks.protocol = p->ast->seq->elements[1]->uint; + rr.wks.len = p->ast->seq->elements[2]->seq->used; + rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->elements[2]->seq->used); for (size_t i=0; iast->seq->elements[2]->seq->elements[i]->uint; + rr.wks.bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; break; case 12: // PTR - rr.ptr = get_domain(r->ast->seq->elements[0]); + rr.ptr = *H_FIELD(dns_domain, 0); break; case 13: // HINFO - rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq); - rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq); + rr.hinfo.cpu = get_cs(p->ast->seq->elements[0]->seq); + rr.hinfo.os = get_cs(p->ast->seq->elements[1]->seq); break; case 14: // MINFO - rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]); - rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]); + rr.minfo.rmailbx = *H_FIELD(dns_domain, 0); + rr.minfo.emailbx = *H_FIELD(dns_domain, 1); break; case 15: // MX - rr.mx.preference = r->ast->seq->elements[0]->uint; - rr.mx.exchange = get_domain(r->ast->seq->elements[1]); + rr.mx.preference = p->ast->seq->elements[0]->uint; + rr.mx.exchange = *H_FIELD(dns_domain, 1); break; case 16: // TXT - rr.txt.count = r->ast->seq->elements[0]->seq->used; - rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq); + rr.txt.count = p->ast->seq->elements[0]->seq->used; + rr.txt.txt_data = get_txt(p->ast->seq->elements[0]->seq); break; default: break; diff --git a/examples/dns_common.c b/examples/dns_common.c index d25abe2..f05a3e6 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -15,30 +15,41 @@ bool validate_label(HParseResult *p) { } const HParsedToken* act_domain(const HParseResult *p) { + const HParsedToken *ret = NULL; + char *arr 
= NULL; + switch(p->ast->token_type) { case TT_UINT: - return H_MAKE_TOKEN(dns_domain, " "); + arr = " "; + break; case TT_SEQUENCE: - { - // Sequence of subdomains separated by "." - // Each subdomain is a label, which can be no more than 63 chars. - char *ret = h_arena_malloc(p->arena, 64*p->ast->seq->used); - size_t count = 0; - for (size_t i=0; i<p->ast->seq->used; ++i) { - HParsedToken *tmp = p->ast->seq->elements[i]; - for (size_t j=0; j<tmp->seq->used; ++j) { - ret[count] = tmp->seq->elements[i]->uint; - ++count; - } - ret[count] = '.'; + // Sequence of subdomains separated by "." + // Each subdomain is a label, which can be no more than 63 chars. + arr = h_arena_malloc(p->arena, 64*p->ast->seq->used); + size_t count = 0; + for (size_t i=0; i<p->ast->seq->used; ++i) { + HParsedToken *tmp = p->ast->seq->elements[i]; + for (size_t j=0; j<tmp->seq->used; ++j) { + arr[count] = tmp->seq->elements[i]->uint; ++count; } - ret[count-1] = '\x00'; - return H_MAKE_TOKEN(dns_domain, ret); + arr[count] = '.'; + ++count; } + arr[count-1] = '\x00'; + break; default: - return NULL; + arr = NULL; + ret = NULL; } + + if(arr) { + dns_domain_t *val = H_MAKE(dns_domain); // dns_domain_t is char* + *val = arr; + ret = H_MAKE_TOKEN(dns_domain, val); + } + + return ret; } const HParser* init_domain() { From ccd304ddd7f8996db57e411d05c524c9c66fa3d6 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 16 Jan 2013 16:36:23 +0100 Subject: [PATCH 064/125] replace remaining get_domain calls and remove get_domain --- examples/dns.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 6acf5af..dc10832 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -42,12 +42,6 @@ bool validate_dns(HParseResult *p) { return true; } -char* get_domain(const HParsedToken *t) { - assert(t != NULL); - assert(t->token_type == (HTokenType)TT_dns_domain); - return t->user; -} - uint8_t* get_cs(const HCountedArray *arr) { uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); for (size_t i=0; i<arr->used; ++i) @@ -246,7 +240,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *answers = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->answer_count)); for (size_t i=0; i<header->answer_count; ++i) { - answers[i].name = get_domain(rrs[i].seq->elements[0]); + answers[i].name = *H_SEQ_INDEX(dns_domain, rrs+i, 0); answers[i].type = rrs[i].seq->elements[1]->uint; answers[i].class = rrs[i].seq->elements[2]->uint; answers[i].ttl = rrs[i].seq->elements[3]->uint; @@ -258,7 +252,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *authority = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->authority_count)); for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) { - authority[i].name = get_domain(rrs[j].seq->elements[0]); + authority[i].name = *H_SEQ_INDEX(dns_domain, rrs+j, 0); authority[i].type = rrs[j].seq->elements[1]->uint; authority[i].class = rrs[j].seq->elements[2]->uint; authority[i].ttl = rrs[j].seq->elements[3]->uint; @@ -270,7 +264,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *additional = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->additional_count)); for (size_t i=0, j=header->answer_count+header->authority_count; i<header->additional_count; ++i, ++j) { - additional[i].name = 
get_domain(rrs[j].seq->elements[0]); + additional[i].name = *H_SEQ_INDEX(dns_domain, rrs+j, 0); additional[i].type = rrs[j].seq->elements[1]->uint; additional[i].class = rrs[j].seq->elements[2]->uint; additional[i].ttl = rrs[j].seq->elements[3]->uint; From ac3ad6d690febd5ee35e4d17b0c81dd60e5e239c Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 16:40:49 +0100 Subject: [PATCH 065/125] remove left-over definitions that have moved to glue.h --- examples/glue.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index dd6d1b2..767f853 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -39,12 +39,6 @@ HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) return ret; } -#define H_MAKE(TYP) \ - ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP ## _t))) - -#define H_MAKE_TOKEN(TYP, VAL) \ - h_make_token(p->arena, TT_ ## TYP, VAL) - HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { assert(i < a->used); From 4f1e9ad22d0d5144ecd3cc9bde6d4aca66512538 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 16 Jan 2013 16:41:23 +0100 Subject: [PATCH 066/125] don't add _t type suffix inside H_ macros --- examples/dns.c | 56 ++++++++++++++++++++++++------------------------- examples/dns.h | 14 ++++++------- examples/glue.h | 4 ++-- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index dc10832..026287c 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -28,7 +28,7 @@ bool is_zero(HParseResult *p) { bool validate_dns(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; - dns_header_t *header = H_FIELD(dns_header, 0); + dns_header_t *header = H_FIELD(dns_header_t, 0); size_t qd = header->question_count; size_t an = header->answer_count; size_t ns = header->authority_count; @@ -103,20 +103,20 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { rr.a = p->ast->seq->elements[0]->uint; break; case 2: // NS - rr.ns = *H_FIELD(dns_domain, 0); + rr.ns = *H_FIELD(dns_domain_t, 0); break; case 3: // MD - rr.md = *H_FIELD(dns_domain, 0); + rr.md = *H_FIELD(dns_domain_t, 0); break; case 4: // MF - rr.md = *H_FIELD(dns_domain, 0); + rr.md = *H_FIELD(dns_domain_t, 0); break; case 5: // CNAME - rr.cname = *H_FIELD(dns_domain, 0); + rr.cname = *H_FIELD(dns_domain_t, 0); break; case 6: // SOA - rr.soa.mname = *H_FIELD(dns_domain, 0); - rr.soa.rname = *H_FIELD(dns_domain, 1); + rr.soa.mname = *H_FIELD(dns_domain_t, 0); + rr.soa.rname = *H_FIELD(dns_domain_t, 1); rr.soa.serial = p->ast->seq->elements[2]->uint; rr.soa.refresh = p->ast->seq->elements[3]->uint; rr.soa.retry = p->ast->seq->elements[4]->uint; @@ -124,13 +124,13 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { rr.soa.minimum = p->ast->seq->elements[6]->uint; break; case 7: // MB - rr.mb = *H_FIELD(dns_domain, 0); + rr.mb = *H_FIELD(dns_domain_t, 0); break; case 8: // MG - rr.mg = *H_FIELD(dns_domain, 0); + rr.mg = *H_FIELD(dns_domain_t, 0); break; case 9: // MR - rr.mr = *H_FIELD(dns_domain, 0); + rr.mr = *H_FIELD(dns_domain_t, 
0); break; case 10: // NULL rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->used); @@ -146,19 +146,19 @@ void set_rr(struct dns_rr rr, HCountedArray *rdata) { rr.wks.bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; break; case 12: // PTR - rr.ptr = *H_FIELD(dns_domain, 0); + rr.ptr = *H_FIELD(dns_domain_t, 0); break; case 13: // HINFO rr.hinfo.cpu = get_cs(p->ast->seq->elements[0]->seq); rr.hinfo.os = get_cs(p->ast->seq->elements[1]->seq); break; case 14: // MINFO - rr.minfo.rmailbx = *H_FIELD(dns_domain, 0); - rr.minfo.emailbx = *H_FIELD(dns_domain, 1); + rr.minfo.rmailbx = *H_FIELD(dns_domain_t, 0); + rr.minfo.emailbx = *H_FIELD(dns_domain_t, 1); break; case 15: // MX rr.mx.preference = p->ast->seq->elements[0]->uint; - rr.mx.exchange = *H_FIELD(dns_domain, 1); + rr.mx.exchange = *H_FIELD(dns_domain_t, 1); break; case 16: // TXT rr.txt.count = p->ast->seq->elements[0]->seq->used; @@ -186,14 +186,14 @@ const HParsedToken* act_header(const HParseResult *p) { .additional_count = fields[11]->uint }; - dns_header_t *header = H_MAKE(dns_header); + dns_header_t *header = H_MAKE(dns_header_t); *header = header_; - return H_MAKE_TOKEN(dns_header, header); + return H_MAKE_TOKEN(dns_header_t, header); } const HParsedToken* act_label(const HParseResult *p) { - dns_label_t *r = H_MAKE(dns_label); + dns_label_t *r = H_MAKE(dns_label_t); r->len = p->ast->seq->used; r->label = h_arena_malloc(p->arena, r->len + 1); @@ -201,38 +201,38 @@ const HParsedToken* act_label(const HParseResult *p) { r->label[i] = p->ast->seq->elements[i]->uint; r->label[r->len] = 0; - return H_MAKE_TOKEN(dns_label, r); + return H_MAKE_TOKEN(dns_label_t, r); } const HParsedToken* act_question(const HParseResult *p) { - dns_question_t *q = H_MAKE(dns_question); + dns_question_t *q = H_MAKE(dns_question_t); HParsedToken **fields = p->ast->seq->elements; // QNAME is a sequence of labels. Pack them into an array. 
q->qname.qlen = fields[0]->seq->used; q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); for(size_t i=0; iseq->used; i++) { - q->qname.labels[i] = *H_SEQ_INDEX(dns_label, fields[0], i); + q->qname.labels[i] = *H_SEQ_INDEX(dns_label_t, fields[0], i); } q->qtype = fields[1]->uint; q->qclass = fields[2]->uint; - return H_MAKE_TOKEN(dns_question, q); + return H_MAKE_TOKEN(dns_question_t, q); } const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); - dns_message_t *msg = H_MAKE(dns_message); + dns_message_t *msg = H_MAKE(dns_message_t); - dns_header_t *header = H_FIELD(dns_header, 0); + dns_header_t *header = H_FIELD(dns_header_t, 0); msg->header = *header; HParsedToken *qs = p->ast->seq->elements[1]; struct dns_question *questions = h_arena_malloc(p->arena, sizeof(struct dns_question)*(header->question_count)); for (size_t i=0; iquestion_count; ++i) { - questions[i] = *H_SEQ_INDEX(dns_question, qs, i); + questions[i] = *H_SEQ_INDEX(dns_question_t, qs, i); } msg->questions = questions; @@ -240,7 +240,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *answers = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->answer_count)); for (size_t i=0; ianswer_count; ++i) { - answers[i].name = *H_SEQ_INDEX(dns_domain, rrs+i, 0); + answers[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+i, 0); answers[i].type = rrs[i].seq->elements[1]->uint; answers[i].class = rrs[i].seq->elements[2]->uint; answers[i].ttl = rrs[i].seq->elements[3]->uint; @@ -252,7 +252,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *authority = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->authority_count)); for (size_t i=0, j=header->answer_count; iauthority_count; ++i, ++j) { - authority[i].name = *H_SEQ_INDEX(dns_domain, rrs+j, 0); + authority[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+j, 0); authority[i].type = rrs[j].seq->elements[1]->uint; authority[i].class = 
rrs[j].seq->elements[2]->uint; authority[i].ttl = rrs[j].seq->elements[3]->uint; @@ -264,7 +264,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *additional = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->additional_count)); for (size_t i=0, j=header->answer_count+header->authority_count; iadditional_count; ++i, ++j) { - additional[i].name = *H_SEQ_INDEX(dns_domain, rrs+j, 0); + additional[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+j, 0); additional[i].type = rrs[j].seq->elements[1]->uint; additional[i].class = rrs[j].seq->elements[2]->uint; additional[i].ttl = rrs[j].seq->elements[3]->uint; @@ -273,7 +273,7 @@ const HParsedToken* act_message(const HParseResult *p) { } msg->additional = additional; - return H_MAKE_TOKEN(dns_message, msg); + return H_MAKE_TOKEN(dns_message_t, msg); } #define act_hdzero act_ignore diff --git a/examples/dns.h b/examples/dns.h index 913b186..57602b0 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -1,13 +1,13 @@ #include "../src/hammer.h" enum DNSTokenType_ { - TT_dns_message = TT_USER, - TT_dns_header, - TT_dns_label, - TT_dns_qname, - TT_dns_question, - TT_dns_rr, - TT_dns_domain + TT_dns_message_t = TT_USER, + TT_dns_header_t, + TT_dns_label_t, + TT_dns_qname_t, + TT_dns_question_t, + TT_dns_rr_t, + TT_dns_domain_t }; typedef struct dns_header { diff --git a/examples/glue.h b/examples/glue.h index d8776b3..88b788d 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -18,7 +18,7 @@ const HParsedToken *act_index0(const HParseResult *p); HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); #define H_MAKE(TYP) \ - ((TYP ## _t *) h_arena_malloc(p->arena, sizeof(TYP ## _t))) + ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) #define H_MAKE_TOKEN(TYP, VAL) \ h_make_token(p->arena, TT_ ## TYP, VAL) @@ -28,7 +28,7 @@ HParsedToken *h_seq_index(const HParsedToken *p, size_t i); void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); #define H_SEQ_INDEX(TYP, SEQ, IDX) \ 
- ((TYP ## _t *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) + ((TYP *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) #define H_FIELD(TYP, IDX) \ H_SEQ_INDEX(TYP, p->ast, IDX) From 587143eec190b3afdfb458bd4ee1007d04259819 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 16 Jan 2013 18:20:23 +0100 Subject: [PATCH 067/125] cleanup and bugfixing on domain parser --- examples/dns_common.c | 18 +++++++------ examples/glue.c | 61 ++++++++++++++++++++++++++++++++++++++++++- examples/glue.h | 7 +++++ 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/examples/dns_common.c b/examples/dns_common.c index f05a3e6..9f02fc6 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -44,14 +44,16 @@ const HParsedToken* act_domain(const HParseResult *p) { } if(arr) { - dns_domain_t *val = H_MAKE(dns_domain); // dns_domain_t is char* + dns_domain_t *val = H_MAKE(dns_domain_t); // dns_domain_t is char* *val = arr; - ret = H_MAKE_TOKEN(dns_domain, val); + ret = H_MAKE_TOKEN(dns_domain_t, val); } return ret; } +#define act_label_ act_flatten + const HParser* init_domain() { static const HParser *ret = NULL; if (ret) @@ -60,12 +62,12 @@ const HParser* init_domain() { H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); - H_RULE (label, h_attr_bool(h_sequence(letter, - h_optional(h_sequence(h_optional(ldh_str), - let_dig, - NULL)), - NULL), - validate_label)); + H_ARULE(label_, h_sequence(letter, + h_optional(h_sequence(h_optional(ldh_str), + let_dig, + NULL)), + NULL)); + H_RULE (label, h_attr_bool(label_, validate_label)); H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); H_ARULE(domain, h_choice(subdomain, h_ch(' '), NULL)); diff --git a/examples/glue.c b/examples/glue.c index 767f853..b635d7c 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -1,4 +1,5 @@ #include "glue.h" +#include "../src/internal.h" // for 
h_carray_* // The action equivalent of h_ignore. @@ -31,10 +32,68 @@ const HParsedToken *act_index0(const HParseResult *p) return act_index(0, p); } -HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (void *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i; iseq->used; i++) + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_token_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i; iseq->used; i++) { + h_seq_append(ret, h_token_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} + +// Action version of h_token_flatten. 
+const HParsedToken *act_flatten(const HParseResult *p) { + return h_token_flatten(p->arena, p->ast); +} + +HParsedToken *h_make_token_(HArena *arena, HTokenType type) { HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); ret->token_type = type; + return ret; +} + +HParsedToken *h_make_token_seq(HArena *arena) +{ + return h_make_token_(arena, TT_SEQUENCE); +} + +HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) +{ + HParsedToken *ret = h_make_token_(arena, type); ret->user = value; return ret; } diff --git a/examples/glue.h b/examples/glue.h index 88b788d..c65f7b4 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -14,8 +14,15 @@ const HParsedToken *act_ignore(const HParseResult *p); const HParsedToken *act_index(int i, const HParseResult *p); const HParsedToken *act_index0(const HParseResult *p); +const HParsedToken *act_flatten(const HParseResult *p); + +const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p); + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_token_seq(HArena *arena); #define H_MAKE(TYP) \ ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) From f0155d18d670c2397235fd3566b03994352a599b Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 16 Jan 2013 22:42:47 +0100 Subject: [PATCH 068/125] pull packing of RRs out of act_message into act_rr --- examples/dns.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 026287c..afeb7b1 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -204,6 +204,21 @@ const HParsedToken* act_label(const HParseResult *p) { return H_MAKE_TOKEN(dns_label_t, r); } +const HParsedToken* act_rr(const HParseResult *p) { + dns_rr_t *rr = H_MAKE(dns_rr_t); + + rr->name = *H_FIELD(dns_domain_t, 0); + rr->type = p->ast->seq->elements[1]->uint; + rr->class = p->ast->seq->elements[2]->uint; + rr->ttl = p->ast->seq->elements[3]->uint; + rr->rdlength = p->ast->seq->elements[4]->seq->used; + + // Parse and pack RDATA. + set_rr(*rr, p->ast->seq->elements[4]->seq); + + return H_MAKE_TOKEN(dns_rr_t, rr); +} + const HParsedToken* act_question(const HParseResult *p) { dns_question_t *q = H_MAKE(dns_question_t); HParsedToken **fields = p->ast->seq->elements; @@ -225,9 +240,11 @@ const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); dns_message_t *msg = H_MAKE(dns_message_t); + // Copy header into message struct. dns_header_t *header = H_FIELD(dns_header_t, 0); msg->header = *header; + // Copy questions into message struct. HParsedToken *qs = p->ast->seq->elements[1]; struct dns_question *questions = h_arena_malloc(p->arena, sizeof(struct dns_question)*(header->question_count)); @@ -236,40 +253,28 @@ const HParsedToken* act_message(const HParseResult *p) { } msg->questions = questions; + // Copy answer RRs into message struct. 
HParsedToken *rrs = p->ast->seq->elements[2]; struct dns_rr *answers = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->answer_count)); for (size_t i=0; ianswer_count; ++i) { - answers[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+i, 0); - answers[i].type = rrs[i].seq->elements[1]->uint; - answers[i].class = rrs[i].seq->elements[2]->uint; - answers[i].ttl = rrs[i].seq->elements[3]->uint; - answers[i].rdlength = rrs[i].seq->elements[4]->seq->used; - set_rr(answers[i], rrs[i].seq->elements[4]->seq); + answers[i] = *H_SEQ_INDEX(dns_rr_t, rrs, i); } msg->answers = answers; + // Copy authority RRs into message struct. struct dns_rr *authority = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->authority_count)); for (size_t i=0, j=header->answer_count; iauthority_count; ++i, ++j) { - authority[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+j, 0); - authority[i].type = rrs[j].seq->elements[1]->uint; - authority[i].class = rrs[j].seq->elements[2]->uint; - authority[i].ttl = rrs[j].seq->elements[3]->uint; - authority[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(authority[i], rrs[j].seq->elements[4]->seq); + authority[i] = *H_SEQ_INDEX(dns_rr_t, rrs, j); } msg->authority = authority; + // Copy additional RRs into message struct. 
struct dns_rr *additional = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->additional_count)); for (size_t i=0, j=header->answer_count+header->authority_count; iadditional_count; ++i, ++j) { - additional[i].name = *H_SEQ_INDEX(dns_domain_t, rrs+j, 0); - additional[i].type = rrs[j].seq->elements[1]->uint; - additional[i].class = rrs[j].seq->elements[2]->uint; - additional[i].ttl = rrs[j].seq->elements[3]->uint; - additional[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(additional[i], rrs[j].seq->elements[4]->seq); + additional[i] = *H_SEQ_INDEX(dns_rr_t, rrs, j); } msg->additional = additional; @@ -320,7 +325,7 @@ const HParser* init_parser() { NULL)); H_ARULE(question, h_sequence(qname, qtype, qclass, NULL)); H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); - H_RULE (rr, h_sequence(domain, // NAME + H_ARULE (rr, h_sequence(domain, // NAME type, // TYPE class, // CLASS h_uint32(), // TTL From 588d534f3249e2576efe31e0794b91cce6ef3bf5 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 00:50:15 +0100 Subject: [PATCH 069/125] pull selection and initialization of RDATA parser out of set_rdata --- examples/dns.c | 35 ++++++----------------------------- examples/rr.c | 33 +++++++++++++++++++++++++++++++++ examples/rr.h | 2 ++ 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index afeb7b1..4174206 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -59,39 +59,16 @@ uint8_t** get_txt(const HCountedArray *arr) { return ret; } -void set_rr(struct dns_rr rr, HCountedArray *rdata) { +void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) data[i] = rdata->elements[i]->uint; - // Mapping numeric RR types (as indices) to parsers - const HParser *parsers[] = { - NULL, // there is no type 0 - init_a(), // 1 - init_ns(), - init_md(), - init_mf(), - init_cname(), // 5 - init_soa(), - init_mb(), - init_mg(), - init_mr(), - init_null(), // 10 - init_wks(), - init_ptr(), - init_hinfo(), - init_minfo(), - init_mx(), // 15 - init_txt() - }; - - // Parse rdata if possible. + // Parse RDATA if possible. const HParseResult *p = NULL; - if (rr.type < sizeof(parsers)) { - const HParser *parser = parsers[rr.type]; - if (parser) - p = h_parse(parser, (const uint8_t*)data, rdata->used); - } + const HParser *parser = init_rdata(rr.type); + if (parser) + p = h_parse(parser, (const uint8_t*)data, rdata->used); // If the RR doesn't parse, set its type to 0. if (!p) @@ -214,7 +191,7 @@ const HParsedToken* act_rr(const HParseResult *p) { rr->rdlength = p->ast->seq->elements[4]->seq->used; // Parse and pack RDATA. 
- set_rr(*rr, p->ast->seq->elements[4]->seq); + set_rdata(*rr, p->ast->seq->elements[4]->seq); return H_MAKE_TOKEN(dns_rr_t, rr); } diff --git a/examples/rr.c b/examples/rr.c index 8dae885..5feaf37 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -5,6 +5,39 @@ #define false 0 #define true 1 +#define RDATA_TYPE_MAX 16 +const HParser* init_rdata(uint16_t type) { + static const HParser *parsers[RDATA_TYPE_MAX+1]; + static int inited = 0; + + if (type > RDATA_TYPE_MAX) + return NULL; + + if (inited) + return parsers[type]; + + parsers[ 0] = NULL; // there is no type 0 + parsers[ 1] = init_a(); + parsers[ 2] = init_ns(); + parsers[ 3] = init_md(); + parsers[ 4] = init_mf(); + parsers[ 5] = init_cname(); + parsers[ 6] = init_soa(); + parsers[ 7] = init_mb(); + parsers[ 8] = init_mg(); + parsers[ 9] = init_mr(); + parsers[10] = init_null(); + parsers[11] = init_wks(); + parsers[12] = init_ptr(); + parsers[13] = init_hinfo(); + parsers[14] = init_minfo(); + parsers[15] = init_mx(); + parsers[16] = init_txt(); + + inited = 1; + return parsers[type]; +} + const HParser* init_cname() { static const HParser *cname = NULL; if (cname) diff --git a/examples/rr.h b/examples/rr.h index 54172f8..ccac1a6 100644 --- a/examples/rr.h +++ b/examples/rr.h @@ -3,6 +3,8 @@ #include "../src/hammer.h" +const HParser* init_rdata(uint16_t type); + const HParser* init_cname(); const HParser* init_hinfo(); const HParser* init_mb(); From 6515a80c3aade81714cebd892f825d279e16ed9f Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 01:17:37 +0100 Subject: [PATCH 070/125] condense rr.c --- examples/dns.c | 2 +- examples/rr.c | 301 +++++++++++-------------------------------------- examples/rr.h | 17 --- 3 files changed, 67 insertions(+), 253 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 4174206..81c2488 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -69,7 +69,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { const HParser *parser = init_rdata(rr.type); if (parser) p = h_parse(parser, (const uint8_t*)data, rdata->used); - + // If the RR doesn't parse, set its type to 0. if (!p) rr.type = 0; diff --git a/examples/rr.c b/examples/rr.c index 5feaf37..84022ab 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -5,248 +5,79 @@ #define false 0 #define true 1 -#define RDATA_TYPE_MAX 16 -const HParser* init_rdata(uint16_t type) { - static const HParser *parsers[RDATA_TYPE_MAX+1]; - static int inited = 0; - - if (type > RDATA_TYPE_MAX) - return NULL; - - if (inited) - return parsers[type]; - - parsers[ 0] = NULL; // there is no type 0 - parsers[ 1] = init_a(); - parsers[ 2] = init_ns(); - parsers[ 3] = init_md(); - parsers[ 4] = init_mf(); - parsers[ 5] = init_cname(); - parsers[ 6] = init_soa(); - parsers[ 7] = init_mb(); - parsers[ 8] = init_mg(); - parsers[ 9] = init_mr(); - parsers[10] = init_null(); - parsers[11] = init_wks(); - parsers[12] = init_ptr(); - parsers[13] = init_hinfo(); - parsers[14] = init_minfo(); - parsers[15] = init_mx(); - parsers[16] = init_txt(); - - inited = 1; - return parsers[type]; -} - -const HParser* init_cname() { - static const HParser *cname = NULL; - if (cname) - return cname; - - cname = h_sequence(init_domain(), - h_end_p(), - NULL); - - return cname; -} - -const HParser* init_hinfo() { - static const HParser *hinfo = NULL; - if (hinfo) - return hinfo; - - const HParser* cstr = init_character_string(); - - hinfo = h_sequence(cstr, - cstr, - h_end_p(), - NULL); - - return hinfo; -} - -const HParser* 
init_mb() { - static const HParser *mb = NULL; - if (mb) - return mb; - - mb = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mb; -} - -const HParser* init_md() { - static const HParser *md = NULL; - if (md) - return md; - - md = h_sequence(init_domain(), - h_end_p, - NULL); - - return md; -} - -const HParser* init_mf() { - static const HParser *mf = NULL; - if (mf) - return mf; - - mf = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mf; -} - -const HParser* init_mg() { - static const HParser *mg = NULL; - if (mg) - return mg; - - mg = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mg; -} - -const HParser* init_minfo() { - static const HParser *minfo = NULL; - if (minfo) - return minfo; - - const HParser* domain = init_domain(); - - minfo = h_sequence(domain, - domain, - h_end_p(), - NULL); - - return minfo; -} - -const HParser* init_mr() { - static const HParser *mr = NULL; - if (mr) - return mr; - - mr = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mr; -} - -const HParser* init_mx() { - static const HParser *mx = NULL; - if (mx) - return mx; - - mx = h_sequence(h_uint16(), - init_domain(), - h_end_p(), - NULL); - - return mx; -} - bool validate_null(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; return (65536 > p->ast->seq->used); } -const HParser* init_null() { - static const HParser *null_ = NULL; - if (null_) - return null_; +#define RDATA_TYPE_MAX 16 +const HParser* init_rdata(uint16_t type) { + static const HParser *parsers[RDATA_TYPE_MAX+1]; + static int inited = 0; - null_ = h_attr_bool(h_many(h_uint8()), validate_null); - - return null_; -} - -const HParser* init_ns() { - static const HParser *ns = NULL; - if (ns) - return ns; - - ns = h_sequence(init_domain(), - h_end_p(), - NULL); - - return ns; -} - -const HParser* init_ptr() { - static const HParser *ptr = NULL; - if (ptr) - return ptr; + if (type >= sizeof(parsers)) + return NULL; - ptr = h_sequence(init_domain(), - h_end_p(), 
- NULL); + if (inited) + return parsers[type]; - return ptr; -} - -const HParser* init_soa() { - static const HParser *soa = NULL; - if (soa) - return soa; - - const HParser *domain = init_domain(); - - soa = h_sequence(domain, // MNAME - domain, // RNAME - h_uint32(), // SERIAL - h_uint32(), // REFRESH - h_uint32(), // RETRY - h_uint32(), // EXPIRE - h_uint32(), // MINIMUM - h_end_p(), - NULL); - - return soa; -} - -const HParser* init_txt() { - static const HParser *txt = NULL; - if (txt) - return txt; - - txt = h_sequence(h_many1(init_character_string()), - h_end_p(), - NULL); - - return txt; -} - -const HParser* init_a() { - static const HParser *a = NULL; - if (a) - return a; - - a = h_sequence(h_uint32(), - h_end_p(), - NULL); - - return a; -} - -const HParser* init_wks() { - static const HParser *wks = NULL; - if (wks) - return wks; - - wks = h_sequence(h_uint32(), - h_uint8(), - h_many(h_uint8()), - h_end_p(), - NULL); - - return wks; + + H_RULE (domain, init_domain()); + H_RULE (cstr, init_character_string()); + + H_RULE (a, h_uint32()); + H_RULE (ns, domain); + H_RULE (md, domain); + H_RULE (mf, domain); + H_RULE (cname, domain); + H_RULE (soa, h_sequence(domain, // MNAME + domain, // RNAME + h_uint32(), // SERIAL + h_uint32(), // REFRESH + h_uint32(), // RETRY + h_uint32(), // EXPIRE + h_uint32(), // MINIMUM + NULL)); + H_RULE (mb, domain); + H_RULE (mg, domain); + H_RULE (mr, domain); + H_RULE (null_, h_attr_bool(h_many(h_uint8()), validate_null)); + H_RULE (wks, h_sequence(h_uint32(), + h_uint8(), + h_many(h_uint8()), + NULL)); + H_RULE (ptr, domain); + H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); + H_RULE (minfo, h_sequence(domain, domain, NULL)); + H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); + H_RULE (txt, h_many1(cstr)); + + + parsers[ 0] = NULL; // there is no type 0 + parsers[ 1] = a; + parsers[ 2] = ns; + parsers[ 3] = md; + parsers[ 4] = mf; + parsers[ 5] = cname; + parsers[ 6] = soa; + parsers[ 7] = mb; + parsers[ 8] = mg; + parsers[ 9] 
= mr; + parsers[10] = null_; + parsers[11] = wks; + parsers[12] = ptr; + parsers[13] = hinfo; + parsers[14] = minfo; + parsers[15] = mx; + parsers[16] = txt; + + // All parsers must consume their input exactly. + for(uint16_t i; i Date: Thu, 17 Jan 2013 17:44:41 +0100 Subject: [PATCH 071/125] add "VRULE" family of macros to attach validations like actions --- examples/dns.c | 95 +++++++++++++++++++++++++------------------------ examples/glue.h | 6 ++++ 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 81c2488..b02a4ba 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -12,10 +12,10 @@ /// -// Semantic Actions and Validations +// Validations /// -bool is_zero(HParseResult *p) { +bool validate_hdzero(HParseResult *p) { if (TT_UINT != p->ast->token_type) return false; return (0 == p->ast->uint); @@ -25,7 +25,7 @@ bool is_zero(HParseResult *p) { * Every DNS message should have QDCOUNT entries in the question * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. 
*/ -bool validate_dns(HParseResult *p) { +bool validate_message(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; dns_header_t *header = H_FIELD(dns_header_t, 0); @@ -42,6 +42,10 @@ bool validate_dns(HParseResult *p) { return true; } +/// +// Semantic Actions +/// + uint8_t* get_cs(const HCountedArray *arr) { uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); for (size_t i=0; iused; ++i) @@ -271,49 +275,48 @@ const HParser* init_parser() { if (ret) return ret; - H_RULE (domain, init_domain()); - H_ARULE(hdzero, h_attr_bool(h_bits(3, false), is_zero)); - H_ARULE(header, h_sequence(h_bits(16, false), // ID - h_bits(1, false), // QR - h_bits(4, false), // opcode - h_bits(1, false), // AA - h_bits(1, false), // TC - h_bits(1, false), // RD - h_bits(1, false), // RA - hdzero, // Z - h_bits(4, false), // RCODE - h_uint16(), // QDCOUNT - h_uint16(), // ANCOUNT - h_uint16(), // NSCOUNT - h_uint16(), // ARCOUNT - NULL)); - H_RULE (type, h_int_range(h_uint16(), 1, 16)); - H_RULE (qtype, h_choice(type, - h_int_range(h_uint16(), 252, 255), - NULL)); - H_RULE (class, h_int_range(h_uint16(), 1, 4)); - H_RULE (qclass, h_choice(class, - h_int_range(h_uint16(), 255, 255), - NULL)); - H_RULE (len, h_int_range(h_uint8(), 1, 255)); - H_ARULE(label, h_length_value(len, h_uint8())); - H_ARULE(qname, h_sequence(h_many1(label), - h_ch('\x00'), - NULL)); - H_ARULE(question, h_sequence(qname, qtype, qclass, NULL)); - H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); - H_ARULE (rr, h_sequence(domain, // NAME - type, // TYPE - class, // CLASS - h_uint32(), // TTL - rdata, // RDLENGTH+RDATA - NULL)); - H_ARULE(message, h_attr_bool(h_sequence(header, - h_many(question), - h_many(rr), - h_end_p(), - NULL), - validate_dns)); + H_RULE (domain, init_domain()); + H_AVRULE(hdzero, h_bits(3, false)); + H_ARULE (header, h_sequence(h_bits(16, false), // ID + h_bits(1, false), // QR + h_bits(4, false), // opcode + h_bits(1, false), // AA + h_bits(1, false), 
// TC + h_bits(1, false), // RD + h_bits(1, false), // RA + hdzero, // Z + h_bits(4, false), // RCODE + h_uint16(), // QDCOUNT + h_uint16(), // ANCOUNT + h_uint16(), // NSCOUNT + h_uint16(), // ARCOUNT + NULL)); + H_RULE (type, h_int_range(h_uint16(), 1, 16)); + H_RULE (qtype, h_choice(type, + h_int_range(h_uint16(), 252, 255), + NULL)); + H_RULE (class, h_int_range(h_uint16(), 1, 4)); + H_RULE (qclass, h_choice(class, + h_int_range(h_uint16(), 255, 255), + NULL)); + H_RULE (len, h_int_range(h_uint8(), 1, 255)); + H_ARULE (label, h_length_value(len, h_uint8())); + H_ARULE (qname, h_sequence(h_many1(label), + h_ch('\x00'), + NULL)); + H_ARULE (question, h_sequence(qname, qtype, qclass, NULL)); + H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); + H_ARULE (rr, h_sequence(domain, // NAME + type, // TYPE + class, // CLASS + h_uint32(), // TTL + rdata, // RDLENGTH+RDATA + NULL)); + H_AVRULE(message, h_sequence(header, + h_many(question), + h_many(rr), + h_end_p(), + NULL)); ret = message; return ret; diff --git a/examples/glue.h b/examples/glue.h index c65f7b4..7939a48 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -10,6 +10,12 @@ #define H_RULE(rule, def) const HParser *rule = def #define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) const HParser *rule = \ + h_attr_bool(def, validate_ ## rule) +#define H_VARULE(rule, def) const HParser *rule = \ + h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) +#define H_AVRULE(rule, def) const HParser *rule = \ + h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) const HParsedToken *act_ignore(const HParseResult *p); const HParsedToken *act_index(int i, const HParseResult *p); From dd574bd735ba1813e0fe1a2a3b0a80967481eed8 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 18:31:13 +0100 Subject: [PATCH 072/125] give glue actions an h_ prefix and add H_ACT_APPLY macro --- examples/dns.c | 3 ++- examples/dns_common.c | 4 +++- examples/dns_common.h | 2 ++ examples/glue.c | 11 +++-------- examples/glue.h | 14 ++++++++++---- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index b02a4ba..ba1f592 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -262,10 +262,11 @@ const HParsedToken* act_message(const HParseResult *p) { return H_MAKE_TOKEN(dns_message_t, msg); } -#define act_hdzero act_ignore +#define act_hdzero h_act_ignore #define act_qname act_index0 + /// // Parser / Grammar /// diff --git a/examples/dns_common.c b/examples/dns_common.c index 9f02fc6..57f81aa 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -5,6 +5,8 @@ #define false 0 #define true 1 +H_ACT_APPLY(act_index0, h_act_index, 0) + /** * A label can't be more than 63 characters. */ @@ -52,7 +54,7 @@ const HParsedToken* act_domain(const HParseResult *p) { return ret; } -#define act_label_ act_flatten +#define act_label_ h_act_flatten const HParser* init_domain() { static const HParser *ret = NULL; diff --git a/examples/dns_common.h b/examples/dns_common.h index 2d796f8..6b04519 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -7,4 +7,6 @@ const HParser* init_domain(); const HParser* init_character_string(); +const HParsedToken* act_index0(const HParseResult *p); + #endif diff --git a/examples/glue.c b/examples/glue.c index b635d7c..80864fc 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -3,13 +3,13 @@ // The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) +const HParsedToken *h_act_ignore(const HParseResult *p) { return NULL; } // Helper to build HAction's that pick one index out of a sequence. 
-const HParsedToken *act_index(int i, const HParseResult *p) +const HParsedToken *h_act_index(int i, const HParseResult *p) { if(!p) return NULL; @@ -27,11 +27,6 @@ const HParsedToken *act_index(int i, const HParseResult *p) return tok->seq->elements[i]; } -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) { assert(xs != NULL); @@ -75,7 +70,7 @@ const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p) } // Action version of h_token_flatten. -const HParsedToken *act_flatten(const HParseResult *p) { +const HParsedToken *h_act_flatten(const HParseResult *p) { return h_token_flatten(p->arena, p->ast); } diff --git a/examples/glue.h b/examples/glue.h index 7939a48..2b60646 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -17,10 +17,16 @@ #define H_AVRULE(rule, def) const HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) -const HParsedToken *act_ignore(const HParseResult *p); -const HParsedToken *act_index(int i, const HParseResult *p); -const HParsedToken *act_index0(const HParseResult *p); -const HParsedToken *act_flatten(const HParseResult *p); +const HParsedToken *h_act_ignore(const HParseResult *p); +const HParsedToken *h_act_index(int i, const HParseResult *p); +const HParsedToken *h_act_flatten(const HParseResult *p); + +// Define 'myaction' as a specialization of 'paction' by supplying the leading +// parameters. +#define H_ACT_APPLY(myaction, paction, ...) \ + const HParsedToken *myaction(const HParseResult *p) { \ + return paction(__VA_ARGS__, p); \ + } const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p); From 954a762fc0c184627cfe3273d73739b7ee8a9855 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 18:34:11 +0100 Subject: [PATCH 073/125] replace remaining h_attr_bool calls with VRULEs --- examples/dns_common.c | 25 ++++++++++++------------- examples/rr.c | 4 ++-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/dns_common.c b/examples/dns_common.c index 57f81aa..3ea5594 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -16,6 +16,8 @@ bool validate_label(HParseResult *p) { return (64 > p->ast->seq->used); } +#define act_label h_act_flatten + const HParsedToken* act_domain(const HParseResult *p) { const HParsedToken *ret = NULL; char *arr = NULL; @@ -54,24 +56,21 @@ const HParsedToken* act_domain(const HParseResult *p) { return ret; } -#define act_label_ h_act_flatten - const HParser* init_domain() { static const HParser *ret = NULL; if (ret) return ret; - H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); - H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); - H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); - H_ARULE(label_, h_sequence(letter, - h_optional(h_sequence(h_optional(ldh_str), - let_dig, - NULL)), - NULL)); - H_RULE (label, h_attr_bool(label_, validate_label)); - H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); - H_ARULE(domain, h_choice(subdomain, h_ch(' '), NULL)); + H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); + H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); + H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); + H_VARULE(label, h_sequence(letter, + h_optional(h_sequence(h_optional(ldh_str), + let_dig, + NULL)), + NULL)); + H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); + H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL)); ret = domain; return ret; diff --git a/examples/rr.c b/examples/rr.c index 84022ab..f252953 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -42,7 +42,7 @@ const HParser* init_rdata(uint16_t type) { H_RULE (mb, domain); H_RULE (mg, domain); 
H_RULE (mr, domain); - H_RULE (null_, h_attr_bool(h_many(h_uint8()), validate_null)); + H_VRULE(null, h_many(h_uint8())); H_RULE (wks, h_sequence(h_uint32(), h_uint8(), h_many(h_uint8()), @@ -64,7 +64,7 @@ const HParser* init_rdata(uint16_t type) { parsers[ 7] = mb; parsers[ 8] = mg; parsers[ 9] = mr; - parsers[10] = null_; + parsers[10] = null; parsers[11] = wks; parsers[12] = ptr; parsers[13] = hinfo; From b953b9a763a7357f602cea85067804e23f7962eb Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 17 Jan 2013 19:13:06 +0100 Subject: [PATCH 074/125] move get_txt into an action building the whole dns_rr_txt structure --- examples/dns.c | 15 ++------------- examples/dns.h | 6 ++++++ examples/rr.c | 21 ++++++++++++++++++++- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index ba1f592..637b2a1 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -53,16 +53,6 @@ uint8_t* get_cs(const HCountedArray *arr) { return ret; } -uint8_t** get_txt(const HCountedArray *arr) { - uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); - for (size_t i=0; iused; ++i) { - uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); - for (size_t j=0; jelements[i]->seq->used; ++j) - tmp[j] = arr->elements[i]->seq->elements[j]->uint; - } - return ret; -} - void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) @@ -142,8 +132,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.mx.exchange = *H_FIELD(dns_domain_t, 1); break; case 16: // TXT - rr.txt.count = p->ast->seq->elements[0]->seq->used; - rr.txt.txt_data = get_txt(p->ast->seq->elements[0]->seq); + rr.txt = *(dns_rr_txt_t *)p->ast; break; default: break; @@ -348,7 +337,7 @@ int start_listening() { const int TYPE_MAX = 16; typedef const char* cstr; -const char* TYPE_STR[17] = { +static const char* TYPE_STR[17] = { 
"nil", "A", "NS", "MD", "MF", "CNAME", "SOA", "MB", "MG", "MR", "NULL", "WKS", diff --git a/examples/dns.h b/examples/dns.h index 57602b0..f3e023c 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -7,6 +7,12 @@ enum DNSTokenType_ { TT_dns_qname_t, TT_dns_question_t, TT_dns_rr_t, + TT_dns_rr_txt_t, + TT_dns_rr_hinfo_t, + TT_dns_rr_minfo_t, + TT_dns_rr_mx_t, + TT_dns_rr_soa_t, + TT_dns_rr_wks_t, TT_dns_domain_t }; diff --git a/examples/rr.c b/examples/rr.c index f252953..3619ca1 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -1,5 +1,6 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #include "rr.h" #define false 0 @@ -11,6 +12,24 @@ bool validate_null(HParseResult *p) { return (65536 > p->ast->seq->used); } +const HParsedToken *act_txt(const HParseResult *p) { + dns_rr_txt_t *txt = H_MAKE(dns_rr_txt_t); + + const HCountedArray *arr = p->ast->seq->elements[0]->seq; + uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); + for (size_t i=0; iused; ++i) { + uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); + for (size_t j=0; jelements[i]->seq->used; ++j) + tmp[j] = arr->elements[i]->seq->elements[j]->uint; + ret[i] = tmp; + } + + txt->count = p->ast->seq->elements[0]->seq->used; + txt->txt_data = ret; + + return H_MAKE_TOKEN(dns_rr_txt_t, txt); +} + #define RDATA_TYPE_MAX 16 const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; @@ -51,7 +70,7 @@ const HParser* init_rdata(uint16_t type) { H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); H_RULE (minfo, h_sequence(domain, domain, NULL)); H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); - H_RULE (txt, h_many1(cstr)); + H_ARULE(txt, h_many1(cstr)); parsers[ 0] = NULL; // there is no type 0 From 0304c75bba2a94e2ffb24fe29420c58e580decc7 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 19:15:33 +0100 Subject: [PATCH 075/125] correct improper uses of H_FIELD on domain-only RDATAs --- examples/dns.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 637b2a1..e3c8bd9 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -74,16 +74,16 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.a = p->ast->seq->elements[0]->uint; break; case 2: // NS - rr.ns = *H_FIELD(dns_domain_t, 0); + rr.ns = *(dns_domain_t *)p->ast->user; break; case 3: // MD - rr.md = *H_FIELD(dns_domain_t, 0); + rr.md = *(dns_domain_t *)p->ast->user; break; case 4: // MF - rr.md = *H_FIELD(dns_domain_t, 0); + rr.md = *(dns_domain_t *)p->ast->user; break; case 5: // CNAME - rr.cname = *H_FIELD(dns_domain_t, 0); + rr.cname = *(dns_domain_t *)p->ast->user; break; case 6: // SOA rr.soa.mname = *H_FIELD(dns_domain_t, 0); @@ -95,13 +95,13 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.soa.minimum = p->ast->seq->elements[6]->uint; break; case 7: // MB - rr.mb = *H_FIELD(dns_domain_t, 0); + rr.mb = *(dns_domain_t *)p->ast->user; break; case 8: // MG - rr.mg = *H_FIELD(dns_domain_t, 0); + rr.mg = *(dns_domain_t *)p->ast->user; break; case 9: // MR - rr.mr = *H_FIELD(dns_domain_t, 0); + rr.mr = *(dns_domain_t *)p->ast->user; break; case 10: // NULL rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->used); @@ -117,7 +117,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.wks.bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; break; case 12: // PTR - rr.ptr = *H_FIELD(dns_domain_t, 0); + rr.ptr = *(dns_domain_t *)p->ast->user; break; case 13: // HINFO rr.hinfo.cpu = get_cs(p->ast->seq->elements[0]->seq); @@ -132,7 +132,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.mx.exchange = *H_FIELD(dns_domain_t, 1); break; case 16: // TXT - rr.txt = *(dns_rr_txt_t *)p->ast; + rr.txt = *(dns_rr_txt_t *)p->ast->user; 
break; default: break; From 8fbc137223fbd6d16dd3f922f948af67a283fd11 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 17 Jan 2013 20:43:29 +0100 Subject: [PATCH 076/125] move get_cs into an action --- examples/dns.c | 11 ++--------- examples/dns.h | 12 +++++++----- examples/rr.c | 17 +++++++++++++++-- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index e3c8bd9..3f4ed9b 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -46,13 +46,6 @@ bool validate_message(HParseResult *p) { // Semantic Actions /// -uint8_t* get_cs(const HCountedArray *arr) { - uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); - for (size_t i=0; iused; ++i) - ret[i] = arr->elements[i]->uint; - return ret; -} - void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) @@ -120,8 +113,8 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.ptr = *(dns_domain_t *)p->ast->user; break; case 13: // HINFO - rr.hinfo.cpu = get_cs(p->ast->seq->elements[0]->seq); - rr.hinfo.os = get_cs(p->ast->seq->elements[1]->seq); + rr.hinfo.cpu = *H_FIELD(dns_cstr_t, 0); + rr.hinfo.os = *H_FIELD(dns_cstr_t, 1); break; case 14: // MINFO rr.minfo.rmailbx = *H_FIELD(dns_domain_t, 0); diff --git a/examples/dns.h b/examples/dns.h index f3e023c..2cff916 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -13,9 +13,13 @@ enum DNSTokenType_ { TT_dns_rr_mx_t, TT_dns_rr_soa_t, TT_dns_rr_wks_t, - TT_dns_domain_t + TT_dns_domain_t, + TT_dns_cstr_t }; +typedef char *dns_domain_t; +typedef uint8_t *dns_cstr_t; + typedef struct dns_header { uint16_t id; bool qr, aa, tc, rd, ra; @@ -43,8 +47,8 @@ typedef struct dns_question { } dns_question_t; typedef struct { - uint8_t* cpu; - uint8_t* os; + dns_cstr_t cpu; + dns_cstr_t os; } dns_rr_hinfo_t; typedef struct { @@ -105,8 +109,6 @@ typedef struct dns_rr { }; } dns_rr_t; -typedef char *dns_domain_t; 
- typedef struct dns_message { dns_header_t header; dns_question_t *questions; diff --git a/examples/rr.c b/examples/rr.c index 3619ca1..42b4648 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -15,7 +15,7 @@ bool validate_null(HParseResult *p) { const HParsedToken *act_txt(const HParseResult *p) { dns_rr_txt_t *txt = H_MAKE(dns_rr_txt_t); - const HCountedArray *arr = p->ast->seq->elements[0]->seq; + const HCountedArray *arr = p->ast->seq; uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); for (size_t i=0; iused; ++i) { uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); @@ -30,6 +30,19 @@ const HParsedToken *act_txt(const HParseResult *p) { return H_MAKE_TOKEN(dns_rr_txt_t, txt); } +const HParsedToken* act_cstr(const HParseResult *p) { + dns_cstr_t *cs = H_MAKE(dns_cstr_t); + + const HCountedArray *arr = p->ast->seq; + uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); + for (size_t i=0; iused; ++i) + ret[i] = arr->elements[i]->uint; + assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? + *cs = ret; + + return H_MAKE_TOKEN(dns_cstr_t, cs); +} + #define RDATA_TYPE_MAX 16 const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; @@ -43,7 +56,7 @@ const HParser* init_rdata(uint16_t type) { H_RULE (domain, init_domain()); - H_RULE (cstr, init_character_string()); + H_ARULE(cstr, init_character_string()); H_RULE (a, h_uint32()); H_RULE (ns, domain); From b88d6ca34a3ee82d6c6fb57e3094b90fbebfae5f Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 21:35:33 +0100 Subject: [PATCH 077/125] move SOA processing into an action --- examples/dns.c | 9 ++------- examples/rr.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 3f4ed9b..0456223 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -79,13 +79,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.cname = *(dns_domain_t *)p->ast->user; break; case 6: // SOA - rr.soa.mname = *H_FIELD(dns_domain_t, 0); - rr.soa.rname = *H_FIELD(dns_domain_t, 1); - rr.soa.serial = p->ast->seq->elements[2]->uint; - rr.soa.refresh = p->ast->seq->elements[3]->uint; - rr.soa.retry = p->ast->seq->elements[4]->uint; - rr.soa.expire = p->ast->seq->elements[5]->uint; - rr.soa.minimum = p->ast->seq->elements[6]->uint; + rr.soa = *(dns_rr_soa_t *)p->ast->user; break; case 7: // MB rr.mb = *(dns_domain_t *)p->ast->user; @@ -100,6 +94,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->used); for (size_t i=0; iast->seq->used; ++i) rr.null[i] = p->ast->seq->elements[i]->uint; + // XXX Where is the length stored!? 
break; case 11: // WKS rr.wks.address = p->ast->seq->elements[0]->uint; diff --git a/examples/rr.c b/examples/rr.c index 42b4648..5752fe9 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -43,6 +43,20 @@ const HParsedToken* act_cstr(const HParseResult *p) { return H_MAKE_TOKEN(dns_cstr_t, cs); } +const HParsedToken* act_soa(const HParseResult *p) { + dns_rr_soa_t *soa = H_MAKE(dns_rr_soa_t); + + soa->mname = *H_FIELD(dns_domain_t, 0); + soa->rname = *H_FIELD(dns_domain_t, 1); + soa->serial = p->ast->seq->elements[2]->uint; + soa->refresh = p->ast->seq->elements[3]->uint; + soa->retry = p->ast->seq->elements[4]->uint; + soa->expire = p->ast->seq->elements[5]->uint; + soa->minimum = p->ast->seq->elements[6]->uint; + + return H_MAKE_TOKEN(dns_rr_soa_t, soa); +} + #define RDATA_TYPE_MAX 16 const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; @@ -63,7 +77,7 @@ const HParser* init_rdata(uint16_t type) { H_RULE (md, domain); H_RULE (mf, domain); H_RULE (cname, domain); - H_RULE (soa, h_sequence(domain, // MNAME + H_ARULE(soa, h_sequence(domain, // MNAME domain, // RNAME h_uint32(), // SERIAL h_uint32(), // REFRESH From 4d4094049320ed938408c7c46aef7c8092bcaa15 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 17 Jan 2013 21:39:32 +0100 Subject: [PATCH 078/125] move WKS into action --- examples/dns.c | 7 +------ examples/rr.c | 13 +++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 0456223..f591591 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -97,12 +97,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // XXX Where is the length stored!? 
break; case 11: // WKS - rr.wks.address = p->ast->seq->elements[0]->uint; - rr.wks.protocol = p->ast->seq->elements[1]->uint; - rr.wks.len = p->ast->seq->elements[2]->seq->used; - rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->elements[2]->seq->used); - for (size_t i=0; iast->seq->elements[2]->seq->elements[i]->uint; + rr.wks = *(dns_rr_wks_t *)p->ast->user; break; case 12: // PTR rr.ptr = *(dns_domain_t *)p->ast->user; diff --git a/examples/rr.c b/examples/rr.c index 5752fe9..83241d8 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -57,6 +57,19 @@ const HParsedToken* act_soa(const HParseResult *p) { return H_MAKE_TOKEN(dns_rr_soa_t, soa); } +const HParsedToken* act_wks(const HParseResult *p) { + dns_rr_wks_t *wks = H_MAKE(dns_rr_wks_t); + + wks->address = p->ast->seq->elements[0]->uint; + wks->protocol = p->ast->seq->elements[1]->uint; + wks->len = p->ast->seq->elements[2]->seq->used; + wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len); + for (size_t i=0; ilen; ++i) + wks->bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; + + return H_MAKE_TOKEN(dns_rr_wks_t, wks); +} + #define RDATA_TYPE_MAX 16 const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; From e6a546b95b59e8cd6314d042727b97b400ff9f73 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 17 Jan 2013 21:58:17 +0100 Subject: [PATCH 079/125] make actions for the remaining RDATA types --- examples/dns.c | 36 +++++++++++++++--------------------- examples/dns.h | 5 ++++- examples/rr.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index f591591..8afd29c 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -64,58 +64,52 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // Pack the parsed rdata into rr. 
switch(rr.type) { case 1: // A - rr.a = p->ast->seq->elements[0]->uint; + rr.a = p->ast->seq->elements[0]->uint; break; case 2: // NS - rr.ns = *(dns_domain_t *)p->ast->user; + rr.ns = *(dns_domain_t *)p->ast->user; break; case 3: // MD - rr.md = *(dns_domain_t *)p->ast->user; + rr.md = *(dns_domain_t *)p->ast->user; break; case 4: // MF - rr.md = *(dns_domain_t *)p->ast->user; + rr.md = *(dns_domain_t *)p->ast->user; break; case 5: // CNAME rr.cname = *(dns_domain_t *)p->ast->user; break; case 6: // SOA - rr.soa = *(dns_rr_soa_t *)p->ast->user; + rr.soa = *(dns_rr_soa_t *)p->ast->user; break; case 7: // MB - rr.mb = *(dns_domain_t *)p->ast->user; + rr.mb = *(dns_domain_t *)p->ast->user; break; case 8: // MG - rr.mg = *(dns_domain_t *)p->ast->user; + rr.mg = *(dns_domain_t *)p->ast->user; break; case 9: // MR - rr.mr = *(dns_domain_t *)p->ast->user; + rr.mr = *(dns_domain_t *)p->ast->user; break; case 10: // NULL - rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*p->ast->seq->used); - for (size_t i=0; iast->seq->used; ++i) - rr.null[i] = p->ast->seq->elements[i]->uint; - // XXX Where is the length stored!? 
+ rr.null = *(dns_rr_null_t *)p->ast->user; break; case 11: // WKS - rr.wks = *(dns_rr_wks_t *)p->ast->user; + rr.wks = *(dns_rr_wks_t *)p->ast->user; break; case 12: // PTR - rr.ptr = *(dns_domain_t *)p->ast->user; + rr.ptr = *(dns_domain_t *)p->ast->user; break; case 13: // HINFO - rr.hinfo.cpu = *H_FIELD(dns_cstr_t, 0); - rr.hinfo.os = *H_FIELD(dns_cstr_t, 1); + rr.hinfo = *(dns_rr_hinfo_t *)p->ast->user; break; case 14: // MINFO - rr.minfo.rmailbx = *H_FIELD(dns_domain_t, 0); - rr.minfo.emailbx = *H_FIELD(dns_domain_t, 1); + rr.minfo = *(dns_rr_minfo_t *)p->ast->user; break; case 15: // MX - rr.mx.preference = p->ast->seq->elements[0]->uint; - rr.mx.exchange = *H_FIELD(dns_domain_t, 1); + rr.mx = *(dns_rr_mx_t *)p->ast->user; break; case 16: // TXT - rr.txt = *(dns_rr_txt_t *)p->ast->user; + rr.txt = *(dns_rr_txt_t *)p->ast->user; break; default: break; diff --git a/examples/dns.h b/examples/dns.h index 2cff916..ed2c26f 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -13,6 +13,7 @@ enum DNSTokenType_ { TT_dns_rr_mx_t, TT_dns_rr_soa_t, TT_dns_rr_wks_t, + TT_dns_rr_null_t, TT_dns_domain_t, TT_dns_cstr_t }; @@ -83,6 +84,8 @@ typedef struct { uint8_t* bit_map; } dns_rr_wks_t; +typedef uint8_t *dns_rr_null_t; + typedef struct dns_rr { char* name; uint16_t type; @@ -99,7 +102,7 @@ typedef struct dns_rr { char* mb; char* mg; char* mr; - uint8_t* null; + dns_rr_null_t null; dns_rr_wks_t wks; char* ptr; dns_rr_hinfo_t hinfo; diff --git a/examples/rr.c b/examples/rr.c index 83241d8..6bfb782 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -6,12 +6,28 @@ #define false 0 #define true 1 + +/// +// Validations and Semantic Actions +/// + bool validate_null(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; return (65536 > p->ast->seq->used); } +const HParsedToken *act_null(const HParseResult *p) { + dns_rr_null_t *null = H_MAKE(dns_rr_null_t); + + size_t len = p->ast->seq->used; + uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len); + for 
(size_t i=0; iast->seq->elements[i]->uint; + + return H_MAKE_TOKEN(dns_rr_null_t, null); +} + const HParsedToken *act_txt(const HParseResult *p) { dns_rr_txt_t *txt = H_MAKE(dns_rr_txt_t); @@ -70,6 +86,38 @@ const HParsedToken* act_wks(const HParseResult *p) { return H_MAKE_TOKEN(dns_rr_wks_t, wks); } +const HParsedToken* act_hinfo(const HParseResult *p) { + dns_rr_hinfo_t *hinfo = H_MAKE(dns_rr_hinfo_t); + + hinfo->cpu = *H_FIELD(dns_cstr_t, 0); + hinfo->os = *H_FIELD(dns_cstr_t, 1); + + return H_MAKE_TOKEN(dns_rr_hinfo_t, hinfo); +} + +const HParsedToken* act_minfo(const HParseResult *p) { + dns_rr_minfo_t *minfo = H_MAKE(dns_rr_minfo_t); + + minfo->rmailbx = *H_FIELD(dns_domain_t, 0); + minfo->emailbx = *H_FIELD(dns_domain_t, 1); + + return H_MAKE_TOKEN(dns_rr_minfo_t, minfo); +} + +const HParsedToken* act_mx(const HParseResult *p) { + dns_rr_mx_t *mx = H_MAKE(dns_rr_mx_t); + + mx->preference = p->ast->seq->elements[0]->uint; + mx->exchange = *H_FIELD(dns_domain_t, 1); + + return H_MAKE_TOKEN(dns_rr_mx_t, mx); +} + + +/// +// Parsers for all types of RDATA +/// + #define RDATA_TYPE_MAX 16 const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; From c9d3101f4128bfbfa533f043e4f66d213804a59a Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 17 Jan 2013 21:59:55 +0100 Subject: [PATCH 080/125] a comment --- examples/dns.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dns.c b/examples/dns.c index 8afd29c..d8b9bed 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -46,6 +46,7 @@ bool validate_message(HParseResult *p) { // Semantic Actions /// +// Helper: Parse and pack the RDATA field of a Resource Record. void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) From 8dea41c237a635e938240c3712892a066e6c782f Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 17 Jan 2013 22:20:58 +0100 Subject: [PATCH 081/125] condense the packing table in set_rdata a little --- examples/dns.c | 67 +++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 50 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index d8b9bed..012b7fb 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -64,56 +64,23 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // Pack the parsed rdata into rr. switch(rr.type) { - case 1: // A - rr.a = p->ast->seq->elements[0]->uint; - break; - case 2: // NS - rr.ns = *(dns_domain_t *)p->ast->user; - break; - case 3: // MD - rr.md = *(dns_domain_t *)p->ast->user; - break; - case 4: // MF - rr.md = *(dns_domain_t *)p->ast->user; - break; - case 5: // CNAME - rr.cname = *(dns_domain_t *)p->ast->user; - break; - case 6: // SOA - rr.soa = *(dns_rr_soa_t *)p->ast->user; - break; - case 7: // MB - rr.mb = *(dns_domain_t *)p->ast->user; - break; - case 8: // MG - rr.mg = *(dns_domain_t *)p->ast->user; - break; - case 9: // MR - rr.mr = *(dns_domain_t *)p->ast->user; - break; - case 10: // NULL - rr.null = *(dns_rr_null_t *)p->ast->user; - break; - case 11: // WKS - rr.wks = *(dns_rr_wks_t *)p->ast->user; - break; - case 12: // PTR - rr.ptr = *(dns_domain_t *)p->ast->user; - break; - case 13: // HINFO - rr.hinfo = *(dns_rr_hinfo_t *)p->ast->user; - break; - case 14: // MINFO - rr.minfo = *(dns_rr_minfo_t *)p->ast->user; - break; - case 15: // MX - rr.mx = *(dns_rr_mx_t *)p->ast->user; - break; - case 16: // TXT - rr.txt = *(dns_rr_txt_t *)p->ast->user; - break; - default: - break; + case 1: rr.a = p->ast->seq->elements[0]->uint; break; + case 2: rr.ns = *(dns_domain_t *)p->ast->user; break; + case 3: rr.md = *(dns_domain_t *)p->ast->user; break; + case 4: rr.md = *(dns_domain_t *)p->ast->user; break; + case 5: rr.cname = *(dns_domain_t *)p->ast->user; break; + case 6: rr.soa = *(dns_rr_soa_t *)p->ast->user; break; + case 7: rr.mb = *(dns_domain_t *)p->ast->user; 
break; + case 8: rr.mg = *(dns_domain_t *)p->ast->user; break; + case 9: rr.mr = *(dns_domain_t *)p->ast->user; break; + case 10: rr.null = *(dns_rr_null_t *)p->ast->user; break; + case 11: rr.wks = *(dns_rr_wks_t *)p->ast->user; break; + case 12: rr.ptr = *(dns_domain_t *)p->ast->user; break; + case 13: rr.hinfo = *(dns_rr_hinfo_t *)p->ast->user; break; + case 14: rr.minfo = *(dns_rr_minfo_t *)p->ast->user; break; + case 15: rr.mx = *(dns_rr_mx_t *)p->ast->user; break; + case 16: rr.txt = *(dns_rr_txt_t *)p->ast->user; break; + default: break; } } From d0d115f20617cc303ee027255df7d832bdadf3b3 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 16:55:26 +0100 Subject: [PATCH 082/125] wrap only non-null rdata parsers with h_end_p --- examples/rr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/rr.c b/examples/rr.c index 6bfb782..8aa48e6 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -181,8 +181,10 @@ const HParser* init_rdata(uint16_t type) { // All parsers must consume their input exactly. for(uint16_t i; i Date: Wed, 23 Jan 2013 16:57:59 +0100 Subject: [PATCH 083/125] cosmetics --- examples/dns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 012b7fb..622c62a 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -42,6 +42,7 @@ bool validate_message(HParseResult *p) { return true; } + /// // Semantic Actions /// @@ -200,9 +201,8 @@ const HParsedToken* act_message(const HParseResult *p) { #define act_qname act_index0 - /// -// Parser / Grammar +// Grammar /// const HParser* init_parser() { @@ -259,7 +259,7 @@ const HParser* init_parser() { /// -// Program Logic for a Dummy DNS Server +// Main Program for a Dummy DNS Server /// int start_listening() { From dc037e6c2854ce9c529a192d9dc393c6b7773f43 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 23 Jan 2013 18:16:14 +0100 Subject: [PATCH 084/125] organize definitions in glue.h a little --- examples/glue.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/examples/glue.h b/examples/glue.h index 2b60646..7fb9f8a 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -1,12 +1,17 @@ +// +// API additions for writing grammar and semantic actions more concisely +// + #ifndef HAMMER_EXAMPLES_GLUE__H #define HAMMER_EXAMPLES_GLUE__H #include #include "../src/hammer.h" -/// -// API Additions -/// + +// +// Grammar specification +// #define H_RULE(rule, def) const HParser *rule = def #define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) @@ -17,6 +22,11 @@ #define H_AVRULE(rule, def) const HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) + +// +// Pre-fab semantic actions +// + const HParsedToken *h_act_ignore(const HParseResult *p); const HParsedToken *h_act_index(int i, const HParseResult *p); const HParsedToken *h_act_flatten(const HParseResult *p); @@ -28,10 +38,12 @@ const HParsedToken *h_act_flatten(const HParseResult *p); return paction(__VA_ARGS__, p); \ } -const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p); -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); -void h_seq_append(HParsedToken *xs, const HParsedToken *ys); +// +// Working with HParsedTokens +// + +// Token constructors... HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_token_seq(HArena *arena); @@ -42,6 +54,14 @@ HParsedToken *h_make_token_seq(HArena *arena); #define H_MAKE_TOKEN(TYP, VAL) \ h_make_token(p->arena, TT_ ## TYP, VAL) +// Sequences... + +// Flatten nested sequences into one. 
+const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p); + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); + HParsedToken *h_carray_index(const HCountedArray *a, size_t i); HParsedToken *h_seq_index(const HParsedToken *p, size_t i); void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); @@ -52,4 +72,5 @@ void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); #define H_FIELD(TYP, IDX) \ H_SEQ_INDEX(TYP, p->ast, IDX) + #endif From ef17e42ec86c476fee42fab006ab6ff0128e0556 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 18:17:17 +0100 Subject: [PATCH 085/125] rename H_MAKE to H_ALLOC --- examples/dns.c | 10 +++++----- examples/dns_common.c | 2 +- examples/glue.h | 2 +- examples/rr.c | 16 ++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 622c62a..e3f36a6 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -102,14 +102,14 @@ const HParsedToken* act_header(const HParseResult *p) { .additional_count = fields[11]->uint }; - dns_header_t *header = H_MAKE(dns_header_t); + dns_header_t *header = H_ALLOC(dns_header_t); *header = header_; return H_MAKE_TOKEN(dns_header_t, header); } const HParsedToken* act_label(const HParseResult *p) { - dns_label_t *r = H_MAKE(dns_label_t); + dns_label_t *r = H_ALLOC(dns_label_t); r->len = p->ast->seq->used; r->label = h_arena_malloc(p->arena, r->len + 1); @@ -121,7 +121,7 @@ const HParsedToken* act_label(const HParseResult *p) { } const HParsedToken* act_rr(const HParseResult *p) { - dns_rr_t *rr = H_MAKE(dns_rr_t); + dns_rr_t *rr = H_ALLOC(dns_rr_t); rr->name = *H_FIELD(dns_domain_t, 0); rr->type = p->ast->seq->elements[1]->uint; @@ -136,7 +136,7 @@ const HParsedToken* act_rr(const HParseResult *p) { } const HParsedToken* act_question(const HParseResult *p) { - dns_question_t *q = H_MAKE(dns_question_t); + dns_question_t 
*q = H_ALLOC(dns_question_t); HParsedToken **fields = p->ast->seq->elements; // QNAME is a sequence of labels. Pack them into an array. @@ -154,7 +154,7 @@ const HParsedToken* act_question(const HParseResult *p) { const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); - dns_message_t *msg = H_MAKE(dns_message_t); + dns_message_t *msg = H_ALLOC(dns_message_t); // Copy header into message struct. dns_header_t *header = H_FIELD(dns_header_t, 0); diff --git a/examples/dns_common.c b/examples/dns_common.c index 3ea5594..0171a59 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -48,7 +48,7 @@ const HParsedToken* act_domain(const HParseResult *p) { } if(arr) { - dns_domain_t *val = H_MAKE(dns_domain_t); // dns_domain_t is char* + dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char* *val = arr; ret = H_MAKE_TOKEN(dns_domain_t, val); } diff --git a/examples/glue.h b/examples/glue.h index 7fb9f8a..67f6907 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -48,7 +48,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_token_seq(HArena *arena); -#define H_MAKE(TYP) \ +#define H_ALLOC(TYP) \ ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) #define H_MAKE_TOKEN(TYP, VAL) \ diff --git a/examples/rr.c b/examples/rr.c index 8aa48e6..bd4c125 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -18,7 +18,7 @@ bool validate_null(HParseResult *p) { } const HParsedToken *act_null(const HParseResult *p) { - dns_rr_null_t *null = H_MAKE(dns_rr_null_t); + dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); size_t len = p->ast->seq->used; uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len); @@ -29,7 +29,7 @@ const HParsedToken *act_null(const HParseResult *p) { } const HParsedToken *act_txt(const HParseResult *p) { - dns_rr_txt_t *txt = H_MAKE(dns_rr_txt_t); + dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); const HCountedArray 
*arr = p->ast->seq; uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); @@ -47,7 +47,7 @@ const HParsedToken *act_txt(const HParseResult *p) { } const HParsedToken* act_cstr(const HParseResult *p) { - dns_cstr_t *cs = H_MAKE(dns_cstr_t); + dns_cstr_t *cs = H_ALLOC(dns_cstr_t); const HCountedArray *arr = p->ast->seq; uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); @@ -60,7 +60,7 @@ const HParsedToken* act_cstr(const HParseResult *p) { } const HParsedToken* act_soa(const HParseResult *p) { - dns_rr_soa_t *soa = H_MAKE(dns_rr_soa_t); + dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t); soa->mname = *H_FIELD(dns_domain_t, 0); soa->rname = *H_FIELD(dns_domain_t, 1); @@ -74,7 +74,7 @@ const HParsedToken* act_soa(const HParseResult *p) { } const HParsedToken* act_wks(const HParseResult *p) { - dns_rr_wks_t *wks = H_MAKE(dns_rr_wks_t); + dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); wks->address = p->ast->seq->elements[0]->uint; wks->protocol = p->ast->seq->elements[1]->uint; @@ -87,7 +87,7 @@ const HParsedToken* act_wks(const HParseResult *p) { } const HParsedToken* act_hinfo(const HParseResult *p) { - dns_rr_hinfo_t *hinfo = H_MAKE(dns_rr_hinfo_t); + dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t); hinfo->cpu = *H_FIELD(dns_cstr_t, 0); hinfo->os = *H_FIELD(dns_cstr_t, 1); @@ -96,7 +96,7 @@ const HParsedToken* act_hinfo(const HParseResult *p) { } const HParsedToken* act_minfo(const HParseResult *p) { - dns_rr_minfo_t *minfo = H_MAKE(dns_rr_minfo_t); + dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t); minfo->rmailbx = *H_FIELD(dns_domain_t, 0); minfo->emailbx = *H_FIELD(dns_domain_t, 1); @@ -105,7 +105,7 @@ const HParsedToken* act_minfo(const HParseResult *p) { } const HParsedToken* act_mx(const HParseResult *p) { - dns_rr_mx_t *mx = H_MAKE(dns_rr_mx_t); + dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); mx->preference = p->ast->seq->elements[0]->uint; mx->exchange = *H_FIELD(dns_domain_t, 1); From 4c8b0086d7771616f02e39cc1c512ea6b165a5e5 Mon Sep 17 
00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 18:28:50 +0100 Subject: [PATCH 086/125] rename H_MAKE_TOKEN to H_MAKE and remove 'token' from other function names --- examples/dns.c | 10 +++++----- examples/dns_common.c | 2 +- examples/glue.c | 20 ++++++++++---------- examples/glue.h | 10 +++++----- examples/rr.c | 16 ++++++++-------- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index e3f36a6..c8e3faf 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -105,7 +105,7 @@ const HParsedToken* act_header(const HParseResult *p) { dns_header_t *header = H_ALLOC(dns_header_t); *header = header_; - return H_MAKE_TOKEN(dns_header_t, header); + return H_MAKE(dns_header_t, header); } const HParsedToken* act_label(const HParseResult *p) { @@ -117,7 +117,7 @@ const HParsedToken* act_label(const HParseResult *p) { r->label[i] = p->ast->seq->elements[i]->uint; r->label[r->len] = 0; - return H_MAKE_TOKEN(dns_label_t, r); + return H_MAKE(dns_label_t, r); } const HParsedToken* act_rr(const HParseResult *p) { @@ -132,7 +132,7 @@ const HParsedToken* act_rr(const HParseResult *p) { // Parse and pack RDATA. 
set_rdata(*rr, p->ast->seq->elements[4]->seq); - return H_MAKE_TOKEN(dns_rr_t, rr); + return H_MAKE(dns_rr_t, rr); } const HParsedToken* act_question(const HParseResult *p) { @@ -149,7 +149,7 @@ const HParsedToken* act_question(const HParseResult *p) { q->qtype = fields[1]->uint; q->qclass = fields[2]->uint; - return H_MAKE_TOKEN(dns_question_t, q); + return H_MAKE(dns_question_t, q); } const HParsedToken* act_message(const HParseResult *p) { @@ -194,7 +194,7 @@ const HParsedToken* act_message(const HParseResult *p) { } msg->additional = additional; - return H_MAKE_TOKEN(dns_message_t, msg); + return H_MAKE(dns_message_t, msg); } #define act_hdzero h_act_ignore diff --git a/examples/dns_common.c b/examples/dns_common.c index 0171a59..76915b6 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -50,7 +50,7 @@ const HParsedToken* act_domain(const HParseResult *p) { if(arr) { dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char* *val = arr; - ret = H_MAKE_TOKEN(dns_domain_t, val); + ret = H_MAKE(dns_domain_t, val); } return ret; diff --git a/examples/glue.c b/examples/glue.c index 80864fc..1c86b15 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -48,16 +48,16 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys) // Flatten nested sequences. Always returns a sequence. // If input element is not a sequence, returns it as a singleton sequence. -const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p) +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) { assert(p != NULL); - HParsedToken *ret = h_make_token_seq(arena); + HParsedToken *ret = h_make_seq(arena); switch(p->token_type) { case TT_SEQUENCE: // Flatten and append all. 
for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_token_flatten(arena, h_seq_index(p, i))); + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); } break; default: @@ -69,26 +69,26 @@ const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p) return ret; } -// Action version of h_token_flatten. +// Action version of h_seq_flatten. const HParsedToken *h_act_flatten(const HParseResult *p) { - return h_token_flatten(p->arena, p->ast); + return h_seq_flatten(p->arena, p->ast); } -HParsedToken *h_make_token_(HArena *arena, HTokenType type) +HParsedToken *h_make_(HArena *arena, HTokenType type) { HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); ret->token_type = type; return ret; } -HParsedToken *h_make_token_seq(HArena *arena) +HParsedToken *h_make_seq(HArena *arena) { - return h_make_token_(arena, TT_SEQUENCE); + return h_make_(arena, TT_SEQUENCE); } -HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value) +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) { - HParsedToken *ret = h_make_token_(arena, type); + HParsedToken *ret = h_make_(arena, type); ret->user = value; return ret; } diff --git a/examples/glue.h b/examples/glue.h index 67f6907..dac8fbb 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -45,19 +45,19 @@ const HParsedToken *h_act_flatten(const HParseResult *p); // Token constructors... -HParsedToken *h_make_token(HArena *arena, HTokenType type, void *value); -HParsedToken *h_make_token_seq(HArena *arena); +HParsedToken *h_make(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_seq(HArena *arena); #define H_ALLOC(TYP) \ ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) -#define H_MAKE_TOKEN(TYP, VAL) \ - h_make_token(p->arena, TT_ ## TYP, VAL) +#define H_MAKE(TYP, VAL) \ + h_make(p->arena, TT_ ## TYP, VAL) // Sequences... // Flatten nested sequences into one. 
-const HParsedToken *h_token_flatten(HArena *arena, const HParsedToken *p); +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); void h_seq_append(HParsedToken *xs, const HParsedToken *ys); diff --git a/examples/rr.c b/examples/rr.c index bd4c125..94c4b7c 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -25,7 +25,7 @@ const HParsedToken *act_null(const HParseResult *p) { for (size_t i=0; iast->seq->elements[i]->uint; - return H_MAKE_TOKEN(dns_rr_null_t, null); + return H_MAKE(dns_rr_null_t, null); } const HParsedToken *act_txt(const HParseResult *p) { @@ -43,7 +43,7 @@ const HParsedToken *act_txt(const HParseResult *p) { txt->count = p->ast->seq->elements[0]->seq->used; txt->txt_data = ret; - return H_MAKE_TOKEN(dns_rr_txt_t, txt); + return H_MAKE(dns_rr_txt_t, txt); } const HParsedToken* act_cstr(const HParseResult *p) { @@ -56,7 +56,7 @@ const HParsedToken* act_cstr(const HParseResult *p) { assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? 
*cs = ret; - return H_MAKE_TOKEN(dns_cstr_t, cs); + return H_MAKE(dns_cstr_t, cs); } const HParsedToken* act_soa(const HParseResult *p) { @@ -70,7 +70,7 @@ const HParsedToken* act_soa(const HParseResult *p) { soa->expire = p->ast->seq->elements[5]->uint; soa->minimum = p->ast->seq->elements[6]->uint; - return H_MAKE_TOKEN(dns_rr_soa_t, soa); + return H_MAKE(dns_rr_soa_t, soa); } const HParsedToken* act_wks(const HParseResult *p) { @@ -83,7 +83,7 @@ const HParsedToken* act_wks(const HParseResult *p) { for (size_t i=0; ilen; ++i) wks->bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; - return H_MAKE_TOKEN(dns_rr_wks_t, wks); + return H_MAKE(dns_rr_wks_t, wks); } const HParsedToken* act_hinfo(const HParseResult *p) { @@ -92,7 +92,7 @@ const HParsedToken* act_hinfo(const HParseResult *p) { hinfo->cpu = *H_FIELD(dns_cstr_t, 0); hinfo->os = *H_FIELD(dns_cstr_t, 1); - return H_MAKE_TOKEN(dns_rr_hinfo_t, hinfo); + return H_MAKE(dns_rr_hinfo_t, hinfo); } const HParsedToken* act_minfo(const HParseResult *p) { @@ -101,7 +101,7 @@ const HParsedToken* act_minfo(const HParseResult *p) { minfo->rmailbx = *H_FIELD(dns_domain_t, 0); minfo->emailbx = *H_FIELD(dns_domain_t, 1); - return H_MAKE_TOKEN(dns_rr_minfo_t, minfo); + return H_MAKE(dns_rr_minfo_t, minfo); } const HParsedToken* act_mx(const HParseResult *p) { @@ -110,7 +110,7 @@ const HParsedToken* act_mx(const HParseResult *p) { mx->preference = p->ast->seq->elements[0]->uint; mx->exchange = *H_FIELD(dns_domain_t, 1); - return H_MAKE_TOKEN(dns_rr_mx_t, mx); + return H_MAKE(dns_rr_mx_t, mx); } From 2d53a0f873d34c0edae0216312a46ef997effdaa Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 23 Jan 2013 18:30:55 +0100 Subject: [PATCH 087/125] mark h_carray_index as meant to be internal --- examples/glue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/glue.h b/examples/glue.h index dac8fbb..c8b17f3 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -62,7 +62,7 @@ const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); void h_seq_append(HParsedToken *xs, const HParsedToken *ys); -HParsedToken *h_carray_index(const HCountedArray *a, size_t i); +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal HParsedToken *h_seq_index(const HParsedToken *p, size_t i); void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); From 2c90bd3ef5d68d9c9073a8841fe5452ec3fffbb4 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 19:06:25 +0100 Subject: [PATCH 088/125] rename: h_seq_index_user -> h_seq_index -> h_seq_index_token --- examples/glue.c | 8 ++++---- examples/glue.h | 38 ++++++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 1c86b15..778f356 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -57,7 +57,7 @@ const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) case TT_SEQUENCE: // Flatten and append all. 
for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + h_seq_append(ret, h_seq_flatten(arena, h_seq_index_token(p, i))); } break; default: @@ -99,16 +99,16 @@ HParsedToken *h_carray_index(const HCountedArray *a, size_t i) return a->elements[i]; } -HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i) { assert(p != NULL); assert(p->token_type == TT_SEQUENCE); return h_carray_index(p->seq, i); } -void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) +void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i) { - HParsedToken *elem = h_seq_index(p, i); + HParsedToken *elem = h_seq_index_token(p, i); assert(elem->token_type == (HTokenType)type); return elem->user; } diff --git a/examples/glue.h b/examples/glue.h index c8b17f3..5716197 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -43,34 +43,40 @@ const HParsedToken *h_act_flatten(const HParseResult *p); // Working with HParsedTokens // +// Standard short-hand for arena-allocating a variable in a semantic action. +#define H_ALLOC(TYP) \ + ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) + // Token constructors... HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); -#define H_ALLOC(TYP) \ - ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) - -#define H_MAKE(TYP, VAL) \ - h_make(p->arena, TT_ ## TYP, VAL) +// Standard short-hand to make a user-type token. +#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) // Sequences... +// Access a sequence element by index. +HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i); + +// Access a user-type element of a sequence by index. +#define H_SEQ_INDEX(TYP, SEQ, IDX) \ + ((TYP *) h_seq_index(TT_ ## TYP, SEQ, IDX)) + +// Standard short-hand to access a user-type field on a sequence token. 
+#define H_FIELD(TYP, IDX) H_SEQ_INDEX(TYP, p->ast, IDX) + +// Append elements to a sequence. +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many + // Flatten nested sequences into one. const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); -void h_seq_append(HParsedToken *xs, const HParsedToken *ys); - +// Helpers for implementing H_SEQ_INDEX. +void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i); // XXX helper HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal -HParsedToken *h_seq_index(const HParsedToken *p, size_t i); -void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i); - -#define H_SEQ_INDEX(TYP, SEQ, IDX) \ - ((TYP *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) - -#define H_FIELD(TYP, IDX) \ - H_SEQ_INDEX(TYP, p->ast, IDX) #endif From 3df4030bf821ef33539594c4e3685aa0ce477472 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 19:10:36 +0100 Subject: [PATCH 089/125] cosmetics --- examples/glue.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/glue.h b/examples/glue.h index 5716197..d428e06 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -50,12 +50,12 @@ const HParsedToken *h_act_flatten(const HParseResult *p); // Token constructors... HParsedToken *h_make(HArena *arena, HTokenType type, void *value); -HParsedToken *h_make_seq(HArena *arena); +HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. // Standard short-hand to make a user-type token. #define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) -// Sequences... +// Sequence access... // Access a sequence element by index. 
HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i); @@ -67,6 +67,12 @@ HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i); // Standard short-hand to access a user-type field on a sequence token. #define H_FIELD(TYP, IDX) H_SEQ_INDEX(TYP, p->ast, IDX) +// Lower-level helper for H_SEQ_INDEX. +void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i); +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal + +// Sequence modification... + // Append elements to a sequence. void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many @@ -74,9 +80,5 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many // Flatten nested sequences into one. const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); -// Helpers for implementing H_SEQ_INDEX. -void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i); // XXX helper -HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal - #endif From 54046e25dae73783701602e6d6d101adb1d050c4 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 23 Jan 2013 19:24:13 +0100 Subject: [PATCH 090/125] forgot to allocate the carray in h_make_seq (oops) --- examples/glue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/glue.c b/examples/glue.c index 778f356..29b8de7 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -83,7 +83,9 @@ HParsedToken *h_make_(HArena *arena, HTokenType type) HParsedToken *h_make_seq(HArena *arena) { - return h_make_(arena, TT_SEQUENCE); + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new(arena); + return ret; } HParsedToken *h_make(HArena *arena, HTokenType type, void *value) From 71cdd46cebac7862fb0e788ba7555df1a7401e89 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 23 Jan 2013 19:29:29 +0100 Subject: [PATCH 091/125] expand h_make family to include seq, bytes, sint, and uint tokens --- examples/glue.c | 31 ++++++++++++++++++++++++++++--- examples/glue.h | 11 +++++++++-- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 29b8de7..4cc85d0 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -74,6 +74,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p) { return h_seq_flatten(p->arena, p->ast); } +// Low-level helper for the h_make family. HParsedToken *h_make_(HArena *arena, HTokenType type) { HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); @@ -81,6 +82,14 @@ HParsedToken *h_make_(HArena *arena, HTokenType type) return ret; } +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +{ + assert(type >= TT_USER); + HParsedToken *ret = h_make_(arena, type); + ret->user = value; + return ret; +} + HParsedToken *h_make_seq(HArena *arena) { HParsedToken *ret = h_make_(arena, TT_SEQUENCE); @@ -88,13 +97,29 @@ HParsedToken *h_make_seq(HArena *arena) return ret; } -HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +HParsedToken *h_make_bytes(HArena *arena, size_t len) { - HParsedToken *ret = h_make_(arena, type); - ret->user = value; + HParsedToken *ret = h_make_(arena, TT_BYTES); + ret->bytes.len = len; + ret->bytes.token = h_arena_malloc(arena, len); return ret; } +HParsedToken *h_make_sint(HArena *arena, int64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_SINT); + ret->sint = val; + return ret; +} + +HParsedToken *h_make_uint(HArena *arena, uint64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_UINT); + ret->uint = val; + return ret; +} + +// XXX -> internal HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { assert(i < a->used); diff --git a/examples/glue.h b/examples/glue.h index d428e06..9b11315 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -51,9 +51,16 @@ const HParsedToken 
*h_act_flatten(const HParseResult *p); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_sint(HArena *arena, int64_t val); +HParsedToken *h_make_uint(HArena *arena, uint64_t val); -// Standard short-hand to make a user-type token. -#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +// Standard short-hands to make tokens in an action. +#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +#define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) +#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) // Sequence access... From 70aaf1308cc6afb8e89a1464de40f8a2b2afcd26 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 18:12:00 +0100 Subject: [PATCH 092/125] massage glue api around some, add a bunch of dummy declarations --- examples/dns.c | 10 +++++----- examples/glue.c | 8 ++++---- examples/glue.h | 42 +++++++++++++++++++++++++++++++----------- src/hammer.h | 10 ++++++---- 4 files changed, 46 insertions(+), 24 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index c8e3faf..c126812 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -143,7 +143,7 @@ const HParsedToken* act_question(const HParseResult *p) { q->qname.qlen = fields[0]->seq->used; q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); for(size_t i=0; iseq->used; i++) { - q->qname.labels[i] = *H_SEQ_INDEX(dns_label_t, fields[0], i); + q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i); } q->qtype = fields[1]->uint; @@ -165,7 +165,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_question *questions = h_arena_malloc(p->arena, sizeof(struct dns_question)*(header->question_count)); for (size_t i=0; iquestion_count; ++i) { - questions[i] = 
*H_SEQ_INDEX(dns_question_t, qs, i); + questions[i] = *H_INDEX(dns_question_t, qs, i); } msg->questions = questions; @@ -174,7 +174,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *answers = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->answer_count)); for (size_t i=0; ianswer_count; ++i) { - answers[i] = *H_SEQ_INDEX(dns_rr_t, rrs, i); + answers[i] = *H_INDEX(dns_rr_t, rrs, i); } msg->answers = answers; @@ -182,7 +182,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *authority = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->authority_count)); for (size_t i=0, j=header->answer_count; iauthority_count; ++i, ++j) { - authority[i] = *H_SEQ_INDEX(dns_rr_t, rrs, j); + authority[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->authority = authority; @@ -190,7 +190,7 @@ const HParsedToken* act_message(const HParseResult *p) { struct dns_rr *additional = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->additional_count)); for (size_t i=0, j=header->answer_count+header->authority_count; iadditional_count; ++i, ++j) { - additional[i] = *H_SEQ_INDEX(dns_rr_t, rrs, j); + additional[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->additional = additional; diff --git a/examples/glue.c b/examples/glue.c index 4cc85d0..6fe3392 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -57,7 +57,7 @@ const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) case TT_SEQUENCE: // Flatten and append all. 
for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index_token(p, i))); + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); } break; default: @@ -126,16 +126,16 @@ HParsedToken *h_carray_index(const HCountedArray *a, size_t i) return a->elements[i]; } -HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i) +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { assert(p != NULL); assert(p->token_type == TT_SEQUENCE); return h_carray_index(p->seq, i); } -void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i) +void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) { - HParsedToken *elem = h_seq_index_token(p, i); + HParsedToken *elem = h_seq_index(p, i); assert(elem->token_type == (HTokenType)type); return elem->user; } diff --git a/examples/glue.h b/examples/glue.h index 9b11315..4712cfc 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -44,8 +44,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); // // Standard short-hand for arena-allocating a variable in a semantic action. -#define H_ALLOC(TYP) \ - ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) +#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) // Token constructors... @@ -62,28 +61,49 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) +// Extract type-specific value back from HParsedTokens... + +void * h_cast (HTokenType type, const HParsedToken *p); +HCountedArray *h_cast_seq (const HParsedToken *p); +HBytes h_cast_bytes(const HParsedToken *p); +int64_t h_cast_sint (const HParsedToken *p); +uint64_t h_cast_uint (const HParsedToken *p); + +// Standard short-hand to cast to a user type. +#define H_CAST(TYP, TOK) ((TYP *) h_cast(TT_ ## TYP, TOK)) + // Sequence access... // Access a sequence element by index. 
-HParsedToken *h_seq_index_token(const HParsedToken *p, size_t i); +HParsedToken * h_seq_index(const HParsedToken *p, size_t i); -// Access a user-type element of a sequence by index. -#define H_SEQ_INDEX(TYP, SEQ, IDX) \ - ((TYP *) h_seq_index(TT_ ## TYP, SEQ, IDX)) +// Convenience functions combining index access and h_cast_*. +HCountedArray *h_seq_index_seq (const HParsedToken *p, size_t i); +HBytes h_seq_index_bytes(const HParsedToken *p, size_t i); +int64_t h_seq_index_sint (const HParsedToken *p, size_t i); +uint64_t h_seq_index_uint (const HParsedToken *p, size_t i); +void * h_seq_index_user (HTokenType type, const HParsedToken *p, size_t i); -// Standard short-hand to access a user-type field on a sequence token. -#define H_FIELD(TYP, IDX) H_SEQ_INDEX(TYP, p->ast, IDX) +// Standard short-hand to access and cast a user-type sequence element. +#define H_INDEX(TYP, SEQ, IDX) \ + ((TYP *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) -// Lower-level helper for H_SEQ_INDEX. -void *h_seq_index(HTokenType type, const HParsedToken *p, size_t i); +// Standard short-hand to access and cast a user-type field on a sequence token. +#define H_FIELD(TYP, IDX) H_INDEX(TYP, p->ast, IDX) + +// Lower-level helper for h_seq_index. HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal // Sequence modification... -// Append elements to a sequence. +// Add elements to a sequence. +void h_seq_cons(const HParsedToken *x, HParsedToken *xs); // prepend one void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_prepend(const HParsedToken *xs, HParsedToken *ys); // prepend many void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many +// XXX TODO: Remove elements from a sequence. + // Flatten nested sequences into one. 
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); diff --git a/src/hammer.h b/src/hammer.h index 5a0c625..7ef2883 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -54,13 +54,15 @@ typedef struct HCountedArray_ { struct HParsedToken_ **elements; } HCountedArray; +typedef struct HBytes_ { + const uint8_t *token; + size_t len; +} HBytes; + typedef struct HParsedToken_ { HTokenType token_type; union { - struct { - const uint8_t *token; - size_t len; - } bytes; + HBytes bytes; int64_t sint; uint64_t uint; double dbl; From d6ef9ac526f417b9f6f0b60756bf8c19cc7a68c2 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 18:32:01 +0100 Subject: [PATCH 093/125] add definitions for h_cast family of functions --- examples/glue.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/examples/glue.c b/examples/glue.c index 6fe3392..52ffd89 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -119,6 +119,36 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val) return ret; } +void * h_cast(HTokenType type, const HParsedToken *p) +{ + assert(p->token_type == type); + return p->user; +} + +HCountedArray *h_cast_seq (const HParsedToken *p) +{ + assert(p->token_type == TT_SEQUENCE); + return p->seq; +} + +HBytes h_cast_bytes(const HParsedToken *p) +{ + assert(p->token_type == TT_BYTES); + return p->bytes; +} + +int64_t h_cast_sint (const HParsedToken *p) +{ + assert(p->token_type == TT_SINT); + return p->sint; +} + +uint64_t h_cast_uint (const HParsedToken *p) +{ + assert(p->token_type == TT_UINT); + return p->uint; +} + // XXX -> internal HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { From 0b4002c697102ce0ca1ca6ae413df1548fbd2307 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 25 Jan 2013 18:37:04 +0100 Subject: [PATCH 094/125] add definitions for h_seq_index family of functions --- examples/glue.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 52ffd89..01eb08c 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -125,7 +125,7 @@ void * h_cast(HTokenType type, const HParsedToken *p) return p->user; } -HCountedArray *h_cast_seq (const HParsedToken *p) +HCountedArray *h_cast_seq(const HParsedToken *p) { assert(p->token_type == TT_SEQUENCE); return p->seq; @@ -137,13 +137,13 @@ HBytes h_cast_bytes(const HParsedToken *p) return p->bytes; } -int64_t h_cast_sint (const HParsedToken *p) +int64_t h_cast_sint(const HParsedToken *p) { assert(p->token_type == TT_SINT); return p->sint; } -uint64_t h_cast_uint (const HParsedToken *p) +uint64_t h_cast_uint(const HParsedToken *p) { assert(p->token_type == TT_UINT); return p->uint; @@ -163,9 +163,27 @@ HParsedToken *h_seq_index(const HParsedToken *p, size_t i) return h_carray_index(p->seq, i); } +HCountedArray *h_seq_index_seq(const HParsedToken *p, size_t i) +{ + return h_cast_seq(h_seq_index(p, i)); +} + +HBytes h_seq_index_bytes(const HParsedToken *p, size_t i) +{ + return h_cast_bytes(h_seq_index(p, i)); +} + +int64_t h_seq_index_sint(const HParsedToken *p, size_t i) +{ + return h_cast_sint(h_seq_index(p, i)); +} + +uint64_t h_seq_index_uint(const HParsedToken *p, size_t i) +{ + return h_cast_uint(h_seq_index(p, i)); +} + void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) { - HParsedToken *elem = h_seq_index(p, i); - assert(elem->token_type == (HTokenType)type); - return elem->user; + return h_cast(type, h_seq_index(p, i)); } From dbb77c0c551364c291ebb4a9908aab2011fc9654 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 25 Jan 2013 18:42:45 +0100 Subject: [PATCH 095/125] ah, it's not an slist, forget about cons and prepend for now --- examples/glue.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/glue.h b/examples/glue.h index 4712cfc..5fdabc3 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -97,9 +97,7 @@ HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> intern // Sequence modification... // Add elements to a sequence. -void h_seq_cons(const HParsedToken *x, HParsedToken *xs); // prepend one void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one -void h_seq_prepend(const HParsedToken *xs, HParsedToken *ys); // prepend many void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many // XXX TODO: Remove elements from a sequence. From 118c03c5346c33768dff6c07f9bad6a346b52291 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 18:43:40 +0100 Subject: [PATCH 096/125] move h_seq modification functions around to match the order in the header --- examples/glue.c | 84 ++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 01eb08c..49bda56 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -27,48 +27,6 @@ const HParsedToken *h_act_index(int i, const HParseResult *p) return tok->seq->elements[i]; } -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - - h_carray_append(xs->seq, (void *)x); -} - -void h_seq_append(HParsedToken *xs, const HParsedToken *ys) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - assert(ys != NULL); - assert(ys->token_type == TT_SEQUENCE); - - for(size_t i; iseq->used; i++) - h_carray_append(xs->seq, ys->seq->elements[i]); -} - -// Flatten nested sequences. Always returns a sequence. -// If input element is not a sequence, returns it as a singleton sequence. 
-const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) -{ - assert(p != NULL); - - HParsedToken *ret = h_make_seq(arena); - switch(p->token_type) { - case TT_SEQUENCE: - // Flatten and append all. - for(size_t i; i<p->seq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); - } - break; - default: - // Make singleton sequence. - h_seq_snoc(ret, p); - break; - } - - return ret; -} - // Action version of h_seq_flatten. const HParsedToken *h_act_flatten(const HParseResult *p) { return h_seq_flatten(p->arena, p->ast); @@ -187,3 +145,45 @@ void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) { return h_cast(type, h_seq_index(p, i)); } + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (HParsedToken *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i; i<ys->seq->used; i++) + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i; i<p->seq->used; i++) { + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} From b58b82b4735d3045b9b7ec05fee01f123bd2c048 Mon Sep 17 00:00:00 2001 From: "Sven M.
Hallberg" Date: Fri, 25 Jan 2013 18:52:07 +0100 Subject: [PATCH 097/125] add h_seq_len --- examples/glue.c | 7 +++++++ examples/glue.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/examples/glue.c b/examples/glue.c index 49bda56..de5f1e7 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -114,6 +114,13 @@ HParsedToken *h_carray_index(const HCountedArray *a, size_t i) return a->elements[i]; } +size_t h_seq_len(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->used; +} + HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { assert(p != NULL); diff --git a/examples/glue.h b/examples/glue.h index 5fdabc3..4f69f51 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -74,6 +74,9 @@ uint64_t h_cast_uint (const HParsedToken *p); // Sequence access... +// Return the length of a sequence. +size_t h_seq_len(const HParsedToken *p); + // Access a sequence element by index. HParsedToken * h_seq_index(const HParsedToken *p, size_t i); From 6e166174d6c10a927c7c6edaadb410e6dd74e068 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 19:06:28 +0100 Subject: [PATCH 098/125] add H_FIELD variants for built-in types --- examples/glue.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/glue.h b/examples/glue.h index 4f69f51..568cbda 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -91,8 +91,12 @@ void * h_seq_index_user (HTokenType type, const HParsedToken *p, size_t #define H_INDEX(TYP, SEQ, IDX) \ ((TYP *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) -// Standard short-hand to access and cast a user-type field on a sequence token. +// Standard short-hand to access and cast elements on a sequence token. 
#define H_FIELD(TYP, IDX) H_INDEX(TYP, p->ast, IDX) +#define H_FIELD_SEQ(IDX) h_seq_index_seq(p->ast, IDX) +#define H_FIELD_BYTES(IDX) h_seq_index_bytes(p->ast, IDX) +#define H_FIELD_SINT(IDX) h_seq_index_sint(p->ast, IDX) +#define H_FIELD_UINT(IDX) h_seq_index_uint(p->ast, IDX) // Lower-level helper for h_seq_index. HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal From 0e37c6b8a16346dafe7d8d3eb4a3a0d86a57d41f Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 19:11:02 +0100 Subject: [PATCH 099/125] apply some simplifications to dns example --- examples/dns.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index c126812..381be6a 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -28,17 +28,18 @@ bool validate_hdzero(HParseResult *p) { bool validate_message(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; + dns_header_t *header = H_FIELD(dns_header_t, 0); size_t qd = header->question_count; size_t an = header->answer_count; size_t ns = header->authority_count; size_t ar = header->additional_count; - HParsedToken *questions = p->ast->seq->elements[1]; - if (questions->seq->used != qd) + + if (H_FIELD_SEQ(1)->used != qd) return false; - HParsedToken *rrs = p->ast->seq->elements[2]; - if (an+ns+ar != rrs->seq->used) + if (an+ns+ar != H_FIELD_SEQ(2)->used) return false; + return true; } @@ -51,7 +52,7 @@ bool validate_message(HParseResult *p) { void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; i<rdata->used; ++i) - data[i] = rdata->elements[i]->uint; + data[i] = h_cast_uint(rdata->elements[i]); // Parse RDATA if possible. const HParseResult *p = NULL; From 690a17b1f6d7774848a68482ab0f79b74d6f297b Mon Sep 17 00:00:00 2001 From: "Sven M.
Hallberg" Date: Fri, 25 Jan 2013 19:12:01 +0100 Subject: [PATCH 100/125] fix semantic action on A records --- examples/dns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dns.c b/examples/dns.c index 381be6a..3125b39 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -66,7 +66,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // Pack the parsed rdata into rr. switch(rr.type) { - case 1: rr.a = p->ast->seq->elements[0]->uint; break; + case 1: rr.a = h_cast_uint(p->ast); break; case 2: rr.ns = *(dns_domain_t *)p->ast->user; break; case 3: rr.md = *(dns_domain_t *)p->ast->user; break; case 4: rr.md = *(dns_domain_t *)p->ast->user; break; From 2f312b8c1f94587392cc16f5d4f833754e7beb8f Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 19:16:25 +0100 Subject: [PATCH 101/125] use H_CAST in set_rdata --- examples/dns.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 3125b39..fb33171 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -67,21 +67,21 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // Pack the parsed rdata into rr. 
switch(rr.type) { case 1: rr.a = h_cast_uint(p->ast); break; - case 2: rr.ns = *(dns_domain_t *)p->ast->user; break; - case 3: rr.md = *(dns_domain_t *)p->ast->user; break; - case 4: rr.md = *(dns_domain_t *)p->ast->user; break; - case 5: rr.cname = *(dns_domain_t *)p->ast->user; break; - case 6: rr.soa = *(dns_rr_soa_t *)p->ast->user; break; - case 7: rr.mb = *(dns_domain_t *)p->ast->user; break; - case 8: rr.mg = *(dns_domain_t *)p->ast->user; break; - case 9: rr.mr = *(dns_domain_t *)p->ast->user; break; - case 10: rr.null = *(dns_rr_null_t *)p->ast->user; break; - case 11: rr.wks = *(dns_rr_wks_t *)p->ast->user; break; - case 12: rr.ptr = *(dns_domain_t *)p->ast->user; break; - case 13: rr.hinfo = *(dns_rr_hinfo_t *)p->ast->user; break; - case 14: rr.minfo = *(dns_rr_minfo_t *)p->ast->user; break; - case 15: rr.mx = *(dns_rr_mx_t *)p->ast->user; break; - case 16: rr.txt = *(dns_rr_txt_t *)p->ast->user; break; + case 2: rr.ns = *H_CAST(dns_domain_t, p->ast); break; + case 3: rr.md = *H_CAST(dns_domain_t, p->ast); break; + case 4: rr.md = *H_CAST(dns_domain_t, p->ast); break; + case 5: rr.cname = *H_CAST(dns_domain_t, p->ast); break; + case 6: rr.soa = *H_CAST(dns_rr_soa_t, p->ast); break; + case 7: rr.mb = *H_CAST(dns_domain_t, p->ast); break; + case 8: rr.mg = *H_CAST(dns_domain_t, p->ast); break; + case 9: rr.mr = *H_CAST(dns_domain_t, p->ast); break; + case 10: rr.null = *H_CAST(dns_rr_null_t, p->ast); break; + case 11: rr.wks = *H_CAST(dns_rr_wks_t, p->ast); break; + case 12: rr.ptr = *H_CAST(dns_domain_t, p->ast); break; + case 13: rr.hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break; + case 14: rr.minfo = *H_CAST(dns_rr_minfo_t, p->ast); break; + case 15: rr.mx = *H_CAST(dns_rr_mx_t, p->ast); break; + case 16: rr.txt = *H_CAST(dns_rr_txt_t, p->ast); break; default: break; } } From 102d9e2c8655cf4f940c0d7c6628db455cb2a8ea Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 25 Jan 2013 19:24:04 +0100 Subject: [PATCH 102/125] add h_seq_elements accessor --- examples/glue.c | 7 +++++++ examples/glue.h | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/glue.c b/examples/glue.c index de5f1e7..02087d4 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -121,6 +121,13 @@ size_t h_seq_len(const HParsedToken *p) return p->seq->used; } +HParsedToken **h_seq_elements(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->elements; +} + HParsedToken *h_seq_index(const HParsedToken *p, size_t i) { assert(p != NULL); diff --git a/examples/glue.h b/examples/glue.h index 568cbda..b386625 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -77,8 +77,11 @@ uint64_t h_cast_uint (const HParsedToken *p); // Return the length of a sequence. size_t h_seq_len(const HParsedToken *p); +// Access a sequence's element array. +HParsedToken **h_seq_elements(const HParsedToken *p); + // Access a sequence element by index. -HParsedToken * h_seq_index(const HParsedToken *p, size_t i); +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); // Convenience functions combining index access and h_cast_*. HCountedArray *h_seq_index_seq (const HParsedToken *p, size_t i); From 53bc7af36aca2af6555f91e0e51da3ebaba0cfa6 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 25 Jan 2013 19:31:10 +0100 Subject: [PATCH 103/125] use more glue in dns.c --- examples/dns.c | 54 +++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index fb33171..45c05ba 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -87,20 +87,20 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { } const HParsedToken* act_header(const HParseResult *p) { - HParsedToken **fields = p->ast->seq->elements; + HParsedToken **fields = h_seq_elements(p->ast); dns_header_t header_ = { - .id = fields[0]->uint, - .qr = fields[1]->uint, - .opcode = fields[2]->uint, - .aa = fields[3]->uint, - .tc = fields[4]->uint, - .rd = fields[5]->uint, - .ra = fields[6]->uint, - .rcode = fields[7]->uint, - .question_count = fields[8]->uint, - .answer_count = fields[9]->uint, - .authority_count = fields[10]->uint, - .additional_count = fields[11]->uint + .id = h_cast_uint(fields[0]), + .qr = h_cast_uint(fields[1]), + .opcode = h_cast_uint(fields[2]), + .aa = h_cast_uint(fields[3]), + .tc = h_cast_uint(fields[4]), + .rd = h_cast_uint(fields[5]), + .ra = h_cast_uint(fields[6]), + .rcode = h_cast_uint(fields[7]), + .question_count = h_cast_uint(fields[8]), + .answer_count = h_cast_uint(fields[9]), + .authority_count = h_cast_uint(fields[10]), + .additional_count = h_cast_uint(fields[11]) }; dns_header_t *header = H_ALLOC(dns_header_t); @@ -112,10 +112,10 @@ const HParsedToken* act_header(const HParseResult *p) { const HParsedToken* act_label(const HParseResult *p) { dns_label_t *r = H_ALLOC(dns_label_t); - r->len = p->ast->seq->used; + r->len = h_seq_len(p->ast); r->label = h_arena_malloc(p->arena, r->len + 1); for (size_t i=0; i<r->len; ++i) - r->label[i] = p->ast->seq->elements[i]->uint; + r->label[i] = H_FIELD_UINT(i); r->label[r->len] = 0; return H_MAKE(dns_label_t, r); @@ -125,30 +125,30 @@ const HParsedToken* act_rr(const HParseResult *p) { dns_rr_t *rr = H_ALLOC(dns_rr_t);
rr->name = *H_FIELD(dns_domain_t, 0); - rr->type = p->ast->seq->elements[1]->uint; - rr->class = p->ast->seq->elements[2]->uint; - rr->ttl = p->ast->seq->elements[3]->uint; - rr->rdlength = p->ast->seq->elements[4]->seq->used; + rr->type = H_FIELD_UINT(1); + rr->class = H_FIELD_UINT(2); + rr->ttl = H_FIELD_UINT(3); + rr->rdlength = H_FIELD_SEQ(4)->used; // Parse and pack RDATA. - set_rdata(*rr, p->ast->seq->elements[4]->seq); + set_rdata(*rr, H_FIELD_SEQ(4)); return H_MAKE(dns_rr_t, rr); } const HParsedToken* act_question(const HParseResult *p) { dns_question_t *q = H_ALLOC(dns_question_t); - HParsedToken **fields = p->ast->seq->elements; + HParsedToken **fields = h_seq_elements(p->ast); // QNAME is a sequence of labels. Pack them into an array. - q->qname.qlen = fields[0]->seq->used; + q->qname.qlen = h_seq_len(fields[0]); q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); - for(size_t i=0; iseq->used; i++) { + for(size_t i=0; iqname.qlen; i++) { q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i); } - q->qtype = fields[1]->uint; - q->qclass = fields[2]->uint; + q->qtype = h_cast_uint(fields[1]); + q->qclass = h_cast_uint(fields[2]); return H_MAKE(dns_question_t, q); } @@ -162,7 +162,7 @@ const HParsedToken* act_message(const HParseResult *p) { msg->header = *header; // Copy questions into message struct. - HParsedToken *qs = p->ast->seq->elements[1]; + HParsedToken *qs = h_seq_index(p->ast, 1); struct dns_question *questions = h_arena_malloc(p->arena, sizeof(struct dns_question)*(header->question_count)); for (size_t i=0; iquestion_count; ++i) { @@ -171,7 +171,7 @@ const HParsedToken* act_message(const HParseResult *p) { msg->questions = questions; // Copy answer RRs into message struct. 
- HParsedToken *rrs = p->ast->seq->elements[2]; + HParsedToken *rrs = h_seq_index(p->ast, 2); struct dns_rr *answers = h_arena_malloc(p->arena, sizeof(struct dns_rr)*(header->answer_count)); for (size_t i=0; i<header->answer_count; ++i) { From b6856fdfbfb28b1ca113afa6b68e7a50236309fa Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 19:32:09 +0100 Subject: [PATCH 104/125] indentation fix --- examples/dns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dns.c b/examples/dns.c index 45c05ba..c404f6c 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -181,7 +181,7 @@ const HParsedToken* act_message(const HParseResult *p) { // Copy authority RRs into message struct. struct dns_rr *authority = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header->authority_count)); + sizeof(struct dns_rr)*(header->authority_count)); for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) { authority[i] = *H_INDEX(dns_rr_t, rrs, j); } From 64c44522e90e8d4d47b4ebdc6fb03636ecc48b75 Mon Sep 17 00:00:00 2001 From: "Sven M.
Hallberg" Date: Fri, 25 Jan 2013 20:16:44 +0100 Subject: [PATCH 105/125] use new glue api in rr.c --- examples/rr.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/examples/rr.c b/examples/rr.c index 94c4b7c..38edd18 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -20,10 +20,10 @@ bool validate_null(HParseResult *p) { const HParsedToken *act_null(const HParseResult *p) { dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); - size_t len = p->ast->seq->used; + size_t len = h_seq_len(p->ast); uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len); for (size_t i=0; iast->seq->elements[i]->uint; + buf[i] = H_FIELD_UINT(i); return H_MAKE(dns_rr_null_t, null); } @@ -31,16 +31,17 @@ const HParsedToken *act_null(const HParseResult *p) { const HParsedToken *act_txt(const HParseResult *p) { dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); - const HCountedArray *arr = p->ast->seq; + const HCountedArray *arr = h_cast_seq(p->ast); uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); for (size_t i=0; iused; ++i) { - uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); - for (size_t j=0; jelements[i]->seq->used; ++j) - tmp[j] = arr->elements[i]->seq->elements[j]->uint; + size_t len = h_seq_len(arr->elements[i]); + uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len); + for (size_t j=0; jelements[i], j); ret[i] = tmp; } - txt->count = p->ast->seq->elements[0]->seq->used; + txt->count = arr->used; txt->txt_data = ret; return H_MAKE(dns_rr_txt_t, txt); @@ -49,10 +50,10 @@ const HParsedToken *act_txt(const HParseResult *p) { const HParsedToken* act_cstr(const HParseResult *p) { dns_cstr_t *cs = H_ALLOC(dns_cstr_t); - const HCountedArray *arr = p->ast->seq; + const HCountedArray *arr = h_cast_seq(p->ast); uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); for (size_t i=0; iused; ++i) - ret[i] = arr->elements[i]->uint; + ret[i] = 
h_cast_uint(arr->elements[i]); assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? *cs = ret; @@ -64,11 +65,11 @@ const HParsedToken* act_soa(const HParseResult *p) { soa->mname = *H_FIELD(dns_domain_t, 0); soa->rname = *H_FIELD(dns_domain_t, 1); - soa->serial = p->ast->seq->elements[2]->uint; - soa->refresh = p->ast->seq->elements[3]->uint; - soa->retry = p->ast->seq->elements[4]->uint; - soa->expire = p->ast->seq->elements[5]->uint; - soa->minimum = p->ast->seq->elements[6]->uint; + soa->serial = H_FIELD_UINT(2); + soa->refresh = H_FIELD_UINT(3); + soa->retry = H_FIELD_UINT(4); + soa->expire = H_FIELD_UINT(5); + soa->minimum = H_FIELD_UINT(6); return H_MAKE(dns_rr_soa_t, soa); } @@ -76,12 +77,12 @@ const HParsedToken* act_soa(const HParseResult *p) { const HParsedToken* act_wks(const HParseResult *p) { dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); - wks->address = p->ast->seq->elements[0]->uint; - wks->protocol = p->ast->seq->elements[1]->uint; - wks->len = p->ast->seq->elements[2]->seq->used; + wks->address = H_FIELD_UINT(0); + wks->protocol = H_FIELD_UINT(1); + wks->len = H_FIELD_SEQ(2)->used; wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len); for (size_t i=0; ilen; ++i) - wks->bit_map[i] = p->ast->seq->elements[2]->seq->elements[i]->uint; + wks->bit_map[i] = h_seq_index_uint(h_seq_index(p->ast, 2), i); return H_MAKE(dns_rr_wks_t, wks); } @@ -107,7 +108,7 @@ const HParsedToken* act_minfo(const HParseResult *p) { const HParsedToken* act_mx(const HParseResult *p) { dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); - mx->preference = p->ast->seq->elements[0]->uint; + mx->preference = H_FIELD_UINT(0); mx->exchange = *H_FIELD(dns_domain_t, 1); return H_MAKE(dns_rr_mx_t, mx); From 811c68fdf268f9dd7838f7b7d8991b79e81d472a Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 25 Jan 2013 20:47:51 +0100 Subject: [PATCH 106/125] let H_INDEX and H_FIELD accept varargs for nested sequence access --- examples/glue.c | 21 +++++++++++++++++++++ examples/glue.h | 10 +++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 02087d4..1457f6f 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -135,6 +135,27 @@ HParsedToken *h_seq_index(const HParsedToken *p, size_t i) return h_carray_index(p->seq, i); } +HParsedToken *h_seq_index_path(HParsedToken *p, ...) +{ + va_list va; + + va_start(va, p); + p = h_seq_index_vpath(p, va); + va_end(va); + + return p; +} + +HParsedToken *h_seq_index_vpath(HParsedToken *p, va_list va) +{ + int i; + + while((i = va_arg(va, int)) >= 0) + p = h_seq_index(p, i); + + return p; +} + HCountedArray *h_seq_index_seq(const HParsedToken *p, size_t i) { return h_cast_seq(h_seq_index(p, i)); diff --git a/examples/glue.h b/examples/glue.h index b386625..cd5e4ef 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -83,6 +83,10 @@ HParsedToken **h_seq_elements(const HParsedToken *p); // Access a sequence element by index. HParsedToken *h_seq_index(const HParsedToken *p, size_t i); +// Access an element in a nested sequence by a path of indices. +HParsedToken *h_seq_index_path(HParsedToken *p, ...); +HParsedToken *h_seq_index_vpath(HParsedToken *p, va_list va); + // Convenience functions combining index access and h_cast_*. HCountedArray *h_seq_index_seq (const HParsedToken *p, size_t i); HBytes h_seq_index_bytes(const HParsedToken *p, size_t i); @@ -91,11 +95,11 @@ uint64_t h_seq_index_uint (const HParsedToken *p, size_t i); void * h_seq_index_user (HTokenType type, const HParsedToken *p, size_t i); // Standard short-hand to access and cast a user-type sequence element. -#define H_INDEX(TYP, SEQ, IDX) \ - ((TYP *) h_seq_index_user(TT_ ## TYP, SEQ, IDX)) +#define H_INDEX(TYP, SEQ, ...) 
\ + ((TYP *) h_cast(TT_ ## TYP, h_seq_index_path(SEQ, __VA_ARGS__, -1))) // Standard short-hand to access and cast elements on a sequence token. -#define H_FIELD(TYP, IDX) H_INDEX(TYP, p->ast, IDX) +#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) #define H_FIELD_SEQ(IDX) h_seq_index_seq(p->ast, IDX) #define H_FIELD_BYTES(IDX) h_seq_index_bytes(p->ast, IDX) #define H_FIELD_SINT(IDX) h_seq_index_sint(p->ast, IDX) From 18fbf6fd69fc0a3662b28304ca5cb2e910ecaea0 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 21:09:23 +0100 Subject: [PATCH 107/125] replace casting h_seq_index functions with H_INDEX macro family, supporting varargs --- examples/glue.c | 44 ++++++++++---------------------------------- examples/glue.h | 28 +++++++++++++--------------- examples/rr.c | 4 ++-- 3 files changed, 25 insertions(+), 51 deletions(-) diff --git a/examples/glue.c b/examples/glue.c index 1457f6f..3a8f6cb 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -135,50 +135,26 @@ HParsedToken *h_seq_index(const HParsedToken *p, size_t i) return h_carray_index(p->seq, i); } -HParsedToken *h_seq_index_path(HParsedToken *p, ...) +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
{ va_list va; - va_start(va, p); - p = h_seq_index_vpath(p, va); + va_start(va, i); + HParsedToken *ret = h_seq_index_vpath(p, i, va); va_end(va); - return p; + return ret; } -HParsedToken *h_seq_index_vpath(HParsedToken *p, va_list va) +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) { - int i; + HParsedToken *ret = h_seq_index(p, i); + int j; - while((i = va_arg(va, int)) >= 0) - p = h_seq_index(p, i); + while((j = va_arg(va, int)) >= 0) + ret = h_seq_index(p, j); - return p; -} - -HCountedArray *h_seq_index_seq(const HParsedToken *p, size_t i) -{ - return h_cast_seq(h_seq_index(p, i)); -} - -HBytes h_seq_index_bytes(const HParsedToken *p, size_t i) -{ - return h_cast_bytes(h_seq_index(p, i)); -} - -int64_t h_seq_index_sint(const HParsedToken *p, size_t i) -{ - return h_cast_sint(h_seq_index(p, i)); -} - -uint64_t h_seq_index_uint(const HParsedToken *p, size_t i) -{ - return h_cast_uint(h_seq_index(p, i)); -} - -void *h_seq_index_user(HTokenType type, const HParsedToken *p, size_t i) -{ - return h_cast(type, h_seq_index(p, i)); + return ret; } void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) diff --git a/examples/glue.h b/examples/glue.h index cd5e4ef..1dc2fbd 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -84,26 +84,24 @@ HParsedToken **h_seq_elements(const HParsedToken *p); HParsedToken *h_seq_index(const HParsedToken *p, size_t i); // Access an element in a nested sequence by a path of indices. -HParsedToken *h_seq_index_path(HParsedToken *p, ...); -HParsedToken *h_seq_index_vpath(HParsedToken *p, va_list va); +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); -// Convenience functions combining index access and h_cast_*. 
-HCountedArray *h_seq_index_seq (const HParsedToken *p, size_t i); -HBytes h_seq_index_bytes(const HParsedToken *p, size_t i); -int64_t h_seq_index_sint (const HParsedToken *p, size_t i); -uint64_t h_seq_index_uint (const HParsedToken *p, size_t i); -void * h_seq_index_user (HTokenType type, const HParsedToken *p, size_t i); - -// Standard short-hand to access and cast a user-type sequence element. +// Convenience macros combining (nested) index access and h_cast. #define H_INDEX(TYP, SEQ, ...) \ - ((TYP *) h_cast(TT_ ## TYP, h_seq_index_path(SEQ, __VA_ARGS__, -1))) + ((TYP *) h_cast(TT_ ## TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))) +#define H_INDEX_SEQ(SEQ, ...) h_cast_seq(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) h_cast_bytes(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) h_cast_sint(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) h_cast_uint(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) // Standard short-hand to access and cast elements on a sequence token. #define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) -#define H_FIELD_SEQ(IDX) h_seq_index_seq(p->ast, IDX) -#define H_FIELD_BYTES(IDX) h_seq_index_bytes(p->ast, IDX) -#define H_FIELD_SINT(IDX) h_seq_index_sint(p->ast, IDX) -#define H_FIELD_UINT(IDX) h_seq_index_uint(p->ast, IDX) +#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) +#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) +#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) +#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) // Lower-level helper for h_seq_index. 
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal diff --git a/examples/rr.c b/examples/rr.c index 38edd18..1e74370 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -37,7 +37,7 @@ const HParsedToken *act_txt(const HParseResult *p) { size_t len = h_seq_len(arr->elements[i]); uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len); for (size_t j=0; jelements[i], j); + tmp[j] = H_INDEX_UINT(arr->elements[i], j); ret[i] = tmp; } @@ -82,7 +82,7 @@ const HParsedToken* act_wks(const HParseResult *p) { wks->len = H_FIELD_SEQ(2)->used; wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len); for (size_t i=0; ilen; ++i) - wks->bit_map[i] = h_seq_index_uint(h_seq_index(p->ast, 2), i); + wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i); return H_MAKE(dns_rr_wks_t, wks); } From 7149260a13def22c236f11a6ecfd09ff6cd22ef7 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 25 Jan 2013 21:52:11 +0100 Subject: [PATCH 108/125] make h_cast a family of macros, add H_ASSERT family, make h_assert_type a macro --- examples/dns.c | 32 ++++++++++++++++---------------- examples/glue.c | 30 ------------------------------ examples/glue.h | 35 +++++++++++++++++++++-------------- examples/rr.c | 6 +++--- 4 files changed, 40 insertions(+), 63 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index c404f6c..6c572a0 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -52,7 +52,7 @@ bool validate_message(HParseResult *p) { void set_rdata(struct dns_rr rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) - data[i] = h_cast_uint(rdata->elements[i]); + data[i] = H_CAST_UINT(rdata->elements[i]); // Parse RDATA if possible. const HParseResult *p = NULL; @@ -66,7 +66,7 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { // Pack the parsed rdata into rr. 
switch(rr.type) { - case 1: rr.a = h_cast_uint(p->ast); break; + case 1: rr.a = H_CAST_UINT(p->ast); break; case 2: rr.ns = *H_CAST(dns_domain_t, p->ast); break; case 3: rr.md = *H_CAST(dns_domain_t, p->ast); break; case 4: rr.md = *H_CAST(dns_domain_t, p->ast); break; @@ -89,18 +89,18 @@ void set_rdata(struct dns_rr rr, HCountedArray *rdata) { const HParsedToken* act_header(const HParseResult *p) { HParsedToken **fields = h_seq_elements(p->ast); dns_header_t header_ = { - .id = h_cast_uint(fields[0]), - .qr = h_cast_uint(fields[1]), - .opcode = h_cast_uint(fields[2]), - .aa = h_cast_uint(fields[3]), - .tc = h_cast_uint(fields[4]), - .rd = h_cast_uint(fields[5]), - .ra = h_cast_uint(fields[6]), - .rcode = h_cast_uint(fields[7]), - .question_count = h_cast_uint(fields[8]), - .answer_count = h_cast_uint(fields[9]), - .authority_count = h_cast_uint(fields[10]), - .additional_count = h_cast_uint(fields[11]) + .id = H_CAST_UINT(fields[0]), + .qr = H_CAST_UINT(fields[1]), + .opcode = H_CAST_UINT(fields[2]), + .aa = H_CAST_UINT(fields[3]), + .tc = H_CAST_UINT(fields[4]), + .rd = H_CAST_UINT(fields[5]), + .ra = H_CAST_UINT(fields[6]), + .rcode = H_CAST_UINT(fields[7]), + .question_count = H_CAST_UINT(fields[8]), + .answer_count = H_CAST_UINT(fields[9]), + .authority_count = H_CAST_UINT(fields[10]), + .additional_count = H_CAST_UINT(fields[11]) }; dns_header_t *header = H_ALLOC(dns_header_t); @@ -147,8 +147,8 @@ const HParsedToken* act_question(const HParseResult *p) { q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i); } - q->qtype = h_cast_uint(fields[1]); - q->qclass = h_cast_uint(fields[2]); + q->qtype = H_CAST_UINT(fields[1]); + q->qclass = H_CAST_UINT(fields[2]); return H_MAKE(dns_question_t, q); } diff --git a/examples/glue.c b/examples/glue.c index 3a8f6cb..7f9c6fa 100644 --- a/examples/glue.c +++ b/examples/glue.c @@ -77,36 +77,6 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val) return ret; } -void * h_cast(HTokenType type, const HParsedToken *p) -{ - 
assert(p->token_type == type); - return p->user; -} - -HCountedArray *h_cast_seq(const HParsedToken *p) -{ - assert(p->token_type == TT_SEQUENCE); - return p->seq; -} - -HBytes h_cast_bytes(const HParsedToken *p) -{ - assert(p->token_type == TT_BYTES); - return p->bytes; -} - -int64_t h_cast_sint(const HParsedToken *p) -{ - assert(p->token_type == TT_SINT); - return p->sint; -} - -uint64_t h_cast_uint(const HParsedToken *p) -{ - assert(p->token_type == TT_UINT); - return p->uint; -} - // XXX -> internal HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { diff --git a/examples/glue.h b/examples/glue.h index 1dc2fbd..addcf18 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -61,16 +61,24 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) -// Extract type-specific value back from HParsedTokens... +// Extract (cast) type-specific value back from HParsedTokens... -void * h_cast (HTokenType type, const HParsedToken *p); -HCountedArray *h_cast_seq (const HParsedToken *p); -HBytes h_cast_bytes(const HParsedToken *p); -int64_t h_cast_sint (const HParsedToken *p); -uint64_t h_cast_uint (const HParsedToken *p); +// Pass-through assertion that a given token has the expected type. +#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) -// Standard short-hand to cast to a user type. -#define H_CAST(TYP, TOK) ((TYP *) h_cast(TT_ ## TYP, TOK)) +// Convenience short-hand forms of h_assert_type. +#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) +#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) +#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) +#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) +#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) + +// Assert expected type and return contained value. 
+#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) +#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) +#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) +#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) +#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) // Sequence access... @@ -88,12 +96,11 @@ HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); // Convenience macros combining (nested) index access and h_cast. -#define H_INDEX(TYP, SEQ, ...) \ - ((TYP *) h_cast(TT_ ## TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))) -#define H_INDEX_SEQ(SEQ, ...) h_cast_seq(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_BYTES(SEQ, ...) h_cast_bytes(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SINT(SEQ, ...) h_cast_sint(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_UINT(SEQ, ...) h_cast_uint(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) // Standard short-hand to access and cast elements on a sequence token. 
diff --git a/examples/rr.c b/examples/rr.c index 1e74370..8c14e0a 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -31,7 +31,7 @@ const HParsedToken *act_null(const HParseResult *p) { const HParsedToken *act_txt(const HParseResult *p) { dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); - const HCountedArray *arr = h_cast_seq(p->ast); + const HCountedArray *arr = H_CAST_SEQ(p->ast); uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); for (size_t i=0; iused; ++i) { size_t len = h_seq_len(arr->elements[i]); @@ -50,10 +50,10 @@ const HParsedToken *act_txt(const HParseResult *p) { const HParsedToken* act_cstr(const HParseResult *p) { dns_cstr_t *cs = H_ALLOC(dns_cstr_t); - const HCountedArray *arr = h_cast_seq(p->ast); + const HCountedArray *arr = H_CAST_SEQ(p->ast); uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); for (size_t i=0; iused; ++i) - ret[i] = h_cast_uint(arr->elements[i]); + ret[i] = H_CAST_UINT(arr->elements[i]); assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? *cs = ret; From e95aef0b09a002ed9e513c5d0298eca2b4b76b32 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 1 Feb 2013 01:42:10 +0100 Subject: [PATCH 109/125] add some docs to glue.h --- examples/glue.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/examples/glue.h b/examples/glue.h index addcf18..ccb488e 100644 --- a/examples/glue.h +++ b/examples/glue.h @@ -1,6 +1,25 @@ // // API additions for writing grammar and semantic actions more concisely // +// +// Quick Overview: +// +// Grammars can be succinctly specified with the family of H_RULE macros. +// H_RULE defines a plain parser variable. H_ARULE additionally attaches a +// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE +// combine both. +// +// A few standard semantic actions are defined below. 
The H_ACT_APPLY macro +// allows semantic actions to be defined by "partial application" of +// a generic action to fixed paramters. +// +// The definition of more complex semantic actions will usually consist of +// extracting data from the given parse tree and constructing a token of custom +// type to represent the result. A number of functions and convenience macros +// are provided to capture the most common cases and idioms. +// +// See the leading comment blocks on the sections below for more details. +// #ifndef HAMMER_EXAMPLES_GLUE__H #define HAMMER_EXAMPLES_GLUE__H @@ -12,6 +31,29 @@ // // Grammar specification // +// H_RULE is simply a short-hand for the typical declaration and definition of +// a parser variable. See its plain definition below. The goal is to save +// horizontal space as well as to provide a clear and unified look together with +// the other macro variants that stays close to an abstract PEG or BNF grammar. +// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their +// combinations as they allow the definition of syntax to be given without +// intermingling it with the semantic specifications. +// +// H_ARULE defines a variable just like H_RULE but attaches a semantic action +// to the result of the parser via h_action. The action is expected to be +// named act_. +// +// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. +// The validation is expected to be named validate_. +// +// H_VARULE combines H_RULE with both an action and a validation. The action is +// attached before the validation, i.e. the validation receives as input the +// result of the action. +// +// H_AVRULE is like H_VARULE but the action is attached outside the validation, +// i.e. the validation receives the uninterpreted AST as input. 
+// + #define H_RULE(rule, def) const HParser *rule = def #define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) @@ -26,6 +68,25 @@ // // Pre-fab semantic actions // +// A collection of generally useful semantic actions is provided. +// +// h_act_ignore is the action equivalent of the parser combinator h_ignore. It +// simply causes the AST it is applied to to be replaced with NULL. This most +// importantly causes it to be elided from the result of a surrounding +// h_sequence. +// +// h_act_index is of note as it is not itself suitable to be passed to +// h_action. It is parameterized by an index to be picked from a sequence +// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY +// macro provides a concise way to define such a parameter-application wrapper. +// +// h_act_flatten acts on a token of possibly nested sequences by recursively +// flattening it into a single sequence. Cf. h_seq_flatten below. +// +// H_ACT_APPLY implements "partial application" for semantic actions. It +// defines a new action that supplies given parameters to a parameterized +// action such as h_act_index. +// const HParsedToken *h_act_ignore(const HParseResult *p); const HParsedToken *h_act_index(int i, const HParseResult *p); @@ -42,6 +103,68 @@ const HParsedToken *h_act_flatten(const HParseResult *p); // // Working with HParsedTokens // +// The type HParsedToken represents a dynamically-typed universe of values. +// Declared below are constructors to turn ordinary values into their +// HParsedToken equivalents, extractors to retrieve the original values from +// inside an HParsedToken, and functions that inspect and modify tokens of +// sequence type directly. +// +// In addition, there are a number of short-hand macros that work with some +// conventions to eliminate common boilerplate. These conventions are listed +// below. Be sure to follow them if you want to use the respective macros. 
+// +// * The single argument to semantic actions should be called 'p'. +// +// The H_MAKE macros suppy 'p->arena' to their underlying h_make +// counterparts. The H_FIELD macros supply 'p->ast' to their underlying +// H_INDEX counterparts. +// +// * For each custom token type, there should be a typedef for the +// corresponding value type. +// +// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to +// a pointer to the given type. +// +// * For each custom token type, say 'foo_t', there must be an integer +// constant 'TT_foo_t' to identify the token type. This constant must have a +// value greater or equal than TT_USER. +// +// One idiom is to define an enum for all custom token types and to assign a +// value of TT_USER to the first element. This can be viewed as extending +// the HTokenType enum. +// +// The H_MAKE and H_ASSERT macros derive the name of the token type constant +// from the given type name. +// +// +// The H_ALLOC macro is useful for allocating values of custom token types. +// +// The H_MAKE family of macros construct tokens of a given type. The native +// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. +// The form with no suffix is used for custom token types. This convention is +// also used for other macro and function families. +// +// The H_ASSERT family simply asserts that a given token has the expected type. +// It mainly serves as an implementation aid for H_CAST. Of note in that regard +// is that, unlike the standard 'assert' macro, these form _expressions_ that +// return the value of their token argument; thus they can be used in a +// "pass-through" fashion inside other expressions. +// +// The H_CAST family combines a type assertion with access to the +// statically-typed value inside a token. +// +// A number of functions h_seq_* operate on and inspect sequence tokens. +// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. 
+// Therefore there are h_seq_snoc and h_seq_append to build up sequences. +// +// The macro families H_FIELD and H_INDEX combine index access on a sequence +// with a cast to the appropriate result type. H_FIELD is used to access the +// elements of the argument token 'p' in an action. H_INDEX allows any sequence +// token to be specified. Both macro families take an arbitrary number of index +// arguments, giving access to elements in nested sequences by path. +// These macros are very useful to avoid spaghetti chains of unchecked pointer +// dereferences. +// // Standard short-hand for arena-allocating a variable in a semantic action. #define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) From 195e50a7f18b86fa8d17aaaa632a454ed67a2d7e Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 15:24:13 +0100 Subject: [PATCH 110/125] move glue.[ch] into the library proper --- examples/Makefile | 3 +- examples/dns_common.h | 2 +- examples/glue.c | 170 ---------------------------- examples/glue.h | 251 ------------------------------------------ src/Makefile | 2 + 5 files changed, 4 insertions(+), 424 deletions(-) delete mode 100644 examples/glue.c delete mode 100644 examples/glue.h diff --git a/examples/Makefile b/examples/Makefile index 786af44..9c6ac1b 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -15,13 +15,12 @@ LDFLAGS += $(pkg-config --libs glib-2.0) all: dns base64 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -dns: dns.o rr.o dns_common.o glue.o +dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) dns.o: ../src/hammer.h dns_common.h rr.o: ../src/hammer.h rr.h dns_common.h dns_common.o: ../src/hammer.h dns_common.h -glue.o: ../src/hammer.h glue.h base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) base64: base64.o diff --git a/examples/dns_common.h b/examples/dns_common.h index 6b04519..c1d8d7e 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -2,7 +2,7 @@ #define HAMMER_DNS_COMMON__H 
#include "../src/hammer.h" -#include "glue.h" +#include "../src/glue.h" const HParser* init_domain(); const HParser* init_character_string(); diff --git a/examples/glue.c b/examples/glue.c deleted file mode 100644 index 7f9c6fa..0000000 --- a/examples/glue.c +++ /dev/null @@ -1,170 +0,0 @@ -#include "glue.h" -#include "../src/internal.h" // for h_carray_* - - -// The action equivalent of h_ignore. -const HParsedToken *h_act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *h_act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -// Action version of h_seq_flatten. -const HParsedToken *h_act_flatten(const HParseResult *p) { - return h_seq_flatten(p->arena, p->ast); -} - -// Low-level helper for the h_make family. 
-HParsedToken *h_make_(HArena *arena, HTokenType type) -{ - HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); - ret->token_type = type; - return ret; -} - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value) -{ - assert(type >= TT_USER); - HParsedToken *ret = h_make_(arena, type); - ret->user = value; - return ret; -} - -HParsedToken *h_make_seq(HArena *arena) -{ - HParsedToken *ret = h_make_(arena, TT_SEQUENCE); - ret->seq = h_carray_new(arena); - return ret; -} - -HParsedToken *h_make_bytes(HArena *arena, size_t len) -{ - HParsedToken *ret = h_make_(arena, TT_BYTES); - ret->bytes.len = len; - ret->bytes.token = h_arena_malloc(arena, len); - return ret; -} - -HParsedToken *h_make_sint(HArena *arena, int64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_SINT); - ret->sint = val; - return ret; -} - -HParsedToken *h_make_uint(HArena *arena, uint64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_UINT); - ret->uint = val; - return ret; -} - -// XXX -> internal -HParsedToken *h_carray_index(const HCountedArray *a, size_t i) -{ - assert(i < a->used); - return a->elements[i]; -} - -size_t h_seq_len(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->used; -} - -HParsedToken **h_seq_elements(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->elements; -} - -HParsedToken *h_seq_index(const HParsedToken *p, size_t i) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return h_carray_index(p->seq, i); -} - -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
-{ - va_list va; - - va_start(va, i); - HParsedToken *ret = h_seq_index_vpath(p, i, va); - va_end(va); - - return ret; -} - -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) -{ - HParsedToken *ret = h_seq_index(p, i); - int j; - - while((j = va_arg(va, int)) >= 0) - ret = h_seq_index(p, j); - - return ret; -} - -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - - h_carray_append(xs->seq, (HParsedToken *)x); -} - -void h_seq_append(HParsedToken *xs, const HParsedToken *ys) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - assert(ys != NULL); - assert(ys->token_type == TT_SEQUENCE); - - for(size_t i; iseq->used; i++) - h_carray_append(xs->seq, ys->seq->elements[i]); -} - -// Flatten nested sequences. Always returns a sequence. -// If input element is not a sequence, returns it as a singleton sequence. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) -{ - assert(p != NULL); - - HParsedToken *ret = h_make_seq(arena); - switch(p->token_type) { - case TT_SEQUENCE: - // Flatten and append all. - for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); - } - break; - default: - // Make singleton sequence. - h_seq_snoc(ret, p); - break; - } - - return ret; -} diff --git a/examples/glue.h b/examples/glue.h deleted file mode 100644 index ccb488e..0000000 --- a/examples/glue.h +++ /dev/null @@ -1,251 +0,0 @@ -// -// API additions for writing grammar and semantic actions more concisely -// -// -// Quick Overview: -// -// Grammars can be succinctly specified with the family of H_RULE macros. -// H_RULE defines a plain parser variable. H_ARULE additionally attaches a -// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE -// combine both. -// -// A few standard semantic actions are defined below. 
The H_ACT_APPLY macro -// allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. -// -// The definition of more complex semantic actions will usually consist of -// extracting data from the given parse tree and constructing a token of custom -// type to represent the result. A number of functions and convenience macros -// are provided to capture the most common cases and idioms. -// -// See the leading comment blocks on the sections below for more details. -// - -#ifndef HAMMER_EXAMPLES_GLUE__H -#define HAMMER_EXAMPLES_GLUE__H - -#include -#include "../src/hammer.h" - - -// -// Grammar specification -// -// H_RULE is simply a short-hand for the typical declaration and definition of -// a parser variable. See its plain definition below. The goal is to save -// horizontal space as well as to provide a clear and unified look together with -// the other macro variants that stays close to an abstract PEG or BNF grammar. -// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their -// combinations as they allow the definition of syntax to be given without -// intermingling it with the semantic specifications. -// -// H_ARULE defines a variable just like H_RULE but attaches a semantic action -// to the result of the parser via h_action. The action is expected to be -// named act_. -// -// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. -// The validation is expected to be named validate_. -// -// H_VARULE combines H_RULE with both an action and a validation. The action is -// attached before the validation, i.e. the validation receives as input the -// result of the action. -// -// H_AVRULE is like H_VARULE but the action is attached outside the validation, -// i.e. the validation receives the uninterpreted AST as input. 
-// - - -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) -#define H_VRULE(rule, def) const HParser *rule = \ - h_attr_bool(def, validate_ ## rule) -#define H_VARULE(rule, def) const HParser *rule = \ - h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) -#define H_AVRULE(rule, def) const HParser *rule = \ - h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) - - -// -// Pre-fab semantic actions -// -// A collection of generally useful semantic actions is provided. -// -// h_act_ignore is the action equivalent of the parser combinator h_ignore. It -// simply causes the AST it is applied to to be replaced with NULL. This most -// importantly causes it to be elided from the result of a surrounding -// h_sequence. -// -// h_act_index is of note as it is not itself suitable to be passed to -// h_action. It is parameterized by an index to be picked from a sequence -// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY -// macro provides a concise way to define such a parameter-application wrapper. -// -// h_act_flatten acts on a token of possibly nested sequences by recursively -// flattening it into a single sequence. Cf. h_seq_flatten below. -// -// H_ACT_APPLY implements "partial application" for semantic actions. It -// defines a new action that supplies given parameters to a parameterized -// action such as h_act_index. -// - -const HParsedToken *h_act_ignore(const HParseResult *p); -const HParsedToken *h_act_index(int i, const HParseResult *p); -const HParsedToken *h_act_flatten(const HParseResult *p); - -// Define 'myaction' as a specialization of 'paction' by supplying the leading -// parameters. -#define H_ACT_APPLY(myaction, paction, ...) 
\ - const HParsedToken *myaction(const HParseResult *p) { \ - return paction(__VA_ARGS__, p); \ - } - - -// -// Working with HParsedTokens -// -// The type HParsedToken represents a dynamically-typed universe of values. -// Declared below are constructors to turn ordinary values into their -// HParsedToken equivalents, extractors to retrieve the original values from -// inside an HParsedToken, and functions that inspect and modify tokens of -// sequence type directly. -// -// In addition, there are a number of short-hand macros that work with some -// conventions to eliminate common boilerplate. These conventions are listed -// below. Be sure to follow them if you want to use the respective macros. -// -// * The single argument to semantic actions should be called 'p'. -// -// The H_MAKE macros suppy 'p->arena' to their underlying h_make -// counterparts. The H_FIELD macros supply 'p->ast' to their underlying -// H_INDEX counterparts. -// -// * For each custom token type, there should be a typedef for the -// corresponding value type. -// -// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to -// a pointer to the given type. -// -// * For each custom token type, say 'foo_t', there must be an integer -// constant 'TT_foo_t' to identify the token type. This constant must have a -// value greater or equal than TT_USER. -// -// One idiom is to define an enum for all custom token types and to assign a -// value of TT_USER to the first element. This can be viewed as extending -// the HTokenType enum. -// -// The H_MAKE and H_ASSERT macros derive the name of the token type constant -// from the given type name. -// -// -// The H_ALLOC macro is useful for allocating values of custom token types. -// -// The H_MAKE family of macros construct tokens of a given type. The native -// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. -// The form with no suffix is used for custom token types. 
This convention is -// also used for other macro and function families. -// -// The H_ASSERT family simply asserts that a given token has the expected type. -// It mainly serves as an implementation aid for H_CAST. Of note in that regard -// is that, unlike the standard 'assert' macro, these form _expressions_ that -// return the value of their token argument; thus they can be used in a -// "pass-through" fashion inside other expressions. -// -// The H_CAST family combines a type assertion with access to the -// statically-typed value inside a token. -// -// A number of functions h_seq_* operate on and inspect sequence tokens. -// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. -// Therefore there are h_seq_snoc and h_seq_append to build up sequences. -// -// The macro families H_FIELD and H_INDEX combine index access on a sequence -// with a cast to the appropriate result type. H_FIELD is used to access the -// elements of the argument token 'p' in an action. H_INDEX allows any sequence -// token to be specified. Both macro families take an arbitrary number of index -// arguments, giving access to elements in nested sequences by path. -// These macros are very useful to avoid spaghetti chains of unchecked pointer -// dereferences. -// - -// Standard short-hand for arena-allocating a variable in a semantic action. -#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) - -// Token constructors... - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value); -HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. -HParsedToken *h_make_bytes(HArena *arena, size_t len); -HParsedToken *h_make_sint(HArena *arena, int64_t val); -HParsedToken *h_make_uint(HArena *arena, uint64_t val); - -// Standard short-hands to make tokens in an action. 
-#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) -#define H_MAKE_SEQ() h_make_seq(p->arena) -#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) -#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) -#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) - -// Extract (cast) type-specific value back from HParsedTokens... - -// Pass-through assertion that a given token has the expected type. -#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) - -// Convenience short-hand forms of h_assert_type. -#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) -#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) -#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) -#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) -#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) - -// Assert expected type and return contained value. -#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) -#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) -#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) -#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) -#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) - -// Sequence access... - -// Return the length of a sequence. -size_t h_seq_len(const HParsedToken *p); - -// Access a sequence's element array. -HParsedToken **h_seq_elements(const HParsedToken *p); - -// Access a sequence element by index. -HParsedToken *h_seq_index(const HParsedToken *p, size_t i); - -// Access an element in a nested sequence by a path of indices. -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); - -// Convenience macros combining (nested) index access and h_cast. -#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SINT(SEQ, ...) 
H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) - -// Standard short-hand to access and cast elements on a sequence token. -#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) -#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) -#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) -#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) -#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) - -// Lower-level helper for h_seq_index. -HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal - -// Sequence modification... - -// Add elements to a sequence. -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one -void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many - -// XXX TODO: Remove elements from a sequence. - -// Flatten nested sequences into one. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); - - -#endif diff --git a/src/Makefile b/src/Makefile index 47e136d..ead0516 100644 --- a/src/Makefile +++ b/src/Makefile @@ -38,6 +38,7 @@ HAMMER_PARTS := \ system_allocator.o \ benchmark.o \ compile.o \ + glue.o \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) @@ -67,6 +68,7 @@ libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h +glue.o: hammer.h glue.h all: libhammer.a From 25df438832be00de17400404efecd4b1939951b9 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:25:19 +0100 Subject: [PATCH 111/125] oops, moved files without adding --- src/glue.c | 170 ++++++++++++++++++++++++++++++++++++ src/glue.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 src/glue.c create mode 100644 src/glue.h diff --git a/src/glue.c b/src/glue.c new file mode 100644 index 0000000..7f9c6fa --- /dev/null +++ b/src/glue.c @@ -0,0 +1,170 @@ +#include "glue.h" +#include "../src/internal.h" // for h_carray_* + + +// The action equivalent of h_ignore. +const HParsedToken *h_act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *h_act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +// Action version of h_seq_flatten. +const HParsedToken *h_act_flatten(const HParseResult *p) { + return h_seq_flatten(p->arena, p->ast); +} + +// Low-level helper for the h_make family. 
+HParsedToken *h_make_(HArena *arena, HTokenType type) +{ + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + return ret; +} + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +{ + assert(type >= TT_USER); + HParsedToken *ret = h_make_(arena, type); + ret->user = value; + return ret; +} + +HParsedToken *h_make_seq(HArena *arena) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new(arena); + return ret; +} + +HParsedToken *h_make_bytes(HArena *arena, size_t len) +{ + HParsedToken *ret = h_make_(arena, TT_BYTES); + ret->bytes.len = len; + ret->bytes.token = h_arena_malloc(arena, len); + return ret; +} + +HParsedToken *h_make_sint(HArena *arena, int64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_SINT); + ret->sint = val; + return ret; +} + +HParsedToken *h_make_uint(HArena *arena, uint64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_UINT); + ret->uint = val; + return ret; +} + +// XXX -> internal +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) +{ + assert(i < a->used); + return a->elements[i]; +} + +size_t h_seq_len(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->used; +} + +HParsedToken **h_seq_elements(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->elements; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
+{ + va_list va; + + va_start(va, i); + HParsedToken *ret = h_seq_index_vpath(p, i, va); + va_end(va); + + return ret; +} + +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) +{ + HParsedToken *ret = h_seq_index(p, i); + int j; + + while((j = va_arg(va, int)) >= 0) + ret = h_seq_index(ret, j); + + return ret; +} + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (HParsedToken *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i; i<ys->seq->used; i++) + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i=0; i<p->seq->used; i++) { + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} diff --git a/src/glue.h b/src/glue.h new file mode 100644 index 0000000..90944ea --- /dev/null +++ b/src/glue.h @@ -0,0 +1,251 @@ +// +// API additions for writing grammar and semantic actions more concisely +// +// +// Quick Overview: +// +// Grammars can be succinctly specified with the family of H_RULE macros. +// H_RULE defines a plain parser variable. H_ARULE additionally attaches a +// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE +// combine both. +// +// A few standard semantic actions are defined below. 
The H_ACT_APPLY macro +// allows semantic actions to be defined by "partial application" of +// a generic action to fixed parameters. +// +// The definition of more complex semantic actions will usually consist of +// extracting data from the given parse tree and constructing a token of custom +// type to represent the result. A number of functions and convenience macros +// are provided to capture the most common cases and idioms. +// +// See the leading comment blocks on the sections below for more details. +// + +#ifndef HAMMER_GLUE__H +#define HAMMER_GLUE__H + +#include <assert.h> +#include "hammer.h" + + +// +// Grammar specification +// +// H_RULE is simply a short-hand for the typical declaration and definition of +// a parser variable. See its plain definition below. The goal is to save +// horizontal space as well as to provide a clear and unified look together with +// the other macro variants that stays close to an abstract PEG or BNF grammar. +// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their +// combinations as they allow the definition of syntax to be given without +// intermingling it with the semantic specifications. +// +// H_ARULE defines a variable just like H_RULE but attaches a semantic action +// to the result of the parser via h_action. The action is expected to be +// named act_<rule>. +// +// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. +// The validation is expected to be named validate_<rule>. +// +// H_VARULE combines H_RULE with both an action and a validation. The action is +// attached before the validation, i.e. the validation receives as input the +// result of the action. +// +// H_AVRULE is like H_VARULE but the action is attached outside the validation, +// i.e. the validation receives the uninterpreted AST as input. 
+// + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) const HParser *rule = \ + h_attr_bool(def, validate_ ## rule) +#define H_VARULE(rule, def) const HParser *rule = \ + h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) +#define H_AVRULE(rule, def) const HParser *rule = \ + h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) + + +// +// Pre-fab semantic actions +// +// A collection of generally useful semantic actions is provided. +// +// h_act_ignore is the action equivalent of the parser combinator h_ignore. It +// simply causes the AST it is applied to to be replaced with NULL. This most +// importantly causes it to be elided from the result of a surrounding +// h_sequence. +// +// h_act_index is of note as it is not itself suitable to be passed to +// h_action. It is parameterized by an index to be picked from a sequence +// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY +// macro provides a concise way to define such a parameter-application wrapper. +// +// h_act_flatten acts on a token of possibly nested sequences by recursively +// flattening it into a single sequence. Cf. h_seq_flatten below. +// +// H_ACT_APPLY implements "partial application" for semantic actions. It +// defines a new action that supplies given parameters to a parameterized +// action such as h_act_index. +// + +const HParsedToken *h_act_ignore(const HParseResult *p); +const HParsedToken *h_act_index(int i, const HParseResult *p); +const HParsedToken *h_act_flatten(const HParseResult *p); + +// Define 'myaction' as a specialization of 'paction' by supplying the leading +// parameters. +#define H_ACT_APPLY(myaction, paction, ...) 
\ + const HParsedToken *myaction(const HParseResult *p) { \ + return paction(__VA_ARGS__, p); \ + } + + +// +// Working with HParsedTokens +// +// The type HParsedToken represents a dynamically-typed universe of values. +// Declared below are constructors to turn ordinary values into their +// HParsedToken equivalents, extractors to retrieve the original values from +// inside an HParsedToken, and functions that inspect and modify tokens of +// sequence type directly. +// +// In addition, there are a number of short-hand macros that work with some +// conventions to eliminate common boilerplate. These conventions are listed +// below. Be sure to follow them if you want to use the respective macros. +// +// * The single argument to semantic actions should be called 'p'. +// +// The H_MAKE macros supply 'p->arena' to their underlying h_make +// counterparts. The H_FIELD macros supply 'p->ast' to their underlying +// H_INDEX counterparts. +// +// * For each custom token type, there should be a typedef for the +// corresponding value type. +// +// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to +// a pointer to the given type. +// +// * For each custom token type, say 'foo_t', there must be an integer +// constant 'TT_foo_t' to identify the token type. This constant must have a +// value greater than or equal to TT_USER. +// +// One idiom is to define an enum for all custom token types and to assign a +// value of TT_USER to the first element. This can be viewed as extending +// the HTokenType enum. +// +// The H_MAKE and H_ASSERT macros derive the name of the token type constant +// from the given type name. +// +// +// The H_ALLOC macro is useful for allocating values of custom token types. +// +// The H_MAKE family of macros construct tokens of a given type. The native +// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. +// The form with no suffix is used for custom token types. 
This convention is +// also used for other macro and function families. +// +// The H_ASSERT family simply asserts that a given token has the expected type. +// It mainly serves as an implementation aid for H_CAST. Of note in that regard +// is that, unlike the standard 'assert' macro, these form _expressions_ that +// return the value of their token argument; thus they can be used in a +// "pass-through" fashion inside other expressions. +// +// The H_CAST family combines a type assertion with access to the +// statically-typed value inside a token. +// +// A number of functions h_seq_* operate on and inspect sequence tokens. +// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. +// Therefore there are h_seq_snoc and h_seq_append to build up sequences. +// +// The macro families H_FIELD and H_INDEX combine index access on a sequence +// with a cast to the appropriate result type. H_FIELD is used to access the +// elements of the argument token 'p' in an action. H_INDEX allows any sequence +// token to be specified. Both macro families take an arbitrary number of index +// arguments, giving access to elements in nested sequences by path. +// These macros are very useful to avoid spaghetti chains of unchecked pointer +// dereferences. +// + +// Standard short-hand for arena-allocating a variable in a semantic action. +#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) + +// Token constructors... + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_sint(HArena *arena, int64_t val); +HParsedToken *h_make_uint(HArena *arena, uint64_t val); + +// Standard short-hands to make tokens in an action. 
+#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +#define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) +#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) + +// Extract (cast) type-specific value back from HParsedTokens... + +// Pass-through assertion that a given token has the expected type. +#define h_assert_type(T,P) (assert((P)->token_type == (HTokenType)(T)), (P)) + +// Convenience short-hand forms of h_assert_type. +#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) +#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) +#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) +#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) +#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) + +// Assert expected type and return contained value. +#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) +#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) +#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) +#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) +#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) + +// Sequence access... + +// Return the length of a sequence. +size_t h_seq_len(const HParsedToken *p); + +// Access a sequence's element array. +HParsedToken **h_seq_elements(const HParsedToken *p); + +// Access a sequence element by index. +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); + +// Access an element in a nested sequence by a path of indices. +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); + +// Convenience macros combining (nested) index access and h_cast. +#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) 
H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) + +// Standard short-hand to access and cast elements on a sequence token. +#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) +#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) +#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) +#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) +#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) + +// Lower-level helper for h_seq_index. +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal + +// Sequence modification... + +// Add elements to a sequence. +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many + +// XXX TODO: Remove elements from a sequence. + +// Flatten nested sequences into one. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); + + +#endif From b06a98ce2600727fe18bcb19f402ab355d5f6467 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:47:18 +0100 Subject: [PATCH 112/125] fix linking of base64 example --- examples/Makefile | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index 10ba9f1..663a214 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -22,12 +22,20 @@ dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -dns.o: ../src/hammer.h dns_common.h -rr.o: ../src/hammer.h rr.h dns_common.h -dns_common.o: ../src/hammer.h dns_common.h +dns.o: ../src/hammer.h dns_common.h ../src/glue.h +rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h +dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h -base64%: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -base64%: base64%.o +base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64: base64.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -base64%.o: ../src/hammer.h +base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem1: base64_sem1.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem2: base64_sem2.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64%.o: ../src/hammer.h ../src/glue.h From 38ddcc5ab4427bafb3a49d28e86141dba5991897 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 15:47:53 +0100 Subject: [PATCH 113/125] add semantic base64 examples to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index db2ee3a..7f4d7d9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ src/test_suite lib/hush examples/dns examples/base64 +examples/base64_sem1 +examples/base64_sem2 TAGS *.swp *.swo From a5c579c23da8f250889c0afee438800dbe56c899 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:53:43 +0100 Subject: [PATCH 114/125] switch semantic base64 examples to pre-fab actions provided by glue.h --- examples/base64_sem1.c | 64 ++++++------------------------------------ examples/base64_sem2.c | 45 +++-------------------------- 2 files changed, 12 insertions(+), 97 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8de31db..9d1012f 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -13,50 +13,11 @@ // base64_sem2.c for an alternative approach using a single top-level action. #include "../src/hammer.h" +#include "../src/glue.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) - - -/// -// Semantic action helpers. -// These might be candidates for inclusion in the library. -/// - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - - /// // Semantic actions for the grammar below, each corresponds to an "ARULE". // They must be named act_. 
@@ -84,11 +45,13 @@ const HParsedToken *act_bsfdig(const HParseResult *p) return res; } +H_ACT_APPLY(act_index0, h_act_index, 0); + #define act_bsfdig_4bit act_bsfdig #define act_bsfdig_2bit act_bsfdig -#define act_equals act_ignore -#define act_ws act_ignore +#define act_equals h_act_ignore +#define act_ws h_act_ignore #define act_document act_index0 @@ -124,20 +87,9 @@ const HParsedToken *act_base64_n(int n, const HParseResult *p) return res; } -const HParsedToken *act_base64_3(const HParseResult *p) -{ - return act_base64_n(3, p); -} - -const HParsedToken *act_base64_2(const HParseResult *p) -{ - return act_base64_n(2, p); -} - -const HParsedToken *act_base64_1(const HParseResult *p) -{ - return act_base64_n(1, p); -} +H_ACT_APPLY(act_base64_3, act_base64_n, 3); +H_ACT_APPLY(act_base64_2, act_base64_n, 2); +H_ACT_APPLY(act_base64_1, act_base64_n, 1); // Helper to concatenate two arrays. void carray_concat(HCountedArray *a, const HCountedArray *b) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 11b0660..4b886c6 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -14,50 +14,11 @@ // transformation. #include "../src/hammer.h" +#include "../src/glue.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) - - -/// -// Semantic action helpers. -// These might be candidates for inclusion in the library. -/// - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. 
-const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - - /// // Semantic actions for the grammar below, each corresponds to an "ARULE". // They must be named act_. @@ -150,7 +111,9 @@ const HParsedToken *act_base64(const HParseResult *p) return res; } -#define act_ws act_ignore +H_ACT_APPLY(act_index0, h_act_index, 0); + +#define act_ws h_act_ignore #define act_document act_index0 From b83be8472e044b012ac3812daf76c37ab41a3a85 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:29:54 +0100 Subject: [PATCH 115/125] fix counter init in h_seq_append --- src/glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glue.c b/src/glue.c index 7f9c6fa..2735e48 100644 --- a/src/glue.c +++ b/src/glue.c @@ -142,7 +142,7 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys) assert(ys != NULL); assert(ys->token_type == TT_SEQUENCE); - for(size_t i; i<ys->seq->used; i++) + for(size_t i=0; i<ys->seq->used; i++) h_carray_append(xs->seq, ys->seq->elements[i]); } From af23f3bbf31856fbd79359db62697d82e6457f66 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:45:29 +0100 Subject: [PATCH 116/125] add more glue functions in base64_sem1 --- examples/base64_sem1.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 9d1012f..1c318e3 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -25,12 +25,10 @@ const HParsedToken *act_bsfdig(const HParseResult *p) { - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + HParsedToken *res = H_MAKE_UINT(0); - assert(p->ast->token_type == TT_UINT); - uint8_t c = p->ast->uint; + uint8_t c = H_CAST_UINT(p->ast); - res->token_type = TT_UINT; if(c >= 0x40 && c <= 0x5A) // A-Z res->uint = c - 0x41; else if(c >= 0x60 && c <= 0x7A) // a-z @@ -58,13 +56,11 @@ H_ACT_APPLY(act_index0, h_act_index, 0); // General-form action to turn a block of base64 digits into bytes. const HParsedToken *act_base64_n(int n, const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); res->token_type = TT_SEQUENCE; res->seq = h_carray_new_sized(p->arena, n); - HParsedToken **digits = p->ast->seq->elements; + HParsedToken **digits = h_seq_elements(p->ast); uint32_t x = 0; int bits = 0; @@ -75,9 +71,7 @@ const HParsedToken *act_base64_n(int n, const HParseResult *p) x >>= bits%8; // align, i.e. cut off extra bits for(int i=0; iarena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = x & 0xFF; + HParsedToken *item = H_MAKE_UINT(x & 0xFF); res->seq->elements[n-1-i] = item; // output the last byte and x >>= 8; // discard it @@ -91,34 +85,23 @@ H_ACT_APPLY(act_base64_3, act_base64_n, 3); H_ACT_APPLY(act_base64_2, act_base64_n, 2); H_ACT_APPLY(act_base64_1, act_base64_n, 1); -// Helper to concatenate two arrays. 
-void carray_concat(HCountedArray *a, const HCountedArray *b) -{ - for(size_t i=0; iused; i++) - h_carray_append(a, b->elements[i]); -} - const HParsedToken *act_base64(const HParseResult *p) { assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used == 2); assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new(p->arena); + HParsedToken *res = H_MAKE_SEQ(); // concatenate base64_3 blocks - HCountedArray *seq = p->ast->seq->elements[0]->seq; - for(size_t i=0; iused; i++) { - assert(seq->elements[i]->token_type == TT_SEQUENCE); - carray_concat(res->seq, seq->elements[i]->seq); - } + HCountedArray *seq = H_FIELD_SEQ(0); + for(size_t i=0; iused; i++) + h_seq_append(res, seq->elements[i]); // append one trailing base64_2 or _1 block - const HParsedToken *tok = p->ast->seq->elements[1]; + const HParsedToken *tok = h_seq_index(p->ast, 1); if(tok->token_type == TT_SEQUENCE) - carray_concat(res->seq, tok->seq); + h_seq_append(res, tok); return res; } From 662357ccb43afceb55bc63aca287984a1ed59bbf Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:47:36 +0100 Subject: [PATCH 117/125] add glue helper to construct sized sequences --- src/glue.c | 7 +++++++ src/glue.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/glue.c b/src/glue.c index 2735e48..f1e086a 100644 --- a/src/glue.c +++ b/src/glue.c @@ -55,6 +55,13 @@ HParsedToken *h_make_seq(HArena *arena) return ret; } +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + HParsedToken *h_make_bytes(HArena *arena, size_t len) { HParsedToken *ret = h_make_(arena, TT_BYTES); diff --git a/src/glue.h b/src/glue.h index 90944ea..3125ae0 100644 --- a/src/glue.h +++ b/src/glue.h @@ -173,6 +173,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. HParsedToken *h_make_bytes(HArena *arena, size_t len); HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); @@ -180,6 +181,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); // Standard short-hands to make tokens in an action. #define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) #define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) #define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) From d9c2c921c431c609003d2ba3e7f309ce3709aeec Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:48:35 +0100 Subject: [PATCH 118/125] use H_MAKE_SEQN in base64_sem1 --- examples/base64_sem1.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 1c318e3..8074352 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -56,9 +56,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0); // General-form action to turn a block of base64 digits into bytes. const HParsedToken *act_base64_n(int n, const HParseResult *p) { - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, n); + HParsedToken *res = H_MAKE_SEQN(n); HParsedToken **digits = h_seq_elements(p->ast); From 51b90828379cc7840901e2e74532bc188658d1f6 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:49:25 +0100 Subject: [PATCH 119/125] remove obsolete dependency on internal.h --- examples/base64_sem1.c | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8074352..f2a3e82 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -14,7 +14,6 @@ #include "../src/hammer.h" #include "../src/glue.h" -#include "../src/internal.h" // for h_carray functions (XXX ?!) #include From f1f7c4f488d8ba0b43ad13e7186730afe4a37ad9 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 17:00:04 +0100 Subject: [PATCH 120/125] remove dependency on internal.h for base64_sem2 --- examples/base64_sem2.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 4b886c6..32afe5b 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -15,7 +15,6 @@ #include "../src/hammer.h" #include "../src/glue.h" -#include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include @@ -47,13 +46,7 @@ uint8_t bsfdig_value(const HParsedToken *p) } // helper: append a byte value to a sequence -void seq_append_byte(HCountedArray *a, uint8_t b) -{ - HParsedToken *item = h_arena_malloc(a->arena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = b; - h_carray_append(a, item); -} +#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b)) const HParsedToken *act_base64(const HParseResult *p) { @@ -75,9 +68,7 @@ const HParsedToken *act_base64(const HParseResult *p) b64_1 = NULL; // allocate result sequence - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new(p->arena); + HParsedToken *res = H_MAKE_SEQ(); // concatenate base64_3 blocks for(size_t i=0; iseq->used; i++) { @@ -88,9 +79,9 @@ const HParsedToken *act_base64(const HParseResult *p) x <<= 6; x |= bsfdig_value(digits[1]); x <<= 6; x |= bsfdig_value(digits[2]); x <<= 6; x |= bsfdig_value(digits[3]); - seq_append_byte(res->seq, (x >> 16) & 0xFF); - seq_append_byte(res->seq, (x >> 8) & 0xFF); - seq_append_byte(res->seq, x & 0xFF); + seq_append_byte(res, (x >> 16) & 0xFF); + seq_append_byte(res, (x >> 8) & 0xFF); + seq_append_byte(res, x & 0xFF); } // append one trailing base64_2 or _1 block @@ -99,13 +90,13 @@ const HParsedToken *act_base64(const HParseResult *p) uint32_t x = bsfdig_value(digits[0]); x <<= 6; x |= bsfdig_value(digits[1]); x <<= 6; x |= bsfdig_value(digits[2]); - seq_append_byte(res->seq, (x >> 10) & 0xFF); - seq_append_byte(res->seq, (x >> 2) & 0xFF); + seq_append_byte(res, (x >> 10) & 0xFF); + seq_append_byte(res, (x >> 2) & 0xFF); } else if(b64_1) { HParsedToken **digits = b64_1->seq->elements; uint32_t x = bsfdig_value(digits[0]); x <<= 6; x |= bsfdig_value(digits[1]); - seq_append_byte(res->seq, (x >> 4) & 0xFF); + seq_append_byte(res, (x >> 4) & 0xFF); } return res; From 866e66ccf9538e3c19d03739e8c804ab2fc4029e Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:29:54 +0100 Subject: [PATCH 121/125] fix counter init in h_seq_append --- src/glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glue.c b/src/glue.c index 7f9c6fa..2735e48 100644 --- a/src/glue.c +++ b/src/glue.c @@ -142,7 +142,7 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys) assert(ys != NULL); assert(ys->token_type == TT_SEQUENCE); - for(size_t i; i<ys->seq->used; i++) + for(size_t i=0; i<ys->seq->used; i++) h_carray_append(xs->seq, ys->seq->elements[i]); } From 0cfec9781ab7dfa0718b04bd5a34fd3d2eb72394 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:47:36 +0100 Subject: [PATCH 122/125] add glue helper to construct sized sequences --- src/glue.c | 7 +++++++ src/glue.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/glue.c b/src/glue.c index 2735e48..f1e086a 100644 --- a/src/glue.c +++ b/src/glue.c @@ -55,6 +55,13 @@ HParsedToken *h_make_seq(HArena *arena) return ret; } +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + HParsedToken *h_make_bytes(HArena *arena, size_t len) { HParsedToken *ret = h_make_(arena, TT_BYTES); diff --git a/src/glue.h b/src/glue.h index 90944ea..3125ae0 100644 --- a/src/glue.h +++ b/src/glue.h @@ -173,6 +173,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. HParsedToken *h_make_bytes(HArena *arena, size_t len); HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); @@ -180,6 +181,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); // Standard short-hands to make tokens in an action. 
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) #define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) #define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) From 928391d547851f588588394ff83c6272b78d7e71 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Sun, 17 Feb 2013 19:57:12 -0500 Subject: [PATCH 123/125] Added irc channel to README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d88a153..ffd5cff 100644 --- a/README.md +++ b/README.md @@ -48,3 +48,7 @@ Examples The `examples/` directory contains some simple examples, currently including: * base64 * DNS + +Community +========= +Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing. From 3a0da2fd844c6f60cfdfcd0102846844f92630e9 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 18 Feb 2013 20:03:19 -0500 Subject: [PATCH 124/125] Added contact info --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ffd5cff..e008b12 100644 --- a/README.md +++ b/README.md @@ -51,4 +51,8 @@ The `examples/` directory contains some simple examples, currently including: Community ========= -Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing. +Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing. + +Contact +======= +You can also email us at . From 4041a5865cb40c20160b23ad0e6757f3fc760754 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sat, 13 Apr 2013 14:42:36 +0200 Subject: [PATCH 125/125] pass rr by reference to set_rdata --- examples/dns.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/dns.c b/examples/dns.c index 6c572a0..7887ba6 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -49,40 +49,40 @@ bool validate_message(HParseResult *p) { /// // Helper: Parse and pack the RDATA field of a Resource Record. -void set_rdata(struct dns_rr rr, HCountedArray *rdata) { +void set_rdata(struct dns_rr *rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; i<rdata->used; ++i) data[i] = H_CAST_UINT(rdata->elements[i]); // Parse RDATA if possible. const HParseResult *p = NULL; - const HParser *parser = init_rdata(rr.type); + const HParser *parser = init_rdata(rr->type); if (parser) p = h_parse(parser, (const uint8_t*)data, rdata->used); // If the RR doesn't parse, set its type to 0. if (!p) - rr.type = 0; + rr->type = 0; // Pack the parsed rdata into rr. 
- switch(rr.type) { - case 1: rr.a = H_CAST_UINT(p->ast); break; - case 2: rr.ns = *H_CAST(dns_domain_t, p->ast); break; - case 3: rr.md = *H_CAST(dns_domain_t, p->ast); break; - case 4: rr.md = *H_CAST(dns_domain_t, p->ast); break; - case 5: rr.cname = *H_CAST(dns_domain_t, p->ast); break; - case 6: rr.soa = *H_CAST(dns_rr_soa_t, p->ast); break; - case 7: rr.mb = *H_CAST(dns_domain_t, p->ast); break; - case 8: rr.mg = *H_CAST(dns_domain_t, p->ast); break; - case 9: rr.mr = *H_CAST(dns_domain_t, p->ast); break; - case 10: rr.null = *H_CAST(dns_rr_null_t, p->ast); break; - case 11: rr.wks = *H_CAST(dns_rr_wks_t, p->ast); break; - case 12: rr.ptr = *H_CAST(dns_domain_t, p->ast); break; - case 13: rr.hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break; - case 14: rr.minfo = *H_CAST(dns_rr_minfo_t, p->ast); break; - case 15: rr.mx = *H_CAST(dns_rr_mx_t, p->ast); break; - case 16: rr.txt = *H_CAST(dns_rr_txt_t, p->ast); break; - default: break; + switch(rr->type) { + case 1: rr->a = H_CAST_UINT(p->ast); break; + case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break; + case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break; + case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break; + case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break; + case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break; + case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break; + case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break; + case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break; + case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break; + case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break; + case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break; + case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break; + case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break; + default: break; } } @@ -131,7 +131,7 @@ const HParsedToken* act_rr(const HParseResult *p) { 
rr->rdlength = H_FIELD_SEQ(4)->used; // Parse and pack RDATA. - set_rdata(*rr, H_FIELD_SEQ(4)); + set_rdata(rr, H_FIELD_SEQ(4)); return H_MAKE(dns_rr_t, rr); }