From 0497cdb08b034dc30255ebd41316d0d5132df2d6 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 00:08:07 +0100 Subject: [PATCH 01/28] add trivial left-recursion test case --- src/t_parser.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/t_parser.c b/src/t_parser.c index b1f9b63..6bbc409 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -365,6 +365,17 @@ static void test_not(void) { g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); } +static void test_leftrec(void) { + const HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_ok(lr_, "a", 1, "(u0x61)"); + g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)"); + g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)"); +} + void register_parser_tests(void) { g_test_add_func("/core/parser/token", test_token); g_test_add_func("/core/parser/ch", test_ch); @@ -406,4 +417,5 @@ void register_parser_tests(void) { g_test_add_func("/core/parser/and", test_and); g_test_add_func("/core/parser/not", test_not); g_test_add_func("/core/parser/ignore", test_ignore); + g_test_add_func("/core/parser/leftrec", test_leftrec); } From ce88a3f49d82a2c2e03e7061baa43eea33dea617 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 00:39:15 +0100 Subject: [PATCH 02/28] initialize involved_set to an empty HList, not NULL --- src/backends/packrat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index d05129d..25a0966 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -77,7 +77,9 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { if (!rec_detect->head) { HRecursionHead *some = a_new(HRecursionHead, 1); - some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; + some->head_parser = p; + some->involved_set = h_slist_new(state->arena); + some->eval_set = NULL; rec_detect->head = some; } assert(state->lr_stack->head != NULL); @@ -101,7 +103,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParseResult *old_res = old_cached->right->result; // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; + head->eval_set = head->involved_set; // BUG: this must be a copy HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { From dffe47bf5775f35bc13cdd9bea856ac3e9cbdfbb Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 00:40:19 +0100 Subject: [PATCH 03/28] initialize eval_set to a proper copy of involved_set --- src/backends/packrat.c | 2 +- src/datastructures.c | 20 ++++++++++++++++++++ src/internal.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 25a0966..c67c3e4 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -103,7 +103,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParseResult *old_res = old_cached->right->result; // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; // BUG: this must be a copy + head->eval_set = h_slist_copy(head->involved_set); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { diff --git a/src/datastructures.c b/src/datastructures.c index b1e4f75..3d94804 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -41,6 +41,26 @@ HSlist* h_slist_new(HArena *arena) { return ret; } +HSlist* h_slist_copy(HSlist *slist) { + HSlist *ret = h_slist_new(slist->arena); + HSlistNode *head = slist->head; + HSlistNode *tail; + if (head != NULL) { + h_slist_push(ret, head->elem); + tail = ret->head; + head = head->next; + } + while (head != NULL) { + // append head item to tail in a new node + HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + node->elem = head->elem; + node->next = NULL; + tail = tail->next = node; + head = head->next; + } + return ret; +} + void* h_slist_pop(HSlist *slist) { HSlistNode *head = slist->head; if (!head) diff --git a/src/internal.h b/src/internal.h index 67ecb22..0dcf857 100644 --- a/src/internal.h +++ b/src/internal.h @@ -209,6 +209,7 @@ HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); HSlist* h_slist_new(HArena *arena); +HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void 
h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); From 63dbf83b4de91ae22c6fe5c0b08b1831bc7c86fd Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 01:04:04 +0100 Subject: [PATCH 04/28] fix a loop in setupLR --- src/backends/packrat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c67c3e4..cc2a9db 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -83,10 +83,12 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { rec_detect->head = some; } assert(state->lr_stack->head != NULL); - HLeftRec *lr = state->lr_stack->head->elem; - while (lr && lr->rule != p) { + HSlistNode *head = state->lr_stack->head; + HLeftRec *lr; + while (head && (lr = head->elem)->rule != p) { lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); + head = head->next; } } From 4e3084739e301a1412f59a1991d64b502e159c17 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 01:09:04 +0100 Subject: [PATCH 05/28] fix expected value of first leftrec test case --- src/t_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_parser.c b/src/t_parser.c index 6bbc409..daca1a3 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -371,7 +371,7 @@ static void test_leftrec(void) { HParser *lr_ = h_indirect(); h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); - g_check_parse_ok(lr_, "a", 1, "(u0x61)"); + g_check_parse_ok(lr_, "a", 1, "u0x61"); g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)"); g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)"); } From 445913610da0734630b0affd512e1d6024b27284 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Thu, 10 Jan 2013 01:25:37 +0100 Subject: [PATCH 06/28] parse whole input and allow surrounding whitespace in base64 example --- examples/base64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64.c b/examples/base64.c index 6c4db9e..cdb088e 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -24,7 +24,7 @@ void init_parser(void) base64_1, NULL)), NULL); - document = base64; + document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); } From 2af0f9c47cdff7cdbc86fd4f22974974e08d4111 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 22:33:58 +0100 Subject: [PATCH 07/28] start a variant of the base64 example with semantic actions --- examples/Makefile | 12 ++-- examples/base64_sem1.c | 142 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 examples/base64_sem1.c diff --git a/examples/Makefile b/examples/Makefile index 6a054ca..a3be0ce 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -2,7 +2,9 @@ OUTPUTS := dns.o \ dns \ base64.o \ - base64 + base64 \ + base64_sem1.o \ + base64_sem1 TOPLEVEL := ../ @@ -12,7 +14,7 @@ LDFLAGS += $(pkg-config --libs glib-2.0) -all: dns base64 +all: dns base64 base64_sem1 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o @@ -24,8 +26,8 @@ rr.o: ../src/hammer.h rr.h dns_common.h dns_common.o: ../src/hammer.h dns_common.h -base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -base64: base64.o +base64%: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64%: base64%.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -base64.o: ../src/hammer.h +base64%.o: ../src/hammer.h diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c new file mode 100644 index 0000000..83efc64 --- /dev/null +++ b/examples/base64_sem1.c @@ -0,0 +1,142 @@ +#include "../src/hammer.h" +#include + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser 
*rule = h_action(def, act_ ## rule) + + +/// +// Semantic action helpers. +// These might be candidates for inclusion in the library. +/// + +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_. +/// + +const HParsedToken *act_bsfdig(const HParseResult *p) +{ + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + + assert(p->ast->token_type == TT_UINT); + uint8_t c = p->ast->uint; + + res->token_type = TT_UINT; + if(c >= 0x40 && c <= 0x5A) // A-Z + res->uint = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + res->uint = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + res->uint = c - 0x30 + 52; + else if(c == '+') + res->uint = 62; + else if(c == '/') + res->uint = 63; + + return res; +} + +#define act_bsfdig_4bit act_bsfdig +#define act_bsfdig_2bit act_bsfdig + +#define act_equals act_ignore +#define act_ws act_ignore + +#define act_document act_index0 + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. 
+ H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_ARULE(equals, h_ch('=')); + + H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_RULE (base64_3, h_repeat_n(bsfdig, 4)); + H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_RULE (base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} From 54bd5a4a3832a2f4c85cc34799022f61d0ddcf86 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 10 Jan 2013 23:44:28 +0100 Subject: [PATCH 08/28] add semantic actions for base64_3, base64_2, base64_1 --- examples/base64_sem1.c | 84 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 83efc64..7839435 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -1,4 +1,5 @@ #include "../src/hammer.h" +#include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include @@ -77,6 +78,81 @@ const HParsedToken *act_bsfdig(const HParseResult *p) #define act_document act_index0 +// helper +void carray_append_uint(HCountedArray *array, uint8_t value) +{ + HParsedToken *item = h_arena_malloc(array->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = value; + h_carray_append(array, item); +} + +const HParsedToken *act_base64_3(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + x <<= 6; x |= digits[2]->uint; + x <<= 6; x |= digits[3]->uint; + + carray_append_uint(res->seq, (x >> 16) & 0xFF); + carray_append_uint(res->seq, (x >> 8) & 0xFF); + carray_append_uint(res->seq, x & 0xFF); + + return res; +} + +const HParsedToken *act_base64_2(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + x <<= 6; x |= digits[2]->uint; + + carray_append_uint(res->seq, (x >> 10) & 0xFF); + carray_append_uint(res->seq, (x >> 2) & 0xFF); + + return res; +} + +const HParsedToken *act_base64_1(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, 4); + + HParsedToken **digits = p->ast->seq->elements; + uint32_t x = digits[0]->uint; + x <<= 6; x |= digits[1]->uint; + + carray_append_uint(res->seq, (x >> 4) & 0xFF); + + return res; +} + +#if 0 +const HParsedToken *act_base64(const HParseResult *p) +{ + // 
XXX + // concatenate base64_3 blocks + // append trailing base64_2 or _1 block +} +#endif + /// // Set up the parser with the grammar to be recognized. @@ -97,10 +173,10 @@ const HParser *init_parser(void) H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); - H_RULE (base64_3, h_repeat_n(bsfdig, 4)); - H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); - H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); - H_RULE (base64, h_sequence(h_many(base64_3), + H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); + H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_RULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); From 97b13672ced335dd7a8c4c926a150ddffb63d401 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 00:41:34 +0100 Subject: [PATCH 09/28] generalize act_base64_n --- examples/base64_sem1.c | 83 ++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 51 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 7839435..6dbb2fd 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -78,70 +78,51 @@ const HParsedToken *act_bsfdig(const HParseResult *p) #define act_document act_index0 -// helper -void carray_append_uint(HCountedArray *array, uint8_t value) +// General-form action to turn a block of base64 digits into bytes. 
+const HParsedToken *act_base64_n(int n, const HParseResult *p) { - HParsedToken *item = h_arena_malloc(array->arena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = value; - h_carray_append(array, item); + assert(p->ast->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new_sized(p->arena, n); + + HParsedToken **digits = p->ast->seq->elements; + + uint32_t x = 0; + int bits = 0; + for(int i=0; i<n+1; i++) { + x <<= 6; x |= digits[i]->uint; + bits += 6; + } + x >>= bits%8; // align, i.e. cut off extra bits + + for(int i=0; i<n; i++) { + HParsedToken *item = h_arena_malloc(p->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = x & 0xFF; + + res->seq->elements[n-1-i] = item; // output the last byte and + x >>= 8; // discard it + } + res->seq->used = n; + + return res; } const HParsedToken *act_base64_3(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - x <<= 6; x |= digits[2]->uint; - x <<= 6; x |= digits[3]->uint; - - carray_append_uint(res->seq, (x >> 16) & 0xFF); - carray_append_uint(res->seq, (x >> 8) & 0xFF); - carray_append_uint(res->seq, x & 0xFF); - - return res; + return act_base64_n(3, p); } const HParsedToken *act_base64_2(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - x <<= 6; x |= digits[2]->uint; - - carray_append_uint(res->seq, (x >> 10) & 0xFF); - carray_append_uint(res->seq, (x >> 2) & 0xFF); - - return res; + return 
act_base64_n(2, p); } const HParsedToken *act_base64_1(const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, 4); - - HParsedToken **digits = p->ast->seq->elements; - uint32_t x = digits[0]->uint; - x <<= 6; x |= digits[1]->uint; - - carray_append_uint(res->seq, (x >> 4) & 0xFF); - - return res; + return act_base64_n(1, p); } #if 0 From c62079516d8441f937599d036231c8cfabd4e89a Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 00:57:34 +0100 Subject: [PATCH 10/28] add semantic action for base64 rule --- examples/base64_sem1.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 6dbb2fd..5e1673a 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -125,14 +125,37 @@ const HParsedToken *act_base64_1(const HParseResult *p) return act_base64_n(1, p); } -#if 0 +// Helper to concatenate two arrays. 
+void carray_concat(HCountedArray *a, const HCountedArray *b) +{ + for(size_t i=0; i<b->used; i++) + h_carray_append(a, b->elements[i]); +} + const HParsedToken *act_base64(const HParseResult *p) { - // XXX + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = TT_SEQUENCE; + res->seq = h_carray_new(p->arena); + // concatenate base64_3 blocks - // append trailing base64_2 or _1 block + HCountedArray *seq = p->ast->seq->elements[0]->seq; + for(size_t i=0; i<seq->used; i++) { + assert(seq->elements[i]->token_type == TT_SEQUENCE); + carray_concat(res->seq, seq->elements[i]->seq); + } + + // append one trailing base64_2 or _1 block + const HParsedToken *tok = p->ast->seq->elements[1]; + if(tok->token_type == TT_SEQUENCE) + carray_concat(res->seq, tok->seq); + + return res; } -#endif /// @@ -157,7 +180,7 @@ const HParser *init_parser(void) H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); - H_RULE(base64, h_sequence(h_many(base64_3), + H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); From ad3c2032dc86c4929f72154cd2121e8259397946 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 11 Jan 2013 01:10:16 +0100 Subject: [PATCH 11/28] add a note about weird bug --- examples/base64_sem1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 5e1673a..8638bb3 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -188,6 +188,10 @@ const HParser *init_parser(void) H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + // BUG sometimes inputs that should just don't parse. 
+ // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using less actions seemed to make it less likely. + return document; } From 32dfae0af3db9eedfae6d97b02677cbd3e9e97db Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 13 Jan 2013 18:19:59 +0100 Subject: [PATCH 12/28] add a variant of the base64 example with coarse-grained semantic actions --- examples/Makefile | 6 +- examples/base64_sem2.c | 207 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 examples/base64_sem2.c diff --git a/examples/Makefile b/examples/Makefile index a3be0ce..98797f3 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -4,7 +4,9 @@ OUTPUTS := dns.o \ base64.o \ base64 \ base64_sem1.o \ - base64_sem1 + base64_sem1 \ + base64_sem2.o \ + base64_sem2 TOPLEVEL := ../ @@ -14,7 +16,7 @@ LDFLAGS += $(pkg-config --libs glib-2.0) -all: dns base64 base64_sem1 +all: dns base64 base64_sem1 base64_sem2 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c new file mode 100644 index 0000000..957ac48 --- /dev/null +++ b/examples/base64_sem2.c @@ -0,0 +1,207 @@ +#include "../src/hammer.h" +#include "../src/internal.h" // for h_carray functions (XXX ?!) +#include + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) + + +/// +// Semantic action helpers. +// These might be candidates for inclusion in the library. +/// + +// The action equivalent of h_ignore. +const HParsedToken *act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. 
+const HParsedToken *act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +const HParsedToken *act_index0(const HParseResult *p) +{ + return act_index(0, p); +} + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_. +/// + +// helper: return the numeric value of a parsed base64 digit +uint8_t bsfdig_value(const HParsedToken *p) +{ + uint8_t value = 0; + + if(p && p->token_type == TT_UINT) { + uint8_t c = p->uint; + if(c >= 0x40 && c <= 0x5A) // A-Z + value = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + value = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + value = c - 0x30 + 52; + else if(c == '+') + value = 62; + else if(c == '/') + value = 63; + } + + return value; +} + +// helper: append a byte value to a sequence +void seq_append_byte(HCountedArray *a, uint8_t b) +{ + HParsedToken *item = h_arena_malloc(a->arena, sizeof(HParsedToken)); + item->token_type = TT_UINT; + item->uint = b; + h_carray_append(a, item); +} + +const HParsedToken *act_base64(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + // grab b64_3 block sequence + // grab and analyze b64 end block (_2 or _1) + const HParsedToken *b64_3 = p->ast->seq->elements[0]; + const HParsedToken *b64_2 = p->ast->seq->elements[1]; + const HParsedToken *b64_1 = p->ast->seq->elements[1]; + + if(b64_2->token_type != TT_SEQUENCE) + b64_1 = b64_2 = NULL; + else if(b64_2->seq->elements[2]->uint == '=') + b64_2 = NULL; + else + b64_1 = NULL; + + // allocate result sequence + HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + res->token_type = 
TT_SEQUENCE; + res->seq = h_carray_new(p->arena); + + // concatenate base64_3 blocks + for(size_t i=0; i<b64_3->seq->used; i++) { + assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE); + HParsedToken **digits = b64_3->seq->elements[i]->seq->elements; + + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + x <<= 6; x |= bsfdig_value(digits[3]); + seq_append_byte(res->seq, (x >> 16) & 0xFF); + seq_append_byte(res->seq, (x >> 8) & 0xFF); + seq_append_byte(res->seq, x & 0xFF); + } + + // append one trailing base64_2 or _1 block + if(b64_2) { + HParsedToken **digits = b64_2->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + seq_append_byte(res->seq, (x >> 10) & 0xFF); + seq_append_byte(res->seq, (x >> 2) & 0xFF); + } else if(b64_1) { + HParsedToken **digits = b64_1->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + seq_append_byte(res->seq, (x >> 4) & 0xFF); + } + + return res; +} + +#define act_ws act_ignore +#define act_document act_index0 + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. 
+ H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_RULE (equals, h_ch('=')); + + H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_RULE (base64_3, h_repeat_n(bsfdig, 4)); + H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_ARULE(base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + // BUG sometimes inputs that should just don't parse. + // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using less actions seemed to make it less likely. + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} From 1998ae243aab922aa3d520d98e607ac853b3bc53 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 13 Jan 2013 18:47:03 +0100 Subject: [PATCH 13/28] add introductory comments to base64 examples --- examples/base64.c | 10 ++++++++++ examples/base64_sem1.c | 14 ++++++++++++++ examples/base64_sem2.c | 15 +++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/examples/base64.c b/examples/base64.c index cdb088e..ee142e3 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -1,3 +1,13 @@ +// Example parser: Base64, syntax only. +// +// Demonstrates how to construct a Hammer parser that recognizes valid Base64 +// sequences. +// +// Note that no semantic evaluation of the sequence is performed, i.e. the +// byte sequence being represented is not returned, or determined. See +// base64_sem1.c and base64_sem2.c for examples how to attach appropriate +// semantic actions to the grammar. + #include "../src/hammer.h" const HParser* document = NULL; diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8638bb3..92f0b3f 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -1,3 +1,17 @@ +// Example parser: Base64, with fine-grained semantic actions +// +// Demonstrates how to attach semantic actions to grammar rules and piece by +// piece transform the parse tree into the desired semantic representation, +// in this case a sequence of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// (a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses fine-grained semantic actions that +// transform the parse tree in small steps in a bottom-up fashion. Compare +// base64_sem2.c for an alternative approach using a single top-level action. + #include "../src/hammer.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 957ac48..c57555e 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -1,3 +1,18 @@ +// Example parser: Base64, with coarse-grained semantic actions +// +// Demonstrates how to attach semantic actions to a grammar and transform the +// parse tree into the desired semantic representation, in this case a sequence +// of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// (a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses coarse-grained semantic actions, +// transforming the entire parse tree in one big step. Compare base64_sem1.c +// for an alternative approach using a fine-grained piece-by-piece +// transformation. + #include "../src/hammer.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include From 619725e5bb2e3962235ccaa0f543e710b5f88f55 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 13 Jan 2013 18:56:16 +0100 Subject: [PATCH 14/28] typo --- examples/base64_sem2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index c57555e..11b0660 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -6,7 +6,7 @@ // // Note how the grammar is defined by using the macros H_RULE and H_ARULE. // Those rules using ARULE get an attached action which must be declared (as -// (a function of type HAction) with a standard name based on the rule name. +// a function of type HAction) with a standard name based on the rule name. // // This variant of the example uses coarse-grained semantic actions, // transforming the entire parse tree in one big step. Compare base64_sem1.c From fab8705828b5e13ac15ba87c8a7ff995cf7063be Mon Sep 17 00:00:00 2001 From: "Sven M.
Hallberg" Date: Sun, 13 Jan 2013 18:56:45 +0100 Subject: [PATCH 15/28] typo --- examples/base64_sem1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 92f0b3f..8de31db 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -6,7 +6,7 @@ // // Note how the grammar is defined by using the macros H_RULE and H_ARULE. // Those rules using ARULE get an attached action which must be declared (as -// (a function of type HAction) with a standard name based on the rule name. +// a function of type HAction) with a standard name based on the rule name. // // This variant of the example uses fine-grained semantic actions that // transform the parse tree in small steps in a bottom-up fashion. Compare From 195e50a7f18b86fa8d17aaaa632a454ed67a2d7e Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 15:24:13 +0100 Subject: [PATCH 16/28] move glue.[ch] into the library proper --- examples/Makefile | 3 +- examples/dns_common.h | 2 +- examples/glue.c | 170 ---------------------------- examples/glue.h | 251 ------------------------------------------ src/Makefile | 2 + 5 files changed, 4 insertions(+), 424 deletions(-) delete mode 100644 examples/glue.c delete mode 100644 examples/glue.h diff --git a/examples/Makefile b/examples/Makefile index 786af44..9c6ac1b 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -15,13 +15,12 @@ LDFLAGS += $(pkg-config --libs glib-2.0) all: dns base64 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -dns: dns.o rr.o dns_common.o glue.o +dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) dns.o: ../src/hammer.h dns_common.h rr.o: ../src/hammer.h rr.h dns_common.h dns_common.o: ../src/hammer.h dns_common.h -glue.o: ../src/hammer.h glue.h base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) base64: base64.o diff --git a/examples/dns_common.h b/examples/dns_common.h index 6b04519..c1d8d7e 100644 --- a/examples/dns_common.h +++ 
b/examples/dns_common.h @@ -2,7 +2,7 @@ #define HAMMER_DNS_COMMON__H #include "../src/hammer.h" -#include "glue.h" +#include "../src/glue.h" const HParser* init_domain(); const HParser* init_character_string(); diff --git a/examples/glue.c b/examples/glue.c deleted file mode 100644 index 7f9c6fa..0000000 --- a/examples/glue.c +++ /dev/null @@ -1,170 +0,0 @@ -#include "glue.h" -#include "../src/internal.h" // for h_carray_* - - -// The action equivalent of h_ignore. -const HParsedToken *h_act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *h_act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -// Action version of h_seq_flatten. -const HParsedToken *h_act_flatten(const HParseResult *p) { - return h_seq_flatten(p->arena, p->ast); -} - -// Low-level helper for the h_make family. 
-HParsedToken *h_make_(HArena *arena, HTokenType type) -{ - HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); - ret->token_type = type; - return ret; -} - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value) -{ - assert(type >= TT_USER); - HParsedToken *ret = h_make_(arena, type); - ret->user = value; - return ret; -} - -HParsedToken *h_make_seq(HArena *arena) -{ - HParsedToken *ret = h_make_(arena, TT_SEQUENCE); - ret->seq = h_carray_new(arena); - return ret; -} - -HParsedToken *h_make_bytes(HArena *arena, size_t len) -{ - HParsedToken *ret = h_make_(arena, TT_BYTES); - ret->bytes.len = len; - ret->bytes.token = h_arena_malloc(arena, len); - return ret; -} - -HParsedToken *h_make_sint(HArena *arena, int64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_SINT); - ret->sint = val; - return ret; -} - -HParsedToken *h_make_uint(HArena *arena, uint64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_UINT); - ret->uint = val; - return ret; -} - -// XXX -> internal -HParsedToken *h_carray_index(const HCountedArray *a, size_t i) -{ - assert(i < a->used); - return a->elements[i]; -} - -size_t h_seq_len(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->used; -} - -HParsedToken **h_seq_elements(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->elements; -} - -HParsedToken *h_seq_index(const HParsedToken *p, size_t i) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return h_carray_index(p->seq, i); -} - -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
-{ - va_list va; - - va_start(va, i); - HParsedToken *ret = h_seq_index_vpath(p, i, va); - va_end(va); - - return ret; -} - -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) -{ - HParsedToken *ret = h_seq_index(p, i); - int j; - - while((j = va_arg(va, int)) >= 0) - ret = h_seq_index(p, j); - - return ret; -} - -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - - h_carray_append(xs->seq, (HParsedToken *)x); -} - -void h_seq_append(HParsedToken *xs, const HParsedToken *ys) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - assert(ys != NULL); - assert(ys->token_type == TT_SEQUENCE); - - for(size_t i; iseq->used; i++) - h_carray_append(xs->seq, ys->seq->elements[i]); -} - -// Flatten nested sequences. Always returns a sequence. -// If input element is not a sequence, returns it as a singleton sequence. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) -{ - assert(p != NULL); - - HParsedToken *ret = h_make_seq(arena); - switch(p->token_type) { - case TT_SEQUENCE: - // Flatten and append all. - for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); - } - break; - default: - // Make singleton sequence. - h_seq_snoc(ret, p); - break; - } - - return ret; -} diff --git a/examples/glue.h b/examples/glue.h deleted file mode 100644 index ccb488e..0000000 --- a/examples/glue.h +++ /dev/null @@ -1,251 +0,0 @@ -// -// API additions for writing grammar and semantic actions more concisely -// -// -// Quick Overview: -// -// Grammars can be succinctly specified with the family of H_RULE macros. -// H_RULE defines a plain parser variable. H_ARULE additionally attaches a -// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE -// combine both. -// -// A few standard semantic actions are defined below. 
The H_ACT_APPLY macro -// allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. -// -// The definition of more complex semantic actions will usually consist of -// extracting data from the given parse tree and constructing a token of custom -// type to represent the result. A number of functions and convenience macros -// are provided to capture the most common cases and idioms. -// -// See the leading comment blocks on the sections below for more details. -// - -#ifndef HAMMER_EXAMPLES_GLUE__H -#define HAMMER_EXAMPLES_GLUE__H - -#include -#include "../src/hammer.h" - - -// -// Grammar specification -// -// H_RULE is simply a short-hand for the typical declaration and definition of -// a parser variable. See its plain definition below. The goal is to save -// horizontal space as well as to provide a clear and unified look together with -// the other macro variants that stays close to an abstract PEG or BNF grammar. -// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their -// combinations as they allow the definition of syntax to be given without -// intermingling it with the semantic specifications. -// -// H_ARULE defines a variable just like H_RULE but attaches a semantic action -// to the result of the parser via h_action. The action is expected to be -// named act_. -// -// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. -// The validation is expected to be named validate_. -// -// H_VARULE combines H_RULE with both an action and a validation. The action is -// attached before the validation, i.e. the validation receives as input the -// result of the action. -// -// H_AVRULE is like H_VARULE but the action is attached outside the validation, -// i.e. the validation receives the uninterpreted AST as input. 
-// - - -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) -#define H_VRULE(rule, def) const HParser *rule = \ - h_attr_bool(def, validate_ ## rule) -#define H_VARULE(rule, def) const HParser *rule = \ - h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) -#define H_AVRULE(rule, def) const HParser *rule = \ - h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) - - -// -// Pre-fab semantic actions -// -// A collection of generally useful semantic actions is provided. -// -// h_act_ignore is the action equivalent of the parser combinator h_ignore. It -// simply causes the AST it is applied to to be replaced with NULL. This most -// importantly causes it to be elided from the result of a surrounding -// h_sequence. -// -// h_act_index is of note as it is not itself suitable to be passed to -// h_action. It is parameterized by an index to be picked from a sequence -// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY -// macro provides a concise way to define such a parameter-application wrapper. -// -// h_act_flatten acts on a token of possibly nested sequences by recursively -// flattening it into a single sequence. Cf. h_seq_flatten below. -// -// H_ACT_APPLY implements "partial application" for semantic actions. It -// defines a new action that supplies given parameters to a parameterized -// action such as h_act_index. -// - -const HParsedToken *h_act_ignore(const HParseResult *p); -const HParsedToken *h_act_index(int i, const HParseResult *p); -const HParsedToken *h_act_flatten(const HParseResult *p); - -// Define 'myaction' as a specialization of 'paction' by supplying the leading -// parameters. -#define H_ACT_APPLY(myaction, paction, ...) 
\ - const HParsedToken *myaction(const HParseResult *p) { \ - return paction(__VA_ARGS__, p); \ - } - - -// -// Working with HParsedTokens -// -// The type HParsedToken represents a dynamically-typed universe of values. -// Declared below are constructors to turn ordinary values into their -// HParsedToken equivalents, extractors to retrieve the original values from -// inside an HParsedToken, and functions that inspect and modify tokens of -// sequence type directly. -// -// In addition, there are a number of short-hand macros that work with some -// conventions to eliminate common boilerplate. These conventions are listed -// below. Be sure to follow them if you want to use the respective macros. -// -// * The single argument to semantic actions should be called 'p'. -// -// The H_MAKE macros suppy 'p->arena' to their underlying h_make -// counterparts. The H_FIELD macros supply 'p->ast' to their underlying -// H_INDEX counterparts. -// -// * For each custom token type, there should be a typedef for the -// corresponding value type. -// -// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to -// a pointer to the given type. -// -// * For each custom token type, say 'foo_t', there must be an integer -// constant 'TT_foo_t' to identify the token type. This constant must have a -// value greater or equal than TT_USER. -// -// One idiom is to define an enum for all custom token types and to assign a -// value of TT_USER to the first element. This can be viewed as extending -// the HTokenType enum. -// -// The H_MAKE and H_ASSERT macros derive the name of the token type constant -// from the given type name. -// -// -// The H_ALLOC macro is useful for allocating values of custom token types. -// -// The H_MAKE family of macros construct tokens of a given type. The native -// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. -// The form with no suffix is used for custom token types. 
This convention is -// also used for other macro and function families. -// -// The H_ASSERT family simply asserts that a given token has the expected type. -// It mainly serves as an implementation aid for H_CAST. Of note in that regard -// is that, unlike the standard 'assert' macro, these form _expressions_ that -// return the value of their token argument; thus they can be used in a -// "pass-through" fashion inside other expressions. -// -// The H_CAST family combines a type assertion with access to the -// statically-typed value inside a token. -// -// A number of functions h_seq_* operate on and inspect sequence tokens. -// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. -// Therefore there are h_seq_snoc and h_seq_append to build up sequences. -// -// The macro families H_FIELD and H_INDEX combine index access on a sequence -// with a cast to the appropriate result type. H_FIELD is used to access the -// elements of the argument token 'p' in an action. H_INDEX allows any sequence -// token to be specified. Both macro families take an arbitrary number of index -// arguments, giving access to elements in nested sequences by path. -// These macros are very useful to avoid spaghetti chains of unchecked pointer -// dereferences. -// - -// Standard short-hand for arena-allocating a variable in a semantic action. -#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) - -// Token constructors... - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value); -HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. -HParsedToken *h_make_bytes(HArena *arena, size_t len); -HParsedToken *h_make_sint(HArena *arena, int64_t val); -HParsedToken *h_make_uint(HArena *arena, uint64_t val); - -// Standard short-hands to make tokens in an action. 
-#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) -#define H_MAKE_SEQ() h_make_seq(p->arena) -#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) -#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) -#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) - -// Extract (cast) type-specific value back from HParsedTokens... - -// Pass-through assertion that a given token has the expected type. -#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) - -// Convenience short-hand forms of h_assert_type. -#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) -#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) -#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) -#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) -#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) - -// Assert expected type and return contained value. -#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) -#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) -#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) -#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) -#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) - -// Sequence access... - -// Return the length of a sequence. -size_t h_seq_len(const HParsedToken *p); - -// Access a sequence's element array. -HParsedToken **h_seq_elements(const HParsedToken *p); - -// Access a sequence element by index. -HParsedToken *h_seq_index(const HParsedToken *p, size_t i); - -// Access an element in a nested sequence by a path of indices. -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); - -// Convenience macros combining (nested) index access and h_cast. -#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SINT(SEQ, ...) 
H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) - -// Standard short-hand to access and cast elements on a sequence token. -#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) -#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) -#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) -#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) -#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) - -// Lower-level helper for h_seq_index. -HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal - -// Sequence modification... - -// Add elements to a sequence. -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one -void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many - -// XXX TODO: Remove elements from a sequence. - -// Flatten nested sequences into one. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); - - -#endif diff --git a/src/Makefile b/src/Makefile index 47e136d..ead0516 100644 --- a/src/Makefile +++ b/src/Makefile @@ -38,6 +38,7 @@ HAMMER_PARTS := \ system_allocator.o \ benchmark.o \ compile.o \ + glue.o \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) @@ -67,6 +68,7 @@ libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h +glue.o: hammer.h glue.h all: libhammer.a From 25df438832be00de17400404efecd4b1939951b9 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:25:19 +0100 Subject: [PATCH 17/28] oops, moved files without adding --- src/glue.c | 170 ++++++++++++++++++++++++++++++++++++ src/glue.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 src/glue.c create mode 100644 src/glue.h diff --git a/src/glue.c b/src/glue.c new file mode 100644 index 0000000..7f9c6fa --- /dev/null +++ b/src/glue.c @@ -0,0 +1,170 @@ +#include "glue.h" +#include "../src/internal.h" // for h_carray_* + + +// The action equivalent of h_ignore. +const HParsedToken *h_act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *h_act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +// Action version of h_seq_flatten. +const HParsedToken *h_act_flatten(const HParseResult *p) { + return h_seq_flatten(p->arena, p->ast); +} + +// Low-level helper for the h_make family. 
+HParsedToken *h_make_(HArena *arena, HTokenType type) +{ + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + return ret; +} + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +{ + assert(type >= TT_USER); + HParsedToken *ret = h_make_(arena, type); + ret->user = value; + return ret; +} + +HParsedToken *h_make_seq(HArena *arena) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new(arena); + return ret; +} + +HParsedToken *h_make_bytes(HArena *arena, size_t len) +{ + HParsedToken *ret = h_make_(arena, TT_BYTES); + ret->bytes.len = len; + ret->bytes.token = h_arena_malloc(arena, len); + return ret; +} + +HParsedToken *h_make_sint(HArena *arena, int64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_SINT); + ret->sint = val; + return ret; +} + +HParsedToken *h_make_uint(HArena *arena, uint64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_UINT); + ret->uint = val; + return ret; +} + +// XXX -> internal +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) +{ + assert(i < a->used); + return a->elements[i]; +} + +size_t h_seq_len(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->used; +} + +HParsedToken **h_seq_elements(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->elements; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
+{ + va_list va; + + va_start(va, i); + HParsedToken *ret = h_seq_index_vpath(p, i, va); + va_end(va); + + return ret; +} + +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) // further indices are read as int; a negative value ends the path +{ + HParsedToken *ret = h_seq_index(p, i); + int j; + + while((j = va_arg(va, int)) >= 0) + ret = h_seq_index(ret, j); // descend into the element selected so far, not back into the root p + + return ret; +} + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (HParsedToken *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i=0; i < ys->seq->used; i++) // index must start at 0; it was left uninitialized + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i=0; i < p->seq->used; i++) { + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} diff --git a/src/glue.h b/src/glue.h new file mode 100644 index 0000000..90944ea --- /dev/null +++ b/src/glue.h @@ -0,0 +1,251 @@ +// +// API additions for writing grammar and semantic actions more concisely +// +// +// Quick Overview: +// +// Grammars can be succinctly specified with the family of H_RULE macros. +// H_RULE defines a plain parser variable. H_ARULE additionally attaches a +// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE +// combine both. +// +// A few standard semantic actions are defined below.
The H_ACT_APPLY macro +// allows semantic actions to be defined by "partial application" of +// a generic action to fixed parameters. +// +// The definition of more complex semantic actions will usually consist of +// extracting data from the given parse tree and constructing a token of custom +// type to represent the result. A number of functions and convenience macros +// are provided to capture the most common cases and idioms. +// +// See the leading comment blocks on the sections below for more details. +// + +#ifndef HAMMER_GLUE__H +#define HAMMER_GLUE__H + +#include +#include "hammer.h" + + +// +// Grammar specification +// +// H_RULE is simply a short-hand for the typical declaration and definition of +// a parser variable. See its plain definition below. The goal is to save +// horizontal space as well as to provide a clear and unified look together with +// the other macro variants that stays close to an abstract PEG or BNF grammar. +// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their +// combinations as they allow the definition of syntax to be given without +// intermingling it with the semantic specifications. +// +// H_ARULE defines a variable just like H_RULE but attaches a semantic action +// to the result of the parser via h_action. The action is expected to be +// named act_<rulename>. +// +// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. +// The validation is expected to be named validate_<rulename>. +// +// H_VARULE combines H_RULE with both an action and a validation. The action is +// attached before the validation, i.e. the validation receives as input the +// result of the action. +// +// H_AVRULE is like H_VARULE but the action is attached outside the validation, +// i.e. the validation receives the uninterpreted AST as input.
+// + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) const HParser *rule = \ + h_attr_bool(def, validate_ ## rule) +#define H_VARULE(rule, def) const HParser *rule = \ + h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) +#define H_AVRULE(rule, def) const HParser *rule = \ + h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) + + +// +// Pre-fab semantic actions +// +// A collection of generally useful semantic actions is provided. +// +// h_act_ignore is the action equivalent of the parser combinator h_ignore. It +// simply causes the AST it is applied to to be replaced with NULL. This most +// importantly causes it to be elided from the result of a surrounding +// h_sequence. +// +// h_act_index is of note as it is not itself suitable to be passed to +// h_action. It is parameterized by an index to be picked from a sequence +// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY +// macro provides a concise way to define such a parameter-application wrapper. +// +// h_act_flatten acts on a token of possibly nested sequences by recursively +// flattening it into a single sequence. Cf. h_seq_flatten below. +// +// H_ACT_APPLY implements "partial application" for semantic actions. It +// defines a new action that supplies given parameters to a parameterized +// action such as h_act_index. +// + +const HParsedToken *h_act_ignore(const HParseResult *p); +const HParsedToken *h_act_index(int i, const HParseResult *p); +const HParsedToken *h_act_flatten(const HParseResult *p); + +// Define 'myaction' as a specialization of 'paction' by supplying the leading +// parameters. +#define H_ACT_APPLY(myaction, paction, ...) 
\ + const HParsedToken *myaction(const HParseResult *p) { \ + return paction(__VA_ARGS__, p); \ + } + + +// +// Working with HParsedTokens +// +// The type HParsedToken represents a dynamically-typed universe of values. +// Declared below are constructors to turn ordinary values into their +// HParsedToken equivalents, extractors to retrieve the original values from +// inside an HParsedToken, and functions that inspect and modify tokens of +// sequence type directly. +// +// In addition, there are a number of short-hand macros that work with some +// conventions to eliminate common boilerplate. These conventions are listed +// below. Be sure to follow them if you want to use the respective macros. +// +// * The single argument to semantic actions should be called 'p'. +// +// The H_MAKE macros supply 'p->arena' to their underlying h_make +// counterparts. The H_FIELD macros supply 'p->ast' to their underlying +// H_INDEX counterparts. +// +// * For each custom token type, there should be a typedef for the +// corresponding value type. +// +// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to +// a pointer to the given type. +// +// * For each custom token type, say 'foo_t', there must be an integer +// constant 'TT_foo_t' to identify the token type. This constant must have a +// value greater than or equal to TT_USER. +// +// One idiom is to define an enum for all custom token types and to assign a +// value of TT_USER to the first element. This can be viewed as extending +// the HTokenType enum. +// +// The H_MAKE and H_ASSERT macros derive the name of the token type constant +// from the given type name. +// +// +// The H_ALLOC macro is useful for allocating values of custom token types. +// +// The H_MAKE family of macros constructs tokens of a given type. The native +// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. +// The form with no suffix is used for custom token types.
This convention is +// also used for other macro and function families. +// +// The H_ASSERT family simply asserts that a given token has the expected type. +// It mainly serves as an implementation aid for H_CAST. Of note in that regard +// is that, unlike the standard 'assert' macro, these form _expressions_ that +// return the value of their token argument; thus they can be used in a +// "pass-through" fashion inside other expressions. +// +// The H_CAST family combines a type assertion with access to the +// statically-typed value inside a token. +// +// A number of functions h_seq_* operate on and inspect sequence tokens. +// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. +// Therefore there are h_seq_snoc and h_seq_append to build up sequences. +// +// The macro families H_FIELD and H_INDEX combine index access on a sequence +// with a cast to the appropriate result type. H_FIELD is used to access the +// elements of the argument token 'p' in an action. H_INDEX allows any sequence +// token to be specified. Both macro families take an arbitrary number of index +// arguments, giving access to elements in nested sequences by path. +// These macros are very useful to avoid spaghetti chains of unchecked pointer +// dereferences. +// + +// Standard short-hand for arena-allocating a variable in a semantic action. +#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) + +// Token constructors... + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_sint(HArena *arena, int64_t val); +HParsedToken *h_make_uint(HArena *arena, uint64_t val); + +// Standard short-hands to make tokens in an action. 
+#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +#define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) +#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) + +// Extract (cast) type-specific value back from HParsedTokens... + +// Pass-through assertion that a given token has the expected type. +#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) + +// Convenience short-hand forms of h_assert_type. +#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) +#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) +#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) +#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) +#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) + +// Assert expected type and return contained value. +#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) +#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) +#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) +#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) +#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) + +// Sequence access... + +// Return the length of a sequence. +size_t h_seq_len(const HParsedToken *p); + +// Access a sequence's element array. +HParsedToken **h_seq_elements(const HParsedToken *p); + +// Access a sequence element by index. +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); + +// Access an element in a nested sequence by a path of indices. +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); + +// Convenience macros combining (nested) index access and h_cast. +#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) 
H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) + +// Standard short-hand to access and cast elements on a sequence token. +#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) +#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) +#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) +#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) +#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) + +// Lower-level helper for h_seq_index. +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal + +// Sequence modification... + +// Add elements to a sequence. +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many + +// XXX TODO: Remove elements from a sequence. + +// Flatten nested sequences into one. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); + + +#endif From b06a98ce2600727fe18bcb19f402ab355d5f6467 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:47:18 +0100 Subject: [PATCH 18/28] fix linking of base64 example --- examples/Makefile | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index 10ba9f1..663a214 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -22,12 +22,20 @@ dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -dns.o: ../src/hammer.h dns_common.h -rr.o: ../src/hammer.h rr.h dns_common.h -dns_common.o: ../src/hammer.h dns_common.h +dns.o: ../src/hammer.h dns_common.h ../src/glue.h +rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h +dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h -base64%: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -base64%: base64%.o +base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64: base64.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -base64%.o: ../src/hammer.h +base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem1: base64_sem1.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem2: base64_sem2.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64%.o: ../src/hammer.h ../src/glue.h From 38ddcc5ab4427bafb3a49d28e86141dba5991897 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 15:47:53 +0100 Subject: [PATCH 19/28] add semantic base64 examples to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index db2ee3a..7f4d7d9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ src/test_suite lib/hush examples/dns examples/base64 +examples/base64_sem1 +examples/base64_sem2 TAGS *.swp *.swo From a5c579c23da8f250889c0afee438800dbe56c899 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 15:53:43 +0100 Subject: [PATCH 20/28] switch semantic base64 examples to pre-fab actions provided by glue.h --- examples/base64_sem1.c | 64 ++++++------------------------------------ examples/base64_sem2.c | 45 +++-------------------------- 2 files changed, 12 insertions(+), 97 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8de31db..9d1012f 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -13,50 +13,11 @@ // base64_sem2.c for an alternative approach using a single top-level action. #include "../src/hammer.h" +#include "../src/glue.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) - - -/// -// Semantic action helpers. -// These might be candidates for inclusion in the library. -/// - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - - /// // Semantic actions for the grammar below, each corresponds to an "ARULE". // They must be named act_. 
@@ -84,11 +45,13 @@ const HParsedToken *act_bsfdig(const HParseResult *p) return res; } +H_ACT_APPLY(act_index0, h_act_index, 0); + #define act_bsfdig_4bit act_bsfdig #define act_bsfdig_2bit act_bsfdig -#define act_equals act_ignore -#define act_ws act_ignore +#define act_equals h_act_ignore +#define act_ws h_act_ignore #define act_document act_index0 @@ -124,20 +87,9 @@ const HParsedToken *act_base64_n(int n, const HParseResult *p) return res; } -const HParsedToken *act_base64_3(const HParseResult *p) -{ - return act_base64_n(3, p); -} - -const HParsedToken *act_base64_2(const HParseResult *p) -{ - return act_base64_n(2, p); -} - -const HParsedToken *act_base64_1(const HParseResult *p) -{ - return act_base64_n(1, p); -} +H_ACT_APPLY(act_base64_3, act_base64_n, 3); +H_ACT_APPLY(act_base64_2, act_base64_n, 2); +H_ACT_APPLY(act_base64_1, act_base64_n, 1); // Helper to concatenate two arrays. void carray_concat(HCountedArray *a, const HCountedArray *b) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 11b0660..4b886c6 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -14,50 +14,11 @@ // transformation. #include "../src/hammer.h" +#include "../src/glue.h" #include "../src/internal.h" // for h_carray functions (XXX ?!) #include -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) - - -/// -// Semantic action helpers. -// These might be candidates for inclusion in the library. -/// - -// The action equivalent of h_ignore. -const HParsedToken *act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. 
-const HParsedToken *act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -const HParsedToken *act_index0(const HParseResult *p) -{ - return act_index(0, p); -} - - /// // Semantic actions for the grammar below, each corresponds to an "ARULE". // They must be named act_. @@ -150,7 +111,9 @@ const HParsedToken *act_base64(const HParseResult *p) return res; } -#define act_ws act_ignore +H_ACT_APPLY(act_index0, h_act_index, 0); + +#define act_ws h_act_ignore #define act_document act_index0 From b83be8472e044b012ac3812daf76c37ab41a3a85 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:29:54 +0100 Subject: [PATCH 21/28] fix counter init in h_seq_append --- src/glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glue.c b/src/glue.c index 7f9c6fa..2735e48 100644 --- a/src/glue.c +++ b/src/glue.c @@ -142,7 +142,7 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys) assert(ys != NULL); assert(ys->token_type == TT_SEQUENCE); - for(size_t i; iseq->used; i++) + for(size_t i=0; iseq->used; i++) h_carray_append(xs->seq, ys->seq->elements[i]); } From af23f3bbf31856fbd79359db62697d82e6457f66 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:45:29 +0100 Subject: [PATCH 22/28] add more glue functions in base64_sem1 --- examples/base64_sem1.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 9d1012f..1c318e3 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -25,12 +25,10 @@ const HParsedToken *act_bsfdig(const HParseResult *p) { - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); + HParsedToken *res = H_MAKE_UINT(0); - assert(p->ast->token_type == TT_UINT); - uint8_t c = p->ast->uint; + uint8_t c = H_CAST_UINT(p->ast); - res->token_type = TT_UINT; if(c >= 0x40 && c <= 0x5A) // A-Z res->uint = c - 0x41; else if(c >= 0x60 && c <= 0x7A) // a-z @@ -58,13 +56,11 @@ H_ACT_APPLY(act_index0, h_act_index, 0); // General-form action to turn a block of base64 digits into bytes. const HParsedToken *act_base64_n(int n, const HParseResult *p) { - assert(p->ast->token_type == TT_SEQUENCE); - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); res->token_type = TT_SEQUENCE; res->seq = h_carray_new_sized(p->arena, n); - HParsedToken **digits = p->ast->seq->elements; + HParsedToken **digits = h_seq_elements(p->ast); uint32_t x = 0; int bits = 0; @@ -75,9 +71,7 @@ const HParsedToken *act_base64_n(int n, const HParseResult *p) x >>= bits%8; // align, i.e. cut off extra bits for(int i=0; iarena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = x & 0xFF; + HParsedToken *item = H_MAKE_UINT(x & 0xFF); res->seq->elements[n-1-i] = item; // output the last byte and x >>= 8; // discard it @@ -91,34 +85,23 @@ H_ACT_APPLY(act_base64_3, act_base64_n, 3); H_ACT_APPLY(act_base64_2, act_base64_n, 2); H_ACT_APPLY(act_base64_1, act_base64_n, 1); -// Helper to concatenate two arrays. 
-void carray_concat(HCountedArray *a, const HCountedArray *b) -{ - for(size_t i=0; iused; i++) - h_carray_append(a, b->elements[i]); -} - const HParsedToken *act_base64(const HParseResult *p) { assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used == 2); assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new(p->arena); + HParsedToken *res = H_MAKE_SEQ(); // concatenate base64_3 blocks - HCountedArray *seq = p->ast->seq->elements[0]->seq; - for(size_t i=0; iused; i++) { - assert(seq->elements[i]->token_type == TT_SEQUENCE); - carray_concat(res->seq, seq->elements[i]->seq); - } + HCountedArray *seq = H_FIELD_SEQ(0); + for(size_t i=0; iused; i++) + h_seq_append(res, seq->elements[i]); // append one trailing base64_2 or _1 block - const HParsedToken *tok = p->ast->seq->elements[1]; + const HParsedToken *tok = h_seq_index(p->ast, 1); if(tok->token_type == TT_SEQUENCE) - carray_concat(res->seq, tok->seq); + h_seq_append(res, tok); return res; } From 662357ccb43afceb55bc63aca287984a1ed59bbf Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:47:36 +0100 Subject: [PATCH 23/28] add glue helper to construct sized sequences --- src/glue.c | 7 +++++++ src/glue.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/glue.c b/src/glue.c index 2735e48..f1e086a 100644 --- a/src/glue.c +++ b/src/glue.c @@ -55,6 +55,13 @@ HParsedToken *h_make_seq(HArena *arena) return ret; } +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + HParsedToken *h_make_bytes(HArena *arena, size_t len) { HParsedToken *ret = h_make_(arena, TT_BYTES); diff --git a/src/glue.h b/src/glue.h index 90944ea..3125ae0 100644 --- a/src/glue.h +++ b/src/glue.h @@ -173,6 +173,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. HParsedToken *h_make_bytes(HArena *arena, size_t len); HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); @@ -180,6 +181,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); // Standard short-hands to make tokens in an action. #define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) #define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) #define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) From d9c2c921c431c609003d2ba3e7f309ce3709aeec Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:48:35 +0100 Subject: [PATCH 24/28] use H_MAKE_SEQN in base64_sem1 --- examples/base64_sem1.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 1c318e3..8074352 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -56,9 +56,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0); // General-form action to turn a block of base64 digits into bytes. const HParsedToken *act_base64_n(int n, const HParseResult *p) { - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new_sized(p->arena, n); + HParsedToken *res = H_MAKE_SEQN(n); HParsedToken **digits = h_seq_elements(p->ast); From 51b90828379cc7840901e2e74532bc188658d1f6 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:49:25 +0100 Subject: [PATCH 25/28] remove obsolete dependency on internal.h --- examples/base64_sem1.c | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 8074352..f2a3e82 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -14,7 +14,6 @@ #include "../src/hammer.h" #include "../src/glue.h" -#include "../src/internal.h" // for h_carray functions (XXX ?!) #include From f1f7c4f488d8ba0b43ad13e7186730afe4a37ad9 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 17:00:04 +0100 Subject: [PATCH 26/28] remove dependency on internal.h for base64_sem2 --- examples/base64_sem2.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 4b886c6..32afe5b 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -15,7 +15,6 @@ #include "../src/hammer.h" #include "../src/glue.h" -#include "../src/internal.h" // for h_carray functions (XXX ?!) 
#include @@ -47,13 +46,7 @@ uint8_t bsfdig_value(const HParsedToken *p) } // helper: append a byte value to a sequence -void seq_append_byte(HCountedArray *a, uint8_t b) -{ - HParsedToken *item = h_arena_malloc(a->arena, sizeof(HParsedToken)); - item->token_type = TT_UINT; - item->uint = b; - h_carray_append(a, item); -} +#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b)) const HParsedToken *act_base64(const HParseResult *p) { @@ -75,9 +68,7 @@ const HParsedToken *act_base64(const HParseResult *p) b64_1 = NULL; // allocate result sequence - HParsedToken *res = h_arena_malloc(p->arena, sizeof(HParsedToken)); - res->token_type = TT_SEQUENCE; - res->seq = h_carray_new(p->arena); + HParsedToken *res = H_MAKE_SEQ(); // concatenate base64_3 blocks for(size_t i=0; iseq->used; i++) { @@ -88,9 +79,9 @@ const HParsedToken *act_base64(const HParseResult *p) x <<= 6; x |= bsfdig_value(digits[1]); x <<= 6; x |= bsfdig_value(digits[2]); x <<= 6; x |= bsfdig_value(digits[3]); - seq_append_byte(res->seq, (x >> 16) & 0xFF); - seq_append_byte(res->seq, (x >> 8) & 0xFF); - seq_append_byte(res->seq, x & 0xFF); + seq_append_byte(res, (x >> 16) & 0xFF); + seq_append_byte(res, (x >> 8) & 0xFF); + seq_append_byte(res, x & 0xFF); } // append one trailing base64_2 or _1 block @@ -99,13 +90,13 @@ const HParsedToken *act_base64(const HParseResult *p) uint32_t x = bsfdig_value(digits[0]); x <<= 6; x |= bsfdig_value(digits[1]); x <<= 6; x |= bsfdig_value(digits[2]); - seq_append_byte(res->seq, (x >> 10) & 0xFF); - seq_append_byte(res->seq, (x >> 2) & 0xFF); + seq_append_byte(res, (x >> 10) & 0xFF); + seq_append_byte(res, (x >> 2) & 0xFF); } else if(b64_1) { HParsedToken **digits = b64_1->seq->elements; uint32_t x = bsfdig_value(digits[0]); x <<= 6; x |= bsfdig_value(digits[1]); - seq_append_byte(res->seq, (x >> 4) & 0xFF); + seq_append_byte(res, (x >> 4) & 0xFF); } return res; From 866e66ccf9538e3c19d03739e8c804ab2fc4029e Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Sun, 17 Feb 2013 16:29:54 +0100 Subject: [PATCH 27/28] fix counter init in h_seq_append --- src/glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glue.c b/src/glue.c index 7f9c6fa..2735e48 100644 --- a/src/glue.c +++ b/src/glue.c @@ -142,7 +142,7 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys) assert(ys != NULL); assert(ys->token_type == TT_SEQUENCE); - for(size_t i; iseq->used; i++) + for(size_t i=0; iseq->used; i++) h_carray_append(xs->seq, ys->seq->elements[i]); } From 0cfec9781ab7dfa0718b04bd5a34fd3d2eb72394 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 16:47:36 +0100 Subject: [PATCH 28/28] add glue helper to construct sized sequences --- src/glue.c | 7 +++++++ src/glue.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/glue.c b/src/glue.c index 2735e48..f1e086a 100644 --- a/src/glue.c +++ b/src/glue.c @@ -55,6 +55,13 @@ HParsedToken *h_make_seq(HArena *arena) return ret; } +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + HParsedToken *h_make_bytes(HArena *arena, size_t len) { HParsedToken *ret = h_make_(arena, TT_BYTES); diff --git a/src/glue.h b/src/glue.h index 90944ea..3125ae0 100644 --- a/src/glue.h +++ b/src/glue.h @@ -173,6 +173,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. HParsedToken *h_make_bytes(HArena *arena, size_t len); HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); @@ -180,6 +181,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); // Standard short-hands to make tokens in an action. 
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) #define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) #define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)