From 702e82f1c11733e7580fed9557cfecb0ebd32062 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 12 May 2014 09:45:22 +0200 Subject: [PATCH 01/23] h_put_value and h_get_value done. --- src/SConscript | 3 +- src/datastructures.c | 23 +++++++++++++++ src/hammer.h | 24 +++++++++++++++ src/internal.h | 7 ++++- src/parsers/value.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ src/t_parser.c | 21 ++++++++++++++ 6 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 src/parsers/value.c diff --git a/src/SConscript b/src/SConscript index 0388393..6fb3bf1 100644 --- a/src/SConscript +++ b/src/SConscript @@ -42,7 +42,8 @@ parsers = ['parsers/%s.c'%s for s in 'token', 'unimplemented', 'whitespace', - 'xor']] + 'xor', + 'value']] backends = ['backends/%s.c' % s for s in ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']] diff --git a/src/datastructures.c b/src/datastructures.c index 141adcd..0feeb21 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -1,6 +1,7 @@ #include "internal.h" #include "hammer.h" #include "allocator.h" +#include "parsers/parser_internal.h" #include #include #include @@ -393,6 +394,28 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) { return hash; } +void h_symbol_put(HParseState *state, const char* key, void *value) { + if (!state->symbol_table) { + state->symbol_table = h_slist_new(state->arena); + h_slist_push(state->symbol_table, h_hashtable_new(state->arena, + h_eq_ptr, + h_hash_ptr)); + } + HHashTable *head = h_slist_top(state->symbol_table); + assert(!h_hashtable_present(head, key)); + h_hashtable_put(head, key, value); +} + +void* h_symbol_get(HParseState *state, const char* key) { + if (state->symbol_table) { + HHashTable *head = h_slist_top(state->symbol_table); + if (head) { + return h_hashtable_get(head, key); + } + } + return NULL; +} + HSArray *h_sarray_new(HAllocator *mm__, size_t size) { HSArray *ret = h_new(HSArray, 1); ret->capacity = size; diff --git a/src/hammer.h b/src/hammer.h 
index f0ac686..ca14dfe 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -611,6 +611,30 @@ HAMMER_FN_DECL_NOARG(HParser*, h_indirect); */ HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner); +/** + * The 'h_put_value' combinator stashes the result of the parser + * it wraps in a symbol table in the parse state, so that non- + * local actions and predicates can access this value. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: p's token type if name was not already in + * the symbol table. It is an error, and thus a NULL result (and + * parse failure), to attempt to rename a symbol. + */ +HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name); + +/** + * The 'h_get_value' combinator retrieves a named HParseResult that + * was previously stashed in the parse state. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: whatever the stashed HParseResult is, if + * present. If absent, NULL (and thus parse failure). + */ +HAMMER_FN_DECL(HParser*, h_get_value, const char* name); + /** * Free the memory allocated to an HParseResult when it is no longer needed. */ diff --git a/src/internal.h b/src/internal.h index 85cd4db..6c721eb 100644 --- a/src/internal.h +++ b/src/internal.h @@ -190,6 +190,7 @@ typedef struct HHashTable_ { * arena - the arena that has been allocated for the parse this state is in. * lr_stack - a stack of HLeftRec's, used in Warth's recursion * recursion_heads - table of recursion heads. Keys are HParserCacheKey's with only an HInputStream (parser can be NULL), values are HRecursionHead's. + * symbol_table - stack of tables of values that have been stashed in the context of this parse. 
* */ @@ -199,6 +200,7 @@ struct HParseState_ { HArena * arena; HSlist *lr_stack; HHashTable *recursion_heads; + HSlist *symbol_table; // its contents are HHashTables }; typedef struct HParserBackendVTable_ { @@ -316,6 +318,7 @@ HSlist* h_slist_new(HArena *arena); HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void* h_slist_drop(HSlist *slist); +static inline void* h_slist_top(HSlist *sl) { return sl->head->elem; } void h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); HSlist* h_slist_remove_all(HSlist *slist, const void* item); @@ -347,8 +350,10 @@ bool h_eq_ptr(const void *p, const void *q); HHashValue h_hash_ptr(const void *p); uint32_t h_djbhash(const uint8_t *buf, size_t len); -typedef struct HCFSequence_ HCFSequence; +void h_symbol_put(HParseState *state, const char* key, void *value); +void* h_symbol_get(HParseState *state, const char* key); +typedef struct HCFSequence_ HCFSequence; struct HCFChoice_ { enum HCFChoiceType { diff --git a/src/parsers/value.c b/src/parsers/value.c new file mode 100644 index 0000000..531db7c --- /dev/null +++ b/src/parsers/value.c @@ -0,0 +1,69 @@ +#include "parser_internal.h" + +typedef struct { + const HParser* p; + const char* key; +} HStoredValue; + +/* Stash an HParseResult into a symbol table, so that it can be + retrieved and used later. 
*/ + +static HParseResult* parse_put(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (s->p && s->key && !h_symbol_get(state, s->key)) { + HParseResult *tmp = h_do_parse(s->p, state); + if (tmp) { + h_symbol_put(state, s->key, tmp); + } + return tmp; + } + // otherwise there's no parser, no key, or key's stored already + return NULL; +} + +static const HParserVtable put_vt = { + .parse = parse_put, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_put_value(const HParser* p, const char* name) { + return h_put_value__m(&system_allocator, p, name); +} + +HParser* h_put_value__m(HAllocator* mm__, const HParser* p, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = p; + env->key = name; + return h_new_parser(mm__, &put_vt, env); +} + +/* Retrieve a stashed result from the symbol table. */ + +static HParseResult* parse_get(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (!s->p && s->key) { + return h_symbol_get(state, s->key); + } else { // either there's no key, or there was a parser here + return NULL; + } +} + +static const HParserVtable get_vt = { + .parse = parse_get, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_get_value(const char* name) { + return h_get_value__m(&system_allocator, name); +} + +HParser* h_get_value__m(HAllocator* mm__, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = NULL; + env->key = name; + return h_new_parser(mm__, &get_vt, env); +} diff --git a/src/t_parser.c b/src/t_parser.c index 4260a7c..59f8b6e 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -456,6 +456,26 @@ static void test_ambiguous(gconstpointer backend) { g_check_parse_failed(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+", 2); } +HParsedToken* act_get(const HParseResult *p, void* user_data) { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 
1); + ret->token_type = TT_UINT; + ret->uint = 3 * (1 << p->ast->uint); + return ret; +} + +static void test_put_get(gconstpointer backend) { + HParser *p = h_sequence(h_put_value(h_uint8(), "size"), + h_token((const uint8_t*)"foo", 3), + h_length_value(h_action(h_get_value("size"), + act_get, NULL), + h_uint8()), + NULL); + // Yes, the quotes in the next line look weird. Leave them alone, + // this is to deal with how C strings handle hex-formatted chars. + g_check_parse_match(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcdef", 10, "(u0x1 <66.6f.6f> (u0x61 u0x62 u0x63 u0x64 u0x65 u0x66))"); + g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9); +} + void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -502,6 +522,7 @@ void register_parser_tests(void) { //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); + g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); From eb6c74cf892eba0995e47a031a6a583f1bf61ab2 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" Date: Mon, 12 May 2014 09:45:22 +0200 Subject: [PATCH 02/23] h_put_value and h_get_value done. 
--- src/SConscript | 3 +- src/datastructures.c | 23 +++++++++++++++ src/hammer.h | 24 +++++++++++++++ src/internal.h | 7 ++++- src/parsers/value.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ src/t_parser.c | 21 ++++++++++++++ 6 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 src/parsers/value.c diff --git a/src/SConscript b/src/SConscript index 155a621..49d43eb 100644 --- a/src/SConscript +++ b/src/SConscript @@ -43,7 +43,8 @@ parsers = ['parsers/%s.c'%s for s in 'token', 'unimplemented', 'whitespace', - 'xor']] + 'xor', + 'value']] backends = ['backends/%s.c' % s for s in ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']] diff --git a/src/datastructures.c b/src/datastructures.c index 141adcd..0feeb21 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -1,6 +1,7 @@ #include "internal.h" #include "hammer.h" #include "allocator.h" +#include "parsers/parser_internal.h" #include #include #include @@ -393,6 +394,28 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) { return hash; } +void h_symbol_put(HParseState *state, const char* key, void *value) { + if (!state->symbol_table) { + state->symbol_table = h_slist_new(state->arena); + h_slist_push(state->symbol_table, h_hashtable_new(state->arena, + h_eq_ptr, + h_hash_ptr)); + } + HHashTable *head = h_slist_top(state->symbol_table); + assert(!h_hashtable_present(head, key)); + h_hashtable_put(head, key, value); +} + +void* h_symbol_get(HParseState *state, const char* key) { + if (state->symbol_table) { + HHashTable *head = h_slist_top(state->symbol_table); + if (head) { + return h_hashtable_get(head, key); + } + } + return NULL; +} + HSArray *h_sarray_new(HAllocator *mm__, size_t size) { HSArray *ret = h_new(HSArray, 1); ret->capacity = size; diff --git a/src/hammer.h b/src/hammer.h index 7780873..947456d 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -621,6 +621,30 @@ HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner); */ HAMMER_FN_DECL(HParser*, 
h_with_endianness, char endianness, const HParser* p); +/** + * The 'h_put_value' combinator stashes the result of the parser + * it wraps in a symbol table in the parse state, so that non- + * local actions and predicates can access this value. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: p's token type if name was not already in + * the symbol table. It is an error, and thus a NULL result (and + * parse failure), to attempt to rename a symbol. + */ +HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name); + +/** + * The 'h_get_value' combinator retrieves a named HParseResult that + * was previously stashed in the parse state. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: whatever the stashed HParseResult is, if + * present. If absent, NULL (and thus parse failure). + */ +HAMMER_FN_DECL(HParser*, h_get_value, const char* name); + /** * Free the memory allocated to an HParseResult when it is no longer needed. */ diff --git a/src/internal.h b/src/internal.h index 85cd4db..6c721eb 100644 --- a/src/internal.h +++ b/src/internal.h @@ -190,6 +190,7 @@ typedef struct HHashTable_ { * arena - the arena that has been allocated for the parse this state is in. * lr_stack - a stack of HLeftRec's, used in Warth's recursion * recursion_heads - table of recursion heads. Keys are HParserCacheKey's with only an HInputStream (parser can be NULL), values are HRecursionHead's. + * symbol_table - stack of tables of values that have been stashed in the context of this parse. 
* */ @@ -199,6 +200,7 @@ struct HParseState_ { HArena * arena; HSlist *lr_stack; HHashTable *recursion_heads; + HSlist *symbol_table; // its contents are HHashTables }; typedef struct HParserBackendVTable_ { @@ -316,6 +318,7 @@ HSlist* h_slist_new(HArena *arena); HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void* h_slist_drop(HSlist *slist); +static inline void* h_slist_top(HSlist *sl) { return sl->head->elem; } void h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); HSlist* h_slist_remove_all(HSlist *slist, const void* item); @@ -347,8 +350,10 @@ bool h_eq_ptr(const void *p, const void *q); HHashValue h_hash_ptr(const void *p); uint32_t h_djbhash(const uint8_t *buf, size_t len); -typedef struct HCFSequence_ HCFSequence; +void h_symbol_put(HParseState *state, const char* key, void *value); +void* h_symbol_get(HParseState *state, const char* key); +typedef struct HCFSequence_ HCFSequence; struct HCFChoice_ { enum HCFChoiceType { diff --git a/src/parsers/value.c b/src/parsers/value.c new file mode 100644 index 0000000..531db7c --- /dev/null +++ b/src/parsers/value.c @@ -0,0 +1,69 @@ +#include "parser_internal.h" + +typedef struct { + const HParser* p; + const char* key; +} HStoredValue; + +/* Stash an HParseResult into a symbol table, so that it can be + retrieved and used later. 
*/ + +static HParseResult* parse_put(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (s->p && s->key && !h_symbol_get(state, s->key)) { + HParseResult *tmp = h_do_parse(s->p, state); + if (tmp) { + h_symbol_put(state, s->key, tmp); + } + return tmp; + } + // otherwise there's no parser, no key, or key's stored already + return NULL; +} + +static const HParserVtable put_vt = { + .parse = parse_put, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_put_value(const HParser* p, const char* name) { + return h_put_value__m(&system_allocator, p, name); +} + +HParser* h_put_value__m(HAllocator* mm__, const HParser* p, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = p; + env->key = name; + return h_new_parser(mm__, &put_vt, env); +} + +/* Retrieve a stashed result from the symbol table. */ + +static HParseResult* parse_get(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (!s->p && s->key) { + return h_symbol_get(state, s->key); + } else { // either there's no key, or there was a parser here + return NULL; + } +} + +static const HParserVtable get_vt = { + .parse = parse_get, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_get_value(const char* name) { + return h_get_value__m(&system_allocator, name); +} + +HParser* h_get_value__m(HAllocator* mm__, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = NULL; + env->key = name; + return h_new_parser(mm__, &get_vt, env); +} diff --git a/src/t_parser.c b/src/t_parser.c index a98eb11..2b66bff 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -495,6 +495,26 @@ static void test_endianness(gconstpointer backend) { g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc"); } +HParsedToken* act_get(const HParseResult *p, void* user_data) { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + ret->token_type = 
TT_UINT; + ret->uint = 3 * (1 << p->ast->uint); + return ret; +} + +static void test_put_get(gconstpointer backend) { + HParser *p = h_sequence(h_put_value(h_uint8(), "size"), + h_token((const uint8_t*)"foo", 3), + h_length_value(h_action(h_get_value("size"), + act_get, NULL), + h_uint8()), + NULL); + // Yes, the quotes in the next line look weird. Leave them alone, + // this is to deal with how C strings handle hex-formatted chars. + g_check_parse_match(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcdef", 10, "(u0x1 <66.6f.6f> (u0x61 u0x62 u0x63 u0x64 u0x65 u0x66))"); + g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9); +} + void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -542,6 +562,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness); + g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); From 41dca83631d9272d193679a0a99a574a4fdd9933 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 18 Jun 2014 21:54:52 +0200 Subject: [PATCH 03/23] add h_permutation --- src/SConscript | 1 + src/hammer.h | 26 ++++++ src/parsers/permutation.c | 179 ++++++++++++++++++++++++++++++++++++++ src/t_parser.c | 54 ++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 src/parsers/permutation.c diff --git a/src/SConscript b/src/SConscript index 49d43eb..38ace12 100644 --- a/src/SConscript +++ b/src/SConscript @@ -39,6 +39,7 @@ parsers = ['parsers/%s.c'%s for s in 'not', 'nothing', 'optional', + 'permutation', 'sequence', 'token', 'unimplemented', diff --git a/src/hammer.h b/src/hammer.h index 947456d..52058b5 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -437,6 +437,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa */ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p); +/** + * Given a null-terminated list of parsers, match a permutation phrase of these + * parsers, i.e. match all parsers exactly once in any order. + * + * If multiple orders would match, the lexically smallest permutation is used; + * in other words, at any step the remaining available parsers are tried in + * the order in which they appear in the arguments. + * + * As an exception, 'h_optional' parsers (actually those that return a result + * of token type TT_NONE) are detected and the algorithm will try to match them + * with a non-empty result. Specifically, a result of TT_NONE is treated as a + * non-match as long as any other argument matches. + * + * Other parsers that succeed on any input (e.g. h_many), that match the same + * input as others, or that match input which is a prefix of another match can + * lead to unexpected results and should probably not be used as arguments. + * + * The result is a sequence of the same length as the argument list. + * Each parser's result is placed at that parser's index in the arguments. 
+ * The permutation itself (the order in which the arguments were matched) is + * not returned. + * + * Result token type: TT_SEQUENCE + */ +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p); + /** * Given two parsers, p1 and p2, this parser succeeds in the following * cases: diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c new file mode 100644 index 0000000..564565a --- /dev/null +++ b/src/parsers/permutation.c @@ -0,0 +1,179 @@ +#include +#include "parser_internal.h" + +typedef struct { + size_t len; + HParser **p_array; +} HSequence; + +// main recursion, used by parse_permutation below +static int parse_permutation_tail(const HSequence *s, + HCountedArray *seq, + const size_t k, char *set, + HParseState *state) +{ + // shorthands + const size_t n = s->len; + HParser **ps = s->p_array; + + // trivial base case + if(k >= n) + return 1; + + HInputStream bak = state->input_stream; + + // try available parsers as first element of the permutation tail + HParseResult *match = NULL; + size_t i; + for(i=0; ielements[i] = (void *)match->ast; + + // treat empty optionals (TT_NONE) like failure here + if(match && match->ast && match->ast->token_type == TT_NONE) + match = NULL; + + if(match) { + // remove parser from active set + set[i] = 0; + + // parse the rest of the permutation phrase + if(parse_permutation_tail(s, seq, k+1, set, state)) { + // success + return 1; + } else { + // place parser back in active set and try the next + set[i] = 1; + } + } + + state->input_stream = bak; // rewind input + } + } + + // if all available parsers were empty optionals (TT_NONE), still succeed + for(i=0; ielements[i]; + if(!(tok && tok->token_type == TT_NONE)) + break; + } + } + if(i==n) // all were TT_NONE + return 1; + + // permutations exhausted + return 0; +} + +static HParseResult *parse_permutation(void *env, HParseState *state) +{ + const HSequence *s = env; + const size_t n = s->len; + + // current set of available 
(not yet matched) parsers + char *set = h_arena_malloc(state->arena, sizeof(char) * n); + memset(set, 1, sizeof(char) * n); + + // parse result + HCountedArray *seq = h_carray_new_sized(state->arena, n); + + if(parse_permutation_tail(s, seq, 0, set, state)) { + // success + // return the sequence of results + seq->used = n; + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_SEQUENCE; + tok->seq = seq; + return make_result(state->arena, tok); + } else { + // no parse + // XXX free seq + return NULL; + } +} + + +static const HParserVtable permutation_vt = { + .parse = parse_permutation, + .isValidRegular = h_false, + .isValidCF = h_false, + .desugar = NULL, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_permutation(HParser* p, ...) { + va_list ap; + va_start(ap, p); + HParser* ret = h_permutation__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) { + va_list ap; + va_start(ap, p); + HParser* ret = h_permutation__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +HParser* h_permutation__v(HParser* p, va_list ap) { + return h_permutation__mv(&system_allocator, p, ap); +} + +HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) { + va_list ap; + size_t len = 0; + HSequence *s = h_new(HSequence, 1); + + HParser *arg; + va_copy(ap, ap_); + do { + len++; + arg = va_arg(ap, HParser *); + } while (arg); + va_end(ap); + s->p_array = h_new(HParser *, len); + + va_copy(ap, ap_); + s->p_array[0] = p; + for (size_t i = 1; i < len; i++) { + s->p_array[i] = va_arg(ap, HParser *); + } while (arg); + va_end(ap); + + s->len = len; + return h_new_parser(mm__, &permutation_vt, s); +} + +HParser* h_permutation__a(void *args[]) { + return h_permutation__ma(&system_allocator, args); +} + +HParser* h_permutation__ma(HAllocator* mm__, void *args[]) { + size_t len = -1; // because do...while + const HParser *arg; + + do { + arg=((HParser **)args)[++len]; + } while(arg); + + 
HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(HParser *, len); + + for (size_t i = 0; i < len; i++) { + s->p_array[i] = ((HParser **)args)[i]; + } + + s->len = len; + HParser *ret = h_new(HParser, 1); + ret->vtable = &permutation_vt; + ret->env = (void*)s; + ret->backend = PB_MIN; + return ret; +} diff --git a/src/t_parser.c b/src/t_parser.c index 2b66bff..191996c 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -515,6 +515,59 @@ static void test_put_get(gconstpointer backend) { g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9); } +static void test_permutation(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *p = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL); + + g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "acb", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "bac", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "bca", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "cab", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "cba", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_failed(p, be, "a", 1); + g_check_parse_failed(p, be, "ab", 2); + g_check_parse_failed(p, be, "abb", 3); + + const HParser *po = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL); + + g_check_parse_match(po, be, "abc", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "acb", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "bac", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "bca", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "cab", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "cba", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "ab", 2, "(u0x61 u0x62 null)"); + g_check_parse_match(po, be, "ba", 2, "(u0x61 u0x62 null)"); + g_check_parse_failed(po, be, "a", 1); + g_check_parse_failed(po, be, "b", 1); + g_check_parse_failed(po, be, "c", 
1); + g_check_parse_failed(po, be, "ca", 2); + g_check_parse_failed(po, be, "cb", 2); + g_check_parse_failed(po, be, "cc", 2); + g_check_parse_failed(po, be, "ccab", 4); + g_check_parse_failed(po, be, "ccc", 3); + + const HParser *po2 = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL); + + g_check_parse_match(po2, be, "abc", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "acb", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "bac", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "bca", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "cab", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "cba", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "ab", 2, "(null u0x61 u0x62)"); + g_check_parse_match(po2, be, "ba", 2, "(null u0x61 u0x62)"); + g_check_parse_failed(po2, be, "a", 1); + g_check_parse_failed(po2, be, "b", 1); + g_check_parse_failed(po2, be, "c", 1); + g_check_parse_failed(po2, be, "ca", 2); + g_check_parse_failed(po2, be, "cb", 2); + g_check_parse_failed(po2, be, "cc", 2); + g_check_parse_failed(po2, be, "ccab", 4); + g_check_parse_failed(po2, be, "ccc", 3); +} + void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -563,6 +616,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness); g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); + g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), 
test_ch); From e057e8cb1b32d0b6dcfcc9baca14afe182b6e3fe Mon Sep 17 00:00:00 2001 From: stewart Date: Wed, 29 Oct 2014 10:32:21 +0800 Subject: [PATCH 04/23] Scons now doesn't throw away /home/stewart/.nix-profile/lib/pkgconfig:/nix/var/nix/profiles/default/lib/pkgconfig:/run/current-system/sw/lib/pkgconfig, so NixOS can build it --- SConstruct | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index f74f8d5..4cf48a3 100644 --- a/SConstruct +++ b/SConstruct @@ -14,7 +14,7 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') -env = Environment(ENV = {'PATH' : os.environ['PATH']}, +env = Environment(ENV = {'PATH' : os.environ['PATH'], 'PKG_CONFIG_PATH' : os.environ['PKG_CONFIG_PATH']}, variables = vars, tools=tools, toolpath=['tools']) From 011efe8cfbecb950c5d31af1e6709670e6ddfc25 Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 30 Nov 2014 19:23:29 -0500 Subject: [PATCH 05/23] Mention user guide in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 14f9b08..8880cb6 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,8 @@ Just `#include <hammer/hammer.h>` (also `#include <hammer/glue.h>` if you plan t If you've installed Hammer system-wide, you can use `pkg-config` in the usual way. +For documentation, see the [user guide](https://github.com/UpstandingHackers/hammer/wiki/User-guide). + Examples ======== The `examples/` directory contains some simple examples, currently including: From f2434d5b1256610fde4d3d8ce942fe06a1485f7d Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 7 Dec 2014 00:09:54 -0500 Subject: [PATCH 06/23] Move backend names into src/benchmark.c It's causing unreferenced-variable warnings, and isn't referenced anywhere aside from benchmark.c. If client code is likely to reference it, perhaps move it into another header, so people who include hammer.h don't have to refer to it to have warning-free code. 
--- src/benchmark.c | 8 ++++++++ src/hammer.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/benchmark.c b/src/benchmark.c index 408bfdb..75595b2 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -14,6 +14,14 @@ #include #endif +static const char* HParserBackendNames[] = { + "Packrat", + "Regular", + "LL(k)", + "LALR", + "GLR" +}; + void h_benchmark_clock_gettime(struct timespec *ts) { if (ts == NULL) return; diff --git a/src/hammer.h b/src/hammer.h index 52058b5..b0ce75d 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -46,14 +46,6 @@ typedef enum HParserBackend_ { PB_MAX = PB_GLR } HParserBackend; -static const char* HParserBackendNames[] = { - "Packrat", - "Regular", - "LL(k)", - "LALR", - "GLR" -}; - typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; TT_NONE = 1, From 70fa455c0301435256e8bae10af85b6ccedfb3de Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 30 Nov 2014 19:18:09 -0500 Subject: [PATCH 07/23] Fix compilation when PKG_CONFIG_PATH is not in environ --- SConstruct | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index 4cf48a3..a8f7ce8 100644 --- a/SConstruct +++ b/SConstruct @@ -14,7 +14,11 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') -env = Environment(ENV = {'PATH' : os.environ['PATH'], 'PKG_CONFIG_PATH' : os.environ['PKG_CONFIG_PATH']}, +envvars = {'PATH' : os.environ['PATH']} +if 'PKG_CONFIG_PATH' in os.environ: + envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH'] + +env = Environment(ENV = envvars, variables = vars, tools=tools, toolpath=['tools']) From 8d5f00870f72929b1b1523c0ab6a23ecca73bae0 Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 30 Nov 2014 19:08:36 -0500 Subject: [PATCH 08/23] Remove vacuous states in base64_sem?.c I don't think these actually affect correctness since there's no way for 0x40 or 0x60 to show up in a 
parse tree anyway, but they're confusing. --- examples/base64_sem1.c | 4 ++-- examples/base64_sem2.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 4da171f..afbbef8 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -29,9 +29,9 @@ HParsedToken *act_bsfdig(const HParseResult *p, void* user_data) uint8_t c = H_CAST_UINT(p->ast); - if(c >= 0x40 && c <= 0x5A) // A-Z + if(c >= 0x41 && c <= 0x5A) // A-Z res->uint = c - 0x41; - else if(c >= 0x60 && c <= 0x7A) // a-z + else if(c >= 0x61 && c <= 0x7A) // a-z res->uint = c - 0x61 + 26; else if(c >= 0x30 && c <= 0x39) // 0-9 res->uint = c - 0x30 + 52; diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index b7a2263..b8f7b4a 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -31,9 +31,9 @@ uint8_t bsfdig_value(const HParsedToken *p) if(p && p->token_type == TT_UINT) { uint8_t c = p->uint; - if(c >= 0x40 && c <= 0x5A) // A-Z + if(c >= 0x41 && c <= 0x5A) // A-Z value = c - 0x41; - else if(c >= 0x60 && c <= 0x7A) // a-z + else if(c >= 0x61 && c <= 0x7A) // a-z value = c - 0x61 + 26; else if(c >= 0x30 && c <= 0x39) // 0-9 value = c - 0x30 + 52; From b1078c3d88a32c92517bc9379750c31f68228c9d Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 7 Dec 2014 02:23:01 -0500 Subject: [PATCH 09/23] llvm doesn't care about your size They apparently removed alloc_size at some point (it was a no-op beforehand), causing the attribute to throw an error when clang compiles anything including allocator.h. --- src/allocator.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/allocator.h b/src/allocator.h index 803d89f..4a48693 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -33,11 +33,22 @@ typedef struct HAllocator_ { typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... 
-#ifndef SWIG -void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); + +#if defined __llvm__ +# if __has_attribute(malloc) +# define ATTR_MALLOC(n) __attribute__((malloc)) +# else +# define ATTR_MALLOC(n) +# endif +#elif defined SWIG +# define ATTR_MALLOC(n) +#elif defined __GNUC__ +# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(2))) #else -void* h_arena_malloc(HArena *arena, size_t count); +# define ATTR_MALLOC(n) #endif + +void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); From 5abe74f890825047a6117320d7dbe3590bfd5534 Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sat, 3 Jan 2015 16:35:56 -0500 Subject: [PATCH 10/23] Retab as though tabstop were 8 Tabs after the first non-tab character are crazymaking. I picked 8 because it wasn't 7 and caused the backslashes to line up on H_ACT_APPLY. 
--- src/glue.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/glue.h b/src/glue.h index 1fe6ce4..18e25ec 100644 --- a/src/glue.h +++ b/src/glue.h @@ -66,13 +66,13 @@ h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL) #define H_AVRULE(rule, def) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL) -#define H_ADRULE(rule, def, data) HParser *rule = \ +#define H_ADRULE(rule, def, data) HParser *rule = \ h_action(def, act_ ## rule, data) -#define H_VDRULE(rule, def, data) HParser *rule = \ +#define H_VDRULE(rule, def, data) HParser *rule = \ h_attr_bool(def, validate_ ## rule, data) -#define H_VADRULE(rule, def, data) HParser *rule = \ +#define H_VADRULE(rule, def, data) HParser *rule = \ h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data) -#define H_AVDRULE(rule, def, data) HParser *rule = \ +#define H_AVDRULE(rule, def, data) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data) @@ -109,8 +109,8 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); // Define 'myaction' as a specialization of 'paction' by supplying the leading // parameters. #define H_ACT_APPLY(myaction, paction, ...) \ - HParsedToken *myaction(const HParseResult *p, void* user_data) { \ - return paction(__VA_ARGS__, p, user_data); \ + HParsedToken *myaction(const HParseResult *p, void* user_data) { \ + return paction(__VA_ARGS__, p, user_data); \ } From 2dad0c48b41408ef6e1a23cec9552f3bc36bbcef Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sat, 3 Jan 2015 16:42:45 -0500 Subject: [PATCH 11/23] H_VALIDATE_APPLY macro I've found this especially useful in combination with my own _attr_uint_const for things like flags and type specifiers. 
It's possible that its usefulness might be diminished significantly if there were a built-in bitfield constant parser -- that certainly would eliminate all of my current uses of it -- but it still seems nicely symmetric with H_ACT_APPLY. --- src/glue.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/glue.h b/src/glue.h index 18e25ec..6c1c56c 100644 --- a/src/glue.h +++ b/src/glue.h @@ -11,7 +11,8 @@ // // A few standard semantic actions are defined below. The H_ACT_APPLY macro // allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. +// a generic action to fixed parameters. H_VALIDATE_APPLY is similar for +// h_attr_bool. // // The definition of more complex semantic actions will usually consist of // extracting data from the given parse tree and constructing a token of custom @@ -113,6 +114,12 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); return paction(__VA_ARGS__, p, user_data); \ } +// Similar, but for validations. +#define H_VALIDATE_APPLY(myvalidation, pvalidation, ...) \ + bool myvalidation(HParseResult* p, void* user_data) { \ + return pvalidation(__VA_ARGS__, p, user_data); \ + } + // // Working with HParsedTokens From af73181cf4fbd9b6ff269bea40a0f0f7c3c62113 Mon Sep 17 00:00:00 2001 From: TQ Hirsch Date: Sun, 4 Jan 2015 04:00:09 +0100 Subject: [PATCH 12/23] Fix #118 NEWS: * Switching endianness mid-byte no longer potentially re-reads bytes. * bit_offset now consistently refers to the number of bits already read. * HParsedTokens now have a bit_length field; this is a size_t. This may be removed for memory reasons. The bit writer has not yet been updated to match; the result of switching bit writer endianness in the middle of a byte remains undefined. 
--- src/SConscript | 3 ++- src/backends/packrat.c | 12 ++++++----- src/bitreader.c | 37 +++++++++++++++++----------------- src/hammer.c | 2 +- src/hammer.h | 1 + src/internal.h | 5 +++++ src/parsers/endianness.c | 16 +++------------ src/parsers/parser_internal.h | 1 + src/t_bitreader.c | 15 +++++++------- src/t_bitwriter.c | 2 +- src/t_regression.c | 38 +++++++++++++++++++++++++++++++++++ src/test_suite.c | 2 ++ 12 files changed, 86 insertions(+), 48 deletions(-) create mode 100644 src/t_regression.c diff --git a/src/SConscript b/src/SConscript index 38ace12..386a9a2 100644 --- a/src/SConscript +++ b/src/SConscript @@ -69,7 +69,8 @@ ctests = ['t_benchmark.c', 't_bitwriter.c', 't_parser.c', 't_grammar.c', - 't_misc.c'] + 't_misc.c', + 't_regression.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c1e422e..33082c6 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (tmp_res->bit_length == 0) { // Don't modify if forwarding. 
+ tmp_res->bit_length = bit_length; + } + if (tmp_res->ast && tmp_res->ast->bit_length != 0) { + ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; + } } else tmp_res->bit_length = 0; } diff --git a/src/bitreader.c b/src/bitreader.c index df8c4c3..3627df5 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. // not in danger of overflowing, so add in bits // add in number of bits... - if (state->endianness & BIT_BIG_ENDIAN) - bits_left = (bits_left << 3) - 8 + state->bit_offset; - else - bits_left = (bits_left << 3) - state->bit_offset; + bits_left = (bits_left << 3) - state->bit_offset - state->margin; if (bits_left < count) { if (state->endianness & BYTE_BIG_ENDIAN) final_shift = count - bits_left; @@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { final_shift = 0; } - if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { + if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) { // fast path if (state->endianness & BYTE_BIG_ENDIAN) { while (count > 0) { @@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int segment, segment_len; // Read a segment... 
if (state->endianness & BIT_BIG_ENDIAN) { - if (count >= state->bit_offset) { - segment_len = state->bit_offset; - state->bit_offset = 8; - segment = state->input[state->index] & ((1 << segment_len) - 1); - state->index++; - } else { - segment_len = count; - state->bit_offset -= count; - segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); - } - } else { // BIT_LITTLE_ENDIAN - if (count + state->bit_offset >= 8) { - segment_len = 8 - state->bit_offset; - segment = (state->input[state->index] >> state->bit_offset); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1); state->index++; state->bit_offset = 0; + state->margin = 0; + } else { + segment_len = count; + state->bit_offset += count; + segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1); + } + } else { // BIT_LITTLE_ENDIAN + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); + state->index++; + state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); diff --git a/src/hammer.c b/src/hammer.c index 2456bdc..6bb9ebb 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* // Set up a parse state... 
HInputStream input_stream = { .index = 0, - .bit_offset = 8, + .bit_offset = 0, .overrun = 0, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .length = length, diff --git a/src/hammer.h b/src/hammer.h index b0ce75d..1c02b05 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -99,6 +99,7 @@ typedef struct HParsedToken_ { HTokenData token_data; #endif size_t index; + size_t bit_length; char bit_offset; } HParsedToken; diff --git a/src/internal.h b/src/internal.h index 6c721eb..0c4d4dc 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct HInputStream_ { size_t index; size_t length; char bit_offset; + char margin; // The number of bits on the end that is being read + // towards that should be ignored. char endianness; char overrun; } HInputStream; @@ -295,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable; // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. int64_t h_read_bits(HInputStream* state, int count, char signed_p); +static inline size_t h_input_stream_pos(HInputStream* state) { + return state->index * 8 + state->bit_offset + state->margin; +} // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index 091e4c0..e3f53ab 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input) { assert(input->bit_offset <= 8); - if((input->bit_offset % 8) != 0) { - // switching bit order in the middle of a byte - // we leave bit_offset untouched. this means that something like - // le(bits(5)),le(bits(3)) - // is equivalent to - // le(bits(5),bits(3)) . - // on the other hand, - // le(bits(5)),be(bits(5)) - // will read the same 5 bits twice and discard the top 3. 
- } else { - // flip offset (0 <-> 8) - input->bit_offset = 8 - input->bit_offset; - } + char tmp = input->bit_offset; + input->bit_offset = input->margin; + input->margin = tmp; } static HParseResult *parse_endianness(void *env, HParseState *state) diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ec97dd1..9a3b6de 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse return ret; } diff --git a/src/t_bitreader.c b/src/t_bitreader.c index 40a7bb9..65235c1 100644 --- a/src/t_bitreader.c +++ b/src/t_bitreader.c @@ -4,14 +4,14 @@ #include "internal.h" #include "test_suite.h" -#define MK_INPUT_STREAM(buf,len,endianness_) \ +#define MK_INPUT_STREAM(buf,len,endianness_) \ { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \ - .endianness = endianness_ \ - } + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } static void test_bitreader_ints(void) { @@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) { g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); } - void register_bitreader_tests(void) { g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/le", test_bitreader_le); diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c index 747c86f..6b9b705 100644 --- a/src/t_bitwriter.c +++ b/src/t_bitwriter.c @@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) { .input = buf, .index = 0, .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 
8 : 0, + .bit_offset = 0, .endianness = flags, .overrun = 0 }; diff --git a/src/t_regression.c b/src/t_regression.c new file mode 100644 index 0000000..e74f16b --- /dev/null +++ b/src/t_regression.c @@ -0,0 +1,38 @@ +#include +#include +#include "glue.h" +#include "hammer.h" +#include "test_suite.h" + +static void test_bug118(void) { + // https://github.com/UpstandingHackers/hammer/issues/118 + // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 + + HParseResult* p; + const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; + +#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) + H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); + H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); +#undef MY_ENDIAN + + H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); + + H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); + H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); + + H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); + H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); + + + p = h_parse(parser_weird, input, 6); + g_check_cmp_int32(p->bit_length, ==, 44); + h_parse_result_free(p); + p = h_parse(parser_ok, input, 6); + g_check_cmp_int32(p->bit_length, ==, 40); + h_parse_result_free(p); +} + +void register_regression_tests(void) { + g_test_add_func("/core/regression/bug118", test_bug118); +} diff --git a/src/test_suite.c b/src/test_suite.c index 81f86b2..cba18e8 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -25,6 +25,7 @@ extern void register_parser_tests(); extern void register_grammar_tests(); extern void register_misc_tests(); extern void register_benchmark_tests(); +extern void register_regression_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -35,6 +36,7 @@ int main(int argc, char** argv) { register_parser_tests(); register_grammar_tests(); register_misc_tests(); + register_regression_tests(); if 
(g_test_slow() || g_test_perf()) register_benchmark_tests(); From a0104776ed81c328a13c49f5cbf33c2bbb8e4218 Mon Sep 17 00:00:00 2001 From: Steven Dee Date: Sun, 4 Jan 2015 21:37:21 -0500 Subject: [PATCH 13/23] Free res_unamb --- src/benchmark.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/benchmark.c b/src/benchmark.c index 75595b2..ce416da 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include "hammer.h" @@ -120,6 +121,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest ret->results[backend].failed_testcases++; } h_parse_result_free(res); + free(res_unamb); } if (tc_failed > 0) { From 42d51ed479ee56c88133ea8268b5b8f5e0aad46e Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 23 Jan 2015 18:59:30 +0100 Subject: [PATCH 14/23] add monadic bind combinator h_bind --- src/SConscript | 1 + src/hammer.h | 42 ++++++++++++++++++++++++++++++------------ src/parsers/bind.c | 42 ++++++++++++++++++++++++++++++++++++++++++ src/t_parser.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 12 deletions(-) create mode 100644 src/parsers/bind.c diff --git a/src/SConscript b/src/SConscript index 38ace12..b7e6159 100644 --- a/src/SConscript +++ b/src/SConscript @@ -22,6 +22,7 @@ parsers = ['parsers/%s.c'%s for s in ['action', 'and', 'attr_bool', + 'bind', 'bits', 'butnot', 'ch', diff --git a/src/hammer.h b/src/hammer.h index b0ce75d..716ab6d 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -122,6 +122,19 @@ typedef struct HParseResult_ { */ typedef struct HBitWriter_ HBitWriter; +typedef struct HCFChoice_ HCFChoice; +typedef struct HRVMProg_ HRVMProg; +typedef struct HParserVtable_ HParserVtable; + +// TODO: Make this internal +typedef struct HParser_ { + const HParserVtable *vtable; + HParserBackend backend; + void* backend_data; + void *env; + HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ +} HParser; + /** * Type 
of an action to apply to an AST, used in the action() parser. * It can be any (user-defined) function that takes a HParseResult* @@ -141,18 +154,12 @@ typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data); */ typedef bool (*HPredicate)(HParseResult *p, void* user_data); -typedef struct HCFChoice_ HCFChoice; -typedef struct HRVMProg_ HRVMProg; -typedef struct HParserVtable_ HParserVtable; - -// TODO: Make this internal -typedef struct HParser_ { - const HParserVtable *vtable; - HParserBackend backend; - void* backend_data; - void *env; - HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ -} HParser; +/** + * Type of a parser that depends on the result of a previous parser, + * used in h_bind(). The void* argument is passed through from h_bind() and can + * be used to arbitrarily parameterize the function further. + */ +typedef HParser* (*HContinuation)(const HParsedToken *x, void *env); // {{{ Stuff for benchmarking typedef struct HParserTestcase_ { @@ -663,6 +670,17 @@ HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name); */ HAMMER_FN_DECL(HParser*, h_get_value, const char* name); +/** + * Monadic bind for HParsers, i.e.: + * Sequencing where later parsers may depend on the result(s) of earlier ones. + * + * Run p and call the result x. Then run k(env,x). Fail if p fails or if + * k(env,x) fails. + * + * Result: the result of k(x,env). + */ +HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env); + /** * Free the memory allocated to an HParseResult when it is no longer needed. 
*/ diff --git a/src/parsers/bind.c b/src/parsers/bind.c new file mode 100644 index 0000000..ccbf6da --- /dev/null +++ b/src/parsers/bind.c @@ -0,0 +1,42 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p; + HContinuation k; + void *env; +} BindEnv; + +static HParseResult *parse_bind(void *be_, HParseState *state) { + BindEnv *be = be_; + + HParseResult *res = h_do_parse(be->p, state); + if(!res) + return NULL; + + HParser *kx = be->k(res->ast, be->env); + return h_do_parse(kx, state); +} + +static const HParserVtable bind_vt = { + .parse = parse_bind, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser *h_bind(const HParser *p, HContinuation k, void *env) +{ + return h_bind__m(&system_allocator, p, k, env); +} + +HParser *h_bind__m(HAllocator *mm__, + const HParser *p, HContinuation k, void *env) +{ + BindEnv *be = h_new(BindEnv, 1); + + be->p = p; + be->k = k; + be->env = env; + + return h_new_parser(mm__, &bind_vt, be); +} diff --git a/src/t_parser.c b/src/t_parser.c index 191996c..25495e3 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -568,6 +568,34 @@ static void test_permutation(gconstpointer backend) { g_check_parse_failed(po2, be, "ccc", 3); } +static HParser *f_test_bind(const HParsedToken *p, void *env) { + uint8_t one = (uintptr_t)env; + + assert(p); + assert(p->token_type == TT_SEQUENCE); + + int v=0; + for(size_t i=0; iseq->used; i++) { + assert(p->seq->elements[i]->token_type == TT_UINT); + v = v*10 + p->seq->elements[i]->uint - '0'; + } + + return h_ch(one - 1 + v); +} +static void test_bind(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *digit = h_ch_range('0', '9'); + const HParser *nat = h_many1(digit); + const HParser *p = h_bind(nat, f_test_bind, (void *)(uintptr_t)'a'); + + g_check_parse_match(p, be, "1a", 2, "u0x61"); + g_check_parse_match(p, be, "2b", 2, "u0x62"); + g_check_parse_match(p, be, "26z", 3, 
"u0x7a"); + g_check_parse_failed(p, be, "1x", 2); + g_check_parse_failed(p, be, "29y", 3); + g_check_parse_failed(p, be, "@", 1); +} + void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -617,6 +645,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness); g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); + g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); From 78266af453d5d9e72235a5c7acc8d3641550f6e3 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 23 Jan 2015 22:06:43 +0100 Subject: [PATCH 15/23] allow h_bind's function argument to return NULL as a shortcut for h_nothing_p() --- src/hammer.h | 2 +- src/parsers/bind.c | 3 +++ src/t_parser.c | 27 +++++++++++++++++---------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/hammer.h b/src/hammer.h index 716ab6d..c974e1f 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -675,7 +675,7 @@ HAMMER_FN_DECL(HParser*, h_get_value, const char* name); * Sequencing where later parsers may depend on the result(s) of earlier ones. * * Run p and call the result x. Then run k(env,x). Fail if p fails or if - * k(env,x) fails. + * k(env,x) fails or if k(env,x) is NULL. * * Result: the result of k(x,env). 
*/ diff --git a/src/parsers/bind.c b/src/parsers/bind.c index ccbf6da..de0a8ec 100644 --- a/src/parsers/bind.c +++ b/src/parsers/bind.c @@ -14,6 +14,9 @@ static HParseResult *parse_bind(void *be_, HParseState *state) { return NULL; HParser *kx = be->k(res->ast, be->env); + if(!kx) + return NULL; + return h_do_parse(kx, state); } diff --git a/src/t_parser.c b/src/t_parser.c index 25495e3..c16e384 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -569,18 +569,23 @@ static void test_permutation(gconstpointer backend) { } static HParser *f_test_bind(const HParsedToken *p, void *env) { - uint8_t one = (uintptr_t)env; - - assert(p); - assert(p->token_type == TT_SEQUENCE); + uint8_t one = (uintptr_t)env; + + assert(p); + assert(p->token_type == TT_SEQUENCE); - int v=0; - for(size_t i=0; iseq->used; i++) { - assert(p->seq->elements[i]->token_type == TT_UINT); - v = v*10 + p->seq->elements[i]->uint - '0'; - } + int v=0; + for(size_t i=0; iseq->used; i++) { + assert(p->seq->elements[i]->token_type == TT_UINT); + v = v*10 + p->seq->elements[i]->uint - '0'; + } - return h_ch(one - 1 + v); + if(v > 26) + return h_nothing_p(); // fail + else if(v > 127) + return NULL; // equivalent to the above + else + return h_ch(one - 1 + v); } static void test_bind(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); @@ -594,6 +599,8 @@ static void test_bind(gconstpointer backend) { g_check_parse_failed(p, be, "1x", 2); g_check_parse_failed(p, be, "29y", 3); g_check_parse_failed(p, be, "@", 1); + g_check_parse_failed(p, be, "27{", 3); + g_check_parse_failed(p, be, "272{", 4); } void register_parser_tests(void) { From c9b2957832c67df2c495a0046c468b4b0f69e5e0 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Fri, 30 Jan 2015 20:35:14 +0100 Subject: [PATCH 16/23] add g_check_cmp_int --- src/test_suite.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test_suite.h b/src/test_suite.h index 1f983c7..9a58a20 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -212,6 +212,7 @@ +#define g_check_cmp_int(n1, op, n2) g_check_inttype("%d", int, n1, op, n2) #define g_check_cmp_int32(n1, op, n2) g_check_inttype("%d", int32_t, n1, op, n2) #define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2) #define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2) From cbc7358b66ed3b7885045c43072f29fe6945f023 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 30 Jan 2015 20:35:35 +0100 Subject: [PATCH 17/23] add a test that shows a bug in h_seq_index_path --- src/t_misc.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/t_misc.c b/src/t_misc.c index 92c2b32..31d82ea 100644 --- a/src/t_misc.c +++ b/src/t_misc.c @@ -2,6 +2,8 @@ #include #include "test_suite.h" #include "hammer.h" +#include "internal.h" +#include "glue.h" static void test_tt_user(void) { g_check_cmp_int32(TT_USER, >, TT_NONE); @@ -29,7 +31,28 @@ static void test_tt_registry(void) { g_check_cmp_int32(h_get_token_type_number("com.upstandinghackers.test.unkown_token_type"), ==, 0); } +static void test_seq_index_path(void) { + HArena *arena = h_new_arena(&system_allocator, 0); + + HParsedToken *seq = h_make_seqn(arena, 1); + HParsedToken *seq2 = h_make_seqn(arena, 2); + HParsedToken *tok1 = h_make_uint(arena, 41); + HParsedToken *tok2 = h_make_uint(arena, 42); + + seq->seq->elements[0] = seq2; + seq->seq->used = 1; + seq2->seq->elements[0] = tok1; + seq2->seq->elements[1] = tok2; + seq2->seq->used = 2; + + g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE); + g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT); + g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, 
==, 41); + g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42); +} + void register_misc_tests(void) { g_test_add_func("/core/misc/tt_user", test_tt_user); g_test_add_func("/core/misc/tt_registry", test_tt_registry); + g_test_add_func("/core/misc/seq_index_path", test_seq_index_path); } From 341123e2ea4b2fae8a4e7de6d991a57702d017f4 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 30 Jan 2015 20:36:37 +0100 Subject: [PATCH 18/23] descend properly in h_seq_index_path --- src/glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glue.c b/src/glue.c index c2d915a..cb3a7ce 100644 --- a/src/glue.c +++ b/src/glue.c @@ -173,7 +173,7 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) int j; while((j = va_arg(va, int)) >= 0) - ret = h_seq_index(p, j); + ret = h_seq_index(ret, j); return ret; } From 12771a5655b0739aec4443802d57fac2ce9aa89b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Fri, 30 Jan 2015 20:42:11 +0100 Subject: [PATCH 19/23] oh, there's t_regression.c now, neat. 
--- src/t_misc.c | 23 ----------------------- src/t_regression.c | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/t_misc.c b/src/t_misc.c index 31d82ea..92c2b32 100644 --- a/src/t_misc.c +++ b/src/t_misc.c @@ -2,8 +2,6 @@ #include #include "test_suite.h" #include "hammer.h" -#include "internal.h" -#include "glue.h" static void test_tt_user(void) { g_check_cmp_int32(TT_USER, >, TT_NONE); @@ -31,28 +29,7 @@ static void test_tt_registry(void) { g_check_cmp_int32(h_get_token_type_number("com.upstandinghackers.test.unkown_token_type"), ==, 0); } -static void test_seq_index_path(void) { - HArena *arena = h_new_arena(&system_allocator, 0); - - HParsedToken *seq = h_make_seqn(arena, 1); - HParsedToken *seq2 = h_make_seqn(arena, 2); - HParsedToken *tok1 = h_make_uint(arena, 41); - HParsedToken *tok2 = h_make_uint(arena, 42); - - seq->seq->elements[0] = seq2; - seq->seq->used = 1; - seq2->seq->elements[0] = tok1; - seq2->seq->elements[1] = tok2; - seq2->seq->used = 2; - - g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE); - g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT); - g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41); - g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42); -} - void register_misc_tests(void) { g_test_add_func("/core/misc/tt_user", test_tt_user); g_test_add_func("/core/misc/tt_registry", test_tt_registry); - g_test_add_func("/core/misc/seq_index_path", test_seq_index_path); } diff --git a/src/t_regression.c b/src/t_regression.c index e74f16b..e342546 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -3,6 +3,7 @@ #include "glue.h" #include "hammer.h" #include "test_suite.h" +#include "internal.h" static void test_bug118(void) { // https://github.com/UpstandingHackers/hammer/issues/118 @@ -33,6 +34,27 @@ static void test_bug118(void) { h_parse_result_free(p); } +static void test_seq_index_path(void) { + HArena *arena = 
h_new_arena(&system_allocator, 0); + + HParsedToken *seq = h_make_seqn(arena, 1); + HParsedToken *seq2 = h_make_seqn(arena, 2); + HParsedToken *tok1 = h_make_uint(arena, 41); + HParsedToken *tok2 = h_make_uint(arena, 42); + + seq->seq->elements[0] = seq2; + seq->seq->used = 1; + seq2->seq->elements[0] = tok1; + seq2->seq->elements[1] = tok2; + seq2->seq->used = 2; + + g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE); + g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT); + g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41); + g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42); +} + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); + g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); } From 2eaf5d90526ed09fbcd39c23a953efa726746746 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Thu, 12 Feb 2015 01:40:45 +0100 Subject: [PATCH 20/23] add some missing type conversions to make h_read_bits work with count>32 --- src/bitreader.c | 4 ++-- src/t_regression.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/bitreader.c b/src/bitreader.c index 3627df5..fe21e43 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -62,7 +62,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int i; for (i = 0; count > 0; i += 8) { count -= 8; - out |= state->input[state->index++] << i; + out |= (int64_t)state->input[state->index++] << i; } } } else { @@ -99,7 +99,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (state->endianness & BYTE_BIG_ENDIAN) { out = out << segment_len | segment; } else { // BYTE_LITTLE_ENDIAN - out |= segment << offset; + out |= (int64_t)segment << offset; offset += segment_len; } count -= segment_len; diff --git a/src/t_regression.c b/src/t_regression.c index e342546..d05cbde 100644 --- 
a/src/t_regression.c +++ b/src/t_regression.c @@ -54,7 +54,49 @@ static void test_seq_index_path(void) { g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42); } +#define MK_INPUT_STREAM(buf,len,endianness_) \ + { \ + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } + +static void test_read_bits_48(void) { + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 32, false), ==, 0x78563412); + g_check_cmp_int64(h_read_bits(&is, 16, false), ==, 0xBC9A); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 31, false), ==, 0x78563412); + g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x17934); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 33, false), ==, 0x78563412); + g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x5E4D); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 36, false), ==, 0xA78563412); + g_check_cmp_int64(h_read_bits(&is, 12, false), ==, 0xBC9); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 40, false), ==, 0x9A78563412); + g_check_cmp_int64(h_read_bits(&is, 8, false), ==, 0xBC); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 48, false), ==, 0xBC9A78563412); + } +} + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); + 
g_test_add_func("/core/regression/read_bits_48", test_read_bits_48); } From 1ce0ef9b8a3fb1bfc4966dd097e8c00ba66031e0 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Mon, 23 Feb 2015 14:10:29 +0100 Subject: [PATCH 21/23] pass a temporary arena allocator to the continuation in h_bind --- src/hammer.h | 7 ++++++- src/parsers/bind.c | 44 ++++++++++++++++++++++++++++++++++++++++---- src/t_parser.c | 10 +++++----- 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/hammer.h b/src/hammer.h index 28167e7..f893f10 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -159,8 +159,13 @@ typedef bool (*HPredicate)(HParseResult *p, void* user_data); * Type of a parser that depends on the result of a previous parser, * used in h_bind(). The void* argument is passed through from h_bind() and can * be used to arbitrarily parameterize the function further. + * + * The HAllocator* argument gives access to temporary memory and is to be used + * for any allocations inside the function. Specifically, construction of any + * HParsers should use the '__m' combinator variants with the given allocator. + * Anything allocated thus will be freed by 'h_bind'. */ -typedef HParser* (*HContinuation)(const HParsedToken *x, void *env); +typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env); // {{{ Stuff for benchmarking typedef struct HParserTestcase_ { diff --git a/src/parsers/bind.c b/src/parsers/bind.c index de0a8ec..efd8981 100644 --- a/src/parsers/bind.c +++ b/src/parsers/bind.c @@ -4,8 +4,34 @@ typedef struct { const HParser *p; HContinuation k; void *env; + HAllocator *mm__; } BindEnv; +// an HAllocator backed by an HArena +typedef struct { + HAllocator allocator; // inherit XXX is this the proper way to do it? 
+ HArena *arena; +} ArenaAllocator; + +void *aa_alloc(HAllocator *allocator, size_t size) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + return h_arena_malloc(arena, size); +} + +void *aa_realloc(HAllocator *allocator, void *ptr, size_t size) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + assert(0); // XXX realloc for arena allocator + return NULL; +} + +void aa_free(HAllocator *allocator, void *ptr) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + h_arena_free(arena, ptr); +} + static HParseResult *parse_bind(void *be_, HParseState *state) { BindEnv *be = be_; @@ -13,11 +39,20 @@ static HParseResult *parse_bind(void *be_, HParseState *state) { if(!res) return NULL; - HParser *kx = be->k(res->ast, be->env); - if(!kx) - return NULL; + // create a temporary arena allocator for the continuation + HArena *arena = h_new_arena(be->mm__, 0); + ArenaAllocator aa = {{aa_alloc, aa_realloc, aa_free}, arena}; - return h_do_parse(kx, state); + HParser *kx = be->k((HAllocator *)&aa, res->ast, be->env); + if(!kx) { + h_delete_arena(arena); + return NULL; + } + + res = h_do_parse(kx, state); + + h_delete_arena(arena); + return res; } static const HParserVtable bind_vt = { @@ -40,6 +75,7 @@ HParser *h_bind__m(HAllocator *mm__, be->p = p; be->k = k; be->env = env; + be->mm__ = mm__; return h_new_parser(mm__, &bind_vt, be); } diff --git a/src/t_parser.c b/src/t_parser.c index c16e384..df9567e 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -568,7 +568,7 @@ static void test_permutation(gconstpointer backend) { g_check_parse_failed(po2, be, "ccc", 3); } -static HParser *f_test_bind(const HParsedToken *p, void *env) { +static HParser *k_test_bind(HAllocator *mm__, const HParsedToken *p, void *env) { uint8_t one = (uintptr_t)env; assert(p); @@ -581,17 +581,17 @@ static HParser *f_test_bind(const HParsedToken *p, void *env) { } if(v > 26) - return h_nothing_p(); // fail + return h_nothing_p__m(mm__); // fail else if(v > 127) - return NULL; // 
equivalent to the above + return NULL; // equivalent to the above else - return h_ch(one - 1 + v); + return h_ch__m(mm__, one - 1 + v); } static void test_bind(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); const HParser *digit = h_ch_range('0', '9'); const HParser *nat = h_many1(digit); - const HParser *p = h_bind(nat, f_test_bind, (void *)(uintptr_t)'a'); + const HParser *p = h_bind(nat, k_test_bind, (void *)(uintptr_t)'a'); g_check_parse_match(p, be, "1a", 2, "u0x61"); g_check_parse_match(p, be, "2b", 2, "u0x62"); From be80fdedfccf76a8662bd02b37677983af7b1312 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Wed, 4 Mar 2015 14:52:28 +0100 Subject: [PATCH 22/23] make arena-allocator implementation functions static --- src/parsers/bind.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parsers/bind.c b/src/parsers/bind.c index efd8981..8083c8b 100644 --- a/src/parsers/bind.c +++ b/src/parsers/bind.c @@ -13,20 +13,20 @@ typedef struct { HArena *arena; } ArenaAllocator; -void *aa_alloc(HAllocator *allocator, size_t size) +static void *aa_alloc(HAllocator *allocator, size_t size) { HArena *arena = ((ArenaAllocator *)allocator)->arena; return h_arena_malloc(arena, size); } -void *aa_realloc(HAllocator *allocator, void *ptr, size_t size) +static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size) { HArena *arena = ((ArenaAllocator *)allocator)->arena; assert(0); // XXX realloc for arena allocator return NULL; } -void aa_free(HAllocator *allocator, void *ptr) +static void aa_free(HAllocator *allocator, void *ptr) { HArena *arena = ((ArenaAllocator *)allocator)->arena; h_arena_free(arena, ptr); From aa2dc7bcf5c326f1cd870670bdd6b88d61fabd5a Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg" Date: Wed, 4 Mar 2015 14:54:42 +0100 Subject: [PATCH 23/23] put the message into the assert in aa_realloc --- src/parsers/bind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsers/bind.c b/src/parsers/bind.c index 8083c8b..f024a82 100644 --- a/src/parsers/bind.c +++ b/src/parsers/bind.c @@ -22,7 +22,7 @@ static void *aa_alloc(HAllocator *allocator, size_t size) static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size) { HArena *arena = ((ArenaAllocator *)allocator)->arena; - assert(0); // XXX realloc for arena allocator + assert(((void)"XXX need realloc for arena allocator", 0)); return NULL; }