commit
8043c7f7b4
26 changed files with 761 additions and 88 deletions
|
|
@ -60,6 +60,8 @@ Just `#include <hammer/hammer.h>` (also `#include <hammer/glue.h>` if you plan t
|
|||
|
||||
If you've installed Hammer system-wide, you can use `pkg-config` in the usual way.
|
||||
|
||||
For documentation, see the [user guide](https://github.com/UpstandingHackers/hammer/wiki/User-guide).
|
||||
|
||||
Examples
|
||||
========
|
||||
The `examples/` directory contains some simple examples, currently including:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,11 @@ tools = ['default', 'scanreplace']
|
|||
if 'dotnet' in ARGUMENTS.get('bindings', []):
|
||||
tools.append('csharp/mono')
|
||||
|
||||
env = Environment(ENV = {'PATH' : os.environ['PATH']},
|
||||
envvars = {'PATH' : os.environ['PATH']}
|
||||
if 'PKG_CONFIG_PATH' in os.environ:
|
||||
envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH']
|
||||
|
||||
env = Environment(ENV = envvars,
|
||||
variables = vars,
|
||||
tools=tools,
|
||||
toolpath=['tools'])
|
||||
|
|
|
|||
|
|
@ -29,9 +29,9 @@ HParsedToken *act_bsfdig(const HParseResult *p, void* user_data)
|
|||
|
||||
uint8_t c = H_CAST_UINT(p->ast);
|
||||
|
||||
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||
if(c >= 0x41 && c <= 0x5A) // A-Z
|
||||
res->uint = c - 0x41;
|
||||
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||
else if(c >= 0x61 && c <= 0x7A) // a-z
|
||||
res->uint = c - 0x61 + 26;
|
||||
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||
res->uint = c - 0x30 + 52;
|
||||
|
|
|
|||
|
|
@ -31,9 +31,9 @@ uint8_t bsfdig_value(const HParsedToken *p)
|
|||
|
||||
if(p && p->token_type == TT_UINT) {
|
||||
uint8_t c = p->uint;
|
||||
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||
if(c >= 0x41 && c <= 0x5A) // A-Z
|
||||
value = c - 0x41;
|
||||
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||
else if(c >= 0x61 && c <= 0x7A) // a-z
|
||||
value = c - 0x61 + 26;
|
||||
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||
value = c - 0x30 + 52;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ parsers = ['parsers/%s.c'%s for s in
|
|||
['action',
|
||||
'and',
|
||||
'attr_bool',
|
||||
'bind',
|
||||
'bits',
|
||||
'butnot',
|
||||
'ch',
|
||||
|
|
@ -39,11 +40,13 @@ parsers = ['parsers/%s.c'%s for s in
|
|||
'not',
|
||||
'nothing',
|
||||
'optional',
|
||||
'permutation',
|
||||
'sequence',
|
||||
'token',
|
||||
'unimplemented',
|
||||
'whitespace',
|
||||
'xor']]
|
||||
'xor',
|
||||
'value']]
|
||||
|
||||
backends = ['backends/%s.c' % s for s in
|
||||
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
|
||||
|
|
@ -67,7 +70,8 @@ ctests = ['t_benchmark.c',
|
|||
't_bitwriter.c',
|
||||
't_parser.c',
|
||||
't_grammar.c',
|
||||
't_misc.c']
|
||||
't_misc.c',
|
||||
't_regression.c']
|
||||
|
||||
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
|
||||
libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)
|
||||
|
|
|
|||
|
|
@ -33,11 +33,22 @@ typedef struct HAllocator_ {
|
|||
typedef struct HArena_ HArena ; // hidden implementation
|
||||
|
||||
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
|
||||
#ifndef SWIG
|
||||
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
|
||||
|
||||
#if defined __llvm__
|
||||
# if __has_attribute(malloc)
|
||||
# define ATTR_MALLOC(n) __attribute__((malloc))
|
||||
# else
|
||||
# define ATTR_MALLOC(n)
|
||||
# endif
|
||||
#elif defined SWIG
|
||||
# define ATTR_MALLOC(n)
|
||||
#elif defined __GNUC__
|
||||
# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(2)))
|
||||
#else
|
||||
void* h_arena_malloc(HArena *arena, size_t count);
|
||||
# define ATTR_MALLOC(n)
|
||||
#endif
|
||||
|
||||
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
|
||||
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
|
||||
void h_delete_arena(HArena *arena);
|
||||
|
||||
|
|
|
|||
|
|
@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
|
|||
if (tmp_res) {
|
||||
tmp_res->arena = state->arena;
|
||||
if (!state->input_stream.overrun) {
|
||||
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
|
||||
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
|
||||
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
|
||||
else
|
||||
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
|
||||
size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
|
||||
if (tmp_res->bit_length == 0) { // Don't modify if forwarding.
|
||||
tmp_res->bit_length = bit_length;
|
||||
}
|
||||
if (tmp_res->ast && tmp_res->ast->bit_length != 0) {
|
||||
((HParsedToken*)(tmp_res->ast))->bit_length = bit_length;
|
||||
}
|
||||
} else
|
||||
tmp_res->bit_length = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include "hammer.h"
|
||||
|
|
@ -14,6 +15,14 @@
|
|||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
static const char* HParserBackendNames[] = {
|
||||
"Packrat",
|
||||
"Regular",
|
||||
"LL(k)",
|
||||
"LALR",
|
||||
"GLR"
|
||||
};
|
||||
|
||||
void h_benchmark_clock_gettime(struct timespec *ts) {
|
||||
if (ts == NULL)
|
||||
return;
|
||||
|
|
@ -112,6 +121,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
|
|||
ret->results[backend].failed_testcases++;
|
||||
}
|
||||
h_parse_result_free(res);
|
||||
free(res_unamb);
|
||||
}
|
||||
|
||||
if (tc_failed > 0) {
|
||||
|
|
|
|||
|
|
@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem.
|
||||
// not in danger of overflowing, so add in bits
|
||||
// add in number of bits...
|
||||
if (state->endianness & BIT_BIG_ENDIAN)
|
||||
bits_left = (bits_left << 3) - 8 + state->bit_offset;
|
||||
else
|
||||
bits_left = (bits_left << 3) - state->bit_offset;
|
||||
bits_left = (bits_left << 3) - state->bit_offset - state->margin;
|
||||
if (bits_left < count) {
|
||||
if (state->endianness & BYTE_BIG_ENDIAN)
|
||||
final_shift = count - bits_left;
|
||||
|
|
@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
final_shift = 0;
|
||||
}
|
||||
|
||||
if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) {
|
||||
if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) {
|
||||
// fast path
|
||||
if (state->endianness & BYTE_BIG_ENDIAN) {
|
||||
while (count > 0) {
|
||||
|
|
@ -65,7 +62,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
int i;
|
||||
for (i = 0; count > 0; i += 8) {
|
||||
count -= 8;
|
||||
out |= state->input[state->index++] << i;
|
||||
out |= (int64_t)state->input[state->index++] << i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
int segment, segment_len;
|
||||
// Read a segment...
|
||||
if (state->endianness & BIT_BIG_ENDIAN) {
|
||||
if (count >= state->bit_offset) {
|
||||
segment_len = state->bit_offset;
|
||||
state->bit_offset = 8;
|
||||
segment = state->input[state->index] & ((1 << segment_len) - 1);
|
||||
state->index++;
|
||||
} else {
|
||||
segment_len = count;
|
||||
state->bit_offset -= count;
|
||||
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);
|
||||
}
|
||||
} else { // BIT_LITTLE_ENDIAN
|
||||
if (count + state->bit_offset >= 8) {
|
||||
segment_len = 8 - state->bit_offset;
|
||||
segment = (state->input[state->index] >> state->bit_offset);
|
||||
if (count + state->bit_offset + state->margin >= 8) {
|
||||
segment_len = 8 - state->bit_offset - state->margin;
|
||||
segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1);
|
||||
state->index++;
|
||||
state->bit_offset = 0;
|
||||
state->margin = 0;
|
||||
} else {
|
||||
segment_len = count;
|
||||
state->bit_offset += count;
|
||||
segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1);
|
||||
}
|
||||
} else { // BIT_LITTLE_ENDIAN
|
||||
if (count + state->bit_offset + state->margin >= 8) {
|
||||
segment_len = 8 - state->bit_offset - state->margin;
|
||||
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);
|
||||
state->index++;
|
||||
state->bit_offset = 0;
|
||||
state->margin = 0;
|
||||
} else {
|
||||
segment_len = count;
|
||||
segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1);
|
||||
|
|
@ -100,7 +99,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
if (state->endianness & BYTE_BIG_ENDIAN) {
|
||||
out = out << segment_len | segment;
|
||||
} else { // BYTE_LITTLE_ENDIAN
|
||||
out |= segment << offset;
|
||||
out |= (int64_t)segment << offset;
|
||||
offset += segment_len;
|
||||
}
|
||||
count -= segment_len;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "internal.h"
|
||||
#include "hammer.h"
|
||||
#include "allocator.h"
|
||||
#include "parsers/parser_internal.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
|
@ -393,6 +394,28 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) {
|
|||
return hash;
|
||||
}
|
||||
|
||||
/*
 * Store `value` under `key` in the topmost symbol table of this parse.
 *
 * The table stack is created lazily on the first call: a new slist is
 * allocated from the parse arena and a single hash table (using h_eq_ptr /
 * h_hash_ptr for keys) is pushed onto it.
 *
 * Storing a key that is already present in the top table trips the assertion.
 */
void h_symbol_put(HParseState *state, const char* key, void *value) {
  if (state->symbol_table == NULL) {
    state->symbol_table = h_slist_new(state->arena);
    HHashTable *first_scope = h_hashtable_new(state->arena, h_eq_ptr, h_hash_ptr);
    h_slist_push(state->symbol_table, first_scope);
  }

  HHashTable *scope = h_slist_top(state->symbol_table);
  assert(!h_hashtable_present(scope, key));
  h_hashtable_put(scope, key, value);
}
|
||||
|
||||
/*
 * Look up `key` in the topmost symbol table of this parse.
 *
 * Returns whatever h_hashtable_get yields for the key, or NULL when no
 * symbol table has been created yet or the top of the stack is NULL.
 */
void* h_symbol_get(HParseState *state, const char* key) {
  if (!state->symbol_table)
    return NULL;

  HHashTable *scope = h_slist_top(state->symbol_table);
  if (!scope)
    return NULL;

  return h_hashtable_get(scope, key);
}
|
||||
|
||||
HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
|
||||
HSArray *ret = h_new(HSArray, 1);
|
||||
ret->capacity = size;
|
||||
|
|
|
|||
|
|
@ -173,7 +173,7 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
|
|||
int j;
|
||||
|
||||
while((j = va_arg(va, int)) >= 0)
|
||||
ret = h_seq_index(p, j);
|
||||
ret = h_seq_index(ret, j);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
21
src/glue.h
21
src/glue.h
|
|
@ -11,7 +11,8 @@
|
|||
//
|
||||
// A few standard semantic actions are defined below. The H_ACT_APPLY macro
|
||||
// allows semantic actions to be defined by "partial application" of
|
||||
// a generic action to fixed paramters.
|
||||
// a generic action to fixed parameters. H_VALIDATE_APPLY is similar for
|
||||
// h_attr_bool.
|
||||
//
|
||||
// The definition of more complex semantic actions will usually consist of
|
||||
// extracting data from the given parse tree and constructing a token of custom
|
||||
|
|
@ -66,13 +67,13 @@
|
|||
h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL)
|
||||
#define H_AVRULE(rule, def) HParser *rule = \
|
||||
h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL)
|
||||
#define H_ADRULE(rule, def, data) HParser *rule = \
|
||||
#define H_ADRULE(rule, def, data) HParser *rule = \
|
||||
h_action(def, act_ ## rule, data)
|
||||
#define H_VDRULE(rule, def, data) HParser *rule = \
|
||||
#define H_VDRULE(rule, def, data) HParser *rule = \
|
||||
h_attr_bool(def, validate_ ## rule, data)
|
||||
#define H_VADRULE(rule, def, data) HParser *rule = \
|
||||
#define H_VADRULE(rule, def, data) HParser *rule = \
|
||||
h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data)
|
||||
#define H_AVDRULE(rule, def, data) HParser *rule = \
|
||||
#define H_AVDRULE(rule, def, data) HParser *rule = \
|
||||
h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data)
|
||||
|
||||
|
||||
|
|
@ -109,8 +110,14 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
|
|||
// Define 'myaction' as a specialization of 'paction' by supplying the leading
|
||||
// parameters.
|
||||
#define H_ACT_APPLY(myaction, paction, ...) \
|
||||
HParsedToken *myaction(const HParseResult *p, void* user_data) { \
|
||||
return paction(__VA_ARGS__, p, user_data); \
|
||||
HParsedToken *myaction(const HParseResult *p, void* user_data) { \
|
||||
return paction(__VA_ARGS__, p, user_data); \
|
||||
}
|
||||
|
||||
// Similar, but for validations.
|
||||
#define H_VALIDATE_APPLY(myvalidation, pvalidation, ...) \
|
||||
bool myvalidation(HParseResult* p, void* user_data) { \
|
||||
return pvalidation(__VA_ARGS__, p, user_data); \
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t*
|
|||
// Set up a parse state...
|
||||
HInputStream input_stream = {
|
||||
.index = 0,
|
||||
.bit_offset = 8,
|
||||
.bit_offset = 0,
|
||||
.overrun = 0,
|
||||
.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
|
||||
.length = length,
|
||||
|
|
|
|||
106
src/hammer.h
106
src/hammer.h
|
|
@ -46,14 +46,6 @@ typedef enum HParserBackend_ {
|
|||
PB_MAX = PB_GLR
|
||||
} HParserBackend;
|
||||
|
||||
static const char* HParserBackendNames[] = {
|
||||
"Packrat",
|
||||
"Regular",
|
||||
"LL(k)",
|
||||
"LALR",
|
||||
"GLR"
|
||||
};
|
||||
|
||||
typedef enum HTokenType_ {
|
||||
// Before you change the explicit values of these, think of the poor bindings ;_;
|
||||
TT_NONE = 1,
|
||||
|
|
@ -107,6 +99,7 @@ typedef struct HParsedToken_ {
|
|||
HTokenData token_data;
|
||||
#endif
|
||||
size_t index;
|
||||
size_t bit_length;
|
||||
char bit_offset;
|
||||
} HParsedToken;
|
||||
|
||||
|
|
@ -130,6 +123,19 @@ typedef struct HParseResult_ {
|
|||
*/
|
||||
typedef struct HBitWriter_ HBitWriter;
|
||||
|
||||
typedef struct HCFChoice_ HCFChoice;
|
||||
typedef struct HRVMProg_ HRVMProg;
|
||||
typedef struct HParserVtable_ HParserVtable;
|
||||
|
||||
// TODO: Make this internal
|
||||
typedef struct HParser_ {
|
||||
const HParserVtable *vtable;
|
||||
HParserBackend backend;
|
||||
void* backend_data;
|
||||
void *env;
|
||||
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
||||
} HParser;
|
||||
|
||||
/**
|
||||
* Type of an action to apply to an AST, used in the action() parser.
|
||||
* It can be any (user-defined) function that takes a HParseResult*
|
||||
|
|
@ -149,18 +155,17 @@ typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
|
|||
*/
|
||||
typedef bool (*HPredicate)(HParseResult *p, void* user_data);
|
||||
|
||||
typedef struct HCFChoice_ HCFChoice;
|
||||
typedef struct HRVMProg_ HRVMProg;
|
||||
typedef struct HParserVtable_ HParserVtable;
|
||||
|
||||
// TODO: Make this internal
|
||||
typedef struct HParser_ {
|
||||
const HParserVtable *vtable;
|
||||
HParserBackend backend;
|
||||
void* backend_data;
|
||||
void *env;
|
||||
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
||||
} HParser;
|
||||
/**
|
||||
* Type of a parser that depends on the result of a previous parser,
|
||||
* used in h_bind(). The void* argument is passed through from h_bind() and can
|
||||
* be used to arbitrarily parameterize the function further.
|
||||
*
|
||||
* The HAllocator* argument gives access to temporary memory and is to be used
|
||||
* for any allocations inside the function. Specifically, construction of any
|
||||
* HParsers should use the '__m' combinator variants with the given allocator.
|
||||
* Anything allocated thus will be freed by 'h_bind'.
|
||||
*/
|
||||
typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
|
||||
|
||||
// {{{ Stuff for benchmarking
|
||||
typedef struct HParserTestcase_ {
|
||||
|
|
@ -437,6 +442,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa
|
|||
*/
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p);
|
||||
|
||||
/**
|
||||
* Given a null-terminated list of parsers, match a permutation phrase of these
|
||||
* parsers, i.e. match all parsers exactly once in any order.
|
||||
*
|
||||
* If multiple orders would match, the lexically smallest permutation is used;
|
||||
* in other words, at any step the remaining available parsers are tried in
|
||||
* the order in which they appear in the arguments.
|
||||
*
|
||||
* As an exception, 'h_optional' parsers (actually those that return a result
|
||||
* of token type TT_NONE) are detected and the algorithm will try to match them
|
||||
* with a non-empty result. Specifically, a result of TT_NONE is treated as a
|
||||
* non-match as long as any other argument matches.
|
||||
*
|
||||
* Other parsers that succeed on any input (e.g. h_many), that match the same
|
||||
* input as others, or that match input which is a prefix of another match can
|
||||
* lead to unexpected results and should probably not be used as arguments.
|
||||
*
|
||||
* The result is a sequence of the same length as the argument list.
|
||||
* Each parser's result is placed at that parser's index in the arguments.
|
||||
* The permutation itself (the order in which the arguments were matched) is
|
||||
* not returned.
|
||||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p);
|
||||
|
||||
/**
|
||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||
* cases:
|
||||
|
|
@ -621,6 +652,41 @@ HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
|
|||
*/
|
||||
HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p);
|
||||
|
||||
/**
|
||||
* The 'h_put_value' combinator stashes the result of the parser
|
||||
* it wraps in a symbol table in the parse state, so that non-
|
||||
* local actions and predicates can access this value.
|
||||
*
|
||||
* Try not to use this combinator if you can avoid it.
|
||||
*
|
||||
* Result token type: p's token type if name was not already in
|
||||
* the symbol table. It is an error, and thus a NULL result (and
|
||||
* parse failure), to attempt to rename a symbol.
|
||||
*/
|
||||
HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name);
|
||||
|
||||
/**
|
||||
* The 'h_get_value' combinator retrieves a named HParseResult that
|
||||
* was previously stashed in the parse state.
|
||||
*
|
||||
* Try not to use this combinator if you can avoid it.
|
||||
*
|
||||
* Result token type: whatever the stashed HParseResult is, if
|
||||
* present. If absent, NULL (and thus parse failure).
|
||||
*/
|
||||
HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
|
||||
|
||||
/**
|
||||
* Monadic bind for HParsers, i.e.:
|
||||
* Sequencing where later parsers may depend on the result(s) of earlier ones.
|
||||
*
|
||||
* Run p and call the result x. Then run k(x,env). Fail if p fails or if
|
||||
* k(x,env) fails or if k(x,env) is NULL.
|
||||
*
|
||||
* Result: the result of k(x,env).
|
||||
*/
|
||||
HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);
|
||||
|
||||
/**
|
||||
* Free the memory allocated to an HParseResult when it is no longer needed.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -70,6 +70,8 @@ typedef struct HInputStream_ {
|
|||
size_t index;
|
||||
size_t length;
|
||||
char bit_offset;
|
||||
char margin; // The number of bits on the end that is being read
|
||||
// towards that should be ignored.
|
||||
char endianness;
|
||||
char overrun;
|
||||
} HInputStream;
|
||||
|
|
@ -190,6 +192,7 @@ typedef struct HHashTable_ {
|
|||
* arena - the arena that has been allocated for the parse this state is in.
|
||||
* lr_stack - a stack of HLeftRec's, used in Warth's recursion
|
||||
* recursion_heads - table of recursion heads. Keys are HParserCacheKey's with only an HInputStream (parser can be NULL), values are HRecursionHead's.
|
||||
* symbol_table - stack of tables of values that have been stashed in the context of this parse.
|
||||
*
|
||||
*/
|
||||
|
||||
|
|
@ -199,6 +202,7 @@ struct HParseState_ {
|
|||
HArena * arena;
|
||||
HSlist *lr_stack;
|
||||
HHashTable *recursion_heads;
|
||||
HSlist *symbol_table; // its contents are HHashTables
|
||||
};
|
||||
|
||||
typedef struct HParserBackendVTable_ {
|
||||
|
|
@ -293,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable;
|
|||
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||
|
||||
int64_t h_read_bits(HInputStream* state, int count, char signed_p);
|
||||
// Position of the stream expressed in bits: eight bits per whole byte of
// `index`, plus the current `bit_offset`, plus the `margin`.
static inline size_t h_input_stream_pos(HInputStream* state) {
  size_t bits = state->index * 8;
  bits += state->bit_offset;
  bits += state->margin;
  return bits;
}
|
||||
// need to decide if we want to make this public.
|
||||
HParseResult* h_do_parse(const HParser* parser, HParseState *state);
|
||||
void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);
|
||||
|
|
@ -316,6 +323,7 @@ HSlist* h_slist_new(HArena *arena);
|
|||
HSlist* h_slist_copy(HSlist *slist);
|
||||
void* h_slist_pop(HSlist *slist);
|
||||
void* h_slist_drop(HSlist *slist);
|
||||
static inline void* h_slist_top(HSlist *sl) { return sl->head->elem; }
|
||||
void h_slist_push(HSlist *slist, void* item);
|
||||
bool h_slist_find(HSlist *slist, const void* item);
|
||||
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
|
||||
|
|
@ -347,8 +355,10 @@ bool h_eq_ptr(const void *p, const void *q);
|
|||
HHashValue h_hash_ptr(const void *p);
|
||||
uint32_t h_djbhash(const uint8_t *buf, size_t len);
|
||||
|
||||
typedef struct HCFSequence_ HCFSequence;
|
||||
void h_symbol_put(HParseState *state, const char* key, void *value);
|
||||
void* h_symbol_get(HParseState *state, const char* key);
|
||||
|
||||
typedef struct HCFSequence_ HCFSequence;
|
||||
|
||||
struct HCFChoice_ {
|
||||
enum HCFChoiceType {
|
||||
|
|
|
|||
81
src/parsers/bind.c
Normal file
81
src/parsers/bind.c
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
const HParser *p;
|
||||
HContinuation k;
|
||||
void *env;
|
||||
HAllocator *mm__;
|
||||
} BindEnv;
|
||||
|
||||
// an HAllocator backed by an HArena
|
||||
typedef struct {
|
||||
HAllocator allocator; // inherit XXX is this the proper way to do it?
|
||||
HArena *arena;
|
||||
} ArenaAllocator;
|
||||
|
||||
// Allocation callback: forwards the request to the arena behind `allocator`.
static void *aa_alloc(HAllocator *allocator, size_t size)
{
  ArenaAllocator *self = (ArenaAllocator *)allocator;
  return h_arena_malloc(self->arena, size);
}
|
||||
|
||||
// Reallocation callback. Not implemented for the arena-backed allocator:
// it asserts unconditionally and, when assertions are compiled out,
// returns NULL without touching `ptr`.
static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size)
{
  // The arguments are intentionally unused until arena realloc exists.
  (void)allocator;
  (void)ptr;
  (void)size;

  assert(((void)"XXX need realloc for arena allocator", 0));
  return NULL;
}
|
||||
|
||||
// Free callback: hands `ptr` back to the arena behind `allocator`.
static void aa_free(HAllocator *allocator, void *ptr)
{
  ArenaAllocator *self = (ArenaAllocator *)allocator;
  h_arena_free(self->arena, ptr);
}
|
||||
|
||||
// Parse function of the bind combinator.
//
// Runs the first parser, hands its AST to the continuation to obtain a
// second parser, and returns that second parser's result. Returns NULL if
// the first parser fails or the continuation returns NULL.
//
// The continuation allocates through a temporary arena created from the
// bind environment's allocator; that arena is deleted before returning.
static HParseResult *parse_bind(void *be_, HParseState *state) {
  BindEnv *bind = be_;

  HParseResult *first = h_do_parse(bind->p, state);
  if (first == NULL)
    return NULL;

  // temporary arena allocator handed to the continuation
  HArena *tmp_arena = h_new_arena(bind->mm__, 0);
  ArenaAllocator tmp_alloc = {{aa_alloc, aa_realloc, aa_free}, tmp_arena};

  HParseResult *result = NULL;
  HParser *next = bind->k((HAllocator *)&tmp_alloc, first->ast, bind->env);
  if (next != NULL)
    result = h_do_parse(next, state);

  h_delete_arena(tmp_arena);
  return result;
}
|
||||
|
||||
static const HParserVtable bind_vt = {
|
||||
.parse = parse_bind,
|
||||
.isValidRegular = h_false,
|
||||
.isValidCF = h_false,
|
||||
.compile_to_rvm = h_not_regular,
|
||||
};
|
||||
|
||||
// Convenience form of h_bind__m that uses the system allocator.
HParser *h_bind(const HParser *p, HContinuation k, void *env)
{
  HAllocator *mm = &system_allocator;
  return h_bind__m(mm, p, k, env);
}
|
||||
|
||||
// Build a bind parser: records the first parser, the continuation, its
// user data and the allocator in a BindEnv and wraps it with bind_vt.
HParser *h_bind__m(HAllocator *mm__,
                   const HParser *p, HContinuation k, void *env)
{
  BindEnv *bind = h_new(BindEnv, 1);

  bind->mm__ = mm__;
  bind->env = env;
  bind->k = k;
  bind->p = p;

  return h_new_parser(mm__, &bind_vt, bind);
}
|
||||
|
|
@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input)
|
|||
{
|
||||
assert(input->bit_offset <= 8);
|
||||
|
||||
if((input->bit_offset % 8) != 0) {
|
||||
// switching bit order in the middle of a byte
|
||||
// we leave bit_offset untouched. this means that something like
|
||||
// le(bits(5)),le(bits(3))
|
||||
// is equivalent to
|
||||
// le(bits(5),bits(3)) .
|
||||
// on the other hand,
|
||||
// le(bits(5)),be(bits(5))
|
||||
// will read the same 5 bits twice and discard the top 3.
|
||||
} else {
|
||||
// flip offset (0 <-> 8)
|
||||
input->bit_offset = 8 - input->bit_offset;
|
||||
}
|
||||
char tmp = input->bit_offset;
|
||||
input->bit_offset = input->margin;
|
||||
input->margin = tmp;
|
||||
}
|
||||
|
||||
static HParseResult *parse_endianness(void *env, HParseState *state)
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
|
|||
HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
|
||||
ret->ast = tok;
|
||||
ret->arena = arena;
|
||||
ret->bit_length = 0; // This way it gets overridden in h_do_parse
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
179
src/parsers/permutation.c
Normal file
179
src/parsers/permutation.c
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
#include <stdarg.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
size_t len;
|
||||
HParser **p_array;
|
||||
} HSequence;
|
||||
|
||||
// main recursion, used by parse_permutation below
|
||||
static int parse_permutation_tail(const HSequence *s,
|
||||
HCountedArray *seq,
|
||||
const size_t k, char *set,
|
||||
HParseState *state)
|
||||
{
|
||||
// shorthands
|
||||
const size_t n = s->len;
|
||||
HParser **ps = s->p_array;
|
||||
|
||||
// trivial base case
|
||||
if(k >= n)
|
||||
return 1;
|
||||
|
||||
HInputStream bak = state->input_stream;
|
||||
|
||||
// try available parsers as first element of the permutation tail
|
||||
HParseResult *match = NULL;
|
||||
size_t i;
|
||||
for(i=0; i<n; i++) {
|
||||
if(set[i]) {
|
||||
match = h_do_parse(ps[i], state);
|
||||
|
||||
// save result
|
||||
if(match)
|
||||
seq->elements[i] = (void *)match->ast;
|
||||
|
||||
// treat empty optionals (TT_NONE) like failure here
|
||||
if(match && match->ast && match->ast->token_type == TT_NONE)
|
||||
match = NULL;
|
||||
|
||||
if(match) {
|
||||
// remove parser from active set
|
||||
set[i] = 0;
|
||||
|
||||
// parse the rest of the permutation phrase
|
||||
if(parse_permutation_tail(s, seq, k+1, set, state)) {
|
||||
// success
|
||||
return 1;
|
||||
} else {
|
||||
// place parser back in active set and try the next
|
||||
set[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
state->input_stream = bak; // rewind input
|
||||
}
|
||||
}
|
||||
|
||||
// if all available parsers were empty optionals (TT_NONE), still succeed
|
||||
for(i=0; i<n; i++) {
|
||||
if(set[i]) {
|
||||
HParsedToken *tok = seq->elements[i];
|
||||
if(!(tok && tok->token_type == TT_NONE))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(i==n) // all were TT_NONE
|
||||
return 1;
|
||||
|
||||
// permutations exhausted
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parse function of the permutation combinator.
//
// Marks all argument parsers as available, lets parse_permutation_tail
// search for a matching order, and on success returns a TT_SEQUENCE token
// whose sequence has one slot per argument parser. Returns NULL on failure.
static HParseResult *parse_permutation(void *env, HParseState *state)
{
  const HSequence *s = env;
  const size_t n = s->len;

  // one flag per parser: 1 = not yet matched
  char *available = h_arena_malloc(state->arena, sizeof(char) * n);
  memset(available, 1, sizeof(char) * n);

  // collects the per-parser results
  HCountedArray *results = h_carray_new_sized(state->arena, n);

  if (!parse_permutation_tail(s, results, 0, available, state)) {
    // no parse
    // XXX free results
    return NULL;
  }

  results->used = n;
  HParsedToken *tok = a_new(HParsedToken, 1);
  tok->token_type = TT_SEQUENCE;
  tok->seq = results;
  return make_result(state->arena, tok);
}
|
||||
|
||||
|
||||
static const HParserVtable permutation_vt = {
|
||||
.parse = parse_permutation,
|
||||
.isValidRegular = h_false,
|
||||
.isValidCF = h_false,
|
||||
.desugar = NULL,
|
||||
.compile_to_rvm = h_not_regular,
|
||||
};
|
||||
|
||||
// Variadic entry point using the system allocator; the argument list is
// forwarded to h_permutation__mv.
HParser* h_permutation(HParser* p, ...) {
  va_list rest;
  va_start(rest, p);
  HParser *parser = h_permutation__mv(&system_allocator, p, rest);
  va_end(rest);
  return parser;
}
|
||||
|
||||
// Variadic entry point with an explicit allocator; the argument list is
// forwarded to h_permutation__mv.
HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) {
  va_list rest;
  va_start(rest, p);
  HParser *parser = h_permutation__mv(mm__, p, rest);
  va_end(rest);
  return parser;
}
|
||||
|
||||
// va_list entry point using the system allocator.
HParser* h_permutation__v(HParser* p, va_list ap) {
  HParser *parser = h_permutation__mv(&system_allocator, p, ap);
  return parser;
}
|
||||
|
||||
// Core constructor for permutation parsers.
//
//   mm__ - allocator passed to h_new_parser
//   p    - first argument parser
//   ap_  - remaining argument parsers, terminated by a NULL pointer
//
// The argument list is walked twice (via va_copy): once to count the
// parsers, once to copy them into the HSequence's array.
HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) {
  va_list ap;
  size_t len = 0;
  HSequence *s = h_new(HSequence, 1);

  // first pass: count p plus every argument before the NULL terminator
  HParser *arg;
  va_copy(ap, ap_);
  do {
    len++;
    arg = va_arg(ap, HParser *);
  } while (arg);
  va_end(ap);
  s->p_array = h_new(HParser *, len);

  // second pass: copy the parsers; slot 0 is the named argument
  va_copy(ap, ap_);
  s->p_array[0] = p;
  for (size_t i = 1; i < len; i++) {
    s->p_array[i] = va_arg(ap, HParser *);
  }
  va_end(ap);

  s->len = len;
  return h_new_parser(mm__, &permutation_vt, s);
}
|
||||
|
||||
HParser* h_permutation__a(void *args[]) {
|
||||
return h_permutation__ma(&system_allocator, args);
|
||||
}
|
||||
|
||||
// Array-based constructor for permutation parsers.
//
//   mm__ - allocator (used by the h_new allocations below)
//   args - array of HParser pointers terminated by a NULL entry
//
// Copies the parsers into a fresh HSequence and wraps it in an HParser
// that uses permutation_vt.
HParser* h_permutation__ma(HAllocator* mm__, void *args[]) {
  HParser **parsers = (HParser **)args;

  // count entries up to (not including) the NULL terminator
  size_t len = 0;
  while (parsers[len] != NULL)
    len++;

  HSequence *s = h_new(HSequence, 1);
  s->p_array = h_new(HParser *, len);
  for (size_t i = 0; i < len; i++) {
    s->p_array[i] = parsers[i];
  }
  s->len = len;

  HParser *ret = h_new(HParser, 1);
  ret->vtable = &permutation_vt;
  ret->env = (void*)s;
  ret->backend = PB_MIN;
  // Do not leave the remaining HParser fields indeterminate.
  ret->backend_data = NULL;
  ret->desugared = NULL;
  return ret;
}
|
||||
69
src/parsers/value.c
Normal file
69
src/parsers/value.c
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
const HParser* p;
|
||||
const char* key;
|
||||
} HStoredValue;
|
||||
|
||||
/* Stash an HParseResult into a symbol table, so that it can be
|
||||
retrieved and used later. */
|
||||
|
||||
static HParseResult* parse_put(void *env, HParseState* state) {
|
||||
HStoredValue *s = (HStoredValue*)env;
|
||||
if (s->p && s->key && !h_symbol_get(state, s->key)) {
|
||||
HParseResult *tmp = h_do_parse(s->p, state);
|
||||
if (tmp) {
|
||||
h_symbol_put(state, s->key, tmp);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
// otherwise there's no parser, no key, or key's stored already
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const HParserVtable put_vt = {
|
||||
.parse = parse_put,
|
||||
.isValidRegular = h_false,
|
||||
.isValidCF = h_false,
|
||||
.compile_to_rvm = h_not_regular,
|
||||
};
|
||||
|
||||
HParser* h_put_value(const HParser* p, const char* name) {
|
||||
return h_put_value__m(&system_allocator, p, name);
|
||||
}
|
||||
|
||||
// Build a put-value parser that wraps `p` and stores its result under `name`.
// The `name` pointer itself is kept; the string is not copied.
HParser* h_put_value__m(HAllocator* mm__, const HParser* p, const char* name) {
  HStoredValue *stored = h_new(HStoredValue, 1);
  stored->key = name;
  stored->p = p;
  return h_new_parser(mm__, &put_vt, stored);
}
|
||||
|
||||
/* Retrieve a stashed result from the symbol table. */
|
||||
|
||||
static HParseResult* parse_get(void *env, HParseState* state) {
  const HStoredValue *lookup = (HStoredValue*)env;

  // A get environment must carry a key and must not carry a parser.
  if (lookup->p || !lookup->key) {
    return NULL;
  }
  return h_symbol_get(state, lookup->key);
}
|
||||
|
||||
static const HParserVtable get_vt = {
|
||||
.parse = parse_get,
|
||||
.isValidRegular = h_false,
|
||||
.isValidCF = h_false,
|
||||
.compile_to_rvm = h_not_regular,
|
||||
};
|
||||
|
||||
HParser* h_get_value(const char* name) {
|
||||
return h_get_value__m(&system_allocator, name);
|
||||
}
|
||||
|
||||
/* Create a parser that yields whatever result is stored under name.
 *
 * The environment keeps the name pointer itself; the string is not
 * copied here. */
HParser* h_get_value__m(HAllocator* mm__, const char* name) {
  HStoredValue *lookup = h_new(HStoredValue, 1);
  lookup->key = name;
  lookup->p = NULL;  // parse_get only performs a lookup when p is unset
  return h_new_parser(mm__, &get_vt, lookup);
}
|
||||
|
|
@ -4,14 +4,14 @@
|
|||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
#define MK_INPUT_STREAM(buf,len,endianness_) \
|
||||
#define MK_INPUT_STREAM(buf,len,endianness_) \
|
||||
{ \
|
||||
.input = (uint8_t*)buf, \
|
||||
.length = len, \
|
||||
.index = 0, \
|
||||
.bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \
|
||||
.endianness = endianness_ \
|
||||
}
|
||||
.input = (uint8_t*)buf, \
|
||||
.length = len, \
|
||||
.index = 0, \
|
||||
.bit_offset = 0, \
|
||||
.endianness = endianness_ \
|
||||
}
|
||||
|
||||
|
||||
static void test_bitreader_ints(void) {
|
||||
|
|
@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) {
|
|||
g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3);
|
||||
}
|
||||
|
||||
|
||||
void register_bitreader_tests(void) {
|
||||
g_test_add_func("/core/bitreader/be", test_bitreader_be);
|
||||
g_test_add_func("/core/bitreader/le", test_bitreader_le);
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
|
|||
.input = buf,
|
||||
.index = 0,
|
||||
.length = len,
|
||||
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
|
||||
.bit_offset = 0,
|
||||
.endianness = flags,
|
||||
.overrun = 0
|
||||
};
|
||||
|
|
|
|||
111
src/t_parser.c
111
src/t_parser.c
|
|
@ -495,6 +495,114 @@ static void test_endianness(gconstpointer backend) {
|
|||
g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc");
|
||||
}
|
||||
|
||||
HParsedToken* act_get(const HParseResult *p, void* user_data) {
|
||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||
ret->token_type = TT_UINT;
|
||||
ret->uint = 3 * (1 << p->ast->uint);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Checks that a value stored with h_put_value can be fetched later with
   h_get_value and used (through act_get) as the count for h_length_value. */
static void test_put_get(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  HParser *size_field = h_put_value(h_uint8(), "size");
  HParser *tag = h_token((const uint8_t*)"foo", 3);
  HParser *count = h_action(h_get_value("size"), act_get, NULL);
  HParser *payload = h_length_value(count, h_uint8());
  HParser *p = h_sequence(size_field, tag, payload, NULL);

  // Yes, the quotes in the next line look weird. Leave them alone,
  // this is to deal with how C strings handle hex-formatted chars.
  // A stored size of 1 makes act_get produce 3 * (1 << 1) = 6 payload bytes.
  g_check_parse_match(p, be, "\x01""fooabcdef", 10, "(u0x1 <66.6f.6f> (u0x61 u0x62 u0x63 u0x64 u0x65 u0x66))");
  // Only five payload bytes are supplied here.
  g_check_parse_failed(p, be, "\x01""fooabcde", 9);
}
|
||||
|
||||
/* Exercises h_permutation with three mandatory elements, then with an
   optional element in last and in first position.  In every expected
   string the results appear in the order the parsers were declared, not
   in input order. */
static void test_permutation(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  // --- three mandatory elements: a, b, c ---
  const HParser *abc = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL);

  g_check_parse_match(abc, be, "abc", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(abc, be, "acb", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(abc, be, "bac", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(abc, be, "bca", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(abc, be, "cab", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(abc, be, "cba", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_failed(abc, be, "a", 1);
  g_check_parse_failed(abc, be, "ab", 2);
  g_check_parse_failed(abc, be, "abb", 3);

  // --- optional element declared last: a, b, [c] ---
  const HParser *opt_last = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL);

  g_check_parse_match(opt_last, be, "abc", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "acb", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "bac", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "bca", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "cab", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "cba", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "ab", 2, "(u0x61 u0x62 null)");
  g_check_parse_match(opt_last, be, "ba", 2, "(u0x61 u0x62 null)");
  g_check_parse_failed(opt_last, be, "a", 1);
  g_check_parse_failed(opt_last, be, "b", 1);
  g_check_parse_failed(opt_last, be, "c", 1);
  g_check_parse_failed(opt_last, be, "ca", 2);
  g_check_parse_failed(opt_last, be, "cb", 2);
  g_check_parse_failed(opt_last, be, "cc", 2);
  g_check_parse_failed(opt_last, be, "ccab", 4);
  g_check_parse_failed(opt_last, be, "ccc", 3);

  // --- optional element declared first: [c], a, b ---
  const HParser *opt_first = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL);

  g_check_parse_match(opt_first, be, "abc", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "acb", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "bac", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "bca", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "cab", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "cba", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "ab", 2, "(null u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "ba", 2, "(null u0x61 u0x62)");
  g_check_parse_failed(opt_first, be, "a", 1);
  g_check_parse_failed(opt_first, be, "b", 1);
  g_check_parse_failed(opt_first, be, "c", 1);
  g_check_parse_failed(opt_first, be, "ca", 2);
  g_check_parse_failed(opt_first, be, "cb", 2);
  g_check_parse_failed(opt_first, be, "cc", 2);
  g_check_parse_failed(opt_first, be, "ccab", 4);
  g_check_parse_failed(opt_first, be, "ccc", 3);
}
|
||||
|
||||
/* Continuation for test_bind.
 *
 * p must be a TT_SEQUENCE of TT_UINT tokens holding ASCII digits; they
 * are folded into a decimal number v.  env carries a single character
 * ("one") smuggled through the pointer value.
 *
 * Returns:
 *   - NULL when v > 127,
 *   - a parser from h_nothing_p__m when 26 < v <= 127,
 *   - otherwise a parser for the single character one - 1 + v
 *     (so with one == 'a', v == 1 yields 'a' and v == 26 yields 'z').
 *
 * The larger bound is tested first: checking v > 26 first would make the
 * v > 127 branch unreachable, so the NULL-return path would never run.
 */
static HParser *k_test_bind(HAllocator *mm__, const HParsedToken *p, void *env) {
  uint8_t one = (uintptr_t)env;

  assert(p);
  assert(p->token_type == TT_SEQUENCE);

  // Fold the digit tokens into a decimal value.
  int v = 0;
  for (size_t i = 0; i < p->seq->used; i++) {
    assert(p->seq->elements[i]->token_type == TT_UINT);
    v = v*10 + p->seq->elements[i]->uint - '0';
  }

  if (v > 127) {
    return NULL;                  // fail by returning no parser at all
  } else if (v > 26) {
    return h_nothing_p__m(mm__);  // fail via a parser that never matches
  } else {
    return h_ch__m(mm__, one - 1 + v);
  }
}
|
||||
/* Checks h_bind with k_test_bind: a run of decimal digits selects the
   letter that must follow ("1" -> 'a', "2" -> 'b', ... "26" -> 'z'). */
static void test_bind(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  const HParser *number = h_many1(h_ch_range('0', '9'));
  const HParser *bound = h_bind(number, k_test_bind, (void *)(uintptr_t)'a');

  // The letter matches the number.
  g_check_parse_match(bound, be, "1a", 2, "u0x61");
  g_check_parse_match(bound, be, "2b", 2, "u0x62");
  g_check_parse_match(bound, be, "26z", 3, "u0x7a");

  // Wrong letter, no digits at all, or a number above 26.
  g_check_parse_failed(bound, be, "1x", 2);
  g_check_parse_failed(bound, be, "29y", 3);
  g_check_parse_failed(bound, be, "@", 1);
  g_check_parse_failed(bound, be, "27{", 3);
  g_check_parse_failed(bound, be, "272{", 4);
}
|
||||
|
||||
void register_parser_tests(void) {
|
||||
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
|
||||
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
|
||||
|
|
@ -542,6 +650,9 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
|
||||
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
|
||||
g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
|
||||
g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
|
||||
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
|
||||
g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
|
||||
|
||||
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
||||
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
||||
|
|
|
|||
102
src/t_regression.c
Normal file
102
src/t_regression.c
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#include <glib.h>
|
||||
#include <stdint.h>
|
||||
#include "glue.h"
|
||||
#include "hammer.h"
|
||||
#include "test_suite.h"
|
||||
#include "internal.h"
|
||||
|
||||
static void test_bug118(void) {
|
||||
// https://github.com/UpstandingHackers/hammer/issues/118
|
||||
// Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5
|
||||
|
||||
HParseResult* p;
|
||||
const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A";
|
||||
|
||||
#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN)
|
||||
H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false)));
|
||||
H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false)));
|
||||
#undef MY_ENDIAN
|
||||
|
||||
H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL));
|
||||
|
||||
H_RULE(header_ok, h_sequence(nibble, nibble, NULL));
|
||||
H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL));
|
||||
|
||||
H_RULE(parser_ok, h_sequence(header_ok, samples, NULL));
|
||||
H_RULE(parser_weird, h_sequence(header_weird, samples, NULL));
|
||||
|
||||
|
||||
p = h_parse(parser_weird, input, 6);
|
||||
g_check_cmp_int32(p->bit_length, ==, 44);
|
||||
h_parse_result_free(p);
|
||||
p = h_parse(parser_ok, input, 6);
|
||||
g_check_cmp_int32(p->bit_length, ==, 40);
|
||||
h_parse_result_free(p);
|
||||
}
|
||||
|
||||
static void test_seq_index_path(void) {
|
||||
HArena *arena = h_new_arena(&system_allocator, 0);
|
||||
|
||||
HParsedToken *seq = h_make_seqn(arena, 1);
|
||||
HParsedToken *seq2 = h_make_seqn(arena, 2);
|
||||
HParsedToken *tok1 = h_make_uint(arena, 41);
|
||||
HParsedToken *tok2 = h_make_uint(arena, 42);
|
||||
|
||||
seq->seq->elements[0] = seq2;
|
||||
seq->seq->used = 1;
|
||||
seq2->seq->elements[0] = tok1;
|
||||
seq2->seq->elements[1] = tok2;
|
||||
seq2->seq->used = 2;
|
||||
|
||||
g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE);
|
||||
g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT);
|
||||
g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41);
|
||||
g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42);
|
||||
}
|
||||
|
||||
/* Brace initializer for an input stream positioned at the first bit of
   buf.  buf is cast to uint8_t*, len is the length stored in .length, and
   endianness_ is stored unchanged in .endianness.  Every argument is
   parenthesized so that a compound expression (e.g. a ?: or an addition)
   is treated as a single operand. */
#define MK_INPUT_STREAM(buf,len,endianness_)  \
  {                                           \
    .input = (uint8_t*)(buf),                 \
    .length = (len),                          \
    .index = 0,                               \
    .bit_offset = 0,                          \
    .endianness = (endianness_)               \
  }
|
||||
|
||||
static void test_read_bits_48(void) {
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 32, false), ==, 0x78563412);
|
||||
g_check_cmp_int64(h_read_bits(&is, 16, false), ==, 0xBC9A);
|
||||
}
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 31, false), ==, 0x78563412);
|
||||
g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x17934);
|
||||
}
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 33, false), ==, 0x78563412);
|
||||
g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x5E4D);
|
||||
}
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 36, false), ==, 0xA78563412);
|
||||
g_check_cmp_int64(h_read_bits(&is, 12, false), ==, 0xBC9);
|
||||
}
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 40, false), ==, 0x9A78563412);
|
||||
g_check_cmp_int64(h_read_bits(&is, 8, false), ==, 0xBC);
|
||||
}
|
||||
{
|
||||
HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmp_int64(h_read_bits(&is, 48, false), ==, 0xBC9A78563412);
|
||||
}
|
||||
}
|
||||
|
||||
void register_regression_tests(void) {
|
||||
g_test_add_func("/core/regression/bug118", test_bug118);
|
||||
g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
|
||||
g_test_add_func("/core/regression/read_bits_48", test_read_bits_48);
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@ extern void register_parser_tests();
|
|||
extern void register_grammar_tests();
|
||||
extern void register_misc_tests();
|
||||
extern void register_benchmark_tests();
|
||||
extern void register_regression_tests();
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
g_test_init(&argc, &argv, NULL);
|
||||
|
|
@ -35,6 +36,7 @@ int main(int argc, char** argv) {
|
|||
register_parser_tests();
|
||||
register_grammar_tests();
|
||||
register_misc_tests();
|
||||
register_regression_tests();
|
||||
if (g_test_slow() || g_test_perf())
|
||||
register_benchmark_tests();
|
||||
|
||||
|
|
|
|||
|
|
@ -212,6 +212,7 @@
|
|||
|
||||
|
||||
|
||||
// Typed integer comparison checks: each forwards n1, op and n2 to
// g_check_inttype together with a printf format and the C integer type named in the macro.
#define g_check_cmp_int(n1, op, n2) g_check_inttype("%d", int, n1, op, n2)
|
||||
#define g_check_cmp_int32(n1, op, n2) g_check_inttype("%d", int32_t, n1, op, n2)
|
||||
#define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2)
|
||||
#define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue