Merge remote branch 'upstream/master'

Conflicts:
	src/hammer.h
This commit is contained in:
aegis 2013-04-23 16:31:03 +02:00
commit f817211446
66 changed files with 5165 additions and 1679 deletions

View file

@ -25,31 +25,55 @@ PARSERS := \
attr_bool \
indirect
OUTPUTS := bitreader.o \
hammer.o \
bitwriter.o \
libhammer.a \
pprint.o \
allocator.o \
datastructures.o \
BACKENDS := \
packrat
HAMMER_PARTS := \
bitreader.o \
hammer.o \
bitwriter.o \
pprint.o \
allocator.o \
datastructures.o \
system_allocator.o \
benchmark.o \
compile.o \
glue.o \
$(PARSERS:%=parsers/%.o) \
$(BACKENDS:%=backends/%.o)
TESTS := t_benchmark.o \
t_bitreader.o \
t_bitwriter.o \
t_parser.o \
t_misc.o \
test_suite.o
OUTPUTS := libhammer.a \
test_suite.o \
test_suite \
$(PARSERS:%=parsers/%.o)
$(HAMMER_PARTS) \
$(TESTS)
TOPLEVEL := ../
include ../common.mk
$(TESTS): CFLAGS += $(TEST_CFLAGS)
$(TESTS): LDFLAGS += $(TEST_LDFLAGS)
all: libhammer.a test_suite
all: libhammer.a
test_suite: test_suite.o libhammer.a
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o \
$(PARSERS:%=parsers/%.o)
libhammer.a: $(HAMMER_PARTS)
bitreader.o: test_suite.h
hammer.o: hammer.h
glue.o: hammer.h glue.h
all: libhammer.a
test: test_suite
./test_suite -v
test_suite: $(TESTS) libhammer.a
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS)

View file

@ -15,11 +15,13 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <glib.h>
#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#include "allocator.h"
#include "hammer.h"
#include "internal.h"
struct arena_link {
// TODO:
@ -36,22 +38,25 @@ struct arena_link {
struct HArena_ {
struct arena_link *head;
struct HAllocator_ *mm__;
size_t block_size;
size_t used;
size_t wasted;
};
HArena *h_new_arena(size_t block_size) {
HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
if (block_size == 0)
block_size = 4096;
struct HArena_ *ret = g_new(struct HArena_, 1);
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + block_size);
struct HArena_ *ret = h_new(struct HArena_, 1);
struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
memset(link, 0, sizeof(struct arena_link) + block_size);
link->free = block_size;
link->used = 0;
link->next = NULL;
ret->head = link;
ret->block_size = block_size;
ret->used = 0;
ret->mm__ = mm__;
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
return ret;
}
@ -70,13 +75,15 @@ void* h_arena_malloc(HArena *arena, size_t size) {
// This involves some annoying casting...
arena->used += size;
arena->wasted += sizeof(struct arena_link*);
void* link = g_malloc(size + sizeof(struct arena_link*));
void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
memset(link, 0, size + sizeof(struct arena_link*));
*(struct arena_link**)link = arena->head->next;
arena->head->next = (struct arena_link*)link;
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
} else {
// we just need to allocate an ordinary new block.
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size);
struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
link->free = arena->block_size - size;
link->used = size;
link->next = arena->head;
@ -86,18 +93,23 @@ void* h_arena_malloc(HArena *arena, size_t size) {
return link->rest;
}
}
void h_arena_free(HArena *arena, void* ptr) {
// To be used later...
}
void h_delete_arena(HArena *arena) {
HAllocator *mm__ = arena->mm__;
struct arena_link *link = arena->head;
while (link) {
struct arena_link *next = link->next;
// Even in the case of a special block, without the full arena
// header, this is correct, because the next pointer is the first
// in the structure.
g_free(link);
h_free(link);
link = next;
}
g_free(arena);
h_free(arena);
}
void h_allocator_stats(HArena *arena, HArenaStats *stats) {

View file

@ -19,10 +19,17 @@
#define HAMMER_ALLOCATOR__H__
#include <sys/types.h>
typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size);
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
void (*free)(struct HAllocator_* allocator, void* ptr);
} HAllocator;
typedef struct HArena_ HArena ; // hidden implementation
HArena *h_new_arena(size_t block_size); // pass 0 for default...
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
typedef struct {

209
src/backends/packrat.c Normal file
View file

@ -0,0 +1,209 @@
#include <assert.h>
#include "../internal.h"
#include "../parsers/parser_internal.h"
// Bundle a parse result with the input-stream state current at the time of
// the call, producing the record that is stored in the packrat cache.
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
  HCachedResult *entry = a_new(HCachedResult, 1);
  entry->input_stream = state->input_stream;
  entry->result = result;
  return entry;
}
// Library-internal helper: run `parser` once at the current input position,
// without consulting the cache, and apply the error handling shared by all
// callers.
//
// Returns the result produced by the parser's vtable, or NULL when `parser`
// is NULL, when the parser produced no result, or when the input stream is
// flagged as overrun after the parse.
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which code is executed when.
HParseResult *tmp_res;
if (parser) {
// Snapshot of the stream before parsing; used below to measure what was consumed.
HInputStream bak = state->input_stream;
tmp_res = parser->vtable->parse(parser->env, state);
if (tmp_res) {
tmp_res->arena = state->arena;
if (!state->input_stream.overrun) {
// Whole bytes advanced, expressed in bits...
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
// ...corrected by the change in bit offset; the sign of the correction
// depends on the stream's bit-endianness flag.
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
else
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
} else
tmp_res->bit_length = 0;
}
} else
tmp_res = NULL;
if (state->input_stream.overrun)
return NULL; // overrun is always failure.
#ifdef CONSISTENCY_CHECK
// NOTE(review): `key` is neither a parameter nor a local of this function,
// and `INVALID` is not defined in the visible code -- this block looks like
// it cannot compile when CONSISTENCY_CHECK is defined; confirm.
if (!tmp_res) {
state->input_stream = INVALID;
state->input_stream.input = key->input_pos.input;
}
#endif
return tmp_res;
}
// Cache lookup that is aware of left recursion currently being grown.
//
// When no recursion head is registered for `k`, this is a plain cache lookup.
// Otherwise:
//  - if nothing is cached and k->parser is neither the head parser nor in the
//    head's involved set, a fresh PC_RIGHT entry wrapping a result with a NULL
//    ast is returned;
//  - if k->parser is in the head's eval set, it is removed from that set,
//    re-parsed, and the (possibly newly allocated) cache value is overwritten
//    with the new result.
// Returns the cache value to use, which may be NULL.
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
  HParserCacheValue *entry = h_hashtable_get(state->cache, k);
  HRecursionHead *rec_head = h_hashtable_get(state->recursion_heads, k);

  // No heads found: ordinary cache semantics.
  if (rec_head == NULL)
    return entry;

  // Nothing in the cache, and the key parser is not involved.
  if (entry == NULL
      && rec_head->head_parser != k->parser
      && !h_slist_find(rec_head->involved_set, k->parser)) {
    HParseResult *blank = a_new(HParseResult, 1);
    blank->ast = NULL;
    blank->arena = state->arena;

    HParserCacheValue *fresh = a_new(HParserCacheValue, 1);
    fresh->value_type = PC_RIGHT;
    fresh->right = cached_result(state, blank);
    return fresh;
  }

  // Key parser is not scheduled for re-evaluation: hand back what we have.
  if (!h_slist_find(rec_head->eval_set, k->parser))
    return entry;

  // Key parser is in the eval set: take it out and evaluate it once more.
  rec_head->eval_set = h_slist_remove_all(rec_head->eval_set, k->parser);
  HParseResult *reparsed = perform_lowlevel_parse(state, k->parser);
  if (entry == NULL)
    entry = a_new(HParserCacheValue, 1);
  entry->value_type = PC_RIGHT;
  entry->right = cached_result(state, reparsed);
  return entry;
}
/* Set up left recursion for rule `p`.
 *
 * Ensures `rec_detect` has a recursion head (creating one headed by `p` with
 * an empty involved set if needed), then walks the LR stack from the top:
 * every entry above the one whose rule is `p` is pointed at that head and its
 * rule is added to the head's involved set.
 */
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
  if (rec_detect->head == NULL) {
    HRecursionHead *fresh = a_new(HRecursionHead, 1);
    fresh->head_parser = p;
    fresh->involved_set = h_slist_new(state->arena);
    fresh->eval_set = NULL;
    rec_detect->head = fresh;
  }
  assert(state->lr_stack->head != NULL);

  for (HSlistNode *node = state->lr_stack->head; node != NULL; node = node->next) {
    HLeftRec *frame = node->elem;
    // Stop as soon as we see the current parser again.
    if (frame->rule == p)
      break;
    frame->head = rec_detect->head;
    h_slist_push(frame->head->involved_set, (void*)frame->rule);
  }
}
/* Grow the seed parse of a left-recursive rule.
 *
 * Registers `head` in state->recursion_heads under `k`, re-parses k->parser,
 * and as long as the new result's ast lies further into the input than the
 * cached one, stores the new result in the cache and recurses.  Once the parse
 * stops advancing (or fails) the head is removed from recursion_heads again.
 *
 * Returns the cached result after growth has stopped, or the previously
 * cached result when the re-parse fails.  Aborts through errx() when the cache
 * holds no PC_RIGHT entry for `k`.
 */
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
// Store the head into the recursion_heads
h_hashtable_put(state->recursion_heads, k, head);
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
if (!old_cached || PC_LEFT == old_cached->value_type)
errx(1, "impossible match");
HParseResult *old_res = old_cached->right->result;
// reset the eval_set of the head of the recursion at each beginning of growth
head->eval_set = h_slist_copy(head->involved_set);
// NOTE(review): state->input_stream is not rewound to k->input_pos before
// this re-parse -- confirm that callers guarantee the right position.
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
if (tmp_res) {
// "Did the parse get further?"  Compared by (index, bit_offset) of the asts.
// NOTE(review): dereferences old_res->ast and tmp_res->ast without a NULL
// check; recall() in this file creates results whose ast is NULL, so this
// presumably can crash -- confirm.
if ((old_res->ast->index < tmp_res->ast->index) ||
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, k, v);
return grow(k, state, head);
} else {
// we're done with growing, we can remove data from the recursion head
h_hashtable_del(state->recursion_heads, k);
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
if (cached && PC_RIGHT == cached->value_type) {
return cached->right->result;
} else {
errx(1, "impossible match");
}
}
} else {
// Re-parse failed: stop growing and keep the last successful result.
h_hashtable_del(state->recursion_heads, k);
return old_res;
}
}
/* Produce the answer for a left-recursive invocation once its seed is known.
 *
 * If `growable` belongs to a recursion headed by another parser, the seed is
 * returned unchanged.  If k->parser is the head, the seed is written to the
 * cache as a PC_RIGHT entry and, when it is non-NULL, grown via grow().
 * Aborts through errx() when `growable` has no head.
 */
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
  if (!growable->head)
    errx(1, "lrAnswer with no head");

  // not the head rule, so not growing
  if (growable->head->head_parser != k->parser)
    return growable->seed;

  // update cache
  HParserCacheValue *seeded = a_new(HParserCacheValue, 1);
  seeded->value_type = PC_RIGHT;
  seeded->right = cached_result(state, growable->seed);
  h_hashtable_put(state->cache, k, seeded);

  return growable->seed ? grow(k, state, growable->head) : NULL;
}
/* Warth's recursion. Hi Alessandro! */
/* Packrat entry point: parse `parser` at the current input position, going
 * through the memoization cache and the left-recursion machinery.
 *
 * On a cache miss a PC_LEFT placeholder is cached and pushed on the LR stack
 * while the parser runs; afterwards the placeholder is either replaced by the
 * real result or, if setupLR() attached a head to it, handed to lr_answer().
 * On a cache hit the stored result is returned; for a PC_RIGHT hit the input
 * stream is also restored to the position stored with it.
 */
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
// The cache key is the pair (current input position, parser).
HParserCacheKey *key = a_new(HParserCacheKey, 1);
key->input_pos = state->input_stream; key->parser = parser;
HParserCacheValue *m = recall(key, state);
// check to see if there is already a result for this object...
if (!m) {
// It doesn't exist, so create a dummy result to cache
HLeftRec *base = a_new(HLeftRec, 1);
base->seed = NULL; base->rule = parser; base->head = NULL;
h_slist_push(state->lr_stack, base);
// cache it
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
dummy->value_type = PC_LEFT; dummy->left = base;
h_hashtable_put(state->cache, key, dummy);
// parse the input
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
// the base variable has passed equality tests with the cache
h_slist_pop(state->lr_stack);
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
if (NULL == base->head) {
// No left recursion was detected: cache the plain result.
HParserCacheValue *right = a_new(HParserCacheValue, 1);
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, key, right);
return tmp_res;
} else {
// Left recursion was detected while parsing: use the result as the seed.
base->seed = tmp_res;
HParseResult *res = lr_answer(key, state, base);
return res;
}
} else {
// it exists!
if (PC_LEFT == m->value_type) {
// We hit our own placeholder: this invocation is left-recursive.
setupLR(parser, state, m->left);
return m->left->seed; // BUG: this might not be correct
} else {
state->input_stream = m->right->input_stream;
return m->right->result;
}
}
}
// Compile hook of the packrat backend.  Packrat parsing needs no preparation,
// so every argument is ignored and success (0) is reported unconditionally.
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) {
  (void)mm__;
  (void)parser;
  (void)params;
  return 0;
}
// Parse hook of the packrat backend: delegates to h_do_parse().  The
// allocator argument is not used by this backend.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
  (void)mm__;
  HParseResult *outcome = h_do_parse(parser, parse_state);
  return outcome;
}
// Backend vtable that exposes the packrat implementation's hooks.
HParserBackendVTable h__packrat_backend_vtable = {
  .parse = h_packrat_parse,
  .compile = h_packrat_compile,
};

114
src/benchmark.c Normal file
View file

@ -0,0 +1,114 @@
#include <stdio.h>
#include <time.h>
#include <string.h>
#include "hammer.h"
#include "internal.h"
/*
Usage:
Create your parser (i.e., const HParser*), and an array of test cases
(i.e., HParserTestcase[], terminated by { NULL, 0, NULL }) and then call
HBenchmarkResults* results = h_benchmark(parser, testcases);
Then, you can format a report with:
h_benchmark_report(stdout, results);
or just generate code to make the parser run as fast as possible with:
h_benchmark_dump_optimized_code(stdout, results);
*/
// Convenience wrapper around h_benchmark__m() that uses the system allocator.
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) {
  HAllocator *default_mm = &system_allocator;
  return h_benchmark__m(default_mm, parser, testcases);
}
/*
 * Verify and time `parser` under every backend in [PB_MIN, PB_MAX).
 *
 * For each backend the grammar is compiled, every testcase is checked against
 * its expected unambiguous output and -- only when all of them pass -- each
 * testcase is parsed repeatedly until one timing run lasts at least 100ms.
 * Progress and failure messages go to stderr.
 *
 *   mm__       allocator used (through h_new) for the returned records
 *   parser     parser under test
 *   testcases  array terminated by an entry whose `input` is NULL
 *
 * Returns a newly allocated HBenchmarkResults holding one HBackendResults slot
 * per backend.  `cases` is NULL for a backend that failed to compile or that
 * failed at least one testcase.
 */
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) {
  // For now, just output the results to stderr
  HParserTestcase* tc = testcases;
  HParserBackend backend = PB_MIN;
  HBenchmarkResults *ret = h_new(HBenchmarkResults, 1);
  ret->len = PB_MAX-PB_MIN;
  ret->results = h_new(HBackendResults, ret->len);

  for (backend = PB_MIN; backend < PB_MAX; backend++) {
    // The results array has PB_MAX-PB_MIN slots, so index relative to PB_MIN.
    HBackendResults *br = &ret->results[backend - PB_MIN];
    br->backend = backend;
    br->n_testcases = 0;
    br->failed_testcases = 0;
    // Stays NULL unless the timing phase below is reached, so that consumers
    // can tell "no timings available" apart from real data.
    br->cases = NULL;

    // Step 1: Compile grammar for the backend being benchmarked.
    if (h_compile(parser, backend, NULL) == -1) {
      // backend inappropriate for grammar...
      fprintf(stderr, "failed\n");
      br->compile_success = false;
      continue;
    }
    br->compile_success = true;

    // Step 2: verify all test cases.
    for (tc = testcases; tc->input != NULL; tc++) {
      br->n_testcases++;
      HParseResult *res = h_parse(parser, tc->input, tc->length);
      // NOTE(review): res_unamb is never released; which allocator
      // h_write_result_unamb uses is not visible from here -- confirm and free.
      char* res_unamb = (res != NULL) ? h_write_result_unamb(res->ast) : NULL;

      // A testcase fails when exactly one side is NULL, or when both are
      // present and differ.  strcmp is only reached with two valid strings.
      bool mismatch;
      if (res_unamb == NULL || tc->output_unambiguous == NULL)
        mismatch = (res_unamb == NULL) != (tc->output_unambiguous == NULL);
      else
        mismatch = strcmp(res_unamb, tc->output_unambiguous) != 0;

      if (mismatch) {
        // test case failed...
        fprintf(stderr, "failed\n");
        // We want to run all testcases, for purposes of generating a
        // report. (eg, if users are trying to fix a grammar for a
        // faster backend)
        br->failed_testcases++;
      }
      if (res != NULL)
        h_parse_result_free(res);
    }

    if (br->failed_testcases > 0) {
      // Can't use this parser; skip to the next
      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
      continue;
    }

    // Step 3: time every testcase.
    br->cases = h_new(HCaseResult, br->n_testcases);
    size_t cur_case = 0;
    for (tc = testcases; tc->input != NULL; tc++) {
      // The goal is to run each testcase for at least 100ms each
      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
      int count = 1, cur;
      struct timespec ts_start, ts_end;
      long long time_diff;
      do {
        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);
        for (cur = 0; cur < count; cur++) {
          HParseResult *timed = h_parse(parser, tc->input, tc->length);
          if (timed != NULL)
            h_parse_result_free(timed);
        }
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);
        // time_diff is in ns; widen before multiplying so a 32-bit time_t
        // cannot overflow.
        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
                  + (ts_end.tv_nsec - ts_start.tv_nsec);
        // The count bound keeps `count *= 2` from overflowing int should the
        // clock never report enough elapsed time.
      } while (time_diff < 100000000 && count < (1 << 30));
      br->cases[cur_case].parse_time = (time_diff / count);
      cur_case++;
    }
  }
  return ret;
}
/*
 * Write a human-readable timing report for `result` to `stream`.
 *
 * One header line is printed per backend, followed by one line per testcase
 * giving the measured nanoseconds per parse.  Backends whose `cases` array is
 * NULL get only the header line.
 */
void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
  for (size_t i=0; i<result->len; ++i) {
    // %zu is the conversion for size_t; %ld is wrong where long and size_t differ.
    fprintf(stream, "Backend %zu ... \n", i);
    if (result->results[i].cases == NULL)
      continue; // no timings recorded for this backend
    for (size_t j=0; j<result->results[i].n_testcases; ++j) {
      // Cast so the argument matches the conversion whatever integer type
      // parse_time is declared with.
      fprintf(stream, "Case %zu: %lld ns/parse\n", j,
              (long long)result->results[i].cases[j].parse_time);
    }
  }
}

View file

@ -108,70 +108,3 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) {
out <<= final_shift;
return (out ^ msb) - msb; // perform sign extension
}
#ifdef INCLUDE_TESTS
#define MK_INPUT_STREAM(buf,len,endianness_) \
{ \
.input = (uint8_t*)buf, \
.length = len, \
.index = 0, \
.bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \
.endianness = endianness_ \
}
static void test_bitreader_ints(void) {
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
}
static void test_bitreader_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
}
static void test_bitreader_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
}
static void test_largebits_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
}
static void test_largebits_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
}
static void test_offset_largebits_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
}
static void test_offset_largebits_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
}
void register_bitreader_tests(void) {
g_test_add_func("/core/bitreader/be", test_bitreader_be);
g_test_add_func("/core/bitreader/le", test_bitreader_le);
g_test_add_func("/core/bitreader/largebits-be", test_largebits_be);
g_test_add_func("/core/bitreader/largebits-le", test_largebits_le);
g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be);
g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le);
g_test_add_func("/core/bitreader/ints", test_bitreader_ints);
}
#endif // #ifdef INCLUDE_TESTS

View file

@ -4,22 +4,16 @@
#include "internal.h"
#include "test_suite.h"
// This file provides the logical inverse of bitreader.c
struct HBitWriter_ {
uint8_t* buf;
size_t index;
size_t capacity;
char bit_offset; // unlike in bit_reader, this is always the number
// of used bits in the current byte. i.e., 0 always
// means that 8 bits are available for use.
char flags;
};
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
// h_bit_writer_
HBitWriter *h_bit_writer_new() {
HBitWriter *writer = g_new0(HBitWriter, 1);
writer->buf = g_malloc0(writer->capacity = 8);
HBitWriter *h_bit_writer_new(HAllocator* mm__) {
HBitWriter *writer = h_new(HBitWriter, 1);
memset(writer, 0, sizeof(*writer));
writer->buf = mm__->alloc(mm__, writer->capacity = 8);
memset(writer->buf, 0, writer->capacity);
writer->mm__ = mm__;
writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
return writer;
@ -41,7 +35,7 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) {
int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 1 : 0);
size_t old_capacity = w->capacity;
while (w->index + nbytes >= w->capacity) {
w->buf = g_realloc(w->buf, w->capacity *= 2);
w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2);
}
if (old_capacity != w->capacity)
@ -100,114 +94,7 @@ const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len) {
}
void h_bit_writer_free(HBitWriter* w) {
g_free(w->buf);
g_free(w);
HAllocator *mm__ = w->mm__;
h_free(w->buf);
h_free(w);
}
#ifdef INCLUDE_TESTS
// TESTS BELOW HERE
typedef struct {
unsigned long long data;
size_t nbits;
} bitwriter_test_elem; // should end with {0,0}
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
size_t len;
const uint8_t *buf;
HBitWriter *w = h_bit_writer_new();
int i;
w->flags = flags;
for (i = 0; data[i].nbits; i++) {
h_bit_writer_put(w, data[i].data, data[i].nbits);
}
buf = h_bit_writer_get_buffer(w, &len);
HInputStream input = {
.input = buf,
.index = 0,
.length = len,
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
.endianness = flags,
.overrun = 0
};
for (i = 0; data[i].nbits; i++) {
g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
}
}
static void test_bitwriter_ints(void) {
bitwriter_test_elem data[] = {
{ -0x200000000, 64 },
{ 0,0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_bitwriter_be(void) {
bitwriter_test_elem data[] = {
{ 0x03, 3 },
{ 0x52, 8 },
{ 0x1A, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_bitwriter_le(void) {
bitwriter_test_elem data[] = {
{ 0x02, 3 },
{ 0x4D, 8 },
{ 0x0B, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
static void test_largebits_be(void) {
bitwriter_test_elem data[] = {
{ 0x352, 11 },
{ 0x1A, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_largebits_le(void) {
bitwriter_test_elem data[] = {
{ 0x26A, 11 },
{ 0x0B, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
static void test_offset_largebits_be(void) {
bitwriter_test_elem data[] = {
{ 0xD, 5 },
{ 0x25A, 11 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_offset_largebits_le(void) {
bitwriter_test_elem data[] = {
{ 0xA, 5 },
{ 0x2D3, 11 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
void register_bitwriter_tests(void) {
g_test_add_func("/core/bitwriter/be", test_bitwriter_be);
g_test_add_func("/core/bitwriter/le", test_bitwriter_le);
g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be);
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le);
g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be);
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le);
g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints);
}
#endif // #ifdef INCLUDE_TESTS

15
src/compile.c Normal file
View file

@ -0,0 +1,15 @@
// This file contains functions related to managing multiple parse backends
#include "hammer.h"
#include "internal.h"
// Table of backend vtables, indexed directly by HParserBackend value.
// Only the first slot is populated; the remaining PB_MAX-1 entries are
// implicitly NULL.
static HParserBackendVTable *backends[PB_MAX] = {
&h__packrat_backend_vtable,
};
// Convenience wrapper around h_compile__m() that uses the system allocator.
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
  HAllocator *default_mm = &system_allocator;
  return h_compile__m(default_mm, parser, backend, params);
}
/*
 * Compile `parser` for `backend`, passing `mm__` and `params` through to the
 * backend's compile hook.
 *
 * Returns whatever the hook returns, or -1 when `backend` is outside the
 * backend table or has no vtable registered (instead of indexing out of
 * bounds or calling through a NULL pointer).
 */
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
  // The unsigned comparison also rejects negative enum values.
  if ((unsigned int)backend >= (unsigned int)PB_MAX || !backends[backend])
    return -1;
  return backends[backend]->compile(mm__, parser, params);
}

View file

@ -2,7 +2,8 @@
#include "hammer.h"
#include "allocator.h"
#include <assert.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
// {{{ counted arrays
@ -15,6 +16,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
return ret;
}
HCountedArray *h_carray_new(HArena * arena) {
return h_carray_new_sized(arena, 4);
}
@ -30,3 +32,213 @@ void h_carray_append(HCountedArray *array, void* item) {
}
array->elements[array->used++] = item;
}
// HSlist
// Allocate an empty singly-linked list from `arena`; the list remembers the
// arena for its later node allocations.
HSlist* h_slist_new(HArena *arena) {
  HSlist *list = h_arena_malloc(arena, sizeof *list);
  list->arena = arena;
  list->head = NULL;
  return list;
}
// Shallow copy: returns a new list in the same arena holding the same element
// pointers in the same order.  The elements themselves are not duplicated.
HSlist* h_slist_copy(HSlist *slist) {
  HSlist *copy = h_slist_new(slist->arena);
  // `link` always points at the pointer that must receive the next node.
  HSlistNode **link = &copy->head;
  for (HSlistNode *src = slist->head; src != NULL; src = src->next) {
    HSlistNode *dup = h_arena_malloc(slist->arena, sizeof(HSlistNode));
    dup->elem = src->elem;
    dup->next = NULL;
    *link = dup;
    link = &dup->next;
  }
  return copy;
}
// Remove the first node of the list and return its element, or NULL when the
// list is empty.  The node is handed back to the arena.
void* h_slist_pop(HSlist *slist) {
  HSlistNode *top = slist->head;
  if (top == NULL)
    return NULL;
  void *elem = top->elem;
  slist->head = top->next;
  h_arena_free(slist->arena, top);
  return elem;
}
// Prepend `item` to the list, allocating the node from the list's arena.
void h_slist_push(HSlist *slist, void* item) {
  HSlistNode *fresh = h_arena_malloc(slist->arena, sizeof *fresh);
  fresh->next = slist->head;
  fresh->elem = item;
  // write memory barrier here.
  slist->head = fresh;
}
// Report whether `item` (compared by pointer identity) occurs in the list.
// `item` must not be NULL.
bool h_slist_find(HSlist *slist, const void* item) {
  assert (item != NULL);
  for (HSlistNode *cursor = slist->head; cursor != NULL; cursor = cursor->next) {
    if (cursor->elem == item)
      return true;
  }
  return false;
}
// Unlink every node whose element is `item` (compared by pointer identity)
// and hand the node back to the list's arena, as h_slist_pop does.
// `item` must not be NULL.  Returns `slist` itself, modified in place.
HSlist* h_slist_remove_all(HSlist *slist, const void* item) {
  assert (item != NULL);
  // `link` points at the pointer that leads to the node under inspection,
  // which makes head and interior removals the same operation.
  HSlistNode **link = &slist->head;
  while (*link != NULL) {
    HSlistNode *node = *link;
    if (node->elem == item) {
      *link = node->next;
      h_arena_free(slist->arena, node);
    } else {
      link = &node->next;
    }
  }
  return slist;
}
// Release every node of the list and then the list header itself.
// The elements the nodes pointed to are not touched.
void h_slist_free(HSlist *slist) {
  for (;;) {
    if (slist->head == NULL)
      break;
    h_slist_pop(slist);
  }
  h_arena_free(slist->arena, slist);
}
// Create an empty hash table in `arena` with 64 buckets, using `hashFunc` to
// hash keys and `equalFunc` to compare them.  Every bucket head starts out
// empty (NULL key/value/next, hash value 0).
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
  HHashTable *table = h_arena_malloc(arena, sizeof(HHashTable));
  table->hashFunc = hashFunc;
  table->equalFunc = equalFunc;
  table->capacity = 64; // to start; should be tuned later...
  table->used = 0;
  table->arena = arena;
  table->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * table->capacity);

  HHashTableEntry *slot = table->contents;
  HHashTableEntry *end = table->contents + table->capacity;
  while (slot != end) {
    slot->key = NULL;
    slot->value = NULL;
    slot->next = NULL;
    slot->hashval = 0;
    slot++;
  }
  return table;
}
// Look up `key` and return the value stored for it, or NULL when the key is
// not present.
//
// Empty slots (key == NULL, hashval == 0) are skipped explicitly: without
// that, a key whose hash is 0 would match an empty slot's hashval and
// equalFunc would be called with a NULL key.
void* h_hashtable_get(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    if (hte->key == NULL || hte->hashval != hashval)
      continue;
    if (ht->equalFunc(key, hte->key))
      return hte->value;
  }
  return NULL;
}
// Store `value` under `key`, replacing the stored key/value if an equal key
// is already present.
//
// The whole bucket chain is searched for an equal key before anything is
// added; the previous do/while stopped before comparing the last chain entry
// and so could insert duplicates.  If no match exists, the first empty slot
// of the chain (key == NULL, left behind by a deletion or never used) is
// reused; only when there is none is a new entry allocated and appended.
// Empty slots are never passed to equalFunc.
void h_hashtable_put(HHashTable* ht, void* key, void* value) {
  // # Start with a rebalancing
  //h_hashtable_ensure_capacity(ht, ht->used + 1);

  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  HHashTableEntry *vacant = NULL; // first reusable slot seen in the chain
  HHashTableEntry *last = NULL;   // tail of the chain, for appending
  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    last = hte;
    if (hte->key == NULL) {
      if (vacant == NULL)
        vacant = hte;
      continue;
    }
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) {
      // Existing mapping: overwrite in place, entry count unchanged.
      hte->key = key;
      hte->value = value;
      hte->hashval = hashval;
      return;
    }
  }

  if (vacant == NULL) {
    // Add a new link...
    vacant = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
    vacant->next = NULL;
    last->next = vacant;
  }
  vacant->key = key;
  vacant->value = value;
  vacant->hashval = hashval;
  ht->used++;
}
// Report whether `key` has an entry in the table (true/false).
//
// Empty slots (key == NULL, hashval == 0) are skipped so that a key hashing
// to 0 never causes equalFunc to be called with a NULL key.
int h_hashtable_present(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    if (hte->key == NULL || hte->hashval != hashval)
      continue;
    if (ht->equalFunc(key, hte->key))
      return true;
  }
  return false;
}
// Remove the entry for `key`, if there is one.
//
// When the matching entry has a successor, the successor is copied over it
// and the successor's node is released; otherwise the entry is marked empty
// in place.  The entry count `used` is decremented to mirror the increment
// done on insertion.  Empty slots are skipped so equalFunc never sees a NULL
// key.
void h_hashtable_del(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    if (hte->key == NULL || hte->hashval != hashval)
      continue;
    if (!ht->equalFunc(key, hte->key))
      continue;

    // FIXME: Leaks keys and values.
    HHashTableEntry* hten = hte->next;
    if (hten != NULL) {
      *hte = *hten;
      h_arena_free(ht->arena, hten);
    } else {
      hte->key = hte->value = NULL;
      hte->hashval = 0;
    }
    ht->used--;
    return;
  }
}
// Release the overflow nodes of every bucket chain and then the bucket array.
// Bucket heads live inside the array and are released with it.
// FIXME: keys and values are not freed.
void h_hashtable_free(HHashTable* ht) {
  for (size_t bucket = 0; bucket < ht->capacity; bucket++) {
    HHashTableEntry *node = ht->contents[bucket].next;
    while (node != NULL) {
      HHashTableEntry *following = node->next;
      h_arena_free(ht->arena, node);
      node = following;
    }
  }
  h_arena_free(ht->arena, ht->contents);
}

177
src/glue.c Normal file
View file

@ -0,0 +1,177 @@
#include "glue.h"
#include "../src/internal.h" // for h_carray_*
// Semantic action counterpart of h_ignore: discards the parse result by
// always yielding NULL.
const HParsedToken *h_act_ignore(const HParseResult *p)
{
  (void)p;
  return NULL;
}
// Helper to build HAction's that pick one index out of a sequence.
const HParsedToken *h_act_index(int i, const HParseResult *p)
{
if(!p) return NULL;
const HParsedToken *tok = p->ast;
if(!tok || tok->token_type != TT_SEQUENCE)
return NULL;
const HCountedArray *seq = tok->seq;
size_t n = seq->used;
if(i<0 || (size_t)i>=n)
return NULL;
else
return tok->seq->elements[i];
}
// Semantic action wrapper for h_seq_flatten: flattens the result's ast using
// the result's own arena.
const HParsedToken *h_act_flatten(const HParseResult *p) {
  const HParsedToken *flat = h_seq_flatten(p->arena, p->ast);
  return flat;
}
// Shared allocator for the h_make family: takes a token from `arena` and sets
// only its type tag; the payload is left for the caller to fill in.
HParsedToken *h_make_(HArena *arena, HTokenType type)
{
  HParsedToken *tok = h_arena_malloc(arena, sizeof *tok);
  tok->token_type = type;
  return tok;
}
// Create a token of a user-defined type (type >= TT_USER, asserted) whose
// `user` payload is `value`.
HParsedToken *h_make(HArena *arena, HTokenType type, void *value)
{
  assert(type >= TT_USER);
  HParsedToken *tok = h_make_(arena, type);
  tok->user = value;
  return tok;
}
// Create a TT_SEQUENCE token backed by a new counted array of default size.
HParsedToken *h_make_seq(HArena *arena)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new(arena);
  return tok;
}
// Create a TT_SEQUENCE token backed by a new counted array sized for `n`
// elements.
HParsedToken *h_make_seqn(HArena *arena, size_t n)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new_sized(arena, n);
  return tok;
}
// Create a TT_BYTES token with a buffer of `len` bytes taken from `arena`;
// filling the buffer is left to the caller.
HParsedToken *h_make_bytes(HArena *arena, size_t len)
{
  HParsedToken *tok = h_make_(arena, TT_BYTES);
  tok->bytes.token = h_arena_malloc(arena, len);
  tok->bytes.len = len;
  return tok;
}
// Create a TT_SINT token carrying the signed integer `val`.
HParsedToken *h_make_sint(HArena *arena, int64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_SINT);
  tok->sint = val;
  return tok;
}
// Create a TT_UINT token carrying the unsigned integer `val`.
HParsedToken *h_make_uint(HArena *arena, uint64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_UINT);
  tok->uint = val;
  return tok;
}
// XXX -> internal
// Return element `i` of counted array `a`; `i` must be below the number of
// used elements (asserted).
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
{
  assert(i < a->used);
  HParsedToken *elem = a->elements[i];
  return elem;
}
size_t h_seq_len(const HParsedToken *p)
{
assert(p != NULL);
assert(p->token_type == TT_SEQUENCE);
return p->seq->used;
}
HParsedToken **h_seq_elements(const HParsedToken *p)
{
assert(p != NULL);
assert(p->token_type == TT_SEQUENCE);
return p->seq->elements;
}
// Element `i` of sequence token `p`.  `p` must be a non-NULL TT_SEQUENCE
// (asserted); the index is range-checked by h_carray_index.
HParsedToken *h_seq_index(const HParsedToken *p, size_t i)
{
  assert(p != NULL);
  assert(p->token_type == TT_SEQUENCE);
  HParsedToken *elem = h_carray_index(p->seq, i);
  return elem;
}
// Variadic front end for h_seq_index_vpath: collects the arguments following
// `i` into a va_list and forwards them.
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
{
  va_list path;
  HParsedToken *found;

  va_start(path, i);
  found = h_seq_index_vpath(p, i, path);
  va_end(path);
  return found;
}
// Follow a path of indices through nested sequences.
//
// Starts with element `i` of `p`, then reads `int` indices from `va` until a
// negative value is seen, descending one level per index.  Every token
// indexed along the way must be a TT_SEQUENCE (asserted by h_seq_index).
// Returns the token reached at the end of the path.
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
{
  HParsedToken *ret = h_seq_index(p, i);
  int j;

  // Descend from the token reached so far; indexing the root `p` again on
  // every step would ignore all but the last path component.
  while((j = va_arg(va, int)) >= 0)
    ret = h_seq_index(ret, j);

  return ret;
}
// Append the single token `x` to the end of sequence token `xs`, which must
// be a non-NULL TT_SEQUENCE (asserted).
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);

  HParsedToken *item = (HParsedToken *)x;
  h_carray_append(xs->seq, item);
}
// Append every element of sequence `ys` to sequence `xs`.  Both must be
// non-NULL TT_SEQUENCE tokens (asserted).
//
// The element count of `ys` is read once up front: if `xs` and `ys` share the
// same underlying array, re-reading `used` on every iteration would chase a
// bound that grows with each append and never terminate.
void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);
  assert(ys != NULL);
  assert(ys->token_type == TT_SEQUENCE);

  const size_t n = ys->seq->used;
  for(size_t i=0; i<n; i++)
    h_carray_append(xs->seq, ys->seq->elements[i]);
}
// Flatten nested sequences. Always returns a newly made sequence token.
// If the input token is not a sequence, it is returned wrapped in a singleton
// sequence.  `p` must not be NULL (asserted).
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p)
{
  assert(p != NULL);

  HParsedToken *ret = h_make_seq(arena);
  switch(p->token_type) {
  case TT_SEQUENCE:
    // Flatten and append all.  The index must start at 0; it was previously
    // left uninitialized, which is undefined behavior.
    for(size_t i=0; i<p->seq->used; i++) {
      h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i)));
    }
    break;
  default:
    // Make singleton sequence.
    h_seq_snoc(ret, p);
    break;
  }

  return ret;
}

253
src/glue.h Normal file
View file

@ -0,0 +1,253 @@
//
// API additions for writing grammar and semantic actions more concisely
//
//
// Quick Overview:
//
// Grammars can be succinctly specified with the family of H_RULE macros.
// H_RULE defines a plain parser variable. H_ARULE additionally attaches a
// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE
// combine both.
//
// A few standard semantic actions are defined below. The H_ACT_APPLY macro
// allows semantic actions to be defined by "partial application" of
// a generic action to fixed parameters.
//
// The definition of more complex semantic actions will usually consist of
// extracting data from the given parse tree and constructing a token of custom
// type to represent the result. A number of functions and convenience macros
// are provided to capture the most common cases and idioms.
//
// See the leading comment blocks on the sections below for more details.
//
#ifndef HAMMER_GLUE__H
#define HAMMER_GLUE__H
#include <assert.h>
#include "hammer.h"
//
// Grammar specification
//
// H_RULE is simply a short-hand for the typical declaration and definition of
// a parser variable. See its plain definition below. The goal is to save
// horizontal space as well as to provide a clear and unified look together with
// the other macro variants that stays close to an abstract PEG or BNF grammar.
// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their
// combinations as they allow the definition of syntax to be given without
// intermingling it with the semantic specifications.
//
// H_ARULE defines a variable just like H_RULE but attaches a semantic action
// to the result of the parser via h_action. The action is expected to be
// named act_<rulename>.
//
// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool.
// The validation is expected to be named validate_<rulename>.
//
// H_VARULE combines H_RULE with both an action and a validation. The action is
// attached before the validation, i.e. the validation receives as input the
// result of the action.
//
// H_AVRULE is like H_VARULE but the action is attached outside the validation,
// i.e. the validation receives the uninterpreted AST as input.
//
#define H_RULE(rule, def) const HParser *rule = def
#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule)
#define H_VRULE(rule, def) const HParser *rule = \
h_attr_bool(def, validate_ ## rule)
#define H_VARULE(rule, def) const HParser *rule = \
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
#define H_AVRULE(rule, def) const HParser *rule = \
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
//
// Pre-fab semantic actions
//
// A collection of generally useful semantic actions is provided.
//
// h_act_ignore is the action equivalent of the parser combinator h_ignore. It
// simply causes the AST it is applied to to be replaced with NULL. This most
// importantly causes it to be elided from the result of a surrounding
// h_sequence.
//
// h_act_index is of note as it is not itself suitable to be passed to
// h_action. It is parameterized by an index to be picked from a sequence
// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY
// macro provides a concise way to define such a parameter-application wrapper.
//
// h_act_flatten acts on a token of possibly nested sequences by recursively
// flattening it into a single sequence. Cf. h_seq_flatten below.
//
// H_ACT_APPLY implements "partial application" for semantic actions. It
// defines a new action that supplies given parameters to a parameterized
// action such as h_act_index.
//
const HParsedToken *h_act_ignore(const HParseResult *p);
const HParsedToken *h_act_index(int i, const HParseResult *p);
const HParsedToken *h_act_flatten(const HParseResult *p);
// Define 'myaction' as a specialization of 'paction' by supplying the leading
// parameters.
#define H_ACT_APPLY(myaction, paction, ...) \
const HParsedToken *myaction(const HParseResult *p) { \
return paction(__VA_ARGS__, p); \
}
//
// Working with HParsedTokens
//
// The type HParsedToken represents a dynamically-typed universe of values.
// Declared below are constructors to turn ordinary values into their
// HParsedToken equivalents, extractors to retrieve the original values from
// inside an HParsedToken, and functions that inspect and modify tokens of
// sequence type directly.
//
// In addition, there are a number of short-hand macros that work with some
// conventions to eliminate common boilerplate. These conventions are listed
// below. Be sure to follow them if you want to use the respective macros.
//
// * The single argument to semantic actions should be called 'p'.
//
// The H_MAKE macros supply 'p->arena' to their underlying h_make
// counterparts. The H_FIELD macros supply 'p->ast' to their underlying
// H_INDEX counterparts.
//
// * For each custom token type, there should be a typedef for the
// corresponding value type.
//
// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to
// a pointer to the given type.
//
// * For each custom token type, say 'foo_t', there must be an integer
// constant 'TT_foo_t' to identify the token type. This constant must have a
// value greater than or equal to TT_USER.
//
// One idiom is to define an enum for all custom token types and to assign a
// value of TT_USER to the first element. This can be viewed as extending
// the HTokenType enum.
//
// The H_MAKE and H_ASSERT macros derive the name of the token type constant
// from the given type name.
//
//
// The H_ALLOC macro is useful for allocating values of custom token types.
//
// The H_MAKE family of macros construct tokens of a given type. The native
// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ.
// The form with no suffix is used for custom token types. This convention is
// also used for other macro and function families.
//
// The H_ASSERT family simply asserts that a given token has the expected type.
// It mainly serves as an implementation aid for H_CAST. Of note in that regard
// is that, unlike the standard 'assert' macro, these form _expressions_ that
// return the value of their token argument; thus they can be used in a
// "pass-through" fashion inside other expressions.
//
// The H_CAST family combines a type assertion with access to the
// statically-typed value inside a token.
//
// A number of functions h_seq_* operate on and inspect sequence tokens.
// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence.
// Therefore there are h_seq_snoc and h_seq_append to build up sequences.
//
// The macro families H_FIELD and H_INDEX combine index access on a sequence
// with a cast to the appropriate result type. H_FIELD is used to access the
// elements of the argument token 'p' in an action. H_INDEX allows any sequence
// token to be specified. Both macro families take an arbitrary number of index
// arguments, giving access to elements in nested sequences by path.
// These macros are very useful to avoid spaghetti chains of unchecked pointer
// dereferences.
//
// Standard short-hand for arena-allocating a variable in a semantic action.
#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP)))
// Token constructors...
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
HParsedToken *h_make_bytes(HArena *arena, size_t len);
HParsedToken *h_make_sint(HArena *arena, int64_t val);
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
// Standard short-hands to make tokens in an action.
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
#define H_MAKE_SEQ() h_make_seq(p->arena)
#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N)
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
// Extract (cast) type-specific value back from HParsedTokens...
// Pass-through assertion that a given token has the expected type.
// Both arguments are parenthesized so that arbitrary expressions (e.g. a
// conditional or a sum) can be passed. P is evaluated twice (once when NDEBUG
// is defined), so it should be free of side effects.
#define h_assert_type(T,P) (assert((P)->token_type == (HTokenType)(T)), (P))
// Convenience short-hand forms of h_assert_type.
#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK)
#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK)
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
// Assert expected type and return contained value.
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq)
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
// Sequence access...
// Return the length of a sequence.
size_t h_seq_len(const HParsedToken *p);
// Access a sequence's element array.
HParsedToken **h_seq_elements(const HParsedToken *p);
// Access a sequence element by index.
HParsedToken *h_seq_index(const HParsedToken *p, size_t i);
// Access an element in a nested sequence by a path of indices.
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...);
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
// Convenience macros combining (nested) index access and h_cast.
#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
// Standard short-hand to access and cast elements on a sequence token.
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__)
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
// Lower-level helper for h_seq_index.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
// Sequence modification...
// Add elements to a sequence.
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one
void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many
// XXX TODO: Remove elements from a sequence.
// Flatten nested sequences into one.
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p);
#endif

View file

@ -17,7 +17,7 @@
#include <assert.h>
#include <ctype.h>
#include <error.h>
#include <err.h>
#include <limits.h>
#include <stdarg.h>
#include <string.h>
@ -26,202 +26,14 @@
#include "allocator.h"
#include "parsers/parser_internal.h"
static guint djbhash(const uint8_t *buf, size_t len) {
guint hash = 5381;
// Multiplicative byte hash: starts from 5381 and, for each input byte,
// multiplies the running value by 33 and adds the byte (32-bit wraparound).
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t hash = 5381;
  for (size_t i = 0; i < len; i++) {
    hash = hash * 33 + buf[i];
  }
  return hash;
}
// Build an HCachedResult that pairs 'result' with the input-stream position
// currently recorded in 'state'.
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
  HCachedResult *entry = a_new(HCachedResult, 1);

  entry->input_stream = state->input_stream;
  entry->result = result;
  return entry;
}
// Library-internal: run 'parser' once without consulting the cache and apply
// the common post-processing.
//
// On success the result is tagged with the state's arena and with the number
// of bits consumed, computed from the stream position before and after the
// parse (the sign of the bit_offset delta depends on bit endianness). A NULL
// parser yields NULL, and an overrun of the input stream always yields NULL.
// TODO(thequux): these nested conditions are ugly. Factor this appropriately,
// so that it is clear which code is executed when.
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
  HParseResult *res = NULL;

  if (parser) {
    HInputStream bak = state->input_stream;
    res = parser->vtable->parse(parser->env, state);
    if (res) {
      res->arena = state->arena;
      if (state->input_stream.overrun) {
        res->bit_length = 0;
      } else {
        res->bit_length = ((state->input_stream.index - bak.index) << 3);
        if (state->input_stream.endianness & BIT_BIG_ENDIAN) {
          res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
        } else {
          res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
        }
      }
    }
  }

  if (state->input_stream.overrun)
    return NULL; // overrun is always failure.

#ifdef CONSISTENCY_CHECK
  // NOTE(review): 'key' is neither a parameter nor a local of this function;
  // confirm this branch still builds when CONSISTENCY_CHECK is defined.
  if (!res) {
    state->input_stream = INVALID;
    state->input_stream.input = key->input_pos.input;
  }
#endif

  return res;
}
// Look up the cache entry for key 'k', taking an in-progress left-recursion
// growth at that key into account.
//
// If no recursion head is registered for 'k', the cached value (possibly
// NULL) is returned as-is. Otherwise:
//  - if nothing is cached and k->parser is neither the head parser nor in the
//    head's involved set, a fresh PC_RIGHT value wrapping a result with a
//    NULL ast is returned;
//  - if k->parser is in the head's eval set, it is removed from that set,
//    re-parsed once, and the outcome is written into the cached value.
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
HRecursionHead *head = g_hash_table_lookup(state->recursion_heads, k);
if (!head) { // No heads found
return cached;
} else { // Some heads found
if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) {
// Nothing in the cache, and the key parser is not involved
HParseResult *tmp = a_new(HParseResult, 1);
tmp->ast = NULL; tmp->arena = state->arena;
HParserCacheValue *ret = a_new(HParserCacheValue, 1);
ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
return ret;
}
if (g_slist_find(head->eval_set, k->parser)) {
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
head->eval_set = g_slist_remove_all(head->eval_set, k->parser);
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
// we know that cached has an entry here, modify it
// NOTE(review): the guard below still handles a missing entry; a value
// allocated here is returned but not inserted into state->cache by this
// function.
if (!cached)
cached = a_new(HParserCacheValue, 1);
cached->value_type = PC_RIGHT;
cached->right = cached_result(state, tmp_res);
}
return cached;
}
}
/* Setting up the left recursion. We have the LR for the rule head;
 * we modify the involved_sets of all LRs in the stack, until we
 * see the current parser again.
 *
 * If 'rec_detect' has no head yet, one is created with 'p' as its head
 * parser and empty involved/eval sets. The LR stack is then walked from its
 * head: every entry whose rule is not 'p' is attached to that head and its
 * rule is prepended to the head's involved set. The walk stops at the first
 * entry whose rule is 'p', or at the end of the stack.
 */
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
  if (!rec_detect->head) {
    HRecursionHead *some = a_new(HRecursionHead, 1);
    some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
    rec_detect->head = some;
  }

  size_t i = 0;
  HLeftRec *lr = g_queue_peek_nth(state->lr_stack, i);
  while (lr && lr->rule != p) {
    lr->head = rec_detect->head;
    lr->head->involved_set = g_slist_prepend(lr->head->involved_set, (gpointer)lr->rule);
    // Advance to the next stack entry; without this the loop re-examines the
    // same entry forever. g_queue_peek_nth returns NULL past the end.
    lr = g_queue_peek_nth(state->lr_stack, ++i);
  }
}
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
 * future parse.
 */
// grow: repeatedly re-parse at key 'k' while each new result ends further
// into the input than the previously cached one.
//
// 'head' is registered in state->recursion_heads for the duration of the
// growth and removed again before returning. The cache must already hold a
// PC_RIGHT entry for 'k'; otherwise the process exits via errx().
// Returns the last cached result, or the previous result if the re-parse
// fails.
// NOTE(review): old_res, old_res->ast and tmp_res->ast are dereferenced
// without NULL checks — confirm callers guarantee they are set.
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
// Store the head into the recursion_heads
g_hash_table_replace(state->recursion_heads, k, head);
HParserCacheValue *old_cached = g_hash_table_lookup(state->cache, k);
if (!old_cached || PC_LEFT == old_cached->value_type)
errx(1, "impossible match");
HParseResult *old_res = old_cached->right->result;
// reset the eval_set of the head of the recursion at each beginning of growth
head->eval_set = head->involved_set;
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
if (tmp_res) {
// Progress means a strictly larger (index, bit_offset) pair on the ast.
if ((old_res->ast->index < tmp_res->ast->index) ||
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
g_hash_table_replace(state->cache, k, v);
return grow(k, state, head);
} else {
// we're done with growing, we can remove data from the recursion head
g_hash_table_remove(state->recursion_heads, k);
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
if (cached && PC_RIGHT == cached->value_type) {
return cached->right->result;
} else {
errx(1, "impossible match");
}
}
} else {
// The re-parse failed: stop growing and keep the previous result.
g_hash_table_remove(state->recursion_heads, k);
return old_res;
}
}
// Produce the answer for a left-recursive invocation once its seed parse is
// known.
//
// If k->parser is not the head parser of the recursion, the seed is returned
// unchanged. If it is the head, the seed is stored in the cache as a PC_RIGHT
// entry and then grown via grow(); a NULL seed is returned as NULL without
// growing. Exits via errx() if 'growable' has no head.
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
if (growable->head) {
if (growable->head->head_parser != k->parser) {
// not the head rule, so not growing
return growable->seed;
}
else {
// update cache
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
g_hash_table_replace(state->cache, k, v);
if (!growable->seed)
return NULL;
else
return grow(k, state, growable->head);
}
} else {
errx(1, "lrAnswer with no head");
}
}
/* Warth's recursion. Hi Alessandro! */
// Memoizing parse entry point: apply 'parser' at the current input position
// of 'state', going through the cache keyed on (input position, parser).
//
// On a cache miss a PC_LEFT placeholder is cached and pushed on the LR stack
// while the low-level parse runs. If no recursion head was attached to the
// placeholder in the meantime, the result is cached as PC_RIGHT and
// returned; otherwise it becomes the seed handed to lr_answer().
// On a hit, a PC_LEFT entry triggers setupLR() and returns the current seed;
// a PC_RIGHT entry restores the cached input position and returns the cached
// result.
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
HParserCacheKey *key = a_new(HParserCacheKey, 1);
key->input_pos = state->input_stream; key->parser = parser;
HParserCacheValue *m = recall(key, state);
// check to see if there is already a result for this object...
if (!m) {
// It doesn't exist, so create a dummy result to cache
HLeftRec *base = a_new(HLeftRec, 1);
base->seed = NULL; base->rule = parser; base->head = NULL;
g_queue_push_head(state->lr_stack, base);
// cache it
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
dummy->value_type = PC_LEFT; dummy->left = base;
g_hash_table_replace(state->cache, key, dummy);
// parse the input
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
// the base variable has passed equality tests with the cache
g_queue_pop_head(state->lr_stack);
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
if (NULL == base->head) {
HParserCacheValue *right = a_new(HParserCacheValue, 1);
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
g_hash_table_replace(state->cache, key, right);
return tmp_res;
} else {
base->seed = tmp_res;
HParseResult *res = lr_answer(key, state, base);
return res;
}
} else {
// it exists!
if (PC_LEFT == m->value_type) {
setupLR(parser, state, m->left);
return m->left->seed; // BUG: this might not be correct
} else {
state->input_stream = m->right->input_stream;
return m->right->result;
}
}
}
/* Helper function, since these lines appear in every parser */
typedef struct {
@ -230,35 +42,38 @@ typedef struct {
} HTwoParsers;
static guint cache_key_hash(gconstpointer key) {
// Hash callback for cache keys: hashes the raw bytes of an HParserCacheKey.
// NOTE(review): every byte of the struct is hashed, so any padding inside
// HParserCacheKey takes part — confirm keys are fully initialized.
static uint32_t cache_key_hash(const void* key) {
  const uint8_t *raw = key;
  return djbhash(raw, sizeof(HParserCacheKey));
}
static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) {
// Equality callback for cache keys: two keys are equal when their
// HParserCacheKey bytes compare identical.
static bool cache_key_equal(const void* key1, const void* key2) {
  const int diff = memcmp(key1, key2, sizeof(HParserCacheKey));
  return diff == 0;
}
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
// Convenience wrapper: parse 'length' bytes of 'input' with 'parser', passing
// system_allocator as the allocator argument of h_parse__m.
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
return h_parse__m(&system_allocator, parser, input, length);
}
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
// Set up a parse state...
HArena * arena = h_new_arena(0);
HArena * arena = h_new_arena(mm__, 0);
HParseState *parse_state = a_new_(arena, HParseState, 1);
parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func
cache_key_equal);// key_equal_func
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
cache_key_hash); // hash_func
parse_state->input_stream.input = input;
parse_state->input_stream.index = 0;
parse_state->input_stream.bit_offset = 8; // bit big endian
parse_state->input_stream.overrun = 0;
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
parse_state->input_stream.length = length;
parse_state->lr_stack = g_queue_new();
parse_state->recursion_heads = g_hash_table_new(cache_key_hash,
cache_key_equal);
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
g_queue_free(parse_state->lr_stack);
g_hash_table_destroy(parse_state->recursion_heads);
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
g_hash_table_destroy(parse_state->cache);
h_hashtable_free(parse_state->cache);
if (!res)
h_delete_arena(parse_state->arena);
@ -269,405 +84,4 @@ void h_parse_result_free(HParseResult *result) {
h_delete_arena(result->arena);
}
#ifdef INCLUDE_TESTS
#include "test_suite.h"
// h_token: the full 3-byte literal parses; a 2-byte prefix is rejected.
static void test_token(void) {
  const HParser *tok = h_token((const uint8_t*)"95\xa2", 3);

  g_check_parse_ok(tok, "95\xa2", 3, "<39.35.a2>");
  g_check_parse_failed(tok, "95", 2);
}
// h_ch: the exact byte 0xa2 parses; the neighbouring byte 0xa3 is rejected.
static void test_ch(void) {
  const HParser *byte_a2 = h_ch(0xa2);

  g_check_parse_ok(byte_a2, "\xa2", 1, "u0xa2");
  g_check_parse_failed(byte_a2, "\xa3", 1);
}
// h_ch_range('a','c'): 'b' parses; 'd' is rejected.
static void test_ch_range(void) {
  const HParser *a_to_c = h_ch_range('a', 'c');

  g_check_parse_ok(a_to_c, "b", 1, "u0x62");
  g_check_parse_failed(a_to_c, "d", 1);
}
//@MARK_START
// h_int64: 8 bytes parse to a signed value; 7 bytes are rejected.
static void test_int64(void) {
  const HParser *i64 = h_int64();

  g_check_parse_ok(i64, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
  g_check_parse_failed(i64, "\xff\xff\xff\xfe\x00\x00\x00", 7);
}
// h_int32: 4 bytes parse to a signed value; 3 bytes are rejected.
static void test_int32(void) {
  const HParser *i32 = h_int32();

  g_check_parse_ok(i32, "\xff\xfe\x00\x00", 4, "s-0x20000");
  g_check_parse_failed(i32, "\xff\xfe\x00", 3);
}
// h_int16: 2 bytes parse to a signed value; 1 byte is rejected.
static void test_int16(void) {
  const HParser *i16 = h_int16();

  g_check_parse_ok(i16, "\xfe\x00", 2, "s-0x200");
  g_check_parse_failed(i16, "\xfe", 1);
}
// h_int8: 1 byte parses to a signed value; empty input is rejected.
static void test_int8(void) {
  const HParser *i8 = h_int8();

  g_check_parse_ok(i8, "\x88", 1, "s-0x78");
  g_check_parse_failed(i8, "", 0);
}
// h_uint64: 8 bytes parse to an unsigned value; 7 bytes are rejected.
static void test_uint64(void) {
  const HParser *u64 = h_uint64();

  g_check_parse_ok(u64, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
  g_check_parse_failed(u64, "\x00\x00\x00\x02\x00\x00\x00", 7);
}
// h_uint32: 4 bytes parse to an unsigned value; 3 bytes are rejected.
static void test_uint32(void) {
  const HParser *u32 = h_uint32();

  g_check_parse_ok(u32, "\x00\x02\x00\x00", 4, "u0x20000");
  g_check_parse_failed(u32, "\x00\x02\x00", 3);
}
// h_uint16: 2 bytes parse to an unsigned value; 1 byte is rejected.
static void test_uint16(void) {
  const HParser *u16 = h_uint16();

  g_check_parse_ok(u16, "\x02\x00", 2, "u0x200");
  g_check_parse_failed(u16, "\x02", 1);
}
// h_uint8: 1 byte parses to an unsigned value; empty input is rejected.
static void test_uint8(void) {
  const HParser *u8 = h_uint8();

  g_check_parse_ok(u8, "\x78", 1, "u0x78");
  g_check_parse_failed(u8, "", 0);
}
//@MARK_END
// h_int_range(h_uint8(), 3, 10): 5 parses; 11 is rejected.
static void test_int_range(void) {
  const HParser *three_to_ten = h_int_range(h_uint8(), 3, 10);

  g_check_parse_ok(three_to_ten, "\x05", 1, "u0x5");
  g_check_parse_failed(three_to_ten, "\xb", 1);
}
#if 0
// Disabled (inside '#if 0'): 8 bytes should parse with h_float64; 7 bytes
// should be rejected.
// NOTE(review): the expected value is passed as the double 1.0, whereas every
// enabled test passes a string there — revisit before enabling.
static void test_float64(void) {
const HParser *float64_ = h_float64();
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
}
// Disabled (inside '#if 0'): 4 bytes should parse with h_float32; 3 bytes
// should be rejected.
// NOTE(review): the expected value is passed as the double 1.0, whereas every
// enabled test passes a string there — revisit before enabling.
static void test_float32(void) {
  const HParser *float32_ = h_float32();

  g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
  // The failure check takes an explicit input length like every other test.
  g_check_parse_failed(float32_, "\x3f\x80\x00", 3);
}
#endif
// h_whitespace: leading blanks or a tab before 'a' are skipped; a leading
// non-whitespace byte is rejected.
static void test_whitespace(void) {
  const HParser *ws_a = h_whitespace(h_ch('a'));

  g_check_parse_ok(ws_a, "a", 1, "u0x61");
  g_check_parse_ok(ws_a, " a", 2, "u0x61");
  // Two leading blanks, so that the literal really is 3 bytes long and this
  // case differs from the previous one.
  g_check_parse_ok(ws_a, "  a", 3, "u0x61");
  g_check_parse_ok(ws_a, "\ta", 2, "u0x61");
  g_check_parse_failed(ws_a, "_a", 2);
}
// h_left('a', ' '): "a " yields only the 'a'; input missing either part, or
// with a different second byte, is rejected.
static void test_left(void) {
  const HParser *a_then_space = h_left(h_ch('a'), h_ch(' '));

  g_check_parse_ok(a_then_space, "a ", 2, "u0x61");
  g_check_parse_failed(a_then_space, "a", 1);
  g_check_parse_failed(a_then_space, " ", 1);
  g_check_parse_failed(a_then_space, "ab", 2);
}
// h_right(' ', 'a'): " a" yields only the 'a'; input missing either part, or
// with a different first byte, is rejected.
static void test_right(void) {
  const HParser *space_then_a = h_right(h_ch(' '), h_ch('a'));

  g_check_parse_ok(space_then_a, " a", 2, "u0x61");
  g_check_parse_failed(space_then_a, "a", 1);
  g_check_parse_failed(space_then_a, " ", 1);
  g_check_parse_failed(space_then_a, "ba", 2);
}
// h_middle(' ', 'a', ' '): " a " yields only the 'a'; any input missing or
// altering one of the three parts is rejected.
static void test_middle(void) {
  const HParser *padded_a = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));

  g_check_parse_ok(padded_a, " a ", 3, "u0x61");
  g_check_parse_failed(padded_a, "a", 1);
  g_check_parse_failed(padded_a, " ", 1);
  g_check_parse_failed(padded_a, " a", 2);
  g_check_parse_failed(padded_a, "a ", 2);
  g_check_parse_failed(padded_a, " b ", 3);
  g_check_parse_failed(padded_a, "ba ", 3);
  g_check_parse_failed(padded_a, " ab", 3);
}
#include <ctype.h>
const HParsedToken* upcase(const HParseResult *p) {
switch(p->ast->token_type) {
case TT_SEQUENCE:
{
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
ret->token_type = TT_SEQUENCE;
for (size_t i=0; i<p->ast->seq->used; ++i) {
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
tmp->token_type = TT_UINT;
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
h_carray_append(seq, tmp);
} else {
h_carray_append(seq, p->ast->seq->elements[i]);
}
}
ret->seq = seq;
return (const HParsedToken*)ret;
}
case TT_UINT:
{
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
ret->token_type = TT_UINT;
ret->uint = toupper(p->ast->uint);
return (const HParsedToken*)ret;
}
default:
return p->ast;
}
}
static void test_action(void) {
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
h_ch('A'),
NULL),
h_choice(h_ch('b'),
h_ch('B'),
NULL),
NULL),
upcase);
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
g_check_parse_failed(action_, "XX", 2);
}
// h_in over {a, b, c}: 'b' parses; 'd' is rejected.
static void test_in(void) {
  uint8_t options[3] = { 'a', 'b', 'c' };
  const HParser *member = h_in(options, 3);

  g_check_parse_ok(member, "b", 1, "u0x62");
  g_check_parse_failed(member, "d", 1);
}
// h_not_in over {a, b, c}: 'd' parses; 'a' is rejected.
static void test_not_in(void) {
  uint8_t options[3] = { 'a', 'b', 'c' };
  const HParser *non_member = h_not_in(options, 3);

  g_check_parse_ok(non_member, "d", 1, "u0x64");
  g_check_parse_failed(non_member, "a", 1);
}
// h_end_p after 'a': "a" parses; "aa" is rejected.
static void test_end_p(void) {
  const HParser *a_then_end = h_sequence(h_ch('a'), h_end_p(), NULL);

  g_check_parse_ok(a_then_end, "a", 1, "(u0x61)");
  g_check_parse_failed(a_then_end, "aa", 2);
}
// h_nothing_p: the one-byte input "a" is rejected.
static void test_nothing_p(void) {
  const HParser *never = h_nothing_p();

  g_check_parse_failed(never, "a", 1);
}
// h_sequence: both elements must match in order; with h_whitespace on the
// second element, blanks between the two are skipped.
static void test_sequence(void) {
  const HParser *plain = h_sequence(h_ch('a'), h_ch('b'), NULL);
  const HParser *spaced = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);

  g_check_parse_ok(plain, "ab", 2, "(u0x61 u0x62)");
  g_check_parse_failed(plain, "a", 1);
  g_check_parse_failed(plain, "b", 1);
  g_check_parse_ok(spaced, "ab", 2, "(u0x61 u0x62)");
  g_check_parse_ok(spaced, "a b", 3, "(u0x61 u0x62)");
  // Two blanks, so that the literal really is 4 bytes long and this case
  // differs from the previous one.
  g_check_parse_ok(spaced, "a  b", 4, "(u0x61 u0x62)");
}
// h_choice('a', 'b'): either alternative parses; 'c' is rejected.
static void test_choice(void) {
  const HParser *a_or_b = h_choice(h_ch('a'), h_ch('b'), NULL);

  g_check_parse_ok(a_or_b, "a", 1, "u0x61");
  g_check_parse_ok(a_or_b, "b", 1, "u0x62");
  g_check_parse_failed(a_or_b, "c", 1);
}
// h_butnot: 'a' parses unless the input is "ab"; a digit parses unless it
// is '6'.
static void test_butnot(void) {
  const HParser *a_not_ab = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
  const HParser *digit_not_6 = h_butnot(h_ch_range('0', '9'), h_ch('6'));

  g_check_parse_ok(a_not_ab, "a", 1, "u0x61");
  g_check_parse_failed(a_not_ab, "ab", 2);
  g_check_parse_ok(a_not_ab, "aa", 2, "u0x61");
  g_check_parse_failed(digit_not_6, "6", 1);
}
// h_difference("ab", 'a'): "ab" parses as the token; "a" alone is rejected.
static void test_difference(void) {
  const HParser *ab_minus_a = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));

  g_check_parse_ok(ab_minus_a, "ab", 2, "<61.62>");
  g_check_parse_failed(ab_minus_a, "a", 1);
}
// h_xor of ['0'..'6'] and ['5'..'9']: a byte in exactly one range parses; a
// byte in both ('5') or in neither ('a') is rejected.
static void test_xor(void) {
  const HParser *one_range_only = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));

  g_check_parse_ok(one_range_only, "0", 1, "u0x30");
  g_check_parse_ok(one_range_only, "9", 1, "u0x39");
  g_check_parse_failed(one_range_only, "5", 1);
  g_check_parse_failed(one_range_only, "a", 1);
}
// h_many: collects the leading run of 'a'/'b' bytes; an input with no such
// run still parses and yields an empty sequence.
static void test_many(void) {
  const HParser *ab_run = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));

  g_check_parse_ok(ab_run, "adef", 4, "(u0x61)");
  g_check_parse_ok(ab_run, "bdef", 4, "(u0x62)");
  g_check_parse_ok(ab_run, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
  g_check_parse_ok(ab_run, "daabbabadef", 11, "()");
}
// h_many1: collects the leading run of 'a'/'b' bytes; an input that does not
// start with one is rejected.
static void test_many1(void) {
  const HParser *ab_run1 = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));

  g_check_parse_ok(ab_run1, "adef", 4, "(u0x61)");
  g_check_parse_ok(ab_run1, "bdef", 4, "(u0x62)");
  g_check_parse_ok(ab_run1, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
  g_check_parse_failed(ab_run1, "daabbabadef", 11);
}
// h_repeat_n(..., 2): two leading 'a'/'b' bytes parse; one or none is
// rejected.
static void test_repeat_n(void) {
  const HParser *two_ab = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);

  g_check_parse_failed(two_ab, "adef", 4);
  g_check_parse_ok(two_ab, "abdef", 5, "(u0x61 u0x62)");
  g_check_parse_failed(two_ab, "dabdef", 6);
}
// h_optional inside a sequence: the middle element may be 'b', 'c' or
// absent (reported as null); any other middle byte, or a missing trailing
// 'd', is rejected.
static void test_optional(void) {
  const HParser *a_opt_d = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);

  g_check_parse_ok(a_opt_d, "abd", 3, "(u0x61 u0x62 u0x64)");
  g_check_parse_ok(a_opt_d, "acd", 3, "(u0x61 u0x63 u0x64)");
  g_check_parse_ok(a_opt_d, "ad", 2, "(u0x61 null u0x64)");
  g_check_parse_failed(a_opt_d, "aed", 3);
  g_check_parse_failed(a_opt_d, "ab", 2);
  g_check_parse_failed(a_opt_d, "ac", 2);
}
// h_ignore inside a sequence: the 'b' must be present in the input but is
// left out of the result.
static void test_ignore(void) {
  const HParser *a_skipb_c = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);

  g_check_parse_ok(a_skipb_c, "abc", 3, "(u0x61 u0x63)");
  g_check_parse_failed(a_skipb_c, "ac", 2);
}
// h_sepBy1: comma-separated digits 1..3 parse to a sequence without the
// separators; a single digit is accepted too.
static void test_sepBy1(void) {
  const HParser *digits_by_comma = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));

  g_check_parse_ok(digits_by_comma, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
  g_check_parse_ok(digits_by_comma, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
  g_check_parse_ok(digits_by_comma, "1,3", 3, "(u0x31 u0x33)");
  g_check_parse_ok(digits_by_comma, "3", 1, "(u0x33)");
}
// h_epsilon_p in the middle, at the start and at the end of a sequence: it
// contributes no element to the result in any of the three positions.
static void test_epsilon_p(void) {
  const HParser *eps_middle = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
  const HParser *eps_first = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
  const HParser *eps_last = h_sequence(h_ch('a'), h_epsilon_p(), NULL);

  g_check_parse_ok(eps_middle, "ab", 2, "(u0x61 u0x62)");
  g_check_parse_ok(eps_first, "a", 1, "(u0x61)");
  g_check_parse_ok(eps_last, "a", 1, "(u0x61)");
}
// Placeholder: registered with the test suite but performs no checks.
// TODO: add cases exercising h_attr_bool.
static void test_attr_bool(void) {
}
// h_and: the guarded parser must match at that position, but the result
// holds only the other sequence element.
static void test_and(void) {
  const HParser *peek0_then_0 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
  const HParser *peek0_then_1 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
  const HParser *one_then_peek2 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);

  g_check_parse_ok(peek0_then_0, "0", 1, "(u0x30)");
  g_check_parse_failed(peek0_then_1, "0", 1);
  g_check_parse_ok(one_then_peek2, "12", 2, "(u0x31)");
}
// h_not: without it, the choice takes '+' first and "a++b" is rejected;
// guarding '+' with h_not('+') lets "a++b" parse via the "++" token.
static void test_not(void) {
  const HParser *unguarded = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
  const HParser *guarded = h_sequence(h_ch('a'),
                                      h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
                                               h_token((const uint8_t*)"++", 2),
                                               NULL), h_ch('b'), NULL);

  g_check_parse_ok(unguarded, "a+b", 3, "(u0x61 u0x2b u0x62)");
  g_check_parse_failed(unguarded, "a++b", 4);
  g_check_parse_ok(guarded, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
  g_check_parse_ok(guarded, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
}
// Register every parser test case under the "/core/parser/" path prefix.
// Cases are registered in table order; the float cases stay disabled.
void register_parser_tests(void) {
  static const struct {
    const char *path;
    void (*func)(void);
  } cases[] = {
    { "/core/parser/token", test_token },
    { "/core/parser/ch", test_ch },
    { "/core/parser/ch_range", test_ch_range },
    { "/core/parser/int64", test_int64 },
    { "/core/parser/int32", test_int32 },
    { "/core/parser/int16", test_int16 },
    { "/core/parser/int8", test_int8 },
    { "/core/parser/uint64", test_uint64 },
    { "/core/parser/uint32", test_uint32 },
    { "/core/parser/uint16", test_uint16 },
    { "/core/parser/uint8", test_uint8 },
    { "/core/parser/int_range", test_int_range },
#if 0
    { "/core/parser/float64", test_float64 },
    { "/core/parser/float32", test_float32 },
#endif
    { "/core/parser/whitespace", test_whitespace },
    { "/core/parser/left", test_left },
    { "/core/parser/right", test_right },
    { "/core/parser/middle", test_middle },
    { "/core/parser/action", test_action },
    { "/core/parser/in", test_in },
    { "/core/parser/not_in", test_not_in },
    { "/core/parser/end_p", test_end_p },
    { "/core/parser/nothing_p", test_nothing_p },
    { "/core/parser/sequence", test_sequence },
    { "/core/parser/choice", test_choice },
    { "/core/parser/butnot", test_butnot },
    { "/core/parser/difference", test_difference },
    { "/core/parser/xor", test_xor },
    { "/core/parser/many", test_many },
    { "/core/parser/many1", test_many1 },
    { "/core/parser/repeat_n", test_repeat_n },
    { "/core/parser/optional", test_optional },
    { "/core/parser/sepBy1", test_sepBy1 },
    { "/core/parser/epsilon_p", test_epsilon_p },
    { "/core/parser/attr_bool", test_attr_bool },
    { "/core/parser/and", test_and },
    { "/core/parser/not", test_not },
    { "/core/parser/ignore", test_ignore },
  };

  for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
    g_test_add_func(cases[i].path, cases[i].func);
}
#endif // #ifdef INCLUDE_TESTS

View file

@ -17,7 +17,7 @@
#ifndef HAMMER_HAMMER__H
#define HAMMER_HAMMER__H
#include <glib.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include "allocator.h"
@ -31,6 +31,12 @@ typedef int bool;
typedef struct HParseState_ HParseState;
// Selects a parsing backend (passed to h_compile, recorded in HBackendResults).
// PB_MIN aliases the first backend; PB_MAX is the value right after the last
// one, presumably an exclusive upper bound for iterating backends -- confirm.
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_MAX
} HParserBackend;
typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_;
TT_NONE = 1,
@ -39,8 +45,7 @@ typedef enum HTokenType_ {
TT_UINT = 8,
TT_SEQUENCE = 16,
TT_ERR = 32,
TT_USER = 64,
TT_MAX = 128
TT_USER = 64
} HTokenType;
typedef struct HCountedArray_ {
@ -50,13 +55,15 @@ typedef struct HCountedArray_ {
struct HParsedToken_ **elements;
} HCountedArray;
// A byte string given as pointer plus length; this is the type of the `bytes`
// member of HParsedToken.
typedef struct HBytes_ {
const uint8_t *token; // start of the bytes
size_t len; // presumably a count of bytes at `token` -- confirm
} HBytes;
typedef struct HParsedToken_ {
HTokenType token_type;
union {
struct {
const uint8_t *token;
size_t len;
} bytes;
HBytes bytes;
int64_t sint;
uint64_t uint;
double dbl;
@ -114,18 +121,76 @@ typedef struct HParser_ {
void *env;
} HParser;
// {{{ Stuff for benchmarking
// One benchmark testcase; h_benchmark takes a pointer to these.
// NOTE(review): how the end of a testcase array is marked is not visible
// here -- check the h_benchmark implementation.
typedef struct HParserTestcase_ {
unsigned char* input; // bytes to parse
size_t length; // presumably the number of bytes at `input` -- confirm
char* output_unambiguous; // presumably the expected h_write_result_unamb text -- confirm
} HParserTestcase;
// Outcome of running one testcase. `success` tells which member of the
// anonymous union is meaningful.
typedef struct HCaseResult_ {
bool success;
union {
const char* actual_results; // on failure, filled in with the results of h_write_result_unamb
size_t parse_time; // on success, filled in with time for a single parse, in nsec
};
} HCaseResult;
// Benchmark results gathered for a single backend.
typedef struct HBackendResults_ {
HParserBackend backend; // which backend these results belong to
bool compile_success; // presumably whether h_compile succeeded for this backend -- confirm
size_t n_testcases; // presumably the number of entries in `cases` -- confirm
size_t failed_testcases; // actually a count...
HCaseResult *cases; // per-testcase outcomes
} HBackendResults;
// Top-level value returned by h_benchmark and consumed by h_benchmark_report.
typedef struct HBenchmarkResults_ {
size_t len; // presumably the number of entries in `results` -- confirm
HBackendResults *results; // per-backend results
} HBenchmarkResults;
// }}}
// {{{ Preprocessor definitions
// Declares a zero-argument function `name` together with its allocator-taking
// twin `name__m(HAllocator* mm__)`. The final declaration has no trailing
// semicolon; the use site supplies it.
#define HAMMER_FN_DECL_NOARG(rtype_t, name) \
rtype_t name(void); \
rtype_t name##__m(HAllocator* mm__)
// Declares `name(args...)` and `name__m(HAllocator* mm__, args...)`.
// At least one argument must be given; use HAMMER_FN_DECL_NOARG otherwise.
#define HAMMER_FN_DECL(rtype_t, name, ...) \
rtype_t name(__VA_ARGS__); \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__)
// Same as HAMMER_FN_DECL, with `attr` appended to both declarations.
#define HAMMER_FN_DECL_ATTR(attr, rtype_t, name, ...) \
rtype_t name(__VA_ARGS__) attr; \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr
// Declares the four entry points of a variadic function:
//   name       (fixed args, ...)            -- plain
//   name__m    (mm__, fixed args, ...)      -- explicit allocator
//   name__mv   (mm__, fixed args, va_list)  -- explicit allocator, va_list
//   name__v    (fixed args, va_list)        -- va_list
#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \
rtype_t name(__VA_ARGS__, ...); \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
rtype_t name##__v(__VA_ARGS__, va_list ap)
// Same as HAMMER_FN_DECL_VARARGS, with `attr` appended to the two `...`
// declarations.
// Note: this drops the attributes on the floor for the va_list versions
// (__mv and __v).
#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \
rtype_t name(__VA_ARGS__, ...) attr; \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
rtype_t name##__v(__VA_ARGS__, va_list ap)
// }}}
/**
* Top-level function to call a parser that has been built over some
* piece of input (of known size).
*/
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length);
/**
* Given a string, returns a parser that parses that string value.
*
* Result token type: TT_BYTES
*/
const HParser* h_token(const uint8_t *str, const size_t len);
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
/**
* Given a single character, returns a parser that parses that
@ -133,7 +198,7 @@ const HParser* h_token(const uint8_t *str, const size_t len);
*
* Result token type: TT_UINT
*/
const HParser* h_ch(const uint8_t c);
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
/**
* Given two single-character bounds, lower and upper, returns a parser
@ -142,14 +207,14 @@ const HParser* h_ch(const uint8_t c);
*
* Result token type: TT_UINT
*/
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
/**
* Given an integer parser, p, and two integer bounds, lower and upper,
* returns a parser that parses an integral value within the range
* [lower, upper] (inclusive).
*/
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
/**
* Returns a parser that parses the specified number of bits. sign ==
@ -157,63 +222,63 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
*
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
*/
const HParser* h_bits(size_t len, bool sign);
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
/**
* Returns a parser that parses a signed 8-byte integer value.
*
* Result token type: TT_SINT
*/
const HParser* h_int64();
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
/**
* Returns a parser that parses a signed 4-byte integer value.
*
* Result token type: TT_SINT
*/
const HParser* h_int32();
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
/**
* Returns a parser that parses a signed 2-byte integer value.
*
* Result token type: TT_SINT
*/
const HParser* h_int16();
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
/**
* Returns a parser that parses a signed 1-byte integer value.
*
* Result token type: TT_SINT
*/
const HParser* h_int8();
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
/**
* Returns a parser that parses an unsigned 8-byte integer value.
*
* Result token type: TT_UINT
*/
const HParser* h_uint64();
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
/**
* Returns a parser that parses an unsigned 4-byte integer value.
*
* Result token type: TT_UINT
*/
const HParser* h_uint32();
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
/**
* Returns a parser that parses an unsigned 2-byte integer value.
*
* Result token type: TT_UINT
*/
const HParser* h_uint16();
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
/**
* Returns a parser that parses an unsigned 1-byte integer value.
*
* Result token type: TT_UINT
*/
const HParser* h_uint8();
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
/**
* Given another parser, p, returns a parser that skips any whitespace
@ -221,7 +286,7 @@ const HParser* h_uint8();
*
* Result token type: p's result type
*/
const HParser* h_whitespace(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -229,7 +294,7 @@ const HParser* h_whitespace(const HParser* p);
*
* Result token type: p's result type
*/
const HParser* h_left(const HParser* p, const HParser* q);
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -237,7 +302,7 @@ const HParser* h_left(const HParser* p, const HParser* q);
*
* Result token type: q's result type
*/
const HParser* h_right(const HParser* p, const HParser* q);
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
/**
* Given three parsers, p, x, and q, returns a parser that parses them in
@ -245,7 +310,7 @@ const HParser* h_right(const HParser* p, const HParser* q);
*
* Result token type: x's result type
*/
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
/**
* Given another parser, p, and a function f, returns a parser that
@ -253,21 +318,21 @@ const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
*
* Result token type: any
*/
const HParser* h_action(const HParser* p, const HAction a);
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
/**
* Parse a single character in the given charset.
*
* Result token type: TT_UINT
*/
const HParser* h_in(const uint8_t *charset, size_t length);
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
/**
* Parse a single character *NOT* in the given charset.
*
* Result token type: TT_UINT
*/
const HParser* h_not_in(const uint8_t *charset, size_t length);
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
/**
* A no-argument parser that succeeds if there is no more input to
@ -275,14 +340,14 @@ const HParser* h_not_in(const uint8_t *charset, size_t length);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
const HParser* h_end_p();
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
/**
* This parser always fails.
*
* Result token type: NULL. Always.
*/
const HParser* h_nothing_p();
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
/**
* Given a null-terminated list of parsers, apply each parser in order.
@ -290,7 +355,7 @@ const HParser* h_nothing_p();
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
/**
* Given an array of parsers, p_array, apply each parser in order. The
@ -299,7 +364,7 @@ const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
*
* Result token type: The type of the first successful parser's result.
*/
const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -309,7 +374,7 @@ const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
*
* Result token type: p1's result type.
*/
const HParser* h_butnot(const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -319,7 +384,7 @@ const HParser* h_butnot(const HParser* p1, const HParser* p2);
*
* Result token type: p1's result type.
*/
const HParser* h_difference(const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
@ -327,7 +392,7 @@ const HParser* h_difference(const HParser* p1, const HParser* p2);
*
* Result token type: The type of the result of whichever parser succeeded.
*/
const HParser* h_xor(const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
/**
* Given a parser, p, this parser succeeds for zero or more repetitions
@ -335,7 +400,7 @@ const HParser* h_xor(const HParser* p1, const HParser* p2);
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_many(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
/**
* Given a parser, p, this parser succeeds for one or more repetitions
@ -343,7 +408,7 @@ const HParser* h_many(const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_many1(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
/**
* Given a parser, p, this parser succeeds for exactly N repetitions
@ -351,7 +416,7 @@ const HParser* h_many1(const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_repeat_n(const HParser* p, const size_t n);
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
/**
* Given a parser, p, this parser succeeds with the value p parsed or
@ -359,7 +424,7 @@ const HParser* h_repeat_n(const HParser* p, const size_t n);
*
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
*/
const HParser* h_optional(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
/**
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
@ -367,7 +432,7 @@ const HParser* h_optional(const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
const HParser* h_ignore(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
/**
* Given a parser, p, and a parser for a separator, sep, this parser
@ -378,7 +443,7 @@ const HParser* h_ignore(const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_sepBy(const HParser* p, const HParser* sep);
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
/**
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
@ -386,14 +451,14 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep);
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_sepBy1(const HParser* p, const HParser* sep);
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
/**
* This parser always returns a zero length match, i.e., empty string.
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
const HParser* h_epsilon_p();
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
/**
* This parser applies its first argument to read an unsigned integer
@ -404,7 +469,7 @@ const HParser* h_epsilon_p();
*
* Result token type: TT_SEQUENCE
*/
const HParser* h_length_value(const HParser* length, const HParser* value);
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
/**
* This parser attaches a predicate function, which returns true or
@ -419,7 +484,7 @@ const HParser* h_length_value(const HParser* length, const HParser* value);
*
* Result token type: p's result type if pred succeeded, NULL otherwise.
*/
const HParser* h_attr_bool(const HParser* p, HPredicate pred);
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
/**
* The 'and' parser asserts that a conditional syntax is satisfied,
@ -436,7 +501,7 @@ const HParser* h_attr_bool(const HParser* p, HPredicate pred);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
const HParser* h_and(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
/**
* The 'not' parser asserts that a conditional syntax is *not*
@ -456,7 +521,7 @@ const HParser* h_and(const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
const HParser* h_not(const HParser* p);
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
/**
* Create a parser that just calls out to another, as yet unknown,
@ -467,35 +532,44 @@ const HParser* h_not(const HParser* p);
* Result token type: the type of whatever parser is bound to it with
* bind_indirect().
*/
HParser *h_indirect();
HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
/**
* Set the inner parser of an indirect. See comments on indirect for
* details.
*/
void h_bind_indirect(HParser* indirect, const HParser* inner);
HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
/**
* Free the memory allocated to an HParseResult when it is no longer needed.
*/
void h_parse_result_free(HParseResult *result);
HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
// Some debugging aids
/**
* Format token into a compact unambiguous form. Useful for parser test cases.
* Caller is responsible for freeing the result.
*/
char* h_write_result_unamb(const HParsedToken* tok);
HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
/**
* Format token to the given output stream. Indent starting at
* [indent] spaces, with [delta] spaces between levels.
*/
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
/**
* Build parse tables for the given parser backend. See the
* documentation for the parser backend in question for information
* about the [params] parameter, or just pass in NULL for the defaults.
*
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
*/
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
/**
* TODO: Document me
*/
HBitWriter *h_bit_writer_new(void);
HBitWriter *h_bit_writer_new(HAllocator* mm__);
/**
* TODO: Document me
@ -507,11 +581,17 @@ void h_bit_writer_put(HBitWriter* w, unsigned long long data, size_t nbits);
* Must not free [w] until you're done with the result.
* [len] is in bytes.
*/
const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
/**
* TODO: Document me
*/
void h_bit_writer_free(HBitWriter* w);
// {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
// }}}
#endif // #ifndef HAMMER_HAMMER__H

View file

@ -17,7 +17,6 @@
#ifndef HAMMER_INTERNAL__H
#define HAMMER_INTERNAL__H
#include <glib.h>
#include <err.h>
#include "hammer.h"
@ -29,9 +28,28 @@
errx(1, "Assertion failed (programmer error): %s", message); \
} while(0)
#endif
// Defines the zero-argument entry point `name` as a forwarder to `name##__m`
// with the system allocator, then opens the definition of `name##__m`; the
// function body must directly follow the macro invocation.
// system_allocator is an HAllocator object while name##__m takes a pointer,
// so its address is passed, as every hand-written forwarder does.
#define HAMMER_FN_IMPL_NOARGS(rtype_t, name) \
  rtype_t name(void) { \
    return name##__m(&system_allocator); \
  } \
  rtype_t name##__m(HAllocator* mm__)
// Functions with arguments are difficult to forward cleanly. Alas, we will need to forward them manually.
// Allocates room for `count` objects of `type` through the allocator named
// `mm__`, which must be in scope at the call site. The macro itself neither
// zeroes the memory, nor checks the result, nor guards sizeof(type)*(count)
// against overflow.
#define h_new(type, count) ((type*)(mm__->alloc(mm__, sizeof(type)*(count))))
// Releases `addr` through the allocator named `mm__`, which must be in scope
// at the call site.
#define h_free(addr) (mm__->free(mm__, (addr)))
#define false 0
#define true 1
// This is going to be generally useful.
// Releases `ptr` through `allocator`'s own free callback. Unlike h_free, the
// allocator is passed explicitly, so no local `mm__` is required.
static inline void h_generic_free(HAllocator *allocator, void* ptr) {
allocator->free(allocator, ptr);
}
// Allocator used by the entry points that do not take an explicit HAllocator.
// Declared extern so that including this header does not create a separate
// (tentative) definition in every translation unit, which fails to link when
// common symbols are disabled.
// NOTE(review): requires exactly one definition in a .c file (presumably
// system_allocator.c) -- confirm.
extern HAllocator system_allocator;
typedef struct HInputStream_ {
// This should be considered to be a really big value type.
const uint8_t *input;
@ -42,6 +60,36 @@ typedef struct HInputStream_ {
char overrun;
} HInputStream;
// Node of a singly linked list: one opaque element pointer and the link to
// the next node.
typedef struct HSlistNode_ {
void* elem;
struct HSlistNode_ *next;
} HSlistNode;
// Singly linked list header (see the h_slist_* functions).
typedef struct HSlist_ {
HSlistNode *head; // first node
struct HArena_ *arena; // presumably the arena nodes are allocated from -- confirm in h_slist_new
} HSlist;
// Hash value type and the callback signatures taken by h_hashtable_new.
typedef unsigned int HHashValue;
// Maps a key to its hash value.
typedef HHashValue (*HHashFunc)(const void* key);
// Compares two keys; presumably returns true when they are equal -- confirm.
typedef bool (*HEqualFunc)(const void* key1, const void* key2);
// One key/value entry of an HHashTable; entries can be linked through `next`.
typedef struct HHashTableEntry_ {
struct HHashTableEntry_ *next; // presumably the collision chain -- confirm
void* key;
void* value;
HHashValue hashval; // presumably the cached hash of `key` -- confirm
} HHashTableEntry;
// Hash table state manipulated by the h_hashtable_* functions.
typedef struct HHashTable_ {
HHashTableEntry *contents; // entry storage
HHashFunc hashFunc; // key hashing callback
HEqualFunc equalFunc; // key comparison callback
size_t capacity; // presumably the number of slots in `contents` -- confirm
size_t used; // presumably the number of stored entries -- confirm
HArena *arena; // arena given to h_hashtable_new
} HHashTable;
/* The state of the parser.
*
* Members:
@ -54,13 +102,19 @@ typedef struct HInputStream_ {
*/
struct HParseState_ {
GHashTable *cache;
HHashTable *cache;
HInputStream input_stream;
HArena * arena;
GQueue *lr_stack;
GHashTable *recursion_heads;
HSlist *lr_stack;
HHashTable *recursion_heads;
};
// Entry points a parsing backend provides (e.g. h__packrat_backend_vtable).
typedef struct HParserBackendVTable_ {
// Same arguments as h_compile__m minus the backend selector.
// NOTE(review): presumably the same 0 / -1 return convention as h_compile -- confirm.
int (*compile)(HAllocator *mm__, const HParser* parser, const void* params);
// Runs `parser` against the input held in `parse_state`.
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state);
} HParserBackendVTable;
/* The (location, parser) tuple used to key the cache.
*/
@ -90,8 +144,8 @@ typedef enum HParserCacheValueType_ {
*/
typedef struct HRecursionHead_ {
const HParser *head_parser;
GSList *involved_set;
GSList *eval_set;
HSlist *involved_set;
HSlist *eval_set;
} HRecursionHead;
@ -125,23 +179,23 @@ typedef struct HParserCacheValue_t {
};
} HParserCacheValue;
typedef unsigned int *HCharset;
// This file provides the logical inverse of bitreader.c
// State of a bit writer (created by h_bit_writer_new, which takes the
// allocator).
struct HBitWriter_ {
uint8_t* buf; // output buffer
HAllocator *mm__; // presumably the allocator that owns `buf` -- confirm
size_t index; // presumably the current byte position in `buf` -- confirm
size_t capacity; // presumably the allocated size of `buf` -- confirm
char bit_offset; // unlike in bit_reader, this is always the number
// of used bits in the current byte. i.e., 0 always
// means that 8 bits are available for use.
char flags;
};
static inline HCharset new_charset() {
HCharset cs = g_new0(unsigned int, 256 / sizeof(unsigned int));
return cs;
}
// }}}
static inline int charset_isset(HCharset cs, uint8_t pos) {
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
}
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
cs[pos / sizeof(*cs)] =
val
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
}
// Backends {{{
extern HParserBackendVTable h__packrat_backend_vtable;
// }}}
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
@ -154,10 +208,24 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size);
HCountedArray *h_carray_new(HArena * arena);
void h_carray_append(HCountedArray *array, void* item);
HSlist* h_slist_new(HArena *arena);
HSlist* h_slist_copy(HSlist *slist);
void* h_slist_pop(HSlist *slist);
void h_slist_push(HSlist *slist, void* item);
bool h_slist_find(HSlist *slist, const void* item);
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
void h_slist_free(HSlist *slist);
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc);
void* h_hashtable_get(HHashTable* ht, void* key);
void h_hashtable_put(HHashTable* ht, void* key, void* value);
int h_hashtable_present(HHashTable* ht, void* key);
void h_hashtable_del(HHashTable* ht, void* key);
void h_hashtable_free(HHashTable* ht);
#if 0
#include <malloc.h>
#define arena_malloc(a, s) malloc(s)
#include <stdlib.h>
#define h_arena_malloc(a, s) malloc(s)
#endif
#endif // #ifndef HAMMER_INTERNAL__H

View file

@ -23,10 +23,14 @@ static const HParserVtable action_vt = {
.parse = parse_action,
};
const HParser* h_action(const HParser* p, const HAction a) {
HParser *res = g_new(HParser, 1);
const HParser* h_action(const HParser* p, const HAction a) {
return h_action__m(&system_allocator, p, a);
}
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParser *res = h_new(HParser, 1);
res->vtable = &action_vt;
HParseAction *env = g_new(HParseAction, 1);
HParseAction *env = h_new(HParseAction, 1);
env->p = p;
env->action = a;
res->env = (void*)env;

View file

@ -13,9 +13,13 @@ static const HParserVtable and_vt = {
.parse = parse_and,
};
// Convenience form of h_and__m that allocates with the system allocator.
const HParser* h_and(const HParser* p) {
return h_and__m(&system_allocator, p);
}
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
// zero-width positive lookahead
HParser *res = g_new(HParser, 1);
HParser *res = h_new(HParser, 1);
res->env = (void*)p;
res->vtable = &and_vt;
return res;

View file

@ -21,10 +21,14 @@ static const HParserVtable attr_bool_vt = {
.parse = parse_attr_bool,
};
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
HParser *res = g_new(HParser, 1);
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
return h_attr_bool__m(&system_allocator, p, pred);
}
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HParser *res = h_new(HParser, 1);
res->vtable = &attr_bool_vt;
HAttrBool *env = g_new(HAttrBool, 1);
HAttrBool *env = h_new(HAttrBool, 1);
env->p = p;
env->pred = pred;
res->env = (void*)env;

View file

@ -20,18 +20,24 @@ static const HParserVtable bits_vt = {
.parse = parse_bits,
};
const HParser* h_bits(size_t len, bool sign) {
struct bits_env *env = g_new(struct bits_env, 1);
return h_bits__m(&system_allocator, len, sign);
}
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
struct bits_env *env = h_new(struct bits_env, 1);
env->length = len;
env->signedp = sign;
HParser *res = g_new(HParser, 1);
HParser *res = h_new(HParser, 1);
res->vtable = &bits_vt;
res->env = env;
return res;
}
#define SIZED_BITS(name_pre, len, signedp) \
const HParser* h_##name_pre##len () { \
return h_bits(len, signedp); \
const HParser* h_##name_pre##len () { \
return h_bits__m(&system_allocator, len, signedp); \
} \
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
return h_bits__m(mm__, len, signedp); \
}
SIZED_BITS(int, 8, true)
SIZED_BITS(int, 16, true)

View file

@ -39,10 +39,13 @@ static const HParserVtable butnot_vt = {
.parse = parse_butnot,
};
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
HTwoParsers *env = g_new(HTwoParsers, 1);
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
return h_butnot__m(&system_allocator, p1, p2);
}
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &butnot_vt; ret->env = (void*)env;
return ret;
}

View file

@ -1,7 +1,7 @@
#include "parser_internal.h"
static HParseResult* parse_ch(void* env, HParseState *state) {
uint8_t c = (uint8_t)GPOINTER_TO_UINT(env);
uint8_t c = (uint8_t)(unsigned long)(env);
uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false);
if (c == r) {
HParsedToken *tok = a_new(HParsedToken, 1);
@ -15,9 +15,13 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
static const HParserVtable ch_vt = {
.parse = parse_ch,
};
const HParser* h_ch(const uint8_t c) {
HParser *ret = g_new(HParser, 1);
const HParser* h_ch(const uint8_t c) {
return h_ch__m(&system_allocator, c);
}
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
HParser *ret = h_new(HParser, 1);
ret->vtable = &ch_vt;
ret->env = GUINT_TO_POINTER(c);
ret->env = (void*)(unsigned long)(c);
return (const HParser*)ret;
}

View file

@ -1,5 +1,24 @@
#include <string.h>
#include "parser_internal.h"
typedef unsigned int *HCharset;
// Allocates a charset through mm__ and clears it. The storage is 256 bytes:
// 256 / sizeof(unsigned int) words, matching the indexing used by
// charset_isset and charset_set.
static inline HCharset new_charset(HAllocator* mm__) {
  const size_t n_words = 256 / sizeof(unsigned int);
  HCharset set = h_new(unsigned int, n_words);
  // All positions start out unset.
  memset(set, 0, n_words * sizeof(unsigned int));
  return set;
}
// Returns 1 if position `pos` is set in `cs`, 0 otherwise.
// Position p lives in word p / sizeof(*cs), at bit p % sizeof(*cs).
static inline int charset_isset(HCharset cs, uint8_t pos) {
  const size_t word = pos / sizeof(*cs);
  const unsigned int mask = 1 << (pos % sizeof(*cs));
  return (cs[word] & mask) != 0;
}
// Sets (val != 0) or clears (val == 0) position `pos` in `cs`.
// Uses the same word/bit mapping as charset_isset.
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
  unsigned int *slot = &cs[pos / sizeof(*cs)];
  const unsigned int bit = 1 << (pos % sizeof(*cs));
  if (val)
    *slot |= bit;
  else
    *slot &= ~bit;
}
static HParseResult* parse_charset(void *env, HParseState *state) {
uint8_t in = h_read_bits(&state->input_stream, 8, false);
@ -18,8 +37,11 @@ static const HParserVtable charset_vt = {
};
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
HParser *ret = g_new(HParser, 1);
HCharset cs = new_charset();
return h_ch_range__m(&system_allocator, lower, upper);
}
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HParser *ret = h_new(HParser, 1);
HCharset cs = new_charset(mm__);
for (int i = 0; i < 256; i++)
charset_set(cs, i, (lower <= i) && (i <= upper));
ret->vtable = &charset_vt;
@ -28,9 +50,9 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
}
const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
HParser *ret = g_new(HParser, 1);
HCharset cs = new_charset();
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
HParser *ret = h_new(HParser, 1);
HCharset cs = new_charset(mm__);
for (size_t i = 0; i < 256; i++)
charset_set(cs, i, 1-val);
for (size_t i = 0; i < count; i++)
@ -42,10 +64,18 @@ const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
}
const HParser* h_in(const uint8_t *options, size_t count) {
return h_in_or_not(options, count, 1);
return h_in_or_not__m(&system_allocator, options, count, 1);
}
// Allocator-taking form of h_in: forwards to h_in_or_not__m with val = 1.
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 1);
}
const HParser* h_not_in(const uint8_t *options, size_t count) {
return h_in_or_not(options, count, 0);
return h_in_or_not__m(&system_allocator, options, count, 0);
}
// Allocator-taking form of h_not_in: forwards to h_in_or_not__m with val = 0.
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 0);
}

View file

@ -1,3 +1,4 @@
#include <stdarg.h>
#include "parser_internal.h"
typedef struct {
@ -25,20 +26,40 @@ static const HParserVtable choice_vt = {
};
// Variadic front end for h_choice__mv using the system allocator.
// The argument list must be terminated by NULL.
const HParser* h_choice(const HParser* p, ...) {
  va_list args;
  const HParser* parser;

  va_start(args, p);
  parser = h_choice__mv(&system_allocator, p, args);
  va_end(args);

  return parser;
}
// Variadic front end for h_choice__mv with a caller-supplied allocator.
// The argument list must be terminated by NULL.
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
  va_list args;
  const HParser* parser;

  va_start(args, p);
  parser = h_choice__mv(mm__, p, args);
  va_end(args);

  return parser;
}
// va_list form of h_choice: forwards to h_choice__mv with the system
// allocator. `ap` holds the parsers that follow `p`.
const HParser* h_choice__v(const HParser* p, va_list ap) {
return h_choice__mv(&system_allocator, p, ap);
}
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
va_list ap;
size_t len = 0;
HSequence *s = g_new(HSequence, 1);
HSequence *s = h_new(HSequence, 1);
const HParser *arg;
va_start(ap, p);
va_copy(ap, ap_);
do {
len++;
arg = va_arg(ap, const HParser *);
} while (arg);
va_end(ap);
s->p_array = g_new(const HParser *, len);
s->p_array = h_new(const HParser *, len);
va_start(ap, p);
va_copy(ap, ap_);
s->p_array[0] = p;
for (size_t i = 1; i < len; i++) {
s->p_array[i] = va_arg(ap, const HParser *);
@ -46,7 +67,7 @@ const HParser* h_choice(const HParser* p, ...) {
va_end(ap);
s->len = len;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &choice_vt; ret->env = (void*)s;
return ret;
}

View file

@ -38,10 +38,13 @@ static HParserVtable difference_vt = {
.parse = parse_difference,
};
const HParser* h_difference(const HParser* p1, const HParser* p2) {
HTwoParsers *env = g_new(HTwoParsers, 1);
const HParser* h_difference(const HParser* p1, const HParser* p2) {
return h_difference__m(&system_allocator, p1, p2);
}
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &difference_vt; ret->env = (void*)env;
return ret;
}

View file

@ -14,8 +14,13 @@ static const HParserVtable end_vt = {
.parse = parse_end,
};
const HParser* h_end_p() {
HParser *ret = g_new(HParser, 1);
ret->vtable = &end_vt; ret->env = NULL;
const HParser* h_end_p() {
return h_end_p__m(&system_allocator);
}
const HParser* h_end_p__m(HAllocator* mm__) {
HParser *ret = h_new(HParser, 1);
ret->vtable = &end_vt;
ret->env = NULL;
return (const HParser*)ret;
}

View file

@ -20,3 +20,6 @@ static const HParser epsilon_p = {
// Returns the address of the file-level static epsilon_p parser; nothing is
// allocated, so every call yields the same pointer.
const HParser* h_epsilon_p() {
return &epsilon_p;
}
// Allocator-taking form of h_epsilon_p. `mm__` is unused: the same static
// epsilon_p parser is returned and nothing is allocated.
const HParser* h_epsilon_p__m(HAllocator* mm__) {
return &epsilon_p;
}

View file

@ -15,7 +15,10 @@ static const HParserVtable ignore_vt = {
};
const HParser* h_ignore(const HParser* p) {
HParser* ret = g_new(HParser, 1);
return h_ignore__m(&system_allocator, p);
}
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
HParser* ret = h_new(HParser, 1);
ret->vtable = &ignore_vt;
ret->env = (void*)p;
return ret;

View file

@ -35,38 +35,48 @@ static const HParserVtable ignoreseq_vt = {
// API frontends
//
static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) {
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
seq->parsers = g_new(const HParser*, 2);
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 2);
seq->parsers[0] = p;
seq->parsers[1] = q;
seq->count = 2;
seq->which = which;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &ignoreseq_vt;
ret->env = (void*)seq;
return ret;
}
const HParser* h_left(const HParser* p, const HParser* q) {
return h_leftright(p, q, 0);
return h_leftright__m(&system_allocator, p, q, 0);
}
/* Allocator-taking variant of h_left(): forwards p and q to h_leftright__m
 * with which = 0 (index of p in the two-element parser array built there),
 * allocating through mm__. */
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 0);
}
const HParser* h_right(const HParser* p, const HParser* q) {
return h_leftright(p, q, 1);
return h_leftright__m(&system_allocator, p, q, 1);
}
/* Allocator-taking variant of h_right(): forwards p and q to h_leftright__m
 * with which = 1 (index of q in the two-element parser array built there),
 * allocating through mm__. */
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 1);
}
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
seq->parsers = g_new(const HParser*, 3);
return h_middle__m(&system_allocator, p, x, q);
}
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 3);
seq->parsers[0] = p;
seq->parsers[1] = x;
seq->parsers[2] = q;
seq->count = 3;
seq->which = 1;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &ignoreseq_vt;
ret->env = (void*)seq;
return ret;

View file

@ -13,7 +13,10 @@ void h_bind_indirect(HParser* indirect, const HParser* inner) {
}
HParser* h_indirect() {
HParser *res = g_new(HParser, 1);
return h_indirect__m(&system_allocator);
}
HParser* h_indirect__m(HAllocator* mm__) {
HParser *res = h_new(HParser, 1);
res->vtable = &indirect_vt;
res->env = NULL;
return res;

View file

@ -33,6 +33,9 @@ static const HParserVtable int_range_vt = {
};
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
return h_int_range__m(&system_allocator, p, lower, upper);
}
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
// p must be an integer parser, which means it's using parse_bits
// TODO: re-add this check
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
@ -40,11 +43,11 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
// and regardless, the bounds need to fit in the parser in question
// TODO: check this as well.
HRange *r_env = g_new(HRange, 1);
HRange *r_env = h_new(HRange, 1);
r_env->p = p;
r_env->lower = lower;
r_env->upper = upper;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &int_range_vt;
ret->env = (void*)r_env;
return ret;

View file

@ -49,10 +49,13 @@ static const HParserVtable many_vt = {
};
const HParser* h_many(const HParser* p) {
HParser *res = g_new(HParser, 1);
HRepeat *env = g_new(HRepeat, 1);
return h_many__m(&system_allocator, p);
}
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p();
env->sep = h_epsilon_p__m(mm__);
env->count = 0;
env->min_p = true;
res->vtable = &many_vt;
@ -61,10 +64,13 @@ const HParser* h_many(const HParser* p) {
}
const HParser* h_many1(const HParser* p) {
HParser *res = g_new(HParser, 1);
HRepeat *env = g_new(HRepeat, 1);
return h_many1__m(&system_allocator, p);
}
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p();
env->sep = h_epsilon_p__m(mm__);
env->count = 1;
env->min_p = true;
res->vtable = &many_vt;
@ -73,10 +79,13 @@ const HParser* h_many1(const HParser* p) {
}
const HParser* h_repeat_n(const HParser* p, const size_t n) {
HParser *res = g_new(HParser, 1);
HRepeat *env = g_new(HRepeat, 1);
return h_repeat_n__m(&system_allocator, p, n);
}
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p();
env->sep = h_epsilon_p__m(mm__);
env->count = n;
env->min_p = false;
res->vtable = &many_vt;
@ -85,8 +94,11 @@ const HParser* h_repeat_n(const HParser* p, const size_t n) {
}
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
HParser *res = g_new(HParser, 1);
HRepeat *env = g_new(HRepeat, 1);
return h_sepBy__m(&system_allocator, p, sep);
}
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
env->count = 0;
@ -97,8 +109,11 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep) {
}
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
HParser *res = g_new(HParser, 1);
HRepeat *env = g_new(HRepeat, 1);
return h_sepBy1__m(&system_allocator, p, sep);
}
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
env->count = 1;
@ -135,9 +150,12 @@ static const HParserVtable length_value_vt = {
};
const HParser* h_length_value(const HParser* length, const HParser* value) {
HParser *res = g_new(HParser, 1);
return h_length_value__m(&system_allocator, length, value);
}
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HParser *res = h_new(HParser, 1);
res->vtable = &length_value_vt;
HLenVal *env = g_new(HLenVal, 1);
HLenVal *env = h_new(HLenVal, 1);
env->length = length;
env->value = value;
res->env = (void*)env;

View file

@ -15,7 +15,10 @@ static const HParserVtable not_vt = {
};
const HParser* h_not(const HParser* p) {
HParser *res = g_new(HParser, 1);
return h_not__m(&system_allocator, p);
}
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1);
res->vtable = &not_vt;
res->env = (void*)p;
return res;

View file

@ -10,8 +10,11 @@ static const HParserVtable nothing_vt = {
.parse = parse_nothing,
};
const HParser* h_nothing_p() {
HParser *ret = g_new(HParser, 1);
const HParser* h_nothing_p() {
return h_nothing_p__m(&system_allocator);
}
const HParser* h_nothing_p__m(HAllocator* mm__) {
HParser *ret = h_new(HParser, 1);
ret->vtable = &nothing_vt; ret->env = NULL;
return (const HParser*)ret;
}

View file

@ -16,9 +16,12 @@ static const HParserVtable optional_vt = {
};
const HParser* h_optional(const HParser* p) {
return h_optional__m(&system_allocator, p);
}
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
// TODO: re-add this
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &optional_vt;
ret->env = (void*)p;
return ret;

View file

@ -1,3 +1,4 @@
#include <stdarg.h>
#include "parser_internal.h"
typedef struct {
@ -27,20 +28,40 @@ static const HParserVtable sequence_vt = {
.parse = parse_sequence,
};
const HParser* h_sequence(const HParser *p, ...) {
const HParser* h_sequence(const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
va_end(ap);
return ret;
}
/* Variadic sequence constructor that allocates through mm__.
 * Packages the arguments following p into a va_list and delegates to
 * h_sequence__mv; callers terminate the argument list with NULL, as the
 * counting loop in h_sequence__mv stops at the first null pointer. */
const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(mm__, p, ap);
va_end(ap);
return ret;
}
/* va_list form of h_sequence(): same as h_sequence__mv but always uses the
 * global system_allocator. */
const HParser* h_sequence__v(const HParser* p, va_list ap) {
return h_sequence__mv(&system_allocator, p, ap);
}
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
va_list ap;
size_t len = 0;
const HParser *arg;
va_start(ap, p);
va_copy(ap, ap_);
do {
len++;
arg = va_arg(ap, const HParser *);
} while (arg);
va_end(ap);
HSequence *s = g_new(HSequence, 1);
s->p_array = g_new(const HParser *, len);
HSequence *s = h_new(HSequence, 1);
s->p_array = h_new(const HParser *, len);
va_start(ap, p);
va_copy(ap, ap_);
s->p_array[0] = p;
for (size_t i = 1; i < len; i++) {
s->p_array[i] = va_arg(ap, const HParser *);
@ -48,7 +69,7 @@ const HParser* h_sequence(const HParser *p, ...) {
va_end(ap);
s->len = len;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &sequence_vt; ret->env = (void*)s;
return ret;
}

View file

@ -20,14 +20,17 @@ static HParseResult* parse_token(void *env, HParseState *state) {
return make_result(state, tok);
}
const const HParserVtable token_vt = {
const HParserVtable token_vt = {
.parse = parse_token,
};
const HParser* h_token(const uint8_t *str, const size_t len) {
HToken *t = g_new(HToken, 1);
const HParser* h_token(const uint8_t *str, const size_t len) {
return h_token__m(&system_allocator, str, len);
}
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
HToken *t = h_new(HToken, 1);
t->str = (uint8_t*)str, t->len = len;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &token_vt;
ret->env = t;
return (const HParser*)ret;

View file

@ -24,3 +24,6 @@ static HParser unimplemented = {
/* Return the placeholder parser. Every call yields the address of the same
 * file-static `unimplemented` object; nothing is allocated here. */
const HParser* h_unimplemented() {
return &unimplemented;
}
/* Allocator-taking variant of h_unimplemented(). mm__ is accepted for
 * signature parity with the other *__m constructors but is not used: the
 * shared static `unimplemented` object is returned as-is. */
const HParser* h_unimplemented__m(HAllocator* mm__) {
return &unimplemented;
}

View file

@ -8,7 +8,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) {
bak = state->input_stream;
c = h_read_bits(&state->input_stream, 8, false);
if (state->input_stream.overrun)
return NULL;
break;
} while (isspace(c));
state->input_stream = bak;
return h_do_parse((HParser*)env, state);
@ -19,7 +19,10 @@ static const HParserVtable whitespace_vt = {
};
const HParser* h_whitespace(const HParser* p) {
HParser *ret = g_new(HParser, 1);
return h_whitespace__m(&system_allocator, p);
}
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
HParser *ret = h_new(HParser, 1);
ret->vtable = &whitespace_vt;
ret->env = (void*)p;
return ret;

View file

@ -35,10 +35,13 @@ static const HParserVtable xor_vt = {
.parse = parse_xor,
};
const HParser* h_xor(const HParser* p1, const HParser* p2) {
HTwoParsers *env = g_new(HTwoParsers, 1);
const HParser* h_xor(const HParser* p1, const HParser* p2) {
return h_xor__m(&system_allocator, p1, p2);
}
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2;
HParser *ret = g_new(HParser, 1);
HParser *ret = h_new(HParser, 1);
ret->vtable = &xor_vt; ret->env = (void*)env;
return ret;
}

View file

@ -17,10 +17,10 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <glib.h>
#include <string.h>
#include "hammer.h"
#include <malloc.h>
#include "internal.h"
#include <stdlib.h>
typedef struct pp_state {
int delta;
@ -69,20 +69,25 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
fprintf(stream, "%*sUSER\n", indent, "");
break;
default:
g_assert_not_reached();
if(tok->token_type > TT_USER) {
fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER);
} else {
assert_message(0, "Should not reach here.");
}
}
}
struct result_buf {
char* output;
HAllocator *mm__;
size_t len;
size_t capacity;
};
static inline void ensure_capacity(struct result_buf *buf, int amt) {
while (buf->len + amt >= buf->capacity)
buf->output = g_realloc(buf->output, buf->capacity *= 2);
buf->output = buf->mm__->realloc(buf->mm__, buf->output, buf->capacity *= 2);
}
static inline void append_buf(struct result_buf *buf, const char* input, int len) {
@ -149,15 +154,19 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
break;
default:
fprintf(stderr, "Unexpected token type %d\n", tok->token_type);
g_assert_not_reached();
assert_message(0, "Should not reach here.");
}
}
char* h_write_result_unamb(const HParsedToken* tok) {
return h_write_result_unamb__m(&system_allocator, tok);
}
char* h_write_result_unamb__m(HAllocator* mm__, const HParsedToken* tok) {
struct result_buf buf = {
.output = g_malloc0(16),
.output = mm__->alloc(mm__, 16),
.len = 0,
.mm__ = mm__,
.capacity = 16
};
unamb_sub(tok, &buf);

20
src/system_allocator.c Normal file
View file

@ -0,0 +1,20 @@
#include <stdlib.h>
#include "internal.h"
/* HAllocator `alloc` hook backed by the C heap.
 * Returns whatever malloc(size) returns, including NULL on failure. */
static void* system_alloc(HAllocator *allocator, size_t size) {
  (void)allocator;  /* malloc keeps no per-allocator state */
  void *block = malloc(size);
  return block;
}
/* HAllocator `realloc` hook backed by the C heap.
 * Returns whatever realloc(ptr, size) returns; on failure that is NULL and
 * the block at `ptr` is left untouched, as with plain realloc. */
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
  (void)allocator;  /* realloc keeps no per-allocator state */
  void *resized = realloc(ptr, size);
  return resized;
}
/* HAllocator `free` hook backed by the C heap: hands `ptr` straight to free(). */
static void system_free(HAllocator *allocator, void* ptr) {
  (void)allocator;  /* free keeps no per-allocator state */
  free(ptr);
}
/* Process-wide allocator that routes alloc/realloc/free to the C library's
 * malloc/realloc/free via the three static hooks above. The non-__m
 * constructors pass its address to their __m counterparts. */
HAllocator system_allocator = {
.alloc = system_alloc,
.realloc = system_realloc,
.free = system_free,
};

22
src/t_benchmark.c Normal file
View file

@ -0,0 +1,22 @@
#include <glib.h>
#include "hammer.h"
#include "test_suite.h"
/* Inputs handed to h_benchmark() by test_benchmark_1.
 * Each row is { bytes, byte count, string }; the string presumably is the
 * expected unambiguous rendering of the parse result (field names live in
 * HParserTestcase, declared elsewhere -- confirm). The all-NULL/0 row ends
 * the table. */
HParserTestcase testcases[] = {
{(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"},
{(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"},
{(unsigned char*)"1,3", 3, "(u0x31 u0x33)"},
{(unsigned char*)"3", 1, "(u0x33)"},
{ NULL, 0, NULL }
};
/* Benchmark a parser for one or more of '1'/'2'/'3' separated by ','
 * against the `testcases` table and print the report on stderr. */
static void test_benchmark_1() {
  const HParser *digit = h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL);
  const HParser *comma = h_ch(',');
  const HParser *list = h_sepBy1(digit, comma);

  HBenchmarkResults *report = h_benchmark(list, testcases);
  h_benchmark_report(stderr, report);
}
/* Register the benchmark test with the GLib test harness under
 * /core/benchmark/. */
void register_benchmark_tests(void) {
g_test_add_func("/core/benchmark/1", test_benchmark_1);
}

67
src/t_bitreader.c Normal file
View file

@ -0,0 +1,67 @@
#include <glib.h>
#include "hammer.h"
#include "internal.h"
#include "test_suite.h"
/*
 * Brace initializer for an HInputStream positioned at the start of `buf`.
 *
 *   buf          byte buffer to read from (cast to uint8_t*)
 *   len          value stored in .length
 *   endianness_  flag set stored in .endianness
 *
 * .bit_offset starts at 8 when BIT_BIG_ENDIAN is set in endianness_ and at 0
 * otherwise. Every argument is parenthesized so that expression arguments
 * such as `p + 1` are cast and stored as a whole.
 */
#define MK_INPUT_STREAM(buf,len,endianness_)                          \
  {                                                                   \
    .input = (uint8_t*)(buf),                                         \
    .length = (len),                                                  \
    .index = 0,                                                       \
    .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0),         \
    .endianness = (endianness_)                                       \
  }
/* Full-width read: the 8 bytes FF FF FF FE 00 00 00 00, read as one 64-bit
 * big-endian value with the third argument true (presumably "signed"), must
 * come back as -0x200000000. */
static void test_bitreader_ints(void) {
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
}
/* Big-endian 3/8/5-bit reads over 6A 5A = 01101010 01011010:
 * 011 -> 0x03, 01010010 -> 0x52, 11010 -> 0x1A. */
static void test_bitreader_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
}
/* Little-endian 3/8/5-bit reads over 6A 5A, consuming each byte from its
 * least-significant bit: low 3 bits of 6A -> 0x02; remaining 5 bits of 6A
 * plus low 3 bits of 5A -> 0x4D; top 5 bits of 5A -> 0x0B. */
static void test_bitreader_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
}
/* Big-endian read wider than one byte, starting byte-aligned:
 * 6A 5A = 01101010 010|11010 -> 11 bits 0x352, then 5 bits 0x1A. */
static void test_largebits_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
}
/* Little-endian read wider than one byte, starting byte-aligned:
 * 11 bits = 0x6A | (low 3 bits of 0x5A << 8) = 0x26A, then the top 5 bits
 * of 0x5A = 0x0B. */
static void test_largebits_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
}
/* Big-endian wide read that starts mid-byte:
 * 6A 5A = 01101|010 01011010 -> 5 bits 0xD, then 11 bits 0x25A. */
static void test_offset_largebits_be(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
}
/* Little-endian wide read that starts mid-byte: low 5 bits of 0x6A = 0xA,
 * then 11 bits = top 3 bits of 0x6A | (0x5A << 3) = 0x2D3. */
static void test_offset_largebits_le(void) {
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
}
/* Register every bit-reader test with the GLib test harness, in the order
 * listed, under /core/bitreader/. */
void register_bitreader_tests(void) {
  static const struct {
    const char *path;
    void (*run)(void);
  } cases[] = {
    { "/core/bitreader/be",                  test_bitreader_be },
    { "/core/bitreader/le",                  test_bitreader_le },
    { "/core/bitreader/largebits-be",        test_largebits_be },
    { "/core/bitreader/largebits-le",        test_largebits_le },
    { "/core/bitreader/offset-largebits-be", test_offset_largebits_be },
    { "/core/bitreader/offset-largebits-le", test_offset_largebits_le },
    { "/core/bitreader/ints",                test_bitreader_ints },
  };

  for (size_t k = 0; k < sizeof cases / sizeof cases[0]; k++) {
    g_test_add_func(cases[k].path, cases[k].run);
  }
}

108
src/t_bitwriter.c Normal file
View file

@ -0,0 +1,108 @@
#include <glib.h>
#include "hammer.h"
#include "internal.h"
#include "test_suite.h"
/* One field of a bit-writer round-trip test: the value to write and the
 * number of bits to write it in. run_bitwriter_test stops at the first
 * element whose nbits is 0. */
typedef struct {
unsigned long long data;
size_t nbits;
} bitwriter_test_elem; // should end with {0,0}
/* Round-trip check for the bit writer.
 *
 * Writes every element of `data` (up to the nbits == 0 terminator) with a
 * fresh writer whose flags are set to `flags`, then wraps the produced
 * buffer in an HInputStream with the same endianness and checks that
 * h_read_bits returns each value again, in order and at the same widths.
 *
 * NOTE(review): the writer `w` is not released in this function -- confirm
 * whether a matching free call is expected here. */
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
size_t len;
const uint8_t *buf;
HBitWriter *w = h_bit_writer_new(&system_allocator);
int i;
w->flags = flags;
for (i = 0; data[i].nbits; i++) {
h_bit_writer_put(w, data[i].data, data[i].nbits);
}
buf = h_bit_writer_get_buffer(w, &len);
// Same starting bit_offset rule as MK_INPUT_STREAM in t_bitreader.c:
// 8 for big-endian bit order, 0 otherwise.
HInputStream input = {
.input = buf,
.index = 0,
.length = len,
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
.endianness = flags,
.overrun = 0
};
for (i = 0; data[i].nbits; i++) {
g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
}
}
static void test_bitwriter_ints(void) {
bitwriter_test_elem data[] = {
{ -0x200000000, 64 },
{ 0,0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_bitwriter_be(void) {
bitwriter_test_elem data[] = {
{ 0x03, 3 },
{ 0x52, 8 },
{ 0x1A, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_bitwriter_le(void) {
bitwriter_test_elem data[] = {
{ 0x02, 3 },
{ 0x4D, 8 },
{ 0x0B, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
static void test_largebits_be(void) {
bitwriter_test_elem data[] = {
{ 0x352, 11 },
{ 0x1A, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_largebits_le(void) {
bitwriter_test_elem data[] = {
{ 0x26A, 11 },
{ 0x0B, 5 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
static void test_offset_largebits_be(void) {
bitwriter_test_elem data[] = {
{ 0xD, 5 },
{ 0x25A, 11 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
}
static void test_offset_largebits_le(void) {
bitwriter_test_elem data[] = {
{ 0xA, 5 },
{ 0x2D3, 11 },
{ 0, 0 }
};
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
}
/* Register every bit-writer test with the GLib test harness, in the order
 * listed, under /core/bitwriter/. */
void register_bitwriter_tests(void) {
  static const struct {
    const char *path;
    void (*run)(void);
  } cases[] = {
    { "/core/bitwriter/be",                  test_bitwriter_be },
    { "/core/bitwriter/le",                  test_bitwriter_le },
    { "/core/bitwriter/largebits-be",        test_largebits_be },
    { "/core/bitwriter/largebits-le",        test_largebits_le },
    { "/core/bitwriter/offset-largebits-be", test_offset_largebits_be },
    { "/core/bitwriter/offset-largebits-le", test_offset_largebits_le },
    { "/core/bitwriter/ints",                test_bitwriter_ints },
  };

  for (size_t k = 0; k < sizeof cases / sizeof cases[0]; k++) {
    g_test_add_func(cases[k].path, cases[k].run);
  }
}

16
src/t_misc.c Normal file
View file

@ -0,0 +1,16 @@
#include <glib.h>
#include "test_suite.h"
#include "hammer.h"
/* Guard the token-type numbering: TT_USER must compare greater than each
 * built-in token type listed here. h_pprint's default branch relies on
 * `token_type > TT_USER` to recognise user-defined types. */
static void test_tt_user(void) {
g_check_cmpint(TT_USER, >, TT_NONE);
g_check_cmpint(TT_USER, >, TT_BYTES);
g_check_cmpint(TT_USER, >, TT_SINT);
g_check_cmpint(TT_USER, >, TT_UINT);
g_check_cmpint(TT_USER, >, TT_SEQUENCE);
g_check_cmpint(TT_USER, >, TT_ERR);
}
/* Register the miscellaneous tests with the GLib test harness under
 * /core/misc/. */
void register_misc_tests(void) {
g_test_add_func("/core/misc/tt_user", test_tt_user);
}

421
src/t_parser.c Normal file
View file

@ -0,0 +1,421 @@
#include <glib.h>
#include <string.h>
#include "hammer.h"
#include "internal.h"
#include "test_suite.h"
#include "parsers/parser_internal.h"
/* h_token: the exact 3-byte literal "95\xa2" parses and renders as the byte
 * string <39.35.a2>; the 2-byte prefix "95" is rejected. */
static void test_token(void) {
const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3);
g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>");
g_check_parse_failed(token_, "95", 2);
}
/* h_ch: matches the single byte 0xa2 and rejects the neighbouring 0xa3. */
static void test_ch(void) {
const HParser *ch_ = h_ch(0xa2);
g_check_parse_ok(ch_, "\xa2", 1, "u0xa2");
g_check_parse_failed(ch_, "\xa3", 1);
}
/* h_ch_range('a', 'c'): 'b' lies inside the range and parses; 'd' lies
 * outside and is rejected. */
static void test_ch_range(void) {
const HParser *range_ = h_ch_range('a', 'c');
g_check_parse_ok(range_, "b", 1, "u0x62");
g_check_parse_failed(range_, "d", 1);
}
//@MARK_START
/* h_int64: 8 bytes FF FF FF FE 00 00 00 00 yield the signed value
 * -0x200000000 (most-significant byte first); 7 bytes are too few. */
static void test_int64(void) {
const HParser *int64_ = h_int64();
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
}
/* h_int32: 4 bytes FF FE 00 00 yield the signed value -0x20000; 3 bytes are
 * too few. */
static void test_int32(void) {
const HParser *int32_ = h_int32();
g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000");
g_check_parse_failed(int32_, "\xff\xfe\x00", 3);
}
/* h_int16: 2 bytes FE 00 yield the signed value -0x200; 1 byte is too few. */
static void test_int16(void) {
const HParser *int16_ = h_int16();
g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200");
g_check_parse_failed(int16_, "\xfe", 1);
}
/* h_int8: the byte 0x88 yields the signed value -0x78; empty input fails. */
static void test_int8(void) {
const HParser *int8_ = h_int8();
g_check_parse_ok(int8_, "\x88", 1, "s-0x78");
g_check_parse_failed(int8_, "", 0);
}
/* h_uint64: 8 bytes 00 00 00 02 00 00 00 00 yield the unsigned value
 * 0x200000000; 7 bytes are too few. */
static void test_uint64(void) {
const HParser *uint64_ = h_uint64();
g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7);
}
/* h_uint32: 4 bytes 00 02 00 00 yield the unsigned value 0x20000; 3 bytes
 * are too few. */
static void test_uint32(void) {
const HParser *uint32_ = h_uint32();
g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000");
g_check_parse_failed(uint32_, "\x00\x02\x00", 3);
}
/* h_uint16: 2 bytes 02 00 yield the unsigned value 0x200; 1 byte is too
 * few. */
static void test_uint16(void) {
const HParser *uint16_ = h_uint16();
g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200");
g_check_parse_failed(uint16_, "\x02", 1);
}
/* h_uint8: the byte 0x78 yields the unsigned value 0x78; empty input
 * fails. */
static void test_uint8(void) {
const HParser *uint8_ = h_uint8();
g_check_parse_ok(uint8_, "\x78", 1, "u0x78");
g_check_parse_failed(uint8_, "", 0);
}
//@MARK_END
/* h_int_range over h_uint8 with bounds 3 and 10: the value 5 is accepted,
 * the value 0x0b (11) is rejected. */
static void test_int_range(void) {
const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
g_check_parse_ok(int_range_, "\x05", 1, "u0x5");
g_check_parse_failed(int_range_, "\xb", 1);
}
#if 0
/* Compiled out by the surrounding #if 0.
 * Intended check: the 8 bytes 3F F0 00 00 00 00 00 00 parse via h_float64
 * and 7 bytes are rejected.
 * NOTE(review): the expected value passed to g_check_parse_ok is the double
 * 1.0, whereas every enabled test in this file passes a string there --
 * this will need revisiting before the test is enabled. */
static void test_float64(void) {
const HParser *float64_ = h_float64();
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
}
/* Compiled out by the surrounding #if 0.
 * Intended check: the 4 bytes 3F 80 00 00 parse via h_float32 and the
 * 3-byte truncation is rejected. The truncated case now passes its input
 * length (3), matching the three-argument form used by every other
 * g_check_parse_failed call in this file.
 * NOTE(review): the expected value passed to g_check_parse_ok is the double
 * 1.0, whereas every enabled test passes a string there -- this will need
 * revisiting before the test is enabled. */
static void test_float32(void) {
  const HParser *float32_ = h_float32();

  g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
  g_check_parse_failed(float32_, "\x3f\x80\x00", 3);
}
#endif
/* h_whitespace: leading whitespace is skipped before the wrapped parser
 * runs.
 *
 *  - whitespace_ wraps h_ch('a'): zero, one or two spaces or a tab before
 *    'a' are accepted; a leading '_' is not whitespace and fails.
 *  - whitespace_end wraps h_end_p(): input consisting only of whitespace
 *    (or nothing) reaches end-of-input and succeeds; a trailing 'x' fails.
 *
 * Every literal is exactly as long as the length passed with it, so the
 * parser never sees the string's terminating NUL byte. */
static void test_whitespace(void) {
  const HParser *whitespace_ = h_whitespace(h_ch('a'));
  const HParser *whitespace_end = h_whitespace(h_end_p());

  g_check_parse_ok(whitespace_, "a", 1, "u0x61");
  g_check_parse_ok(whitespace_, " a", 2, "u0x61");
  g_check_parse_ok(whitespace_, "  a", 3, "u0x61");
  g_check_parse_ok(whitespace_, "\ta", 2, "u0x61");
  g_check_parse_failed(whitespace_, "_a", 2);

  g_check_parse_ok(whitespace_end, "", 0, "NULL");
  g_check_parse_ok(whitespace_end, "  ", 2, "NULL");
  g_check_parse_failed(whitespace_end, "  x", 3);
}
/* h_left('a', ' '): both parsers must match in order and only the first
 * one's result ('a') is reported. Missing either part, or a wrong second
 * byte, fails. */
static void test_left(void) {
const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
g_check_parse_ok(left_, "a ", 2, "u0x61");
g_check_parse_failed(left_, "a", 1);
g_check_parse_failed(left_, " ", 1);
g_check_parse_failed(left_, "ab", 2);
}
/* h_right(' ', 'a'): both parsers must match in order and only the second
 * one's result ('a') is reported. Missing either part, or a wrong first
 * byte, fails. */
static void test_right(void) {
const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
g_check_parse_ok(right_, " a", 2, "u0x61");
g_check_parse_failed(right_, "a", 1);
g_check_parse_failed(right_, " ", 1);
g_check_parse_failed(right_, "ba", 2);
}
/* h_middle(' ', 'a', ' '): all three parsers must match in order and only
 * the middle one's result ('a') is reported. Any missing or mismatching
 * part fails. */
static void test_middle(void) {
const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
g_check_parse_ok(middle_, " a ", 3, "u0x61");
g_check_parse_failed(middle_, "a", 1);
g_check_parse_failed(middle_, " ", 1);
g_check_parse_failed(middle_, " a", 2);
g_check_parse_failed(middle_, "a ", 2);
g_check_parse_failed(middle_, " b ", 3);
g_check_parse_failed(middle_, "ba ", 3);
g_check_parse_failed(middle_, " ab", 3);
}
#include <ctype.h>
const HParsedToken* upcase(const HParseResult *p) {
switch(p->ast->token_type) {
case TT_SEQUENCE:
{
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
ret->token_type = TT_SEQUENCE;
for (size_t i=0; i<p->ast->seq->used; ++i) {
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
tmp->token_type = TT_UINT;
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
h_carray_append(seq, tmp);
} else {
h_carray_append(seq, p->ast->seq->elements[i]);
}
}
ret->seq = seq;
return (const HParsedToken*)ret;
}
case TT_UINT:
{
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
ret->token_type = TT_UINT;
ret->uint = toupper(p->ast->uint);
return (const HParsedToken*)ret;
}
default:
return p->ast;
}
}
/* h_action: run `upcase` over the result of ('a'|'A') followed by
 * ('b'|'B'). Both "ab" and "AB" must render as the upper-case pair
 * (u0x41 u0x42); "XX" does not match the underlying sequence and fails. */
static void test_action(void) {
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
h_ch('A'),
NULL),
h_choice(h_ch('b'),
h_ch('B'),
NULL),
NULL),
upcase);
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
g_check_parse_failed(action_, "XX", 2);
}
/* h_in over the set {'a','b','c'}: 'b' is a member and parses; 'd' is not
 * and fails. */
static void test_in(void) {
uint8_t options[3] = { 'a', 'b', 'c' };
const HParser *in_ = h_in(options, 3);
g_check_parse_ok(in_, "b", 1, "u0x62");
g_check_parse_failed(in_, "d", 1);
}
/* h_not_in over the set {'a','b','c'}: 'd' is outside the set and parses;
 * 'a' is a member and fails. */
static void test_not_in(void) {
uint8_t options[3] = { 'a', 'b', 'c' };
const HParser *not_in_ = h_not_in(options, 3);
g_check_parse_ok(not_in_, "d", 1, "u0x64");
g_check_parse_failed(not_in_, "a", 1);
}
/* h_end_p: 'a' followed by end-of-input. "a" succeeds and h_end_p adds no
 * element to the rendered sequence; "aa" leaves input after the first 'a'
 * and fails. */
static void test_end_p(void) {
const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL);
g_check_parse_ok(end_p_, "a", 1, "(u0x61)");
g_check_parse_failed(end_p_, "aa", 2);
}
/* h_nothing_p: must fail on the one-byte input "a". */
static void test_nothing_p(void) {
const HParser *nothing_p_ = h_nothing_p();
g_check_parse_failed(nothing_p_, "a", 1);
}
/* h_sequence: parsers must match one after another.
 *
 *  - sequence_1 is 'a' then 'b': "ab" succeeds, either byte alone fails.
 *  - sequence_2 is 'a' then whitespace-tolerant 'b': zero, one or two
 *    spaces between the letters are accepted and do not show up in the
 *    result.
 *
 * Every literal is exactly as long as the length passed with it, so the
 * parser never sees the string's terminating NUL byte. */
static void test_sequence(void) {
  const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL);
  const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);

  g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)");
  g_check_parse_failed(sequence_1, "a", 1);
  g_check_parse_failed(sequence_1, "b", 1);

  g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)");
  g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)");
  g_check_parse_ok(sequence_2, "a  b", 4, "(u0x61 u0x62)");
}
/* h_choice('a', 'b'): either alternative is accepted; 'c' matches neither
 * and fails. */
static void test_choice(void) {
const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL);
g_check_parse_ok(choice_, "a", 1, "u0x61");
g_check_parse_ok(choice_, "b", 1, "u0x62");
g_check_parse_failed(choice_, "c", 1);
}
/* h_butnot(p1, p2):
 *  - butnot_1 is 'a' but not the token "ab": "a" and "aa" yield 'a', while
 *    "ab" is rejected.
 *  - butnot_2 is a digit but not '6': "6" is rejected. */
static void test_butnot(void) {
const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6'));
g_check_parse_ok(butnot_1, "a", 1, "u0x61");
g_check_parse_failed(butnot_1, "ab", 2);
g_check_parse_ok(butnot_1, "aa", 2, "u0x61");
g_check_parse_failed(butnot_2, "6", 1);
}
/* h_difference(token "ab", 'a'): "ab" is accepted and rendered as the byte
 * string <61.62>; "a" alone is rejected. */
static void test_difference(void) {
const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));
g_check_parse_ok(difference_, "ab", 2, "<61.62>");
g_check_parse_failed(difference_, "a", 1);
}
/* h_xor('0'..'6', '5'..'9'): input matching exactly one range is accepted
 * ('0', '9'); '5' matches both and 'a' matches neither, so both fail. */
static void test_xor(void) {
const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));
g_check_parse_ok(xor_, "0", 1, "u0x30");
g_check_parse_ok(xor_, "9", 1, "u0x39");
g_check_parse_failed(xor_, "5", 1);
g_check_parse_failed(xor_, "a", 1);
}
/* h_many('a'|'b'): collects the leading run of matches and stops at the
 * first non-match; zero matches still succeed with the empty sequence
 * "()". */
static void test_many(void) {
const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));
g_check_parse_ok(many_, "adef", 4, "(u0x61)");
g_check_parse_ok(many_, "bdef", 4, "(u0x62)");
g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
g_check_parse_ok(many_, "daabbabadef", 11, "()");
}
/* h_many1('a'|'b'): like h_many but at least one match is required, so
 * input starting with 'd' fails. */
static void test_many1(void) {
const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));
g_check_parse_ok(many1_, "adef", 4, "(u0x61)");
g_check_parse_ok(many1_, "bdef", 4, "(u0x62)");
g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
g_check_parse_failed(many1_, "daabbabadef", 11);
}
/* h_repeat_n('a'|'b', 2): two matches are required at the start of the
 * input. One match ("adef") or none ("dabdef") fails; "abdef" yields the
 * two-element sequence. */
static void test_repeat_n(void) {
const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);
g_check_parse_failed(repeat_n_, "adef", 4);
g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)");
g_check_parse_failed(repeat_n_, "dabdef", 6);
}
/* h_optional: 'a', then optionally 'b' or 'c', then 'd'. When the optional
 * part is absent the sequence still succeeds and renders "null" in its
 * place; an unexpected byte in the middle, or a missing 'd', fails. */
static void test_optional(void) {
const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);
g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)");
g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)");
g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)");
g_check_parse_failed(optional_, "aed", 3);
g_check_parse_failed(optional_, "ab", 2);
g_check_parse_failed(optional_, "ac", 2);
}
/* h_ignore: the ignored 'b' must still be present in the input ("ac"
 * fails) but is left out of the rendered sequence. */
static void test_ignore(void) {
const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);
g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)");
g_check_parse_failed(ignore_, "ac", 2);
}
/* h_sepBy1(('1'|'2'|'3'), ','): one or more digits separated by commas;
 * the separators do not appear in the result, and a single element with no
 * separator is accepted. */
static void test_sepBy1(void) {
const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)");
g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)");
}
/* h_epsilon_p: placed in the middle, at the start or at the end of a
 * sequence it consumes no input and adds no element to the rendered
 * result. */
static void test_epsilon_p(void) {
const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL);
g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)");
g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)");
g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)");
}
/* Placeholder: no attr_bool checks are implemented yet. The function is
 * still registered in register_parser_tests, so it currently passes
 * trivially. */
static void test_attr_bool(void) {
}
/* h_and (positive lookahead): the wrapped parser must match at the current
 * position but consumes nothing and contributes no element to the result.
 *  - and_1: lookahead '0' then '0' on "0" succeeds with one element.
 *  - and_2: lookahead '0' then '1' on "0" fails at the '1'.
 *  - and_3: '1' then lookahead '2' on "12" succeeds with only the '1'. */
static void test_and(void) {
const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);
g_check_parse_ok(and_1, "0", 1, "(u0x30)");
g_check_parse_failed(and_2, "0", 1);
g_check_parse_ok(and_3, "12", 2, "(u0x31)");
}
/* h_not (negative lookahead), shown by contrast:
 *  - not_1 is 'a' ('+' | "++") 'b' without lookahead; the test expects
 *    "a+b" to parse and "a++b" to fail.
 *  - not_2 guards the single '+' alternative with h_not('+'), so "a++b"
 *    falls through to the "++" token and parses. The guarded alternative is
 *    itself a sequence, hence the nested "(u0x2b)" in the "a+b" result. */
static void test_not(void) {
const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
const HParser *not_2 = h_sequence(h_ch('a'),
h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
h_token((const uint8_t*)"++", 2),
NULL), h_ch('b'), NULL);
g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)");
g_check_parse_failed(not_1, "a++b", 4);
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
}
/* Left recursion: the grammar lr := lr 'a' | 'a' is tied together with
 * h_indirect()/h_bind_indirect(). The expected results nest to the left:
 * "aaa" renders as ((a a) a). */
static void test_leftrec(void) {
const HParser *a_ = h_ch('a');
HParser *lr_ = h_indirect();
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
g_check_parse_ok(lr_, "a", 1, "u0x61");
g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)");
g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)");
}
/* Register every parser test with the GLib test harness, in the order
 * listed, under /core/parser/. The float tests stay disabled, matching the
 * #if 0 around their definitions. */
void register_parser_tests(void) {
  static const struct {
    const char *path;
    void (*run)(void);
  } cases[] = {
    { "/core/parser/token",      test_token },
    { "/core/parser/ch",         test_ch },
    { "/core/parser/ch_range",   test_ch_range },
    { "/core/parser/int64",      test_int64 },
    { "/core/parser/int32",      test_int32 },
    { "/core/parser/int16",      test_int16 },
    { "/core/parser/int8",       test_int8 },
    { "/core/parser/uint64",     test_uint64 },
    { "/core/parser/uint32",     test_uint32 },
    { "/core/parser/uint16",     test_uint16 },
    { "/core/parser/uint8",      test_uint8 },
    { "/core/parser/int_range",  test_int_range },
#if 0
    { "/core/parser/float64",    test_float64 },
    { "/core/parser/float32",    test_float32 },
#endif
    { "/core/parser/whitespace", test_whitespace },
    { "/core/parser/left",       test_left },
    { "/core/parser/right",      test_right },
    { "/core/parser/middle",     test_middle },
    { "/core/parser/action",     test_action },
    { "/core/parser/in",         test_in },
    { "/core/parser/not_in",     test_not_in },
    { "/core/parser/end_p",      test_end_p },
    { "/core/parser/nothing_p",  test_nothing_p },
    { "/core/parser/sequence",   test_sequence },
    { "/core/parser/choice",     test_choice },
    { "/core/parser/butnot",     test_butnot },
    { "/core/parser/difference", test_difference },
    { "/core/parser/xor",        test_xor },
    { "/core/parser/many",       test_many },
    { "/core/parser/many1",      test_many1 },
    { "/core/parser/repeat_n",   test_repeat_n },
    { "/core/parser/optional",   test_optional },
    { "/core/parser/sepBy1",     test_sepBy1 },
    { "/core/parser/epsilon_p",  test_epsilon_p },
    { "/core/parser/attr_bool",  test_attr_bool },
    { "/core/parser/and",        test_and },
    { "/core/parser/not",        test_not },
    { "/core/parser/ignore",     test_ignore },
    { "/core/parser/leftrec",    test_leftrec },
  };

  for (size_t k = 0; k < sizeof cases / sizeof cases[0]; k++) {
    g_test_add_func(cases[k].path, cases[k].run);
  }
}

View file

@ -15,12 +15,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <glib.h>
#include "hammer.h"
#include "test_suite.h"
// Test-registration hooks defined outside this file; main() calls each of
// them once before running the suite.
// They are declared with (void) so these are real prototypes: an empty
// parameter list in pre-C23 C leaves the arguments unchecked.
extern void register_bitreader_tests(void);
extern void register_bitwriter_tests(void);
extern void register_parser_tests(void);
extern void register_misc_tests(void);
extern void register_benchmark_tests(void);
// Test-suite entry point: initialises the GLib test harness from the
// command line, lets each test module register its cases, then runs them.
int main(int argc, char** argv) {
  g_test_init(&argc, &argv, NULL);
@ -29,6 +32,8 @@ int main(int argc, char** argv) {
  register_bitreader_tests();
  register_bitwriter_tests();
  register_parser_tests();
  register_misc_tests();
  register_benchmark_tests();
  // Hand the harness result back as the exit status; previously it was
  // discarded and main fell off the end, which yields 0.
  return g_test_run();
}

View file

@ -17,7 +17,7 @@
#ifndef HAMMER_TEST_SUITE__H
#define HAMMER_TEST_SUITE__H
#include <malloc.h>
#include <stdlib.h>
// Equivalent to g_assert_*, but not using g_assert...
#define g_check_inttype(fmt, typ, n1, op, n2) do { \