This commit is contained in:
Meredith L. Patterson 2014-04-20 00:46:37 +02:00
commit d947c6aede
18 changed files with 201 additions and 66 deletions

View file

@ -10,9 +10,13 @@ vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packa
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
tools = ['default', 'scanreplace']
if 'dotnet' in ARGUMENTS.get('bindings', []):
tools.append('csharp/mono')
env = Environment(ENV = {'PATH' : os.environ['PATH']},
variables = vars,
tools=['default', 'scanreplace', 'csharp/mono'],
tools=tools,
toolpath=['tools'])
if not 'bindings' in env:
@ -47,6 +51,8 @@ env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attr
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')
elif os.uname()[0] == "OpenBSD":
pass
else:
env.MergeFlags("-lrt")

1
contrib/freebsd/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
work

35
contrib/freebsd/Makefile Normal file
View file

@ -0,0 +1,35 @@
# Created by thequux for Upstanding Hackers
# Copyright (c) 2014 Upstanding Hackers
#
# The contents of this file are available under the 2-clause BSD
# license.
#
# FreeBSD port skeleton for the hammer parser-combinator library.
# The port is built with SCons and, because no MASTER_SITE is set, the
# distfile is produced locally from a git checkout (see do-fetch below).

# Port identity and categorisation.
PORTNAME = hammer
PORTVERSION = 0.9
CATEGORIES = devel
# The distfile generated by do-fetch is a bzip2-compressed tarball.
EXTRACT_SUFX = .tar.bz2
# Build through the ports framework's SCons support.
USES = scons
# MASTER_SITE
MAINTAINER = thequux@upstandinghackers.com
COMMENT = Parser combinators. In C.
LICENSE = GPLv2
# Branch (or other ref accepted by `git clone -b`) and repository that
# do-fetch checks out.
# NOTE(review): tracking `master` means the distfile contents are not pinned
# to PORTVERSION -- confirm this is intended.
GIT_REV = master
GIT_URL = git://github.com/UpstandingHackers/hammer.git
# Arguments handed to scons: optimised variant, staged install root, and
# install prefix.
SCONS_ARGS = --variant=opt DESTDIR=${STAGEDIR} prefix=${LOCALBASE}
SCONS_TARGET = all
SCONS_INSTALL_TARGET = install
# Custom fetch step: clone the repository into the work directory, pack the
# checkout into ${DISTDIR}/${DISTNAME}.tar.bz2, then remove the checkout.
# (The `y` tar flag presumably selects bzip2 compression, matching
# EXTRACT_SUFX -- confirm for the tar implementation in use.)
do-fetch:
${MKDIR} ${WRKDIR}
git clone -b ${GIT_REV} ${GIT_URL} ${WRKDIR}/${DISTNAME}
cd ${WRKDIR}; tar cvfy ${DISTDIR}/${DISTNAME}.tar.bz2 ${DISTNAME}
rm -rf ${WRKDIR}/${DISTNAME}
# git is required at fetch time because do-fetch clones the repository.
FETCH_DEPENDS += git:${PORTSDIR}/devel/git
# TODO: strip libhammer.so
.include <bsd.port.mk>

View file

@ -0,0 +1,4 @@
Hammer is a fast parser combinator library written in C with bindings
for many languages.
WWW: http://github.com/UpstandingHackers/hammer

14
contrib/freebsd/pkg-plist Normal file
View file

@ -0,0 +1,14 @@
include/hammer/internal.h
include/hammer/allocator.h
include/hammer/parsers/parser_internal.h
include/hammer/backends/regex.h
include/hammer/backends/contextfree.h
include/hammer/glue.h
include/hammer/hammer.h
lib/libhammer.so
lib/libhammer.a
lib/pkgconfig/libhammer.pc
@dirrm include/hammer/parsers
@dirrm include/hammer/backends
@dirrm include/hammer
@dirrmtry lib/pkgconfig

View file

@ -81,7 +81,6 @@ static void *combine_entries(HHashSet *workset, void *dst, const void *src)
// add the mappings of src to dst, marking conflicts and adding the conflicting
// values to workset.
// note: reuses parts of src to build dst!
static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src)
{
if(src->epsilon_branch) {
@ -118,13 +117,16 @@ static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src)
if(src_) {
HStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c);
if(dst_)
if(dst_) {
stringmap_merge(workset, dst_, src_);
else
} else {
if(src_->arena != dst->arena)
src_ = h_stringmap_copy(dst->arena, src_);
h_hashtable_put(dst->char_branches, (void *)c, src_);
}
}
}
}
}
/* Generate entries for the productions of A in the given table row. */

View file

@ -3,10 +3,22 @@
#include "../internal.h"
#include "../parsers/parser_internal.h"
// short-hand for constructing HCachedResult's
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
HCachedResult *ret = a_new(HCachedResult, 1);
ret->result = result;
// Build a cache value for the regular (non-left-recursive) case.
// The new entry is tagged PC_RIGHT, carries `result` (which may be NULL),
// and records the input position held in `state` at the time of the call.
static
HParserCacheValue * cached_result(HParseState *state, HParseResult *result) {
  HParserCacheValue *value = a_new(HParserCacheValue, 1);

  value->input_stream = state->input_stream;
  value->right = result;
  value->value_type = PC_RIGHT;
  return value;
}
// Build a cache value for the left-recursion case.
// The new entry is tagged PC_LEFT, carries the HLeftRec record `lr`,
// and records the input position held in `state` at the time of the call.
static
HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
  HParserCacheValue *value = a_new(HParserCacheValue, 1);

  value->input_stream = state->input_stream;
  value->left = lr;
  value->value_type = PC_LEFT;
  return value;
}
@ -44,27 +56,28 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
HRecursionHead *head = h_hashtable_get(state->recursion_heads, k);
HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos);
if (!head) { // No heads found
return cached;
} else { // Some heads found
if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
// Nothing in the cache, and the key parser is not involved
HParseResult *tmp = a_new(HParseResult, 1);
tmp->ast = NULL; tmp->arena = state->arena;
HParserCacheValue *ret = a_new(HParserCacheValue, 1);
ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
return ret;
cached = cached_result(state, NULL);
cached->input_stream = k->input_pos;
}
if (h_slist_find(head->eval_set, k->parser)) {
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
// we know that cached has an entry here, modify it
if (!cached)
cached = a_new(HParserCacheValue, 1);
// update the cache
if (!cached) {
cached = cached_result(state, tmp_res);
h_hashtable_put(state->cache, k, cached);
} else {
cached->value_type = PC_RIGHT;
cached->right = cached_result(state, tmp_res);
cached->right = tmp_res;
cached->input_stream = state->input_stream;
}
}
return cached;
}
@ -83,51 +96,62 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
some->eval_set = NULL;
rec_detect->head = some;
}
//assert(state->lr_stack->head != NULL);
HSlistNode *head = state->lr_stack->head;
HLeftRec *lr;
while (head && (lr = head->elem)->rule != p) {
HSlistNode *it;
for(it=state->lr_stack->head; it; it=it->next) {
HLeftRec *lr = it->elem;
if(lr->rule == p)
break;
lr->head = rec_detect->head;
h_slist_push(lr->head->involved_set, (void*)lr->rule);
head = head->next;
}
}
// helper: strict "comes before" test on input positions.
// Positions are ordered by their index field first; when the indices are
// equal the bit_offset field decides. Returns true iff pos1 < pos2.
static inline bool pos_lt(HInputStream pos1, HInputStream pos2) {
  if (pos1.index != pos2.index)
    return pos1.index < pos2.index;
  return pos1.bit_offset < pos2.bit_offset;
}
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
* future parse.
*/
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
// Store the head into the recursion_heads
h_hashtable_put(state->recursion_heads, k, head);
h_hashtable_put(state->recursion_heads, &k->input_pos, head);
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
if (!old_cached || PC_LEFT == old_cached->value_type)
errx(1, "impossible match");
HParseResult *old_res = old_cached->right->result;
HParseResult *old_res = old_cached->right;
// rewind the input
state->input_stream = k->input_pos;
// reset the eval_set of the head of the recursion at each beginning of growth
head->eval_set = h_slist_copy(head->involved_set);
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
if (tmp_res) {
if ((old_res->ast->index < tmp_res->ast->index) ||
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, k, v);
if (pos_lt(old_cached->input_stream, state->input_stream)) {
h_hashtable_put(state->cache, k, cached_result(state, tmp_res));
return grow(k, state, head);
} else {
// we're done with growing, we can remove data from the recursion head
h_hashtable_del(state->recursion_heads, k);
h_hashtable_del(state->recursion_heads, &k->input_pos);
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
if (cached && PC_RIGHT == cached->value_type) {
return cached->right->result;
state->input_stream = cached->input_stream;
return cached->right;
} else {
errx(1, "impossible match");
}
}
} else {
h_hashtable_del(state->recursion_heads, k);
h_hashtable_del(state->recursion_heads, &k->input_pos);
state->input_stream = old_cached->input_stream;
return old_res;
}
}
@ -140,9 +164,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab
}
else {
// update cache
HParserCacheValue *v = a_new(HParserCacheValue, 1);
v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
h_hashtable_put(state->cache, k, v);
h_hashtable_put(state->cache, k, cached_result(state, growable->seed));
if (!growable->seed)
return NULL;
else
@ -165,18 +187,18 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
base->seed = NULL; base->rule = parser; base->head = NULL;
h_slist_push(state->lr_stack, base);
// cache it
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
dummy->value_type = PC_LEFT; dummy->left = base;
h_hashtable_put(state->cache, key, dummy);
h_hashtable_put(state->cache, key, cached_lr(state, base));
// parse the input
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
// the base variable has passed equality tests with the cache
h_slist_pop(state->lr_stack);
// update the cached value to our new position
HParserCacheValue *cached = h_hashtable_get(state->cache, key);
assert(cached != NULL);
cached->input_stream = state->input_stream;
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
if (NULL == base->head) {
HParserCacheValue *right = a_new(HParserCacheValue, 1);
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
h_hashtable_put(state->cache, key, right);
h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
return tmp_res;
} else {
base->seed = tmp_res;
@ -185,12 +207,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
}
} else {
// it exists!
state->input_stream = m->input_stream;
if (PC_LEFT == m->value_type) {
setupLR(parser, state, m->left);
return m->left->seed; // BUG: this might not be correct
return m->left->seed;
} else {
state->input_stream = m->right->input_stream;
return m->right->result;
return m->right;
}
}
}
@ -212,6 +234,14 @@ static bool cache_key_equal(const void* key1, const void* key2) {
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
}
// Hash callback for tables keyed by input position: hashes the
// sizeof(HInputStream) bytes that `key` points to with h_djbhash.
static uint32_t pos_hash(const void* key) {
  const size_t key_len = sizeof(HInputStream);
  return h_djbhash(key, key_len);
}
// Equality callback for tables keyed by input position: two keys are equal
// iff their sizeof(HInputStream) bytes compare identical.
// NOTE(review): a bytewise compare assumes HInputStream has no padding with
// indeterminate contents -- confirm against the struct definition.
static bool pos_equal(const void* key1, const void* key2) {
  const int diff = memcmp(key1, key2, sizeof(HInputStream));
  return diff == 0;
}
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
HArena * arena = h_new_arena(mm__, 0);
HParseState *parse_state = a_new_(arena, HParseState, 1);
@ -219,8 +249,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
cache_key_hash); // hash_func
parse_state->input_stream = *input_stream;
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
h_slist_free(parse_state->lr_stack);

View file

@ -10,7 +10,13 @@
#include <mach/mach.h>
#endif
#ifdef __NetBSD__
#include <sys/resource.h>
#endif
void h_benchmark_clock_gettime(struct timespec *ts) {
if (ts == NULL)
return;
#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time
/*
* This returns real time, not CPU time. See http://stackoverflow.com/a/6725161
@ -23,6 +29,18 @@ void h_benchmark_clock_gettime(struct timespec *ts) {
mach_port_deallocate(mach_task_self(), cclock);
ts->tv_sec = mts.tv_sec;
ts->tv_nsec = mts.tv_nsec;
#elif defined(__NetBSD__)
  // NetBSD doesn't have CLOCK_THREAD_CPUTIME_ID. We'll use getrusage instead
  // and report user + system CPU time of the whole process.
  struct rusage rusage;
  getrusage(RUSAGE_SELF, &rusage);
  // Each tv_usec is at most 999999, so the sum scaled to nanoseconds is at
  // most 1999998000 and does not overflow a 32-bit signed value.
  ts->tv_nsec = (rusage.ru_utime.tv_usec + rusage.ru_stime.tv_usec) * 1000;
  // Seconds are user time plus *system* time (not user time counted twice).
  ts->tv_sec = rusage.ru_utime.tv_sec + rusage.ru_stime.tv_sec;
  // The summed nanoseconds are below two seconds, so one carry normalizes.
  if (ts->tv_nsec >= 1000000000) {
    ts->tv_nsec -= 1000000000; // subtract a second
    ts->tv_sec += 1;           // add it back.
  }
  // A normalized timespec has tv_nsec strictly below one second.
  assert (ts->tv_nsec < 1000000000);
#else
clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts);
#endif

View file

@ -281,6 +281,13 @@ void h_stringmap_update(HStringMap *m, const HStringMap *n)
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
}
/* Return a new string map, created in arena a, that holds the contents of m.
 * The copy is produced by merging m into a freshly created empty map with
 * h_stringmap_update.
 */
HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m)
{
  HStringMap *copy = h_stringmap_new(a);

  h_stringmap_update(copy, m);

  return copy;
}
/* Replace all occurrences of old in m with new.
* If old is NULL, replace all values in m with new.
* If new is NULL, remove the respective values.
@ -641,7 +648,7 @@ void h_pprint_char(FILE *f, char c)
case '\n': fputs("\\n", f); break;
case '\r': fputs("\\r", f); break;
default:
if(isprint(c)) {
if(isprint((int)c)) {
fputc(c, f);
} else {
fprintf(f, "\\x%.2X", c);

View file

@ -40,6 +40,7 @@ typedef struct HStringMap_ {
} HStringMap;
HStringMap *h_stringmap_new(HArena *a);
HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m);
void h_stringmap_put_end(HStringMap *m, void *v);
void h_stringmap_put_epsilon(HStringMap *m, void *v);
void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends);

View file

@ -255,21 +255,17 @@ typedef struct HLeftRec_ {
HRecursionHead *head;
} HLeftRec;
/* Result and remaining input, for rerunning from a cached position. */
typedef struct HCachedResult_ {
HParseResult *result;
HInputStream input_stream;
} HCachedResult;
/* Tagged union for values in the cache: either HLeftRec's (Left) or
* HParseResult's (Right).
* Includes the position (input_stream) to advance to after using this value.
*/
typedef struct HParserCacheValue_t {
HParserCacheValueType value_type;
union {
HLeftRec *left;
HCachedResult *right;
HParseResult *right;
};
HInputStream input_stream;
} HParserCacheValue;
// This file provides the logical inverse of bitreader.c

View file

@ -13,8 +13,8 @@ static HParseResult* parse_action(void *env, HParseState *state) {
HParseResult *tmp = h_do_parse(a->p, state);
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
if(tmp) {
const HParsedToken *tok = a->action(tmp, a->user_data);
return make_result(state->arena, (HParsedToken*)tok);
HParsedToken *tok = (HParsedToken*)a->action(tmp, a->user_data);
return make_result(state->arena, tok);
} else
return NULL;
} else // either the parser's missing or the action's missing

View file

@ -25,5 +25,6 @@ HParser* h_and(const HParser* p) {
}
HParser* h_and__m(HAllocator* mm__, const HParser* p) {
// zero-width positive lookahead
return h_new_parser(mm__, &and_vt, (void *)p);
void* env = (void*)p;
return h_new_parser(mm__, &and_vt, env);
}

View file

@ -55,5 +55,6 @@ HParser* h_ignore(const HParser* p) {
return h_ignore__m(&system_allocator, p);
}
HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &ignore_vt, (void *)p);
void* env = (void*)p;
return h_new_parser(mm__, &ignore_vt, env);
}

View file

@ -21,5 +21,6 @@ HParser* h_not(const HParser* p) {
return h_not__m(&system_allocator, p);
}
HParser* h_not__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &not_vt, (void *)p);
void* env = (void*)p;
return h_new_parser(mm__, &not_vt, env);
}

View file

@ -92,6 +92,7 @@ HParser* h_optional(const HParser* p) {
HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
// TODO: re-add this
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
return h_new_parser(mm__, &optional_vt, (void *)p);
void* env = (void*)p;
return h_new_parser(mm__, &optional_vt, env);
}

View file

@ -10,7 +10,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) {
c = h_read_bits(&state->input_stream, 8, false);
if (state->input_stream.overrun)
break;
} while (isspace(c));
} while (isspace((int)c));
state->input_stream = bak;
return h_do_parse((HParser*)env, state);
}
@ -81,5 +81,6 @@ HParser* h_whitespace(const HParser* p) {
return h_whitespace__m(&system_allocator, p);
}
HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &whitespace_vt, (void *)p);
void* env = (void*)p;
return h_new_parser(mm__, &whitespace_vt, env);
}

View file

@ -414,11 +414,24 @@ static void test_leftrec(gconstpointer backend) {
HParser *lr_ = h_indirect();
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL));
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL");
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)");
g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
}
// Left recursion whose base case is non-empty: lr := lr 'a' | 'a'.
// One or more 'a' must parse into a left-nested sequence; empty input fails.
static void test_leftrec_ne(gconstpointer backend) {
  const HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  HParser *ch_a = h_ch('a');
  HParser *lrec = h_indirect();
  h_bind_indirect(lrec, h_choice(h_sequence(lrec, ch_a, NULL), ch_a, NULL));

  g_check_parse_match(lrec, be, "a", 1, "u0x61");
  g_check_parse_match(lrec, be, "aa", 2, "(u0x61 u0x61)");
  g_check_parse_match(lrec, be, "aaa", 3, "((u0x61 u0x61) u0x61)");
  g_check_parse_failed(lrec, be, "", 0);
}
static void test_rightrec(gconstpointer backend) {
HParser *a_ = h_ch('a');
@ -485,7 +498,9 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and);
g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
// XXX(pesco) it seems to me Warth's algorithm just doesn't work for this case
//g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
@ -599,6 +614,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool);
g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore);
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
@ -637,6 +653,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool);
g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore);
g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec);
g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne);
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
}