Merge pull request #101 from abiggerhammer/master

Make HHashTable resizeable and improve benchmarking output
This commit is contained in:
TQ Hirsch 2014-04-20 21:50:59 +02:00
commit 4f9efcd726
10 changed files with 296 additions and 148 deletions

View file

@ -1,6 +1,7 @@
# -*- python -*-
import os
import os.path
import platform
import sys
@ -44,7 +45,7 @@ env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backen
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env.ScanReplace('libhammer.pc.in')
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes")
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable")
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')

View file

@ -9,6 +9,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
int result = h_lalr_compile(mm__, parser, params);
if(result == -1 && parser->backend_data) {

View file

@ -49,8 +49,9 @@ static inline HLRTransition *transition(HArena *arena,
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
size_t x, HCFChoice *xAy)
{
if(xAy->type != HCF_CHOICE)
if (xAy->type != HCF_CHOICE) {
return;
}
// XXX CHARSET?
HArena *arena = eg->arena;
@ -89,7 +90,7 @@ static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
*esym = *sym;
HHashSet *cs = h_hashtable_get(eg->corr, sym);
if(!cs) {
if (!cs) {
cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
h_hashtable_put(eg->corr, sym, cs);
}
@ -151,9 +152,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
{
int ret = 0;
if(fs->epsilon_branch) {
if (fs->epsilon_branch) {
HLRAction *prev = tmap->epsilon_branch;
if(prev && prev != action) {
if (prev && prev != action) {
// conflict
tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
@ -162,9 +163,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
}
}
if(fs->end_branch) {
if (fs->end_branch) {
HLRAction *prev = tmap->end_branch;
if(prev && prev != action) {
if (prev && prev != action) {
// conflict
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1;
@ -176,13 +177,14 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
if(!tmap_) {
if (!tmap_) {
tmap_ = h_stringmap_new(tmap->arena);
h_hashtable_put(tmap->char_branches, key, tmap_);
}
if(terminals_put(tmap_, fs_, action) < 0)
if (terminals_put(tmap_, fs_, action) < 0) {
ret = -1;
}
H_END_FOREACH
return ret;
@ -197,8 +199,9 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
for(; *p && *rhs; p++, rhs++) {
HLRTransition *t = h_hashtable_get(eg->smap, *p);
assert(t != NULL);
if(!h_eq_symbol(t->symbol, *rhs))
if (!h_eq_symbol(t->symbol, *rhs)) {
return false;
}
state = t->to;
}
return (*p == *rhs // both NULL
@ -231,18 +234,21 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR"
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free)
return -1;
HLRDFA *dfa = h_lr0_dfa(g);
if(dfa == NULL) { // this should normally not happen
if (dfa == NULL) { // this should normally not happen
h_cfgrammar_free(g);
return -1;
}
HLRTable *table = h_lr0_table(g, dfa);
if(table == NULL) { // this should normally not happen
if (table == NULL) { // this should normally not happen
h_cfgrammar_free(g);
return -1;
}
@ -282,10 +288,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
H_FOREACH_KEY(lhss, HCFChoice *lhs)
assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET?
for(HCFSequence **p=lhs->seq; *p; p++) {
for(HCFSequence **p=lhs->seq; *p; p++) {
HCFChoice **rhs = (*p)->items;
if(!match_production(eg, rhs, item->rhs, state))
if(!match_production(eg, rhs, item->rhs, state)) {
continue;
}
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
@ -297,11 +304,12 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// for each lookahead symbol, put action into table cell
if(terminals_put(table->tmap[state], fs, action) < 0)
inadeq = true;
} H_END_FOREACH // enhanced production
} H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item
if(inadeq)
if(inadeq) {
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
}
}
}
@ -350,7 +358,7 @@ int test_lalr(void)
printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
if(g == NULL) {
if (g == NULL) {
fprintf(stderr, "h_cfgrammar failed\n");
return 1;
}
@ -358,21 +366,23 @@ int test_lalr(void)
printf("\n==== D F A ====\n");
HLRDFA *dfa = h_lr0_dfa(g);
if(dfa)
if (dfa) {
h_pprint_lrdfa(stdout, g, dfa, 0);
else
} else {
fprintf(stderr, "h_lalr_dfa failed\n");
}
printf("\n==== L R ( 0 ) T A B L E ====\n");
HLRTable *table0 = h_lr0_table(g, dfa);
if(table0)
if (table0) {
h_pprint_lrtable(stdout, g, table0, 0);
else
} else {
fprintf(stderr, "h_lr0_table failed\n");
}
h_lrtable_free(table0);
printf("\n==== L A L R T A B L E ====\n");
if(h_compile(p, PB_LALR, NULL)) {
if (h_compile(p, PB_LALR, NULL)) {
fprintf(stderr, "does not compile\n");
return 2;
}
@ -380,10 +390,10 @@ int test_lalr(void)
printf("\n==== P A R S E R E S U L T ====\n");
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
if(res)
if (res) {
h_pprint(stdout, res->ast, 0, 2);
else
} else {
printf("no parse\n");
}
return 0;
}

View file

@ -35,10 +35,12 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
HRVMTrace *invert_trace(HRVMTrace *trace) {
HRVMTrace *last = NULL;
if (!trace)
if (!trace) {
return NULL;
if (!trace->next)
}
if (!trace->next) {
return trace;
}
do {
HRVMTrace *next = trace->next;
trace->next = last;
@ -83,8 +85,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
h_sarray_clear(heads_n);
}
memset(insn_seen, 0, prog->length); // no insns seen yet
if (!live_threads)
if (!live_threads) {
goto match_fail;
}
live_threads = 0;
HRVMTrace *tr_head;
H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
@ -111,8 +114,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
hi = (arg >> 8) & 0xff;
lo = arg & 0xff;
THREAD.ip++;
if (ch < lo || ch > hi)
ipq_top--; // terminate thread
if (ch < lo || ch > hi) {
ipq_top--; // terminate thread
}
goto next_insn;
case RVM_GOTO:
THREAD.ip = arg;
@ -141,8 +145,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
goto next_insn;
case RVM_EOF:
THREAD.ip++;
if (off != len)
if (off != len) {
ipq_top--; // Terminate thread
}
goto next_insn;
case RVM_STEP:
// save thread
@ -249,8 +254,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
for (uint16_t i = 0; i < prog->action_count; i++) {
if (prog->actions[i].action == action_func && prog->actions[i].env == env)
if (prog->actions[i].action == action_func && prog->actions[i].env == env) {
return i;
}
}
// Ensure that there's room in the action array...
if (!(prog->action_count & (prog->action_count + 1))) {
@ -294,8 +300,9 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
size_t h_svm_count_to_mark(HSVMContext *ctx) {
size_t ctm;
for (ctm = 0; ctm < ctx->stack_count; ctm++) {
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) {
return ctm;
}
}
return ctx->stack_count;
}
@ -320,8 +327,10 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
}
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK)
while (ctx->stack_count > 0) {
if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) {
return true;
}
}
return false; // no mark found.
}
@ -343,8 +352,9 @@ static void h_regex_free(HParser *parser) {
}
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
if (!parser->vtable->isValidRegular(parser->env))
return 1;
if (!parser->vtable->isValidRegular(parser->env)) {
return -1;
}
HRVMProg *prog = h_new(HRVMProg, 1);
prog->length = prog->action_count = 0;
prog->insns = NULL;

View file

@ -80,13 +80,14 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
// Step 1: Compile grammar for given parser...
if (h_compile(parser, backend, NULL) == -1) {
// backend inappropriate for grammar...
fprintf(stderr, "failed\n");
fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]);
ret->results[backend].compile_success = false;
ret->results[backend].n_testcases = 0;
ret->results[backend].failed_testcases = 0;
ret->results[backend].cases = NULL;
continue;
}
fprintf(stderr, "Compiled for %s\n", HParserBackendNames[backend]);
ret->results[backend].compile_success = true;
int tc_failed = 0;
// Step 1: verify all test cases.
@ -103,7 +104,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
if ((res_unamb == NULL && tc->output_unambiguous != NULL)
|| (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) {
// test case failed...
fprintf(stderr, "failed\n");
fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]);
// We want to run all testcases, for purposes of generating a
// report. (eg, if users are trying to fix a grammar for a
// faster backend)
@ -115,7 +116,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
if (tc_failed > 0) {
// Can't use this parser; skip to the next
fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
fprintf(stderr, "%s failed testcases; skipping benchmark\n", HParserBackendNames[backend]);
continue;
}
@ -140,6 +141,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec);
} while (time_diff < 100000000);
ret->results[backend].cases[cur_case].parse_time = (time_diff / count);
ret->results[backend].cases[cur_case].length = tc->length;
cur_case++;
}
}
@ -148,11 +150,16 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
for (size_t i=0; i<result->len; ++i) {
fprintf(stream, "Backend %zd ... \n", i);
if (result->results[i].cases == NULL) {
fprintf(stream, "Skipping %s because grammar did not compile for it\n", HParserBackendNames[i]);
} else {
fprintf(stream, "Backend %zd (%s) ... \n", i, HParserBackendNames[i]);
}
for (size_t j=0; j<result->results[i].n_testcases; ++j) {
if(result->results[i].cases == NULL)
if (result->results[i].cases == NULL) {
continue;
fprintf(stream, "Case %zd: %zd ns/parse\n", j, result->results[i].cases[j].parse_time);
}
fprintf(stream, "Case %zd: %zd ns/parse, %zd ns/byte\n", j, result->results[i].cases[j].parse_time, result->results[i].cases[j].parse_time / result->results[i].cases[j].length);
}
}
}

View file

@ -46,11 +46,14 @@ static void collect_geneps(HCFGrammar *grammar);
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
{
if (!parser->vtable->isValidCF(parser->env)) {
return NULL;
}
// convert parser to CFG form ("desugar").
HCFChoice *desugared = h_desugar(mm__, NULL, parser);
if(desugared == NULL)
if (desugared == NULL) {
return NULL; // -> backend not suitable for this parser
}
return h_cfgrammar_(mm__, desugared);
}
@ -61,7 +64,7 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
// recursively traverse the desugared form and collect all HCFChoices that
// represent a nonterminal (type HCF_CHOICE or HCF_CHARSET).
collect_nts(g, desugared);
if(h_hashset_empty(g->nts)) {
if (h_hashset_empty(g->nts)) {
// desugared is a terminal. wrap it in a singleton HCF_CHOICE.
HCFChoice *nt = h_new(HCFChoice, 1);
nt->type = HCF_CHOICE;
@ -92,8 +95,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
HCFSequence **s; // for the rhs (sentential form) of a production
HCFChoice **x; // for a symbol in s
if(h_hashset_present(grammar->nts, symbol))
if (h_hashset_present(grammar->nts, symbol)) {
return; // already visited, get out
}
switch(symbol->type) {
case HCF_CHAR:
@ -127,8 +131,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
/* Increase g->kmax if needed, allocating enough first/follow slots. */
static void ensure_k(HCFGrammar *g, size_t k)
{
if(k <= g->kmax) return;
if (k <= g->kmax) {
return;
}
// NB: we don't actually use first/follow[0] but allocate it anyway
// so indices of the array correspond neatly to values of k
@ -136,7 +141,7 @@ static void ensure_k(HCFGrammar *g, size_t k)
HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
if(g->kmax > 0) {
if (g->kmax > 0) {
// we are resizing, copy the old tables over
for(size_t i=0; i<=g->kmax; i++) {
first[i] = g->first[i];
@ -181,8 +186,9 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s)
{
// return true iff all symbols in s derive epsilon
for(; *s; s++) {
if(!h_derives_epsilon(g, *s))
if (!h_derives_epsilon(g, *s)) {
return false;
}
}
return true;
}
@ -190,8 +196,9 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s)
/* Populate the geneps member of g; no-op if called multiple times. */
static void collect_geneps(HCFGrammar *g)
{
if(g->geneps != NULL)
if (g->geneps != NULL) {
return;
}
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
assert(g->geneps != NULL);
@ -206,15 +213,16 @@ static void collect_geneps(HCFGrammar *g)
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
const HCFChoice *symbol = hte->key;
assert(symbol->type == HCF_CHOICE);
// this NT derives epsilon if any one of its productions does.
HCFSequence **p;
for(p = symbol->seq; *p != NULL; p++) {
if(h_derives_epsilon_seq(g, (*p)->items)) {
if (h_derives_epsilon_seq(g, (*p)->items)) {
h_hashset_put(g->geneps, symbol);
break;
}
@ -262,8 +270,9 @@ static void *combine_stringmap(void *v1, const void *v2)
{
HStringMap *m1 = v1;
const HStringMap *m2 = v2;
if(!m1)
if (!m1) {
m1 = h_stringmap_new(m2->arena);
}
h_stringmap_update(m1, m2);
return m1;
@ -272,12 +281,12 @@ static void *combine_stringmap(void *v1, const void *v2)
/* Note: Does *not* reuse submaps from n in building m. */
void h_stringmap_update(HStringMap *m, const HStringMap *n)
{
if(n->epsilon_branch)
if (n->epsilon_branch) {
m->epsilon_branch = n->epsilon_branch;
if(n->end_branch)
}
if (n->end_branch) {
m->end_branch = n->end_branch;
}
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
}
@ -294,24 +303,34 @@ HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m)
*/
void h_stringmap_replace(HStringMap *m, void *old, void *new)
{
if(!old) {
if(m->epsilon_branch) m->epsilon_branch = new;
if(m->end_branch) m->end_branch = new;
if (!old) {
if (m->epsilon_branch) {
m->epsilon_branch = new;
}
if (m->end_branch) {
m->end_branch = new;
}
} else {
if(m->epsilon_branch == old) m->epsilon_branch = new;
if(m->end_branch == old) m->end_branch = new;
if (m->epsilon_branch == old) {
m->epsilon_branch = new;
}
if (m->end_branch == old) {
m->end_branch = new;
}
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
continue;
}
HStringMap *m_ = hte->value;
if(m_)
if (m_) {
h_stringmap_replace(m_, old, new);
}
}
}
}
@ -319,11 +338,13 @@ void h_stringmap_replace(HStringMap *m, void *old, void *new)
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{
for(size_t i=0; i<n; i++) {
if(i==n-1 && end && m->end_branch)
if (i==n-1 && end && m->end_branch) {
return m->end_branch;
}
m = h_stringmap_get_char(m, str[i]);
if(!m)
if (!m) {
return NULL;
}
}
return m->epsilon_branch;
}
@ -331,7 +352,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
{
while(m) {
if(m->epsilon_branch) { // input matched
if (m->epsilon_branch) { // input matched
// assert: another lookahead would not bring a more specific match.
// this is for the table generator to ensure. (LLk)
return m->epsilon_branch;
@ -341,7 +362,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
// reading bits from it does not consume them from the real input.
uint8_t c = h_read_bits(&lookahead, 8, false);
if(lookahead.overrun) { // end of input
if (lookahead.overrun) { // end of input
// XXX assumption of byte-wise grammar and input
return m->end_branch;
}
@ -377,14 +398,15 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
uint8_t c;
// shortcut: first_0(X) is always {""}
if(k==0)
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->first
ensure_k(g, k);
ret = h_hashtable_get(g->first[k], x);
if(ret != NULL)
if (ret != NULL) {
return ret;
}
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->first[k], x, ret);
@ -399,7 +421,7 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
case HCF_CHARSET:
c=0;
do {
if(charset_isset(x->charset, c)) {
if (charset_isset(x->charset, c)) {
h_stringmap_put_char(ret, c, INSET);
}
} while(c++ < 255);
@ -432,9 +454,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{
// shortcut: the first set of the empty sequence, for any k, is {""}
if(*s == NULL)
if (*s == NULL) {
return g->singleton_epsilon;
}
// first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| }
HCFChoice *x = s[0];
@ -443,12 +465,14 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
const HStringMap *first_x = h_first(k, g, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail)
if(is_singleton_epsilon(first_x))
if (is_singleton_epsilon(first_x)) {
return h_first_seq(k, g, tail);
}
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
if(!any_string_shorter(k, first_x))
if (!any_string_shorter(k, first_x)) {
return first_x;
}
// create a new result set and build up the set described above
HStringMap *ret = h_stringmap_new(g->arena);
@ -468,23 +492,25 @@ static bool is_singleton_epsilon(const HStringMap *m)
static bool any_string_shorter(size_t k, const HStringMap *m)
{
if(k==0)
if (k==0) {
return false;
if(m->epsilon_branch)
}
if (m->epsilon_branch) {
return true;
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
continue;
}
HStringMap *m_ = hte->value;
// check subtree for strings shorter than k-1
if(any_string_shorter(k-1, m_))
if (any_string_shorter(k-1, m_)) {
return true;
}
}
}
@ -494,16 +520,21 @@ static bool any_string_shorter(size_t k, const HStringMap *m)
// helper for h_predict
static void remove_all_shorter(size_t k, HStringMap *m)
{
if(k==0) return;
if (k==0) {
return;
}
m->epsilon_branch = NULL;
if(k==1) return;
if (k==1) {
return;
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
continue;
}
remove_all_shorter(k-1, hte->value); // recursion into subtree
}
}
@ -530,39 +561,41 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
HStringMap *ret;
// shortcut: follow_0(X) is always {""}
if(k==0)
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->follow
ensure_k(g, k);
ret = h_hashtable_get(g->follow[k], x);
if(ret != NULL)
if (ret != NULL) {
return ret;
}
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->follow[k], x, ret);
// if X is the start symbol, the end token is in its follow set
if(x == g->start)
if (x == g->start) {
h_stringmap_put_end(ret, INSET);
}
// iterate over g->nts
size_t i;
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
for (i=0; i < g->nts->capacity; i++) {
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
continue;
}
HCFChoice *a = (void *)hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
// iterate over the productions for A
HCFSequence **p;
for(p=a->seq; *p; p++) {
for (p=a->seq; *p; p++) {
HCFChoice **s = (*p)->items; // production's right-hand side
for(; *s; s++) {
if(*s == x) { // occurance found
for (; *s; s++) {
if (*s == x) { // occurance found
HCFChoice **tail = s+1;
const HStringMap *first_tail = h_first_seq(k, g, tail);
@ -604,12 +637,12 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail)
{
if(as->epsilon_branch) {
if (as->epsilon_branch) {
// for a="", add f_k(tail) to ret
h_stringmap_update(ret, f(k, g, tail));
}
if(as->end_branch) {
if (as->end_branch) {
// for a="$", nothing can follow; just add "$" to ret
// NB: formally, "$" is considered to be of length k
h_stringmap_put_end(ret, INSET);
@ -619,8 +652,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HHashTable *ht = as->char_branches;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
uint8_t c = key_char((HCharKey)hte->key);
// follow the branch to find the set { a' | t a' <- as }
@ -648,7 +682,7 @@ void h_pprint_char(FILE *f, char c)
case '\n': fputs("\\n", f); break;
case '\r': fputs("\\r", f); break;
default:
if(isprint((int)c)) {
if (isprint((int)c)) {
fputc(c, f);
} else {
fprintf(f, "\\x%.2X", c);
@ -672,11 +706,11 @@ static void pprint_charset(FILE *f, const HCharset cs)
fputc('[', f);
for(i=0; i<256; i++) {
if(charset_isset(cs, i)) {
if (charset_isset(cs, i)) {
pprint_charset_char(f, i);
// detect ranges
if(i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) {
if (i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) {
fputc('-', f);
for(; i<256 && charset_isset(cs, i); i++);
i--; // back to the last in range
@ -708,8 +742,9 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
{
fputc('"', f);
for(; *x; x++) {
if((*x)->type != HCF_CHAR)
if ((*x)->type != HCF_CHAR) {
break;
}
h_pprint_char(f, (*x)->chr);
}
fputc('"', f);
@ -739,13 +774,14 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
{
HCFChoice **x = seq->items;
if(*x == NULL) { // the empty sequence
if (*x == NULL) { // the empty sequence
fputs("\"\"", f);
} else {
while(*x) {
if(x != seq->items) fputc(' ', f); // internal separator
if((*x)->type == HCF_CHAR) {
if (x != seq->items) {
fputc(' ', f); // internal separator
}
if ((*x)->type == HCF_CHAR) {
// condense character strings
x = pprint_string(f, x);
} else {
@ -781,7 +817,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
assert(nt->type == HCF_CHOICE);
HCFSequence **p = nt->seq;
if(*p == NULL) return; // shouldn't happen
if (*p == NULL) {
return; // shouldn't happen
}
pprint_sequence(f, g, *p++); // print first production on the same line
for(; *p; p++) { // print the rest below with "or" bars
for(i=0; i<column; i++) fputc(' ', f); // indent
@ -792,8 +830,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
{
if(g->nts->used < 1)
if (g->nts->used < 1) {
return;
}
// determine maximum string length of symbol names
int len;
@ -805,8 +844,9 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
const HCFChoice *a = hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
@ -828,10 +868,12 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
const HCFChoice *a = NULL;
for(i=0; i < set->capacity; i++) {
for(hte = &set->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
if(a != NULL) // we're not on the first element
}
if(a != NULL) { // we're not on the first element
fputc(',', file);
}
a = hte->key; // production's left-hand symbol
@ -851,9 +893,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
{
assert(n < BUFSIZE-4);
if(map->epsilon_branch) {
if(!first) fputc(sep, file); first=false;
if(n==0) {
if (map->epsilon_branch) {
if (!first) {
fputc(sep, file);
first=false;
}
if (n==0) {
fputs("\"\"", file);
} else {
fputs("\"", file);
@ -861,20 +906,27 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
fputs("\"", file);
}
if(valprint) {
if (valprint) {
fputc(':', file);
valprint(file, env, map->epsilon_branch);
}
}
if(map->end_branch) {
if(!first) fputs(",\"", file); first=false;
if(n>0) fputs("\"\"", file);
if (map->end_branch) {
if (!first) {
fputs(",\"", file);
first=false;
}
if (n>0) {
fputs("\"\"", file);
}
fwrite(prefix, 1, n, file);
if(n>0) fputs("\"\"", file);
if (n>0) {
fputs("\"\"", file);
}
fputs("$", file);
if(valprint) {
if (valprint) {
fputc(':', file);
valprint(file, env, map->end_branch);
}
@ -886,8 +938,9 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) {
for(hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
uint8_t c = key_char((HCharKey)hte->key);
HStringMap *ends = hte->value;
@ -901,10 +954,11 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break;
case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break;
default:
if(isprint(c))
if (isprint(c)) {
prefix[n_++] = c;
else
} else {
n_ += sprintf(prefix+n_, "\\x%.2X", c);
}
}
first = pprint_stringmap_elems(file, first, prefix, n_,

View file

@ -157,30 +157,67 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
for (hte = &ht->contents[hashval & (ht->capacity - 1)];
hte != NULL;
hte = hte->next) {
if (hte->key == NULL)
if (hte->key == NULL) {
continue;
if (hte->hashval != hashval)
}
if (hte->hashval != hashval) {
continue;
if (ht->equalFunc(key, hte->key))
}
if (ht->equalFunc(key, hte->key)) {
return hte->value;
}
}
return NULL;
}
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);
/*
 * Make sure the table has enough slots for n entries.
 *
 * The slot count is doubled until n * 1.3 no longer exceeds it. If any
 * doubling happened, a zeroed slot array of the new size is taken from the
 * table's arena and every keyed entry of the old array (including chained
 * entries reached through ->next) is re-inserted with h_hashtable_put_raw.
 * If the table was already large enough, nothing is touched.
 */
void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
  const size_t prev_capacity = ht->capacity;
  HHashTableEntry *prev_slots = ht->contents;

  // Grow by powers of two until the load target is met.
  while (n * 1.3 > ht->capacity) {
    ht->capacity *= 2;
  }
  if (ht->capacity == prev_capacity) {
    return; // no growth needed
  }

  const size_t nbytes = sizeof(HHashTableEntry) * ht->capacity;
  HHashTableEntry *fresh_slots = h_arena_malloc(ht->arena, nbytes);
  ht->contents = fresh_slots;
  ht->used = 0; // reset before the entries are re-inserted below
  memset(fresh_slots, 0, nbytes);

  // Walk every old bucket and its overflow chain, moving live entries over.
  for (size_t slot = 0; slot < prev_capacity; ++slot) {
    HHashTableEntry *cur = &prev_slots[slot];
    while (cur != NULL) {
      if (cur->key) {
        h_hashtable_put_raw(ht, cur);
      }
      cur = cur->next;
    }
  }
  // The old slot array is not handed back to the arena here.
}
/*
 * Insert (or overwrite) the mapping key -> value.
 *
 * The table is first given the chance to grow for one more entry; the key is
 * then hashed with the table's hash function and the staged entry is handed
 * to h_hashtable_put_raw, which copies it into place.
 */
void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
  // Rebalance before inserting.
  h_hashtable_ensure_capacity(ht, ht->used + 1);

  HHashTableEntry staged = {
    .key = key,
    .value = value,
    .hashval = ht->hashFunc(key)
  };
  h_hashtable_put_raw(ht, &staged);
}
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
#ifdef CONSISTENCY_CHECK
assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
HHashTableEntry *hte = &ht->contents[new_entry->hashval & (ht->capacity - 1)];
if (hte->key != NULL) {
for(;;) {
// check each link, stay on last if not found
if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
if (hte->hashval == new_entry->hashval && ht->equalFunc(new_entry->key, hte->key))
goto insert_here;
if (hte->next == NULL)
break;
@ -196,9 +233,9 @@ void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
ht->used++;
insert_here:
hte->key = key;
hte->value = value;
hte->hashval = hashval;
hte->key = new_entry->key;
hte->value = new_entry->value;
hte->hashval = new_entry->hashval;
}
void h_hashtable_update(HHashTable *dst, const HHashTable *src) {

View file

@ -8,14 +8,16 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) {
if (nstk__ == NULL) {
nstk__ = h_cfstack_new(mm__);
}
if(nstk__->prealloc == NULL)
if (nstk__->prealloc == NULL) {
nstk__->prealloc = h_new(HCFChoice, 1);
}
// we're going to do something naughty and cast away the const to memoize
assert(parser->vtable->desugar != NULL);
((HParser *)parser)->desugared = nstk__->prealloc;
parser->vtable->desugar(mm__, nstk__, parser->env);
if (stk__ == NULL)
if (stk__ == NULL) {
h_cfstack_free(mm__, nstk__);
}
} else if (stk__ != NULL) {
HCFS_APPEND(parser->desugared);
}

View file

@ -46,6 +46,14 @@ typedef enum HParserBackend_ {
PB_MAX = PB_GLR
} HParserBackend;
// Human-readable names for the parser backends, used when printing compile
// and benchmark diagnostics (looked up with the backend number as index).
// NOTE(review): the entries presumably follow the declaration order of
// HParserBackend, with "GLR" matching PB_GLR (== PB_MAX) -- confirm against
// the enum and keep both in sync when a backend is added.
static const char* HParserBackendNames[] = {
"Packrat",
"Regular",
"LL(k)",
"LALR",
"GLR"
};
typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_;
TT_NONE = 1,
@ -178,6 +186,7 @@ typedef struct HCaseResult_ {
#else
HResultTiming timestamp;
#endif
size_t length;
} HCaseResult;
typedef struct HBackendResults_ {

View file

@ -1,16 +1,28 @@
#include "parser_internal.h"
// Environment stored in an indirect parser's env pointer.
typedef struct HIndirectEnv_ {
// The parser this indirect parser forwards to; NULL until one is bound.
const HParser* parser;
// Set while indirect_isValidCF is checking the wrapped parser, so that a
// re-entrant check reaching this same indirect parser returns true instead
// of recursing again.
bool touched;
} HIndirectEnv;
static HParseResult* parse_indirect(void* env, HParseState* state) {
return h_do_parse(env, state);
return h_do_parse(((HIndirectEnv*)env)->parser, state);
}
static bool indirect_isValidCF(void *env) {
HParser *p = (HParser*)env;
return p->vtable->isValidCF(p->env);
HIndirectEnv *ie = (HIndirectEnv*)env;
if (ie->touched)
return true;
ie->touched = true;
const HParser *p = ie->parser;
// self->vtable->isValidCF = h_true;
bool ret = p->vtable->isValidCF(p->env);
ie->touched = false;
return ret;
}
static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_DESUGAR( (HParser *)env );
HCFS_DESUGAR( ((HIndirectEnv *)env)->parser );
}
static const HParserVtable indirect_vt = {
@ -27,12 +39,15 @@ void h_bind_indirect__m(HAllocator *mm__, HParser* indirect, const HParser* inne
void h_bind_indirect(HParser* indirect, const HParser* inner) {
assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser");
indirect->env = (void*)inner;
((HIndirectEnv*)indirect->env)->parser = inner;
}
// Convenience wrapper: create an indirect parser via h_indirect__m using the
// system allocator. Takes no arguments; declared with (void) so the
// definition is a real prototype and stray arguments are rejected at compile
// time.
HParser* h_indirect(void) {
  return h_indirect__m(&system_allocator);
}
HParser* h_indirect__m(HAllocator* mm__) {
return h_new_parser(mm__, &indirect_vt, NULL);
HIndirectEnv *env = h_new(HIndirectEnv, 1);
env->parser = NULL;
env->touched = false;
return h_new_parser(mm__, &indirect_vt, env);
}