Merge pull request #101 from abiggerhammer/master

Make HHashTable resizeable and improve benchmarking output
This commit is contained in:
TQ Hirsch 2014-04-20 21:50:59 +02:00
commit 4f9efcd726
10 changed files with 296 additions and 148 deletions

View file

@ -1,6 +1,7 @@
# -*- python -*- # -*- python -*-
import os import os
import os.path import os.path
import platform
import sys import sys
@ -44,7 +45,7 @@ env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backen
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig") env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env.ScanReplace('libhammer.pc.in') env.ScanReplace('libhammer.pc.in')
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes") env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable")
if env['PLATFORM'] == 'darwin': if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}') env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')

View file

@ -9,6 +9,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params) int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{ {
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
int result = h_lalr_compile(mm__, parser, params); int result = h_lalr_compile(mm__, parser, params);
if(result == -1 && parser->backend_data) { if(result == -1 && parser->backend_data) {

View file

@ -49,8 +49,9 @@ static inline HLRTransition *transition(HArena *arena,
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
size_t x, HCFChoice *xAy) size_t x, HCFChoice *xAy)
{ {
if(xAy->type != HCF_CHOICE) if (xAy->type != HCF_CHOICE) {
return; return;
}
// XXX CHARSET? // XXX CHARSET?
HArena *arena = eg->arena; HArena *arena = eg->arena;
@ -89,7 +90,7 @@ static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
*esym = *sym; *esym = *sym;
HHashSet *cs = h_hashtable_get(eg->corr, sym); HHashSet *cs = h_hashtable_get(eg->corr, sym);
if(!cs) { if (!cs) {
cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol); cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
h_hashtable_put(eg->corr, sym, cs); h_hashtable_put(eg->corr, sym, cs);
} }
@ -151,9 +152,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
{ {
int ret = 0; int ret = 0;
if(fs->epsilon_branch) { if (fs->epsilon_branch) {
HLRAction *prev = tmap->epsilon_branch; HLRAction *prev = tmap->epsilon_branch;
if(prev && prev != action) { if (prev && prev != action) {
// conflict // conflict
tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action); tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1; ret = -1;
@ -162,9 +163,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
} }
} }
if(fs->end_branch) { if (fs->end_branch) {
HLRAction *prev = tmap->end_branch; HLRAction *prev = tmap->end_branch;
if(prev && prev != action) { if (prev && prev != action) {
// conflict // conflict
tmap->end_branch = h_lr_conflict(tmap->arena, prev, action); tmap->end_branch = h_lr_conflict(tmap->arena, prev, action);
ret = -1; ret = -1;
@ -176,13 +177,14 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
H_FOREACH(fs->char_branches, void *key, HStringMap *fs_) H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key); HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key);
if(!tmap_) { if (!tmap_) {
tmap_ = h_stringmap_new(tmap->arena); tmap_ = h_stringmap_new(tmap->arena);
h_hashtable_put(tmap->char_branches, key, tmap_); h_hashtable_put(tmap->char_branches, key, tmap_);
} }
if(terminals_put(tmap_, fs_, action) < 0) if (terminals_put(tmap_, fs_, action) < 0) {
ret = -1; ret = -1;
}
H_END_FOREACH H_END_FOREACH
return ret; return ret;
@ -197,8 +199,9 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
for(; *p && *rhs; p++, rhs++) { for(; *p && *rhs; p++, rhs++) {
HLRTransition *t = h_hashtable_get(eg->smap, *p); HLRTransition *t = h_hashtable_get(eg->smap, *p);
assert(t != NULL); assert(t != NULL);
if(!h_eq_symbol(t->symbol, *rhs)) if (!h_eq_symbol(t->symbol, *rhs)) {
return false; return false;
}
state = t->to; state = t->to;
} }
return (*p == *rhs // both NULL return (*p == *rhs // both NULL
@ -231,18 +234,21 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// build LR(0) table // build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR" // if necessary, resolve conflicts "by conversion to SLR"
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser)); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free) if(g == NULL) // backend not suitable (language not context-free)
return -1; return -1;
HLRDFA *dfa = h_lr0_dfa(g); HLRDFA *dfa = h_lr0_dfa(g);
if(dfa == NULL) { // this should normally not happen if (dfa == NULL) { // this should normally not happen
h_cfgrammar_free(g); h_cfgrammar_free(g);
return -1; return -1;
} }
HLRTable *table = h_lr0_table(g, dfa); HLRTable *table = h_lr0_table(g, dfa);
if(table == NULL) { // this should normally not happen if (table == NULL) { // this should normally not happen
h_cfgrammar_free(g); h_cfgrammar_free(g);
return -1; return -1;
} }
@ -284,8 +290,9 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
for(HCFSequence **p=lhs->seq; *p; p++) { for(HCFSequence **p=lhs->seq; *p; p++) {
HCFChoice **rhs = (*p)->items; HCFChoice **rhs = (*p)->items;
if(!match_production(eg, rhs, item->rhs, state)) if(!match_production(eg, rhs, item->rhs, state)) {
continue; continue;
}
// the left-hand symbol's follow set is this production's // the left-hand symbol's follow set is this production's
// contribution to the lookahead // contribution to the lookahead
@ -300,10 +307,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
} H_END_FOREACH // enhanced production } H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item H_END_FOREACH // reducible item
if(inadeq) if(inadeq) {
h_slist_push(table->inadeq, (void *)(uintptr_t)state); h_slist_push(table->inadeq, (void *)(uintptr_t)state);
} }
} }
}
h_cfgrammar_free(g); h_cfgrammar_free(g);
parser->backend_data = table; parser->backend_data = table;
@ -350,7 +358,7 @@ int test_lalr(void)
printf("\n==== G R A M M A R ====\n"); printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
if(g == NULL) { if (g == NULL) {
fprintf(stderr, "h_cfgrammar failed\n"); fprintf(stderr, "h_cfgrammar failed\n");
return 1; return 1;
} }
@ -358,21 +366,23 @@ int test_lalr(void)
printf("\n==== D F A ====\n"); printf("\n==== D F A ====\n");
HLRDFA *dfa = h_lr0_dfa(g); HLRDFA *dfa = h_lr0_dfa(g);
if(dfa) if (dfa) {
h_pprint_lrdfa(stdout, g, dfa, 0); h_pprint_lrdfa(stdout, g, dfa, 0);
else } else {
fprintf(stderr, "h_lalr_dfa failed\n"); fprintf(stderr, "h_lalr_dfa failed\n");
}
printf("\n==== L R ( 0 ) T A B L E ====\n"); printf("\n==== L R ( 0 ) T A B L E ====\n");
HLRTable *table0 = h_lr0_table(g, dfa); HLRTable *table0 = h_lr0_table(g, dfa);
if(table0) if (table0) {
h_pprint_lrtable(stdout, g, table0, 0); h_pprint_lrtable(stdout, g, table0, 0);
else } else {
fprintf(stderr, "h_lr0_table failed\n"); fprintf(stderr, "h_lr0_table failed\n");
}
h_lrtable_free(table0); h_lrtable_free(table0);
printf("\n==== L A L R T A B L E ====\n"); printf("\n==== L A L R T A B L E ====\n");
if(h_compile(p, PB_LALR, NULL)) { if (h_compile(p, PB_LALR, NULL)) {
fprintf(stderr, "does not compile\n"); fprintf(stderr, "does not compile\n");
return 2; return 2;
} }
@ -380,10 +390,10 @@ int test_lalr(void)
printf("\n==== P A R S E R E S U L T ====\n"); printf("\n==== P A R S E R E S U L T ====\n");
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
if(res) if (res) {
h_pprint(stdout, res->ast, 0, 2); h_pprint(stdout, res->ast, 0, 2);
else } else {
printf("no parse\n"); printf("no parse\n");
}
return 0; return 0;
} }

View file

@ -35,10 +35,12 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
HRVMTrace *invert_trace(HRVMTrace *trace) { HRVMTrace *invert_trace(HRVMTrace *trace) {
HRVMTrace *last = NULL; HRVMTrace *last = NULL;
if (!trace) if (!trace) {
return NULL; return NULL;
if (!trace->next) }
if (!trace->next) {
return trace; return trace;
}
do { do {
HRVMTrace *next = trace->next; HRVMTrace *next = trace->next;
trace->next = last; trace->next = last;
@ -83,8 +85,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
h_sarray_clear(heads_n); h_sarray_clear(heads_n);
} }
memset(insn_seen, 0, prog->length); // no insns seen yet memset(insn_seen, 0, prog->length); // no insns seen yet
if (!live_threads) if (!live_threads) {
goto match_fail; goto match_fail;
}
live_threads = 0; live_threads = 0;
HRVMTrace *tr_head; HRVMTrace *tr_head;
H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) { H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
@ -111,8 +114,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
hi = (arg >> 8) & 0xff; hi = (arg >> 8) & 0xff;
lo = arg & 0xff; lo = arg & 0xff;
THREAD.ip++; THREAD.ip++;
if (ch < lo || ch > hi) if (ch < lo || ch > hi) {
ipq_top--; // terminate thread ipq_top--; // terminate thread
}
goto next_insn; goto next_insn;
case RVM_GOTO: case RVM_GOTO:
THREAD.ip = arg; THREAD.ip = arg;
@ -141,8 +145,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
goto next_insn; goto next_insn;
case RVM_EOF: case RVM_EOF:
THREAD.ip++; THREAD.ip++;
if (off != len) if (off != len) {
ipq_top--; // Terminate thread ipq_top--; // Terminate thread
}
goto next_insn; goto next_insn;
case RVM_STEP: case RVM_STEP:
// save thread // save thread
@ -249,9 +254,10 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) { uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
for (uint16_t i = 0; i < prog->action_count; i++) { for (uint16_t i = 0; i < prog->action_count; i++) {
if (prog->actions[i].action == action_func && prog->actions[i].env == env) if (prog->actions[i].action == action_func && prog->actions[i].env == env) {
return i; return i;
} }
}
// Ensure that there's room in the action array... // Ensure that there's room in the action array...
if (!(prog->action_count & (prog->action_count + 1))) { if (!(prog->action_count & (prog->action_count + 1))) {
// needs to be scaled up. // needs to be scaled up.
@ -294,9 +300,10 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
size_t h_svm_count_to_mark(HSVMContext *ctx) { size_t h_svm_count_to_mark(HSVMContext *ctx) {
size_t ctm; size_t ctm;
for (ctm = 0; ctm < ctx->stack_count; ctm++) { for (ctm = 0; ctm < ctx->stack_count; ctm++) {
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) {
return ctm; return ctm;
} }
}
return ctx->stack_count; return ctx->stack_count;
} }
@ -320,9 +327,11 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
} }
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) { bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) while (ctx->stack_count > 0) {
if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) {
return true; return true;
} }
}
return false; // no mark found. return false; // no mark found.
} }
@ -343,8 +352,9 @@ static void h_regex_free(HParser *parser) {
} }
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) { static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
if (!parser->vtable->isValidRegular(parser->env)) if (!parser->vtable->isValidRegular(parser->env)) {
return 1; return -1;
}
HRVMProg *prog = h_new(HRVMProg, 1); HRVMProg *prog = h_new(HRVMProg, 1);
prog->length = prog->action_count = 0; prog->length = prog->action_count = 0;
prog->insns = NULL; prog->insns = NULL;

View file

@ -80,13 +80,14 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
// Step 1: Compile grammar for given parser... // Step 1: Compile grammar for given parser...
if (h_compile(parser, backend, NULL) == -1) { if (h_compile(parser, backend, NULL) == -1) {
// backend inappropriate for grammar... // backend inappropriate for grammar...
fprintf(stderr, "failed\n"); fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]);
ret->results[backend].compile_success = false; ret->results[backend].compile_success = false;
ret->results[backend].n_testcases = 0; ret->results[backend].n_testcases = 0;
ret->results[backend].failed_testcases = 0; ret->results[backend].failed_testcases = 0;
ret->results[backend].cases = NULL; ret->results[backend].cases = NULL;
continue; continue;
} }
fprintf(stderr, "Compiled for %s\n", HParserBackendNames[backend]);
ret->results[backend].compile_success = true; ret->results[backend].compile_success = true;
int tc_failed = 0; int tc_failed = 0;
// Step 1: verify all test cases. // Step 1: verify all test cases.
@ -103,7 +104,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
if ((res_unamb == NULL && tc->output_unambiguous != NULL) if ((res_unamb == NULL && tc->output_unambiguous != NULL)
|| (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) { || (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) {
// test case failed... // test case failed...
fprintf(stderr, "failed\n"); fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]);
// We want to run all testcases, for purposes of generating a // We want to run all testcases, for purposes of generating a
// report. (eg, if users are trying to fix a grammar for a // report. (eg, if users are trying to fix a grammar for a
// faster backend) // faster backend)
@ -115,7 +116,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
if (tc_failed > 0) { if (tc_failed > 0) {
// Can't use this parser; skip to the next // Can't use this parser; skip to the next
fprintf(stderr, "Backend failed testcases; skipping benchmark\n"); fprintf(stderr, "%s failed testcases; skipping benchmark\n", HParserBackendNames[backend]);
continue; continue;
} }
@ -140,6 +141,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec);
} while (time_diff < 100000000); } while (time_diff < 100000000);
ret->results[backend].cases[cur_case].parse_time = (time_diff / count); ret->results[backend].cases[cur_case].parse_time = (time_diff / count);
ret->results[backend].cases[cur_case].length = tc->length;
cur_case++; cur_case++;
} }
} }
@ -148,11 +150,16 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
void h_benchmark_report(FILE* stream, HBenchmarkResults* result) { void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
for (size_t i=0; i<result->len; ++i) { for (size_t i=0; i<result->len; ++i) {
fprintf(stream, "Backend %zd ... \n", i); if (result->results[i].cases == NULL) {
fprintf(stream, "Skipping %s because grammar did not compile for it\n", HParserBackendNames[i]);
} else {
fprintf(stream, "Backend %zd (%s) ... \n", i, HParserBackendNames[i]);
}
for (size_t j=0; j<result->results[i].n_testcases; ++j) { for (size_t j=0; j<result->results[i].n_testcases; ++j) {
if(result->results[i].cases == NULL) if (result->results[i].cases == NULL) {
continue; continue;
fprintf(stream, "Case %zd: %zd ns/parse\n", j, result->results[i].cases[j].parse_time); }
fprintf(stream, "Case %zd: %zd ns/parse, %zd ns/byte\n", j, result->results[i].cases[j].parse_time, result->results[i].cases[j].parse_time / result->results[i].cases[j].length);
} }
} }
} }

View file

@ -46,11 +46,14 @@ static void collect_geneps(HCFGrammar *grammar);
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
{ {
if (!parser->vtable->isValidCF(parser->env)) {
return NULL;
}
// convert parser to CFG form ("desugar"). // convert parser to CFG form ("desugar").
HCFChoice *desugared = h_desugar(mm__, NULL, parser); HCFChoice *desugared = h_desugar(mm__, NULL, parser);
if(desugared == NULL) if (desugared == NULL) {
return NULL; // -> backend not suitable for this parser return NULL; // -> backend not suitable for this parser
}
return h_cfgrammar_(mm__, desugared); return h_cfgrammar_(mm__, desugared);
} }
@ -61,7 +64,7 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
// recursively traverse the desugared form and collect all HCFChoices that // recursively traverse the desugared form and collect all HCFChoices that
// represent a nonterminal (type HCF_CHOICE or HCF_CHARSET). // represent a nonterminal (type HCF_CHOICE or HCF_CHARSET).
collect_nts(g, desugared); collect_nts(g, desugared);
if(h_hashset_empty(g->nts)) { if (h_hashset_empty(g->nts)) {
// desugared is a terminal. wrap it in a singleton HCF_CHOICE. // desugared is a terminal. wrap it in a singleton HCF_CHOICE.
HCFChoice *nt = h_new(HCFChoice, 1); HCFChoice *nt = h_new(HCFChoice, 1);
nt->type = HCF_CHOICE; nt->type = HCF_CHOICE;
@ -92,8 +95,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
HCFSequence **s; // for the rhs (sentential form) of a production HCFSequence **s; // for the rhs (sentential form) of a production
HCFChoice **x; // for a symbol in s HCFChoice **x; // for a symbol in s
if(h_hashset_present(grammar->nts, symbol)) if (h_hashset_present(grammar->nts, symbol)) {
return; // already visited, get out return; // already visited, get out
}
switch(symbol->type) { switch(symbol->type) {
case HCF_CHAR: case HCF_CHAR:
@ -127,8 +131,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
/* Increase g->kmax if needed, allocating enough first/follow slots. */ /* Increase g->kmax if needed, allocating enough first/follow slots. */
static void ensure_k(HCFGrammar *g, size_t k) static void ensure_k(HCFGrammar *g, size_t k)
{ {
if(k <= g->kmax) return; if (k <= g->kmax) {
return;
}
// NB: we don't actually use first/follow[0] but allocate it anyway // NB: we don't actually use first/follow[0] but allocate it anyway
// so indices of the array correspond neatly to values of k // so indices of the array correspond neatly to values of k
@ -136,7 +141,7 @@ static void ensure_k(HCFGrammar *g, size_t k)
HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
if(g->kmax > 0) { if (g->kmax > 0) {
// we are resizing, copy the old tables over // we are resizing, copy the old tables over
for(size_t i=0; i<=g->kmax; i++) { for(size_t i=0; i<=g->kmax; i++) {
first[i] = g->first[i]; first[i] = g->first[i];
@ -181,17 +186,19 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s)
{ {
// return true iff all symbols in s derive epsilon // return true iff all symbols in s derive epsilon
for(; *s; s++) { for(; *s; s++) {
if(!h_derives_epsilon(g, *s)) if (!h_derives_epsilon(g, *s)) {
return false; return false;
} }
}
return true; return true;
} }
/* Populate the geneps member of g; no-op if called multiple times. */ /* Populate the geneps member of g; no-op if called multiple times. */
static void collect_geneps(HCFGrammar *g) static void collect_geneps(HCFGrammar *g)
{ {
if(g->geneps != NULL) if (g->geneps != NULL) {
return; return;
}
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
assert(g->geneps != NULL); assert(g->geneps != NULL);
@ -206,15 +213,16 @@ static void collect_geneps(HCFGrammar *g)
HHashTableEntry *hte; HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) { for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
const HCFChoice *symbol = hte->key; const HCFChoice *symbol = hte->key;
assert(symbol->type == HCF_CHOICE); assert(symbol->type == HCF_CHOICE);
// this NT derives epsilon if any one of its productions does. // this NT derives epsilon if any one of its productions does.
HCFSequence **p; HCFSequence **p;
for(p = symbol->seq; *p != NULL; p++) { for(p = symbol->seq; *p != NULL; p++) {
if(h_derives_epsilon_seq(g, (*p)->items)) { if (h_derives_epsilon_seq(g, (*p)->items)) {
h_hashset_put(g->geneps, symbol); h_hashset_put(g->geneps, symbol);
break; break;
} }
@ -262,8 +270,9 @@ static void *combine_stringmap(void *v1, const void *v2)
{ {
HStringMap *m1 = v1; HStringMap *m1 = v1;
const HStringMap *m2 = v2; const HStringMap *m2 = v2;
if(!m1) if (!m1) {
m1 = h_stringmap_new(m2->arena); m1 = h_stringmap_new(m2->arena);
}
h_stringmap_update(m1, m2); h_stringmap_update(m1, m2);
return m1; return m1;
@ -272,12 +281,12 @@ static void *combine_stringmap(void *v1, const void *v2)
/* Note: Does *not* reuse submaps from n in building m. */ /* Note: Does *not* reuse submaps from n in building m. */
void h_stringmap_update(HStringMap *m, const HStringMap *n) void h_stringmap_update(HStringMap *m, const HStringMap *n)
{ {
if(n->epsilon_branch) if (n->epsilon_branch) {
m->epsilon_branch = n->epsilon_branch; m->epsilon_branch = n->epsilon_branch;
}
if(n->end_branch) if (n->end_branch) {
m->end_branch = n->end_branch; m->end_branch = n->end_branch;
}
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches); h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
} }
@ -294,44 +303,56 @@ HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m)
*/ */
void h_stringmap_replace(HStringMap *m, void *old, void *new) void h_stringmap_replace(HStringMap *m, void *old, void *new)
{ {
if(!old) { if (!old) {
if(m->epsilon_branch) m->epsilon_branch = new; if (m->epsilon_branch) {
if(m->end_branch) m->end_branch = new; m->epsilon_branch = new;
}
if (m->end_branch) {
m->end_branch = new;
}
} else { } else {
if(m->epsilon_branch == old) m->epsilon_branch = new; if (m->epsilon_branch == old) {
if(m->end_branch == old) m->end_branch = new; m->epsilon_branch = new;
}
if (m->end_branch == old) {
m->end_branch = new;
}
} }
// iterate over m->char_branches // iterate over m->char_branches
const HHashTable *ht = m->char_branches; const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) { for (size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
HStringMap *m_ = hte->value; HStringMap *m_ = hte->value;
if(m_) if (m_) {
h_stringmap_replace(m_, old, new); h_stringmap_replace(m_, old, new);
} }
} }
}
} }
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end) void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{ {
for(size_t i=0; i<n; i++) { for(size_t i=0; i<n; i++) {
if(i==n-1 && end && m->end_branch) if (i==n-1 && end && m->end_branch) {
return m->end_branch; return m->end_branch;
}
m = h_stringmap_get_char(m, str[i]); m = h_stringmap_get_char(m, str[i]);
if(!m) if (!m) {
return NULL; return NULL;
} }
}
return m->epsilon_branch; return m->epsilon_branch;
} }
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
{ {
while(m) { while(m) {
if(m->epsilon_branch) { // input matched if (m->epsilon_branch) { // input matched
// assert: another lookahead would not bring a more specific match. // assert: another lookahead would not bring a more specific match.
// this is for the table generator to ensure. (LLk) // this is for the table generator to ensure. (LLk)
return m->epsilon_branch; return m->epsilon_branch;
@ -341,7 +362,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
// reading bits from it does not consume them from the real input. // reading bits from it does not consume them from the real input.
uint8_t c = h_read_bits(&lookahead, 8, false); uint8_t c = h_read_bits(&lookahead, 8, false);
if(lookahead.overrun) { // end of input if (lookahead.overrun) { // end of input
// XXX assumption of byte-wise grammar and input // XXX assumption of byte-wise grammar and input
return m->end_branch; return m->end_branch;
} }
@ -377,14 +398,15 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
uint8_t c; uint8_t c;
// shortcut: first_0(X) is always {""} // shortcut: first_0(X) is always {""}
if(k==0) if (k==0) {
return g->singleton_epsilon; return g->singleton_epsilon;
}
// memoize via g->first // memoize via g->first
ensure_k(g, k); ensure_k(g, k);
ret = h_hashtable_get(g->first[k], x); ret = h_hashtable_get(g->first[k], x);
if(ret != NULL) if (ret != NULL) {
return ret; return ret;
}
ret = h_stringmap_new(g->arena); ret = h_stringmap_new(g->arena);
assert(ret != NULL); assert(ret != NULL);
h_hashtable_put(g->first[k], x, ret); h_hashtable_put(g->first[k], x, ret);
@ -399,7 +421,7 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
case HCF_CHARSET: case HCF_CHARSET:
c=0; c=0;
do { do {
if(charset_isset(x->charset, c)) { if (charset_isset(x->charset, c)) {
h_stringmap_put_char(ret, c, INSET); h_stringmap_put_char(ret, c, INSET);
} }
} while(c++ < 255); } while(c++ < 255);
@ -432,9 +454,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{ {
// shortcut: the first set of the empty sequence, for any k, is {""} // shortcut: the first set of the empty sequence, for any k, is {""}
if(*s == NULL) if (*s == NULL) {
return g->singleton_epsilon; return g->singleton_epsilon;
}
// first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| } // first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| }
HCFChoice *x = s[0]; HCFChoice *x = s[0];
@ -443,12 +465,14 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
const HStringMap *first_x = h_first(k, g, x); const HStringMap *first_x = h_first(k, g, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail) // shortcut: if first_k(X) = {""}, just return first_k(tail)
if(is_singleton_epsilon(first_x)) if (is_singleton_epsilon(first_x)) {
return h_first_seq(k, g, tail); return h_first_seq(k, g, tail);
}
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X) // shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
if(!any_string_shorter(k, first_x)) if (!any_string_shorter(k, first_x)) {
return first_x; return first_x;
}
// create a new result set and build up the set described above // create a new result set and build up the set described above
HStringMap *ret = h_stringmap_new(g->arena); HStringMap *ret = h_stringmap_new(g->arena);
@ -468,25 +492,27 @@ static bool is_singleton_epsilon(const HStringMap *m)
static bool any_string_shorter(size_t k, const HStringMap *m) static bool any_string_shorter(size_t k, const HStringMap *m)
{ {
if(k==0) if (k==0) {
return false; return false;
}
if(m->epsilon_branch) if (m->epsilon_branch) {
return true; return true;
}
// iterate over m->char_branches // iterate over m->char_branches
const HHashTable *ht = m->char_branches; const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) { for (size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
HStringMap *m_ = hte->value; HStringMap *m_ = hte->value;
// check subtree for strings shorter than k-1 // check subtree for strings shorter than k-1
if(any_string_shorter(k-1, m_)) if (any_string_shorter(k-1, m_)) {
return true; return true;
} }
} }
}
return false; return false;
} }
@ -494,16 +520,21 @@ static bool any_string_shorter(size_t k, const HStringMap *m)
// helper for h_predict // helper for h_predict
static void remove_all_shorter(size_t k, HStringMap *m) static void remove_all_shorter(size_t k, HStringMap *m)
{ {
if(k==0) return; if (k==0) {
return;
}
m->epsilon_branch = NULL; m->epsilon_branch = NULL;
if(k==1) return; if (k==1) {
return;
}
// iterate over m->char_branches // iterate over m->char_branches
const HHashTable *ht = m->char_branches; const HHashTable *ht = m->char_branches;
for(size_t i=0; i < ht->capacity; i++) { for (size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
remove_all_shorter(k-1, hte->value); // recursion into subtree remove_all_shorter(k-1, hte->value); // recursion into subtree
} }
} }
@ -530,39 +561,41 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
HStringMap *ret; HStringMap *ret;
// shortcut: follow_0(X) is always {""} // shortcut: follow_0(X) is always {""}
if(k==0) if (k==0) {
return g->singleton_epsilon; return g->singleton_epsilon;
}
// memoize via g->follow // memoize via g->follow
ensure_k(g, k); ensure_k(g, k);
ret = h_hashtable_get(g->follow[k], x); ret = h_hashtable_get(g->follow[k], x);
if(ret != NULL) if (ret != NULL) {
return ret; return ret;
}
ret = h_stringmap_new(g->arena); ret = h_stringmap_new(g->arena);
assert(ret != NULL); assert(ret != NULL);
h_hashtable_put(g->follow[k], x, ret); h_hashtable_put(g->follow[k], x, ret);
// if X is the start symbol, the end token is in its follow set // if X is the start symbol, the end token is in its follow set
if(x == g->start) if (x == g->start) {
h_stringmap_put_end(ret, INSET); h_stringmap_put_end(ret, INSET);
}
// iterate over g->nts // iterate over g->nts
size_t i; size_t i;
HHashTableEntry *hte; HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) { for (i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) { for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
HCFChoice *a = (void *)hte->key; // production's left-hand symbol HCFChoice *a = (void *)hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE); assert(a->type == HCF_CHOICE);
// iterate over the productions for A // iterate over the productions for A
HCFSequence **p; HCFSequence **p;
for(p=a->seq; *p; p++) { for (p=a->seq; *p; p++) {
HCFChoice **s = (*p)->items; // production's right-hand side HCFChoice **s = (*p)->items; // production's right-hand side
for(; *s; s++) { for (; *s; s++) {
if(*s == x) { // occurrence found if (*s == x) { // occurrence found
HCFChoice **tail = s+1; HCFChoice **tail = s+1;
const HStringMap *first_tail = h_first_seq(k, g, tail); const HStringMap *first_tail = h_first_seq(k, g, tail);
@ -604,12 +637,12 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
size_t k, const HStringMap *as, size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail) StringSetFun f, HCFChoice **tail)
{ {
if(as->epsilon_branch) { if (as->epsilon_branch) {
// for a="", add f_k(tail) to ret // for a="", add f_k(tail) to ret
h_stringmap_update(ret, f(k, g, tail)); h_stringmap_update(ret, f(k, g, tail));
} }
if(as->end_branch) { if (as->end_branch) {
// for a="$", nothing can follow; just add "$" to ret // for a="$", nothing can follow; just add "$" to ret
// NB: formally, "$" is considered to be of length k // NB: formally, "$" is considered to be of length k
h_stringmap_put_end(ret, INSET); h_stringmap_put_end(ret, INSET);
@ -619,8 +652,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HHashTable *ht = as->char_branches; const HHashTable *ht = as->char_branches;
for(size_t i=0; i < ht->capacity; i++) { for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
uint8_t c = key_char((HCharKey)hte->key); uint8_t c = key_char((HCharKey)hte->key);
// follow the branch to find the set { a' | t a' <- as } // follow the branch to find the set { a' | t a' <- as }
@ -648,7 +682,7 @@ void h_pprint_char(FILE *f, char c)
case '\n': fputs("\\n", f); break; case '\n': fputs("\\n", f); break;
case '\r': fputs("\\r", f); break; case '\r': fputs("\\r", f); break;
default: default:
if(isprint((int)c)) { if (isprint((int)c)) {
fputc(c, f); fputc(c, f);
} else { } else {
fprintf(f, "\\x%.2X", c); fprintf(f, "\\x%.2X", c);
@ -672,11 +706,11 @@ static void pprint_charset(FILE *f, const HCharset cs)
fputc('[', f); fputc('[', f);
for(i=0; i<256; i++) { for(i=0; i<256; i++) {
if(charset_isset(cs, i)) { if (charset_isset(cs, i)) {
pprint_charset_char(f, i); pprint_charset_char(f, i);
// detect ranges // detect ranges
if(i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) { if (i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) {
fputc('-', f); fputc('-', f);
for(; i<256 && charset_isset(cs, i); i++); for(; i<256 && charset_isset(cs, i); i++);
i--; // back to the last in range i--; // back to the last in range
@ -708,8 +742,9 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
{ {
fputc('"', f); fputc('"', f);
for(; *x; x++) { for(; *x; x++) {
if((*x)->type != HCF_CHAR) if ((*x)->type != HCF_CHAR) {
break; break;
}
h_pprint_char(f, (*x)->chr); h_pprint_char(f, (*x)->chr);
} }
fputc('"', f); fputc('"', f);
@ -739,13 +774,14 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
{ {
HCFChoice **x = seq->items; HCFChoice **x = seq->items;
if(*x == NULL) { // the empty sequence if (*x == NULL) { // the empty sequence
fputs("\"\"", f); fputs("\"\"", f);
} else { } else {
while(*x) { while(*x) {
if(x != seq->items) fputc(' ', f); // internal separator if (x != seq->items) {
fputc(' ', f); // internal separator
if((*x)->type == HCF_CHAR) { }
if ((*x)->type == HCF_CHAR) {
// condense character strings // condense character strings
x = pprint_string(f, x); x = pprint_string(f, x);
} else { } else {
@ -781,7 +817,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
assert(nt->type == HCF_CHOICE); assert(nt->type == HCF_CHOICE);
HCFSequence **p = nt->seq; HCFSequence **p = nt->seq;
if(*p == NULL) return; // shouldn't happen if (*p == NULL) {
return; // shouldn't happen
}
pprint_sequence(f, g, *p++); // print first production on the same line pprint_sequence(f, g, *p++); // print first production on the same line
for(; *p; p++) { // print the rest below with "or" bars for(; *p; p++) { // print the rest below with "or" bars
for(i=0; i<column; i++) fputc(' ', f); // indent for(i=0; i<column; i++) fputc(' ', f); // indent
@ -792,8 +830,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
{ {
if(g->nts->used < 1) if (g->nts->used < 1) {
return; return;
}
// determine maximum string length of symbol names // determine maximum string length of symbol names
int len; int len;
@ -805,8 +844,9 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
HHashTableEntry *hte; HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) { for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
const HCFChoice *a = hte->key; // production's left-hand symbol const HCFChoice *a = hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE); assert(a->type == HCF_CHOICE);
@ -828,10 +868,12 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
const HCFChoice *a = NULL; const HCFChoice *a = NULL;
for(i=0; i < set->capacity; i++) { for(i=0; i < set->capacity; i++) {
for(hte = &set->contents[i]; hte; hte = hte->next) { for(hte = &set->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
if(a != NULL) // we're not on the first element }
if(a != NULL) { // we're not on the first element
fputc(',', file); fputc(',', file);
}
a = hte->key; // production's left-hand symbol a = hte->key; // production's left-hand symbol
@ -851,9 +893,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
{ {
assert(n < BUFSIZE-4); assert(n < BUFSIZE-4);
if(map->epsilon_branch) { if (map->epsilon_branch) {
if(!first) fputc(sep, file); first=false; if (!first) {
if(n==0) { fputc(sep, file);
first=false;
}
if (n==0) {
fputs("\"\"", file); fputs("\"\"", file);
} else { } else {
fputs("\"", file); fputs("\"", file);
@ -861,20 +906,27 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
fputs("\"", file); fputs("\"", file);
} }
if(valprint) { if (valprint) {
fputc(':', file); fputc(':', file);
valprint(file, env, map->epsilon_branch); valprint(file, env, map->epsilon_branch);
} }
} }
if(map->end_branch) { if (map->end_branch) {
if(!first) fputs(",\"", file); first=false; if (!first) {
if(n>0) fputs("\"\"", file); fputs(",\"", file);
first=false;
}
if (n>0) {
fputs("\"\"", file);
}
fwrite(prefix, 1, n, file); fwrite(prefix, 1, n, file);
if(n>0) fputs("\"\"", file); if (n>0) {
fputs("\"\"", file);
}
fputs("$", file); fputs("$", file);
if(valprint) { if (valprint) {
fputc(':', file); fputc(':', file);
valprint(file, env, map->end_branch); valprint(file, env, map->end_branch);
} }
@ -886,8 +938,9 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
HHashTableEntry *hte; HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) { for(i=0; i < ht->capacity; i++) {
for(hte = &ht->contents[i]; hte; hte = hte->next) { for(hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if (hte->key == NULL) {
continue; continue;
}
uint8_t c = key_char((HCharKey)hte->key); uint8_t c = key_char((HCharKey)hte->key);
HStringMap *ends = hte->value; HStringMap *ends = hte->value;
@ -901,11 +954,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break; case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break;
case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break; case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break;
default: default:
if(isprint(c)) if (isprint(c)) {
prefix[n_++] = c; prefix[n_++] = c;
else } else {
n_ += sprintf(prefix+n_, "\\x%.2X", c); n_ += sprintf(prefix+n_, "\\x%.2X", c);
} }
}
first = pprint_stringmap_elems(file, first, prefix, n_, first = pprint_stringmap_elems(file, first, prefix, n_,
sep, valprint, env, ends); sep, valprint, env, ends);

View file

@ -157,30 +157,67 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
for (hte = &ht->contents[hashval & (ht->capacity - 1)]; for (hte = &ht->contents[hashval & (ht->capacity - 1)];
hte != NULL; hte != NULL;
hte = hte->next) { hte = hte->next) {
if (hte->key == NULL) if (hte->key == NULL) {
continue; continue;
if (hte->hashval != hashval) }
if (hte->hashval != hashval) {
continue; continue;
if (ht->equalFunc(key, hte->key)) }
if (ht->equalFunc(key, hte->key)) {
return hte->value; return hte->value;
} }
}
return NULL; return NULL;
} }
// Forward declaration: the rehash loop in h_hashtable_ensure_capacity()
// re-inserts entries through the raw insertion routine, which is defined
// further down in this file.
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);

/**
 * Grow the bucket array of `ht` so that `n` entries fit under the load limit.
 *
 * The capacity is doubled until `n * 1.3 <= capacity`. If it had to grow, a
 * new zeroed bucket array is allocated from the table's arena and every live
 * entry (bucket heads and their chains) is re-inserted with
 * h_hashtable_put_raw(). The old bucket array is not released here.
 *
 * If the capacity cannot grow any further, or the new array cannot be
 * allocated, the table is left exactly as it was.
 *
 * @param ht table to resize in place
 * @param n  number of entries the table must be able to hold
 */
void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
  const size_t old_capacity = ht->capacity;
  size_t new_capacity = old_capacity;

  while (n * 1.3 > new_capacity) {
    if (new_capacity == 0) {
      // 0 * 2 stays 0; start from the smallest power of two instead of
      // spinning forever.
      new_capacity = 1;
    } else if (new_capacity > (size_t)-1 / 2) {
      // Doubling would wrap around size_t; stop growing.
      break;
    } else {
      new_capacity *= 2;
    }
  }
  if (new_capacity == old_capacity) {
    return;
  }
  if (new_capacity > (size_t)-1 / sizeof(HHashTableEntry)) {
    // The allocation size itself would overflow.
    return;
  }

  HHashTableEntry *old_contents = ht->contents;
  HHashTableEntry *new_contents =
    h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * new_capacity);
  if (new_contents == NULL) {
    // Keep the existing (more heavily loaded) table rather than
    // dereferencing a null bucket array.
    return;
  }
  memset(new_contents, 0, sizeof(HHashTableEntry) * new_capacity);

  // Switch to the new array only once it is known to be usable.
  ht->contents = new_contents;
  ht->capacity = new_capacity;
  ht->used = 0;  // reset before the entries are re-inserted below

  for (size_t i = 0; i < old_capacity; ++i) {
    for (HHashTableEntry *entry = &old_contents[i];
         entry != NULL;
         entry = entry->next) {
      if (entry->key) {
        h_hashtable_put_raw(ht, entry);
      }
    }
  }
}
/**
 * Store `value` under `key` in `ht`.
 *
 * The table is first given room for one more entry, then the key is hashed
 * with the table's hash function and the (key, value, hash) triple is handed
 * to h_hashtable_put_raw(), which performs the actual bucket insertion.
 *
 * @param ht    table to modify
 * @param key   key to store; hashed with ht->hashFunc
 * @param value value to associate with the key
 */
void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
  // Rebalance first so the insertion below lands in the final bucket array.
  h_hashtable_ensure_capacity(ht, ht->used + 1);

  // Temporary entry; put_raw copies its fields into the table.
  HHashTableEntry entry = {
    .key = key,
    .value = value,
    .hashval = ht->hashFunc(key)
  };
  h_hashtable_put_raw(ht, &entry);
}
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
#ifdef CONSISTENCY_CHECK #ifdef CONSISTENCY_CHECK
assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif #endif
HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; HHashTableEntry *hte = &ht->contents[new_entry->hashval & (ht->capacity - 1)];
if (hte->key != NULL) { if (hte->key != NULL) {
for(;;) { for(;;) {
// check each link, stay on last if not found // check each link, stay on last if not found
if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) if (hte->hashval == new_entry->hashval && ht->equalFunc(new_entry->key, hte->key))
goto insert_here; goto insert_here;
if (hte->next == NULL) if (hte->next == NULL)
break; break;
@ -196,9 +233,9 @@ void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
ht->used++; ht->used++;
insert_here: insert_here:
hte->key = key; hte->key = new_entry->key;
hte->value = value; hte->value = new_entry->value;
hte->hashval = hashval; hte->hashval = new_entry->hashval;
} }
void h_hashtable_update(HHashTable *dst, const HHashTable *src) { void h_hashtable_update(HHashTable *dst, const HHashTable *src) {

View file

@ -8,14 +8,16 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) {
if (nstk__ == NULL) { if (nstk__ == NULL) {
nstk__ = h_cfstack_new(mm__); nstk__ = h_cfstack_new(mm__);
} }
if(nstk__->prealloc == NULL) if (nstk__->prealloc == NULL) {
nstk__->prealloc = h_new(HCFChoice, 1); nstk__->prealloc = h_new(HCFChoice, 1);
}
// we're going to do something naughty and cast away the const to memoize // we're going to do something naughty and cast away the const to memoize
assert(parser->vtable->desugar != NULL); assert(parser->vtable->desugar != NULL);
((HParser *)parser)->desugared = nstk__->prealloc; ((HParser *)parser)->desugared = nstk__->prealloc;
parser->vtable->desugar(mm__, nstk__, parser->env); parser->vtable->desugar(mm__, nstk__, parser->env);
if (stk__ == NULL) if (stk__ == NULL) {
h_cfstack_free(mm__, nstk__); h_cfstack_free(mm__, nstk__);
}
} else if (stk__ != NULL) { } else if (stk__ != NULL) {
HCFS_APPEND(parser->desugared); HCFS_APPEND(parser->desugared);
} }

View file

@ -46,6 +46,14 @@ typedef enum HParserBackend_ {
PB_MAX = PB_GLR PB_MAX = PB_GLR
} HParserBackend; } HParserBackend;
// Human-readable names for the parser backends.
// NOTE(review): presumably indexed by HParserBackend value, in which case the
// order here must match the enum's declaration order — confirm against the
// enum before adding or reordering backends.
// NOTE(review): because the array is `static`, every translation unit that
// sees this definition gets its own copy, and units that never use it will
// trigger unused-variable warnings.
static const char* HParserBackendNames[] = {
"Packrat",
"Regular",
"LL(k)",
"LALR",
"GLR"
};
typedef enum HTokenType_ { typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_; // Before you change the explicit values of these, think of the poor bindings ;_;
TT_NONE = 1, TT_NONE = 1,
@ -178,6 +186,7 @@ typedef struct HCaseResult_ {
#else #else
HResultTiming timestamp; HResultTiming timestamp;
#endif #endif
size_t length;
} HCaseResult; } HCaseResult;
typedef struct HBackendResults_ { typedef struct HBackendResults_ {

View file

@ -1,16 +1,28 @@
#include "parser_internal.h" #include "parser_internal.h"
// Environment of an indirect parser: a late-bound reference to another
// parser plus a marker used while walking possibly recursive grammars.
typedef struct HIndirectEnv_ {
const HParser* parser; // the parser this indirect forwards to; NULL until h_bind_indirect() is called
bool touched; // set while indirect_isValidCF() is descending through this node, to cut off recursion
} HIndirectEnv;
// Parse callback of the indirect parser: forwards the parse to the parser
// stored in the HIndirectEnv that `env` points to.
static HParseResult* parse_indirect(void* env, HParseState* state) {
  const HIndirectEnv *ie = (const HIndirectEnv*)env;
  return h_do_parse(ie->parser, state);
}
// Context-free validity check for an indirect parser.
//
// The answer is delegated to the bound parser. Because indirect parsers are
// how recursive grammars are built, the delegation can come back to this same
// node; `touched` marks the node as "currently being checked" so the
// re-entrant call returns true instead of recursing forever.
// NOTE(review): the bound parser is dereferenced unconditionally, so this
// assumes h_bind_indirect() has already been called — confirm for callers.
static bool indirect_isValidCF(void *env) {
  HIndirectEnv *ie = (HIndirectEnv*)env;
  if (ie->touched) {
    // Already on the call stack for this node: break the cycle.
    return true;
  }
  ie->touched = true;
  const HParser *p = ie->parser;
  bool ret = p->vtable->isValidCF(p->env);
  ie->touched = false;  // clear the marker so later checks start fresh
  return ret;
}
// Desugar callback of the indirect parser: desugars the parser stored in the
// HIndirectEnv that `env` points to.
// mm__ and stk__ are not named in the body; they are kept under these exact
// names because the HCFS_* macros presumably refer to them implicitly.
static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) {
  HCFS_DESUGAR( ((HIndirectEnv *)env)->parser );
}
static const HParserVtable indirect_vt = { static const HParserVtable indirect_vt = {
@ -27,12 +39,15 @@ void h_bind_indirect__m(HAllocator *mm__, HParser* indirect, const HParser* inne
// Bind `inner` as the target of the indirect parser `indirect`.
// Asserts that `indirect` really uses the indirect vtable, then stores
// `inner` in its HIndirectEnv.
void h_bind_indirect(HParser* indirect, const HParser* inner) {
  assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser");
  HIndirectEnv *ie = (HIndirectEnv*)indirect->env;
  ie->parser = inner;
}
HParser* h_indirect() { HParser* h_indirect() {
return h_indirect__m(&system_allocator); return h_indirect__m(&system_allocator);
} }
// Create an unbound indirect parser.
// A fresh HIndirectEnv is allocated with h_new and initialised to "no target,
// not being visited"; the target is supplied later via h_bind_indirect().
// mm__ is kept under this exact name because h_new presumably picks the
// allocator up implicitly.
HParser* h_indirect__m(HAllocator* mm__) {
  HIndirectEnv *env = h_new(HIndirectEnv, 1);
  env->parser = NULL;
  env->touched = false;
  return h_new_parser(mm__, &indirect_vt, env);
}