The context-free (CF) backends weren't checking whether a grammar is actually context-free; they do now. Also adds braces around single-statement if/else bodies.
This commit is contained in:
parent
d947c6aede
commit
c9419f4c34
5 changed files with 204 additions and 125 deletions
|
|
@ -9,6 +9,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
|
||||||
|
|
||||||
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
{
|
{
|
||||||
|
if (!parser->vtable->isValidCF(parser->env)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
int result = h_lalr_compile(mm__, parser, params);
|
int result = h_lalr_compile(mm__, parser, params);
|
||||||
|
|
||||||
if(result == -1 && parser->backend_data) {
|
if(result == -1 && parser->backend_data) {
|
||||||
|
|
|
||||||
|
|
@ -49,8 +49,9 @@ static inline HLRTransition *transition(HArena *arena,
|
||||||
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
|
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
|
||||||
size_t x, HCFChoice *xAy)
|
size_t x, HCFChoice *xAy)
|
||||||
{
|
{
|
||||||
if(xAy->type != HCF_CHOICE)
|
if (xAy->type != HCF_CHOICE) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
// XXX CHARSET?
|
// XXX CHARSET?
|
||||||
|
|
||||||
HArena *arena = eg->arena;
|
HArena *arena = eg->arena;
|
||||||
|
|
@ -181,8 +182,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
|
||||||
h_hashtable_put(tmap->char_branches, key, tmap_);
|
h_hashtable_put(tmap->char_branches, key, tmap_);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(terminals_put(tmap_, fs_, action) < 0)
|
if (terminals_put(tmap_, fs_, action) < 0) {
|
||||||
ret = -1;
|
ret = -1;
|
||||||
|
}
|
||||||
H_END_FOREACH
|
H_END_FOREACH
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -197,8 +199,9 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
|
||||||
for(; *p && *rhs; p++, rhs++) {
|
for(; *p && *rhs; p++, rhs++) {
|
||||||
HLRTransition *t = h_hashtable_get(eg->smap, *p);
|
HLRTransition *t = h_hashtable_get(eg->smap, *p);
|
||||||
assert(t != NULL);
|
assert(t != NULL);
|
||||||
if(!h_eq_symbol(t->symbol, *rhs))
|
if (!h_eq_symbol(t->symbol, *rhs)) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
state = t->to;
|
state = t->to;
|
||||||
}
|
}
|
||||||
return (*p == *rhs // both NULL
|
return (*p == *rhs // both NULL
|
||||||
|
|
@ -231,6 +234,9 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
// build LR(0) table
|
// build LR(0) table
|
||||||
// if necessary, resolve conflicts "by conversion to SLR"
|
// if necessary, resolve conflicts "by conversion to SLR"
|
||||||
|
|
||||||
|
if (!parser->vtable->isValidCF(parser->env)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
|
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
|
||||||
if(g == NULL) // backend not suitable (language not context-free)
|
if(g == NULL) // backend not suitable (language not context-free)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
@ -284,8 +290,9 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
|
|
||||||
for(HCFSequence **p=lhs->seq; *p; p++) {
|
for(HCFSequence **p=lhs->seq; *p; p++) {
|
||||||
HCFChoice **rhs = (*p)->items;
|
HCFChoice **rhs = (*p)->items;
|
||||||
if(!match_production(eg, rhs, item->rhs, state))
|
if(!match_production(eg, rhs, item->rhs, state)) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// the left-hand symbol's follow set is this production's
|
// the left-hand symbol's follow set is this production's
|
||||||
// contribution to the lookahead
|
// contribution to the lookahead
|
||||||
|
|
@ -300,10 +307,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
} H_END_FOREACH // enhanced production
|
} H_END_FOREACH // enhanced production
|
||||||
H_END_FOREACH // reducible item
|
H_END_FOREACH // reducible item
|
||||||
|
|
||||||
if(inadeq)
|
if(inadeq) {
|
||||||
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
|
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
h_cfgrammar_free(g);
|
h_cfgrammar_free(g);
|
||||||
parser->backend_data = table;
|
parser->backend_data = table;
|
||||||
|
|
@ -358,17 +366,19 @@ int test_lalr(void)
|
||||||
|
|
||||||
printf("\n==== D F A ====\n");
|
printf("\n==== D F A ====\n");
|
||||||
HLRDFA *dfa = h_lr0_dfa(g);
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
if(dfa)
|
if (dfa) {
|
||||||
h_pprint_lrdfa(stdout, g, dfa, 0);
|
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||||
else
|
} else {
|
||||||
fprintf(stderr, "h_lalr_dfa failed\n");
|
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||||
HLRTable *table0 = h_lr0_table(g, dfa);
|
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||||
if(table0)
|
if (table0) {
|
||||||
h_pprint_lrtable(stdout, g, table0, 0);
|
h_pprint_lrtable(stdout, g, table0, 0);
|
||||||
else
|
} else {
|
||||||
fprintf(stderr, "h_lr0_table failed\n");
|
fprintf(stderr, "h_lr0_table failed\n");
|
||||||
|
}
|
||||||
h_lrtable_free(table0);
|
h_lrtable_free(table0);
|
||||||
|
|
||||||
printf("\n==== L A L R T A B L E ====\n");
|
printf("\n==== L A L R T A B L E ====\n");
|
||||||
|
|
@ -380,10 +390,10 @@ int test_lalr(void)
|
||||||
|
|
||||||
printf("\n==== P A R S E R E S U L T ====\n");
|
printf("\n==== P A R S E R E S U L T ====\n");
|
||||||
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
||||||
if(res)
|
if (res) {
|
||||||
h_pprint(stdout, res->ast, 0, 2);
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
else
|
} else {
|
||||||
printf("no parse\n");
|
printf("no parse\n");
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,10 +35,12 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
|
||||||
|
|
||||||
HRVMTrace *invert_trace(HRVMTrace *trace) {
|
HRVMTrace *invert_trace(HRVMTrace *trace) {
|
||||||
HRVMTrace *last = NULL;
|
HRVMTrace *last = NULL;
|
||||||
if (!trace)
|
if (!trace) {
|
||||||
return NULL;
|
return NULL;
|
||||||
if (!trace->next)
|
}
|
||||||
|
if (!trace->next) {
|
||||||
return trace;
|
return trace;
|
||||||
|
}
|
||||||
do {
|
do {
|
||||||
HRVMTrace *next = trace->next;
|
HRVMTrace *next = trace->next;
|
||||||
trace->next = last;
|
trace->next = last;
|
||||||
|
|
@ -83,8 +85,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
h_sarray_clear(heads_n);
|
h_sarray_clear(heads_n);
|
||||||
}
|
}
|
||||||
memset(insn_seen, 0, prog->length); // no insns seen yet
|
memset(insn_seen, 0, prog->length); // no insns seen yet
|
||||||
if (!live_threads)
|
if (!live_threads) {
|
||||||
goto match_fail;
|
goto match_fail;
|
||||||
|
}
|
||||||
live_threads = 0;
|
live_threads = 0;
|
||||||
HRVMTrace *tr_head;
|
HRVMTrace *tr_head;
|
||||||
H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
|
H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
|
||||||
|
|
@ -111,8 +114,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
hi = (arg >> 8) & 0xff;
|
hi = (arg >> 8) & 0xff;
|
||||||
lo = arg & 0xff;
|
lo = arg & 0xff;
|
||||||
THREAD.ip++;
|
THREAD.ip++;
|
||||||
if (ch < lo || ch > hi)
|
if (ch < lo || ch > hi) {
|
||||||
ipq_top--; // terminate thread
|
ipq_top--; // terminate thread
|
||||||
|
}
|
||||||
goto next_insn;
|
goto next_insn;
|
||||||
case RVM_GOTO:
|
case RVM_GOTO:
|
||||||
THREAD.ip = arg;
|
THREAD.ip = arg;
|
||||||
|
|
@ -141,8 +145,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
|
||||||
goto next_insn;
|
goto next_insn;
|
||||||
case RVM_EOF:
|
case RVM_EOF:
|
||||||
THREAD.ip++;
|
THREAD.ip++;
|
||||||
if (off != len)
|
if (off != len) {
|
||||||
ipq_top--; // Terminate thread
|
ipq_top--; // Terminate thread
|
||||||
|
}
|
||||||
goto next_insn;
|
goto next_insn;
|
||||||
case RVM_STEP:
|
case RVM_STEP:
|
||||||
// save thread
|
// save thread
|
||||||
|
|
@ -249,9 +254,10 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
|
||||||
|
|
||||||
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
|
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
|
||||||
for (uint16_t i = 0; i < prog->action_count; i++) {
|
for (uint16_t i = 0; i < prog->action_count; i++) {
|
||||||
if (prog->actions[i].action == action_func && prog->actions[i].env == env)
|
if (prog->actions[i].action == action_func && prog->actions[i].env == env) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// Ensure that there's room in the action array...
|
// Ensure that there's room in the action array...
|
||||||
if (!(prog->action_count & (prog->action_count + 1))) {
|
if (!(prog->action_count & (prog->action_count + 1))) {
|
||||||
// needs to be scaled up.
|
// needs to be scaled up.
|
||||||
|
|
@ -294,9 +300,10 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
|
||||||
size_t h_svm_count_to_mark(HSVMContext *ctx) {
|
size_t h_svm_count_to_mark(HSVMContext *ctx) {
|
||||||
size_t ctm;
|
size_t ctm;
|
||||||
for (ctm = 0; ctm < ctx->stack_count; ctm++) {
|
for (ctm = 0; ctm < ctx->stack_count; ctm++) {
|
||||||
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
|
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) {
|
||||||
return ctm;
|
return ctm;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return ctx->stack_count;
|
return ctx->stack_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -320,9 +327,11 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
|
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
|
||||||
while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK)
|
while (ctx->stack_count > 0) {
|
||||||
|
if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return false; // no mark found.
|
return false; // no mark found.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -343,8 +352,9 @@ static void h_regex_free(HParser *parser) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
|
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
|
||||||
if (!parser->vtable->isValidRegular(parser->env))
|
if (!parser->vtable->isValidRegular(parser->env)) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
HRVMProg *prog = h_new(HRVMProg, 1);
|
HRVMProg *prog = h_new(HRVMProg, 1);
|
||||||
prog->length = prog->action_count = 0;
|
prog->length = prog->action_count = 0;
|
||||||
prog->insns = NULL;
|
prog->insns = NULL;
|
||||||
|
|
|
||||||
172
src/cfgrammar.c
172
src/cfgrammar.c
|
|
@ -46,11 +46,14 @@ static void collect_geneps(HCFGrammar *grammar);
|
||||||
|
|
||||||
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
|
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
|
||||||
{
|
{
|
||||||
|
if (!parser->vtable->isValidCF(parser->env)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
// convert parser to CFG form ("desugar").
|
// convert parser to CFG form ("desugar").
|
||||||
HCFChoice *desugared = h_desugar(mm__, NULL, parser);
|
HCFChoice *desugared = h_desugar(mm__, NULL, parser);
|
||||||
if(desugared == NULL)
|
if (desugared == NULL) {
|
||||||
return NULL; // -> backend not suitable for this parser
|
return NULL; // -> backend not suitable for this parser
|
||||||
|
}
|
||||||
return h_cfgrammar_(mm__, desugared);
|
return h_cfgrammar_(mm__, desugared);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -92,8 +95,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
HCFSequence **s; // for the rhs (sentential form) of a production
|
HCFSequence **s; // for the rhs (sentential form) of a production
|
||||||
HCFChoice **x; // for a symbol in s
|
HCFChoice **x; // for a symbol in s
|
||||||
|
|
||||||
if(h_hashset_present(grammar->nts, symbol))
|
if (h_hashset_present(grammar->nts, symbol)) {
|
||||||
return; // already visited, get out
|
return; // already visited, get out
|
||||||
|
}
|
||||||
|
|
||||||
switch(symbol->type) {
|
switch(symbol->type) {
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
|
|
@ -127,8 +131,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
/* Increase g->kmax if needed, allocating enough first/follow slots. */
|
/* Increase g->kmax if needed, allocating enough first/follow slots. */
|
||||||
static void ensure_k(HCFGrammar *g, size_t k)
|
static void ensure_k(HCFGrammar *g, size_t k)
|
||||||
{
|
{
|
||||||
if(k <= g->kmax) return;
|
if (k <= g->kmax) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
// NB: we don't actually use first/follow[0] but allocate it anyway
|
// NB: we don't actually use first/follow[0] but allocate it anyway
|
||||||
// so indices of the array correspond neatly to values of k
|
// so indices of the array correspond neatly to values of k
|
||||||
|
|
||||||
|
|
@ -181,17 +186,19 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
// return true iff all symbols in s derive epsilon
|
// return true iff all symbols in s derive epsilon
|
||||||
for(; *s; s++) {
|
for(; *s; s++) {
|
||||||
if(!h_derives_epsilon(g, *s))
|
if (!h_derives_epsilon(g, *s)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Populate the geneps member of g; no-op if called multiple times. */
|
/* Populate the geneps member of g; no-op if called multiple times. */
|
||||||
static void collect_geneps(HCFGrammar *g)
|
static void collect_geneps(HCFGrammar *g)
|
||||||
{
|
{
|
||||||
if(g->geneps != NULL)
|
if (g->geneps != NULL) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
assert(g->geneps != NULL);
|
assert(g->geneps != NULL);
|
||||||
|
|
@ -206,8 +213,9 @@ static void collect_geneps(HCFGrammar *g)
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
for(i=0; i < g->nts->capacity; i++) {
|
for(i=0; i < g->nts->capacity; i++) {
|
||||||
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
const HCFChoice *symbol = hte->key;
|
const HCFChoice *symbol = hte->key;
|
||||||
assert(symbol->type == HCF_CHOICE);
|
assert(symbol->type == HCF_CHOICE);
|
||||||
|
|
||||||
|
|
@ -262,8 +270,9 @@ static void *combine_stringmap(void *v1, const void *v2)
|
||||||
{
|
{
|
||||||
HStringMap *m1 = v1;
|
HStringMap *m1 = v1;
|
||||||
const HStringMap *m2 = v2;
|
const HStringMap *m2 = v2;
|
||||||
if(!m1)
|
if (!m1) {
|
||||||
m1 = h_stringmap_new(m2->arena);
|
m1 = h_stringmap_new(m2->arena);
|
||||||
|
}
|
||||||
h_stringmap_update(m1, m2);
|
h_stringmap_update(m1, m2);
|
||||||
|
|
||||||
return m1;
|
return m1;
|
||||||
|
|
@ -272,12 +281,12 @@ static void *combine_stringmap(void *v1, const void *v2)
|
||||||
/* Note: Does *not* reuse submaps from n in building m. */
|
/* Note: Does *not* reuse submaps from n in building m. */
|
||||||
void h_stringmap_update(HStringMap *m, const HStringMap *n)
|
void h_stringmap_update(HStringMap *m, const HStringMap *n)
|
||||||
{
|
{
|
||||||
if(n->epsilon_branch)
|
if (n->epsilon_branch) {
|
||||||
m->epsilon_branch = n->epsilon_branch;
|
m->epsilon_branch = n->epsilon_branch;
|
||||||
|
}
|
||||||
if(n->end_branch)
|
if (n->end_branch) {
|
||||||
m->end_branch = n->end_branch;
|
m->end_branch = n->end_branch;
|
||||||
|
}
|
||||||
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
|
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -295,36 +304,48 @@ HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m)
|
||||||
void h_stringmap_replace(HStringMap *m, void *old, void *new)
|
void h_stringmap_replace(HStringMap *m, void *old, void *new)
|
||||||
{
|
{
|
||||||
if (!old) {
|
if (!old) {
|
||||||
if(m->epsilon_branch) m->epsilon_branch = new;
|
if (m->epsilon_branch) {
|
||||||
if(m->end_branch) m->end_branch = new;
|
m->epsilon_branch = new;
|
||||||
|
}
|
||||||
|
if (m->end_branch) {
|
||||||
|
m->end_branch = new;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if(m->epsilon_branch == old) m->epsilon_branch = new;
|
if (m->epsilon_branch == old) {
|
||||||
if(m->end_branch == old) m->end_branch = new;
|
m->epsilon_branch = new;
|
||||||
|
}
|
||||||
|
if (m->end_branch == old) {
|
||||||
|
m->end_branch = new;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over m->char_branches
|
// iterate over m->char_branches
|
||||||
const HHashTable *ht = m->char_branches;
|
const HHashTable *ht = m->char_branches;
|
||||||
for (size_t i=0; i < ht->capacity; i++) {
|
for (size_t i=0; i < ht->capacity; i++) {
|
||||||
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
HStringMap *m_ = hte->value;
|
HStringMap *m_ = hte->value;
|
||||||
if(m_)
|
if (m_) {
|
||||||
h_stringmap_replace(m_, old, new);
|
h_stringmap_replace(m_, old, new);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
|
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
|
||||||
{
|
{
|
||||||
for(size_t i=0; i<n; i++) {
|
for(size_t i=0; i<n; i++) {
|
||||||
if(i==n-1 && end && m->end_branch)
|
if (i==n-1 && end && m->end_branch) {
|
||||||
return m->end_branch;
|
return m->end_branch;
|
||||||
|
}
|
||||||
m = h_stringmap_get_char(m, str[i]);
|
m = h_stringmap_get_char(m, str[i]);
|
||||||
if(!m)
|
if (!m) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return m->epsilon_branch;
|
return m->epsilon_branch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -377,14 +398,15 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
uint8_t c;
|
uint8_t c;
|
||||||
|
|
||||||
// shortcut: first_0(X) is always {""}
|
// shortcut: first_0(X) is always {""}
|
||||||
if(k==0)
|
if (k==0) {
|
||||||
return g->singleton_epsilon;
|
return g->singleton_epsilon;
|
||||||
|
}
|
||||||
// memoize via g->first
|
// memoize via g->first
|
||||||
ensure_k(g, k);
|
ensure_k(g, k);
|
||||||
ret = h_hashtable_get(g->first[k], x);
|
ret = h_hashtable_get(g->first[k], x);
|
||||||
if(ret != NULL)
|
if (ret != NULL) {
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
ret = h_stringmap_new(g->arena);
|
ret = h_stringmap_new(g->arena);
|
||||||
assert(ret != NULL);
|
assert(ret != NULL);
|
||||||
h_hashtable_put(g->first[k], x, ret);
|
h_hashtable_put(g->first[k], x, ret);
|
||||||
|
|
@ -432,9 +454,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
|
||||||
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
// shortcut: the first set of the empty sequence, for any k, is {""}
|
// shortcut: the first set of the empty sequence, for any k, is {""}
|
||||||
if(*s == NULL)
|
if (*s == NULL) {
|
||||||
return g->singleton_epsilon;
|
return g->singleton_epsilon;
|
||||||
|
}
|
||||||
// first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| }
|
// first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| }
|
||||||
|
|
||||||
HCFChoice *x = s[0];
|
HCFChoice *x = s[0];
|
||||||
|
|
@ -443,12 +465,14 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
const HStringMap *first_x = h_first(k, g, x);
|
const HStringMap *first_x = h_first(k, g, x);
|
||||||
|
|
||||||
// shortcut: if first_k(X) = {""}, just return first_k(tail)
|
// shortcut: if first_k(X) = {""}, just return first_k(tail)
|
||||||
if(is_singleton_epsilon(first_x))
|
if (is_singleton_epsilon(first_x)) {
|
||||||
return h_first_seq(k, g, tail);
|
return h_first_seq(k, g, tail);
|
||||||
|
}
|
||||||
|
|
||||||
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
|
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
|
||||||
if(!any_string_shorter(k, first_x))
|
if (!any_string_shorter(k, first_x)) {
|
||||||
return first_x;
|
return first_x;
|
||||||
|
}
|
||||||
|
|
||||||
// create a new result set and build up the set described above
|
// create a new result set and build up the set described above
|
||||||
HStringMap *ret = h_stringmap_new(g->arena);
|
HStringMap *ret = h_stringmap_new(g->arena);
|
||||||
|
|
@ -468,25 +492,27 @@ static bool is_singleton_epsilon(const HStringMap *m)
|
||||||
|
|
||||||
static bool any_string_shorter(size_t k, const HStringMap *m)
|
static bool any_string_shorter(size_t k, const HStringMap *m)
|
||||||
{
|
{
|
||||||
if(k==0)
|
if (k==0) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
if(m->epsilon_branch)
|
if (m->epsilon_branch) {
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
// iterate over m->char_branches
|
// iterate over m->char_branches
|
||||||
const HHashTable *ht = m->char_branches;
|
const HHashTable *ht = m->char_branches;
|
||||||
for (size_t i=0; i < ht->capacity; i++) {
|
for (size_t i=0; i < ht->capacity; i++) {
|
||||||
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
HStringMap *m_ = hte->value;
|
HStringMap *m_ = hte->value;
|
||||||
|
|
||||||
// check subtree for strings shorter than k-1
|
// check subtree for strings shorter than k-1
|
||||||
if(any_string_shorter(k-1, m_))
|
if (any_string_shorter(k-1, m_)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -494,16 +520,21 @@ static bool any_string_shorter(size_t k, const HStringMap *m)
|
||||||
// helper for h_predict
|
// helper for h_predict
|
||||||
static void remove_all_shorter(size_t k, HStringMap *m)
|
static void remove_all_shorter(size_t k, HStringMap *m)
|
||||||
{
|
{
|
||||||
if(k==0) return;
|
if (k==0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
m->epsilon_branch = NULL;
|
m->epsilon_branch = NULL;
|
||||||
if(k==1) return;
|
if (k==1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// iterate over m->char_branches
|
// iterate over m->char_branches
|
||||||
const HHashTable *ht = m->char_branches;
|
const HHashTable *ht = m->char_branches;
|
||||||
for (size_t i=0; i < ht->capacity; i++) {
|
for (size_t i=0; i < ht->capacity; i++) {
|
||||||
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
remove_all_shorter(k-1, hte->value); // recursion into subtree
|
remove_all_shorter(k-1, hte->value); // recursion into subtree
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -530,29 +561,31 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
HStringMap *ret;
|
HStringMap *ret;
|
||||||
|
|
||||||
// shortcut: follow_0(X) is always {""}
|
// shortcut: follow_0(X) is always {""}
|
||||||
if(k==0)
|
if (k==0) {
|
||||||
return g->singleton_epsilon;
|
return g->singleton_epsilon;
|
||||||
|
}
|
||||||
// memoize via g->follow
|
// memoize via g->follow
|
||||||
ensure_k(g, k);
|
ensure_k(g, k);
|
||||||
ret = h_hashtable_get(g->follow[k], x);
|
ret = h_hashtable_get(g->follow[k], x);
|
||||||
if(ret != NULL)
|
if (ret != NULL) {
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
ret = h_stringmap_new(g->arena);
|
ret = h_stringmap_new(g->arena);
|
||||||
assert(ret != NULL);
|
assert(ret != NULL);
|
||||||
h_hashtable_put(g->follow[k], x, ret);
|
h_hashtable_put(g->follow[k], x, ret);
|
||||||
|
|
||||||
// if X is the start symbol, the end token is in its follow set
|
// if X is the start symbol, the end token is in its follow set
|
||||||
if(x == g->start)
|
if (x == g->start) {
|
||||||
h_stringmap_put_end(ret, INSET);
|
h_stringmap_put_end(ret, INSET);
|
||||||
|
}
|
||||||
// iterate over g->nts
|
// iterate over g->nts
|
||||||
size_t i;
|
size_t i;
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
for (i=0; i < g->nts->capacity; i++) {
|
for (i=0; i < g->nts->capacity; i++) {
|
||||||
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
HCFChoice *a = (void *)hte->key; // production's left-hand symbol
|
HCFChoice *a = (void *)hte->key; // production's left-hand symbol
|
||||||
assert(a->type == HCF_CHOICE);
|
assert(a->type == HCF_CHOICE);
|
||||||
|
|
||||||
|
|
@ -619,8 +652,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
|
||||||
const HHashTable *ht = as->char_branches;
|
const HHashTable *ht = as->char_branches;
|
||||||
for(size_t i=0; i < ht->capacity; i++) {
|
for(size_t i=0; i < ht->capacity; i++) {
|
||||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
uint8_t c = key_char((HCharKey)hte->key);
|
uint8_t c = key_char((HCharKey)hte->key);
|
||||||
|
|
||||||
// follow the branch to find the set { a' | t a' <- as }
|
// follow the branch to find the set { a' | t a' <- as }
|
||||||
|
|
@ -708,8 +742,9 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
|
||||||
{
|
{
|
||||||
fputc('"', f);
|
fputc('"', f);
|
||||||
for(; *x; x++) {
|
for(; *x; x++) {
|
||||||
if((*x)->type != HCF_CHAR)
|
if ((*x)->type != HCF_CHAR) {
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
h_pprint_char(f, (*x)->chr);
|
h_pprint_char(f, (*x)->chr);
|
||||||
}
|
}
|
||||||
fputc('"', f);
|
fputc('"', f);
|
||||||
|
|
@ -743,8 +778,9 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
||||||
fputs("\"\"", f);
|
fputs("\"\"", f);
|
||||||
} else {
|
} else {
|
||||||
while(*x) {
|
while(*x) {
|
||||||
if(x != seq->items) fputc(' ', f); // internal separator
|
if (x != seq->items) {
|
||||||
|
fputc(' ', f); // internal separator
|
||||||
|
}
|
||||||
if ((*x)->type == HCF_CHAR) {
|
if ((*x)->type == HCF_CHAR) {
|
||||||
// condense character strings
|
// condense character strings
|
||||||
x = pprint_string(f, x);
|
x = pprint_string(f, x);
|
||||||
|
|
@ -781,7 +817,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
||||||
|
|
||||||
assert(nt->type == HCF_CHOICE);
|
assert(nt->type == HCF_CHOICE);
|
||||||
HCFSequence **p = nt->seq;
|
HCFSequence **p = nt->seq;
|
||||||
if(*p == NULL) return; // shouldn't happen
|
if (*p == NULL) {
|
||||||
|
return; // shouldn't happen
|
||||||
|
}
|
||||||
pprint_sequence(f, g, *p++); // print first production on the same line
|
pprint_sequence(f, g, *p++); // print first production on the same line
|
||||||
for(; *p; p++) { // print the rest below with "or" bars
|
for(; *p; p++) { // print the rest below with "or" bars
|
||||||
for(i=0; i<column; i++) fputc(' ', f); // indent
|
for(i=0; i<column; i++) fputc(' ', f); // indent
|
||||||
|
|
@ -792,8 +830,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
||||||
|
|
||||||
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
|
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
|
||||||
{
|
{
|
||||||
if(g->nts->used < 1)
|
if (g->nts->used < 1) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// determine maximum string length of symbol names
|
// determine maximum string length of symbol names
|
||||||
int len;
|
int len;
|
||||||
|
|
@ -805,8 +844,9 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
for(i=0; i < g->nts->capacity; i++) {
|
for(i=0; i < g->nts->capacity; i++) {
|
||||||
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
const HCFChoice *a = hte->key; // production's left-hand symbol
|
const HCFChoice *a = hte->key; // production's left-hand symbol
|
||||||
assert(a->type == HCF_CHOICE);
|
assert(a->type == HCF_CHOICE);
|
||||||
|
|
||||||
|
|
@ -828,10 +868,12 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
|
||||||
const HCFChoice *a = NULL;
|
const HCFChoice *a = NULL;
|
||||||
for(i=0; i < set->capacity; i++) {
|
for(i=0; i < set->capacity; i++) {
|
||||||
for(hte = &set->contents[i]; hte; hte = hte->next) {
|
for(hte = &set->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
if(a != NULL) // we're not on the first element
|
}
|
||||||
|
if(a != NULL) { // we're not on the first element
|
||||||
fputc(',', file);
|
fputc(',', file);
|
||||||
|
}
|
||||||
|
|
||||||
a = hte->key; // production's left-hand symbol
|
a = hte->key; // production's left-hand symbol
|
||||||
|
|
||||||
|
|
@ -852,7 +894,10 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
|
||||||
assert(n < BUFSIZE-4);
|
assert(n < BUFSIZE-4);
|
||||||
|
|
||||||
if (map->epsilon_branch) {
|
if (map->epsilon_branch) {
|
||||||
if(!first) fputc(sep, file); first=false;
|
if (!first) {
|
||||||
|
fputc(sep, file);
|
||||||
|
first=false;
|
||||||
|
}
|
||||||
if (n==0) {
|
if (n==0) {
|
||||||
fputs("\"\"", file);
|
fputs("\"\"", file);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -868,10 +913,17 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (map->end_branch) {
|
if (map->end_branch) {
|
||||||
if(!first) fputs(",\"", file); first=false;
|
if (!first) {
|
||||||
if(n>0) fputs("\"\"", file);
|
fputs(",\"", file);
|
||||||
|
first=false;
|
||||||
|
}
|
||||||
|
if (n>0) {
|
||||||
|
fputs("\"\"", file);
|
||||||
|
}
|
||||||
fwrite(prefix, 1, n, file);
|
fwrite(prefix, 1, n, file);
|
||||||
if(n>0) fputs("\"\"", file);
|
if (n>0) {
|
||||||
|
fputs("\"\"", file);
|
||||||
|
}
|
||||||
fputs("$", file);
|
fputs("$", file);
|
||||||
|
|
||||||
if (valprint) {
|
if (valprint) {
|
||||||
|
|
@ -886,8 +938,9 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
for(i=0; i < ht->capacity; i++) {
|
for(i=0; i < ht->capacity; i++) {
|
||||||
for(hte = &ht->contents[i]; hte; hte = hte->next) {
|
for(hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if (hte->key == NULL) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
uint8_t c = key_char((HCharKey)hte->key);
|
uint8_t c = key_char((HCharKey)hte->key);
|
||||||
HStringMap *ends = hte->value;
|
HStringMap *ends = hte->value;
|
||||||
|
|
||||||
|
|
@ -901,11 +954,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
|
||||||
case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break;
|
case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break;
|
||||||
case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break;
|
case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break;
|
||||||
default:
|
default:
|
||||||
if(isprint(c))
|
if (isprint(c)) {
|
||||||
prefix[n_++] = c;
|
prefix[n_++] = c;
|
||||||
else
|
} else {
|
||||||
n_ += sprintf(prefix+n_, "\\x%.2X", c);
|
n_ += sprintf(prefix+n_, "\\x%.2X", c);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
first = pprint_stringmap_elems(file, first, prefix, n_,
|
first = pprint_stringmap_elems(file, first, prefix, n_,
|
||||||
sep, valprint, env, ends);
|
sep, valprint, env, ends);
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,16 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) {
|
||||||
if (nstk__ == NULL) {
|
if (nstk__ == NULL) {
|
||||||
nstk__ = h_cfstack_new(mm__);
|
nstk__ = h_cfstack_new(mm__);
|
||||||
}
|
}
|
||||||
if(nstk__->prealloc == NULL)
|
if (nstk__->prealloc == NULL) {
|
||||||
nstk__->prealloc = h_new(HCFChoice, 1);
|
nstk__->prealloc = h_new(HCFChoice, 1);
|
||||||
|
}
|
||||||
// we're going to do something naughty and cast away the const to memoize
|
// we're going to do something naughty and cast away the const to memoize
|
||||||
assert(parser->vtable->desugar != NULL);
|
assert(parser->vtable->desugar != NULL);
|
||||||
((HParser *)parser)->desugared = nstk__->prealloc;
|
((HParser *)parser)->desugared = nstk__->prealloc;
|
||||||
parser->vtable->desugar(mm__, nstk__, parser->env);
|
parser->vtable->desugar(mm__, nstk__, parser->env);
|
||||||
if (stk__ == NULL)
|
if (stk__ == NULL) {
|
||||||
h_cfstack_free(mm__, nstk__);
|
h_cfstack_free(mm__, nstk__);
|
||||||
|
}
|
||||||
} else if (stk__ != NULL) {
|
} else if (stk__ != NULL) {
|
||||||
HCFS_APPEND(parser->desugared);
|
HCFS_APPEND(parser->desugared);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue