CF backends weren't checking whether a grammar is CF; they are now. Also, brace-fixing.

This commit is contained in:
Meredith L. Patterson 2014-04-20 16:57:01 +02:00
parent d947c6aede
commit c9419f4c34
5 changed files with 204 additions and 125 deletions

View file

@ -9,6 +9,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
int result = h_lalr_compile(mm__, parser, params);
if(result == -1 && parser->backend_data) {

View file

@ -49,8 +49,9 @@ static inline HLRTransition *transition(HArena *arena,
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
size_t x, HCFChoice *xAy)
{
if(xAy->type != HCF_CHOICE)
if (xAy->type != HCF_CHOICE) {
return;
}
// XXX CHARSET?
HArena *arena = eg->arena;
@ -181,8 +182,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti
h_hashtable_put(tmap->char_branches, key, tmap_);
}
if(terminals_put(tmap_, fs_, action) < 0)
if (terminals_put(tmap_, fs_, action) < 0) {
ret = -1;
}
H_END_FOREACH
return ret;
@ -197,8 +199,9 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
for(; *p && *rhs; p++, rhs++) {
HLRTransition *t = h_hashtable_get(eg->smap, *p);
assert(t != NULL);
if(!h_eq_symbol(t->symbol, *rhs))
if (!h_eq_symbol(t->symbol, *rhs)) {
return false;
}
state = t->to;
}
return (*p == *rhs // both NULL
@ -231,6 +234,9 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
// build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR"
if (!parser->vtable->isValidCF(parser->env)) {
return -1;
}
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free)
return -1;
@ -284,8 +290,9 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
for(HCFSequence **p=lhs->seq; *p; p++) {
HCFChoice **rhs = (*p)->items;
if(!match_production(eg, rhs, item->rhs, state))
if(!match_production(eg, rhs, item->rhs, state)) {
continue;
}
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
@ -300,10 +307,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
} H_END_FOREACH // enhanced production
H_END_FOREACH // reducible item
if(inadeq)
if(inadeq) {
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
}
}
}
h_cfgrammar_free(g);
parser->backend_data = table;
@ -358,17 +366,19 @@ int test_lalr(void)
printf("\n==== D F A ====\n");
HLRDFA *dfa = h_lr0_dfa(g);
if(dfa)
if (dfa) {
h_pprint_lrdfa(stdout, g, dfa, 0);
else
} else {
fprintf(stderr, "h_lalr_dfa failed\n");
}
printf("\n==== L R ( 0 ) T A B L E ====\n");
HLRTable *table0 = h_lr0_table(g, dfa);
if(table0)
if (table0) {
h_pprint_lrtable(stdout, g, table0, 0);
else
} else {
fprintf(stderr, "h_lr0_table failed\n");
}
h_lrtable_free(table0);
printf("\n==== L A L R T A B L E ====\n");
@ -380,10 +390,10 @@ int test_lalr(void)
printf("\n==== P A R S E R E S U L T ====\n");
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
if(res)
if (res) {
h_pprint(stdout, res->ast, 0, 2);
else
} else {
printf("no parse\n");
}
return 0;
}

View file

@ -35,10 +35,12 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
HRVMTrace *invert_trace(HRVMTrace *trace) {
HRVMTrace *last = NULL;
if (!trace)
if (!trace) {
return NULL;
if (!trace->next)
}
if (!trace->next) {
return trace;
}
do {
HRVMTrace *next = trace->next;
trace->next = last;
@ -83,8 +85,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
h_sarray_clear(heads_n);
}
memset(insn_seen, 0, prog->length); // no insns seen yet
if (!live_threads)
if (!live_threads) {
goto match_fail;
}
live_threads = 0;
HRVMTrace *tr_head;
H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
@ -111,8 +114,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
hi = (arg >> 8) & 0xff;
lo = arg & 0xff;
THREAD.ip++;
if (ch < lo || ch > hi)
if (ch < lo || ch > hi) {
ipq_top--; // terminate thread
}
goto next_insn;
case RVM_GOTO:
THREAD.ip = arg;
@ -141,8 +145,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
goto next_insn;
case RVM_EOF:
THREAD.ip++;
if (off != len)
if (off != len) {
ipq_top--; // Terminate thread
}
goto next_insn;
case RVM_STEP:
// save thread
@ -249,9 +254,10 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
for (uint16_t i = 0; i < prog->action_count; i++) {
if (prog->actions[i].action == action_func && prog->actions[i].env == env)
if (prog->actions[i].action == action_func && prog->actions[i].env == env) {
return i;
}
}
// Ensure that there's room in the action array...
if (!(prog->action_count & (prog->action_count + 1))) {
// needs to be scaled up.
@ -294,9 +300,10 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
size_t h_svm_count_to_mark(HSVMContext *ctx) {
size_t ctm;
for (ctm = 0; ctm < ctx->stack_count; ctm++) {
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) {
return ctm;
}
}
return ctx->stack_count;
}
@ -320,9 +327,11 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
}
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK)
while (ctx->stack_count > 0) {
if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) {
return true;
}
}
return false; // no mark found.
}
@ -343,8 +352,9 @@ static void h_regex_free(HParser *parser) {
}
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
if (!parser->vtable->isValidRegular(parser->env))
if (!parser->vtable->isValidRegular(parser->env)) {
return 1;
}
HRVMProg *prog = h_new(HRVMProg, 1);
prog->length = prog->action_count = 0;
prog->insns = NULL;

View file

@ -46,11 +46,14 @@ static void collect_geneps(HCFGrammar *grammar);
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
{
if (!parser->vtable->isValidCF(parser->env)) {
return NULL;
}
// convert parser to CFG form ("desugar").
HCFChoice *desugared = h_desugar(mm__, NULL, parser);
if(desugared == NULL)
if (desugared == NULL) {
return NULL; // -> backend not suitable for this parser
}
return h_cfgrammar_(mm__, desugared);
}
@ -92,8 +95,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
HCFSequence **s; // for the rhs (sentential form) of a production
HCFChoice **x; // for a symbol in s
if(h_hashset_present(grammar->nts, symbol))
if (h_hashset_present(grammar->nts, symbol)) {
return; // already visited, get out
}
switch(symbol->type) {
case HCF_CHAR:
@ -127,8 +131,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
/* Increase g->kmax if needed, allocating enough first/follow slots. */
static void ensure_k(HCFGrammar *g, size_t k)
{
if(k <= g->kmax) return;
if (k <= g->kmax) {
return;
}
// NB: we don't actually use first/follow[0] but allocate it anyway
// so indices of the array correspond neatly to values of k
@ -181,17 +186,19 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s)
{
// return true iff all symbols in s derive epsilon
for(; *s; s++) {
if(!h_derives_epsilon(g, *s))
if (!h_derives_epsilon(g, *s)) {
return false;
}
}
return true;
}
/* Populate the geneps member of g; no-op if called multiple times. */
static void collect_geneps(HCFGrammar *g)
{
if(g->geneps != NULL)
if (g->geneps != NULL) {
return;
}
g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
assert(g->geneps != NULL);
@ -206,8 +213,9 @@ static void collect_geneps(HCFGrammar *g)
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
const HCFChoice *symbol = hte->key;
assert(symbol->type == HCF_CHOICE);
@ -262,8 +270,9 @@ static void *combine_stringmap(void *v1, const void *v2)
{
HStringMap *m1 = v1;
const HStringMap *m2 = v2;
if(!m1)
if (!m1) {
m1 = h_stringmap_new(m2->arena);
}
h_stringmap_update(m1, m2);
return m1;
@ -272,12 +281,12 @@ static void *combine_stringmap(void *v1, const void *v2)
/* Note: Does *not* reuse submaps from n in building m. */
void h_stringmap_update(HStringMap *m, const HStringMap *n)
{
if(n->epsilon_branch)
if (n->epsilon_branch) {
m->epsilon_branch = n->epsilon_branch;
if(n->end_branch)
}
if (n->end_branch) {
m->end_branch = n->end_branch;
}
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
}
@ -295,36 +304,48 @@ HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m)
void h_stringmap_replace(HStringMap *m, void *old, void *new)
{
if (!old) {
if(m->epsilon_branch) m->epsilon_branch = new;
if(m->end_branch) m->end_branch = new;
if (m->epsilon_branch) {
m->epsilon_branch = new;
}
if (m->end_branch) {
m->end_branch = new;
}
} else {
if(m->epsilon_branch == old) m->epsilon_branch = new;
if(m->end_branch == old) m->end_branch = new;
if (m->epsilon_branch == old) {
m->epsilon_branch = new;
}
if (m->end_branch == old) {
m->end_branch = new;
}
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
HStringMap *m_ = hte->value;
if(m_)
if (m_) {
h_stringmap_replace(m_, old, new);
}
}
}
}
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{
for(size_t i=0; i<n; i++) {
if(i==n-1 && end && m->end_branch)
if (i==n-1 && end && m->end_branch) {
return m->end_branch;
}
m = h_stringmap_get_char(m, str[i]);
if(!m)
if (!m) {
return NULL;
}
}
return m->epsilon_branch;
}
@ -377,14 +398,15 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
uint8_t c;
// shortcut: first_0(X) is always {""}
if(k==0)
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->first
ensure_k(g, k);
ret = h_hashtable_get(g->first[k], x);
if(ret != NULL)
if (ret != NULL) {
return ret;
}
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->first[k], x, ret);
@ -432,9 +454,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{
// shortcut: the first set of the empty sequence, for any k, is {""}
if(*s == NULL)
if (*s == NULL) {
return g->singleton_epsilon;
}
// first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| }
HCFChoice *x = s[0];
@ -443,12 +465,14 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
const HStringMap *first_x = h_first(k, g, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail)
if(is_singleton_epsilon(first_x))
if (is_singleton_epsilon(first_x)) {
return h_first_seq(k, g, tail);
}
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
if(!any_string_shorter(k, first_x))
if (!any_string_shorter(k, first_x)) {
return first_x;
}
// create a new result set and build up the set described above
HStringMap *ret = h_stringmap_new(g->arena);
@ -468,25 +492,27 @@ static bool is_singleton_epsilon(const HStringMap *m)
static bool any_string_shorter(size_t k, const HStringMap *m)
{
if(k==0)
if (k==0) {
return false;
if(m->epsilon_branch)
}
if (m->epsilon_branch) {
return true;
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
HStringMap *m_ = hte->value;
// check subtree for strings shorter than k-1
if(any_string_shorter(k-1, m_))
if (any_string_shorter(k-1, m_)) {
return true;
}
}
}
return false;
}
@ -494,16 +520,21 @@ static bool any_string_shorter(size_t k, const HStringMap *m)
// helper for h_predict
static void remove_all_shorter(size_t k, HStringMap *m)
{
if(k==0) return;
if (k==0) {
return;
}
m->epsilon_branch = NULL;
if(k==1) return;
if (k==1) {
return;
}
// iterate over m->char_branches
const HHashTable *ht = m->char_branches;
for (size_t i=0; i < ht->capacity; i++) {
for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
remove_all_shorter(k-1, hte->value); // recursion into subtree
}
}
@ -530,29 +561,31 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
HStringMap *ret;
// shortcut: follow_0(X) is always {""}
if(k==0)
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->follow
ensure_k(g, k);
ret = h_hashtable_get(g->follow[k], x);
if(ret != NULL)
if (ret != NULL) {
return ret;
}
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->follow[k], x, ret);
// if X is the start symbol, the end token is in its follow set
if(x == g->start)
if (x == g->start) {
h_stringmap_put_end(ret, INSET);
}
// iterate over g->nts
size_t i;
HHashTableEntry *hte;
for (i=0; i < g->nts->capacity; i++) {
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
HCFChoice *a = (void *)hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
@ -619,8 +652,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
const HHashTable *ht = as->char_branches;
for(size_t i=0; i < ht->capacity; i++) {
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
uint8_t c = key_char((HCharKey)hte->key);
// follow the branch to find the set { a' | t a' <- as }
@ -708,8 +742,9 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
{
fputc('"', f);
for(; *x; x++) {
if((*x)->type != HCF_CHAR)
if ((*x)->type != HCF_CHAR) {
break;
}
h_pprint_char(f, (*x)->chr);
}
fputc('"', f);
@ -743,8 +778,9 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
fputs("\"\"", f);
} else {
while(*x) {
if(x != seq->items) fputc(' ', f); // internal separator
if (x != seq->items) {
fputc(' ', f); // internal separator
}
if ((*x)->type == HCF_CHAR) {
// condense character strings
x = pprint_string(f, x);
@ -781,7 +817,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
assert(nt->type == HCF_CHOICE);
HCFSequence **p = nt->seq;
if(*p == NULL) return; // shouldn't happen
if (*p == NULL) {
return; // shouldn't happen
}
pprint_sequence(f, g, *p++); // print first production on the same line
for(; *p; p++) { // print the rest below with "or" bars
for(i=0; i<column; i++) fputc(' ', f); // indent
@ -792,8 +830,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
{
if(g->nts->used < 1)
if (g->nts->used < 1) {
return;
}
// determine maximum string length of symbol names
int len;
@ -805,8 +844,9 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
HHashTableEntry *hte;
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
const HCFChoice *a = hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
@ -828,10 +868,12 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
const HCFChoice *a = NULL;
for(i=0; i < set->capacity; i++) {
for(hte = &set->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
if(a != NULL) // we're not on the first element
}
if(a != NULL) { // we're not on the first element
fputc(',', file);
}
a = hte->key; // production's left-hand symbol
@ -852,7 +894,10 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
assert(n < BUFSIZE-4);
if (map->epsilon_branch) {
if(!first) fputc(sep, file); first=false;
if (!first) {
fputc(sep, file);
}
// Cleared unconditionally: in the unbraced form `first=false;` was a separate
// statement following the if, so it must stay outside the braces. Leaving it
// inside would keep `first` true forever when it starts out true.
first=false;
if (n==0) {
fputs("\"\"", file);
} else {
@ -868,10 +913,17 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
}
if (map->end_branch) {
if(!first) fputs(",\"", file); first=false;
if(n>0) fputs("\"\"", file);
if (!first) {
fputs(",\"", file);
}
// Cleared unconditionally: in the unbraced form `first=false;` was a separate
// statement following the if, so it must stay outside the braces.
first=false;
if (n>0) {
fputs("\"\"", file);
}
fwrite(prefix, 1, n, file);
if(n>0) fputs("\"\"", file);
if (n>0) {
fputs("\"\"", file);
}
fputs("$", file);
if (valprint) {
@ -886,8 +938,9 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
HHashTableEntry *hte;
for(i=0; i < ht->capacity; i++) {
for(hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL)
if (hte->key == NULL) {
continue;
}
uint8_t c = key_char((HCharKey)hte->key);
HStringMap *ends = hte->value;
@ -901,11 +954,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break;
case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break;
default:
if(isprint(c))
if (isprint(c)) {
prefix[n_++] = c;
else
} else {
n_ += sprintf(prefix+n_, "\\x%.2X", c);
}
}
first = pprint_stringmap_elems(file, first, prefix, n_,
sep, valprint, env, ends);

View file

@ -8,14 +8,16 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) {
if (nstk__ == NULL) {
nstk__ = h_cfstack_new(mm__);
}
if(nstk__->prealloc == NULL)
if (nstk__->prealloc == NULL) {
nstk__->prealloc = h_new(HCFChoice, 1);
}
// we're going to do something naughty and cast away the const to memoize
assert(parser->vtable->desugar != NULL);
((HParser *)parser)->desugared = nstk__->prealloc;
parser->vtable->desugar(mm__, nstk__, parser->env);
if (stk__ == NULL)
if (stk__ == NULL) {
h_cfstack_free(mm__, nstk__);
}
} else if (stk__ != NULL) {
HCFS_APPEND(parser->desugared);
}