augment grammar to ensure start symbol never occurs on rhs

This commit is contained in:
Sven M. Hallberg 2013-06-16 22:23:35 +02:00
parent 8d21c782e7
commit 24c15f34cc

View file

@ -2,6 +2,7 @@
#include "../internal.h"
#include "../cfgrammar.h"
#include "../parsers/parser_internal.h"
#include "contextfree.h"
@ -274,8 +275,6 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
// assigned index.
HSlist *work = h_slist_new(arena);
// XXX augment grammar?!
// make initial state (kernel)
HLRState *start = h_lrstate_new(arena);
assert(g->start->type == HCF_CHOICE);
@ -596,14 +595,33 @@ bool match_production(HLREnhGrammar *eg, HCFChoice **p,
&& state == endstate);
}
// desugar parser with a fresh start symbol
// this guarantees that the start symbol will not occur in any productions
//
// mm__   - allocator handed to h_cfstack_new/h_cfstack_free; presumably also
//          the one h_new draws from implicitly (TODO confirm in internal.h)
// parser - the parser to desugar as the sole content of the new start symbol
//
// returns the freshly allocated HCFChoice that was installed as the stack's
// `prealloc` before the choice was built.
static HCFChoice *augment(HAllocator *mm__, HParser *parser)
{
// node that will serve as the new start symbol
HCFChoice *augmented = h_new(HCFChoice, 1);
// NOTE(review): the HCFS_* macros below presumably refer to this local by
// its exact name `stk__` -- do not rename without checking their definitions
HCFStack *stk__ = h_cfstack_new(mm__);
// presumably makes the next choice opened on the stack reuse `augmented`
// instead of allocating a new node -- TODO confirm against HCFS_BEGIN_CHOICE
stk__->prealloc = augmented;
// build: one choice containing one sequence containing the desugared parser
HCFS_BEGIN_CHOICE() {
HCFS_BEGIN_SEQ() {
HCFS_DESUGAR(parser);
} HCFS_END_SEQ();
// NOTE(review): h_act_first presumably forwards the result of the single
// sequence element so the extra wrapper level stays invisible -- confirm
HCFS_THIS_CHOICE->reshape = h_act_first;
} HCFS_END_CHOICE();
// the stack is only needed while constructing; release it before returning
h_cfstack_free(mm__, stk__);
return augmented;
}
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
// generate CFG from parser
// generate (augmented) CFG from parser
// construct LR(0) DFA
// build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR"
HCFGrammar *g = h_cfgrammar(mm__, parser);
HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free)
return -1;
@ -981,21 +999,18 @@ HParserBackendVTable h__lalr_backend_vtable = {
int test_lalr(void)
{
/*
S -> E
E -> E '-' T
| T
T -> '(' E ')'
| 'n' -- also try [0-9] for the charset paths
*/
#if 0
HParser *n = h_ch('n');
HParser *E = h_indirect();
HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
h_bind_indirect(E, E_);
#endif
HParser *p = h_whitespace(h_ch('n')); //h_sequence(E, NULL);
HParser *p = E;
printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar(&system_allocator, p);