augment grammar to ensure start symbol never occurs on rhs

This commit is contained in:
Sven M. Hallberg 2013-06-16 22:23:35 +02:00
parent 8d21c782e7
commit 24c15f34cc

View file

@ -2,6 +2,7 @@
#include "../internal.h" #include "../internal.h"
#include "../cfgrammar.h" #include "../cfgrammar.h"
#include "../parsers/parser_internal.h" #include "../parsers/parser_internal.h"
#include "contextfree.h"
@ -274,8 +275,6 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
// assigned index. // assigned index.
HSlist *work = h_slist_new(arena); HSlist *work = h_slist_new(arena);
// XXX augment grammar?!
// make initial state (kernel) // make initial state (kernel)
HLRState *start = h_lrstate_new(arena); HLRState *start = h_lrstate_new(arena);
assert(g->start->type == HCF_CHOICE); assert(g->start->type == HCF_CHOICE);
@ -596,14 +595,33 @@ bool match_production(HLREnhGrammar *eg, HCFChoice **p,
&& state == endstate); && state == endstate);
} }
// Desugar `parser` under a fresh start symbol.
//
// The grammar is augmented with a new top-level choice that wraps the
// desugared parser. Because that choice is created here and nowhere else,
// the start symbol is guaranteed not to occur in any production (i.e. it
// never appears on a right-hand side).
//
// mm__   - allocator passed to h_cfstack_new/h_cfstack_free; presumably also
//          picked up implicitly by h_new -- TODO confirm against the macro.
// parser - the parser to desugar inside the new start symbol.
//
// Returns the HCFChoice obtained from h_new. It is not released here; the
// temporary HCFStack is the only thing freed before returning.
static HCFChoice *augment(HAllocator *mm__, HParser *parser)
{
// node that is handed back to the caller as the fresh start symbol
HCFChoice *augmented = h_new(HCFChoice, 1);
// NOTE(review): the HCFS_* macros below presumably refer to `stk__` (and
// `mm__`) by name -- confirm before renaming either variable.
// NOTE(review): the result is dereferenced on the next line without a NULL
// check; whether h_cfstack_new can fail is not visible from here.
HCFStack *stk__ = h_cfstack_new(mm__);
// presumably makes the outermost choice below get built in `augmented`
// instead of a separately allocated node -- TODO confirm
stk__->prealloc = augmented;
// shape being built: one choice holding one sequence holding the
// desugared parser
HCFS_BEGIN_CHOICE() {
HCFS_BEGIN_SEQ() {
HCFS_DESUGAR(parser);
} HCFS_END_SEQ();
// presumably h_act_first selects the first (here: only) element of the
// sequence, so the wrapper does not change the parse result -- TODO confirm
HCFS_THIS_CHOICE->reshape = h_act_first;
} HCFS_END_CHOICE();
// the stack was only needed while desugaring; `augmented` is kept
h_cfstack_free(mm__, stk__);
return augmented;
}
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{ {
// generate CFG from parser // generate (augmented) CFG from parser
// construct LR(0) DFA // construct LR(0) DFA
// build LR(0) table // build LR(0) table
// if necessary, resolve conflicts "by conversion to SLR" // if necessary, resolve conflicts "by conversion to SLR"
HCFGrammar *g = h_cfgrammar(mm__, parser); HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free) if(g == NULL) // backend not suitable (language not context-free)
return -1; return -1;
@ -981,21 +999,18 @@ HParserBackendVTable h__lalr_backend_vtable = {
int test_lalr(void) int test_lalr(void)
{ {
/* /*
S -> E
E -> E '-' T E -> E '-' T
| T | T
T -> '(' E ')' T -> '(' E ')'
| 'n' -- also try [0-9] for the charset paths | 'n' -- also try [0-9] for the charset paths
*/ */
#if 0
HParser *n = h_ch('n'); HParser *n = h_ch('n');
HParser *E = h_indirect(); HParser *E = h_indirect();
HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
h_bind_indirect(E, E_); h_bind_indirect(E, E_);
#endif HParser *p = E;
HParser *p = h_whitespace(h_ch('n')); //h_sequence(E, NULL);
printf("\n==== G R A M M A R ====\n"); printf("\n==== G R A M M A R ====\n");
HCFGrammar *g = h_cfgrammar(&system_allocator, p); HCFGrammar *g = h_cfgrammar(&system_allocator, p);
@ -1028,7 +1043,7 @@ int test_lalr(void)
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
printf("\n==== P A R S E R E S U L T ====\n"); printf("\n==== P A R S E R E S U L T ====\n");
HParseResult *res = h_parse(p, (uint8_t *)" n-(n-((n)))-n", 13); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
if(res) if(res)
h_pprint(stdout, res->ast, 0, 2); h_pprint(stdout, res->ast, 0, 2);
else else