2013-04-17 15:10:33 +02:00
|
|
|
#include <assert.h>
|
|
|
|
|
#include "../internal.h"
|
2013-05-07 23:56:47 +02:00
|
|
|
#include "../cfgrammar.h"
|
2013-04-17 15:10:33 +02:00
|
|
|
#include "../parsers/parser_internal.h"
|
|
|
|
|
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-08 18:04:08 +02:00
|
|
|
/* Generating the LL parse table */
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-08 17:04:19 +02:00
|
|
|
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
|
|
|
|
|
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
|
|
|
|
|
*/
|
2013-05-08 18:04:08 +02:00
|
|
|
typedef struct HLLTable_ {
|
|
|
|
|
HHashTable *rows;
|
|
|
|
|
HArena *arena;
|
|
|
|
|
HAllocator *mm__;
|
|
|
|
|
} HLLTable;
|
2013-04-27 04:24:09 +02:00
|
|
|
|
|
|
|
|
/* Interface to look up an entry in the parse table. */
|
2013-05-08 17:04:19 +02:00
|
|
|
const HCFSequence *h_ll_lookup(const HLLTable *table, const HCFChoice *x, HCFToken tok)
|
2013-04-27 04:24:09 +02:00
|
|
|
{
|
2013-05-08 18:04:08 +02:00
|
|
|
const HHashTable *row = h_hashtable_get(table->rows, x);
|
2013-05-08 17:04:19 +02:00
|
|
|
assert(row != NULL); // the table should have one row for each nonterminal
|
|
|
|
|
|
|
|
|
|
const HCFSequence *production = h_hashtable_get(row, (void *)tok);
|
|
|
|
|
return production;
|
2013-04-27 04:24:09 +02:00
|
|
|
}
|
|
|
|
|
|
2013-05-08 18:04:08 +02:00
|
|
|
/* Allocate a new parse table. */
|
|
|
|
|
HLLTable *h_lltable_new(HAllocator *mm__)
|
|
|
|
|
{
|
|
|
|
|
// NB the parse table gets an arena separate from the grammar so we can free
|
|
|
|
|
// the latter after table generation.
|
|
|
|
|
HArena *arena = h_new_arena(mm__, 0); // default blocksize
|
|
|
|
|
assert(arena != NULL);
|
|
|
|
|
HHashTable *rows = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
|
|
|
|
assert(rows != NULL);
|
|
|
|
|
|
|
|
|
|
HLLTable *table = h_new(HLLTable, 1);
|
|
|
|
|
assert(table != NULL);
|
|
|
|
|
table->mm__ = mm__;
|
|
|
|
|
table->arena = arena;
|
|
|
|
|
table->rows = rows;
|
|
|
|
|
|
|
|
|
|
return table;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Destroy a parse table created by h_lltable_new.
 *
 * Deletes the table's arena (which holds `rows` and all row tables) and then
 * releases the HLLTable struct itself. Passing NULL is allowed and does
 * nothing, mirroring free(NULL).
 */
void h_lltable_free(HLLTable *table)
{
  if(table == NULL)
    return;

  // NB this local must keep the name `mm__`: h_free is called without an
  // allocator argument and presumably picks it up from the enclosing scope
  // (TODO confirm against internal.h).
  HAllocator *mm__ = table->mm__;

  h_delete_arena(table->arena);
  h_free(table);
}
|
|
|
|
|
|
2013-05-08 16:12:34 +02:00
|
|
|
/* Compute the predict set of production "A -> rhs". */
|
|
|
|
|
HHashSet *h_predict(HCFGrammar *g, const HCFChoice *A, const HCFSequence *rhs)
|
|
|
|
|
{
|
|
|
|
|
// predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs
|
|
|
|
|
// predict(A -> rhs) = first(rhs) otherwise
|
|
|
|
|
HHashSet *first_rhs = h_first_sequence(g, rhs->items);
|
|
|
|
|
if(h_sequence_derives_epsilon(g, rhs->items)) {
|
|
|
|
|
HHashSet *ret = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
|
|
|
|
h_hashset_put_all(ret, first_rhs);
|
|
|
|
|
h_hashset_put_all(ret, h_follow(g, A));
|
|
|
|
|
return ret;
|
|
|
|
|
} else {
|
|
|
|
|
return first_rhs;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-08 18:49:05 +02:00
|
|
|
/* Generate entries for the production "A -> rhs" in the given table row. */
|
|
|
|
|
static
|
|
|
|
|
int fill_table_row(HCFGrammar *g, HHashTable *row,
|
|
|
|
|
const HCFChoice *A, HCFSequence *rhs)
|
|
|
|
|
{
|
|
|
|
|
// iterate over predict(A -> rhs)
|
|
|
|
|
HHashSet *pred = h_predict(g, A, rhs);
|
|
|
|
|
|
|
|
|
|
size_t i;
|
|
|
|
|
HHashTableEntry *hte;
|
|
|
|
|
for(i=0; i < pred->capacity; i++) {
|
|
|
|
|
for(hte = &pred->contents[i]; hte; hte = hte->next) {
|
|
|
|
|
if(hte->key == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
HCFToken x = (uintptr_t)hte->key;
|
|
|
|
|
|
|
|
|
|
if(h_hashtable_present(row, (void *)x))
|
|
|
|
|
return -1; // table would be ambiguous
|
|
|
|
|
|
|
|
|
|
h_hashtable_put(row, (void *)x, rhs);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-08 18:04:08 +02:00
|
|
|
/* Generate the LL parse table from the given grammar.
|
|
|
|
|
* Returns -1 on error, 0 on success.
|
|
|
|
|
*/
|
|
|
|
|
static int fill_table(HCFGrammar *g, HLLTable *table)
|
|
|
|
|
{
|
2013-05-08 18:49:05 +02:00
|
|
|
// iterate over g->nts
|
|
|
|
|
size_t i;
|
|
|
|
|
HHashTableEntry *hte;
|
|
|
|
|
for(i=0; i < g->nts->capacity; i++) {
|
|
|
|
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
|
|
|
|
if(hte->key == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
const HCFChoice *a = hte->key; // production's left-hand symbol
|
|
|
|
|
|
|
|
|
|
// create table row for this nonterminal
|
|
|
|
|
HHashTable *row = h_hashtable_new(table->arena, h_eq_ptr, h_hash_ptr);
|
|
|
|
|
h_hashtable_put(table->rows, a, row);
|
|
|
|
|
|
|
|
|
|
// iterate over a's productions
|
|
|
|
|
HCFSequence **s;
|
|
|
|
|
for(s = a->seq; *s; s++) {
|
|
|
|
|
// record this production in row as appropriate
|
|
|
|
|
// this can signal an ambiguity conflict.
|
|
|
|
|
// NB we don't worry about deallocating anything, h_ll_compile will
|
|
|
|
|
// delete the whole arena for us.
|
|
|
|
|
if(fill_table_row(g, row, a, *s) < 0)
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
2013-05-08 18:04:08 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compile `parser` for the LL backend.
 *
 * Converts the parser to a CFG, generates the LL parse table from it and, on
 * success, stores the table in parser->data. `params` is not used.
 *
 * Returns 0 on success, -1 if the parser cannot be converted to a CFG or if
 * the resulting table would be ambiguous.
 */
int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  // Convert parser to a CFG. A NULL result means the conversion failed.
  HCFGrammar *grammar = h_cfgrammar(mm__, parser);
  if(grammar == NULL)
    return -1;    // -> Backend unsuitable for this parser.

  // TODO: eliminate common prefixes
  // TODO: eliminate left recursion
  // TODO: avoid conflicts by splitting occurrences?

  // generate the table; a negative result means it was ambiguous
  HLLTable *table = h_lltable_new(mm__);
  const int ambiguous = (fill_table(grammar, table) < 0);

  if(!ambiguous)
    parser->data = table;

  // The grammar and its arena are no longer needed in either case.
  // Desugared parsers (HCFChoice and HCFSequence) are unaffected by this.
  h_cfgrammar_free(grammar);

  if(ambiguous) {
    h_lltable_free(table);
    return -1;
  }

  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* LL driver */
|
|
|
|
|
|
|
|
|
|
/* LL parse driver -- not implemented yet.
 *
 * Intended outline: get the table from parser->data, then run the driver.
 * Currently ignores all arguments and always returns NULL.
 */
HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state)
{
  // arguments are unused until the driver exists
  (void)mm__;
  (void)parser;
  (void)parse_state;

  return NULL;  // TODO
}
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
|
|
|
|
|
|
2013-04-17 15:10:33 +02:00
|
|
|
HParserBackendVTable h__ll_backend_vtable = {
|
|
|
|
|
.compile = h_ll_compile,
|
|
|
|
|
.parse = h_ll_parse
|
|
|
|
|
};
|
2013-05-05 22:15:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// dummy!
|
|
|
|
|
int test_ll(void)
|
|
|
|
|
{
|
|
|
|
|
const HParser *c = h_many(h_ch('x'));
|
|
|
|
|
const HParser *q = h_sequence(c, h_ch('y'), NULL);
|
|
|
|
|
const HParser *p = h_choice(q, h_end_p(), NULL);
|
|
|
|
|
|
2013-05-07 23:37:02 +02:00
|
|
|
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
2013-05-05 22:15:40 +02:00
|
|
|
|
|
|
|
|
if(g == NULL) {
|
2013-05-07 23:37:02 +02:00
|
|
|
fprintf(stderr, "h_cfgrammar failed\n");
|
2013-05-05 22:15:40 +02:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h_pprint_grammar(stdout, g, 0);
|
|
|
|
|
printf("generate epsilon: ");
|
|
|
|
|
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
|
|
|
|
printf("first(A) = ");
|
|
|
|
|
h_pprint_tokenset(stdout, g, h_first_symbol(g, g->start), 0);
|
|
|
|
|
printf("follow(C) = ");
|
|
|
|
|
h_pprint_tokenset(stdout, g, h_follow(g, h_desugar(&system_allocator, c)), 0);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|