2013-04-17 15:10:33 +02:00
|
|
|
#include <assert.h>
|
|
|
|
|
#include "../internal.h"
|
2013-05-07 23:56:47 +02:00
|
|
|
#include "../cfgrammar.h"
|
2013-04-17 15:10:33 +02:00
|
|
|
#include "../parsers/parser_internal.h"
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
/* Generating the LL(k) parse table */
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
/* Maps each nonterminal (HCFChoice) of the grammar to a HCFStringMap that
|
|
|
|
|
* maps lookahead strings to productions (HCFSequence).
|
2013-05-08 17:04:19 +02:00
|
|
|
*/
|
2013-05-11 19:04:59 +02:00
|
|
|
typedef struct HLLkTable_ {
|
2013-05-08 18:04:08 +02:00
|
|
|
HHashTable *rows;
|
2013-05-11 15:14:10 +02:00
|
|
|
HCFChoice *start; // start symbol
|
2013-05-23 21:01:37 +02:00
|
|
|
size_t k; // lookahead depth XXX needed?
|
2013-05-08 18:04:08 +02:00
|
|
|
HArena *arena;
|
|
|
|
|
HAllocator *mm__;
|
2013-05-11 19:04:59 +02:00
|
|
|
} HLLkTable;
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-22 20:38:36 +02:00
|
|
|
|
|
|
|
|
// XXX adaptation to LL(1), to be removed
|
|
|
|
|
typedef HCharKey HCFToken;
|
|
|
|
|
static const HCFToken end_token = 0x200;
|
|
|
|
|
#define char_token char_key
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
/* Interface to look up an entry in the parse table. */
|
2013-05-20 21:28:16 +02:00
|
|
|
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
|
|
|
|
|
HInputStream lookahead)
|
2013-04-27 04:24:09 +02:00
|
|
|
{
|
2013-05-23 21:01:37 +02:00
|
|
|
const HCFStringMap *row = h_hashtable_get(table->rows, x);
|
2013-05-08 17:04:19 +02:00
|
|
|
assert(row != NULL); // the table should have one row for each nonterminal
|
|
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
assert(!row->epsilon_branch); // would match without looking at the input
|
|
|
|
|
// XXX cases where this could be useful?
|
|
|
|
|
|
|
|
|
|
const HCFStringMap *m = row;
|
|
|
|
|
while(m) {
|
|
|
|
|
if(m->epsilon_branch) { // input matched
|
|
|
|
|
// assert: another lookahead would not bring a more specific match.
|
|
|
|
|
// this is for the table generator to ensure.
|
|
|
|
|
return m->epsilon_branch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// note the lookahead stream is passed by value, i.e. a copy.
|
|
|
|
|
// reading bits from it does not consume them from the real input.
|
|
|
|
|
uint8_t c = h_read_bits(&lookahead, 8, false);
|
|
|
|
|
|
|
|
|
|
if(lookahead.overrun) { // end of input
|
|
|
|
|
// XXX assumption of byte-wise grammar and input
|
|
|
|
|
return m->end_branch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// no match yet, descend
|
|
|
|
|
m = h_stringmap_get_char(m, c);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
2013-04-27 04:24:09 +02:00
|
|
|
}
|
|
|
|
|
|
2013-05-08 18:04:08 +02:00
|
|
|
/* Allocate a new parse table. */
|
2013-05-11 19:04:59 +02:00
|
|
|
HLLkTable *h_llktable_new(HAllocator *mm__)
|
2013-05-08 18:04:08 +02:00
|
|
|
{
|
|
|
|
|
// NB the parse table gets an arena separate from the grammar so we can free
|
|
|
|
|
// the latter after table generation.
|
|
|
|
|
HArena *arena = h_new_arena(mm__, 0); // default blocksize
|
|
|
|
|
assert(arena != NULL);
|
|
|
|
|
HHashTable *rows = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
|
|
|
|
assert(rows != NULL);
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
HLLkTable *table = h_new(HLLkTable, 1);
|
2013-05-08 18:04:08 +02:00
|
|
|
assert(table != NULL);
|
|
|
|
|
table->mm__ = mm__;
|
|
|
|
|
table->arena = arena;
|
|
|
|
|
table->rows = rows;
|
|
|
|
|
|
|
|
|
|
return table;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
/* Release a parse table together with its arena. Passing NULL is a no-op. */
void h_llktable_free(HLLkTable *table)
{
  if(table != NULL) {
    // not referenced by name below; presumably picked up by the h_free macro
    HAllocator *mm__ = table->mm__;

    h_delete_arena(table->arena);
    h_free(table);
  }
}
|
|
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
/* Compute the predict_k set of production "A -> rhs".
|
|
|
|
|
* Always returns a newly-allocated HCFStringMap.
|
|
|
|
|
*/
|
|
|
|
|
HCFStringMap *h_predict(size_t k, HCFGrammar *g,
|
|
|
|
|
const HCFChoice *A, const HCFSequence *rhs)
|
2013-05-08 16:12:34 +02:00
|
|
|
{
|
2013-05-23 21:01:37 +02:00
|
|
|
assert(k==1); // XXX
|
|
|
|
|
HCFStringMap *ret = h_stringmap_new(g->arena);
|
|
|
|
|
|
2013-05-08 16:12:34 +02:00
|
|
|
// predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs
|
|
|
|
|
// predict(A -> rhs) = first(rhs) otherwise
|
2013-05-23 21:01:37 +02:00
|
|
|
|
|
|
|
|
h_stringmap_update(ret, h_first_seq(k, g, rhs->items));
|
|
|
|
|
if(h_derives_epsilon_seq(g, rhs->items))
|
|
|
|
|
h_stringmap_update(ret, h_follow(k, g, A));
|
|
|
|
|
|
|
|
|
|
// make sure there are only strings of length _exactly_ k
|
|
|
|
|
ret->epsilon_branch = NULL;
|
2013-05-22 20:38:36 +02:00
|
|
|
|
|
|
|
|
return ret;
|
2013-05-08 16:12:34 +02:00
|
|
|
}
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
void *const CONFLICT = (void *)(uintptr_t)(-1);
|
|
|
|
|
|
|
|
|
|
static HHashSet *cte_workset; // emulating a closure
|
|
|
|
|
static void *combine_table_entry(void *dst, const void *src)
|
2013-05-08 18:49:05 +02:00
|
|
|
{
|
2013-05-23 21:01:37 +02:00
|
|
|
if(dst == CONFLICT) { // previous conflict
|
|
|
|
|
h_hashset_put(cte_workset, src);
|
|
|
|
|
} else if(dst != src) { // new conflict
|
|
|
|
|
h_hashset_put(cte_workset, dst);
|
|
|
|
|
h_hashset_put(cte_workset, src);
|
|
|
|
|
dst = CONFLICT;
|
|
|
|
|
}
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
2013-05-08 18:49:05 +02:00
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
// add the mappings of src to dst, calling combine if there is a collision
|
|
|
|
|
// note: might reuse parts of src in building up dst!
|
|
|
|
|
static void stringmap_merge(void *(*combine)(void *, const void *),
|
|
|
|
|
HCFStringMap *dst, HCFStringMap *src)
|
|
|
|
|
{
|
|
|
|
|
if(src->epsilon_branch) {
|
|
|
|
|
if(dst->epsilon_branch)
|
|
|
|
|
dst->epsilon_branch = combine(dst->epsilon_branch, src->epsilon_branch);
|
|
|
|
|
else
|
|
|
|
|
dst->epsilon_branch = src->epsilon_branch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(src->end_branch) {
|
|
|
|
|
if(dst->end_branch)
|
|
|
|
|
dst->end_branch = combine(dst->end_branch, src->end_branch);
|
|
|
|
|
else
|
|
|
|
|
dst->end_branch = src->end_branch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// iterate over src->char_branches
|
|
|
|
|
const HHashTable *ht = src->char_branches;
|
|
|
|
|
for(size_t i=0; i < ht->capacity; i++) {
|
|
|
|
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
2013-05-08 18:49:05 +02:00
|
|
|
if(hte->key == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
HCharKey c = (HCharKey)hte->key;
|
|
|
|
|
HCFStringMap *src_ = hte->value;
|
2013-05-08 18:49:05 +02:00
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
if(src_) {
|
|
|
|
|
HCFStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c);
|
|
|
|
|
if(dst_)
|
|
|
|
|
stringmap_merge(combine, dst_, src_);
|
|
|
|
|
else
|
|
|
|
|
dst_ = src_;
|
|
|
|
|
}
|
2013-05-08 18:49:05 +02:00
|
|
|
}
|
|
|
|
|
}
|
2013-05-23 21:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate entries for the production "A -> rhs" in the given table row. */
|
|
|
|
|
static int fill_production_entries(size_t k, HCFGrammar *g, HCFStringMap *row,
|
|
|
|
|
const HCFChoice *A, HCFSequence *rhs)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
for(size_t i=1; i<=k; i++) {
|
|
|
|
|
HCFStringMap *pred = h_predict(i, g, A, rhs);
|
|
|
|
|
h_stringmap_replace(pred, NULL, rhs); // make all values in pred map to rhs
|
2013-05-08 18:49:05 +02:00
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
// clear previous conflict markers
|
|
|
|
|
h_stringmap_replace(row, CONFLICT, NULL);
|
|
|
|
|
|
|
|
|
|
// merge predict set into the row, accumulating conflicts in workset
|
|
|
|
|
cte_workset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
|
|
|
|
// will be deleted after compile
|
|
|
|
|
stringmap_merge(combine_table_entry, row, pred);
|
|
|
|
|
|
|
|
|
|
// if the workset is empty, row is free of conflicts and we are done.
|
|
|
|
|
if(h_hashset_empty(cte_workset))
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if we reach here, conflicts remain at maximum lookahead
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate entries for the production "A" in the given table row. */
|
|
|
|
|
static int fill_table_row(size_t k, HCFGrammar *g, HCFStringMap *row,
|
|
|
|
|
const HCFChoice *A)
|
|
|
|
|
{
|
|
|
|
|
// iterate over A's productions
|
|
|
|
|
for(HCFSequence **s = A->seq; *s; s++) {
|
|
|
|
|
// record this production in row as appropriate
|
|
|
|
|
if(fill_production_entries(k, g, row, A, *s) < 0)
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
2013-05-08 18:49:05 +02:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
/* Generate the LL(k) parse table from the given grammar.
|
2013-05-08 18:04:08 +02:00
|
|
|
* Returns -1 on error, 0 on success.
|
|
|
|
|
*/
|
2013-05-23 21:01:37 +02:00
|
|
|
static int fill_table(size_t k, HCFGrammar *g, HLLkTable *table)
|
2013-05-08 18:04:08 +02:00
|
|
|
{
|
2013-05-11 15:14:10 +02:00
|
|
|
table->start = g->start;
|
|
|
|
|
|
2013-05-08 18:49:05 +02:00
|
|
|
// iterate over g->nts
|
|
|
|
|
size_t i;
|
|
|
|
|
HHashTableEntry *hte;
|
|
|
|
|
for(i=0; i < g->nts->capacity; i++) {
|
|
|
|
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
|
|
|
|
if(hte->key == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
const HCFChoice *a = hte->key; // production's left-hand symbol
|
2013-05-14 16:15:58 +02:00
|
|
|
assert(a->type == HCF_CHOICE);
|
2013-05-08 18:49:05 +02:00
|
|
|
|
|
|
|
|
// create table row for this nonterminal
|
2013-05-23 21:01:37 +02:00
|
|
|
HCFStringMap *row = h_stringmap_new(table->arena);
|
2013-05-08 18:49:05 +02:00
|
|
|
h_hashtable_put(table->rows, a, row);
|
|
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
if(fill_table_row(k, g, row, a) < 0) {
|
|
|
|
|
// unresolvable conflicts in row
|
2013-05-11 19:04:59 +02:00
|
|
|
// NB we don't worry about deallocating anything, h_llk_compile will
|
2013-05-08 18:49:05 +02:00
|
|
|
// delete the whole arena for us.
|
2013-05-23 21:01:37 +02:00
|
|
|
return -1;
|
2013-05-08 18:49:05 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
2013-05-08 18:04:08 +02:00
|
|
|
}
|
|
|
|
|
|
2013-05-23 21:01:37 +02:00
|
|
|
static const size_t K_DEFAULT = 1;
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
2013-04-27 04:24:09 +02:00
|
|
|
{
|
2013-05-23 21:01:37 +02:00
|
|
|
size_t k = params? (uintptr_t)params : K_DEFAULT;
|
|
|
|
|
assert(k>0);
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
2013-05-07 23:37:02 +02:00
|
|
|
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
2013-04-27 04:24:09 +02:00
|
|
|
if(grammar == NULL)
|
|
|
|
|
return -1; // -> Backend unsuitable for this parser.
|
|
|
|
|
|
|
|
|
|
// TODO: eliminate common prefixes
|
|
|
|
|
// TODO: eliminate left recursion
|
|
|
|
|
// TODO: avoid conflicts by splitting occurances?
|
|
|
|
|
|
2013-05-11 22:02:59 +02:00
|
|
|
// generate table and store in parser->backend_data.
|
2013-05-11 19:04:59 +02:00
|
|
|
HLLkTable *table = h_llktable_new(mm__);
|
2013-05-23 21:01:37 +02:00
|
|
|
if(fill_table(k, grammar, table) < 0) {
|
2013-05-08 18:04:08 +02:00
|
|
|
// the table was ambiguous
|
|
|
|
|
h_cfgrammar_free(grammar);
|
2013-05-11 19:04:59 +02:00
|
|
|
h_llktable_free(table);
|
2013-05-08 18:04:08 +02:00
|
|
|
return -1;
|
|
|
|
|
}
|
2013-05-11 22:02:59 +02:00
|
|
|
parser->backend_data = table;
|
2013-05-08 18:04:08 +02:00
|
|
|
|
|
|
|
|
// free grammar and its arena.
|
|
|
|
|
// desugared parsers (HCFChoice and HCFSequence) are unaffected by this.
|
|
|
|
|
h_cfgrammar_free(grammar);
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-08 18:04:08 +02:00
|
|
|
return 0;
|
2013-04-27 04:24:09 +02:00
|
|
|
}
|
|
|
|
|
|
2013-05-11 22:02:59 +02:00
|
|
|
/* Drop the LL(k) parse table attached to 'parser' and switch the parser back
 * to the packrat backend. */
void h_llk_free(HParser *parser)
{
  h_llktable_free((HLLkTable *)parser->backend_data);

  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
/* LL(k) driver */
|
2013-04-27 04:24:09 +02:00
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
/* Run the table-driven LL(k) parse.
 *
 * mm__   - allocator for the result arena and a temporary arena
 * parser - parser that was compiled for this backend; its backend_data must
 *          hold the parse table
 * stream - input; bytes are consumed from it as terminals are matched
 *
 * Returns a parse result allocated in a fresh arena, or NULL if the input
 * does not parse (including a failed validation). Both arenas are deleted
 * on failure; only the temporary one on success.
 */
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  const HLLkTable *table = parser->backend_data;
  assert(table != NULL);

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
  HSlist *stack  = h_slist_new(tarena);
  HCountedArray *seq = h_carray_new(arena); // accumulates current parse result

  // in order to construct the parse tree, we delimit the symbol stack into
  // frames corresponding to production right-hand sides. since only left-most
  // derivations are produced this linearization is unique.
  // the 'mark' allocated below simply reserves a memory address to use as the
  // frame delimiter.
  // nonterminals, instead of being popped and forgotten, are put back onto the
  // stack below the mark to tell us which validations and semantic actions to
  // execute on their corresponding result.
  // also on the stack below the mark, we store the previously accumulated
  // value for the surrounding production.
  void *mark = h_arena_malloc(tarena, 1);

  // initialize with the start symbol on the stack.
  h_slist_push(stack, table->start);

  // when we empty the stack, the parse is complete.
  while(!h_slist_empty(stack)) {
    // pop top of stack for inspection
    HCFChoice *x = h_slist_pop(stack);
    assert(x != NULL);

    if(x != mark && x->type == HCF_CHOICE) {
      // x is a nonterminal; apply the appropriate production and continue

      // push stack frame
      h_slist_push(stack, seq);   // save current partial value
      h_slist_push(stack, x);     // save the nonterminal
      h_slist_push(stack, mark);  // frame delimiter

      // open a fresh result sequence
      seq = h_carray_new(arena);

      // look up applicable production in parse table.
      // the stream is passed by value, so the lookup does not consume input.
      const HCFSequence *p = h_llk_lookup(table, x, *stream);
      if(p == NULL)
        goto no_parse;

      // push production's rhs onto the stack (in reverse order).
      // p->items is NULL-terminated; count first, then walk back by index so
      // that no pointer before the start of the array is ever formed.
      size_t n = 0;
      while(p->items[n] != NULL)
        n++;
      while(n > 0)
        h_slist_push(stack, p->items[--n]);

      continue; // no result to record
    }

    // the top of stack is such that there will be a result...
    HParsedToken *tok;  // will hold result token
    if(x == mark) {
      // hit stack frame boundary...
      // wrap the accumulated parse result, this sequence is finished
      tok = h_arena_malloc(arena, sizeof(HParsedToken));
      tok->token_type = TT_SEQUENCE;
      tok->seq = seq;

      // recover original nonterminal and result sequence
      x = h_slist_pop(stack);
      seq = h_slist_pop(stack);
      // tok becomes next left-most element of higher-level sequence
    }
    else {
      // x is a terminal or simple charset; match against input

      // consume the input token
      uint8_t input = h_read_bits(stream, 8, false);

      switch(x->type) {
      case HCF_END:
        // matches only if the read above ran past the end of the input
        if(!stream->overrun)
          goto no_parse;
        tok = NULL;
        break;

      case HCF_CHAR:
        // at end of input there is no byte to compare; 'input' is not
        // meaningful then, so reject just like the charset case does
        if(stream->overrun)
          goto no_parse;
        if(input != x->chr)
          goto no_parse;
        tok = h_arena_malloc(arena, sizeof(HParsedToken));
        tok->token_type = TT_UINT;
        tok->uint = x->chr;
        break;

      case HCF_CHARSET:
        if(stream->overrun)
          goto no_parse;
        if(!charset_isset(x->charset, input))
          goto no_parse;
        tok = h_arena_malloc(arena, sizeof(HParsedToken));
        tok->token_type = TT_UINT;
        tok->uint = input;
        break;

      default: // should not be reached
        assert_message(0, "unknown HCFChoice type");
        goto no_parse;
      }
    }

    // 'tok' has been parsed; process it

    // XXX set tok->index and tok->bit_offset (don't take directly from
    //     stream, because of the lookahead peeking!)

    // perform token reshape if indicated
    if(x->reshape)
      tok = (HParsedToken *)x->reshape(make_result(arena, tok));

    // call validation and semantic action, if present
    if(x->pred && !x->pred(make_result(tarena, tok)))
      goto no_parse;    // validation failed -> no parse
    if(x->action)
      tok = (HParsedToken *)x->action(make_result(arena, tok));

    // append to result sequence
    h_carray_append(seq, tok);
  }

  // since we started with a single nonterminal on the stack, seq should
  // contain exactly the parse result.
  assert(seq->used == 1);
  h_delete_arena(tarena);
  return make_result(arena, seq->elements[0]);

 no_parse:
  h_delete_arena(tarena);
  h_delete_arena(arena);
  return NULL;
}
|
|
|
|
|
|
2013-04-27 04:24:09 +02:00
|
|
|
|
|
|
|
|
|
2013-05-11 19:04:59 +02:00
|
|
|
HParserBackendVTable h__llk_backend_vtable = {
|
|
|
|
|
.compile = h_llk_compile,
|
2013-05-11 22:02:59 +02:00
|
|
|
.parse = h_llk_parse,
|
|
|
|
|
.free = h_llk_free
|
2013-04-17 15:10:33 +02:00
|
|
|
};
|
2013-05-05 22:15:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// dummy!
|
2013-05-11 19:04:59 +02:00
|
|
|
int test_llk(void)
|
2013-05-05 22:15:40 +02:00
|
|
|
{
|
2013-05-20 21:28:16 +02:00
|
|
|
/* for k=2:
|
|
|
|
|
|
|
|
|
|
S -> A | B
|
|
|
|
|
A -> X Y a
|
|
|
|
|
B -> Y b
|
|
|
|
|
X -> x | ''
|
|
|
|
|
Y -> y -- for k=3 use "yy"
|
|
|
|
|
*/
|
|
|
|
|
|
2013-05-23 14:42:43 +02:00
|
|
|
HParser *X = h_optional(h_ch('x'));
|
2013-05-23 21:01:37 +02:00
|
|
|
HParser *Y = h_epsilon_p(); //h_sequence(h_ch('y'), NULL);
|
2013-05-23 14:42:43 +02:00
|
|
|
HParser *A = h_sequence(X, Y, h_ch('a'), NULL);
|
|
|
|
|
HParser *B = h_sequence(Y, h_ch('b'), NULL);
|
|
|
|
|
HParser *p = h_choice(A, B, NULL);
|
2013-05-05 22:15:40 +02:00
|
|
|
|
2013-05-07 23:37:02 +02:00
|
|
|
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
2013-05-05 22:15:40 +02:00
|
|
|
|
|
|
|
|
if(g == NULL) {
|
2013-05-07 23:37:02 +02:00
|
|
|
fprintf(stderr, "h_cfgrammar failed\n");
|
2013-05-05 22:15:40 +02:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h_pprint_grammar(stdout, g, 0);
|
2013-05-22 22:45:25 +02:00
|
|
|
printf("derive epsilon: ");
|
2013-05-05 22:15:40 +02:00
|
|
|
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
|
|
|
|
printf("first(A) = ");
|
2013-05-23 14:42:43 +02:00
|
|
|
h_pprint_stringset(stdout, g, h_first(3, g, g->start), 0);
|
|
|
|
|
//printf("follow(C) = ");
|
|
|
|
|
//h_pprint_stringset(stdout, g, h_follow(3, g, h_desugar(&system_allocator, c)), 0);
|
2013-05-05 22:15:40 +02:00
|
|
|
|
2013-05-23 14:42:43 +02:00
|
|
|
if(h_compile(p, PB_LLk, NULL)) {
|
|
|
|
|
fprintf(stderr, "does not compile\n");
|
|
|
|
|
return 2;
|
|
|
|
|
}
|
2013-05-11 19:06:23 +02:00
|
|
|
|
2013-05-23 14:42:43 +02:00
|
|
|
HParseResult *res = h_parse(p, (uint8_t *)"xya", 3);
|
2013-05-14 11:51:54 +02:00
|
|
|
if(res)
|
|
|
|
|
h_pprint(stdout, res->ast, 0, 2);
|
|
|
|
|
else
|
|
|
|
|
printf("no parse\n");
|
2013-05-11 19:06:23 +02:00
|
|
|
|
2013-05-05 22:15:40 +02:00
|
|
|
return 0;
|
|
|
|
|
}
|