add (untested) first set analysis
This commit is contained in:
parent
0b9c9a6a10
commit
73a84d17b8
1 changed files with 87 additions and 7 deletions
|
|
@ -2,7 +2,6 @@
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
#define H_LL_END_TOKEN 256
|
|
||||||
|
|
||||||
|
|
||||||
/* Grammar representation and analysis */
|
/* Grammar representation and analysis */
|
||||||
|
|
@ -11,10 +10,13 @@ typedef struct HCFGrammar_ {
|
||||||
HHashSet *nts; // HCFChoices, each representing the alternative
|
HHashSet *nts; // HCFChoices, each representing the alternative
|
||||||
// productions for one nonterminal
|
// productions for one nonterminal
|
||||||
HHashSet *geneps; // set of NTs that can generate the empty string
|
HHashSet *geneps; // set of NTs that can generate the empty string
|
||||||
HHashTable *first; // "first" sets of the grammar's nonterminals
|
HHashTable *first; // memoized "first" sets of the grammar's symbols
|
||||||
HArena *arena;
|
HArena *arena;
|
||||||
} HCFGrammar;
|
} HCFGrammar;
|
||||||
|
|
||||||
|
typedef int HCFTerminal;
|
||||||
|
static HCFTerminal end_token = -1;
|
||||||
|
|
||||||
bool h_eq_ptr(const void *p, const void *q) { return (p==q); }
|
bool h_eq_ptr(const void *p, const void *q) { return (p==q); }
|
||||||
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
|
HHashValue h_hash_ptr(const void *p) { return (uintptr_t)p; }
|
||||||
|
|
||||||
|
|
@ -26,14 +28,14 @@ HCFGrammar *h_grammar_new(HAllocator *mm__)
|
||||||
g->arena = h_new_arena(mm__, 0); // default blocksize
|
g->arena = h_new_arena(mm__, 0); // default blocksize
|
||||||
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
g->geneps = NULL;
|
g->geneps = NULL;
|
||||||
g->first = NULL;
|
g->first = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
// helpers
|
|
||||||
|
// helper
|
||||||
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
|
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
|
||||||
static void collect_geneps(HCFGrammar *grammar);
|
|
||||||
|
|
||||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||||
* of nonterminals.
|
* of nonterminals.
|
||||||
|
|
@ -103,6 +105,10 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// helper
|
||||||
|
static void collect_geneps(HCFGrammar *grammar);
|
||||||
|
|
||||||
/* Does the given symbol derive the empty string (under g)? */
|
/* Does the given symbol derive the empty string (under g)? */
|
||||||
bool h_symbol_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
|
bool h_symbol_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
|
||||||
{
|
{
|
||||||
|
|
@ -169,8 +175,82 @@ static void collect_geneps(HCFGrammar *g)
|
||||||
} while(g->nts->used != prevused);
|
} while(g->nts->used != prevused);
|
||||||
}
|
}
|
||||||
|
|
||||||
// first_set(grammar?, production)
|
|
||||||
// follow_set
|
// helper
|
||||||
|
static HHashSet *first_sequence_(HCFGrammar *g, const HCFChoice **s);
|
||||||
|
|
||||||
|
static inline HHashSet *h_first_sequence(HCFGrammar *g, const HCFSequence *s)
|
||||||
|
{
|
||||||
|
// why do I have to cast to a type that's _more_ const?
|
||||||
|
return first_sequence_(g, (const HCFChoice **)s->items);
|
||||||
|
}
|
||||||
|
|
||||||
|
static HHashSet *h_first_symbol(HCFGrammar *g, const HCFChoice *x)
|
||||||
|
{
|
||||||
|
HHashSet *ret;
|
||||||
|
HCFSequence **p;
|
||||||
|
uint8_t c;
|
||||||
|
|
||||||
|
// memoize via g->first
|
||||||
|
assert(g->first != NULL);
|
||||||
|
ret = h_hashtable_get(g->first, x);
|
||||||
|
if(ret != NULL)
|
||||||
|
return ret;
|
||||||
|
ret = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
assert(ret != NULL);
|
||||||
|
h_hashtable_put(g->first, x, ret);
|
||||||
|
|
||||||
|
switch(x->type) {
|
||||||
|
case HCF_END:
|
||||||
|
h_hashset_put(ret, (void *)(intptr_t)end_token);
|
||||||
|
break;
|
||||||
|
case HCF_CHAR:
|
||||||
|
h_hashset_put(ret, (void *)(intptr_t)x->chr);
|
||||||
|
break;
|
||||||
|
case HCF_CHARSET:
|
||||||
|
c=0;
|
||||||
|
do {
|
||||||
|
if(charset_isset(x->charset, c))
|
||||||
|
h_hashset_put(ret, (void *)(intptr_t)c);
|
||||||
|
} while(c++ < 255);
|
||||||
|
break;
|
||||||
|
case HCF_CHOICE:
|
||||||
|
// this is a nonterminal
|
||||||
|
// return the union of the first sets of all productions
|
||||||
|
for(p=x->seq; *p; ++p)
|
||||||
|
h_hashset_put_all(ret, h_first_sequence(g, *p));
|
||||||
|
break;
|
||||||
|
default: // should not be reached
|
||||||
|
assert_message(0, "unknown HCFChoice type");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HHashSet *first_sequence_(HCFGrammar *g, const HCFChoice **s)
|
||||||
|
{
|
||||||
|
// the first set of the empty sequence is empty
|
||||||
|
if(*s == NULL)
|
||||||
|
return h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
|
||||||
|
// first(X tail) = first(X) if X does not derive epsilon
|
||||||
|
// = first(X) u first(tail) otherwise
|
||||||
|
|
||||||
|
const HCFChoice *x = s[0];
|
||||||
|
const HCFChoice **tail = s+1;
|
||||||
|
|
||||||
|
HHashSet *first_x = h_first_symbol(g, x);
|
||||||
|
if(h_symbol_derives_epsilon(g, x)) {
|
||||||
|
// return the union of first(x) and first(tail)
|
||||||
|
HHashSet *first_tail = first_sequence_(g, tail);
|
||||||
|
HHashSet *ret = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
h_hashset_put_all(ret, first_x);
|
||||||
|
h_hashset_put_all(ret, first_tail);
|
||||||
|
return ret;
|
||||||
|
} else {
|
||||||
|
return first_x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue