Merge pull request #13 from pesco/LALR

LALR backend
This commit is contained in:
Meredith L. Patterson 2013-06-16 13:34:17 -07:00
commit 3e85648844
13 changed files with 1275 additions and 104 deletions

View file

@ -28,6 +28,7 @@ PARSERS := \
BACKENDS := \ BACKENDS := \
packrat \ packrat \
llk \ llk \
lalr \
regex regex
HAMMER_PARTS := \ HAMMER_PARTS := \

View file

@ -65,10 +65,10 @@ void* h_arena_malloc(HArena *arena, size_t size) {
if (size <= arena->head->free) { if (size <= arena->head->free) {
// fast path.. // fast path..
void* ret = arena->head->rest + arena->head->used; void* ret = arena->head->rest + arena->head->used;
arena->used += size + 1; arena->used += size;
arena->wasted -= size; arena->wasted -= size;
arena->head->used += size + 1; arena->head->used += size;
arena->head->free -= size + 1; arena->head->free -= size;
return ret; return ret;
} else if (size > arena->block_size) { } else if (size > arena->block_size) {
// We need a new, dedicated block for it, because it won't fit in a standard sized one. // We need a new, dedicated block for it, because it won't fit in a standard sized one.

1053
src/backends/lalr.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,7 @@ static const size_t DEFAULT_KMAX = 1;
/* Generating the LL(k) parse table */ /* Generating the LL(k) parse table */
/* Maps each nonterminal (HCFChoice) of the grammar to a HCFStringMap that /* Maps each nonterminal (HCFChoice) of the grammar to a HStringMap that
* maps lookahead strings to productions (HCFSequence). * maps lookahead strings to productions (HCFSequence).
*/ */
typedef struct HLLkTable_ { typedef struct HLLkTable_ {
@ -23,13 +23,13 @@ typedef struct HLLkTable_ {
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x, const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
HInputStream lookahead) HInputStream lookahead)
{ {
const HCFStringMap *row = h_hashtable_get(table->rows, x); const HStringMap *row = h_hashtable_get(table->rows, x);
assert(row != NULL); // the table should have one row for each nonterminal assert(row != NULL); // the table should have one row for each nonterminal
assert(!row->epsilon_branch); // would match without looking at the input assert(!row->epsilon_branch); // would match without looking at the input
// XXX cases where this could be useful? // XXX cases where this could be useful?
const HCFStringMap *m = row; const HStringMap *m = row;
while(m) { while(m) {
if(m->epsilon_branch) { // input matched if(m->epsilon_branch) { // input matched
// assert: another lookahead would not bring a more specific match. // assert: another lookahead would not bring a more specific match.
@ -103,7 +103,7 @@ static void *combine_entries(HHashSet *workset, void *dst, const void *src)
// add the mappings of src to dst, marking conflicts and adding the conflicting // add the mappings of src to dst, marking conflicts and adding the conflicting
// values to workset. // values to workset.
// note: reuses parts of src to build dst! // note: reuses parts of src to build dst!
static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap *src) static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src)
{ {
if(src->epsilon_branch) { if(src->epsilon_branch) {
if(dst->epsilon_branch) if(dst->epsilon_branch)
@ -135,10 +135,10 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap *
continue; continue;
HCharKey c = (HCharKey)hte->key; HCharKey c = (HCharKey)hte->key;
HCFStringMap *src_ = hte->value; HStringMap *src_ = hte->value;
if(src_) { if(src_) {
HCFStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c); HStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c);
if(dst_) if(dst_)
stringmap_merge(workset, dst_, src_); stringmap_merge(workset, dst_, src_);
else else
@ -149,7 +149,7 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap *
} }
/* Generate entries for the productions of A in the given table row. */ /* Generate entries for the productions of A in the given table row. */
static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row, static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row,
const HCFChoice *A) const HCFChoice *A)
{ {
HHashSet *workset; HHashSet *workset;
@ -177,7 +177,7 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row,
assert(rhs != CONFLICT); // just to be sure there's no mixup assert(rhs != CONFLICT); // just to be sure there's no mixup
// calculate predict set; let values map to rhs // calculate predict set; let values map to rhs
HCFStringMap *pred = h_predict(k, g, A, rhs); HStringMap *pred = h_predict(k, g, A, rhs);
h_stringmap_replace(pred, NULL, rhs); h_stringmap_replace(pred, NULL, rhs);
// merge predict set into the row // merge predict set into the row
@ -220,7 +220,7 @@ static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table)
assert(a->type == HCF_CHOICE); assert(a->type == HCF_CHOICE);
// create table row for this nonterminal // create table row for this nonterminal
HCFStringMap *row = h_stringmap_new(table->arena); HStringMap *row = h_stringmap_new(table->arena);
h_hashtable_put(table->rows, a, row); h_hashtable_put(table->rows, a, row);
if(fill_table_row(kmax, g, row, a) < 0) { if(fill_table_row(kmax, g, row, a) < 0) {
@ -339,10 +339,12 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
// the top of stack is such that there will be a result... // the top of stack is such that there will be a result...
HParsedToken *tok; // will hold result token HParsedToken *tok; // will hold result token
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->index = stream->index;
tok->bit_offset = stream->bit_offset;
if(x == mark) { if(x == mark) {
// hit stack frame boundary... // hit stack frame boundary...
// wrap the accumulated parse result, this sequence is finished // wrap the accumulated parse result, this sequence is finished
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_SEQUENCE; tok->token_type = TT_SEQUENCE;
tok->seq = seq; tok->seq = seq;
@ -361,13 +363,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
case HCF_END: case HCF_END:
if(!stream->overrun) if(!stream->overrun)
goto no_parse; goto no_parse;
h_arena_free(arena, tok);
tok = NULL; tok = NULL;
break; break;
case HCF_CHAR: case HCF_CHAR:
if(input != x->chr) if(input != x->chr)
goto no_parse; goto no_parse;
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_UINT; tok->token_type = TT_UINT;
tok->uint = x->chr; tok->uint = x->chr;
break; break;
@ -377,7 +379,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
goto no_parse; goto no_parse;
if(!charset_isset(x->charset, input)) if(!charset_isset(x->charset, input))
goto no_parse; goto no_parse;
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_UINT; tok->token_type = TT_UINT;
tok->uint = input; tok->uint = input;
break; break;
@ -390,8 +391,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
// 'tok' has been parsed; process it // 'tok' has been parsed; process it
// XXX set tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
// perform token reshape if indicated // perform token reshape if indicated
if(x->reshape) if(x->reshape)
tok = (HParsedToken *)x->reshape(make_result(arena, tok)); tok = (HParsedToken *)x->reshape(make_result(arena, tok));

View file

@ -3,14 +3,6 @@
#include "../internal.h" #include "../internal.h"
#include "../parsers/parser_internal.h" #include "../parsers/parser_internal.h"
/* Bernstein-style multiplicative hash over a byte buffer.
 * Starts from 5381 and, for every byte, multiplies the running value by 33
 * and adds the byte. Arithmetic is unsigned 32-bit and wraps on overflow.
 *
 * buf: bytes to hash (only the first len bytes are read)
 * len: number of bytes; 0 yields the seed value 5381
 */
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    // (acc << 5) + acc == acc * 33 in wrapping 32-bit arithmetic
    acc = (acc << 5) + acc + buf[i];
  }
  return acc;
}
// short-hand for constructing HCachedResult's // short-hand for constructing HCachedResult's
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
HCachedResult *ret = a_new(HCachedResult, 1); HCachedResult *ret = a_new(HCachedResult, 1);
@ -214,7 +206,7 @@ void h_packrat_free(HParser *parser) {
} }
static uint32_t cache_key_hash(const void* key) { static uint32_t cache_key_hash(const void* key) {
return djbhash(key, sizeof(HParserCacheKey)); return h_djbhash(key, sizeof(HParserCacheKey));
} }
static bool cache_key_equal(const void* key1, const void* key2) { static bool cache_key_equal(const void* key1, const void* key2) {
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;

View file

@ -18,12 +18,13 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
g->mm__ = mm__; g->mm__ = mm__;
g->arena = h_new_arena(mm__, 0); // default blocksize g->arena = h_new_arena(mm__, 0); // default blocksize
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
g->start = NULL;
g->geneps = NULL; g->geneps = NULL;
g->first = NULL; g->first = NULL;
g->follow = NULL; g->follow = NULL;
g->kmax = 0; // will be increased as needed by ensure_k g->kmax = 0; // will be increased as needed by ensure_k
HCFStringMap *eps = h_stringmap_new(g->arena); HStringMap *eps = h_stringmap_new(g->arena);
h_stringmap_put_epsilon(eps, INSET); h_stringmap_put_epsilon(eps, INSET);
g->singleton_epsilon = eps; g->singleton_epsilon = eps;
@ -50,6 +51,11 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
if(desugared == NULL) if(desugared == NULL)
return NULL; // -> backend not suitable for this parser return NULL; // -> backend not suitable for this parser
return h_cfgrammar_(mm__, desugared);
}
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
{
HCFGrammar *g = h_cfgrammar_new(mm__); HCFGrammar *g = h_cfgrammar_new(mm__);
// recursively traverse the desugared form and collect all HCFChoices that // recursively traverse the desugared form and collect all HCFChoices that
@ -219,32 +225,34 @@ static void collect_geneps(HCFGrammar *g)
} }
HCFStringMap *h_stringmap_new(HArena *a) HStringMap *h_stringmap_new(HArena *a)
{ {
HCFStringMap *m = h_arena_malloc(a, sizeof(HCFStringMap)); HStringMap *m = h_arena_malloc(a, sizeof(HStringMap));
m->epsilon_branch = NULL;
m->end_branch = NULL;
m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr); m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
m->arena = a; m->arena = a;
return m; return m;
} }
void h_stringmap_put_end(HCFStringMap *m, void *v) void h_stringmap_put_end(HStringMap *m, void *v)
{ {
m->end_branch = v; m->end_branch = v;
} }
void h_stringmap_put_epsilon(HCFStringMap *m, void *v) void h_stringmap_put_epsilon(HStringMap *m, void *v)
{ {
m->epsilon_branch = v; m->epsilon_branch = v;
} }
void h_stringmap_put_after(HCFStringMap *m, uint8_t c, HCFStringMap *ends) void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends)
{ {
h_hashtable_put(m->char_branches, (void *)char_key(c), ends); h_hashtable_put(m->char_branches, (void *)char_key(c), ends);
} }
void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v) void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v)
{ {
HCFStringMap *node = h_stringmap_new(m->arena); HStringMap *node = h_stringmap_new(m->arena);
h_stringmap_put_epsilon(node, v); h_stringmap_put_epsilon(node, v);
h_stringmap_put_after(m, c, node); h_stringmap_put_after(m, c, node);
} }
@ -252,8 +260,8 @@ void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v)
// helper for h_stringmap_update // helper for h_stringmap_update
static void *combine_stringmap(void *v1, const void *v2) static void *combine_stringmap(void *v1, const void *v2)
{ {
HCFStringMap *m1 = v1; HStringMap *m1 = v1;
const HCFStringMap *m2 = v2; const HStringMap *m2 = v2;
if(!m1) if(!m1)
m1 = h_stringmap_new(m2->arena); m1 = h_stringmap_new(m2->arena);
h_stringmap_update(m1, m2); h_stringmap_update(m1, m2);
@ -262,7 +270,7 @@ static void *combine_stringmap(void *v1, const void *v2)
} }
/* Note: Does *not* reuse submaps from n in building m. */ /* Note: Does *not* reuse submaps from n in building m. */
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n) void h_stringmap_update(HStringMap *m, const HStringMap *n)
{ {
if(n->epsilon_branch) if(n->epsilon_branch)
m->epsilon_branch = n->epsilon_branch; m->epsilon_branch = n->epsilon_branch;
@ -277,7 +285,7 @@ void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n)
* If old is NULL, replace all values in m with new. * If old is NULL, replace all values in m with new.
* If new is NULL, remove the respective values. * If new is NULL, remove the respective values.
*/ */
void h_stringmap_replace(HCFStringMap *m, void *old, void *new) void h_stringmap_replace(HStringMap *m, void *old, void *new)
{ {
if(!old) { if(!old) {
if(m->epsilon_branch) m->epsilon_branch = new; if(m->epsilon_branch) m->epsilon_branch = new;
@ -294,14 +302,14 @@ void h_stringmap_replace(HCFStringMap *m, void *old, void *new)
if(hte->key == NULL) if(hte->key == NULL)
continue; continue;
HCFStringMap *m_ = hte->value; HStringMap *m_ = hte->value;
if(m_) if(m_)
h_stringmap_replace(m_, old, new); h_stringmap_replace(m_, old, new);
} }
} }
} }
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end) void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{ {
for(size_t i=0; i<n; i++) { for(size_t i=0; i<n; i++) {
if(i==n-1 && end && m->end_branch) if(i==n-1 && end && m->end_branch)
@ -313,20 +321,26 @@ void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool
return m->epsilon_branch; return m->epsilon_branch;
} }
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end) bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end)
{ {
return (h_stringmap_get(m, str, n, end) != NULL); return (h_stringmap_get(m, str, n, end) != NULL);
} }
bool h_stringmap_present_epsilon(const HCFStringMap *m) bool h_stringmap_present_epsilon(const HStringMap *m)
{ {
return (m->epsilon_branch != NULL); return (m->epsilon_branch != NULL);
} }
bool h_stringmap_empty(const HStringMap *m)
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
{ {
HCFStringMap *ret; return (m->epsilon_branch == NULL
&& m->end_branch == NULL
&& h_hashtable_empty(m->char_branches));
}
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
{
HStringMap *ret;
HCFSequence **p; HCFSequence **p;
uint8_t c; uint8_t c;
@ -372,18 +386,18 @@ const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
} }
// helpers for h_first_seq, definitions below // helpers for h_first_seq, definitions below
static bool is_singleton_epsilon(const HCFStringMap *m); static bool is_singleton_epsilon(const HStringMap *m);
static bool any_string_shorter(size_t k, const HCFStringMap *m); static bool any_string_shorter(size_t k, const HStringMap *m);
// pointer to functions like h_first_seq // pointer to functions like h_first_seq
typedef const HCFStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **); typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
// helper for h_first_seq and h_follow // helper for h_first_seq and h_follow
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret, static void stringset_extend(HCFGrammar *g, HStringMap *ret,
size_t k, const HCFStringMap *as, size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail); StringSetFun f, HCFChoice **tail);
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{ {
// shortcut: the first set of the empty sequence, for any k, is {""} // shortcut: the first set of the empty sequence, for any k, is {""}
if(*s == NULL) if(*s == NULL)
@ -394,7 +408,7 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
HCFChoice *x = s[0]; HCFChoice *x = s[0];
HCFChoice **tail = s+1; HCFChoice **tail = s+1;
const HCFStringMap *first_x = h_first(k, g, x); const HStringMap *first_x = h_first(k, g, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail) // shortcut: if first_k(X) = {""}, just return first_k(tail)
if(is_singleton_epsilon(first_x)) if(is_singleton_epsilon(first_x))
@ -405,7 +419,7 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
return first_x; return first_x;
// create a new result set and build up the set described above // create a new result set and build up the set described above
HCFStringMap *ret = h_stringmap_new(g->arena); HStringMap *ret = h_stringmap_new(g->arena);
// extend the elements of first_k(X) up to length k from tail // extend the elements of first_k(X) up to length k from tail
stringset_extend(g, ret, k, first_x, h_first_seq, tail); stringset_extend(g, ret, k, first_x, h_first_seq, tail);
@ -413,14 +427,14 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
return ret; return ret;
} }
static bool is_singleton_epsilon(const HCFStringMap *m) static bool is_singleton_epsilon(const HStringMap *m)
{ {
return ( m->epsilon_branch return ( m->epsilon_branch
&& !m->end_branch && !m->end_branch
&& h_hashtable_empty(m->char_branches) ); && h_hashtable_empty(m->char_branches) );
} }
static bool any_string_shorter(size_t k, const HCFStringMap *m) static bool any_string_shorter(size_t k, const HStringMap *m)
{ {
if(k==0) if(k==0)
return false; return false;
@ -434,7 +448,7 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
if(hte->key == NULL) if(hte->key == NULL)
continue; continue;
HCFStringMap *m_ = hte->value; HStringMap *m_ = hte->value;
// check subtree for strings shorter than k-1 // check subtree for strings shorter than k-1
if(any_string_shorter(k-1, m_)) if(any_string_shorter(k-1, m_))
@ -446,7 +460,7 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
} }
// helper for h_predict // helper for h_predict
static void remove_all_shorter(size_t k, HCFStringMap *m) static void remove_all_shorter(size_t k, HStringMap *m)
{ {
if(k==0) return; if(k==0) return;
m->epsilon_branch = NULL; m->epsilon_branch = NULL;
@ -465,12 +479,12 @@ static void remove_all_shorter(size_t k, HCFStringMap *m)
// h_follow adapted to the signature of StringSetFun // h_follow adapted to the signature of StringSetFun
static inline static inline
const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s) const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
{ {
return h_follow(k, g, *s); return h_follow(k, g, *s);
} }
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
{ {
// consider all occurances of X in g // consider all occurances of X in g
// the follow set of X is the union of: // the follow set of X is the union of:
@ -481,7 +495,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
// first_k(tail follow_k(A)) = // first_k(tail follow_k(A)) =
// { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| } // { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
HCFStringMap *ret; HStringMap *ret;
// shortcut: follow_0(X) is always {""} // shortcut: follow_0(X) is always {""}
if(k==0) if(k==0)
@ -519,7 +533,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
if(*s == x) { // occurance found if(*s == x) { // occurance found
HCFChoice **tail = s+1; HCFChoice **tail = s+1;
const HCFStringMap *first_tail = h_first_seq(k, g, tail); const HStringMap *first_tail = h_first_seq(k, g, tail);
// extend the elems of first_k(tail) up to length k from follow(A) // extend the elems of first_k(tail) up to length k from follow(A)
stringset_extend(g, ret, k, first_tail, h_follow_, &a); stringset_extend(g, ret, k, first_tail, h_follow_, &a);
@ -532,15 +546,15 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
return ret; return ret;
} }
HCFStringMap *h_predict(size_t k, HCFGrammar *g, HStringMap *h_predict(size_t k, HCFGrammar *g,
const HCFChoice *A, const HCFSequence *rhs) const HCFChoice *A, const HCFSequence *rhs)
{ {
HCFStringMap *ret = h_stringmap_new(g->arena); HStringMap *ret = h_stringmap_new(g->arena);
// predict_k(A -> rhs) = // predict_k(A -> rhs) =
// { ab | a <- first_k(rhs), b <- follow_k(A), |ab|=k } // { ab | a <- first_k(rhs), b <- follow_k(A), |ab|=k }
const HCFStringMap *first_rhs = h_first_seq(k, g, rhs->items); const HStringMap *first_rhs = h_first_seq(k, g, rhs->items);
// casting the const off of A below. note: stringset_extend does // casting the const off of A below. note: stringset_extend does
// not touch this argument, only passes it through to h_follow // not touch this argument, only passes it through to h_follow
@ -554,8 +568,8 @@ HCFStringMap *h_predict(size_t k, HCFGrammar *g,
} }
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret // add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret, static void stringset_extend(HCFGrammar *g, HStringMap *ret,
size_t k, const HCFStringMap *as, size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail) StringSetFun f, HCFChoice **tail)
{ {
if(as->epsilon_branch) { if(as->epsilon_branch) {
@ -578,12 +592,12 @@ static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
uint8_t c = key_char((HCharKey)hte->key); uint8_t c = key_char((HCharKey)hte->key);
// follow the branch to find the set { a' | t a' <- as } // follow the branch to find the set { a' | t a' <- as }
HCFStringMap *as_ = (HCFStringMap *)hte->value; HStringMap *as_ = (HStringMap *)hte->value;
// now the elements of ret that begin with t are given by // now the elements of ret that begin with t are given by
// t { a b | a <- as_, b <- f_l(tail), l=k-|a|-1 } // t { a b | a <- as_, b <- f_l(tail), l=k-|a|-1 }
// so we can use recursion over k // so we can use recursion over k
HCFStringMap *ret_ = h_stringmap_new(g->arena); HStringMap *ret_ = h_stringmap_new(g->arena);
h_stringmap_put_after(ret, c, ret_); h_stringmap_put_after(ret, c, ret_);
stringset_extend(g, ret_, k-1, as_, f, tail); stringset_extend(g, ret_, k-1, as_, f, tail);
@ -592,7 +606,7 @@ static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
} }
static void pprint_char(FILE *f, char c) void h_pprint_char(FILE *f, char c)
{ {
switch(c) { switch(c) {
case '"': fputs("\\\"", f); break; case '"': fputs("\\\"", f); break;
@ -616,7 +630,7 @@ static void pprint_charset_char(FILE *f, char c)
case '"': fputc(c, f); break; case '"': fputc(c, f); break;
case '-': fputs("\\-", f); break; case '-': fputs("\\-", f); break;
case ']': fputs("\\-", f); break; case ']': fputs("\\-", f); break;
default: pprint_char(f, c); default: h_pprint_char(f, c);
} }
} }
@ -664,7 +678,7 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
for(; *x; x++) { for(; *x; x++) {
if((*x)->type != HCF_CHAR) if((*x)->type != HCF_CHAR)
break; break;
pprint_char(f, (*x)->chr); h_pprint_char(f, (*x)->chr);
} }
fputc('"', f); fputc('"', f);
return x; return x;
@ -675,7 +689,7 @@ void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
switch(x->type) { switch(x->type) {
case HCF_CHAR: case HCF_CHAR:
fputc('"', f); fputc('"', f);
pprint_char(f, x->chr); h_pprint_char(f, x->chr);
fputc('"', f); fputc('"', f);
break; break;
case HCF_END: case HCF_END:
@ -800,7 +814,7 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
static bool static bool
pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
const HCFStringMap *set) const HStringMap *set)
{ {
assert(n < BUFSIZE-4); assert(n < BUFSIZE-4);
@ -827,7 +841,7 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
if(hte->key == NULL) if(hte->key == NULL)
continue; continue;
uint8_t c = key_char((HCharKey)hte->key); uint8_t c = key_char((HCharKey)hte->key);
HCFStringMap *ends = hte->value; HStringMap *ends = hte->value;
size_t n_ = n; size_t n_ = n;
switch(c) { switch(c) {
@ -852,7 +866,7 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n,
return first; return first;
} }
void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent) void h_pprint_stringset(FILE *file, const HStringMap *set, int indent)
{ {
int j; int j;
for(j=0; j<indent; j++) fputc(' ', file); for(j=0; j<indent; j++) fputc(' ', file);

View file

@ -16,7 +16,7 @@ typedef struct HCFGrammar_ {
// constant set containing only the empty string. // constant set containing only the empty string.
// this is only a member of HCFGrammar because it needs a pointer to arena. // this is only a member of HCFGrammar because it needs a pointer to arena.
const struct HCFStringMap_ *singleton_epsilon; const struct HStringMap_ *singleton_epsilon;
} HCFGrammar; } HCFGrammar;
@ -32,25 +32,26 @@ static inline uint8_t key_char(HCharKey k) { return (0xFF & k); }
* input tokens. * input tokens.
* Each path through the tree represents the string along its branches. * Each path through the tree represents the string along its branches.
*/ */
typedef struct HCFStringMap_ { typedef struct HStringMap_ {
void *epsilon_branch; // points to leaf value void *epsilon_branch; // points to leaf value
void *end_branch; // points to leaf value void *end_branch; // points to leaf value
HHashTable *char_branches; // maps to inner nodes (HCFStringMaps) HHashTable *char_branches; // maps to inner nodes (HStringMaps)
HArena *arena; HArena *arena;
} HCFStringMap; } HStringMap;
HCFStringMap *h_stringmap_new(HArena *a); HStringMap *h_stringmap_new(HArena *a);
void h_stringmap_put_end(HCFStringMap *m, void *v); void h_stringmap_put_end(HStringMap *m, void *v);
void h_stringmap_put_epsilon(HCFStringMap *m, void *v); void h_stringmap_put_epsilon(HStringMap *m, void *v);
void h_stringmap_put_after(HCFStringMap *m, uint8_t c, HCFStringMap *ends); void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends);
void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v); void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v);
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n); void h_stringmap_update(HStringMap *m, const HStringMap *n);
void h_stringmap_replace(HCFStringMap *m, void *old, void *new); void h_stringmap_replace(HStringMap *m, void *old, void *new);
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end); void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end); bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HCFStringMap *m); bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);
static inline HCFStringMap *h_stringmap_get_char(const HCFStringMap *m, const uint8_t c) static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); } { return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
@ -59,6 +60,9 @@ static inline HCFStringMap *h_stringmap_get_char(const HCFStringMap *m, const ui
* A NULL return means we are unable to represent the parser as a CFG. * A NULL return means we are unable to represent the parser as a CFG.
*/ */
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser); HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *start);
HCFGrammar *h_cfgrammar_new(HAllocator *mm__);
/* Frees the given grammar and associated data. /* Frees the given grammar and associated data.
* Does *not* free parsers' CFG forms as created by h_desugar. * Does *not* free parsers' CFG forms as created by h_desugar.
@ -72,18 +76,18 @@ bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol);
bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s); bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s);
/* Compute first_k set of symbol x. Memoized. */ /* Compute first_k set of symbol x. Memoized. */
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x); const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x);
/* Compute first_k set of sentential form s. s NULL-terminated. */ /* Compute first_k set of sentential form s. s NULL-terminated. */
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s); const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s);
/* Compute follow_k set of symbol x. Memoized. */ /* Compute follow_k set of symbol x. Memoized. */
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x); const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
/* Compute the predict_k set of production "A -> rhs". /* Compute the predict_k set of production "A -> rhs".
* Always returns a newly-allocated HCFStringMap. * Always returns a newly-allocated HStringMap.
*/ */
HCFStringMap *h_predict(size_t k, HCFGrammar *g, HStringMap *h_predict(size_t k, HCFGrammar *g,
const HCFChoice *A, const HCFSequence *rhs); const HCFChoice *A, const HCFSequence *rhs);
@ -92,4 +96,5 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq); void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x); void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent); void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent); void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
void h_pprint_char(FILE *file, char c);

View file

@ -147,6 +147,8 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
for (hte = &ht->contents[hashval & (ht->capacity - 1)]; for (hte = &ht->contents[hashval & (ht->capacity - 1)];
hte != NULL; hte != NULL;
hte = hte->next) { hte = hte->next) {
if (hte->key == NULL)
continue;
if (hte->hashval != hashval) if (hte->hashval != hashval)
continue; continue;
if (ht->equalFunc(key, hte->key)) if (ht->equalFunc(key, hte->key))
@ -232,6 +234,7 @@ int h_hashtable_present(const HHashTable* ht, const void* key) {
} }
return false; return false;
} }
void h_hashtable_del(HHashTable* ht, const void* key) { void h_hashtable_del(HHashTable* ht, const void* key) {
HHashValue hashval = ht->hashFunc(key); HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK #ifdef CONSISTENCY_CHECK
@ -257,6 +260,7 @@ void h_hashtable_del(HHashTable* ht, const void* key) {
} }
} }
} }
void h_hashtable_free(HHashTable* ht) { void h_hashtable_free(HHashTable* ht) {
for (size_t i = 0; i < ht->capacity; i++) { for (size_t i = 0; i < ht->capacity; i++) {
HHashTableEntry *hten, *hte = &ht->contents[i]; HHashTableEntry *hten, *hte = &ht->contents[i];
@ -272,11 +276,72 @@ void h_hashtable_free(HHashTable* ht) {
h_arena_free(ht->arena, ht->contents); h_arena_free(ht->arena, ht->contents);
} }
// helper for hte_equal: do the two entry chains hold the same number of
// present elements? An entry whose key is NULL stands for "element not
// present" and is never counted.
static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) {
  // Skip absent entries at the head of each chain before counting anything.
  // Without this, a chain whose first entry has a NULL key (e.g. an empty
  // bucket) would be counted as holding one element and would compare as
  // "same length" as a chain with exactly one real element.
  while(xs && xs->key == NULL) xs=xs->next;
  while(ys && ys->key == NULL) ys=ys->next;

  // invariant: xs and ys each point at a present element, or are NULL
  while(xs && ys) {
    xs=xs->next;
    ys=ys->next;
    // skip NULL keys (= element not present)
    while(xs && xs->key == NULL) xs=xs->next;
    while(ys && ys->key == NULL) ys=ys->next;
  }

  // at least one side is exhausted here; equal only if both are
  return (xs == ys);    // both NULL
}
// helper for hte_equal: are all elements of xs present in ys?
//
// eq: equality predicate applied to pairs of keys
// xs: chain whose present elements are looked up
// ys: chain that is searched
//
// Entries with a NULL key stand for "element not present" and are ignored
// on both sides. Returns true if every present key of xs has a match in ys
// (trivially true when xs holds no present keys).
static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
{
  for(; xs; xs=xs->next) {
    if(xs->key == NULL) continue;   // element not present

    HHashTableEntry *hte;
    for(hte=ys; hte; hte=hte->next) {
      // never hand an absent entry to eq(); mirrors the NULL-key guard
      // used when walking a bucket in h_hashtable_get
      if(hte->key == NULL) continue;
      if(hte->key == xs->key) break;  // assume an element is equal to itself
      if(hte->hashval != xs->hashval) continue; // shortcut
      if(eq(hte->key, xs->key)) break;
    }
    if(hte == NULL) return false;   // element not found
  }
  return true;                      // all found
}
// Compare two chains of HHashTableEntries as sets of keys: they are equal
// when they hold the same number of elements and every element of xs also
// occurs in ys (as judged by eq).
static inline bool hte_equal(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) {
  // cheap length comparison first; the pairwise search only runs if it passes
  if(!hte_same_length(xs, ys))
    return false;
  return hte_subset(eq, xs, ys);
}
/* Set equality of HHashSets.
* Obviously, 'a' and 'b' must use the same equality function.
* Not strictly necessary, but we also assume the same hash function.
*/
bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
if(a->capacity == b->capacity) {
// iterate over the buckets in parallel
for(size_t i=0; i < a->capacity; i++) {
if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i]))
return false;
}
} else {
assert_message(0, "h_hashset_equal called on sets of different capacity");
// TODO implement general case
}
return true;
}
bool h_eq_ptr(const void *p, const void *q) { bool h_eq_ptr(const void *p, const void *q) {
return (p==q); return (p==q);
} }
HHashValue h_hash_ptr(const void *p) { HHashValue h_hash_ptr(const void *p) {
// XXX just djbhash it // XXX just djbhash it? it does make the benchmark ~7% slower.
//return h_djbhash((const uint8_t *)&p, sizeof(void *));
return (uintptr_t)p >> 4; return (uintptr_t)p >> 4;
} }
/* DJB "times 33" hash of the first 'len' bytes of 'buf'.
 *
 * Starts from the seed 5381 and, for each byte in order, multiplies the
 * running value by 33 and adds the byte. Arithmetic is unsigned 32-bit and
 * wraps on overflow. With len == 0 the buffer is not read and the seed is
 * returned.
 */
uint32_t h_djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    // acc * 33, spelled as shift-and-add
    acc = (acc << 5) + acc + buf[i];
  }
  return acc;
}

View file

@ -30,6 +30,7 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
&h__packrat_backend_vtable, &h__packrat_backend_vtable,
&h__regex_backend_vtable, &h__regex_backend_vtable,
&h__llk_backend_vtable, &h__llk_backend_vtable,
&h__lalr_backend_vtable,
}; };

View file

@ -34,11 +34,11 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ { typedef enum HParserBackend_ {
PB_MIN = 0, PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default. PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_REGULAR, // PB_REGULAR,
PB_LLk, // PB_LLk,
PB_LALR, // Not Implemented PB_LALR,
PB_GLR, // Not Implemented PB_GLR, // Not Implemented
PB_MAX = PB_LLk PB_MAX = PB_LALR
} HParserBackend; } HParserBackend;
typedef enum HTokenType_ { typedef enum HTokenType_ {

View file

@ -219,6 +219,7 @@ struct HBitWriter_ {
// Backends {{{ // Backends {{{
extern HParserBackendVTable h__packrat_backend_vtable; extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable; extern HParserBackendVTable h__llk_backend_vtable;
extern HParserBackendVTable h__lalr_backend_vtable;
// }}} // }}}
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported. // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
@ -271,9 +272,11 @@ typedef HHashTable HHashSet;
#define h_hashset_empty(ht) h_hashtable_empty(ht) #define h_hashset_empty(ht) h_hashtable_empty(ht)
#define h_hashset_del(ht,el) h_hashtable_del(ht,el) #define h_hashset_del(ht,el) h_hashtable_del(ht,el)
#define h_hashset_free(ht) h_hashtable_free(ht) #define h_hashset_free(ht) h_hashtable_free(ht)
bool h_hashset_equal(const HHashSet *a, const HHashSet *b);
bool h_eq_ptr(const void *p, const void *q); bool h_eq_ptr(const void *p, const void *q);
HHashValue h_hash_ptr(const void *p); HHashValue h_hash_ptr(const void *p);
uint32_t h_djbhash(const uint8_t *buf, size_t len);
typedef struct HCFSequence_ HCFSequence; typedef struct HCFSequence_ HCFSequence;

View file

@ -405,7 +405,7 @@ static void test_not(gconstpointer backend) {
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a+b", 3, "(u0x61 (u0x2b) u0x62)"); g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a+b", 3, "(u0x61 (u0x2b) u0x62)");
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)"); g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)");
} }
/*
static void test_leftrec(gconstpointer backend) { static void test_leftrec(gconstpointer backend) {
HParser *a_ = h_ch('a'); HParser *a_ = h_ch('a');
@ -416,7 +416,7 @@ static void test_leftrec(gconstpointer backend) {
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)"); g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)"); g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
} }
*/
static void test_rightrec(gconstpointer backend) { static void test_rightrec(gconstpointer backend) {
HParser *a_ = h_ch('a'); HParser *a_ = h_ch('a');
@ -547,4 +547,42 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p); g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p);
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool); g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore); g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
g_test_add_data_func("/core/parser/lalr/ch_range", GINT_TO_POINTER(PB_LALR), test_ch_range);
g_test_add_data_func("/core/parser/lalr/int64", GINT_TO_POINTER(PB_LALR), test_int64);
g_test_add_data_func("/core/parser/lalr/int32", GINT_TO_POINTER(PB_LALR), test_int32);
g_test_add_data_func("/core/parser/lalr/int16", GINT_TO_POINTER(PB_LALR), test_int16);
g_test_add_data_func("/core/parser/lalr/int8", GINT_TO_POINTER(PB_LALR), test_int8);
g_test_add_data_func("/core/parser/lalr/uint64", GINT_TO_POINTER(PB_LALR), test_uint64);
g_test_add_data_func("/core/parser/lalr/uint32", GINT_TO_POINTER(PB_LALR), test_uint32);
g_test_add_data_func("/core/parser/lalr/uint16", GINT_TO_POINTER(PB_LALR), test_uint16);
g_test_add_data_func("/core/parser/lalr/uint8", GINT_TO_POINTER(PB_LALR), test_uint8);
g_test_add_data_func("/core/parser/lalr/int_range", GINT_TO_POINTER(PB_LALR), test_int_range);
#if 0
g_test_add_data_func("/core/parser/lalr/float64", GINT_TO_POINTER(PB_LALR), test_float64);
g_test_add_data_func("/core/parser/lalr/float32", GINT_TO_POINTER(PB_LALR), test_float32);
#endif
g_test_add_data_func("/core/parser/lalr/whitespace", GINT_TO_POINTER(PB_LALR), test_whitespace);
g_test_add_data_func("/core/parser/lalr/left", GINT_TO_POINTER(PB_LALR), test_left);
g_test_add_data_func("/core/parser/lalr/right", GINT_TO_POINTER(PB_LALR), test_right);
g_test_add_data_func("/core/parser/lalr/middle", GINT_TO_POINTER(PB_LALR), test_middle);
g_test_add_data_func("/core/parser/lalr/action", GINT_TO_POINTER(PB_LALR), test_action);
g_test_add_data_func("/core/parser/lalr/in", GINT_TO_POINTER(PB_LALR), test_in);
g_test_add_data_func("/core/parser/lalr/not_in", GINT_TO_POINTER(PB_LALR), test_not_in);
g_test_add_data_func("/core/parser/lalr/end_p", GINT_TO_POINTER(PB_LALR), test_end_p);
g_test_add_data_func("/core/parser/lalr/nothing_p", GINT_TO_POINTER(PB_LALR), test_nothing_p);
g_test_add_data_func("/core/parser/lalr/sequence", GINT_TO_POINTER(PB_LALR), test_sequence);
g_test_add_data_func("/core/parser/lalr/choice", GINT_TO_POINTER(PB_LALR), test_choice);
g_test_add_data_func("/core/parser/lalr/many", GINT_TO_POINTER(PB_LALR), test_many);
g_test_add_data_func("/core/parser/lalr/many1", GINT_TO_POINTER(PB_LALR), test_many1);
g_test_add_data_func("/core/parser/lalr/optional", GINT_TO_POINTER(PB_LALR), test_optional);
g_test_add_data_func("/core/parser/lalr/sepBy", GINT_TO_POINTER(PB_LALR), test_sepBy);
g_test_add_data_func("/core/parser/lalr/sepBy1", GINT_TO_POINTER(PB_LALR), test_sepBy1);
g_test_add_data_func("/core/parser/lalr/epsilon_p", GINT_TO_POINTER(PB_LALR), test_epsilon_p);
g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool);
g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore);
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
} }

View file

@ -153,7 +153,7 @@
} while(0) } while(0)
#define g_check_stringmap_absent(table, key) do { \ #define g_check_stringmap_absent(table, key) do { \
bool end = (key[strlen(key)-2] == '$'); \ bool end = (key[strlen(key)-1] == '$'); \
if(h_stringmap_present(table, (uint8_t *)key, strlen(key), end)) { \ if(h_stringmap_present(table, (uint8_t *)key, strlen(key), end)) { \
g_test_message("Check failed: \"%s\" shouldn't have been in map, but was", key); \ g_test_message("Check failed: \"%s\" shouldn't have been in map, but was", key); \
g_test_fail(); \ g_test_fail(); \