Added new build system
This commit is contained in:
commit
b0f567c090
27 changed files with 2255 additions and 217 deletions
2
Makefile
2
Makefile
|
|
@ -6,6 +6,8 @@
|
||||||
SUBDIRS = src examples jni
|
SUBDIRS = src examples jni
|
||||||
|
|
||||||
include config.mk
|
include config.mk
|
||||||
|
TOPLEVEL=.
|
||||||
|
include common.mk
|
||||||
|
|
||||||
CONFIG_VARS= INCLUDE_TESTS
|
CONFIG_VARS= INCLUDE_TESTS
|
||||||
|
|
||||||
|
|
|
||||||
16
README.md
16
README.md
|
|
@ -12,8 +12,8 @@ Features
|
||||||
* Parsing backends:
|
* Parsing backends:
|
||||||
* Packrat parsing
|
* Packrat parsing
|
||||||
* LL(k)
|
* LL(k)
|
||||||
* GLR (not yet implemented)
|
* GLR
|
||||||
* LALR(8) (not yet implemented)
|
* LALR
|
||||||
* Regular expressions
|
* Regular expressions
|
||||||
* Language bindings:
|
* Language bindings:
|
||||||
* C++ (not yet implemented)
|
* C++ (not yet implemented)
|
||||||
|
|
@ -28,7 +28,7 @@ Features
|
||||||
Installing
|
Installing
|
||||||
==========
|
==========
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
* make
|
* SCons
|
||||||
* a JDK
|
* a JDK
|
||||||
|
|
||||||
### Optional Dependencies
|
### Optional Dependencies
|
||||||
|
|
@ -36,11 +36,15 @@ Installing
|
||||||
* glib-2.0 (>= 2.29) (for `make test`)
|
* glib-2.0 (>= 2.29) (for `make test`)
|
||||||
* glib-2.0-dev (for `make test`)
|
* glib-2.0-dev (for `make test`)
|
||||||
|
|
||||||
To install, type `make`. To run the built-in test suite, type `make test`.
|
To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`.
|
||||||
|
|
||||||
If jni.h and jni_md.h aren't already somewhere on your include path, prepend `C_INCLUDE_PATH=/path/to/jdk/include` to that.
|
If jni.h and jni_md.h aren't already somewhere on your include path, prepend
|
||||||
|
`C_INCLUDE_PATH=/path/to/jdk/include` to that.
|
||||||
|
|
||||||
There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`.
|
There is currently no `install` target; to make Hammer available system-wide,
|
||||||
|
copy `libhammer.a` and `libhammer.so` from `build/opt/src` to `/usr/lib/` (or
|
||||||
|
`/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to
|
||||||
|
`/usr/include/`.
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ AddOption("--variant",
|
||||||
dest="variant",
|
dest="variant",
|
||||||
nargs=1, type="choice",
|
nargs=1, type="choice",
|
||||||
choices=["debug", "opt"],
|
choices=["debug", "opt"],
|
||||||
default="debug",
|
default="opt",
|
||||||
action="store",
|
action="store",
|
||||||
help="Build variant (debug or opt)")
|
help="Build variant (debug or opt)")
|
||||||
|
|
||||||
|
|
@ -28,3 +28,5 @@ Export('env')
|
||||||
|
|
||||||
env.SConscript(["src/SConscript"], variant_dir='build/$VARIANT/src')
|
env.SConscript(["src/SConscript"], variant_dir='build/$VARIANT/src')
|
||||||
env.SConscript(["examples/SConscript"], variant_dir='build/$VARIANT/examples')
|
env.SConscript(["examples/SConscript"], variant_dir='build/$VARIANT/examples')
|
||||||
|
|
||||||
|
env.Command('test', 'build/$VARIANT/src/test_suite', 'env LD_LIBRARY_PATH=build/$VARIANT/src $SOURCE')
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
ifneq ($(REALLY_USE_OBSOLETE_BUILD_SYSTEM),yes)
|
||||||
|
$(error This is the old build system. Use "scons" to build, or use $(MAKE) REALLY_USE_OBSOLETE_BUILD_SYSTEM=yes)
|
||||||
|
endif
|
||||||
|
|
||||||
# Check to make sure variables are properly set
|
# Check to make sure variables are properly set
|
||||||
ifeq ($(TOPLEVEL),)
|
ifeq ($(TOPLEVEL),)
|
||||||
$(error $$TOPLEVEL is unset)
|
$(error $$TOPLEVEL is unset)
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,7 @@ HParser* init_rdata(uint16_t type) {
|
||||||
parsers[16] = txt;
|
parsers[16] = txt;
|
||||||
|
|
||||||
// All parsers must consume their input exactly.
|
// All parsers must consume their input exactly.
|
||||||
for(uint16_t i; i<sizeof(parsers); i++) {
|
for(uint16_t i = 0; i<RDATA_TYPE_MAX+1; i++) {
|
||||||
if(parsers[i]) {
|
if(parsers[i]) {
|
||||||
parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
|
parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
|
||||||
act_index0);
|
act_index0);
|
||||||
|
|
|
||||||
|
|
@ -27,8 +27,10 @@ PARSERS := \
|
||||||
|
|
||||||
BACKENDS := \
|
BACKENDS := \
|
||||||
packrat \
|
packrat \
|
||||||
|
regex \
|
||||||
llk \
|
llk \
|
||||||
regex
|
lalr \
|
||||||
|
glr
|
||||||
|
|
||||||
HAMMER_PARTS := \
|
HAMMER_PARTS := \
|
||||||
bitreader.o \
|
bitreader.o \
|
||||||
|
|
@ -42,6 +44,8 @@ HAMMER_PARTS := \
|
||||||
benchmark.o \
|
benchmark.o \
|
||||||
cfgrammar.o \
|
cfgrammar.o \
|
||||||
glue.o \
|
glue.o \
|
||||||
|
backends/lr.o \
|
||||||
|
backends/lr0.o \
|
||||||
$(PARSERS:%=parsers/%.o) \
|
$(PARSERS:%=parsers/%.o) \
|
||||||
$(BACKENDS:%=backends/%.o)
|
$(BACKENDS:%=backends/%.o)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
# -*- python -*-
|
||||||
Import('env')
|
Import('env')
|
||||||
|
|
||||||
parsers = ['parsers/%s.c'%s for s in
|
parsers = ['parsers/%s.c'%s for s in
|
||||||
|
|
@ -27,7 +28,7 @@ parsers = ['parsers/%s.c'%s for s in
|
||||||
'xor']]
|
'xor']]
|
||||||
|
|
||||||
backends = ['backends/%s.c' % s for s in
|
backends = ['backends/%s.c' % s for s in
|
||||||
['packrat', 'llk', 'regex']]
|
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
|
||||||
|
|
||||||
misc_hammer_parts = [
|
misc_hammer_parts = [
|
||||||
'allocator.c',
|
'allocator.c',
|
||||||
|
|
|
||||||
|
|
@ -65,10 +65,10 @@ void* h_arena_malloc(HArena *arena, size_t size) {
|
||||||
if (size <= arena->head->free) {
|
if (size <= arena->head->free) {
|
||||||
// fast path..
|
// fast path..
|
||||||
void* ret = arena->head->rest + arena->head->used;
|
void* ret = arena->head->rest + arena->head->used;
|
||||||
arena->used += size + 1;
|
arena->used += size;
|
||||||
arena->wasted -= size;
|
arena->wasted -= size;
|
||||||
arena->head->used += size + 1;
|
arena->head->used += size;
|
||||||
arena->head->free -= size + 1;
|
arena->head->free -= size;
|
||||||
return ret;
|
return ret;
|
||||||
} else if (size > arena->block_size) {
|
} else if (size > arena->block_size) {
|
||||||
// We need a new, dedicated block for it, because it won't fit in a standard sized one.
|
// We need a new, dedicated block for it, because it won't fit in a standard sized one.
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ struct HCFStack_ {
|
||||||
int count;
|
int count;
|
||||||
int cap;
|
int cap;
|
||||||
HCFChoice *last_completed; // Last completed choice.
|
HCFChoice *last_completed; // Last completed choice.
|
||||||
|
// XXX is last_completed still needed?
|
||||||
|
HCFChoice *prealloc; // If not NULL, will be used for the outermost choice.
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef UNUSED
|
#ifndef UNUSED
|
||||||
|
|
@ -25,11 +27,13 @@ static HCFStack* h_cfstack_new(HAllocator *mm__) {
|
||||||
stack->count = 0;
|
stack->count = 0;
|
||||||
stack->cap = 4;
|
stack->cap = 4;
|
||||||
stack->stack = h_new(HCFChoice*, stack->cap);
|
stack->stack = h_new(HCFChoice*, stack->cap);
|
||||||
|
stack->prealloc = NULL;
|
||||||
return stack;
|
return stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) UNUSED;
|
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) UNUSED;
|
||||||
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) {
|
static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) {
|
||||||
|
h_free(stk__->prealloc);
|
||||||
h_free(stk__->stack);
|
h_free(stk__->stack);
|
||||||
h_free(stk__);
|
h_free(stk__);
|
||||||
}
|
}
|
||||||
|
|
@ -56,7 +60,9 @@ static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFCh
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) {
|
static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) {
|
||||||
HCFChoice *ret = h_new(HCFChoice, 1);
|
HCFChoice *ret = stk__->prealloc? stk__->prealloc : h_new(HCFChoice, 1);
|
||||||
|
stk__->prealloc = NULL;
|
||||||
|
|
||||||
ret->reshape = NULL;
|
ret->reshape = NULL;
|
||||||
ret->action = NULL;
|
ret->action = NULL;
|
||||||
ret->pred = NULL;
|
ret->pred = NULL;
|
||||||
|
|
|
||||||
294
src/backends/glr.c
Normal file
294
src/backends/glr.c
Normal file
|
|
@ -0,0 +1,294 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include "lr.h"
|
||||||
|
|
||||||
|
static bool glr_step(HParseResult **result, HSlist *engines,
|
||||||
|
HLREngine *engine, const HLRAction *action);
|
||||||
|
|
||||||
|
|
||||||
|
/* GLR compilation (LALR w/o failing on conflict) */
|
||||||
|
|
||||||
|
/*
 * Compile `parser` for the GLR backend by delegating to the LALR compiler.
 *
 * The LALR compiler reports -1 when the table still contains conflicts but
 * nevertheless leaves the table in parser->backend_data. GLR can work with
 * such a table, so that particular outcome is turned into success (0).
 * Any other status is passed through unchanged.
 */
int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  int status = h_lalr_compile(mm__, parser, params);

  // Anything other than "failed, but a table exists" is reported as-is.
  if(status != -1 || !parser->backend_data)
    return status;

  // Table present, only conflicts were reported: acceptable for GLR.
  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Release the backend data of a GLR-compiled parser.
 * The table was produced by the LALR compiler, so its teardown routine is
 * reused here.
 */
void h_glr_free(HParser *parser)
{
  h_lalr_free(parser);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Merging engines (when they converge on the same state) */
|
||||||
|
|
||||||
|
/*
 * Build an engine that stands for both `old` and `new`, which must be in the
 * same state and read from the same input buffer (both are asserted).
 *
 * The result is a copy of `old` with a fresh, empty stack; the two originals
 * are remembered in merged[0] and merged[1]. Allocation happens in old's
 * arena.
 */
static HLREngine *lrengine_merge(HLREngine *old, HLREngine *new)
{
  HArena *arena = old->arena;
  HLREngine *combined = h_arena_malloc(arena, sizeof *combined);

  assert(old->state == new->state);
  assert(old->input.input == new->input.input);

  *combined = *old;
  combined->merged[0] = old;
  combined->merged[1] = new;
  combined->stack = h_slist_new(arena);

  return combined;
}
|
||||||
|
|
||||||
|
/*
 * Return a new list consisting of a node-by-node copy of `stack` (top first)
 * whose last node links to `bottom`. The nodes of `stack` and of `bottom`
 * are not modified; the copies are allocated in stack's arena.
 */
static HSlist *demerge_stack(HSlistNode *bottom, HSlist *stack)
{
  HArena *arena = stack->arena;
  HSlist *copy = h_slist_new(arena);

  // `link` always points at the pointer that must receive the next node
  HSlistNode **link = &copy->head;
  HSlistNode *src = stack->head;
  while(src != NULL) {
    HSlistNode *dup = h_arena_malloc(arena, sizeof *dup);
    dup->elem = src->elem;
    dup->next = NULL;

    *link = dup;
    link = &dup->next;
    src = src->next;
  }

  // splice the ancestor's stack underneath the copied part
  *link = bottom;

  return copy;
}
|
||||||
|
|
||||||
|
/*
 * Give `eng` a stack made of a copy of `stack` placed on top of eng's
 * current stack nodes, and return `eng`.
 *
 * NB: this can be a destructive update because an engine is not used for
 * anything after it is merged.
 */
static inline HLREngine *respawn(HLREngine *eng, HSlist *stack)
{
  HSlistNode *ancestors = eng->stack->head;

  eng->stack = demerge_stack(ancestors, stack);
  return eng;
}
|
||||||
|
|
||||||
|
static HLREngine *
|
||||||
|
demerge(HParseResult **result, HSlist *engines,
|
||||||
|
HLREngine *engine, const HLRAction *action, size_t depth)
|
||||||
|
{
|
||||||
|
// no-op on engines that are not merged
|
||||||
|
if(!engine->merged[0])
|
||||||
|
return engine;
|
||||||
|
|
||||||
|
HSlistNode *p = engine->stack->head;
|
||||||
|
for(size_t i=0; i<depth; i++) {
|
||||||
|
// if stack hits bottom, respawn ancestors
|
||||||
|
if(p == NULL) {
|
||||||
|
HLREngine *a = respawn(engine->merged[0], engine->stack);
|
||||||
|
HLREngine *b = respawn(engine->merged[1], engine->stack);
|
||||||
|
|
||||||
|
// continue demerge until final depth reached
|
||||||
|
a = demerge(result, engines, a, action, depth-i);
|
||||||
|
b = demerge(result, engines, b, action, depth-i);
|
||||||
|
|
||||||
|
// step and stow one ancestor...
|
||||||
|
glr_step(result, engines, a, action);
|
||||||
|
|
||||||
|
// ...and return the other
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
p = p->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return engine; // there is enough stack before the merge point
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Forking engines (on conflicts) */
|
||||||
|
|
||||||
|
HLREngine *fork_engine(const HLREngine *engine)
|
||||||
|
{
|
||||||
|
HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine));
|
||||||
|
eng2->table = engine->table;
|
||||||
|
eng2->state = engine->state;
|
||||||
|
eng2->input = engine->input;
|
||||||
|
|
||||||
|
// shallow-copy the stack
|
||||||
|
// this works because h_slist_push and h_slist_drop never modify
|
||||||
|
// the underlying structure of HSlistNodes, only the head pointer.
|
||||||
|
// in fact, this gives us prefix sharing for free.
|
||||||
|
eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist));
|
||||||
|
*eng2->stack = *engine->stack;
|
||||||
|
|
||||||
|
eng2->arena = engine->arena;
|
||||||
|
eng2->tarena = engine->tarena;
|
||||||
|
return eng2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const HLRAction *
|
||||||
|
handle_conflict(HParseResult **result, HSlist *engines,
|
||||||
|
const HLREngine *engine, const HSlist *branches)
|
||||||
|
{
|
||||||
|
// there should be at least two conflicting actions
|
||||||
|
assert(branches->head);
|
||||||
|
assert(branches->head->next); // this is just a consistency check
|
||||||
|
|
||||||
|
// fork a new engine for all but the first action
|
||||||
|
for(HSlistNode *x=branches->head->next; x; x=x->next) {
|
||||||
|
HLRAction *act = x->elem;
|
||||||
|
HLREngine *eng = fork_engine(engine);
|
||||||
|
|
||||||
|
// perform one step and add to engines
|
||||||
|
glr_step(result, engines, eng, act);
|
||||||
|
}
|
||||||
|
|
||||||
|
// return first action for use with original engine
|
||||||
|
return branches->head->elem;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* GLR driver */
|
||||||
|
|
||||||
|
static bool glr_step(HParseResult **result, HSlist *engines,
|
||||||
|
HLREngine *engine, const HLRAction *action)
|
||||||
|
{
|
||||||
|
// handle forks and demerges (~> spawn engines)
|
||||||
|
if(action) {
|
||||||
|
if(action->type == HLR_CONFLICT) {
|
||||||
|
// fork engine on conflicts
|
||||||
|
action = handle_conflict(result, engines, engine, action->branches);
|
||||||
|
} else if(action->type == HLR_REDUCE) {
|
||||||
|
// demerge/respawn as needed
|
||||||
|
size_t depth = action->production.length;
|
||||||
|
engine = demerge(result, engines, engine, action, depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run = h_lrengine_step(engine, action);
|
||||||
|
|
||||||
|
if(run) {
|
||||||
|
// store engine in the list, merge if necessary
|
||||||
|
HSlistNode *x;
|
||||||
|
for(x=engines->head; x; x=x->next) {
|
||||||
|
HLREngine *eng = x->elem;
|
||||||
|
if(eng->state == engine->state) {
|
||||||
|
x->elem = lrengine_merge(eng, engine);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!x) // no merge happened
|
||||||
|
h_slist_push(engines, engine);
|
||||||
|
} else if(engine->state == HLR_SUCCESS) {
|
||||||
|
// save the result
|
||||||
|
*result = h_lrengine_result(engine);
|
||||||
|
}
|
||||||
|
|
||||||
|
return run;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * GLR driver: run the parser's LR table over `stream`.
 *
 * Returns NULL if the parser has no backend table or if no engine reaches
 * success. Two arenas are created: one that holds the results (deleted here
 * only when there is no result to return) and a temporary one that is
 * always deleted before returning.
 */
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);   // tmp, deleted after parse

  // two engine lists (one engine per state), swapped after every round:
  // `active` is drained while `upcoming` collects the stepped engines
  HSlist *active = h_slist_new(tarena);
  HSlist *upcoming = h_slist_new(tarena);

  // create initial engine
  h_slist_push(active, h_lrengine_new(arena, tarena, table, stream));

  HParseResult *result = NULL;
  while(result == NULL && !h_slist_empty(active)) {
    assert(h_slist_empty(upcoming));

    // step all engines
    while(!h_slist_empty(active)) {
      HLREngine *engine = h_slist_pop(active);
      const HLRAction *action = h_lrengine_action(engine);
      glr_step(&result, upcoming, engine, action);
    }

    // swap the lists
    HSlist *drained = active;
    active = upcoming;
    upcoming = drained;
  }

  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);
  return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
HParserBackendVTable h__glr_backend_vtable = {
|
||||||
|
.compile = h_glr_compile,
|
||||||
|
.parse = h_glr_parse,
|
||||||
|
.free = h_glr_free
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// dummy!
|
||||||
|
int test_glr(void)
|
||||||
|
{
|
||||||
|
HAllocator *mm__ = &system_allocator;
|
||||||
|
|
||||||
|
/*
|
||||||
|
E -> E '+' E
|
||||||
|
| 'd'
|
||||||
|
*/
|
||||||
|
|
||||||
|
HParser *d = h_ch('d');
|
||||||
|
HParser *E = h_indirect();
|
||||||
|
HParser *E_ = h_choice(h_sequence(E, h_ch('+'), E, NULL), d, NULL);
|
||||||
|
h_bind_indirect(E, E_);
|
||||||
|
HParser *p = E;
|
||||||
|
|
||||||
|
printf("\n==== G R A M M A R ====\n");
|
||||||
|
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
|
||||||
|
if(g == NULL) {
|
||||||
|
fprintf(stderr, "h_cfgrammar failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
h_pprint_grammar(stdout, g, 0);
|
||||||
|
|
||||||
|
printf("\n==== D F A ====\n");
|
||||||
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
|
if(dfa)
|
||||||
|
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||||
|
|
||||||
|
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||||
|
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||||
|
if(table0)
|
||||||
|
h_pprint_lrtable(stdout, g, table0, 0);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "h_lr0_table failed\n");
|
||||||
|
h_lrtable_free(table0);
|
||||||
|
|
||||||
|
printf("\n==== L A L R T A B L E ====\n");
|
||||||
|
if(h_compile(p, PB_GLR, NULL)) {
|
||||||
|
fprintf(stderr, "does not compile\n");
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
||||||
|
|
||||||
|
printf("\n==== P A R S E R E S U L T ====\n");
|
||||||
|
HParseResult *res = h_parse(p, (uint8_t *)"d+d+d", 5);
|
||||||
|
if(res)
|
||||||
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
|
else
|
||||||
|
printf("no parse\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
389
src/backends/lalr.c
Normal file
389
src/backends/lalr.c
Normal file
|
|
@ -0,0 +1,389 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include "contextfree.h"
|
||||||
|
#include "lr.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LALR-via-SLR grammar transformation */
|
||||||
|
|
||||||
|
/*
 * Number of slots occupied by a NULL-terminated array of pointers,
 * counting the terminating NULL itself (so an empty array yields 1).
 */
static inline size_t seqsize(void *p_)
{
  void **cursor = p_;
  size_t slots = 1;   // the terminator is counted up front

  while(*cursor != NULL) {
    slots++;
    cursor++;
  }
  return slots;
}
|
||||||
|
|
||||||
|
static HLRAction *
|
||||||
|
lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
|
||||||
|
{
|
||||||
|
switch(symbol->type) {
|
||||||
|
case HCF_END:
|
||||||
|
return table->tmap[state]->end_branch;
|
||||||
|
case HCF_CHAR:
|
||||||
|
return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false);
|
||||||
|
default:
|
||||||
|
// nonterminal case
|
||||||
|
return h_hashtable_get(table->ntmap[state], symbol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the state reached from state `x` on symbol `A`.
 * The table entry must exist and must be a shift (both asserted).
 */
static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
  HLRAction *entry = lrtable_lookup(table, x, A);

  assert(entry != NULL);
  assert(entry->type == HLR_SHIFT);

  return entry->nextstate;
}
|
||||||
|
|
||||||
|
/* Allocate a transition record x --A--> y in `arena`. */
static inline HLRTransition *transition(HArena *arena,
                                        size_t x, const HCFChoice *A, size_t y)
{
  HLRTransition *edge = h_arena_malloc(arena, sizeof *edge);

  edge->symbol = A;
  edge->from = x;
  edge->to = y;

  return edge;
}
|
||||||
|
|
||||||
|
// no-op on terminal symbols
|
||||||
|
/*
 * Rewrite the productions of the enhanced symbol `xAy` so that their right
 * hand sides consist of enhanced symbols as well.
 *
 * Each right hand side is traced through the table starting in state `x`;
 * every symbol B met while going from state i to state j is replaced by the
 * symbol registered for the transition (i, B, j) in eg->tmap, which must
 * exist (asserted). The new sequence array replaces xAy->seq. Symbols whose
 * type is not HCF_CHOICE are left untouched.
 */
static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
                                  size_t x, HCFChoice *xAy)
{
  if(xAy->type != HCF_CHOICE)
    return;
  // XXX CHARSET?

  HArena *arena = eg->arena;

  HCFSequence **newseq = h_arena_malloc(arena, seqsize(xAy->seq)
                                               * sizeof(HCFSequence *));

  size_t k = 0;
  for(; xAy->seq[k] != NULL; k++) {
    // trace rhs starting in state x and following the transitions
    // xAy -> ... iBj ...
    HCFChoice **rhs = xAy->seq[k]->items;
    HCFChoice **items = h_arena_malloc(arena, seqsize(rhs) * sizeof(HCFChoice *));

    size_t from = x;
    size_t n = 0;
    for(; rhs[n] != NULL; n++) {
      size_t to = follow_transition(table, from, rhs[n]);
      HLRTransition *key = transition(arena, from, rhs[n], to);

      items[n] = h_hashtable_get(eg->tmap, key);
      assert(items[n] != NULL);

      from = to;
    }
    items[n] = NULL;

    newseq[k] = h_arena_malloc(arena, sizeof(HCFSequence));
    newseq[k]->items = items;
  }
  newseq[k] = NULL;

  xAy->seq = newseq;
}
|
||||||
|
|
||||||
|
/*
 * Create an enhanced-grammar symbol as a shallow copy of `sym` and record
 * it in eg->corr, the map from an original symbol to the set of enhanced
 * symbols derived from it (the set is created on first use).
 */
static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
{
  HArena *arena = eg->arena;

  HCFChoice *clone = h_arena_malloc(arena, sizeof(HCFChoice));
  *clone = *sym;

  HHashSet *variants = h_hashtable_get(eg->corr, sym);
  if(variants == NULL) {
    variants = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
    h_hashtable_put(eg->corr, sym, variants);
  }
  h_hashset_put(variants, clone);

  return clone;
}
|
||||||
|
|
||||||
|
static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa,
|
||||||
|
const HLRTable *table)
|
||||||
|
{
|
||||||
|
HAllocator *mm__ = g->mm__;
|
||||||
|
HArena *arena = g->arena;
|
||||||
|
|
||||||
|
HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
|
||||||
|
eg->tmap = h_hashtable_new(arena, h_eq_transition, h_hash_transition);
|
||||||
|
eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
eg->corr = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
|
||||||
|
// XXX must use h_eq/hash_ptr for symbols! so enhanced CHARs are different
|
||||||
|
eg->arena = arena;
|
||||||
|
|
||||||
|
// establish mapping between transitions and symbols
|
||||||
|
for(HSlistNode *x=dfa->transitions->head; x; x=x->next) {
|
||||||
|
HLRTransition *t = x->elem;
|
||||||
|
|
||||||
|
assert(!h_hashtable_present(eg->tmap, t));
|
||||||
|
|
||||||
|
HCFChoice *sym = new_enhanced_symbol(eg, t->symbol);
|
||||||
|
h_hashtable_put(eg->tmap, t, sym);
|
||||||
|
h_hashtable_put(eg->smap, sym, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// transform the productions
|
||||||
|
H_FOREACH(eg->tmap, HLRTransition *t, HCFChoice *sym)
|
||||||
|
transform_productions(table, eg, t->from, sym);
|
||||||
|
H_END_FOREACH
|
||||||
|
|
||||||
|
// add the start symbol
|
||||||
|
HCFChoice *start = new_enhanced_symbol(eg, g->start);
|
||||||
|
transform_productions(table, eg, 0, start);
|
||||||
|
|
||||||
|
eg->grammar = h_cfgrammar_(mm__, start);
|
||||||
|
return eg;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LALR table generation */
|
||||||
|
|
||||||
|
/* True when the table's list of inadequate states is not empty. */
static inline bool has_conflicts(HLRTable *table)
{
  if(h_slist_empty(table->inadeq))
    return false;
  return true;
}
|
||||||
|
|
||||||
|
// for each lookahead symbol (fs), put action into tmap
|
||||||
|
// returns 0 on success, -1 on conflict
|
||||||
|
// ignores forall entries
|
||||||
|
/*
 * Enter `action` into `tmap` at every position that is populated in the
 * lookahead set `fs`: the epsilon branch, the end branch and, recursively,
 * each character branch (missing sub-maps in `tmap` are created).
 *
 * A cell that already holds a different action is replaced by a conflict
 * entry combining the old and the new action.
 *
 * Returns 0 if no conflict was produced, -1 if at least one was.
 */
static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action)
{
  int status = 0;

  if(fs->epsilon_branch) {
    HLRAction *existing = tmap->epsilon_branch;
    if(!existing || existing == action) {
      tmap->epsilon_branch = action;
    } else {
      // conflict
      tmap->epsilon_branch = h_lr_conflict(tmap->arena, existing, action);
      status = -1;
    }
  }

  if(fs->end_branch) {
    HLRAction *existing = tmap->end_branch;
    if(!existing || existing == action) {
      tmap->end_branch = action;
    } else {
      // conflict
      tmap->end_branch = h_lr_conflict(tmap->arena, existing, action);
      status = -1;
    }
  }

  H_FOREACH(fs->char_branches, void *key, HStringMap *fs_)
    HStringMap *sub = h_hashtable_get(tmap->char_branches, key);

    if(sub == NULL) {
      sub = h_stringmap_new(tmap->arena);
      h_hashtable_put(tmap->char_branches, key, sub);
    }

    if(terminals_put(sub, fs_, action) < 0)
      status = -1;
  H_END_FOREACH

  return status;
}
|
||||||
|
|
||||||
|
// check whether a sequence of enhanced-grammar symbols (p) matches the given
|
||||||
|
// (original-grammar) production rhs and terminates in the given end state.
|
||||||
|
/*
 * Check whether the enhanced-symbol sequence `p` corresponds to the
 * original right hand side `rhs` and ends in `endstate`.
 *
 * Each enhanced symbol is mapped back to its transition through eg->smap
 * (the mapping must exist, asserted); the transition's symbol has to equal
 * the matching rhs symbol. Both sequences must end together, and the target
 * state of the last transition must be `endstate`. For an empty rhs the
 * state check holds trivially.
 */
static bool match_production(HLREnhGrammar *eg, HCFChoice **p,
                             HCFChoice **rhs, size_t endstate)
{
  size_t reached = endstate;   // stays at endstate if nothing is consumed

  while(*p != NULL && *rhs != NULL) {
    HLRTransition *edge = h_hashtable_get(eg->smap, *p);
    assert(edge != NULL);

    if(!h_eq_symbol(edge->symbol, *rhs))
      return false;

    reached = edge->to;
    p++;
    rhs++;
  }

  // at least one sequence is exhausted; equal means both are
  if(*p != *rhs)
    return false;

  return reached == endstate;
}
|
||||||
|
|
||||||
|
// desugar parser with a fresh start symbol
|
||||||
|
// this guarantees that the start symbol will not occur in any productions
|
||||||
|
/*
 * Desugar `parser` underneath a freshly allocated start symbol and return
 * that symbol.
 *
 * The new HCFChoice is handed to the desugaring stack as its preallocated
 * outermost choice. It receives a single sequence holding the desugared
 * parser, and h_act_first as its reshape function.
 */
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
{
  HCFChoice *augmented = h_new(HCFChoice, 1);

  // the HCFS_* macros operate on the local names mm__ and stk__
  HCFStack *stk__ = h_cfstack_new(mm__);
  stk__->prealloc = augmented;

  HCFS_BEGIN_CHOICE() {
    HCFS_BEGIN_SEQ() {
      HCFS_DESUGAR(parser);
    } HCFS_END_SEQ();
    HCFS_THIS_CHOICE->reshape = h_act_first;
  } HCFS_END_CHOICE();

  h_cfstack_free(mm__, stk__);

  return augmented;
}
|
||||||
|
|
||||||
|
/*
 * Compile `parser` for the LALR backend.
 *
 * Steps:
 *   1. generate the (augmented) CFG from the parser,
 *   2. construct the LR(0) DFA,
 *   3. build the LR(0) table,
 *   4. if the table has inadequate states, resolve them "by conversion to
 *      SLR": build the enhanced grammar and use the follow sets of its
 *      symbols as lookahead for the reduce actions.
 *
 * Returns 0 on success and -1 on failure. When -1 is returned because
 * conflicts remain after step 4, the table is still stored in
 * parser->backend_data; for the earlier failures nothing is stored.
 *
 * `params` is not used.
 */
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
  if(g == NULL)     // backend not suitable (language not context-free)
    return -1;

  HLRDFA *dfa = h_lr0_dfa(g);
  if(dfa == NULL) {     // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  HLRTable *table = h_lr0_table(g, dfa);
  if(table == NULL) {   // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  if(has_conflicts(table)) {
    HArena *arena = table->arena;

    HLREnhGrammar *eg = enhance_grammar(g, dfa, table);
    // enhance_grammar() stores whatever h_cfgrammar_() returned, which may
    // be NULL; the follow-set computation below needs a real grammar.
    if(eg == NULL || eg->grammar == NULL) {   // this should normally not happen
      h_cfgrammar_free(g);
      h_lrtable_free(table);
      return -1;
    }

    // go through the inadequate states; replace inadeq with a new list
    HSlist *pending = table->inadeq;
    table->inadeq = h_slist_new(arena);

    for(HSlistNode *x=pending->head; x; x=x->next) {
      size_t state = (uintptr_t)x->elem;
      bool still_inadeq = false;

      // clear old forall entry, it's being replaced by more fine-grained ones
      table->forall[state] = NULL;

      // go through each reducible item of state
      H_FOREACH_KEY(dfa->states[state], HLRItem *item)
        if(item->mark < item->len)
          continue;

        // action to place in the table cells indicated by lookahead
        HLRAction *action = h_reduce_action(arena, item);

        // find all LR(0)-enhanced productions matching item
        HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
        assert(lhss != NULL);
        H_FOREACH_KEY(lhss, HCFChoice *lhs)
          assert(lhs->type == HCF_CHOICE);  // XXX could be CHARSET?

          for(HCFSequence **p=lhs->seq; *p; p++) {
            HCFChoice **rhs = (*p)->items;
            if(!match_production(eg, rhs, item->rhs, state))
              continue;

            // the left-hand symbol's follow set is this production's
            // contribution to the lookahead
            const HStringMap *fs = h_follow(1, eg->grammar, lhs);
            assert(fs != NULL);
            assert(fs->epsilon_branch == NULL);
            assert(!h_stringmap_empty(fs));

            // for each lookahead symbol, put action into table cell
            if(terminals_put(table->tmap[state], fs, action) < 0)
              still_inadeq = true;
          }
        H_END_FOREACH // enhanced production
      H_END_FOREACH  // reducible item

      if(still_inadeq)
        h_slist_push(table->inadeq, (void *)(uintptr_t)state);
    }

    // the enhanced grammar was only needed for its follow sets
    h_cfgrammar_free(eg->grammar);
  }

  h_cfgrammar_free(g);
  parser->backend_data = table;
  return has_conflicts(table)? -1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Free the LR table attached to `parser`, clear the backend data pointer
 * and switch the parser's backend back to PB_PACKRAT.
 */
void h_lalr_free(HParser *parser)
{
  h_lrtable_free(parser->backend_data);

  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
HParserBackendVTable h__lalr_backend_vtable = {
|
||||||
|
.compile = h_lalr_compile,
|
||||||
|
.parse = h_lr_parse,
|
||||||
|
.free = h_lalr_free
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// dummy!
|
||||||
|
int test_lalr(void)
|
||||||
|
{
|
||||||
|
HAllocator *mm__ = &system_allocator;
|
||||||
|
|
||||||
|
/*
|
||||||
|
E -> E '-' T
|
||||||
|
| T
|
||||||
|
T -> '(' E ')'
|
||||||
|
| 'n' -- also try [0-9] for the charset paths
|
||||||
|
*/
|
||||||
|
|
||||||
|
HParser *n = h_ch('n');
|
||||||
|
HParser *E = h_indirect();
|
||||||
|
HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
|
||||||
|
HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
|
||||||
|
h_bind_indirect(E, E_);
|
||||||
|
HParser *p = E;
|
||||||
|
|
||||||
|
printf("\n==== G R A M M A R ====\n");
|
||||||
|
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
|
||||||
|
if(g == NULL) {
|
||||||
|
fprintf(stderr, "h_cfgrammar failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
h_pprint_grammar(stdout, g, 0);
|
||||||
|
|
||||||
|
printf("\n==== D F A ====\n");
|
||||||
|
HLRDFA *dfa = h_lr0_dfa(g);
|
||||||
|
if(dfa)
|
||||||
|
h_pprint_lrdfa(stdout, g, dfa, 0);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "h_lalr_dfa failed\n");
|
||||||
|
|
||||||
|
printf("\n==== L R ( 0 ) T A B L E ====\n");
|
||||||
|
HLRTable *table0 = h_lr0_table(g, dfa);
|
||||||
|
if(table0)
|
||||||
|
h_pprint_lrtable(stdout, g, table0, 0);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "h_lr0_table failed\n");
|
||||||
|
h_lrtable_free(table0);
|
||||||
|
|
||||||
|
printf("\n==== L A L R T A B L E ====\n");
|
||||||
|
if(h_compile(p, PB_LALR, NULL)) {
|
||||||
|
fprintf(stderr, "does not compile\n");
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
|
||||||
|
|
||||||
|
printf("\n==== P A R S E R E S U L T ====\n");
|
||||||
|
HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
|
||||||
|
if(res)
|
||||||
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
|
else
|
||||||
|
printf("no parse\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -3,13 +3,13 @@
|
||||||
#include "../cfgrammar.h"
|
#include "../cfgrammar.h"
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
// XXX despite the names, this is all LL(1) right now. TODO
|
static const size_t DEFAULT_KMAX = 1;
|
||||||
|
|
||||||
|
|
||||||
/* Generating the LL(k) parse table */
|
/* Generating the LL(k) parse table */
|
||||||
|
|
||||||
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
|
/* Maps each nonterminal (HCFChoice) of the grammar to a HStringMap that
|
||||||
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
|
* maps lookahead strings to productions (HCFSequence).
|
||||||
*/
|
*/
|
||||||
typedef struct HLLkTable_ {
|
typedef struct HLLkTable_ {
|
||||||
HHashTable *rows;
|
HHashTable *rows;
|
||||||
|
|
@ -19,29 +19,17 @@ typedef struct HLLkTable_ {
|
||||||
} HLLkTable;
|
} HLLkTable;
|
||||||
|
|
||||||
|
|
||||||
// XXX adaptation to LL(1), to be removed
|
|
||||||
typedef HCharKey HCFToken;
|
|
||||||
static const HCFToken end_token = 0x200;
|
|
||||||
#define char_token char_key
|
|
||||||
|
|
||||||
/* Interface to look up an entry in the parse table. */
|
/* Interface to look up an entry in the parse table. */
|
||||||
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
|
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x,
|
||||||
HInputStream lookahead)
|
const HInputStream *stream)
|
||||||
{
|
{
|
||||||
// note the lookahead stream is passed by value, i.e. a copy.
|
const HStringMap *row = h_hashtable_get(table->rows, x);
|
||||||
// reading bits from it does not consume them from the real input.
|
|
||||||
HCFToken tok;
|
|
||||||
uint8_t c = h_read_bits(&lookahead, 8, false);
|
|
||||||
if(lookahead.overrun)
|
|
||||||
tok = end_token;
|
|
||||||
else
|
|
||||||
tok = char_token(c);
|
|
||||||
|
|
||||||
const HHashTable *row = h_hashtable_get(table->rows, x);
|
|
||||||
assert(row != NULL); // the table should have one row for each nonterminal
|
assert(row != NULL); // the table should have one row for each nonterminal
|
||||||
|
|
||||||
const HCFSequence *production = h_hashtable_get(row, (void *)tok);
|
assert(!row->epsilon_branch); // would match without looking at the input
|
||||||
return production;
|
// XXX cases where this could be useful?
|
||||||
|
|
||||||
|
return h_stringmap_get_lookahead(row, *stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate a new parse table. */
|
/* Allocate a new parse table. */
|
||||||
|
|
@ -72,58 +60,131 @@ void h_llktable_free(HLLkTable *table)
|
||||||
h_free(table);
|
h_free(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute the predict set of production "A -> rhs". */
|
void *const CONFLICT = (void *)(uintptr_t)(-1);
|
||||||
HHashSet *h_predict(HCFGrammar *g, const HCFChoice *A, const HCFSequence *rhs)
|
|
||||||
|
// helper for stringmap_merge
|
||||||
|
static void *combine_entries(HHashSet *workset, void *dst, const void *src)
|
||||||
{
|
{
|
||||||
// predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs
|
assert(dst != NULL);
|
||||||
// predict(A -> rhs) = first(rhs) otherwise
|
assert(src != NULL);
|
||||||
const HCFStringMap *first_rhs = h_first_seq(1, g, rhs->items);
|
|
||||||
const HCFStringMap *follow_A = h_follow(1, g, A);
|
|
||||||
HHashSet *ret = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
|
||||||
|
|
||||||
h_hashset_put_all(ret, first_rhs->char_branches);
|
if(dst == CONFLICT) { // previous conflict
|
||||||
if(first_rhs->end_branch)
|
h_hashset_put(workset, src);
|
||||||
h_hashset_put(ret, (void *)end_token);
|
} else if(dst != src) { // new conflict
|
||||||
|
h_hashset_put(workset, dst);
|
||||||
if(h_derives_epsilon_seq(g, rhs->items)) {
|
h_hashset_put(workset, src);
|
||||||
h_hashset_put_all(ret, follow_A->char_branches);
|
dst = CONFLICT;
|
||||||
if(follow_A->end_branch)
|
|
||||||
h_hashset_put(ret, (void *)end_token);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate entries for the production "A -> rhs" in the given table row. */
|
// add the mappings of src to dst, marking conflicts and adding the conflicting
|
||||||
static
|
// values to workset.
|
||||||
int fill_table_row(HCFGrammar *g, HHashTable *row,
|
// note: reuses parts of src to build dst!
|
||||||
const HCFChoice *A, HCFSequence *rhs)
|
static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src)
|
||||||
{
|
{
|
||||||
// iterate over predict(A -> rhs)
|
if(src->epsilon_branch) {
|
||||||
HHashSet *pred = h_predict(g, A, rhs);
|
if(dst->epsilon_branch)
|
||||||
|
dst->epsilon_branch =
|
||||||
|
combine_entries(workset, dst->epsilon_branch, src->epsilon_branch);
|
||||||
|
else
|
||||||
|
dst->epsilon_branch = src->epsilon_branch;
|
||||||
|
} else {
|
||||||
|
// if there is a non-conflicting value on the left (dst) side, it means
|
||||||
|
// that prediction is already unambiguous. we can drop the right (src)
|
||||||
|
// side we were going to extend with.
|
||||||
|
if(dst->epsilon_branch && dst->epsilon_branch != CONFLICT)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
size_t i;
|
if(src->end_branch) {
|
||||||
HHashTableEntry *hte;
|
if(dst->end_branch)
|
||||||
for(i=0; i < pred->capacity; i++) {
|
dst->end_branch =
|
||||||
for(hte = &pred->contents[i]; hte; hte = hte->next) {
|
combine_entries(workset, dst->end_branch, src->end_branch);
|
||||||
|
else
|
||||||
|
dst->end_branch = src->end_branch;
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate over src->char_branches
|
||||||
|
const HHashTable *ht = src->char_branches;
|
||||||
|
for(size_t i=0; i < ht->capacity; i++) {
|
||||||
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if(hte->key == NULL)
|
||||||
continue;
|
continue;
|
||||||
HCFToken x = (uintptr_t)hte->key;
|
|
||||||
|
|
||||||
if(h_hashtable_present(row, (void *)x))
|
HCharKey c = (HCharKey)hte->key;
|
||||||
return -1; // table would be ambiguous
|
HStringMap *src_ = hte->value;
|
||||||
|
|
||||||
h_hashtable_put(row, (void *)x, rhs);
|
if(src_) {
|
||||||
|
HStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c);
|
||||||
|
if(dst_)
|
||||||
|
stringmap_merge(workset, dst_, src_);
|
||||||
|
else
|
||||||
|
h_hashtable_put(dst->char_branches, (void *)c, src_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Generate entries for the productions of A in the given table row. */
|
||||||
|
static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row,
|
||||||
|
const HCFChoice *A)
|
||||||
|
{
|
||||||
|
HHashSet *workset;
|
||||||
|
|
||||||
|
// initialize working set to the productions of A
|
||||||
|
workset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
for(HCFSequence **s = A->seq; *s; s++)
|
||||||
|
h_hashset_put(workset, *s);
|
||||||
|
|
||||||
|
// run until workset exhausted or kmax hit
|
||||||
|
size_t k;
|
||||||
|
for(k=1; k<=kmax; k++) {
|
||||||
|
// allocate a fresh workset for the next round
|
||||||
|
HHashSet *nextset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
|
||||||
|
// iterate over the productions in workset...
|
||||||
|
const HHashTable *ht = workset;
|
||||||
|
for(size_t i=0; i < ht->capacity; i++) {
|
||||||
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
|
if(hte->key == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
HCFSequence *rhs = (void *)hte->key;
|
||||||
|
assert(rhs != NULL);
|
||||||
|
assert(rhs != CONFLICT); // just to be sure there's no mixup
|
||||||
|
|
||||||
|
// calculate predict set; let values map to rhs
|
||||||
|
HStringMap *pred = h_predict(k, g, A, rhs);
|
||||||
|
h_stringmap_replace(pred, NULL, rhs);
|
||||||
|
|
||||||
|
// merge predict set into the row
|
||||||
|
// accumulates conflicts in new workset
|
||||||
|
stringmap_merge(nextset, row, pred);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
// switch to the updated workset
|
||||||
|
h_hashset_free(workset);
|
||||||
|
workset = nextset;
|
||||||
|
|
||||||
|
// if the workset is empty, row is without conflict; we're done
|
||||||
|
if(h_hashset_empty(workset))
|
||||||
|
break;
|
||||||
|
|
||||||
|
// clear conflict markers for next iteration
|
||||||
|
h_stringmap_replace(row, CONFLICT, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
h_hashset_free(workset);
|
||||||
|
return (k>kmax)? -1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate the LL(k) parse table from the given grammar.
|
/* Generate the LL(k) parse table from the given grammar.
|
||||||
* Returns -1 on error, 0 on success.
|
* Returns -1 on error, 0 on success.
|
||||||
*/
|
*/
|
||||||
static int fill_table(HCFGrammar *g, HLLkTable *table)
|
static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table)
|
||||||
{
|
{
|
||||||
table->start = g->start;
|
table->start = g->start;
|
||||||
|
|
||||||
|
|
@ -138,17 +199,13 @@ static int fill_table(HCFGrammar *g, HLLkTable *table)
|
||||||
assert(a->type == HCF_CHOICE);
|
assert(a->type == HCF_CHOICE);
|
||||||
|
|
||||||
// create table row for this nonterminal
|
// create table row for this nonterminal
|
||||||
HHashTable *row = h_hashtable_new(table->arena, h_eq_ptr, h_hash_ptr);
|
HStringMap *row = h_stringmap_new(table->arena);
|
||||||
h_hashtable_put(table->rows, a, row);
|
h_hashtable_put(table->rows, a, row);
|
||||||
|
|
||||||
// iterate over a's productions
|
if(fill_table_row(kmax, g, row, a) < 0) {
|
||||||
HCFSequence **s;
|
// unresolvable conflicts in row
|
||||||
for(s = a->seq; *s; s++) {
|
|
||||||
// record this production in row as appropriate
|
|
||||||
// this can signal an ambiguity conflict.
|
|
||||||
// NB we don't worry about deallocating anything, h_llk_compile will
|
// NB we don't worry about deallocating anything, h_llk_compile will
|
||||||
// delete the whole arena for us.
|
// delete the whole arena for us.
|
||||||
if(fill_table_row(g, row, a, *s) < 0)
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -159,6 +216,9 @@ static int fill_table(HCFGrammar *g, HLLkTable *table)
|
||||||
|
|
||||||
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
{
|
{
|
||||||
|
size_t kmax = params? (uintptr_t)params : DEFAULT_KMAX;
|
||||||
|
assert(kmax>0);
|
||||||
|
|
||||||
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
||||||
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
||||||
if(grammar == NULL)
|
if(grammar == NULL)
|
||||||
|
|
@ -170,7 +230,7 @@ int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
|
|
||||||
// generate table and store in parser->backend_data.
|
// generate table and store in parser->backend_data.
|
||||||
HLLkTable *table = h_llktable_new(mm__);
|
HLLkTable *table = h_llktable_new(mm__);
|
||||||
if(fill_table(grammar, table) < 0) {
|
if(fill_table(kmax, grammar, table) < 0) {
|
||||||
// the table was ambiguous
|
// the table was ambiguous
|
||||||
h_cfgrammar_free(grammar);
|
h_cfgrammar_free(grammar);
|
||||||
h_llktable_free(table);
|
h_llktable_free(table);
|
||||||
|
|
@ -240,10 +300,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
seq = h_carray_new(arena);
|
seq = h_carray_new(arena);
|
||||||
|
|
||||||
// look up applicable production in parse table
|
// look up applicable production in parse table
|
||||||
const HCFSequence *p = h_llk_lookup(table, x, *stream);
|
const HCFSequence *p = h_llk_lookup(table, x, stream);
|
||||||
if(p == NULL)
|
if(p == NULL)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
|
|
||||||
|
// an infinite loop case that shouldn't happen
|
||||||
|
assert(!p->items[0] || p->items[0] != x);
|
||||||
|
|
||||||
// push production's rhs onto the stack (in reverse order)
|
// push production's rhs onto the stack (in reverse order)
|
||||||
HCFChoice **s;
|
HCFChoice **s;
|
||||||
for(s = p->items; *s; s++);
|
for(s = p->items; *s; s++);
|
||||||
|
|
@ -255,10 +318,12 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
|
|
||||||
// the top of stack is such that there will be a result...
|
// the top of stack is such that there will be a result...
|
||||||
HParsedToken *tok; // will hold result token
|
HParsedToken *tok; // will hold result token
|
||||||
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
|
tok->index = stream->index;
|
||||||
|
tok->bit_offset = stream->bit_offset;
|
||||||
if(x == mark) {
|
if(x == mark) {
|
||||||
// hit stack frame boundary...
|
// hit stack frame boundary...
|
||||||
// wrap the accumulated parse result, this sequence is finished
|
// wrap the accumulated parse result, this sequence is finished
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
|
||||||
tok->token_type = TT_SEQUENCE;
|
tok->token_type = TT_SEQUENCE;
|
||||||
tok->seq = seq;
|
tok->seq = seq;
|
||||||
|
|
||||||
|
|
@ -277,13 +342,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
case HCF_END:
|
case HCF_END:
|
||||||
if(!stream->overrun)
|
if(!stream->overrun)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
|
h_arena_free(arena, tok);
|
||||||
tok = NULL;
|
tok = NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
if(input != x->chr)
|
if(input != x->chr)
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
tok->uint = x->chr;
|
tok->uint = x->chr;
|
||||||
break;
|
break;
|
||||||
|
|
@ -293,7 +358,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
if(!charset_isset(x->charset, input))
|
if(!charset_isset(x->charset, input))
|
||||||
goto no_parse;
|
goto no_parse;
|
||||||
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
tok->uint = input;
|
tok->uint = input;
|
||||||
break;
|
break;
|
||||||
|
|
@ -306,8 +370,6 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
|
||||||
|
|
||||||
// 'tok' has been parsed; process it
|
// 'tok' has been parsed; process it
|
||||||
|
|
||||||
// XXX set tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
|
||||||
|
|
||||||
// perform token reshape if indicated
|
// perform token reshape if indicated
|
||||||
if(x->reshape)
|
if(x->reshape)
|
||||||
tok = (HParsedToken *)x->reshape(make_result(arena, tok));
|
tok = (HParsedToken *)x->reshape(make_result(arena, tok));
|
||||||
|
|
@ -357,9 +419,11 @@ int test_llk(void)
|
||||||
Y -> y -- for k=3 use "yy"
|
Y -> y -- for k=3 use "yy"
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HParser *c = h_many(h_ch('x'));
|
HParser *X = h_optional(h_ch('x'));
|
||||||
HParser *q = h_sequence(c, h_ch('y'), NULL);
|
HParser *Y = h_sequence(h_ch('y'), h_ch('y'), NULL);
|
||||||
HParser *p = h_choice(q, h_end_p(), NULL);
|
HParser *A = h_sequence(X, Y, h_ch('a'), NULL);
|
||||||
|
HParser *B = h_sequence(Y, h_ch('b'), NULL);
|
||||||
|
HParser *p = h_choice(A, B, NULL);
|
||||||
|
|
||||||
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
||||||
|
|
||||||
|
|
@ -372,13 +436,16 @@ int test_llk(void)
|
||||||
printf("derive epsilon: ");
|
printf("derive epsilon: ");
|
||||||
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
h_pprint_symbolset(stdout, g, g->geneps, 0);
|
||||||
printf("first(A) = ");
|
printf("first(A) = ");
|
||||||
h_pprint_stringset(stdout, g, h_first(2, g, g->start), 0);
|
h_pprint_stringset(stdout, h_first(3, g, g->start), 0);
|
||||||
printf("follow(C) = ");
|
// printf("follow(C) = ");
|
||||||
h_pprint_stringset(stdout, g, h_follow(2, g, h_desugar(&system_allocator, NULL, c)), 0);
|
// h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, NULL, c)), 0);
|
||||||
|
|
||||||
h_compile(p, PB_LLk, NULL);
|
if(h_compile(p, PB_LLk, (void *)3)) {
|
||||||
|
fprintf(stderr, "does not compile\n");
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
HParseResult *res = h_parse(p, (uint8_t *)"xxy", 3);
|
HParseResult *res = h_parse(p, (uint8_t *)"xyya", 4);
|
||||||
if(res)
|
if(res)
|
||||||
h_pprint(stdout, res->ast, 0, 2);
|
h_pprint(stdout, res->ast, 0, 2);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
538
src/backends/lr.c
Normal file
538
src/backends/lr.c
Normal file
|
|
@ -0,0 +1,538 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include "../parsers/parser_internal.h"
|
||||||
|
#include "lr.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Comparison and hashing functions */
|
||||||
|
|
||||||
|
// compare symbols - terminals by value, others by pointer
|
||||||
|
bool h_eq_symbol(const void *p, const void *q)
|
||||||
|
{
|
||||||
|
const HCFChoice *x=p, *y=q;
|
||||||
|
return (x==y
|
||||||
|
|| (x->type==HCF_END && y->type==HCF_END)
|
||||||
|
|| (x->type==HCF_CHAR && y->type==HCF_CHAR && x->chr==y->chr));
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash symbols - terminals by value, others by pointer
|
||||||
|
HHashValue h_hash_symbol(const void *p)
|
||||||
|
{
|
||||||
|
const HCFChoice *x=p;
|
||||||
|
if(x->type == HCF_END)
|
||||||
|
return 0;
|
||||||
|
else if(x->type == HCF_CHAR)
|
||||||
|
return x->chr * 33;
|
||||||
|
else
|
||||||
|
return h_hash_ptr(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare LR items by value
|
||||||
|
static bool eq_lr_item(const void *p, const void *q)
|
||||||
|
{
|
||||||
|
const HLRItem *a=p, *b=q;
|
||||||
|
|
||||||
|
if(!h_eq_symbol(a->lhs, b->lhs)) return false;
|
||||||
|
if(a->mark != b->mark) return false;
|
||||||
|
if(a->len != b->len) return false;
|
||||||
|
|
||||||
|
for(size_t i=0; i<a->len; i++)
|
||||||
|
if(!h_eq_symbol(a->rhs[i], b->rhs[i])) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash LALR items
|
||||||
|
static inline HHashValue hash_lr_item(const void *p)
|
||||||
|
{
|
||||||
|
const HLRItem *x = p;
|
||||||
|
HHashValue hash = 0;
|
||||||
|
|
||||||
|
hash += h_hash_symbol(x->lhs);
|
||||||
|
for(HCFChoice **p=x->rhs; *p; p++)
|
||||||
|
hash += h_hash_symbol(*p);
|
||||||
|
hash += x->mark;
|
||||||
|
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare item sets (DFA states)
|
||||||
|
bool h_eq_lr_itemset(const void *p, const void *q)
|
||||||
|
{
|
||||||
|
return h_hashset_equal(p, q);
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash LR item sets (DFA states) - hash the elements and sum
|
||||||
|
HHashValue h_hash_lr_itemset(const void *p)
|
||||||
|
{
|
||||||
|
HHashValue hash = 0;
|
||||||
|
|
||||||
|
H_FOREACH_KEY((const HHashSet *)p, HLRItem *item)
|
||||||
|
hash += hash_lr_item(item);
|
||||||
|
H_END_FOREACH
|
||||||
|
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool h_eq_transition(const void *p, const void *q)
|
||||||
|
{
|
||||||
|
const HLRTransition *a=p, *b=q;
|
||||||
|
return (a->from == b->from && a->to == b->to && h_eq_symbol(a->symbol, b->symbol));
|
||||||
|
}
|
||||||
|
|
||||||
|
HHashValue h_hash_transition(const void *p)
|
||||||
|
{
|
||||||
|
const HLRTransition *t = p;
|
||||||
|
return (h_hash_symbol(t->symbol) + t->from + t->to); // XXX ?
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Constructors */
|
||||||
|
|
||||||
|
// Allocate an LR item "lhs -> rhs" in arena 'a' with the mark at position
// 'mark'. 'rhs' is a NULL-terminated symbol array; it is referenced, not
// copied. The mark must not lie beyond the end of the right-hand side.
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark)
{
  HLRItem *item = h_arena_malloc(a, sizeof(HLRItem));

  // measure the right-hand side up to its NULL terminator
  size_t n = 0;
  while(rhs[n] != NULL)
    n++;
  assert(mark <= n);

  item->lhs = lhs;
  item->rhs = rhs;
  item->len = n;
  item->mark = mark;

  return item;
}
|
||||||
|
|
||||||
|
// Create an empty LR state: a hash set of LR items compared and hashed by
// value (eq_lr_item / hash_lr_item).
HLRState *h_lrstate_new(HArena *arena)
{
  HLRState *state = h_hashset_new(arena, eq_lr_item, hash_lr_item);
  return state;
}
|
||||||
|
|
||||||
|
// Allocate an LR parse table with 'nrows' rows (one per state).
// The table struct itself comes from the allocator mm__; everything it points
// to lives in a fresh arena owned by the table. Each row starts out with an
// empty nonterminal map, an empty terminal map and no "forall" action.
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
{
  HArena *arena = h_new_arena(mm__, 0);    // 0 = default blocksize
  assert(arena != NULL);

  HLRTable *table = h_new(HLRTable, 1);
  table->mm__ = mm__;
  table->arena = arena;
  table->nrows = nrows;

  // per-row arrays
  table->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
  table->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *));
  table->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *));
  table->inadeq = h_slist_new(arena);

  for(size_t row=0; row<nrows; row++) {
    table->ntmap[row] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);
    table->tmap[row] = h_stringmap_new(arena);
    table->forall[row] = NULL;
  }

  return table;
}
|
||||||
|
|
||||||
|
// Release an LR table: delete its arena (and with it all rows and actions),
// then free the table struct itself.
// Passing NULL is a no-op, so callers can free unconditionally after a failed
// table construction.
void h_lrtable_free(HLRTable *table)
{
  if(table == NULL)
    return;

  // NOTE(review): mm__ looks unused but is presumably picked up by the
  // h_free() macro -- confirm before removing.
  HAllocator *mm__ = table->mm__;
  h_delete_arena(table->arena);
  h_free(table);
}
|
||||||
|
|
||||||
|
// Allocate a shift action leading to state 'nextstate'.
HLRAction *h_shift_action(HArena *arena, size_t nextstate)
{
  HLRAction *shift = h_arena_malloc(arena, sizeof(HLRAction));

  shift->type = HLR_SHIFT;
  shift->nextstate = nextstate;

  return shift;
}
|
||||||
|
|
||||||
|
// Allocate a reduce action for the production described by 'item'.
// The right-hand side itself is only recorded in debug builds (it is needed
// for pretty-printing, not for parsing).
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item)
{
  HLRAction *reduce = h_arena_malloc(arena, sizeof(HLRAction));

  reduce->type = HLR_REDUCE;
  reduce->production.lhs = item->lhs;
  reduce->production.length = item->len;
#ifndef NDEBUG
  reduce->production.rhs = item->rhs;
#endif

  return reduce;
}
|
||||||
|
|
||||||
|
// adds 'new' to the branches of 'action'
|
||||||
|
// returns a 'action' if it is already of type HLR_CONFLICT
|
||||||
|
// allocates a new HLRAction otherwise
|
||||||
|
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new)
|
||||||
|
{
|
||||||
|
if(action->type != HLR_CONFLICT) {
|
||||||
|
HLRAction *old = action;
|
||||||
|
action = h_arena_malloc(arena, sizeof(HLRAction));
|
||||||
|
action->type = HLR_CONFLICT;
|
||||||
|
action->branches = h_slist_new(arena);
|
||||||
|
h_slist_push(action->branches, old);
|
||||||
|
h_slist_push(action->branches, new);
|
||||||
|
} else {
|
||||||
|
// check if 'new' is already among branches
|
||||||
|
HSlistNode *x;
|
||||||
|
for(x=action->branches->head; x; x=x->next) {
|
||||||
|
if(x->elem == new)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// add 'new' if it is not already in list
|
||||||
|
if(x == NULL)
|
||||||
|
h_slist_push(action->branches, new);
|
||||||
|
}
|
||||||
|
|
||||||
|
return action;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool h_lrtable_row_empty(const HLRTable *table, size_t i)
|
||||||
|
{
|
||||||
|
return (h_hashtable_empty(table->ntmap[i])
|
||||||
|
&& h_stringmap_empty(table->tmap[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LR driver */
|
||||||
|
|
||||||
|
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||||
|
const HInputStream *stream)
|
||||||
|
{
|
||||||
|
HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
|
||||||
|
|
||||||
|
engine->table = table;
|
||||||
|
engine->state = 0;
|
||||||
|
engine->stack = h_slist_new(tarena);
|
||||||
|
engine->input = *stream;
|
||||||
|
engine->merged[0] = NULL;
|
||||||
|
engine->merged[1] = NULL;
|
||||||
|
engine->arena = arena;
|
||||||
|
engine->tarena = tarena;
|
||||||
|
|
||||||
|
return engine;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const HLRAction *
|
||||||
|
terminal_lookup(const HLREngine *engine, const HInputStream *stream)
|
||||||
|
{
|
||||||
|
const HLRTable *table = engine->table;
|
||||||
|
size_t state = engine->state;
|
||||||
|
|
||||||
|
assert(state < table->nrows);
|
||||||
|
if(table->forall[state]) {
|
||||||
|
assert(h_lrtable_row_empty(table, state)); // that would be a conflict
|
||||||
|
return table->forall[state];
|
||||||
|
} else {
|
||||||
|
return h_stringmap_get_lookahead(table->tmap[state], *stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const HLRAction *
|
||||||
|
nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol)
|
||||||
|
{
|
||||||
|
const HLRTable *table = engine->table;
|
||||||
|
size_t state = engine->state;
|
||||||
|
|
||||||
|
assert(state < table->nrows);
|
||||||
|
assert(!table->forall[state]); // contains only reduce entries
|
||||||
|
// we are only looking for shifts
|
||||||
|
return h_hashtable_get(table->ntmap[state], symbol);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HLRAction *h_lrengine_action(const HLREngine *engine)
|
||||||
|
{
|
||||||
|
return terminal_lookup(engine, &engine->input);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read one byte from the engine's input and wrap it in a TT_UINT token
// allocated from the result arena. Returns NULL at end of input.
//
// The token's position (index, bit_offset) is the stream position before the
// read. h_lrengine_step copies these fields from shifted tokens into
// reduction results, so they must not be left indeterminate.
static HParsedToken *consume_input(HLREngine *engine)
{
  // remember where the token starts; h_read_bits advances the stream
  const HInputStream start = engine->input;

  uint8_t c = h_read_bits(&engine->input, 8, false);

  if(engine->input.overrun)       // end of input
    return NULL;

  HParsedToken *v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
  v->token_type = TT_UINT;
  v->uint = c;
  v->index = start.index;
  v->bit_offset = start.bit_offset;

  return v;
}
|
||||||
|
|
||||||
|
// run LR parser for one round; returns false when finished
|
||||||
|
bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
|
||||||
|
{
|
||||||
|
// short-hand names
|
||||||
|
HSlist *stack = engine->stack;
|
||||||
|
HArena *arena = engine->arena;
|
||||||
|
HArena *tarena = engine->tarena;
|
||||||
|
|
||||||
|
if(action == NULL)
|
||||||
|
return false; // no handle recognizable in input, terminate
|
||||||
|
|
||||||
|
assert(action->type == HLR_SHIFT || action->type == HLR_REDUCE);
|
||||||
|
|
||||||
|
if(action->type == HLR_REDUCE) {
|
||||||
|
size_t len = action->production.length;
|
||||||
|
HCFChoice *symbol = action->production.lhs;
|
||||||
|
|
||||||
|
// semantic value of the reduction result
|
||||||
|
HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
|
value->token_type = TT_SEQUENCE;
|
||||||
|
value->seq = h_carray_new_sized(arena, len);
|
||||||
|
|
||||||
|
// pull values off the stack, rewinding state accordingly
|
||||||
|
HParsedToken *v = NULL;
|
||||||
|
for(size_t i=0; i<len; i++) {
|
||||||
|
v = h_slist_drop(stack);
|
||||||
|
engine->state = (uintptr_t)h_slist_drop(stack);
|
||||||
|
|
||||||
|
// collect values in result sequence
|
||||||
|
value->seq->elements[len-1-i] = v;
|
||||||
|
value->seq->used++;
|
||||||
|
}
|
||||||
|
if(v) {
|
||||||
|
// result position equals position of left-most symbol
|
||||||
|
value->index = v->index;
|
||||||
|
value->bit_offset = v->bit_offset;
|
||||||
|
} else {
|
||||||
|
// XXX how to get the position in this case?
|
||||||
|
}
|
||||||
|
|
||||||
|
// perform token reshape if indicated
|
||||||
|
if(symbol->reshape)
|
||||||
|
value = (HParsedToken *)symbol->reshape(make_result(arena, value));
|
||||||
|
|
||||||
|
// call validation and semantic action, if present
|
||||||
|
if(symbol->pred && !symbol->pred(make_result(tarena, value)))
|
||||||
|
return false; // validation failed -> no parse; terminate
|
||||||
|
if(symbol->action)
|
||||||
|
value = (HParsedToken *)symbol->action(make_result(arena, value));
|
||||||
|
|
||||||
|
// this is LR, building a right-most derivation bottom-up, so no reduce can
|
||||||
|
// follow a reduce. we can also assume no conflict follows for GLR if we
|
||||||
|
// use LALR tables, because only terminal symbols (lookahead) get reduces.
|
||||||
|
const HLRAction *shift = nonterminal_lookup(engine, symbol);
|
||||||
|
if(shift == NULL)
|
||||||
|
return false; // parse error
|
||||||
|
assert(shift->type == HLR_SHIFT);
|
||||||
|
|
||||||
|
// piggy-back the shift right here, never touching the input
|
||||||
|
h_slist_push(stack, (void *)(uintptr_t)engine->state);
|
||||||
|
h_slist_push(stack, value);
|
||||||
|
engine->state = shift->nextstate;
|
||||||
|
|
||||||
|
// check for success
|
||||||
|
if(engine->state == HLR_SUCCESS) {
|
||||||
|
assert(symbol == engine->table->start);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert(action->type == HLR_SHIFT);
|
||||||
|
HParsedToken *value = consume_input(engine);
|
||||||
|
h_slist_push(stack, (void *)(uintptr_t)engine->state);
|
||||||
|
h_slist_push(stack, value);
|
||||||
|
engine->state = action->nextstate;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the parse result from a finished engine.
// Parsing was successful iff the engine reached the end state; in that case
// the start symbol's semantic value sits on top of the stack. Returns NULL
// otherwise.
HParseResult *h_lrengine_result(HLREngine *engine)
{
  if(engine->state != HLR_SUCCESS)
    return NULL;

  assert(!h_slist_empty(engine->stack));
  HParsedToken *top = engine->stack->head->elem;
  return make_result(engine->arena, top);
}
|
||||||
|
|
||||||
|
// Parse 'stream' with the LR table stored in parser->backend_data.
// Returns NULL if the parser has no table or the input does not parse.
// The result arena is deleted here when there is no result to hand out; the
// temporary arena is always deleted.
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
  HLRTable *table = parser->backend_data;
  if(table == NULL)
    return NULL;

  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
  HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);

  // iterate engine to completion
  for(;;) {
    const HLRAction *action = h_lrengine_action(engine);
    if(!h_lrengine_step(engine, action))
      break;
  }

  HParseResult *result = h_lrengine_result(engine);
  if(result == NULL)
    h_delete_arena(arena);
  h_delete_arena(tarena);

  return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Pretty-printers */
|
||||||
|
|
||||||
|
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
|
||||||
|
{
|
||||||
|
h_pprint_symbol(f, g, item->lhs);
|
||||||
|
fputs(" ->", f);
|
||||||
|
|
||||||
|
HCFChoice **x = item->rhs;
|
||||||
|
HCFChoice **mark = item->rhs + item->mark;
|
||||||
|
if(*x == NULL) {
|
||||||
|
fputc('.', f);
|
||||||
|
} else {
|
||||||
|
while(*x) {
|
||||||
|
if(x == mark)
|
||||||
|
fputc('.', f);
|
||||||
|
else
|
||||||
|
fputc(' ', f);
|
||||||
|
|
||||||
|
if((*x)->type == HCF_CHAR) {
|
||||||
|
// condense character strings
|
||||||
|
fputc('"', f);
|
||||||
|
h_pprint_char(f, (*x)->chr);
|
||||||
|
for(x++; *x; x++) {
|
||||||
|
if(x == mark)
|
||||||
|
break;
|
||||||
|
if((*x)->type != HCF_CHAR)
|
||||||
|
break;
|
||||||
|
h_pprint_char(f, (*x)->chr);
|
||||||
|
}
|
||||||
|
fputc('"', f);
|
||||||
|
} else {
|
||||||
|
h_pprint_symbol(f, g, *x);
|
||||||
|
x++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(x == mark)
|
||||||
|
fputs(".", f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
|
||||||
|
const HLRState *state, unsigned int indent)
|
||||||
|
{
|
||||||
|
bool first = true;
|
||||||
|
H_FOREACH_KEY(state, HLRItem *item)
|
||||||
|
if(!first)
|
||||||
|
for(unsigned int i=0; i<indent; i++) fputc(' ', f);
|
||||||
|
first = false;
|
||||||
|
h_pprint_lritem(f, g, item);
|
||||||
|
fputc('\n', f);
|
||||||
|
H_END_FOREACH
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t)
|
||||||
|
{
|
||||||
|
fputs("-", f);
|
||||||
|
h_pprint_symbol(f, g, t->symbol);
|
||||||
|
fprintf(f, "->%lu", t->to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
|
||||||
|
const HLRDFA *dfa, unsigned int indent)
|
||||||
|
{
|
||||||
|
for(size_t i=0; i<dfa->nstates; i++) {
|
||||||
|
unsigned int indent2 = indent + fprintf(f, "%4lu: ", i);
|
||||||
|
h_pprint_lrstate(f, g, dfa->states[i], indent2);
|
||||||
|
for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
|
||||||
|
const HLRTransition *t = x->elem;
|
||||||
|
if(t->from == i) {
|
||||||
|
for(unsigned int i=0; i<indent2-2; i++) fputc(' ', f);
|
||||||
|
pprint_transition(f, g, t);
|
||||||
|
fputc('\n', f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
|
||||||
|
{
|
||||||
|
switch(action->type) {
|
||||||
|
case HLR_SHIFT:
|
||||||
|
if(action->nextstate == HLR_SUCCESS)
|
||||||
|
fputs("s~", f);
|
||||||
|
else
|
||||||
|
fprintf(f, "s%lu", action->nextstate);
|
||||||
|
break;
|
||||||
|
case HLR_REDUCE:
|
||||||
|
fputs("r(", f);
|
||||||
|
h_pprint_symbol(f, g, action->production.lhs);
|
||||||
|
fputs(" -> ", f);
|
||||||
|
#ifdef NDEBUG
|
||||||
|
// if we can't print the production, at least print its length
|
||||||
|
fprintf(f, "[%lu]", action->production.length);
|
||||||
|
#else
|
||||||
|
HCFSequence seq = {action->production.rhs};
|
||||||
|
h_pprint_sequence(f, g, &seq);
|
||||||
|
#endif
|
||||||
|
fputc(')', f);
|
||||||
|
break;
|
||||||
|
case HLR_CONFLICT:
|
||||||
|
fputc('!', f);
|
||||||
|
for(HSlistNode *x=action->branches->head; x; x=x->next) {
|
||||||
|
HLRAction *branch = x->elem;
|
||||||
|
assert(branch->type != HLR_CONFLICT); // no nesting
|
||||||
|
pprint_lraction(f, g, branch);
|
||||||
|
if(x->next) fputc('/', f); // separator
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_message(0, "not reached");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void valprint_lraction(FILE *file, void *env, void *val)
|
||||||
|
{
|
||||||
|
const HLRAction *action = val;
|
||||||
|
const HCFGrammar *grammar = env;
|
||||||
|
pprint_lraction(file, grammar, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g,
|
||||||
|
const HStringMap *map)
|
||||||
|
{
|
||||||
|
h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map);
|
||||||
|
}
|
||||||
|
|
||||||
|
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||||
|
unsigned int indent)
|
||||||
|
{
|
||||||
|
for(size_t i=0; i<table->nrows; i++) {
|
||||||
|
for(unsigned int j=0; j<indent; j++) fputc(' ', f);
|
||||||
|
fprintf(f, "%4lu:", i);
|
||||||
|
if(table->forall[i]) {
|
||||||
|
fputc(' ', f);
|
||||||
|
pprint_lraction(f, g, table->forall[i]);
|
||||||
|
if(!h_lrtable_row_empty(table, i))
|
||||||
|
fputs(" !!", f);
|
||||||
|
}
|
||||||
|
H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action)
|
||||||
|
fputc(' ', f); // separator
|
||||||
|
h_pprint_symbol(f, g, symbol);
|
||||||
|
fputc(':', f);
|
||||||
|
pprint_lraction(f, g, action);
|
||||||
|
H_END_FOREACH
|
||||||
|
fputc(' ', f); // separator
|
||||||
|
pprint_lrtable_terminals(f, g, table->tmap[i]);
|
||||||
|
fputc('\n', f);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
fputs("inadeq=", f);
|
||||||
|
for(HSlistNode *x=table->inadeq->head; x; x=x->next) {
|
||||||
|
fprintf(f, "%lu ", (uintptr_t)x->elem);
|
||||||
|
}
|
||||||
|
fputc('\n', f);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
147
src/backends/lr.h
Normal file
147
src/backends/lr.h
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
#ifndef HAMMER_BACKENDS_LR__H
|
||||||
|
#define HAMMER_BACKENDS_LR__H
|
||||||
|
|
||||||
|
#include "../hammer.h"
|
||||||
|
#include "../cfgrammar.h"
|
||||||
|
#include "../internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef HHashSet HLRState; // states are sets of LRItems
|
||||||
|
|
||||||
|
// Automaton over LR states as produced by h_lr0_dfa(): an indexed array of
// states plus a flat list of the transitions between them.
typedef struct HLRDFA_ {
|
||||||
|
size_t nstates;
|
||||||
|
const HLRState **states; // array of size nstates
|
||||||
|
HSlist *transitions; // list of HLRTransition *, in no particular per-state grouping
|
||||||
|
} HLRDFA;
|
||||||
|
|
||||||
|
// One edge "from --symbol--> to" of an HLRDFA.
typedef struct HLRTransition_ {
|
||||||
|
size_t from; // index into 'states' array
|
||||||
|
const HCFChoice *symbol; // grammar symbol labelling the edge
|
||||||
|
size_t to; // index into 'states' array
|
||||||
|
} HLRTransition;
|
||||||
|
|
||||||
|
// An LR item: the production lhs -> rhs together with a mark position.
typedef struct HLRItem_ {
|
||||||
|
HCFChoice *lhs;
|
||||||
|
HCFChoice **rhs; // NULL-terminated
|
||||||
|
size_t len; // number of elements in rhs
|
||||||
|
size_t mark; // rhs[mark] is the symbol after the mark; mark == len means the mark is at the end
|
||||||
|
} HLRItem;
|
||||||
|
|
||||||
|
// A parse table entry. 'type' selects which member of the union is valid.
typedef struct HLRAction_ {
|
||||||
|
enum {HLR_SHIFT, HLR_REDUCE, HLR_CONFLICT} type;
|
||||||
|
union {
|
||||||
|
// used with HLR_SHIFT; HLR_SUCCESS denotes the shift into the accepting end state
|
||||||
|
size_t nextstate;
|
||||||
|
|
||||||
|
// used with HLR_REDUCE
|
||||||
|
struct {
|
||||||
|
HCFChoice *lhs; // symbol carrying semantic actions etc.
|
||||||
|
size_t length; // # of symbols in rhs
|
||||||
|
#ifndef NDEBUG
|
||||||
|
HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse; kept in debug builds for pretty-printing
|
||||||
|
#endif
|
||||||
|
} production;
|
||||||
|
|
||||||
|
// used with HLR_CONFLICT
|
||||||
|
HSlist *branches; // list of possible HLRActions; pprint_lraction asserts they are not themselves conflicts
|
||||||
|
};
|
||||||
|
} HLRAction;
|
||||||
|
|
||||||
|
// LR parse table with one row per automaton state (h_lr0_table creates it
// with dfa->nstates rows).
typedef struct HLRTable_ {
|
||||||
|
size_t nrows; // dimension of the pointer arrays below
|
||||||
|
HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row
|
||||||
|
HStringMap **tmap; // map lookahead strings to HLRActions, per row
|
||||||
|
HLRAction **forall; // shortcut to set an action for an entire row
|
||||||
|
HCFChoice *start; // start symbol
|
||||||
|
HSlist *inadeq; // indices of any inadequate states, stored as (void *)(uintptr_t)
|
||||||
|
HArena *arena; // the table's actions are allocated from this arena
|
||||||
|
HAllocator *mm__; // presumably the allocator given to h_lrtable_new -- TODO confirm
|
||||||
|
} HLRTable;
|
||||||
|
|
||||||
|
// An "enhanced grammar" together with the maps that relate its symbols to
// LR transitions and to the symbols they were derived from.
// NOTE(review): the code that builds this structure is not part of this header.
typedef struct HLREnhGrammar_ {
|
||||||
|
HCFGrammar *grammar; // enhanced grammar
|
||||||
|
HHashTable *tmap; // maps transitions to enhanced-grammar symbols
|
||||||
|
HHashTable *smap; // maps enhanced-grammar symbols to transitions
|
||||||
|
HHashTable *corr; // maps symbols to sets of corresponding enhanced-grammar symbols
|
||||||
|
HArena *arena;
|
||||||
|
} HLREnhGrammar;
|
||||||
|
|
||||||
|
// State of one LR parse in progress. Created by h_lrengine_new(), advanced
// with h_lrengine_step() and queried with h_lrengine_result().
typedef struct HLREngine_ {
|
||||||
|
const HLRTable *table; // parse table driving this engine
|
||||||
|
size_t state; // current state
|
||||||
|
|
||||||
|
HSlist *stack; // holds pairs: (saved state, semantic value)
|
||||||
|
HInputStream input; // held by value
|
||||||
|
|
||||||
|
struct HLREngine_ *merged[2]; // ancestors merged into this engine
|
||||||
|
|
||||||
|
HArena *arena; // will hold the results
|
||||||
|
HArena *tarena; // tmp, deleted after parse
|
||||||
|
} HLREngine;
|
||||||
|
|
||||||
|
#define HLR_SUCCESS ((size_t)~0) // parser end state
|
||||||
|
|
||||||
|
|
||||||
|
// XXX move to internal.h or something
|
||||||
|
// XXX replace other hashtable iterations with this
|
||||||
|
// H_FOREACH_(HT) opens a block that visits every entry of the hash table HT:
// it walks all 'capacity' buckets and follows each bucket's 'next' chain,
// skipping entries whose key is NULL. The statements that follow the macro
// form the loop body, which must be terminated with H_END_FOREACH.
// The current entry is available as hte__. Inside the body, 'continue' moves
// on to the next entry, while 'break' only leaves the current bucket's chain.
#define H_FOREACH_(HT) { \
|
||||||
|
const HHashTable *ht__ = HT; \
|
||||||
|
for(size_t i__=0; i__ < ht__->capacity; i__++) { \
|
||||||
|
for(HHashTableEntry *hte__ = &ht__->contents[i__]; \
|
||||||
|
hte__; \
|
||||||
|
hte__ = hte__->next) { \
|
||||||
|
if(hte__->key == NULL) continue;
|
||||||
|
|
||||||
|
// Like H_FOREACH_, and additionally declares KEYVAR bound to the entry's key.
// KEYVAR is a declarator including its type, e.g. "HLRItem *item"; the macro
// prefixes it with 'const'.
#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT) \
|
||||||
|
const KEYVAR = hte__->key;
|
||||||
|
|
||||||
|
// Like H_FOREACH_KEY, and additionally declares VALVAR (again a declarator
// with type, e.g. "HLRAction *action") bound to the entry's value.
#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR) \
|
||||||
|
VALVAR = hte__->value;
|
||||||
|
|
||||||
|
// Closes the two loops and the enclosing block opened by H_FOREACH_.
#define H_END_FOREACH \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark);
|
||||||
|
HLRState *h_lrstate_new(HArena *arena);
|
||||||
|
HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows);
|
||||||
|
void h_lrtable_free(HLRTable *table);
|
||||||
|
HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
|
||||||
|
const HInputStream *stream);
|
||||||
|
HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
|
||||||
|
HLRAction *h_shift_action(HArena *arena, size_t nextstate);
|
||||||
|
HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
|
||||||
|
bool h_lrtable_row_empty(const HLRTable *table, size_t i);
|
||||||
|
|
||||||
|
bool h_eq_symbol(const void *p, const void *q);
|
||||||
|
bool h_eq_lr_itemset(const void *p, const void *q);
|
||||||
|
bool h_eq_transition(const void *p, const void *q);
|
||||||
|
HHashValue h_hash_symbol(const void *p);
|
||||||
|
HHashValue h_hash_lr_itemset(const void *p);
|
||||||
|
HHashValue h_hash_transition(const void *p);
|
||||||
|
|
||||||
|
HLRDFA *h_lr0_dfa(HCFGrammar *g);
|
||||||
|
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa);
|
||||||
|
|
||||||
|
HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser);
|
||||||
|
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
|
||||||
|
void h_lalr_free(HParser *parser);
|
||||||
|
|
||||||
|
const HLRAction *h_lrengine_action(const HLREngine *engine);
|
||||||
|
bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
|
||||||
|
HParseResult *h_lrengine_result(HLREngine *engine);
|
||||||
|
HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||||
|
HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
|
||||||
|
|
||||||
|
void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item);
|
||||||
|
void h_pprint_lrstate(FILE *f, const HCFGrammar *g,
|
||||||
|
const HLRState *state, unsigned int indent);
|
||||||
|
void h_pprint_lrdfa(FILE *f, const HCFGrammar *g,
|
||||||
|
const HLRDFA *dfa, unsigned int indent);
|
||||||
|
void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table,
|
||||||
|
unsigned int indent);
|
||||||
|
|
||||||
|
#endif
|
||||||
233
src/backends/lr0.c
Normal file
233
src/backends/lr0.c
Normal file
|
|
@ -0,0 +1,233 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include "lr.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Constructing the characteristic automaton (handle recognizer) */
|
||||||
|
|
||||||
|
// Return a copy of 'item', allocated from 'arena', whose mark has been moved
// one symbol to the right. The mark must not already be at the end.
static HLRItem *advance_mark(HArena *arena, const HLRItem *item)
{
  HLRItem *advanced;

  assert(item->rhs[item->mark] != NULL);

  advanced = h_arena_malloc(arena, sizeof *advanced);
  *advanced = *item;
  advanced->mark = item->mark + 1;
  return advanced;
}
|
||||||
|
|
||||||
|
// Extend the item set 'items' in place to its closure: for every item whose
// mark stands before a nonterminal, add the items for that nonterminal's
// productions (mark at position 0), repeating until nothing new is added.
static void expand_to_closure(HCFGrammar *g, HHashSet *items)
{
  // not referenced by name below; presumably consumed by the h_new() macro
  HAllocator *mm__ = g->mm__;
  HArena *arena = g->arena;
  HSlist *todo = h_slist_new(arena);

  // seed the work list with the items already present
  H_FOREACH_KEY(items, HLRItem *seed)
    h_slist_push(todo, (void *)seed);
  H_END_FOREACH

  while(!h_slist_empty(todo)) {
    const HLRItem *cur = h_slist_pop(todo);
    HCFChoice *next = cur->rhs[cur->mark];    // symbol after mark

    if(next == NULL)
      continue;                               // mark is at the end

    // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here
    if(next->type == HCF_CHOICE) {
      // one item per alternative; only new ones need further expansion
      for(HCFSequence **alt = next->seq; *alt; alt++) {
        HLRItem *fresh = h_lritem_new(arena, next, (*alt)->items, 0);
        if(h_hashset_present(items, fresh))
          continue;
        h_hashset_put(items, fresh);
        h_slist_push(todo, fresh);
      }
    } else if(next->type == HCF_CHARSET) {
      // one single-character production per member of the set
      for(unsigned int c = 0; c < 256; c++) {
        if(!charset_isset(next->charset, c))
          continue;

        // XXX allocate these single-character symbols statically somewhere
        HCFChoice **rhs = h_new(HCFChoice *, 2);
        rhs[0] = h_new(HCFChoice, 1);
        rhs[0]->type = HCF_CHAR;
        rhs[0]->chr = c;
        rhs[1] = NULL;
        HLRItem *single = h_lritem_new(arena, next, rhs, 0);
        h_hashset_put(items, single);
        // single-character item needs no further work
      }
      // the charset now acts as a non-terminal and needs a reshape;
      // this seems as good a place as any to set it
      next->reshape = h_act_first;
    }
  }
}
|
||||||
|
|
||||||
|
// Build the LR(0) characteristic automaton (handle recognizer) of grammar g.
//
// Starting from the closure of the start symbol's productions, each state on
// the work list is processed by grouping its items by the symbol after the
// mark, advancing the mark in each group, and closing the resulting kernels.
// Kernels that yield a not-yet-known item set become new states; in either
// case a transition to the destination state is recorded.
// Everything is allocated from g->arena.
HLRDFA *h_lr0_dfa(HCFGrammar *g)
{
  HArena *arena = g->arena;

  // maps itemsets to assigned array indices
  HHashSet *states = h_hashset_new(arena, h_eq_lr_itemset, h_hash_lr_itemset);
  HSlist *transitions = h_slist_new(arena);

  // States still to be processed. To save lookups, two elements are pushed
  // per state: first the itemset, then its assigned index (so the index is
  // popped first).
  HSlist *pending = h_slist_new(arena);

  // initial state: closure over all productions of the start symbol
  HLRState *initial = h_lrstate_new(arena);
  assert(g->start->type == HCF_CHOICE);
  for(HCFSequence **alt = g->start->seq; *alt; alt++)
    h_hashset_put(initial, h_lritem_new(arena, g->start, (*alt)->items, 0));
  expand_to_closure(g, initial);
  h_hashtable_put(states, initial, (void *)(uintptr_t)0);
  h_slist_push(pending, initial);
  h_slist_push(pending, (void *)(uintptr_t)0);

  while(!h_slist_empty(pending)) {
    size_t from_idx = (uintptr_t)h_slist_pop(pending);
    HLRState *from = h_slist_pop(pending);

    // maps edge symbols to the prospective neighbor states of 'from'
    HHashTable *by_symbol = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol);

    // distribute the advanced items over the neighbor kernels
    H_FOREACH_KEY(from, HLRItem *item)
      HCFChoice *edge = item->rhs[item->mark];   // symbol after mark
      if(edge == NULL)
        continue;                                // mark was at the end

      HLRState *kernel = h_hashtable_get(by_symbol, edge);
      if(kernel == NULL) {
        kernel = h_lrstate_new(arena);
        h_hashtable_put(by_symbol, edge, kernel);
      }
      h_hashset_put(kernel, advance_mark(arena, item));
    H_END_FOREACH

    // close each kernel and merge it into the set of existing states
    H_FOREACH(by_symbol, HCFChoice *symbol, HLRState *dest)
      size_t dest_idx;

      expand_to_closure(g, dest);

      if(h_hashset_present(states, dest)) {
        dest_idx = (uintptr_t)h_hashtable_get(states, dest);
      } else {
        // new state: it gets the next free index and must be processed too
        dest_idx = states->used;
        h_hashtable_put(states, dest, (void *)(uintptr_t)dest_idx);
        h_slist_push(pending, dest);
        h_slist_push(pending, (void *)(uintptr_t)dest_idx);
      }

      // record "from --symbol--> dest"
      HLRTransition *edge = h_arena_malloc(arena, sizeof(HLRTransition));
      edge->from = from_idx;
      edge->to = dest_idx;
      edge->symbol = symbol;
      h_slist_push(transitions, edge);
    H_END_FOREACH
  }

  // fill DFA struct; each state is stored at its assigned index
  HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA));
  dfa->nstates = states->used;
  dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *));
  H_FOREACH(states, HLRState *itemset, void *slot)
    dfa->states[(uintptr_t)slot] = itemset;
  H_END_FOREACH
  dfa->transitions = transitions;

  return dfa;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* LR(0) table generation */
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol,
|
||||||
|
size_t nextstate)
|
||||||
|
{
|
||||||
|
HLRAction *action = h_shift_action(table->arena, nextstate);
|
||||||
|
|
||||||
|
switch(symbol->type) {
|
||||||
|
case HCF_END:
|
||||||
|
h_stringmap_put_end(table->tmap[state], action);
|
||||||
|
break;
|
||||||
|
case HCF_CHAR:
|
||||||
|
h_stringmap_put_char(table->tmap[state], symbol->chr, action);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// nonterminal case
|
||||||
|
h_hashtable_put(table->ntmap[state], symbol, action);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate the LR(0) parse table for grammar g from its automaton 'dfa'.
//
// Every transition becomes a shift entry. Every item with the mark at the
// end becomes a row-wide reduce action; a second such item in the same row
// is recorded as an HLR_CONFLICT. Rows with such a conflict, or with a
// reduce action next to other entries, are listed in table->inadeq.
HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
{
  HAllocator *mm__ = g->mm__;
  HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
  HArena *arena = table->arena;

  // remember start symbol
  table->start = g->start;

  // shift to the accepting end state for the start symbol
  put_shift(table, 0, g->start, HLR_SUCCESS);

  // for each transition x-A->y, add "shift, goto y" to table entry (x,A)
  for(HSlistNode *node = dfa->transitions->head; node; node = node->next) {
    HLRTransition *edge = node->elem;
    put_shift(table, edge->from, edge->symbol, edge->to);
  }

  // add reduce entries, record inadequate states
  for(size_t row = 0; row < dfa->nstates; row++) {
    bool inadequate = false;

    H_FOREACH_KEY(dfa->states[row], HLRItem *item)
      if(item->mark != item->len)
        continue;                   // only items with the mark at the end

      HLRAction *reduce = h_reduce_action(arena, item);

      // reduce/reduce conflict on the row-wide action
      if(table->forall[row]) {
        reduce = h_lr_conflict(arena, table->forall[row], reduce);
        inadequate = true;
      }
      table->forall[row] = reduce;

      // shift/reduce conflict with other entries of the row
      // NOTE: these are not recorded as HLR_CONFLICTs at this point
      if(!h_lrtable_row_empty(table, row))
        inadequate = true;
    H_END_FOREACH

    if(inadequate)
      h_slist_push(table->inadeq, (void *)(uintptr_t)row);
  }

  return table;
}
|
||||||
|
|
@ -3,14 +3,6 @@
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
static uint32_t djbhash(const uint8_t *buf, size_t len) {
|
|
||||||
uint32_t hash = 5381;
|
|
||||||
while (len--) {
|
|
||||||
hash = hash * 33 + *buf++;
|
|
||||||
}
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
// short-hand for constructing HCachedResult's
|
// short-hand for constructing HCachedResult's
|
||||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||||
|
|
@ -214,7 +206,7 @@ void h_packrat_free(HParser *parser) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t cache_key_hash(const void* key) {
|
static uint32_t cache_key_hash(const void* key) {
|
||||||
return djbhash(key, sizeof(HParserCacheKey));
|
return h_djbhash(key, sizeof(HParserCacheKey));
|
||||||
}
|
}
|
||||||
static bool cache_key_equal(const void* key1, const void* key2) {
|
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||||
|
|
|
||||||
289
src/cfgrammar.c
289
src/cfgrammar.c
|
|
@ -18,12 +18,13 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
|
||||||
g->mm__ = mm__;
|
g->mm__ = mm__;
|
||||||
g->arena = h_new_arena(mm__, 0); // default blocksize
|
g->arena = h_new_arena(mm__, 0); // default blocksize
|
||||||
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
|
||||||
|
g->start = NULL;
|
||||||
g->geneps = NULL;
|
g->geneps = NULL;
|
||||||
g->first = NULL;
|
g->first = NULL;
|
||||||
g->follow = NULL;
|
g->follow = NULL;
|
||||||
g->kmax = 0; // will be increased as needed by ensure_k
|
g->kmax = 0; // will be increased as needed by ensure_k
|
||||||
|
|
||||||
HCFStringMap *eps = h_stringmap_new(g->arena);
|
HStringMap *eps = h_stringmap_new(g->arena);
|
||||||
h_stringmap_put_epsilon(eps, INSET);
|
h_stringmap_put_epsilon(eps, INSET);
|
||||||
g->singleton_epsilon = eps;
|
g->singleton_epsilon = eps;
|
||||||
|
|
||||||
|
|
@ -50,6 +51,11 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
|
||||||
if(desugared == NULL)
|
if(desugared == NULL)
|
||||||
return NULL; // -> backend not suitable for this parser
|
return NULL; // -> backend not suitable for this parser
|
||||||
|
|
||||||
|
return h_cfgrammar_(mm__, desugared);
|
||||||
|
}
|
||||||
|
|
||||||
|
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
|
||||||
|
{
|
||||||
HCFGrammar *g = h_cfgrammar_new(mm__);
|
HCFGrammar *g = h_cfgrammar_new(mm__);
|
||||||
|
|
||||||
// recursively traverse the desugared form and collect all HCFChoices that
|
// recursively traverse the desugared form and collect all HCFChoices that
|
||||||
|
|
@ -219,39 +225,52 @@ static void collect_geneps(HCFGrammar *g)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
HCFStringMap *h_stringmap_new(HArena *a)
|
HStringMap *h_stringmap_new(HArena *a)
|
||||||
{
|
{
|
||||||
HCFStringMap *m = h_arena_malloc(a, sizeof(HCFStringMap));
|
HStringMap *m = h_arena_malloc(a, sizeof(HStringMap));
|
||||||
|
m->epsilon_branch = NULL;
|
||||||
|
m->end_branch = NULL;
|
||||||
m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
|
m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
|
||||||
m->arena = a;
|
m->arena = a;
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_stringmap_put_end(HCFStringMap *m, void *v)
|
void h_stringmap_put_end(HStringMap *m, void *v)
|
||||||
{
|
{
|
||||||
m->end_branch = v;
|
m->end_branch = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_stringmap_put_epsilon(HCFStringMap *m, void *v)
|
void h_stringmap_put_epsilon(HStringMap *m, void *v)
|
||||||
{
|
{
|
||||||
m->epsilon_branch = v;
|
m->epsilon_branch = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v)
|
void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends)
|
||||||
{
|
{
|
||||||
HCFStringMap *node = h_stringmap_new(m->arena);
|
h_hashtable_put(m->char_branches, (void *)char_key(c), ends);
|
||||||
|
}
|
||||||
|
|
||||||
|
void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v)
|
||||||
|
{
|
||||||
|
HStringMap *node = h_stringmap_new(m->arena);
|
||||||
h_stringmap_put_epsilon(node, v);
|
h_stringmap_put_epsilon(node, v);
|
||||||
h_hashtable_put(m->char_branches, (void *)char_key(c), node);
|
h_stringmap_put_after(m, c, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper for h_stringmap_update
|
// helper for h_stringmap_update
|
||||||
void *combine_stringmap(void *v1, void *v2)
|
static void *combine_stringmap(void *v1, const void *v2)
|
||||||
{
|
{
|
||||||
h_stringmap_update((HCFStringMap *)v1, (HCFStringMap *)v2);
|
HStringMap *m1 = v1;
|
||||||
return v1;
|
const HStringMap *m2 = v2;
|
||||||
|
if(!m1)
|
||||||
|
m1 = h_stringmap_new(m2->arena);
|
||||||
|
h_stringmap_update(m1, m2);
|
||||||
|
|
||||||
|
return m1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n)
|
/* Note: Does *not* reuse submaps from n in building m. */
|
||||||
|
void h_stringmap_update(HStringMap *m, const HStringMap *n)
|
||||||
{
|
{
|
||||||
if(n->epsilon_branch)
|
if(n->epsilon_branch)
|
||||||
m->epsilon_branch = n->epsilon_branch;
|
m->epsilon_branch = n->epsilon_branch;
|
||||||
|
|
@ -262,32 +281,91 @@ void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n)
|
||||||
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
|
h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end)
|
/* Replace all occurrences of old in m with new.
|
||||||
|
* If old is NULL, replace all values in m with new.
|
||||||
|
* If new is NULL, remove the respective values.
|
||||||
|
*/
|
||||||
|
void h_stringmap_replace(HStringMap *m, void *old, void *new)
{
  // with old == NULL every value that is present counts as a match
  const bool match_any = (old == NULL);

  if(m->epsilon_branch && (match_any || m->epsilon_branch == old))
    m->epsilon_branch = new;
  if(m->end_branch && (match_any || m->end_branch == old))
    m->end_branch = new;

  // apply the same replacement to every submap below m->char_branches
  const HHashTable *branches = m->char_branches;
  for(size_t bucket = 0; bucket < branches->capacity; bucket++) {
    HHashTableEntry *entry = &branches->contents[bucket];
    while(entry != NULL) {
      if(entry->key != NULL) {
        HStringMap *sub = entry->value;
        if(sub)
          h_stringmap_replace(sub, old, new);
      }
      entry = entry->next;
    }
  }
}
|
||||||
|
|
||||||
|
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end)
|
||||||
{
|
{
|
||||||
for(size_t i=0; i<n; i++) {
|
for(size_t i=0; i<n; i++) {
|
||||||
if(i==n-1 && end && m->end_branch)
|
if(i==n-1 && end && m->end_branch)
|
||||||
return m->end_branch;
|
return m->end_branch;
|
||||||
m = h_hashtable_get(m->char_branches, (void *)char_key(str[i]));
|
m = h_stringmap_get_char(m, str[i]);
|
||||||
if(!m)
|
if(!m)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return m->epsilon_branch;
|
return m->epsilon_branch;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end)
|
// Look up the value stored for the upcoming input, reading as many bytes of
// lookahead as needed. Returns the first epsilon value met while descending,
// the end-of-input value of the current node once the input is exhausted,
// or NULL if the input leaves the map.
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
{
  // The lookahead stream is passed by value, i.e. it is a copy; reading bits
  // from it does not consume them from the real input.
  const HStringMap *node = m;

  while(node != NULL) {
    // Input matched. It is up to the table generator to ensure that more
    // lookahead would not bring a more specific match. (LLk)
    if(node->epsilon_branch)
      return node->epsilon_branch;

    const uint8_t byte = h_read_bits(&lookahead, 8, false);

    // end of input
    // XXX assumption of byte-wise grammar and input
    if(lookahead.overrun)
      return node->end_branch;

    // no match yet, descend
    node = h_stringmap_get_char(node, byte);
  }

  return NULL;
}
|
||||||
|
|
||||||
|
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end)
|
||||||
{
|
{
|
||||||
return (h_stringmap_get(m, str, n, end) != NULL);
|
return (h_stringmap_get(m, str, n, end) != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool h_stringmap_present_epsilon(const HCFStringMap *m)
|
bool h_stringmap_present_epsilon(const HStringMap *m)
|
||||||
{
|
{
|
||||||
return (m->epsilon_branch != NULL);
|
return (m->epsilon_branch != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool h_stringmap_empty(const HStringMap *m)
|
||||||
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
|
||||||
{
|
{
|
||||||
HCFStringMap *ret;
|
return (m->epsilon_branch == NULL
|
||||||
|
&& m->end_branch == NULL
|
||||||
|
&& h_hashtable_empty(m->char_branches));
|
||||||
|
}
|
||||||
|
|
||||||
|
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
|
{
|
||||||
|
HStringMap *ret;
|
||||||
HCFSequence **p;
|
HCFSequence **p;
|
||||||
uint8_t c;
|
uint8_t c;
|
||||||
|
|
||||||
|
|
@ -333,18 +411,18 @@ const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// helpers for h_first_seq, definitions below
|
// helpers for h_first_seq, definitions below
|
||||||
static bool is_singleton_epsilon(const HCFStringMap *m);
|
static bool is_singleton_epsilon(const HStringMap *m);
|
||||||
static bool any_string_shorter(size_t k, const HCFStringMap *m);
|
static bool any_string_shorter(size_t k, const HStringMap *m);
|
||||||
|
|
||||||
// pointer to functions like h_first_seq
|
// pointer to functions like h_first_seq
|
||||||
typedef const HCFStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
|
typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
|
||||||
|
|
||||||
// helper for h_first_seq and h_follow
|
// helper for h_first_seq and h_follow
|
||||||
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
|
||||||
size_t k, const HCFStringMap *as,
|
size_t k, const HStringMap *as,
|
||||||
StringSetFun f, HCFChoice **tail);
|
StringSetFun f, HCFChoice **tail);
|
||||||
|
|
||||||
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
// shortcut: the first set of the empty sequence, for any k, is {""}
|
// shortcut: the first set of the empty sequence, for any k, is {""}
|
||||||
if(*s == NULL)
|
if(*s == NULL)
|
||||||
|
|
@ -355,7 +433,7 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
HCFChoice *x = s[0];
|
HCFChoice *x = s[0];
|
||||||
HCFChoice **tail = s+1;
|
HCFChoice **tail = s+1;
|
||||||
|
|
||||||
const HCFStringMap *first_x = h_first(k, g, x);
|
const HStringMap *first_x = h_first(k, g, x);
|
||||||
|
|
||||||
// shortcut: if first_k(X) = {""}, just return first_k(tail)
|
// shortcut: if first_k(X) = {""}, just return first_k(tail)
|
||||||
if(is_singleton_epsilon(first_x))
|
if(is_singleton_epsilon(first_x))
|
||||||
|
|
@ -366,7 +444,7 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
return first_x;
|
return first_x;
|
||||||
|
|
||||||
// create a new result set and build up the set described above
|
// create a new result set and build up the set described above
|
||||||
HCFStringMap *ret = h_stringmap_new(g->arena);
|
HStringMap *ret = h_stringmap_new(g->arena);
|
||||||
|
|
||||||
// extend the elements of first_k(X) up to length k from tail
|
// extend the elements of first_k(X) up to length k from tail
|
||||||
stringset_extend(g, ret, k, first_x, h_first_seq, tail);
|
stringset_extend(g, ret, k, first_x, h_first_seq, tail);
|
||||||
|
|
@ -374,14 +452,14 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_singleton_epsilon(const HCFStringMap *m)
|
static bool is_singleton_epsilon(const HStringMap *m)
|
||||||
{
|
{
|
||||||
return ( m->epsilon_branch
|
return ( m->epsilon_branch
|
||||||
&& !m->end_branch
|
&& !m->end_branch
|
||||||
&& h_hashtable_empty(m->char_branches) );
|
&& h_hashtable_empty(m->char_branches) );
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool any_string_shorter(size_t k, const HCFStringMap *m)
|
static bool any_string_shorter(size_t k, const HStringMap *m)
|
||||||
{
|
{
|
||||||
if(k==0)
|
if(k==0)
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -395,7 +473,7 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
|
||||||
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if(hte->key == NULL)
|
||||||
continue;
|
continue;
|
||||||
HCFStringMap *m_ = hte->value;
|
HStringMap *m_ = hte->value;
|
||||||
|
|
||||||
// check subtree for strings shorter than k-1
|
// check subtree for strings shorter than k-1
|
||||||
if(any_string_shorter(k-1, m_))
|
if(any_string_shorter(k-1, m_))
|
||||||
|
|
@ -406,15 +484,32 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
// helper for h_predict
|
||||||
|
static void remove_all_shorter(size_t k, HStringMap *m)
|
||||||
|
{
|
||||||
|
if(k==0) return;
|
||||||
|
m->epsilon_branch = NULL;
|
||||||
|
if(k==1) return;
|
||||||
|
|
||||||
|
// iterate over m->char_branches
|
||||||
|
const HHashTable *ht = m->char_branches;
|
||||||
|
for(size_t i=0; i < ht->capacity; i++) {
|
||||||
|
for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
|
||||||
|
if(hte->key == NULL)
|
||||||
|
continue;
|
||||||
|
remove_all_shorter(k-1, hte->value); // recursion into subtree
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// h_follow adapted to the signature of StringSetFun
|
// h_follow adapted to the signature of StringSetFun
|
||||||
static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
|
static inline
|
||||||
|
const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
|
||||||
{
|
{
|
||||||
return h_follow(k, g, *s);
|
return h_follow(k, g, *s);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
{
|
{
|
||||||
// consider all occurances of X in g
|
// consider all occurances of X in g
|
||||||
// the follow set of X is the union of:
|
// the follow set of X is the union of:
|
||||||
|
|
@ -425,7 +520,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
// first_k(tail follow_k(A)) =
|
// first_k(tail follow_k(A)) =
|
||||||
// { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
|
// { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
|
||||||
|
|
||||||
HCFStringMap *ret;
|
HStringMap *ret;
|
||||||
|
|
||||||
// shortcut: follow_0(X) is always {""}
|
// shortcut: follow_0(X) is always {""}
|
||||||
if(k==0)
|
if(k==0)
|
||||||
|
|
@ -463,9 +558,7 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
if(*s == x) { // occurance found
|
if(*s == x) { // occurance found
|
||||||
HCFChoice **tail = s+1;
|
HCFChoice **tail = s+1;
|
||||||
|
|
||||||
const HCFStringMap *first_tail = h_first_seq(k, g, tail);
|
const HStringMap *first_tail = h_first_seq(k, g, tail);
|
||||||
|
|
||||||
//h_stringmap_update(ret, first_tail);
|
|
||||||
|
|
||||||
// extend the elems of first_k(tail) up to length k from follow(A)
|
// extend the elems of first_k(tail) up to length k from follow(A)
|
||||||
stringset_extend(g, ret, k, first_tail, h_follow_, &a);
|
stringset_extend(g, ret, k, first_tail, h_follow_, &a);
|
||||||
|
|
@ -478,9 +571,30 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HStringMap *h_predict(size_t k, HCFGrammar *g,
|
||||||
|
const HCFChoice *A, const HCFSequence *rhs)
|
||||||
|
{
|
||||||
|
HStringMap *ret = h_stringmap_new(g->arena);
|
||||||
|
|
||||||
|
// predict_k(A -> rhs) =
|
||||||
|
// { ab | a <- first_k(rhs), b <- follow_k(A), |ab|=k }
|
||||||
|
|
||||||
|
const HStringMap *first_rhs = h_first_seq(k, g, rhs->items);
|
||||||
|
|
||||||
|
// casting the const off of A below. note: stringset_extend does
|
||||||
|
// not touch this argument, only passes it through to h_follow
|
||||||
|
// in this case, which accepts it, once again, as const.
|
||||||
|
stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
|
||||||
|
|
||||||
|
// make sure there are only strings of length _exactly_ k
|
||||||
|
remove_all_shorter(k, ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
|
||||||
static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
|
||||||
size_t k, const HCFStringMap *as,
|
size_t k, const HStringMap *as,
|
||||||
StringSetFun f, HCFChoice **tail)
|
StringSetFun f, HCFChoice **tail)
|
||||||
{
|
{
|
||||||
if(as->epsilon_branch) {
|
if(as->epsilon_branch) {
|
||||||
|
|
@ -503,13 +617,13 @@ static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
||||||
uint8_t c = key_char((HCharKey)hte->key);
|
uint8_t c = key_char((HCharKey)hte->key);
|
||||||
|
|
||||||
// follow the branch to find the set { a' | t a' <- as }
|
// follow the branch to find the set { a' | t a' <- as }
|
||||||
HCFStringMap *as_ = (HCFStringMap *)hte->value;
|
HStringMap *as_ = (HStringMap *)hte->value;
|
||||||
|
|
||||||
// now the elements of ret that begin with t are given by
|
// now the elements of ret that begin with t are given by
|
||||||
// t { a b | a <- as_, b <- f_l(tail), l=k-|a|-1 }
|
// t { a b | a <- as_, b <- f_l(tail), l=k-|a|-1 }
|
||||||
// so we can use recursion over k
|
// so we can use recursion over k
|
||||||
HCFStringMap *ret_ = h_stringmap_new(g->arena);
|
HStringMap *ret_ = h_stringmap_new(g->arena);
|
||||||
h_stringmap_put_char(ret, c, ret_);
|
h_stringmap_put_after(ret, c, ret_);
|
||||||
|
|
||||||
stringset_extend(g, ret_, k-1, as_, f, tail);
|
stringset_extend(g, ret_, k-1, as_, f, tail);
|
||||||
}
|
}
|
||||||
|
|
@ -517,7 +631,7 @@ static void stringset_extend(HCFGrammar *g, HCFStringMap *ret,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void pprint_char(FILE *f, char c)
|
void h_pprint_char(FILE *f, char c)
|
||||||
{
|
{
|
||||||
switch(c) {
|
switch(c) {
|
||||||
case '"': fputs("\\\"", f); break;
|
case '"': fputs("\\\"", f); break;
|
||||||
|
|
@ -541,7 +655,7 @@ static void pprint_charset_char(FILE *f, char c)
|
||||||
case '"': fputc(c, f); break;
|
case '"': fputc(c, f); break;
|
||||||
case '-': fputs("\\-", f); break;
|
case '-': fputs("\\-", f); break;
|
||||||
case ']': fputs("\\-", f); break;
|
case ']': fputs("\\-", f); break;
|
||||||
default: pprint_char(f, c);
|
default: h_pprint_char(f, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -589,18 +703,18 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x)
|
||||||
for(; *x; x++) {
|
for(; *x; x++) {
|
||||||
if((*x)->type != HCF_CHAR)
|
if((*x)->type != HCF_CHAR)
|
||||||
break;
|
break;
|
||||||
pprint_char(f, (*x)->chr);
|
h_pprint_char(f, (*x)->chr);
|
||||||
}
|
}
|
||||||
fputc('"', f);
|
fputc('"', f);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
||||||
{
|
{
|
||||||
switch(x->type) {
|
switch(x->type) {
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
fputc('"', f);
|
fputc('"', f);
|
||||||
pprint_char(f, x->chr);
|
h_pprint_char(f, x->chr);
|
||||||
fputc('"', f);
|
fputc('"', f);
|
||||||
break;
|
break;
|
||||||
case HCF_END:
|
case HCF_END:
|
||||||
|
|
@ -614,31 +728,36 @@ static void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
||||||
{
|
{
|
||||||
HCFChoice **x = seq->items;
|
HCFChoice **x = seq->items;
|
||||||
|
|
||||||
if(*x == NULL) { // the empty sequence
|
if(*x == NULL) { // the empty sequence
|
||||||
fputs(" \"\"", f);
|
fputs("\"\"", f);
|
||||||
} else {
|
} else {
|
||||||
while(*x) {
|
while(*x) {
|
||||||
fputc(' ', f); // separator
|
if(x != seq->items) fputc(' ', f); // internal separator
|
||||||
|
|
||||||
if((*x)->type == HCF_CHAR) {
|
if((*x)->type == HCF_CHAR) {
|
||||||
// condense character strings
|
// condense character strings
|
||||||
x = pprint_string(f, x);
|
x = pprint_string(f, x);
|
||||||
} else {
|
} else {
|
||||||
pprint_symbol(f, g, *x);
|
h_pprint_symbol(f, g, *x);
|
||||||
x++;
|
x++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// adds some separators expected below
|
||||||
|
static void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq)
|
||||||
|
{
|
||||||
|
fputc(' ', f);
|
||||||
|
h_pprint_sequence(f, g, seq);
|
||||||
fputc('\n', f);
|
fputc('\n', f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
||||||
void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
|
|
||||||
int indent, int len)
|
int indent, int len)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -709,7 +828,7 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
|
||||||
|
|
||||||
a = hte->key; // production's left-hand symbol
|
a = hte->key; // production's left-hand symbol
|
||||||
|
|
||||||
pprint_symbol(file, g, a);
|
h_pprint_symbol(file, g, a);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -718,26 +837,44 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in
|
||||||
|
|
||||||
#define BUFSIZE 512
|
#define BUFSIZE 512
|
||||||
|
|
||||||
void pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, const HCFStringMap *set)
|
static bool
|
||||||
|
pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
|
||||||
|
void (*valprint)(FILE *f, void *env, void *val), void *env,
|
||||||
|
const HStringMap *map)
|
||||||
{
|
{
|
||||||
assert(n < BUFSIZE-4);
|
assert(n < BUFSIZE-4);
|
||||||
|
|
||||||
if(set->epsilon_branch) {
|
if(map->epsilon_branch) {
|
||||||
if(!first) fputc(',', file); first=false;
|
if(!first) fputc(sep, file); first=false;
|
||||||
if(n==0)
|
if(n==0) {
|
||||||
fputs("''", file);
|
fputs("\"\"", file);
|
||||||
else
|
} else {
|
||||||
|
fputs("\"", file);
|
||||||
fwrite(prefix, 1, n, file);
|
fwrite(prefix, 1, n, file);
|
||||||
|
fputs("\"", file);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(set->end_branch) {
|
if(valprint) {
|
||||||
if(!first) fputc(',', file); first=false;
|
fputc(':', file);
|
||||||
fwrite(prefix, 1, n, file);
|
valprint(file, env, map->epsilon_branch);
|
||||||
fputc('$', file);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over set->char_branches
|
if(map->end_branch) {
|
||||||
HHashTable *ht = set->char_branches;
|
if(!first) fputs(",\"", file); first=false;
|
||||||
|
if(n>0) fputs("\"\"", file);
|
||||||
|
fwrite(prefix, 1, n, file);
|
||||||
|
if(n>0) fputs("\"\"", file);
|
||||||
|
fputs("$", file);
|
||||||
|
|
||||||
|
if(valprint) {
|
||||||
|
fputc(':', file);
|
||||||
|
valprint(file, env, map->end_branch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate over map->char_branches
|
||||||
|
HHashTable *ht = map->char_branches;
|
||||||
size_t i;
|
size_t i;
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
for(i=0; i < ht->capacity; i++) {
|
for(i=0; i < ht->capacity; i++) {
|
||||||
|
|
@ -745,7 +882,7 @@ void pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, cons
|
||||||
if(hte->key == NULL)
|
if(hte->key == NULL)
|
||||||
continue;
|
continue;
|
||||||
uint8_t c = key_char((HCharKey)hte->key);
|
uint8_t c = key_char((HCharKey)hte->key);
|
||||||
HCFStringMap *ends = hte->value;
|
HStringMap *ends = hte->value;
|
||||||
|
|
||||||
size_t n_ = n;
|
size_t n_ = n;
|
||||||
switch(c) {
|
switch(c) {
|
||||||
|
|
@ -763,18 +900,28 @@ void pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, cons
|
||||||
n_ += sprintf(prefix+n_, "\\x%.2X", c);
|
n_ += sprintf(prefix+n_, "\\x%.2X", c);
|
||||||
}
|
}
|
||||||
|
|
||||||
pprint_stringset_elems(file, first, prefix, n_, ends);
|
first = pprint_stringmap_elems(file, first, prefix, n_,
|
||||||
|
sep, valprint, env, ends);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return first;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent)
|
void h_pprint_stringmap(FILE *file, char sep,
|
||||||
|
void (*valprint)(FILE *f, void *env, void *val), void *env,
|
||||||
|
const HStringMap *map)
|
||||||
|
{
|
||||||
|
char buf[BUFSIZE];
|
||||||
|
pprint_stringmap_elems(file, true, buf, 0, sep, valprint, env, map);
|
||||||
|
}
|
||||||
|
|
||||||
|
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
for(j=0; j<indent; j++) fputc(' ', file);
|
for(j=0; j<indent; j++) fputc(' ', file);
|
||||||
|
|
||||||
char buf[BUFSIZE];
|
|
||||||
fputc('{', file);
|
fputc('{', file);
|
||||||
pprint_stringset_elems(file, true, buf, 0, set);
|
h_pprint_stringmap(file, ',', NULL, NULL, set);
|
||||||
fputs("}\n", file);
|
fputs("}\n", file);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ typedef struct HCFGrammar_ {
|
||||||
|
|
||||||
// constant set containing only the empty string.
|
// constant set containing only the empty string.
|
||||||
// this is only a member of HCFGrammar because it needs a pointer to arena.
|
// this is only a member of HCFGrammar because it needs a pointer to arena.
|
||||||
const struct HCFStringMap_ *singleton_epsilon;
|
const struct HStringMap_ *singleton_epsilon;
|
||||||
} HCFGrammar;
|
} HCFGrammar;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -32,21 +32,28 @@ static inline uint8_t key_char(HCharKey k) { return (0xFF & k); }
|
||||||
* input tokens.
|
* input tokens.
|
||||||
* Each path through the tree represents the string along its branches.
|
* Each path through the tree represents the string along its branches.
|
||||||
*/
|
*/
|
||||||
typedef struct HCFStringMap_ {
|
typedef struct HStringMap_ {
|
||||||
void *epsilon_branch; // points to leaf value
|
void *epsilon_branch; // points to leaf value
|
||||||
void *end_branch; // points to leaf value
|
void *end_branch; // points to leaf value
|
||||||
HHashTable *char_branches; // maps to inner nodes (HCFStringMaps)
|
HHashTable *char_branches; // maps to inner nodes (HStringMaps)
|
||||||
HArena *arena;
|
HArena *arena;
|
||||||
} HCFStringMap;
|
} HStringMap;
|
||||||
|
|
||||||
HCFStringMap *h_stringmap_new(HArena *a);
|
HStringMap *h_stringmap_new(HArena *a);
|
||||||
void h_stringmap_put_end(HCFStringMap *m, void *v);
|
void h_stringmap_put_end(HStringMap *m, void *v);
|
||||||
void h_stringmap_put_epsilon(HCFStringMap *m, void *v);
|
void h_stringmap_put_epsilon(HStringMap *m, void *v);
|
||||||
void h_stringmap_put_char(HCFStringMap *m, uint8_t c, void *v);
|
void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends);
|
||||||
void h_stringmap_update(HCFStringMap *m, const HCFStringMap *n);
|
void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v);
|
||||||
void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
void h_stringmap_update(HStringMap *m, const HStringMap *n);
|
||||||
bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end);
|
void h_stringmap_replace(HStringMap *m, void *old, void *new);
|
||||||
bool h_stringmap_present_epsilon(const HCFStringMap *m);
|
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end);
|
||||||
|
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
|
||||||
|
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
|
||||||
|
bool h_stringmap_present_epsilon(const HStringMap *m);
|
||||||
|
bool h_stringmap_empty(const HStringMap *m);
|
||||||
|
|
||||||
|
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
|
||||||
|
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
|
||||||
|
|
||||||
|
|
||||||
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
/* Convert 'parser' into CFG representation by desugaring and compiling the set
|
||||||
|
|
@ -54,6 +61,9 @@ bool h_stringmap_present_epsilon(const HCFStringMap *m);
|
||||||
* A NULL return means we are unable to represent the parser as a CFG.
|
* A NULL return means we are unable to represent the parser as a CFG.
|
||||||
*/
|
*/
|
||||||
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
|
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
|
||||||
|
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *start);
|
||||||
|
|
||||||
|
HCFGrammar *h_cfgrammar_new(HAllocator *mm__);
|
||||||
|
|
||||||
/* Frees the given grammar and associated data.
|
/* Frees the given grammar and associated data.
|
||||||
* Does *not* free parsers' CFG forms as created by h_desugar.
|
* Does *not* free parsers' CFG forms as created by h_desugar.
|
||||||
|
|
@ -67,16 +77,28 @@ bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol);
|
||||||
bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s);
|
bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s);
|
||||||
|
|
||||||
/* Compute first_k set of symbol x. Memoized. */
|
/* Compute first_k set of symbol x. Memoized. */
|
||||||
const HCFStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x);
|
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x);
|
||||||
|
|
||||||
/* Compute first_k set of sentential form s. s NULL-terminated. */
|
/* Compute first_k set of sentential form s. s NULL-terminated. */
|
||||||
const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s);
|
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s);
|
||||||
|
|
||||||
/* Compute follow_k set of symbol x. Memoized. */
|
/* Compute follow_k set of symbol x. Memoized. */
|
||||||
const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);
|
||||||
|
|
||||||
|
/* Compute the predict_k set of production "A -> rhs".
|
||||||
|
* Always returns a newly-allocated HStringMap.
|
||||||
|
*/
|
||||||
|
HStringMap *h_predict(size_t k, HCFGrammar *g,
|
||||||
|
const HCFChoice *A, const HCFSequence *rhs);
|
||||||
|
|
||||||
|
|
||||||
/* Pretty-printers for grammars and associated data. */
|
/* Pretty-printers for grammars and associated data. */
|
||||||
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
|
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
|
||||||
|
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
|
||||||
|
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
|
||||||
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
|
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
|
||||||
void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent);
|
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
|
||||||
|
void h_pprint_stringmap(FILE *file, char sep,
|
||||||
|
void (*valprint)(FILE *f, void *env, void *val), void *env,
|
||||||
|
const HStringMap *map);
|
||||||
|
void h_pprint_char(FILE *file, char c);
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,16 @@ HSlist* h_slist_copy(HSlist *slist) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// like h_slist_pop, but does not deallocate the head node
|
||||||
|
void* h_slist_drop(HSlist *slist) {
|
||||||
|
HSlistNode *head = slist->head;
|
||||||
|
if (!head)
|
||||||
|
return NULL;
|
||||||
|
void* ret = head->elem;
|
||||||
|
slist->head = head->next;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
void* h_slist_pop(HSlist *slist) {
|
void* h_slist_pop(HSlist *slist) {
|
||||||
HSlistNode *head = slist->head;
|
HSlistNode *head = slist->head;
|
||||||
if (!head)
|
if (!head)
|
||||||
|
|
@ -147,6 +157,8 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
|
||||||
for (hte = &ht->contents[hashval & (ht->capacity - 1)];
|
for (hte = &ht->contents[hashval & (ht->capacity - 1)];
|
||||||
hte != NULL;
|
hte != NULL;
|
||||||
hte = hte->next) {
|
hte = hte->next) {
|
||||||
|
if (hte->key == NULL)
|
||||||
|
continue;
|
||||||
if (hte->hashval != hashval)
|
if (hte->hashval != hashval)
|
||||||
continue;
|
continue;
|
||||||
if (ht->equalFunc(key, hte->key))
|
if (ht->equalFunc(key, hte->key))
|
||||||
|
|
@ -201,7 +213,7 @@ void h_hashtable_update(HHashTable *dst, const HHashTable *src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_hashtable_merge(void *(*combine)(void *v1, void *v2),
|
void h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
|
||||||
HHashTable *dst, const HHashTable *src) {
|
HHashTable *dst, const HHashTable *src) {
|
||||||
size_t i;
|
size_t i;
|
||||||
HHashTableEntry *hte;
|
HHashTableEntry *hte;
|
||||||
|
|
@ -209,13 +221,9 @@ void h_hashtable_merge(void *(*combine)(void *v1, void *v2),
|
||||||
for(hte = &src->contents[i]; hte; hte = hte->next) {
|
for(hte = &src->contents[i]; hte; hte = hte->next) {
|
||||||
if(hte->key == NULL)
|
if(hte->key == NULL)
|
||||||
continue;
|
continue;
|
||||||
void *oldvalue = h_hashtable_get(dst, hte->key);
|
void *dstvalue = h_hashtable_get(dst, hte->key);
|
||||||
void *newvalue;
|
void *srcvalue = hte->value;
|
||||||
if(oldvalue)
|
h_hashtable_put(dst, hte->key, combine(dstvalue, srcvalue));
|
||||||
newvalue = combine(oldvalue, hte->value);
|
|
||||||
else
|
|
||||||
newvalue = hte->value;
|
|
||||||
h_hashtable_put(dst, hte->key, newvalue);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -236,6 +244,7 @@ int h_hashtable_present(const HHashTable* ht, const void* key) {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_hashtable_del(HHashTable* ht, const void* key) {
|
void h_hashtable_del(HHashTable* ht, const void* key) {
|
||||||
HHashValue hashval = ht->hashFunc(key);
|
HHashValue hashval = ht->hashFunc(key);
|
||||||
#ifdef CONSISTENCY_CHECK
|
#ifdef CONSISTENCY_CHECK
|
||||||
|
|
@ -261,6 +270,7 @@ void h_hashtable_del(HHashTable* ht, const void* key) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_hashtable_free(HHashTable* ht) {
|
void h_hashtable_free(HHashTable* ht) {
|
||||||
for (size_t i = 0; i < ht->capacity; i++) {
|
for (size_t i = 0; i < ht->capacity; i++) {
|
||||||
HHashTableEntry *hten, *hte = &ht->contents[i];
|
HHashTableEntry *hten, *hte = &ht->contents[i];
|
||||||
|
|
@ -276,15 +286,76 @@ void h_hashtable_free(HHashTable* ht) {
|
||||||
h_arena_free(ht->arena, ht->contents);
|
h_arena_free(ht->arena, ht->contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// helper for hte_equal
|
||||||
|
static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) {
|
||||||
|
while(xs && ys) {
|
||||||
|
xs=xs->next;
|
||||||
|
ys=ys->next;
|
||||||
|
// skip NULL keys (= element not present)
|
||||||
|
while(xs && xs->key == NULL) xs=xs->next;
|
||||||
|
while(ys && ys->key == NULL) ys=ys->next;
|
||||||
|
}
|
||||||
|
return (xs == ys); // both NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper for hte_equal: are all elements of xs present in ys?
|
||||||
|
static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
|
||||||
|
{
|
||||||
|
for(; xs; xs=xs->next) {
|
||||||
|
if(xs->key == NULL) continue; // element not present
|
||||||
|
|
||||||
|
HHashTableEntry *hte;
|
||||||
|
for(hte=ys; hte; hte=hte->next) {
|
||||||
|
if(hte->key == xs->key) break; // assume an element is equal to itself
|
||||||
|
if(hte->hashval != xs->hashval) continue; // shortcut
|
||||||
|
if(eq(hte->key, xs->key)) break;
|
||||||
|
}
|
||||||
|
if(hte == NULL) return false; // element not found
|
||||||
|
}
|
||||||
|
return true; // all found
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare two lists of HHashTableEntries
|
||||||
|
static inline bool hte_equal(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) {
|
||||||
|
return (hte_same_length(xs, ys) && hte_subset(eq, xs, ys));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set equality of HHashSets.
|
||||||
|
* Obviously, 'a' and 'b' must use the same equality function.
|
||||||
|
* Not strictly necessary, but we also assume the same hash function.
|
||||||
|
*/
|
||||||
|
bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
|
||||||
|
if(a->capacity == b->capacity) {
|
||||||
|
// iterate over the buckets in parallel
|
||||||
|
for(size_t i=0; i < a->capacity; i++) {
|
||||||
|
if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i]))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert_message(0, "h_hashset_equal called on sets of different capacity");
|
||||||
|
// TODO implement general case
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Equality function on raw pointers: two keys are equal iff they are the
// same address.
bool h_eq_ptr(const void *p, const void *q) {
  const bool same_address = (p == q);
  return same_address;
}
|
||||||
|
|
||||||
// Hash function on raw pointers: the address shifted right by four bits.
HHashValue h_hash_ptr(const void *p) {
  // XXX just djbhash it? it does make the benchmark ~7% slower.
  // (that alternative would be h_djbhash over the bytes of the pointer value)
  const uintptr_t address = (uintptr_t)p;
  return address >> 4;
}
|
||||||
|
|
||||||
|
// Hash 'len' bytes starting at 'buf': begin with 5381 and, for each byte in
// order, multiply the running value by 33 and add the byte (32-bit unsigned
// arithmetic).  An empty buffer hashes to 5381.
uint32_t h_djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    acc = acc * 33 + buf[i];
  }
  return acc;
}
|
||||||
|
|
||||||
HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
|
HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
|
||||||
HSArray *ret = h_new(HSArray, 1);
|
HSArray *ret = h_new(HSArray, 1);
|
||||||
ret->capacity = size;
|
ret->capacity = size;
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,12 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) {
|
||||||
if (nstk__ == NULL) {
|
if (nstk__ == NULL) {
|
||||||
nstk__ = h_cfstack_new(mm__);
|
nstk__ = h_cfstack_new(mm__);
|
||||||
}
|
}
|
||||||
|
if(nstk__->prealloc == NULL)
|
||||||
|
nstk__->prealloc = h_new(HCFChoice, 1);
|
||||||
|
// we're going to do something naughty and cast away the const to memoize
|
||||||
assert(parser->vtable->desugar != NULL);
|
assert(parser->vtable->desugar != NULL);
|
||||||
|
((HParser *)parser)->desugared = nstk__->prealloc;
|
||||||
parser->vtable->desugar(mm__, nstk__, parser->env);
|
parser->vtable->desugar(mm__, nstk__, parser->env);
|
||||||
((HParser *)parser)->desugared = nstk__->last_completed;
|
|
||||||
if (stk__ == NULL)
|
if (stk__ == NULL)
|
||||||
h_cfstack_free(mm__, nstk__);
|
h_cfstack_free(mm__, nstk__);
|
||||||
} else if (stk__ != NULL) {
|
} else if (stk__ != NULL) {
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,8 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
|
||||||
&h__packrat_backend_vtable,
|
&h__packrat_backend_vtable,
|
||||||
&h__regex_backend_vtable,
|
&h__regex_backend_vtable,
|
||||||
&h__llk_backend_vtable,
|
&h__llk_backend_vtable,
|
||||||
|
&h__lalr_backend_vtable,
|
||||||
|
&h__glr_backend_vtable,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
10
src/hammer.h
10
src/hammer.h
|
|
@ -34,11 +34,11 @@ typedef struct HParseState_ HParseState;
|
||||||
typedef enum HParserBackend_ {
  PB_MIN = 0,            // lowest backend id
  PB_PACKRAT = PB_MIN,   // PB_MIN is always the default.
  PB_REGULAR,
  PB_LLk,
  PB_LALR,
  PB_GLR,
  PB_MAX = PB_GLR        // highest backend id
} HParserBackend;
|
||||||
|
|
||||||
typedef enum HTokenType_ {
|
typedef enum HTokenType_ {
|
||||||
|
|
|
||||||
|
|
@ -279,6 +279,8 @@ struct HBitWriter_ {
|
||||||
// Backends {{{
|
// Backends {{{
|
||||||
extern HParserBackendVTable h__packrat_backend_vtable;
|
extern HParserBackendVTable h__packrat_backend_vtable;
|
||||||
extern HParserBackendVTable h__llk_backend_vtable;
|
extern HParserBackendVTable h__llk_backend_vtable;
|
||||||
|
extern HParserBackendVTable h__lalr_backend_vtable;
|
||||||
|
extern HParserBackendVTable h__glr_backend_vtable;
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||||
|
|
@ -306,6 +308,7 @@ void h_carray_append(HCountedArray *array, void* item);
|
||||||
HSlist* h_slist_new(HArena *arena);
|
HSlist* h_slist_new(HArena *arena);
|
||||||
HSlist* h_slist_copy(HSlist *slist);
|
HSlist* h_slist_copy(HSlist *slist);
|
||||||
void* h_slist_pop(HSlist *slist);
|
void* h_slist_pop(HSlist *slist);
|
||||||
|
void* h_slist_drop(HSlist *slist);
|
||||||
void h_slist_push(HSlist *slist, void* item);
|
void h_slist_push(HSlist *slist, void* item);
|
||||||
bool h_slist_find(HSlist *slist, const void* item);
|
bool h_slist_find(HSlist *slist, const void* item);
|
||||||
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
|
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
|
||||||
|
|
@ -316,7 +319,7 @@ HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashF
|
||||||
void* h_hashtable_get(const HHashTable* ht, const void* key);
|
void* h_hashtable_get(const HHashTable* ht, const void* key);
|
||||||
void h_hashtable_put(HHashTable* ht, const void* key, void* value);
|
void h_hashtable_put(HHashTable* ht, const void* key, void* value);
|
||||||
void h_hashtable_update(HHashTable* dst, const HHashTable *src);
|
void h_hashtable_update(HHashTable* dst, const HHashTable *src);
|
||||||
void h_hashtable_merge(void *(*combine)(void *v1, void *v2),
|
void h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
|
||||||
HHashTable *dst, const HHashTable *src);
|
HHashTable *dst, const HHashTable *src);
|
||||||
int h_hashtable_present(const HHashTable* ht, const void* key);
|
int h_hashtable_present(const HHashTable* ht, const void* key);
|
||||||
void h_hashtable_del(HHashTable* ht, const void* key);
|
void h_hashtable_del(HHashTable* ht, const void* key);
|
||||||
|
|
@ -331,9 +334,11 @@ typedef HHashTable HHashSet;
|
||||||
#define h_hashset_empty(ht) h_hashtable_empty(ht)
|
#define h_hashset_empty(ht) h_hashtable_empty(ht)
|
||||||
#define h_hashset_del(ht,el) h_hashtable_del(ht,el)
|
#define h_hashset_del(ht,el) h_hashtable_del(ht,el)
|
||||||
#define h_hashset_free(ht) h_hashtable_free(ht)
|
#define h_hashset_free(ht) h_hashtable_free(ht)
|
||||||
|
bool h_hashset_equal(const HHashSet *a, const HHashSet *b);
|
||||||
|
|
||||||
bool h_eq_ptr(const void *p, const void *q);
|
bool h_eq_ptr(const void *p, const void *q);
|
||||||
HHashValue h_hash_ptr(const void *p);
|
HHashValue h_hash_ptr(const void *p);
|
||||||
|
uint32_t h_djbhash(const uint8_t *buf, size_t len);
|
||||||
|
|
||||||
typedef struct HCFSequence_ HCFSequence;
|
typedef struct HCFSequence_ HCFSequence;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ static bool indirect_isValidCF(void *env) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) {
|
static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) {
|
||||||
HCFS_DESUGAR( (HParser*)env );
|
HCFS_DESUGAR( (HParser *)env );
|
||||||
}
|
}
|
||||||
|
|
||||||
static const HParserVtable indirect_vt = {
|
static const HParserVtable indirect_vt = {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ static const HParserVtable unimplemented_vt = {
|
||||||
.parse = parse_unimplemented,
|
.parse = parse_unimplemented,
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = h_false,
|
.isValidCF = h_false,
|
||||||
|
.desugar = NULL,
|
||||||
.compile_to_rvm = h_not_regular,
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
110
src/t_parser.c
110
src/t_parser.c
|
|
@ -405,9 +405,9 @@ static void test_not(gconstpointer backend) {
|
||||||
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
||||||
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
static void test_leftrec(gconstpointer backend) {
|
static void test_leftrec(gconstpointer backend) {
|
||||||
const HParser *a_ = h_ch('a');
|
HParser *a_ = h_ch('a');
|
||||||
|
|
||||||
HParser *lr_ = h_indirect();
|
HParser *lr_ = h_indirect();
|
||||||
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
|
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
|
||||||
|
|
@ -416,7 +416,31 @@ static void test_leftrec(gconstpointer backend) {
|
||||||
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
|
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
|
||||||
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
|
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
static void test_rightrec(gconstpointer backend) {
|
||||||
|
HParser *a_ = h_ch('a');
|
||||||
|
|
||||||
|
HParser *rr_ = h_indirect();
|
||||||
|
h_bind_indirect(rr_, h_choice(h_sequence(a_, rr_, NULL), h_epsilon_p(), NULL));
|
||||||
|
|
||||||
|
g_check_parse_ok(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
|
||||||
|
g_check_parse_ok(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 (u0x61))");
|
||||||
|
g_check_parse_ok(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_ambiguous(gconstpointer backend) {
|
||||||
|
HParser *d_ = h_ch('d');
|
||||||
|
HParser *p_ = h_ch('+');
|
||||||
|
HParser *E_ = h_indirect();
|
||||||
|
h_bind_indirect(E_, h_choice(h_sequence(E_, p_, E_, NULL), d_, NULL));
|
||||||
|
HParser *expr_ = h_action(E_, h_act_flatten);
|
||||||
|
|
||||||
|
g_check_parse_ok(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "(u0x64)");
|
||||||
|
g_check_parse_ok(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+d", 3, "(u0x64 u0x2b u0x64)");
|
||||||
|
g_check_parse_ok(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+d+d", 5, "(u0x64 u0x2b u0x64 u0x2b u0x64)");
|
||||||
|
g_check_parse_failed(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+", 2);
|
||||||
|
}
|
||||||
|
|
||||||
void register_parser_tests(void) {
|
void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
|
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
|
||||||
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
|
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
|
||||||
|
|
@ -460,6 +484,7 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
|
g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
|
||||||
g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
|
g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
|
||||||
// g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
|
// g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
|
||||||
|
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
||||||
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
||||||
|
|
@ -496,6 +521,8 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/llk/epsilon_p", GINT_TO_POINTER(PB_LLk), test_epsilon_p);
|
g_test_add_data_func("/core/parser/llk/epsilon_p", GINT_TO_POINTER(PB_LLk), test_epsilon_p);
|
||||||
g_test_add_data_func("/core/parser/llk/attr_bool", GINT_TO_POINTER(PB_LLk), test_attr_bool);
|
g_test_add_data_func("/core/parser/llk/attr_bool", GINT_TO_POINTER(PB_LLk), test_attr_bool);
|
||||||
g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore);
|
g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore);
|
||||||
|
//g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
|
||||||
|
g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec);
|
||||||
|
|
||||||
g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
|
g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
|
||||||
g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
|
g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
|
||||||
|
|
@ -533,4 +560,81 @@ void register_parser_tests(void) {
|
||||||
g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p);
|
g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p);
|
||||||
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
|
g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool);
|
||||||
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
|
g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
|
||||||
|
|
||||||
|
g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/ch_range", GINT_TO_POINTER(PB_LALR), test_ch_range);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/int64", GINT_TO_POINTER(PB_LALR), test_int64);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/int32", GINT_TO_POINTER(PB_LALR), test_int32);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/int16", GINT_TO_POINTER(PB_LALR), test_int16);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/int8", GINT_TO_POINTER(PB_LALR), test_int8);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/uint64", GINT_TO_POINTER(PB_LALR), test_uint64);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/uint32", GINT_TO_POINTER(PB_LALR), test_uint32);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/uint16", GINT_TO_POINTER(PB_LALR), test_uint16);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/uint8", GINT_TO_POINTER(PB_LALR), test_uint8);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/int_range", GINT_TO_POINTER(PB_LALR), test_int_range);
|
||||||
|
#if 0
|
||||||
|
g_test_add_data_func("/core/parser/lalr/float64", GINT_TO_POINTER(PB_LALR), test_float64);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/float32", GINT_TO_POINTER(PB_LALR), test_float32);
|
||||||
|
#endif
|
||||||
|
g_test_add_data_func("/core/parser/lalr/whitespace", GINT_TO_POINTER(PB_LALR), test_whitespace);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/left", GINT_TO_POINTER(PB_LALR), test_left);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/right", GINT_TO_POINTER(PB_LALR), test_right);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/middle", GINT_TO_POINTER(PB_LALR), test_middle);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/action", GINT_TO_POINTER(PB_LALR), test_action);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/in", GINT_TO_POINTER(PB_LALR), test_in);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/not_in", GINT_TO_POINTER(PB_LALR), test_not_in);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/end_p", GINT_TO_POINTER(PB_LALR), test_end_p);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/nothing_p", GINT_TO_POINTER(PB_LALR), test_nothing_p);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/sequence", GINT_TO_POINTER(PB_LALR), test_sequence);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/choice", GINT_TO_POINTER(PB_LALR), test_choice);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/many", GINT_TO_POINTER(PB_LALR), test_many);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/many1", GINT_TO_POINTER(PB_LALR), test_many1);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/optional", GINT_TO_POINTER(PB_LALR), test_optional);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/sepBy", GINT_TO_POINTER(PB_LALR), test_sepBy);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/sepBy1", GINT_TO_POINTER(PB_LALR), test_sepBy1);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/epsilon_p", GINT_TO_POINTER(PB_LALR), test_epsilon_p);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
|
||||||
|
g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
|
||||||
|
|
||||||
|
g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
|
||||||
|
g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
|
||||||
|
g_test_add_data_func("/core/parser/glr/ch_range", GINT_TO_POINTER(PB_GLR), test_ch_range);
|
||||||
|
g_test_add_data_func("/core/parser/glr/int64", GINT_TO_POINTER(PB_GLR), test_int64);
|
||||||
|
g_test_add_data_func("/core/parser/glr/int32", GINT_TO_POINTER(PB_GLR), test_int32);
|
||||||
|
g_test_add_data_func("/core/parser/glr/int16", GINT_TO_POINTER(PB_GLR), test_int16);
|
||||||
|
g_test_add_data_func("/core/parser/glr/int8", GINT_TO_POINTER(PB_GLR), test_int8);
|
||||||
|
g_test_add_data_func("/core/parser/glr/uint64", GINT_TO_POINTER(PB_GLR), test_uint64);
|
||||||
|
g_test_add_data_func("/core/parser/glr/uint32", GINT_TO_POINTER(PB_GLR), test_uint32);
|
||||||
|
g_test_add_data_func("/core/parser/glr/uint16", GINT_TO_POINTER(PB_GLR), test_uint16);
|
||||||
|
g_test_add_data_func("/core/parser/glr/uint8", GINT_TO_POINTER(PB_GLR), test_uint8);
|
||||||
|
g_test_add_data_func("/core/parser/glr/int_range", GINT_TO_POINTER(PB_GLR), test_int_range);
|
||||||
|
#if 0
|
||||||
|
g_test_add_data_func("/core/parser/glr/float64", GINT_TO_POINTER(PB_GLR), test_float64);
|
||||||
|
g_test_add_data_func("/core/parser/glr/float32", GINT_TO_POINTER(PB_GLR), test_float32);
|
||||||
|
#endif
|
||||||
|
g_test_add_data_func("/core/parser/glr/whitespace", GINT_TO_POINTER(PB_GLR), test_whitespace);
|
||||||
|
g_test_add_data_func("/core/parser/glr/left", GINT_TO_POINTER(PB_GLR), test_left);
|
||||||
|
g_test_add_data_func("/core/parser/glr/right", GINT_TO_POINTER(PB_GLR), test_right);
|
||||||
|
g_test_add_data_func("/core/parser/glr/middle", GINT_TO_POINTER(PB_GLR), test_middle);
|
||||||
|
g_test_add_data_func("/core/parser/glr/action", GINT_TO_POINTER(PB_GLR), test_action);
|
||||||
|
g_test_add_data_func("/core/parser/glr/in", GINT_TO_POINTER(PB_GLR), test_in);
|
||||||
|
g_test_add_data_func("/core/parser/glr/not_in", GINT_TO_POINTER(PB_GLR), test_not_in);
|
||||||
|
g_test_add_data_func("/core/parser/glr/end_p", GINT_TO_POINTER(PB_GLR), test_end_p);
|
||||||
|
g_test_add_data_func("/core/parser/glr/nothing_p", GINT_TO_POINTER(PB_GLR), test_nothing_p);
|
||||||
|
g_test_add_data_func("/core/parser/glr/sequence", GINT_TO_POINTER(PB_GLR), test_sequence);
|
||||||
|
g_test_add_data_func("/core/parser/glr/choice", GINT_TO_POINTER(PB_GLR), test_choice);
|
||||||
|
g_test_add_data_func("/core/parser/glr/many", GINT_TO_POINTER(PB_GLR), test_many);
|
||||||
|
g_test_add_data_func("/core/parser/glr/many1", GINT_TO_POINTER(PB_GLR), test_many1);
|
||||||
|
g_test_add_data_func("/core/parser/glr/optional", GINT_TO_POINTER(PB_GLR), test_optional);
|
||||||
|
g_test_add_data_func("/core/parser/glr/sepBy", GINT_TO_POINTER(PB_GLR), test_sepBy);
|
||||||
|
g_test_add_data_func("/core/parser/glr/sepBy1", GINT_TO_POINTER(PB_GLR), test_sepBy1);
|
||||||
|
g_test_add_data_func("/core/parser/glr/epsilon_p", GINT_TO_POINTER(PB_GLR), test_epsilon_p);
|
||||||
|
g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool);
|
||||||
|
g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore);
|
||||||
|
g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec);
|
||||||
|
g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
|
||||||
|
g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#define g_check_stringmap_absent(table, key) do { \
|
#define g_check_stringmap_absent(table, key) do { \
|
||||||
bool end = (key[strlen(key)-2] == '$'); \
|
bool end = (key[strlen(key)-1] == '$'); \
|
||||||
if(h_stringmap_present(table, (uint8_t *)key, strlen(key), end)) { \
|
if(h_stringmap_present(table, (uint8_t *)key, strlen(key), end)) { \
|
||||||
g_test_message("Check failed: \"%s\" shouldn't have been in map, but was", key); \
|
g_test_message("Check failed: \"%s\" shouldn't have been in map, but was", key); \
|
||||||
g_test_fail(); \
|
g_test_fail(); \
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue