Merge remote-tracking branch 'tq/master' into LL such that it compiles

Conflicts:
	src/Makefile
	src/backends/packrat.c
	src/compile.c
	src/hammer.h
	src/internal.h
	src/parsers/action.c
	src/parsers/and.c
	src/parsers/attr_bool.c
	src/parsers/bits.c
	src/parsers/butnot.c
	src/parsers/ch.c
	src/parsers/charset.c
	src/parsers/choice.c
	src/parsers/difference.c
	src/parsers/end.c
	src/parsers/epsilon.c
	src/parsers/ignore.c
	src/parsers/ignoreseq.c
	src/parsers/indirect.c
	src/parsers/int_range.c
	src/parsers/many.c
	src/parsers/not.c
	src/parsers/nothing.c
	src/parsers/optional.c
	src/parsers/sequence.c
	src/parsers/token.c
	src/parsers/unimplemented.c
	src/parsers/whitespace.c
	src/parsers/xor.c
This commit is contained in:
Sven M. Hallberg 2013-05-11 19:04:59 +02:00
commit c64a4e435e
46 changed files with 1289 additions and 263 deletions

4
.gitignore vendored
View file

@ -8,3 +8,7 @@ examples/base64
TAGS
*.swp
*.swo
\#*
.*
docs/milestone2.dot.pdf
*.dot.pdf

View file

@ -17,6 +17,9 @@ CONFIG_VARS= INCLUDE_TESTS
test: src/test_suite
$<
examples/all: src/all
examples/compile: src/compile
define SUBDIR_TEMPLATE
$(1)/%:
$$(MAKE) -C $(1) $$*

View file

@ -6,7 +6,7 @@ endif
include $(TOPLEVEL)/config.mk
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0)
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) -lrt
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
LDFLAGS :=

36
docs/milestone2.dot Normal file
View file

@ -0,0 +1,36 @@
digraph {
graph [rankdir=LR];
subgraph complete {
node [color="gray",fontcolor="gray"];
glue;
regex_svm;
regex_rvm;
desugaring; // Needs merged.
}
/* The end result of the milestone, along with the subtasks listed */
milestone2 [color="green",style="filled"];
llk -> milestone2;
lr -> milestone2;
lalr8_gen -> lr; // Generate parse tables for LALR(8)
glr_gen -> lr; // Generate parse tables for GLR
llk_gen -> llk; // Generate parse tables for LL(k)
lr_driver -> lr; // Write driver for all LR-type algs; analogous to SVM and RVM implementations
llk_driver -> llk; // Write driver for LL(k)
regex -> milestone2;
glue -> milestone2;
tests -> milestone2;
regex_gen -> regex; // should be mostly done; the rest is concurrent with regex_svm_actions
regex_driver -> regex;
regex_svm -> regex_driver;
regex_rvm -> regex_driver;
regex_svm_actions -> regex_driver; // 1 for each way that an HParsedToken can be extracted from the stack.
/*
*
*/
desugaring -> llk_gen;
desugaring -> lalr8_gen;
desugaring -> glr_gen;
}

65
docs/milestone3.dot Normal file
View file

@ -0,0 +1,65 @@
digraph {
graph [rankdir=LR];
subgraph complete {
node [color="gray",fontcolor="gray"];
}
subgraph groups {
node [color="blue",fontcolor="blue"];
cpp;
python;
ruby;
go;
php;
dotnet;
}
milestone3 [color="green",style="filled"];
function_desc_fmt -> function_descs;
function_desc_fmt -> binding_generator;
binding_generator -> cpp_gen;
binding_generator -> python_gen;
binding_generator -> ruby_gen;
binding_generator -> go_gen;
binding_generator -> php_gen;
binding_generator -> dotnet_gen;
function_descs -> cpp_gen;
function_descs -> python_gen;
function_descs -> ruby_gen;
function_descs -> go_gen;
function_descs -> php_gen;
function_descs -> dotnet_gen;
// Plugins to generate a type of code
cpp_gen -> cpp;
python_gen -> python;
ruby_gen -> ruby;
go_gen -> go;
php_gen -> php;
dotnet_gen -> dotnet;
// base code... developed concurrently with _gen's
cpp_base -> cpp;
python_base -> python;
ruby_base -> ruby;
go_base -> go;
php_base -> php;
dotnet_base -> dotnet;
// Bindings for various languages. These are just groupings.
cpp -> milestone3;
python -> milestone3;
ruby -> milestone3;
go -> milestone3;
php -> milestone3;
dotnet -> milestone3;
}

17
docs/rvm_sample_input.rvm Normal file
View file

@ -0,0 +1,17 @@
+C
int foo() {
return 42;
}
+SVM /svm/simple
@input ""
@output "()"
0 ACCEPT
+SVM /svm/string
@input "quux"
@output "(<5555>)"
1 MARK
2 CAPTURE
2 ACCEPT

View file

@ -27,7 +27,8 @@ PARSERS := \
BACKENDS := \
packrat \
ll
llk \
regex
HAMMER_PARTS := \
bitreader.o \
@ -39,7 +40,6 @@ HAMMER_PARTS := \
datastructures.o \
system_allocator.o \
benchmark.o \
compile.o \
cfgrammar.o \
$(PARSERS:%=parsers/%.o) \
$(BACKENDS:%=backends/%.o)

View file

@ -19,6 +19,7 @@
#define HAMMER_ALLOCATOR__H__
#include <sys/types.h>
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size);
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);

View file

@ -3,22 +3,23 @@
#include "../cfgrammar.h"
#include "../parsers/parser_internal.h"
// XXX despite the names, this is all LL(1) right now. TODO
/* Generating the LL parse table */
/* Generating the LL(k) parse table */
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
*/
typedef struct HLLTable_ {
typedef struct HLLkTable_ {
HHashTable *rows;
HCFChoice *start; // start symbol
HArena *arena;
HAllocator *mm__;
} HLLTable;
} HLLkTable;
/* Interface to look up an entry in the parse table. */
const HCFSequence *h_ll_lookup(const HLLTable *table, const HCFChoice *x, HCFToken tok)
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x, HCFToken tok)
{
const HHashTable *row = h_hashtable_get(table->rows, x);
assert(row != NULL); // the table should have one row for each nonterminal
@ -28,7 +29,7 @@ const HCFSequence *h_ll_lookup(const HLLTable *table, const HCFChoice *x, HCFTok
}
/* Allocate a new parse table. */
HLLTable *h_lltable_new(HAllocator *mm__)
HLLkTable *h_llktable_new(HAllocator *mm__)
{
// NB the parse table gets an arena separate from the grammar so we can free
// the latter after table generation.
@ -37,7 +38,7 @@ HLLTable *h_lltable_new(HAllocator *mm__)
HHashTable *rows = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
assert(rows != NULL);
HLLTable *table = h_new(HLLTable, 1);
HLLkTable *table = h_new(HLLkTable, 1);
assert(table != NULL);
table->mm__ = mm__;
table->arena = arena;
@ -46,7 +47,7 @@ HLLTable *h_lltable_new(HAllocator *mm__)
return table;
}
void h_lltable_free(HLLTable *table)
void h_llktable_free(HLLkTable *table)
{
HAllocator *mm__ = table->mm__;
h_delete_arena(table->arena);
@ -95,10 +96,10 @@ int fill_table_row(HCFGrammar *g, HHashTable *row,
return 0;
}
/* Generate the LL parse table from the given grammar.
/* Generate the LL(k) parse table from the given grammar.
* Returns -1 on error, 0 on success.
*/
static int fill_table(HCFGrammar *g, HLLTable *table)
static int fill_table(HCFGrammar *g, HLLkTable *table)
{
table->start = g->start;
@ -120,7 +121,7 @@ static int fill_table(HCFGrammar *g, HLLTable *table)
for(s = a->seq; *s; s++) {
// record this production in row as appropriate
// this can signal an ambiguity conflict.
// NB we don't worry about deallocating anything, h_ll_compile will
// NB we don't worry about deallocating anything, h_llk_compile will
// delete the whole arena for us.
if(fill_table_row(g, row, a, *s) < 0)
return -1;
@ -131,7 +132,7 @@ static int fill_table(HCFGrammar *g, HLLTable *table)
return 0;
}
int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
{
// Convert parser to a CFG. This can fail as indicated by a NULL return.
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
@ -143,11 +144,11 @@ int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
// TODO: avoid conflicts by splitting occurrences?
// generate table and store in parser->data.
HLLTable *table = h_lltable_new(mm__);
HLLkTable *table = h_llktable_new(mm__);
if(fill_table(grammar, table) < 0) {
// the table was ambiguous
h_cfgrammar_free(grammar);
h_lltable_free(table);
h_llktable_free(table);
return -1;
}
parser->data = table;
@ -161,13 +162,14 @@ int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
/* LL driver */
/* LL(k) driver */
HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* state)
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
{
const HLLTable *table = parser->data;
HArena *arena = state->arena;
HSlist *stack = h_slist_new(arena);
const HLLkTable *table = parser->data;
HArena *arena = h_new_arena(mm__, 0); // will hold the results
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
HSlist *stack = h_slist_new(tarena);
HCountedArray *seq = h_carray_new(arena); // accumulates current parse result
// in order to construct the parse tree, we delimit the symbol stack into
@ -177,7 +179,7 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
// frame delimiter.
// also on the stack below the mark, we store the previously accumulated
// value for the surrounding production.
void *mark = h_arena_malloc(arena, 1);
void *mark = h_arena_malloc(tarena, 1);
// initialize with the start symbol on the stack.
h_slist_push(stack, table->start);
@ -188,8 +190,8 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
while(!h_slist_empty(stack)) {
// fill up lookahead buffer as required
if(lookahead == 0) {
uint8_t c = h_read_bits(&state->input_stream, 8, false);
if(state->input_stream.overrun)
uint8_t c = h_read_bits(stream, 8, false);
if(stream->overrun)
lookahead = end_token;
else
lookahead = char_token(c);
@ -203,16 +205,16 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
// hit stack frame boundary
// wrap the accumulated parse result, this sequence is finished
HParsedToken *tok = a_new(HParsedToken, 1);
HParsedToken *tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_SEQUENCE;
tok->seq = seq;
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
// call validation and semantic action, if present
if(x->pred && !x->pred(make_result(state, tok)))
return NULL; // validation failed -> no parse
if(x->pred && !x->pred(make_result(tarena, tok)))
goto no_parse; // validation failed -> no parse
if(x->action)
tok = (HParsedToken *)x->action(make_result(state, tok));
tok = (HParsedToken *)x->action(make_result(arena, tok));
// result becomes next left-most element of higher-level sequence
seq = h_slist_pop(stack);
@ -230,7 +232,7 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
seq = h_carray_new(arena);
// look up applicable production in parse table
const HCFSequence *p = h_ll_lookup(table, x, lookahead);
const HCFSequence *p = h_llk_lookup(table, x, lookahead);
// push production's rhs onto the stack (in reverse order)
HCFChoice **s;
@ -250,40 +252,40 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
switch(x->type) {
case HCF_END:
if(input != end_token)
return NULL;
goto no_parse;
tok = NULL;
break;
case HCF_CHAR:
if(input != char_token(x->chr))
return NULL;
tok = a_new(HParsedToken, 1);
goto no_parse;
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_UINT;
tok->uint = x->chr;
break;
case HCF_CHARSET:
if(input == end_token)
return NULL;
goto no_parse;
if(!charset_isset(x->charset, token_char(input)))
return NULL;
tok = a_new(HParsedToken, 1);
goto no_parse;
tok = h_arena_malloc(arena, sizeof(HParsedToken));
tok->token_type = TT_UINT;
tok->uint = token_char(input);
break;
default: // should not be reached
assert_message(0, "unknown HCFChoice type");
return NULL;
goto no_parse;
}
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
// call validation and semantic action, if present
if(x->pred && !x->pred(make_result(state, tok)))
return NULL; // validation failed -> no parse
if(x->pred && !x->pred(make_result(tarena, tok)))
goto no_parse; // validation failed -> no parse
if(x->action)
tok = (HParsedToken *)x->action(make_result(state, tok));
tok = (HParsedToken *)x->action(make_result(arena, tok));
// append to result sequence
h_carray_append(seq, tok);
@ -293,25 +295,31 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
// since we started with a single nonterminal on the stack, seq should
// contain exactly the parse result.
assert(seq->used == 1);
return make_result(state, seq->elements[0]);
h_delete_arena(tarena);
return make_result(arena, seq->elements[0]);
no_parse:
h_delete_arena(tarena);
h_delete_arena(arena);
return NULL;
}
HParserBackendVTable h__ll_backend_vtable = {
.compile = h_ll_compile,
.parse = h_ll_parse
HParserBackendVTable h__llk_backend_vtable = {
.compile = h_llk_compile,
.parse = h_llk_parse
};
// dummy!
int test_ll(void)
int test_llk(void)
{
const HParser *c = h_many(h_ch('x'));
const HParser *q = h_sequence(c, h_ch('y'), NULL);
const HParser *p = h_choice(q, h_end_p(), NULL);
HParser *c = h_many(h_ch('x'));
HParser *q = h_sequence(c, h_ch('y'), NULL);
HParser *p = h_choice(q, h_end_p(), NULL);
HCFGrammar *g = h_cfgrammar(&system_allocator, p);

View file

@ -1,7 +1,16 @@
#include <assert.h>
#include <string.h>
#include "../internal.h"
#include "../parsers/parser_internal.h"
// Multiplicative byte-string hash: starts from 5381 and folds each byte in
// as acc = acc * 33 + byte (arithmetic wraps modulo 2^32).
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    acc = acc * 33 + buf[i];
  }
  return acc;
}
// short-hand for constructing HCachedResult's
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
HCachedResult *ret = a_new(HCachedResult, 1);
@ -191,12 +200,37 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
}
// "Compile" step of the packrat backend. Nothing is generated; the parser is
// only tagged as using the packrat backend. Always returns 0.
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
  // No compilation necessary, and everything should work out of the box.
  parser->backend = PB_PACKRAT;
  return 0;
}
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
return h_do_parse(parser, parse_state);
// Hash callback for tables keyed by HParserCacheKey: hashes the raw bytes of
// the key object.
static uint32_t cache_key_hash(const void* key) {
  const size_t key_size = sizeof(HParserCacheKey);
  return djbhash(key, key_size);
}
// Equality callback for tables keyed by HParserCacheKey: two keys are equal
// when their object representations are bytewise identical.
static bool cache_key_equal(const void* key1, const void* key2) {
  int diff = memcmp(key1, key2, sizeof(HParserCacheKey));
  return diff == 0;
}
// Packrat backend entry point. Builds a fresh HParseState in its own arena,
// runs h_do_parse() over a copy of *input_stream, then tears the state down.
// Returns whatever h_do_parse() returned; when that is NULL the arena is
// deleted here, otherwise the arena is left alive.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HArena *arena = h_new_arena(mm__, 0);
  HParseState *state = a_new_(arena, HParseState, 1);

  state->cache = h_hashtable_new(arena, cache_key_equal, cache_key_hash);
  state->input_stream = *input_stream;
  state->lr_stack = h_slist_new(arena);
  state->recursion_heads = h_hashtable_new(arena, cache_key_equal, cache_key_hash);
  state->arena = arena;

  HParseResult *result = h_do_parse(parser, state);

  // tear down the parse state
  h_slist_free(state->lr_stack);
  h_hashtable_free(state->recursion_heads);
  h_hashtable_free(state->cache);
  if (result == NULL)
    h_delete_arena(state->arena);
  return result;
}
HParserBackendVTable h__packrat_backend_vtable = {

366
src/backends/regex.c Normal file
View file

@ -0,0 +1,366 @@
#include <string.h>
#include <assert.h>
#include "../internal.h"
#include "../parsers/parser_internal.h"
#include "regex.h"
#undef a_new
#define a_new(typ, count) a_new_(arena, typ, count)
// Stack VM
// Opcodes of the stack VM. They are recorded into HRVMTrace nodes while the
// regex VM runs and are replayed afterwards by run_trace().
typedef enum HSVMOp_ {
SVM_PUSH, // Push a mark. There is no VM insn to push an object.
SVM_NOP, // Used to start the chain, and possibly elsewhere. Does nothing.
SVM_ACTION, // Same meaning as RVM_ACTION
SVM_CAPTURE, // Same meaning as RVM_CAPTURE
// End of trace: run_trace() builds the HParseResult when it reaches this.
SVM_ACCEPT,
} HSVMOp;
// One recorded stack-VM operation; nodes form a singly linked list.
typedef struct HRVMTrace_ {
struct HRVMTrace_ *next; // When parsing, these are
// reverse-threaded. There is a postproc
// step that inverts all the pointers.
// Input offset at which the operation was recorded.
size_t input_pos;
// Operand of the operation (the action index for SVM_ACTION).
uint16_t arg;
// An HSVMOp value.
uint8_t opcode;
} HRVMTrace;
// One runnable thread of the regex VM.
typedef struct HRVMThread_ {
// Most recently recorded trace node for this thread.
HRVMTrace *trace;
// Index into the program's instruction array.
uint16_t ip;
} HRVMThread;
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len);
// Reverse a singly linked trace list in place and return its new head (the
// node that used to be last). A NULL list yields NULL; a single node is
// returned unchanged.
//
// The earlier loop stopped one node short and returned the old tail without
// pointing it back at the rest of the chain, so the caller received a list of
// length one.
HRVMTrace *invert_trace(HRVMTrace *trace) {
  HRVMTrace *prev = NULL;
  while (trace) {
    HRVMTrace *next = trace->next;
    trace->next = prev;
    prev = trace;
    trace = next;
  }
  return prev;
}
// Run the regex VM program `prog` over input[0..len).
//
// The VM advances all live threads in lock-step, one input byte per outer
// iteration (plus one final iteration at off == len so RVM_EOF can match).
// Every action-type instruction appends an HRVMTrace node to the executing
// thread. When a thread reaches RVM_ACCEPT its trace is inverted and replayed
// by run_trace(), whose result is returned. Returns NULL when no thread
// accepts.
//
// All scratch memory lives in a private arena that is deleted before return.
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
  // An empty program has no ACCEPT to reach; also avoids touching
  // zero-length arrays below.
  if (prog->length == 0)
    return NULL;

  HArena *arena = h_new_arena(mm__, 0);
  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
    **heads_n = a_new(HRVMTrace*, prog->length);

  HRVMTrace *ret_trace = NULL;

  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
  size_t ipq_top;

#define THREAD ip_queue[ipq_top-1]
#define PUSH_SVM(op_, arg_) do {			\
    HRVMTrace *nt = a_new(HRVMTrace, 1);		\
    nt->arg = (arg_);					\
    nt->opcode = (op_);					\
    nt->next = THREAD.trace;				\
    nt->input_pos = off;				\
    THREAD.trace = nt;					\
  } while(0)

  // Clear explicitly instead of depending on the arena handing out zeroed
  // memory (not visible from this file).
  memset(heads_n, 0, prog->length * sizeof(*heads_n));
  heads_n[0] = a_new(HRVMTrace, 1);
  heads_n[0]->next = NULL;
  heads_n[0]->input_pos = 0;
  heads_n[0]->arg = 0;
  heads_n[0]->opcode = SVM_NOP;

  size_t off = 0;
  int live_threads = 1;
  for (off = 0; off <= len; off++) {
    uint8_t ch = ((off == len) ? 0 : input[off]);
    size_t ip_s; // BUG: there was an unused variable ip. Not sure if
		 // I intended to use it somewhere.
    /* scope */ {
      // Threads parked by the previous byte become this byte's start set.
      HRVMTrace **heads_t;
      heads_t = heads_n;
      heads_n = heads_p;
      heads_p = heads_t;
      memset(heads_n, 0, prog->length * sizeof(*heads_n));
    }
    memset(insn_seen, 0, prog->length); // no insns seen yet
    if (!live_threads)
      goto match_fail;
    live_threads = 0;
    for (ip_s = 0; ip_s < prog->length; ip_s++) {
      ipq_top = 1;
      // TODO: Write this as a threaded VM
      if (!heads_p[ip_s])
	continue;
      THREAD.ip = ip_s;
      // Resume with the trace that was parked at this instruction;
      // previously the slot kept whatever a prior thread left behind.
      THREAD.trace = heads_p[ip_s];

      uint8_t hi, lo;
      uint16_t arg;
      while(ipq_top > 0) {
	if (insn_seen[THREAD.ip] == 1) {
	  // Another thread already ran this insn for the current byte.
	  // The thread must be dropped; a bare `continue` would re-test
	  // the same thread forever.
	  ipq_top--;
	  continue;
	}
	insn_seen[THREAD.ip] = 1;
	arg = prog->insns[THREAD.ip].arg;
	switch(prog->insns[THREAD.ip].op) {
	case RVM_ACCEPT:
	  PUSH_SVM(SVM_ACCEPT, 0);
	  ret_trace = THREAD.trace;
	  goto run_trace;
	case RVM_MATCH:
	  // Doesn't actually validate the "must be followed by MATCH
	  // or STEP. It should. Preproc perhaps?
	  hi = (arg >> 8) & 0xff;
	  lo = arg & 0xff;
	  THREAD.ip++;
	  if (ch < lo || ch > hi)
	    ipq_top--; // terminate thread
	  goto next_insn;
	case RVM_GOTO:
	  THREAD.ip = arg;
	  goto next_insn;
	case RVM_FORK:
	  THREAD.ip++;
	  if (!insn_seen[arg]) {
	    insn_seen[THREAD.ip] = 2;
	    HRVMTrace* tr = THREAD.trace;
	    ipq_top++;
	    THREAD.ip = arg;
	    THREAD.trace = tr;
	  }
	  goto next_insn;
	case RVM_PUSH:
	  PUSH_SVM(SVM_PUSH, 0);
	  THREAD.ip++;
	  goto next_insn;
	case RVM_ACTION:
	  PUSH_SVM(SVM_ACTION, arg);
	  THREAD.ip++;
	  goto next_insn;
	case RVM_CAPTURE:
	  PUSH_SVM(SVM_CAPTURE, 0);
	  THREAD.ip++;
	  goto next_insn;
	case RVM_EOF:
	  THREAD.ip++;
	  if (off != len)
	    ipq_top--; // Terminate thread
	  goto next_insn;
	case RVM_STEP:
	  // save thread
	  live_threads++;
	  // Park the thread at the instruction AFTER the STEP. Parking it
	  // on the STEP itself made it step again on every byte and never
	  // advance. Assumes a STEP is never the last instruction of a
	  // program -- TODO validate in a preprocessing pass.
	  heads_n[++THREAD.ip] = THREAD.trace;
	  ipq_top--;
	  goto next_insn;
	}
      next_insn:
	;
      }
    }
  }
  // No accept was reached.
 match_fail:
  h_delete_arena(arena);
  return NULL;

 run_trace:
  // Invert the direction of the trace linked list.
  ret_trace = invert_trace(ret_trace);
  HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
  // ret is in its own arena
  h_delete_arena(arena);
  return ret;
}
#undef PUSH_SVM
#undef THREAD
// Make sure the SVM stack can take `addl` more entries, growing it through
// the allocator's realloc when stack_count + addl would reach the capacity.
//
// Capacity is doubled as many times as needed; a single doubling is not
// enough for a large `addl`, and never grows a zero capacity.
void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) {
  if (ctx->stack_count + addl < ctx->stack_capacity)
    return;

  size_t new_cap = ctx->stack_capacity ? ctx->stack_capacity : 1;
  while (ctx->stack_count + addl >= new_cap)
    new_cap *= 2;

  ctx->stack = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * new_cap);
  ctx->stack_capacity = new_cap;
  // TODO: check for realloc failure
}
// Replay a (forward-linked) trace of stack-VM operations and build the parse
// result.
//
//  - orig_prog is only used for the action table.
//  - input is the buffer the trace was recorded against; captured byte tokens
//    point into it rather than copying.
//
// Returns a result allocated in a fresh arena (stored in res->arena) when the
// trace reaches SVM_ACCEPT. Returns NULL, with that arena deleted, when an
// action fails or the trace ends without an accept. The temporary token stack
// is released on every path.
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len) {
  HSVMContext ctx;
  HArena *arena = h_new_arena(mm__, 0);
  HParseResult *res = NULL;
  ctx.stack_count = 0;
  ctx.stack_capacity = 16;
  ctx.stack = h_new(HParsedToken*, ctx.stack_capacity);

  HParsedToken *tmp_res;
  HRVMTrace *cur;
  for (cur = trace; cur; cur = cur->next) {
    switch (cur->opcode) {
    case SVM_PUSH:
      svm_stack_ensure_cap(mm__, &ctx, 1);
      tmp_res = a_new(HParsedToken, 1);
      tmp_res->token_type = TT_MARK;
      tmp_res->index = cur->input_pos;
      tmp_res->bit_offset = 0;
      ctx.stack[ctx.stack_count++] = tmp_res;
      break;
    case SVM_NOP:
      break;
    case SVM_ACTION:
      // Action should modify stack appropriately
      if (!orig_prog->actions[cur->arg].action(arena, &ctx, orig_prog->actions[cur->arg].env)) {
	// action failed: abandon the parse
	goto fail;
      }
      break;
    case SVM_CAPTURE:
      // Top of stack must be a mark
      // This replaces said mark in-place with a TT_BYTES.
      // The top element is stack[stack_count-1]; stack[stack_count] is the
      // first unused slot.
      assert(ctx.stack_count > 0);
      assert(ctx.stack[ctx.stack_count-1]->token_type == TT_MARK);
      tmp_res = ctx.stack[ctx.stack_count-1];
      tmp_res->token_type = TT_BYTES;
      // TODO: Will need to copy if bit_offset is nonzero
      assert(tmp_res->bit_offset == 0);

      tmp_res->bytes.token = input + tmp_res->index;
      // RVM_CAPTURE is specified as capturing up to the current position,
      // non-inclusive, so the length is the plain difference.
      tmp_res->bytes.len = cur->input_pos - tmp_res->index;
      break;
    case SVM_ACCEPT:
      assert(ctx.stack_count == 1);
      res = a_new(HParseResult, 1);
      res->ast = ctx.stack[0];
      res->bit_length = cur->input_pos * 8;
      res->arena = arena;
      // The tokens live in the arena; only the pointer array is released.
      h_free(ctx.stack);
      return res;
    }
  }
 fail:
  h_free(ctx.stack);
  h_delete_arena(arena);
  return NULL;
}
// Register an (action function, environment) pair in the program's action
// table and return its index. A pair that is already present is not added
// again; its existing index is returned.
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
  uint16_t idx = 0;
  while (idx < prog->action_count) {
    const HSVMAction *known = &prog->actions[idx];
    if (known->action == action_func && known->env == env)
      return idx;
    idx++;
  }

  // Ensure that there's room in the action array: it is regrown each time
  // action_count+1 is a power of two, to twice that size.
  const size_t count = prog->action_count;
  if ((count & (count + 1)) == 0) {
    size_t array_size = 2 * (count + 1);
    prog->actions = prog->allocator->realloc(prog->allocator, prog->actions, array_size * sizeof(*prog->actions));
    // TODO: Handle the allocation failed case nicely.
  }

  HSVMAction *slot = &prog->actions[prog->action_count];
  slot->action = action_func;
  slot->env = env;
  return prog->action_count++;
}
// Append the instruction (op, arg) to the program and return the address it
// was stored at.
uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg) {
  // Ensure that there's room in the insn array: it is regrown each time
  // length+1 is a power of two, to twice that size.
  const size_t count = prog->length;
  if ((count & (count + 1)) == 0) {
    size_t array_size = 2 * (count + 1);
    prog->insns = prog->allocator->realloc(prog->allocator, prog->insns, array_size * sizeof(*prog->insns));
    // TODO: Handle the allocation failed case nicely.
  }

  HRVMInsn *slot = &prog->insns[prog->length];
  slot->op = op;
  slot->arg = arg;
  return prog->length++;
}
// Address the next inserted instruction will receive, i.e. the current
// program length.
uint16_t h_rvm_get_ip(HRVMProg *prog) {
  uint16_t next_ip = prog->length;
  return next_ip;
}
// Overwrite the argument of the already-emitted instruction at `ip`.
// `ip` must address an existing instruction (checked with assert).
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
  assert(ip < prog->length);
  HRVMInsn *target = &prog->insns[ip];
  target->arg = new_val;
}
// Count how many items sit above the topmost TT_MARK on the SVM stack.
// Returns that count (0 when the mark is the top element), or
// ctx->stack_count when the stack holds no mark at all.
//
// The scan covers every slot down to stack[0]. The earlier bound of
// stack_count-1 skipped the bottom element and wrapped around on an empty
// stack.
size_t h_svm_count_to_mark(HSVMContext *ctx) {
  size_t ctm;
  for (ctm = 0; ctm < ctx->stack_count; ctm++) {
    if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
      return ctm;
  }
  return ctx->stack_count;
}
// TODO: Implement the primitive actions
// SVM action: collapse everything above the topmost mark into one sequence.
// The mark token itself is rewritten in place into a TT_SEQUENCE whose
// elements are the items that were above it (bottom-most first); those items
// are then popped, leaving the new sequence on top of the stack.
// Asserts that a mark exists. Always returns true. `env` is unused.
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
  size_t n_items = h_svm_count_to_mark(ctx);
  assert (n_items < ctx->stack_count);
  HParsedToken *res = ctx->stack[ctx->stack_count - 1 - n_items];
  assert (res->token_type == TT_MARK);
  res->token_type = TT_SEQUENCE;

  HCountedArray *ret_carray = h_carray_new_sized(arena, n_items);
  res->seq = ret_carray;
  // res index and bit offset are the same as the mark.
  for (size_t i = 0; i < n_items; i++) {
    ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i];
  }
  // The elements were stored directly, so record how many are in use;
  // otherwise consumers that read `used` see an empty sequence.
  ret_carray->used = n_items;
  ctx->stack_count -= n_items;
  return true;
}
// SVM action: pop items off the stack until a TT_MARK has been popped.
// Returns true once a mark was removed, false when the stack ran empty
// without finding one. `arena` and `env` are unused.
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
  for (;;) {
    if (ctx->stack_count == 0)
      return false; // no mark found.
    ctx->stack_count--;
    if (ctx->stack[ctx->stack_count]->token_type == TT_MARK)
      return true;
  }
}
// Glue regex backend to rest of system
// Emit RVM code for `parser` into `prog` by delegating to the parser's own
// compile_to_rvm hook; the hook's result is passed through unchanged.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
  bool compiled = parser->vtable->compile_to_rvm(prog, parser->env);
  return compiled;
}
// Release the compiled regex program attached to `parser` and reset the
// parser to the packrat backend.
static void h_regex_free(HParser *parser) {
  HRVMProg *compiled = (HRVMProg*)parser->backend_data;
  // h_free() presumably releases through the `mm__` in scope, so use the
  // allocator the program was created with.
  HAllocator *mm__ = compiled->allocator;

  h_free(compiled->insns);
  h_free(compiled->actions);
  h_free(compiled);

  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
// Compile `parser` for the regex backend and store the program in
// parser->backend_data.
//
// Returns 0 on success, 1 when the parser reports (via isValidRegular) that
// it is not a regular grammar, and 2 when code generation fails. `params` is
// unused.
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
  if (!parser->vtable->isValidRegular(parser->env))
    return 1;
  HRVMProg *prog = h_new(HRVMProg, 1);
  prog->allocator = mm__;
  // Start from an explicitly empty program. The insert/create helpers read
  // length and action_count and realloc insns/actions, so none of these may
  // be left as whatever the allocator returned.
  prog->length = 0;
  prog->action_count = 0;
  prog->insns = NULL;
  prog->actions = NULL;
  if (!h_compile_regex(prog, parser)) {
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
    return 2;
  }
  parser->backend_data = prog;
  return 0;
}
// Regex backend entry point: run the compiled program stored in
// parser->backend_data over the stream's byte buffer.
static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HRVMProg *compiled = (HRVMProg*)parser->backend_data;
  return h_rvm_run__m(mm__, compiled, input_stream->input, input_stream->length);
}
// Entry points the regex backend exposes to the rest of the library.
HParserBackendVTable h__regex_backend_vtable = {
.compile = h_regex_compile,
.parse = h_regex_parse,
.free = h_regex_free
};

80
src/backends/regex.h Normal file
View file

@ -0,0 +1,80 @@
// Internal defs
#ifndef HAMMER_BACKEND_REGEX__H
#define HAMMER_BACKEND_REGEX__H
// each insn is an 8-bit opcode and a 16-bit parameter
// [a] are actions; they add an instruction to the stackvm that is being output.
// [m] are match ops; they can either succeed or fail, depending on the current character
// [c] are control ops. They affect the pc non-linearly.
// Opcodes of the regex VM. The bracketed tag on each entry is the category
// from the legend above ([a] action, [m] match, [c] control).
typedef enum HRVMOp_ {
RVM_ACCEPT, // [a]
RVM_GOTO, // [c] parameter is an offset into the instruction table
RVM_FORK, // [c] parameter is an offset into the instruction table
RVM_PUSH, // [a] No arguments, just pushes a mark (pointer to some
// character in the input string) onto the stack
RVM_ACTION, // [a] argument is an action ID
RVM_CAPTURE, // [a] Capture the last string (up to the current
// position, non-inclusive), and push it on the
// stack. No arg.
RVM_EOF, // [m] Succeeds only if at EOF.
RVM_MATCH, // [m] The high byte of the parameter is an upper bound
// and the low byte is a lower bound, both
// inclusive. An inverted match should be handled
// as two ranges.
RVM_STEP, // [a] Step to the next byte of input
// Not an instruction: as the last enumerator it equals the opcode count.
RVM_OPCOUNT
} HRVMOp;
// One regex VM instruction: an opcode plus its 16-bit parameter.
typedef struct HRVMInsn_{
// An HRVMOp value.
uint8_t op;
// Meaning depends on op (jump target, action ID, or match bounds).
uint16_t arg;
} HRVMInsn;
#define TT_MARK TT_RESERVED_1
// Value stack of the stack VM, handed to every action function.
typedef struct HSVMContext_ {
// Array of token pointers; the top of stack is stack[stack_count-1].
HParsedToken **stack;
size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
// Number of slots currently allocated for `stack`.
size_t stack_capacity;
} HSVMContext;
// These actions all assume that the items on the stack are not
// aliased anywhere.
typedef bool (*HSVMActionFunc)(HArena *arena, HSVMContext *ctx, void* env);
// One entry of a program's action table: a callback and the opaque
// environment pointer passed to it as its last argument.
typedef struct HSVMAction_ {
HSVMActionFunc action;
void* env;
} HSVMAction;
// A compiled regex VM program: an instruction array plus an action table.
struct HRVMProg_ {
// Allocator used to grow and release insns and actions.
HAllocator *allocator;
// Number of instructions currently in insns.
size_t length;
// Number of entries currently in actions.
size_t action_count;
HRVMInsn *insns;
HSVMAction *actions;
};
// Returns true IFF the provided parser could be compiled.
bool h_compile_regex(HRVMProg *prog, const HParser* parser);
// These functions are used by the compile_to_rvm method of HParser
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env);
// returns the address of the instruction just created
uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg);
// returns the address of the next insn to be created.
uint16_t h_rvm_get_ip(HRVMProg *prog);
// Used to insert forward references; the idea is to generate a JUMP
// or FORK instruction with a target of 0, then update it once the
// correct target is known.
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val);
// Common SVM action funcs...
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env);
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env);
extern HParserBackendVTable h__regex_backend_vtable;
#endif

112
src/backends/regexvm_asm.pl Normal file
View file

@ -0,0 +1,112 @@
#!/usr/bin/perl -w
use strict;
# The input file consists of a sequence of blocks, which can be parsed
# as SVM test cases, RVM test cases, or C functions. Each block starts
# with a header line, then a sequence of options, and finally text in
# a format defined by the block type.
#
# Header lines start with "+TYPE", optionally followed by a name. This
# name is semantically meaningful for SVM and RVM blocks; it
# determines the name of the test case.
# A C block's name is not used, and it takes no options. The body
# (which continues until the first line that looks like a header), is
# just passed straight through into the C source.
# SVM blocks' names are the GLib test case name. The underlying
# function's name is derived by substituting invalid characters with
# '_'. Note that this can result in collisions (eg, /foo_bar/baz
# collides with /foo/bar_baz). If this happens, it's your own damn
# fault; rename the blocks. SVM blocks take three different options:
# @input, @output, and @pre. The @input pragma's argument is a
# C-quoted string that gets passed into the VM as the input string,
# and @output is a C-quoted string that is compared against
# h_write_result_unamb. @pre lines are prepended verbatim to the
# function body (with the @pre stripped, of course); they can be used
# to initialize environment values.
#
# SVM instructions consist of either two or four fields:
#
# input_pos opcode [arg env]
#
# input_pos and opcode correspond to the fields in HRVMTrace. arg and
# env are used to populate an HSVMAction; arg is the function, and env
# is the object whose address should be used as the env.
# RVM blocks are very similar to SVM blocks; the name and options are
# handled exactly the same way. The assembly text is handled slightly
# differently; the format is:
#
# [label:] opcode [arg ...]
#
# For FORK and GOTO, the arg should be a label that is defined
# elsewhere.
#
# For ACTION, the arguments are handled the same way as with SVM.
#
# MATCH takes two arguments, each of which can be any C integer
# constant (not including character constants), which form the lower
# and upper bounds of the matched character, respectively.
#
# No other RVM instructions take an argument.
# At the beginning of any line, comments preceded by '#' are allowed;
# they are replaced by C++ comments and inserted in the nearest valid
# location in the output.
# Current block type; "TOP" until the first "+TYPE" header line is seen.
# (This has to be an assignment: `==` only compared and left $mode undefined.)
my $mode = "TOP";
# common regexes:
# a C identifier
my $re_ident = qr/[A-Za-z_][A-Za-z0-9_]*/;
# a double-quoted C string literal, including its escape sequences
my $re_cstr = qr/"(?:[^\\"]|\\["'abefnrtv0\\]|\\x[0-9a-fA-F]{2}|\\[0-7]{3})*"/;
# Handlers for SVM blocks. Each handler receives the per-block environment
# hash as its first argument.
my %svm = (
    # Record the test case name.
    name => sub {
	my ($env, $name) = @_;
	$env->{name} = $name;
    },
    # Handle one "@name value" option line.
    pragma => sub {
	my ($env, $name, $val) = @_;
	if ($name eq "input") {
	    chomp($env->{input} = $val);
	} elsif ($name eq "output") {
	    chomp($env->{output} = $val);
	} elsif ($name eq "pre") {
	    # @{$env->{pre}} dereferences the array stored under "pre";
	    # @$env->{pre} would try to dereference $env itself as an array.
	    push(@{$env->{pre}}, $val);
	} else {
	    warn "Invalid SVM pragma";
	}
    },
    # Handle one instruction line: "input_pos opcode [arg env]".
    body => sub {
	my ($env, $line) = @_;
	my ($ipos, $op, $arg, $argenv);
	if ($line =~ /^\s*(\d+)\s+(PUSH|NOP|ACTION|CAPTURE|ACCEPT)(?:\s+($re_ident)\s+($re_ident))?/) {
	    if ($2 eq "PUSH") {
		# TODO: implement all the opcodes
	    }
	}
    }
    );
# Main loop: read the input line by line, tracking which block type we are in.
while (<>) {
    # A header line starts with a literal '+', which must be escaped;
    # unescaped it is a quantifier applied to the '^' anchor.
    if (/^\+(C|RVM|SVM)/) {
	$mode = $1;
    }

    if ($mode eq "TOP") {
	if (/^#(.*)/) {
	    # (.*) stops before the line's newline, so put one back to keep
	    # each emitted C++ comment on its own line.
	    print "// $1\n";
	    next;
	}
    } elsif ($mode eq "SVM") {
    } elsif ($mode eq "RVM") {
    } elsif ($mode eq "C") {
    }
}

View file

@ -21,11 +21,11 @@
*/
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) {
HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
return h_benchmark__m(&system_allocator, parser, testcases);
}
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) {
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases) {
// For now, just output the results to stderr
HParserTestcase* tc = testcases;
HParserBackend backend = PB_MIN;
@ -33,7 +33,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HPars
ret->len = PB_MAX-PB_MIN;
ret->results = h_new(HBackendResults, ret->len);
for (backend = PB_MIN; backend < PB_MAX; backend++) {
for (backend = PB_MIN; backend <= PB_MAX; backend++) {
ret->results[backend].backend = backend;
// Step 1: Compile grammar for given parser...
if (h_compile(parser, backend, NULL) == -1) {

View file

@ -1,17 +0,0 @@
// This file contains functions related to managing multiple parse backends
#include "hammer.h"
#include "internal.h"
static HParserBackendVTable *backends[PB_MAX] = {
&h__packrat_backend_vtable,
&h__ll_backend_vtable,
};
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
return h_compile__m(&system_allocator, parser, backend, params);
}
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
// be naughty and cast off the const
return backends[backend]->compile(mm__, (HParser *)parser, params);
}

View file

@ -26,13 +26,12 @@
#include "allocator.h"
#include "parsers/parser_internal.h"
/**
 * Hash `len` bytes starting at `buf`.
 *
 * Starts from the seed 5381 and, for each byte in order, multiplies the
 * running value by 33 and adds the byte (arithmetic wraps modulo 2^32).
 * An empty buffer hashes to the seed.
 */
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t pos = 0; pos < len; pos++) {
    acc = acc * 33 + buf[pos];
  }
  return acc;
}
static HParserBackendVTable *backends[PB_MAX + 1] = {
&h__packrat_backend_vtable,
&h__regex_backend_vtable,
&h__llk_backend_vtable,
};
/* Helper function, since these lines appear in every parser */
@ -42,46 +41,52 @@ typedef struct {
} HTwoParsers;
static uint32_t cache_key_hash(const void* key) {
return djbhash(key, sizeof(HParserCacheKey));
}
static bool cache_key_equal(const void* key1, const void* key2) {
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
}
/* Parse `length` bytes of `input` with `parser`. Convenience wrapper that
 * forwards to h_parse__m with the global system_allocator. */
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
return h_parse__m(&system_allocator, parser, input, length);
}
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
// TODO: split the creation of the parse state into h_packrat_parse
// Set up a parse state...
HArena * arena = h_new_arena(mm__, 0);
HParseState *parse_state = a_new_(arena, HParseState, 1);
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
cache_key_hash); // hash_func
parse_state->input_stream.input = input;
parse_state->input_stream.index = 0;
parse_state->input_stream.bit_offset = 8; // bit big endian
parse_state->input_stream.overrun = 0;
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
parse_state->input_stream.length = length;
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
h_hashtable_free(parse_state->cache);
if (!res)
h_delete_arena(parse_state->arena);
HInputStream input_stream = {
.index = 0,
.bit_offset = 8,
.overrun = 0,
.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
.length = length,
.input = input
};
return res;
return backends[parser->backend]->parse(mm__, parser, &input_stream);
}
/**
 * Release a parse result by deleting the arena it refers to.
 *
 * A NULL `result` is accepted and ignored, so the return value of a parse
 * can be passed here without checking it first.
 */
void h_parse_result_free(HParseResult *result) {
  if (result == NULL)
    return;
  h_delete_arena(result->arena);
}
/* Constant predicate: ignores `env` and always returns false. Used as a
 * vtable entry (e.g. .isValidRegular / .isValidCF). */
bool h_false(void* env) {
(void)env;
return false;
}
/* Constant predicate: ignores `env` and always returns true. Used as a
 * vtable entry (e.g. .isValidRegular / .isValidCF). */
bool h_true(void* env) {
(void)env;
return true;
}
/* compile_to_rvm implementation for parsers that cannot be compiled to the
 * regex VM: emits nothing into `prog` and always returns false. */
bool h_not_regular(HRVMProg *prog, void *env) {
(void)env;
return false;
}
/* Compile `parser` for `backend`. Convenience wrapper that forwards to
 * h_compile__m with the global system_allocator. */
int h_compile(HParser* parser, HParserBackend backend, const void* params) {
return h_compile__m(&system_allocator, parser, backend, params);
}
/**
 * Compile `parser` for `backend`, allocating through `mm__`.
 *
 * Dispatches to the selected backend's compile hook, passing `params`
 * through unchanged. When the hook returns 0 the backend is recorded in
 * parser->backend; on any other result the parser's backend is left as is.
 *
 * Returns the hook's result, or -1 when `backend` has no entry in the
 * backend table (out of range, or an unfilled slot).
 */
int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
  // Some HParserBackend values lie beyond PB_MAX and have no vtable; the
  // size_t cast also catches negative values. Indexing with them would read
  // past the end of the table.
  if ((size_t)backend >= sizeof(backends) / sizeof(backends[0]) || backends[backend] == NULL)
    return -1;
  int ret = backends[backend]->compile(mm__, parser, params);
  if (!ret)
    parser->backend = backend;
  return ret;
}

View file

@ -34,8 +34,11 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_LL,
PB_MAX
PB_REGULAR, //
PB_LLk, //
PB_LALR, // Not Implemented
PB_GLR, // Not Implemented
PB_MAX = PB_LLk
} HParserBackend;
typedef enum HTokenType_ {
@ -44,6 +47,7 @@ typedef enum HTokenType_ {
TT_SINT,
TT_UINT,
TT_SEQUENCE,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_USER = 64,
TT_ERR,
TT_MAX
@ -75,7 +79,9 @@ typedef struct HParsedToken_ {
} HParsedToken;
/**
* The result of a successful parse.
* The result of a successful parse. Note that this may reference the
* input string.
*
* If a parse fails, the parse result will be NULL.
* If a parse is successful but there's nothing there (i.e., if end_p
* succeeds) then there's a parse result but its ast is NULL.
@ -111,12 +117,14 @@ typedef const HParsedToken* (*HAction)(const HParseResult *p);
*/
typedef bool (*HPredicate)(HParseResult *p);
typedef struct HParserVtable_ HParserVtable;
typedef struct HCFChoice_ HCFChoice;
typedef struct HRVMProg_ HRVMProg;
typedef struct HParserVtable_ HParserVtable;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
void* backend_data;
void *env;
void *data; /* e.g., parse tables */
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
@ -191,7 +199,7 @@ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* inp
*
* Result token type: TT_BYTES
*/
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
/**
* Given a single character, returns a parser that parses that
@ -199,7 +207,7 @@ HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
HAMMER_FN_DECL(HParser*, h_ch, const uint8_t c);
/**
* Given two single-character bounds, lower and upper, returns a parser
@ -208,14 +216,14 @@ HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
HAMMER_FN_DECL(HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
/**
* Given an integer parser, p, and two integer bounds, lower and upper,
* returns a parser that parses an integral value within the range
* [lower, upper] (inclusive).
*/
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
/**
* Returns a parser that parses the specified number of bits. sign ==
@ -223,63 +231,63 @@ HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lowe
*
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
*/
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
/**
* Returns a parser that parses a signed 8-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
HAMMER_FN_DECL_NOARG(HParser*, h_int64);
/**
* Returns a parser that parses a signed 4-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
HAMMER_FN_DECL_NOARG(HParser*, h_int32);
/**
* Returns a parser that parses a signed 2-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
HAMMER_FN_DECL_NOARG(HParser*, h_int16);
/**
* Returns a parser that parses a signed 1-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
HAMMER_FN_DECL_NOARG(HParser*, h_int8);
/**
* Returns a parser that parses an unsigned 8-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
HAMMER_FN_DECL_NOARG(HParser*, h_uint64);
/**
* Returns a parser that parses an unsigned 4-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
HAMMER_FN_DECL_NOARG(HParser*, h_uint32);
/**
* Returns a parser that parses an unsigned 2-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
HAMMER_FN_DECL_NOARG(HParser*, h_uint16);
/**
* Returns a parser that parses an unsigned 1-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
HAMMER_FN_DECL_NOARG(HParser*, h_uint8);
/**
* Given another parser, p, returns a parser that skips any whitespace
@ -287,7 +295,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
*
* Result token type: p's result type
*/
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
HAMMER_FN_DECL(HParser*, h_whitespace, const HParser* p);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -295,7 +303,7 @@ HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
*
* Result token type: p's result type
*/
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
HAMMER_FN_DECL(HParser*, h_left, const HParser* p, const HParser* q);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -303,7 +311,7 @@ HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
*
* Result token type: q's result type
*/
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
HAMMER_FN_DECL(HParser*, h_right, const HParser* p, const HParser* q);
/**
* Given three parsers, p, x, and q, returns a parser that parses them in
@ -311,7 +319,7 @@ HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
*
* Result token type: x's result type
*/
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
HAMMER_FN_DECL(HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
/**
* Given another parser, p, and a function f, returns a parser that
@ -319,21 +327,21 @@ HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, con
*
* Result token type: any
*/
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a);
/**
* Parse a single character in the given charset.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
HAMMER_FN_DECL(HParser*, h_in, const uint8_t *charset, size_t length);
/**
* Parse a single character *NOT* in the given charset.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
HAMMER_FN_DECL(HParser*, h_not_in, const uint8_t *charset, size_t length);
/**
* A no-argument parser that succeeds if there is no more input to
@ -341,14 +349,14 @@ HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
HAMMER_FN_DECL_NOARG(HParser*, h_end_p);
/**
* This parser always fails.
*
* Result token type: NULL. Always.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
/**
* Given a null-terminated list of parsers, apply each parser in order.
@ -356,7 +364,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, const HParser* p);
/**
* Given an array of parsers, p_array, apply each parser in order. The
@ -365,7 +373,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequenc
*
* Result token type: The type of the first successful parser's result.
*/
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, const HParser* p);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -375,7 +383,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice,
*
* Result token type: p1's result type.
*/
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_butnot, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -385,7 +393,7 @@ HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
*
* Result token type: p1's result type.
*/
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_difference, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
@ -393,7 +401,7 @@ HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p
*
* Result token type: The type of the result of whichever parser succeeded.
*/
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_xor, const HParser* p1, const HParser* p2);
/**
* Given a parser, p, this parser succeeds for zero or more repetitions
@ -401,7 +409,7 @@ HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
HAMMER_FN_DECL(HParser*, h_many, const HParser* p);
/**
* Given a parser, p, this parser succeeds for one or more repetitions
@ -409,7 +417,7 @@ HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
HAMMER_FN_DECL(HParser*, h_many1, const HParser* p);
/**
* Given a parser, p, this parser succeeds for exactly N repetitions
@ -417,7 +425,7 @@ HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
HAMMER_FN_DECL(HParser*, h_repeat_n, const HParser* p, const size_t n);
/**
* Given a parser, p, this parser succeeds with the value p parsed or
@ -425,7 +433,7 @@ HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
*
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
*/
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
HAMMER_FN_DECL(HParser*, h_optional, const HParser* p);
/**
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
@ -433,7 +441,7 @@ HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
HAMMER_FN_DECL(HParser*, h_ignore, const HParser* p);
/**
* Given a parser, p, and a parser for a separator, sep, this parser
@ -444,7 +452,7 @@ HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
HAMMER_FN_DECL(HParser*, h_sepBy, const HParser* p, const HParser* sep);
/**
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
@ -452,14 +460,14 @@ HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
HAMMER_FN_DECL(HParser*, h_sepBy1, const HParser* p, const HParser* sep);
/**
* This parser always returns a zero length match, i.e., empty string.
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
HAMMER_FN_DECL_NOARG(HParser*, h_epsilon_p);
/**
* This parser applies its first argument to read an unsigned integer
@ -470,7 +478,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
HAMMER_FN_DECL(HParser*, h_length_value, const HParser* length, const HParser* value);
/**
* This parser attaches a predicate function, which returns true or
@ -485,7 +493,7 @@ HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HPar
*
* Result token type: p's result type if pred succeeded, NULL otherwise.
*/
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred);
/**
* The 'and' parser asserts that a conditional syntax is satisfied,
@ -502,7 +510,7 @@ HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
HAMMER_FN_DECL(HParser*, h_and, const HParser* p);
/**
* The 'not' parser asserts that a conditional syntax is *not*
@ -522,7 +530,7 @@ HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
HAMMER_FN_DECL(HParser*, h_not, const HParser* p);
/**
* Create a parser that just calls out to another, as yet unknown,
@ -565,7 +573,7 @@ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent
*
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
*/
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
/**
* TODO: Document me
@ -590,7 +598,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
void h_bit_writer_free(HBitWriter* w);
// {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
// }}}

View file

@ -48,7 +48,7 @@ static inline void h_generic_free(HAllocator *allocator, void* ptr) {
allocator->free(allocator, ptr);
}
HAllocator system_allocator;
extern HAllocator system_allocator;
typedef struct HInputStream_ {
@ -131,7 +131,8 @@ struct HParseState_ {
typedef struct HParserBackendVTable_ {
int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state);
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* parse_state);
void (*free)(HParser* parser);
} HParserBackendVTable;
@ -213,9 +214,10 @@ struct HBitWriter_ {
// }}}
// Backends {{{
extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__ll_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable;
// }}}
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
@ -295,9 +297,14 @@ struct HParserVtable_ {
HParseResult* (*parse)(void *env, HParseState *state);
bool (*isValidRegular)(void *env);
bool (*isValidCF)(void *env);
bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean.
HCFChoice* (*desugar)(HAllocator *mm__, void *env);
};
bool h_false(void*);
bool h_true(void*);
bool h_not_regular(HRVMProg*, void*);
#if 0
#include <stdlib.h>
#define h_arena_malloc(a, s) malloc(s)

View file

@ -12,7 +12,7 @@ static HParseResult* parse_action(void *env, HParseState *state) {
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
if(tmp) {
const HParsedToken *tok = a->action(tmp);
return make_result(state, (HParsedToken*)tok);
return make_result(state->arena, (HParsedToken*)tok);
} else
return NULL;
} else // either the parser's missing or the action's missing
@ -44,18 +44,24 @@ static bool action_isValidCF(void *env) {
return a->p->vtable->isValidCF(a->p->env);
}
/* compile_to_rvm hook for the action parser: delegates directly to the
 * wrapped parser's own compile_to_rvm hook and returns its result. */
static bool action_ctrvm(HRVMProg *prog, void* env) {
  const HParser *inner = ((HParseAction*)env)->p;
  return inner->vtable->compile_to_rvm(prog, inner->env);
}
static const HParserVtable action_vt = {
.parse = parse_action,
.isValidRegular = action_isValidRegular,
.isValidCF = action_isValidCF,
.desugar = desugar_action,
.compile_to_rvm = action_ctrvm,
};
const HParser* h_action(const HParser* p, const HAction a) {
HParser* h_action(const HParser* p, const HAction a) {
return h_action__m(&system_allocator, p, a);
}
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParseAction *env = h_new(HParseAction, 1);
env->p = p;
env->action = a;

View file

@ -5,7 +5,7 @@ static HParseResult *parse_and(void* env, HParseState* state) {
HParseResult *res = h_do_parse((HParser*)env, state);
state->input_stream = bak;
if (res)
return make_result(state, NULL);
return make_result(state->arena, NULL);
return NULL;
}
@ -22,13 +22,14 @@ static const HParserVtable and_vt = {
revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */
.desugar = desugar_and,
.compile_to_rvm = h_not_regular,
};
const HParser* h_and(const HParser* p) {
HParser* h_and(const HParser* p) {
return h_and__m(&system_allocator, p);
}
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
HParser* h_and__m(HAllocator* mm__, const HParser* p) {
// zero-width positive lookahead
return h_new_parser(mm__, &and_vt, (void *)p);
}

View file

@ -47,18 +47,24 @@ static HCFChoice* desugar_ab(HAllocator *mm__, void *env) {
return ret;
}
/* compile_to_rvm hook for attr_bool: compiles the wrapped parser via
 * h_compile_regex and returns its result. */
static bool ab_ctrvm(HRVMProg *prog, void *env) {
  const HParser *inner = ((HAttrBool*)env)->p;
  return h_compile_regex(prog, inner);
}
static const HParserVtable attr_bool_vt = {
.parse = parse_attr_bool,
.isValidRegular = ab_isValidRegular,
.isValidCF = ab_isValidCF,
.desugar = desugar_ab,
.compile_to_rvm = ab_ctrvm,
};
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
HParser* h_attr_bool(const HParser* p, HPredicate pred) {
return h_attr_bool__m(&system_allocator, p, pred);
}
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HAttrBool *env = h_new(HAttrBool, 1);
env->p = p;
env->pred = pred;

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
struct bits_env {
@ -13,7 +14,7 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
result->sint = h_read_bits(&state->input_stream, env_->length, true);
else
result->uint = h_read_bits(&state->input_stream, env_->length, false);
return make_result(state, result);
return make_result(state->arena, result);
}
static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
@ -41,16 +42,43 @@ static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
return ret;
}
/* SVM action attached by bits_ctrvm: rewrites the token on top of the SVM
 * stack, in place, from a TT_BYTES token into an integer token. `env` is
 * the struct bits_env of the originating parser. Always returns true. */
static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
// BUG: relies on undefined behaviour: int64_t is a signed uint64_t; not necessarily true on 32-bit
struct bits_env *env_ = env;
HParsedToken *top = ctx->stack[ctx->stack_count-1];
assert(top->token_type == TT_BYTES);
uint64_t res = 0;
// Fold the bytes into res, first byte most significant. This must finish
// before top->uint is written, since the loop reads top->bytes.
for (size_t i = 0; i < top->bytes.len; i++)
res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.
top->uint = res; // possibly cast to signed through union
top->token_type = (env_->signedp ? TT_SINT : TT_UINT);
return true;
}
/**
 * compile_to_rvm hook for the bits parser.
 *
 * Emits PUSH, then one MATCH/STEP pair per whole byte of env_->length,
 * then CAPTURE and an ACTION bound to h_svm_action_bits (with `env` as its
 * argument) to convert the captured bytes into an integer token.
 * Always returns true.
 */
static bool bits_ctrvm(HRVMProg *prog, void* env) {
  struct bits_env *env_ = (struct bits_env*)env;
  // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
  const size_t nbytes = env_->length / 8;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // The bound is length/8; dividing the comparison result instead
  // ((i < length)/8) is always 0 and would emit no MATCH/STEP pairs.
  for (size_t i = 0; i < nbytes; ++i) {
    // NOTE(review): 0xFF00 is presumably "any byte" (range 0x00..0xFF) --
    // confirm against the RVM_MATCH implementation.
    h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_bits, env));
  return true;
}
static const HParserVtable bits_vt = {
.parse = parse_bits,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_bits,
.compile_to_rvm = bits_ctrvm,
};
const HParser* h_bits(size_t len, bool sign) {
HParser* h_bits(size_t len, bool sign) {
return h_bits__m(&system_allocator, len, sign);
}
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
struct bits_env *env = h_new(struct bits_env, 1);
env->length = len;
env->signedp = sign;
@ -58,10 +86,10 @@ const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
}
#define SIZED_BITS(name_pre, len, signedp) \
const HParser* h_##name_pre##len () { \
HParser* h_##name_pre##len () { \
return h_bits__m(&system_allocator, len, signedp); \
} \
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
return h_bits__m(mm__, len, signedp); \
}
SIZED_BITS(int, 8, true)

View file

@ -43,14 +43,15 @@ static HCFChoice* desugar_butnot(HAllocator *mm__, void *env) {
static const HParserVtable butnot_vt = {
.parse = parse_butnot,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_butnot,
.compile_to_rvm = h_not_regular,
};
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
HParser* h_butnot(const HParser* p1, const HParser* p2) {
return h_butnot__m(&system_allocator, p1, p2);
}
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;

View file

@ -6,7 +6,7 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
if (c == r) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = r;
return make_result(state, tok);
return make_result(state->arena, tok);
} else {
return NULL;
}
@ -20,16 +20,25 @@ static HCFChoice* desugar_ch(HAllocator *mm__, void *env) {
return ret;
}
/**
 * compile_to_rvm hook for the single-character parser.
 *
 * `env` carries the character itself (stored as an integer in the pointer
 * by h_ch__m). Emits a MATCH whose 16-bit argument holds the character in
 * both its low and high byte, followed by a STEP. Always returns true.
 */
static bool ch_ctrvm(HRVMProg *prog, void* env) {
  uint8_t c = (uint8_t)(uintptr_t)(env);
  // TODO: Does this capture anything?
  // The two halves must be OR-ed together: "c & c << 8" parses as
  // c & (c << 8), which is 0 for every 8-bit c.
  h_rvm_insert_insn(prog, RVM_MATCH, c | (c << 8));
  h_rvm_insert_insn(prog, RVM_STEP, 0);
  return true;
}
static const HParserVtable ch_vt = {
.parse = parse_ch,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_ch,
.compile_to_rvm = ch_ctrvm,
};
const HParser* h_ch(const uint8_t c) {
HParser* h_ch(const uint8_t c) {
return h_ch__m(&system_allocator, c);
}
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
return h_new_parser(mm__, &ch_vt, (void *)(uintptr_t)c);
}

View file

@ -9,7 +9,7 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
if (charset_isset(cs, in)) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = in;
return make_result(state, tok);
return make_result(state->arena, tok);
} else
return NULL;
}
@ -22,17 +22,38 @@ static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
return ret;
}
// FUTURE: this is horribly inefficient
/**
 * compile_to_rvm hook for the charset parser.
 *
 * For every byte value set in the charset this emits FORK / MATCH / GOTO,
 * patching each FORK to the instruction after its GOTO. A final STEP is
 * then emitted and every GOTO in the emitted range is patched to point at
 * it. Always returns true.
 *
 * NOTE(review): the last FORK's target is the STEP itself, so a thread
 * that matched none of the alternatives appears to reach STEP as well --
 * confirm whether an explicit failing instruction is needed before it.
 */
static bool cs_ctrvm(HRVMProg *prog, void *env) {
  HCharset cs = (HCharset)env;
  uint16_t start = h_rvm_get_ip(prog);
  for (size_t i=0; i<256; ++i) {
    if (charset_isset(cs, i)) {
      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
      // Byte value in both halves of the argument; "i & i << 8" is
      // i & (i << 8), which is 0 for every i below 256.
      h_rvm_insert_insn(prog, RVM_MATCH, i | (i << 8));
      h_rvm_insert_insn(prog, RVM_GOTO, 0);
      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
    }
  }
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  // Only GOTOs were emitted with a placeholder target in [start, jump).
  for (size_t i=start; i<jump; ++i) {
    if (RVM_GOTO == prog->insns[i].op)
      h_rvm_patch_arg(prog, i, jump);
  }
  return true;
}
static const HParserVtable charset_vt = {
.parse = parse_charset,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_charset,
.compile_to_rvm = cs_ctrvm,
};
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
return h_ch_range__m(&system_allocator, lower, upper);
}
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HCharset cs = new_charset(mm__);
for (int i = 0; i < 256; i++)
charset_set(cs, i, (lower <= i) && (i <= upper));
@ -40,7 +61,7 @@ const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_
}
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
static HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
HCharset cs = new_charset(mm__);
for (size_t i = 0; i < 256; i++)
charset_set(cs, i, 1-val);
@ -50,19 +71,19 @@ static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, s
return h_new_parser(mm__, &charset_vt, cs);
}
const HParser* h_in(const uint8_t *options, size_t count) {
HParser* h_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 1);
}
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 1);
}
const HParser* h_not_in(const uint8_t *options, size_t count) {
HParser* h_not_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 0);
}
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 0);
}

View file

@ -54,34 +54,53 @@ static HCFChoice* desugar_choice(HAllocator *mm__, void *env) {
return ret;
}
/**
 * compile_to_rvm hook for the choice parser.
 *
 * For each alternative: emit a FORK, compile the alternative, emit a GOTO
 * (remembered in gotos[]), and patch the FORK to the instruction after the
 * GOTO. Finally emit a STEP and patch every remembered GOTO to it.
 *
 * Returns false as soon as one alternative fails to compile, true
 * otherwise.
 */
static bool choice_ctrvm(HRVMProg *prog, void* env) {
  HSequence *s = (HSequence*)env;
  uint16_t gotos[s->len];
  for (size_t i=0; i<s->len; ++i) {
    uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
    // h_compile_regex takes the parser itself (as in ab_ctrvm), not its env.
    if (!h_compile_regex(prog, s->p_array[i]))
      return false;
    gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
    h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
  }
  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
  // gotos[] is indexed by alternative, so the walk starts at 0 -- not at
  // the instruction pointer where the emitted code began.
  for (size_t i=0; i<s->len; ++i) {
    h_rvm_patch_arg(prog, gotos[i], jump);
  }
  return true;
}
static const HParserVtable choice_vt = {
.parse = parse_choice,
.isValidRegular = choice_isValidRegular,
.isValidCF = choice_isValidCF,
.desugar = desugar_choice,
.compile_to_rvm = choice_ctrvm,
};
const HParser* h_choice(const HParser* p, ...) {
HParser* h_choice(const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_choice__mv(&system_allocator, p, ap);
HParser* ret = h_choice__mv(&system_allocator, p, ap);
va_end(ap);
return ret;
}
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_choice__mv(mm__, p, ap);
HParser* ret = h_choice__mv(mm__, p, ap);
va_end(ap);
return ret;
}
const HParser* h_choice__v(const HParser* p, va_list ap) {
HParser* h_choice__v(const HParser* p, va_list ap) {
return h_choice__mv(&system_allocator, p, ap);
}
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
va_list ap;
size_t len = 0;
HSequence *s = h_new(HSequence, 1);

View file

@ -42,14 +42,15 @@ static HCFChoice* desugar_difference(HAllocator *mm__, void *env) {
static HParserVtable difference_vt = {
.parse = parse_difference,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_difference,
.compile_to_rvm = h_not_regular,
};
const HParser* h_difference(const HParser* p1, const HParser* p2) {
HParser* h_difference(const HParser* p1, const HParser* p2) {
return h_difference__m(&system_allocator, p1, p2);
}
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;

View file

@ -17,17 +17,23 @@ static HCFChoice* desugar_end(HAllocator *mm__, void *env) {
return &ret;
}
// Regex-VM compilation hook for the end-of-input parser: appends a single
// RVM_EOF instruction (argument 0) to prog. env is not used.
// Always returns true.
static bool end_ctrvm(HRVMProg *prog, void *env) {
h_rvm_insert_insn(prog, RVM_EOF, 0);
return true;
}
static const HParserVtable end_vt = {
.parse = parse_end,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_end,
.compile_to_rvm = end_ctrvm,
};
const HParser* h_end_p() {
HParser* h_end_p() {
return h_end_p__m(&system_allocator);
}
const HParser* h_end_p__m(HAllocator* mm__) {
HParser* h_end_p__m(HAllocator* mm__) {
return h_new_parser(mm__, &end_vt, NULL);
}

View file

@ -8,21 +8,23 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
return res;
}
// Regex-VM compilation hook for the epsilon parser: emits no instructions
// into prog and reports success. Neither prog nor env is touched.
static bool epsilon_ctrvm(HRVMProg *prog, void* env) {
return true;
}
static const HParserVtable epsilon_vt = {
.parse = parse_epsilon,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_epsilon,
.compile_to_rvm = epsilon_ctrvm,
};
static HParser epsilon_p = {
.vtable = &epsilon_vt,
.env = NULL
};
const HParser* h_epsilon_p() {
return &epsilon_p;
HParser* h_epsilon_p() {
return h_epsilon_p__m(&system_allocator);
}
const HParser* h_epsilon_p__m(HAllocator* mm__) {
return &epsilon_p;
HParser* h_epsilon_p__m(HAllocator* mm__) {
HParser *epsilon_p = h_new(HParser, 1);
epsilon_p->vtable = &epsilon_vt;
return epsilon_p;
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
static HParseResult* parse_ignore(void* env, HParseState* state) {
@ -25,16 +26,30 @@ static HCFChoice* desugar_ignore(HAllocator *mm__, void *env) {
return (h_desugar(mm__, p));
}
// SVM action that discards the top entry of ctx's stack by decrementing
// stack_count. Asserts that the stack is non-empty first. arena and arg are
// not used. Always returns true.
static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) {
assert(ctx->stack_count > 0);
ctx->stack_count--;
return true;
}
// Regex-VM compilation hook for h_ignore: compiles the wrapped parser, then
// appends an RVM_ACTION that runs h_svm_action_pop to drop its result.
// env is the wrapped parser (h_ignore__m stores it as the parser's env).
// Always returns true.
static bool ignore_ctrvm(HRVMProg *prog, void *env) {
HParser *p = (HParser*)env;
// NOTE(review): this passes p->env, while many_ctrvm and ir_ctrvm hand
// h_compile_regex a parser pointer directly -- confirm which one
// h_compile_regex expects. Its return value is also ignored here.
h_compile_regex(prog, p->env);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
return true;
}
static const HParserVtable ignore_vt = {
.parse = parse_ignore,
.isValidRegular = ignore_isValidRegular,
.isValidCF = ignore_isValidCF,
.desugar = desugar_ignore,
.compile_to_rvm = ignore_ctrvm,
};
const HParser* h_ignore(const HParser* p) {
HParser* h_ignore(const HParser* p) {
return h_ignore__m(&system_allocator, p);
}
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &ignore_vt, (void *)p);
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
@ -5,7 +6,7 @@
// general case: parse sequence, pick one result
//
typedef struct {
typedef struct HIgnoreSeq_ {
const HParser **parsers;
size_t len; // how many parsers in 'ps'
size_t which; // whose result to return
@ -61,11 +62,40 @@ static bool is_isValidCF(void *env) {
return true;
}
// SVM action run after all sub-parsers of an ignore-sequence have executed.
//
// is_ctrvm emits one mark (RVM_PUSH) in front of every sub-parser, so the
// stack holds, for each sub-parser, a mark optionally followed by that
// sub-parser's result. This action walks the sub-parsers from last to first,
// remembers the result belonging to seq->which, pops everything down to and
// including each mark, and finally pushes the remembered result.
//
// env is the HIgnoreSeq describing the sequence; arena is unused.
// If the selected sub-parser left nothing above its mark, NULL is pushed.
// Always returns true.
static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
  HIgnoreSeq *seq = (HIgnoreSeq*)env;
  // Start from NULL so that a selected sub-parser without a result never
  // causes an indeterminate pointer to be pushed.
  HParsedToken* save = NULL;
  // We can assume that each subitem generated at most one item on the
  // stack.
  assert(seq->len >= 1);
  for (int i = seq->len - 1; i>=0; i--) {
    // The top of the stack is at stack_count-1; stack[stack_count] is one
    // past the live entries and must not be inspected.
    if (i == (int)seq->which && ctx->stack[ctx->stack_count-1]->token_type != TT_MARK)
      save = ctx->stack[ctx->stack_count-1];
    // skip over everything up to and including the mark.
    while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)
      ;
  }
  ctx->stack[ctx->stack_count++] = save;
  return true;
}
// Regex-VM compilation hook for ignore-sequences (h_left/h_right/h_middle).
// For every sub-parser, in order, it emits an RVM_PUSH and then compiles the
// sub-parser; afterwards it appends an RVM_ACTION running
// h_svm_action_ignoreseq with the same env (the HIgnoreSeq).
// Returns false as soon as a sub-parser fails to compile, true otherwise.
static bool is_ctrvm(HRVMProg *prog, void* env) {
HIgnoreSeq *seq = (HIgnoreSeq*)env;
for (size_t i=0; i<seq->len; ++i) {
h_rvm_insert_insn(prog, RVM_PUSH, 0);
// NOTE(review): passes the sub-parser's env; many_ctrvm and ir_ctrvm pass
// the parser itself to h_compile_regex -- confirm the expected argument.
if (!h_compile_regex(prog, seq->parsers[i]->env))
return false;
}
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
return true;
}
static const HParserVtable ignoreseq_vt = {
.parse = parse_ignoreseq,
.isValidRegular = is_isValidRegular,
.isValidCF = is_isValidCF,
.desugar = desugar_ignoreseq,
.compile_to_rvm = is_ctrvm,
};
@ -73,7 +103,7 @@ static const HParserVtable ignoreseq_vt = {
// API frontends
//
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
static HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 2);
seq->parsers[0] = p;
@ -84,25 +114,25 @@ static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const H
return h_new_parser(mm__, &ignoreseq_vt, seq);
}
const HParser* h_left(const HParser* p, const HParser* q) {
HParser* h_left(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 0);
}
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 0);
}
const HParser* h_right(const HParser* p, const HParser* q) {
HParser* h_right(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 1);
}
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 1);
}
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
return h_middle__m(&system_allocator, p, x, q);
}
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 3);
seq->parsers[0] = p;

View file

@ -21,6 +21,7 @@ static const HParserVtable indirect_vt = {
.isValidRegular = h_false,
.isValidCF = indirect_isValidCF,
.desugar = desugar_indirect,
.compile_to_rvm = h_not_regular,
};
void h_bind_indirect(HParser* indirect, const HParser* inner) {

View file

@ -121,17 +121,39 @@ static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) {
return gen_int_range(mm__, r->lower, r->upper, bytes);
}
// SVM action that checks the integer token on top of the stack against the
// inclusive bounds stored in env (an HRange).
//
// Signed tokens (TT_SINT) are compared directly with lower/upper; unsigned
// tokens (TT_UINT) are compared after casting both bounds to uint64_t.
// Any other token type is rejected. The stack is left untouched; arena is
// unused.
//
// Returns true when the value lies within [lower, upper], false otherwise.
bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
  const HRange *range = (HRange*)env;
  const HParsedToken *top = ctx->stack[ctx->stack_count-1];

  if (top->token_type == TT_SINT)
    return range->lower <= top->sint && top->sint <= range->upper;

  if (top->token_type == TT_UINT)
    return (uint64_t)range->lower <= top->uint && top->uint <= (uint64_t)range->upper;

  return false;
}
// Regex-VM compilation hook for h_int_range: compiles the wrapped integer
// parser r_env->p, then appends an RVM_ACTION that runs
// h_svm_action_validate_int_range with env (the HRange) as its argument.
static bool ir_ctrvm(HRVMProg *prog, void *env) {
HRange *r_env = (HRange*)env;
// NOTE(review): the result of h_compile_regex is ignored here.
h_compile_regex(prog, r_env->p);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
// NOTE(review): returns false unconditionally even though instructions were
// emitted; the sibling *_ctrvm hooks return true on success -- confirm
// whether this is a deliberate "not supported yet" or an oversight.
return false;
}
static const HParserVtable int_range_vt = {
.parse = parse_int_range,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_int_range,
.compile_to_rvm = ir_ctrvm,
};
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
return h_int_range__m(&system_allocator, p, lower, upper);
}
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
// p must be an integer parser, which means it's using parse_bits
// TODO: re-add this check
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");

View file

@ -33,7 +33,7 @@ static HParseResult *parse_many(void* env, HParseState *state) {
HParsedToken *res = a_new(HParsedToken, 1);
res->token_type = TT_SEQUENCE;
res->seq = seq;
return make_result(state, res);
return make_result(state->arena, res);
err0:
if (count >= env_->count) {
state->input_stream = bak;
@ -112,17 +112,40 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
return ma;
}
// Regex-VM compilation hook for the repetition parsers built on HRepeat.
//
// Emitted layout:
//   PUSH                      mark for the whole repetition
//   FORK  <exit>              argument patched below to the exit address
//   <code for repeat->p>
//   [ PUSH; <code for repeat->sep>; ACTION clear_to_mark ]   if sep != NULL
//   GOTO  <fork>              loop back to the FORK
//   exit: ACTION make_sequence
//
// Returns false if compiling the element or the separator fails, true
// otherwise. repeat's count/min settings are not consulted (see TODO).
static bool many_ctrvm(HRVMProg *prog, void *env) {
HRepeat *repeat = (HRepeat*)env;
// FIXME: Implement clear_to_mark
uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
h_rvm_insert_insn(prog, RVM_PUSH, 0);
// TODO: implement min and max properly. Right now, it's always min==0, max==inf
// Remember the FORK's index so its target can be patched once the loop
// body has been emitted.
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
if (!h_compile_regex(prog, repeat->p))
return false;
if (repeat->sep != NULL) {
h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, repeat->sep))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
}
h_rvm_insert_insn(prog, RVM_GOTO, insn);
// Point the FORK at the first instruction after the loop.
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
return true;
}
static const HParserVtable many_vt = {
.parse = parse_many,
.isValidRegular = many_isValidRegular,
.isValidCF = many_isValidCF,
.desugar = desugar_many,
.compile_to_rvm = many_ctrvm,
};
const HParser* h_many(const HParser* p) {
HParser* h_many(const HParser* p) {
return h_many__m(&system_allocator, p);
}
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -131,10 +154,10 @@ const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_many1(const HParser* p) {
HParser* h_many1(const HParser* p) {
return h_many1__m(&system_allocator, p);
}
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -143,10 +166,10 @@ const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_repeat_n(const HParser* p, const size_t n) {
HParser* h_repeat_n(const HParser* p, const size_t n) {
return h_repeat_n__m(&system_allocator, p, n);
}
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = h_epsilon_p__m(mm__);
@ -155,10 +178,10 @@ const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n)
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
HParser* h_sepBy(const HParser* p, const HParser* sep) {
return h_sepBy__m(&system_allocator, p, sep);
}
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
@ -167,10 +190,10 @@ const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep
return h_new_parser(mm__, &many_vt, env);
}
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
HParser* h_sepBy1(const HParser* p, const HParser* sep) {
return h_sepBy1__m(&system_allocator, p, sep);
}
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HRepeat *env = h_new(HRepeat, 1);
env->p = p;
env->sep = sep;
@ -213,10 +236,10 @@ static const HParserVtable length_value_vt = {
.desugar = desugar_length_value,
};
const HParser* h_length_value(const HParser* length, const HParser* value) {
HParser* h_length_value(const HParser* length, const HParser* value) {
return h_length_value__m(&system_allocator, length, value);
}
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HLenVal *env = h_new(HLenVal, 1);
env->length = length;
env->value = value;

View file

@ -6,7 +6,7 @@ static HParseResult* parse_not(void* env, HParseState* state) {
return NULL;
else {
state->input_stream = bak;
return make_result(state, NULL);
return make_result(state->arena, NULL);
}
}
@ -20,11 +20,12 @@ static const HParserVtable not_vt = {
.isValidRegular = h_false, /* see and.c for why */
.isValidCF = h_false, /* also see and.c for why */
.desugar = desugar_not,
.compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this.
};
const HParser* h_not(const HParser* p) {
HParser* h_not(const HParser* p) {
return h_not__m(&system_allocator, p);
}
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
HParser* h_not__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &not_vt, (void *)p);
}

View file

@ -1,6 +1,5 @@
#include "parser_internal.h"
static HParseResult* parse_nothing() {
// not a mistake, this parser always fails
return NULL;
@ -15,16 +14,23 @@ static HCFChoice *desugar_nothing(HAllocator *mm__, void *env) {
return ret;
}
// Regex-VM compilation hook for the always-failing parser: emits two
// back-to-back RVM_MATCH instructions with arguments 0x0000 and 0xFFFF and
// returns true. env is unused.
// NOTE(review): presumably the two matches are mutually exclusive for any
// single input byte, so the program can never proceed -- confirm against the
// RVM_MATCH argument encoding.
static bool nothing_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_MATCH, 0x0000);
h_rvm_insert_insn(prog, RVM_MATCH, 0xFFFF);
return true;
}
static const HParserVtable nothing_vt = {
.parse = parse_nothing,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_nothing,
.compile_to_rvm = nothing_ctrvm,
};
const HParser* h_nothing_p() {
HParser* h_nothing_p() {
return h_nothing_p__m(&system_allocator);
}
const HParser* h_nothing_p__m(HAllocator* mm__) {
HParser* h_nothing_p__m(HAllocator* mm__) {
return h_new_parser(mm__, &nothing_vt, NULL);
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h"
static HParseResult* parse_optional(void* env, HParseState* state) {
@ -8,7 +9,7 @@ static HParseResult* parse_optional(void* env, HParseState* state) {
state->input_stream = bak;
HParsedToken *ast = a_new(HParsedToken, 1);
ast->token_type = TT_NONE;
return make_result(state, ast);
return make_result(state->arena, ast);
}
static bool opt_isValidRegular(void *env) {
@ -26,17 +27,40 @@ static HCFChoice* desugar_optional(HAllocator *mm__, void *env) {
return h_desugar(mm__, p);
}
// SVM action that finalises an optional sub-parse.
//
// Case 1: the top of the stack is still a mark, i.e. nothing was pushed
//         above it. The mark itself is retagged as TT_NONE in place.
// Case 2: the top of the stack is a result. The entry below it must be a
//         mark (asserted); the result is moved down over the mark and the
//         stack shrinks by one.
//
// arena and env are unused. Always returns true.
static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) {
  HParsedToken *top = ctx->stack[ctx->stack_count-1];

  if (top->token_type == TT_MARK) {
    top->token_type = TT_NONE;
    return true;
  }

  // Drop one slot, then overwrite the mark with the saved result.
  ctx->stack_count--;
  assert(ctx->stack[ctx->stack_count-1]->token_type == TT_MARK);
  ctx->stack[ctx->stack_count-1] = top;
  return true;
}
// Regex-VM compilation hook for h_optional.
//
// Emitted layout:
//   PUSH                      mark in front of the optional part
//   FORK  <after>             argument patched below
//   <code for the wrapped parser>
//   after: ACTION h_svm_action_optional
//
// env is the wrapped parser (h_optional__m stores it as the parser's env).
// Returns false if the wrapped parser fails to compile, true otherwise.
static bool opt_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_PUSH, 0);
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
HParser *p = (HParser*) env;
// NOTE(review): passes p->env; many_ctrvm and ir_ctrvm pass the parser
// itself to h_compile_regex -- confirm the expected argument.
if (!h_compile_regex(prog, p->env))
return false;
// Point the FORK at the instruction following the wrapped parser's code.
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL));
return true;
}
static const HParserVtable optional_vt = {
.parse = parse_optional,
.isValidRegular = opt_isValidRegular,
.isValidCF = opt_isValidCF,
.desugar = desugar_optional,
.compile_to_rvm = opt_ctrvm,
};
const HParser* h_optional(const HParser* p) {
HParser* h_optional(const HParser* p) {
return h_optional__m(&system_allocator, p);
}
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
// TODO: re-add this
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
return h_new_parser(mm__, &optional_vt, (void *)p);

View file

@ -2,15 +2,16 @@
#define HAMMER_PARSE_INTERNAL__H
#include "../hammer.h"
#include "../internal.h"
#include "../backends/regex.h"
#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
#define a_new(typ, count) a_new_(state->arena, typ, count)
// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out.
static inline HParseResult* make_result(HParseState *state, HParsedToken *tok) {
HParseResult *ret = a_new(HParseResult, 1);
static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
ret->ast = tok;
ret->arena = state->arena;
ret->arena = arena;
return ret;
}
@ -23,9 +24,6 @@ static inline size_t token_length(HParseResult *pr) {
}
}
static inline bool h_true(void *env) { return true; }
static inline bool h_false(void *env) { return false; }
/* Epsilon rules happen during desugaring. This handles them. */
static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) {
static HCFChoice *res_seq_l[] = {NULL};

View file

@ -21,7 +21,7 @@ static HParseResult* parse_sequence(void *env, HParseState *state) {
}
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_SEQUENCE; tok->seq = seq;
return make_result(state, tok);
return make_result(state->arena, tok);
}
static bool sequence_isValidRegular(void *env) {
@ -59,34 +59,46 @@ static HCFChoice* desugar_sequence(HAllocator *mm__, void *env) {
return ret;
}
// Regex-VM compilation hook for h_sequence: emits an RVM_PUSH, then the code
// of every element of s->p_array in order (by calling each element's own
// compile_to_rvm hook with that element's env), and finally an RVM_ACTION
// running h_svm_action_make_sequence.
// Returns false as soon as an element fails to compile, true otherwise.
static bool sequence_ctrvm(HRVMProg *prog, void *env) {
HSequence *s = (HSequence*)env;
h_rvm_insert_insn(prog, RVM_PUSH, 0);
for (size_t i=0; i<s->len; ++i) {
if (!s->p_array[i]->vtable->compile_to_rvm(prog, s->p_array[i]->env))
return false;
}
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
return true;
}
static const HParserVtable sequence_vt = {
.parse = parse_sequence,
.isValidRegular = sequence_isValidRegular,
.isValidCF = sequence_isValidCF,
.desugar = desugar_sequence,
.compile_to_rvm = sequence_ctrvm,
};
const HParser* h_sequence(const HParser* p, ...) {
HParser* h_sequence(const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
HParser* ret = h_sequence__mv(&system_allocator, p, ap);
va_end(ap);
return ret;
}
const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap;
va_start(ap, p);
const HParser* ret = h_sequence__mv(mm__, p, ap);
HParser* ret = h_sequence__mv(mm__, p, ap);
va_end(ap);
return ret;
}
const HParser* h_sequence__v(const HParser* p, va_list ap) {
HParser* h_sequence__v(const HParser* p, va_list ap) {
return h_sequence__mv(&system_allocator, p, ap);
}
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
va_list ap;
size_t len = 0;
const HParser *arg;

View file

@ -15,9 +15,10 @@ static HParseResult* parse_token(void *env, HParseState *state) {
}
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
return make_result(state, tok);
return make_result(state->arena, tok);
}
static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
HToken *tok = (HToken*)env;
HCFSequence *seq = h_new(HCFSequence, 1);
@ -37,17 +38,29 @@ static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
return ret;
}
// Regex-VM compilation hook for h_token.
//
// Emits an RVM_PUSH, then for every byte of the token an RVM_MATCH whose
// argument carries that byte in both its low and high 8 bits (the same
// single-character form ws_ctrvm uses) followed by an RVM_STEP, and finally
// an RVM_CAPTURE. env is the HToken holding the byte string and its length.
// Always returns true.
static bool token_ctrvm(HRVMProg *prog, void *env) {
  HToken *t = (HToken*)env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  for (size_t i = 0; i < t->len; ++i) {
    // Combine with '|', not '&': `c & (c << 8)` is always 0 for a byte, which
    // would make every position match the same constant regardless of the
    // token's contents.
    h_rvm_insert_insn(prog, RVM_MATCH, t->str[i] | (t->str[i] << 8));
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  return true;
}
const HParserVtable token_vt = {
.parse = parse_token,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_token,
.compile_to_rvm = token_ctrvm,
};
const HParser* h_token(const uint8_t *str, const size_t len) {
HParser* h_token(const uint8_t *str, const size_t len) {
return h_token__m(&system_allocator, str, len);
}
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
HToken *t = h_new(HToken, 1);
t->str = (uint8_t*)str, t->len = len;
return h_new_parser(mm__, &token_vt, t);

View file

@ -22,6 +22,7 @@ static const HParserVtable unimplemented_vt = {
.isValidRegular = h_false,
.isValidCF = h_false,
.desugar = desugar_unimplemented,
.compile_to_rvm = h_not_regular,
};
static HParser unimplemented = {

View file

@ -49,16 +49,32 @@ static bool ws_isValidCF(void *env) {
return p->vtable->isValidCF(p->env);
}
// Regex-VM compilation hook for h_whitespace.
//
// Emits, for each of the six whitespace characters in SPACE_CHRS, the triple
//   FORK <next>; MATCH <c,c>; GOTO start
// where `start` is the address of the first such triple and <next> is patched
// to the address right after the GOTO. After the six triples the wrapped
// parser is compiled, and its compile result is returned.
// env is the wrapped parser (h_whitespace__m stores it as the parser's env).
static bool ws_ctrvm(HRVMProg *prog, void *env) {
HParser *p = (HParser*)env;
uint16_t start = h_rvm_get_ip(prog);
uint16_t next;
const char SPACE_CHRS[6] = {' ', '\f', '\n', '\r', '\t', '\v'};
for (int i = 0; i < 6; i++) {
next = h_rvm_insert_insn(prog, RVM_FORK, 0);
// Same character in the high and low byte of the MATCH argument.
h_rvm_insert_insn(prog, RVM_MATCH, (SPACE_CHRS[i] << 8) | (SPACE_CHRS[i]));
h_rvm_insert_insn(prog, RVM_GOTO, start);
h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
}
// NOTE(review): passes p->env; many_ctrvm and ir_ctrvm pass the parser
// itself to h_compile_regex -- confirm the expected argument.
return h_compile_regex(prog, p->env);
}
static const HParserVtable whitespace_vt = {
.parse = parse_whitespace,
.isValidRegular = ws_isValidRegular,
.isValidCF = ws_isValidCF,
.desugar = desugar_whitespace,
.compile_to_rvm = ws_ctrvm,
};
const HParser* h_whitespace(const HParser* p) {
HParser* h_whitespace(const HParser* p) {
return h_whitespace__m(&system_allocator, p);
}
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
return h_new_parser(mm__, &whitespace_vt, (void *)p);
}

View file

@ -39,14 +39,15 @@ static HCFChoice* desugar_xor(HAllocator *mm__, void *env) {
static const HParserVtable xor_vt = {
.parse = parse_xor,
.isValidRegular = h_false,
.isValidCF = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
.desugar = desugar_xor,
.compile_to_rvm = h_not_regular,
};
const HParser* h_xor(const HParser* p1, const HParser* p2) {
HParser* h_xor(const HParser* p1, const HParser* p2) {
return h_xor__m(&system_allocator, p1, p2);
}
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1;
env->p2 = p2;

View file

@ -1,16 +1,27 @@
#include <string.h>
#include <stdlib.h>
#include "internal.h"
static void* system_alloc(HAllocator *allocator, size_t size) {
return malloc(size);
void* ptr = calloc(size + sizeof(size_t), 1);
*(size_t*)ptr = size;
return ptr + sizeof(size_t);
}
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
return realloc(ptr, size);
if (ptr == NULL)
return system_alloc(allocator, size);
ptr = realloc(ptr - sizeof(size_t), size + sizeof(size_t));
size_t old_size = *(size_t*)ptr;
*(size_t*)ptr = size;
if (size > old_size)
memset(ptr+sizeof(size_t)+old_size, 0, size - old_size);
return ptr + sizeof(size_t);
}
static void system_free(HAllocator *allocator, void* ptr) {
free(ptr);
free(ptr - sizeof(size_t));
}
HAllocator system_allocator = {

View file

@ -11,7 +11,7 @@ HParserTestcase testcases[] = {
};
static void test_benchmark_1() {
const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
HBenchmarkResults *res = h_benchmark(parser, testcases);
h_benchmark_report(stderr, res);

View file

@ -52,6 +52,28 @@
} \
} while(0)
// Fails the current GLib test (with a message) when lang's isValidRegular
// predicate, called with lang->env, returns false.
// NOTE(review): the predicate is read directly from `lang`; the parser code
// in this project reaches it through `->vtable` (e.g. p->vtable->isValidCF)
// -- confirm what type `lang` is expected to be.
#define g_check_regular(lang) do { \
if (!lang->isValidRegular(lang->env)) { \
g_test_message("Language is not regular"); \
g_test_fail(); \
} \
} while(0)
// Fails the current GLib test (with a message) when lang's isValidCF
// predicate, called with lang->env, returns false. The NOTE(review) on
// member access given for g_check_regular applies here as well.
#define g_check_contextfree(lang) do { \
if (!lang->isValidCF(lang->env)) { \
g_test_message("Language is not context-free"); \
g_test_fail(); \
} \
} while(0)
// Fails the current GLib test when h_compile(lang, backend, params) yields a
// false value. The message prints the backend's source spelling and params
// formatted with %s, so params must be a C string.
#define g_check_compilable(lang, backend, params) do { \
if (!h_compile(lang, backend, params)) { \
g_test_message("Language is not %s(%s)", #backend, params); \
g_test_fail(); \
} \
} while(0)
// TODO: replace uses of this with g_check_parse_failed
#define g_check_failed(res) do { \
const HParseResult *result = (res); \
@ -77,7 +99,7 @@
} else { \
char* cres = h_write_result_unamb(res->ast); \
g_check_string(cres, ==, result); \
g_free(cres); \
system_allocator.free(&system_allocator, cres); \
HArenaStats stats; \
h_allocator_stats(res->arena, &stats); \
g_test_message("Parse used %zd bytes, wasted %zd bytes. " \
@ -149,4 +171,5 @@
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
#endif // #ifndef HAMMER_TEST_SUITE__H