Finished up the regex backend

This commit is contained in:
Dan Hirsch 2013-04-26 20:36:54 -07:00
parent 204147a3d2
commit 13088c9d7a
39 changed files with 481 additions and 250 deletions

1
.gitignore vendored
View file

@ -11,3 +11,4 @@ TAGS
\#* \#*
.* .*
docs/milestone2.dot.pdf docs/milestone2.dot.pdf
*.dot.pdf

View file

@ -17,6 +17,9 @@ CONFIG_VARS= INCLUDE_TESTS
test: src/test_suite test: src/test_suite
$< $<
examples/all: src/all
examples/compile: src/compile
define SUBDIR_TEMPLATE define SUBDIR_TEMPLATE
$(1)/%: $(1)/%:
$$(MAKE) -C $(1) $$* $$(MAKE) -C $(1) $$*

View file

@ -6,7 +6,7 @@ endif
include $(TOPLEVEL)/config.mk include $(TOPLEVEL)/config.mk
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) -lrt
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
LDFLAGS := LDFLAGS :=

View file

@ -1,36 +1,36 @@
digraph { digraph {
graph [rankdir=LR]; graph [rankdir=LR];
subgraph complete { subgraph complete {
node [color="gray",fontcolor="gray"]; node [color="gray",fontcolor="gray"];
glue; glue;
regex_svm; regex_svm;
regex_rvm; regex_rvm;
} desugaring; // Needs merged.
/* The end result of the milestone, along with the subtasks listed */ }
milestone2 [color="green",style="filled"]; /* The end result of the milestone, along with the subtasks listed */
llk -> milestone2; milestone2 [color="green",style="filled"];
lr -> milestone2; llk -> milestone2;
lalr8_gen -> lr; lr -> milestone2;
glr_gen -> lr; lalr8_gen -> lr; // Generate parse tables for LALR(8)
lr_driver -> lr; glr_gen -> lr; // Generate parse tables for GLR
regex -> milestone2; llk_gen -> llk; // Generate parse tables for LL(k)
glue -> milestone2; // Meredith knows what glue referred to here. lr_driver -> lr; // Write driver for all LR-type algs; analogous to SVM and RVM implementations
tests -> milestone2; llk_driver -> llk; // Write driver for LL(k)
regex -> milestone2;
regex_gen -> regex; // partially done glue -> milestone2;
regex_driver -> regex; tests -> milestone2;
regex_svm -> regex_driver;
regex_rvm -> regex_driver; regex_gen -> regex; // should be mostly done; the rest is concurrent with regex_svm_actions
regex_svm_actions -> regex_driver; regex_driver -> regex;
llk_driver -> llk; regex_svm -> regex_driver;
llk_gen -> llk; regex_rvm -> regex_driver;
regex_svm_actions -> regex_driver; // 1 for each way that an HParsedToken can be extracted from the stack.
/*
* /*
*/ *
desugaring -> llk_gen; */
desugaring -> lalr8_gen; desugaring -> llk_gen;
desugaring -> glr_gen; desugaring -> lalr8_gen;
desugaring -> glr_gen;
} }

65
docs/milestone3.dot Normal file
View file

@ -0,0 +1,65 @@
// Task graph for milestone 3 (language bindings), drawn left to right.
// NOTE(review): edges appear to run from a prerequisite to the item it
// feeds into, ending at the milestone3 node -- confirm with the author.
digraph {
graph [rankdir=LR];
// Nodes listed in this subgraph are drawn in gray; it is currently empty.
subgraph complete {
node [color="gray",fontcolor="gray"];
}
// Per-language grouping nodes, drawn in blue.
subgraph groups {
node [color="blue",fontcolor="blue"];
cpp;
python;
ruby;
go;
php;
dotnet;
}
// The milestone itself is the filled green node.
milestone3 [color="green",style="filled"];
// The description format feeds both the descriptions and the generator.
function_desc_fmt -> function_descs;
function_desc_fmt -> binding_generator;
// Every per-language generator hangs off the shared binding generator...
binding_generator -> cpp_gen;
binding_generator -> python_gen;
binding_generator -> ruby_gen;
binding_generator -> go_gen;
binding_generator -> php_gen;
binding_generator -> dotnet_gen;
// ...and off the function descriptions.
function_descs -> cpp_gen;
function_descs -> python_gen;
function_descs -> ruby_gen;
function_descs -> go_gen;
function_descs -> php_gen;
function_descs -> dotnet_gen;
// Plugins to generate a type of code
cpp_gen -> cpp;
python_gen -> python;
ruby_gen -> ruby;
go_gen -> go;
php_gen -> php;
dotnet_gen -> dotnet;
// base code... developed concurrently with _gen's
cpp_base -> cpp;
python_base -> python;
ruby_base -> ruby;
go_base -> go;
php_base -> php;
dotnet_base -> dotnet;
// Bindings for various languages. These are just groupings.
cpp -> milestone3;
python -> milestone3;
ruby -> milestone3;
go -> milestone3;
php -> milestone3;
dotnet -> milestone3;
}

View file

@ -38,7 +38,6 @@ HAMMER_PARTS := \
datastructures.o \ datastructures.o \
system_allocator.o \ system_allocator.o \
benchmark.o \ benchmark.o \
compile.o \
$(PARSERS:%=parsers/%.o) \ $(PARSERS:%=parsers/%.o) \
$(BACKENDS:%=backends/%.o) $(BACKENDS:%=backends/%.o)

View file

@ -1,7 +1,16 @@
#include <assert.h> #include <assert.h>
#include <string.h>
#include "../internal.h" #include "../internal.h"
#include "../parsers/parser_internal.h" #include "../parsers/parser_internal.h"
// Hash `len` bytes starting at `buf`: begin at 5381 and, for every byte,
// multiply the running value by 33 and add the byte. Arithmetic wraps
// modulo 2^32 because the accumulator is a uint32_t.
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++) {
    // (acc << 5) + acc is acc * 33 in wrapping 32-bit arithmetic.
    acc = (acc << 5) + acc + buf[i];
  }
  return acc;
}
// short-hand for constructing HCachedResult's // short-hand for constructing HCachedResult's
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
HCachedResult *ret = a_new(HCachedResult, 1); HCachedResult *ret = a_new(HCachedResult, 1);
@ -190,12 +199,39 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
} }
} }
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) { int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
parser->backend = PB_PACKRAT;
return 0; // No compilation necessary, and everything should work return 0; // No compilation necessary, and everything should work
// out of the box. // out of the box.
} }
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) { static uint32_t cache_key_hash(const void* key) {
return djbhash(key, sizeof(HParserCacheKey));
}
// Equality callback handed to h_hashtable_new: two keys are equal when the
// sizeof(HParserCacheKey) bytes they point at are identical.
static bool cache_key_equal(const void* key1, const void* key2) {
  const size_t key_size = sizeof(HParserCacheKey);
  return !memcmp(key1, key2, key_size);
}
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
HArena * arena = h_new_arena(mm__, 0);
HParseState *parse_state = a_new_(arena, HParseState, 1);
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
cache_key_hash); // hash_func
parse_state->input_stream = *input_stream;
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
h_hashtable_free(parse_state->cache);
if (!res)
h_delete_arena(parse_state->arena);
return res;
return h_do_parse(parser, parse_state); return h_do_parse(parser, parse_state);
} }

View file

@ -245,10 +245,6 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
return NULL; return NULL;
} }
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
return parser->vtable->compile_to_rvm(prog, parser->env);
}
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) { uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
for (uint16_t i = 0; i < prog->action_count; i++) { for (uint16_t i = 0; i < prog->action_count; i++) {
if (prog->actions[i].action == action_func && prog->actions[i].env == env) if (prog->actions[i].action == action_func && prog->actions[i].env == env)
@ -293,4 +289,78 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
prog->insns[ip].arg = new_val; prog->insns[ip].arg = new_val;
} }
// TODO: Implement the primitive actions size_t h_svm_count_to_mark(HSVMContext *ctx) {
size_t ctm;
for (ctm = 0; ctm < ctx->stack_count-1; ctm++) {
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
return ctm;
}
return ctx->stack_count;
}
// TODO: Implement the primitive actions
// SVM action: collapse everything above the topmost mark into one sequence.
// The mark token itself is converted in place into a TT_SEQUENCE token whose
// `seq` array holds the tokens that were above it, in stack order; those
// tokens are then popped, leaving the new sequence token on top.
// `env` is unused. Always returns true; a missing mark trips an assert.
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
  size_t n_items = h_svm_count_to_mark(ctx);
  // h_svm_count_to_mark returns stack_count when there is no mark.
  assert (n_items < ctx->stack_count);
  HParsedToken *res = ctx->stack[ctx->stack_count - 1 - n_items];
  assert (res->token_type == TT_MARK);
  res->token_type = TT_SEQUENCE;
  HCountedArray *ret_carray = h_carray_new_sized(arena, n_items);
  res->seq = ret_carray;
  // res index and bit offset are the same as the mark.
  for (size_t i = 0; i < n_items; i++) {
    ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i];
  }
  // Record how many slots were filled; without this the array would still
  // report its initial fill count.
  // NOTE(review): assumes HCountedArray tracks its length in `used` and that
  // h_carray_new_sized only reserves capacity -- confirm against the
  // datastructures code.
  ret_carray->used = n_items;
  ctx->stack_count -= n_items;
  return true;
}
// SVM action: pop tokens off the stack until a TT_MARK has been popped.
// Returns true once the mark is removed, or false if the stack empties
// without one being found. `arena` and `env` are unused.
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
  while (ctx->stack_count != 0) {
    const size_t top = ctx->stack_count - 1;
    ctx->stack_count = top;  // pop first, then inspect what was popped
    if (ctx->stack[top]->token_type == TT_MARK)
      return true;
  }
  return false; // no mark found.
}
// Glue regex backend to rest of system
// Emit RVM code for `parser` into `prog` by delegating to the parser's own
// compile_to_rvm vtable hook; that hook's result is returned unchanged.
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
  const HParserVtable *vt = parser->vtable;
  return vt->compile_to_rvm(prog, parser->env);
}
// Release the compiled regex program attached to `parser`, if any, and put
// the parser back on the default packrat backend.
static void h_regex_free(HParser *parser) {
  HRVMProg *prog = (HRVMProg*)parser->backend_data;
  // Nothing to release when no program was ever attached; reading
  // prog->allocator in that case would dereference NULL.
  if (prog != NULL) {
    // h_free presumably releases through the local named mm__ -- the
    // program remembers which allocator created it.
    HAllocator *mm__ = prog->allocator;
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
  }
  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
// Compile `parser` for the regex backend.
// Returns 0 on success (the program is stored in parser->backend_data),
// 1 if the parser reports it is not a valid regular grammar, and
// 2 if emitting the RVM program failed. `params` is unused.
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
  if (!parser->vtable->isValidRegular(parser->env))
    return 1;
  HRVMProg *prog = h_new(HRVMProg, 1);
  // Start from an empty program. Nothing visible here guarantees h_new
  // zeroes the block, and the failure path below frees insns/actions, so
  // they must not be left indeterminate.
  prog->length = 0;
  prog->action_count = 0;
  prog->insns = NULL;
  prog->actions = NULL;
  prog->allocator = mm__;
  if (!h_compile_regex(prog, parser)) {
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
    return 2;
  }
  parser->backend_data = prog;
  return 0;
}
// Parse entry point for the regex backend: run the compiled program stored
// in parser->backend_data over the stream's input buffer and length.
static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HRVMProg *prog = (HRVMProg*)parser->backend_data;
  return h_rvm_run__m(mm__, prog, input_stream->input, input_stream->length);
}
// Backend dispatch table for the regex backend: bundles the compile, parse
// and free entry points defined above.
HParserBackendVTable h__regex_backend_vtable = {
.compile = h_regex_compile,
.parse = h_regex_parse,
.free = h_regex_free
};

View file

@ -10,9 +10,12 @@ typedef enum HRVMOp_ {
RVM_ACCEPT, // [a] RVM_ACCEPT, // [a]
RVM_GOTO, // [c] parameter is an offset into the instruction table RVM_GOTO, // [c] parameter is an offset into the instruction table
RVM_FORK, // [c] parameter is an offset into the instruction table RVM_FORK, // [c] parameter is an offset into the instruction table
RVM_PUSH, // [a] No arguments, just pushes a mark onto the stack RVM_PUSH, // [a] No arguments, just pushes a mark (pointer to some
// character in the input string) onto the stack
RVM_ACTION, // [a] argument is an action ID RVM_ACTION, // [a] argument is an action ID
RVM_CAPTURE, // [a] Capture the last string, and push it on the stack. No arg. RVM_CAPTURE, // [a] Capture the last string (up to the current
// position, non-inclusive), and push it on the
// stack. No arg.
RVM_EOF, // [m] Succeeds only if at EOF. RVM_EOF, // [m] Succeeds only if at EOF.
RVM_MATCH, // [m] The high byte of the parameter is an upper bound RVM_MATCH, // [m] The high byte of the parameter is an upper bound
// and the low byte is a lower bound, both // and the low byte is a lower bound, both
@ -31,7 +34,7 @@ typedef struct HRVMInsn_{
typedef struct HSVMContext_ { typedef struct HSVMContext_ {
HParsedToken **stack; HParsedToken **stack;
size_t stack_count; size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
size_t stack_capacity; size_t stack_capacity;
} HSVMContext; } HSVMContext;
@ -43,13 +46,13 @@ typedef struct HSVMAction_ {
void* env; void* env;
} HSVMAction; } HSVMAction;
typedef struct HRVMProg_ { struct HRVMProg_ {
HAllocator *allocator; HAllocator *allocator;
size_t length; size_t length;
size_t action_count; size_t action_count;
HRVMInsn *insns; HRVMInsn *insns;
HSVMAction *actions; HSVMAction *actions;
} HRVMProg; };
// Returns true IFF the provided parser could be compiled. // Returns true IFF the provided parser could be compiled.
bool h_compile_regex(HRVMProg *prog, const HParser* parser); bool h_compile_regex(HRVMProg *prog, const HParser* parser);
@ -68,4 +71,10 @@ uint16_t h_rvm_get_ip(HRVMProg *prog);
// correct target is known. // correct target is known.
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val); void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val);
// Common SVM action funcs...
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env);
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env);
extern HParserBackendVTable h__regex_backend_vtable;
#endif #endif

View file

@ -21,11 +21,11 @@
*/ */
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) { HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
return h_benchmark__m(&system_allocator, parser, testcases); return h_benchmark__m(&system_allocator, parser, testcases);
} }
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) { HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases) {
// For now, just output the results to stderr // For now, just output the results to stderr
HParserTestcase* tc = testcases; HParserTestcase* tc = testcases;
HParserBackend backend = PB_MIN; HParserBackend backend = PB_MIN;
@ -33,10 +33,10 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HPars
ret->len = PB_MAX-PB_MIN; ret->len = PB_MAX-PB_MIN;
ret->results = h_new(HBackendResults, ret->len); ret->results = h_new(HBackendResults, ret->len);
for (backend = PB_MIN; backend < PB_MAX; backend++) { for (backend = PB_MIN; backend <= PB_MAX; backend++) {
ret->results[backend].backend = backend; ret->results[backend].backend = backend;
// Step 1: Compile grammar for given parser... // Step 1: Compile grammar for given parser...
if (h_compile(parser, PB_MIN, NULL) == -1) { if (h_compile(parser, backend, NULL) == -1) {
// backend inappropriate for grammar... // backend inappropriate for grammar...
fprintf(stderr, "failed\n"); fprintf(stderr, "failed\n");
ret->results[backend].compile_success = false; ret->results[backend].compile_success = false;

View file

@ -1,15 +0,0 @@
// This file contains functions related to managing multiple parse backends
#include "hammer.h"
#include "internal.h"
static HParserBackendVTable *backends[PB_MAX] = {
&h__packrat_backend_vtable,
};
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
return h_compile__m(&system_allocator, parser, backend, params);
}
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
return backends[backend]->compile(mm__, parser, params);
}

View file

@ -26,13 +26,11 @@
#include "allocator.h" #include "allocator.h"
#include "parsers/parser_internal.h" #include "parsers/parser_internal.h"
static uint32_t djbhash(const uint8_t *buf, size_t len) { static HParserBackendVTable *backends[PB_MAX + 1] = {
uint32_t hash = 5381; &h__packrat_backend_vtable,
while (len--) { &h__regex_backend_vtable,
hash = hash * 33 + *buf++; };
}
return hash;
}
/* Helper function, since these lines appear in every parser */ /* Helper function, since these lines appear in every parser */
@ -42,42 +40,24 @@ typedef struct {
} HTwoParsers; } HTwoParsers;
static uint32_t cache_key_hash(const void* key) {
return djbhash(key, sizeof(HParserCacheKey));
}
static bool cache_key_equal(const void* key1, const void* key2) {
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
}
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
return h_parse__m(&system_allocator, parser, input, length); return h_parse__m(&system_allocator, parser, input, length);
} }
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
// TODO: split the creation of the parse state into h_packrat_parse
// Set up a parse state... // Set up a parse state...
HArena * arena = h_new_arena(mm__, 0); HInputStream input_stream = {
HParseState *parse_state = a_new_(arena, HParseState, 1); .index = 0,
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func .bit_offset = 8,
cache_key_hash); // hash_func .overrun = 0,
parse_state->input_stream.input = input; .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
parse_state->input_stream.index = 0; .length = length,
parse_state->input_stream.bit_offset = 8; // bit big endian .input = input
parse_state->input_stream.overrun = 0; };
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
parse_state->input_stream.length = length;
parse_state->lr_stack = h_slist_new(arena);
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
cache_key_hash);
parse_state->arena = arena;
HParseResult *res = h_do_parse(parser, parse_state);
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
h_hashtable_free(parse_state->cache);
if (!res)
h_delete_arena(parse_state->arena);
return res; return backends[parser->backend]->parse(mm__, parser, &input_stream);
} }
void h_parse_result_free(HParseResult *result) { void h_parse_result_free(HParseResult *result) {
@ -98,3 +78,14 @@ bool h_not_regular(HRVMProg *prog, void *env) {
(void)env; (void)env;
return false; return false;
} }
// Convenience wrapper around h_compile__m that uses the system allocator.
int h_compile(HParser* parser, HParserBackend backend, const void* params) {
  HAllocator *default_mm = &system_allocator;
  return h_compile__m(default_mm, parser, backend, params);
}
// Compile `parser` for `backend` using allocator `mm__`.
// Returns 0 on success, in which case parser->backend is switched to
// `backend`; returns -1 when `backend` has no entry in the backend table;
// otherwise returns the backend's own non-zero failure code and leaves
// parser->backend untouched.
int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
  // The enum defines values above PB_MAX, while the table only has
  // PB_MAX + 1 slots, so reject anything outside it (or any empty slot)
  // instead of indexing past the end. The size_t cast also catches negative
  // values without a signed/unsigned comparison.
  if ((size_t)backend >= sizeof(backends) / sizeof(backends[0]) ||
      backends[backend] == NULL)
    return -1;
  int ret = backends[backend]->compile(mm__, parser, params);
  if (!ret)
    parser->backend = backend;
  return ret;
}

View file

@ -34,11 +34,11 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ { typedef enum HParserBackend_ {
PB_MIN = 0, PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default. PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_REGULAR, //
PB_LALR, // Not Implemented PB_LALR, // Not Implemented
PB_LLk, // Not Implemented PB_LLk, // Not Implemented
PB_GLR, // Not Implemented PB_GLR, // Not Implemented
PB_REGULAR, // Not Implemented PB_MAX = PB_REGULAR
PB_MAX
} HParserBackend; } HParserBackend;
typedef enum HTokenType_ { typedef enum HTokenType_ {
@ -47,7 +47,7 @@ typedef enum HTokenType_ {
TT_SINT, TT_SINT,
TT_UINT, TT_UINT,
TT_SEQUENCE, TT_SEQUENCE,
TT_RESERVED_1, // reserved for internal use TT_RESERVED_1, // reserved for backend-specific internal use
TT_USER = 64, TT_USER = 64,
TT_ERR, TT_ERR,
TT_MAX TT_MAX
@ -122,11 +122,13 @@ typedef struct HParserVtable_ {
HParseResult* (*parse)(void *env, HParseState *state); HParseResult* (*parse)(void *env, HParseState *state);
bool (*isValidRegular)(void *env); bool (*isValidRegular)(void *env);
bool (*isValidCF)(void *env); bool (*isValidCF)(void *env);
bool (*compile_to_rvm)(HRVMProg *prog, void* env); bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean.
} HParserVtable; } HParserVtable;
typedef struct HParser_ { typedef struct HParser_ {
const HParserVtable *vtable; const HParserVtable *vtable;
HParserBackend backend;
void* backend_data;
void *env; void *env;
} HParser; } HParser;
@ -199,7 +201,7 @@ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* inp
* *
* Result token type: TT_BYTES * Result token type: TT_BYTES
*/ */
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len); HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
/** /**
* Given a single character, returns a parser that parses that * Given a single character, returns a parser that parses that
@ -207,7 +209,7 @@ HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c); HAMMER_FN_DECL(HParser*, h_ch, const uint8_t c);
/** /**
* Given two single-character bounds, lower and upper, returns a parser * Given two single-character bounds, lower and upper, returns a parser
@ -216,14 +218,14 @@ HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper); HAMMER_FN_DECL(HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
/** /**
* Given an integer parser, p, and two integer bounds, lower and upper, * Given an integer parser, p, and two integer bounds, lower and upper,
* returns a parser that parses an integral value within the range * returns a parser that parses an integral value within the range
* [lower, upper] (inclusive). * [lower, upper] (inclusive).
*/ */
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper); HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
/** /**
* Returns a parser that parses the specified number of bits. sign == * Returns a parser that parses the specified number of bits. sign ==
@ -231,63 +233,63 @@ HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lowe
* *
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false * Result token type: TT_SINT if sign == true, TT_UINT if sign == false
*/ */
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign); HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
/** /**
* Returns a parser that parses a signed 8-byte integer value. * Returns a parser that parses a signed 8-byte integer value.
* *
* Result token type: TT_SINT * Result token type: TT_SINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_int64); HAMMER_FN_DECL_NOARG(HParser*, h_int64);
/** /**
* Returns a parser that parses a signed 4-byte integer value. * Returns a parser that parses a signed 4-byte integer value.
* *
* Result token type: TT_SINT * Result token type: TT_SINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_int32); HAMMER_FN_DECL_NOARG(HParser*, h_int32);
/** /**
* Returns a parser that parses a signed 2-byte integer value. * Returns a parser that parses a signed 2-byte integer value.
* *
* Result token type: TT_SINT * Result token type: TT_SINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_int16); HAMMER_FN_DECL_NOARG(HParser*, h_int16);
/** /**
* Returns a parser that parses a signed 1-byte integer value. * Returns a parser that parses a signed 1-byte integer value.
* *
* Result token type: TT_SINT * Result token type: TT_SINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_int8); HAMMER_FN_DECL_NOARG(HParser*, h_int8);
/** /**
* Returns a parser that parses an unsigned 8-byte integer value. * Returns a parser that parses an unsigned 8-byte integer value.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64); HAMMER_FN_DECL_NOARG(HParser*, h_uint64);
/** /**
* Returns a parser that parses an unsigned 4-byte integer value. * Returns a parser that parses an unsigned 4-byte integer value.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32); HAMMER_FN_DECL_NOARG(HParser*, h_uint32);
/** /**
* Returns a parser that parses an unsigned 2-byte integer value. * Returns a parser that parses an unsigned 2-byte integer value.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16); HAMMER_FN_DECL_NOARG(HParser*, h_uint16);
/** /**
* Returns a parser that parses an unsigned 1-byte integer value. * Returns a parser that parses an unsigned 1-byte integer value.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8); HAMMER_FN_DECL_NOARG(HParser*, h_uint8);
/** /**
* Given another parser, p, returns a parser that skips any whitespace * Given another parser, p, returns a parser that skips any whitespace
@ -295,7 +297,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
* *
* Result token type: p's result type * Result token type: p's result type
*/ */
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p); HAMMER_FN_DECL(HParser*, h_whitespace, const HParser* p);
/** /**
* Given two parsers, p and q, returns a parser that parses them in * Given two parsers, p and q, returns a parser that parses them in
@ -303,7 +305,7 @@ HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
* *
* Result token type: p's result type * Result token type: p's result type
*/ */
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q); HAMMER_FN_DECL(HParser*, h_left, const HParser* p, const HParser* q);
/** /**
* Given two parsers, p and q, returns a parser that parses them in * Given two parsers, p and q, returns a parser that parses them in
@ -311,7 +313,7 @@ HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
* *
* Result token type: q's result type * Result token type: q's result type
*/ */
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q); HAMMER_FN_DECL(HParser*, h_right, const HParser* p, const HParser* q);
/** /**
* Given three parsers, p, x, and q, returns a parser that parses them in * Given three parsers, p, x, and q, returns a parser that parses them in
@ -319,7 +321,7 @@ HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
* *
* Result token type: x's result type * Result token type: x's result type
*/ */
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q); HAMMER_FN_DECL(HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
/** /**
* Given another parser, p, and a function f, returns a parser that * Given another parser, p, and a function f, returns a parser that
@ -327,21 +329,21 @@ HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, con
* *
* Result token type: any * Result token type: any
*/ */
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a); HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a);
/** /**
* Parse a single character in the given charset. * Parse a single character in the given charset.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length); HAMMER_FN_DECL(HParser*, h_in, const uint8_t *charset, size_t length);
/** /**
* Parse a single character *NOT* in the given charset. * Parse a single character *NOT* in the given charset.
* *
* Result token type: TT_UINT * Result token type: TT_UINT
*/ */
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length); HAMMER_FN_DECL(HParser*, h_not_in, const uint8_t *charset, size_t length);
/** /**
* A no-argument parser that succeeds if there is no more input to * A no-argument parser that succeeds if there is no more input to
@ -349,14 +351,14 @@ HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
* *
* Result token type: None. The HParseResult exists but its AST is NULL. * Result token type: None. The HParseResult exists but its AST is NULL.
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p); HAMMER_FN_DECL_NOARG(HParser*, h_end_p);
/** /**
* This parser always fails. * This parser always fails.
* *
* Result token type: NULL. Always. * Result token type: NULL. Always.
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p); HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
/** /**
* Given a null-terminated list of parsers, apply each parser in order. * Given a null-terminated list of parsers, apply each parser in order.
@ -364,7 +366,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p); HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, const HParser* p);
/** /**
* Given an array of parsers, p_array, apply each parser in order. The * Given an array of parsers, p_array, apply each parser in order. The
@ -373,7 +375,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequenc
* *
* Result token type: The type of the first successful parser's result. * Result token type: The type of the first successful parser's result.
*/ */
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p); HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, const HParser* p);
/** /**
* Given two parsers, p1 and p2, this parser succeeds in the following * Given two parsers, p1 and p2, this parser succeeds in the following
@ -383,7 +385,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice,
* *
* Result token type: p1's result type. * Result token type: p1's result type.
*/ */
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2); HAMMER_FN_DECL(HParser*, h_butnot, const HParser* p1, const HParser* p2);
/** /**
* Given two parsers, p1 and p2, this parser succeeds in the following * Given two parsers, p1 and p2, this parser succeeds in the following
@ -393,7 +395,7 @@ HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
* *
* Result token type: p1's result type. * Result token type: p1's result type.
*/ */
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2); HAMMER_FN_DECL(HParser*, h_difference, const HParser* p1, const HParser* p2);
/** /**
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or * Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
@ -401,7 +403,7 @@ HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p
* *
* Result token type: The type of the result of whichever parser succeeded. * Result token type: The type of the result of whichever parser succeeded.
*/ */
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2); HAMMER_FN_DECL(HParser*, h_xor, const HParser* p1, const HParser* p2);
/** /**
* Given a parser, p, this parser succeeds for zero or more repetitions * Given a parser, p, this parser succeeds for zero or more repetitions
@ -409,7 +411,7 @@ HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p); HAMMER_FN_DECL(HParser*, h_many, const HParser* p);
/** /**
* Given a parser, p, this parser succeeds for one or more repetitions * Given a parser, p, this parser succeeds for one or more repetitions
@ -417,7 +419,7 @@ HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p); HAMMER_FN_DECL(HParser*, h_many1, const HParser* p);
/** /**
* Given a parser, p, this parser succeeds for exactly N repetitions * Given a parser, p, this parser succeeds for exactly N repetitions
@ -425,7 +427,7 @@ HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n); HAMMER_FN_DECL(HParser*, h_repeat_n, const HParser* p, const size_t n);
/** /**
* Given a parser, p, this parser succeeds with the value p parsed or * Given a parser, p, this parser succeeds with the value p parsed or
@ -433,7 +435,7 @@ HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
* *
* Result token type: If p succeeded, the type of its result; if not, TT_NONE. * Result token type: If p succeeded, the type of its result; if not, TT_NONE.
*/ */
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p); HAMMER_FN_DECL(HParser*, h_optional, const HParser* p);
/** /**
* Given a parser, p, this parser succeeds if p succeeds, but doesn't * Given a parser, p, this parser succeeds if p succeeds, but doesn't
@ -441,7 +443,7 @@ HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
* *
* Result token type: None. The HParseResult exists but its AST is NULL. * Result token type: None. The HParseResult exists but its AST is NULL.
*/ */
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p); HAMMER_FN_DECL(HParser*, h_ignore, const HParser* p);
/** /**
* Given a parser, p, and a parser for a separator, sep, this parser * Given a parser, p, and a parser for a separator, sep, this parser
@ -452,7 +454,7 @@ HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep); HAMMER_FN_DECL(HParser*, h_sepBy, const HParser* p, const HParser* sep);
/** /**
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. * Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
@ -460,14 +462,14 @@ HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep); HAMMER_FN_DECL(HParser*, h_sepBy1, const HParser* p, const HParser* sep);
/** /**
* This parser always returns a zero length match, i.e., empty string. * This parser always returns a zero length match, i.e., empty string.
* *
* Result token type: None. The HParseResult exists but its AST is NULL. * Result token type: None. The HParseResult exists but its AST is NULL.
*/ */
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p); HAMMER_FN_DECL_NOARG(HParser*, h_epsilon_p);
/** /**
* This parser applies its first argument to read an unsigned integer * This parser applies its first argument to read an unsigned integer
@ -478,7 +480,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value); HAMMER_FN_DECL(HParser*, h_length_value, const HParser* length, const HParser* value);
/** /**
* This parser attaches a predicate function, which returns true or * This parser attaches a predicate function, which returns true or
@ -493,7 +495,7 @@ HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HPar
* *
* Result token type: p's result type if pred succeeded, NULL otherwise. * Result token type: p's result type if pred succeeded, NULL otherwise.
*/ */
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred); HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred);
/** /**
* The 'and' parser asserts that a conditional syntax is satisfied, * The 'and' parser asserts that a conditional syntax is satisfied,
@ -510,7 +512,7 @@ HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
* *
* Result token type: None. The HParseResult exists but its AST is NULL. * Result token type: None. The HParseResult exists but its AST is NULL.
*/ */
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p); HAMMER_FN_DECL(HParser*, h_and, const HParser* p);
/** /**
* The 'not' parser asserts that a conditional syntax is *not* * The 'not' parser asserts that a conditional syntax is *not*
@ -530,7 +532,7 @@ HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
* *
* Result token type: None. The HParseResult exists but its AST is NULL. * Result token type: None. The HParseResult exists but its AST is NULL.
*/ */
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p); HAMMER_FN_DECL(HParser*, h_not, const HParser* p);
/** /**
* Create a parser that just calls out to another, as yet unknown, * Create a parser that just calls out to another, as yet unknown,
@ -573,7 +575,7 @@ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent
* *
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise. * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
*/ */
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params); HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
/** /**
* TODO: Document me * TODO: Document me
@ -598,7 +600,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
void h_bit_writer_free(HBitWriter* w); void h_bit_writer_free(HBitWriter* w);
// {{{ Benchmark functions // {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases); HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);
void h_benchmark_report(FILE* stream, HBenchmarkResults* results); void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
// }}} // }}}

View file

@ -47,7 +47,7 @@ static inline void h_generic_free(HAllocator *allocator, void* ptr) {
allocator->free(allocator, ptr); allocator->free(allocator, ptr);
} }
HAllocator system_allocator; extern HAllocator system_allocator;
typedef struct HInputStream_ { typedef struct HInputStream_ {
@ -110,8 +110,9 @@ struct HParseState_ {
}; };
typedef struct HParserBackendVTable_ { typedef struct HParserBackendVTable_ {
int (*compile)(HAllocator *mm__, const HParser* parser, const void* params); int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state); HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* parse_state);
void (*free)(HParser* parser);
} HParserBackendVTable; } HParserBackendVTable;

View file

@ -41,11 +41,11 @@ static const HParserVtable action_vt = {
.compile_to_rvm = action_ctrvm, .compile_to_rvm = action_ctrvm,
}; };
const HParser* h_action(const HParser* p, const HAction a) { HParser* h_action(const HParser* p, const HAction a) {
return h_action__m(&system_allocator, p, a); return h_action__m(&system_allocator, p, a);
} }
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) { HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
res->vtable = &action_vt; res->vtable = &action_vt;
HParseAction *env = h_new(HParseAction, 1); HParseAction *env = h_new(HParseAction, 1);

View file

@ -20,10 +20,10 @@ static const HParserVtable and_vt = {
}; };
const HParser* h_and(const HParser* p) { HParser* h_and(const HParser* p) {
return h_and__m(&system_allocator, p); return h_and__m(&system_allocator, p);
} }
const HParser* h_and__m(HAllocator* mm__, const HParser* p) { HParser* h_and__m(HAllocator* mm__, const HParser* p) {
// zero-width positive lookahead // zero-width positive lookahead
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
res->env = (void*)p; res->env = (void*)p;

View file

@ -40,10 +40,10 @@ static const HParserVtable attr_bool_vt = {
}; };
const HParser* h_attr_bool(const HParser* p, HPredicate pred) { HParser* h_attr_bool(const HParser* p, HPredicate pred) {
return h_attr_bool__m(&system_allocator, p, pred); return h_attr_bool__m(&system_allocator, p, pred);
} }
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) { HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
res->vtable = &attr_bool_vt; res->vtable = &attr_bool_vt;
HAttrBool *env = h_new(HAttrBool, 1); HAttrBool *env = h_new(HAttrBool, 1);

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
struct bits_env { struct bits_env {
@ -16,6 +17,19 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
return make_result(state, result); return make_result(state, result);
} }
/*
 * SVM action emitted by bits_ctrvm right after its RVM_CAPTURE instruction.
 *
 * Rewrites the token on top of the SVM stack (asserted to be TT_BYTES) in
 * place into an integer token: the bytes are folded together with the first
 * byte as the most significant one, and the token is retagged TT_SINT or
 * TT_UINT according to env->signedp.
 *
 * env is the struct bits_env of the parser being compiled.
 * Always returns true.
 */
static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
  // BUG: relies on undefined behaviour: int64_t is a signed uint64_t; not necessarily true on 32-bit
  struct bits_env *env_ = env;
  assert(ctx->stack_count > 0);
  HParsedToken *top = ctx->stack[ctx->stack_count-1];
  assert(top->token_type == TT_BYTES);
  uint64_t res = 0;
  for (size_t i = 0; i < top->bytes.len; i++)
    res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.
  // A signed value narrower than 64 bits must be sign-extended, otherwise
  // e.g. the single byte 0xFF would be reported as 255 instead of -1.
  // The arithmetic is done on uint64_t, so the wrap-around is well defined.
  // Widths of 0 and of the full 64 bits need no adjustment (and would make
  // the shift below invalid), so they are skipped.
  if (env_->signedp && top->bytes.len > 0 && top->bytes.len < sizeof(res)) {
    uint64_t msb = (uint64_t)1 << (top->bytes.len * 8 - 1);
    res = (res ^ msb) - msb;
  }
  top->uint = res; // possibly cast to signed through union
  top->token_type = (env_->signedp ? TT_SINT : TT_UINT);
  return true;
}
static bool bits_ctrvm(HRVMProg *prog, void* env) { static bool bits_ctrvm(HRVMProg *prog, void* env) {
struct bits_env *env_ = (struct bits_env*)env; struct bits_env *env_ = (struct bits_env*)env;
h_rvm_insert_insn(prog, RVM_PUSH, 0); h_rvm_insert_insn(prog, RVM_PUSH, 0);
@ -24,6 +38,7 @@ static bool bits_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_STEP, 0); h_rvm_insert_insn(prog, RVM_STEP, 0);
} }
h_rvm_insert_insn(prog, RVM_CAPTURE, 0); h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_bits, env));
return true; return true;
} }
@ -34,10 +49,10 @@ static const HParserVtable bits_vt = {
.compile_to_rvm = bits_ctrvm, .compile_to_rvm = bits_ctrvm,
}; };
const HParser* h_bits(size_t len, bool sign) { HParser* h_bits(size_t len, bool sign) {
return h_bits__m(&system_allocator, len, sign); return h_bits__m(&system_allocator, len, sign);
} }
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) { HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
struct bits_env *env = h_new(struct bits_env, 1); struct bits_env *env = h_new(struct bits_env, 1);
env->length = len; env->length = len;
env->signedp = sign; env->signedp = sign;
@ -48,10 +63,10 @@ const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
} }
#define SIZED_BITS(name_pre, len, signedp) \ #define SIZED_BITS(name_pre, len, signedp) \
const HParser* h_##name_pre##len () { \ HParser* h_##name_pre##len () { \
return h_bits__m(&system_allocator, len, signedp); \ return h_bits__m(&system_allocator, len, signedp); \
} \ } \
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \ HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
return h_bits__m(mm__, len, signedp); \ return h_bits__m(mm__, len, signedp); \
} }
SIZED_BITS(int, 8, true) SIZED_BITS(int, 8, true)

View file

@ -48,10 +48,10 @@ static const HParserVtable butnot_vt = {
.compile_to_rvm = h_not_regular, .compile_to_rvm = h_not_regular,
}; };
const HParser* h_butnot(const HParser* p1, const HParser* p2) { HParser* h_butnot(const HParser* p1, const HParser* p2) {
return h_butnot__m(&system_allocator, p1, p2); return h_butnot__m(&system_allocator, p1, p2);
} }
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1); HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2; env->p1 = p1; env->p2 = p2;
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);

View file

@ -14,6 +14,7 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
static bool ch_ctrvm(HRVMProg *prog, void* env) { static bool ch_ctrvm(HRVMProg *prog, void* env) {
uint8_t c = (uint8_t)(unsigned long)(env); uint8_t c = (uint8_t)(unsigned long)(env);
// TODO: Does this capture anything?
h_rvm_insert_insn(prog, RVM_MATCH, c & c << 8); h_rvm_insert_insn(prog, RVM_MATCH, c & c << 8);
h_rvm_insert_insn(prog, RVM_STEP, 0); h_rvm_insert_insn(prog, RVM_STEP, 0);
return true; return true;
@ -26,12 +27,12 @@ static const HParserVtable ch_vt = {
.compile_to_rvm = ch_ctrvm, .compile_to_rvm = ch_ctrvm,
}; };
const HParser* h_ch(const uint8_t c) { HParser* h_ch(const uint8_t c) {
return h_ch__m(&system_allocator, c); return h_ch__m(&system_allocator, c);
} }
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) { HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);
ret->vtable = &ch_vt; ret->vtable = &ch_vt;
ret->env = (void*)(unsigned long)(c); ret->env = (void*)(unsigned long)(c);
return (const HParser*)ret; return ret;
} }

View file

@ -59,21 +59,21 @@ static const HParserVtable charset_vt = {
.compile_to_rvm = cs_ctrvm, .compile_to_rvm = cs_ctrvm,
}; };
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
return h_ch_range__m(&system_allocator, lower, upper); return h_ch_range__m(&system_allocator, lower, upper);
} }
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) { HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);
HCharset cs = new_charset(mm__); HCharset cs = new_charset(mm__);
for (int i = 0; i < 256; i++) for (int i = 0; i < 256; i++)
charset_set(cs, i, (lower <= i) && (i <= upper)); charset_set(cs, i, (lower <= i) && (i <= upper));
ret->vtable = &charset_vt; ret->vtable = &charset_vt;
ret->env = (void*)cs; ret->env = (void*)cs;
return (const HParser*)ret; return ret;
} }
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) { static HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);
HCharset cs = new_charset(mm__); HCharset cs = new_charset(mm__);
for (size_t i = 0; i < 256; i++) for (size_t i = 0; i < 256; i++)
@ -83,22 +83,22 @@ static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, s
ret->vtable = &charset_vt; ret->vtable = &charset_vt;
ret->env = (void*)cs; ret->env = (void*)cs;
return (const HParser*)ret; return ret;
} }
const HParser* h_in(const uint8_t *options, size_t count) { HParser* h_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 1); return h_in_or_not__m(&system_allocator, options, count, 1);
} }
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 1); return h_in_or_not__m(mm__, options, count, 1);
} }
const HParser* h_not_in(const uint8_t *options, size_t count) { HParser* h_not_in(const uint8_t *options, size_t count) {
return h_in_or_not__m(&system_allocator, options, count, 0); return h_in_or_not__m(&system_allocator, options, count, 0);
} }
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
return h_in_or_not__m(mm__, options, count, 0); return h_in_or_not__m(mm__, options, count, 0);
} }

View file

@ -64,27 +64,27 @@ static const HParserVtable choice_vt = {
.compile_to_rvm = choice_ctrvm, .compile_to_rvm = choice_ctrvm,
}; };
const HParser* h_choice(const HParser* p, ...) { HParser* h_choice(const HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
const HParser* ret = h_choice__mv(&system_allocator, p, ap); HParser* ret = h_choice__mv(&system_allocator, p, ap);
va_end(ap); va_end(ap);
return ret; return ret;
} }
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) { HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
const HParser* ret = h_choice__mv(mm__, p, ap); HParser* ret = h_choice__mv(mm__, p, ap);
va_end(ap); va_end(ap);
return ret; return ret;
} }
const HParser* h_choice__v(const HParser* p, va_list ap) { HParser* h_choice__v(const HParser* p, va_list ap) {
return h_choice__mv(&system_allocator, p, ap); return h_choice__mv(&system_allocator, p, ap);
} }
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) { HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
va_list ap; va_list ap;
size_t len = 0; size_t len = 0;
HSequence *s = h_new(HSequence, 1); HSequence *s = h_new(HSequence, 1);

View file

@ -47,10 +47,10 @@ static HParserVtable difference_vt = {
.compile_to_rvm = h_not_regular, .compile_to_rvm = h_not_regular,
}; };
const HParser* h_difference(const HParser* p1, const HParser* p2) { HParser* h_difference(const HParser* p1, const HParser* p2) {
return h_difference__m(&system_allocator, p1, p2); return h_difference__m(&system_allocator, p1, p2);
} }
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1); HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2; env->p1 = p1; env->p2 = p2;
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);

View file

@ -22,13 +22,13 @@ static const HParserVtable end_vt = {
.compile_to_rvm = end_ctrvm, .compile_to_rvm = end_ctrvm,
}; };
const HParser* h_end_p() { HParser* h_end_p() {
return h_end_p__m(&system_allocator); return h_end_p__m(&system_allocator);
} }
const HParser* h_end_p__m(HAllocator* mm__) { HParser* h_end_p__m(HAllocator* mm__) {
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);
ret->vtable = &end_vt; ret->vtable = &end_vt;
ret->env = NULL; ret->env = NULL;
return (const HParser*)ret; return ret;
} }

View file

@ -24,9 +24,11 @@ static const HParser epsilon_p = {
.env = NULL .env = NULL
}; };
const HParser* h_epsilon_p() { HParser* h_epsilon_p() {
return &epsilon_p; return h_epsilon_p__m(&system_allocator);
} }
const HParser* h_epsilon_p__m(HAllocator* mm__) { HParser* h_epsilon_p__m(HAllocator* mm__) {
return &epsilon_p; HParser *epsilon_p = h_new(HParser, 1);
epsilon_p->vtable = &epsilon_vt;
return epsilon_p;
} }

View file

@ -1,5 +1,5 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
#include "backends/regex_actions.h"
static HParseResult* parse_ignore(void* env, HParseState* state) { static HParseResult* parse_ignore(void* env, HParseState* state) {
HParseResult *res0 = h_do_parse((HParser*)env, state); HParseResult *res0 = h_do_parse((HParser*)env, state);
@ -21,10 +21,16 @@ static bool ignore_isValidCF(void *env) {
return (p->vtable->isValidCF(p->env)); return (p->vtable->isValidCF(p->env));
} }
/*
 * SVM action that drops the topmost entry of the SVM stack.
 * ignore_ctrvm emits it after the wrapped parser's code.
 * The stack must be non-empty (asserted). Always returns true.
 */
static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) {
  assert(ctx->stack_count > 0);
  ctx->stack_count -= 1;
  return true;
}
static bool ignore_ctrvm(HRVMProg *prog, void *env) { static bool ignore_ctrvm(HRVMProg *prog, void *env) {
HParser *p = (HParser*)env; HParser *p = (HParser*)env;
h_compile_regex(prog, p->env); h_compile_regex(prog, p->env);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
return true; return true;
} }
@ -35,10 +41,10 @@ static const HParserVtable ignore_vt = {
.compile_to_rvm = ignore_ctrvm, .compile_to_rvm = ignore_ctrvm,
}; };
const HParser* h_ignore(const HParser* p) { HParser* h_ignore(const HParser* p) {
return h_ignore__m(&system_allocator, p); return h_ignore__m(&system_allocator, p);
} }
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) { HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
HParser* ret = h_new(HParser, 1); HParser* ret = h_new(HParser, 1);
ret->vtable = &ignore_vt; ret->vtable = &ignore_vt;
ret->env = (void*)p; ret->env = (void*)p;

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
@ -49,14 +50,16 @@ static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
HParsedToken* save; HParsedToken* save;
// We can assume that each subitem generated at most one item on the // We can assume that each subitem generated at most one item on the
// stack. // stack.
assert(seq->len >= 1);
for (int i = seq->len - 1; i>=0; i--) { for (int i = seq->len - 1; i>=0; i--) {
if (i == seq->which && ctx->stack[ctx->stack_count]->token_type != TT_MARK) if (i == (int)seq->which && ctx->stack[ctx->stack_count]->token_type != TT_MARK)
save = ctx->stack[ctx->stack_count-1]; save = ctx->stack[ctx->stack_count-1];
// skip over everything up to and including the mark. // skip over everything up to and including the mark.
while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK) while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)
; ;
} }
ctx->stack[ctx->stack_count++] = save; ctx->stack[ctx->stack_count++] = save;
return true;
} }
static bool is_ctrvm(HRVMProg *prog, void* env) { static bool is_ctrvm(HRVMProg *prog, void* env) {
@ -82,7 +85,7 @@ static const HParserVtable ignoreseq_vt = {
// API frontends // API frontends
// //
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) { static HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 2); seq->parsers = h_new(const HParser*, 2);
seq->parsers[0] = p; seq->parsers[0] = p;
@ -96,25 +99,25 @@ static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const H
return ret; return ret;
} }
const HParser* h_left(const HParser* p, const HParser* q) { HParser* h_left(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 0); return h_leftright__m(&system_allocator, p, q, 0);
} }
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) { HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 0); return h_leftright__m(mm__, p, q, 0);
} }
const HParser* h_right(const HParser* p, const HParser* q) { HParser* h_right(const HParser* p, const HParser* q) {
return h_leftright__m(&system_allocator, p, q, 1); return h_leftright__m(&system_allocator, p, q, 1);
} }
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) { HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
return h_leftright__m(mm__, p, q, 1); return h_leftright__m(mm__, p, q, 1);
} }
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) { HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
return h_middle__m(&system_allocator, p, x, q); return h_middle__m(&system_allocator, p, x, q);
} }
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) { HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
seq->parsers = h_new(const HParser*, 3); seq->parsers = h_new(const HParser*, 3);
seq->parsers[0] = p; seq->parsers[0] = p;

View file

@ -29,7 +29,7 @@ static HParseResult* parse_int_range(void *env, HParseState *state) {
} }
bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) { bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
HRange *r_env = (*HRange)env; HRange *r_env = (HRange*)env;
HParsedToken *head = ctx->stack[ctx->stack_count-1]; HParsedToken *head = ctx->stack[ctx->stack_count-1];
switch (head-> token_type) { switch (head-> token_type) {
case TT_SINT: case TT_SINT:
@ -41,7 +41,7 @@ bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env)
} }
} }
static bool ir_ctrvm(HRVMProg *prog, void *env) { static bool ir_ctrvm(HRVMProg *prog, void *env) {
HRange *r_env = (*HRange)env; HRange *r_env = (HRange*)env;
h_compile_regex(prog, r_env->p); h_compile_regex(prog, r_env->p);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
@ -55,10 +55,10 @@ static const HParserVtable int_range_vt = {
.compile_to_rvm = ir_ctrvm, .compile_to_rvm = ir_ctrvm,
}; };
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
return h_int_range__m(&system_allocator, p, lower, upper); return h_int_range__m(&system_allocator, p, lower, upper);
} }
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) { HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
// p must be an integer parser, which means it's using parse_bits // p must be an integer parser, which means it's using parse_bits
// TODO: re-add this check // TODO: re-add this check
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser"); //assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");

View file

@ -61,16 +61,20 @@ static bool many_ctrvm(HRVMProg *prog, void *env) {
// FIXME: Implement clear_to_mark // FIXME: Implement clear_to_mark
uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL); uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
h_rvm_insert_insn(prog, RVM_PUSH, 0); h_rvm_insert_insn(prog, RVM_PUSH, 0);
// TODO: implement min and max properly. Right now, it's always min==0, max==inf
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
if (!h_compile_regex(prog, repeat->p)) if (!h_compile_regex(prog, repeat->p))
return false; return false;
if (!h_compile_regex(prog, repeat->sep)) if (repeat->sep != NULL) {
return false; h_rvm_insert_insn(prog, RVM_PUSH, 0);
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark); if (!h_compile_regex(prog, repeat->sep))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
}
h_rvm_insert_insn(prog, RVM_GOTO, insn); h_rvm_insert_insn(prog, RVM_GOTO, insn);
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
h_rvm_insert_insn(prog, RVM_ACTION, h_svm_action_make_sequence, NULL); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
return true; return true;
} }
@ -81,10 +85,10 @@ static const HParserVtable many_vt = {
.compile_to_rvm = many_ctrvm, .compile_to_rvm = many_ctrvm,
}; };
const HParser* h_many(const HParser* p) { HParser* h_many(const HParser* p) {
return h_many__m(&system_allocator, p); return h_many__m(&system_allocator, p);
} }
const HParser* h_many__m(HAllocator* mm__, const HParser* p) { HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
@ -96,10 +100,10 @@ const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
return res; return res;
} }
const HParser* h_many1(const HParser* p) { HParser* h_many1(const HParser* p) {
return h_many1__m(&system_allocator, p); return h_many1__m(&system_allocator, p);
} }
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) { HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
@ -111,10 +115,10 @@ const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
return res; return res;
} }
const HParser* h_repeat_n(const HParser* p, const size_t n) { HParser* h_repeat_n(const HParser* p, const size_t n) {
return h_repeat_n__m(&system_allocator, p, n); return h_repeat_n__m(&system_allocator, p, n);
} }
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) { HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
@ -126,10 +130,10 @@ const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n)
return res; return res;
} }
const HParser* h_sepBy(const HParser* p, const HParser* sep) { HParser* h_sepBy(const HParser* p, const HParser* sep) {
return h_sepBy__m(&system_allocator, p, sep); return h_sepBy__m(&system_allocator, p, sep);
} }
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) { HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
@ -141,10 +145,10 @@ const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep
return res; return res;
} }
const HParser* h_sepBy1(const HParser* p, const HParser* sep) { HParser* h_sepBy1(const HParser* p, const HParser* sep) {
return h_sepBy1__m(&system_allocator, p, sep); return h_sepBy1__m(&system_allocator, p, sep);
} }
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) { HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
@ -184,10 +188,10 @@ static const HParserVtable length_value_vt = {
.isValidCF = h_false, .isValidCF = h_false,
}; };
const HParser* h_length_value(const HParser* length, const HParser* value) { HParser* h_length_value(const HParser* length, const HParser* value) {
return h_length_value__m(&system_allocator, length, value); return h_length_value__m(&system_allocator, length, value);
} }
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) { HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
res->vtable = &length_value_vt; res->vtable = &length_value_vt;
HLenVal *env = h_new(HLenVal, 1); HLenVal *env = h_new(HLenVal, 1);

View file

@ -14,13 +14,13 @@ static const HParserVtable not_vt = {
.parse = parse_not, .parse = parse_not,
.isValidRegular = h_false, /* see and.c for why */ .isValidRegular = h_false, /* see and.c for why */
.isValidCF = h_false, /* also see and.c for why */ .isValidCF = h_false, /* also see and.c for why */
.compile_to_rvm = h_not_regular, .compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this.
}; };
const HParser* h_not(const HParser* p) { HParser* h_not(const HParser* p) {
return h_not__m(&system_allocator, p); return h_not__m(&system_allocator, p);
} }
const HParser* h_not__m(HAllocator* mm__, const HParser* p) { HParser* h_not__m(HAllocator* mm__, const HParser* p) {
HParser *res = h_new(HParser, 1); HParser *res = h_new(HParser, 1);
res->vtable = &not_vt; res->vtable = &not_vt;
res->env = (void*)p; res->env = (void*)p;

View file

@ -6,7 +6,8 @@ static HParseResult* parse_nothing() {
} }
static bool nothing_ctrvm(HRVMProg *prog, void* env) { static bool nothing_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF); h_rvm_insert_insn(prog, RVM_MATCH, 0x0000);
h_rvm_insert_insn(prog, RVM_MATCH, 0xFFFF);
return true; return true;
} }
@ -17,11 +18,11 @@ static const HParserVtable nothing_vt = {
.compile_to_rvm = nothing_ctrvm, .compile_to_rvm = nothing_ctrvm,
}; };
const HParser* h_nothing_p() { HParser* h_nothing_p() {
return h_nothing_p__m(&system_allocator); return h_nothing_p__m(&system_allocator);
} }
/*
 * Build a "nothing" parser: a fresh HParser whose vtable is nothing_vt and
 * whose environment pointer is NULL.
 *
 * mm__ is not referenced directly here; presumably the h_new macro picks it
 * up by name -- confirm against the macro definition.
 */
HParser* h_nothing_p__m(HAllocator* mm__) {
  HParser *parser = h_new(HParser, 1);
  parser->vtable = &nothing_vt;
  parser->env = NULL;
  return parser;
}

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
static HParseResult* parse_optional(void* env, HParseState* state) { static HParseResult* parse_optional(void* env, HParseState* state) {
@ -21,12 +22,25 @@ static bool opt_isValidCF(void *env) {
return p->vtable->isValidCF(p->env); return p->vtable->isValidCF(p->env);
} }
/*
 * SVM action that finishes an optional match.
 *
 * Looks at the top of ctx->stack:
 *   - if it is the TT_MARK token, it is retagged in place as TT_NONE;
 *   - otherwise the top element is popped and stored over the element
 *     directly beneath it, which must be the TT_MARK.
 *
 * `arena` and `env` are not used. Always returns true.
 *
 * The stack must hold at least one element on entry (two in the second
 * case); these preconditions are asserted so that a malformed program
 * fails loudly instead of indexing below the start of the stack.
 */
static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) {
  assert(ctx->stack_count >= 1);
  if (ctx->stack[ctx->stack_count-1]->token_type == TT_MARK) {
    ctx->stack[ctx->stack_count-1]->token_type = TT_NONE;
  } else {
    /* Need both the result on top and the mark beneath it. */
    assert(ctx->stack_count >= 2);
    ctx->stack_count--;
    assert(ctx->stack[ctx->stack_count-1]->token_type == TT_MARK);
    /* Slide the popped result down over the mark. */
    ctx->stack[ctx->stack_count-1] = ctx->stack[ctx->stack_count];
  }
  return true;
}
/*
 * Regex-VM code generator for the optional combinator.
 *
 * Emitted layout:
 *   RVM_PUSH 0
 *   RVM_FORK <patched to the instruction after the inner program>
 *   ...inner program...
 *   RVM_ACTION h_svm_action_optional
 *
 * Returns false, without patching the fork or emitting the action, when the
 * inner program fails to compile; true otherwise.
 *
 * NOTE(review): `env` is cast to HParser* and its ->env is what gets passed
 * to h_compile_regex -- confirm that is the argument h_compile_regex expects.
 */
static bool opt_ctrvm(HRVMProg *prog, void* env) {
  HParser *inner = (HParser*) env;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  uint16_t fork_insn = h_rvm_insert_insn(prog, RVM_FORK, 0);

  bool compiled = h_compile_regex(prog, inner->env);
  if (compiled) {
    /* The fork's alternative branch skips over the inner program. */
    h_rvm_patch_arg(prog, fork_insn, h_rvm_get_ip(prog));
    h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL));
  }
  return compiled;
}
@ -37,10 +51,10 @@ static const HParserVtable optional_vt = {
.compile_to_rvm = opt_ctrvm, .compile_to_rvm = opt_ctrvm,
}; };
const HParser* h_optional(const HParser* p) { HParser* h_optional(const HParser* p) {
return h_optional__m(&system_allocator, p); return h_optional__m(&system_allocator, p);
} }
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) { HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
// TODO: re-add this // TODO: re-add this
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round."); //assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);

View file

@ -44,10 +44,12 @@ static bool sequence_isValidCF(void *env) {
/*
 * Regex-VM code generator for the sequence combinator.
 *
 * Emits RVM_PUSH 0, then the program of each child parser in order (via the
 * child's own compile_to_rvm vtable entry), then an RVM_ACTION bound to
 * h_svm_action_make_sequence.
 *
 * Returns false as soon as any child fails to compile (the trailing action
 * is then not emitted); true otherwise.
 */
static bool sequence_ctrvm(HRVMProg *prog, void *env) {
  HSequence *seq = (HSequence*)env;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);

  size_t idx = 0;
  while (idx < seq->len) {
    const HParser *child = seq->p_array[idx];
    if (!child->vtable->compile_to_rvm(prog, child->env))
      return false;
    idx++;
  }

  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
  return true;
}
@ -58,27 +60,27 @@ static const HParserVtable sequence_vt = {
.compile_to_rvm = sequence_ctrvm, .compile_to_rvm = sequence_ctrvm,
}; };
const HParser* h_sequence(const HParser* p, ...) { HParser* h_sequence(const HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
const HParser* ret = h_sequence__mv(&system_allocator, p, ap); HParser* ret = h_sequence__mv(&system_allocator, p, ap);
va_end(ap); va_end(ap);
return ret; return ret;
} }
/*
 * Variadic entry point for building a sequence parser with a caller-supplied
 * allocator. Collects the arguments following `p` into a va_list and
 * forwards them, together with mm__, to h_sequence__mv.
 */
HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
  va_list args;
  va_start(args, p);
  HParser* result = h_sequence__mv(mm__, p, args);
  va_end(args);
  return result;
}
const HParser* h_sequence__v(const HParser* p, va_list ap) { HParser* h_sequence__v(const HParser* p, va_list ap) {
return h_sequence__mv(&system_allocator, p, ap); return h_sequence__mv(&system_allocator, p, ap);
} }
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) { HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
va_list ap; va_list ap;
size_t len = 0; size_t len = 0;
const HParser *arg; const HParser *arg;

View file

@ -38,14 +38,14 @@ const HParserVtable token_vt = {
.compile_to_rvm = token_ctrvm, .compile_to_rvm = token_ctrvm,
}; };
const HParser* h_token(const uint8_t *str, const size_t len) { HParser* h_token(const uint8_t *str, const size_t len) {
return h_token__m(&system_allocator, str, len); return h_token__m(&system_allocator, str, len);
} }
/*
 * Build a token parser for the `len` bytes at `str`.
 *
 * Allocates an HToken recording the pointer and length, then an HParser
 * whose vtable is token_vt and whose env is that HToken. The bytes are not
 * copied: only the pointer is stored, so `str` must remain valid for as long
 * as the parser is used.
 *
 * mm__ is not referenced directly here; presumably the h_new macro picks it
 * up by name -- confirm against the macro definition.
 */
HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
  HToken *tok = h_new(HToken, 1);
  tok->str = (uint8_t*)str;
  tok->len = len;

  HParser *parser = h_new(HParser, 1);
  parser->vtable = &token_vt;
  parser->env = tok;
  return parser;
}

View file

@ -26,6 +26,16 @@ static bool ws_isValidCF(void *env) {
/*
 * Regex-VM code generator for the whitespace combinator.
 *
 * For each of the six characters ' ', '\f', '\n', '\r', '\t', '\v' it emits
 *   RVM_FORK  <patched to the instruction after this group>
 *   RVM_MATCH (c << 8) | c
 *   RVM_GOTO  <first instruction emitted by this function>
 * so a successful match jumps back to the start of the whole chain. The
 * inner parser's program is emitted after the last group.
 *
 * Returns whatever h_compile_regex returns for the inner program.
 *
 * NOTE(review): `env` is cast to HParser* and its ->env is what gets passed
 * to h_compile_regex -- confirm that is the argument h_compile_regex expects.
 */
static bool ws_ctrvm(HRVMProg *prog, void *env) {
  static const char space_chars[] = {' ', '\f', '\n', '\r', '\t', '\v'};
  const size_t n_space_chars = sizeof(space_chars) / sizeof(space_chars[0]);

  HParser *inner = (HParser*)env;
  const uint16_t loop_top = h_rvm_get_ip(prog);

  for (size_t k = 0; k < n_space_chars; k++) {
    const char c = space_chars[k];
    uint16_t fork_insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
    h_rvm_insert_insn(prog, RVM_MATCH, (c << 8) | (c));
    h_rvm_insert_insn(prog, RVM_GOTO, loop_top);
    /* The fork's alternative branch falls through to the next candidate. */
    h_rvm_patch_arg(prog, fork_insn, h_rvm_get_ip(prog));
  }

  return h_compile_regex(prog, inner->env);
}
@ -36,10 +46,10 @@ static const HParserVtable whitespace_vt = {
.compile_to_rvm = ws_ctrvm, .compile_to_rvm = ws_ctrvm,
}; };
const HParser* h_whitespace(const HParser* p) { HParser* h_whitespace(const HParser* p) {
return h_whitespace__m(&system_allocator, p); return h_whitespace__m(&system_allocator, p);
} }
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) { HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);
ret->vtable = &whitespace_vt; ret->vtable = &whitespace_vt;
ret->env = (void*)p; ret->env = (void*)p;

View file

@ -44,10 +44,10 @@ static const HParserVtable xor_vt = {
.compile_to_rvm = h_not_regular, .compile_to_rvm = h_not_regular,
}; };
const HParser* h_xor(const HParser* p1, const HParser* p2) { HParser* h_xor(const HParser* p1, const HParser* p2) {
return h_xor__m(&system_allocator, p1, p2); return h_xor__m(&system_allocator, p1, p2);
} }
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
HTwoParsers *env = h_new(HTwoParsers, 1); HTwoParsers *env = h_new(HTwoParsers, 1);
env->p1 = p1; env->p2 = p2; env->p1 = p1; env->p2 = p2;
HParser *ret = h_new(HParser, 1); HParser *ret = h_new(HParser, 1);

View file

@ -1,16 +1,27 @@
#include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include "internal.h" #include "internal.h"
/*
 * Allocate `size` zero-filled bytes from the C heap.
 *
 * The underlying block is prefixed with a size_t header that records the
 * requested size; the caller receives the address just past that header.
 * system_realloc reads the header to zero-fill any growth, and system_free
 * steps back over it to recover the real heap pointer.
 *
 * `allocator` is not used.
 *
 * Returns NULL if size + header would overflow size_t or if calloc fails.
 */
static void* system_alloc(HAllocator *allocator, size_t size) {
  /* Refuse requests for which adding the header would wrap around. */
  if (size > (size_t)-1 - sizeof(size_t))
    return NULL;

  size_t *header = calloc(size + sizeof(size_t), 1);
  if (header == NULL)
    return NULL;

  *header = size;
  return header + 1;
}
/*
 * Resize a block obtained from system_alloc/system_realloc to `size` bytes.
 *
 * A NULL `ptr` behaves like system_alloc. Otherwise the size_t header stored
 * immediately before `ptr` is consulted for the old size, the underlying
 * block (header included) is reallocated, the header is updated, and any
 * bytes beyond the old size are zero-filled.
 *
 * Returns the new user pointer, or NULL if size + header would overflow
 * size_t or if realloc fails. On failure the original block is left
 * untouched and remains valid.
 */
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
  if (ptr == NULL)
    return system_alloc(allocator, size);

  /* Refuse requests for which adding the header would wrap around. */
  if (size > (size_t)-1 - sizeof(size_t))
    return NULL;

  size_t *old_header = (size_t*)ptr - 1;
  size_t old_size = *old_header;

  size_t *header = realloc(old_header, size + sizeof(size_t));
  if (header == NULL)
    return NULL;

  *header = size;
  /* realloc leaves grown bytes indeterminate; keep the zero-fill guarantee. */
  if (size > old_size)
    memset((char*)(header + 1) + old_size, 0, size - old_size);
  return header + 1;
}
/*
 * Release a block obtained from system_alloc/system_realloc.
 *
 * The user pointer sits just past a size_t header, so the address handed to
 * free() is one size_t before `ptr`. A NULL `ptr` is a no-op, matching
 * free(); without the guard the header adjustment would turn NULL into an
 * invalid pointer.
 *
 * `allocator` is not used.
 */
static void system_free(HAllocator *allocator, void* ptr) {
  if (ptr == NULL)
    return;
  free((size_t*)ptr - 1);
}
HAllocator system_allocator = { HAllocator system_allocator = {

View file

@ -11,7 +11,7 @@ HParserTestcase testcases[] = {
}; };
static void test_benchmark_1() { static void test_benchmark_1() {
const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
HBenchmarkResults *res = h_benchmark(parser, testcases); HBenchmarkResults *res = h_benchmark(parser, testcases);
h_benchmark_report(stderr, res); h_benchmark_report(stderr, res);

View file

@ -99,14 +99,14 @@
} else { \ } else { \
char* cres = h_write_result_unamb(res->ast); \ char* cres = h_write_result_unamb(res->ast); \
g_check_string(cres, ==, result); \ g_check_string(cres, ==, result); \
g_free(cres); \ system_allocator.free(&system_allocator, cres); \
HArenaStats stats; \ HArenaStats stats; \
h_allocator_stats(res->arena, &stats); \ h_allocator_stats(res->arena, &stats); \
g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ g_test_message("Parse used %zd bytes, wasted %zd bytes. " \
"Inefficiency: %5f%%", \ "Inefficiency: %5f%%", \
stats.used, stats.wasted, \ stats.used, stats.wasted, \
stats.wasted * 100. / (stats.used+stats.wasted)); \ stats.wasted * 100. / (stats.used+stats.wasted)); \
h_delete_arena(res->arena); \ h_delete_arena(res->arena); \
} \ } \
} while(0) } while(0)