Merge remote-tracking branch 'tq/master' into LL such that it compiles
Conflicts: src/Makefile src/backends/packrat.c src/compile.c src/hammer.h src/internal.h src/parsers/action.c src/parsers/and.c src/parsers/attr_bool.c src/parsers/bits.c src/parsers/butnot.c src/parsers/ch.c src/parsers/charset.c src/parsers/choice.c src/parsers/difference.c src/parsers/end.c src/parsers/epsilon.c src/parsers/ignore.c src/parsers/ignoreseq.c src/parsers/indirect.c src/parsers/int_range.c src/parsers/many.c src/parsers/not.c src/parsers/nothing.c src/parsers/optional.c src/parsers/sequence.c src/parsers/token.c src/parsers/unimplemented.c src/parsers/whitespace.c src/parsers/xor.c
This commit is contained in:
commit
c64a4e435e
46 changed files with 1289 additions and 263 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -8,3 +8,7 @@ examples/base64
|
||||||
TAGS
|
TAGS
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
|
\#*
|
||||||
|
.*
|
||||||
|
docs/milestone2.dot.pdf
|
||||||
|
*.dot.pdf
|
||||||
|
|
|
||||||
3
Makefile
3
Makefile
|
|
@ -17,6 +17,9 @@ CONFIG_VARS= INCLUDE_TESTS
|
||||||
test: src/test_suite
|
test: src/test_suite
|
||||||
$<
|
$<
|
||||||
|
|
||||||
|
examples/all: src/all
|
||||||
|
examples/compile: src/compile
|
||||||
|
|
||||||
define SUBDIR_TEMPLATE
|
define SUBDIR_TEMPLATE
|
||||||
$(1)/%:
|
$(1)/%:
|
||||||
$$(MAKE) -C $(1) $$*
|
$$(MAKE) -C $(1) $$*
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ endif
|
||||||
include $(TOPLEVEL)/config.mk
|
include $(TOPLEVEL)/config.mk
|
||||||
|
|
||||||
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
|
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
|
||||||
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0)
|
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) -lrt
|
||||||
|
|
||||||
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
||||||
LDFLAGS :=
|
LDFLAGS :=
|
||||||
|
|
|
||||||
36
docs/milestone2.dot
Normal file
36
docs/milestone2.dot
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
digraph {
|
||||||
|
graph [rankdir=LR];
|
||||||
|
subgraph complete {
|
||||||
|
node [color="gray",fontcolor="gray"];
|
||||||
|
glue;
|
||||||
|
regex_svm;
|
||||||
|
regex_rvm;
|
||||||
|
desugaring; // Needs merged.
|
||||||
|
}
|
||||||
|
/* The end result of the milestone, along with the subtasks listed */
|
||||||
|
milestone2 [color="green",style="filled"];
|
||||||
|
llk -> milestone2;
|
||||||
|
lr -> milestone2;
|
||||||
|
lalr8_gen -> lr; // Generate parse tables for LALR(8)
|
||||||
|
glr_gen -> lr; // Generate parse tables for GLR
|
||||||
|
llk_gen -> llk; // Generate parse tables for LL(k)
|
||||||
|
lr_driver -> lr; // Write driver for all LR-type algs; analogous to SVM and RVM implementations
|
||||||
|
llk_driver -> llk; // Write driver for LL(k)
|
||||||
|
regex -> milestone2;
|
||||||
|
glue -> milestone2;
|
||||||
|
tests -> milestone2;
|
||||||
|
|
||||||
|
regex_gen -> regex; // should be mostly done; the rest is concurrent with regex_svm_actions
|
||||||
|
regex_driver -> regex;
|
||||||
|
regex_svm -> regex_driver;
|
||||||
|
regex_rvm -> regex_driver;
|
||||||
|
regex_svm_actions -> regex_driver; // 1 for each way that an HParsedToken can be extracted from the stack.
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
desugaring -> llk_gen;
|
||||||
|
desugaring -> lalr8_gen;
|
||||||
|
desugaring -> glr_gen;
|
||||||
|
}
|
||||||
65
docs/milestone3.dot
Normal file
65
docs/milestone3.dot
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
digraph {
|
||||||
|
graph [rankdir=LR];
|
||||||
|
|
||||||
|
subgraph complete {
|
||||||
|
node [color="gray",fontcolor="gray"];
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph groups {
|
||||||
|
node [color="blue",fontcolor="blue"];
|
||||||
|
cpp;
|
||||||
|
python;
|
||||||
|
ruby;
|
||||||
|
go;
|
||||||
|
php;
|
||||||
|
dotnet;
|
||||||
|
}
|
||||||
|
|
||||||
|
milestone3 [color="green",style="filled"];
|
||||||
|
|
||||||
|
|
||||||
|
function_desc_fmt -> function_descs;
|
||||||
|
function_desc_fmt -> binding_generator;
|
||||||
|
|
||||||
|
binding_generator -> cpp_gen;
|
||||||
|
binding_generator -> python_gen;
|
||||||
|
binding_generator -> ruby_gen;
|
||||||
|
binding_generator -> go_gen;
|
||||||
|
binding_generator -> php_gen;
|
||||||
|
binding_generator -> dotnet_gen;
|
||||||
|
|
||||||
|
function_descs -> cpp_gen;
|
||||||
|
function_descs -> python_gen;
|
||||||
|
function_descs -> ruby_gen;
|
||||||
|
function_descs -> go_gen;
|
||||||
|
function_descs -> php_gen;
|
||||||
|
function_descs -> dotnet_gen;
|
||||||
|
|
||||||
|
|
||||||
|
// Plugins to generate a type of code
|
||||||
|
cpp_gen -> cpp;
|
||||||
|
python_gen -> python;
|
||||||
|
ruby_gen -> ruby;
|
||||||
|
go_gen -> go;
|
||||||
|
php_gen -> php;
|
||||||
|
dotnet_gen -> dotnet;
|
||||||
|
|
||||||
|
// base code... developed concurrently with _gen's
|
||||||
|
cpp_base -> cpp;
|
||||||
|
python_base -> python;
|
||||||
|
ruby_base -> ruby;
|
||||||
|
go_base -> go;
|
||||||
|
php_base -> php;
|
||||||
|
dotnet_base -> dotnet;
|
||||||
|
|
||||||
|
// Bindings for various languages. These are just groupings.
|
||||||
|
cpp -> milestone3;
|
||||||
|
python -> milestone3;
|
||||||
|
ruby -> milestone3;
|
||||||
|
go -> milestone3;
|
||||||
|
php -> milestone3;
|
||||||
|
dotnet -> milestone3;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
17
docs/rvm_sample_input.rvm
Normal file
17
docs/rvm_sample_input.rvm
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
+C
|
||||||
|
int foo() {
|
||||||
|
return 42;
|
||||||
|
}
|
||||||
|
|
||||||
|
+SVM /svm/simple
|
||||||
|
@input ""
|
||||||
|
@output "()"
|
||||||
|
0 ACCEPT
|
||||||
|
|
||||||
|
+SVM /svm/string
|
||||||
|
@input "quux"
|
||||||
|
@output "(<5555>)"
|
||||||
|
1 MARK
|
||||||
|
2 CAPTURE
|
||||||
|
2 ACCEPT
|
||||||
|
|
||||||
|
|
@ -27,7 +27,8 @@ PARSERS := \
|
||||||
|
|
||||||
BACKENDS := \
|
BACKENDS := \
|
||||||
packrat \
|
packrat \
|
||||||
ll
|
llk \
|
||||||
|
regex
|
||||||
|
|
||||||
HAMMER_PARTS := \
|
HAMMER_PARTS := \
|
||||||
bitreader.o \
|
bitreader.o \
|
||||||
|
|
@ -39,7 +40,6 @@ HAMMER_PARTS := \
|
||||||
datastructures.o \
|
datastructures.o \
|
||||||
system_allocator.o \
|
system_allocator.o \
|
||||||
benchmark.o \
|
benchmark.o \
|
||||||
compile.o \
|
|
||||||
cfgrammar.o \
|
cfgrammar.o \
|
||||||
$(PARSERS:%=parsers/%.o) \
|
$(PARSERS:%=parsers/%.o) \
|
||||||
$(BACKENDS:%=backends/%.o)
|
$(BACKENDS:%=backends/%.o)
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@
|
||||||
#define HAMMER_ALLOCATOR__H__
|
#define HAMMER_ALLOCATOR__H__
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
|
||||||
typedef struct HAllocator_ {
|
typedef struct HAllocator_ {
|
||||||
void* (*alloc)(struct HAllocator_* allocator, size_t size);
|
void* (*alloc)(struct HAllocator_* allocator, size_t size);
|
||||||
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
|
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
|
||||||
|
|
|
||||||
|
|
@ -3,22 +3,23 @@
|
||||||
#include "../cfgrammar.h"
|
#include "../cfgrammar.h"
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
|
// XXX despite the names, this is all LL(1) right now. TODO
|
||||||
|
|
||||||
|
|
||||||
/* Generating the LL parse table */
|
/* Generating the LL(k) parse table */
|
||||||
|
|
||||||
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
|
/* Maps each nonterminal (HCFChoice) of the grammar to another hash table that
|
||||||
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
|
* maps lookahead tokens (HCFToken) to productions (HCFSequence).
|
||||||
*/
|
*/
|
||||||
typedef struct HLLTable_ {
|
typedef struct HLLkTable_ {
|
||||||
HHashTable *rows;
|
HHashTable *rows;
|
||||||
HCFChoice *start; // start symbol
|
HCFChoice *start; // start symbol
|
||||||
HArena *arena;
|
HArena *arena;
|
||||||
HAllocator *mm__;
|
HAllocator *mm__;
|
||||||
} HLLTable;
|
} HLLkTable;
|
||||||
|
|
||||||
/* Interface to look up an entry in the parse table. */
|
/* Interface to look up an entry in the parse table. */
|
||||||
const HCFSequence *h_ll_lookup(const HLLTable *table, const HCFChoice *x, HCFToken tok)
|
const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x, HCFToken tok)
|
||||||
{
|
{
|
||||||
const HHashTable *row = h_hashtable_get(table->rows, x);
|
const HHashTable *row = h_hashtable_get(table->rows, x);
|
||||||
assert(row != NULL); // the table should have one row for each nonterminal
|
assert(row != NULL); // the table should have one row for each nonterminal
|
||||||
|
|
@ -28,7 +29,7 @@ const HCFSequence *h_ll_lookup(const HLLTable *table, const HCFChoice *x, HCFTok
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate a new parse table. */
|
/* Allocate a new parse table. */
|
||||||
HLLTable *h_lltable_new(HAllocator *mm__)
|
HLLkTable *h_llktable_new(HAllocator *mm__)
|
||||||
{
|
{
|
||||||
// NB the parse table gets an arena separate from the grammar so we can free
|
// NB the parse table gets an arena separate from the grammar so we can free
|
||||||
// the latter after table generation.
|
// the latter after table generation.
|
||||||
|
|
@ -37,7 +38,7 @@ HLLTable *h_lltable_new(HAllocator *mm__)
|
||||||
HHashTable *rows = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
HHashTable *rows = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
|
||||||
assert(rows != NULL);
|
assert(rows != NULL);
|
||||||
|
|
||||||
HLLTable *table = h_new(HLLTable, 1);
|
HLLkTable *table = h_new(HLLkTable, 1);
|
||||||
assert(table != NULL);
|
assert(table != NULL);
|
||||||
table->mm__ = mm__;
|
table->mm__ = mm__;
|
||||||
table->arena = arena;
|
table->arena = arena;
|
||||||
|
|
@ -46,7 +47,7 @@ HLLTable *h_lltable_new(HAllocator *mm__)
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_lltable_free(HLLTable *table)
|
void h_llktable_free(HLLkTable *table)
|
||||||
{
|
{
|
||||||
HAllocator *mm__ = table->mm__;
|
HAllocator *mm__ = table->mm__;
|
||||||
h_delete_arena(table->arena);
|
h_delete_arena(table->arena);
|
||||||
|
|
@ -95,10 +96,10 @@ int fill_table_row(HCFGrammar *g, HHashTable *row,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate the LL parse table from the given grammar.
|
/* Generate the LL(k) parse table from the given grammar.
|
||||||
* Returns -1 on error, 0 on success.
|
* Returns -1 on error, 0 on success.
|
||||||
*/
|
*/
|
||||||
static int fill_table(HCFGrammar *g, HLLTable *table)
|
static int fill_table(HCFGrammar *g, HLLkTable *table)
|
||||||
{
|
{
|
||||||
table->start = g->start;
|
table->start = g->start;
|
||||||
|
|
||||||
|
|
@ -120,7 +121,7 @@ static int fill_table(HCFGrammar *g, HLLTable *table)
|
||||||
for(s = a->seq; *s; s++) {
|
for(s = a->seq; *s; s++) {
|
||||||
// record this production in row as appropriate
|
// record this production in row as appropriate
|
||||||
// this can signal an ambiguity conflict.
|
// this can signal an ambiguity conflict.
|
||||||
// NB we don't worry about deallocating anything, h_ll_compile will
|
// NB we don't worry about deallocating anything, h_llk_compile will
|
||||||
// delete the whole arena for us.
|
// delete the whole arena for us.
|
||||||
if(fill_table_row(g, row, a, *s) < 0)
|
if(fill_table_row(g, row, a, *s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
@ -131,7 +132,7 @@ static int fill_table(HCFGrammar *g, HLLTable *table)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
|
int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
{
|
{
|
||||||
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
// Convert parser to a CFG. This can fail as indicated by a NULL return.
|
||||||
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
HCFGrammar *grammar = h_cfgrammar(mm__, parser);
|
||||||
|
|
@ -143,11 +144,11 @@ int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
// TODO: avoid conflicts by splitting occurrences?
|
// TODO: avoid conflicts by splitting occurrences?
|
||||||
|
|
||||||
// generate table and store in parser->data.
|
// generate table and store in parser->data.
|
||||||
HLLTable *table = h_lltable_new(mm__);
|
HLLkTable *table = h_llktable_new(mm__);
|
||||||
if(fill_table(grammar, table) < 0) {
|
if(fill_table(grammar, table) < 0) {
|
||||||
// the table was ambiguous
|
// the table was ambiguous
|
||||||
h_cfgrammar_free(grammar);
|
h_cfgrammar_free(grammar);
|
||||||
h_lltable_free(table);
|
h_llktable_free(table);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
parser->data = table;
|
parser->data = table;
|
||||||
|
|
@ -161,13 +162,14 @@ int h_ll_compile(HAllocator* mm__, HParser* parser, const void* params)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* LL driver */
|
/* LL(k) driver */
|
||||||
|
|
||||||
HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* state)
|
HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
|
||||||
{
|
{
|
||||||
const HLLTable *table = parser->data;
|
const HLLkTable *table = parser->data;
|
||||||
HArena *arena = state->arena;
|
HArena *arena = h_new_arena(mm__, 0); // will hold the results
|
||||||
HSlist *stack = h_slist_new(arena);
|
HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse
|
||||||
|
HSlist *stack = h_slist_new(tarena);
|
||||||
HCountedArray *seq = h_carray_new(arena); // accumulates current parse result
|
HCountedArray *seq = h_carray_new(arena); // accumulates current parse result
|
||||||
|
|
||||||
// in order to construct the parse tree, we delimit the symbol stack into
|
// in order to construct the parse tree, we delimit the symbol stack into
|
||||||
|
|
@ -177,7 +179,7 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
// frame delimiter.
|
// frame delimiter.
|
||||||
// also on the stack below the mark, we store the previously accumulated
|
// also on the stack below the mark, we store the previously accumulated
|
||||||
// value for the surrounding production.
|
// value for the surrounding production.
|
||||||
void *mark = h_arena_malloc(arena, 1);
|
void *mark = h_arena_malloc(tarena, 1);
|
||||||
|
|
||||||
// initialize with the start symbol on the stack.
|
// initialize with the start symbol on the stack.
|
||||||
h_slist_push(stack, table->start);
|
h_slist_push(stack, table->start);
|
||||||
|
|
@ -188,8 +190,8 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
while(!h_slist_empty(stack)) {
|
while(!h_slist_empty(stack)) {
|
||||||
// fill up lookahead buffer as required
|
// fill up lookahead buffer as required
|
||||||
if(lookahead == 0) {
|
if(lookahead == 0) {
|
||||||
uint8_t c = h_read_bits(&state->input_stream, 8, false);
|
uint8_t c = h_read_bits(stream, 8, false);
|
||||||
if(state->input_stream.overrun)
|
if(stream->overrun)
|
||||||
lookahead = end_token;
|
lookahead = end_token;
|
||||||
else
|
else
|
||||||
lookahead = char_token(c);
|
lookahead = char_token(c);
|
||||||
|
|
@ -203,16 +205,16 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
// hit stack frame boundary
|
// hit stack frame boundary
|
||||||
|
|
||||||
// wrap the accumulated parse result, this sequence is finished
|
// wrap the accumulated parse result, this sequence is finished
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->token_type = TT_SEQUENCE;
|
tok->token_type = TT_SEQUENCE;
|
||||||
tok->seq = seq;
|
tok->seq = seq;
|
||||||
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
||||||
|
|
||||||
// call validation and semantic action, if present
|
// call validation and semantic action, if present
|
||||||
if(x->pred && !x->pred(make_result(state, tok)))
|
if(x->pred && !x->pred(make_result(tarena, tok)))
|
||||||
return NULL; // validation failed -> no parse
|
goto no_parse; // validation failed -> no parse
|
||||||
if(x->action)
|
if(x->action)
|
||||||
tok = (HParsedToken *)x->action(make_result(state, tok));
|
tok = (HParsedToken *)x->action(make_result(arena, tok));
|
||||||
|
|
||||||
// result becomes next left-most element of higher-level sequence
|
// result becomes next left-most element of higher-level sequence
|
||||||
seq = h_slist_pop(stack);
|
seq = h_slist_pop(stack);
|
||||||
|
|
@ -230,7 +232,7 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
seq = h_carray_new(arena);
|
seq = h_carray_new(arena);
|
||||||
|
|
||||||
// look up applicable production in parse table
|
// look up applicable production in parse table
|
||||||
const HCFSequence *p = h_ll_lookup(table, x, lookahead);
|
const HCFSequence *p = h_llk_lookup(table, x, lookahead);
|
||||||
|
|
||||||
// push production's rhs onto the stack (in reverse order)
|
// push production's rhs onto the stack (in reverse order)
|
||||||
HCFChoice **s;
|
HCFChoice **s;
|
||||||
|
|
@ -250,40 +252,40 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
switch(x->type) {
|
switch(x->type) {
|
||||||
case HCF_END:
|
case HCF_END:
|
||||||
if(input != end_token)
|
if(input != end_token)
|
||||||
return NULL;
|
goto no_parse;
|
||||||
tok = NULL;
|
tok = NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HCF_CHAR:
|
case HCF_CHAR:
|
||||||
if(input != char_token(x->chr))
|
if(input != char_token(x->chr))
|
||||||
return NULL;
|
goto no_parse;
|
||||||
tok = a_new(HParsedToken, 1);
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
tok->uint = x->chr;
|
tok->uint = x->chr;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HCF_CHARSET:
|
case HCF_CHARSET:
|
||||||
if(input == end_token)
|
if(input == end_token)
|
||||||
return NULL;
|
goto no_parse;
|
||||||
if(!charset_isset(x->charset, token_char(input)))
|
if(!charset_isset(x->charset, token_char(input)))
|
||||||
return NULL;
|
goto no_parse;
|
||||||
tok = a_new(HParsedToken, 1);
|
tok = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
tok->token_type = TT_UINT;
|
tok->token_type = TT_UINT;
|
||||||
tok->uint = token_char(input);
|
tok->uint = token_char(input);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: // should not be reached
|
default: // should not be reached
|
||||||
assert_message(0, "unknown HCFChoice type");
|
assert_message(0, "unknown HCFChoice type");
|
||||||
return NULL;
|
goto no_parse;
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
// XXX tok->index and tok->bit_offset (don't take directly from stream, cuz peek!)
|
||||||
|
|
||||||
// call validation and semantic action, if present
|
// call validation and semantic action, if present
|
||||||
if(x->pred && !x->pred(make_result(state, tok)))
|
if(x->pred && !x->pred(make_result(tarena, tok)))
|
||||||
return NULL; // validation failed -> no parse
|
goto no_parse; // validation failed -> no parse
|
||||||
if(x->action)
|
if(x->action)
|
||||||
tok = (HParsedToken *)x->action(make_result(state, tok));
|
tok = (HParsedToken *)x->action(make_result(arena, tok));
|
||||||
|
|
||||||
// append to result sequence
|
// append to result sequence
|
||||||
h_carray_append(seq, tok);
|
h_carray_append(seq, tok);
|
||||||
|
|
@ -293,25 +295,31 @@ HParseResult *h_ll_parse(HAllocator* mm__, const HParser* parser, HParseState* s
|
||||||
// since we started with a single nonterminal on the stack, seq should
|
// since we started with a single nonterminal on the stack, seq should
|
||||||
// contain exactly the parse result.
|
// contain exactly the parse result.
|
||||||
assert(seq->used == 1);
|
assert(seq->used == 1);
|
||||||
return make_result(state, seq->elements[0]);
|
h_delete_arena(tarena);
|
||||||
|
return make_result(arena, seq->elements[0]);
|
||||||
|
|
||||||
|
no_parse:
|
||||||
|
h_delete_arena(tarena);
|
||||||
|
h_delete_arena(arena);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
HParserBackendVTable h__ll_backend_vtable = {
|
HParserBackendVTable h__llk_backend_vtable = {
|
||||||
.compile = h_ll_compile,
|
.compile = h_llk_compile,
|
||||||
.parse = h_ll_parse
|
.parse = h_llk_parse
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// dummy!
|
// dummy!
|
||||||
int test_ll(void)
|
int test_llk(void)
|
||||||
{
|
{
|
||||||
const HParser *c = h_many(h_ch('x'));
|
HParser *c = h_many(h_ch('x'));
|
||||||
const HParser *q = h_sequence(c, h_ch('y'), NULL);
|
HParser *q = h_sequence(c, h_ch('y'), NULL);
|
||||||
const HParser *p = h_choice(q, h_end_p(), NULL);
|
HParser *p = h_choice(q, h_end_p(), NULL);
|
||||||
|
|
||||||
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
HCFGrammar *g = h_cfgrammar(&system_allocator, p);
|
||||||
|
|
||||||
|
|
@ -1,7 +1,16 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
#include "../parsers/parser_internal.h"
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
|
/*
 * Hash `len` bytes starting at `buf`.
 *
 * Starts from the seed 5381 and, for every byte in order, multiplies the
 * running value by 33 and adds the byte. Arithmetic is done in uint32_t, so
 * it wraps modulo 2^32. An empty buffer hashes to the seed.
 */
static uint32_t djbhash(const uint8_t *buf, size_t len) {
  uint32_t acc = 5381;
  for (size_t i = 0; i < len; i++)
    acc = acc * 33 + buf[i];
  return acc;
}
|
||||||
|
|
||||||
// short-hand for constructing HCachedResult's
|
// short-hand for constructing HCachedResult's
|
||||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||||
|
|
@ -191,12 +200,37 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
|
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
|
||||||
|
parser->backend = PB_PACKRAT;
|
||||||
return 0; // No compilation necessary, and everything should work
|
return 0; // No compilation necessary, and everything should work
|
||||||
// out of the box.
|
// out of the box.
|
||||||
}
|
}
|
||||||
|
|
||||||
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
|
static uint32_t cache_key_hash(const void* key) {
|
||||||
return h_do_parse(parser, parse_state);
|
return djbhash(key, sizeof(HParserCacheKey));
|
||||||
|
}
|
||||||
|
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||||
|
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
|
||||||
|
HArena * arena = h_new_arena(mm__, 0);
|
||||||
|
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
||||||
|
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
|
||||||
|
cache_key_hash); // hash_func
|
||||||
|
parse_state->input_stream = *input_stream;
|
||||||
|
parse_state->lr_stack = h_slist_new(arena);
|
||||||
|
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
|
||||||
|
cache_key_hash);
|
||||||
|
parse_state->arena = arena;
|
||||||
|
HParseResult *res = h_do_parse(parser, parse_state);
|
||||||
|
h_slist_free(parse_state->lr_stack);
|
||||||
|
h_hashtable_free(parse_state->recursion_heads);
|
||||||
|
// tear down the parse state
|
||||||
|
h_hashtable_free(parse_state->cache);
|
||||||
|
if (!res)
|
||||||
|
h_delete_arena(parse_state->arena);
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
HParserBackendVTable h__packrat_backend_vtable = {
|
HParserBackendVTable h__packrat_backend_vtable = {
|
||||||
|
|
|
||||||
366
src/backends/regex.c
Normal file
366
src/backends/regex.c
Normal file
|
|
@ -0,0 +1,366 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include "../internal.h"
|
||||||
|
#include "../parsers/parser_internal.h"
|
||||||
|
#include "regex.h"
|
||||||
|
|
||||||
|
#undef a_new
|
||||||
|
#define a_new(typ, count) a_new_(arena, typ, count)
|
||||||
|
// Stack VM
|
||||||
|
/*
 * Opcodes of the stack VM (SVM). The regex VM records these in its trace;
 * run_trace() later replays the trace to build the parse result.
 */
enum HSVMOp_ {
  SVM_PUSH    = 0, // Push a mark. There is no VM insn to push an object.
  SVM_NOP     = 1, // Does nothing; used to start the trace chain, and
                   // possibly elsewhere.
  SVM_ACTION  = 2, // Same meaning as RVM_ACTION
  SVM_CAPTURE = 3, // Same meaning as RVM_CAPTURE
  SVM_ACCEPT  = 4  // End of a successful trace
};
typedef enum HSVMOp_ HSVMOp;
|
||||||
|
|
||||||
|
/*
 * One recorded stack-VM operation in a regex-VM thread's trace.
 */
struct HRVMTrace_ {
  // Link to the neighbouring trace entry. While parsing, the list is
  // reverse-threaded (newest entry first); a post-processing step inverts
  // all the pointers before the trace is replayed.
  struct HRVMTrace_ *next;
  size_t input_pos; // input offset at which the entry was recorded
  uint16_t arg;     // operand of the recorded operation
  uint8_t opcode;   // the recorded operation (an SVM_* value)
};
typedef struct HRVMTrace_ HRVMTrace;
|
||||||
|
|
||||||
|
typedef struct HRVMThread_ {
|
||||||
|
HRVMTrace *trace;
|
||||||
|
uint16_t ip;
|
||||||
|
} HRVMThread;
|
||||||
|
|
||||||
|
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len);
|
||||||
|
|
||||||
|
/*
 * Reverse a singly linked trace list in place.
 *
 * `trace` is the head of the list (or NULL). Every node's `next` pointer is
 * flipped so that the former last node becomes the head and the former head
 * ends the list with `next == NULL`.
 *
 * Returns the new head; NULL for an empty list, and the node itself for a
 * one-element list.
 *
 * The previous version stopped as soon as it reached the last node and
 * returned it without linking it back to its predecessor, so the result was
 * always a one-element list and the rest of the trace was lost.
 */
HRVMTrace *invert_trace(HRVMTrace *trace) {
  HRVMTrace *prev = NULL;
  while (trace) {
    HRVMTrace *next = trace->next; // save before overwriting the link
    trace->next = prev;
    prev = trace;
    trace = next;
  }
  return prev;
}
|
||||||
|
|
||||||
|
/*
 * Run the regex VM program `prog` over the `len` bytes at `input`.
 *
 * The input is consumed one byte at a time (plus one final step with
 * off == len for end of input). For every step, each thread parked by the
 * previous step is resumed and run until it dies, parks itself again via
 * RVM_STEP, or reaches RVM_ACCEPT. While running, threads record stack-VM
 * operations in a reverse-threaded trace.
 *
 * On the first RVM_ACCEPT the recorded trace is inverted and replayed by
 * run_trace(); that result (allocated in its own arena) is returned. If no
 * thread accepts, NULL is returned. The scratch arena used here is deleted
 * on both paths.
 *
 * Fixes relative to the previous version:
 *  - a thread that lands on an already-processed instruction is now popped;
 *    the bare `continue` never changed ipq_top and looped forever.
 *  - a resumed thread starts with the trace it parked (heads_p[ip_s]);
 *    previously THREAD.trace was left unset.
 *  - RVM_STEP parks the thread at the instruction after the STEP, not at the
 *    STEP itself, so the thread makes progress on the next byte.
 *  - an empty program fails immediately instead of writing heads_n[0].
 */
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
  HArena *arena = h_new_arena(mm__, 0);

  if (prog->length == 0) {
    // Nothing to execute, so ACCEPT can never be reached.
    h_delete_arena(arena);
    return NULL;
  }

  // heads_p: threads parked by the previous step, indexed by instruction.
  // heads_n: threads parked during the current step, for the next one.
  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
    **heads_n = a_new(HRVMTrace*, prog->length);

  HRVMTrace *ret_trace = NULL;

  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
  size_t ipq_top;

  // THREAD is the thread on top of the work stack; PUSH_SVM prepends a
  // stack-VM operation to its trace, stamped with the current input offset.
#define THREAD ip_queue[ipq_top-1]
#define PUSH_SVM(op_, arg_) do { \
    HRVMTrace *nt = a_new(HRVMTrace, 1); \
    nt->arg = (arg_); \
    nt->opcode = (op_); \
    nt->next = THREAD.trace; \
    nt->input_pos = off; \
    THREAD.trace = nt; \
  } while(0)

  // Initial thread, parked at instruction 0 with a NOP starting its trace.
  // NOTE(review): the original comment here says "zeroing", i.e. a_new is
  // presumably expected to return zeroed memory (the other heads_n slots are
  // read as NULL after the first swap) -- confirm against the arena allocator.
  heads_n[0] = a_new(HRVMTrace, 1);
  heads_n[0]->opcode = SVM_NOP;

  size_t off = 0;
  int live_threads = 1;
  for (off = 0; off <= len; off++) {
    uint8_t ch = ((off == len) ? 0 : input[off]);
    size_t ip_s;
    /* scope */ {
      // What was parked for "next" becomes "previous"; start a clean "next".
      HRVMTrace **heads_t;
      heads_t = heads_n;
      heads_n = heads_p;
      heads_p = heads_t;
      memset(heads_n, 0, prog->length * sizeof(*heads_n));
    }
    memset(insn_seen, 0, prog->length); // no insns seen yet
    if (!live_threads)
      goto match_fail;
    live_threads = 0;
    for (ip_s = 0; ip_s < prog->length; ip_s++) {
      ipq_top = 1;
      // TODO: Write this as a threaded VM
      if (!heads_p[ip_s])
        continue;
      THREAD.ip = ip_s;
      THREAD.trace = heads_p[ip_s]; // resume with the trace parked last step

      uint8_t hi, lo;
      uint16_t arg;
      while(ipq_top > 0) {
        if (insn_seen[THREAD.ip] == 1) {
          // Another thread already ran this instruction during this step;
          // drop this thread.
          ipq_top--;
          continue;
        }
        insn_seen[THREAD.ip] = 1;
        arg = prog->insns[THREAD.ip].arg;
        switch(prog->insns[THREAD.ip].op) {
        case RVM_ACCEPT:
          PUSH_SVM(SVM_ACCEPT, 0);
          ret_trace = THREAD.trace;
          goto run_trace;
        case RVM_MATCH:
          // Doesn't actually validate the "must be followed by MATCH
          // or STEP" rule. It should. Preproc perhaps?
          hi = (arg >> 8) & 0xff;
          lo = arg & 0xff;
          THREAD.ip++;
          if (ch < lo || ch > hi)
            ipq_top--; // terminate thread
          goto next_insn;
        case RVM_GOTO:
          THREAD.ip = arg;
          goto next_insn;
        case RVM_FORK:
          THREAD.ip++;
          if (!insn_seen[arg]) {
            // Mark the fall-through as queued, then run the branch target
            // first as a new thread sharing the trace recorded so far.
            insn_seen[THREAD.ip] = 2;
            HRVMTrace* tr = THREAD.trace;
            ipq_top++;
            THREAD.ip = arg;
            THREAD.trace = tr;
          }
          goto next_insn;
        case RVM_PUSH:
          PUSH_SVM(SVM_PUSH, 0);
          THREAD.ip++;
          goto next_insn;
        case RVM_ACTION:
          PUSH_SVM(SVM_ACTION, arg);
          THREAD.ip++;
          goto next_insn;
        case RVM_CAPTURE:
          PUSH_SVM(SVM_CAPTURE, 0);
          THREAD.ip++;
          goto next_insn;
        case RVM_EOF:
          THREAD.ip++;
          if (off != len)
            ipq_top--; // Terminate thread
          goto next_insn;
        case RVM_STEP:
          // Save the thread for the next input byte, parked at the
          // instruction following the STEP. A STEP that is the very last
          // instruction has nowhere to continue, so that thread just ends.
          THREAD.ip++;
          if (THREAD.ip < prog->length) {
            live_threads++;
            heads_n[THREAD.ip] = THREAD.trace;
          }
          ipq_top--;
          goto next_insn;
        }
      next_insn:
        ;

      }
    }
  }
  // No accept was reached.
 match_fail:
  h_delete_arena(arena);
  return NULL;

 run_trace:
  // Invert the direction of the trace linked list.

  ret_trace = invert_trace(ret_trace);
  HParseResult *ret = run_trace(mm__, prog, ret_trace, input, len);
  // ret is in its own arena
  h_delete_arena(arena);
  return ret;
}
|
||||||
|
#undef PUSH_SVM
|
||||||
|
#undef THREAD
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Make sure the SVM stack in `ctx` has room for `addl` more entries.
 *
 * Growth is triggered under the same condition as before
 * (stack_count + addl >= stack_capacity). The capacity is then doubled
 * repeatedly until that condition no longer holds; the previous version
 * doubled exactly once, which is not enough when `addl` is large relative to
 * the current capacity.
 *
 * The reallocation goes through `mm__`. `ctx->stack` and
 * `ctx->stack_capacity` are only updated once the new buffer is known to be
 * valid, so a failed realloc no longer replaces the stack pointer with NULL.
 */
void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) {
  if (ctx->stack_count + addl < ctx->stack_capacity)
    return; // already enough room

  size_t new_cap = ctx->stack_capacity;
  do {
    // A zero capacity would never grow by doubling; start from 1.
    new_cap = new_cap ? new_cap * 2 : 1;
  } while (ctx->stack_count + addl >= new_cap);

  void *grown = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * new_cap);
  assert(grown != NULL);
  if (!grown)
    return; // TODO: report the failure; callers cannot detect it today

  ctx->stack = grown;
  ctx->stack_capacity = new_cap;
}
|
||||||
|
|
||||||
|
/*
 * Replay a recorded stack-VM trace and build the parse result.
 *
 * `trace` is walked front to back via `next`. `orig_prog` is only used for
 * its action table; `input` is the buffer that captured byte ranges point
 * into. `len` is not used here.
 *
 * Returns a new HParseResult, allocated in a fresh arena that the result
 * owns via `res->arena`, when the trace reaches SVM_ACCEPT. Returns NULL,
 * after deleting that arena, if an action fails or the trace ends without
 * SVM_ACCEPT.
 *
 * Fixes relative to the previous version:
 *  - SVM_CAPTURE now operates on the top of the stack
 *    (stack[stack_count - 1]); it used to read stack[stack_count], one slot
 *    past the top.
 *  - a failing action now aborts the replay, as the original TODO asked.
 *
 * NOTE(review): ctx.stack is allocated with h_new and is not released on any
 * path in this function -- confirm who is meant to free it.
 */
HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len) {
  // orig_prog is only used for the action table
  HSVMContext ctx;
  HArena *arena = h_new_arena(mm__, 0);
  ctx.stack_count = 0;
  ctx.stack_capacity = 16;
  ctx.stack = h_new(HParsedToken*, ctx.stack_capacity);

  HParsedToken *tmp_res;
  HRVMTrace *cur;
  for (cur = trace; cur; cur = cur->next) {
    switch (cur->opcode) {
    case SVM_PUSH:
      // Push a mark remembering where in the input we are.
      svm_stack_ensure_cap(mm__, &ctx, 1);
      tmp_res = a_new(HParsedToken, 1);
      tmp_res->token_type = TT_MARK;
      tmp_res->index = cur->input_pos;
      tmp_res->bit_offset = 0;
      ctx.stack[ctx.stack_count++] = tmp_res;
      break;
    case SVM_NOP:
      break;
    case SVM_ACTION:
      // Action should modify stack appropriately
      if (!orig_prog->actions[cur->arg].action(arena, &ctx, orig_prog->actions[cur->arg].env)) {
        // action failed: give up on this trace
        goto fail;
      }
      break;
    case SVM_CAPTURE:
      // Top of stack must be a mark
      // This replaces said mark in-place with a TT_BYTES.
      assert(ctx.stack_count > 0);
      tmp_res = ctx.stack[ctx.stack_count - 1];
      assert(tmp_res->token_type == TT_MARK);

      tmp_res->token_type = TT_BYTES;
      // TODO: Will need to copy if bit_offset is nonzero
      assert(tmp_res->bit_offset == 0);

      tmp_res->bytes.token = input + tmp_res->index;
      tmp_res->bytes.len = cur->input_pos - tmp_res->index + 1; // inclusive
      break;
    case SVM_ACCEPT: {
      assert(ctx.stack_count == 1);
      HParseResult *res = a_new(HParseResult, 1);
      res->ast = ctx.stack[0];
      res->bit_length = cur->input_pos * 8;
      res->arena = arena;
      return res;
    }
    }
  }

 fail:
  h_delete_arena(arena);
  return NULL;
}
|
||||||
|
|
||||||
|
/**
 * Register an (action function, env) pair in the program's action table.
 *
 * If an identical pair is already present its existing index is
 * returned; otherwise the pair is appended and the new index returned.
 */
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) {
  // Look for an entry we can reuse.
  uint16_t idx = 0;
  while (idx < prog->action_count) {
    HSVMAction *existing = &prog->actions[idx];
    if (existing->action == action_func && existing->env == env)
      return idx;
    idx++;
  }

  // Grow the table whenever action_count+1 is a power of two; the new
  // size is twice that power of two.
  size_t next_count = prog->action_count + 1;
  if ((prog->action_count & next_count) == 0) {
    size_t new_size = next_count * 2;
    prog->actions = prog->allocator->realloc(prog->allocator, prog->actions, new_size * sizeof(*prog->actions));
    // TODO: Handle the allocation failed case nicely.
  }

  size_t slot = prog->action_count++;
  prog->actions[slot].action = action_func;
  prog->actions[slot].env = env;
  return slot;
}
|
||||||
|
|
||||||
|
/**
 * Append one instruction to the program.
 *
 * Returns the address (index) of the instruction just created.
 */
uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg) {
  // Grow the instruction array whenever length+1 is a power of two; the
  // new size is twice that power of two.
  size_t next_len = prog->length + 1;
  if ((prog->length & next_len) == 0) {
    size_t new_size = next_len * 2;
    prog->insns = prog->allocator->realloc(prog->allocator, prog->insns, new_size * sizeof(*prog->insns));
    // TODO: Handle the allocation failed case nicely.
  }

  size_t ip = prog->length++;
  HRVMInsn *insn = &prog->insns[ip];
  insn->op = op;
  insn->arg = arg;
  return ip;
}
|
||||||
|
|
||||||
|
/** Return the address the next inserted instruction will get. */
uint16_t h_rvm_get_ip(HRVMProg *prog) {
  // The next free slot is simply the current instruction count.
  return (uint16_t)prog->length;
}
|
||||||
|
|
||||||
|
/**
 * Overwrite the argument of an already-emitted instruction.
 *
 * `ip` must address an existing instruction (asserted).
 */
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
  assert(ip < prog->length);
  HRVMInsn *target = &prog->insns[ip];
  target->arg = new_val;
}
|
||||||
|
|
||||||
|
/**
 * Count the stack entries that sit above the topmost TT_MARK.
 *
 * Scans from the top of the stack downwards, including the bottom
 * element.  Returns the number of entries above the first mark found,
 * or ctx->stack_count if the stack holds no mark (this is also the
 * result for an empty stack).
 */
size_t h_svm_count_to_mark(HSVMContext *ctx) {
  // The bound must be stack_count, not stack_count-1: the latter skips
  // stack[0] and wraps around when the stack is empty.
  for (size_t ctm = 0; ctm < ctx->stack_count; ctm++) {
    if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
      return ctm;
  }
  return ctx->stack_count;
}
|
||||||
|
|
||||||
|
// TODO: Implement the primitive actions
|
||||||
|
/**
 * SVM action: collapse everything above the topmost mark into a sequence.
 *
 * The mark token itself is converted in place into a TT_SEQUENCE whose
 * elements are the entries that were above it; those entries are then
 * popped, leaving the sequence token on top of the stack.  `env` is
 * unused.  Always returns true; a missing mark trips an assertion.
 */
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
  size_t n_items = h_svm_count_to_mark(ctx);
  assert (n_items < ctx->stack_count);
  HParsedToken *res = ctx->stack[ctx->stack_count - 1 - n_items];
  assert (res->token_type == TT_MARK);
  res->token_type = TT_SEQUENCE;

  HCountedArray *ret_carray = h_carray_new_sized(arena, n_items);
  res->seq = ret_carray;
  // res index and bit offset are the same as the mark.
  for (size_t i = 0; i < n_items; i++) {
    ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i];
  }
  // The elements were stored directly rather than appended, so the
  // element count has to be recorded by hand.
  // NOTE(review): assumes HCountedArray tracks its length in `used`.
  ret_carray->used = n_items;
  ctx->stack_count -= n_items;
  return true;
}
|
||||||
|
|
||||||
|
/**
 * SVM action: pop entries until a mark has been popped.
 *
 * The mark itself is removed as well.  Returns true once a mark was
 * popped, false if the stack ran empty without finding one.  `arena`
 * and `env` are unused.
 */
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
  while (ctx->stack_count > 0) {
    HParsedToken *popped = ctx->stack[--ctx->stack_count];
    if (popped->token_type == TT_MARK)
      return true;
  }
  // no mark found.
  return false;
}
|
||||||
|
|
||||||
|
// Glue regex backend to rest of system
|
||||||
|
|
||||||
|
/**
 * Emit RVM code for `parser` into `prog` by delegating to the parser's
 * own compile_to_rvm method; its result is passed straight through.
 */
bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
  const HParserVtable *vt = parser->vtable;
  return vt->compile_to_rvm(prog, parser->env);
}
|
||||||
|
|
||||||
|
/**
 * Release the compiled regex program attached to `parser` and reset the
 * parser to the packrat backend.
 */
static void h_regex_free(HParser *parser) {
  HRVMProg *prog = parser->backend_data;
  // h_free releases through the allocator named mm__.
  HAllocator *mm__ = prog->allocator;

  h_free(prog->insns);
  h_free(prog->actions);
  h_free(prog);

  parser->backend_data = NULL;
  parser->backend = PB_PACKRAT;
}
|
||||||
|
|
||||||
|
/**
 * Compile `parser` for the regex backend.
 *
 * Returns 0 on success (the program is stored in parser->backend_data),
 * 1 if the parser does not describe a regular language, and 2 if code
 * generation fails.  `params` is unused.
 */
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
  if (!parser->vtable->isValidRegular(parser->env))
    return 1;
  HRVMProg *prog = h_new(HRVMProg, 1);
  // Start from an explicitly empty program: the insert/create helpers
  // read length/action_count and realloc insns/actions, and the failure
  // path below frees both arrays.
  prog->length = 0;
  prog->action_count = 0;
  prog->insns = NULL;
  prog->actions = NULL;
  prog->allocator = mm__;
  if (!h_compile_regex(prog, parser)) {
    h_free(prog->insns);
    h_free(prog->actions);
    h_free(prog);
    return 2;
  }
  parser->backend_data = prog;
  return 0;
}
|
||||||
|
|
||||||
|
/**
 * Backend parse hook: run the parser's compiled regex program over the
 * whole input buffer of `input_stream`.
 */
static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HRVMProg *prog = parser->backend_data;
  return h_rvm_run__m(mm__, prog, input_stream->input, input_stream->length);
}
|
||||||
|
|
||||||
|
HParserBackendVTable h__regex_backend_vtable = {
|
||||||
|
.compile = h_regex_compile,
|
||||||
|
.parse = h_regex_parse,
|
||||||
|
.free = h_regex_free
|
||||||
|
};
|
||||||
80
src/backends/regex.h
Normal file
80
src/backends/regex.h
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
// Internal defs
|
||||||
|
#ifndef HAMMER_BACKEND_REGEX__H
|
||||||
|
#define HAMMER_BACKEND_REGEX__H
|
||||||
|
|
||||||
|
// each insn is an 8-bit opcode and a 16-bit parameter
|
||||||
|
// [a] are actions; they add an instruction to the stackvm that is being output.
|
||||||
|
// [m] are match ops; they can either succeed or fail, depending on the current character
|
||||||
|
// [c] are control ops. They affect the pc non-linearly.
|
||||||
|
/*
 * Regex VM opcodes.  Tags: [a] action, [m] match op, [c] control op.
 */
typedef enum HRVMOp_ {
  /* [a] */
  RVM_ACCEPT,
  /* [c] parameter is an offset into the instruction table */
  RVM_GOTO,
  /* [c] parameter is an offset into the instruction table */
  RVM_FORK,
  /* [a] No arguments, just pushes a mark (pointer to some character in
   *     the input string) onto the stack */
  RVM_PUSH,
  /* [a] argument is an action ID */
  RVM_ACTION,
  /* [a] Capture the last string (up to the current position,
   *     non-inclusive), and push it on the stack. No arg. */
  RVM_CAPTURE,
  /* [m] Succeeds only if at EOF. */
  RVM_EOF,
  /* [m] The high byte of the parameter is an upper bound and the low
   *     byte is a lower bound, both inclusive. An inverted match should
   *     be handled as two ranges. */
  RVM_MATCH,
  /* [a] Step to the next byte of input */
  RVM_STEP,
  /* Number of opcodes; not an instruction itself. */
  RVM_OPCOUNT
} HRVMOp;
|
||||||
|
|
||||||
|
/* One regex VM instruction. */
typedef struct HRVMInsn_ {
  uint8_t op;    /* opcode, stored in 8 bits */
  uint16_t arg;  /* 16-bit parameter; meaning depends on the opcode */
} HRVMInsn;
|
||||||
|
|
||||||
|
#define TT_MARK TT_RESERVED_1
|
||||||
|
|
||||||
|
typedef struct HSVMContext_ {
|
||||||
|
HParsedToken **stack;
|
||||||
|
size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
|
||||||
|
size_t stack_capacity;
|
||||||
|
} HSVMContext;
|
||||||
|
|
||||||
|
// These actions all assume that the items on the stack are not
|
||||||
|
// aliased anywhere.
|
||||||
|
typedef bool (*HSVMActionFunc)(HArena *arena, HSVMContext *ctx, void* env);
|
||||||
|
typedef struct HSVMAction_ {
|
||||||
|
HSVMActionFunc action;
|
||||||
|
void* env;
|
||||||
|
} HSVMAction;
|
||||||
|
|
||||||
|
struct HRVMProg_ {
|
||||||
|
HAllocator *allocator;
|
||||||
|
size_t length;
|
||||||
|
size_t action_count;
|
||||||
|
HRVMInsn *insns;
|
||||||
|
HSVMAction *actions;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns true IFF the provided parser could be compiled.
|
||||||
|
bool h_compile_regex(HRVMProg *prog, const HParser* parser);
|
||||||
|
|
||||||
|
// These functions are used by the compile_to_rvm method of HParser
|
||||||
|
uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env);
|
||||||
|
|
||||||
|
// returns the address of the instruction just created
|
||||||
|
uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg);
|
||||||
|
|
||||||
|
// returns the address of the next insn to be created.
|
||||||
|
uint16_t h_rvm_get_ip(HRVMProg *prog);
|
||||||
|
|
||||||
|
// Used to insert forward references; the idea is to generate a GOTO
|
||||||
|
// or FORK instruction with a target of 0, then update it once the
|
||||||
|
// correct target is known.
|
||||||
|
void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val);
|
||||||
|
|
||||||
|
// Common SVM action funcs...
|
||||||
|
bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env);
|
||||||
|
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env);
|
||||||
|
|
||||||
|
extern HParserBackendVTable h__regex_backend_vtable;
|
||||||
|
|
||||||
|
#endif
|
||||||
112
src/backends/regexvm_asm.pl
Normal file
112
src/backends/regexvm_asm.pl
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
# The input file consists of a sequence of blocks, which can be parsed
|
||||||
|
# as SVM test cases, RVM test cases, or C functions. Each block starts
|
||||||
|
# with a header line, then a sequence of options, and finally text in
|
||||||
|
# a format defined by the block type.
|
||||||
|
#
|
||||||
|
# Header lines start with "+TYPE", optionally followed by a name. This
|
||||||
|
# name is semantically meaningful for SVM and RVM blocks; it
|
||||||
|
# determines the name of the test case.
|
||||||
|
|
||||||
|
# A C block's name is not used, and it takes no options. The body
|
||||||
|
# (which continues until the first line that looks like a header), is
|
||||||
|
# just passed straight through into the C source.
|
||||||
|
|
||||||
|
# SVM blocks' names are the GLib test case name. The underlying
|
||||||
|
# function's name is derived by substituting invalid characters with
|
||||||
|
# '_'. Note that this can result in collisions (eg, /foo_bar/baz
|
||||||
|
# collides with /foo/bar_baz). If this happens, it's your own damn
|
||||||
|
# fault; rename the blocks. SVM blocks take three different options:
|
||||||
|
# @input, @output, and @pre. The @input pragma's argument is a
|
||||||
|
# C-quoted string that gets passed into the VM as the input string,
|
||||||
|
# and @output is a C-quoted string that is compared against
|
||||||
|
# h_write_result_unamb. @pre lines are prepended verbatim to the
|
||||||
|
# function body (with the @pre stripped, of course); they can be used
|
||||||
|
# to initialize environment values.
|
||||||
|
#
|
||||||
|
# SVM instructions consist of either two or four fields:
|
||||||
|
#
|
||||||
|
# input_pos opcode [arg env]
|
||||||
|
#
|
||||||
|
# input_pos and opcode correspond to the fields in HRVMTrace. arg and
|
||||||
|
# env are used to populate an HSVMAction; arg is the function, and env
|
||||||
|
# is the object whose address should be used as the env.
|
||||||
|
|
||||||
|
# RVM blocks are very similar to SVM blocks; the name and options are
|
||||||
|
# handled exactly the same way. The assembly text is handled slightly
|
||||||
|
# differently; the format is:
|
||||||
|
#
|
||||||
|
# [label:] opcode [arg ...]
|
||||||
|
#
|
||||||
|
# For FORK and GOTO, the arg should be a label that is defined
|
||||||
|
# elsewhere.
|
||||||
|
#
|
||||||
|
# For ACTION, the arguments are handled the same way as with SVM.
|
||||||
|
#
|
||||||
|
# MATCH takes two arguments, each of which can be any C integer
|
||||||
|
# constant (not including character constants), which form the lower
|
||||||
|
# and upper bounds of the matched character, respectively.
|
||||||
|
#
|
||||||
|
# No other RVM instructions take an argument.
|
||||||
|
|
||||||
|
# At the beginning of any line, comments preceded by '#' are allowed;
|
||||||
|
# they are replaced by C++ comments and inserted in the nearest valid
|
||||||
|
# location in the output.
|
||||||
|
|
||||||
|
# Current block type.  Starts out as "TOP"; note this must be an
# assignment ("="), not a comparison ("=="), or $mode stays undefined.
my $mode = "TOP";

# common regexes:
my $re_ident = qr/[A-Za-z_][A-Za-z0-9_]*/;
my $re_cstr = qr/"(?:[^\\"]|\\["'abefnrtv0\\]|\\x[0-9a-fA-F]{2}|\\[0-7]{3})*"/;
|
||||||
|
|
||||||
|
|
||||||
|
# Handlers for SVM blocks.  Each handler receives the per-block
# environment hash reference as its first argument.
my %svm = (
    # Record the block (test case) name.
    name => sub {
        my ($env, $name) = @_;
        $env->{name} = $name;
    },
    # Handle an "@name value" option line.
    pragma => sub {
        my ($env, $name, $val) = @_;
        if ($name eq "input") {
            chomp($env->{input} = $val);
        } elsif ($name eq "output") {
            chomp($env->{output} = $val);
        } elsif ($name eq "pre") {
            # The braces are required: @$env->{pre} would dereference
            # $env itself as an array instead of the "pre" entry.
            push(@{$env->{pre}}, $val);
        } else {
            warn "Invalid SVM pragma";
        }
    },
    # Handle one instruction line: input_pos opcode [arg env]
    body => sub {
        my ($env, $line) = @_;
        my ($ipos, $op, $arg, $argenv);
        if ($line =~ /^\s*(\d+)\s+(PUSH|NOP|ACTION|CAPTURE|ACCEPT)(?:\s+($re_ident)\s+($re_ident))?/) {
            if ($2 eq "PUSH") {
                # TODO: implement all the opcodes
            }
        }
    }
);
|
||||||
|
|
||||||
|
|
||||||
|
# Main loop: read the input line by line, switching mode on header lines.
while (<>) {
    # Header lines start with a literal "+", which must be escaped in
    # the pattern.
    if (/^\+(C|RVM|SVM)/) {
        $mode = $1;
    }

    if ($mode eq "TOP") {
        if (/^#(.*)/) {
            # (.*) stops before the line terminator, so add it back.
            print "// $1\n";
            next;
        }
    } elsif ($mode eq "SVM") {
    } elsif ($mode eq "RVM") {
    } elsif ($mode eq "C") {
    }

}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -21,11 +21,11 @@
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) {
|
HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
|
||||||
return h_benchmark__m(&system_allocator, parser, testcases);
|
return h_benchmark__m(&system_allocator, parser, testcases);
|
||||||
}
|
}
|
||||||
|
|
||||||
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) {
|
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases) {
|
||||||
// For now, just output the results to stderr
|
// For now, just output the results to stderr
|
||||||
HParserTestcase* tc = testcases;
|
HParserTestcase* tc = testcases;
|
||||||
HParserBackend backend = PB_MIN;
|
HParserBackend backend = PB_MIN;
|
||||||
|
|
@ -33,7 +33,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HPars
|
||||||
ret->len = PB_MAX-PB_MIN;
|
ret->len = PB_MAX-PB_MIN;
|
||||||
ret->results = h_new(HBackendResults, ret->len);
|
ret->results = h_new(HBackendResults, ret->len);
|
||||||
|
|
||||||
for (backend = PB_MIN; backend < PB_MAX; backend++) {
|
for (backend = PB_MIN; backend <= PB_MAX; backend++) {
|
||||||
ret->results[backend].backend = backend;
|
ret->results[backend].backend = backend;
|
||||||
// Step 1: Compile grammar for given parser...
|
// Step 1: Compile grammar for given parser...
|
||||||
if (h_compile(parser, backend, NULL) == -1) {
|
if (h_compile(parser, backend, NULL) == -1) {
|
||||||
|
|
|
||||||
|
|
@ -1,17 +0,0 @@
|
||||||
// This file contains functions related to managing multiple parse backends
|
|
||||||
#include "hammer.h"
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
static HParserBackendVTable *backends[PB_MAX] = {
|
|
||||||
&h__packrat_backend_vtable,
|
|
||||||
&h__ll_backend_vtable,
|
|
||||||
};
|
|
||||||
|
|
||||||
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
|
|
||||||
return h_compile__m(&system_allocator, parser, backend, params);
|
|
||||||
}
|
|
||||||
|
|
||||||
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
|
|
||||||
// be naughty and cast off the const
|
|
||||||
return backends[backend]->compile(mm__, (HParser *)parser, params);
|
|
||||||
}
|
|
||||||
75
src/hammer.c
75
src/hammer.c
|
|
@ -26,13 +26,12 @@
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
#include "parsers/parser_internal.h"
|
#include "parsers/parser_internal.h"
|
||||||
|
|
||||||
static uint32_t djbhash(const uint8_t *buf, size_t len) {
|
static HParserBackendVTable *backends[PB_MAX + 1] = {
|
||||||
uint32_t hash = 5381;
|
&h__packrat_backend_vtable,
|
||||||
while (len--) {
|
&h__regex_backend_vtable,
|
||||||
hash = hash * 33 + *buf++;
|
&h__llk_backend_vtable,
|
||||||
}
|
};
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Helper function, since these lines appear in every parser */
|
/* Helper function, since these lines appear in every parser */
|
||||||
|
|
||||||
|
|
@ -42,46 +41,52 @@ typedef struct {
|
||||||
} HTwoParsers;
|
} HTwoParsers;
|
||||||
|
|
||||||
|
|
||||||
static uint32_t cache_key_hash(const void* key) {
|
|
||||||
return djbhash(key, sizeof(HParserCacheKey));
|
|
||||||
}
|
|
||||||
static bool cache_key_equal(const void* key1, const void* key2) {
|
|
||||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
return h_parse__m(&system_allocator, parser, input, length);
|
return h_parse__m(&system_allocator, parser, input, length);
|
||||||
}
|
}
|
||||||
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
|
// TODO: split the creation of the parse state into h_packrat_parse
|
||||||
// Set up a parse state...
|
// Set up a parse state...
|
||||||
HArena * arena = h_new_arena(mm__, 0);
|
HInputStream input_stream = {
|
||||||
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
.index = 0,
|
||||||
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
|
.bit_offset = 8,
|
||||||
cache_key_hash); // hash_func
|
.overrun = 0,
|
||||||
parse_state->input_stream.input = input;
|
.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN,
|
||||||
parse_state->input_stream.index = 0;
|
.length = length,
|
||||||
parse_state->input_stream.bit_offset = 8; // bit big endian
|
.input = input
|
||||||
parse_state->input_stream.overrun = 0;
|
};
|
||||||
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
|
|
||||||
parse_state->input_stream.length = length;
|
|
||||||
parse_state->lr_stack = h_slist_new(arena);
|
|
||||||
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
|
|
||||||
cache_key_hash);
|
|
||||||
parse_state->arena = arena;
|
|
||||||
HParseResult *res = h_do_parse(parser, parse_state);
|
|
||||||
h_slist_free(parse_state->lr_stack);
|
|
||||||
h_hashtable_free(parse_state->recursion_heads);
|
|
||||||
// tear down the parse state
|
|
||||||
h_hashtable_free(parse_state->cache);
|
|
||||||
if (!res)
|
|
||||||
h_delete_arena(parse_state->arena);
|
|
||||||
|
|
||||||
return res;
|
return backends[parser->backend]->parse(mm__, parser, &input_stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_parse_result_free(HParseResult *result) {
|
void h_parse_result_free(HParseResult *result) {
|
||||||
h_delete_arena(result->arena);
|
h_delete_arena(result->arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Predicate that ignores its environment and always answers false. */
bool h_false(void* env) {
  (void) env;  /* unused */
  return false;
}
|
||||||
|
|
||||||
|
/* Predicate that ignores its environment and always answers true. */
bool h_true(void* env) {
  (void) env;  /* unused */
  return true;
}
|
||||||
|
|
||||||
|
/* RVM compile hook that emits nothing and always reports failure; both
 * arguments are ignored. */
bool h_not_regular(HRVMProg *prog, void *env) {
  (void) prog;  /* unused */
  (void) env;   /* unused */
  return false;
}
|
||||||
|
|
||||||
|
/* Compile `parser` for `backend` using the system allocator; see
 * h_compile__m for the return value. */
int h_compile(HParser* parser, HParserBackend backend, const void* params) {
  HAllocator *default_mm = &system_allocator;
  return h_compile__m(default_mm, parser, backend, params);
}
|
||||||
|
|
||||||
|
/*
 * Compile `parser` with the given backend's compile hook.
 *
 * Returns the hook's result unchanged.  When it is zero the parser is
 * switched over to `backend`; otherwise parser->backend is left alone.
 */
int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
  HParserBackendVTable *vtable = backends[backend];
  int status = vtable->compile(mm__, parser, params);
  if (status == 0) {
    parser->backend = backend;
  }
  return status;
}
|
||||||
|
|
|
||||||
100
src/hammer.h
100
src/hammer.h
|
|
@ -34,8 +34,11 @@ typedef struct HParseState_ HParseState;
|
||||||
typedef enum HParserBackend_ {
|
typedef enum HParserBackend_ {
|
||||||
PB_MIN = 0,
|
PB_MIN = 0,
|
||||||
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
||||||
PB_LL,
|
PB_REGULAR, //
|
||||||
PB_MAX
|
PB_LLk, //
|
||||||
|
PB_LALR, // Not Implemented
|
||||||
|
PB_GLR, // Not Implemented
|
||||||
|
PB_MAX = PB_LLk
|
||||||
} HParserBackend;
|
} HParserBackend;
|
||||||
|
|
||||||
typedef enum HTokenType_ {
|
typedef enum HTokenType_ {
|
||||||
|
|
@ -44,6 +47,7 @@ typedef enum HTokenType_ {
|
||||||
TT_SINT,
|
TT_SINT,
|
||||||
TT_UINT,
|
TT_UINT,
|
||||||
TT_SEQUENCE,
|
TT_SEQUENCE,
|
||||||
|
TT_RESERVED_1, // reserved for backend-specific internal use
|
||||||
TT_USER = 64,
|
TT_USER = 64,
|
||||||
TT_ERR,
|
TT_ERR,
|
||||||
TT_MAX
|
TT_MAX
|
||||||
|
|
@ -75,7 +79,9 @@ typedef struct HParsedToken_ {
|
||||||
} HParsedToken;
|
} HParsedToken;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The result of a successful parse.
|
* The result of a successful parse. Note that this may reference the
|
||||||
|
* input string.
|
||||||
|
*
|
||||||
* If a parse fails, the parse result will be NULL.
|
* If a parse fails, the parse result will be NULL.
|
||||||
* If a parse is successful but there's nothing there (i.e., if end_p
|
* If a parse is successful but there's nothing there (i.e., if end_p
|
||||||
* succeeds) then there's a parse result but its ast is NULL.
|
* succeeds) then there's a parse result but its ast is NULL.
|
||||||
|
|
@ -111,12 +117,14 @@ typedef const HParsedToken* (*HAction)(const HParseResult *p);
|
||||||
*/
|
*/
|
||||||
typedef bool (*HPredicate)(HParseResult *p);
|
typedef bool (*HPredicate)(HParseResult *p);
|
||||||
|
|
||||||
typedef struct HParserVtable_ HParserVtable;
|
|
||||||
|
|
||||||
typedef struct HCFChoice_ HCFChoice;
|
typedef struct HCFChoice_ HCFChoice;
|
||||||
|
typedef struct HRVMProg_ HRVMProg;
|
||||||
|
typedef struct HParserVtable_ HParserVtable;
|
||||||
|
|
||||||
typedef struct HParser_ {
|
typedef struct HParser_ {
|
||||||
const HParserVtable *vtable;
|
const HParserVtable *vtable;
|
||||||
|
HParserBackend backend;
|
||||||
|
void* backend_data;
|
||||||
void *env;
|
void *env;
|
||||||
void *data; /* e.g., parse tables */
|
void *data; /* e.g., parse tables */
|
||||||
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
|
||||||
|
|
@ -191,7 +199,7 @@ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* inp
|
||||||
*
|
*
|
||||||
* Result token type: TT_BYTES
|
* Result token type: TT_BYTES
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
|
HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a single character, returns a parser that parses that
|
* Given a single character, returns a parser that parses that
|
||||||
|
|
@ -199,7 +207,7 @@ HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
|
HAMMER_FN_DECL(HParser*, h_ch, const uint8_t c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two single-character bounds, lower and upper, returns a parser
|
* Given two single-character bounds, lower and upper, returns a parser
|
||||||
|
|
@ -208,14 +216,14 @@ HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
|
HAMMER_FN_DECL(HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an integer parser, p, and two integer bounds, lower and upper,
|
* Given an integer parser, p, and two integer bounds, lower and upper,
|
||||||
* returns a parser that parses an integral value within the range
|
* returns a parser that parses an integral value within the range
|
||||||
* [lower, upper] (inclusive).
|
* [lower, upper] (inclusive).
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
|
HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses the specified number of bits. sign ==
|
* Returns a parser that parses the specified number of bits. sign ==
|
||||||
|
|
@ -223,63 +231,63 @@ HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lowe
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
|
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 8-byte integer value.
|
* Returns a parser that parses a signed 8-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
|
HAMMER_FN_DECL_NOARG(HParser*, h_int64);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 4-byte integer value.
|
* Returns a parser that parses a signed 4-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
|
HAMMER_FN_DECL_NOARG(HParser*, h_int32);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 2-byte integer value.
|
* Returns a parser that parses a signed 2-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
|
HAMMER_FN_DECL_NOARG(HParser*, h_int16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 1-byte integer value.
|
* Returns a parser that parses a signed 1-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
|
HAMMER_FN_DECL_NOARG(HParser*, h_int8);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 8-byte integer value.
|
* Returns a parser that parses an unsigned 8-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
|
HAMMER_FN_DECL_NOARG(HParser*, h_uint64);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 4-byte integer value.
|
* Returns a parser that parses an unsigned 4-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
|
HAMMER_FN_DECL_NOARG(HParser*, h_uint32);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 2-byte integer value.
|
* Returns a parser that parses an unsigned 2-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
|
HAMMER_FN_DECL_NOARG(HParser*, h_uint16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 1-byte integer value.
|
* Returns a parser that parses an unsigned 1-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
|
HAMMER_FN_DECL_NOARG(HParser*, h_uint8);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given another parser, p, returns a parser that skips any whitespace
|
* Given another parser, p, returns a parser that skips any whitespace
|
||||||
|
|
@ -287,7 +295,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
|
||||||
*
|
*
|
||||||
* Result token type: p's result type
|
* Result token type: p's result type
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_whitespace, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p and q, returns a parser that parses them in
|
* Given two parsers, p and q, returns a parser that parses them in
|
||||||
|
|
@ -295,7 +303,7 @@ HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: p's result type
|
* Result token type: p's result type
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
|
HAMMER_FN_DECL(HParser*, h_left, const HParser* p, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p and q, returns a parser that parses them in
|
* Given two parsers, p and q, returns a parser that parses them in
|
||||||
|
|
@ -303,7 +311,7 @@ HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
|
||||||
*
|
*
|
||||||
* Result token type: q's result type
|
* Result token type: q's result type
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
|
HAMMER_FN_DECL(HParser*, h_right, const HParser* p, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given three parsers, p, x, and q, returns a parser that parses them in
|
* Given three parsers, p, x, and q, returns a parser that parses them in
|
||||||
|
|
@ -311,7 +319,7 @@ HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
|
||||||
*
|
*
|
||||||
* Result token type: x's result type
|
* Result token type: x's result type
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
|
HAMMER_FN_DECL(HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given another parser, p, and a function f, returns a parser that
|
* Given another parser, p, and a function f, returns a parser that
|
||||||
|
|
@ -319,21 +327,21 @@ HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, con
|
||||||
*
|
*
|
||||||
* Result token type: any
|
* Result token type: any
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
|
HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a single character in the given charset.
|
* Parse a single character in the given charset.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
|
HAMMER_FN_DECL(HParser*, h_in, const uint8_t *charset, size_t length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a single character *NOT* in the given charset.
|
* Parse a single character *NOT* in the given charset.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
|
HAMMER_FN_DECL(HParser*, h_not_in, const uint8_t *charset, size_t length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A no-argument parser that succeeds if there is no more input to
|
* A no-argument parser that succeeds if there is no more input to
|
||||||
|
|
@ -341,14 +349,14 @@ HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
|
HAMMER_FN_DECL_NOARG(HParser*, h_end_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser always fails.
|
* This parser always fails.
|
||||||
*
|
*
|
||||||
* Result token type: NULL. Always.
|
* Result token type: NULL. Always.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
|
HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a null-terminated list of parsers, apply each parser in order.
|
* Given a null-terminated list of parsers, apply each parser in order.
|
||||||
|
|
@ -356,7 +364,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
|
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an array of parsers, p_array, apply each parser in order. The
|
* Given an array of parsers, p_array, apply each parser in order. The
|
||||||
|
|
@ -365,7 +373,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequenc
|
||||||
*
|
*
|
||||||
* Result token type: The type of the first successful parser's result.
|
* Result token type: The type of the first successful parser's result.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
|
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
|
@ -375,7 +383,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice,
|
||||||
*
|
*
|
||||||
* Result token type: p1's result type.
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(HParser*, h_butnot, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
|
@ -385,7 +393,7 @@ HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
|
||||||
*
|
*
|
||||||
* Result token type: p1's result type.
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(HParser*, h_difference, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
||||||
|
|
@ -393,7 +401,7 @@ HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p
|
||||||
*
|
*
|
||||||
* Result token type: The type of the result of whichever parser succeeded.
|
* Result token type: The type of the result of whichever parser succeeded.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(HParser*, h_xor, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for zero or more repetitions
|
* Given a parser, p, this parser succeeds for zero or more repetitions
|
||||||
|
|
@ -401,7 +409,7 @@ HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_many, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for one or more repetitions
|
* Given a parser, p, this parser succeeds for one or more repetitions
|
||||||
|
|
@ -409,7 +417,7 @@ HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_many1, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for exactly N repetitions
|
* Given a parser, p, this parser succeeds for exactly N repetitions
|
||||||
|
|
@ -417,7 +425,7 @@ HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
|
HAMMER_FN_DECL(HParser*, h_repeat_n, const HParser* p, const size_t n);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds with the value p parsed or
|
* Given a parser, p, this parser succeeds with the value p parsed or
|
||||||
|
|
@ -425,7 +433,7 @@ HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
|
||||||
*
|
*
|
||||||
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_optional, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
||||||
|
|
@ -433,7 +441,7 @@ HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_ignore, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, and a parser for a separator, sep, this parser
|
* Given a parser, p, and a parser for a separator, sep, this parser
|
||||||
|
|
@ -444,7 +452,7 @@ HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
|
HAMMER_FN_DECL(HParser*, h_sepBy, const HParser* p, const HParser* sep);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
||||||
|
|
@ -452,14 +460,14 @@ HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
|
HAMMER_FN_DECL(HParser*, h_sepBy1, const HParser* p, const HParser* sep);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser always returns a zero length match, i.e., empty string.
|
* This parser always returns a zero length match, i.e., empty string.
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
|
HAMMER_FN_DECL_NOARG(HParser*, h_epsilon_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser applies its first argument to read an unsigned integer
|
* This parser applies its first argument to read an unsigned integer
|
||||||
|
|
@ -470,7 +478,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
|
HAMMER_FN_DECL(HParser*, h_length_value, const HParser* length, const HParser* value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser attaches a predicate function, which returns true or
|
* This parser attaches a predicate function, which returns true or
|
||||||
|
|
@ -485,7 +493,7 @@ HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HPar
|
||||||
*
|
*
|
||||||
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
|
HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'and' parser asserts that a conditional syntax is satisfied,
|
* The 'and' parser asserts that a conditional syntax is satisfied,
|
||||||
|
|
@ -502,7 +510,7 @@ HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_and, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'not' parser asserts that a conditional syntax is *not*
|
* The 'not' parser asserts that a conditional syntax is *not*
|
||||||
|
|
@ -522,7 +530,7 @@ HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
|
HAMMER_FN_DECL(HParser*, h_not, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a parser that just calls out to another, as yet unknown,
|
* Create a parser that just calls out to another, as yet unknown,
|
||||||
|
|
@ -565,7 +573,7 @@ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent
|
||||||
*
|
*
|
||||||
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
|
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
|
||||||
*/
|
*/
|
||||||
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
|
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Document me
|
* TODO: Document me
|
||||||
|
|
@ -590,7 +598,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
|
||||||
void h_bit_writer_free(HBitWriter* w);
|
void h_bit_writer_free(HBitWriter* w);
|
||||||
|
|
||||||
// {{{ Benchmark functions
|
// {{{ Benchmark functions
|
||||||
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
|
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);
|
||||||
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
|
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
|
||||||
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
|
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
|
||||||
// }}}
|
// }}}
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ static inline void h_generic_free(HAllocator *allocator, void* ptr) {
|
||||||
allocator->free(allocator, ptr);
|
allocator->free(allocator, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
HAllocator system_allocator;
|
extern HAllocator system_allocator;
|
||||||
|
|
||||||
|
|
||||||
typedef struct HInputStream_ {
|
typedef struct HInputStream_ {
|
||||||
|
|
@ -131,7 +131,8 @@ struct HParseState_ {
|
||||||
|
|
||||||
typedef struct HParserBackendVTable_ {
|
typedef struct HParserBackendVTable_ {
|
||||||
int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
|
int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
|
||||||
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state);
|
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* parse_state);
|
||||||
|
void (*free)(HParser* parser);
|
||||||
} HParserBackendVTable;
|
} HParserBackendVTable;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -213,9 +214,10 @@ struct HBitWriter_ {
|
||||||
|
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
|
|
||||||
// Backends {{{
|
// Backends {{{
|
||||||
extern HParserBackendVTable h__packrat_backend_vtable;
|
extern HParserBackendVTable h__packrat_backend_vtable;
|
||||||
extern HParserBackendVTable h__ll_backend_vtable;
|
extern HParserBackendVTable h__llk_backend_vtable;
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||||
|
|
@ -295,9 +297,14 @@ struct HParserVtable_ {
|
||||||
HParseResult* (*parse)(void *env, HParseState *state);
|
HParseResult* (*parse)(void *env, HParseState *state);
|
||||||
bool (*isValidRegular)(void *env);
|
bool (*isValidRegular)(void *env);
|
||||||
bool (*isValidCF)(void *env);
|
bool (*isValidCF)(void *env);
|
||||||
|
bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean.
|
||||||
HCFChoice* (*desugar)(HAllocator *mm__, void *env);
|
HCFChoice* (*desugar)(HAllocator *mm__, void *env);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool h_false(void*);
|
||||||
|
bool h_true(void*);
|
||||||
|
bool h_not_regular(HRVMProg*, void*);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#define h_arena_malloc(a, s) malloc(s)
|
#define h_arena_malloc(a, s) malloc(s)
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ static HParseResult* parse_action(void *env, HParseState *state) {
|
||||||
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
|
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
|
||||||
if(tmp) {
|
if(tmp) {
|
||||||
const HParsedToken *tok = a->action(tmp);
|
const HParsedToken *tok = a->action(tmp);
|
||||||
return make_result(state, (HParsedToken*)tok);
|
return make_result(state->arena, (HParsedToken*)tok);
|
||||||
} else
|
} else
|
||||||
return NULL;
|
return NULL;
|
||||||
} else // either the parser's missing or the action's missing
|
} else // either the parser's missing or the action's missing
|
||||||
|
|
@ -44,18 +44,24 @@ static bool action_isValidCF(void *env) {
|
||||||
return a->p->vtable->isValidCF(a->p->env);
|
return a->p->vtable->isValidCF(a->p->env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool action_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
HParseAction *a = (HParseAction*)env;
|
||||||
|
return a->p->vtable->compile_to_rvm(prog, a->p->env);
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable action_vt = {
|
static const HParserVtable action_vt = {
|
||||||
.parse = parse_action,
|
.parse = parse_action,
|
||||||
.isValidRegular = action_isValidRegular,
|
.isValidRegular = action_isValidRegular,
|
||||||
.isValidCF = action_isValidCF,
|
.isValidCF = action_isValidCF,
|
||||||
.desugar = desugar_action,
|
.desugar = desugar_action,
|
||||||
|
.compile_to_rvm = action_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_action(const HParser* p, const HAction a) {
|
HParser* h_action(const HParser* p, const HAction a) {
|
||||||
return h_action__m(&system_allocator, p, a);
|
return h_action__m(&system_allocator, p, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
|
HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
|
||||||
HParseAction *env = h_new(HParseAction, 1);
|
HParseAction *env = h_new(HParseAction, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->action = a;
|
env->action = a;
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ static HParseResult *parse_and(void* env, HParseState* state) {
|
||||||
HParseResult *res = h_do_parse((HParser*)env, state);
|
HParseResult *res = h_do_parse((HParser*)env, state);
|
||||||
state->input_stream = bak;
|
state->input_stream = bak;
|
||||||
if (res)
|
if (res)
|
||||||
return make_result(state, NULL);
|
return make_result(state->arena, NULL);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,13 +22,14 @@ static const HParserVtable and_vt = {
|
||||||
revision. --mlp, 18/12/12 */
|
revision. --mlp, 18/12/12 */
|
||||||
.isValidCF = h_false, /* despite TODO above, this remains false. */
|
.isValidCF = h_false, /* despite TODO above, this remains false. */
|
||||||
.desugar = desugar_and,
|
.desugar = desugar_and,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_and(const HParser* p) {
|
HParser* h_and(const HParser* p) {
|
||||||
return h_and__m(&system_allocator, p);
|
return h_and__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_and__m(HAllocator* mm__, const HParser* p) {
|
||||||
// zero-width postive lookahead
|
// zero-width postive lookahead
|
||||||
return h_new_parser(mm__, &and_vt, (void *)p);
|
return h_new_parser(mm__, &and_vt, (void *)p);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,18 +47,24 @@ static HCFChoice* desugar_ab(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool ab_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
HAttrBool *ab = (HAttrBool*)env;
|
||||||
|
return h_compile_regex(prog, ab->p);
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable attr_bool_vt = {
|
static const HParserVtable attr_bool_vt = {
|
||||||
.parse = parse_attr_bool,
|
.parse = parse_attr_bool,
|
||||||
.isValidRegular = ab_isValidRegular,
|
.isValidRegular = ab_isValidRegular,
|
||||||
.isValidCF = ab_isValidCF,
|
.isValidCF = ab_isValidCF,
|
||||||
.desugar = desugar_ab,
|
.desugar = desugar_ab,
|
||||||
|
.compile_to_rvm = ab_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
||||||
return h_attr_bool__m(&system_allocator, p, pred);
|
return h_attr_bool__m(&system_allocator, p, pred);
|
||||||
}
|
}
|
||||||
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
|
HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
|
||||||
HAttrBool *env = h_new(HAttrBool, 1);
|
HAttrBool *env = h_new(HAttrBool, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->pred = pred;
|
env->pred = pred;
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <assert.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
struct bits_env {
|
struct bits_env {
|
||||||
|
|
@ -13,7 +14,7 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
|
||||||
result->sint = h_read_bits(&state->input_stream, env_->length, true);
|
result->sint = h_read_bits(&state->input_stream, env_->length, true);
|
||||||
else
|
else
|
||||||
result->uint = h_read_bits(&state->input_stream, env_->length, false);
|
result->uint = h_read_bits(&state->input_stream, env_->length, false);
|
||||||
return make_result(state, result);
|
return make_result(state->arena, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
|
static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
|
||||||
|
|
@ -41,16 +42,43 @@ static HCFChoice* desugar_bits(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) {
|
||||||
|
// BUG: relies un undefined behaviour: int64_t is a signed uint64_t; not necessarily true on 32-bit
|
||||||
|
struct bits_env *env_ = env;
|
||||||
|
HParsedToken *top = ctx->stack[ctx->stack_count-1];
|
||||||
|
assert(top->token_type == TT_BYTES);
|
||||||
|
uint64_t res = 0;
|
||||||
|
for (size_t i = 0; i < top->bytes.len; i++)
|
||||||
|
res = (res << 8) | top->bytes.token[i]; // TODO: Handle other endiannesses.
|
||||||
|
top->uint = res; // possibly cast to signed through union
|
||||||
|
top->token_type = (env_->signedp ? TT_SINT : TT_UINT);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool bits_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
struct bits_env *env_ = (struct bits_env*)env;
|
||||||
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
for (size_t i=0; (i < env_->length)/8; ++i) { // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
|
||||||
|
h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
|
||||||
|
h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
|
}
|
||||||
|
h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_bits, env));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable bits_vt = {
|
static const HParserVtable bits_vt = {
|
||||||
.parse = parse_bits,
|
.parse = parse_bits,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_bits,
|
.desugar = desugar_bits,
|
||||||
|
.compile_to_rvm = bits_ctrvm,
|
||||||
};
|
};
|
||||||
const HParser* h_bits(size_t len, bool sign) {
|
|
||||||
|
HParser* h_bits(size_t len, bool sign) {
|
||||||
return h_bits__m(&system_allocator, len, sign);
|
return h_bits__m(&system_allocator, len, sign);
|
||||||
}
|
}
|
||||||
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
|
HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
|
||||||
struct bits_env *env = h_new(struct bits_env, 1);
|
struct bits_env *env = h_new(struct bits_env, 1);
|
||||||
env->length = len;
|
env->length = len;
|
||||||
env->signedp = sign;
|
env->signedp = sign;
|
||||||
|
|
@ -58,10 +86,10 @@ const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SIZED_BITS(name_pre, len, signedp) \
|
#define SIZED_BITS(name_pre, len, signedp) \
|
||||||
const HParser* h_##name_pre##len () { \
|
HParser* h_##name_pre##len () { \
|
||||||
return h_bits__m(&system_allocator, len, signedp); \
|
return h_bits__m(&system_allocator, len, signedp); \
|
||||||
} \
|
} \
|
||||||
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
|
HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
|
||||||
return h_bits__m(mm__, len, signedp); \
|
return h_bits__m(mm__, len, signedp); \
|
||||||
}
|
}
|
||||||
SIZED_BITS(int, 8, true)
|
SIZED_BITS(int, 8, true)
|
||||||
|
|
|
||||||
|
|
@ -43,14 +43,15 @@ static HCFChoice* desugar_butnot(HAllocator *mm__, void *env) {
|
||||||
static const HParserVtable butnot_vt = {
|
static const HParserVtable butnot_vt = {
|
||||||
.parse = parse_butnot,
|
.parse = parse_butnot,
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = h_false,
|
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
|
||||||
.desugar = desugar_butnot,
|
.desugar = desugar_butnot,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
||||||
return h_butnot__m(&system_allocator, p1, p2);
|
return h_butnot__m(&system_allocator, p1, p2);
|
||||||
}
|
}
|
||||||
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1;
|
env->p1 = p1;
|
||||||
env->p2 = p2;
|
env->p2 = p2;
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
|
||||||
if (c == r) {
|
if (c == r) {
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||||
tok->token_type = TT_UINT; tok->uint = r;
|
tok->token_type = TT_UINT; tok->uint = r;
|
||||||
return make_result(state, tok);
|
return make_result(state->arena, tok);
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -20,16 +20,25 @@ static HCFChoice* desugar_ch(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool ch_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
uint8_t c = (uint8_t)(unsigned long)(env);
|
||||||
|
// TODO: Does this capture anything?
|
||||||
|
h_rvm_insert_insn(prog, RVM_MATCH, c & c << 8);
|
||||||
|
h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable ch_vt = {
|
static const HParserVtable ch_vt = {
|
||||||
.parse = parse_ch,
|
.parse = parse_ch,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_ch,
|
.desugar = desugar_ch,
|
||||||
|
.compile_to_rvm = ch_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ch(const uint8_t c) {
|
HParser* h_ch(const uint8_t c) {
|
||||||
return h_ch__m(&system_allocator, c);
|
return h_ch__m(&system_allocator, c);
|
||||||
}
|
}
|
||||||
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
|
HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
|
||||||
return h_new_parser(mm__, &ch_vt, (void *)(uintptr_t)c);
|
return h_new_parser(mm__, &ch_vt, (void *)(uintptr_t)c);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
|
||||||
if (charset_isset(cs, in)) {
|
if (charset_isset(cs, in)) {
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||||
tok->token_type = TT_UINT; tok->uint = in;
|
tok->token_type = TT_UINT; tok->uint = in;
|
||||||
return make_result(state, tok);
|
return make_result(state->arena, tok);
|
||||||
} else
|
} else
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -22,17 +22,38 @@ static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FUTURE: this is horribly inefficient
|
||||||
|
static bool cs_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
HCharset cs = (HCharset)env;
|
||||||
|
uint16_t start = h_rvm_get_ip(prog);
|
||||||
|
for (size_t i=0; i<256; ++i) {
|
||||||
|
if (charset_isset(cs, i)) {
|
||||||
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
|
h_rvm_insert_insn(prog, RVM_MATCH, i & i << 8);
|
||||||
|
h_rvm_insert_insn(prog, RVM_GOTO, 0);
|
||||||
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
|
for (size_t i=start; i<jump; ++i) {
|
||||||
|
if (RVM_GOTO == prog->insns[i].op)
|
||||||
|
h_rvm_patch_arg(prog, i, jump);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable charset_vt = {
|
static const HParserVtable charset_vt = {
|
||||||
.parse = parse_charset,
|
.parse = parse_charset,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_charset,
|
.desugar = desugar_charset,
|
||||||
|
.compile_to_rvm = cs_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
||||||
return h_ch_range__m(&system_allocator, lower, upper);
|
return h_ch_range__m(&system_allocator, lower, upper);
|
||||||
}
|
}
|
||||||
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
|
HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
|
||||||
HCharset cs = new_charset(mm__);
|
HCharset cs = new_charset(mm__);
|
||||||
for (int i = 0; i < 256; i++)
|
for (int i = 0; i < 256; i++)
|
||||||
charset_set(cs, i, (lower <= i) && (i <= upper));
|
charset_set(cs, i, (lower <= i) && (i <= upper));
|
||||||
|
|
@ -40,7 +61,7 @@ const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
|
static HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
|
||||||
HCharset cs = new_charset(mm__);
|
HCharset cs = new_charset(mm__);
|
||||||
for (size_t i = 0; i < 256; i++)
|
for (size_t i = 0; i < 256; i++)
|
||||||
charset_set(cs, i, 1-val);
|
charset_set(cs, i, 1-val);
|
||||||
|
|
@ -50,19 +71,19 @@ static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, s
|
||||||
return h_new_parser(mm__, &charset_vt, cs);
|
return h_new_parser(mm__, &charset_vt, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_in(const uint8_t *options, size_t count) {
|
HParser* h_in(const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not__m(&system_allocator, options, count, 1);
|
return h_in_or_not__m(&system_allocator, options, count, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not__m(mm__, options, count, 1);
|
return h_in_or_not__m(mm__, options, count, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_not_in(const uint8_t *options, size_t count) {
|
HParser* h_not_in(const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not__m(&system_allocator, options, count, 0);
|
return h_in_or_not__m(&system_allocator, options, count, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not__m(mm__, options, count, 0);
|
return h_in_or_not__m(mm__, options, count, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,34 +54,53 @@ static HCFChoice* desugar_choice(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool choice_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
HSequence *s = (HSequence*)env;
|
||||||
|
uint16_t gotos[s->len];
|
||||||
|
uint16_t start = h_rvm_get_ip(prog);
|
||||||
|
for (size_t i=0; i<s->len; ++i) {
|
||||||
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
|
if (!h_compile_regex(prog, s->p_array[i]->env))
|
||||||
|
return false;
|
||||||
|
gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
|
||||||
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
||||||
|
}
|
||||||
|
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
|
||||||
|
for (size_t i=start; i<s->len; ++i) {
|
||||||
|
h_rvm_patch_arg(prog, gotos[i], jump);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable choice_vt = {
|
static const HParserVtable choice_vt = {
|
||||||
.parse = parse_choice,
|
.parse = parse_choice,
|
||||||
.isValidRegular = choice_isValidRegular,
|
.isValidRegular = choice_isValidRegular,
|
||||||
.isValidCF = choice_isValidCF,
|
.isValidCF = choice_isValidCF,
|
||||||
.desugar = desugar_choice,
|
.desugar = desugar_choice,
|
||||||
|
.compile_to_rvm = choice_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_choice(const HParser* p, ...) {
|
HParser* h_choice(const HParser* p, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, p);
|
va_start(ap, p);
|
||||||
const HParser* ret = h_choice__mv(&system_allocator, p, ap);
|
HParser* ret = h_choice__mv(&system_allocator, p, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
|
HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, p);
|
va_start(ap, p);
|
||||||
const HParser* ret = h_choice__mv(mm__, p, ap);
|
HParser* ret = h_choice__mv(mm__, p, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_choice__v(const HParser* p, va_list ap) {
|
HParser* h_choice__v(const HParser* p, va_list ap) {
|
||||||
return h_choice__mv(&system_allocator, p, ap);
|
return h_choice__mv(&system_allocator, p, ap);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
|
HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
HSequence *s = h_new(HSequence, 1);
|
HSequence *s = h_new(HSequence, 1);
|
||||||
|
|
|
||||||
|
|
@ -42,14 +42,15 @@ static HCFChoice* desugar_difference(HAllocator *mm__, void *env) {
|
||||||
static HParserVtable difference_vt = {
|
static HParserVtable difference_vt = {
|
||||||
.parse = parse_difference,
|
.parse = parse_difference,
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = h_false,
|
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
|
||||||
.desugar = desugar_difference,
|
.desugar = desugar_difference,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_difference(const HParser* p1, const HParser* p2) {
|
HParser* h_difference(const HParser* p1, const HParser* p2) {
|
||||||
return h_difference__m(&system_allocator, p1, p2);
|
return h_difference__m(&system_allocator, p1, p2);
|
||||||
}
|
}
|
||||||
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1;
|
env->p1 = p1;
|
||||||
env->p2 = p2;
|
env->p2 = p2;
|
||||||
|
|
|
||||||
|
|
@ -17,17 +17,23 @@ static HCFChoice* desugar_end(HAllocator *mm__, void *env) {
|
||||||
return &ret;
|
return &ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool end_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
h_rvm_insert_insn(prog, RVM_EOF, 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable end_vt = {
|
static const HParserVtable end_vt = {
|
||||||
.parse = parse_end,
|
.parse = parse_end,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_end,
|
.desugar = desugar_end,
|
||||||
|
.compile_to_rvm = end_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_end_p() {
|
HParser* h_end_p() {
|
||||||
return h_end_p__m(&system_allocator);
|
return h_end_p__m(&system_allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_end_p__m(HAllocator* mm__) {
|
HParser* h_end_p__m(HAllocator* mm__) {
|
||||||
return h_new_parser(mm__, &end_vt, NULL);
|
return h_new_parser(mm__, &end_vt, NULL);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,21 +8,23 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool epsilon_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable epsilon_vt = {
|
static const HParserVtable epsilon_vt = {
|
||||||
.parse = parse_epsilon,
|
.parse = parse_epsilon,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_epsilon,
|
.desugar = desugar_epsilon,
|
||||||
|
.compile_to_rvm = epsilon_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
static HParser epsilon_p = {
|
HParser* h_epsilon_p() {
|
||||||
.vtable = &epsilon_vt,
|
return h_epsilon_p__m(&system_allocator);
|
||||||
.env = NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
const HParser* h_epsilon_p() {
|
|
||||||
return &epsilon_p;
|
|
||||||
}
|
}
|
||||||
const HParser* h_epsilon_p__m(HAllocator* mm__) {
|
HParser* h_epsilon_p__m(HAllocator* mm__) {
|
||||||
return &epsilon_p;
|
HParser *epsilon_p = h_new(HParser, 1);
|
||||||
|
epsilon_p->vtable = &epsilon_vt;
|
||||||
|
return epsilon_p;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <assert.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
static HParseResult* parse_ignore(void* env, HParseState* state) {
|
static HParseResult* parse_ignore(void* env, HParseState* state) {
|
||||||
|
|
@ -25,16 +26,30 @@ static HCFChoice* desugar_ignore(HAllocator *mm__, void *env) {
|
||||||
return (h_desugar(mm__, p));
|
return (h_desugar(mm__, p));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) {
|
||||||
|
assert(ctx->stack_count > 0);
|
||||||
|
ctx->stack_count--;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ignore_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
HParser *p = (HParser*)env;
|
||||||
|
h_compile_regex(prog, p->env);
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable ignore_vt = {
|
static const HParserVtable ignore_vt = {
|
||||||
.parse = parse_ignore,
|
.parse = parse_ignore,
|
||||||
.isValidRegular = ignore_isValidRegular,
|
.isValidRegular = ignore_isValidRegular,
|
||||||
.isValidCF = ignore_isValidCF,
|
.isValidCF = ignore_isValidCF,
|
||||||
.desugar = desugar_ignore,
|
.desugar = desugar_ignore,
|
||||||
|
.compile_to_rvm = ignore_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ignore(const HParser* p) {
|
HParser* h_ignore(const HParser* p) {
|
||||||
return h_ignore__m(&system_allocator, p);
|
return h_ignore__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
|
||||||
return h_new_parser(mm__, &ignore_vt, (void *)p);
|
return h_new_parser(mm__, &ignore_vt, (void *)p);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <assert.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -5,7 +6,7 @@
|
||||||
// general case: parse sequence, pick one result
|
// general case: parse sequence, pick one result
|
||||||
//
|
//
|
||||||
|
|
||||||
typedef struct {
|
typedef struct HIgnoreSeq_ {
|
||||||
const HParser **parsers;
|
const HParser **parsers;
|
||||||
size_t len; // how many parsers in 'ps'
|
size_t len; // how many parsers in 'ps'
|
||||||
size_t which; // whose result to return
|
size_t which; // whose result to return
|
||||||
|
|
@ -61,11 +62,40 @@ static bool is_isValidCF(void *env) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
|
||||||
|
HIgnoreSeq *seq = (HIgnoreSeq*)env;
|
||||||
|
HParsedToken* save;
|
||||||
|
// We can assume that each subitem generated at most one item on the
|
||||||
|
// stack.
|
||||||
|
assert(seq->len >= 1);
|
||||||
|
for (int i = seq->len - 1; i>=0; i--) {
|
||||||
|
if (i == (int)seq->which && ctx->stack[ctx->stack_count]->token_type != TT_MARK)
|
||||||
|
save = ctx->stack[ctx->stack_count-1];
|
||||||
|
// skip over everything up to and including the mark.
|
||||||
|
while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)
|
||||||
|
;
|
||||||
|
}
|
||||||
|
ctx->stack[ctx->stack_count++] = save;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
HIgnoreSeq *seq = (HIgnoreSeq*)env;
|
||||||
|
for (size_t i=0; i<seq->len; ++i) {
|
||||||
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
if (!h_compile_regex(prog, seq->parsers[i]->env))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable ignoreseq_vt = {
|
static const HParserVtable ignoreseq_vt = {
|
||||||
.parse = parse_ignoreseq,
|
.parse = parse_ignoreseq,
|
||||||
.isValidRegular = is_isValidRegular,
|
.isValidRegular = is_isValidRegular,
|
||||||
.isValidCF = is_isValidCF,
|
.isValidCF = is_isValidCF,
|
||||||
.desugar = desugar_ignoreseq,
|
.desugar = desugar_ignoreseq,
|
||||||
|
.compile_to_rvm = is_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -73,7 +103,7 @@ static const HParserVtable ignoreseq_vt = {
|
||||||
// API frontends
|
// API frontends
|
||||||
//
|
//
|
||||||
|
|
||||||
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
|
static HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
|
||||||
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||||
seq->parsers = h_new(const HParser*, 2);
|
seq->parsers = h_new(const HParser*, 2);
|
||||||
seq->parsers[0] = p;
|
seq->parsers[0] = p;
|
||||||
|
|
@ -84,25 +114,25 @@ static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const H
|
||||||
return h_new_parser(mm__, &ignoreseq_vt, seq);
|
return h_new_parser(mm__, &ignoreseq_vt, seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_left(const HParser* p, const HParser* q) {
|
HParser* h_left(const HParser* p, const HParser* q) {
|
||||||
return h_leftright__m(&system_allocator, p, q, 0);
|
return h_leftright__m(&system_allocator, p, q, 0);
|
||||||
}
|
}
|
||||||
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||||
return h_leftright__m(mm__, p, q, 0);
|
return h_leftright__m(mm__, p, q, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_right(const HParser* p, const HParser* q) {
|
HParser* h_right(const HParser* p, const HParser* q) {
|
||||||
return h_leftright__m(&system_allocator, p, q, 1);
|
return h_leftright__m(&system_allocator, p, q, 1);
|
||||||
}
|
}
|
||||||
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||||
return h_leftright__m(mm__, p, q, 1);
|
return h_leftright__m(mm__, p, q, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
|
HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
|
||||||
return h_middle__m(&system_allocator, p, x, q);
|
return h_middle__m(&system_allocator, p, x, q);
|
||||||
}
|
}
|
||||||
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
|
HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
|
||||||
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||||
seq->parsers = h_new(const HParser*, 3);
|
seq->parsers = h_new(const HParser*, 3);
|
||||||
seq->parsers[0] = p;
|
seq->parsers[0] = p;
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ static const HParserVtable indirect_vt = {
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = indirect_isValidCF,
|
.isValidCF = indirect_isValidCF,
|
||||||
.desugar = desugar_indirect,
|
.desugar = desugar_indirect,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
void h_bind_indirect(HParser* indirect, const HParser* inner) {
|
void h_bind_indirect(HParser* indirect, const HParser* inner) {
|
||||||
|
|
|
||||||
|
|
@ -121,17 +121,39 @@ static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) {
|
||||||
return gen_int_range(mm__, r->lower, r->upper, bytes);
|
return gen_int_range(mm__, r->lower, r->upper, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
|
||||||
|
HRange *r_env = (HRange*)env;
|
||||||
|
HParsedToken *head = ctx->stack[ctx->stack_count-1];
|
||||||
|
switch (head-> token_type) {
|
||||||
|
case TT_SINT:
|
||||||
|
return head->sint >= r_env->lower && head->sint <= r_env->upper;
|
||||||
|
case TT_UINT:
|
||||||
|
return head->uint >= (uint64_t)r_env->lower && head->uint <= (uint64_t)r_env->upper;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ir_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
HRange *r_env = (HRange*)env;
|
||||||
|
|
||||||
|
h_compile_regex(prog, r_env->p);
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable int_range_vt = {
|
static const HParserVtable int_range_vt = {
|
||||||
.parse = parse_int_range,
|
.parse = parse_int_range,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_int_range,
|
.desugar = desugar_int_range,
|
||||||
|
.compile_to_rvm = ir_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
|
HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
|
||||||
return h_int_range__m(&system_allocator, p, lower, upper);
|
return h_int_range__m(&system_allocator, p, lower, upper);
|
||||||
}
|
}
|
||||||
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
|
HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
|
||||||
// p must be an integer parser, which means it's using parse_bits
|
// p must be an integer parser, which means it's using parse_bits
|
||||||
// TODO: re-add this check
|
// TODO: re-add this check
|
||||||
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
|
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ static HParseResult *parse_many(void* env, HParseState *state) {
|
||||||
HParsedToken *res = a_new(HParsedToken, 1);
|
HParsedToken *res = a_new(HParsedToken, 1);
|
||||||
res->token_type = TT_SEQUENCE;
|
res->token_type = TT_SEQUENCE;
|
||||||
res->seq = seq;
|
res->seq = seq;
|
||||||
return make_result(state, res);
|
return make_result(state->arena, res);
|
||||||
err0:
|
err0:
|
||||||
if (count >= env_->count) {
|
if (count >= env_->count) {
|
||||||
state->input_stream = bak;
|
state->input_stream = bak;
|
||||||
|
|
@ -112,17 +112,40 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
|
||||||
return ma;
|
return ma;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool many_ctrvm(HRVMProg *prog, void *env) {
|
||||||
|
HRepeat *repeat = (HRepeat*)env;
|
||||||
|
// FIXME: Implement clear_to_mark
|
||||||
|
uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
|
||||||
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
// TODO: implement min and max properly. Right now, it's always min==0, max==inf
|
||||||
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
|
if (!h_compile_regex(prog, repeat->p))
|
||||||
|
return false;
|
||||||
|
if (repeat->sep != NULL) {
|
||||||
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
if (!h_compile_regex(prog, repeat->sep))
|
||||||
|
return false;
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
|
||||||
|
}
|
||||||
|
h_rvm_insert_insn(prog, RVM_GOTO, insn);
|
||||||
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
||||||
|
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable many_vt = {
|
static const HParserVtable many_vt = {
|
||||||
.parse = parse_many,
|
.parse = parse_many,
|
||||||
.isValidRegular = many_isValidRegular,
|
.isValidRegular = many_isValidRegular,
|
||||||
.isValidCF = many_isValidCF,
|
.isValidCF = many_isValidCF,
|
||||||
.desugar = desugar_many,
|
.desugar = desugar_many,
|
||||||
|
.compile_to_rvm = many_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_many(const HParser* p) {
|
HParser* h_many(const HParser* p) {
|
||||||
return h_many__m(&system_allocator, p);
|
return h_many__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_many__m(HAllocator* mm__, const HParser* p) {
|
||||||
HRepeat *env = h_new(HRepeat, 1);
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p__m(mm__);
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
|
|
@ -131,10 +154,10 @@ const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
|
||||||
return h_new_parser(mm__, &many_vt, env);
|
return h_new_parser(mm__, &many_vt, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_many1(const HParser* p) {
|
HParser* h_many1(const HParser* p) {
|
||||||
return h_many1__m(&system_allocator, p);
|
return h_many1__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
|
||||||
HRepeat *env = h_new(HRepeat, 1);
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p__m(mm__);
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
|
|
@ -143,10 +166,10 @@ const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
|
||||||
return h_new_parser(mm__, &many_vt, env);
|
return h_new_parser(mm__, &many_vt, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
HParser* h_repeat_n(const HParser* p, const size_t n) {
|
||||||
return h_repeat_n__m(&system_allocator, p, n);
|
return h_repeat_n__m(&system_allocator, p, n);
|
||||||
}
|
}
|
||||||
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
|
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
|
||||||
HRepeat *env = h_new(HRepeat, 1);
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p__m(mm__);
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
|
|
@ -155,10 +178,10 @@ const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n)
|
||||||
return h_new_parser(mm__, &many_vt, env);
|
return h_new_parser(mm__, &many_vt, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
||||||
return h_sepBy__m(&system_allocator, p, sep);
|
return h_sepBy__m(&system_allocator, p, sep);
|
||||||
}
|
}
|
||||||
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||||
HRepeat *env = h_new(HRepeat, 1);
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = sep;
|
env->sep = sep;
|
||||||
|
|
@ -167,10 +190,10 @@ const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep
|
||||||
return h_new_parser(mm__, &many_vt, env);
|
return h_new_parser(mm__, &many_vt, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
||||||
return h_sepBy1__m(&system_allocator, p, sep);
|
return h_sepBy1__m(&system_allocator, p, sep);
|
||||||
}
|
}
|
||||||
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||||
HRepeat *env = h_new(HRepeat, 1);
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = sep;
|
env->sep = sep;
|
||||||
|
|
@ -213,10 +236,10 @@ static const HParserVtable length_value_vt = {
|
||||||
.desugar = desugar_length_value,
|
.desugar = desugar_length_value,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_length_value(const HParser* length, const HParser* value) {
|
HParser* h_length_value(const HParser* length, const HParser* value) {
|
||||||
return h_length_value__m(&system_allocator, length, value);
|
return h_length_value__m(&system_allocator, length, value);
|
||||||
}
|
}
|
||||||
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
|
HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
|
||||||
HLenVal *env = h_new(HLenVal, 1);
|
HLenVal *env = h_new(HLenVal, 1);
|
||||||
env->length = length;
|
env->length = length;
|
||||||
env->value = value;
|
env->value = value;
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ static HParseResult* parse_not(void* env, HParseState* state) {
|
||||||
return NULL;
|
return NULL;
|
||||||
else {
|
else {
|
||||||
state->input_stream = bak;
|
state->input_stream = bak;
|
||||||
return make_result(state, NULL);
|
return make_result(state->arena, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -20,11 +20,12 @@ static const HParserVtable not_vt = {
|
||||||
.isValidRegular = h_false, /* see and.c for why */
|
.isValidRegular = h_false, /* see and.c for why */
|
||||||
.isValidCF = h_false, /* also see and.c for why */
|
.isValidCF = h_false, /* also see and.c for why */
|
||||||
.desugar = desugar_not,
|
.desugar = desugar_not,
|
||||||
|
.compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this.
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_not(const HParser* p) {
|
HParser* h_not(const HParser* p) {
|
||||||
return h_not__m(&system_allocator, p);
|
return h_not__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_not__m(HAllocator* mm__, const HParser* p) {
|
||||||
return h_new_parser(mm__, &not_vt, (void *)p);
|
return h_new_parser(mm__, &not_vt, (void *)p);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
|
|
||||||
static HParseResult* parse_nothing() {
|
static HParseResult* parse_nothing() {
|
||||||
// not a mistake, this parser always fails
|
// not a mistake, this parser always fails
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
@ -15,16 +14,23 @@ static HCFChoice *desugar_nothing(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool nothing_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
h_rvm_insert_insn(prog, RVM_MATCH, 0x0000);
|
||||||
|
h_rvm_insert_insn(prog, RVM_MATCH, 0xFFFF);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable nothing_vt = {
|
static const HParserVtable nothing_vt = {
|
||||||
.parse = parse_nothing,
|
.parse = parse_nothing,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_nothing,
|
.desugar = desugar_nothing,
|
||||||
|
.compile_to_rvm = nothing_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_nothing_p() {
|
HParser* h_nothing_p() {
|
||||||
return h_nothing_p__m(&system_allocator);
|
return h_nothing_p__m(&system_allocator);
|
||||||
}
|
}
|
||||||
const HParser* h_nothing_p__m(HAllocator* mm__) {
|
HParser* h_nothing_p__m(HAllocator* mm__) {
|
||||||
return h_new_parser(mm__, &nothing_vt, NULL);
|
return h_new_parser(mm__, &nothing_vt, NULL);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <assert.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
static HParseResult* parse_optional(void* env, HParseState* state) {
|
static HParseResult* parse_optional(void* env, HParseState* state) {
|
||||||
|
|
@ -8,7 +9,7 @@ static HParseResult* parse_optional(void* env, HParseState* state) {
|
||||||
state->input_stream = bak;
|
state->input_stream = bak;
|
||||||
HParsedToken *ast = a_new(HParsedToken, 1);
|
HParsedToken *ast = a_new(HParsedToken, 1);
|
||||||
ast->token_type = TT_NONE;
|
ast->token_type = TT_NONE;
|
||||||
return make_result(state, ast);
|
return make_result(state->arena, ast);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool opt_isValidRegular(void *env) {
|
static bool opt_isValidRegular(void *env) {
|
||||||
|
|
@ -26,17 +27,40 @@ static HCFChoice* desugar_optional(HAllocator *mm__, void *env) {
|
||||||
return h_desugar(mm__, p);
|
return h_desugar(mm__, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) {
|
||||||
|
if (ctx->stack[ctx->stack_count-1]->token_type == TT_MARK) {
|
||||||
|
ctx->stack[ctx->stack_count-1]->token_type = TT_NONE;
|
||||||
|
} else {
|
||||||
|
ctx->stack_count--;
|
||||||
|
assert(ctx->stack[ctx->stack_count-1]->token_type == TT_MARK);
|
||||||
|
ctx->stack[ctx->stack_count-1] = ctx->stack[ctx->stack_count];
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool opt_ctrvm(HRVMProg *prog, void* env) {
|
||||||
|
h_rvm_insert_insn(prog, RVM_PUSH, 0);
|
||||||
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
||||||
|
HParser *p = (HParser*) env;
|
||||||
|
if (!h_compile_regex(prog, p->env))
|
||||||
|
return false;
|
||||||
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
||||||
|
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static const HParserVtable optional_vt = {
|
static const HParserVtable optional_vt = {
|
||||||
.parse = parse_optional,
|
.parse = parse_optional,
|
||||||
.isValidRegular = opt_isValidRegular,
|
.isValidRegular = opt_isValidRegular,
|
||||||
.isValidCF = opt_isValidCF,
|
.isValidCF = opt_isValidCF,
|
||||||
.desugar = desugar_optional,
|
.desugar = desugar_optional,
|
||||||
|
.compile_to_rvm = opt_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_optional(const HParser* p) {
|
HParser* h_optional(const HParser* p) {
|
||||||
return h_optional__m(&system_allocator, p);
|
return h_optional__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
|
HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
|
||||||
// TODO: re-add this
|
// TODO: re-add this
|
||||||
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
|
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
|
||||||
return h_new_parser(mm__, &optional_vt, (void *)p);
|
return h_new_parser(mm__, &optional_vt, (void *)p);
|
||||||
|
|
|
||||||
|
|
@ -2,15 +2,16 @@
|
||||||
#define HAMMER_PARSE_INTERNAL__H
|
#define HAMMER_PARSE_INTERNAL__H
|
||||||
#include "../hammer.h"
|
#include "../hammer.h"
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
|
#include "../backends/regex.h"
|
||||||
|
|
||||||
#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
|
#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
|
||||||
#define a_new(typ, count) a_new_(state->arena, typ, count)
|
#define a_new(typ, count) a_new_(state->arena, typ, count)
|
||||||
// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out.
|
// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out.
|
||||||
|
|
||||||
static inline HParseResult* make_result(HParseState *state, HParsedToken *tok) {
|
static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
|
||||||
HParseResult *ret = a_new(HParseResult, 1);
|
HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
|
||||||
ret->ast = tok;
|
ret->ast = tok;
|
||||||
ret->arena = state->arena;
|
ret->arena = arena;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -23,9 +24,6 @@ static inline size_t token_length(HParseResult *pr) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool h_true(void *env) { return true; }
|
|
||||||
static inline bool h_false(void *env) { return false; }
|
|
||||||
|
|
||||||
/* Epsilon rules happen during desugaring. This handles them. */
|
/* Epsilon rules happen during desugaring. This handles them. */
|
||||||
static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) {
|
static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) {
|
||||||
static HCFChoice *res_seq_l[] = {NULL};
|
static HCFChoice *res_seq_l[] = {NULL};
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ static HParseResult* parse_sequence(void *env, HParseState *state) {
|
||||||
}
|
}
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||||
tok->token_type = TT_SEQUENCE; tok->seq = seq;
|
tok->token_type = TT_SEQUENCE; tok->seq = seq;
|
||||||
return make_result(state, tok);
|
return make_result(state->arena, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool sequence_isValidRegular(void *env) {
|
static bool sequence_isValidRegular(void *env) {
|
||||||
|
|
@ -59,34 +59,46 @@ static HCFChoice* desugar_sequence(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Emit regex-VM code for a sequence parser.
 *
 * Emits an RVM_PUSH, then the code of every child parser in order, then an
 * RVM_ACTION bound to h_svm_action_make_sequence.  Returns false as soon as
 * any child fails to compile (nothing further is emitted), true otherwise.
 */
static bool sequence_ctrvm(HRVMProg *prog, void *env) {
  const HSequence *seq = (HSequence*)env;

  h_rvm_insert_insn(prog, RVM_PUSH, 0);

  size_t idx = 0;
  while (idx < seq->len) {
    const HParser *child = seq->p_array[idx++];
    bool compiled = child->vtable->compile_to_rvm(prog, child->env);
    if (!compiled) {
      return false;
    }
  }

  h_rvm_insert_insn(prog, RVM_ACTION,
                    h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
  return true;
}
|
||||||
|
|
||||||
static const HParserVtable sequence_vt = {
|
static const HParserVtable sequence_vt = {
|
||||||
.parse = parse_sequence,
|
.parse = parse_sequence,
|
||||||
.isValidRegular = sequence_isValidRegular,
|
.isValidRegular = sequence_isValidRegular,
|
||||||
.isValidCF = sequence_isValidCF,
|
.isValidCF = sequence_isValidCF,
|
||||||
.desugar = desugar_sequence,
|
.desugar = desugar_sequence,
|
||||||
|
.compile_to_rvm = sequence_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_sequence(const HParser* p, ...) {
|
HParser* h_sequence(const HParser* p, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, p);
|
va_start(ap, p);
|
||||||
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
|
HParser* ret = h_sequence__mv(&system_allocator, p, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Variadic front end with an explicit allocator: forwards `p` and the
 * remaining arguments to h_sequence__mv, allocating through `mm__`. */
HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
  va_list args;
  HParser *parser;

  va_start(args, p);
  parser = h_sequence__mv(mm__, p, args);
  va_end(args);

  return parser;
}
|
||||||
|
|
||||||
const HParser* h_sequence__v(const HParser* p, va_list ap) {
|
HParser* h_sequence__v(const HParser* p, va_list ap) {
|
||||||
return h_sequence__mv(&system_allocator, p, ap);
|
return h_sequence__mv(&system_allocator, p, ap);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
|
HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
const HParser *arg;
|
const HParser *arg;
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,10 @@ static HParseResult* parse_token(void *env, HParseState *state) {
|
||||||
}
|
}
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||||
tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
|
tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
|
||||||
return make_result(state, tok);
|
return make_result(state->arena, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
|
static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
|
||||||
HToken *tok = (HToken*)env;
|
HToken *tok = (HToken*)env;
|
||||||
HCFSequence *seq = h_new(HCFSequence, 1);
|
HCFSequence *seq = h_new(HCFSequence, 1);
|
||||||
|
|
@ -37,17 +38,29 @@ static HCFChoice* desugar_token(HAllocator *mm__, void *env) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Emit regex-VM code for a literal token.
 *
 * Emits an RVM_PUSH, then for every byte of the token an RVM_MATCH on
 * exactly that byte followed by an RVM_STEP, and finally an RVM_CAPTURE.
 * Always returns true.
 */
static bool token_ctrvm(HRVMProg *prog, void *env) {
  HToken *t = (HToken*)env;
  h_rvm_insert_insn(prog, RVM_PUSH, 0);
  // size_t index: the token length is unsigned, so an int index would be a
  // signed/unsigned comparison.
  for (size_t i = 0; i < t->len; ++i) {
    uint8_t byte = t->str[i];
    // The MATCH argument carries the byte in both halves of a 16-bit value,
    // the same packing ws_ctrvm uses.  The halves must be OR-ed together:
    // `byte & byte << 8` parses as `byte & (byte << 8)`, which is always 0.
    h_rvm_insert_insn(prog, RVM_MATCH, (byte << 8) | byte);
    h_rvm_insert_insn(prog, RVM_STEP, 0);
  }
  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
  return true;
}
|
||||||
|
|
||||||
const HParserVtable token_vt = {
|
const HParserVtable token_vt = {
|
||||||
.parse = parse_token,
|
.parse = parse_token,
|
||||||
.isValidRegular = h_true,
|
.isValidRegular = h_true,
|
||||||
.isValidCF = h_true,
|
.isValidCF = h_true,
|
||||||
.desugar = desugar_token,
|
.desugar = desugar_token,
|
||||||
|
.compile_to_rvm = token_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_token(const uint8_t *str, const size_t len) {
|
HParser* h_token(const uint8_t *str, const size_t len) {
|
||||||
return h_token__m(&system_allocator, str, len);
|
return h_token__m(&system_allocator, str, len);
|
||||||
}
|
}
|
||||||
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
|
HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
|
||||||
HToken *t = h_new(HToken, 1);
|
HToken *t = h_new(HToken, 1);
|
||||||
t->str = (uint8_t*)str, t->len = len;
|
t->str = (uint8_t*)str, t->len = len;
|
||||||
return h_new_parser(mm__, &token_vt, t);
|
return h_new_parser(mm__, &token_vt, t);
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ static const HParserVtable unimplemented_vt = {
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = h_false,
|
.isValidCF = h_false,
|
||||||
.desugar = desugar_unimplemented,
|
.desugar = desugar_unimplemented,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
static HParser unimplemented = {
|
static HParser unimplemented = {
|
||||||
|
|
|
||||||
|
|
@ -49,16 +49,32 @@ static bool ws_isValidCF(void *env) {
|
||||||
return p->vtable->isValidCF(p->env);
|
return p->vtable->isValidCF(p->env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Emit regex-VM code for the whitespace wrapper.
 *
 * For each whitespace character this emits a FORK whose target is patched
 * to the instruction after the alternative, a MATCH on that character, a
 * STEP, and a GOTO back to the start of the loop.  The wrapped parser is
 * then compiled after the loop and its result is returned.
 */
static bool ws_ctrvm(HRVMProg *prog, void *env) {
  HParser *p = (HParser*)env;

  uint16_t start = h_rvm_get_ip(prog);
  uint16_t next;
  static const char SPACE_CHRS[] = {' ', '\f', '\n', '\r', '\t', '\v'};

  for (size_t i = 0; i < sizeof SPACE_CHRS; i++) {
    next = h_rvm_insert_insn(prog, RVM_FORK, 0);
    h_rvm_insert_insn(prog, RVM_MATCH, (SPACE_CHRS[i] << 8) | (SPACE_CHRS[i]));
    // Step after a successful match, as token_ctrvm does after each MATCH,
    // before looping back to try for more whitespace.
    h_rvm_insert_insn(prog, RVM_STEP, 0);
    h_rvm_insert_insn(prog, RVM_GOTO, start);
    // Patch the FORK emitted above to point at the next alternative.
    h_rvm_patch_arg(prog, next, h_rvm_get_ip(prog));
  }
  // NOTE(review): this passes p->env rather than p itself; confirm against
  // h_compile_regex's expected second argument.
  return h_compile_regex(prog, p->env);
}
|
||||||
|
|
||||||
static const HParserVtable whitespace_vt = {
|
static const HParserVtable whitespace_vt = {
|
||||||
.parse = parse_whitespace,
|
.parse = parse_whitespace,
|
||||||
.isValidRegular = ws_isValidRegular,
|
.isValidRegular = ws_isValidRegular,
|
||||||
.isValidCF = ws_isValidCF,
|
.isValidCF = ws_isValidCF,
|
||||||
.desugar = desugar_whitespace,
|
.desugar = desugar_whitespace,
|
||||||
|
.compile_to_rvm = ws_ctrvm,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_whitespace(const HParser* p) {
|
HParser* h_whitespace(const HParser* p) {
|
||||||
return h_whitespace__m(&system_allocator, p);
|
return h_whitespace__m(&system_allocator, p);
|
||||||
}
|
}
|
||||||
/* Create a parser that uses whitespace_vt and carries the wrapped parser
 * `p` as its environment; allocation goes through `mm__`. */
HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
  void *inner = (void *)p;
  return h_new_parser(mm__, &whitespace_vt, inner);
}
|
||||||
|
|
|
||||||
|
|
@ -39,14 +39,15 @@ static HCFChoice* desugar_xor(HAllocator *mm__, void *env) {
|
||||||
static const HParserVtable xor_vt = {
|
static const HParserVtable xor_vt = {
|
||||||
.parse = parse_xor,
|
.parse = parse_xor,
|
||||||
.isValidRegular = h_false,
|
.isValidRegular = h_false,
|
||||||
.isValidCF = h_false,
|
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
|
||||||
.desugar = desugar_xor,
|
.desugar = desugar_xor,
|
||||||
|
.compile_to_rvm = h_not_regular,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_xor(const HParser* p1, const HParser* p2) {
|
HParser* h_xor(const HParser* p1, const HParser* p2) {
|
||||||
return h_xor__m(&system_allocator, p1, p2);
|
return h_xor__m(&system_allocator, p1, p2);
|
||||||
}
|
}
|
||||||
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1;
|
env->p1 = p1;
|
||||||
env->p2 = p2;
|
env->p2 = p2;
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,27 @@
|
||||||
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
/* Allocate `size` zero-filled bytes from the C heap.
 *
 * The block is prefixed with a size_t header recording `size`, so that
 * system_realloc can zero-fill any growth; the returned pointer is the
 * first byte after that header.  Returns NULL if the request overflows or
 * the underlying allocation fails.
 *
 * NOTE(review): the header shifts the user pointer by sizeof(size_t);
 * confirm this alignment is sufficient for every type stored here.
 */
static void* system_alloc(HAllocator *allocator, size_t size) {
  // Refuse requests where adding the header would wrap around.
  if (size > (size_t)-1 - sizeof(size_t))
    return NULL;

  size_t *header = calloc(1, size + sizeof(size_t));
  if (header == NULL)
    return NULL;

  *header = size;
  return header + 1;
}
|
||||||
|
|
||||||
/* Resize a block obtained from system_alloc/system_realloc to `size` bytes.
 *
 * A NULL `ptr` behaves like system_alloc.  Otherwise the size_t header in
 * front of the block is updated and, when the block grows, the newly added
 * tail is zero-filled.  Returns the (possibly moved) user pointer, or NULL
 * on overflow or allocation failure, in which case the original block is
 * left untouched and still valid.
 */
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
  if (ptr == NULL)
    return system_alloc(allocator, size);

  // Refuse requests where adding the header would wrap around.
  if (size > (size_t)-1 - sizeof(size_t))
    return NULL;

  size_t *old_header = (size_t*)ptr - 1;
  size_t old_size = *old_header;

  // Keep the result in a separate variable so the old block is not lost
  // if realloc fails.
  size_t *header = realloc(old_header, size + sizeof(size_t));
  if (header == NULL)
    return NULL;

  *header = size;
  if (size > old_size)
    memset((char*)(header + 1) + old_size, 0, size - old_size);
  return header + 1;
}
|
||||||
|
|
||||||
/* Release a block obtained from system_alloc/system_realloc.
 *
 * The pointer handed to free() is the start of the size_t header that
 * precedes the user data.  A NULL `ptr` is a no-op.
 */
static void system_free(HAllocator *allocator, void* ptr) {
  // The NULL check is required here: stepping back from a NULL pointer
  // would hand free() an invalid address.
  if (ptr == NULL)
    return;
  free((size_t*)ptr - 1);
}
|
||||||
|
|
||||||
HAllocator system_allocator = {
|
HAllocator system_allocator = {
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ HParserTestcase testcases[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static void test_benchmark_1() {
|
static void test_benchmark_1() {
|
||||||
const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||||
|
|
||||||
HBenchmarkResults *res = h_benchmark(parser, testcases);
|
HBenchmarkResults *res = h_benchmark(parser, testcases);
|
||||||
h_benchmark_report(stderr, res);
|
h_benchmark_report(stderr, res);
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,28 @@
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
// Fail the current test unless the parser `lang` reports itself as regular.
// `lang` is evaluated exactly once; the predicate is reached through the
// parser's vtable, as elsewhere in the library.
#define g_check_regular(lang) do { \
    const HParser *g_check_regular_lang_ = (lang); \
    if (!g_check_regular_lang_->vtable->isValidRegular(g_check_regular_lang_->env)) { \
      g_test_message("Language is not regular"); \
      g_test_fail(); \
    } \
  } while(0)
|
||||||
|
|
||||||
|
// Fail the current test unless the parser `lang` reports itself as
// context-free.  `lang` is evaluated exactly once; the predicate is reached
// through the parser's vtable, as elsewhere in the library.
#define g_check_contextfree(lang) do { \
    const HParser *g_check_contextfree_lang_ = (lang); \
    if (!g_check_contextfree_lang_->vtable->isValidCF(g_check_contextfree_lang_->env)) { \
      g_test_message("Language is not context-free"); \
      g_test_fail(); \
    } \
  } while(0)
|
||||||
|
|
||||||
|
// Try to compile `lang` for `backend` with `params`; when h_compile yields a
// zero/false value, log a message naming the backend and mark the test failed.
// NOTE(review): `params` is also printed with %s, so it must be a C string.
#define g_check_compilable(lang, backend, params) do { \
    if (h_compile(lang, backend, params)) \
      break; \
    g_test_message("Language is not %s(%s)", #backend, params); \
    g_test_fail(); \
  } while(0)
|
||||||
|
|
||||||
|
|
||||||
// TODO: replace uses of this with g_check_parse_failed
|
// TODO: replace uses of this with g_check_parse_failed
|
||||||
#define g_check_failed(res) do { \
|
#define g_check_failed(res) do { \
|
||||||
const HParseResult *result = (res); \
|
const HParseResult *result = (res); \
|
||||||
|
|
@ -77,7 +99,7 @@
|
||||||
} else { \
|
} else { \
|
||||||
char* cres = h_write_result_unamb(res->ast); \
|
char* cres = h_write_result_unamb(res->ast); \
|
||||||
g_check_string(cres, ==, result); \
|
g_check_string(cres, ==, result); \
|
||||||
g_free(cres); \
|
system_allocator.free(&system_allocator, cres); \
|
||||||
HArenaStats stats; \
|
HArenaStats stats; \
|
||||||
h_allocator_stats(res->arena, &stats); \
|
h_allocator_stats(res->arena, &stats); \
|
||||||
g_test_message("Parse used %zd bytes, wasted %zd bytes. " \
|
g_test_message("Parse used %zd bytes, wasted %zd bytes. " \
|
||||||
|
|
@ -149,4 +171,5 @@
|
||||||
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
|
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif // #ifndef HAMMER_TEST_SUITE__H
|
#endif // #ifndef HAMMER_TEST_SUITE__H
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue