Merge remote branch 'upstream/master'
Conflicts: src/hammer.h
This commit is contained in:
commit
f817211446
66 changed files with 5165 additions and 1679 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -4,6 +4,9 @@
|
|||
src/test_suite
|
||||
lib/hush
|
||||
examples/dns
|
||||
examples/base64
|
||||
examples/base64_sem1
|
||||
examples/base64_sem2
|
||||
TAGS
|
||||
*.swp
|
||||
*.swo
|
||||
|
|
|
|||
52
HACKING
Normal file
52
HACKING
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
Privileged arguments
|
||||
====================
|
||||
|
||||
As a matter of convenience, there are several identifiers that
|
||||
internal macros use. Chances are that if you use these names for other
|
||||
things, you're gonna have a bad time.
|
||||
|
||||
In particular, these names, and the macros that use them, are:
|
||||
- state:
|
||||
Used by a_new and company. Should be an HParseState*
|
||||
- mm__:
|
||||
Used by h_new and h_free. Should be an HAllocator*
|
||||
|
||||
Function suffixes
|
||||
=================
|
||||
|
||||
Many functions come in several variants, to handle receiving optional
|
||||
parameters or parameters in multiple different forms. For example,
|
||||
often, you have a global memory manager that is used for an entire
|
||||
program. In this case, you can leave the memory manager arguments
|
||||
off, letting them be implicit instead. Further, it is often convenient
|
||||
to pass an array or va_list to a function instead of listing the
|
||||
arguments inline (eg, for wrapping a function, generating the
|
||||
arguments programmatically, or writing bindings for another language).
|
||||
|
||||
Because we have found that most variants fall into a fairly small set
|
||||
of forms, and to minimize the amount of API calls that users need to
|
||||
remember, there is a consistent naming scheme for these function
|
||||
variants: the function name is followed by two underscores and a set
|
||||
of single-character "flags" indicating what optional features that
|
||||
particular variant has (in alphabetical order, of course):
|
||||
|
||||
__a: takes variadic arguments as a void*[] (not implemented yet, but will be soon).
|
||||
__m: takes a memory manager as the first argument, to override the system memory manager.
|
||||
__v: Takes the variadic argument list as a va_list
|
||||
|
||||
|
||||
Memory managers
|
||||
===============
|
||||
|
||||
If the __m function variants are used or system_allocator is
|
||||
overridden, there come some difficult questions to answer,
|
||||
particularly regarding the behavior when multiple memory managers are
|
||||
combined. As a general rule of thumb (exceptions will be explicitly
|
||||
documented), assume that:
|
||||
|
||||
If you have a function f, which is passed a memory manager m and
|
||||
returns a value r, any function that uses r as a parameter must
|
||||
also be told to use m as a memory manager.
|
||||
|
||||
In other words, don't let the (memory manager) streams cross.
|
||||
|
||||
7
Makefile
7
Makefile
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
SUBDIRS = src examples
|
||||
|
||||
include config.mk
|
||||
|
||||
CONFIG_VARS= INCLUDE_TESTS
|
||||
|
||||
.DEFAULT_GOAL := all
|
||||
|
||||
%:
|
||||
|
|
@ -25,3 +29,6 @@ $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir))))
|
|||
|
||||
TAGS: $(shell find * -name "*.c")
|
||||
etags $^
|
||||
|
||||
config:
|
||||
@printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) )
|
||||
|
|
|
|||
1
NOTES
1
NOTES
|
|
@ -35,4 +35,3 @@ what the comments say.
|
|||
|
||||
TODO: implement datastructure linearization func
|
||||
TODO: implement free func for parsers
|
||||
TODO: Remove glib dependency (i.e., GQueue and GHashtable)
|
||||
58
README.md
Normal file
58
README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
|
||||
|
||||
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
|
||||
|
||||
Hammer currently builds under Linux. (Windows and OSX are coming.)
|
||||
|
||||
Features
|
||||
========
|
||||
* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
|
||||
* Thread-safe, reentrant
|
||||
* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
|
||||
* Parsing backends:
|
||||
* Packrat parsing
|
||||
* LL(k) (not yet implemented)
|
||||
* GLR (not yet implemented)
|
||||
* LALR(8) (not yet implemented)
|
||||
* Regular expressions (not yet implemented)
|
||||
* Language bindings: (not yet implemented)
|
||||
* C++
|
||||
* Java
|
||||
* Python
|
||||
* Ruby
|
||||
* Perl
|
||||
* Go
|
||||
* PHP
|
||||
* .NET
|
||||
|
||||
Installing
|
||||
==========
|
||||
### Prerequisites
|
||||
* make
|
||||
|
||||
### Optional Dependencies
|
||||
* pkg-config (for `make test`)
|
||||
* glib-2.0 (>= 2.29) (for `make test`)
|
||||
* glib-2.0-dev (for `make test`)
|
||||
|
||||
To build, type `make`. To run the built-in test suite, type `make test`.
|
||||
|
||||
There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`.
|
||||
|
||||
Usage
|
||||
=====
|
||||
Just `#include <hammer.h>` and link with `-lhammer`.
|
||||
|
||||
Examples
|
||||
========
|
||||
The `examples/` directory contains some simple examples, currently including:
|
||||
* base64
|
||||
* DNS
|
||||
|
||||
Community
|
||||
=========
|
||||
Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing.
|
||||
|
||||
Contact
|
||||
=======
|
||||
You can also email us at <hammer@upstandinghackers.com>.
|
||||
3
TODO
Normal file
3
TODO
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
- Make h_action functions be called only after parse is complete.
|
||||
- Allow alternative input streams (eg, zlib, base64)
|
||||
- Bonus points if layered...
|
||||
25
common.mk
25
common.mk
|
|
@ -1,17 +1,24 @@
|
|||
CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
||||
LDFLAGS := $(shell pkg-config --libs glib-2.0)
|
||||
CC ?= gcc
|
||||
$(info CC=$(CC))
|
||||
# Set V=1 for verbose mode...
|
||||
V ?= 0
|
||||
CFLAGS += -DINCLUDE_TESTS $(EXTRA_CFLAGS)
|
||||
HUSH = $(TOPLEVEL)/lib/hush
|
||||
|
||||
# Check to make sure variables are properly set
|
||||
ifeq ($(TOPLEVEL),)
|
||||
$(error $$TOPLEVEL is unset)
|
||||
endif
|
||||
|
||||
include $(TOPLEVEL)/config.mk
|
||||
|
||||
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
|
||||
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0)
|
||||
|
||||
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
||||
LDFLAGS :=
|
||||
|
||||
CC ?= gcc
|
||||
$(info CC=$(CC))
|
||||
# Set V=1 for verbose mode...
|
||||
V ?= 0
|
||||
CFLAGS += $(EXTRA_CFLAGS)
|
||||
HUSH = $(TOPLEVEL)/lib/hush
|
||||
|
||||
|
||||
ifsilent = $(if $(findstring 0, $(V)),$(1),)
|
||||
hush = $(call ifsilent,$(HUSH) $(1))
|
||||
#.SUFFIXES:
|
||||
|
|
|
|||
1
config.mk
Normal file
1
config.mk
Normal file
|
|
@ -0,0 +1 @@
|
|||
INCLUDE_TESTS = 0
|
||||
|
|
@ -1,20 +1,41 @@
|
|||
|
||||
OUTPUTS := dns.o \
|
||||
dns
|
||||
dns \
|
||||
base64.o \
|
||||
base64 \
|
||||
base64_sem1.o \
|
||||
base64_sem1 \
|
||||
base64_sem2.o \
|
||||
base64_sem2
|
||||
|
||||
TOPLEVEL := ../
|
||||
|
||||
include ../common.mk
|
||||
CFLAGS += $(pkg-config --cflags glib-2.0)
|
||||
LDFLAGS += $(pkg-config --libs glib-2.0)
|
||||
|
||||
|
||||
all: dns
|
||||
|
||||
all: dns base64 base64_sem1 base64_sem2
|
||||
|
||||
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||
dns: dns.o rr.o dns_common.o
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
dns.o: ../src/hammer.h dns_common.h
|
||||
dns.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||
rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h
|
||||
dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||
|
||||
rr.o: ../src/hammer.h rr.h dns_common.h
|
||||
base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||
base64: base64.o
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
dns_common.o: ../src/hammer.h dns_common.h
|
||||
base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||
base64_sem1: base64_sem1.o
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||
base64_sem2: base64_sem2.o
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
base64%.o: ../src/hammer.h ../src/glue.h
|
||||
|
|
|
|||
63
examples/base64.c
Normal file
63
examples/base64.c
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
// Example parser: Base64, syntax only.
|
||||
//
|
||||
// Demonstrates how to construct a Hammer parser that recognizes valid Base64
|
||||
// sequences.
|
||||
//
|
||||
// Note that no semantic evaluation of the sequence is performed, i.e. the
|
||||
// byte sequence being represented is not returned, or determined. See
|
||||
// base64_sem1.c and base64_sem2.c for examples how to attach appropriate
|
||||
// semantic actions to the grammar.
|
||||
|
||||
#include "../src/hammer.h"
|
||||
|
||||
const HParser* document = NULL;
|
||||
|
||||
void init_parser(void)
|
||||
{
|
||||
// CORE
|
||||
const HParser *digit = h_ch_range(0x30, 0x39);
|
||||
const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL);
|
||||
|
||||
// AUX.
|
||||
const HParser *plus = h_ch('+');
|
||||
const HParser *slash = h_ch('/');
|
||||
const HParser *equals = h_ch('=');
|
||||
|
||||
const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
|
||||
const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
|
||||
const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4);
|
||||
const HParser *base64_3 = h_repeat_n(bsfdig, 4);
|
||||
const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
|
||||
const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
|
||||
const HParser *base64 = h_sequence(h_many(base64_3),
|
||||
h_optional(h_choice(base64_2,
|
||||
base64_1, NULL)),
|
||||
NULL);
|
||||
|
||||
document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL);
|
||||
}
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint8_t input[102400];
|
||||
size_t inputsize;
|
||||
const HParseResult *result;
|
||||
|
||||
init_parser();
|
||||
|
||||
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||
fwrite(input, 1, inputsize, stderr);
|
||||
result = h_parse(document, input, inputsize);
|
||||
|
||||
if(result) {
|
||||
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||
h_pprint(stdout, result->ast, 0, 0);
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
172
examples/base64_sem1.c
Normal file
172
examples/base64_sem1.c
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
// Example parser: Base64, with fine-grained semantic actions
|
||||
//
|
||||
// Demonstrates how to attach semantic actions to grammar rules and piece by
|
||||
// piece transform the parse tree into the desired semantic representation,
|
||||
// in this case a sequence of 8-bit values.
|
||||
//
|
||||
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||
// Those rules using ARULE get an attached action which must be declared (as
|
||||
// a function of type HAction) with a standard name based on the rule name.
|
||||
//
|
||||
// This variant of the example uses fine-grained semantic actions that
|
||||
// transform the parse tree in small steps in a bottom-up fashion. Compare
|
||||
// base64_sem2.c for an alternative approach using a single top-level action.
|
||||
|
||||
#include "../src/hammer.h"
|
||||
#include "../src/glue.h"
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
///
|
||||
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||
// They must be named act_<rulename>.
|
||||
///
|
||||
|
||||
const HParsedToken *act_bsfdig(const HParseResult *p)
|
||||
{
|
||||
HParsedToken *res = H_MAKE_UINT(0);
|
||||
|
||||
uint8_t c = H_CAST_UINT(p->ast);
|
||||
|
||||
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||
res->uint = c - 0x41;
|
||||
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||
res->uint = c - 0x61 + 26;
|
||||
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||
res->uint = c - 0x30 + 52;
|
||||
else if(c == '+')
|
||||
res->uint = 62;
|
||||
else if(c == '/')
|
||||
res->uint = 63;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||
|
||||
#define act_bsfdig_4bit act_bsfdig
|
||||
#define act_bsfdig_2bit act_bsfdig
|
||||
|
||||
#define act_equals h_act_ignore
|
||||
#define act_ws h_act_ignore
|
||||
|
||||
#define act_document act_index0
|
||||
|
||||
// General-form action to turn a block of base64 digits into bytes.
|
||||
const HParsedToken *act_base64_n(int n, const HParseResult *p)
|
||||
{
|
||||
HParsedToken *res = H_MAKE_SEQN(n);
|
||||
|
||||
HParsedToken **digits = h_seq_elements(p->ast);
|
||||
|
||||
uint32_t x = 0;
|
||||
int bits = 0;
|
||||
for(int i=0; i<n+1; i++) {
|
||||
x <<= 6; x |= digits[i]->uint;
|
||||
bits += 6;
|
||||
}
|
||||
x >>= bits%8; // align, i.e. cut off extra bits
|
||||
|
||||
for(int i=0; i<n; i++) {
|
||||
HParsedToken *item = H_MAKE_UINT(x & 0xFF);
|
||||
|
||||
res->seq->elements[n-1-i] = item; // output the last byte and
|
||||
x >>= 8; // discard it
|
||||
}
|
||||
res->seq->used = n;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
H_ACT_APPLY(act_base64_3, act_base64_n, 3);
|
||||
H_ACT_APPLY(act_base64_2, act_base64_n, 2);
|
||||
H_ACT_APPLY(act_base64_1, act_base64_n, 1);
|
||||
|
||||
const HParsedToken *act_base64(const HParseResult *p)
|
||||
{
|
||||
assert(p->ast->token_type == TT_SEQUENCE);
|
||||
assert(p->ast->seq->used == 2);
|
||||
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||
|
||||
HParsedToken *res = H_MAKE_SEQ();
|
||||
|
||||
// concatenate base64_3 blocks
|
||||
HCountedArray *seq = H_FIELD_SEQ(0);
|
||||
for(size_t i=0; i<seq->used; i++)
|
||||
h_seq_append(res, seq->elements[i]);
|
||||
|
||||
// append one trailing base64_2 or _1 block
|
||||
const HParsedToken *tok = h_seq_index(p->ast, 1);
|
||||
if(tok->token_type == TT_SEQUENCE)
|
||||
h_seq_append(res, tok);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
// Set up the parser with the grammar to be recognized.
|
||||
///
|
||||
|
||||
const HParser *init_parser(void)
|
||||
{
|
||||
// CORE
|
||||
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||
|
||||
// AUX.
|
||||
H_RULE (plus, h_ch('+'));
|
||||
H_RULE (slash, h_ch('/'));
|
||||
H_ARULE(equals, h_ch('='));
|
||||
|
||||
H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||
H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||
H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||
H_ARULE(base64_3, h_repeat_n(bsfdig, 4));
|
||||
H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||
H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||
h_optional(h_choice(base64_2,
|
||||
base64_1, NULL)),
|
||||
NULL));
|
||||
|
||||
H_ARULE(ws, h_many(space));
|
||||
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||
|
||||
// BUG: sometimes inputs that should parse just don't.
|
||||
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||
// Using fewer actions seemed to make it less likely.
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
// Main routine: print input, parse, print result, return success/failure.
|
||||
///
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint8_t input[102400];
|
||||
size_t inputsize;
|
||||
const HParser *parser;
|
||||
const HParseResult *result;
|
||||
|
||||
parser = init_parser();
|
||||
|
||||
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||
fwrite(input, 1, inputsize, stderr);
|
||||
result = h_parse(parser, input, inputsize);
|
||||
|
||||
if(result) {
|
||||
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||
h_pprint(stdout, result->ast, 0, 0);
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
176
examples/base64_sem2.c
Normal file
176
examples/base64_sem2.c
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
// Example parser: Base64, with coarse-grained semantic actions
|
||||
//
|
||||
// Demonstrates how to attach semantic actions to a grammar and transform the
|
||||
// parse tree into the desired semantic representation, in this case a sequence
|
||||
// of 8-bit values.
|
||||
//
|
||||
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||
// Those rules using ARULE get an attached action which must be declared (as
|
||||
// a function of type HAction) with a standard name based on the rule name.
|
||||
//
|
||||
// This variant of the example uses coarse-grained semantic actions,
|
||||
// transforming the entire parse tree in one big step. Compare base64_sem1.c
|
||||
// for an alternative approach using a fine-grained piece-by-piece
|
||||
// transformation.
|
||||
|
||||
#include "../src/hammer.h"
|
||||
#include "../src/glue.h"
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
///
|
||||
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||
// They must be named act_<rulename>.
|
||||
///
|
||||
|
||||
// helper: return the numeric value of a parsed base64 digit
|
||||
uint8_t bsfdig_value(const HParsedToken *p)
|
||||
{
|
||||
uint8_t value = 0;
|
||||
|
||||
if(p && p->token_type == TT_UINT) {
|
||||
uint8_t c = p->uint;
|
||||
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||
value = c - 0x41;
|
||||
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||
value = c - 0x61 + 26;
|
||||
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||
value = c - 0x30 + 52;
|
||||
else if(c == '+')
|
||||
value = 62;
|
||||
else if(c == '/')
|
||||
value = 63;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// helper: append a byte value to a sequence
|
||||
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
|
||||
|
||||
const HParsedToken *act_base64(const HParseResult *p)
|
||||
{
|
||||
assert(p->ast->token_type == TT_SEQUENCE);
|
||||
assert(p->ast->seq->used == 2);
|
||||
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||
|
||||
// grab b64_3 block sequence
|
||||
// grab and analyze b64 end block (_2 or _1)
|
||||
const HParsedToken *b64_3 = p->ast->seq->elements[0];
|
||||
const HParsedToken *b64_2 = p->ast->seq->elements[1];
|
||||
const HParsedToken *b64_1 = p->ast->seq->elements[1];
|
||||
|
||||
if(b64_2->token_type != TT_SEQUENCE)
|
||||
b64_1 = b64_2 = NULL;
|
||||
else if(b64_2->seq->elements[2]->uint == '=')
|
||||
b64_2 = NULL;
|
||||
else
|
||||
b64_1 = NULL;
|
||||
|
||||
// allocate result sequence
|
||||
HParsedToken *res = H_MAKE_SEQ();
|
||||
|
||||
// concatenate base64_3 blocks
|
||||
for(size_t i=0; i<b64_3->seq->used; i++) {
|
||||
assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE);
|
||||
HParsedToken **digits = b64_3->seq->elements[i]->seq->elements;
|
||||
|
||||
uint32_t x = bsfdig_value(digits[0]);
|
||||
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||
x <<= 6; x |= bsfdig_value(digits[3]);
|
||||
seq_append_byte(res, (x >> 16) & 0xFF);
|
||||
seq_append_byte(res, (x >> 8) & 0xFF);
|
||||
seq_append_byte(res, x & 0xFF);
|
||||
}
|
||||
|
||||
// append one trailing base64_2 or _1 block
|
||||
if(b64_2) {
|
||||
HParsedToken **digits = b64_2->seq->elements;
|
||||
uint32_t x = bsfdig_value(digits[0]);
|
||||
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||
seq_append_byte(res, (x >> 10) & 0xFF);
|
||||
seq_append_byte(res, (x >> 2) & 0xFF);
|
||||
} else if(b64_1) {
|
||||
HParsedToken **digits = b64_1->seq->elements;
|
||||
uint32_t x = bsfdig_value(digits[0]);
|
||||
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||
seq_append_byte(res, (x >> 4) & 0xFF);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||
|
||||
#define act_ws h_act_ignore
|
||||
#define act_document act_index0
|
||||
|
||||
|
||||
///
|
||||
// Set up the parser with the grammar to be recognized.
|
||||
///
|
||||
|
||||
const HParser *init_parser(void)
|
||||
{
|
||||
// CORE
|
||||
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||
|
||||
// AUX.
|
||||
H_RULE (plus, h_ch('+'));
|
||||
H_RULE (slash, h_ch('/'));
|
||||
H_RULE (equals, h_ch('='));
|
||||
|
||||
H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||
H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||
H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||
H_RULE (base64_3, h_repeat_n(bsfdig, 4));
|
||||
H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||
H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||
h_optional(h_choice(base64_2,
|
||||
base64_1, NULL)),
|
||||
NULL));
|
||||
|
||||
H_ARULE(ws, h_many(space));
|
||||
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||
|
||||
// BUG: sometimes inputs that should parse just don't.
|
||||
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||
// Using fewer actions seemed to make it less likely.
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
// Main routine: print input, parse, print result, return success/failure.
|
||||
///
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint8_t input[102400];
|
||||
size_t inputsize;
|
||||
const HParser *parser;
|
||||
const HParseResult *result;
|
||||
|
||||
parser = init_parser();
|
||||
|
||||
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||
fwrite(input, 1, inputsize, stderr);
|
||||
result = h_parse(parser, input, inputsize);
|
||||
|
||||
if(result) {
|
||||
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||
h_pprint(stdout, result->ast, 0, 0);
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
571
examples/dns.c
571
examples/dns.c
|
|
@ -10,7 +10,12 @@
|
|||
#define false 0
|
||||
#define true 1
|
||||
|
||||
bool is_zero(HParseResult *p) {
|
||||
|
||||
///
|
||||
// Validations
|
||||
///
|
||||
|
||||
bool validate_hdzero(HParseResult *p) {
|
||||
if (TT_UINT != p->ast->token_type)
|
||||
return false;
|
||||
return (0 == p->ast->uint);
|
||||
|
|
@ -20,407 +25,243 @@ bool is_zero(HParseResult *p) {
|
|||
* Every DNS message should have QDCOUNT entries in the question
|
||||
* section, and ANCOUNT+NSCOUNT+ARCOUNT resource records.
|
||||
*/
|
||||
bool validate_dns(HParseResult *p) {
|
||||
bool validate_message(HParseResult *p) {
|
||||
if (TT_SEQUENCE != p->ast->token_type)
|
||||
return false;
|
||||
// The header holds the counts as its last 4 elements.
|
||||
HParsedToken **elems = p->ast->seq->elements[0]->seq->elements;
|
||||
size_t qd = elems[8]->uint;
|
||||
size_t an = elems[9]->uint;
|
||||
size_t ns = elems[10]->uint;
|
||||
size_t ar = elems[11]->uint;
|
||||
HParsedToken *questions = p->ast->seq->elements[1];
|
||||
if (questions->seq->used != qd)
|
||||
|
||||
dns_header_t *header = H_FIELD(dns_header_t, 0);
|
||||
size_t qd = header->question_count;
|
||||
size_t an = header->answer_count;
|
||||
size_t ns = header->authority_count;
|
||||
size_t ar = header->additional_count;
|
||||
|
||||
if (H_FIELD_SEQ(1)->used != qd)
|
||||
return false;
|
||||
HParsedToken *rrs = p->ast->seq->elements[2];
|
||||
if (an+ns+ar != rrs->seq->used)
|
||||
if (an+ns+ar != H_FIELD_SEQ(2)->used)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct dns_qname get_qname(const HParsedToken *t) {
|
||||
// The qname parser parses at least 1 length-value pair, then a NULL.
|
||||
// So, t->seq->elements[0] is a sequence of at least 1 such pair,
|
||||
// and t->seq->elements[1] is the null.
|
||||
const HParsedToken *labels = t->seq->elements[0];
|
||||
struct dns_qname ret = {
|
||||
.qlen = labels->seq->used,
|
||||
.labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used)
|
||||
};
|
||||
// i is which label we're on
|
||||
for (size_t i=0; i<labels->seq->used; ++i) {
|
||||
ret.labels[i].len = labels->seq->elements[i]->seq->used;
|
||||
ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1);
|
||||
// j is which char of the label we're on
|
||||
for (size_t j=0; j<ret.labels[i].len; ++j)
|
||||
ret.labels[i].label[j] = labels->seq->elements[i]->seq->elements[j]->uint;
|
||||
ret.labels[i].label[ret.labels[i].len] = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
char* get_domain(const HParsedToken *t) {
|
||||
switch(t->token_type) {
|
||||
case TT_UINT:
|
||||
return " ";
|
||||
case TT_SEQUENCE:
|
||||
{
|
||||
// Sequence of subdomains separated by "."
|
||||
// Each subdomain is a label, which can be no more than 63 chars.
|
||||
char *ret = h_arena_malloc(t->seq->arena, 64*t->seq->used);
|
||||
size_t count = 0;
|
||||
for (size_t i=0; i<t->seq->used; ++i) {
|
||||
HParsedToken *tmp = t->seq->elements[i];
|
||||
for (size_t j=0; j<tmp->seq->used; ++j) {
|
||||
ret[count] = tmp->seq->elements[i]->uint;
|
||||
++count;
|
||||
}
|
||||
ret[count] = '.';
|
||||
++count;
|
||||
}
|
||||
ret[count-1] = '\x00';
|
||||
return ret;
|
||||
}
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
///
|
||||
// Semantic Actions
|
||||
///
|
||||
|
||||
uint8_t* get_cs(const HCountedArray *arr) {
|
||||
uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);
|
||||
for (size_t i=0; i<arr->used; ++i)
|
||||
ret[i] = arr->elements[i]->uint;
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint8_t** get_txt(const HCountedArray *arr) {
|
||||
uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
|
||||
for (size_t i=0; i<arr->used; ++i) {
|
||||
uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used);
|
||||
for (size_t j=0; j<arr->elements[i]->seq->used; ++j)
|
||||
tmp[j] = arr->elements[i]->seq->elements[j]->uint;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void set_rr(struct dns_rr rr, HCountedArray *rdata) {
|
||||
// Helper: Parse and pack the RDATA field of a Resource Record.
|
||||
void set_rdata(struct dns_rr *rr, HCountedArray *rdata) {
|
||||
uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used);
|
||||
for (size_t i=0; i<rdata->used; ++i)
|
||||
data[i] = rdata->elements[i]->uint;
|
||||
data[i] = H_CAST_UINT(rdata->elements[i]);
|
||||
|
||||
// Parse RDATA if possible.
|
||||
const HParseResult *p = NULL;
|
||||
const HParser *parser = init_rdata(rr->type);
|
||||
if (parser)
|
||||
p = h_parse(parser, (const uint8_t*)data, rdata->used);
|
||||
|
||||
// If the RR doesn't parse, set its type to 0.
|
||||
switch(rr.type) {
|
||||
case 1: // A
|
||||
{
|
||||
const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.a = r->ast->seq->elements[0]->uint;
|
||||
break;
|
||||
}
|
||||
case 2: // NS
|
||||
{
|
||||
const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.ns = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 3: // MD
|
||||
{
|
||||
const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.md = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 4: // MF
|
||||
{
|
||||
const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.md = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 5: // CNAME
|
||||
{
|
||||
const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.cname = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 6: // SOA
|
||||
{
|
||||
const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.soa.mname = get_domain(r->ast->seq->elements[0]);
|
||||
rr.soa.rname = get_domain(r->ast->seq->elements[1]);
|
||||
rr.soa.serial = r->ast->seq->elements[2]->uint;
|
||||
rr.soa.refresh = r->ast->seq->elements[3]->uint;
|
||||
rr.soa.retry = r->ast->seq->elements[4]->uint;
|
||||
rr.soa.expire = r->ast->seq->elements[5]->uint;
|
||||
rr.soa.minimum = r->ast->seq->elements[6]->uint;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 7: // MB
|
||||
{
|
||||
const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.mb = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 8: // MG
|
||||
{
|
||||
const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.mg = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 9: // MR
|
||||
{
|
||||
const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.mr = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 10: // NULL
|
||||
{
|
||||
const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used);
|
||||
for (size_t i=0; i<r->ast->seq->used; ++i)
|
||||
rr.null[i] = r->ast->seq->elements[i]->uint;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 11: // WKS
|
||||
{
|
||||
const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.wks.address = r->ast->seq->elements[0]->uint;
|
||||
rr.wks.protocol = r->ast->seq->elements[1]->uint;
|
||||
rr.wks.len = r->ast->seq->elements[2]->seq->used;
|
||||
rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used);
|
||||
for (size_t i=0; i<rr.wks.len; ++i)
|
||||
rr.wks.bit_map[i] = r->ast->seq->elements[2]->seq->elements[i]->uint;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 12: // PTR
|
||||
{
|
||||
const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else
|
||||
rr.ptr = get_domain(r->ast->seq->elements[0]);
|
||||
break;
|
||||
}
|
||||
case 13: // HINFO
|
||||
{
|
||||
const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq);
|
||||
rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 14: // MINFO
|
||||
{
|
||||
const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]);
|
||||
rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 15: // MX
|
||||
{
|
||||
const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.mx.preference = r->ast->seq->elements[0]->uint;
|
||||
rr.mx.exchange = get_domain(r->ast->seq->elements[1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 16: // TXT
|
||||
{
|
||||
const HParseResult *r = h_parse(init_txt(), (const uint8_t*)data, rdata->used);
|
||||
if (!r)
|
||||
rr.type = 0;
|
||||
else {
|
||||
rr.txt.count = r->ast->seq->elements[0]->seq->used;
|
||||
rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
if (!p)
|
||||
rr->type = 0;
|
||||
|
||||
// Pack the parsed rdata into rr.
|
||||
switch(rr->type) {
|
||||
case 1: rr->a = H_CAST_UINT(p->ast); break;
|
||||
case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break;
|
||||
case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break;
|
||||
case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break;
|
||||
case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break;
|
||||
case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break;
|
||||
case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break;
|
||||
case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break;
|
||||
case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
const HParsedToken* pack_dns_struct(const HParseResult *p) {
|
||||
h_pprint(stdout, p->ast, 0, 2);
|
||||
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
|
||||
ret->token_type = TT_USER;
|
||||
|
||||
dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t));
|
||||
|
||||
HParsedToken *hdr = p->ast->seq->elements[0];
|
||||
struct dns_header header = {
|
||||
.id = hdr->seq->elements[0]->uint,
|
||||
.qr = hdr->seq->elements[1]->uint,
|
||||
.opcode = hdr->seq->elements[2]->uint,
|
||||
.aa = hdr->seq->elements[3]->uint,
|
||||
.tc = hdr->seq->elements[4]->uint,
|
||||
.rd = hdr->seq->elements[5]->uint,
|
||||
.ra = hdr->seq->elements[6]->uint,
|
||||
.rcode = hdr->seq->elements[7]->uint,
|
||||
.question_count = hdr->seq->elements[8]->uint,
|
||||
.answer_count = hdr->seq->elements[9]->uint,
|
||||
.authority_count = hdr->seq->elements[10]->uint,
|
||||
.additional_count = hdr->seq->elements[11]->uint
|
||||
const HParsedToken* act_header(const HParseResult *p) {
|
||||
HParsedToken **fields = h_seq_elements(p->ast);
|
||||
dns_header_t header_ = {
|
||||
.id = H_CAST_UINT(fields[0]),
|
||||
.qr = H_CAST_UINT(fields[1]),
|
||||
.opcode = H_CAST_UINT(fields[2]),
|
||||
.aa = H_CAST_UINT(fields[3]),
|
||||
.tc = H_CAST_UINT(fields[4]),
|
||||
.rd = H_CAST_UINT(fields[5]),
|
||||
.ra = H_CAST_UINT(fields[6]),
|
||||
.rcode = H_CAST_UINT(fields[7]),
|
||||
.question_count = H_CAST_UINT(fields[8]),
|
||||
.answer_count = H_CAST_UINT(fields[9]),
|
||||
.authority_count = H_CAST_UINT(fields[10]),
|
||||
.additional_count = H_CAST_UINT(fields[11])
|
||||
};
|
||||
msg->header = header;
|
||||
|
||||
HParsedToken *qs = p->ast->seq->elements[1];
|
||||
dns_header_t *header = H_ALLOC(dns_header_t);
|
||||
*header = header_;
|
||||
|
||||
return H_MAKE(dns_header_t, header);
|
||||
}
|
||||
|
||||
const HParsedToken* act_label(const HParseResult *p) {
|
||||
dns_label_t *r = H_ALLOC(dns_label_t);
|
||||
|
||||
r->len = h_seq_len(p->ast);
|
||||
r->label = h_arena_malloc(p->arena, r->len + 1);
|
||||
for (size_t i=0; i<r->len; ++i)
|
||||
r->label[i] = H_FIELD_UINT(i);
|
||||
r->label[r->len] = 0;
|
||||
|
||||
return H_MAKE(dns_label_t, r);
|
||||
}
|
||||
|
||||
const HParsedToken* act_rr(const HParseResult *p) {
|
||||
dns_rr_t *rr = H_ALLOC(dns_rr_t);
|
||||
|
||||
rr->name = *H_FIELD(dns_domain_t, 0);
|
||||
rr->type = H_FIELD_UINT(1);
|
||||
rr->class = H_FIELD_UINT(2);
|
||||
rr->ttl = H_FIELD_UINT(3);
|
||||
rr->rdlength = H_FIELD_SEQ(4)->used;
|
||||
|
||||
// Parse and pack RDATA.
|
||||
set_rdata(rr, H_FIELD_SEQ(4));
|
||||
|
||||
return H_MAKE(dns_rr_t, rr);
|
||||
}
|
||||
|
||||
const HParsedToken* act_question(const HParseResult *p) {
|
||||
dns_question_t *q = H_ALLOC(dns_question_t);
|
||||
HParsedToken **fields = h_seq_elements(p->ast);
|
||||
|
||||
// QNAME is a sequence of labels. Pack them into an array.
|
||||
q->qname.qlen = h_seq_len(fields[0]);
|
||||
q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen);
|
||||
for(size_t i=0; i<q->qname.qlen; i++) {
|
||||
q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i);
|
||||
}
|
||||
|
||||
q->qtype = H_CAST_UINT(fields[1]);
|
||||
q->qclass = H_CAST_UINT(fields[2]);
|
||||
|
||||
return H_MAKE(dns_question_t, q);
|
||||
}
|
||||
|
||||
const HParsedToken* act_message(const HParseResult *p) {
|
||||
h_pprint(stdout, p->ast, 0, 2);
|
||||
dns_message_t *msg = H_ALLOC(dns_message_t);
|
||||
|
||||
// Copy header into message struct.
|
||||
dns_header_t *header = H_FIELD(dns_header_t, 0);
|
||||
msg->header = *header;
|
||||
|
||||
// Copy questions into message struct.
|
||||
HParsedToken *qs = h_seq_index(p->ast, 1);
|
||||
struct dns_question *questions = h_arena_malloc(p->arena,
|
||||
sizeof(struct dns_question)*(header.question_count));
|
||||
for (size_t i=0; i<header.question_count; ++i) {
|
||||
// QNAME is a sequence of labels. In the parser, it's defined as
|
||||
// sequence(many1(length_value(...)), ch('\x00'), NULL).
|
||||
questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]);
|
||||
questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint;
|
||||
questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint;
|
||||
sizeof(struct dns_question)*(header->question_count));
|
||||
for (size_t i=0; i<header->question_count; ++i) {
|
||||
questions[i] = *H_INDEX(dns_question_t, qs, i);
|
||||
}
|
||||
msg->questions = questions;
|
||||
|
||||
HParsedToken *rrs = p->ast->seq->elements[2];
|
||||
// Copy answer RRs into message struct.
|
||||
HParsedToken *rrs = h_seq_index(p->ast, 2);
|
||||
struct dns_rr *answers = h_arena_malloc(p->arena,
|
||||
sizeof(struct dns_rr)*(header.answer_count));
|
||||
for (size_t i=0; i<header.answer_count; ++i) {
|
||||
answers[i].name = get_domain(rrs[i].seq->elements[0]);
|
||||
answers[i].type = rrs[i].seq->elements[1]->uint;
|
||||
answers[i].class = rrs[i].seq->elements[2]->uint;
|
||||
answers[i].ttl = rrs[i].seq->elements[3]->uint;
|
||||
answers[i].rdlength = rrs[i].seq->elements[4]->seq->used;
|
||||
set_rr(answers[i], rrs[i].seq->elements[4]->seq);
|
||||
sizeof(struct dns_rr)*(header->answer_count));
|
||||
for (size_t i=0; i<header->answer_count; ++i) {
|
||||
answers[i] = *H_INDEX(dns_rr_t, rrs, i);
|
||||
}
|
||||
msg->answers = answers;
|
||||
|
||||
// Copy authority RRs into message struct.
|
||||
struct dns_rr *authority = h_arena_malloc(p->arena,
|
||||
sizeof(struct dns_rr)*(header.authority_count));
|
||||
for (size_t i=0, j=header.answer_count; i<header.authority_count; ++i, ++j) {
|
||||
authority[i].name = get_domain(rrs[j].seq->elements[0]);
|
||||
authority[i].type = rrs[j].seq->elements[1]->uint;
|
||||
authority[i].class = rrs[j].seq->elements[2]->uint;
|
||||
authority[i].ttl = rrs[j].seq->elements[3]->uint;
|
||||
authority[i].rdlength = rrs[j].seq->elements[4]->seq->used;
|
||||
set_rr(authority[i], rrs[j].seq->elements[4]->seq);
|
||||
sizeof(struct dns_rr)*(header->authority_count));
|
||||
for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) {
|
||||
authority[i] = *H_INDEX(dns_rr_t, rrs, j);
|
||||
}
|
||||
msg->authority = authority;
|
||||
|
||||
// Copy additional RRs into message struct.
|
||||
struct dns_rr *additional = h_arena_malloc(p->arena,
|
||||
sizeof(struct dns_rr)*(header.additional_count));
|
||||
for (size_t i=0, j=header.answer_count+header.authority_count; i<header.additional_count; ++i, ++j) {
|
||||
additional[i].name = get_domain(rrs[j].seq->elements[0]);
|
||||
additional[i].type = rrs[j].seq->elements[1]->uint;
|
||||
additional[i].class = rrs[j].seq->elements[2]->uint;
|
||||
additional[i].ttl = rrs[j].seq->elements[3]->uint;
|
||||
additional[i].rdlength = rrs[j].seq->elements[4]->seq->used;
|
||||
set_rr(additional[i], rrs[j].seq->elements[4]->seq);
|
||||
sizeof(struct dns_rr)*(header->additional_count));
|
||||
for (size_t i=0, j=header->answer_count+header->authority_count; i<header->additional_count; ++i, ++j) {
|
||||
additional[i] = *H_INDEX(dns_rr_t, rrs, j);
|
||||
}
|
||||
msg->additional = additional;
|
||||
|
||||
ret->user = (void*)msg;
|
||||
return H_MAKE(dns_message_t, msg);
|
||||
}
|
||||
|
||||
#define act_hdzero h_act_ignore
|
||||
#define act_qname act_index0
|
||||
|
||||
|
||||
///
|
||||
// Grammar
|
||||
///
|
||||
|
||||
const HParser* init_parser() {
|
||||
static const HParser *ret = NULL;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
H_RULE (domain, init_domain());
|
||||
H_AVRULE(hdzero, h_bits(3, false));
|
||||
H_ARULE (header, h_sequence(h_bits(16, false), // ID
|
||||
h_bits(1, false), // QR
|
||||
h_bits(4, false), // opcode
|
||||
h_bits(1, false), // AA
|
||||
h_bits(1, false), // TC
|
||||
h_bits(1, false), // RD
|
||||
h_bits(1, false), // RA
|
||||
hdzero, // Z
|
||||
h_bits(4, false), // RCODE
|
||||
h_uint16(), // QDCOUNT
|
||||
h_uint16(), // ANCOUNT
|
||||
h_uint16(), // NSCOUNT
|
||||
h_uint16(), // ARCOUNT
|
||||
NULL));
|
||||
H_RULE (type, h_int_range(h_uint16(), 1, 16));
|
||||
H_RULE (qtype, h_choice(type,
|
||||
h_int_range(h_uint16(), 252, 255),
|
||||
NULL));
|
||||
H_RULE (class, h_int_range(h_uint16(), 1, 4));
|
||||
H_RULE (qclass, h_choice(class,
|
||||
h_int_range(h_uint16(), 255, 255),
|
||||
NULL));
|
||||
H_RULE (len, h_int_range(h_uint8(), 1, 255));
|
||||
H_ARULE (label, h_length_value(len, h_uint8()));
|
||||
H_ARULE (qname, h_sequence(h_many1(label),
|
||||
h_ch('\x00'),
|
||||
NULL));
|
||||
H_ARULE (question, h_sequence(qname, qtype, qclass, NULL));
|
||||
H_RULE (rdata, h_length_value(h_uint16(), h_uint8()));
|
||||
H_ARULE (rr, h_sequence(domain, // NAME
|
||||
type, // TYPE
|
||||
class, // CLASS
|
||||
h_uint32(), // TTL
|
||||
rdata, // RDLENGTH+RDATA
|
||||
NULL));
|
||||
H_AVRULE(message, h_sequence(header,
|
||||
h_many(question),
|
||||
h_many(rr),
|
||||
h_end_p(),
|
||||
NULL));
|
||||
|
||||
ret = message;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* init_parser() {
|
||||
static HParser *dns_message = NULL;
|
||||
if (dns_message)
|
||||
return dns_message;
|
||||
|
||||
const HParser *domain = init_domain();
|
||||
|
||||
const HParser *dns_header = h_sequence(h_bits(16, false), // ID
|
||||
h_bits(1, false), // QR
|
||||
h_bits(4, false), // opcode
|
||||
h_bits(1, false), // AA
|
||||
h_bits(1, false), // TC
|
||||
h_bits(1, false), // RD
|
||||
h_bits(1, false), // RA
|
||||
h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z
|
||||
h_bits(4, false), // RCODE
|
||||
h_uint16(), // QDCOUNT
|
||||
h_uint16(), // ANCOUNT
|
||||
h_uint16(), // NSCOUNT
|
||||
h_uint16(), // ARCOUNT
|
||||
NULL);
|
||||
|
||||
const HParser *type = h_int_range(h_uint16(), 1, 16);
|
||||
|
||||
const HParser *qtype = h_choice(type,
|
||||
h_int_range(h_uint16(), 252, 255),
|
||||
NULL);
|
||||
|
||||
const HParser *class = h_int_range(h_uint16(), 1, 4);
|
||||
|
||||
const HParser *qclass = h_choice(class,
|
||||
h_int_range(h_uint16(), 255, 255),
|
||||
NULL);
|
||||
|
||||
const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255),
|
||||
h_uint8())),
|
||||
h_ch('\x00'),
|
||||
NULL), // QNAME
|
||||
qtype, // QTYPE
|
||||
qclass, // QCLASS
|
||||
NULL);
|
||||
|
||||
|
||||
const HParser *dns_rr = h_sequence(domain, // NAME
|
||||
type, // TYPE
|
||||
class, // CLASS
|
||||
h_uint32(), // TTL
|
||||
h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA
|
||||
NULL);
|
||||
|
||||
|
||||
dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header,
|
||||
h_many(dns_question),
|
||||
h_many(dns_rr),
|
||||
h_end_p(),
|
||||
NULL),
|
||||
validate_dns),
|
||||
pack_dns_struct);
|
||||
|
||||
return dns_message;
|
||||
}
|
||||
///
|
||||
// Main Program for a Dummy DNS Server
|
||||
///
|
||||
|
||||
int start_listening() {
|
||||
// return: fd
|
||||
|
|
@ -442,7 +283,7 @@ int start_listening() {
|
|||
|
||||
const int TYPE_MAX = 16;
|
||||
typedef const char* cstr;
|
||||
const char* TYPE_STR[17] = {
|
||||
static const char* TYPE_STR[17] = {
|
||||
"nil", "A", "NS", "MD",
|
||||
"MF", "CNAME", "SOA", "MB",
|
||||
"MG", "MR", "NULL", "WKS",
|
||||
|
|
|
|||
158
examples/dns.h
158
examples/dns.h
|
|
@ -1,6 +1,27 @@
|
|||
#include "../src/hammer.h"
|
||||
|
||||
struct dns_header {
|
||||
enum DNSTokenType_ {
|
||||
TT_dns_message_t = TT_USER,
|
||||
TT_dns_header_t,
|
||||
TT_dns_label_t,
|
||||
TT_dns_qname_t,
|
||||
TT_dns_question_t,
|
||||
TT_dns_rr_t,
|
||||
TT_dns_rr_txt_t,
|
||||
TT_dns_rr_hinfo_t,
|
||||
TT_dns_rr_minfo_t,
|
||||
TT_dns_rr_mx_t,
|
||||
TT_dns_rr_soa_t,
|
||||
TT_dns_rr_wks_t,
|
||||
TT_dns_rr_null_t,
|
||||
TT_dns_domain_t,
|
||||
TT_dns_cstr_t
|
||||
};
|
||||
|
||||
typedef char *dns_domain_t;
|
||||
typedef uint8_t *dns_cstr_t;
|
||||
|
||||
typedef struct dns_header {
|
||||
uint16_t id;
|
||||
bool qr, aa, tc, rd, ra;
|
||||
char opcode, rcode;
|
||||
|
|
@ -8,74 +29,93 @@ struct dns_header {
|
|||
size_t answer_count;
|
||||
size_t authority_count;
|
||||
size_t additional_count;
|
||||
};
|
||||
struct dns_qname {
|
||||
} dns_header_t;
|
||||
|
||||
typedef struct dns_label {
|
||||
size_t len;
|
||||
uint8_t *label;
|
||||
} dns_label_t;
|
||||
|
||||
typedef struct dns_qname {
|
||||
size_t qlen;
|
||||
struct {
|
||||
size_t len;
|
||||
uint8_t *label;
|
||||
} *labels;
|
||||
};
|
||||
struct dns_question {
|
||||
struct dns_qname qname;
|
||||
dns_label_t *labels;
|
||||
} dns_qname_t;
|
||||
|
||||
typedef struct dns_question {
|
||||
dns_qname_t qname;
|
||||
uint16_t qtype;
|
||||
uint16_t qclass;
|
||||
};
|
||||
struct dns_rr {
|
||||
} dns_question_t;
|
||||
|
||||
typedef struct {
|
||||
dns_cstr_t cpu;
|
||||
dns_cstr_t os;
|
||||
} dns_rr_hinfo_t;
|
||||
|
||||
typedef struct {
|
||||
char* rmailbx;
|
||||
char* emailbx;
|
||||
} dns_rr_minfo_t;
|
||||
|
||||
typedef struct {
|
||||
uint16_t preference;
|
||||
char* exchange;
|
||||
} dns_rr_mx_t;
|
||||
|
||||
typedef struct {
|
||||
char* mname;
|
||||
char* rname;
|
||||
uint32_t serial;
|
||||
uint32_t refresh;
|
||||
uint32_t retry;
|
||||
uint32_t expire;
|
||||
uint32_t minimum;
|
||||
} dns_rr_soa_t;
|
||||
|
||||
typedef struct {
|
||||
size_t count;
|
||||
uint8_t** txt_data;
|
||||
} dns_rr_txt_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t address;
|
||||
uint8_t protocol;
|
||||
size_t len;
|
||||
uint8_t* bit_map;
|
||||
} dns_rr_wks_t;
|
||||
|
||||
typedef uint8_t *dns_rr_null_t;
|
||||
|
||||
typedef struct dns_rr {
|
||||
char* name;
|
||||
uint16_t type;
|
||||
uint16_t class;
|
||||
uint32_t ttl; // cmos is also acceptable.
|
||||
uint16_t rdlength;
|
||||
union {
|
||||
char* cname;
|
||||
struct {
|
||||
uint8_t* cpu;
|
||||
uint8_t* os;
|
||||
} hinfo;
|
||||
char* mb;
|
||||
char* md;
|
||||
char* mf;
|
||||
char* mg;
|
||||
struct {
|
||||
char* rmailbx;
|
||||
char* emailbx;
|
||||
} minfo;
|
||||
char* mr;
|
||||
struct {
|
||||
uint16_t preference;
|
||||
char* exchange;
|
||||
} mx;
|
||||
uint8_t* null;
|
||||
char* ns;
|
||||
char* ptr;
|
||||
struct {
|
||||
char* mname;
|
||||
char* rname;
|
||||
uint32_t serial;
|
||||
uint32_t refresh;
|
||||
uint32_t retry;
|
||||
uint32_t expire;
|
||||
uint32_t minimum;
|
||||
} soa;
|
||||
struct {
|
||||
size_t count;
|
||||
uint8_t** txt_data;
|
||||
} txt;
|
||||
uint32_t a;
|
||||
struct {
|
||||
uint32_t address;
|
||||
uint8_t protocol;
|
||||
size_t len;
|
||||
uint8_t* bit_map;
|
||||
} wks;
|
||||
uint32_t a;
|
||||
char* ns;
|
||||
char* md;
|
||||
char* mf;
|
||||
char* cname;
|
||||
dns_rr_soa_t soa;
|
||||
char* mb;
|
||||
char* mg;
|
||||
char* mr;
|
||||
dns_rr_null_t null;
|
||||
dns_rr_wks_t wks;
|
||||
char* ptr;
|
||||
dns_rr_hinfo_t hinfo;
|
||||
dns_rr_minfo_t minfo;
|
||||
dns_rr_mx_t mx;
|
||||
dns_rr_txt_t txt;
|
||||
};
|
||||
};
|
||||
} dns_rr_t;
|
||||
|
||||
typedef struct dns_message {
|
||||
struct dns_header header;
|
||||
struct dns_question *questions;
|
||||
struct dns_rr *answers;
|
||||
struct dns_rr *authority;
|
||||
struct dns_rr *additional;
|
||||
dns_header_t header;
|
||||
dns_question_t *questions;
|
||||
dns_rr_t *answers;
|
||||
dns_rr_t *authority;
|
||||
dns_rr_t *additional;
|
||||
} dns_message_t;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
#include "../src/hammer.h"
|
||||
#include "dns_common.h"
|
||||
#include "dns.h"
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
H_ACT_APPLY(act_index0, h_act_index, 0)
|
||||
|
||||
/**
|
||||
* A label can't be more than 63 characters.
|
||||
*/
|
||||
|
|
@ -13,51 +16,64 @@ bool validate_label(HParseResult *p) {
|
|||
return (64 > p->ast->seq->used);
|
||||
}
|
||||
|
||||
#define act_label h_act_flatten
|
||||
|
||||
const HParsedToken* act_domain(const HParseResult *p) {
|
||||
const HParsedToken *ret = NULL;
|
||||
char *arr = NULL;
|
||||
|
||||
switch(p->ast->token_type) {
|
||||
case TT_UINT:
|
||||
arr = " ";
|
||||
break;
|
||||
case TT_SEQUENCE:
|
||||
// Sequence of subdomains separated by "."
|
||||
// Each subdomain is a label, which can be no more than 63 chars.
|
||||
arr = h_arena_malloc(p->arena, 64*p->ast->seq->used);
|
||||
size_t count = 0;
|
||||
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
||||
HParsedToken *tmp = p->ast->seq->elements[i];
|
||||
for (size_t j=0; j<tmp->seq->used; ++j) {
|
||||
arr[count] = tmp->seq->elements[i]->uint;
|
||||
++count;
|
||||
}
|
||||
arr[count] = '.';
|
||||
++count;
|
||||
}
|
||||
arr[count-1] = '\x00';
|
||||
break;
|
||||
default:
|
||||
arr = NULL;
|
||||
ret = NULL;
|
||||
}
|
||||
|
||||
if(arr) {
|
||||
dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char*
|
||||
*val = arr;
|
||||
ret = H_MAKE(dns_domain_t, val);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* init_domain() {
|
||||
static const HParser *domain = NULL;
|
||||
if (domain)
|
||||
return domain;
|
||||
static const HParser *ret = NULL;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
const HParser *letter = h_choice(h_ch_range('a', 'z'),
|
||||
h_ch_range('A', 'Z'),
|
||||
NULL);
|
||||
H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL));
|
||||
H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL));
|
||||
H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL)));
|
||||
H_VARULE(label, h_sequence(letter,
|
||||
h_optional(h_sequence(h_optional(ldh_str),
|
||||
let_dig,
|
||||
NULL)),
|
||||
NULL));
|
||||
H_RULE (subdomain, h_sepBy1(label, h_ch('.')));
|
||||
H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL));
|
||||
|
||||
const HParser *let_dig = h_choice(letter,
|
||||
h_ch_range('0', '9'),
|
||||
NULL);
|
||||
|
||||
const HParser *ldh_str = h_many1(h_choice(let_dig,
|
||||
h_ch('-'),
|
||||
NULL));
|
||||
|
||||
const HParser *label = h_attr_bool(h_sequence(letter,
|
||||
h_optional(h_sequence(h_optional(ldh_str),
|
||||
let_dig,
|
||||
NULL)),
|
||||
NULL),
|
||||
validate_label);
|
||||
|
||||
/**
|
||||
* You could write it like this ...
|
||||
* HParser *indirect_subdomain = h_indirect();
|
||||
* const HParser *subdomain = h_choice(label,
|
||||
* h_sequence(indirect_subdomain,
|
||||
* h_ch('.'),
|
||||
* label,
|
||||
* NULL),
|
||||
* NULL);
|
||||
* h_bind_indirect(indirect_subdomain, subdomain);
|
||||
*
|
||||
* ... but this is easier and equivalent
|
||||
*/
|
||||
|
||||
const HParser *subdomain = h_sepBy1(label, h_ch('.'));
|
||||
|
||||
domain = h_choice(subdomain,
|
||||
h_ch(' '),
|
||||
NULL);
|
||||
|
||||
return domain;
|
||||
ret = domain;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* init_character_string() {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,11 @@
|
|||
#define HAMMER_DNS_COMMON__H
|
||||
|
||||
#include "../src/hammer.h"
|
||||
#include "../src/glue.h"
|
||||
|
||||
const HParser* init_domain();
|
||||
const HParser* init_character_string();
|
||||
|
||||
const HParsedToken* act_index0(const HParseResult *p);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
360
examples/rr.c
360
examples/rr.c
|
|
@ -1,124 +1,15 @@
|
|||
#include "../src/hammer.h"
|
||||
#include "dns_common.h"
|
||||
#include "dns.h"
|
||||
#include "rr.h"
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
const HParser* init_cname() {
|
||||
static const HParser *cname = NULL;
|
||||
if (cname)
|
||||
return cname;
|
||||
|
||||
cname = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return cname;
|
||||
}
|
||||
|
||||
const HParser* init_hinfo() {
|
||||
static const HParser *hinfo = NULL;
|
||||
if (hinfo)
|
||||
return hinfo;
|
||||
|
||||
const HParser* cstr = init_character_string();
|
||||
|
||||
hinfo = h_sequence(cstr,
|
||||
cstr,
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return hinfo;
|
||||
}
|
||||
|
||||
const HParser* init_mb() {
|
||||
static const HParser *mb = NULL;
|
||||
if (mb)
|
||||
return mb;
|
||||
|
||||
mb = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return mb;
|
||||
}
|
||||
|
||||
const HParser* init_md() {
|
||||
static const HParser *md = NULL;
|
||||
if (md)
|
||||
return md;
|
||||
|
||||
md = h_sequence(init_domain(),
|
||||
h_end_p,
|
||||
NULL);
|
||||
|
||||
return md;
|
||||
}
|
||||
|
||||
const HParser* init_mf() {
|
||||
static const HParser *mf = NULL;
|
||||
if (mf)
|
||||
return mf;
|
||||
|
||||
mf = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return mf;
|
||||
}
|
||||
|
||||
const HParser* init_mg() {
|
||||
static const HParser *mg = NULL;
|
||||
if (mg)
|
||||
return mg;
|
||||
|
||||
mg = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return mg;
|
||||
}
|
||||
|
||||
const HParser* init_minfo() {
|
||||
static const HParser *minfo = NULL;
|
||||
if (minfo)
|
||||
return minfo;
|
||||
|
||||
const HParser* domain = init_domain();
|
||||
|
||||
minfo = h_sequence(domain,
|
||||
domain,
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return minfo;
|
||||
}
|
||||
|
||||
const HParser* init_mr() {
|
||||
static const HParser *mr = NULL;
|
||||
if (mr)
|
||||
return mr;
|
||||
|
||||
mr = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return mr;
|
||||
}
|
||||
|
||||
const HParser* init_mx() {
|
||||
static const HParser *mx = NULL;
|
||||
if (mx)
|
||||
return mx;
|
||||
|
||||
mx = h_sequence(h_uint16(),
|
||||
init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return mx;
|
||||
}
|
||||
///
|
||||
// Validations and Semantic Actions
|
||||
///
|
||||
|
||||
bool validate_null(HParseResult *p) {
|
||||
if (TT_SEQUENCE != p->ast->token_type)
|
||||
|
|
@ -126,94 +17,177 @@ bool validate_null(HParseResult *p) {
|
|||
return (65536 > p->ast->seq->used);
|
||||
}
|
||||
|
||||
const HParser* init_null() {
|
||||
static const HParser *null_ = NULL;
|
||||
if (null_)
|
||||
return null_;
|
||||
const HParsedToken *act_null(const HParseResult *p) {
|
||||
dns_rr_null_t *null = H_ALLOC(dns_rr_null_t);
|
||||
|
||||
null_ = h_attr_bool(h_many(h_uint8()), validate_null);
|
||||
size_t len = h_seq_len(p->ast);
|
||||
uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len);
|
||||
for (size_t i=0; i<len; ++i)
|
||||
buf[i] = H_FIELD_UINT(i);
|
||||
|
||||
return null_;
|
||||
return H_MAKE(dns_rr_null_t, null);
|
||||
}
|
||||
|
||||
const HParser* init_ns() {
|
||||
static const HParser *ns = NULL;
|
||||
if (ns)
|
||||
return ns;
|
||||
const HParsedToken *act_txt(const HParseResult *p) {
|
||||
dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t);
|
||||
|
||||
ns = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
const HCountedArray *arr = H_CAST_SEQ(p->ast);
|
||||
uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
|
||||
for (size_t i=0; i<arr->used; ++i) {
|
||||
size_t len = h_seq_len(arr->elements[i]);
|
||||
uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len);
|
||||
for (size_t j=0; j<len; ++j)
|
||||
tmp[j] = H_INDEX_UINT(arr->elements[i], j);
|
||||
ret[i] = tmp;
|
||||
}
|
||||
|
||||
return ns;
|
||||
txt->count = arr->used;
|
||||
txt->txt_data = ret;
|
||||
|
||||
return H_MAKE(dns_rr_txt_t, txt);
|
||||
}
|
||||
|
||||
const HParser* init_ptr() {
|
||||
static const HParser *ptr = NULL;
|
||||
if (ptr)
|
||||
return ptr;
|
||||
const HParsedToken* act_cstr(const HParseResult *p) {
|
||||
dns_cstr_t *cs = H_ALLOC(dns_cstr_t);
|
||||
|
||||
const HCountedArray *arr = H_CAST_SEQ(p->ast);
|
||||
uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);
|
||||
for (size_t i=0; i<arr->used; ++i)
|
||||
ret[i] = H_CAST_UINT(arr->elements[i]);
|
||||
assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation?
|
||||
*cs = ret;
|
||||
|
||||
return H_MAKE(dns_cstr_t, cs);
|
||||
}
|
||||
|
||||
const HParsedToken* act_soa(const HParseResult *p) {
|
||||
dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t);
|
||||
|
||||
soa->mname = *H_FIELD(dns_domain_t, 0);
|
||||
soa->rname = *H_FIELD(dns_domain_t, 1);
|
||||
soa->serial = H_FIELD_UINT(2);
|
||||
soa->refresh = H_FIELD_UINT(3);
|
||||
soa->retry = H_FIELD_UINT(4);
|
||||
soa->expire = H_FIELD_UINT(5);
|
||||
soa->minimum = H_FIELD_UINT(6);
|
||||
|
||||
return H_MAKE(dns_rr_soa_t, soa);
|
||||
}
|
||||
|
||||
const HParsedToken* act_wks(const HParseResult *p) {
|
||||
dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t);
|
||||
|
||||
wks->address = H_FIELD_UINT(0);
|
||||
wks->protocol = H_FIELD_UINT(1);
|
||||
wks->len = H_FIELD_SEQ(2)->used;
|
||||
wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len);
|
||||
for (size_t i=0; i<wks->len; ++i)
|
||||
wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i);
|
||||
|
||||
return H_MAKE(dns_rr_wks_t, wks);
|
||||
}
|
||||
|
||||
const HParsedToken* act_hinfo(const HParseResult *p) {
|
||||
dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t);
|
||||
|
||||
hinfo->cpu = *H_FIELD(dns_cstr_t, 0);
|
||||
hinfo->os = *H_FIELD(dns_cstr_t, 1);
|
||||
|
||||
return H_MAKE(dns_rr_hinfo_t, hinfo);
|
||||
}
|
||||
|
||||
const HParsedToken* act_minfo(const HParseResult *p) {
|
||||
dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t);
|
||||
|
||||
minfo->rmailbx = *H_FIELD(dns_domain_t, 0);
|
||||
minfo->emailbx = *H_FIELD(dns_domain_t, 1);
|
||||
|
||||
return H_MAKE(dns_rr_minfo_t, minfo);
|
||||
}
|
||||
|
||||
const HParsedToken* act_mx(const HParseResult *p) {
|
||||
dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t);
|
||||
|
||||
mx->preference = H_FIELD_UINT(0);
|
||||
mx->exchange = *H_FIELD(dns_domain_t, 1);
|
||||
|
||||
return H_MAKE(dns_rr_mx_t, mx);
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
// Parsers for all types of RDATA
|
||||
///
|
||||
|
||||
#define RDATA_TYPE_MAX 16
|
||||
const HParser* init_rdata(uint16_t type) {
|
||||
static const HParser *parsers[RDATA_TYPE_MAX+1];
|
||||
static int inited = 0;
|
||||
|
||||
if (type >= sizeof(parsers))
|
||||
return NULL;
|
||||
|
||||
ptr = h_sequence(init_domain(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
if (inited)
|
||||
return parsers[type];
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
const HParser* init_soa() {
|
||||
static const HParser *soa = NULL;
|
||||
if (soa)
|
||||
return soa;
|
||||
|
||||
const HParser *domain = init_domain();
|
||||
|
||||
soa = h_sequence(domain, // MNAME
|
||||
domain, // RNAME
|
||||
h_uint32(), // SERIAL
|
||||
h_uint32(), // REFRESH
|
||||
h_uint32(), // RETRY
|
||||
h_uint32(), // EXPIRE
|
||||
h_uint32(), // MINIMUM
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return soa;
|
||||
}
|
||||
|
||||
const HParser* init_txt() {
|
||||
static const HParser *txt = NULL;
|
||||
if (txt)
|
||||
return txt;
|
||||
|
||||
txt = h_sequence(h_many1(init_character_string()),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return txt;
|
||||
}
|
||||
|
||||
const HParser* init_a() {
|
||||
static const HParser *a = NULL;
|
||||
if (a)
|
||||
return a;
|
||||
|
||||
a = h_sequence(h_uint32(),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
const HParser* init_wks() {
|
||||
static const HParser *wks = NULL;
|
||||
if (wks)
|
||||
return wks;
|
||||
|
||||
wks = h_sequence(h_uint32(),
|
||||
h_uint8(),
|
||||
h_many(h_uint8()),
|
||||
h_end_p(),
|
||||
NULL);
|
||||
|
||||
return wks;
|
||||
|
||||
H_RULE (domain, init_domain());
|
||||
H_ARULE(cstr, init_character_string());
|
||||
|
||||
H_RULE (a, h_uint32());
|
||||
H_RULE (ns, domain);
|
||||
H_RULE (md, domain);
|
||||
H_RULE (mf, domain);
|
||||
H_RULE (cname, domain);
|
||||
H_ARULE(soa, h_sequence(domain, // MNAME
|
||||
domain, // RNAME
|
||||
h_uint32(), // SERIAL
|
||||
h_uint32(), // REFRESH
|
||||
h_uint32(), // RETRY
|
||||
h_uint32(), // EXPIRE
|
||||
h_uint32(), // MINIMUM
|
||||
NULL));
|
||||
H_RULE (mb, domain);
|
||||
H_RULE (mg, domain);
|
||||
H_RULE (mr, domain);
|
||||
H_VRULE(null, h_many(h_uint8()));
|
||||
H_RULE (wks, h_sequence(h_uint32(),
|
||||
h_uint8(),
|
||||
h_many(h_uint8()),
|
||||
NULL));
|
||||
H_RULE (ptr, domain);
|
||||
H_RULE (hinfo, h_sequence(cstr, cstr, NULL));
|
||||
H_RULE (minfo, h_sequence(domain, domain, NULL));
|
||||
H_RULE (mx, h_sequence(h_uint16(), domain, NULL));
|
||||
H_ARULE(txt, h_many1(cstr));
|
||||
|
||||
|
||||
parsers[ 0] = NULL; // there is no type 0
|
||||
parsers[ 1] = a;
|
||||
parsers[ 2] = ns;
|
||||
parsers[ 3] = md;
|
||||
parsers[ 4] = mf;
|
||||
parsers[ 5] = cname;
|
||||
parsers[ 6] = soa;
|
||||
parsers[ 7] = mb;
|
||||
parsers[ 8] = mg;
|
||||
parsers[ 9] = mr;
|
||||
parsers[10] = null;
|
||||
parsers[11] = wks;
|
||||
parsers[12] = ptr;
|
||||
parsers[13] = hinfo;
|
||||
parsers[14] = minfo;
|
||||
parsers[15] = mx;
|
||||
parsers[16] = txt;
|
||||
|
||||
// All parsers must consume their input exactly.
|
||||
for(uint16_t i; i<sizeof(parsers); i++) {
|
||||
if(parsers[i]) {
|
||||
parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
|
||||
act_index0);
|
||||
}
|
||||
}
|
||||
|
||||
inited = 1;
|
||||
return parsers[type];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,21 +3,6 @@
|
|||
|
||||
#include "../src/hammer.h"
|
||||
|
||||
const HParser* init_cname();
|
||||
const HParser* init_hinfo();
|
||||
const HParser* init_mb();
|
||||
const HParser* init_md();
|
||||
const HParser* init_mf();
|
||||
const HParser* init_mg();
|
||||
const HParser* init_minfo();
|
||||
const HParser* init_mr();
|
||||
const HParser* init_mx();
|
||||
const HParser* init_null();
|
||||
const HParser* init_ns();
|
||||
const HParser* init_ptr();
|
||||
const HParser* init_soa();
|
||||
const HParser* init_txt();
|
||||
const HParser* init_a();
|
||||
const HParser* init_wks();
|
||||
const HParser* init_rdata(uint16_t type);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
52
src/Makefile
52
src/Makefile
|
|
@ -25,31 +25,55 @@ PARSERS := \
|
|||
attr_bool \
|
||||
indirect
|
||||
|
||||
OUTPUTS := bitreader.o \
|
||||
hammer.o \
|
||||
bitwriter.o \
|
||||
libhammer.a \
|
||||
pprint.o \
|
||||
allocator.o \
|
||||
datastructures.o \
|
||||
BACKENDS := \
|
||||
packrat
|
||||
|
||||
HAMMER_PARTS := \
|
||||
bitreader.o \
|
||||
hammer.o \
|
||||
bitwriter.o \
|
||||
pprint.o \
|
||||
allocator.o \
|
||||
datastructures.o \
|
||||
system_allocator.o \
|
||||
benchmark.o \
|
||||
compile.o \
|
||||
glue.o \
|
||||
$(PARSERS:%=parsers/%.o) \
|
||||
$(BACKENDS:%=backends/%.o)
|
||||
|
||||
TESTS := t_benchmark.o \
|
||||
t_bitreader.o \
|
||||
t_bitwriter.o \
|
||||
t_parser.o \
|
||||
t_misc.o \
|
||||
test_suite.o
|
||||
|
||||
OUTPUTS := libhammer.a \
|
||||
test_suite.o \
|
||||
test_suite \
|
||||
$(PARSERS:%=parsers/%.o)
|
||||
$(HAMMER_PARTS) \
|
||||
$(TESTS)
|
||||
|
||||
TOPLEVEL := ../
|
||||
|
||||
include ../common.mk
|
||||
|
||||
$(TESTS): CFLAGS += $(TEST_CFLAGS)
|
||||
$(TESTS): LDFLAGS += $(TEST_LDFLAGS)
|
||||
|
||||
all: libhammer.a test_suite
|
||||
all: libhammer.a
|
||||
|
||||
test_suite: test_suite.o libhammer.a
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o \
|
||||
$(PARSERS:%=parsers/%.o)
|
||||
libhammer.a: $(HAMMER_PARTS)
|
||||
|
||||
bitreader.o: test_suite.h
|
||||
hammer.o: hammer.h
|
||||
glue.o: hammer.h glue.h
|
||||
|
||||
all: libhammer.a
|
||||
|
||||
test: test_suite
|
||||
./test_suite -v
|
||||
|
||||
test_suite: $(TESTS) libhammer.a
|
||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS)
|
||||
|
|
|
|||
|
|
@ -15,11 +15,13 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
struct arena_link {
|
||||
// TODO:
|
||||
|
|
@ -36,22 +38,25 @@ struct arena_link {
|
|||
|
||||
// Bookkeeping for one arena: a chain of blocks obtained from `mm__`.
struct HArena_ {
|
||||
// First link of the block chain; h_delete_arena walks it via ->next.
struct arena_link *head;
|
||||
// Allocator that every block (and this header) is obtained from and
// returned to.
struct HAllocator_ *mm__;
|
||||
// Payload size of an ordinary block; h_new_arena substitutes 4096 for 0.
size_t block_size;
|
||||
// Running total of bytes requested by callers (incremented in
// h_arena_malloc) -- presumably on every allocation path; TODO confirm.
size_t used;
|
||||
// Running total of bookkeeping/overhead bytes; seeded in h_new_arena with
// the sizes of the first link header, this struct and the first block.
size_t wasted;
|
||||
};
|
||||
|
||||
HArena *h_new_arena(size_t block_size) {
|
||||
HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
|
||||
if (block_size == 0)
|
||||
block_size = 4096;
|
||||
struct HArena_ *ret = g_new(struct HArena_, 1);
|
||||
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + block_size);
|
||||
struct HArena_ *ret = h_new(struct HArena_, 1);
|
||||
struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
|
||||
memset(link, 0, sizeof(struct arena_link) + block_size);
|
||||
link->free = block_size;
|
||||
link->used = 0;
|
||||
link->next = NULL;
|
||||
ret->head = link;
|
||||
ret->block_size = block_size;
|
||||
ret->used = 0;
|
||||
ret->mm__ = mm__;
|
||||
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -70,13 +75,15 @@ void* h_arena_malloc(HArena *arena, size_t size) {
|
|||
// This involves some annoying casting...
|
||||
arena->used += size;
|
||||
arena->wasted += sizeof(struct arena_link*);
|
||||
void* link = g_malloc(size + sizeof(struct arena_link*));
|
||||
void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
|
||||
memset(link, 0, size + sizeof(struct arena_link*));
|
||||
*(struct arena_link**)link = arena->head->next;
|
||||
arena->head->next = (struct arena_link*)link;
|
||||
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
|
||||
} else {
|
||||
// we just need to allocate an ordinary new block.
|
||||
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size);
|
||||
struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
|
||||
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
|
||||
link->free = arena->block_size - size;
|
||||
link->used = size;
|
||||
link->next = arena->head;
|
||||
|
|
@ -86,18 +93,23 @@ void* h_arena_malloc(HArena *arena, size_t size) {
|
|||
return link->rest;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Placeholder: individual arena allocations are not released one by one.
// The function exists so callers already pair allocations with frees.
void h_arena_free(HArena *arena, void* ptr) {
  // To be used later...
  (void)arena;
  (void)ptr;
}
|
||||
|
||||
void h_delete_arena(HArena *arena) {
|
||||
HAllocator *mm__ = arena->mm__;
|
||||
struct arena_link *link = arena->head;
|
||||
while (link) {
|
||||
struct arena_link *next = link->next;
|
||||
// Even in the case of a special block, without the full arena
|
||||
// header, this is correct, because the next pointer is the first
|
||||
// in the structure.
|
||||
g_free(link);
|
||||
h_free(link);
|
||||
link = next;
|
||||
}
|
||||
g_free(arena);
|
||||
h_free(arena);
|
||||
}
|
||||
|
||||
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
|
||||
|
|
|
|||
|
|
@ -19,10 +19,17 @@
|
|||
#define HAMMER_ALLOCATOR__H__
|
||||
#include <sys/types.h>
|
||||
|
||||
// Table of memory-management callbacks. Each callback receives the
// allocator itself as its first argument. The three entries presumably
// follow malloc/realloc/free semantics -- TODO confirm against the
// system_allocator implementation.
typedef struct HAllocator_ {
|
||||
// Obtain `size` bytes.
void* (*alloc)(struct HAllocator_* allocator, size_t size);
|
||||
// Resize the allocation at `ptr` to `size` bytes.
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
|
||||
// Release the allocation at `ptr`.
void (*free)(struct HAllocator_* allocator, void* ptr);
|
||||
} HAllocator;
|
||||
|
||||
typedef struct HArena_ HArena ; // hidden implementation
|
||||
|
||||
HArena *h_new_arena(size_t block_size); // pass 0 for default...
|
||||
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
|
||||
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
|
||||
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
|
||||
void h_delete_arena(HArena *arena);
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
209
src/backends/packrat.c
Normal file
209
src/backends/packrat.c
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
#include <assert.h>
|
||||
#include "../internal.h"
|
||||
#include "../parsers/parser_internal.h"
|
||||
|
||||
// short-hand for constructing HCachedResult's
|
||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||
ret->result = result;
|
||||
ret->input_stream = state->input_stream;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
|
||||
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
|
||||
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when.
|
||||
HParseResult *tmp_res;
|
||||
if (parser) {
|
||||
HInputStream bak = state->input_stream;
|
||||
tmp_res = parser->vtable->parse(parser->env, state);
|
||||
if (tmp_res) {
|
||||
tmp_res->arena = state->arena;
|
||||
if (!state->input_stream.overrun) {
|
||||
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
|
||||
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
|
||||
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
|
||||
else
|
||||
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
|
||||
} else
|
||||
tmp_res->bit_length = 0;
|
||||
}
|
||||
} else
|
||||
tmp_res = NULL;
|
||||
if (state->input_stream.overrun)
|
||||
return NULL; // overrun is always failure.
|
||||
#ifdef CONSISTENCY_CHECK
|
||||
if (!tmp_res) {
|
||||
state->input_stream = INVALID;
|
||||
state->input_stream.input = key->input_pos.input;
|
||||
}
|
||||
#endif
|
||||
return tmp_res;
|
||||
}
|
||||
|
||||
// Look up the cache entry for `k`, taking an in-progress left-recursion
// growth at this position into account.
//
// Returns the cache entry (possibly NULL) when no recursion head is
// registered for `k`; a fresh failing entry when nothing is cached and the
// parser takes no part in the recursion; otherwise the cached entry,
// re-evaluated first if the parser is still in the head's eval set.
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
  HParserCacheValue *cached = h_hashtable_get(state->cache, k);
  HRecursionHead *head = h_hashtable_get(state->recursion_heads, k);

  // No heads found: the plain cache answer stands.
  if (head == NULL)
    return cached;

  // Nothing in the cache, and the key parser is not involved.
  if (cached == NULL
      && head->head_parser != k->parser
      && !h_slist_find(head->involved_set, k->parser)) {
    HParseResult *failure = a_new(HParseResult, 1);
    failure->ast = NULL;
    failure->arena = state->arena;
    HParserCacheValue *answer = a_new(HParserCacheValue, 1);
    answer->value_type = PC_RIGHT;
    answer->right = cached_result(state, failure);
    return answer;
  }

  if (!h_slist_find(head->eval_set, k->parser))
    return cached;

  // The key parser is in the eval set: take it out (so it is evaluated only
  // once per growth round) and re-run it.
  head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
  HParseResult *reparsed = perform_lowlevel_parse(state, k->parser);
  // Normally there is an entry to modify; create one if there is not.
  if (cached == NULL)
    cached = a_new(HParserCacheValue, 1);
  cached->value_type = PC_RIGHT;
  cached->right = cached_result(state, reparsed);
  return cached;
}
|
||||
|
||||
/* Setting up the left recursion. We have the LR for the rule head;
|
||||
* we modify the involved_sets of all LRs in the stack, until we
|
||||
* see the current parser again.
|
||||
*/
|
||||
|
||||
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
  // Give the detected recursion a head the first time it is seen.
  if (rec_detect->head == NULL) {
    HRecursionHead *fresh = a_new(HRecursionHead, 1);
    fresh->head_parser = p;
    fresh->involved_set = h_slist_new(state->arena);
    fresh->eval_set = NULL;
    rec_detect->head = fresh;
  }

  assert(state->lr_stack->head != NULL);

  // Walk the LR stack from the top until the entry for `p` itself; every
  // entry passed on the way shares the head and joins its involved set.
  for (HSlistNode *node = state->lr_stack->head; node != NULL; node = node->next) {
    HLeftRec *lr = node->elem;
    if (lr->rule == p)
      break;
    lr->head = rec_detect->head;
    h_slist_push(lr->head->involved_set, (void*)lr->rule);
  }
}
|
||||
|
||||
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
|
||||
* future parse.
|
||||
*/
|
||||
|
||||
// Grow the seed parse cached for `k`: re-run the parser repeatedly, keeping
// each result that got further than the previous one, and return the last
// result that made progress.
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
  for (;;) {
    // Store the head into the recursion_heads
    h_hashtable_put(state->recursion_heads, k, head);

    HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
    if (!old_cached || PC_LEFT == old_cached->value_type)
      errx(1, "impossible match");
    HParseResult *old_res = old_cached->right->result;

    // reset the eval_set of the head of the recursion at each beginning of growth
    head->eval_set = h_slist_copy(head->involved_set);
    HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);

    if (!tmp_res) {
      // The re-parse failed: stop growing and keep the previous result.
      h_hashtable_del(state->recursion_heads, k);
      return old_res;
    }

    int advanced =
      (old_res->ast->index < tmp_res->ast->index) ||
      (old_res->ast->index == tmp_res->ast->index &&
       old_res->ast->bit_offset < tmp_res->ast->bit_offset);

    if (!advanced) {
      // we're done with growing, we can remove data from the recursion head
      h_hashtable_del(state->recursion_heads, k);
      HParserCacheValue *cached = h_hashtable_get(state->cache, k);
      if (cached && PC_RIGHT == cached->value_type)
        return cached->right->result;
      errx(1, "impossible match");
    }

    // Progress was made: cache the longer parse and go round again.
    HParserCacheValue *v = a_new(HParserCacheValue, 1);
    v->value_type = PC_RIGHT;
    v->right = cached_result(state, tmp_res);
    h_hashtable_put(state->cache, k, v);
  }
}
|
||||
|
||||
// Turn the seed parse of a detected left recursion into the final answer.
// Only the head rule of the recursion is grown; every other rule simply
// returns its seed.
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
  if (!growable->head)
    errx(1, "lrAnswer with no head");

  // not the head rule, so not growing
  if (growable->head->head_parser != k->parser)
    return growable->seed;

  // update cache
  HParserCacheValue *seeded = a_new(HParserCacheValue, 1);
  seeded->value_type = PC_RIGHT;
  seeded->right = cached_result(state, growable->seed);
  h_hashtable_put(state->cache, k, seeded);

  // A failed seed cannot be grown.
  return growable->seed ? grow(k, state, growable->head) : NULL;
}
|
||||
|
||||
/* Warth's recursion. Hi Alessandro! */
|
||||
// Memoizing parse entry point: apply `parser` at the current input position,
// using the cache and the left-recursion machinery above.
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
  HParserCacheKey *key = a_new(HParserCacheKey, 1);
  key->input_pos = state->input_stream;
  key->parser = parser;

  // check to see if there is already a result for this object...
  HParserCacheValue *m = recall(key, state);

  if (m != NULL) {
    // it exists!
    if (PC_LEFT == m->value_type) {
      // The entry is still the in-progress marker: this is a left-recursive
      // re-entry.
      setupLR(parser, state, m->left);
      return m->left->seed; // BUG: this might not be correct
    }
    // A finished result: restore the position it ended at and reuse it.
    state->input_stream = m->right->input_stream;
    return m->right->result;
  }

  // It doesn't exist, so create a dummy result to cache
  HLeftRec *base = a_new(HLeftRec, 1);
  base->seed = NULL;
  base->rule = parser;
  base->head = NULL;
  h_slist_push(state->lr_stack, base);

  // cache it
  HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
  dummy->value_type = PC_LEFT;
  dummy->left = base;
  h_hashtable_put(state->cache, key, dummy);

  // parse the input
  HParseResult *tmp_res = perform_lowlevel_parse(state, parser);

  // the base variable has passed equality tests with the cache
  h_slist_pop(state->lr_stack);

  // setupLR, used above, mutates the LR to have a head if appropriate, so we
  // check to see if we have one
  if (base->head != NULL) {
    base->seed = tmp_res;
    return lr_answer(key, state, base);
  }

  // No left recursion was detected: cache the plain result.
  HParserCacheValue *right = a_new(HParserCacheValue, 1);
  right->value_type = PC_RIGHT;
  right->right = cached_result(state, tmp_res);
  h_hashtable_put(state->cache, key, right);
  return tmp_res;
}
|
||||
|
||||
// Backend "compile" hook for packrat. There is nothing to prepare, so this
// always reports success (0).
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) {
  (void)mm__;
  (void)parser;
  (void)params;
  // No compilation necessary, and everything should work out of the box.
  return 0;
}
|
||||
|
||||
// Backend "parse" hook for packrat: delegates to h_do_parse. The allocator
// argument is not used on this path.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
  HParseResult *outcome = h_do_parse(parser, parse_state);
  return outcome;
}
|
||||
|
||||
// Entry points of the packrat backend, in the form the backend table in
// compile.c refers to.
HParserBackendVTable h__packrat_backend_vtable = {
|
||||
.compile = h_packrat_compile,
|
||||
.parse = h_packrat_parse
|
||||
};
|
||||
114
src/benchmark.c
Normal file
114
src/benchmark.c
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
Usage:
|
||||
Create your parser (i.e., const HParser*), and an array of test cases
|
||||
(i.e., HParserTestcase[], terminated by { NULL, 0, NULL }) and then call
|
||||
|
||||
HBenchmarkResults* results = h_benchmark(parser, testcases);
|
||||
|
||||
Then, you can format a report with:
|
||||
|
||||
h_benchmark_report(stdout, results);
|
||||
|
||||
or just generate code to make the parser run as fast as possible with:
|
||||
|
||||
h_benchmark_dump_optimized_code(stdout, results);
|
||||
|
||||
*/
|
||||
|
||||
// Convenience form of h_benchmark__m that uses the global system allocator.
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) {
  HAllocator *default_mm = &system_allocator;
  return h_benchmark__m(default_mm, parser, testcases);
}
|
||||
|
||||
/*
 * Benchmark `parser` on every backend in [PB_MIN, PB_MAX).
 *
 * For each backend: compile the grammar, verify every test case, and, only
 * if all of them pass, time each test case.
 *
 *   mm__       allocator used for the returned result structures
 *   parser     grammar under test
 *   testcases  array terminated by an entry whose `input` is NULL
 *
 * Returns a newly allocated HBenchmarkResults with one HBackendResults per
 * backend. `cases` is NULL for a backend that failed to compile or failed
 * any test case. Diagnostics go to stderr.
 */
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) {
  // For now, just output the results to stderr
  HParserTestcase* tc = testcases;
  HParserBackend backend = PB_MIN;
  HBenchmarkResults *ret = h_new(HBenchmarkResults, 1);
  ret->len = PB_MAX-PB_MIN;
  ret->results = h_new(HBackendResults, ret->len);

  for (backend = PB_MIN; backend < PB_MAX; backend++) {
    // The array holds PB_MAX-PB_MIN entries, so index relative to PB_MIN.
    HBackendResults *br = &ret->results[backend - PB_MIN];

    // Initialise every field up front so that all exit paths of this
    // iteration leave the entry fully defined.
    br->backend = backend;
    br->compile_success = false;
    br->n_testcases = 0;
    br->failed_testcases = 0;
    br->cases = NULL;

    // Step 1: Compile grammar for the backend being benchmarked (not always
    // PB_MIN), with the caller's allocator.
    if (h_compile__m(mm__, parser, backend, NULL) == -1) {
      // backend inappropriate for grammar...
      fprintf(stderr, "failed\n");
      continue;
    }
    br->compile_success = true;

    // Step 2: verify all test cases.
    // NOTE(review): h_parse is not told which backend to use; presumably it
    // uses whatever the preceding compile selected -- confirm.
    int tc_failed = 0;
    for (tc = testcases; tc->input != NULL; tc++) {
      br->n_testcases++;
      HParseResult *res = h_parse(parser, tc->input, tc->length);
      char* res_unamb = (res != NULL) ? h_write_result_unamb(res->ast) : NULL;

      // A case passes when both sides are absent, or both are present and
      // equal. strcmp is only called when neither side is NULL.
      bool passed;
      if (res_unamb == NULL || tc->output_unambiguous == NULL)
        passed = (res_unamb == NULL && tc->output_unambiguous == NULL);
      else
        passed = (strcmp(res_unamb, tc->output_unambiguous) == 0);

      if (!passed) {
        // test case failed...
        fprintf(stderr, "failed\n");
        // We want to run all testcases, for purposes of generating a
        // report. (eg, if users are trying to fix a grammar for a
        // faster backend)
        tc_failed++;
        br->failed_testcases++;
      }
      // NOTE(review): res_unamb is not released here; which allocator owns
      // the string returned by h_write_result_unamb is not visible from
      // this file -- confirm and free accordingly.
      if (res != NULL)
        h_parse_result_free(res);
    }

    if (tc_failed > 0) {
      // Can't use this parser; skip to the next
      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
      continue;
    }

    // Step 3: time every test case.
    br->cases = h_new(HCaseResult, br->n_testcases);
    size_t cur_case = 0;

    for (tc = testcases; tc->input != NULL; tc++) {
      // The goal is to run each testcase for at least 100ms each
      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
      int count = 1, cur;
      struct timespec ts_start, ts_end;
      long long time_diff;
      do {
        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);
        for (cur = 0; cur < count; cur++) {
          HParseResult *timed = h_parse(parser, tc->input, tc->length);
          if (timed != NULL)
            h_parse_result_free(timed);
        }
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);

        // time_diff is in ns; widen before multiplying so the product
        // cannot overflow a narrower time_t/int.
        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
                  + (ts_end.tv_nsec - ts_start.tv_nsec);
        // Stop doubling before `count` could overflow an int.
      } while (time_diff < 100000000 && count < (1 << 30));
      br->cases[cur_case].parse_time = (time_diff / count);
      cur_case++;
    }
  }
  return ret;
}
|
||||
|
||||
/*
 * Write a plain-text report of `result` to `stream`: one "Backend N" header
 * per backend, followed by one line per timed test case.
 */
void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
  for (size_t i=0; i<result->len; ++i) {
    // %zu is the conversion for size_t; %ld is only right where long
    // happens to have the same width.
    fprintf(stream, "Backend %zu ... \n", i);
    // Backends without timing data (cases == NULL, e.g. when compilation
    // failed) have nothing further to report.
    if (result->results[i].cases == NULL)
      continue;
    for (size_t j=0; j<result->results[i].n_testcases; ++j) {
      // Cast explicitly so the argument matches the conversion whatever
      // integer type parse_time is declared with.
      fprintf(stream, "Case %zu: %lld ns/parse\n", j,
              (long long)result->results[i].cases[j].parse_time);
    }
  }
}
|
||||
|
|
@ -108,70 +108,3 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) {
|
|||
out <<= final_shift;
|
||||
return (out ^ msb) - msb; // perform sign extension
|
||||
}
|
||||
|
||||
#ifdef INCLUDE_TESTS
|
||||
|
||||
#define MK_INPUT_STREAM(buf,len,endianness_) \
|
||||
{ \
|
||||
.input = (uint8_t*)buf, \
|
||||
.length = len, \
|
||||
.index = 0, \
|
||||
.bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \
|
||||
.endianness = endianness_ \
|
||||
}
|
||||
|
||||
|
||||
static void test_bitreader_ints(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
|
||||
}
|
||||
|
||||
static void test_bitreader_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
|
||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||
}
|
||||
static void test_bitreader_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
|
||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||
}
|
||||
|
||||
static void test_largebits_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||
}
|
||||
|
||||
static void test_largebits_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
|
||||
}
|
||||
|
||||
|
||||
void register_bitreader_tests(void) {
|
||||
g_test_add_func("/core/bitreader/be", test_bitreader_be);
|
||||
g_test_add_func("/core/bitreader/le", test_bitreader_le);
|
||||
g_test_add_func("/core/bitreader/largebits-be", test_largebits_be);
|
||||
g_test_add_func("/core/bitreader/largebits-le", test_largebits_le);
|
||||
g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be);
|
||||
g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le);
|
||||
g_test_add_func("/core/bitreader/ints", test_bitreader_ints);
|
||||
}
|
||||
|
||||
#endif // #ifdef INCLUDE_TESTS
|
||||
|
|
|
|||
137
src/bitwriter.c
137
src/bitwriter.c
|
|
@ -4,22 +4,16 @@
|
|||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
// This file provides the logical inverse of bitreader.c
|
||||
struct HBitWriter_ {
|
||||
uint8_t* buf;
|
||||
size_t index;
|
||||
size_t capacity;
|
||||
char bit_offset; // unlike in bit_reader, this is always the number
|
||||
// of used bits in the current byte. i.e., 0 always
|
||||
// means that 8 bits are available for use.
|
||||
char flags;
|
||||
};
|
||||
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||
|
||||
// h_bit_writer_
|
||||
HBitWriter *h_bit_writer_new() {
|
||||
HBitWriter *writer = g_new0(HBitWriter, 1);
|
||||
writer->buf = g_malloc0(writer->capacity = 8);
|
||||
|
||||
HBitWriter *h_bit_writer_new(HAllocator* mm__) {
|
||||
HBitWriter *writer = h_new(HBitWriter, 1);
|
||||
memset(writer, 0, sizeof(*writer));
|
||||
writer->buf = mm__->alloc(mm__, writer->capacity = 8);
|
||||
memset(writer->buf, 0, writer->capacity);
|
||||
writer->mm__ = mm__;
|
||||
writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
|
||||
|
||||
return writer;
|
||||
|
|
@ -41,7 +35,7 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) {
|
|||
int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 1 : 0);
|
||||
size_t old_capacity = w->capacity;
|
||||
while (w->index + nbytes >= w->capacity) {
|
||||
w->buf = g_realloc(w->buf, w->capacity *= 2);
|
||||
w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2);
|
||||
}
|
||||
|
||||
if (old_capacity != w->capacity)
|
||||
|
|
@ -100,114 +94,7 @@ const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len) {
|
|||
}
|
||||
|
||||
void h_bit_writer_free(HBitWriter* w) {
|
||||
g_free(w->buf);
|
||||
g_free(w);
|
||||
HAllocator *mm__ = w->mm__;
|
||||
h_free(w->buf);
|
||||
h_free(w);
|
||||
}
|
||||
|
||||
#ifdef INCLUDE_TESTS
|
||||
// TESTS BELOW HERE
|
||||
typedef struct {
|
||||
unsigned long long data;
|
||||
size_t nbits;
|
||||
} bitwriter_test_elem; // should end with {0,0}
|
||||
|
||||
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
|
||||
size_t len;
|
||||
const uint8_t *buf;
|
||||
HBitWriter *w = h_bit_writer_new();
|
||||
int i;
|
||||
w->flags = flags;
|
||||
for (i = 0; data[i].nbits; i++) {
|
||||
h_bit_writer_put(w, data[i].data, data[i].nbits);
|
||||
}
|
||||
|
||||
buf = h_bit_writer_get_buffer(w, &len);
|
||||
HInputStream input = {
|
||||
.input = buf,
|
||||
.index = 0,
|
||||
.length = len,
|
||||
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
|
||||
.endianness = flags,
|
||||
.overrun = 0
|
||||
};
|
||||
|
||||
for (i = 0; data[i].nbits; i++) {
|
||||
g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_bitwriter_ints(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ -0x200000000, 64 },
|
||||
{ 0,0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_bitwriter_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x03, 3 },
|
||||
{ 0x52, 8 },
|
||||
{ 0x1A, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_bitwriter_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x02, 3 },
|
||||
{ 0x4D, 8 },
|
||||
{ 0x0B, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_largebits_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x352, 11 },
|
||||
{ 0x1A, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_largebits_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x26A, 11 },
|
||||
{ 0x0B, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0xD, 5 },
|
||||
{ 0x25A, 11 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0xA, 5 },
|
||||
{ 0x2D3, 11 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
void register_bitwriter_tests(void) {
|
||||
g_test_add_func("/core/bitwriter/be", test_bitwriter_be);
|
||||
g_test_add_func("/core/bitwriter/le", test_bitwriter_le);
|
||||
g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be);
|
||||
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le);
|
||||
g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be);
|
||||
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le);
|
||||
g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints);
|
||||
}
|
||||
|
||||
#endif // #ifdef INCLUDE_TESTS
|
||||
|
|
|
|||
15
src/compile.c
Normal file
15
src/compile.c
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
// This file contains functions related to managing multiple parse backends
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
|
||||
// Backend dispatch table, indexed by HParserBackend value. Only the first
// slot is populated; the remaining PB_MAX-1 slots are implicitly NULL.
static HParserBackendVTable *backends[PB_MAX] = {
|
||||
&h__packrat_backend_vtable,
|
||||
};
|
||||
|
||||
// Convenience form of h_compile__m that uses the global system allocator.
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
  HAllocator *default_mm = &system_allocator;
  return h_compile__m(default_mm, parser, backend, params);
}
|
||||
|
||||
/*
 * Prepare `parser` for use with `backend`.
 *
 *   mm__     allocator handed to the backend's compile hook
 *   parser   grammar to compile
 *   backend  which backend to compile for
 *   params   backend-specific parameters, passed through unchanged
 *
 * Returns whatever the backend's compile hook returns, or -1 when `backend`
 * is out of range or has no implementation registered in the table.
 */
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
  // The unsigned comparison rejects negative values as well as values at or
  // above PB_MAX. Unpopulated table slots are NULL.
  if ((unsigned)backend >= (unsigned)PB_MAX || backends[backend] == NULL)
    return -1;
  return backends[backend]->compile(mm__, parser, params);
}
|
||||
|
|
@ -2,7 +2,8 @@
|
|||
#include "hammer.h"
|
||||
#include "allocator.h"
|
||||
#include <assert.h>
|
||||
#include <malloc.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
// {{{ counted arrays
|
||||
|
||||
|
||||
|
|
@ -15,6 +16,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
|
|||
ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Create a counted array with the default initial capacity of 4 elements.
HCountedArray *h_carray_new(HArena * arena) {
  const size_t initial_capacity = 4;
  return h_carray_new_sized(arena, initial_capacity);
}
|
||||
|
|
@ -30,3 +32,213 @@ void h_carray_append(HCountedArray *array, void* item) {
|
|||
}
|
||||
array->elements[array->used++] = item;
|
||||
}
|
||||
|
||||
// HSlist
|
||||
// Create an empty singly-linked list whose nodes will come from `arena`.
HSlist* h_slist_new(HArena *arena) {
  HSlist *list = h_arena_malloc(arena, sizeof(*list));
  list->arena = arena;
  list->head = NULL;
  return list;
}
|
||||
|
||||
// Return a new list, in the same arena, holding the same elements in the
// same order. Only the nodes are copied; the elements are shared.
HSlist* h_slist_copy(HSlist *slist) {
  HSlist *copy = h_slist_new(slist->arena);
  // `link` always points at the pointer that should receive the next node:
  // first the copy's head, afterwards the `next` field of its last node.
  HSlistNode **link = &copy->head;

  for (HSlistNode *src = slist->head; src != NULL; src = src->next) {
    HSlistNode *dup = h_arena_malloc(slist->arena, sizeof(HSlistNode));
    dup->elem = src->elem;
    dup->next = NULL;
    *link = dup;
    link = &dup->next;
  }
  return copy;
}
|
||||
|
||||
// Remove the first node and return its element, or NULL if the list is
// empty. (A stored NULL element is indistinguishable from "empty".)
void* h_slist_pop(HSlist *slist) {
  HSlistNode *top = slist->head;
  if (top == NULL)
    return NULL;

  slist->head = top->next;
  void *item = top->elem;
  h_arena_free(slist->arena, top);
  return item;
}
|
||||
|
||||
// Prepend `item` to the list.
void h_slist_push(HSlist *slist, void* item) {
  HSlistNode *fresh = h_arena_malloc(slist->arena, sizeof(*fresh));
  fresh->next = slist->head;
  fresh->elem = item;
  // write memory barrier here.
  slist->head = fresh;
}
|
||||
|
||||
// Report whether `item` (compared by pointer identity) is in the list.
// `item` must not be NULL.
bool h_slist_find(HSlist *slist, const void* item) {
  assert (item != NULL);
  for (HSlistNode *node = slist->head; node != NULL; node = node->next) {
    if (node->elem == item)
      return true;
  }
  return false;
}
|
||||
|
||||
// Unlink every node whose element is `item` (pointer identity) and return
// the same list. `item` must not be NULL.
HSlist* h_slist_remove_all(HSlist *slist, const void* item) {
  assert (item != NULL);
  // `link` is the pointer through which the current node is reached: the
  // list head at first, then the `next` field of the last node kept.
  HSlistNode **link = &slist->head;
  while (*link != NULL) {
    if ((*link)->elem == item) {
      // FIXME free the removed node! this leaks.
      *link = (*link)->next;
    } else {
      link = &(*link)->next;
    }
  }
  return slist;
}
|
||||
|
||||
// Pop every node, then hand the list header itself back to its arena.
void h_slist_free(HSlist *slist) {
  HArena *owner = slist->arena;
  for (; slist->head != NULL; )
    h_slist_pop(slist);
  h_arena_free(owner, slist);
}
|
||||
|
||||
// Create an empty hash table in `arena` that uses `hashFunc` to hash keys
// and `equalFunc` to compare them. A slot whose key is NULL is empty.
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
  HHashTable *table = h_arena_malloc(arena, sizeof(*table));
  table->arena = arena;
  table->equalFunc = equalFunc;
  table->hashFunc = hashFunc;
  table->used = 0;
  table->capacity = 64; // to start; should be tuned later...
  table->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * table->capacity);

  // Mark every slot empty explicitly.
  HHashTableEntry *end = table->contents + table->capacity;
  for (HHashTableEntry *slot = table->contents; slot != end; slot++) {
    slot->key = NULL;
    slot->value = NULL;
    slot->next = NULL;
    slot->hashval = 0;
  }
  return table;
}
|
||||
|
||||
// Look up `key` and return the associated value, or NULL if it is absent.
// (A stored NULL value is indistinguishable from "absent"; use
// h_hashtable_present to tell them apart.)
void* h_hashtable_get(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    // Empty bucket heads and slots cleared by h_hashtable_del have key == NULL
    // and hashval == 0. Skip them so a key that happens to hash to 0 is never
    // compared against NULL by equalFunc.
    if (hte->key == NULL)
      continue;
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
      return hte->value;
  }
  return NULL;
}
|
||||
|
||||
// Insert `key` -> `value`, or overwrite the value if an equal key is already
// stored. `used` is bumped only when a new entry is created.
void h_hashtable_put(HHashTable* ht, void* key, void* value) {
  // TODO: start with a rebalancing; the call is currently disabled:
  //h_hashtable_ensure_capacity(ht, ht->used + 1);

  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
  if (hte->key != NULL) {
    // Bucket is occupied: compare against EVERY entry of the chain. The
    // previous loop stopped as soon as it reached the last entry without
    // comparing it, so a key stored at the tail was duplicated, not updated.
    for (;;) {
      if (hte->key != NULL && hte->hashval == hashval && ht->equalFunc(key, hte->key))
        goto insert_here;
      if (hte->next == NULL)
        break;
      hte = hte->next;
    }
    // No match; hte is the tail. Add a new link...
    hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
    hte = hte->next;
    hte->next = NULL;
  }
  // Reached only when a fresh slot (empty bucket head or new link) is filled.
  ht->used++;

 insert_here:
  hte->key = key;
  hte->value = value;
  hte->hashval = hashval;
}
|
||||
|
||||
// Return nonzero if `key` is stored in the table, zero otherwise.
int h_hashtable_present(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    // Empty slots carry key == NULL and hashval == 0; never hand them to
    // equalFunc (a key hashing to 0 would otherwise be compared with NULL).
    if (hte->key == NULL)
      continue;
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
      return true;
  }
  return false;
}
|
||||
// Remove the entry for `key`, if there is one. Keys and values themselves are
// not freed. `used` is decremented to mirror the increment in h_hashtable_put.
void h_hashtable_del(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
       hte != NULL;
       hte = hte->next) {
    // Skip empty slots (key == NULL, hashval == 0) so equalFunc never sees a
    // NULL key, and skip entries whose cached hash differs.
    if (hte->key == NULL || hte->hashval != hashval)
      continue;
    if (!ht->equalFunc(key, hte->key))
      continue;

    // FIXME: Leaks keys and values.
    HHashTableEntry* hten = hte->next;
    if (hten != NULL) {
      // Pull the successor into this slot and release the successor's node.
      *hte = *hten;
      h_arena_free(ht->arena, hten);
    } else {
      // Last entry of the chain: mark the slot as empty in place.
      hte->key = hte->value = NULL;
      hte->hashval = 0;
    }
    ht->used--;
    return;
  }
}
|
||||
// Release the table's storage: every overflow link hanging off a bucket head,
// then the bucket array itself. The bucket heads live inside the array, so
// only the links after them are freed individually.
// FIXME: keys and values are not freed (leak).
void h_hashtable_free(HHashTable* ht) {
  for (size_t bucket = 0; bucket < ht->capacity; bucket++) {
    HHashTableEntry *link = ht->contents[bucket].next;
    while (link != NULL) {
      HHashTableEntry *following = link->next;
      h_arena_free(ht->arena, link);
      link = following;
    }
  }
  h_arena_free(ht->arena, ht->contents);
}
|
||||
|
||||
|
|
|
|||
177
src/glue.c
Normal file
177
src/glue.c
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
#include "glue.h"
|
||||
#include "../src/internal.h" // for h_carray_*
|
||||
|
||||
|
||||
// The action equivalent of h_ignore.
|
||||
const HParsedToken *h_act_ignore(const HParseResult *p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Helper to build HAction's that pick one index out of a sequence.
|
||||
const HParsedToken *h_act_index(int i, const HParseResult *p)
|
||||
{
|
||||
if(!p) return NULL;
|
||||
|
||||
const HParsedToken *tok = p->ast;
|
||||
|
||||
if(!tok || tok->token_type != TT_SEQUENCE)
|
||||
return NULL;
|
||||
|
||||
const HCountedArray *seq = tok->seq;
|
||||
size_t n = seq->used;
|
||||
|
||||
if(i<0 || (size_t)i>=n)
|
||||
return NULL;
|
||||
else
|
||||
return tok->seq->elements[i];
|
||||
}
|
||||
|
||||
// Action version of h_seq_flatten.
|
||||
const HParsedToken *h_act_flatten(const HParseResult *p) {
|
||||
return h_seq_flatten(p->arena, p->ast);
|
||||
}
|
||||
|
||||
// Low-level helper for the h_make family.
|
||||
HParsedToken *h_make_(HArena *arena, HTokenType type)
|
||||
{
|
||||
HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||
ret->token_type = type;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Build a token of a user-defined type (type >= TT_USER) that carries `value`
// in its `user` field.
HParsedToken *h_make(HArena *arena, HTokenType type, void *value)
{
  assert(type >= TT_USER);

  HParsedToken *tok = h_make_(arena, type);
  tok->user = value;
  return tok;
}
|
||||
|
||||
// Build a TT_SEQUENCE token backed by a fresh counted array from h_carray_new.
HParsedToken *h_make_seq(HArena *arena)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new(arena);
  return tok;
}
|
||||
|
||||
// Build a TT_SEQUENCE token whose counted array is created by
// h_carray_new_sized with size hint `n`.
HParsedToken *h_make_seqn(HArena *arena, size_t n)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new_sized(arena, n);
  return tok;
}
|
||||
|
||||
// Build a TT_BYTES token with an arena-allocated buffer of `len` bytes.
// The buffer contents are not initialized here.
HParsedToken *h_make_bytes(HArena *arena, size_t len)
{
  HParsedToken *tok = h_make_(arena, TT_BYTES);
  tok->bytes.token = h_arena_malloc(arena, len);
  tok->bytes.len = len;
  return tok;
}
|
||||
|
||||
// Build a TT_SINT token holding the signed value `val`.
HParsedToken *h_make_sint(HArena *arena, int64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_SINT);
  tok->sint = val;
  return tok;
}
|
||||
|
||||
// Build a TT_UINT token holding the unsigned value `val`.
HParsedToken *h_make_uint(HArena *arena, uint64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_UINT);
  tok->uint = val;
  return tok;
}
|
||||
|
||||
// XXX -> internal
|
||||
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
|
||||
{
|
||||
assert(i < a->used);
|
||||
return a->elements[i];
|
||||
}
|
||||
|
||||
size_t h_seq_len(const HParsedToken *p)
|
||||
{
|
||||
assert(p != NULL);
|
||||
assert(p->token_type == TT_SEQUENCE);
|
||||
return p->seq->used;
|
||||
}
|
||||
|
||||
HParsedToken **h_seq_elements(const HParsedToken *p)
|
||||
{
|
||||
assert(p != NULL);
|
||||
assert(p->token_type == TT_SEQUENCE);
|
||||
return p->seq->elements;
|
||||
}
|
||||
|
||||
HParsedToken *h_seq_index(const HParsedToken *p, size_t i)
|
||||
{
|
||||
assert(p != NULL);
|
||||
assert(p->token_type == TT_SEQUENCE);
|
||||
return h_carray_index(p->seq, i);
|
||||
}
|
||||
|
||||
// Variadic front end for h_seq_index_vpath: `i` is the first index, and the
// remaining int arguments are forwarded as a va_list.
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
{
  va_list rest;
  HParsedToken *found;

  va_start(rest, i);
  found = h_seq_index_vpath(p, i, rest);
  va_end(rest);

  return found;
}
|
||||
|
||||
// Follow a path of indices through nested sequences, starting at `p`.
// `i` is the first index; `va` supplies further indices as ints and must be
// terminated by a negative value (H_INDEX_TOKEN appends -1). Every token on
// the path except the last must be a sequence (asserted by h_seq_index).
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
{
  HParsedToken *ret = h_seq_index(p, i);
  int j;

  // Descend one level per index: each step indexes into the element reached
  // so far. (Indexing `p` again here would ignore the nesting and merely
  // return p[last index].)
  while((j = va_arg(va, int)) >= 0)
    ret = h_seq_index(ret, j);

  return ret;
}
|
||||
|
||||
// Append the single token `x` to the end of the sequence token `xs`.
// Asserts that `xs` is a non-NULL TT_SEQUENCE token.
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);

  // The counted array stores non-const pointers, hence the cast.
  HParsedToken *elem = (HParsedToken *)x;
  h_carray_append(xs->seq, elem);
}
|
||||
|
||||
// Append every element of sequence `ys` to sequence `xs`, in order. The
// element pointers are shared, not copied. Both arguments are asserted to be
// non-NULL TT_SEQUENCE tokens.
void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);
  assert(ys != NULL);
  assert(ys->token_type == TT_SEQUENCE);

  size_t k = 0;
  while(k < ys->seq->used) {
    h_carray_append(xs->seq, ys->seq->elements[k]);
    k++;
  }
}
|
||||
|
||||
// Flatten nested sequences. Always returns a sequence.
|
||||
// If input element is not a sequence, returns it as a singleton sequence.
|
||||
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p)
|
||||
{
|
||||
assert(p != NULL);
|
||||
|
||||
HParsedToken *ret = h_make_seq(arena);
|
||||
switch(p->token_type) {
|
||||
case TT_SEQUENCE:
|
||||
// Flatten and append all.
|
||||
for(size_t i; i<p->seq->used; i++) {
|
||||
h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i)));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// Make singleton sequence.
|
||||
h_seq_snoc(ret, p);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
253
src/glue.h
Normal file
253
src/glue.h
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
//
|
||||
// API additions for writing grammar and semantic actions more concisely
|
||||
//
|
||||
//
|
||||
// Quick Overview:
|
||||
//
|
||||
// Grammars can be succinctly specified with the family of H_RULE macros.
|
||||
// H_RULE defines a plain parser variable. H_ARULE additionally attaches a
|
||||
// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE
|
||||
// combine both.
|
||||
//
|
||||
// A few standard semantic actions are defined below. The H_ACT_APPLY macro
|
||||
// allows semantic actions to be defined by "partial application" of
|
||||
// a generic action to fixed parameters.
|
||||
//
|
||||
// The definition of more complex semantic actions will usually consist of
|
||||
// extracting data from the given parse tree and constructing a token of custom
|
||||
// type to represent the result. A number of functions and convenience macros
|
||||
// are provided to capture the most common cases and idioms.
|
||||
//
|
||||
// See the leading comment blocks on the sections below for more details.
|
||||
//
|
||||
|
||||
#ifndef HAMMER_GLUE__H
|
||||
#define HAMMER_GLUE__H
|
||||
|
||||
#include <assert.h>
|
||||
#include "hammer.h"
|
||||
|
||||
|
||||
//
|
||||
// Grammar specification
|
||||
//
|
||||
// H_RULE is simply a short-hand for the typical declaration and definition of
|
||||
// a parser variable. See its plain definition below. The goal is to save
|
||||
// horizontal space as well as to provide a clear and unified look together with
|
||||
// the other macro variants that stays close to an abstract PEG or BNF grammar.
|
||||
// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their
|
||||
// combinations as they allow the definition of syntax to be given without
|
||||
// intermingling it with the semantic specifications.
|
||||
//
|
||||
// H_ARULE defines a variable just like H_RULE but attaches a semantic action
|
||||
// to the result of the parser via h_action. The action is expected to be
|
||||
// named act_<rulename>.
|
||||
//
|
||||
// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool.
|
||||
// The validation is expected to be named validate_<rulename>.
|
||||
//
|
||||
// H_VARULE combines H_RULE with both an action and a validation. The action is
|
||||
// attached before the validation, i.e. the validation receives as input the
|
||||
// result of the action.
|
||||
//
|
||||
// H_AVRULE is like H_VARULE but the action is attached outside the validation,
|
||||
// i.e. the validation receives the uninterpreted AST as input.
|
||||
//
|
||||
|
||||
|
||||
#define H_RULE(rule, def) const HParser *rule = def
|
||||
#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule)
|
||||
#define H_VRULE(rule, def) const HParser *rule = \
|
||||
h_attr_bool(def, validate_ ## rule)
|
||||
#define H_VARULE(rule, def) const HParser *rule = \
|
||||
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
|
||||
#define H_AVRULE(rule, def) const HParser *rule = \
|
||||
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
|
||||
|
||||
|
||||
//
|
||||
// Pre-fab semantic actions
|
||||
//
|
||||
// A collection of generally useful semantic actions is provided.
|
||||
//
|
||||
// h_act_ignore is the action equivalent of the parser combinator h_ignore. It
|
||||
// simply causes the AST it is applied to to be replaced with NULL. This most
|
||||
// importantly causes it to be elided from the result of a surrounding
|
||||
// h_sequence.
|
||||
//
|
||||
// h_act_index is of note as it is not itself suitable to be passed to
|
||||
// h_action. It is parameterized by an index to be picked from a sequence
|
||||
// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY
|
||||
// macro provides a concise way to define such a parameter-application wrapper.
|
||||
//
|
||||
// h_act_flatten acts on a token of possibly nested sequences by recursively
|
||||
// flattening it into a single sequence. Cf. h_seq_flatten below.
|
||||
//
|
||||
// H_ACT_APPLY implements "partial application" for semantic actions. It
|
||||
// defines a new action that supplies given parameters to a parameterized
|
||||
// action such as h_act_index.
|
||||
//
|
||||
|
||||
const HParsedToken *h_act_ignore(const HParseResult *p);
|
||||
const HParsedToken *h_act_index(int i, const HParseResult *p);
|
||||
const HParsedToken *h_act_flatten(const HParseResult *p);
|
||||
|
||||
// Define 'myaction' as a specialization of 'paction' by supplying the leading
|
||||
// parameters.
|
||||
#define H_ACT_APPLY(myaction, paction, ...) \
|
||||
const HParsedToken *myaction(const HParseResult *p) { \
|
||||
return paction(__VA_ARGS__, p); \
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Working with HParsedTokens
|
||||
//
|
||||
// The type HParsedToken represents a dynamically-typed universe of values.
|
||||
// Declared below are constructors to turn ordinary values into their
|
||||
// HParsedToken equivalents, extractors to retrieve the original values from
|
||||
// inside an HParsedToken, and functions that inspect and modify tokens of
|
||||
// sequence type directly.
|
||||
//
|
||||
// In addition, there are a number of short-hand macros that work with some
|
||||
// conventions to eliminate common boilerplate. These conventions are listed
|
||||
// below. Be sure to follow them if you want to use the respective macros.
|
||||
//
|
||||
// * The single argument to semantic actions should be called 'p'.
|
||||
//
|
||||
// The H_MAKE macros supply 'p->arena' to their underlying h_make
|
||||
// counterparts. The H_FIELD macros supply 'p->ast' to their underlying
|
||||
// H_INDEX counterparts.
|
||||
//
|
||||
// * For each custom token type, there should be a typedef for the
|
||||
// corresponding value type.
|
||||
//
|
||||
// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to
|
||||
// a pointer to the given type.
|
||||
//
|
||||
// * For each custom token type, say 'foo_t', there must be an integer
|
||||
// constant 'TT_foo_t' to identify the token type. This constant must have a
|
||||
// value greater than or equal to TT_USER.
|
||||
//
|
||||
// One idiom is to define an enum for all custom token types and to assign a
|
||||
// value of TT_USER to the first element. This can be viewed as extending
|
||||
// the HTokenType enum.
|
||||
//
|
||||
// The H_MAKE and H_ASSERT macros derive the name of the token type constant
|
||||
// from the given type name.
|
||||
//
|
||||
//
|
||||
// The H_ALLOC macro is useful for allocating values of custom token types.
|
||||
//
|
||||
// The H_MAKE family of macros construct tokens of a given type. The native
|
||||
// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ.
|
||||
// The form with no suffix is used for custom token types. This convention is
|
||||
// also used for other macro and function families.
|
||||
//
|
||||
// The H_ASSERT family simply asserts that a given token has the expected type.
|
||||
// It mainly serves as an implementation aid for H_CAST. Of note in that regard
|
||||
// is that, unlike the standard 'assert' macro, these form _expressions_ that
|
||||
// return the value of their token argument; thus they can be used in a
|
||||
// "pass-through" fashion inside other expressions.
|
||||
//
|
||||
// The H_CAST family combines a type assertion with access to the
|
||||
// statically-typed value inside a token.
|
||||
//
|
||||
// A number of functions h_seq_* operate on and inspect sequence tokens.
|
||||
// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence.
|
||||
// Therefore there are h_seq_snoc and h_seq_append to build up sequences.
|
||||
//
|
||||
// The macro families H_FIELD and H_INDEX combine index access on a sequence
|
||||
// with a cast to the appropriate result type. H_FIELD is used to access the
|
||||
// elements of the argument token 'p' in an action. H_INDEX allows any sequence
|
||||
// token to be specified. Both macro families take an arbitrary number of index
|
||||
// arguments, giving access to elements in nested sequences by path.
|
||||
// These macros are very useful to avoid spaghetti chains of unchecked pointer
|
||||
// dereferences.
|
||||
//
|
||||
|
||||
// Standard short-hand for arena-allocating a variable in a semantic action.
|
||||
#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP)))
|
||||
|
||||
// Token constructors...
|
||||
|
||||
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
|
||||
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
|
||||
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
|
||||
HParsedToken *h_make_bytes(HArena *arena, size_t len);
|
||||
HParsedToken *h_make_sint(HArena *arena, int64_t val);
|
||||
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
|
||||
|
||||
// Standard short-hands to make tokens in an action.
|
||||
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
|
||||
#define H_MAKE_SEQ() h_make_seq(p->arena)
|
||||
#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N)
|
||||
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
|
||||
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
|
||||
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
|
||||
|
||||
// Extract (cast) type-specific value back from HParsedTokens...
|
||||
|
||||
// Pass-through assertion that a given token has the expected type.
// Evaluates to P, so it can be embedded in larger expressions. P appears twice
// in the expansion (inside the assert and as the result), so it should be free
// of side effects. Both arguments are parenthesized so that arbitrary
// expressions (e.g. `base + 1`) expand correctly.
#define h_assert_type(T,P)  (assert((P)->token_type == (HTokenType)(T)), (P))
|
||||
|
||||
// Convenience short-hand forms of h_assert_type.
|
||||
#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK)
|
||||
#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK)
|
||||
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
|
||||
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
|
||||
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
|
||||
|
||||
// Assert expected type and return contained value.
|
||||
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
|
||||
#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq)
|
||||
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
|
||||
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
|
||||
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
|
||||
|
||||
// Sequence access...
|
||||
|
||||
// Return the length of a sequence.
|
||||
size_t h_seq_len(const HParsedToken *p);
|
||||
|
||||
// Access a sequence's element array.
|
||||
HParsedToken **h_seq_elements(const HParsedToken *p);
|
||||
|
||||
// Access a sequence element by index.
|
||||
HParsedToken *h_seq_index(const HParsedToken *p, size_t i);
|
||||
|
||||
// Access an element in a nested sequence by a path of indices.
|
||||
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...);
|
||||
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
|
||||
|
||||
// Convenience macros combining (nested) index access and h_cast.
|
||||
#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||
#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
|
||||
|
||||
// Standard short-hand to access and cast elements on a sequence token.
|
||||
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
|
||||
#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__)
|
||||
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
|
||||
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
|
||||
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
|
||||
|
||||
// Lower-level helper for h_seq_index.
|
||||
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
|
||||
|
||||
// Sequence modification...
|
||||
|
||||
// Add elements to a sequence.
|
||||
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one
|
||||
void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many
|
||||
|
||||
// XXX TODO: Remove elements from a sequence.
|
||||
|
||||
// Flatten nested sequences into one.
|
||||
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p);
|
||||
|
||||
|
||||
#endif
|
||||
622
src/hammer.c
622
src/hammer.c
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <error.h>
|
||||
#include <err.h>
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
|
@ -26,202 +26,14 @@
|
|||
#include "allocator.h"
|
||||
#include "parsers/parser_internal.h"
|
||||
|
||||
static guint djbhash(const uint8_t *buf, size_t len) {
|
||||
guint hash = 5381;
|
||||
static uint32_t djbhash(const uint8_t *buf, size_t len) {
|
||||
uint32_t hash = 5381;
|
||||
while (len--) {
|
||||
hash = hash * 33 + *buf++;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
// short-hand for constructing HCachedResult's
|
||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||
ret->result = result;
|
||||
ret->input_stream = state->input_stream;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
|
||||
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
|
||||
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when.
|
||||
HParseResult *tmp_res;
|
||||
if (parser) {
|
||||
HInputStream bak = state->input_stream;
|
||||
tmp_res = parser->vtable->parse(parser->env, state);
|
||||
if (tmp_res) {
|
||||
tmp_res->arena = state->arena;
|
||||
if (!state->input_stream.overrun) {
|
||||
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
|
||||
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
|
||||
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
|
||||
else
|
||||
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
|
||||
} else
|
||||
tmp_res->bit_length = 0;
|
||||
}
|
||||
} else
|
||||
tmp_res = NULL;
|
||||
if (state->input_stream.overrun)
|
||||
return NULL; // overrun is always failure.
|
||||
#ifdef CONSISTENCY_CHECK
|
||||
if (!tmp_res) {
|
||||
state->input_stream = INVALID;
|
||||
state->input_stream.input = key->input_pos.input;
|
||||
}
|
||||
#endif
|
||||
return tmp_res;
|
||||
}
|
||||
|
||||
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
|
||||
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
|
||||
HRecursionHead *head = g_hash_table_lookup(state->recursion_heads, k);
|
||||
if (!head) { // No heads found
|
||||
return cached;
|
||||
} else { // Some heads found
|
||||
if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) {
|
||||
// Nothing in the cache, and the key parser is not involved
|
||||
HParseResult *tmp = a_new(HParseResult, 1);
|
||||
tmp->ast = NULL; tmp->arena = state->arena;
|
||||
HParserCacheValue *ret = a_new(HParserCacheValue, 1);
|
||||
ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
|
||||
return ret;
|
||||
}
|
||||
if (g_slist_find(head->eval_set, k->parser)) {
|
||||
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
|
||||
head->eval_set = g_slist_remove_all(head->eval_set, k->parser);
|
||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
||||
// we know that cached has an entry here, modify it
|
||||
if (!cached)
|
||||
cached = a_new(HParserCacheValue, 1);
|
||||
cached->value_type = PC_RIGHT;
|
||||
cached->right = cached_result(state, tmp_res);
|
||||
}
|
||||
return cached;
|
||||
}
|
||||
}
|
||||
|
||||
/* Setting up the left recursion. We have the LR for the rule head;
|
||||
* we modify the involved_sets of all LRs in the stack, until we
|
||||
* see the current parser again.
|
||||
*/
|
||||
|
||||
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
||||
if (!rec_detect->head) {
|
||||
HRecursionHead *some = a_new(HRecursionHead, 1);
|
||||
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
|
||||
rec_detect->head = some;
|
||||
}
|
||||
size_t i = 0;
|
||||
HLeftRec *lr = g_queue_peek_nth(state->lr_stack, i);
|
||||
while (lr && lr->rule != p) {
|
||||
lr->head = rec_detect->head;
|
||||
lr->head->involved_set = g_slist_prepend(lr->head->involved_set, (gpointer)lr->rule);
|
||||
}
|
||||
}
|
||||
|
||||
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
|
||||
* future parse.
|
||||
*/
|
||||
|
||||
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
|
||||
// Store the head into the recursion_heads
|
||||
g_hash_table_replace(state->recursion_heads, k, head);
|
||||
HParserCacheValue *old_cached = g_hash_table_lookup(state->cache, k);
|
||||
if (!old_cached || PC_LEFT == old_cached->value_type)
|
||||
errx(1, "impossible match");
|
||||
HParseResult *old_res = old_cached->right->result;
|
||||
|
||||
// reset the eval_set of the head of the recursion at each beginning of growth
|
||||
head->eval_set = head->involved_set;
|
||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
||||
|
||||
if (tmp_res) {
|
||||
if ((old_res->ast->index < tmp_res->ast->index) ||
|
||||
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
|
||||
HParserCacheValue *v = a_new(HParserCacheValue, 1);
|
||||
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
|
||||
g_hash_table_replace(state->cache, k, v);
|
||||
return grow(k, state, head);
|
||||
} else {
|
||||
// we're done with growing, we can remove data from the recursion head
|
||||
g_hash_table_remove(state->recursion_heads, k);
|
||||
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
|
||||
if (cached && PC_RIGHT == cached->value_type) {
|
||||
return cached->right->result;
|
||||
} else {
|
||||
errx(1, "impossible match");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
g_hash_table_remove(state->recursion_heads, k);
|
||||
return old_res;
|
||||
}
|
||||
}
|
||||
|
||||
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
|
||||
if (growable->head) {
|
||||
if (growable->head->head_parser != k->parser) {
|
||||
// not the head rule, so not growing
|
||||
return growable->seed;
|
||||
}
|
||||
else {
|
||||
// update cache
|
||||
HParserCacheValue *v = a_new(HParserCacheValue, 1);
|
||||
v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
|
||||
g_hash_table_replace(state->cache, k, v);
|
||||
if (!growable->seed)
|
||||
return NULL;
|
||||
else
|
||||
return grow(k, state, growable->head);
|
||||
}
|
||||
} else {
|
||||
errx(1, "lrAnswer with no head");
|
||||
}
|
||||
}
|
||||
|
||||
/* Warth's recursion. Hi Alessandro! */
|
||||
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
||||
HParserCacheKey *key = a_new(HParserCacheKey, 1);
|
||||
key->input_pos = state->input_stream; key->parser = parser;
|
||||
HParserCacheValue *m = recall(key, state);
|
||||
// check to see if there is already a result for this object...
|
||||
if (!m) {
|
||||
// It doesn't exist, so create a dummy result to cache
|
||||
HLeftRec *base = a_new(HLeftRec, 1);
|
||||
base->seed = NULL; base->rule = parser; base->head = NULL;
|
||||
g_queue_push_head(state->lr_stack, base);
|
||||
// cache it
|
||||
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
|
||||
dummy->value_type = PC_LEFT; dummy->left = base;
|
||||
g_hash_table_replace(state->cache, key, dummy);
|
||||
// parse the input
|
||||
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
|
||||
// the base variable has passed equality tests with the cache
|
||||
g_queue_pop_head(state->lr_stack);
|
||||
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
|
||||
if (NULL == base->head) {
|
||||
HParserCacheValue *right = a_new(HParserCacheValue, 1);
|
||||
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
|
||||
g_hash_table_replace(state->cache, key, right);
|
||||
return tmp_res;
|
||||
} else {
|
||||
base->seed = tmp_res;
|
||||
HParseResult *res = lr_answer(key, state, base);
|
||||
return res;
|
||||
}
|
||||
} else {
|
||||
// it exists!
|
||||
if (PC_LEFT == m->value_type) {
|
||||
setupLR(parser, state, m->left);
|
||||
return m->left->seed; // BUG: this might not be correct
|
||||
} else {
|
||||
state->input_stream = m->right->input_stream;
|
||||
return m->right->result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper function, since these lines appear in every parser */
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -230,35 +42,38 @@ typedef struct {
|
|||
} HTwoParsers;
|
||||
|
||||
|
||||
static guint cache_key_hash(gconstpointer key) {
|
||||
static uint32_t cache_key_hash(const void* key) {
|
||||
return djbhash(key, sizeof(HParserCacheKey));
|
||||
}
|
||||
static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) {
|
||||
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||
}
|
||||
|
||||
|
||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
||||
return h_parse__m(&system_allocator, parser, input, length);
|
||||
}
|
||||
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
||||
// Set up a parse state...
|
||||
HArena * arena = h_new_arena(0);
|
||||
HArena * arena = h_new_arena(mm__, 0);
|
||||
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
||||
parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func
|
||||
cache_key_equal);// key_equal_func
|
||||
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
|
||||
cache_key_hash); // hash_func
|
||||
parse_state->input_stream.input = input;
|
||||
parse_state->input_stream.index = 0;
|
||||
parse_state->input_stream.bit_offset = 8; // bit big endian
|
||||
parse_state->input_stream.overrun = 0;
|
||||
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
|
||||
parse_state->input_stream.length = length;
|
||||
parse_state->lr_stack = g_queue_new();
|
||||
parse_state->recursion_heads = g_hash_table_new(cache_key_hash,
|
||||
cache_key_equal);
|
||||
parse_state->lr_stack = h_slist_new(arena);
|
||||
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
|
||||
cache_key_hash);
|
||||
parse_state->arena = arena;
|
||||
HParseResult *res = h_do_parse(parser, parse_state);
|
||||
g_queue_free(parse_state->lr_stack);
|
||||
g_hash_table_destroy(parse_state->recursion_heads);
|
||||
h_slist_free(parse_state->lr_stack);
|
||||
h_hashtable_free(parse_state->recursion_heads);
|
||||
// tear down the parse state
|
||||
g_hash_table_destroy(parse_state->cache);
|
||||
h_hashtable_free(parse_state->cache);
|
||||
if (!res)
|
||||
h_delete_arena(parse_state->arena);
|
||||
|
||||
|
|
@ -269,405 +84,4 @@ void h_parse_result_free(HParseResult *result) {
|
|||
h_delete_arena(result->arena);
|
||||
}
|
||||
|
||||
#ifdef INCLUDE_TESTS
|
||||
|
||||
#include "test_suite.h"
|
||||
static void test_token(void) {
|
||||
const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3);
|
||||
|
||||
g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>");
|
||||
g_check_parse_failed(token_, "95", 2);
|
||||
}
|
||||
|
||||
static void test_ch(void) {
|
||||
const HParser *ch_ = h_ch(0xa2);
|
||||
|
||||
g_check_parse_ok(ch_, "\xa2", 1, "u0xa2");
|
||||
g_check_parse_failed(ch_, "\xa3", 1);
|
||||
}
|
||||
|
||||
static void test_ch_range(void) {
|
||||
const HParser *range_ = h_ch_range('a', 'c');
|
||||
|
||||
g_check_parse_ok(range_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(range_, "d", 1);
|
||||
}
|
||||
|
||||
//@MARK_START
|
||||
static void test_int64(void) {
|
||||
const HParser *int64_ = h_int64();
|
||||
|
||||
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
|
||||
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_int32(void) {
|
||||
const HParser *int32_ = h_int32();
|
||||
|
||||
g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000");
|
||||
g_check_parse_failed(int32_, "\xff\xfe\x00", 3);
|
||||
}
|
||||
|
||||
static void test_int16(void) {
|
||||
const HParser *int16_ = h_int16();
|
||||
|
||||
g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200");
|
||||
g_check_parse_failed(int16_, "\xfe", 1);
|
||||
}
|
||||
|
||||
static void test_int8(void) {
|
||||
const HParser *int8_ = h_int8();
|
||||
|
||||
g_check_parse_ok(int8_, "\x88", 1, "s-0x78");
|
||||
g_check_parse_failed(int8_, "", 0);
|
||||
}
|
||||
|
||||
static void test_uint64(void) {
|
||||
const HParser *uint64_ = h_uint64();
|
||||
|
||||
g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
|
||||
g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_uint32(void) {
|
||||
const HParser *uint32_ = h_uint32();
|
||||
|
||||
g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000");
|
||||
g_check_parse_failed(uint32_, "\x00\x02\x00", 3);
|
||||
}
|
||||
|
||||
static void test_uint16(void) {
|
||||
const HParser *uint16_ = h_uint16();
|
||||
|
||||
g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200");
|
||||
g_check_parse_failed(uint16_, "\x02", 1);
|
||||
}
|
||||
|
||||
static void test_uint8(void) {
|
||||
const HParser *uint8_ = h_uint8();
|
||||
|
||||
g_check_parse_ok(uint8_, "\x78", 1, "u0x78");
|
||||
g_check_parse_failed(uint8_, "", 0);
|
||||
}
|
||||
//@MARK_END
|
||||
|
||||
static void test_int_range(void) {
|
||||
const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
|
||||
|
||||
g_check_parse_ok(int_range_, "\x05", 1, "u0x5");
|
||||
g_check_parse_failed(int_range_, "\xb", 1);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void test_float64(void) {
|
||||
const HParser *float64_ = h_float64();
|
||||
|
||||
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
|
||||
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_float32(void) {
|
||||
const HParser *float32_ = h_float32();
|
||||
|
||||
g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
|
||||
g_check_parse_failed(float32_, "\x3f\x80\x00");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void test_whitespace(void) {
|
||||
const HParser *whitespace_ = h_whitespace(h_ch('a'));
|
||||
|
||||
g_check_parse_ok(whitespace_, "a", 1, "u0x61");
|
||||
g_check_parse_ok(whitespace_, " a", 2, "u0x61");
|
||||
g_check_parse_ok(whitespace_, " a", 3, "u0x61");
|
||||
g_check_parse_ok(whitespace_, "\ta", 2, "u0x61");
|
||||
g_check_parse_failed(whitespace_, "_a", 2);
|
||||
}
|
||||
|
||||
static void test_left(void) {
|
||||
const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
|
||||
|
||||
g_check_parse_ok(left_, "a ", 2, "u0x61");
|
||||
g_check_parse_failed(left_, "a", 1);
|
||||
g_check_parse_failed(left_, " ", 1);
|
||||
g_check_parse_failed(left_, "ab", 2);
|
||||
}
|
||||
|
||||
static void test_right(void) {
|
||||
const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
|
||||
|
||||
g_check_parse_ok(right_, " a", 2, "u0x61");
|
||||
g_check_parse_failed(right_, "a", 1);
|
||||
g_check_parse_failed(right_, " ", 1);
|
||||
g_check_parse_failed(right_, "ba", 2);
|
||||
}
|
||||
|
||||
static void test_middle(void) {
|
||||
const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
|
||||
|
||||
g_check_parse_ok(middle_, " a ", 3, "u0x61");
|
||||
g_check_parse_failed(middle_, "a", 1);
|
||||
g_check_parse_failed(middle_, " ", 1);
|
||||
g_check_parse_failed(middle_, " a", 2);
|
||||
g_check_parse_failed(middle_, "a ", 2);
|
||||
g_check_parse_failed(middle_, " b ", 3);
|
||||
g_check_parse_failed(middle_, "ba ", 3);
|
||||
g_check_parse_failed(middle_, " ab", 3);
|
||||
}
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
const HParsedToken* upcase(const HParseResult *p) {
|
||||
switch(p->ast->token_type) {
|
||||
case TT_SEQUENCE:
|
||||
{
|
||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
|
||||
ret->token_type = TT_SEQUENCE;
|
||||
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
||||
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
|
||||
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
|
||||
tmp->token_type = TT_UINT;
|
||||
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
|
||||
h_carray_append(seq, tmp);
|
||||
} else {
|
||||
h_carray_append(seq, p->ast->seq->elements[i]);
|
||||
}
|
||||
}
|
||||
ret->seq = seq;
|
||||
return (const HParsedToken*)ret;
|
||||
}
|
||||
case TT_UINT:
|
||||
{
|
||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||
ret->token_type = TT_UINT;
|
||||
ret->uint = toupper(p->ast->uint);
|
||||
return (const HParsedToken*)ret;
|
||||
}
|
||||
default:
|
||||
return p->ast;
|
||||
}
|
||||
}
|
||||
|
||||
static void test_action(void) {
|
||||
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
|
||||
h_ch('A'),
|
||||
NULL),
|
||||
h_choice(h_ch('b'),
|
||||
h_ch('B'),
|
||||
NULL),
|
||||
NULL),
|
||||
upcase);
|
||||
|
||||
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
|
||||
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
|
||||
g_check_parse_failed(action_, "XX", 2);
|
||||
}
|
||||
|
||||
static void test_in(void) {
|
||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||
const HParser *in_ = h_in(options, 3);
|
||||
g_check_parse_ok(in_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(in_, "d", 1);
|
||||
|
||||
}
|
||||
|
||||
static void test_not_in(void) {
|
||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||
const HParser *not_in_ = h_not_in(options, 3);
|
||||
g_check_parse_ok(not_in_, "d", 1, "u0x64");
|
||||
g_check_parse_failed(not_in_, "a", 1);
|
||||
|
||||
}
|
||||
|
||||
static void test_end_p(void) {
|
||||
const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL);
|
||||
g_check_parse_ok(end_p_, "a", 1, "(u0x61)");
|
||||
g_check_parse_failed(end_p_, "aa", 2);
|
||||
}
|
||||
|
||||
static void test_nothing_p(void) {
|
||||
const HParser *nothing_p_ = h_nothing_p();
|
||||
g_check_parse_failed(nothing_p_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_sequence(void) {
|
||||
const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL);
|
||||
const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);
|
||||
|
||||
g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_failed(sequence_1, "a", 1);
|
||||
g_check_parse_failed(sequence_1, "b", 1);
|
||||
g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)");
|
||||
}
|
||||
|
||||
static void test_choice(void) {
|
||||
const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL);
|
||||
|
||||
g_check_parse_ok(choice_, "a", 1, "u0x61");
|
||||
g_check_parse_ok(choice_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(choice_, "c", 1);
|
||||
}
|
||||
|
||||
static void test_butnot(void) {
|
||||
const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
|
||||
const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6'));
|
||||
|
||||
g_check_parse_ok(butnot_1, "a", 1, "u0x61");
|
||||
g_check_parse_failed(butnot_1, "ab", 2);
|
||||
g_check_parse_ok(butnot_1, "aa", 2, "u0x61");
|
||||
g_check_parse_failed(butnot_2, "6", 1);
|
||||
}
|
||||
|
||||
static void test_difference(void) {
|
||||
const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));
|
||||
|
||||
g_check_parse_ok(difference_, "ab", 2, "<61.62>");
|
||||
g_check_parse_failed(difference_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_xor(void) {
|
||||
const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));
|
||||
|
||||
g_check_parse_ok(xor_, "0", 1, "u0x30");
|
||||
g_check_parse_ok(xor_, "9", 1, "u0x39");
|
||||
g_check_parse_failed(xor_, "5", 1);
|
||||
g_check_parse_failed(xor_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_many(void) {
|
||||
const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||
g_check_parse_ok(many_, "adef", 4, "(u0x61)");
|
||||
g_check_parse_ok(many_, "bdef", 4, "(u0x62)");
|
||||
g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||
g_check_parse_ok(many_, "daabbabadef", 11, "()");
|
||||
}
|
||||
|
||||
static void test_many1(void) {
|
||||
const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||
|
||||
g_check_parse_ok(many1_, "adef", 4, "(u0x61)");
|
||||
g_check_parse_ok(many1_, "bdef", 4, "(u0x62)");
|
||||
g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||
g_check_parse_failed(many1_, "daabbabadef", 11);
|
||||
}
|
||||
|
||||
static void test_repeat_n(void) {
|
||||
const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);
|
||||
|
||||
g_check_parse_failed(repeat_n_, "adef", 4);
|
||||
g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)");
|
||||
g_check_parse_failed(repeat_n_, "dabdef", 6);
|
||||
}
|
||||
|
||||
static void test_optional(void) {
|
||||
const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);
|
||||
|
||||
g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)");
|
||||
g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)");
|
||||
g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)");
|
||||
g_check_parse_failed(optional_, "aed", 3);
|
||||
g_check_parse_failed(optional_, "ab", 2);
|
||||
g_check_parse_failed(optional_, "ac", 2);
|
||||
}
|
||||
|
||||
static void test_ignore(void) {
|
||||
const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);
|
||||
|
||||
g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)");
|
||||
g_check_parse_failed(ignore_, "ac", 2);
|
||||
}
|
||||
|
||||
static void test_sepBy1(void) {
|
||||
const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||
|
||||
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
|
||||
g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
|
||||
g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)");
|
||||
g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)");
|
||||
}
|
||||
|
||||
static void test_epsilon_p(void) {
|
||||
const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
|
||||
const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
|
||||
const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL);
|
||||
|
||||
g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)");
|
||||
g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)");
|
||||
}
|
||||
|
||||
static void test_attr_bool(void) {
|
||||
|
||||
}
|
||||
|
||||
static void test_and(void) {
|
||||
const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
|
||||
const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
|
||||
const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);
|
||||
|
||||
g_check_parse_ok(and_1, "0", 1, "(u0x30)");
|
||||
g_check_parse_failed(and_2, "0", 1);
|
||||
g_check_parse_ok(and_3, "12", 2, "(u0x31)");
|
||||
}
|
||||
|
||||
static void test_not(void) {
|
||||
const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
|
||||
const HParser *not_2 = h_sequence(h_ch('a'),
|
||||
h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
|
||||
h_token((const uint8_t*)"++", 2),
|
||||
NULL), h_ch('b'), NULL);
|
||||
|
||||
g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)");
|
||||
g_check_parse_failed(not_1, "a++b", 4);
|
||||
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
||||
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
||||
}
|
||||
|
||||
void register_parser_tests(void) {
|
||||
g_test_add_func("/core/parser/token", test_token);
|
||||
g_test_add_func("/core/parser/ch", test_ch);
|
||||
g_test_add_func("/core/parser/ch_range", test_ch_range);
|
||||
g_test_add_func("/core/parser/int64", test_int64);
|
||||
g_test_add_func("/core/parser/int32", test_int32);
|
||||
g_test_add_func("/core/parser/int16", test_int16);
|
||||
g_test_add_func("/core/parser/int8", test_int8);
|
||||
g_test_add_func("/core/parser/uint64", test_uint64);
|
||||
g_test_add_func("/core/parser/uint32", test_uint32);
|
||||
g_test_add_func("/core/parser/uint16", test_uint16);
|
||||
g_test_add_func("/core/parser/uint8", test_uint8);
|
||||
g_test_add_func("/core/parser/int_range", test_int_range);
|
||||
#if 0
|
||||
g_test_add_func("/core/parser/float64", test_float64);
|
||||
g_test_add_func("/core/parser/float32", test_float32);
|
||||
#endif
|
||||
g_test_add_func("/core/parser/whitespace", test_whitespace);
|
||||
g_test_add_func("/core/parser/left", test_left);
|
||||
g_test_add_func("/core/parser/right", test_right);
|
||||
g_test_add_func("/core/parser/middle", test_middle);
|
||||
g_test_add_func("/core/parser/action", test_action);
|
||||
g_test_add_func("/core/parser/in", test_in);
|
||||
g_test_add_func("/core/parser/not_in", test_not_in);
|
||||
g_test_add_func("/core/parser/end_p", test_end_p);
|
||||
g_test_add_func("/core/parser/nothing_p", test_nothing_p);
|
||||
g_test_add_func("/core/parser/sequence", test_sequence);
|
||||
g_test_add_func("/core/parser/choice", test_choice);
|
||||
g_test_add_func("/core/parser/butnot", test_butnot);
|
||||
g_test_add_func("/core/parser/difference", test_difference);
|
||||
g_test_add_func("/core/parser/xor", test_xor);
|
||||
g_test_add_func("/core/parser/many", test_many);
|
||||
g_test_add_func("/core/parser/many1", test_many1);
|
||||
g_test_add_func("/core/parser/repeat_n", test_repeat_n);
|
||||
g_test_add_func("/core/parser/optional", test_optional);
|
||||
g_test_add_func("/core/parser/sepBy1", test_sepBy1);
|
||||
g_test_add_func("/core/parser/epsilon_p", test_epsilon_p);
|
||||
g_test_add_func("/core/parser/attr_bool", test_attr_bool);
|
||||
g_test_add_func("/core/parser/and", test_and);
|
||||
g_test_add_func("/core/parser/not", test_not);
|
||||
g_test_add_func("/core/parser/ignore", test_ignore);
|
||||
}
|
||||
|
||||
#endif // #ifdef INCLUDE_TESTS
|
||||
|
|
|
|||
188
src/hammer.h
188
src/hammer.h
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#ifndef HAMMER_HAMMER__H
|
||||
#define HAMMER_HAMMER__H
|
||||
#include <glib.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include "allocator.h"
|
||||
|
|
@ -31,6 +31,12 @@ typedef int bool;
|
|||
|
||||
typedef struct HParseState_ HParseState;
|
||||
|
||||
typedef enum HParserBackend_ {
|
||||
PB_MIN = 0,
|
||||
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
||||
PB_MAX
|
||||
} HParserBackend;
|
||||
|
||||
typedef enum HTokenType_ {
|
||||
// Before you change the explicit values of these, think of the poor bindings ;_;
|
||||
TT_NONE = 1,
|
||||
|
|
@ -39,8 +45,7 @@ typedef enum HTokenType_ {
|
|||
TT_UINT = 8,
|
||||
TT_SEQUENCE = 16,
|
||||
TT_ERR = 32,
|
||||
TT_USER = 64,
|
||||
TT_MAX = 128
|
||||
TT_USER = 64
|
||||
} HTokenType;
|
||||
|
||||
typedef struct HCountedArray_ {
|
||||
|
|
@ -50,13 +55,15 @@ typedef struct HCountedArray_ {
|
|||
struct HParsedToken_ **elements;
|
||||
} HCountedArray;
|
||||
|
||||
typedef struct HBytes_ {
|
||||
const uint8_t *token;
|
||||
size_t len;
|
||||
} HBytes;
|
||||
|
||||
typedef struct HParsedToken_ {
|
||||
HTokenType token_type;
|
||||
union {
|
||||
struct {
|
||||
const uint8_t *token;
|
||||
size_t len;
|
||||
} bytes;
|
||||
HBytes bytes;
|
||||
int64_t sint;
|
||||
uint64_t uint;
|
||||
double dbl;
|
||||
|
|
@ -114,18 +121,76 @@ typedef struct HParser_ {
|
|||
void *env;
|
||||
} HParser;
|
||||
|
||||
// {{{ Stuff for benchmarking
|
||||
typedef struct HParserTestcase_ {
|
||||
unsigned char* input;
|
||||
size_t length;
|
||||
char* output_unambiguous;
|
||||
} HParserTestcase;
|
||||
|
||||
typedef struct HCaseResult_ {
|
||||
bool success;
|
||||
union {
|
||||
const char* actual_results; // on failure, filled in with the results of h_write_result_unamb
|
||||
size_t parse_time; // on success, filled in with time for a single parse, in nsec
|
||||
};
|
||||
} HCaseResult;
|
||||
|
||||
typedef struct HBackendResults_ {
|
||||
HParserBackend backend;
|
||||
bool compile_success;
|
||||
size_t n_testcases;
|
||||
size_t failed_testcases; // actually a count...
|
||||
HCaseResult *cases;
|
||||
} HBackendResults;
|
||||
|
||||
typedef struct HBenchmarkResults_ {
|
||||
size_t len;
|
||||
HBackendResults *results;
|
||||
} HBenchmarkResults;
|
||||
// }}}
|
||||
|
||||
// {{{ Preprocessor definitions
|
||||
#define HAMMER_FN_DECL_NOARG(rtype_t, name) \
|
||||
rtype_t name(void); \
|
||||
rtype_t name##__m(HAllocator* mm__)
|
||||
|
||||
#define HAMMER_FN_DECL(rtype_t, name, ...) \
|
||||
rtype_t name(__VA_ARGS__); \
|
||||
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__)
|
||||
|
||||
#define HAMMER_FN_DECL_ATTR(attr, rtype_t, name, ...) \
|
||||
rtype_t name(__VA_ARGS__) attr; \
|
||||
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr
|
||||
|
||||
#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \
|
||||
rtype_t name(__VA_ARGS__, ...); \
|
||||
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \
|
||||
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
|
||||
rtype_t name##__v(__VA_ARGS__, va_list ap)
|
||||
|
||||
// Note: attr is applied only to the variadic forms; the va_list variants (__mv and __v) are declared without it
|
||||
#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \
|
||||
rtype_t name(__VA_ARGS__, ...) attr; \
|
||||
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \
|
||||
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
|
||||
rtype_t name##__v(__VA_ARGS__, va_list ap)
|
||||
|
||||
// }}}
|
||||
|
||||
|
||||
/**
|
||||
* Top-level function to call a parser that has been built over some
|
||||
* piece of input (of known size).
|
||||
*/
|
||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
|
||||
HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length);
|
||||
|
||||
/**
|
||||
* Given a string, returns a parser that parses that string value.
|
||||
*
|
||||
* Result token type: TT_BYTES
|
||||
*/
|
||||
const HParser* h_token(const uint8_t *str, const size_t len);
|
||||
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
|
||||
|
||||
/**
|
||||
* Given a single character, returns a parser that parses that
|
||||
|
|
@ -133,7 +198,7 @@ const HParser* h_token(const uint8_t *str, const size_t len);
|
|||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_ch(const uint8_t c);
|
||||
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
|
||||
|
||||
/**
|
||||
* Given two single-character bounds, lower and upper, returns a parser
|
||||
|
|
@ -142,14 +207,14 @@ const HParser* h_ch(const uint8_t c);
|
|||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
|
||||
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
|
||||
|
||||
/**
|
||||
* Given an integer parser, p, and two integer bounds, lower and upper,
|
||||
* returns a parser that parses an integral value within the range
|
||||
* [lower, upper] (inclusive).
|
||||
*/
|
||||
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
|
||||
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses the specified number of bits. sign ==
|
||||
|
|
@ -157,63 +222,63 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
|
|||
*
|
||||
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
||||
*/
|
||||
const HParser* h_bits(size_t len, bool sign);
|
||||
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses a signed 8-byte integer value.
|
||||
*
|
||||
* Result token type: TT_SINT
|
||||
*/
|
||||
const HParser* h_int64();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses a signed 4-byte integer value.
|
||||
*
|
||||
* Result token type: TT_SINT
|
||||
*/
|
||||
const HParser* h_int32();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses a signed 2-byte integer value.
|
||||
*
|
||||
* Result token type: TT_SINT
|
||||
*/
|
||||
const HParser* h_int16();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses a signed 1-byte integer value.
|
||||
*
|
||||
* Result token type: TT_SINT
|
||||
*/
|
||||
const HParser* h_int8();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses an unsigned 8-byte integer value.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_uint64();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses an unsigned 4-byte integer value.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_uint32();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses an unsigned 2-byte integer value.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_uint16();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
|
||||
|
||||
/**
|
||||
* Returns a parser that parses an unsigned 1-byte integer value.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_uint8();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
|
||||
|
||||
/**
|
||||
* Given another parser, p, returns a parser that skips any whitespace
|
||||
|
|
@ -221,7 +286,7 @@ const HParser* h_uint8();
|
|||
*
|
||||
* Result token type: p's result type
|
||||
*/
|
||||
const HParser* h_whitespace(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given two parsers, p and q, returns a parser that parses them in
|
||||
|
|
@ -229,7 +294,7 @@ const HParser* h_whitespace(const HParser* p);
|
|||
*
|
||||
* Result token type: p's result type
|
||||
*/
|
||||
const HParser* h_left(const HParser* p, const HParser* q);
|
||||
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
|
||||
|
||||
/**
|
||||
* Given two parsers, p and q, returns a parser that parses them in
|
||||
|
|
@ -237,7 +302,7 @@ const HParser* h_left(const HParser* p, const HParser* q);
|
|||
*
|
||||
* Result token type: q's result type
|
||||
*/
|
||||
const HParser* h_right(const HParser* p, const HParser* q);
|
||||
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
|
||||
|
||||
/**
|
||||
* Given three parsers, p, x, and q, returns a parser that parses them in
|
||||
|
|
@ -245,7 +310,7 @@ const HParser* h_right(const HParser* p, const HParser* q);
|
|||
*
|
||||
* Result token type: x's result type
|
||||
*/
|
||||
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
|
||||
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
|
||||
|
||||
/**
|
||||
* Given another parser, p, and a function f, returns a parser that
|
||||
|
|
@ -253,21 +318,21 @@ const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
|
|||
*
|
||||
* Result token type: any
|
||||
*/
|
||||
const HParser* h_action(const HParser* p, const HAction a);
|
||||
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
|
||||
|
||||
/**
|
||||
* Parse a single character in the given charset.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_in(const uint8_t *charset, size_t length);
|
||||
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
|
||||
|
||||
/**
|
||||
* Parse a single character *NOT* in the given charset.
|
||||
*
|
||||
* Result token type: TT_UINT
|
||||
*/
|
||||
const HParser* h_not_in(const uint8_t *charset, size_t length);
|
||||
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
|
||||
|
||||
/**
|
||||
* A no-argument parser that succeeds if there is no more input to
|
||||
|
|
@ -275,14 +340,14 @@ const HParser* h_not_in(const uint8_t *charset, size_t length);
|
|||
*
|
||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||
*/
|
||||
const HParser* h_end_p();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
|
||||
|
||||
/**
|
||||
* This parser always fails.
|
||||
*
|
||||
* Result token type: NULL. Always.
|
||||
*/
|
||||
const HParser* h_nothing_p();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
|
||||
|
||||
/**
|
||||
* Given a null-terminated list of parsers, apply each parser in order.
|
||||
|
|
@ -290,7 +355,7 @@ const HParser* h_nothing_p();
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given a null-terminated list of parsers, apply each parser in order. The
|
||||
|
|
@ -299,7 +364,7 @@ const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
|
|||
*
|
||||
* Result token type: The type of the first successful parser's result.
|
||||
*/
|
||||
const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||
|
|
@ -309,7 +374,7 @@ const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
|
|||
*
|
||||
* Result token type: p1's result type.
|
||||
*/
|
||||
const HParser* h_butnot(const HParser* p1, const HParser* p2);
|
||||
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
|
||||
|
||||
/**
|
||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||
|
|
@ -319,7 +384,7 @@ const HParser* h_butnot(const HParser* p1, const HParser* p2);
|
|||
*
|
||||
* Result token type: p1's result type.
|
||||
*/
|
||||
const HParser* h_difference(const HParser* p1, const HParser* p2);
|
||||
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
|
||||
|
||||
/**
|
||||
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
||||
|
|
@ -327,7 +392,7 @@ const HParser* h_difference(const HParser* p1, const HParser* p2);
|
|||
*
|
||||
* Result token type: The type of the result of whichever parser succeeded.
|
||||
*/
|
||||
const HParser* h_xor(const HParser* p1, const HParser* p2);
|
||||
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
|
||||
|
||||
/**
|
||||
* Given a parser, p, this parser succeeds for zero or more repetitions
|
||||
|
|
@ -335,7 +400,7 @@ const HParser* h_xor(const HParser* p1, const HParser* p2);
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_many(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given a parser, p, this parser succeeds for one or more repetitions
|
||||
|
|
@ -343,7 +408,7 @@ const HParser* h_many(const HParser* p);
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_many1(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given a parser, p, this parser succeeds for exactly N repetitions
|
||||
|
|
@ -351,7 +416,7 @@ const HParser* h_many1(const HParser* p);
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_repeat_n(const HParser* p, const size_t n);
|
||||
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
|
||||
|
||||
/**
|
||||
* Given a parser, p, this parser succeeds with the value p parsed or
|
||||
|
|
@ -359,7 +424,7 @@ const HParser* h_repeat_n(const HParser* p, const size_t n);
|
|||
*
|
||||
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
||||
*/
|
||||
const HParser* h_optional(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
||||
|
|
@ -367,7 +432,7 @@ const HParser* h_optional(const HParser* p);
|
|||
*
|
||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||
*/
|
||||
const HParser* h_ignore(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
|
||||
|
||||
/**
|
||||
* Given a parser, p, and a parser for a separator, sep, this parser
|
||||
|
|
@ -378,7 +443,7 @@ const HParser* h_ignore(const HParser* p);
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_sepBy(const HParser* p, const HParser* sep);
|
||||
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
|
||||
|
||||
/**
|
||||
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
||||
|
|
@ -386,14 +451,14 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep);
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_sepBy1(const HParser* p, const HParser* sep);
|
||||
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
|
||||
|
||||
/**
|
||||
* This parser always returns a zero length match, i.e., empty string.
|
||||
*
|
||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||
*/
|
||||
const HParser* h_epsilon_p();
|
||||
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
|
||||
|
||||
/**
|
||||
* This parser applies its first argument to read an unsigned integer
|
||||
|
|
@ -404,7 +469,7 @@ const HParser* h_epsilon_p();
|
|||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
const HParser* h_length_value(const HParser* length, const HParser* value);
|
||||
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
|
||||
|
||||
/**
|
||||
* This parser attaches a predicate function, which returns true or
|
||||
|
|
@ -419,7 +484,7 @@ const HParser* h_length_value(const HParser* length, const HParser* value);
|
|||
*
|
||||
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
||||
*/
|
||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred);
|
||||
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
|
||||
|
||||
/**
|
||||
* The 'and' parser asserts that a conditional syntax is satisfied,
|
||||
|
|
@ -436,7 +501,7 @@ const HParser* h_attr_bool(const HParser* p, HPredicate pred);
|
|||
*
|
||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||
*/
|
||||
const HParser* h_and(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
|
||||
|
||||
/**
|
||||
* The 'not' parser asserts that a conditional syntax is *not*
|
||||
|
|
@ -456,7 +521,7 @@ const HParser* h_and(const HParser* p);
|
|||
*
|
||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||
*/
|
||||
const HParser* h_not(const HParser* p);
|
||||
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
|
||||
|
||||
/**
|
||||
* Create a parser that just calls out to another, as yet unknown,
|
||||
|
|
@ -467,35 +532,44 @@ const HParser* h_not(const HParser* p);
|
|||
* Result token type: the type of whatever parser is bound to it with
|
||||
* bind_indirect().
|
||||
*/
|
||||
HParser *h_indirect();
|
||||
HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
|
||||
|
||||
/**
|
||||
* Set the inner parser of an indirect. See comments on indirect for
|
||||
* details.
|
||||
*/
|
||||
void h_bind_indirect(HParser* indirect, const HParser* inner);
|
||||
HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
|
||||
|
||||
/**
|
||||
* Free the memory allocated to an HParseResult when it is no longer needed.
|
||||
*/
|
||||
void h_parse_result_free(HParseResult *result);
|
||||
HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
|
||||
|
||||
// Some debugging aids
|
||||
/**
|
||||
* Format token into a compact unambiguous form. Useful for parser test cases.
|
||||
* Caller is responsible for freeing the result.
|
||||
*/
|
||||
char* h_write_result_unamb(const HParsedToken* tok);
|
||||
HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
|
||||
/**
|
||||
* Format token to the given output stream. Indent starting at
|
||||
* [indent] spaces, with [delta] spaces between levels.
|
||||
*/
|
||||
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
|
||||
HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
|
||||
|
||||
/**
|
||||
* Build parse tables for the given parser backend. See the
|
||||
* documentation for the parser backend in question for information
|
||||
* about the [params] parameter, or just pass in NULL for the defaults.
|
||||
*
|
||||
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
|
||||
*/
|
||||
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
|
||||
|
||||
/**
|
||||
* TODO: Document me
|
||||
*/
|
||||
HBitWriter *h_bit_writer_new(void);
|
||||
HBitWriter *h_bit_writer_new(HAllocator* mm__);
|
||||
|
||||
/**
|
||||
* TODO: Document me
|
||||
|
|
@ -507,11 +581,17 @@ void h_bit_writer_put(HBitWriter* w, unsigned long long data, size_t nbits);
|
|||
* Must not free [w] until you're done with the result.
|
||||
* [len] is in bytes.
|
||||
*/
|
||||
const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
|
||||
const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
|
||||
|
||||
/**
|
||||
* TODO: Document me
|
||||
*/
|
||||
void h_bit_writer_free(HBitWriter* w);
|
||||
|
||||
// {{{ Benchmark functions
|
||||
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
|
||||
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
|
||||
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
|
||||
// }}}
|
||||
|
||||
#endif // #ifndef HAMMER_HAMMER__H
|
||||
|
|
|
|||
114
src/internal.h
114
src/internal.h
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
#ifndef HAMMER_INTERNAL__H
|
||||
#define HAMMER_INTERNAL__H
|
||||
#include <glib.h>
|
||||
#include <err.h>
|
||||
#include "hammer.h"
|
||||
|
||||
|
|
@ -29,9 +28,28 @@
|
|||
errx(1, "Assertion failed (programmer error): %s", message); \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
#define HAMMER_FN_IMPL_NOARGS(rtype_t, name) \
|
||||
rtype_t name(void) { \
|
||||
return name##__m(system_allocator); \
|
||||
} \
|
||||
rtype_t name##__m(HAllocator* mm__)
|
||||
// Functions with arguments are difficult to forward cleanly. Alas, we will need to forward them manually.
|
||||
|
||||
#define h_new(type, count) ((type*)(mm__->alloc(mm__, sizeof(type)*(count))))
|
||||
#define h_free(addr) (mm__->free(mm__, (addr)))
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
// This is going to be generally useful.
|
||||
static inline void h_generic_free(HAllocator *allocator, void* ptr) {
|
||||
allocator->free(allocator, ptr);
|
||||
}
|
||||
|
||||
HAllocator system_allocator;
|
||||
|
||||
|
||||
typedef struct HInputStream_ {
|
||||
// This should be considered to be a really big value type.
|
||||
const uint8_t *input;
|
||||
|
|
@ -42,6 +60,36 @@ typedef struct HInputStream_ {
|
|||
char overrun;
|
||||
} HInputStream;
|
||||
|
||||
typedef struct HSlistNode_ {
|
||||
void* elem;
|
||||
struct HSlistNode_ *next;
|
||||
} HSlistNode;
|
||||
|
||||
typedef struct HSlist_ {
|
||||
HSlistNode *head;
|
||||
struct HArena_ *arena;
|
||||
} HSlist;
|
||||
|
||||
typedef unsigned int HHashValue;
|
||||
typedef HHashValue (*HHashFunc)(const void* key);
|
||||
typedef bool (*HEqualFunc)(const void* key1, const void* key2);
|
||||
|
||||
typedef struct HHashTableEntry_ {
|
||||
struct HHashTableEntry_ *next;
|
||||
void* key;
|
||||
void* value;
|
||||
HHashValue hashval;
|
||||
} HHashTableEntry;
|
||||
|
||||
typedef struct HHashTable_ {
|
||||
HHashTableEntry *contents;
|
||||
HHashFunc hashFunc;
|
||||
HEqualFunc equalFunc;
|
||||
size_t capacity;
|
||||
size_t used;
|
||||
HArena *arena;
|
||||
} HHashTable;
|
||||
|
||||
/* The state of the parser.
|
||||
*
|
||||
* Members:
|
||||
|
|
@ -54,13 +102,19 @@ typedef struct HInputStream_ {
|
|||
*/
|
||||
|
||||
struct HParseState_ {
|
||||
GHashTable *cache;
|
||||
HHashTable *cache;
|
||||
HInputStream input_stream;
|
||||
HArena * arena;
|
||||
GQueue *lr_stack;
|
||||
GHashTable *recursion_heads;
|
||||
HSlist *lr_stack;
|
||||
HHashTable *recursion_heads;
|
||||
};
|
||||
|
||||
typedef struct HParserBackendVTable_ {
|
||||
int (*compile)(HAllocator *mm__, const HParser* parser, const void* params);
|
||||
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state);
|
||||
} HParserBackendVTable;
|
||||
|
||||
|
||||
/* The (location, parser) tuple used to key the cache.
|
||||
*/
|
||||
|
||||
|
|
@ -90,8 +144,8 @@ typedef enum HParserCacheValueType_ {
|
|||
*/
|
||||
typedef struct HRecursionHead_ {
|
||||
const HParser *head_parser;
|
||||
GSList *involved_set;
|
||||
GSList *eval_set;
|
||||
HSlist *involved_set;
|
||||
HSlist *eval_set;
|
||||
} HRecursionHead;
|
||||
|
||||
|
||||
|
|
@ -125,23 +179,23 @@ typedef struct HParserCacheValue_t {
|
|||
};
|
||||
} HParserCacheValue;
|
||||
|
||||
typedef unsigned int *HCharset;
|
||||
// This file provides the logical inverse of bitreader.c
|
||||
struct HBitWriter_ {
|
||||
uint8_t* buf;
|
||||
HAllocator *mm__;
|
||||
size_t index;
|
||||
size_t capacity;
|
||||
char bit_offset; // unlike in bit_reader, this is always the number
|
||||
// of used bits in the current byte. i.e., 0 always
|
||||
// means that 8 bits are available for use.
|
||||
char flags;
|
||||
};
|
||||
|
||||
static inline HCharset new_charset() {
|
||||
HCharset cs = g_new0(unsigned int, 256 / sizeof(unsigned int));
|
||||
return cs;
|
||||
}
|
||||
// }}}
|
||||
|
||||
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
||||
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
|
||||
}
|
||||
|
||||
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
||||
cs[pos / sizeof(*cs)] =
|
||||
val
|
||||
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
|
||||
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
|
||||
}
|
||||
// Backends {{{
|
||||
extern HParserBackendVTable h__packrat_backend_vtable;
|
||||
// }}}
|
||||
|
||||
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||
|
||||
|
|
@ -154,10 +208,24 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size);
|
|||
HCountedArray *h_carray_new(HArena * arena);
|
||||
void h_carray_append(HCountedArray *array, void* item);
|
||||
|
||||
HSlist* h_slist_new(HArena *arena);
|
||||
HSlist* h_slist_copy(HSlist *slist);
|
||||
void* h_slist_pop(HSlist *slist);
|
||||
void h_slist_push(HSlist *slist, void* item);
|
||||
bool h_slist_find(HSlist *slist, const void* item);
|
||||
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
|
||||
void h_slist_free(HSlist *slist);
|
||||
|
||||
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc);
|
||||
void* h_hashtable_get(HHashTable* ht, void* key);
|
||||
void h_hashtable_put(HHashTable* ht, void* key, void* value);
|
||||
int h_hashtable_present(HHashTable* ht, void* key);
|
||||
void h_hashtable_del(HHashTable* ht, void* key);
|
||||
void h_hashtable_free(HHashTable* ht);
|
||||
|
||||
#if 0
|
||||
#include <malloc.h>
|
||||
#define arena_malloc(a, s) malloc(s)
|
||||
#include <stdlib.h>
|
||||
#define h_arena_malloc(a, s) malloc(s)
|
||||
#endif
|
||||
|
||||
#endif // #ifndef HAMMER_INTERNAL__H
|
||||
|
|
|
|||
|
|
@ -23,10 +23,14 @@ static const HParserVtable action_vt = {
|
|||
.parse = parse_action,
|
||||
};
|
||||
|
||||
const HParser* h_action(const HParser* p, const HAction a) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
const HParser* h_action(const HParser* p, const HAction a) {
|
||||
return h_action__m(&system_allocator, p, a);
|
||||
}
|
||||
|
||||
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &action_vt;
|
||||
HParseAction *env = g_new(HParseAction, 1);
|
||||
HParseAction *env = h_new(HParseAction, 1);
|
||||
env->p = p;
|
||||
env->action = a;
|
||||
res->env = (void*)env;
|
||||
|
|
|
|||
|
|
@ -13,9 +13,13 @@ static const HParserVtable and_vt = {
|
|||
.parse = parse_and,
|
||||
};
|
||||
|
||||
|
||||
const HParser* h_and(const HParser* p) {
|
||||
return h_and__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
|
||||
// zero-width positive lookahead
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->env = (void*)p;
|
||||
res->vtable = &and_vt;
|
||||
return res;
|
||||
|
|
|
|||
|
|
@ -21,10 +21,14 @@ static const HParserVtable attr_bool_vt = {
|
|||
.parse = parse_attr_bool,
|
||||
};
|
||||
|
||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
|
||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
||||
return h_attr_bool__m(&system_allocator, p, pred);
|
||||
}
|
||||
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &attr_bool_vt;
|
||||
HAttrBool *env = g_new(HAttrBool, 1);
|
||||
HAttrBool *env = h_new(HAttrBool, 1);
|
||||
env->p = p;
|
||||
env->pred = pred;
|
||||
res->env = (void*)env;
|
||||
|
|
|
|||
|
|
@ -20,18 +20,24 @@ static const HParserVtable bits_vt = {
|
|||
.parse = parse_bits,
|
||||
};
|
||||
const HParser* h_bits(size_t len, bool sign) {
|
||||
struct bits_env *env = g_new(struct bits_env, 1);
|
||||
return h_bits__m(&system_allocator, len, sign);
|
||||
}
|
||||
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
|
||||
struct bits_env *env = h_new(struct bits_env, 1);
|
||||
env->length = len;
|
||||
env->signedp = sign;
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &bits_vt;
|
||||
res->env = env;
|
||||
return res;
|
||||
}
|
||||
|
||||
#define SIZED_BITS(name_pre, len, signedp) \
|
||||
const HParser* h_##name_pre##len () { \
|
||||
return h_bits(len, signedp); \
|
||||
const HParser* h_##name_pre##len () { \
|
||||
return h_bits__m(&system_allocator, len, signedp); \
|
||||
} \
|
||||
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
|
||||
return h_bits__m(mm__, len, signedp); \
|
||||
}
|
||||
SIZED_BITS(int, 8, true)
|
||||
SIZED_BITS(int, 16, true)
|
||||
|
|
|
|||
|
|
@ -39,10 +39,13 @@ static const HParserVtable butnot_vt = {
|
|||
.parse = parse_butnot,
|
||||
};
|
||||
|
||||
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
||||
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
||||
return h_butnot__m(&system_allocator, p1, p2);
|
||||
}
|
||||
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||
env->p1 = p1; env->p2 = p2;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &butnot_vt; ret->env = (void*)env;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "parser_internal.h"
|
||||
|
||||
static HParseResult* parse_ch(void* env, HParseState *state) {
|
||||
uint8_t c = (uint8_t)GPOINTER_TO_UINT(env);
|
||||
uint8_t c = (uint8_t)(unsigned long)(env);
|
||||
uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false);
|
||||
if (c == r) {
|
||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||
|
|
@ -15,9 +15,13 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
|
|||
static const HParserVtable ch_vt = {
|
||||
.parse = parse_ch,
|
||||
};
|
||||
const HParser* h_ch(const uint8_t c) {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
|
||||
const HParser* h_ch(const uint8_t c) {
|
||||
return h_ch__m(&system_allocator, c);
|
||||
}
|
||||
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &ch_vt;
|
||||
ret->env = GUINT_TO_POINTER(c);
|
||||
ret->env = (void*)(unsigned long)(c);
|
||||
return (const HParser*)ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,24 @@
|
|||
#include <string.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
typedef unsigned int *HCharset;
|
||||
|
||||
static inline HCharset new_charset(HAllocator* mm__) {
|
||||
HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int));
|
||||
memset(cs, 0, 256);
|
||||
return cs;
|
||||
}
|
||||
|
||||
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
||||
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
|
||||
}
|
||||
|
||||
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
||||
cs[pos / sizeof(*cs)] =
|
||||
val
|
||||
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
|
||||
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
|
||||
}
|
||||
|
||||
static HParseResult* parse_charset(void *env, HParseState *state) {
|
||||
uint8_t in = h_read_bits(&state->input_stream, 8, false);
|
||||
|
|
@ -18,8 +37,11 @@ static const HParserVtable charset_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HCharset cs = new_charset();
|
||||
return h_ch_range__m(&system_allocator, lower, upper);
|
||||
}
|
||||
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
HCharset cs = new_charset(mm__);
|
||||
for (int i = 0; i < 256; i++)
|
||||
charset_set(cs, i, (lower <= i) && (i <= upper));
|
||||
ret->vtable = &charset_vt;
|
||||
|
|
@ -28,9 +50,9 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
|||
}
|
||||
|
||||
|
||||
const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HCharset cs = new_charset();
|
||||
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
HCharset cs = new_charset(mm__);
|
||||
for (size_t i = 0; i < 256; i++)
|
||||
charset_set(cs, i, 1-val);
|
||||
for (size_t i = 0; i < count; i++)
|
||||
|
|
@ -42,10 +64,18 @@ const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
|
|||
}
|
||||
|
||||
const HParser* h_in(const uint8_t *options, size_t count) {
|
||||
return h_in_or_not(options, count, 1);
|
||||
return h_in_or_not__m(&system_allocator, options, count, 1);
|
||||
}
|
||||
|
||||
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||
return h_in_or_not__m(mm__, options, count, 1);
|
||||
}
|
||||
|
||||
const HParser* h_not_in(const uint8_t *options, size_t count) {
|
||||
return h_in_or_not(options, count, 0);
|
||||
return h_in_or_not__m(&system_allocator, options, count, 0);
|
||||
}
|
||||
|
||||
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||
return h_in_or_not__m(mm__, options, count, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <stdarg.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -25,20 +26,40 @@ static const HParserVtable choice_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_choice(const HParser* p, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, p);
|
||||
const HParser* ret = h_choice__mv(&system_allocator, p, ap);
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, p);
|
||||
const HParser* ret = h_choice__mv(mm__, p, ap);
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* h_choice__v(const HParser* p, va_list ap) {
|
||||
return h_choice__mv(&system_allocator, p, ap);
|
||||
}
|
||||
|
||||
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
|
||||
va_list ap;
|
||||
size_t len = 0;
|
||||
HSequence *s = g_new(HSequence, 1);
|
||||
HSequence *s = h_new(HSequence, 1);
|
||||
|
||||
const HParser *arg;
|
||||
va_start(ap, p);
|
||||
va_copy(ap, ap_);
|
||||
do {
|
||||
len++;
|
||||
arg = va_arg(ap, const HParser *);
|
||||
} while (arg);
|
||||
va_end(ap);
|
||||
s->p_array = g_new(const HParser *, len);
|
||||
s->p_array = h_new(const HParser *, len);
|
||||
|
||||
va_start(ap, p);
|
||||
va_copy(ap, ap_);
|
||||
s->p_array[0] = p;
|
||||
for (size_t i = 1; i < len; i++) {
|
||||
s->p_array[i] = va_arg(ap, const HParser *);
|
||||
|
|
@ -46,7 +67,7 @@ const HParser* h_choice(const HParser* p, ...) {
|
|||
va_end(ap);
|
||||
|
||||
s->len = len;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &choice_vt; ret->env = (void*)s;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,10 +38,13 @@ static HParserVtable difference_vt = {
|
|||
.parse = parse_difference,
|
||||
};
|
||||
|
||||
const HParser* h_difference(const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
||||
const HParser* h_difference(const HParser* p1, const HParser* p2) {
|
||||
return h_difference__m(&system_allocator, p1, p2);
|
||||
}
|
||||
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||
env->p1 = p1; env->p2 = p2;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &difference_vt; ret->env = (void*)env;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,8 +14,13 @@ static const HParserVtable end_vt = {
|
|||
.parse = parse_end,
|
||||
};
|
||||
|
||||
const HParser* h_end_p() {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
ret->vtable = &end_vt; ret->env = NULL;
|
||||
const HParser* h_end_p() {
|
||||
return h_end_p__m(&system_allocator);
|
||||
}
|
||||
|
||||
const HParser* h_end_p__m(HAllocator* mm__) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &end_vt;
|
||||
ret->env = NULL;
|
||||
return (const HParser*)ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,3 +20,6 @@ static const HParser epsilon_p = {
|
|||
const HParser* h_epsilon_p() {
|
||||
return &epsilon_p;
|
||||
}
|
||||
const HParser* h_epsilon_p__m(HAllocator* mm__) {
|
||||
return &epsilon_p;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,10 @@ static const HParserVtable ignore_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_ignore(const HParser* p) {
|
||||
HParser* ret = g_new(HParser, 1);
|
||||
return h_ignore__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
|
||||
HParser* ret = h_new(HParser, 1);
|
||||
ret->vtable = &ignore_vt;
|
||||
ret->env = (void*)p;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -35,38 +35,48 @@ static const HParserVtable ignoreseq_vt = {
|
|||
// API frontends
|
||||
//
|
||||
|
||||
static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) {
|
||||
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
|
||||
seq->parsers = g_new(const HParser*, 2);
|
||||
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
|
||||
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||
seq->parsers = h_new(const HParser*, 2);
|
||||
seq->parsers[0] = p;
|
||||
seq->parsers[1] = q;
|
||||
seq->count = 2;
|
||||
seq->which = which;
|
||||
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &ignoreseq_vt;
|
||||
ret->env = (void*)seq;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* h_left(const HParser* p, const HParser* q) {
|
||||
return h_leftright(p, q, 0);
|
||||
return h_leftright__m(&system_allocator, p, q, 0);
|
||||
}
|
||||
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||
return h_leftright__m(mm__, p, q, 0);
|
||||
}
|
||||
|
||||
const HParser* h_right(const HParser* p, const HParser* q) {
|
||||
return h_leftright(p, q, 1);
|
||||
return h_leftright__m(&system_allocator, p, q, 1);
|
||||
}
|
||||
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||
return h_leftright__m(mm__, p, q, 1);
|
||||
}
|
||||
|
||||
|
||||
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
|
||||
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
|
||||
seq->parsers = g_new(const HParser*, 3);
|
||||
return h_middle__m(&system_allocator, p, x, q);
|
||||
}
|
||||
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
|
||||
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||
seq->parsers = h_new(const HParser*, 3);
|
||||
seq->parsers[0] = p;
|
||||
seq->parsers[1] = x;
|
||||
seq->parsers[2] = q;
|
||||
seq->count = 3;
|
||||
seq->which = 1;
|
||||
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &ignoreseq_vt;
|
||||
ret->env = (void*)seq;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,10 @@ void h_bind_indirect(HParser* indirect, const HParser* inner) {
|
|||
}
|
||||
|
||||
HParser* h_indirect() {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
return h_indirect__m(&system_allocator);
|
||||
}
|
||||
HParser* h_indirect__m(HAllocator* mm__) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &indirect_vt;
|
||||
res->env = NULL;
|
||||
return res;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ static const HParserVtable int_range_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
|
||||
return h_int_range__m(&system_allocator, p, lower, upper);
|
||||
}
|
||||
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
|
||||
// p must be an integer parser, which means it's using parse_bits
|
||||
// TODO: re-add this check
|
||||
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
|
||||
|
|
@ -40,11 +43,11 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
|
|||
// and regardless, the bounds need to fit in the parser in question
|
||||
// TODO: check this as well.
|
||||
|
||||
HRange *r_env = g_new(HRange, 1);
|
||||
HRange *r_env = h_new(HRange, 1);
|
||||
r_env->p = p;
|
||||
r_env->lower = lower;
|
||||
r_env->upper = upper;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &int_range_vt;
|
||||
ret->env = (void*)r_env;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -49,10 +49,13 @@ static const HParserVtable many_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_many(const HParser* p) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HRepeat *env = g_new(HRepeat, 1);
|
||||
return h_many__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
HRepeat *env = h_new(HRepeat, 1);
|
||||
env->p = p;
|
||||
env->sep = h_epsilon_p();
|
||||
env->sep = h_epsilon_p__m(mm__);
|
||||
env->count = 0;
|
||||
env->min_p = true;
|
||||
res->vtable = &many_vt;
|
||||
|
|
@ -61,10 +64,13 @@ const HParser* h_many(const HParser* p) {
|
|||
}
|
||||
|
||||
const HParser* h_many1(const HParser* p) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HRepeat *env = g_new(HRepeat, 1);
|
||||
return h_many1__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
HRepeat *env = h_new(HRepeat, 1);
|
||||
env->p = p;
|
||||
env->sep = h_epsilon_p();
|
||||
env->sep = h_epsilon_p__m(mm__);
|
||||
env->count = 1;
|
||||
env->min_p = true;
|
||||
res->vtable = &many_vt;
|
||||
|
|
@ -73,10 +79,13 @@ const HParser* h_many1(const HParser* p) {
|
|||
}
|
||||
|
||||
const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HRepeat *env = g_new(HRepeat, 1);
|
||||
return h_repeat_n__m(&system_allocator, p, n);
|
||||
}
|
||||
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
HRepeat *env = h_new(HRepeat, 1);
|
||||
env->p = p;
|
||||
env->sep = h_epsilon_p();
|
||||
env->sep = h_epsilon_p__m(mm__);
|
||||
env->count = n;
|
||||
env->min_p = false;
|
||||
res->vtable = &many_vt;
|
||||
|
|
@ -85,8 +94,11 @@ const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
|||
}
|
||||
|
||||
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HRepeat *env = g_new(HRepeat, 1);
|
||||
return h_sepBy__m(&system_allocator, p, sep);
|
||||
}
|
||||
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
HRepeat *env = h_new(HRepeat, 1);
|
||||
env->p = p;
|
||||
env->sep = sep;
|
||||
env->count = 0;
|
||||
|
|
@ -97,8 +109,11 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
|||
}
|
||||
|
||||
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
HRepeat *env = g_new(HRepeat, 1);
|
||||
return h_sepBy1__m(&system_allocator, p, sep);
|
||||
}
|
||||
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
HRepeat *env = h_new(HRepeat, 1);
|
||||
env->p = p;
|
||||
env->sep = sep;
|
||||
env->count = 1;
|
||||
|
|
@ -135,9 +150,12 @@ static const HParserVtable length_value_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_length_value(const HParser* length, const HParser* value) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
return h_length_value__m(&system_allocator, length, value);
|
||||
}
|
||||
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &length_value_vt;
|
||||
HLenVal *env = g_new(HLenVal, 1);
|
||||
HLenVal *env = h_new(HLenVal, 1);
|
||||
env->length = length;
|
||||
env->value = value;
|
||||
res->env = (void*)env;
|
||||
|
|
|
|||
|
|
@ -15,7 +15,10 @@ static const HParserVtable not_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_not(const HParser* p) {
|
||||
HParser *res = g_new(HParser, 1);
|
||||
return h_not__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
|
||||
HParser *res = h_new(HParser, 1);
|
||||
res->vtable = &not_vt;
|
||||
res->env = (void*)p;
|
||||
return res;
|
||||
|
|
|
|||
|
|
@ -10,8 +10,11 @@ static const HParserVtable nothing_vt = {
|
|||
.parse = parse_nothing,
|
||||
};
|
||||
|
||||
const HParser* h_nothing_p() {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
const HParser* h_nothing_p() {
|
||||
return h_nothing_p__m(&system_allocator);
|
||||
}
|
||||
const HParser* h_nothing_p__m(HAllocator* mm__) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &nothing_vt; ret->env = NULL;
|
||||
return (const HParser*)ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,9 +16,12 @@ static const HParserVtable optional_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_optional(const HParser* p) {
|
||||
return h_optional__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
|
||||
// TODO: re-add this
|
||||
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &optional_vt;
|
||||
ret->env = (void*)p;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <stdarg.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -27,20 +28,40 @@ static const HParserVtable sequence_vt = {
|
|||
.parse = parse_sequence,
|
||||
};
|
||||
|
||||
const HParser* h_sequence(const HParser *p, ...) {
|
||||
const HParser* h_sequence(const HParser* p, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, p);
|
||||
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, p);
|
||||
const HParser* ret = h_sequence__mv(mm__, p, ap);
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const HParser* h_sequence__v(const HParser* p, va_list ap) {
|
||||
return h_sequence__mv(&system_allocator, p, ap);
|
||||
}
|
||||
|
||||
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
|
||||
va_list ap;
|
||||
size_t len = 0;
|
||||
const HParser *arg;
|
||||
va_start(ap, p);
|
||||
va_copy(ap, ap_);
|
||||
do {
|
||||
len++;
|
||||
arg = va_arg(ap, const HParser *);
|
||||
} while (arg);
|
||||
va_end(ap);
|
||||
HSequence *s = g_new(HSequence, 1);
|
||||
s->p_array = g_new(const HParser *, len);
|
||||
HSequence *s = h_new(HSequence, 1);
|
||||
s->p_array = h_new(const HParser *, len);
|
||||
|
||||
va_start(ap, p);
|
||||
va_copy(ap, ap_);
|
||||
s->p_array[0] = p;
|
||||
for (size_t i = 1; i < len; i++) {
|
||||
s->p_array[i] = va_arg(ap, const HParser *);
|
||||
|
|
@ -48,7 +69,7 @@ const HParser* h_sequence(const HParser *p, ...) {
|
|||
va_end(ap);
|
||||
|
||||
s->len = len;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &sequence_vt; ret->env = (void*)s;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,14 +20,17 @@ static HParseResult* parse_token(void *env, HParseState *state) {
|
|||
return make_result(state, tok);
|
||||
}
|
||||
|
||||
const const HParserVtable token_vt = {
|
||||
const HParserVtable token_vt = {
|
||||
.parse = parse_token,
|
||||
};
|
||||
|
||||
const HParser* h_token(const uint8_t *str, const size_t len) {
|
||||
HToken *t = g_new(HToken, 1);
|
||||
const HParser* h_token(const uint8_t *str, const size_t len) {
|
||||
return h_token__m(&system_allocator, str, len);
|
||||
}
|
||||
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
|
||||
HToken *t = h_new(HToken, 1);
|
||||
t->str = (uint8_t*)str, t->len = len;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &token_vt;
|
||||
ret->env = t;
|
||||
return (const HParser*)ret;
|
||||
|
|
|
|||
|
|
@ -24,3 +24,6 @@ static HParser unimplemented = {
|
|||
const HParser* h_unimplemented() {
|
||||
return &unimplemented;
|
||||
}
|
||||
const HParser* h_unimplemented__m(HAllocator* mm__) {
|
||||
return &unimplemented;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) {
|
|||
bak = state->input_stream;
|
||||
c = h_read_bits(&state->input_stream, 8, false);
|
||||
if (state->input_stream.overrun)
|
||||
return NULL;
|
||||
break;
|
||||
} while (isspace(c));
|
||||
state->input_stream = bak;
|
||||
return h_do_parse((HParser*)env, state);
|
||||
|
|
@ -19,7 +19,10 @@ static const HParserVtable whitespace_vt = {
|
|||
};
|
||||
|
||||
const HParser* h_whitespace(const HParser* p) {
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
return h_whitespace__m(&system_allocator, p);
|
||||
}
|
||||
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &whitespace_vt;
|
||||
ret->env = (void*)p;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -35,10 +35,13 @@ static const HParserVtable xor_vt = {
|
|||
.parse = parse_xor,
|
||||
};
|
||||
|
||||
const HParser* h_xor(const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
||||
const HParser* h_xor(const HParser* p1, const HParser* p2) {
|
||||
return h_xor__m(&system_allocator, p1, p2);
|
||||
}
|
||||
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||
env->p1 = p1; env->p2 = p2;
|
||||
HParser *ret = g_new(HParser, 1);
|
||||
HParser *ret = h_new(HParser, 1);
|
||||
ret->vtable = &xor_vt; ret->env = (void*)env;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
21
src/pprint.c
21
src/pprint.c
|
|
@ -17,10 +17,10 @@
|
|||
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include "hammer.h"
|
||||
#include <malloc.h>
|
||||
#include "internal.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct pp_state {
|
||||
int delta;
|
||||
|
|
@ -69,20 +69,25 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
|
|||
fprintf(stream, "%*sUSER\n", indent, "");
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
if(tok->token_type > TT_USER) {
|
||||
fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER);
|
||||
} else {
|
||||
assert_message(0, "Should not reach here.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct result_buf {
|
||||
char* output;
|
||||
HAllocator *mm__;
|
||||
size_t len;
|
||||
size_t capacity;
|
||||
};
|
||||
|
||||
static inline void ensure_capacity(struct result_buf *buf, int amt) {
|
||||
while (buf->len + amt >= buf->capacity)
|
||||
buf->output = g_realloc(buf->output, buf->capacity *= 2);
|
||||
buf->output = buf->mm__->realloc(buf->mm__, buf->output, buf->capacity *= 2);
|
||||
}
|
||||
|
||||
static inline void append_buf(struct result_buf *buf, const char* input, int len) {
|
||||
|
|
@ -149,15 +154,19 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
|
|||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unexpected token type %d\n", tok->token_type);
|
||||
g_assert_not_reached();
|
||||
assert_message(0, "Should not reach here.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char* h_write_result_unamb(const HParsedToken* tok) {
|
||||
return h_write_result_unamb__m(&system_allocator, tok);
|
||||
}
|
||||
char* h_write_result_unamb__m(HAllocator* mm__, const HParsedToken* tok) {
|
||||
struct result_buf buf = {
|
||||
.output = g_malloc0(16),
|
||||
.output = mm__->alloc(mm__, 16),
|
||||
.len = 0,
|
||||
.mm__ = mm__,
|
||||
.capacity = 16
|
||||
};
|
||||
unamb_sub(tok, &buf);
|
||||
|
|
|
|||
20
src/system_allocator.c
Normal file
20
src/system_allocator.c
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#include <stdlib.h>
|
||||
#include "internal.h"
|
||||
|
||||
/*
 * Allocation hook of the system allocator: forwards the request to malloc().
 *
 * allocator  handle the hook was invoked through; not consulted here.
 * size       number of bytes requested.
 *
 * Returns whatever malloc() returns for `size`.
 */
static void* system_alloc(HAllocator *allocator, size_t size) {
  (void)allocator;  /* the libc heap needs no per-allocator state */
  void *block = malloc(size);
  return block;
}
|
||||
|
||||
/*
 * Resize hook of the system allocator: forwards the request to realloc().
 *
 * allocator  handle the hook was invoked through; not consulted here.
 * ptr        block to resize, passed to realloc() unchanged.
 * size       new size in bytes.
 *
 * Returns whatever realloc() returns.
 */
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
  (void)allocator;  /* the libc heap needs no per-allocator state */
  void *resized = realloc(ptr, size);
  return resized;
}
|
||||
|
||||
/*
 * Release hook of the system allocator: hands the block to free().
 *
 * allocator  handle the hook was invoked through; not consulted here.
 * ptr        block to release, passed to free() unchanged.
 */
static void system_free(HAllocator *allocator, void* ptr) {
  (void)allocator;  /* the libc heap needs no per-allocator state */
  free(ptr);
  return;
}
|
||||
|
||||
HAllocator system_allocator = {
|
||||
.alloc = system_alloc,
|
||||
.realloc = system_realloc,
|
||||
.free = system_free,
|
||||
};
|
||||
22
src/t_benchmark.c
Normal file
22
src/t_benchmark.c
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#include <glib.h>
|
||||
#include "hammer.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
HParserTestcase testcases[] = {
|
||||
{(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"},
|
||||
{(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"},
|
||||
{(unsigned char*)"1,3", 3, "(u0x31 u0x33)"},
|
||||
{(unsigned char*)"3", 1, "(u0x33)"},
|
||||
{ NULL, 0, NULL }
|
||||
};
|
||||
|
||||
static void test_benchmark_1() {
|
||||
const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||
|
||||
HBenchmarkResults *res = h_benchmark(parser, testcases);
|
||||
h_benchmark_report(stderr, res);
|
||||
}
|
||||
|
||||
void register_benchmark_tests(void) {
|
||||
g_test_add_func("/core/benchmark/1", test_benchmark_1);
|
||||
}
|
||||
67
src/t_bitreader.c
Normal file
67
src/t_bitreader.c
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#include <glib.h>
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
/*
 * Expands to a brace-enclosed designated initializer for an input stream
 * positioned at the start of `buf`:
 *   .input      = buf, cast to uint8_t*
 *   .length     = len
 *   .index      = 0
 *   .bit_offset = 8 when the BIT_BIG_ENDIAN flag is set in endianness_, else 0
 *   .endianness = endianness_
 *
 * Every argument is parenthesized so that expression arguments (for example
 * `ptr + 1` or `a | b`) are evaluated as a whole before the cast or
 * assignment is applied.
 */
#define MK_INPUT_STREAM(buf,len,endianness_)                    \
  {                                                             \
    .input = (uint8_t*)(buf),                                   \
    .length = (len),                                            \
    .index = 0,                                                 \
    .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0),   \
    .endianness = (endianness_)                                 \
  }
|
||||
|
||||
|
||||
static void test_bitreader_ints(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
|
||||
}
|
||||
|
||||
static void test_bitreader_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
|
||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||
}
|
||||
static void test_bitreader_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
|
||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||
}
|
||||
|
||||
static void test_largebits_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||
}
|
||||
|
||||
static void test_largebits_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_be(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_le(void) {
|
||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
|
||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
|
||||
}
|
||||
|
||||
|
||||
void register_bitreader_tests(void) {
|
||||
g_test_add_func("/core/bitreader/be", test_bitreader_be);
|
||||
g_test_add_func("/core/bitreader/le", test_bitreader_le);
|
||||
g_test_add_func("/core/bitreader/largebits-be", test_largebits_be);
|
||||
g_test_add_func("/core/bitreader/largebits-le", test_largebits_le);
|
||||
g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be);
|
||||
g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le);
|
||||
g_test_add_func("/core/bitreader/ints", test_bitreader_ints);
|
||||
}
|
||||
108
src/t_bitwriter.c
Normal file
108
src/t_bitwriter.c
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#include <glib.h>
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
typedef struct {
|
||||
unsigned long long data;
|
||||
size_t nbits;
|
||||
} bitwriter_test_elem; // should end with {0,0}
|
||||
|
||||
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
|
||||
size_t len;
|
||||
const uint8_t *buf;
|
||||
HBitWriter *w = h_bit_writer_new(&system_allocator);
|
||||
int i;
|
||||
w->flags = flags;
|
||||
for (i = 0; data[i].nbits; i++) {
|
||||
h_bit_writer_put(w, data[i].data, data[i].nbits);
|
||||
}
|
||||
|
||||
buf = h_bit_writer_get_buffer(w, &len);
|
||||
HInputStream input = {
|
||||
.input = buf,
|
||||
.index = 0,
|
||||
.length = len,
|
||||
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
|
||||
.endianness = flags,
|
||||
.overrun = 0
|
||||
};
|
||||
|
||||
for (i = 0; data[i].nbits; i++) {
|
||||
g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_bitwriter_ints(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ -0x200000000, 64 },
|
||||
{ 0,0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_bitwriter_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x03, 3 },
|
||||
{ 0x52, 8 },
|
||||
{ 0x1A, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_bitwriter_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x02, 3 },
|
||||
{ 0x4D, 8 },
|
||||
{ 0x0B, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_largebits_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x352, 11 },
|
||||
{ 0x1A, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_largebits_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0x26A, 11 },
|
||||
{ 0x0B, 5 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_be(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0xD, 5 },
|
||||
{ 0x25A, 11 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||
}
|
||||
|
||||
static void test_offset_largebits_le(void) {
|
||||
bitwriter_test_elem data[] = {
|
||||
{ 0xA, 5 },
|
||||
{ 0x2D3, 11 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
void register_bitwriter_tests(void) {
|
||||
g_test_add_func("/core/bitwriter/be", test_bitwriter_be);
|
||||
g_test_add_func("/core/bitwriter/le", test_bitwriter_le);
|
||||
g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be);
|
||||
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le);
|
||||
g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be);
|
||||
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le);
|
||||
g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints);
|
||||
}
|
||||
16
src/t_misc.c
Normal file
16
src/t_misc.c
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#include <glib.h>
|
||||
#include "test_suite.h"
|
||||
#include "hammer.h"
|
||||
|
||||
static void test_tt_user(void) {
|
||||
g_check_cmpint(TT_USER, >, TT_NONE);
|
||||
g_check_cmpint(TT_USER, >, TT_BYTES);
|
||||
g_check_cmpint(TT_USER, >, TT_SINT);
|
||||
g_check_cmpint(TT_USER, >, TT_UINT);
|
||||
g_check_cmpint(TT_USER, >, TT_SEQUENCE);
|
||||
g_check_cmpint(TT_USER, >, TT_ERR);
|
||||
}
|
||||
|
||||
void register_misc_tests(void) {
|
||||
g_test_add_func("/core/misc/tt_user", test_tt_user);
|
||||
}
|
||||
421
src/t_parser.c
Normal file
421
src/t_parser.c
Normal file
|
|
@ -0,0 +1,421 @@
|
|||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include "hammer.h"
|
||||
#include "internal.h"
|
||||
#include "test_suite.h"
|
||||
#include "parsers/parser_internal.h"
|
||||
|
||||
static void test_token(void) {
|
||||
const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3);
|
||||
|
||||
g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>");
|
||||
g_check_parse_failed(token_, "95", 2);
|
||||
}
|
||||
|
||||
static void test_ch(void) {
|
||||
const HParser *ch_ = h_ch(0xa2);
|
||||
|
||||
g_check_parse_ok(ch_, "\xa2", 1, "u0xa2");
|
||||
g_check_parse_failed(ch_, "\xa3", 1);
|
||||
}
|
||||
|
||||
static void test_ch_range(void) {
|
||||
const HParser *range_ = h_ch_range('a', 'c');
|
||||
|
||||
g_check_parse_ok(range_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(range_, "d", 1);
|
||||
}
|
||||
|
||||
//@MARK_START
|
||||
static void test_int64(void) {
|
||||
const HParser *int64_ = h_int64();
|
||||
|
||||
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
|
||||
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_int32(void) {
|
||||
const HParser *int32_ = h_int32();
|
||||
|
||||
g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000");
|
||||
g_check_parse_failed(int32_, "\xff\xfe\x00", 3);
|
||||
}
|
||||
|
||||
static void test_int16(void) {
|
||||
const HParser *int16_ = h_int16();
|
||||
|
||||
g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200");
|
||||
g_check_parse_failed(int16_, "\xfe", 1);
|
||||
}
|
||||
|
||||
static void test_int8(void) {
|
||||
const HParser *int8_ = h_int8();
|
||||
|
||||
g_check_parse_ok(int8_, "\x88", 1, "s-0x78");
|
||||
g_check_parse_failed(int8_, "", 0);
|
||||
}
|
||||
|
||||
static void test_uint64(void) {
|
||||
const HParser *uint64_ = h_uint64();
|
||||
|
||||
g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
|
||||
g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_uint32(void) {
|
||||
const HParser *uint32_ = h_uint32();
|
||||
|
||||
g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000");
|
||||
g_check_parse_failed(uint32_, "\x00\x02\x00", 3);
|
||||
}
|
||||
|
||||
static void test_uint16(void) {
|
||||
const HParser *uint16_ = h_uint16();
|
||||
|
||||
g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200");
|
||||
g_check_parse_failed(uint16_, "\x02", 1);
|
||||
}
|
||||
|
||||
static void test_uint8(void) {
|
||||
const HParser *uint8_ = h_uint8();
|
||||
|
||||
g_check_parse_ok(uint8_, "\x78", 1, "u0x78");
|
||||
g_check_parse_failed(uint8_, "", 0);
|
||||
}
|
||||
//@MARK_END
|
||||
|
||||
static void test_int_range(void) {
|
||||
const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
|
||||
|
||||
g_check_parse_ok(int_range_, "\x05", 1, "u0x5");
|
||||
g_check_parse_failed(int_range_, "\xb", 1);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void test_float64(void) {
|
||||
const HParser *float64_ = h_float64();
|
||||
|
||||
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
|
||||
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
|
||||
}
|
||||
|
||||
static void test_float32(void) {
|
||||
const HParser *float32_ = h_float32();
|
||||
|
||||
g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
|
||||
g_check_parse_failed(float32_, "\x3f\x80\x00");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void test_whitespace(void) {
|
||||
const HParser *whitespace_ = h_whitespace(h_ch('a'));
|
||||
const HParser *whitespace_end = h_whitespace(h_end_p());
|
||||
|
||||
g_check_parse_ok(whitespace_, "a", 1, "u0x61");
|
||||
g_check_parse_ok(whitespace_, " a", 2, "u0x61");
|
||||
g_check_parse_ok(whitespace_, " a", 3, "u0x61");
|
||||
g_check_parse_ok(whitespace_, "\ta", 2, "u0x61");
|
||||
g_check_parse_failed(whitespace_, "_a", 2);
|
||||
|
||||
g_check_parse_ok(whitespace_end, "", 0, "NULL");
|
||||
g_check_parse_ok(whitespace_end, " ", 2, "NULL");
|
||||
g_check_parse_failed(whitespace_end, " x", 3);
|
||||
}
|
||||
|
||||
static void test_left(void) {
|
||||
const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
|
||||
|
||||
g_check_parse_ok(left_, "a ", 2, "u0x61");
|
||||
g_check_parse_failed(left_, "a", 1);
|
||||
g_check_parse_failed(left_, " ", 1);
|
||||
g_check_parse_failed(left_, "ab", 2);
|
||||
}
|
||||
|
||||
static void test_right(void) {
|
||||
const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
|
||||
|
||||
g_check_parse_ok(right_, " a", 2, "u0x61");
|
||||
g_check_parse_failed(right_, "a", 1);
|
||||
g_check_parse_failed(right_, " ", 1);
|
||||
g_check_parse_failed(right_, "ba", 2);
|
||||
}
|
||||
|
||||
static void test_middle(void) {
|
||||
const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
|
||||
|
||||
g_check_parse_ok(middle_, " a ", 3, "u0x61");
|
||||
g_check_parse_failed(middle_, "a", 1);
|
||||
g_check_parse_failed(middle_, " ", 1);
|
||||
g_check_parse_failed(middle_, " a", 2);
|
||||
g_check_parse_failed(middle_, "a ", 2);
|
||||
g_check_parse_failed(middle_, " b ", 3);
|
||||
g_check_parse_failed(middle_, "ba ", 3);
|
||||
g_check_parse_failed(middle_, " ab", 3);
|
||||
}
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
const HParsedToken* upcase(const HParseResult *p) {
|
||||
switch(p->ast->token_type) {
|
||||
case TT_SEQUENCE:
|
||||
{
|
||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
|
||||
ret->token_type = TT_SEQUENCE;
|
||||
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
||||
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
|
||||
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
|
||||
tmp->token_type = TT_UINT;
|
||||
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
|
||||
h_carray_append(seq, tmp);
|
||||
} else {
|
||||
h_carray_append(seq, p->ast->seq->elements[i]);
|
||||
}
|
||||
}
|
||||
ret->seq = seq;
|
||||
return (const HParsedToken*)ret;
|
||||
}
|
||||
case TT_UINT:
|
||||
{
|
||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||
ret->token_type = TT_UINT;
|
||||
ret->uint = toupper(p->ast->uint);
|
||||
return (const HParsedToken*)ret;
|
||||
}
|
||||
default:
|
||||
return p->ast;
|
||||
}
|
||||
}
|
||||
|
||||
static void test_action(void) {
|
||||
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
|
||||
h_ch('A'),
|
||||
NULL),
|
||||
h_choice(h_ch('b'),
|
||||
h_ch('B'),
|
||||
NULL),
|
||||
NULL),
|
||||
upcase);
|
||||
|
||||
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
|
||||
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
|
||||
g_check_parse_failed(action_, "XX", 2);
|
||||
}
|
||||
|
||||
static void test_in(void) {
|
||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||
const HParser *in_ = h_in(options, 3);
|
||||
g_check_parse_ok(in_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(in_, "d", 1);
|
||||
|
||||
}
|
||||
|
||||
static void test_not_in(void) {
|
||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||
const HParser *not_in_ = h_not_in(options, 3);
|
||||
g_check_parse_ok(not_in_, "d", 1, "u0x64");
|
||||
g_check_parse_failed(not_in_, "a", 1);
|
||||
|
||||
}
|
||||
|
||||
static void test_end_p(void) {
|
||||
const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL);
|
||||
g_check_parse_ok(end_p_, "a", 1, "(u0x61)");
|
||||
g_check_parse_failed(end_p_, "aa", 2);
|
||||
}
|
||||
|
||||
static void test_nothing_p(void) {
|
||||
const HParser *nothing_p_ = h_nothing_p();
|
||||
g_check_parse_failed(nothing_p_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_sequence(void) {
|
||||
const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL);
|
||||
const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);
|
||||
|
||||
g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_failed(sequence_1, "a", 1);
|
||||
g_check_parse_failed(sequence_1, "b", 1);
|
||||
g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)");
|
||||
}
|
||||
|
||||
static void test_choice(void) {
|
||||
const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL);
|
||||
|
||||
g_check_parse_ok(choice_, "a", 1, "u0x61");
|
||||
g_check_parse_ok(choice_, "b", 1, "u0x62");
|
||||
g_check_parse_failed(choice_, "c", 1);
|
||||
}
|
||||
|
||||
static void test_butnot(void) {
|
||||
const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
|
||||
const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6'));
|
||||
|
||||
g_check_parse_ok(butnot_1, "a", 1, "u0x61");
|
||||
g_check_parse_failed(butnot_1, "ab", 2);
|
||||
g_check_parse_ok(butnot_1, "aa", 2, "u0x61");
|
||||
g_check_parse_failed(butnot_2, "6", 1);
|
||||
}
|
||||
|
||||
static void test_difference(void) {
|
||||
const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));
|
||||
|
||||
g_check_parse_ok(difference_, "ab", 2, "<61.62>");
|
||||
g_check_parse_failed(difference_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_xor(void) {
|
||||
const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));
|
||||
|
||||
g_check_parse_ok(xor_, "0", 1, "u0x30");
|
||||
g_check_parse_ok(xor_, "9", 1, "u0x39");
|
||||
g_check_parse_failed(xor_, "5", 1);
|
||||
g_check_parse_failed(xor_, "a", 1);
|
||||
}
|
||||
|
||||
static void test_many(void) {
|
||||
const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||
g_check_parse_ok(many_, "adef", 4, "(u0x61)");
|
||||
g_check_parse_ok(many_, "bdef", 4, "(u0x62)");
|
||||
g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||
g_check_parse_ok(many_, "daabbabadef", 11, "()");
|
||||
}
|
||||
|
||||
static void test_many1(void) {
|
||||
const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||
|
||||
g_check_parse_ok(many1_, "adef", 4, "(u0x61)");
|
||||
g_check_parse_ok(many1_, "bdef", 4, "(u0x62)");
|
||||
g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||
g_check_parse_failed(many1_, "daabbabadef", 11);
|
||||
}
|
||||
|
||||
static void test_repeat_n(void) {
|
||||
const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);
|
||||
|
||||
g_check_parse_failed(repeat_n_, "adef", 4);
|
||||
g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)");
|
||||
g_check_parse_failed(repeat_n_, "dabdef", 6);
|
||||
}
|
||||
|
||||
static void test_optional(void) {
|
||||
const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);
|
||||
|
||||
g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)");
|
||||
g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)");
|
||||
g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)");
|
||||
g_check_parse_failed(optional_, "aed", 3);
|
||||
g_check_parse_failed(optional_, "ab", 2);
|
||||
g_check_parse_failed(optional_, "ac", 2);
|
||||
}
|
||||
|
||||
static void test_ignore(void) {
|
||||
const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);
|
||||
|
||||
g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)");
|
||||
g_check_parse_failed(ignore_, "ac", 2);
|
||||
}
|
||||
|
||||
static void test_sepBy1(void) {
|
||||
const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||
|
||||
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
|
||||
g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
|
||||
g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)");
|
||||
g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)");
|
||||
}
|
||||
|
||||
static void test_epsilon_p(void) {
|
||||
const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
|
||||
const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
|
||||
const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL);
|
||||
|
||||
g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)");
|
||||
g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)");
|
||||
g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)");
|
||||
}
|
||||
|
||||
static void test_attr_bool(void) {
|
||||
|
||||
}
|
||||
|
||||
static void test_and(void) {
|
||||
const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
|
||||
const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
|
||||
const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);
|
||||
|
||||
g_check_parse_ok(and_1, "0", 1, "(u0x30)");
|
||||
g_check_parse_failed(and_2, "0", 1);
|
||||
g_check_parse_ok(and_3, "12", 2, "(u0x31)");
|
||||
}
|
||||
|
||||
static void test_not(void) {
|
||||
const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
|
||||
const HParser *not_2 = h_sequence(h_ch('a'),
|
||||
h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
|
||||
h_token((const uint8_t*)"++", 2),
|
||||
NULL), h_ch('b'), NULL);
|
||||
|
||||
g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)");
|
||||
g_check_parse_failed(not_1, "a++b", 4);
|
||||
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
||||
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
||||
}
|
||||
|
||||
static void test_leftrec(void) {
|
||||
const HParser *a_ = h_ch('a');
|
||||
|
||||
HParser *lr_ = h_indirect();
|
||||
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
|
||||
|
||||
g_check_parse_ok(lr_, "a", 1, "u0x61");
|
||||
g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)");
|
||||
g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)");
|
||||
}
|
||||
|
||||
void register_parser_tests(void) {
|
||||
g_test_add_func("/core/parser/token", test_token);
|
||||
g_test_add_func("/core/parser/ch", test_ch);
|
||||
g_test_add_func("/core/parser/ch_range", test_ch_range);
|
||||
g_test_add_func("/core/parser/int64", test_int64);
|
||||
g_test_add_func("/core/parser/int32", test_int32);
|
||||
g_test_add_func("/core/parser/int16", test_int16);
|
||||
g_test_add_func("/core/parser/int8", test_int8);
|
||||
g_test_add_func("/core/parser/uint64", test_uint64);
|
||||
g_test_add_func("/core/parser/uint32", test_uint32);
|
||||
g_test_add_func("/core/parser/uint16", test_uint16);
|
||||
g_test_add_func("/core/parser/uint8", test_uint8);
|
||||
g_test_add_func("/core/parser/int_range", test_int_range);
|
||||
#if 0
|
||||
g_test_add_func("/core/parser/float64", test_float64);
|
||||
g_test_add_func("/core/parser/float32", test_float32);
|
||||
#endif
|
||||
g_test_add_func("/core/parser/whitespace", test_whitespace);
|
||||
g_test_add_func("/core/parser/left", test_left);
|
||||
g_test_add_func("/core/parser/right", test_right);
|
||||
g_test_add_func("/core/parser/middle", test_middle);
|
||||
g_test_add_func("/core/parser/action", test_action);
|
||||
g_test_add_func("/core/parser/in", test_in);
|
||||
g_test_add_func("/core/parser/not_in", test_not_in);
|
||||
g_test_add_func("/core/parser/end_p", test_end_p);
|
||||
g_test_add_func("/core/parser/nothing_p", test_nothing_p);
|
||||
g_test_add_func("/core/parser/sequence", test_sequence);
|
||||
g_test_add_func("/core/parser/choice", test_choice);
|
||||
g_test_add_func("/core/parser/butnot", test_butnot);
|
||||
g_test_add_func("/core/parser/difference", test_difference);
|
||||
g_test_add_func("/core/parser/xor", test_xor);
|
||||
g_test_add_func("/core/parser/many", test_many);
|
||||
g_test_add_func("/core/parser/many1", test_many1);
|
||||
g_test_add_func("/core/parser/repeat_n", test_repeat_n);
|
||||
g_test_add_func("/core/parser/optional", test_optional);
|
||||
g_test_add_func("/core/parser/sepBy1", test_sepBy1);
|
||||
g_test_add_func("/core/parser/epsilon_p", test_epsilon_p);
|
||||
g_test_add_func("/core/parser/attr_bool", test_attr_bool);
|
||||
g_test_add_func("/core/parser/and", test_and);
|
||||
g_test_add_func("/core/parser/not", test_not);
|
||||
g_test_add_func("/core/parser/ignore", test_ignore);
|
||||
g_test_add_func("/core/parser/leftrec", test_leftrec);
|
||||
}
|
||||
|
|
@ -15,12 +15,15 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <glib.h>
|
||||
#include "hammer.h"
|
||||
#include "test_suite.h"
|
||||
|
||||
extern void register_bitreader_tests();
|
||||
extern void register_bitwriter_tests();
|
||||
extern void register_parser_tests();
|
||||
extern void register_misc_tests();
|
||||
extern void register_benchmark_tests();
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
g_test_init(&argc, &argv, NULL);
|
||||
|
|
@ -29,6 +32,8 @@ int main(int argc, char** argv) {
|
|||
register_bitreader_tests();
|
||||
register_bitwriter_tests();
|
||||
register_parser_tests();
|
||||
register_misc_tests();
|
||||
register_benchmark_tests();
|
||||
|
||||
g_test_run();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#ifndef HAMMER_TEST_SUITE__H
|
||||
#define HAMMER_TEST_SUITE__H
|
||||
#include <malloc.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Equivalent to g_assert_*, but not using g_assert...
|
||||
#define g_check_inttype(fmt, typ, n1, op, n2) do { \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue