Merge remote branch 'upstream/master'
Conflicts: src/hammer.h
This commit is contained in:
commit
f817211446
66 changed files with 5165 additions and 1679 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -4,6 +4,9 @@
|
||||||
src/test_suite
|
src/test_suite
|
||||||
lib/hush
|
lib/hush
|
||||||
examples/dns
|
examples/dns
|
||||||
|
examples/base64
|
||||||
|
examples/base64_sem1
|
||||||
|
examples/base64_sem2
|
||||||
TAGS
|
TAGS
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
|
|
|
||||||
52
HACKING
Normal file
52
HACKING
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
Privileged arguments
|
||||||
|
====================
|
||||||
|
|
||||||
|
As a matter of convenience, there are several identifiers that
|
||||||
|
internal macros use. Chances are that if you use these names for other
|
||||||
|
things, you're gonna have a bad time.
|
||||||
|
|
||||||
|
In particular, these names, and the macros that use them, are:
|
||||||
|
- state:
|
||||||
|
Used by a_new and company. Should be an HParseState*
|
||||||
|
- mm__:
|
||||||
|
Used by h_new and h_free. Should be an HAllocator*
|
||||||
|
|
||||||
|
Function suffixes
|
||||||
|
=================
|
||||||
|
|
||||||
|
Many functions come in several variants, to handle receiving optional
|
||||||
|
parameters or parameters in multiple different forms. For example,
|
||||||
|
often, you have a global memory manager that is used for an entire
|
||||||
|
program. In this case, you can leave the memory manager arguments
|
||||||
|
off, letting them be implicit instead. Further, it is often convenient
|
||||||
|
to pass an array or va_list to a function instead of listing the
|
||||||
|
arguments inline (eg, for wrapping a function, generating the
|
||||||
|
arguments programmatically, or writing bindings for another language).
|
||||||
|
|
||||||
|
Because we have found that most variants fall into a fairly small set
|
||||||
|
of forms, and to minimize the number of API calls that users need to
|
||||||
|
remember, there is a consistent naming scheme for these function
|
||||||
|
variants: the function name is followed by two underscores and a set
|
||||||
|
of single-character "flags" indicating what optional features that
|
||||||
|
particular variant has (in alphabetical order, of course):
|
||||||
|
|
||||||
|
__a: takes variadic arguments as a void*[] (not implemented yet, but will be soon).
|
||||||
|
__m: takes a memory manager as the first argument, to override the system memory manager.
|
||||||
|
__v: Takes the variadic argument list as a va_list
|
||||||
|
|
||||||
|
|
||||||
|
Memory managers
|
||||||
|
===============
|
||||||
|
|
||||||
|
If the __m function variants are used or system_allocator is
|
||||||
|
overridden, there come some difficult questions to answer,
|
||||||
|
particularly regarding the behavior when multiple memory managers are
|
||||||
|
combined. As a general rule of thumb (exceptions will be explicitly
|
||||||
|
documented), assume that
|
||||||
|
|
||||||
|
If you have a function f, which is passed a memory manager m and
|
||||||
|
returns a value r, any function that uses r as a parameter must
|
||||||
|
also be told to use m as a memory manager.
|
||||||
|
|
||||||
|
In other words, don't let the (memory manager) streams cross.
|
||||||
|
|
||||||
7
Makefile
7
Makefile
|
|
@ -5,6 +5,10 @@
|
||||||
|
|
||||||
SUBDIRS = src examples
|
SUBDIRS = src examples
|
||||||
|
|
||||||
|
include config.mk
|
||||||
|
|
||||||
|
CONFIG_VARS= INCLUDE_TESTS
|
||||||
|
|
||||||
.DEFAULT_GOAL := all
|
.DEFAULT_GOAL := all
|
||||||
|
|
||||||
%:
|
%:
|
||||||
|
|
@ -25,3 +29,6 @@ $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir))))
|
||||||
|
|
||||||
TAGS: $(shell find * -name "*.c")
|
TAGS: $(shell find * -name "*.c")
|
||||||
etags $^
|
etags $^
|
||||||
|
|
||||||
|
config:
|
||||||
|
@printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) )
|
||||||
|
|
|
||||||
1
NOTES
1
NOTES
|
|
@ -35,4 +35,3 @@ what the comments say.
|
||||||
|
|
||||||
TODO: implement datastructure linearization func
|
TODO: implement datastructure linearization func
|
||||||
TODO: implement free func for parsers
|
TODO: implement free func for parsers
|
||||||
TODO: Remove glib dependency (i.e., GQueue and GHashtable)
|
|
||||||
58
README.md
Normal file
58
README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
|
||||||
|
|
||||||
|
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
|
||||||
|
|
||||||
|
Hammer currently builds under Linux. (Windows and OSX are coming.)
|
||||||
|
|
||||||
|
Features
|
||||||
|
========
|
||||||
|
* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
|
||||||
|
* Thread-safe, reentrant
|
||||||
|
* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
|
||||||
|
* Parsing backends:
|
||||||
|
* Packrat parsing
|
||||||
|
* LL(k) (not yet implemented)
|
||||||
|
* GLR (not yet implemented)
|
||||||
|
* LALR(8) (not yet implemented)
|
||||||
|
* Regular expressions (not yet implemented)
|
||||||
|
* Language bindings: (not yet implemented)
|
||||||
|
* C++
|
||||||
|
* Java
|
||||||
|
* Python
|
||||||
|
* Ruby
|
||||||
|
* Perl
|
||||||
|
* Go
|
||||||
|
* PHP
|
||||||
|
* .NET
|
||||||
|
|
||||||
|
Installing
|
||||||
|
==========
|
||||||
|
### Prerequisites
|
||||||
|
* make
|
||||||
|
|
||||||
|
### Optional Dependencies
|
||||||
|
* pkg-config (for `make test`)
|
||||||
|
* glib-2.0 (>= 2.29) (for `make test`)
|
||||||
|
* glib-2.0-dev (for `make test`)
|
||||||
|
|
||||||
|
To build, type `make`. To run the built-in test suite, type `make test`.
|
||||||
|
|
||||||
|
There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
=====
|
||||||
|
Just `#include <hammer.h>` and link with `-lhammer`.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
========
|
||||||
|
The `examples/` directory contains some simple examples, currently including:
|
||||||
|
* base64
|
||||||
|
* DNS
|
||||||
|
|
||||||
|
Community
|
||||||
|
=========
|
||||||
|
Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing.
|
||||||
|
|
||||||
|
Contact
|
||||||
|
=======
|
||||||
|
You can also email us at <hammer@upstandinghackers.com>.
|
||||||
3
TODO
Normal file
3
TODO
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
- Make h_action functions be called only after parse is complete.
|
||||||
|
- Allow alternative input streams (eg, zlib, base64)
|
||||||
|
- Bonus points if layered...
|
||||||
25
common.mk
25
common.mk
|
|
@ -1,17 +1,24 @@
|
||||||
CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
|
||||||
LDFLAGS := $(shell pkg-config --libs glib-2.0)
|
|
||||||
CC ?= gcc
|
|
||||||
$(info CC=$(CC))
|
|
||||||
# Set V=1 for verbose mode...
|
|
||||||
V ?= 0
|
|
||||||
CFLAGS += -DINCLUDE_TESTS $(EXTRA_CFLAGS)
|
|
||||||
HUSH = $(TOPLEVEL)/lib/hush
|
|
||||||
|
|
||||||
# Check to make sure variables are properly set
|
# Check to make sure variables are properly set
|
||||||
ifeq ($(TOPLEVEL),)
|
ifeq ($(TOPLEVEL),)
|
||||||
$(error $$TOPLEVEL is unset)
|
$(error $$TOPLEVEL is unset)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
include $(TOPLEVEL)/config.mk
|
||||||
|
|
||||||
|
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
|
||||||
|
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0)
|
||||||
|
|
||||||
|
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
|
||||||
|
LDFLAGS :=
|
||||||
|
|
||||||
|
CC ?= gcc
|
||||||
|
$(info CC=$(CC))
|
||||||
|
# Set V=1 for verbose mode...
|
||||||
|
V ?= 0
|
||||||
|
CFLAGS += $(EXTRA_CFLAGS)
|
||||||
|
HUSH = $(TOPLEVEL)/lib/hush
|
||||||
|
|
||||||
|
|
||||||
ifsilent = $(if $(findstring 0, $(V)),$(1),)
|
ifsilent = $(if $(findstring 0, $(V)),$(1),)
|
||||||
hush = $(call ifsilent,$(HUSH) $(1))
|
hush = $(call ifsilent,$(HUSH) $(1))
|
||||||
#.SUFFIXES:
|
#.SUFFIXES:
|
||||||
|
|
|
||||||
1
config.mk
Normal file
1
config.mk
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
INCLUDE_TESTS = 0
|
||||||
|
|
@ -1,20 +1,41 @@
|
||||||
|
|
||||||
OUTPUTS := dns.o \
|
OUTPUTS := dns.o \
|
||||||
dns
|
dns \
|
||||||
|
base64.o \
|
||||||
|
base64 \
|
||||||
|
base64_sem1.o \
|
||||||
|
base64_sem1 \
|
||||||
|
base64_sem2.o \
|
||||||
|
base64_sem2
|
||||||
|
|
||||||
TOPLEVEL := ../
|
TOPLEVEL := ../
|
||||||
|
|
||||||
include ../common.mk
|
include ../common.mk
|
||||||
|
# pkg-config has to be run through make's $(shell ...) function. A bare
# $(pkg-config ...) is a reference to an undefined make variable and expands
# to nothing, so the glib flags would silently never be added.
CFLAGS += $(shell pkg-config --cflags glib-2.0)
LDFLAGS += $(shell pkg-config --libs glib-2.0)
|
||||||
|
|
||||||
|
|
||||||
all: dns
|
|
||||||
|
all: dns base64 base64_sem1 base64_sem2
|
||||||
|
|
||||||
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
dns: dns.o rr.o dns_common.o
|
dns: dns.o rr.o dns_common.o
|
||||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
dns.o: ../src/hammer.h dns_common.h
|
dns.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||||
|
rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h
|
||||||
|
dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||||
|
|
||||||
rr.o: ../src/hammer.h rr.h dns_common.h
|
base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
|
base64: base64.o
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
dns_common.o: ../src/hammer.h dns_common.h
|
base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
|
base64_sem1: base64_sem1.o
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
|
base64_sem2: base64_sem2.o
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
base64%.o: ../src/hammer.h ../src/glue.h
|
||||||
|
|
|
||||||
63
examples/base64.c
Normal file
63
examples/base64.c
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
// Example parser: Base64, syntax only.
|
||||||
|
//
|
||||||
|
// Demonstrates how to construct a Hammer parser that recognizes valid Base64
|
||||||
|
// sequences.
|
||||||
|
//
|
||||||
|
// Note that no semantic evaluation of the sequence is performed, i.e. the
|
||||||
|
// byte sequence being represented is not returned, or determined. See
|
||||||
|
// base64_sem1.c and base64_sem2.c for examples how to attach appropriate
|
||||||
|
// semantic actions to the grammar.
|
||||||
|
|
||||||
|
#include "../src/hammer.h"
|
||||||
|
|
||||||
|
const HParser* document = NULL;
|
||||||
|
|
||||||
|
void init_parser(void)
|
||||||
|
{
|
||||||
|
// CORE
|
||||||
|
const HParser *digit = h_ch_range(0x30, 0x39);
|
||||||
|
const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL);
|
||||||
|
|
||||||
|
// AUX.
|
||||||
|
const HParser *plus = h_ch('+');
|
||||||
|
const HParser *slash = h_ch('/');
|
||||||
|
const HParser *equals = h_ch('=');
|
||||||
|
|
||||||
|
const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
|
||||||
|
const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
|
||||||
|
const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4);
|
||||||
|
const HParser *base64_3 = h_repeat_n(bsfdig, 4);
|
||||||
|
const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
|
||||||
|
const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
|
||||||
|
const HParser *base64 = h_sequence(h_many(base64_3),
|
||||||
|
h_optional(h_choice(base64_2,
|
||||||
|
base64_1, NULL)),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
uint8_t input[102400];
|
||||||
|
size_t inputsize;
|
||||||
|
const HParseResult *result;
|
||||||
|
|
||||||
|
init_parser();
|
||||||
|
|
||||||
|
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||||
|
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||||
|
fwrite(input, 1, inputsize, stderr);
|
||||||
|
result = h_parse(document, input, inputsize);
|
||||||
|
|
||||||
|
if(result) {
|
||||||
|
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||||
|
h_pprint(stdout, result->ast, 0, 0);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
172
examples/base64_sem1.c
Normal file
172
examples/base64_sem1.c
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
// Example parser: Base64, with fine-grained semantic actions
|
||||||
|
//
|
||||||
|
// Demonstrates how to attach semantic actions to grammar rules and piece by
|
||||||
|
// piece transform the parse tree into the desired semantic representation,
|
||||||
|
// in this case a sequence of 8-bit values.
|
||||||
|
//
|
||||||
|
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||||
|
// Those rules using ARULE get an attached action which must be declared (as
|
||||||
|
// a function of type HAction) with a standard name based on the rule name.
|
||||||
|
//
|
||||||
|
// This variant of the example uses fine-grained semantic actions that
|
||||||
|
// transform the parse tree in small steps in a bottom-up fashion. Compare
|
||||||
|
// base64_sem2.c for an alternative approach using a single top-level action.
|
||||||
|
|
||||||
|
#include "../src/hammer.h"
|
||||||
|
#include "../src/glue.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
// They must be named act_<rulename>.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParsedToken *act_bsfdig(const HParseResult *p)
|
||||||
|
{
|
||||||
|
HParsedToken *res = H_MAKE_UINT(0);
|
||||||
|
|
||||||
|
uint8_t c = H_CAST_UINT(p->ast);
|
||||||
|
|
||||||
|
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||||
|
res->uint = c - 0x41;
|
||||||
|
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||||
|
res->uint = c - 0x61 + 26;
|
||||||
|
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||||
|
res->uint = c - 0x30 + 52;
|
||||||
|
else if(c == '+')
|
||||||
|
res->uint = 62;
|
||||||
|
else if(c == '/')
|
||||||
|
res->uint = 63;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||||
|
|
||||||
|
#define act_bsfdig_4bit act_bsfdig
|
||||||
|
#define act_bsfdig_2bit act_bsfdig
|
||||||
|
|
||||||
|
#define act_equals h_act_ignore
|
||||||
|
#define act_ws h_act_ignore
|
||||||
|
|
||||||
|
#define act_document act_index0
|
||||||
|
|
||||||
|
// General-form action to turn a block of base64 digits into bytes.
|
||||||
|
const HParsedToken *act_base64_n(int n, const HParseResult *p)
|
||||||
|
{
|
||||||
|
HParsedToken *res = H_MAKE_SEQN(n);
|
||||||
|
|
||||||
|
HParsedToken **digits = h_seq_elements(p->ast);
|
||||||
|
|
||||||
|
uint32_t x = 0;
|
||||||
|
int bits = 0;
|
||||||
|
for(int i=0; i<n+1; i++) {
|
||||||
|
x <<= 6; x |= digits[i]->uint;
|
||||||
|
bits += 6;
|
||||||
|
}
|
||||||
|
x >>= bits%8; // align, i.e. cut off extra bits
|
||||||
|
|
||||||
|
for(int i=0; i<n; i++) {
|
||||||
|
HParsedToken *item = H_MAKE_UINT(x & 0xFF);
|
||||||
|
|
||||||
|
res->seq->elements[n-1-i] = item; // output the last byte and
|
||||||
|
x >>= 8; // discard it
|
||||||
|
}
|
||||||
|
res->seq->used = n;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_base64_3, act_base64_n, 3);
|
||||||
|
H_ACT_APPLY(act_base64_2, act_base64_n, 2);
|
||||||
|
H_ACT_APPLY(act_base64_1, act_base64_n, 1);
|
||||||
|
|
||||||
|
const HParsedToken *act_base64(const HParseResult *p)
|
||||||
|
{
|
||||||
|
assert(p->ast->token_type == TT_SEQUENCE);
|
||||||
|
assert(p->ast->seq->used == 2);
|
||||||
|
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||||
|
|
||||||
|
HParsedToken *res = H_MAKE_SEQ();
|
||||||
|
|
||||||
|
// concatenate base64_3 blocks
|
||||||
|
HCountedArray *seq = H_FIELD_SEQ(0);
|
||||||
|
for(size_t i=0; i<seq->used; i++)
|
||||||
|
h_seq_append(res, seq->elements[i]);
|
||||||
|
|
||||||
|
// append one trailing base64_2 or _1 block
|
||||||
|
const HParsedToken *tok = h_seq_index(p->ast, 1);
|
||||||
|
if(tok->token_type == TT_SEQUENCE)
|
||||||
|
h_seq_append(res, tok);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Set up the parser with the grammar to be recognized.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParser *init_parser(void)
|
||||||
|
{
|
||||||
|
// CORE
|
||||||
|
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||||
|
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||||
|
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||||
|
|
||||||
|
// AUX.
|
||||||
|
H_RULE (plus, h_ch('+'));
|
||||||
|
H_RULE (slash, h_ch('/'));
|
||||||
|
H_ARULE(equals, h_ch('='));
|
||||||
|
|
||||||
|
H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||||
|
H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||||
|
H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||||
|
H_ARULE(base64_3, h_repeat_n(bsfdig, 4));
|
||||||
|
H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||||
|
H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||||
|
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||||
|
h_optional(h_choice(base64_2,
|
||||||
|
base64_1, NULL)),
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
H_ARULE(ws, h_many(space));
|
||||||
|
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||||
|
|
||||||
|
// BUG sometimes inputs that should just don't parse.
|
||||||
|
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||||
|
// Using less actions seemed to make it less likely.
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Main routine: print input, parse, print result, return success/failure.
|
||||||
|
///
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
uint8_t input[102400];
|
||||||
|
size_t inputsize;
|
||||||
|
const HParser *parser;
|
||||||
|
const HParseResult *result;
|
||||||
|
|
||||||
|
parser = init_parser();
|
||||||
|
|
||||||
|
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||||
|
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||||
|
fwrite(input, 1, inputsize, stderr);
|
||||||
|
result = h_parse(parser, input, inputsize);
|
||||||
|
|
||||||
|
if(result) {
|
||||||
|
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||||
|
h_pprint(stdout, result->ast, 0, 0);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
176
examples/base64_sem2.c
Normal file
176
examples/base64_sem2.c
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
// Example parser: Base64, with coarse-grained semantic actions
|
||||||
|
//
|
||||||
|
// Demonstrates how to attach semantic actions to a grammar and transform the
|
||||||
|
// parse tree into the desired semantic representation, in this case a sequence
|
||||||
|
// of 8-bit values.
|
||||||
|
//
|
||||||
|
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||||
|
// Those rules using ARULE get an attached action which must be declared (as
|
||||||
|
// a function of type HAction) with a standard name based on the rule name.
|
||||||
|
//
|
||||||
|
// This variant of the example uses coarse-grained semantic actions,
|
||||||
|
// transforming the entire parse tree in one big step. Compare base64_sem1.c
|
||||||
|
// for an alternative approach using a fine-grained piece-by-piece
|
||||||
|
// transformation.
|
||||||
|
|
||||||
|
#include "../src/hammer.h"
|
||||||
|
#include "../src/glue.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
// They must be named act_<rulename>.
|
||||||
|
///
|
||||||
|
|
||||||
|
// helper: return the numeric value of a parsed base64 digit
|
||||||
|
uint8_t bsfdig_value(const HParsedToken *p)
|
||||||
|
{
|
||||||
|
uint8_t value = 0;
|
||||||
|
|
||||||
|
if(p && p->token_type == TT_UINT) {
|
||||||
|
uint8_t c = p->uint;
|
||||||
|
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||||
|
value = c - 0x41;
|
||||||
|
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||||
|
value = c - 0x61 + 26;
|
||||||
|
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||||
|
value = c - 0x30 + 52;
|
||||||
|
else if(c == '+')
|
||||||
|
value = 62;
|
||||||
|
else if(c == '/')
|
||||||
|
value = 63;
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper: append a byte value to a sequence
|
||||||
|
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
|
||||||
|
|
||||||
|
const HParsedToken *act_base64(const HParseResult *p)
|
||||||
|
{
|
||||||
|
assert(p->ast->token_type == TT_SEQUENCE);
|
||||||
|
assert(p->ast->seq->used == 2);
|
||||||
|
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||||
|
|
||||||
|
// grab b64_3 block sequence
|
||||||
|
// grab and analyze b64 end block (_2 or _1)
|
||||||
|
const HParsedToken *b64_3 = p->ast->seq->elements[0];
|
||||||
|
const HParsedToken *b64_2 = p->ast->seq->elements[1];
|
||||||
|
const HParsedToken *b64_1 = p->ast->seq->elements[1];
|
||||||
|
|
||||||
|
if(b64_2->token_type != TT_SEQUENCE)
|
||||||
|
b64_1 = b64_2 = NULL;
|
||||||
|
else if(b64_2->seq->elements[2]->uint == '=')
|
||||||
|
b64_2 = NULL;
|
||||||
|
else
|
||||||
|
b64_1 = NULL;
|
||||||
|
|
||||||
|
// allocate result sequence
|
||||||
|
HParsedToken *res = H_MAKE_SEQ();
|
||||||
|
|
||||||
|
// concatenate base64_3 blocks
|
||||||
|
for(size_t i=0; i<b64_3->seq->used; i++) {
|
||||||
|
assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE);
|
||||||
|
HParsedToken **digits = b64_3->seq->elements[i]->seq->elements;
|
||||||
|
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[3]);
|
||||||
|
seq_append_byte(res, (x >> 16) & 0xFF);
|
||||||
|
seq_append_byte(res, (x >> 8) & 0xFF);
|
||||||
|
seq_append_byte(res, x & 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
// append one trailing base64_2 or _1 block
|
||||||
|
if(b64_2) {
|
||||||
|
HParsedToken **digits = b64_2->seq->elements;
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||||
|
seq_append_byte(res, (x >> 10) & 0xFF);
|
||||||
|
seq_append_byte(res, (x >> 2) & 0xFF);
|
||||||
|
} else if(b64_1) {
|
||||||
|
HParsedToken **digits = b64_1->seq->elements;
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
seq_append_byte(res, (x >> 4) & 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||||
|
|
||||||
|
#define act_ws h_act_ignore
|
||||||
|
#define act_document act_index0
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Set up the parser with the grammar to be recognized.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParser *init_parser(void)
|
||||||
|
{
|
||||||
|
// CORE
|
||||||
|
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||||
|
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||||
|
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||||
|
|
||||||
|
// AUX.
|
||||||
|
H_RULE (plus, h_ch('+'));
|
||||||
|
H_RULE (slash, h_ch('/'));
|
||||||
|
H_RULE (equals, h_ch('='));
|
||||||
|
|
||||||
|
H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||||
|
H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||||
|
H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||||
|
H_RULE (base64_3, h_repeat_n(bsfdig, 4));
|
||||||
|
H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||||
|
H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||||
|
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||||
|
h_optional(h_choice(base64_2,
|
||||||
|
base64_1, NULL)),
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
H_ARULE(ws, h_many(space));
|
||||||
|
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||||
|
|
||||||
|
// BUG sometimes inputs that should just don't parse.
|
||||||
|
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||||
|
// Using less actions seemed to make it less likely.
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Main routine: print input, parse, print result, return success/failure.
|
||||||
|
///
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
uint8_t input[102400];
|
||||||
|
size_t inputsize;
|
||||||
|
const HParser *parser;
|
||||||
|
const HParseResult *result;
|
||||||
|
|
||||||
|
parser = init_parser();
|
||||||
|
|
||||||
|
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||||
|
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||||
|
fwrite(input, 1, inputsize, stderr);
|
||||||
|
result = h_parse(parser, input, inputsize);
|
||||||
|
|
||||||
|
if(result) {
|
||||||
|
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||||
|
h_pprint(stdout, result->ast, 0, 0);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
531
examples/dns.c
531
examples/dns.c
|
|
@ -10,7 +10,12 @@
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
bool is_zero(HParseResult *p) {
|
|
||||||
|
///
|
||||||
|
// Validations
|
||||||
|
///
|
||||||
|
|
||||||
|
bool validate_hdzero(HParseResult *p) {
|
||||||
if (TT_UINT != p->ast->token_type)
|
if (TT_UINT != p->ast->token_type)
|
||||||
return false;
|
return false;
|
||||||
return (0 == p->ast->uint);
|
return (0 == p->ast->uint);
|
||||||
|
|
@ -20,408 +25,244 @@ bool is_zero(HParseResult *p) {
|
||||||
* Every DNS message should have QDCOUNT entries in the question
|
* Every DNS message should have QDCOUNT entries in the question
|
||||||
* section, and ANCOUNT+NSCOUNT+ARCOUNT resource records.
|
* section, and ANCOUNT+NSCOUNT+ARCOUNT resource records.
|
||||||
*/
|
*/
|
||||||
bool validate_dns(HParseResult *p) {
|
bool validate_message(HParseResult *p) {
|
||||||
if (TT_SEQUENCE != p->ast->token_type)
|
if (TT_SEQUENCE != p->ast->token_type)
|
||||||
return false;
|
return false;
|
||||||
// The header holds the counts as its last 4 elements.
|
|
||||||
HParsedToken **elems = p->ast->seq->elements[0]->seq->elements;
|
dns_header_t *header = H_FIELD(dns_header_t, 0);
|
||||||
size_t qd = elems[8]->uint;
|
size_t qd = header->question_count;
|
||||||
size_t an = elems[9]->uint;
|
size_t an = header->answer_count;
|
||||||
size_t ns = elems[10]->uint;
|
size_t ns = header->authority_count;
|
||||||
size_t ar = elems[11]->uint;
|
size_t ar = header->additional_count;
|
||||||
HParsedToken *questions = p->ast->seq->elements[1];
|
|
||||||
if (questions->seq->used != qd)
|
if (H_FIELD_SEQ(1)->used != qd)
|
||||||
return false;
|
return false;
|
||||||
HParsedToken *rrs = p->ast->seq->elements[2];
|
if (an+ns+ar != H_FIELD_SEQ(2)->used)
|
||||||
if (an+ns+ar != rrs->seq->used)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct dns_qname get_qname(const HParsedToken *t) {
|
|
||||||
// The qname parser parses at least 1 length-value pair, then a NULL.
|
|
||||||
// So, t->seq->elements[0] is a sequence of at least 1 such pair,
|
|
||||||
// and t->seq->elements[1] is the null.
|
|
||||||
const HParsedToken *labels = t->seq->elements[0];
|
|
||||||
struct dns_qname ret = {
|
|
||||||
.qlen = labels->seq->used,
|
|
||||||
.labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used)
|
|
||||||
};
|
|
||||||
// i is which label we're on
|
|
||||||
for (size_t i=0; i<labels->seq->used; ++i) {
|
|
||||||
ret.labels[i].len = labels->seq->elements[i]->seq->used;
|
|
||||||
ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1);
|
|
||||||
// j is which char of the label we're on
|
|
||||||
for (size_t j=0; j<ret.labels[i].len; ++j)
|
|
||||||
ret.labels[i].label[j] = labels->seq->elements[i]->seq->elements[j]->uint;
|
|
||||||
ret.labels[i].label[ret.labels[i].len] = 0;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
char* get_domain(const HParsedToken *t) {
|
///
|
||||||
switch(t->token_type) {
|
// Semantic Actions
|
||||||
case TT_UINT:
|
///
|
||||||
return " ";
|
|
||||||
case TT_SEQUENCE:
|
|
||||||
{
|
|
||||||
// Sequence of subdomains separated by "."
|
|
||||||
// Each subdomain is a label, which can be no more than 63 chars.
|
|
||||||
char *ret = h_arena_malloc(t->seq->arena, 64*t->seq->used);
|
|
||||||
size_t count = 0;
|
|
||||||
for (size_t i=0; i<t->seq->used; ++i) {
|
|
||||||
HParsedToken *tmp = t->seq->elements[i];
|
|
||||||
for (size_t j=0; j<tmp->seq->used; ++j) {
|
|
||||||
ret[count] = tmp->seq->elements[i]->uint;
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
ret[count] = '.';
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
ret[count-1] = '\x00';
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint8_t* get_cs(const HCountedArray *arr) {
|
// Helper: Parse and pack the RDATA field of a Resource Record.
|
||||||
uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);
|
void set_rdata(struct dns_rr *rr, HCountedArray *rdata) {
|
||||||
for (size_t i=0; i<arr->used; ++i)
|
|
||||||
ret[i] = arr->elements[i]->uint;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint8_t** get_txt(const HCountedArray *arr) {
|
|
||||||
uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
|
|
||||||
for (size_t i=0; i<arr->used; ++i) {
|
|
||||||
uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used);
|
|
||||||
for (size_t j=0; j<arr->elements[i]->seq->used; ++j)
|
|
||||||
tmp[j] = arr->elements[i]->seq->elements[j]->uint;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_rr(struct dns_rr rr, HCountedArray *rdata) {
|
|
||||||
uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used);
|
uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used);
|
||||||
for (size_t i=0; i<rdata->used; ++i)
|
for (size_t i=0; i<rdata->used; ++i)
|
||||||
data[i] = rdata->elements[i]->uint;
|
data[i] = H_CAST_UINT(rdata->elements[i]);
|
||||||
|
|
||||||
|
// Parse RDATA if possible.
|
||||||
|
const HParseResult *p = NULL;
|
||||||
|
const HParser *parser = init_rdata(rr->type);
|
||||||
|
if (parser)
|
||||||
|
p = h_parse(parser, (const uint8_t*)data, rdata->used);
|
||||||
|
|
||||||
// If the RR doesn't parse, set its type to 0.
|
// If the RR doesn't parse, set its type to 0.
|
||||||
switch(rr.type) {
|
if (!p)
|
||||||
case 1: // A
|
rr->type = 0;
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used);
|
// Pack the parsed rdata into rr.
|
||||||
if (!r)
|
switch(rr->type) {
|
||||||
rr.type = 0;
|
case 1: rr->a = H_CAST_UINT(p->ast); break;
|
||||||
else
|
case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
rr.a = r->ast->seq->elements[0]->uint;
|
case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
break;
|
case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
}
|
case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
case 2: // NS
|
case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break;
|
||||||
{
|
case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used);
|
case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
if (!r)
|
case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
rr.type = 0;
|
case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break;
|
||||||
else
|
case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break;
|
||||||
rr.ns = get_domain(r->ast->seq->elements[0]);
|
case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break;
|
||||||
break;
|
case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break;
|
||||||
}
|
case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break;
|
||||||
case 3: // MD
|
case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break;
|
||||||
{
|
case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break;
|
||||||
const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used);
|
default: break;
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.md = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 4: // MF
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.md = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 5: // CNAME
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.cname = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 6: // SOA
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.soa.mname = get_domain(r->ast->seq->elements[0]);
|
|
||||||
rr.soa.rname = get_domain(r->ast->seq->elements[1]);
|
|
||||||
rr.soa.serial = r->ast->seq->elements[2]->uint;
|
|
||||||
rr.soa.refresh = r->ast->seq->elements[3]->uint;
|
|
||||||
rr.soa.retry = r->ast->seq->elements[4]->uint;
|
|
||||||
rr.soa.expire = r->ast->seq->elements[5]->uint;
|
|
||||||
rr.soa.minimum = r->ast->seq->elements[6]->uint;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 7: // MB
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.mb = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 8: // MG
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.mg = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 9: // MR
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.mr = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 10: // NULL
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used);
|
|
||||||
for (size_t i=0; i<r->ast->seq->used; ++i)
|
|
||||||
rr.null[i] = r->ast->seq->elements[i]->uint;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 11: // WKS
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.wks.address = r->ast->seq->elements[0]->uint;
|
|
||||||
rr.wks.protocol = r->ast->seq->elements[1]->uint;
|
|
||||||
rr.wks.len = r->ast->seq->elements[2]->seq->used;
|
|
||||||
rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used);
|
|
||||||
for (size_t i=0; i<rr.wks.len; ++i)
|
|
||||||
rr.wks.bit_map[i] = r->ast->seq->elements[2]->seq->elements[i]->uint;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 12: // PTR
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else
|
|
||||||
rr.ptr = get_domain(r->ast->seq->elements[0]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 13: // HINFO
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq);
|
|
||||||
rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 14: // MINFO
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]);
|
|
||||||
rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 15: // MX
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.mx.preference = r->ast->seq->elements[0]->uint;
|
|
||||||
rr.mx.exchange = get_domain(r->ast->seq->elements[1]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 16: // TXT
|
|
||||||
{
|
|
||||||
const HParseResult *r = h_parse(init_txt(), (const uint8_t*)data, rdata->used);
|
|
||||||
if (!r)
|
|
||||||
rr.type = 0;
|
|
||||||
else {
|
|
||||||
rr.txt.count = r->ast->seq->elements[0]->seq->used;
|
|
||||||
rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParsedToken* pack_dns_struct(const HParseResult *p) {
|
const HParsedToken* act_header(const HParseResult *p) {
|
||||||
h_pprint(stdout, p->ast, 0, 2);
|
HParsedToken **fields = h_seq_elements(p->ast);
|
||||||
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
|
dns_header_t header_ = {
|
||||||
ret->token_type = TT_USER;
|
.id = H_CAST_UINT(fields[0]),
|
||||||
|
.qr = H_CAST_UINT(fields[1]),
|
||||||
dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t));
|
.opcode = H_CAST_UINT(fields[2]),
|
||||||
|
.aa = H_CAST_UINT(fields[3]),
|
||||||
HParsedToken *hdr = p->ast->seq->elements[0];
|
.tc = H_CAST_UINT(fields[4]),
|
||||||
struct dns_header header = {
|
.rd = H_CAST_UINT(fields[5]),
|
||||||
.id = hdr->seq->elements[0]->uint,
|
.ra = H_CAST_UINT(fields[6]),
|
||||||
.qr = hdr->seq->elements[1]->uint,
|
.rcode = H_CAST_UINT(fields[7]),
|
||||||
.opcode = hdr->seq->elements[2]->uint,
|
.question_count = H_CAST_UINT(fields[8]),
|
||||||
.aa = hdr->seq->elements[3]->uint,
|
.answer_count = H_CAST_UINT(fields[9]),
|
||||||
.tc = hdr->seq->elements[4]->uint,
|
.authority_count = H_CAST_UINT(fields[10]),
|
||||||
.rd = hdr->seq->elements[5]->uint,
|
.additional_count = H_CAST_UINT(fields[11])
|
||||||
.ra = hdr->seq->elements[6]->uint,
|
|
||||||
.rcode = hdr->seq->elements[7]->uint,
|
|
||||||
.question_count = hdr->seq->elements[8]->uint,
|
|
||||||
.answer_count = hdr->seq->elements[9]->uint,
|
|
||||||
.authority_count = hdr->seq->elements[10]->uint,
|
|
||||||
.additional_count = hdr->seq->elements[11]->uint
|
|
||||||
};
|
};
|
||||||
msg->header = header;
|
|
||||||
|
|
||||||
HParsedToken *qs = p->ast->seq->elements[1];
|
dns_header_t *header = H_ALLOC(dns_header_t);
|
||||||
|
*header = header_;
|
||||||
|
|
||||||
|
return H_MAKE(dns_header_t, header);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_label(const HParseResult *p) {
|
||||||
|
dns_label_t *r = H_ALLOC(dns_label_t);
|
||||||
|
|
||||||
|
r->len = h_seq_len(p->ast);
|
||||||
|
r->label = h_arena_malloc(p->arena, r->len + 1);
|
||||||
|
for (size_t i=0; i<r->len; ++i)
|
||||||
|
r->label[i] = H_FIELD_UINT(i);
|
||||||
|
r->label[r->len] = 0;
|
||||||
|
|
||||||
|
return H_MAKE(dns_label_t, r);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_rr(const HParseResult *p) {
|
||||||
|
dns_rr_t *rr = H_ALLOC(dns_rr_t);
|
||||||
|
|
||||||
|
rr->name = *H_FIELD(dns_domain_t, 0);
|
||||||
|
rr->type = H_FIELD_UINT(1);
|
||||||
|
rr->class = H_FIELD_UINT(2);
|
||||||
|
rr->ttl = H_FIELD_UINT(3);
|
||||||
|
rr->rdlength = H_FIELD_SEQ(4)->used;
|
||||||
|
|
||||||
|
// Parse and pack RDATA.
|
||||||
|
set_rdata(rr, H_FIELD_SEQ(4));
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_t, rr);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_question(const HParseResult *p) {
|
||||||
|
dns_question_t *q = H_ALLOC(dns_question_t);
|
||||||
|
HParsedToken **fields = h_seq_elements(p->ast);
|
||||||
|
|
||||||
|
// QNAME is a sequence of labels. Pack them into an array.
|
||||||
|
q->qname.qlen = h_seq_len(fields[0]);
|
||||||
|
q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen);
|
||||||
|
for(size_t i=0; i<q->qname.qlen; i++) {
|
||||||
|
q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i);
|
||||||
|
}
|
||||||
|
|
||||||
|
q->qtype = H_CAST_UINT(fields[1]);
|
||||||
|
q->qclass = H_CAST_UINT(fields[2]);
|
||||||
|
|
||||||
|
return H_MAKE(dns_question_t, q);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_message(const HParseResult *p) {
|
||||||
|
h_pprint(stdout, p->ast, 0, 2);
|
||||||
|
dns_message_t *msg = H_ALLOC(dns_message_t);
|
||||||
|
|
||||||
|
// Copy header into message struct.
|
||||||
|
dns_header_t *header = H_FIELD(dns_header_t, 0);
|
||||||
|
msg->header = *header;
|
||||||
|
|
||||||
|
// Copy questions into message struct.
|
||||||
|
HParsedToken *qs = h_seq_index(p->ast, 1);
|
||||||
struct dns_question *questions = h_arena_malloc(p->arena,
|
struct dns_question *questions = h_arena_malloc(p->arena,
|
||||||
sizeof(struct dns_question)*(header.question_count));
|
sizeof(struct dns_question)*(header->question_count));
|
||||||
for (size_t i=0; i<header.question_count; ++i) {
|
for (size_t i=0; i<header->question_count; ++i) {
|
||||||
// QNAME is a sequence of labels. In the parser, it's defined as
|
questions[i] = *H_INDEX(dns_question_t, qs, i);
|
||||||
// sequence(many1(length_value(...)), ch('\x00'), NULL).
|
|
||||||
questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]);
|
|
||||||
questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint;
|
|
||||||
questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint;
|
|
||||||
}
|
}
|
||||||
msg->questions = questions;
|
msg->questions = questions;
|
||||||
|
|
||||||
HParsedToken *rrs = p->ast->seq->elements[2];
|
// Copy answer RRs into message struct.
|
||||||
|
HParsedToken *rrs = h_seq_index(p->ast, 2);
|
||||||
struct dns_rr *answers = h_arena_malloc(p->arena,
|
struct dns_rr *answers = h_arena_malloc(p->arena,
|
||||||
sizeof(struct dns_rr)*(header.answer_count));
|
sizeof(struct dns_rr)*(header->answer_count));
|
||||||
for (size_t i=0; i<header.answer_count; ++i) {
|
for (size_t i=0; i<header->answer_count; ++i) {
|
||||||
answers[i].name = get_domain(rrs[i].seq->elements[0]);
|
answers[i] = *H_INDEX(dns_rr_t, rrs, i);
|
||||||
answers[i].type = rrs[i].seq->elements[1]->uint;
|
|
||||||
answers[i].class = rrs[i].seq->elements[2]->uint;
|
|
||||||
answers[i].ttl = rrs[i].seq->elements[3]->uint;
|
|
||||||
answers[i].rdlength = rrs[i].seq->elements[4]->seq->used;
|
|
||||||
set_rr(answers[i], rrs[i].seq->elements[4]->seq);
|
|
||||||
}
|
}
|
||||||
msg->answers = answers;
|
msg->answers = answers;
|
||||||
|
|
||||||
|
// Copy authority RRs into message struct.
|
||||||
struct dns_rr *authority = h_arena_malloc(p->arena,
|
struct dns_rr *authority = h_arena_malloc(p->arena,
|
||||||
sizeof(struct dns_rr)*(header.authority_count));
|
sizeof(struct dns_rr)*(header->authority_count));
|
||||||
for (size_t i=0, j=header.answer_count; i<header.authority_count; ++i, ++j) {
|
for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) {
|
||||||
authority[i].name = get_domain(rrs[j].seq->elements[0]);
|
authority[i] = *H_INDEX(dns_rr_t, rrs, j);
|
||||||
authority[i].type = rrs[j].seq->elements[1]->uint;
|
|
||||||
authority[i].class = rrs[j].seq->elements[2]->uint;
|
|
||||||
authority[i].ttl = rrs[j].seq->elements[3]->uint;
|
|
||||||
authority[i].rdlength = rrs[j].seq->elements[4]->seq->used;
|
|
||||||
set_rr(authority[i], rrs[j].seq->elements[4]->seq);
|
|
||||||
}
|
}
|
||||||
msg->authority = authority;
|
msg->authority = authority;
|
||||||
|
|
||||||
|
// Copy additional RRs into message struct.
|
||||||
struct dns_rr *additional = h_arena_malloc(p->arena,
|
struct dns_rr *additional = h_arena_malloc(p->arena,
|
||||||
sizeof(struct dns_rr)*(header.additional_count));
|
sizeof(struct dns_rr)*(header->additional_count));
|
||||||
for (size_t i=0, j=header.answer_count+header.authority_count; i<header.additional_count; ++i, ++j) {
|
for (size_t i=0, j=header->answer_count+header->authority_count; i<header->additional_count; ++i, ++j) {
|
||||||
additional[i].name = get_domain(rrs[j].seq->elements[0]);
|
additional[i] = *H_INDEX(dns_rr_t, rrs, j);
|
||||||
additional[i].type = rrs[j].seq->elements[1]->uint;
|
|
||||||
additional[i].class = rrs[j].seq->elements[2]->uint;
|
|
||||||
additional[i].ttl = rrs[j].seq->elements[3]->uint;
|
|
||||||
additional[i].rdlength = rrs[j].seq->elements[4]->seq->used;
|
|
||||||
set_rr(additional[i], rrs[j].seq->elements[4]->seq);
|
|
||||||
}
|
}
|
||||||
msg->additional = additional;
|
msg->additional = additional;
|
||||||
|
|
||||||
ret->user = (void*)msg;
|
return H_MAKE(dns_message_t, msg);
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define act_hdzero h_act_ignore
|
||||||
|
#define act_qname act_index0
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Grammar
|
||||||
|
///
|
||||||
|
|
||||||
const HParser* init_parser() {
|
const HParser* init_parser() {
|
||||||
static HParser *dns_message = NULL;
|
static const HParser *ret = NULL;
|
||||||
if (dns_message)
|
if (ret)
|
||||||
return dns_message;
|
return ret;
|
||||||
|
|
||||||
const HParser *domain = init_domain();
|
H_RULE (domain, init_domain());
|
||||||
|
H_AVRULE(hdzero, h_bits(3, false));
|
||||||
const HParser *dns_header = h_sequence(h_bits(16, false), // ID
|
H_ARULE (header, h_sequence(h_bits(16, false), // ID
|
||||||
h_bits(1, false), // QR
|
h_bits(1, false), // QR
|
||||||
h_bits(4, false), // opcode
|
h_bits(4, false), // opcode
|
||||||
h_bits(1, false), // AA
|
h_bits(1, false), // AA
|
||||||
h_bits(1, false), // TC
|
h_bits(1, false), // TC
|
||||||
h_bits(1, false), // RD
|
h_bits(1, false), // RD
|
||||||
h_bits(1, false), // RA
|
h_bits(1, false), // RA
|
||||||
h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z
|
hdzero, // Z
|
||||||
h_bits(4, false), // RCODE
|
h_bits(4, false), // RCODE
|
||||||
h_uint16(), // QDCOUNT
|
h_uint16(), // QDCOUNT
|
||||||
h_uint16(), // ANCOUNT
|
h_uint16(), // ANCOUNT
|
||||||
h_uint16(), // NSCOUNT
|
h_uint16(), // NSCOUNT
|
||||||
h_uint16(), // ARCOUNT
|
h_uint16(), // ARCOUNT
|
||||||
NULL);
|
NULL));
|
||||||
|
H_RULE (type, h_int_range(h_uint16(), 1, 16));
|
||||||
const HParser *type = h_int_range(h_uint16(), 1, 16);
|
H_RULE (qtype, h_choice(type,
|
||||||
|
|
||||||
const HParser *qtype = h_choice(type,
|
|
||||||
h_int_range(h_uint16(), 252, 255),
|
h_int_range(h_uint16(), 252, 255),
|
||||||
NULL);
|
NULL));
|
||||||
|
H_RULE (class, h_int_range(h_uint16(), 1, 4));
|
||||||
const HParser *class = h_int_range(h_uint16(), 1, 4);
|
H_RULE (qclass, h_choice(class,
|
||||||
|
|
||||||
const HParser *qclass = h_choice(class,
|
|
||||||
h_int_range(h_uint16(), 255, 255),
|
h_int_range(h_uint16(), 255, 255),
|
||||||
NULL);
|
NULL));
|
||||||
|
H_RULE (len, h_int_range(h_uint8(), 1, 255));
|
||||||
const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255),
|
H_ARULE (label, h_length_value(len, h_uint8()));
|
||||||
h_uint8())),
|
H_ARULE (qname, h_sequence(h_many1(label),
|
||||||
h_ch('\x00'),
|
h_ch('\x00'),
|
||||||
NULL), // QNAME
|
NULL));
|
||||||
qtype, // QTYPE
|
H_ARULE (question, h_sequence(qname, qtype, qclass, NULL));
|
||||||
qclass, // QCLASS
|
H_RULE (rdata, h_length_value(h_uint16(), h_uint8()));
|
||||||
NULL);
|
H_ARULE (rr, h_sequence(domain, // NAME
|
||||||
|
|
||||||
|
|
||||||
const HParser *dns_rr = h_sequence(domain, // NAME
|
|
||||||
type, // TYPE
|
type, // TYPE
|
||||||
class, // CLASS
|
class, // CLASS
|
||||||
h_uint32(), // TTL
|
h_uint32(), // TTL
|
||||||
h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA
|
rdata, // RDLENGTH+RDATA
|
||||||
NULL);
|
NULL));
|
||||||
|
H_AVRULE(message, h_sequence(header,
|
||||||
|
h_many(question),
|
||||||
dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header,
|
h_many(rr),
|
||||||
h_many(dns_question),
|
|
||||||
h_many(dns_rr),
|
|
||||||
h_end_p(),
|
h_end_p(),
|
||||||
NULL),
|
NULL));
|
||||||
validate_dns),
|
|
||||||
pack_dns_struct);
|
|
||||||
|
|
||||||
return dns_message;
|
ret = message;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Main Program for a Dummy DNS Server
|
||||||
|
///
|
||||||
|
|
||||||
int start_listening() {
|
int start_listening() {
|
||||||
// return: fd
|
// return: fd
|
||||||
int sock;
|
int sock;
|
||||||
|
|
@ -442,7 +283,7 @@ int start_listening() {
|
||||||
|
|
||||||
const int TYPE_MAX = 16;
|
const int TYPE_MAX = 16;
|
||||||
typedef const char* cstr;
|
typedef const char* cstr;
|
||||||
const char* TYPE_STR[17] = {
|
static const char* TYPE_STR[17] = {
|
||||||
"nil", "A", "NS", "MD",
|
"nil", "A", "NS", "MD",
|
||||||
"MF", "CNAME", "SOA", "MB",
|
"MF", "CNAME", "SOA", "MB",
|
||||||
"MG", "MR", "NULL", "WKS",
|
"MG", "MR", "NULL", "WKS",
|
||||||
|
|
|
||||||
134
examples/dns.h
134
examples/dns.h
|
|
@ -1,6 +1,27 @@
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
|
|
||||||
struct dns_header {
|
enum DNSTokenType_ {
|
||||||
|
TT_dns_message_t = TT_USER,
|
||||||
|
TT_dns_header_t,
|
||||||
|
TT_dns_label_t,
|
||||||
|
TT_dns_qname_t,
|
||||||
|
TT_dns_question_t,
|
||||||
|
TT_dns_rr_t,
|
||||||
|
TT_dns_rr_txt_t,
|
||||||
|
TT_dns_rr_hinfo_t,
|
||||||
|
TT_dns_rr_minfo_t,
|
||||||
|
TT_dns_rr_mx_t,
|
||||||
|
TT_dns_rr_soa_t,
|
||||||
|
TT_dns_rr_wks_t,
|
||||||
|
TT_dns_rr_null_t,
|
||||||
|
TT_dns_domain_t,
|
||||||
|
TT_dns_cstr_t
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef char *dns_domain_t;
|
||||||
|
typedef uint8_t *dns_cstr_t;
|
||||||
|
|
||||||
|
typedef struct dns_header {
|
||||||
uint16_t id;
|
uint16_t id;
|
||||||
bool qr, aa, tc, rd, ra;
|
bool qr, aa, tc, rd, ra;
|
||||||
char opcode, rcode;
|
char opcode, rcode;
|
||||||
|
|
@ -8,48 +29,40 @@ struct dns_header {
|
||||||
size_t answer_count;
|
size_t answer_count;
|
||||||
size_t authority_count;
|
size_t authority_count;
|
||||||
size_t additional_count;
|
size_t additional_count;
|
||||||
};
|
} dns_header_t;
|
||||||
struct dns_qname {
|
|
||||||
size_t qlen;
|
typedef struct dns_label {
|
||||||
struct {
|
|
||||||
size_t len;
|
size_t len;
|
||||||
uint8_t *label;
|
uint8_t *label;
|
||||||
} *labels;
|
} dns_label_t;
|
||||||
};
|
|
||||||
struct dns_question {
|
typedef struct dns_qname {
|
||||||
struct dns_qname qname;
|
size_t qlen;
|
||||||
|
dns_label_t *labels;
|
||||||
|
} dns_qname_t;
|
||||||
|
|
||||||
|
typedef struct dns_question {
|
||||||
|
dns_qname_t qname;
|
||||||
uint16_t qtype;
|
uint16_t qtype;
|
||||||
uint16_t qclass;
|
uint16_t qclass;
|
||||||
};
|
} dns_question_t;
|
||||||
struct dns_rr {
|
|
||||||
char* name;
|
typedef struct {
|
||||||
uint16_t type;
|
dns_cstr_t cpu;
|
||||||
uint16_t class;
|
dns_cstr_t os;
|
||||||
uint32_t ttl; // cmos is also acceptable.
|
} dns_rr_hinfo_t;
|
||||||
uint16_t rdlength;
|
|
||||||
union {
|
typedef struct {
|
||||||
char* cname;
|
|
||||||
struct {
|
|
||||||
uint8_t* cpu;
|
|
||||||
uint8_t* os;
|
|
||||||
} hinfo;
|
|
||||||
char* mb;
|
|
||||||
char* md;
|
|
||||||
char* mf;
|
|
||||||
char* mg;
|
|
||||||
struct {
|
|
||||||
char* rmailbx;
|
char* rmailbx;
|
||||||
char* emailbx;
|
char* emailbx;
|
||||||
} minfo;
|
} dns_rr_minfo_t;
|
||||||
char* mr;
|
|
||||||
struct {
|
typedef struct {
|
||||||
uint16_t preference;
|
uint16_t preference;
|
||||||
char* exchange;
|
char* exchange;
|
||||||
} mx;
|
} dns_rr_mx_t;
|
||||||
uint8_t* null;
|
|
||||||
char* ns;
|
typedef struct {
|
||||||
char* ptr;
|
|
||||||
struct {
|
|
||||||
char* mname;
|
char* mname;
|
||||||
char* rname;
|
char* rname;
|
||||||
uint32_t serial;
|
uint32_t serial;
|
||||||
|
|
@ -57,25 +70,52 @@ struct dns_rr {
|
||||||
uint32_t retry;
|
uint32_t retry;
|
||||||
uint32_t expire;
|
uint32_t expire;
|
||||||
uint32_t minimum;
|
uint32_t minimum;
|
||||||
} soa;
|
} dns_rr_soa_t;
|
||||||
struct {
|
|
||||||
|
typedef struct {
|
||||||
size_t count;
|
size_t count;
|
||||||
uint8_t** txt_data;
|
uint8_t** txt_data;
|
||||||
} txt;
|
} dns_rr_txt_t;
|
||||||
uint32_t a;
|
|
||||||
struct {
|
typedef struct {
|
||||||
uint32_t address;
|
uint32_t address;
|
||||||
uint8_t protocol;
|
uint8_t protocol;
|
||||||
size_t len;
|
size_t len;
|
||||||
uint8_t* bit_map;
|
uint8_t* bit_map;
|
||||||
} wks;
|
} dns_rr_wks_t;
|
||||||
|
|
||||||
|
typedef uint8_t *dns_rr_null_t;
|
||||||
|
|
||||||
|
typedef struct dns_rr {
|
||||||
|
char* name;
|
||||||
|
uint16_t type;
|
||||||
|
uint16_t class;
|
||||||
|
uint32_t ttl; // cmos is also acceptable.
|
||||||
|
uint16_t rdlength;
|
||||||
|
union {
|
||||||
|
uint32_t a;
|
||||||
|
char* ns;
|
||||||
|
char* md;
|
||||||
|
char* mf;
|
||||||
|
char* cname;
|
||||||
|
dns_rr_soa_t soa;
|
||||||
|
char* mb;
|
||||||
|
char* mg;
|
||||||
|
char* mr;
|
||||||
|
dns_rr_null_t null;
|
||||||
|
dns_rr_wks_t wks;
|
||||||
|
char* ptr;
|
||||||
|
dns_rr_hinfo_t hinfo;
|
||||||
|
dns_rr_minfo_t minfo;
|
||||||
|
dns_rr_mx_t mx;
|
||||||
|
dns_rr_txt_t txt;
|
||||||
};
|
};
|
||||||
};
|
} dns_rr_t;
|
||||||
|
|
||||||
typedef struct dns_message {
|
typedef struct dns_message {
|
||||||
struct dns_header header;
|
dns_header_t header;
|
||||||
struct dns_question *questions;
|
dns_question_t *questions;
|
||||||
struct dns_rr *answers;
|
dns_rr_t *answers;
|
||||||
struct dns_rr *authority;
|
dns_rr_t *authority;
|
||||||
struct dns_rr *additional;
|
dns_rr_t *additional;
|
||||||
} dns_message_t;
|
} dns_message_t;
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,12 @@
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
#include "dns_common.h"
|
#include "dns_common.h"
|
||||||
|
#include "dns.h"
|
||||||
|
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_index0, h_act_index, 0)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A label can't be more than 63 characters.
|
* A label can't be more than 63 characters.
|
||||||
*/
|
*/
|
||||||
|
|
@ -13,51 +16,64 @@ bool validate_label(HParseResult *p) {
|
||||||
return (64 > p->ast->seq->used);
|
return (64 > p->ast->seq->used);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define act_label h_act_flatten
|
||||||
|
|
||||||
|
const HParsedToken* act_domain(const HParseResult *p) {
|
||||||
|
const HParsedToken *ret = NULL;
|
||||||
|
char *arr = NULL;
|
||||||
|
|
||||||
|
switch(p->ast->token_type) {
|
||||||
|
case TT_UINT:
|
||||||
|
arr = " ";
|
||||||
|
break;
|
||||||
|
case TT_SEQUENCE:
|
||||||
|
// Sequence of subdomains separated by "."
|
||||||
|
// Each subdomain is a label, which can be no more than 63 chars.
|
||||||
|
arr = h_arena_malloc(p->arena, 64*p->ast->seq->used);
|
||||||
|
size_t count = 0;
|
||||||
|
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
||||||
|
HParsedToken *tmp = p->ast->seq->elements[i];
|
||||||
|
for (size_t j=0; j<tmp->seq->used; ++j) {
|
||||||
|
arr[count] = tmp->seq->elements[i]->uint;
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
arr[count] = '.';
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
arr[count-1] = '\x00';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
arr = NULL;
|
||||||
|
ret = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(arr) {
|
||||||
|
dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char*
|
||||||
|
*val = arr;
|
||||||
|
ret = H_MAKE(dns_domain_t, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
const HParser* init_domain() {
|
const HParser* init_domain() {
|
||||||
static const HParser *domain = NULL;
|
static const HParser *ret = NULL;
|
||||||
if (domain)
|
if (ret)
|
||||||
return domain;
|
return ret;
|
||||||
|
|
||||||
const HParser *letter = h_choice(h_ch_range('a', 'z'),
|
H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL));
|
||||||
h_ch_range('A', 'Z'),
|
H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL));
|
||||||
NULL);
|
H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL)));
|
||||||
|
H_VARULE(label, h_sequence(letter,
|
||||||
const HParser *let_dig = h_choice(letter,
|
|
||||||
h_ch_range('0', '9'),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
const HParser *ldh_str = h_many1(h_choice(let_dig,
|
|
||||||
h_ch('-'),
|
|
||||||
NULL));
|
|
||||||
|
|
||||||
const HParser *label = h_attr_bool(h_sequence(letter,
|
|
||||||
h_optional(h_sequence(h_optional(ldh_str),
|
h_optional(h_sequence(h_optional(ldh_str),
|
||||||
let_dig,
|
let_dig,
|
||||||
NULL)),
|
NULL)),
|
||||||
NULL),
|
NULL));
|
||||||
validate_label);
|
H_RULE (subdomain, h_sepBy1(label, h_ch('.')));
|
||||||
|
H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL));
|
||||||
|
|
||||||
/**
|
ret = domain;
|
||||||
* You could write it like this ...
|
return ret;
|
||||||
* HParser *indirect_subdomain = h_indirect();
|
|
||||||
* const HParser *subdomain = h_choice(label,
|
|
||||||
* h_sequence(indirect_subdomain,
|
|
||||||
* h_ch('.'),
|
|
||||||
* label,
|
|
||||||
* NULL),
|
|
||||||
* NULL);
|
|
||||||
* h_bind_indirect(indirect_subdomain, subdomain);
|
|
||||||
*
|
|
||||||
* ... but this is easier and equivalent
|
|
||||||
*/
|
|
||||||
|
|
||||||
const HParser *subdomain = h_sepBy1(label, h_ch('.'));
|
|
||||||
|
|
||||||
domain = h_choice(subdomain,
|
|
||||||
h_ch(' '),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return domain;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* init_character_string() {
|
const HParser* init_character_string() {
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,11 @@
|
||||||
#define HAMMER_DNS_COMMON__H
|
#define HAMMER_DNS_COMMON__H
|
||||||
|
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
|
#include "../src/glue.h"
|
||||||
|
|
||||||
const HParser* init_domain();
|
const HParser* init_domain();
|
||||||
const HParser* init_character_string();
|
const HParser* init_character_string();
|
||||||
|
|
||||||
|
const HParsedToken* act_index0(const HParseResult *p);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
334
examples/rr.c
334
examples/rr.c
|
|
@ -1,124 +1,15 @@
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
#include "dns_common.h"
|
#include "dns_common.h"
|
||||||
|
#include "dns.h"
|
||||||
#include "rr.h"
|
#include "rr.h"
|
||||||
|
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
const HParser* init_cname() {
|
|
||||||
static const HParser *cname = NULL;
|
|
||||||
if (cname)
|
|
||||||
return cname;
|
|
||||||
|
|
||||||
cname = h_sequence(init_domain(),
|
///
|
||||||
h_end_p(),
|
// Validations and Semantic Actions
|
||||||
NULL);
|
///
|
||||||
|
|
||||||
return cname;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_hinfo() {
|
|
||||||
static const HParser *hinfo = NULL;
|
|
||||||
if (hinfo)
|
|
||||||
return hinfo;
|
|
||||||
|
|
||||||
const HParser* cstr = init_character_string();
|
|
||||||
|
|
||||||
hinfo = h_sequence(cstr,
|
|
||||||
cstr,
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return hinfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_mb() {
|
|
||||||
static const HParser *mb = NULL;
|
|
||||||
if (mb)
|
|
||||||
return mb;
|
|
||||||
|
|
||||||
mb = h_sequence(init_domain(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return mb;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_md() {
|
|
||||||
static const HParser *md = NULL;
|
|
||||||
if (md)
|
|
||||||
return md;
|
|
||||||
|
|
||||||
md = h_sequence(init_domain(),
|
|
||||||
h_end_p,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return md;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_mf() {
|
|
||||||
static const HParser *mf = NULL;
|
|
||||||
if (mf)
|
|
||||||
return mf;
|
|
||||||
|
|
||||||
mf = h_sequence(init_domain(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return mf;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_mg() {
|
|
||||||
static const HParser *mg = NULL;
|
|
||||||
if (mg)
|
|
||||||
return mg;
|
|
||||||
|
|
||||||
mg = h_sequence(init_domain(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return mg;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_minfo() {
|
|
||||||
static const HParser *minfo = NULL;
|
|
||||||
if (minfo)
|
|
||||||
return minfo;
|
|
||||||
|
|
||||||
const HParser* domain = init_domain();
|
|
||||||
|
|
||||||
minfo = h_sequence(domain,
|
|
||||||
domain,
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return minfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_mr() {
|
|
||||||
static const HParser *mr = NULL;
|
|
||||||
if (mr)
|
|
||||||
return mr;
|
|
||||||
|
|
||||||
mr = h_sequence(init_domain(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return mr;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_mx() {
|
|
||||||
static const HParser *mx = NULL;
|
|
||||||
if (mx)
|
|
||||||
return mx;
|
|
||||||
|
|
||||||
mx = h_sequence(h_uint16(),
|
|
||||||
init_domain(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return mx;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool validate_null(HParseResult *p) {
|
bool validate_null(HParseResult *p) {
|
||||||
if (TT_SEQUENCE != p->ast->token_type)
|
if (TT_SEQUENCE != p->ast->token_type)
|
||||||
|
|
@ -126,94 +17,177 @@ bool validate_null(HParseResult *p) {
|
||||||
return (65536 > p->ast->seq->used);
|
return (65536 > p->ast->seq->used);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* init_null() {
|
const HParsedToken *act_null(const HParseResult *p) {
|
||||||
static const HParser *null_ = NULL;
|
dns_rr_null_t *null = H_ALLOC(dns_rr_null_t);
|
||||||
if (null_)
|
|
||||||
return null_;
|
|
||||||
|
|
||||||
null_ = h_attr_bool(h_many(h_uint8()), validate_null);
|
size_t len = h_seq_len(p->ast);
|
||||||
|
uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len);
|
||||||
|
for (size_t i=0; i<len; ++i)
|
||||||
|
buf[i] = H_FIELD_UINT(i);
|
||||||
|
|
||||||
return null_;
|
return H_MAKE(dns_rr_null_t, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* init_ns() {
|
const HParsedToken *act_txt(const HParseResult *p) {
|
||||||
static const HParser *ns = NULL;
|
dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t);
|
||||||
if (ns)
|
|
||||||
return ns;
|
|
||||||
|
|
||||||
ns = h_sequence(init_domain(),
|
const HCountedArray *arr = H_CAST_SEQ(p->ast);
|
||||||
h_end_p(),
|
uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
|
||||||
NULL);
|
for (size_t i=0; i<arr->used; ++i) {
|
||||||
|
size_t len = h_seq_len(arr->elements[i]);
|
||||||
|
uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len);
|
||||||
|
for (size_t j=0; j<len; ++j)
|
||||||
|
tmp[j] = H_INDEX_UINT(arr->elements[i], j);
|
||||||
|
ret[i] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
return ns;
|
txt->count = arr->used;
|
||||||
|
txt->txt_data = ret;
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_txt_t, txt);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* init_ptr() {
|
const HParsedToken* act_cstr(const HParseResult *p) {
|
||||||
static const HParser *ptr = NULL;
|
dns_cstr_t *cs = H_ALLOC(dns_cstr_t);
|
||||||
if (ptr)
|
|
||||||
return ptr;
|
|
||||||
|
|
||||||
ptr = h_sequence(init_domain(),
|
const HCountedArray *arr = H_CAST_SEQ(p->ast);
|
||||||
h_end_p(),
|
uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);
|
||||||
NULL);
|
for (size_t i=0; i<arr->used; ++i)
|
||||||
|
ret[i] = H_CAST_UINT(arr->elements[i]);
|
||||||
|
assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation?
|
||||||
|
*cs = ret;
|
||||||
|
|
||||||
return ptr;
|
return H_MAKE(dns_cstr_t, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* init_soa() {
|
const HParsedToken* act_soa(const HParseResult *p) {
|
||||||
static const HParser *soa = NULL;
|
dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t);
|
||||||
if (soa)
|
|
||||||
return soa;
|
|
||||||
|
|
||||||
const HParser *domain = init_domain();
|
soa->mname = *H_FIELD(dns_domain_t, 0);
|
||||||
|
soa->rname = *H_FIELD(dns_domain_t, 1);
|
||||||
|
soa->serial = H_FIELD_UINT(2);
|
||||||
|
soa->refresh = H_FIELD_UINT(3);
|
||||||
|
soa->retry = H_FIELD_UINT(4);
|
||||||
|
soa->expire = H_FIELD_UINT(5);
|
||||||
|
soa->minimum = H_FIELD_UINT(6);
|
||||||
|
|
||||||
soa = h_sequence(domain, // MNAME
|
return H_MAKE(dns_rr_soa_t, soa);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_wks(const HParseResult *p) {
|
||||||
|
dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t);
|
||||||
|
|
||||||
|
wks->address = H_FIELD_UINT(0);
|
||||||
|
wks->protocol = H_FIELD_UINT(1);
|
||||||
|
wks->len = H_FIELD_SEQ(2)->used;
|
||||||
|
wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len);
|
||||||
|
for (size_t i=0; i<wks->len; ++i)
|
||||||
|
wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i);
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_wks_t, wks);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_hinfo(const HParseResult *p) {
|
||||||
|
dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t);
|
||||||
|
|
||||||
|
hinfo->cpu = *H_FIELD(dns_cstr_t, 0);
|
||||||
|
hinfo->os = *H_FIELD(dns_cstr_t, 1);
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_hinfo_t, hinfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_minfo(const HParseResult *p) {
|
||||||
|
dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t);
|
||||||
|
|
||||||
|
minfo->rmailbx = *H_FIELD(dns_domain_t, 0);
|
||||||
|
minfo->emailbx = *H_FIELD(dns_domain_t, 1);
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_minfo_t, minfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParsedToken* act_mx(const HParseResult *p) {
|
||||||
|
dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t);
|
||||||
|
|
||||||
|
mx->preference = H_FIELD_UINT(0);
|
||||||
|
mx->exchange = *H_FIELD(dns_domain_t, 1);
|
||||||
|
|
||||||
|
return H_MAKE(dns_rr_mx_t, mx);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Parsers for all types of RDATA
|
||||||
|
///
|
||||||
|
|
||||||
|
#define RDATA_TYPE_MAX 16
|
||||||
|
const HParser* init_rdata(uint16_t type) {
|
||||||
|
static const HParser *parsers[RDATA_TYPE_MAX+1];
|
||||||
|
static int inited = 0;
|
||||||
|
|
||||||
|
// Reject types that fall outside the table. sizeof(parsers) alone is a byte
// count, not an element count, so divide by the element size.
if (type >= sizeof(parsers) / sizeof(parsers[0]))
  return NULL;
|
||||||
|
|
||||||
|
if (inited)
|
||||||
|
return parsers[type];
|
||||||
|
|
||||||
|
|
||||||
|
H_RULE (domain, init_domain());
|
||||||
|
H_ARULE(cstr, init_character_string());
|
||||||
|
|
||||||
|
H_RULE (a, h_uint32());
|
||||||
|
H_RULE (ns, domain);
|
||||||
|
H_RULE (md, domain);
|
||||||
|
H_RULE (mf, domain);
|
||||||
|
H_RULE (cname, domain);
|
||||||
|
H_ARULE(soa, h_sequence(domain, // MNAME
|
||||||
domain, // RNAME
|
domain, // RNAME
|
||||||
h_uint32(), // SERIAL
|
h_uint32(), // SERIAL
|
||||||
h_uint32(), // REFRESH
|
h_uint32(), // REFRESH
|
||||||
h_uint32(), // RETRY
|
h_uint32(), // RETRY
|
||||||
h_uint32(), // EXPIRE
|
h_uint32(), // EXPIRE
|
||||||
h_uint32(), // MINIMUM
|
h_uint32(), // MINIMUM
|
||||||
h_end_p(),
|
NULL));
|
||||||
NULL);
|
H_RULE (mb, domain);
|
||||||
|
H_RULE (mg, domain);
|
||||||
return soa;
|
H_RULE (mr, domain);
|
||||||
}
|
H_VRULE(null, h_many(h_uint8()));
|
||||||
|
H_RULE (wks, h_sequence(h_uint32(),
|
||||||
const HParser* init_txt() {
|
|
||||||
static const HParser *txt = NULL;
|
|
||||||
if (txt)
|
|
||||||
return txt;
|
|
||||||
|
|
||||||
txt = h_sequence(h_many1(init_character_string()),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return txt;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_a() {
|
|
||||||
static const HParser *a = NULL;
|
|
||||||
if (a)
|
|
||||||
return a;
|
|
||||||
|
|
||||||
a = h_sequence(h_uint32(),
|
|
||||||
h_end_p(),
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HParser* init_wks() {
|
|
||||||
static const HParser *wks = NULL;
|
|
||||||
if (wks)
|
|
||||||
return wks;
|
|
||||||
|
|
||||||
wks = h_sequence(h_uint32(),
|
|
||||||
h_uint8(),
|
h_uint8(),
|
||||||
h_many(h_uint8()),
|
h_many(h_uint8()),
|
||||||
h_end_p(),
|
NULL));
|
||||||
NULL);
|
H_RULE (ptr, domain);
|
||||||
|
H_RULE (hinfo, h_sequence(cstr, cstr, NULL));
|
||||||
|
H_RULE (minfo, h_sequence(domain, domain, NULL));
|
||||||
|
H_RULE (mx, h_sequence(h_uint16(), domain, NULL));
|
||||||
|
H_ARULE(txt, h_many1(cstr));
|
||||||
|
|
||||||
return wks;
|
|
||||||
|
parsers[ 0] = NULL; // there is no type 0
|
||||||
|
parsers[ 1] = a;
|
||||||
|
parsers[ 2] = ns;
|
||||||
|
parsers[ 3] = md;
|
||||||
|
parsers[ 4] = mf;
|
||||||
|
parsers[ 5] = cname;
|
||||||
|
parsers[ 6] = soa;
|
||||||
|
parsers[ 7] = mb;
|
||||||
|
parsers[ 8] = mg;
|
||||||
|
parsers[ 9] = mr;
|
||||||
|
parsers[10] = null;
|
||||||
|
parsers[11] = wks;
|
||||||
|
parsers[12] = ptr;
|
||||||
|
parsers[13] = hinfo;
|
||||||
|
parsers[14] = minfo;
|
||||||
|
parsers[15] = mx;
|
||||||
|
parsers[16] = txt;
|
||||||
|
|
||||||
|
// All parsers must consume their input exactly.
|
||||||
|
// Wrap every populated slot so it must be followed by end of input; the
// act_index0 action is applied to the wrapping sequence.
// The index starts at 0 and is bounded by the element count (not the byte
// size) of the table.
for (size_t i = 0; i < sizeof(parsers) / sizeof(parsers[0]); i++) {
  if (parsers[i]) {
    parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
                          act_index0);
  }
}
|
||||||
|
|
||||||
|
inited = 1;
|
||||||
|
return parsers[type];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,21 +3,6 @@
|
||||||
|
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
|
|
||||||
const HParser* init_cname();
|
const HParser* init_rdata(uint16_t type);
|
||||||
const HParser* init_hinfo();
|
|
||||||
const HParser* init_mb();
|
|
||||||
const HParser* init_md();
|
|
||||||
const HParser* init_mf();
|
|
||||||
const HParser* init_mg();
|
|
||||||
const HParser* init_minfo();
|
|
||||||
const HParser* init_mr();
|
|
||||||
const HParser* init_mx();
|
|
||||||
const HParser* init_null();
|
|
||||||
const HParser* init_ns();
|
|
||||||
const HParser* init_ptr();
|
|
||||||
const HParser* init_soa();
|
|
||||||
const HParser* init_txt();
|
|
||||||
const HParser* init_a();
|
|
||||||
const HParser* init_wks();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
42
src/Makefile
42
src/Makefile
|
|
@ -25,31 +25,55 @@ PARSERS := \
|
||||||
attr_bool \
|
attr_bool \
|
||||||
indirect
|
indirect
|
||||||
|
|
||||||
OUTPUTS := bitreader.o \
|
BACKENDS := \
|
||||||
|
packrat
|
||||||
|
|
||||||
|
HAMMER_PARTS := \
|
||||||
|
bitreader.o \
|
||||||
hammer.o \
|
hammer.o \
|
||||||
bitwriter.o \
|
bitwriter.o \
|
||||||
libhammer.a \
|
|
||||||
pprint.o \
|
pprint.o \
|
||||||
allocator.o \
|
allocator.o \
|
||||||
datastructures.o \
|
datastructures.o \
|
||||||
|
system_allocator.o \
|
||||||
|
benchmark.o \
|
||||||
|
compile.o \
|
||||||
|
glue.o \
|
||||||
|
$(PARSERS:%=parsers/%.o) \
|
||||||
|
$(BACKENDS:%=backends/%.o)
|
||||||
|
|
||||||
|
TESTS := t_benchmark.o \
|
||||||
|
t_bitreader.o \
|
||||||
|
t_bitwriter.o \
|
||||||
|
t_parser.o \
|
||||||
|
t_misc.o \
|
||||||
|
test_suite.o
|
||||||
|
|
||||||
|
OUTPUTS := libhammer.a \
|
||||||
|
test_suite.o \
|
||||||
test_suite \
|
test_suite \
|
||||||
$(PARSERS:%=parsers/%.o)
|
$(HAMMER_PARTS) \
|
||||||
|
$(TESTS)
|
||||||
|
|
||||||
TOPLEVEL := ../
|
TOPLEVEL := ../
|
||||||
|
|
||||||
include ../common.mk
|
include ../common.mk
|
||||||
|
|
||||||
|
$(TESTS): CFLAGS += $(TEST_CFLAGS)
|
||||||
|
$(TESTS): LDFLAGS += $(TEST_LDFLAGS)
|
||||||
|
|
||||||
all: libhammer.a test_suite
|
all: libhammer.a
|
||||||
|
|
||||||
test_suite: test_suite.o libhammer.a
|
libhammer.a: $(HAMMER_PARTS)
|
||||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
|
||||||
|
|
||||||
libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o \
|
|
||||||
$(PARSERS:%=parsers/%.o)
|
|
||||||
|
|
||||||
bitreader.o: test_suite.h
|
bitreader.o: test_suite.h
|
||||||
hammer.o: hammer.h
|
hammer.o: hammer.h
|
||||||
|
glue.o: hammer.h glue.h
|
||||||
|
|
||||||
|
all: libhammer.a
|
||||||
|
|
||||||
test: test_suite
|
test: test_suite
|
||||||
./test_suite -v
|
./test_suite -v
|
||||||
|
|
||||||
|
test_suite: $(TESTS) libhammer.a
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS)
|
||||||
|
|
|
||||||
|
|
@ -15,11 +15,13 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <glib.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
#include "allocator.h"
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
|
||||||
struct arena_link {
|
struct arena_link {
|
||||||
// TODO:
|
// TODO:
|
||||||
|
|
@ -36,22 +38,25 @@ struct arena_link {
|
||||||
|
|
||||||
struct HArena_ {
|
struct HArena_ {
|
||||||
struct arena_link *head;
|
struct arena_link *head;
|
||||||
|
struct HAllocator_ *mm__;
|
||||||
size_t block_size;
|
size_t block_size;
|
||||||
size_t used;
|
size_t used;
|
||||||
size_t wasted;
|
size_t wasted;
|
||||||
};
|
};
|
||||||
|
|
||||||
HArena *h_new_arena(size_t block_size) {
|
HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
|
||||||
if (block_size == 0)
|
if (block_size == 0)
|
||||||
block_size = 4096;
|
block_size = 4096;
|
||||||
struct HArena_ *ret = g_new(struct HArena_, 1);
|
struct HArena_ *ret = h_new(struct HArena_, 1);
|
||||||
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + block_size);
|
struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
|
||||||
|
memset(link, 0, sizeof(struct arena_link) + block_size);
|
||||||
link->free = block_size;
|
link->free = block_size;
|
||||||
link->used = 0;
|
link->used = 0;
|
||||||
link->next = NULL;
|
link->next = NULL;
|
||||||
ret->head = link;
|
ret->head = link;
|
||||||
ret->block_size = block_size;
|
ret->block_size = block_size;
|
||||||
ret->used = 0;
|
ret->used = 0;
|
||||||
|
ret->mm__ = mm__;
|
||||||
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
|
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
@ -70,13 +75,15 @@ void* h_arena_malloc(HArena *arena, size_t size) {
|
||||||
// This involves some annoying casting...
|
// This involves some annoying casting...
|
||||||
arena->used += size;
|
arena->used += size;
|
||||||
arena->wasted += sizeof(struct arena_link*);
|
arena->wasted += sizeof(struct arena_link*);
|
||||||
void* link = g_malloc(size + sizeof(struct arena_link*));
|
void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
|
||||||
|
memset(link, 0, size + sizeof(struct arena_link*));
|
||||||
*(struct arena_link**)link = arena->head->next;
|
*(struct arena_link**)link = arena->head->next;
|
||||||
arena->head->next = (struct arena_link*)link;
|
arena->head->next = (struct arena_link*)link;
|
||||||
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
|
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
|
||||||
} else {
|
} else {
|
||||||
// we just need to allocate an ordinary new block.
|
// we just need to allocate an ordinary new block.
|
||||||
struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size);
|
struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
|
||||||
|
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
|
||||||
link->free = arena->block_size - size;
|
link->free = arena->block_size - size;
|
||||||
link->used = size;
|
link->used = size;
|
||||||
link->next = arena->head;
|
link->next = arena->head;
|
||||||
|
|
@ -87,17 +94,22 @@ void* h_arena_malloc(HArena *arena, size_t size) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Placeholder for releasing a single arena allocation. It currently does
// nothing and ignores both arguments; reserved for later use.
void h_arena_free(HArena *arena, void* ptr) {
  (void)arena;
  (void)ptr;
}
|
||||||
|
|
||||||
void h_delete_arena(HArena *arena) {
|
void h_delete_arena(HArena *arena) {
|
||||||
|
HAllocator *mm__ = arena->mm__;
|
||||||
struct arena_link *link = arena->head;
|
struct arena_link *link = arena->head;
|
||||||
while (link) {
|
while (link) {
|
||||||
struct arena_link *next = link->next;
|
struct arena_link *next = link->next;
|
||||||
// Even in the case of a special block, without the full arena
|
// Even in the case of a special block, without the full arena
|
||||||
// header, this is correct, because the next pointer is the first
|
// header, this is correct, because the next pointer is the first
|
||||||
// in the structure.
|
// in the structure.
|
||||||
g_free(link);
|
h_free(link);
|
||||||
link = next;
|
link = next;
|
||||||
}
|
}
|
||||||
g_free(arena);
|
h_free(arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
|
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
|
||||||
|
|
|
||||||
|
|
@ -19,10 +19,17 @@
|
||||||
#define HAMMER_ALLOCATOR__H__
|
#define HAMMER_ALLOCATOR__H__
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
// Table of memory-management callbacks. Every callback receives the
// allocator object itself as its first argument.
typedef struct HAllocator_ {
  // Takes a byte count and returns a pointer.
  void* (*alloc)(struct HAllocator_ *self, size_t size);
  // Takes an existing pointer and a byte count and returns a pointer.
  void* (*realloc)(struct HAllocator_ *self, void *ptr, size_t size);
  // Takes a pointer and returns nothing.
  void (*free)(struct HAllocator_ *self, void *ptr);
} HAllocator;
|
||||||
|
|
||||||
typedef struct HArena_ HArena ; // hidden implementation
|
typedef struct HArena_ HArena ; // hidden implementation
|
||||||
|
|
||||||
HArena *h_new_arena(size_t block_size); // pass 0 for default...
|
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
|
||||||
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
|
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
|
||||||
|
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
|
||||||
void h_delete_arena(HArena *arena);
|
void h_delete_arena(HArena *arena);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
||||||
209
src/backends/packrat.c
Normal file
209
src/backends/packrat.c
Normal file
|
|
@ -0,0 +1,209 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include "../internal.h"
|
||||||
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
|
// short-hand for constructing HCachedResult's
|
||||||
|
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
||||||
|
HCachedResult *ret = a_new(HCachedResult, 1);
|
||||||
|
ret->result = result;
|
||||||
|
ret->input_stream = state->input_stream;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
|
||||||
|
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
|
||||||
|
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when.
|
||||||
|
HParseResult *tmp_res;
|
||||||
|
if (parser) {
|
||||||
|
HInputStream bak = state->input_stream;
|
||||||
|
tmp_res = parser->vtable->parse(parser->env, state);
|
||||||
|
if (tmp_res) {
|
||||||
|
tmp_res->arena = state->arena;
|
||||||
|
if (!state->input_stream.overrun) {
|
||||||
|
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
|
||||||
|
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
|
||||||
|
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
|
||||||
|
else
|
||||||
|
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
|
||||||
|
} else
|
||||||
|
tmp_res->bit_length = 0;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
tmp_res = NULL;
|
||||||
|
if (state->input_stream.overrun)
|
||||||
|
return NULL; // overrun is always failure.
|
||||||
|
#ifdef CONSISTENCY_CHECK
|
||||||
|
if (!tmp_res) {
|
||||||
|
state->input_stream = INVALID;
|
||||||
|
state->input_stream.input = key->input_pos.input;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return tmp_res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Looks up the cache entry for key `k`, taking any recursion head that is
// registered under the same key into account.
//
// - No head registered: returns the cache entry as-is (possibly NULL).
// - Head registered, nothing cached, and k->parser is neither the head
//   parser nor in the involved set: returns a fresh PC_RIGHT entry wrapping
//   a result whose ast is NULL.
// - Head registered and k->parser is in the head's eval set: removes it from
//   that set, re-runs the parser and stores the outcome in the (possibly
//   newly allocated) entry, which is returned.
// - Otherwise: returns the cache entry unchanged.
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
  HParserCacheValue *cached = h_hashtable_get(state->cache, k);
  HRecursionHead *head = h_hashtable_get(state->recursion_heads, k);

  if (!head)
    return cached;

  if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
    // Nothing in the cache, and the key parser is not involved
    HParseResult *blank = a_new(HParseResult, 1);
    blank->ast = NULL;
    blank->arena = state->arena;

    HParserCacheValue *fresh = a_new(HParserCacheValue, 1);
    fresh->value_type = PC_RIGHT;
    fresh->right = cached_result(state, blank);
    return fresh;
  }

  if (!h_slist_find(head->eval_set, k->parser))
    return cached;

  // The key parser is in the eval set: take it out and evaluate it once.
  head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
  HParseResult *reparsed = perform_lowlevel_parse(state, k->parser);

  // Normally an entry already exists here; allocate one if it does not.
  if (!cached)
    cached = a_new(HParserCacheValue, 1);
  cached->value_type = PC_RIGHT;
  cached->right = cached_result(state, reparsed);
  return cached;
}
|
||||||
|
|
||||||
|
/* Setting up the left recursion. We have the LR for the rule head;
|
||||||
|
* we modify the involved_sets of all LRs in the stack, until we
|
||||||
|
* see the current parser again.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
||||||
|
if (!rec_detect->head) {
|
||||||
|
HRecursionHead *some = a_new(HRecursionHead, 1);
|
||||||
|
some->head_parser = p;
|
||||||
|
some->involved_set = h_slist_new(state->arena);
|
||||||
|
some->eval_set = NULL;
|
||||||
|
rec_detect->head = some;
|
||||||
|
}
|
||||||
|
assert(state->lr_stack->head != NULL);
|
||||||
|
HSlistNode *head = state->lr_stack->head;
|
||||||
|
HLeftRec *lr;
|
||||||
|
while (head && (lr = head->elem)->rule != p) {
|
||||||
|
lr->head = rec_detect->head;
|
||||||
|
h_slist_push(lr->head->involved_set, (void*)lr->rule);
|
||||||
|
head = head->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
|
||||||
|
* future parse.
|
||||||
|
*/
|
||||||
|
|
||||||
|
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
|
||||||
|
// Store the head into the recursion_heads
|
||||||
|
h_hashtable_put(state->recursion_heads, k, head);
|
||||||
|
HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
|
||||||
|
if (!old_cached || PC_LEFT == old_cached->value_type)
|
||||||
|
errx(1, "impossible match");
|
||||||
|
HParseResult *old_res = old_cached->right->result;
|
||||||
|
|
||||||
|
// reset the eval_set of the head of the recursion at each beginning of growth
|
||||||
|
head->eval_set = h_slist_copy(head->involved_set);
|
||||||
|
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
||||||
|
|
||||||
|
if (tmp_res) {
|
||||||
|
if ((old_res->ast->index < tmp_res->ast->index) ||
|
||||||
|
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
|
||||||
|
HParserCacheValue *v = a_new(HParserCacheValue, 1);
|
||||||
|
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
|
||||||
|
h_hashtable_put(state->cache, k, v);
|
||||||
|
return grow(k, state, head);
|
||||||
|
} else {
|
||||||
|
// we're done with growing, we can remove data from the recursion head
|
||||||
|
h_hashtable_del(state->recursion_heads, k);
|
||||||
|
HParserCacheValue *cached = h_hashtable_get(state->cache, k);
|
||||||
|
if (cached && PC_RIGHT == cached->value_type) {
|
||||||
|
return cached->right->result;
|
||||||
|
} else {
|
||||||
|
errx(1, "impossible match");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
h_hashtable_del(state->recursion_heads, k);
|
||||||
|
return old_res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolves the answer for a left-recursive entry `growable`.
// Aborts via errx if the entry has no head. If k->parser is not the head
// parser, the seed is returned untouched. Otherwise the seed is written to
// the cache as a PC_RIGHT entry and, when the seed is non-NULL, grown.
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
  if (!growable->head)
    errx(1, "lrAnswer with no head");

  // not the head rule, so not growing
  if (growable->head->head_parser != k->parser)
    return growable->seed;

  // update cache
  HParserCacheValue *entry = a_new(HParserCacheValue, 1);
  entry->value_type = PC_RIGHT;
  entry->right = cached_result(state, growable->seed);
  h_hashtable_put(state->cache, k, entry);

  return growable->seed ? grow(k, state, growable->head) : NULL;
}
|
||||||
|
|
||||||
|
/* Warth's recursion. Hi Alessandro! */
|
||||||
|
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
||||||
|
HParserCacheKey *key = a_new(HParserCacheKey, 1);
|
||||||
|
key->input_pos = state->input_stream; key->parser = parser;
|
||||||
|
HParserCacheValue *m = recall(key, state);
|
||||||
|
// check to see if there is already a result for this object...
|
||||||
|
if (!m) {
|
||||||
|
// It doesn't exist, so create a dummy result to cache
|
||||||
|
HLeftRec *base = a_new(HLeftRec, 1);
|
||||||
|
base->seed = NULL; base->rule = parser; base->head = NULL;
|
||||||
|
h_slist_push(state->lr_stack, base);
|
||||||
|
// cache it
|
||||||
|
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
|
||||||
|
dummy->value_type = PC_LEFT; dummy->left = base;
|
||||||
|
h_hashtable_put(state->cache, key, dummy);
|
||||||
|
// parse the input
|
||||||
|
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
|
||||||
|
// the base variable has passed equality tests with the cache
|
||||||
|
h_slist_pop(state->lr_stack);
|
||||||
|
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
|
||||||
|
if (NULL == base->head) {
|
||||||
|
HParserCacheValue *right = a_new(HParserCacheValue, 1);
|
||||||
|
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
|
||||||
|
h_hashtable_put(state->cache, key, right);
|
||||||
|
return tmp_res;
|
||||||
|
} else {
|
||||||
|
base->seed = tmp_res;
|
||||||
|
HParseResult *res = lr_answer(key, state, base);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// it exists!
|
||||||
|
if (PC_LEFT == m->value_type) {
|
||||||
|
setupLR(parser, state, m->left);
|
||||||
|
return m->left->seed; // BUG: this might not be correct
|
||||||
|
} else {
|
||||||
|
state->input_stream = m->right->input_stream;
|
||||||
|
return m->right->result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile hook of the packrat backend. No compilation is necessary, and
// everything should work out of the box, so this always returns 0 and
// ignores its arguments.
int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) {
  (void)mm__;
  (void)parser;
  (void)params;
  return 0;
}
|
||||||
|
|
||||||
|
// Parse hook of the packrat backend: forwards to h_do_parse(). The
// allocator argument is not used here.
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) {
  HParseResult *outcome = h_do_parse(parser, parse_state);
  return outcome;
}
|
||||||
|
|
||||||
|
HParserBackendVTable h__packrat_backend_vtable = {
|
||||||
|
.compile = h_packrat_compile,
|
||||||
|
.parse = h_packrat_parse
|
||||||
|
};
|
||||||
114
src/benchmark.c
Normal file
114
src/benchmark.c
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
Usage:
|
||||||
|
Create your parser (i.e., const HParser*), and an array of test cases
|
||||||
|
(i.e., HParserTestcase[], terminated by { NULL, 0, NULL }) and then call
|
||||||
|
|
||||||
|
HBenchmarkResults* results = h_benchmark(parser, testcases);
|
||||||
|
|
||||||
|
Then, you can format a report with:
|
||||||
|
|
||||||
|
h_benchmark_report(stdout, results);
|
||||||
|
|
||||||
|
or just generate code to make the parser run as fast as possible with:
|
||||||
|
|
||||||
|
h_benchmark_dump_optimized_code(stdout, results);
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Convenience wrapper around h_benchmark__m() that passes
// &system_allocator as the allocator.
HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) {
  HAllocator *mm__ = &system_allocator;
  return h_benchmark__m(mm__, parser, testcases);
}
|
||||||
|
|
||||||
|
// Benchmarks `parser` once per backend in [PB_MIN, PB_MAX).
//
// For each backend the grammar is compiled, every test case is verified, and
// (only if all cases pass) each case is timed. `testcases` is an array
// terminated by an entry whose `input` is NULL. A case passes when the
// unambiguous rendering of the parse result equals `output_unambiguous`, or
// when the parse fails and `output_unambiguous` is NULL.
//
// Returns a newly allocated HBenchmarkResults (allocated through mm__ via
// h_new). Entries for backends that failed to compile or failed a test case
// have `cases == NULL`.
//
// NOTE(review): the loop treats PB_MAX as exclusive, matching ret->len;
// confirm against the HParserBackend definition.
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) {
  // For now, just output the results to stderr
  HParserTestcase* tc = testcases;
  HParserBackend backend = PB_MIN;
  HBenchmarkResults *ret = h_new(HBenchmarkResults, 1);
  ret->len = PB_MAX-PB_MIN;
  ret->results = h_new(HBackendResults, ret->len);

  for (backend = PB_MIN; backend < PB_MAX; backend++) {
    // Index relative to PB_MIN so the array is not overrun if PB_MIN != 0.
    HBackendResults *br = &ret->results[backend - PB_MIN];
    br->backend = backend;
    br->n_testcases = 0;
    br->failed_testcases = 0;
    br->cases = NULL;

    // Step 1: Compile grammar for the backend under test (not always PB_MIN).
    if (h_compile(parser, backend, NULL) == -1) {
      // backend inappropriate for grammar...
      fprintf(stderr, "failed\n");
      br->compile_success = false;
      continue;
    }
    br->compile_success = true;

    // Step 2: verify all test cases.
    for (tc = testcases; tc->input != NULL; tc++) {
      br->n_testcases++;
      HParseResult *res = h_parse(parser, tc->input, tc->length);
      char* res_unamb = NULL;
      if (res != NULL)
        res_unamb = h_write_result_unamb(res->ast);
      // NOTE(review): res_unamb is never released here; the matching
      // deallocation function is not visible from this file.

      int mismatch;
      if (res_unamb == NULL || tc->output_unambiguous == NULL) {
        // Only "both NULL" (expected failure, got failure) is a pass.
        mismatch = (res_unamb == NULL) != (tc->output_unambiguous == NULL);
      } else {
        mismatch = strcmp(res_unamb, tc->output_unambiguous) != 0;
      }
      if (mismatch) {
        // test case failed...
        fprintf(stderr, "failed\n");
        // We want to run all testcases, for purposes of generating a
        // report. (eg, if users are trying to fix a grammar for a
        // faster backend)
        br->failed_testcases++;
      }
      if (res != NULL)
        h_parse_result_free(res);
    }

    if (br->failed_testcases > 0) {
      // Can't use this parser; skip to the next. `cases` stays NULL.
      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
      continue;
    }

    // Step 3: time each test case.
    br->cases = h_new(HCaseResult, br->n_testcases);
    size_t cur_case = 0;

    for (tc = testcases; tc->input != NULL; tc++) {
      // The goal is to run each testcase for at least 100ms each
      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
      int count = 1, cur;
      struct timespec ts_start, ts_end;
      long long time_diff;
      do {
        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);
        for (cur = 0; cur < count; cur++) {
          HParseResult *timed = h_parse(parser, tc->input, tc->length);
          if (timed != NULL)
            h_parse_result_free(timed);
        }
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);

        // time_diff is in ns; widen before multiplying so a 32-bit time_t
        // cannot overflow.
        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
                  + (ts_end.tv_nsec - ts_start.tv_nsec);
      } while (time_diff < 100000000);
      br->cases[cur_case].parse_time = (time_diff / count);
      cur_case++;
    }
  }
  return ret;
}
|
||||||
|
|
||||||
|
// Writes a plain-text report of `result` to `stream`: one header line per
// backend entry, followed by one line per timed test case. Entries without
// timing data (cases == NULL) only get the header line.
void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
  for (size_t i=0; i<result->len; ++i) {
    // %zu is the matching conversion for size_t.
    fprintf(stream, "Backend %zu ... \n", i);
    if (result->results[i].cases == NULL)
      continue;
    for (size_t j=0; j<result->results[i].n_testcases; ++j) {
      // Cast so the conversion specifier matches whatever integer type
      // parse_time is declared as.
      fprintf(stream, "Case %zu: %lld ns/parse\n", j,
              (long long)result->results[i].cases[j].parse_time);
    }
  }
}
|
||||||
|
|
@ -108,70 +108,3 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) {
|
||||||
out <<= final_shift;
|
out <<= final_shift;
|
||||||
return (out ^ msb) - msb; // perform sign extension
|
return (out ^ msb) - msb; // perform sign extension
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INCLUDE_TESTS
|
|
||||||
|
|
||||||
// Expands to a designated initializer for an input stream over `buf` of
// `len` bytes. The starting bit_offset is 8 when BIT_BIG_ENDIAN is set in
// `endianness_` and 0 otherwise. Every argument is parenthesized so that
// expression arguments expand safely.
#define MK_INPUT_STREAM(buf,len,endianness_)                    \
  {                                                             \
    .input = (uint8_t*)(buf),                                   \
    .length = (len),                                            \
    .index = 0,                                                 \
    .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0),   \
    .endianness = (endianness_)                                 \
  }
|
|
||||||
|
|
||||||
|
|
||||||
static void test_bitreader_ints(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_bitreader_be(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
|
||||||
}
|
|
||||||
static void test_bitreader_le(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_largebits_be(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_largebits_le(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_offset_largebits_be(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_offset_largebits_le(void) {
|
|
||||||
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
|
|
||||||
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void register_bitreader_tests(void) {
|
|
||||||
g_test_add_func("/core/bitreader/be", test_bitreader_be);
|
|
||||||
g_test_add_func("/core/bitreader/le", test_bitreader_le);
|
|
||||||
g_test_add_func("/core/bitreader/largebits-be", test_largebits_be);
|
|
||||||
g_test_add_func("/core/bitreader/largebits-le", test_largebits_le);
|
|
||||||
g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be);
|
|
||||||
g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le);
|
|
||||||
g_test_add_func("/core/bitreader/ints", test_bitreader_ints);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // #ifdef INCLUDE_TESTS
|
|
||||||
|
|
|
||||||
137
src/bitwriter.c
137
src/bitwriter.c
|
|
@ -4,22 +4,16 @@
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "test_suite.h"
|
#include "test_suite.h"
|
||||||
|
|
||||||
// This file provides the logical inverse of bitreader.c
|
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||||
struct HBitWriter_ {
|
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||||
uint8_t* buf;
|
|
||||||
size_t index;
|
|
||||||
size_t capacity;
|
|
||||||
char bit_offset; // unlike in bit_reader, this is always the number
|
|
||||||
// of used bits in the current byte. i.e., 0 always
|
|
||||||
// means that 8 bits are available for use.
|
|
||||||
char flags;
|
|
||||||
};
|
|
||||||
|
|
||||||
// h_bit_writer_
|
// h_bit_writer_
|
||||||
HBitWriter *h_bit_writer_new() {
|
HBitWriter *h_bit_writer_new(HAllocator* mm__) {
|
||||||
HBitWriter *writer = g_new0(HBitWriter, 1);
|
HBitWriter *writer = h_new(HBitWriter, 1);
|
||||||
writer->buf = g_malloc0(writer->capacity = 8);
|
memset(writer, 0, sizeof(*writer));
|
||||||
|
writer->buf = mm__->alloc(mm__, writer->capacity = 8);
|
||||||
|
memset(writer->buf, 0, writer->capacity);
|
||||||
|
writer->mm__ = mm__;
|
||||||
writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
|
writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
|
||||||
|
|
||||||
return writer;
|
return writer;
|
||||||
|
|
@ -41,7 +35,7 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) {
|
||||||
int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 1 : 0);
|
int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 1 : 0);
|
||||||
size_t old_capacity = w->capacity;
|
size_t old_capacity = w->capacity;
|
||||||
while (w->index + nbytes >= w->capacity) {
|
while (w->index + nbytes >= w->capacity) {
|
||||||
w->buf = g_realloc(w->buf, w->capacity *= 2);
|
w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (old_capacity != w->capacity)
|
if (old_capacity != w->capacity)
|
||||||
|
|
@ -100,114 +94,7 @@ const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_bit_writer_free(HBitWriter* w) {
|
void h_bit_writer_free(HBitWriter* w) {
|
||||||
g_free(w->buf);
|
HAllocator *mm__ = w->mm__;
|
||||||
g_free(w);
|
h_free(w->buf);
|
||||||
|
h_free(w);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INCLUDE_TESTS
|
|
||||||
// TESTS BELOW HERE
|
|
||||||
// One write step of a bit-writer test: emit the low `nbits` bits of `data`.
// A table of these should end with {0,0}.
typedef struct {
  unsigned long long data;  // value to write
  size_t nbits;             // number of bits to write; 0 terminates a table
} bitwriter_test_elem;
|
|
||||||
|
|
||||||
// Round-trip check: write every element of `data` (terminated by an entry
// with nbits == 0) through a bit writer configured with `flags`, then read
// the resulting buffer back with h_read_bits and compare value by value.
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
  int i;
  HBitWriter *w = h_bit_writer_new();
  w->flags = flags;

  // Write phase.
  i = 0;
  while (data[i].nbits) {
    h_bit_writer_put(w, data[i].data, data[i].nbits);
    i++;
  }

  size_t len;
  const uint8_t *buf = h_bit_writer_get_buffer(w, &len);

  // Read phase: the stream uses the same endianness flags as the writer.
  HInputStream input = {
    .input = buf,
    .index = 0,
    .length = len,
    .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
    .endianness = flags,
    .overrun = 0
  };

  for (i = 0; data[i].nbits; i++) {
    g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
  }
}
|
|
||||||
|
|
||||||
static void test_bitwriter_ints(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ -0x200000000, 64 },
|
|
||||||
{ 0,0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_bitwriter_be(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0x03, 3 },
|
|
||||||
{ 0x52, 8 },
|
|
||||||
{ 0x1A, 5 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_bitwriter_le(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0x02, 3 },
|
|
||||||
{ 0x4D, 8 },
|
|
||||||
{ 0x0B, 5 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_largebits_be(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0x352, 11 },
|
|
||||||
{ 0x1A, 5 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_largebits_le(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0x26A, 11 },
|
|
||||||
{ 0x0B, 5 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_offset_largebits_be(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0xD, 5 },
|
|
||||||
{ 0x25A, 11 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_offset_largebits_le(void) {
|
|
||||||
bitwriter_test_elem data[] = {
|
|
||||||
{ 0xA, 5 },
|
|
||||||
{ 0x2D3, 11 },
|
|
||||||
{ 0, 0 }
|
|
||||||
};
|
|
||||||
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
|
||||||
}
|
|
||||||
|
|
||||||
void register_bitwriter_tests(void) {
|
|
||||||
g_test_add_func("/core/bitwriter/be", test_bitwriter_be);
|
|
||||||
g_test_add_func("/core/bitwriter/le", test_bitwriter_le);
|
|
||||||
g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be);
|
|
||||||
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le);
|
|
||||||
g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be);
|
|
||||||
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le);
|
|
||||||
g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // #ifdef INCLUDE_TESTS
|
|
||||||
|
|
|
||||||
15
src/compile.c
Normal file
15
src/compile.c
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
// This file contains functions related to managing multiple parse backends
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
static HParserBackendVTable *backends[PB_MAX] = {
|
||||||
|
&h__packrat_backend_vtable,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Convenience form of h_compile__m that uses the global system_allocator.
int h_compile(const HParser* parser, HParserBackend backend, const void* params) {
  HAllocator *mm__ = &system_allocator;
  return h_compile__m(mm__, parser, backend, params);
}
|
||||||
|
|
||||||
|
// Hand `parser` to the compile entry point of the vtable stored for
// `backend` and return whatever that entry point returns.
// NOTE(review): `backend` indexes backends[] unchecked; a slot with no
// registered vtable would be dereferenced as NULL -- confirm callers only
// pass registered backends.
int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) {
  HParserBackendVTable *vtable = backends[backend];
  return vtable->compile(mm__, parser, params);
}
|
||||||
|
|
@ -2,7 +2,8 @@
|
||||||
#include "hammer.h"
|
#include "hammer.h"
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <malloc.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
// {{{ counted arrays
|
// {{{ counted arrays
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -15,6 +16,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
|
||||||
ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
|
ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
HCountedArray *h_carray_new(HArena * arena) {
|
HCountedArray *h_carray_new(HArena * arena) {
|
||||||
return h_carray_new_sized(arena, 4);
|
return h_carray_new_sized(arena, 4);
|
||||||
}
|
}
|
||||||
|
|
@ -30,3 +32,213 @@ void h_carray_append(HCountedArray *array, void* item) {
|
||||||
}
|
}
|
||||||
array->elements[array->used++] = item;
|
array->elements[array->used++] = item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HSlist
|
||||||
|
// Allocate an empty singly-linked list from `arena`.  The list remembers the
// arena so that later node allocations use it too.
HSlist* h_slist_new(HArena *arena) {
  HSlist *list = h_arena_malloc(arena, sizeof(HSlist));
  list->arena = arena;
  list->head = NULL;
  return list;
}
|
||||||
|
|
||||||
|
// Build a new list in the same arena holding the same element pointers in
// the same order as `slist`.  Elements themselves are not duplicated.
HSlist* h_slist_copy(HSlist *slist) {
  HSlist *copy = h_slist_new(slist->arena);
  HSlistNode *src = slist->head;
  if (src == NULL)
    return copy;

  // The first element goes in via push; `last` then tracks the end of the
  // copy so the remaining elements can be appended in order.
  h_slist_push(copy, src->elem);
  HSlistNode *last = copy->head;
  for (src = src->next; src != NULL; src = src->next) {
    HSlistNode *fresh = h_arena_malloc(slist->arena, sizeof(HSlistNode));
    fresh->elem = src->elem;
    fresh->next = NULL;
    last->next = fresh;
    last = fresh;
  }
  return copy;
}
|
||||||
|
|
||||||
|
// Detach the first node of `slist` and return its element, or NULL when the
// list is empty.  The node is handed back to the list's arena.
void* h_slist_pop(HSlist *slist) {
  HSlistNode *first = slist->head;
  if (first == NULL)
    return NULL;

  slist->head = first->next;
  void *elem = first->elem;
  h_arena_free(slist->arena, first);
  return elem;
}
|
||||||
|
|
||||||
|
// Prepend `item` to `slist` in a node allocated from the list's arena.
void h_slist_push(HSlist *slist, void* item) {
  HSlistNode *fresh = h_arena_malloc(slist->arena, sizeof(HSlistNode));
  fresh->next = slist->head;
  fresh->elem = item;
  // write memory barrier here.
  slist->head = fresh;
}
|
||||||
|
|
||||||
|
// Report whether `item` (compared by pointer identity) occurs in `slist`.
// `item` must not be NULL.
bool h_slist_find(HSlist *slist, const void* item) {
  assert (item != NULL);
  for (HSlistNode *cur = slist->head; cur != NULL; cur = cur->next) {
    if (cur->elem == item)
      return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
// Remove every node whose element pointer equals `item` (pointer identity)
// from `slist` and return `slist`.  `item` must not be NULL.
//
// Each unlinked node is released to the list's arena, the same way
// h_slist_pop releases the node it removes; previously these nodes leaked.
HSlist* h_slist_remove_all(HSlist *slist, const void* item) {
  assert (item != NULL);
  HSlistNode *node = slist->head;
  HSlistNode *prev = NULL;
  while (node != NULL) {
    // Read the successor first: `node` may be freed below.
    HSlistNode *next = node->next;
    if (node->elem == item) {
      if (prev)
        prev->next = next;
      else
        slist->head = next;
      h_arena_free(slist->arena, node);
    } else {
      prev = node;
    }
    node = next;
  }
  return slist;
}
|
||||||
|
|
||||||
|
// Release every node of `slist`, then the list header itself, to the list's
// arena.  The elements the nodes pointed at are not touched.
void h_slist_free(HSlist *slist) {
  for (HSlistNode *node = slist->head; node != NULL; node = slist->head) {
    slist->head = node->next;
    h_arena_free(slist->arena, node);
  }
  h_arena_free(slist->arena, slist);
}
|
||||||
|
|
||||||
|
// Create an empty hash table in `arena` that uses `hashFunc` to hash keys
// and `equalFunc` to compare them.  The bucket array starts with 64 slots,
// every one of them cleared.
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
  HHashTable *table = h_arena_malloc(arena, sizeof(HHashTable));
  table->arena = arena;
  table->equalFunc = equalFunc;
  table->hashFunc = hashFunc;
  table->used = 0;
  table->capacity = 64; // to start; should be tuned later...
  table->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * table->capacity);

  HHashTableEntry *slot = table->contents;
  HHashTableEntry *end = slot + table->capacity;
  for (; slot != end; slot++) {
    slot->key = NULL;
    slot->value = NULL;
    slot->next = NULL;
    slot->hashval = 0;
  }
  return table;
}
|
||||||
|
|
||||||
|
// Look up `key` and return the stored value, or NULL when no entry in the
// key's bucket chain has the same hash value and compares equal.
void* h_hashtable_get(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  // The bucket index is the hash masked by capacity - 1.
  HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
  while (hte != NULL) {
    // Compare hashes first so equalFunc only runs on plausible matches.
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
      return hte->value;
    hte = hte->next;
  }
  return NULL;
}
|
||||||
|
|
||||||
|
// Store `value` under `key`.  If an entry with an equal key already exists
// in the key's bucket chain, it is overwritten; otherwise the empty bucket
// head is filled or a new link is appended to the chain, and ht->used is
// incremented.
void h_hashtable_put(HHashTable* ht, void* key, void* value) {
  // # Start with a rebalancing
  //h_hashtable_ensure_capacity(ht, ht->used + 1);

  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
  if (hte->key != NULL) {
    // Check every link, the last one included, and stay on the last link if
    // nothing matches.  The earlier do/while left the loop as soon as it
    // reached the last link without comparing it, so re-inserting a key that
    // lived there appended a duplicate entry.
    for (;;) {
      if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
        goto insert_here;
      if (hte->next == NULL)
        break;
      hte = hte->next;
    }
    // Add a new link...
    assert (hte->next == NULL);
    hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
    hte = hte->next;
    hte->next = NULL;
    ht->used++;
  } else
    ht->used++;

 insert_here:
  hte->key = key;
  hte->value = value;
  hte->hashval = hashval;
}
|
||||||
|
|
||||||
|
// Return true when an entry in the key's bucket chain has the same hash
// value and compares equal to `key`, false otherwise.
int h_hashtable_present(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
  while (hte != NULL) {
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
      return true;
    hte = hte->next;
  }
  return false;
}
|
||||||
|
// Remove the first entry in the key's bucket chain that matches `key`.
// Does nothing when no entry matches.
void h_hashtable_del(HHashTable* ht, void* key) {
  HHashValue hashval = ht->hashFunc(key);
#ifdef CONSISTENCY_CHECK
  assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif

  // Locate the matching entry, if any.
  HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)];
  for (; hte != NULL; hte = hte->next) {
    if (hte->hashval == hashval && ht->equalFunc(key, hte->key))
      break;
  }
  if (hte == NULL)
    return;

  // FIXME: Leaks keys and values.
  HHashTableEntry *successor = hte->next;
  if (successor == NULL) {
    // Nothing follows: blank the entry in place.
    hte->value = NULL;
    hte->key = NULL;
    hte->hashval = 0;
    return;
  }
  // Otherwise pull the following link into this entry and release that link.
  *hte = *successor;
  h_arena_free(ht->arena, successor);
}
|
||||||
|
// Release all chained links of every bucket, then the bucket array itself,
// to the table's arena.
// FIXME: leaks keys and values.
void h_hashtable_free(HHashTable* ht) {
  for (size_t i = 0; i < ht->capacity; i++) {
    // The bucket head lives inside ht->contents; only the links hanging off
    // it are freed one by one.
    HHashTableEntry *link = ht->contents[i].next;
    while (link != NULL) {
      HHashTableEntry *following = link->next;
      h_arena_free(ht->arena, link);
      link = following;
    }
  }
  h_arena_free(ht->arena, ht->contents);
}
|
||||||
|
|
||||||
|
|
|
||||||
177
src/glue.c
Normal file
177
src/glue.c
Normal file
|
|
@ -0,0 +1,177 @@
|
||||||
|
#include "glue.h"
|
||||||
|
#include "../src/internal.h" // for h_carray_*
|
||||||
|
|
||||||
|
|
||||||
|
// The action equivalent of h_ignore.
|
||||||
|
const HParsedToken *h_act_ignore(const HParseResult *p)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper to build HAction's that pick one index out of a sequence.
|
||||||
|
const HParsedToken *h_act_index(int i, const HParseResult *p)
|
||||||
|
{
|
||||||
|
if(!p) return NULL;
|
||||||
|
|
||||||
|
const HParsedToken *tok = p->ast;
|
||||||
|
|
||||||
|
if(!tok || tok->token_type != TT_SEQUENCE)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
const HCountedArray *seq = tok->seq;
|
||||||
|
size_t n = seq->used;
|
||||||
|
|
||||||
|
if(i<0 || (size_t)i>=n)
|
||||||
|
return NULL;
|
||||||
|
else
|
||||||
|
return tok->seq->elements[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Action version of h_seq_flatten.
|
||||||
|
const HParsedToken *h_act_flatten(const HParseResult *p) {
|
||||||
|
return h_seq_flatten(p->arena, p->ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Low-level helper for the h_make family.
|
||||||
|
HParsedToken *h_make_(HArena *arena, HTokenType type)
|
||||||
|
{
|
||||||
|
HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken));
|
||||||
|
ret->token_type = type;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make a token of a user-defined type (type must be >= TT_USER) whose `user`
// field points at `value`.
HParsedToken *h_make(HArena *arena, HTokenType type, void *value)
{
  assert(type >= TT_USER);

  HParsedToken *tok = h_make_(arena, type);
  tok->user = value;
  return tok;
}
|
||||||
|
|
||||||
|
// Make a TT_SEQUENCE token backed by a fresh counted array from `arena`.
HParsedToken *h_make_seq(HArena *arena)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new(arena);
  return tok;
}
|
||||||
|
|
||||||
|
// Make a TT_SEQUENCE token whose counted array is created with size `n`.
HParsedToken *h_make_seqn(HArena *arena, size_t n)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  tok->seq = h_carray_new_sized(arena, n);
  return tok;
}
|
||||||
|
|
||||||
|
// Make a TT_BYTES token with a buffer of `len` bytes allocated from `arena`.
// The buffer contents are not initialized here.
HParsedToken *h_make_bytes(HArena *arena, size_t len)
{
  HParsedToken *tok = h_make_(arena, TT_BYTES);
  tok->bytes.len = len;
  tok->bytes.token = h_arena_malloc(arena, len);
  return tok;
}
|
||||||
|
|
||||||
|
// Make a TT_SINT token holding the signed value `val`.
HParsedToken *h_make_sint(HArena *arena, int64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_SINT);
  tok->sint = val;
  return tok;
}
|
||||||
|
|
||||||
|
// Make a TT_UINT token holding the unsigned value `val`.
HParsedToken *h_make_uint(HArena *arena, uint64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_UINT);
  tok->uint = val;
  return tok;
}
|
||||||
|
|
||||||
|
// XXX -> internal
|
||||||
|
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
|
||||||
|
{
|
||||||
|
assert(i < a->used);
|
||||||
|
return a->elements[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t h_seq_len(const HParsedToken *p)
|
||||||
|
{
|
||||||
|
assert(p != NULL);
|
||||||
|
assert(p->token_type == TT_SEQUENCE);
|
||||||
|
return p->seq->used;
|
||||||
|
}
|
||||||
|
|
||||||
|
HParsedToken **h_seq_elements(const HParsedToken *p)
|
||||||
|
{
|
||||||
|
assert(p != NULL);
|
||||||
|
assert(p->token_type == TT_SEQUENCE);
|
||||||
|
return p->seq->elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
HParsedToken *h_seq_index(const HParsedToken *p, size_t i)
|
||||||
|
{
|
||||||
|
assert(p != NULL);
|
||||||
|
assert(p->token_type == TT_SEQUENCE);
|
||||||
|
return h_carray_index(p->seq, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variadic front end for h_seq_index_vpath: collects the arguments after `i`
// into a va_list and forwards them.
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
{
  va_list path;

  va_start(path, i);
  HParsedToken *found = h_seq_index_vpath(p, i, path);
  va_end(path);

  return found;
}
|
||||||
|
|
||||||
|
// Follow a path of indices through nested sequence tokens.
//
// Starts with element `i` of `p`; every further index is read from `va` as
// an int and applied to the token selected so far.  A negative value ends
// the path.  Returns the token reached at the end of the path.
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
{
  HParsedToken *ret = h_seq_index(p, i);
  int j;

  // Each step must descend into `ret`, the element chosen by the previous
  // step.  Indexing `p` again (as the code did before) would ignore the
  // path so far and simply return p[last index].
  while((j = va_arg(va, int)) >= 0)
    ret = h_seq_index(ret, j);

  return ret;
}
|
||||||
|
|
||||||
|
// Append the single token `x` to the end of the sequence token `xs`.
// Asserts that `xs` is a non-NULL TT_SEQUENCE token.
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);

  // The array stores non-const pointers, hence the cast.
  HParsedToken *item = (HParsedToken *)x;
  h_carray_append(xs->seq, item);
}
|
||||||
|
|
||||||
|
// Append every element of the sequence token `ys` to the sequence token
// `xs`, in order.  Both must be non-NULL TT_SEQUENCE tokens.
void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);
  assert(ys != NULL);
  assert(ys->token_type == TT_SEQUENCE);

  size_t idx = 0;
  while(idx < ys->seq->used) {
    h_carray_append(xs->seq, ys->seq->elements[idx]);
    idx++;
  }
}
|
||||||
|
|
||||||
|
// Flatten nested sequences. Always returns a sequence.
|
||||||
|
// If input element is not a sequence, returns it as a singleton sequence.
|
||||||
|
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p)
|
||||||
|
{
|
||||||
|
assert(p != NULL);
|
||||||
|
|
||||||
|
HParsedToken *ret = h_make_seq(arena);
|
||||||
|
switch(p->token_type) {
|
||||||
|
case TT_SEQUENCE:
|
||||||
|
// Flatten and append all.
|
||||||
|
for(size_t i; i<p->seq->used; i++) {
|
||||||
|
h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i)));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Make singleton sequence.
|
||||||
|
h_seq_snoc(ret, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
253
src/glue.h
Normal file
253
src/glue.h
Normal file
|
|
@ -0,0 +1,253 @@
|
||||||
|
//
|
||||||
|
// API additions for writing grammar and semantic actions more concisely
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Quick Overview:
|
||||||
|
//
|
||||||
|
// Grammars can be succinctly specified with the family of H_RULE macros.
|
||||||
|
// H_RULE defines a plain parser variable. H_ARULE additionally attaches a
|
||||||
|
// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE
|
||||||
|
// combine both.
|
||||||
|
//
|
||||||
|
// A few standard semantic actions are defined below. The H_ACT_APPLY macro
|
||||||
|
// allows semantic actions to be defined by "partial application" of
|
||||||
|
// a generic action to fixed parameters.
|
||||||
|
//
|
||||||
|
// The definition of more complex semantic actions will usually consist of
|
||||||
|
// extracting data from the given parse tree and constructing a token of custom
|
||||||
|
// type to represent the result. A number of functions and convenience macros
|
||||||
|
// are provided to capture the most common cases and idioms.
|
||||||
|
//
|
||||||
|
// See the leading comment blocks on the sections below for more details.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef HAMMER_GLUE__H
|
||||||
|
#define HAMMER_GLUE__H
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Grammar specification
|
||||||
|
//
|
||||||
|
// H_RULE is simply a short-hand for the typical declaration and definition of
|
||||||
|
// a parser variable. See its plain definition below. The goal is to save
|
||||||
|
// horizontal space as well as to provide a clear and unified look together with
|
||||||
|
// the other macro variants that stays close to an abstract PEG or BNF grammar.
|
||||||
|
// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their
|
||||||
|
// combinations as they allow the definition of syntax to be given without
|
||||||
|
// intermingling it with the semantic specifications.
|
||||||
|
//
|
||||||
|
// H_ARULE defines a variable just like H_RULE but attaches a semantic action
|
||||||
|
// to the result of the parser via h_action. The action is expected to be
|
||||||
|
// named act_<rulename>.
|
||||||
|
//
|
||||||
|
// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool.
|
||||||
|
// The validation is expected to be named validate_<rulename>.
|
||||||
|
//
|
||||||
|
// H_VARULE combines H_RULE with both an action and a validation. The action is
|
||||||
|
// attached before the validation, i.e. the validation receives as input the
|
||||||
|
// result of the action.
|
||||||
|
//
|
||||||
|
// H_AVRULE is like H_VARULE but the action is attached outside the validation,
|
||||||
|
// i.e. the validation receives the uninterpreted AST as input.
|
||||||
|
//
|
||||||
|
|
||||||
|
|
||||||
|
#define H_RULE(rule, def) const HParser *rule = def
|
||||||
|
#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule)
|
||||||
|
#define H_VRULE(rule, def) const HParser *rule = \
|
||||||
|
h_attr_bool(def, validate_ ## rule)
|
||||||
|
#define H_VARULE(rule, def) const HParser *rule = \
|
||||||
|
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
|
||||||
|
#define H_AVRULE(rule, def) const HParser *rule = \
|
||||||
|
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pre-fab semantic actions
|
||||||
|
//
|
||||||
|
// A collection of generally useful semantic actions is provided.
|
||||||
|
//
|
||||||
|
// h_act_ignore is the action equivalent of the parser combinator h_ignore. It
|
||||||
|
// simply causes the AST it is applied to to be replaced with NULL. This most
|
||||||
|
// importantly causes it to be elided from the result of a surrounding
|
||||||
|
// h_sequence.
|
||||||
|
//
|
||||||
|
// h_act_index is of note as it is not itself suitable to be passed to
|
||||||
|
// h_action. It is parameterized by an index to be picked from a sequence
|
||||||
|
// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY
|
||||||
|
// macro provides a concise way to define such a parameter-application wrapper.
|
||||||
|
//
|
||||||
|
// h_act_flatten acts on a token of possibly nested sequences by recursively
|
||||||
|
// flattening it into a single sequence. Cf. h_seq_flatten below.
|
||||||
|
//
|
||||||
|
// H_ACT_APPLY implements "partial application" for semantic actions. It
|
||||||
|
// defines a new action that supplies given parameters to a parameterized
|
||||||
|
// action such as h_act_index.
|
||||||
|
//
|
||||||
|
|
||||||
|
const HParsedToken *h_act_ignore(const HParseResult *p);
|
||||||
|
const HParsedToken *h_act_index(int i, const HParseResult *p);
|
||||||
|
const HParsedToken *h_act_flatten(const HParseResult *p);
|
||||||
|
|
||||||
|
// Define 'myaction' as a specialization of 'paction' by supplying the leading
|
||||||
|
// parameters.
|
||||||
|
#define H_ACT_APPLY(myaction, paction, ...) \
|
||||||
|
const HParsedToken *myaction(const HParseResult *p) { \
|
||||||
|
return paction(__VA_ARGS__, p); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Working with HParsedTokens
|
||||||
|
//
|
||||||
|
// The type HParsedToken represents a dynamically-typed universe of values.
|
||||||
|
// Declared below are constructors to turn ordinary values into their
|
||||||
|
// HParsedToken equivalents, extractors to retrieve the original values from
|
||||||
|
// inside an HParsedToken, and functions that inspect and modify tokens of
|
||||||
|
// sequence type directly.
|
||||||
|
//
|
||||||
|
// In addition, there are a number of short-hand macros that work with some
|
||||||
|
// conventions to eliminate common boilerplate. These conventions are listed
|
||||||
|
// below. Be sure to follow them if you want to use the respective macros.
|
||||||
|
//
|
||||||
|
// * The single argument to semantic actions should be called 'p'.
|
||||||
|
//
|
||||||
|
// The H_MAKE macros supply 'p->arena' to their underlying h_make
|
||||||
|
// counterparts. The H_FIELD macros supply 'p->ast' to their underlying
|
||||||
|
// H_INDEX counterparts.
|
||||||
|
//
|
||||||
|
// * For each custom token type, there should be a typedef for the
|
||||||
|
// corresponding value type.
|
||||||
|
//
|
||||||
|
// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to
|
||||||
|
// a pointer to the given type.
|
||||||
|
//
|
||||||
|
// * For each custom token type, say 'foo_t', there must be an integer
|
||||||
|
// constant 'TT_foo_t' to identify the token type. This constant must have a
|
||||||
|
// value greater than or equal to TT_USER.
|
||||||
|
//
|
||||||
|
// One idiom is to define an enum for all custom token types and to assign a
|
||||||
|
// value of TT_USER to the first element. This can be viewed as extending
|
||||||
|
// the HTokenType enum.
|
||||||
|
//
|
||||||
|
// The H_MAKE and H_ASSERT macros derive the name of the token type constant
|
||||||
|
// from the given type name.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// The H_ALLOC macro is useful for allocating values of custom token types.
|
||||||
|
//
|
||||||
|
// The H_MAKE family of macros construct tokens of a given type. The native
|
||||||
|
// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ.
|
||||||
|
// The form with no suffix is used for custom token types. This convention is
|
||||||
|
// also used for other macro and function families.
|
||||||
|
//
|
||||||
|
// The H_ASSERT family simply asserts that a given token has the expected type.
|
||||||
|
// It mainly serves as an implementation aid for H_CAST. Of note in that regard
|
||||||
|
// is that, unlike the standard 'assert' macro, these form _expressions_ that
|
||||||
|
// return the value of their token argument; thus they can be used in a
|
||||||
|
// "pass-through" fashion inside other expressions.
|
||||||
|
//
|
||||||
|
// The H_CAST family combines a type assertion with access to the
|
||||||
|
// statically-typed value inside a token.
|
||||||
|
//
|
||||||
|
// A number of functions h_seq_* operate on and inspect sequence tokens.
|
||||||
|
// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence.
|
||||||
|
// Therefore there are h_seq_snoc and h_seq_append to build up sequences.
|
||||||
|
//
|
||||||
|
// The macro families H_FIELD and H_INDEX combine index access on a sequence
|
||||||
|
// with a cast to the appropriate result type. H_FIELD is used to access the
|
||||||
|
// elements of the argument token 'p' in an action. H_INDEX allows any sequence
|
||||||
|
// token to be specified. Both macro families take an arbitrary number of index
|
||||||
|
// arguments, giving access to elements in nested sequences by path.
|
||||||
|
// These macros are very useful to avoid spaghetti chains of unchecked pointer
|
||||||
|
// dereferences.
|
||||||
|
//
|
||||||
|
|
||||||
|
// Standard short-hand for arena-allocating a variable in a semantic action.
|
||||||
|
#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP)))
|
||||||
|
|
||||||
|
// Token constructors...
|
||||||
|
|
||||||
|
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
|
||||||
|
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
|
||||||
|
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
|
||||||
|
HParsedToken *h_make_bytes(HArena *arena, size_t len);
|
||||||
|
HParsedToken *h_make_sint(HArena *arena, int64_t val);
|
||||||
|
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
|
||||||
|
|
||||||
|
// Standard short-hands to make tokens in an action.
|
||||||
|
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
|
||||||
|
#define H_MAKE_SEQ() h_make_seq(p->arena)
|
||||||
|
#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N)
|
||||||
|
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
|
||||||
|
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
|
||||||
|
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
|
||||||
|
|
||||||
|
// Extract (cast) type-specific value back from HParsedTokens...
|
||||||
|
|
||||||
|
// Pass-through assertion that a given token has the expected type.
|
||||||
|
// Expands to a comma expression: the assertion first, then the token itself,
// so the result can be used directly (e.g. h_assert_type(T, tok)->seq).
// Both parameters are parenthesized so that compound arguments such as
// `tok + 1` or `&toks[i]` bind as intended. Note that P is evaluated twice
// when assertions are enabled, so it must be free of side effects.
#define h_assert_type(T,P) (assert((P)->token_type == (HTokenType)(T)), (P))
|
||||||
|
|
||||||
|
// Convenience short-hand forms of h_assert_type.
|
||||||
|
#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK)
|
||||||
|
#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK)
|
||||||
|
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
|
||||||
|
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
|
||||||
|
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
|
||||||
|
|
||||||
|
// Assert expected type and return contained value.
|
||||||
|
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
|
||||||
|
#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq)
|
||||||
|
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
|
||||||
|
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
|
||||||
|
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
|
||||||
|
|
||||||
|
// Sequence access...
|
||||||
|
|
||||||
|
// Return the length of a sequence.
|
||||||
|
size_t h_seq_len(const HParsedToken *p);
|
||||||
|
|
||||||
|
// Access a sequence's element array.
|
||||||
|
HParsedToken **h_seq_elements(const HParsedToken *p);
|
||||||
|
|
||||||
|
// Access a sequence element by index.
|
||||||
|
HParsedToken *h_seq_index(const HParsedToken *p, size_t i);
|
||||||
|
|
||||||
|
// Access an element in a nested sequence by a path of indices.
|
||||||
|
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...);
|
||||||
|
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
|
||||||
|
|
||||||
|
// Convenience macros combining (nested) index access and H_CAST.
|
||||||
|
#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||||
|
#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||||
|
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||||
|
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||||
|
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
|
||||||
|
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
|
||||||
|
|
||||||
|
// Standard short-hand to access and cast elements on a sequence token.
|
||||||
|
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
|
||||||
|
#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__)
|
||||||
|
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
|
||||||
|
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
|
||||||
|
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
|
||||||
|
|
||||||
|
// Lower-level helper for h_seq_index.
|
||||||
|
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
|
||||||
|
|
||||||
|
// Sequence modification...
|
||||||
|
|
||||||
|
// Add elements to a sequence.
|
||||||
|
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one
|
||||||
|
void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many
|
||||||
|
|
||||||
|
// XXX TODO: Remove elements from a sequence.
|
||||||
|
|
||||||
|
// Flatten nested sequences into one.
|
||||||
|
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
620
src/hammer.c
620
src/hammer.c
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <error.h>
|
#include <err.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
@ -26,202 +26,14 @@
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
#include "parsers/parser_internal.h"
|
#include "parsers/parser_internal.h"
|
||||||
|
|
||||||
static guint djbhash(const uint8_t *buf, size_t len) {
|
static uint32_t djbhash(const uint8_t *buf, size_t len) {
|
||||||
guint hash = 5381;
|
uint32_t hash = 5381;
|
||||||
while (len--) {
|
while (len--) {
|
||||||
hash = hash * 33 + *buf++;
|
hash = hash * 33 + *buf++;
|
||||||
}
|
}
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
// short-hand for constructing HCachedResult's
|
|
||||||
static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
|
|
||||||
HCachedResult *ret = a_new(HCachedResult, 1);
|
|
||||||
ret->result = result;
|
|
||||||
ret->input_stream = state->input_stream;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
|
|
||||||
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
|
|
||||||
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when.
|
|
||||||
HParseResult *tmp_res;
|
|
||||||
if (parser) {
|
|
||||||
HInputStream bak = state->input_stream;
|
|
||||||
tmp_res = parser->vtable->parse(parser->env, state);
|
|
||||||
if (tmp_res) {
|
|
||||||
tmp_res->arena = state->arena;
|
|
||||||
if (!state->input_stream.overrun) {
|
|
||||||
tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3);
|
|
||||||
if (state->input_stream.endianness & BIT_BIG_ENDIAN)
|
|
||||||
tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset;
|
|
||||||
else
|
|
||||||
tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset;
|
|
||||||
} else
|
|
||||||
tmp_res->bit_length = 0;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
tmp_res = NULL;
|
|
||||||
if (state->input_stream.overrun)
|
|
||||||
return NULL; // overrun is always failure.
|
|
||||||
#ifdef CONSISTENCY_CHECK
|
|
||||||
if (!tmp_res) {
|
|
||||||
state->input_stream = INVALID;
|
|
||||||
state->input_stream.input = key->input_pos.input;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return tmp_res;
|
|
||||||
}
|
|
||||||
|
|
||||||
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
|
|
||||||
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
|
|
||||||
HRecursionHead *head = g_hash_table_lookup(state->recursion_heads, k);
|
|
||||||
if (!head) { // No heads found
|
|
||||||
return cached;
|
|
||||||
} else { // Some heads found
|
|
||||||
if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) {
|
|
||||||
// Nothing in the cache, and the key parser is not involved
|
|
||||||
HParseResult *tmp = a_new(HParseResult, 1);
|
|
||||||
tmp->ast = NULL; tmp->arena = state->arena;
|
|
||||||
HParserCacheValue *ret = a_new(HParserCacheValue, 1);
|
|
||||||
ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
if (g_slist_find(head->eval_set, k->parser)) {
|
|
||||||
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
|
|
||||||
head->eval_set = g_slist_remove_all(head->eval_set, k->parser);
|
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
|
||||||
// we know that cached has an entry here, modify it
|
|
||||||
if (!cached)
|
|
||||||
cached = a_new(HParserCacheValue, 1);
|
|
||||||
cached->value_type = PC_RIGHT;
|
|
||||||
cached->right = cached_result(state, tmp_res);
|
|
||||||
}
|
|
||||||
return cached;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Setting up the left recursion. We have the LR for the rule head;
|
|
||||||
* we modify the involved_sets of all LRs in the stack, until we
|
|
||||||
* see the current parser again.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
|
||||||
if (!rec_detect->head) {
|
|
||||||
HRecursionHead *some = a_new(HRecursionHead, 1);
|
|
||||||
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
|
|
||||||
rec_detect->head = some;
|
|
||||||
}
|
|
||||||
size_t i = 0;
|
|
||||||
HLeftRec *lr = g_queue_peek_nth(state->lr_stack, i);
|
|
||||||
while (lr && lr->rule != p) {
|
|
||||||
lr->head = rec_detect->head;
|
|
||||||
lr->head->involved_set = g_slist_prepend(lr->head->involved_set, (gpointer)lr->rule);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
|
|
||||||
* future parse.
|
|
||||||
*/
|
|
||||||
|
|
||||||
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
|
|
||||||
// Store the head into the recursion_heads
|
|
||||||
g_hash_table_replace(state->recursion_heads, k, head);
|
|
||||||
HParserCacheValue *old_cached = g_hash_table_lookup(state->cache, k);
|
|
||||||
if (!old_cached || PC_LEFT == old_cached->value_type)
|
|
||||||
errx(1, "impossible match");
|
|
||||||
HParseResult *old_res = old_cached->right->result;
|
|
||||||
|
|
||||||
// reset the eval_set of the head of the recursion at each beginning of growth
|
|
||||||
head->eval_set = head->involved_set;
|
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
|
||||||
|
|
||||||
if (tmp_res) {
|
|
||||||
if ((old_res->ast->index < tmp_res->ast->index) ||
|
|
||||||
(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
|
|
||||||
HParserCacheValue *v = a_new(HParserCacheValue, 1);
|
|
||||||
v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
|
|
||||||
g_hash_table_replace(state->cache, k, v);
|
|
||||||
return grow(k, state, head);
|
|
||||||
} else {
|
|
||||||
// we're done with growing, we can remove data from the recursion head
|
|
||||||
g_hash_table_remove(state->recursion_heads, k);
|
|
||||||
HParserCacheValue *cached = g_hash_table_lookup(state->cache, k);
|
|
||||||
if (cached && PC_RIGHT == cached->value_type) {
|
|
||||||
return cached->right->result;
|
|
||||||
} else {
|
|
||||||
errx(1, "impossible match");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
g_hash_table_remove(state->recursion_heads, k);
|
|
||||||
return old_res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) {
|
|
||||||
if (growable->head) {
|
|
||||||
if (growable->head->head_parser != k->parser) {
|
|
||||||
// not the head rule, so not growing
|
|
||||||
return growable->seed;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// update cache
|
|
||||||
HParserCacheValue *v = a_new(HParserCacheValue, 1);
|
|
||||||
v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
|
|
||||||
g_hash_table_replace(state->cache, k, v);
|
|
||||||
if (!growable->seed)
|
|
||||||
return NULL;
|
|
||||||
else
|
|
||||||
return grow(k, state, growable->head);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
errx(1, "lrAnswer with no head");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Warth's recursion. Hi Alessandro! */
|
|
||||||
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
|
|
||||||
HParserCacheKey *key = a_new(HParserCacheKey, 1);
|
|
||||||
key->input_pos = state->input_stream; key->parser = parser;
|
|
||||||
HParserCacheValue *m = recall(key, state);
|
|
||||||
// check to see if there is already a result for this object...
|
|
||||||
if (!m) {
|
|
||||||
// It doesn't exist, so create a dummy result to cache
|
|
||||||
HLeftRec *base = a_new(HLeftRec, 1);
|
|
||||||
base->seed = NULL; base->rule = parser; base->head = NULL;
|
|
||||||
g_queue_push_head(state->lr_stack, base);
|
|
||||||
// cache it
|
|
||||||
HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
|
|
||||||
dummy->value_type = PC_LEFT; dummy->left = base;
|
|
||||||
g_hash_table_replace(state->cache, key, dummy);
|
|
||||||
// parse the input
|
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
|
|
||||||
// the base variable has passed equality tests with the cache
|
|
||||||
g_queue_pop_head(state->lr_stack);
|
|
||||||
// setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
|
|
||||||
if (NULL == base->head) {
|
|
||||||
HParserCacheValue *right = a_new(HParserCacheValue, 1);
|
|
||||||
right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
|
|
||||||
g_hash_table_replace(state->cache, key, right);
|
|
||||||
return tmp_res;
|
|
||||||
} else {
|
|
||||||
base->seed = tmp_res;
|
|
||||||
HParseResult *res = lr_answer(key, state, base);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// it exists!
|
|
||||||
if (PC_LEFT == m->value_type) {
|
|
||||||
setupLR(parser, state, m->left);
|
|
||||||
return m->left->seed; // BUG: this might not be correct
|
|
||||||
} else {
|
|
||||||
state->input_stream = m->right->input_stream;
|
|
||||||
return m->right->result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Helper function, since these lines appear in every parser */
|
/* Helper function, since these lines appear in every parser */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -230,35 +42,38 @@ typedef struct {
|
||||||
} HTwoParsers;
|
} HTwoParsers;
|
||||||
|
|
||||||
|
|
||||||
static guint cache_key_hash(gconstpointer key) {
|
static uint32_t cache_key_hash(const void* key) {
|
||||||
return djbhash(key, sizeof(HParserCacheKey));
|
return djbhash(key, sizeof(HParserCacheKey));
|
||||||
}
|
}
|
||||||
static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) {
|
static bool cache_key_equal(const void* key1, const void* key2) {
|
||||||
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
|
return h_parse__m(&system_allocator, parser, input, length);
|
||||||
|
}
|
||||||
|
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
|
||||||
// Set up a parse state...
|
// Set up a parse state...
|
||||||
HArena * arena = h_new_arena(0);
|
HArena * arena = h_new_arena(mm__, 0);
|
||||||
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
HParseState *parse_state = a_new_(arena, HParseState, 1);
|
||||||
parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func
|
parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func
|
||||||
cache_key_equal);// key_equal_func
|
cache_key_hash); // hash_func
|
||||||
parse_state->input_stream.input = input;
|
parse_state->input_stream.input = input;
|
||||||
parse_state->input_stream.index = 0;
|
parse_state->input_stream.index = 0;
|
||||||
parse_state->input_stream.bit_offset = 8; // bit big endian
|
parse_state->input_stream.bit_offset = 8; // bit big endian
|
||||||
parse_state->input_stream.overrun = 0;
|
parse_state->input_stream.overrun = 0;
|
||||||
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
|
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
|
||||||
parse_state->input_stream.length = length;
|
parse_state->input_stream.length = length;
|
||||||
parse_state->lr_stack = g_queue_new();
|
parse_state->lr_stack = h_slist_new(arena);
|
||||||
parse_state->recursion_heads = g_hash_table_new(cache_key_hash,
|
parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
|
||||||
cache_key_equal);
|
cache_key_hash);
|
||||||
parse_state->arena = arena;
|
parse_state->arena = arena;
|
||||||
HParseResult *res = h_do_parse(parser, parse_state);
|
HParseResult *res = h_do_parse(parser, parse_state);
|
||||||
g_queue_free(parse_state->lr_stack);
|
h_slist_free(parse_state->lr_stack);
|
||||||
g_hash_table_destroy(parse_state->recursion_heads);
|
h_hashtable_free(parse_state->recursion_heads);
|
||||||
// tear down the parse state
|
// tear down the parse state
|
||||||
g_hash_table_destroy(parse_state->cache);
|
h_hashtable_free(parse_state->cache);
|
||||||
if (!res)
|
if (!res)
|
||||||
h_delete_arena(parse_state->arena);
|
h_delete_arena(parse_state->arena);
|
||||||
|
|
||||||
|
|
@ -269,405 +84,4 @@ void h_parse_result_free(HParseResult *result) {
|
||||||
h_delete_arena(result->arena);
|
h_delete_arena(result->arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INCLUDE_TESTS
|
|
||||||
|
|
||||||
#include "test_suite.h"
|
|
||||||
static void test_token(void) {
|
|
||||||
const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3);
|
|
||||||
|
|
||||||
g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>");
|
|
||||||
g_check_parse_failed(token_, "95", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_ch(void) {
|
|
||||||
const HParser *ch_ = h_ch(0xa2);
|
|
||||||
|
|
||||||
g_check_parse_ok(ch_, "\xa2", 1, "u0xa2");
|
|
||||||
g_check_parse_failed(ch_, "\xa3", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_ch_range(void) {
|
|
||||||
const HParser *range_ = h_ch_range('a', 'c');
|
|
||||||
|
|
||||||
g_check_parse_ok(range_, "b", 1, "u0x62");
|
|
||||||
g_check_parse_failed(range_, "d", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
//@MARK_START
|
|
||||||
static void test_int64(void) {
|
|
||||||
const HParser *int64_ = h_int64();
|
|
||||||
|
|
||||||
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
|
|
||||||
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_int32(void) {
|
|
||||||
const HParser *int32_ = h_int32();
|
|
||||||
|
|
||||||
g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000");
|
|
||||||
g_check_parse_failed(int32_, "\xff\xfe\x00", 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_int16(void) {
|
|
||||||
const HParser *int16_ = h_int16();
|
|
||||||
|
|
||||||
g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200");
|
|
||||||
g_check_parse_failed(int16_, "\xfe", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_int8(void) {
|
|
||||||
const HParser *int8_ = h_int8();
|
|
||||||
|
|
||||||
g_check_parse_ok(int8_, "\x88", 1, "s-0x78");
|
|
||||||
g_check_parse_failed(int8_, "", 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_uint64(void) {
|
|
||||||
const HParser *uint64_ = h_uint64();
|
|
||||||
|
|
||||||
g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
|
|
||||||
g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_uint32(void) {
|
|
||||||
const HParser *uint32_ = h_uint32();
|
|
||||||
|
|
||||||
g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000");
|
|
||||||
g_check_parse_failed(uint32_, "\x00\x02\x00", 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_uint16(void) {
|
|
||||||
const HParser *uint16_ = h_uint16();
|
|
||||||
|
|
||||||
g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200");
|
|
||||||
g_check_parse_failed(uint16_, "\x02", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_uint8(void) {
|
|
||||||
const HParser *uint8_ = h_uint8();
|
|
||||||
|
|
||||||
g_check_parse_ok(uint8_, "\x78", 1, "u0x78");
|
|
||||||
g_check_parse_failed(uint8_, "", 0);
|
|
||||||
}
|
|
||||||
//@MARK_END
|
|
||||||
|
|
||||||
static void test_int_range(void) {
|
|
||||||
const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
|
|
||||||
|
|
||||||
g_check_parse_ok(int_range_, "\x05", 1, "u0x5");
|
|
||||||
g_check_parse_failed(int_range_, "\xb", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
static void test_float64(void) {
|
|
||||||
const HParser *float64_ = h_float64();
|
|
||||||
|
|
||||||
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
|
|
||||||
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_float32(void) {
|
|
||||||
const HParser *float32_ = h_float32();
|
|
||||||
|
|
||||||
g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
|
|
||||||
g_check_parse_failed(float32_, "\x3f\x80\x00");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
static void test_whitespace(void) {
|
|
||||||
const HParser *whitespace_ = h_whitespace(h_ch('a'));
|
|
||||||
|
|
||||||
g_check_parse_ok(whitespace_, "a", 1, "u0x61");
|
|
||||||
g_check_parse_ok(whitespace_, " a", 2, "u0x61");
|
|
||||||
g_check_parse_ok(whitespace_, " a", 3, "u0x61");
|
|
||||||
g_check_parse_ok(whitespace_, "\ta", 2, "u0x61");
|
|
||||||
g_check_parse_failed(whitespace_, "_a", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_left(void) {
|
|
||||||
const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
|
|
||||||
|
|
||||||
g_check_parse_ok(left_, "a ", 2, "u0x61");
|
|
||||||
g_check_parse_failed(left_, "a", 1);
|
|
||||||
g_check_parse_failed(left_, " ", 1);
|
|
||||||
g_check_parse_failed(left_, "ab", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_right(void) {
|
|
||||||
const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
|
|
||||||
|
|
||||||
g_check_parse_ok(right_, " a", 2, "u0x61");
|
|
||||||
g_check_parse_failed(right_, "a", 1);
|
|
||||||
g_check_parse_failed(right_, " ", 1);
|
|
||||||
g_check_parse_failed(right_, "ba", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_middle(void) {
|
|
||||||
const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
|
|
||||||
|
|
||||||
g_check_parse_ok(middle_, " a ", 3, "u0x61");
|
|
||||||
g_check_parse_failed(middle_, "a", 1);
|
|
||||||
g_check_parse_failed(middle_, " ", 1);
|
|
||||||
g_check_parse_failed(middle_, " a", 2);
|
|
||||||
g_check_parse_failed(middle_, "a ", 2);
|
|
||||||
g_check_parse_failed(middle_, " b ", 3);
|
|
||||||
g_check_parse_failed(middle_, "ba ", 3);
|
|
||||||
g_check_parse_failed(middle_, " ab", 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
const HParsedToken* upcase(const HParseResult *p) {
|
|
||||||
switch(p->ast->token_type) {
|
|
||||||
case TT_SEQUENCE:
|
|
||||||
{
|
|
||||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
|
||||||
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
|
|
||||||
ret->token_type = TT_SEQUENCE;
|
|
||||||
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
|
||||||
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
|
|
||||||
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
|
|
||||||
tmp->token_type = TT_UINT;
|
|
||||||
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
|
|
||||||
h_carray_append(seq, tmp);
|
|
||||||
} else {
|
|
||||||
h_carray_append(seq, p->ast->seq->elements[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ret->seq = seq;
|
|
||||||
return (const HParsedToken*)ret;
|
|
||||||
}
|
|
||||||
case TT_UINT:
|
|
||||||
{
|
|
||||||
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
|
||||||
ret->token_type = TT_UINT;
|
|
||||||
ret->uint = toupper(p->ast->uint);
|
|
||||||
return (const HParsedToken*)ret;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return p->ast;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_action(void) {
|
|
||||||
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
|
|
||||||
h_ch('A'),
|
|
||||||
NULL),
|
|
||||||
h_choice(h_ch('b'),
|
|
||||||
h_ch('B'),
|
|
||||||
NULL),
|
|
||||||
NULL),
|
|
||||||
upcase);
|
|
||||||
|
|
||||||
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
|
|
||||||
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
|
|
||||||
g_check_parse_failed(action_, "XX", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_in(void) {
|
|
||||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
|
||||||
const HParser *in_ = h_in(options, 3);
|
|
||||||
g_check_parse_ok(in_, "b", 1, "u0x62");
|
|
||||||
g_check_parse_failed(in_, "d", 1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_not_in(void) {
|
|
||||||
uint8_t options[3] = { 'a', 'b', 'c' };
|
|
||||||
const HParser *not_in_ = h_not_in(options, 3);
|
|
||||||
g_check_parse_ok(not_in_, "d", 1, "u0x64");
|
|
||||||
g_check_parse_failed(not_in_, "a", 1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_end_p(void) {
|
|
||||||
const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL);
|
|
||||||
g_check_parse_ok(end_p_, "a", 1, "(u0x61)");
|
|
||||||
g_check_parse_failed(end_p_, "aa", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_nothing_p(void) {
|
|
||||||
const HParser *nothing_p_ = h_nothing_p();
|
|
||||||
g_check_parse_failed(nothing_p_, "a", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_sequence(void) {
|
|
||||||
const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL);
|
|
||||||
const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)");
|
|
||||||
g_check_parse_failed(sequence_1, "a", 1);
|
|
||||||
g_check_parse_failed(sequence_1, "b", 1);
|
|
||||||
g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)");
|
|
||||||
g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)");
|
|
||||||
g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_choice(void) {
|
|
||||||
const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(choice_, "a", 1, "u0x61");
|
|
||||||
g_check_parse_ok(choice_, "b", 1, "u0x62");
|
|
||||||
g_check_parse_failed(choice_, "c", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_butnot(void) {
|
|
||||||
const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
|
|
||||||
const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6'));
|
|
||||||
|
|
||||||
g_check_parse_ok(butnot_1, "a", 1, "u0x61");
|
|
||||||
g_check_parse_failed(butnot_1, "ab", 2);
|
|
||||||
g_check_parse_ok(butnot_1, "aa", 2, "u0x61");
|
|
||||||
g_check_parse_failed(butnot_2, "6", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_difference(void) {
|
|
||||||
const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));
|
|
||||||
|
|
||||||
g_check_parse_ok(difference_, "ab", 2, "<61.62>");
|
|
||||||
g_check_parse_failed(difference_, "a", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_xor(void) {
|
|
||||||
const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));
|
|
||||||
|
|
||||||
g_check_parse_ok(xor_, "0", 1, "u0x30");
|
|
||||||
g_check_parse_ok(xor_, "9", 1, "u0x39");
|
|
||||||
g_check_parse_failed(xor_, "5", 1);
|
|
||||||
g_check_parse_failed(xor_, "a", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_many(void) {
|
|
||||||
const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));
|
|
||||||
g_check_parse_ok(many_, "adef", 4, "(u0x61)");
|
|
||||||
g_check_parse_ok(many_, "bdef", 4, "(u0x62)");
|
|
||||||
g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
|
||||||
g_check_parse_ok(many_, "daabbabadef", 11, "()");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_many1(void) {
|
|
||||||
const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));
|
|
||||||
|
|
||||||
g_check_parse_ok(many1_, "adef", 4, "(u0x61)");
|
|
||||||
g_check_parse_ok(many1_, "bdef", 4, "(u0x62)");
|
|
||||||
g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
|
||||||
g_check_parse_failed(many1_, "daabbabadef", 11);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_repeat_n(void) {
|
|
||||||
const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);
|
|
||||||
|
|
||||||
g_check_parse_failed(repeat_n_, "adef", 4);
|
|
||||||
g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)");
|
|
||||||
g_check_parse_failed(repeat_n_, "dabdef", 6);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_optional(void) {
|
|
||||||
const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)");
|
|
||||||
g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)");
|
|
||||||
g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)");
|
|
||||||
g_check_parse_failed(optional_, "aed", 3);
|
|
||||||
g_check_parse_failed(optional_, "ab", 2);
|
|
||||||
g_check_parse_failed(optional_, "ac", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_ignore(void) {
|
|
||||||
const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)");
|
|
||||||
g_check_parse_failed(ignore_, "ac", 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_sepBy1(void) {
|
|
||||||
const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
|
||||||
|
|
||||||
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
|
|
||||||
g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
|
|
||||||
g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)");
|
|
||||||
g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_epsilon_p(void) {
|
|
||||||
const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
|
|
||||||
const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
|
|
||||||
const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)");
|
|
||||||
g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)");
|
|
||||||
g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_attr_bool(void) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_and(void) {
|
|
||||||
const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
|
|
||||||
const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
|
|
||||||
const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(and_1, "0", 1, "(u0x30)");
|
|
||||||
g_check_parse_failed(and_2, "0", 1);
|
|
||||||
g_check_parse_ok(and_3, "12", 2, "(u0x31)");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_not(void) {
|
|
||||||
const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
|
|
||||||
const HParser *not_2 = h_sequence(h_ch('a'),
|
|
||||||
h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
|
|
||||||
h_token((const uint8_t*)"++", 2),
|
|
||||||
NULL), h_ch('b'), NULL);
|
|
||||||
|
|
||||||
g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)");
|
|
||||||
g_check_parse_failed(not_1, "a++b", 4);
|
|
||||||
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
|
||||||
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
|
||||||
}
|
|
||||||
|
|
||||||
void register_parser_tests(void) {
|
|
||||||
g_test_add_func("/core/parser/token", test_token);
|
|
||||||
g_test_add_func("/core/parser/ch", test_ch);
|
|
||||||
g_test_add_func("/core/parser/ch_range", test_ch_range);
|
|
||||||
g_test_add_func("/core/parser/int64", test_int64);
|
|
||||||
g_test_add_func("/core/parser/int32", test_int32);
|
|
||||||
g_test_add_func("/core/parser/int16", test_int16);
|
|
||||||
g_test_add_func("/core/parser/int8", test_int8);
|
|
||||||
g_test_add_func("/core/parser/uint64", test_uint64);
|
|
||||||
g_test_add_func("/core/parser/uint32", test_uint32);
|
|
||||||
g_test_add_func("/core/parser/uint16", test_uint16);
|
|
||||||
g_test_add_func("/core/parser/uint8", test_uint8);
|
|
||||||
g_test_add_func("/core/parser/int_range", test_int_range);
|
|
||||||
#if 0
|
|
||||||
g_test_add_func("/core/parser/float64", test_float64);
|
|
||||||
g_test_add_func("/core/parser/float32", test_float32);
|
|
||||||
#endif
|
|
||||||
g_test_add_func("/core/parser/whitespace", test_whitespace);
|
|
||||||
g_test_add_func("/core/parser/left", test_left);
|
|
||||||
g_test_add_func("/core/parser/right", test_right);
|
|
||||||
g_test_add_func("/core/parser/middle", test_middle);
|
|
||||||
g_test_add_func("/core/parser/action", test_action);
|
|
||||||
g_test_add_func("/core/parser/in", test_in);
|
|
||||||
g_test_add_func("/core/parser/not_in", test_not_in);
|
|
||||||
g_test_add_func("/core/parser/end_p", test_end_p);
|
|
||||||
g_test_add_func("/core/parser/nothing_p", test_nothing_p);
|
|
||||||
g_test_add_func("/core/parser/sequence", test_sequence);
|
|
||||||
g_test_add_func("/core/parser/choice", test_choice);
|
|
||||||
g_test_add_func("/core/parser/butnot", test_butnot);
|
|
||||||
g_test_add_func("/core/parser/difference", test_difference);
|
|
||||||
g_test_add_func("/core/parser/xor", test_xor);
|
|
||||||
g_test_add_func("/core/parser/many", test_many);
|
|
||||||
g_test_add_func("/core/parser/many1", test_many1);
|
|
||||||
g_test_add_func("/core/parser/repeat_n", test_repeat_n);
|
|
||||||
g_test_add_func("/core/parser/optional", test_optional);
|
|
||||||
g_test_add_func("/core/parser/sepBy1", test_sepBy1);
|
|
||||||
g_test_add_func("/core/parser/epsilon_p", test_epsilon_p);
|
|
||||||
g_test_add_func("/core/parser/attr_bool", test_attr_bool);
|
|
||||||
g_test_add_func("/core/parser/and", test_and);
|
|
||||||
g_test_add_func("/core/parser/not", test_not);
|
|
||||||
g_test_add_func("/core/parser/ignore", test_ignore);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // #ifdef INCLUDE_TESTS
|
|
||||||
|
|
|
||||||
188
src/hammer.h
188
src/hammer.h
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
#ifndef HAMMER_HAMMER__H
|
#ifndef HAMMER_HAMMER__H
|
||||||
#define HAMMER_HAMMER__H
|
#define HAMMER_HAMMER__H
|
||||||
#include <glib.h>
|
#include <stdarg.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
|
|
@ -31,6 +31,12 @@ typedef int bool;
|
||||||
|
|
||||||
typedef struct HParseState_ HParseState;
|
typedef struct HParseState_ HParseState;
|
||||||
|
|
||||||
|
// Selects a parser backend (this is the type of h_compile()'s `backend`
// argument). PB_MIN is the lowest value and, per the note below, always the
// default; PB_PACKRAT is currently the only backend and shares that value.
// PB_MAX comes right after the last backend -- presumably an exclusive upper
// bound for iterating over backends (TODO confirm).
typedef enum HParserBackend_ {
|
||||||
|
PB_MIN = 0,
|
||||||
|
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
||||||
|
PB_MAX
|
||||||
|
} HParserBackend;
|
||||||
|
|
||||||
typedef enum HTokenType_ {
|
typedef enum HTokenType_ {
|
||||||
// Before you change the explicit values of these, think of the poor bindings ;_;
|
// Before you change the explicit values of these, think of the poor bindings ;_;
|
||||||
TT_NONE = 1,
|
TT_NONE = 1,
|
||||||
|
|
@ -39,8 +45,7 @@ typedef enum HTokenType_ {
|
||||||
TT_UINT = 8,
|
TT_UINT = 8,
|
||||||
TT_SEQUENCE = 16,
|
TT_SEQUENCE = 16,
|
||||||
TT_ERR = 32,
|
TT_ERR = 32,
|
||||||
TT_USER = 64,
|
TT_USER = 64
|
||||||
TT_MAX = 128
|
|
||||||
} HTokenType;
|
} HTokenType;
|
||||||
|
|
||||||
typedef struct HCountedArray_ {
|
typedef struct HCountedArray_ {
|
||||||
|
|
@ -50,13 +55,15 @@ typedef struct HCountedArray_ {
|
||||||
struct HParsedToken_ **elements;
|
struct HParsedToken_ **elements;
|
||||||
} HCountedArray;
|
} HCountedArray;
|
||||||
|
|
||||||
|
// A run of bytes: a pointer to the first byte plus a length. This is the
// type of the `bytes` member of HParsedToken's value union.
// NOTE(review): ownership of the pointed-to buffer is not visible here.
typedef struct HBytes_ {
|
||||||
|
const uint8_t *token;
|
||||||
|
size_t len;
|
||||||
|
} HBytes;
|
||||||
|
|
||||||
typedef struct HParsedToken_ {
|
typedef struct HParsedToken_ {
|
||||||
HTokenType token_type;
|
HTokenType token_type;
|
||||||
union {
|
union {
|
||||||
struct {
|
HBytes bytes;
|
||||||
const uint8_t *token;
|
|
||||||
size_t len;
|
|
||||||
} bytes;
|
|
||||||
int64_t sint;
|
int64_t sint;
|
||||||
uint64_t uint;
|
uint64_t uint;
|
||||||
double dbl;
|
double dbl;
|
||||||
|
|
@ -114,18 +121,76 @@ typedef struct HParser_ {
|
||||||
void *env;
|
void *env;
|
||||||
} HParser;
|
} HParser;
|
||||||
|
|
||||||
|
// {{{ Stuff for benchmarking
|
||||||
|
// One benchmark test case: an input buffer, its length, and the expected
// output text. h_benchmark() takes a pointer to these.
// NOTE(review): output_unambiguous is presumably in the format produced by
// h_write_result_unamb(); how an array of test cases is terminated is not
// visible here -- confirm against h_benchmark().
typedef struct HParserTestcase_ {
|
||||||
|
unsigned char* input;
|
||||||
|
size_t length;
|
||||||
|
char* output_unambiguous;
|
||||||
|
} HParserTestcase;
|
||||||
|
|
||||||
|
// Outcome of running a single test case. `success` selects which member of
// the anonymous union is meaningful: actual_results on failure, parse_time
// on success (see the member comments).
typedef struct HCaseResult_ {
|
||||||
|
bool success;
|
||||||
|
union {
|
||||||
|
const char* actual_results; // on failure, filled in with the results of h_write_result_unamb
|
||||||
|
size_t parse_time; // on success, filled in with time for a single parse, in nsec
|
||||||
|
};
|
||||||
|
} HCaseResult;
|
||||||
|
|
||||||
|
// Benchmark results for one backend: which backend it was, whether the
// grammar compiled for it, how many test cases there were, how many of them
// failed, and the per-case results.
// NOTE(review): `cases` presumably points to n_testcases entries -- confirm.
typedef struct HBackendResults_ {
|
||||||
|
HParserBackend backend;
|
||||||
|
bool compile_success;
|
||||||
|
size_t n_testcases;
|
||||||
|
size_t failed_testcases; // actually a count...
|
||||||
|
HCaseResult *cases;
|
||||||
|
} HBackendResults;
|
||||||
|
|
||||||
|
// Top-level benchmark result, as returned by h_benchmark() and consumed by
// h_benchmark_report().
// NOTE(review): `results` presumably points to `len` HBackendResults, one
// per backend -- confirm.
typedef struct HBenchmarkResults_ {
|
||||||
|
size_t len;
|
||||||
|
HBackendResults *results;
|
||||||
|
} HBenchmarkResults;
|
||||||
|
// }}}
|
||||||
|
|
||||||
|
// {{{ Preprocessor definitions
|
||||||
|
// Declares both variants of a function that takes no arguments:
//   rtype_t name(void);
//   rtype_t name__m(HAllocator* mm__)
// The second declaration has no trailing semicolon; the macro invocation
// supplies it.
#define HAMMER_FN_DECL_NOARG(rtype_t, name) \
|
||||||
|
rtype_t name(void); \
|
||||||
|
rtype_t name##__m(HAllocator* mm__)
|
||||||
|
|
||||||
|
// Declares both variants of a function with a fixed parameter list
// (the parameters are passed as the variadic macro arguments):
//   rtype_t name(<params>);
//   rtype_t name__m(HAllocator* mm__, <params>)
// At least one parameter is required, because name__m always places a comma
// before __VA_ARGS__; use HAMMER_FN_DECL_NOARG for zero-argument functions.
#define HAMMER_FN_DECL(rtype_t, name, ...) \
|
||||||
|
rtype_t name(__VA_ARGS__); \
|
||||||
|
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__)
|
||||||
|
|
||||||
|
// Same as HAMMER_FN_DECL, but appends `attr` (e.g. an __attribute__((...))
// specifier) after both declarations.
#define HAMMER_FN_DECL_ATTR(attr, rtype_t, name, ...) \
|
||||||
|
rtype_t name(__VA_ARGS__) attr; \
|
||||||
|
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr
|
||||||
|
|
||||||
|
// Declares the four variants of a C-variadic function whose fixed leading
// parameters are given as the macro's variadic arguments:
//   name      (<params>, ...)                      implicit allocator
//   name__m   (HAllocator* mm__, <params>, ...)    explicit allocator
//   name__mv  (HAllocator* mm__, <params>, va_list ap)
//   name__v   (<params>, va_list ap)
// The last declaration has no trailing semicolon; the invocation supplies it.
#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \
|
||||||
|
rtype_t name(__VA_ARGS__, ...); \
|
||||||
|
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \
|
||||||
|
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
|
||||||
|
rtype_t name##__v(__VA_ARGS__, va_list ap)
|
||||||
|
|
||||||
|
// Same four declarations as HAMMER_FN_DECL_VARARGS, with `attr` appended to
// the two `...` variants (name and name__m).
// Note: the attribute is deliberately not applied to the va_list variants
// (name__mv and name__v); they are declared without it.
|
||||||
|
#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \
|
||||||
|
rtype_t name(__VA_ARGS__, ...) attr; \
|
||||||
|
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \
|
||||||
|
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
|
||||||
|
rtype_t name##__v(__VA_ARGS__, va_list ap)
|
||||||
|
|
||||||
|
// }}}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Top-level function to call a parser that has been built over some
|
* Top-level function to call a parser that has been built over some
|
||||||
* piece of input (of known size).
|
* piece of input (of known size).
|
||||||
*/
|
*/
|
||||||
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
|
HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a string, returns a parser that parses that string value.
|
* Given a string, returns a parser that parses that string value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_BYTES
|
* Result token type: TT_BYTES
|
||||||
*/
|
*/
|
||||||
const HParser* h_token(const uint8_t *str, const size_t len);
|
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a single character, returns a parser that parses that
|
* Given a single character, returns a parser that parses that
|
||||||
|
|
@ -133,7 +198,7 @@ const HParser* h_token(const uint8_t *str, const size_t len);
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_ch(const uint8_t c);
|
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two single-character bounds, lower and upper, returns a parser
|
* Given two single-character bounds, lower and upper, returns a parser
|
||||||
|
|
@ -142,14 +207,14 @@ const HParser* h_ch(const uint8_t c);
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
|
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an integer parser, p, and two integer bounds, lower and upper,
|
* Given an integer parser, p, and two integer bounds, lower and upper,
|
||||||
* returns a parser that parses an integral value within the range
|
* returns a parser that parses an integral value within the range
|
||||||
* [lower, upper] (inclusive).
|
* [lower, upper] (inclusive).
|
||||||
*/
|
*/
|
||||||
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
|
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses the specified number of bits. sign ==
|
* Returns a parser that parses the specified number of bits. sign ==
|
||||||
|
|
@ -157,63 +222,63 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
||||||
*/
|
*/
|
||||||
const HParser* h_bits(size_t len, bool sign);
|
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 8-byte integer value.
|
* Returns a parser that parses a signed 8-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_int64();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 4-byte integer value.
|
* Returns a parser that parses a signed 4-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_int32();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 2-byte integer value.
|
* Returns a parser that parses a signed 2-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_int16();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses a signed 1-byte integer value.
|
* Returns a parser that parses a signed 1-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_SINT
|
* Result token type: TT_SINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_int8();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 8-byte integer value.
|
* Returns a parser that parses an unsigned 8-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_uint64();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 4-byte integer value.
|
* Returns a parser that parses an unsigned 4-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_uint32();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 2-byte integer value.
|
* Returns a parser that parses an unsigned 2-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_uint16();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a parser that parses an unsigned 1-byte integer value.
|
* Returns a parser that parses an unsigned 1-byte integer value.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_uint8();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given another parser, p, returns a parser that skips any whitespace
|
* Given another parser, p, returns a parser that skips any whitespace
|
||||||
|
|
@ -221,7 +286,7 @@ const HParser* h_uint8();
|
||||||
*
|
*
|
||||||
* Result token type: p's result type
|
* Result token type: p's result type
|
||||||
*/
|
*/
|
||||||
const HParser* h_whitespace(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p and q, returns a parser that parses them in
|
* Given two parsers, p and q, returns a parser that parses them in
|
||||||
|
|
@ -229,7 +294,7 @@ const HParser* h_whitespace(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: p's result type
|
* Result token type: p's result type
|
||||||
*/
|
*/
|
||||||
const HParser* h_left(const HParser* p, const HParser* q);
|
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p and q, returns a parser that parses them in
|
* Given two parsers, p and q, returns a parser that parses them in
|
||||||
|
|
@ -237,7 +302,7 @@ const HParser* h_left(const HParser* p, const HParser* q);
|
||||||
*
|
*
|
||||||
* Result token type: q's result type
|
* Result token type: q's result type
|
||||||
*/
|
*/
|
||||||
const HParser* h_right(const HParser* p, const HParser* q);
|
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given three parsers, p, x, and q, returns a parser that parses them in
|
* Given three parsers, p, x, and q, returns a parser that parses them in
|
||||||
|
|
@ -245,7 +310,7 @@ const HParser* h_right(const HParser* p, const HParser* q);
|
||||||
*
|
*
|
||||||
* Result token type: x's result type
|
* Result token type: x's result type
|
||||||
*/
|
*/
|
||||||
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
|
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given another parser, p, and a function f, returns a parser that
|
* Given another parser, p, and a function f, returns a parser that
|
||||||
|
|
@ -253,21 +318,21 @@ const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
|
||||||
*
|
*
|
||||||
* Result token type: any
|
* Result token type: any
|
||||||
*/
|
*/
|
||||||
const HParser* h_action(const HParser* p, const HAction a);
|
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a single character in the given charset.
|
* Parse a single character in the given charset.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_in(const uint8_t *charset, size_t length);
|
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a single character *NOT* in the given charset.
|
* Parse a single character *NOT* in the given charset.
|
||||||
*
|
*
|
||||||
* Result token type: TT_UINT
|
* Result token type: TT_UINT
|
||||||
*/
|
*/
|
||||||
const HParser* h_not_in(const uint8_t *charset, size_t length);
|
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A no-argument parser that succeeds if there is no more input to
|
* A no-argument parser that succeeds if there is no more input to
|
||||||
|
|
@ -275,14 +340,14 @@ const HParser* h_not_in(const uint8_t *charset, size_t length);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const HParser* h_end_p();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser always fails.
|
* This parser always fails.
|
||||||
*
|
*
|
||||||
* Result token type: NULL. Always.
|
* Result token type: NULL. Always.
|
||||||
*/
|
*/
|
||||||
const HParser* h_nothing_p();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a null-terminated list of parsers, apply each parser in order.
|
* Given a null-terminated list of parsers, apply each parser in order.
|
||||||
|
|
@ -290,7 +355,7 @@ const HParser* h_nothing_p();
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
|
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an array of parsers, p_array, apply each parser in order. The
|
* Given an array of parsers, p_array, apply each parser in order. The
|
||||||
|
|
@ -299,7 +364,7 @@ const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel));
|
||||||
*
|
*
|
||||||
* Result token type: The type of the first successful parser's result.
|
* Result token type: The type of the first successful parser's result.
|
||||||
*/
|
*/
|
||||||
const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
|
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
|
@ -309,7 +374,7 @@ const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel));
|
||||||
*
|
*
|
||||||
* Result token type: p1's result type.
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
const HParser* h_butnot(const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
|
@ -319,7 +384,7 @@ const HParser* h_butnot(const HParser* p1, const HParser* p2);
|
||||||
*
|
*
|
||||||
* Result token type: p1's result type.
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
const HParser* h_difference(const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
||||||
|
|
@ -327,7 +392,7 @@ const HParser* h_difference(const HParser* p1, const HParser* p2);
|
||||||
*
|
*
|
||||||
* Result token type: The type of the result of whichever parser succeeded.
|
* Result token type: The type of the result of whichever parser succeeded.
|
||||||
*/
|
*/
|
||||||
const HParser* h_xor(const HParser* p1, const HParser* p2);
|
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for zero or more repetitions
|
* Given a parser, p, this parser succeeds for zero or more repetitions
|
||||||
|
|
@ -335,7 +400,7 @@ const HParser* h_xor(const HParser* p1, const HParser* p2);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_many(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for one or more repetitions
|
* Given a parser, p, this parser succeeds for one or more repetitions
|
||||||
|
|
@ -343,7 +408,7 @@ const HParser* h_many(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_many1(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds for exactly N repetitions
|
* Given a parser, p, this parser succeeds for exactly N repetitions
|
||||||
|
|
@ -351,7 +416,7 @@ const HParser* h_many1(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_repeat_n(const HParser* p, const size_t n);
|
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds with the value p parsed or
|
* Given a parser, p, this parser succeeds with the value p parsed or
|
||||||
|
|
@ -359,7 +424,7 @@ const HParser* h_repeat_n(const HParser* p, const size_t n);
|
||||||
*
|
*
|
||||||
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
||||||
*/
|
*/
|
||||||
const HParser* h_optional(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
||||||
|
|
@ -367,7 +432,7 @@ const HParser* h_optional(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const HParser* h_ignore(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, and a parser for a separator, sep, this parser
|
* Given a parser, p, and a parser for a separator, sep, this parser
|
||||||
|
|
@ -378,7 +443,7 @@ const HParser* h_ignore(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_sepBy(const HParser* p, const HParser* sep);
|
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
||||||
|
|
@ -386,14 +451,14 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep);
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_sepBy1(const HParser* p, const HParser* sep);
|
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser always returns a zero length match, i.e., empty string.
|
* This parser always returns a zero length match, i.e., empty string.
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const HParser* h_epsilon_p();
|
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser applies its first argument to read an unsigned integer
|
* This parser applies its first argument to read an unsigned integer
|
||||||
|
|
@ -404,7 +469,7 @@ const HParser* h_epsilon_p();
|
||||||
*
|
*
|
||||||
* Result token type: TT_SEQUENCE
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const HParser* h_length_value(const HParser* length, const HParser* value);
|
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This parser attaches a predicate function, which returns true or
|
* This parser attaches a predicate function, which returns true or
|
||||||
|
|
@ -419,7 +484,7 @@ const HParser* h_length_value(const HParser* length, const HParser* value);
|
||||||
*
|
*
|
||||||
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
||||||
*/
|
*/
|
||||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred);
|
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'and' parser asserts that a conditional syntax is satisfied,
|
* The 'and' parser asserts that a conditional syntax is satisfied,
|
||||||
|
|
@ -436,7 +501,7 @@ const HParser* h_attr_bool(const HParser* p, HPredicate pred);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const HParser* h_and(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'not' parser asserts that a conditional syntax is *not*
|
* The 'not' parser asserts that a conditional syntax is *not*
|
||||||
|
|
@ -456,7 +521,7 @@ const HParser* h_and(const HParser* p);
|
||||||
*
|
*
|
||||||
* Result token type: None. The HParseResult exists but its AST is NULL.
|
* Result token type: None. The HParseResult exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const HParser* h_not(const HParser* p);
|
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a parser that just calls out to another, as yet unknown,
|
* Create a parser that just calls out to another, as yet unknown,
|
||||||
|
|
@ -467,35 +532,44 @@ const HParser* h_not(const HParser* p);
|
||||||
* Result token type: the type of whatever parser is bound to it with
|
* Result token type: the type of whatever parser is bound to it with
|
||||||
* bind_indirect().
|
* bind_indirect().
|
||||||
*/
|
*/
|
||||||
HParser *h_indirect();
|
HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the inner parser of an indirect. See comments on indirect for
|
* Set the inner parser of an indirect. See comments on indirect for
|
||||||
* details.
|
* details.
|
||||||
*/
|
*/
|
||||||
void h_bind_indirect(HParser* indirect, const HParser* inner);
|
HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Free the memory allocated to an HParseResult when it is no longer needed.
|
* Free the memory allocated to an HParseResult when it is no longer needed.
|
||||||
*/
|
*/
|
||||||
void h_parse_result_free(HParseResult *result);
|
HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
|
||||||
|
|
||||||
// Some debugging aids
|
// Some debugging aids
|
||||||
/**
|
/**
|
||||||
* Format token into a compact unambiguous form. Useful for parser test cases.
|
* Format token into a compact unambiguous form. Useful for parser test cases.
|
||||||
* Caller is responsible for freeing the result.
|
* Caller is responsible for freeing the result.
|
||||||
*/
|
*/
|
||||||
char* h_write_result_unamb(const HParsedToken* tok);
|
HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
|
||||||
/**
|
/**
|
||||||
* Format token to the given output stream. Indent starting at
|
* Format token to the given output stream. Indent starting at
|
||||||
* [indent] spaces, with [delta] spaces between levels.
|
* [indent] spaces, with [delta] spaces between levels.
|
||||||
*/
|
*/
|
||||||
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
|
HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build parse tables for the given parser backend. See the
|
||||||
|
* documentation for the parser backend in question for information
|
||||||
|
* about the [params] parameter, or just pass in NULL for the defaults.
|
||||||
|
*
|
||||||
|
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
|
||||||
|
*/
|
||||||
|
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Document me
|
* TODO: Document me
|
||||||
*/
|
*/
|
||||||
HBitWriter *h_bit_writer_new(void);
|
HBitWriter *h_bit_writer_new(HAllocator* mm__);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Document me
|
* TODO: Document me
|
||||||
|
|
@ -507,11 +581,17 @@ void h_bit_writer_put(HBitWriter* w, unsigned long long data, size_t nbits);
|
||||||
* Must not free [w] until you're done with the result.
|
* Must not free [w] until you're done with the result.
|
||||||
* [len] is in bytes.
|
* [len] is in bytes.
|
||||||
*/
|
*/
|
||||||
const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
|
const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Document me
|
* TODO: Document me
|
||||||
*/
|
*/
|
||||||
void h_bit_writer_free(HBitWriter* w);
|
void h_bit_writer_free(HBitWriter* w);
|
||||||
|
|
||||||
|
// {{{ Benchmark functions
|
||||||
|
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
|
||||||
|
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
|
||||||
|
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
|
||||||
|
// }}}
|
||||||
|
|
||||||
#endif // #ifndef HAMMER_HAMMER__H
|
#endif // #ifndef HAMMER_HAMMER__H
|
||||||
|
|
|
||||||
114
src/internal.h
114
src/internal.h
|
|
@ -17,7 +17,6 @@
|
||||||
|
|
||||||
#ifndef HAMMER_INTERNAL__H
|
#ifndef HAMMER_INTERNAL__H
|
||||||
#define HAMMER_INTERNAL__H
|
#define HAMMER_INTERNAL__H
|
||||||
#include <glib.h>
|
|
||||||
#include <err.h>
|
#include <err.h>
|
||||||
#include "hammer.h"
|
#include "hammer.h"
|
||||||
|
|
||||||
|
|
@ -29,9 +28,28 @@
|
||||||
errx(1, "Assertion failed (programmer error): %s", message); \
|
errx(1, "Assertion failed (programmer error): %s", message); \
|
||||||
} while(0)
|
} while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define HAMMER_FN_IMPL_NOARGS(rtype_t, name) \
|
||||||
|
rtype_t name(void) { \
|
||||||
|
return name##__m(system_allocator); \
|
||||||
|
} \
|
||||||
|
rtype_t name##__m(HAllocator* mm__)
|
||||||
|
// Functions with arguments are difficult to forward cleanly. Alas, we will need to forward them manually.
|
||||||
|
|
||||||
|
#define h_new(type, count) ((type*)(mm__->alloc(mm__, sizeof(type)*(count))))
|
||||||
|
#define h_free(addr) (mm__->free(mm__, (addr)))
|
||||||
|
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
|
// This is going to be generally useful.
|
||||||
|
static inline void h_generic_free(HAllocator *allocator, void* ptr) {
|
||||||
|
allocator->free(allocator, ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
HAllocator system_allocator;
|
||||||
|
|
||||||
|
|
||||||
typedef struct HInputStream_ {
|
typedef struct HInputStream_ {
|
||||||
// This should be considered to be a really big value type.
|
// This should be considered to be a really big value type.
|
||||||
const uint8_t *input;
|
const uint8_t *input;
|
||||||
|
|
@ -42,6 +60,36 @@ typedef struct HInputStream_ {
|
||||||
char overrun;
|
char overrun;
|
||||||
} HInputStream;
|
} HInputStream;
|
||||||
|
|
||||||
|
typedef struct HSlistNode_ {
|
||||||
|
void* elem;
|
||||||
|
struct HSlistNode_ *next;
|
||||||
|
} HSlistNode;
|
||||||
|
|
||||||
|
typedef struct HSlist_ {
|
||||||
|
HSlistNode *head;
|
||||||
|
struct HArena_ *arena;
|
||||||
|
} HSlist;
|
||||||
|
|
||||||
|
typedef unsigned int HHashValue;
|
||||||
|
typedef HHashValue (*HHashFunc)(const void* key);
|
||||||
|
typedef bool (*HEqualFunc)(const void* key1, const void* key2);
|
||||||
|
|
||||||
|
typedef struct HHashTableEntry_ {
|
||||||
|
struct HHashTableEntry_ *next;
|
||||||
|
void* key;
|
||||||
|
void* value;
|
||||||
|
HHashValue hashval;
|
||||||
|
} HHashTableEntry;
|
||||||
|
|
||||||
|
typedef struct HHashTable_ {
|
||||||
|
HHashTableEntry *contents;
|
||||||
|
HHashFunc hashFunc;
|
||||||
|
HEqualFunc equalFunc;
|
||||||
|
size_t capacity;
|
||||||
|
size_t used;
|
||||||
|
HArena *arena;
|
||||||
|
} HHashTable;
|
||||||
|
|
||||||
/* The state of the parser.
|
/* The state of the parser.
|
||||||
*
|
*
|
||||||
* Members:
|
* Members:
|
||||||
|
|
@ -54,13 +102,19 @@ typedef struct HInputStream_ {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct HParseState_ {
|
struct HParseState_ {
|
||||||
GHashTable *cache;
|
HHashTable *cache;
|
||||||
HInputStream input_stream;
|
HInputStream input_stream;
|
||||||
HArena * arena;
|
HArena * arena;
|
||||||
GQueue *lr_stack;
|
HSlist *lr_stack;
|
||||||
GHashTable *recursion_heads;
|
HHashTable *recursion_heads;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef struct HParserBackendVTable_ {
|
||||||
|
int (*compile)(HAllocator *mm__, const HParser* parser, const void* params);
|
||||||
|
HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state);
|
||||||
|
} HParserBackendVTable;
|
||||||
|
|
||||||
|
|
||||||
/* The (location, parser) tuple used to key the cache.
|
/* The (location, parser) tuple used to key the cache.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
@ -90,8 +144,8 @@ typedef enum HParserCacheValueType_ {
|
||||||
*/
|
*/
|
||||||
typedef struct HRecursionHead_ {
|
typedef struct HRecursionHead_ {
|
||||||
const HParser *head_parser;
|
const HParser *head_parser;
|
||||||
GSList *involved_set;
|
HSlist *involved_set;
|
||||||
GSList *eval_set;
|
HSlist *eval_set;
|
||||||
} HRecursionHead;
|
} HRecursionHead;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -125,23 +179,23 @@ typedef struct HParserCacheValue_t {
|
||||||
};
|
};
|
||||||
} HParserCacheValue;
|
} HParserCacheValue;
|
||||||
|
|
||||||
typedef unsigned int *HCharset;
|
// This file provides the logical inverse of bitreader.c
|
||||||
|
struct HBitWriter_ {
|
||||||
|
uint8_t* buf;
|
||||||
|
HAllocator *mm__;
|
||||||
|
size_t index;
|
||||||
|
size_t capacity;
|
||||||
|
char bit_offset; // unlike in bit_reader, this is always the number
|
||||||
|
// of used bits in the current byte. i.e., 0 always
|
||||||
|
// means that 8 bits are available for use.
|
||||||
|
char flags;
|
||||||
|
};
|
||||||
|
|
||||||
static inline HCharset new_charset() {
|
// }}}
|
||||||
HCharset cs = g_new0(unsigned int, 256 / sizeof(unsigned int));
|
|
||||||
return cs;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
// Backends {{{
|
||||||
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
|
extern HParserBackendVTable h__packrat_backend_vtable;
|
||||||
}
|
// }}}
|
||||||
|
|
||||||
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
|
||||||
cs[pos / sizeof(*cs)] =
|
|
||||||
val
|
|
||||||
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
|
|
||||||
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||||
|
|
||||||
|
|
@ -154,10 +208,24 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size);
|
||||||
HCountedArray *h_carray_new(HArena * arena);
|
HCountedArray *h_carray_new(HArena * arena);
|
||||||
void h_carray_append(HCountedArray *array, void* item);
|
void h_carray_append(HCountedArray *array, void* item);
|
||||||
|
|
||||||
|
HSlist* h_slist_new(HArena *arena);
|
||||||
|
HSlist* h_slist_copy(HSlist *slist);
|
||||||
|
void* h_slist_pop(HSlist *slist);
|
||||||
|
void h_slist_push(HSlist *slist, void* item);
|
||||||
|
bool h_slist_find(HSlist *slist, const void* item);
|
||||||
|
HSlist* h_slist_remove_all(HSlist *slist, const void* item);
|
||||||
|
void h_slist_free(HSlist *slist);
|
||||||
|
|
||||||
|
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc);
|
||||||
|
void* h_hashtable_get(HHashTable* ht, void* key);
|
||||||
|
void h_hashtable_put(HHashTable* ht, void* key, void* value);
|
||||||
|
int h_hashtable_present(HHashTable* ht, void* key);
|
||||||
|
void h_hashtable_del(HHashTable* ht, void* key);
|
||||||
|
void h_hashtable_free(HHashTable* ht);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
#include <malloc.h>
|
#include <stdlib.h>
|
||||||
#define arena_malloc(a, s) malloc(s)
|
#define h_arena_malloc(a, s) malloc(s)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // #ifndef HAMMER_INTERNAL__H
|
#endif // #ifndef HAMMER_INTERNAL__H
|
||||||
|
|
|
||||||
|
|
@ -24,9 +24,13 @@ static const HParserVtable action_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_action(const HParser* p, const HAction a) {
|
const HParser* h_action(const HParser* p, const HAction a) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_action__m(&system_allocator, p, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &action_vt;
|
res->vtable = &action_vt;
|
||||||
HParseAction *env = g_new(HParseAction, 1);
|
HParseAction *env = h_new(HParseAction, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->action = a;
|
env->action = a;
|
||||||
res->env = (void*)env;
|
res->env = (void*)env;
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,13 @@ static const HParserVtable and_vt = {
|
||||||
.parse = parse_and,
|
.parse = parse_and,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_and(const HParser* p) {
|
const HParser* h_and(const HParser* p) {
|
||||||
|
return h_and__m(&system_allocator, p);
|
||||||
|
}
|
||||||
|
const HParser* h_and__m(HAllocator* mm__, const HParser* p) {
|
||||||
// zero-width positive lookahead
|
// zero-width positive lookahead
|
||||||
HParser *res = g_new(HParser, 1);
|
HParser *res = h_new(HParser, 1);
|
||||||
res->env = (void*)p;
|
res->env = (void*)p;
|
||||||
res->vtable = &and_vt;
|
res->vtable = &and_vt;
|
||||||
return res;
|
return res;
|
||||||
|
|
|
||||||
|
|
@ -21,10 +21,14 @@ static const HParserVtable attr_bool_vt = {
|
||||||
.parse = parse_attr_bool,
|
.parse = parse_attr_bool,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
const HParser* h_attr_bool(const HParser* p, HPredicate pred) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_attr_bool__m(&system_allocator, p, pred);
|
||||||
|
}
|
||||||
|
const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &attr_bool_vt;
|
res->vtable = &attr_bool_vt;
|
||||||
HAttrBool *env = g_new(HAttrBool, 1);
|
HAttrBool *env = h_new(HAttrBool, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->pred = pred;
|
env->pred = pred;
|
||||||
res->env = (void*)env;
|
res->env = (void*)env;
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,13 @@ static const HParserVtable bits_vt = {
|
||||||
.parse = parse_bits,
|
.parse = parse_bits,
|
||||||
};
|
};
|
||||||
const HParser* h_bits(size_t len, bool sign) {
|
const HParser* h_bits(size_t len, bool sign) {
|
||||||
struct bits_env *env = g_new(struct bits_env, 1);
|
return h_bits__m(&system_allocator, len, sign);
|
||||||
|
}
|
||||||
|
const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) {
|
||||||
|
struct bits_env *env = h_new(struct bits_env, 1);
|
||||||
env->length = len;
|
env->length = len;
|
||||||
env->signedp = sign;
|
env->signedp = sign;
|
||||||
HParser *res = g_new(HParser, 1);
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &bits_vt;
|
res->vtable = &bits_vt;
|
||||||
res->env = env;
|
res->env = env;
|
||||||
return res;
|
return res;
|
||||||
|
|
@ -31,7 +34,10 @@ const HParser* h_bits(size_t len, bool sign) {
|
||||||
|
|
||||||
#define SIZED_BITS(name_pre, len, signedp) \
|
#define SIZED_BITS(name_pre, len, signedp) \
|
||||||
const HParser* h_##name_pre##len () { \
|
const HParser* h_##name_pre##len () { \
|
||||||
return h_bits(len, signedp); \
|
return h_bits__m(&system_allocator, len, signedp); \
|
||||||
|
} \
|
||||||
|
const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \
|
||||||
|
return h_bits__m(mm__, len, signedp); \
|
||||||
}
|
}
|
||||||
SIZED_BITS(int, 8, true)
|
SIZED_BITS(int, 8, true)
|
||||||
SIZED_BITS(int, 16, true)
|
SIZED_BITS(int, 16, true)
|
||||||
|
|
|
||||||
|
|
@ -40,9 +40,12 @@ static const HParserVtable butnot_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
const HParser* h_butnot(const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
return h_butnot__m(&system_allocator, p1, p2);
|
||||||
|
}
|
||||||
|
const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1; env->p2 = p2;
|
env->p1 = p1; env->p2 = p2;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &butnot_vt; ret->env = (void*)env;
|
ret->vtable = &butnot_vt; ret->env = (void*)env;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
static HParseResult* parse_ch(void* env, HParseState *state) {
|
static HParseResult* parse_ch(void* env, HParseState *state) {
|
||||||
uint8_t c = (uint8_t)GPOINTER_TO_UINT(env);
|
uint8_t c = (uint8_t)(unsigned long)(env);
|
||||||
uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false);
|
uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false);
|
||||||
if (c == r) {
|
if (c == r) {
|
||||||
HParsedToken *tok = a_new(HParsedToken, 1);
|
HParsedToken *tok = a_new(HParsedToken, 1);
|
||||||
|
|
@ -15,9 +15,13 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
|
||||||
static const HParserVtable ch_vt = {
|
static const HParserVtable ch_vt = {
|
||||||
.parse = parse_ch,
|
.parse = parse_ch,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ch(const uint8_t c) {
|
const HParser* h_ch(const uint8_t c) {
|
||||||
HParser *ret = g_new(HParser, 1);
|
return h_ch__m(&system_allocator, c);
|
||||||
|
}
|
||||||
|
const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) {
|
||||||
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &ch_vt;
|
ret->vtable = &ch_vt;
|
||||||
ret->env = GUINT_TO_POINTER(c);
|
ret->env = (void*)(unsigned long)(c);
|
||||||
return (const HParser*)ret;
|
return (const HParser*)ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,24 @@
|
||||||
|
#include <string.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
|
typedef unsigned int *HCharset;
|
||||||
|
|
||||||
|
static inline HCharset new_charset(HAllocator* mm__) {
|
||||||
|
HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int));
|
||||||
|
memset(cs, 0, 256);
|
||||||
|
return cs;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int charset_isset(HCharset cs, uint8_t pos) {
|
||||||
|
return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs))));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
|
||||||
|
cs[pos / sizeof(*cs)] =
|
||||||
|
val
|
||||||
|
? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs)))
|
||||||
|
: cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs)));
|
||||||
|
}
|
||||||
|
|
||||||
static HParseResult* parse_charset(void *env, HParseState *state) {
|
static HParseResult* parse_charset(void *env, HParseState *state) {
|
||||||
uint8_t in = h_read_bits(&state->input_stream, 8, false);
|
uint8_t in = h_read_bits(&state->input_stream, 8, false);
|
||||||
|
|
@ -18,8 +37,11 @@ static const HParserVtable charset_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
||||||
HParser *ret = g_new(HParser, 1);
|
return h_ch_range__m(&system_allocator, lower, upper);
|
||||||
HCharset cs = new_charset();
|
}
|
||||||
|
const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
|
||||||
|
HParser *ret = h_new(HParser, 1);
|
||||||
|
HCharset cs = new_charset(mm__);
|
||||||
for (int i = 0; i < 256; i++)
|
for (int i = 0; i < 256; i++)
|
||||||
charset_set(cs, i, (lower <= i) && (i <= upper));
|
charset_set(cs, i, (lower <= i) && (i <= upper));
|
||||||
ret->vtable = &charset_vt;
|
ret->vtable = &charset_vt;
|
||||||
|
|
@ -28,9 +50,9 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
|
static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
HCharset cs = new_charset();
|
HCharset cs = new_charset(mm__);
|
||||||
for (size_t i = 0; i < 256; i++)
|
for (size_t i = 0; i < 256; i++)
|
||||||
charset_set(cs, i, 1-val);
|
charset_set(cs, i, 1-val);
|
||||||
for (size_t i = 0; i < count; i++)
|
for (size_t i = 0; i < count; i++)
|
||||||
|
|
@ -42,10 +64,18 @@ const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_in(const uint8_t *options, size_t count) {
|
const HParser* h_in(const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not(options, count, 1);
|
return h_in_or_not__m(&system_allocator, options, count, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||||
|
return h_in_or_not__m(mm__, options, count, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_not_in(const uint8_t *options, size_t count) {
|
const HParser* h_not_in(const uint8_t *options, size_t count) {
|
||||||
return h_in_or_not(options, count, 0);
|
return h_in_or_not__m(&system_allocator, options, count, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
|
||||||
|
return h_in_or_not__m(mm__, options, count, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <stdarg.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -25,20 +26,40 @@ static const HParserVtable choice_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_choice(const HParser* p, ...) {
|
const HParser* h_choice(const HParser* p, ...) {
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, p);
|
||||||
|
const HParser* ret = h_choice__mv(&system_allocator, p, ap);
|
||||||
|
va_end(ap);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, p);
|
||||||
|
const HParser* ret = h_choice__mv(mm__, p, ap);
|
||||||
|
va_end(ap);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_choice__v(const HParser* p, va_list ap) {
|
||||||
|
return h_choice__mv(&system_allocator, p, ap);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
HSequence *s = g_new(HSequence, 1);
|
HSequence *s = h_new(HSequence, 1);
|
||||||
|
|
||||||
const HParser *arg;
|
const HParser *arg;
|
||||||
va_start(ap, p);
|
va_copy(ap, ap_);
|
||||||
do {
|
do {
|
||||||
len++;
|
len++;
|
||||||
arg = va_arg(ap, const HParser *);
|
arg = va_arg(ap, const HParser *);
|
||||||
} while (arg);
|
} while (arg);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
s->p_array = g_new(const HParser *, len);
|
s->p_array = h_new(const HParser *, len);
|
||||||
|
|
||||||
va_start(ap, p);
|
va_copy(ap, ap_);
|
||||||
s->p_array[0] = p;
|
s->p_array[0] = p;
|
||||||
for (size_t i = 1; i < len; i++) {
|
for (size_t i = 1; i < len; i++) {
|
||||||
s->p_array[i] = va_arg(ap, const HParser *);
|
s->p_array[i] = va_arg(ap, const HParser *);
|
||||||
|
|
@ -46,7 +67,7 @@ const HParser* h_choice(const HParser* p, ...) {
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
|
|
||||||
s->len = len;
|
s->len = len;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &choice_vt; ret->env = (void*)s;
|
ret->vtable = &choice_vt; ret->env = (void*)s;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,9 +39,12 @@ static HParserVtable difference_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_difference(const HParser* p1, const HParser* p2) {
|
const HParser* h_difference(const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
return h_difference__m(&system_allocator, p1, p2);
|
||||||
|
}
|
||||||
|
const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1; env->p2 = p2;
|
env->p1 = p1; env->p2 = p2;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &difference_vt; ret->env = (void*)env;
|
ret->vtable = &difference_vt; ret->env = (void*)env;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,12 @@ static const HParserVtable end_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_end_p() {
|
const HParser* h_end_p() {
|
||||||
HParser *ret = g_new(HParser, 1);
|
return h_end_p__m(&system_allocator);
|
||||||
ret->vtable = &end_vt; ret->env = NULL;
|
}
|
||||||
|
|
||||||
|
const HParser* h_end_p__m(HAllocator* mm__) {
|
||||||
|
HParser *ret = h_new(HParser, 1);
|
||||||
|
ret->vtable = &end_vt;
|
||||||
|
ret->env = NULL;
|
||||||
return (const HParser*)ret;
|
return (const HParser*)ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,3 +20,6 @@ static const HParser epsilon_p = {
|
||||||
const HParser* h_epsilon_p() {
|
const HParser* h_epsilon_p() {
|
||||||
return &epsilon_p;
|
return &epsilon_p;
|
||||||
}
|
}
|
||||||
|
const HParser* h_epsilon_p__m(HAllocator* mm__) {
|
||||||
|
return &epsilon_p;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,10 @@ static const HParserVtable ignore_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_ignore(const HParser* p) {
|
const HParser* h_ignore(const HParser* p) {
|
||||||
HParser* ret = g_new(HParser, 1);
|
return h_ignore__m(&system_allocator, p);
|
||||||
|
}
|
||||||
|
const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) {
|
||||||
|
HParser* ret = h_new(HParser, 1);
|
||||||
ret->vtable = &ignore_vt;
|
ret->vtable = &ignore_vt;
|
||||||
ret->env = (void*)p;
|
ret->env = (void*)p;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -35,38 +35,48 @@ static const HParserVtable ignoreseq_vt = {
|
||||||
// API frontends
|
// API frontends
|
||||||
//
|
//
|
||||||
|
|
||||||
static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) {
|
static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) {
|
||||||
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
|
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||||
seq->parsers = g_new(const HParser*, 2);
|
seq->parsers = h_new(const HParser*, 2);
|
||||||
seq->parsers[0] = p;
|
seq->parsers[0] = p;
|
||||||
seq->parsers[1] = q;
|
seq->parsers[1] = q;
|
||||||
seq->count = 2;
|
seq->count = 2;
|
||||||
seq->which = which;
|
seq->which = which;
|
||||||
|
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &ignoreseq_vt;
|
ret->vtable = &ignoreseq_vt;
|
||||||
ret->env = (void*)seq;
|
ret->env = (void*)seq;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_left(const HParser* p, const HParser* q) {
|
const HParser* h_left(const HParser* p, const HParser* q) {
|
||||||
return h_leftright(p, q, 0);
|
return h_leftright__m(&system_allocator, p, q, 0);
|
||||||
|
}
|
||||||
|
const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||||
|
return h_leftright__m(mm__, p, q, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_right(const HParser* p, const HParser* q) {
|
const HParser* h_right(const HParser* p, const HParser* q) {
|
||||||
return h_leftright(p, q, 1);
|
return h_leftright__m(&system_allocator, p, q, 1);
|
||||||
|
}
|
||||||
|
const HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) {
|
||||||
|
return h_leftright__m(mm__, p, q, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
|
const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
|
||||||
HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
|
return h_middle__m(&system_allocator, p, x, q);
|
||||||
seq->parsers = g_new(const HParser*, 3);
|
}
|
||||||
|
const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) {
|
||||||
|
HIgnoreSeq *seq = h_new(HIgnoreSeq, 1);
|
||||||
|
seq->parsers = h_new(const HParser*, 3);
|
||||||
seq->parsers[0] = p;
|
seq->parsers[0] = p;
|
||||||
seq->parsers[1] = x;
|
seq->parsers[1] = x;
|
||||||
seq->parsers[2] = q;
|
seq->parsers[2] = q;
|
||||||
seq->count = 3;
|
seq->count = 3;
|
||||||
seq->which = 1;
|
seq->which = 1;
|
||||||
|
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &ignoreseq_vt;
|
ret->vtable = &ignoreseq_vt;
|
||||||
ret->env = (void*)seq;
|
ret->env = (void*)seq;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,10 @@ void h_bind_indirect(HParser* indirect, const HParser* inner) {
|
||||||
}
|
}
|
||||||
|
|
||||||
HParser* h_indirect() {
|
HParser* h_indirect() {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_indirect__m(&system_allocator);
|
||||||
|
}
|
||||||
|
HParser* h_indirect__m(HAllocator* mm__) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &indirect_vt;
|
res->vtable = &indirect_vt;
|
||||||
res->env = NULL;
|
res->env = NULL;
|
||||||
return res;
|
return res;
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,9 @@ static const HParserVtable int_range_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
|
const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
|
||||||
|
return h_int_range__m(&system_allocator, p, lower, upper);
|
||||||
|
}
|
||||||
|
const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) {
|
||||||
// p must be an integer parser, which means it's using parse_bits
|
// p must be an integer parser, which means it's using parse_bits
|
||||||
// TODO: re-add this check
|
// TODO: re-add this check
|
||||||
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
|
//assert_message(p->vtable == &bits_vt, "int_range requires an integer parser");
|
||||||
|
|
@ -40,11 +43,11 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t
|
||||||
// and regardless, the bounds need to fit in the parser in question
|
// and regardless, the bounds need to fit in the parser in question
|
||||||
// TODO: check this as well.
|
// TODO: check this as well.
|
||||||
|
|
||||||
HRange *r_env = g_new(HRange, 1);
|
HRange *r_env = h_new(HRange, 1);
|
||||||
r_env->p = p;
|
r_env->p = p;
|
||||||
r_env->lower = lower;
|
r_env->lower = lower;
|
||||||
r_env->upper = upper;
|
r_env->upper = upper;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &int_range_vt;
|
ret->vtable = &int_range_vt;
|
||||||
ret->env = (void*)r_env;
|
ret->env = (void*)r_env;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -49,10 +49,13 @@ static const HParserVtable many_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_many(const HParser* p) {
|
const HParser* h_many(const HParser* p) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_many__m(&system_allocator, p);
|
||||||
HRepeat *env = g_new(HRepeat, 1);
|
}
|
||||||
|
const HParser* h_many__m(HAllocator* mm__, const HParser* p) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p();
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
env->count = 0;
|
env->count = 0;
|
||||||
env->min_p = true;
|
env->min_p = true;
|
||||||
res->vtable = &many_vt;
|
res->vtable = &many_vt;
|
||||||
|
|
@ -61,10 +64,13 @@ const HParser* h_many(const HParser* p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_many1(const HParser* p) {
|
const HParser* h_many1(const HParser* p) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_many1__m(&system_allocator, p);
|
||||||
HRepeat *env = g_new(HRepeat, 1);
|
}
|
||||||
|
const HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p();
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
env->count = 1;
|
env->count = 1;
|
||||||
env->min_p = true;
|
env->min_p = true;
|
||||||
res->vtable = &many_vt;
|
res->vtable = &many_vt;
|
||||||
|
|
@ -73,10 +79,13 @@ const HParser* h_many1(const HParser* p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_repeat_n__m(&system_allocator, p, n);
|
||||||
HRepeat *env = g_new(HRepeat, 1);
|
}
|
||||||
|
const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = h_epsilon_p();
|
env->sep = h_epsilon_p__m(mm__);
|
||||||
env->count = n;
|
env->count = n;
|
||||||
env->min_p = false;
|
env->min_p = false;
|
||||||
res->vtable = &many_vt;
|
res->vtable = &many_vt;
|
||||||
|
|
@ -85,8 +94,11 @@ const HParser* h_repeat_n(const HParser* p, const size_t n) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_sepBy__m(&system_allocator, p, sep);
|
||||||
HRepeat *env = g_new(HRepeat, 1);
|
}
|
||||||
|
const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = sep;
|
env->sep = sep;
|
||||||
env->count = 0;
|
env->count = 0;
|
||||||
|
|
@ -97,8 +109,11 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
const HParser* h_sepBy1(const HParser* p, const HParser* sep) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_sepBy1__m(&system_allocator, p, sep);
|
||||||
HRepeat *env = g_new(HRepeat, 1);
|
}
|
||||||
|
const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
|
HRepeat *env = h_new(HRepeat, 1);
|
||||||
env->p = p;
|
env->p = p;
|
||||||
env->sep = sep;
|
env->sep = sep;
|
||||||
env->count = 1;
|
env->count = 1;
|
||||||
|
|
@ -135,9 +150,12 @@ static const HParserVtable length_value_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_length_value(const HParser* length, const HParser* value) {
|
const HParser* h_length_value(const HParser* length, const HParser* value) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_length_value__m(&system_allocator, length, value);
|
||||||
|
}
|
||||||
|
const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &length_value_vt;
|
res->vtable = &length_value_vt;
|
||||||
HLenVal *env = g_new(HLenVal, 1);
|
HLenVal *env = h_new(HLenVal, 1);
|
||||||
env->length = length;
|
env->length = length;
|
||||||
env->value = value;
|
env->value = value;
|
||||||
res->env = (void*)env;
|
res->env = (void*)env;
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,10 @@ static const HParserVtable not_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_not(const HParser* p) {
|
const HParser* h_not(const HParser* p) {
|
||||||
HParser *res = g_new(HParser, 1);
|
return h_not__m(&system_allocator, p);
|
||||||
|
}
|
||||||
|
const HParser* h_not__m(HAllocator* mm__, const HParser* p) {
|
||||||
|
HParser *res = h_new(HParser, 1);
|
||||||
res->vtable = &not_vt;
|
res->vtable = &not_vt;
|
||||||
res->env = (void*)p;
|
res->env = (void*)p;
|
||||||
return res;
|
return res;
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,10 @@ static const HParserVtable nothing_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_nothing_p() {
|
const HParser* h_nothing_p() {
|
||||||
HParser *ret = g_new(HParser, 1);
|
return h_nothing_p__m(&system_allocator);
|
||||||
|
}
|
||||||
|
const HParser* h_nothing_p__m(HAllocator* mm__) {
|
||||||
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &nothing_vt; ret->env = NULL;
|
ret->vtable = &nothing_vt; ret->env = NULL;
|
||||||
return (const HParser*)ret;
|
return (const HParser*)ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,12 @@ static const HParserVtable optional_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_optional(const HParser* p) {
|
const HParser* h_optional(const HParser* p) {
|
||||||
|
return h_optional__m(&system_allocator, p);
|
||||||
|
}
|
||||||
|
const HParser* h_optional__m(HAllocator* mm__, const HParser* p) {
|
||||||
// TODO: re-add this
|
// TODO: re-add this
|
||||||
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
|
//assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round.");
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &optional_vt;
|
ret->vtable = &optional_vt;
|
||||||
ret->env = (void*)p;
|
ret->env = (void*)p;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <stdarg.h>
|
||||||
#include "parser_internal.h"
|
#include "parser_internal.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -27,20 +28,40 @@ static const HParserVtable sequence_vt = {
|
||||||
.parse = parse_sequence,
|
.parse = parse_sequence,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_sequence(const HParser *p, ...) {
|
const HParser* h_sequence(const HParser* p, ...) {
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, p);
|
||||||
|
const HParser* ret = h_sequence__mv(&system_allocator, p, ap);
|
||||||
|
va_end(ap);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, p);
|
||||||
|
const HParser* ret = h_sequence__mv(mm__, p, ap);
|
||||||
|
va_end(ap);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_sequence__v(const HParser* p, va_list ap) {
|
||||||
|
return h_sequence__mv(&system_allocator, p, ap);
|
||||||
|
}
|
||||||
|
|
||||||
|
const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
const HParser *arg;
|
const HParser *arg;
|
||||||
va_start(ap, p);
|
va_copy(ap, ap_);
|
||||||
do {
|
do {
|
||||||
len++;
|
len++;
|
||||||
arg = va_arg(ap, const HParser *);
|
arg = va_arg(ap, const HParser *);
|
||||||
} while (arg);
|
} while (arg);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
HSequence *s = g_new(HSequence, 1);
|
HSequence *s = h_new(HSequence, 1);
|
||||||
s->p_array = g_new(const HParser *, len);
|
s->p_array = h_new(const HParser *, len);
|
||||||
|
|
||||||
va_start(ap, p);
|
va_copy(ap, ap_);
|
||||||
s->p_array[0] = p;
|
s->p_array[0] = p;
|
||||||
for (size_t i = 1; i < len; i++) {
|
for (size_t i = 1; i < len; i++) {
|
||||||
s->p_array[i] = va_arg(ap, const HParser *);
|
s->p_array[i] = va_arg(ap, const HParser *);
|
||||||
|
|
@ -48,7 +69,7 @@ const HParser* h_sequence(const HParser *p, ...) {
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
|
|
||||||
s->len = len;
|
s->len = len;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &sequence_vt; ret->env = (void*)s;
|
ret->vtable = &sequence_vt; ret->env = (void*)s;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,14 +20,17 @@ static HParseResult* parse_token(void *env, HParseState *state) {
|
||||||
return make_result(state, tok);
|
return make_result(state, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
const const HParserVtable token_vt = {
|
const HParserVtable token_vt = {
|
||||||
.parse = parse_token,
|
.parse = parse_token,
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_token(const uint8_t *str, const size_t len) {
|
const HParser* h_token(const uint8_t *str, const size_t len) {
|
||||||
HToken *t = g_new(HToken, 1);
|
return h_token__m(&system_allocator, str, len);
|
||||||
|
}
|
||||||
|
const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) {
|
||||||
|
HToken *t = h_new(HToken, 1);
|
||||||
t->str = (uint8_t*)str, t->len = len;
|
t->str = (uint8_t*)str, t->len = len;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &token_vt;
|
ret->vtable = &token_vt;
|
||||||
ret->env = t;
|
ret->env = t;
|
||||||
return (const HParser*)ret;
|
return (const HParser*)ret;
|
||||||
|
|
|
||||||
|
|
@ -24,3 +24,6 @@ static HParser unimplemented = {
|
||||||
const HParser* h_unimplemented() {
|
const HParser* h_unimplemented() {
|
||||||
return &unimplemented;
|
return &unimplemented;
|
||||||
}
|
}
|
||||||
|
const HParser* h_unimplemented__m(HAllocator* mm__) {
|
||||||
|
return &unimplemented;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) {
|
||||||
bak = state->input_stream;
|
bak = state->input_stream;
|
||||||
c = h_read_bits(&state->input_stream, 8, false);
|
c = h_read_bits(&state->input_stream, 8, false);
|
||||||
if (state->input_stream.overrun)
|
if (state->input_stream.overrun)
|
||||||
return NULL;
|
break;
|
||||||
} while (isspace(c));
|
} while (isspace(c));
|
||||||
state->input_stream = bak;
|
state->input_stream = bak;
|
||||||
return h_do_parse((HParser*)env, state);
|
return h_do_parse((HParser*)env, state);
|
||||||
|
|
@ -19,7 +19,10 @@ static const HParserVtable whitespace_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_whitespace(const HParser* p) {
|
const HParser* h_whitespace(const HParser* p) {
|
||||||
HParser *ret = g_new(HParser, 1);
|
return h_whitespace__m(&system_allocator, p);
|
||||||
|
}
|
||||||
|
const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) {
|
||||||
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &whitespace_vt;
|
ret->vtable = &whitespace_vt;
|
||||||
ret->env = (void*)p;
|
ret->env = (void*)p;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -36,9 +36,12 @@ static const HParserVtable xor_vt = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const HParser* h_xor(const HParser* p1, const HParser* p2) {
|
const HParser* h_xor(const HParser* p1, const HParser* p2) {
|
||||||
HTwoParsers *env = g_new(HTwoParsers, 1);
|
return h_xor__m(&system_allocator, p1, p2);
|
||||||
|
}
|
||||||
|
const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) {
|
||||||
|
HTwoParsers *env = h_new(HTwoParsers, 1);
|
||||||
env->p1 = p1; env->p2 = p2;
|
env->p1 = p1; env->p2 = p2;
|
||||||
HParser *ret = g_new(HParser, 1);
|
HParser *ret = h_new(HParser, 1);
|
||||||
ret->vtable = &xor_vt; ret->env = (void*)env;
|
ret->vtable = &xor_vt; ret->env = (void*)env;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
21
src/pprint.c
21
src/pprint.c
|
|
@ -17,10 +17,10 @@
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <glib.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "hammer.h"
|
#include "hammer.h"
|
||||||
#include <malloc.h>
|
#include "internal.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
typedef struct pp_state {
|
typedef struct pp_state {
|
||||||
int delta;
|
int delta;
|
||||||
|
|
@ -69,20 +69,25 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
|
||||||
fprintf(stream, "%*sUSER\n", indent, "");
|
fprintf(stream, "%*sUSER\n", indent, "");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
if(tok->token_type > TT_USER) {
|
||||||
|
fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER);
|
||||||
|
} else {
|
||||||
|
assert_message(0, "Should not reach here.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct result_buf {
|
struct result_buf {
|
||||||
char* output;
|
char* output;
|
||||||
|
HAllocator *mm__;
|
||||||
size_t len;
|
size_t len;
|
||||||
size_t capacity;
|
size_t capacity;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void ensure_capacity(struct result_buf *buf, int amt) {
|
static inline void ensure_capacity(struct result_buf *buf, int amt) {
|
||||||
while (buf->len + amt >= buf->capacity)
|
while (buf->len + amt >= buf->capacity)
|
||||||
buf->output = g_realloc(buf->output, buf->capacity *= 2);
|
buf->output = buf->mm__->realloc(buf->mm__, buf->output, buf->capacity *= 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void append_buf(struct result_buf *buf, const char* input, int len) {
|
static inline void append_buf(struct result_buf *buf, const char* input, int len) {
|
||||||
|
|
@ -149,15 +154,19 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Unexpected token type %d\n", tok->token_type);
|
fprintf(stderr, "Unexpected token type %d\n", tok->token_type);
|
||||||
g_assert_not_reached();
|
assert_message(0, "Should not reach here.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* h_write_result_unamb(const HParsedToken* tok) {
|
char* h_write_result_unamb(const HParsedToken* tok) {
|
||||||
|
return h_write_result_unamb__m(&system_allocator, tok);
|
||||||
|
}
|
||||||
|
char* h_write_result_unamb__m(HAllocator* mm__, const HParsedToken* tok) {
|
||||||
struct result_buf buf = {
|
struct result_buf buf = {
|
||||||
.output = g_malloc0(16),
|
.output = mm__->alloc(mm__, 16),
|
||||||
.len = 0,
|
.len = 0,
|
||||||
|
.mm__ = mm__,
|
||||||
.capacity = 16
|
.capacity = 16
|
||||||
};
|
};
|
||||||
unamb_sub(tok, &buf);
|
unamb_sub(tok, &buf);
|
||||||
|
|
|
||||||
20
src/system_allocator.c
Normal file
20
src/system_allocator.c
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
static void* system_alloc(HAllocator *allocator, size_t size) {
|
||||||
|
return malloc(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) {
|
||||||
|
return realloc(ptr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void system_free(HAllocator *allocator, void* ptr) {
|
||||||
|
free(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
HAllocator system_allocator = {
|
||||||
|
.alloc = system_alloc,
|
||||||
|
.realloc = system_realloc,
|
||||||
|
.free = system_free,
|
||||||
|
};
|
||||||
22
src/t_benchmark.c
Normal file
22
src/t_benchmark.c
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
#include <glib.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "test_suite.h"
|
||||||
|
|
||||||
|
HParserTestcase testcases[] = {
|
||||||
|
{(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"},
|
||||||
|
{(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"},
|
||||||
|
{(unsigned char*)"1,3", 3, "(u0x31 u0x33)"},
|
||||||
|
{(unsigned char*)"3", 1, "(u0x33)"},
|
||||||
|
{ NULL, 0, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
static void test_benchmark_1() {
|
||||||
|
const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||||
|
|
||||||
|
HBenchmarkResults *res = h_benchmark(parser, testcases);
|
||||||
|
h_benchmark_report(stderr, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
void register_benchmark_tests(void) {
|
||||||
|
g_test_add_func("/core/benchmark/1", test_benchmark_1);
|
||||||
|
}
|
||||||
67
src/t_bitreader.c
Normal file
67
src/t_bitreader.c
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
#include <glib.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
#include "test_suite.h"
|
||||||
|
|
||||||
|
#define MK_INPUT_STREAM(buf,len,endianness_) \
|
||||||
|
{ \
|
||||||
|
.input = (uint8_t*)buf, \
|
||||||
|
.length = len, \
|
||||||
|
.index = 0, \
|
||||||
|
.bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \
|
||||||
|
.endianness = endianness_ \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void test_bitreader_ints(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_bitreader_be(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||||
|
}
|
||||||
|
static void test_bitreader_le(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_largebits_be(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_largebits_le(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_offset_largebits_be(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_offset_largebits_le(void) {
|
||||||
|
HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA);
|
||||||
|
g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void register_bitreader_tests(void) {
|
||||||
|
g_test_add_func("/core/bitreader/be", test_bitreader_be);
|
||||||
|
g_test_add_func("/core/bitreader/le", test_bitreader_le);
|
||||||
|
g_test_add_func("/core/bitreader/largebits-be", test_largebits_be);
|
||||||
|
g_test_add_func("/core/bitreader/largebits-le", test_largebits_le);
|
||||||
|
g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be);
|
||||||
|
g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le);
|
||||||
|
g_test_add_func("/core/bitreader/ints", test_bitreader_ints);
|
||||||
|
}
|
||||||
108
src/t_bitwriter.c
Normal file
108
src/t_bitwriter.c
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
#include <glib.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
#include "test_suite.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned long long data;
|
||||||
|
size_t nbits;
|
||||||
|
} bitwriter_test_elem; // should end with {0,0}
|
||||||
|
|
||||||
|
void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
|
||||||
|
size_t len;
|
||||||
|
const uint8_t *buf;
|
||||||
|
HBitWriter *w = h_bit_writer_new(&system_allocator);
|
||||||
|
int i;
|
||||||
|
w->flags = flags;
|
||||||
|
for (i = 0; data[i].nbits; i++) {
|
||||||
|
h_bit_writer_put(w, data[i].data, data[i].nbits);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = h_bit_writer_get_buffer(w, &len);
|
||||||
|
HInputStream input = {
|
||||||
|
.input = buf,
|
||||||
|
.index = 0,
|
||||||
|
.length = len,
|
||||||
|
.bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0,
|
||||||
|
.endianness = flags,
|
||||||
|
.overrun = 0
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; data[i].nbits; i++) {
|
||||||
|
g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_bitwriter_ints(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ -0x200000000, 64 },
|
||||||
|
{ 0,0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_bitwriter_be(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0x03, 3 },
|
||||||
|
{ 0x52, 8 },
|
||||||
|
{ 0x1A, 5 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_bitwriter_le(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0x02, 3 },
|
||||||
|
{ 0x4D, 8 },
|
||||||
|
{ 0x0B, 5 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_largebits_be(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0x352, 11 },
|
||||||
|
{ 0x1A, 5 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_largebits_le(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0x26A, 11 },
|
||||||
|
{ 0x0B, 5 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_offset_largebits_be(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0xD, 5 },
|
||||||
|
{ 0x25A, 11 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_offset_largebits_le(void) {
|
||||||
|
bitwriter_test_elem data[] = {
|
||||||
|
{ 0xA, 5 },
|
||||||
|
{ 0x2D3, 11 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
void register_bitwriter_tests(void) {
|
||||||
|
g_test_add_func("/core/bitwriter/be", test_bitwriter_be);
|
||||||
|
g_test_add_func("/core/bitwriter/le", test_bitwriter_le);
|
||||||
|
g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be);
|
||||||
|
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le);
|
||||||
|
g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be);
|
||||||
|
g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le);
|
||||||
|
g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints);
|
||||||
|
}
|
||||||
16
src/t_misc.c
Normal file
16
src/t_misc.c
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
#include <glib.h>
|
||||||
|
#include "test_suite.h"
|
||||||
|
#include "hammer.h"
|
||||||
|
|
||||||
|
static void test_tt_user(void) {
|
||||||
|
g_check_cmpint(TT_USER, >, TT_NONE);
|
||||||
|
g_check_cmpint(TT_USER, >, TT_BYTES);
|
||||||
|
g_check_cmpint(TT_USER, >, TT_SINT);
|
||||||
|
g_check_cmpint(TT_USER, >, TT_UINT);
|
||||||
|
g_check_cmpint(TT_USER, >, TT_SEQUENCE);
|
||||||
|
g_check_cmpint(TT_USER, >, TT_ERR);
|
||||||
|
}
|
||||||
|
|
||||||
|
void register_misc_tests(void) {
|
||||||
|
g_test_add_func("/core/misc/tt_user", test_tt_user);
|
||||||
|
}
|
||||||
421
src/t_parser.c
Normal file
421
src/t_parser.c
Normal file
|
|
@ -0,0 +1,421 @@
|
||||||
|
#include <glib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "hammer.h"
|
||||||
|
#include "internal.h"
|
||||||
|
#include "test_suite.h"
|
||||||
|
#include "parsers/parser_internal.h"
|
||||||
|
|
||||||
|
static void test_token(void) {
|
||||||
|
const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3);
|
||||||
|
|
||||||
|
g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>");
|
||||||
|
g_check_parse_failed(token_, "95", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_ch(void) {
|
||||||
|
const HParser *ch_ = h_ch(0xa2);
|
||||||
|
|
||||||
|
g_check_parse_ok(ch_, "\xa2", 1, "u0xa2");
|
||||||
|
g_check_parse_failed(ch_, "\xa3", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_ch_range(void) {
|
||||||
|
const HParser *range_ = h_ch_range('a', 'c');
|
||||||
|
|
||||||
|
g_check_parse_ok(range_, "b", 1, "u0x62");
|
||||||
|
g_check_parse_failed(range_, "d", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//@MARK_START
|
||||||
|
static void test_int64(void) {
|
||||||
|
const HParser *int64_ = h_int64();
|
||||||
|
|
||||||
|
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
|
||||||
|
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_int32(void) {
|
||||||
|
const HParser *int32_ = h_int32();
|
||||||
|
|
||||||
|
g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000");
|
||||||
|
g_check_parse_failed(int32_, "\xff\xfe\x00", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_int16(void) {
|
||||||
|
const HParser *int16_ = h_int16();
|
||||||
|
|
||||||
|
g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200");
|
||||||
|
g_check_parse_failed(int16_, "\xfe", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_int8(void) {
|
||||||
|
const HParser *int8_ = h_int8();
|
||||||
|
|
||||||
|
g_check_parse_ok(int8_, "\x88", 1, "s-0x78");
|
||||||
|
g_check_parse_failed(int8_, "", 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_uint64(void) {
|
||||||
|
const HParser *uint64_ = h_uint64();
|
||||||
|
|
||||||
|
g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000");
|
||||||
|
g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_uint32(void) {
|
||||||
|
const HParser *uint32_ = h_uint32();
|
||||||
|
|
||||||
|
g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000");
|
||||||
|
g_check_parse_failed(uint32_, "\x00\x02\x00", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_uint16(void) {
|
||||||
|
const HParser *uint16_ = h_uint16();
|
||||||
|
|
||||||
|
g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200");
|
||||||
|
g_check_parse_failed(uint16_, "\x02", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_uint8(void) {
|
||||||
|
const HParser *uint8_ = h_uint8();
|
||||||
|
|
||||||
|
g_check_parse_ok(uint8_, "\x78", 1, "u0x78");
|
||||||
|
g_check_parse_failed(uint8_, "", 0);
|
||||||
|
}
|
||||||
|
//@MARK_END
|
||||||
|
|
||||||
|
static void test_int_range(void) {
|
||||||
|
const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
|
||||||
|
|
||||||
|
g_check_parse_ok(int_range_, "\x05", 1, "u0x5");
|
||||||
|
g_check_parse_failed(int_range_, "\xb", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static void test_float64(void) {
|
||||||
|
const HParser *float64_ = h_float64();
|
||||||
|
|
||||||
|
g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0);
|
||||||
|
g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_float32(void) {
|
||||||
|
const HParser *float32_ = h_float32();
|
||||||
|
|
||||||
|
g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0);
|
||||||
|
g_check_parse_failed(float32_, "\x3f\x80\x00");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static void test_whitespace(void) {
|
||||||
|
const HParser *whitespace_ = h_whitespace(h_ch('a'));
|
||||||
|
const HParser *whitespace_end = h_whitespace(h_end_p());
|
||||||
|
|
||||||
|
g_check_parse_ok(whitespace_, "a", 1, "u0x61");
|
||||||
|
g_check_parse_ok(whitespace_, " a", 2, "u0x61");
|
||||||
|
g_check_parse_ok(whitespace_, "  a", 3, "u0x61");
|
||||||
|
g_check_parse_ok(whitespace_, "\ta", 2, "u0x61");
|
||||||
|
g_check_parse_failed(whitespace_, "_a", 2);
|
||||||
|
|
||||||
|
g_check_parse_ok(whitespace_end, "", 0, "NULL");
|
||||||
|
g_check_parse_ok(whitespace_end, "  ", 2, "NULL");
|
||||||
|
g_check_parse_failed(whitespace_end, "  x", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_left(void) {
|
||||||
|
const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
|
||||||
|
|
||||||
|
g_check_parse_ok(left_, "a ", 2, "u0x61");
|
||||||
|
g_check_parse_failed(left_, "a", 1);
|
||||||
|
g_check_parse_failed(left_, " ", 1);
|
||||||
|
g_check_parse_failed(left_, "ab", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_right(void) {
|
||||||
|
const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
|
||||||
|
|
||||||
|
g_check_parse_ok(right_, " a", 2, "u0x61");
|
||||||
|
g_check_parse_failed(right_, "a", 1);
|
||||||
|
g_check_parse_failed(right_, " ", 1);
|
||||||
|
g_check_parse_failed(right_, "ba", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_middle(void) {
|
||||||
|
const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
|
||||||
|
|
||||||
|
g_check_parse_ok(middle_, " a ", 3, "u0x61");
|
||||||
|
g_check_parse_failed(middle_, "a", 1);
|
||||||
|
g_check_parse_failed(middle_, " ", 1);
|
||||||
|
g_check_parse_failed(middle_, " a", 2);
|
||||||
|
g_check_parse_failed(middle_, "a ", 2);
|
||||||
|
g_check_parse_failed(middle_, " b ", 3);
|
||||||
|
g_check_parse_failed(middle_, "ba ", 3);
|
||||||
|
g_check_parse_failed(middle_, " ab", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
const HParsedToken* upcase(const HParseResult *p) {
|
||||||
|
switch(p->ast->token_type) {
|
||||||
|
case TT_SEQUENCE:
|
||||||
|
{
|
||||||
|
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||||
|
HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
|
||||||
|
ret->token_type = TT_SEQUENCE;
|
||||||
|
for (size_t i=0; i<p->ast->seq->used; ++i) {
|
||||||
|
if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
|
||||||
|
HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
|
||||||
|
tmp->token_type = TT_UINT;
|
||||||
|
tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
|
||||||
|
h_carray_append(seq, tmp);
|
||||||
|
} else {
|
||||||
|
h_carray_append(seq, p->ast->seq->elements[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret->seq = seq;
|
||||||
|
return (const HParsedToken*)ret;
|
||||||
|
}
|
||||||
|
case TT_UINT:
|
||||||
|
{
|
||||||
|
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
|
||||||
|
ret->token_type = TT_UINT;
|
||||||
|
ret->uint = toupper(p->ast->uint);
|
||||||
|
return (const HParsedToken*)ret;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return p->ast;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_action(void) {
|
||||||
|
const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'),
|
||||||
|
h_ch('A'),
|
||||||
|
NULL),
|
||||||
|
h_choice(h_ch('b'),
|
||||||
|
h_ch('B'),
|
||||||
|
NULL),
|
||||||
|
NULL),
|
||||||
|
upcase);
|
||||||
|
|
||||||
|
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
|
||||||
|
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
|
||||||
|
g_check_parse_failed(action_, "XX", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_in(void) {
|
||||||
|
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||||
|
const HParser *in_ = h_in(options, 3);
|
||||||
|
g_check_parse_ok(in_, "b", 1, "u0x62");
|
||||||
|
g_check_parse_failed(in_, "d", 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_not_in(void) {
|
||||||
|
uint8_t options[3] = { 'a', 'b', 'c' };
|
||||||
|
const HParser *not_in_ = h_not_in(options, 3);
|
||||||
|
g_check_parse_ok(not_in_, "d", 1, "u0x64");
|
||||||
|
g_check_parse_failed(not_in_, "a", 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_end_p(void) {
|
||||||
|
const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL);
|
||||||
|
g_check_parse_ok(end_p_, "a", 1, "(u0x61)");
|
||||||
|
g_check_parse_failed(end_p_, "aa", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_nothing_p(void) {
|
||||||
|
const HParser *nothing_p_ = h_nothing_p();
|
||||||
|
g_check_parse_failed(nothing_p_, "a", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_sequence(void) {
|
||||||
|
const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL);
|
||||||
|
const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)");
|
||||||
|
g_check_parse_failed(sequence_1, "a", 1);
|
||||||
|
g_check_parse_failed(sequence_1, "b", 1);
|
||||||
|
g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)");
|
||||||
|
g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)");
|
||||||
|
g_check_parse_ok(sequence_2, "a  b", 4, "(u0x61 u0x62)");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_choice(void) {
|
||||||
|
const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(choice_, "a", 1, "u0x61");
|
||||||
|
g_check_parse_ok(choice_, "b", 1, "u0x62");
|
||||||
|
g_check_parse_failed(choice_, "c", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_butnot(void) {
|
||||||
|
const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2));
|
||||||
|
const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6'));
|
||||||
|
|
||||||
|
g_check_parse_ok(butnot_1, "a", 1, "u0x61");
|
||||||
|
g_check_parse_failed(butnot_1, "ab", 2);
|
||||||
|
g_check_parse_ok(butnot_1, "aa", 2, "u0x61");
|
||||||
|
g_check_parse_failed(butnot_2, "6", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_difference(void) {
|
||||||
|
const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a'));
|
||||||
|
|
||||||
|
g_check_parse_ok(difference_, "ab", 2, "<61.62>");
|
||||||
|
g_check_parse_failed(difference_, "a", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_xor(void) {
|
||||||
|
const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9'));
|
||||||
|
|
||||||
|
g_check_parse_ok(xor_, "0", 1, "u0x30");
|
||||||
|
g_check_parse_ok(xor_, "9", 1, "u0x39");
|
||||||
|
g_check_parse_failed(xor_, "5", 1);
|
||||||
|
g_check_parse_failed(xor_, "a", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_many(void) {
|
||||||
|
const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||||
|
g_check_parse_ok(many_, "adef", 4, "(u0x61)");
|
||||||
|
g_check_parse_ok(many_, "bdef", 4, "(u0x62)");
|
||||||
|
g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||||
|
g_check_parse_ok(many_, "daabbabadef", 11, "()");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_many1(void) {
|
||||||
|
const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL));
|
||||||
|
|
||||||
|
g_check_parse_ok(many1_, "adef", 4, "(u0x61)");
|
||||||
|
g_check_parse_ok(many1_, "bdef", 4, "(u0x62)");
|
||||||
|
g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)");
|
||||||
|
g_check_parse_failed(many1_, "daabbabadef", 11);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_repeat_n(void) {
|
||||||
|
const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2);
|
||||||
|
|
||||||
|
g_check_parse_failed(repeat_n_, "adef", 4);
|
||||||
|
g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)");
|
||||||
|
g_check_parse_failed(repeat_n_, "dabdef", 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_optional(void) {
|
||||||
|
const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)");
|
||||||
|
g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)");
|
||||||
|
g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)");
|
||||||
|
g_check_parse_failed(optional_, "aed", 3);
|
||||||
|
g_check_parse_failed(optional_, "ab", 2);
|
||||||
|
g_check_parse_failed(optional_, "ac", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_ignore(void) {
|
||||||
|
const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)");
|
||||||
|
g_check_parse_failed(ignore_, "ac", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_sepBy1(void) {
|
||||||
|
const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(','));
|
||||||
|
|
||||||
|
g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)");
|
||||||
|
g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)");
|
||||||
|
g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)");
|
||||||
|
g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_epsilon_p(void) {
|
||||||
|
const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL);
|
||||||
|
const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL);
|
||||||
|
const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)");
|
||||||
|
g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)");
|
||||||
|
g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Placeholder for the attr_bool test case: the body is empty, so this
// function performs no checks. TODO: add real assertions.
static void test_attr_bool(void) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_and(void) {
|
||||||
|
const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL);
|
||||||
|
const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL);
|
||||||
|
const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(and_1, "0", 1, "(u0x30)");
|
||||||
|
g_check_parse_failed(and_2, "0", 1);
|
||||||
|
g_check_parse_ok(and_3, "12", 2, "(u0x31)");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_not(void) {
|
||||||
|
const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL);
|
||||||
|
const HParser *not_2 = h_sequence(h_ch('a'),
|
||||||
|
h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL),
|
||||||
|
h_token((const uint8_t*)"++", 2),
|
||||||
|
NULL), h_ch('b'), NULL);
|
||||||
|
|
||||||
|
g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)");
|
||||||
|
g_check_parse_failed(not_1, "a++b", 4);
|
||||||
|
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)");
|
||||||
|
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_leftrec(void) {
|
||||||
|
const HParser *a_ = h_ch('a');
|
||||||
|
|
||||||
|
HParser *lr_ = h_indirect();
|
||||||
|
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
|
||||||
|
|
||||||
|
g_check_parse_ok(lr_, "a", 1, "u0x61");
|
||||||
|
g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)");
|
||||||
|
g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)");
|
||||||
|
}
|
||||||
|
|
||||||
|
void register_parser_tests(void) {
|
||||||
|
g_test_add_func("/core/parser/token", test_token);
|
||||||
|
g_test_add_func("/core/parser/ch", test_ch);
|
||||||
|
g_test_add_func("/core/parser/ch_range", test_ch_range);
|
||||||
|
g_test_add_func("/core/parser/int64", test_int64);
|
||||||
|
g_test_add_func("/core/parser/int32", test_int32);
|
||||||
|
g_test_add_func("/core/parser/int16", test_int16);
|
||||||
|
g_test_add_func("/core/parser/int8", test_int8);
|
||||||
|
g_test_add_func("/core/parser/uint64", test_uint64);
|
||||||
|
g_test_add_func("/core/parser/uint32", test_uint32);
|
||||||
|
g_test_add_func("/core/parser/uint16", test_uint16);
|
||||||
|
g_test_add_func("/core/parser/uint8", test_uint8);
|
||||||
|
g_test_add_func("/core/parser/int_range", test_int_range);
|
||||||
|
#if 0
|
||||||
|
g_test_add_func("/core/parser/float64", test_float64);
|
||||||
|
g_test_add_func("/core/parser/float32", test_float32);
|
||||||
|
#endif
|
||||||
|
g_test_add_func("/core/parser/whitespace", test_whitespace);
|
||||||
|
g_test_add_func("/core/parser/left", test_left);
|
||||||
|
g_test_add_func("/core/parser/right", test_right);
|
||||||
|
g_test_add_func("/core/parser/middle", test_middle);
|
||||||
|
g_test_add_func("/core/parser/action", test_action);
|
||||||
|
g_test_add_func("/core/parser/in", test_in);
|
||||||
|
g_test_add_func("/core/parser/not_in", test_not_in);
|
||||||
|
g_test_add_func("/core/parser/end_p", test_end_p);
|
||||||
|
g_test_add_func("/core/parser/nothing_p", test_nothing_p);
|
||||||
|
g_test_add_func("/core/parser/sequence", test_sequence);
|
||||||
|
g_test_add_func("/core/parser/choice", test_choice);
|
||||||
|
g_test_add_func("/core/parser/butnot", test_butnot);
|
||||||
|
g_test_add_func("/core/parser/difference", test_difference);
|
||||||
|
g_test_add_func("/core/parser/xor", test_xor);
|
||||||
|
g_test_add_func("/core/parser/many", test_many);
|
||||||
|
g_test_add_func("/core/parser/many1", test_many1);
|
||||||
|
g_test_add_func("/core/parser/repeat_n", test_repeat_n);
|
||||||
|
g_test_add_func("/core/parser/optional", test_optional);
|
||||||
|
g_test_add_func("/core/parser/sepBy1", test_sepBy1);
|
||||||
|
g_test_add_func("/core/parser/epsilon_p", test_epsilon_p);
|
||||||
|
g_test_add_func("/core/parser/attr_bool", test_attr_bool);
|
||||||
|
g_test_add_func("/core/parser/and", test_and);
|
||||||
|
g_test_add_func("/core/parser/not", test_not);
|
||||||
|
g_test_add_func("/core/parser/ignore", test_ignore);
|
||||||
|
g_test_add_func("/core/parser/leftrec", test_leftrec);
|
||||||
|
}
|
||||||
|
|
@ -15,12 +15,15 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
#include "hammer.h"
|
#include "hammer.h"
|
||||||
#include "test_suite.h"
|
#include "test_suite.h"
|
||||||
|
|
||||||
extern void register_bitreader_tests();
|
extern void register_bitreader_tests();
|
||||||
extern void register_bitwriter_tests();
|
extern void register_bitwriter_tests();
|
||||||
extern void register_parser_tests();
|
extern void register_parser_tests();
|
||||||
|
extern void register_misc_tests();
|
||||||
|
extern void register_benchmark_tests();
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
g_test_init(&argc, &argv, NULL);
|
g_test_init(&argc, &argv, NULL);
|
||||||
|
|
@ -29,6 +32,8 @@ int main(int argc, char** argv) {
|
||||||
register_bitreader_tests();
|
register_bitreader_tests();
|
||||||
register_bitwriter_tests();
|
register_bitwriter_tests();
|
||||||
register_parser_tests();
|
register_parser_tests();
|
||||||
|
register_misc_tests();
|
||||||
|
register_benchmark_tests();
|
||||||
|
|
||||||
g_test_run();
|
g_test_run();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
#ifndef HAMMER_TEST_SUITE__H
|
#ifndef HAMMER_TEST_SUITE__H
|
||||||
#define HAMMER_TEST_SUITE__H
|
#define HAMMER_TEST_SUITE__H
|
||||||
#include <malloc.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
// Equivalent to g_assert_*, but not using g_assert...
|
// Equivalent to g_assert_*, but not using g_assert...
|
||||||
#define g_check_inttype(fmt, typ, n1, op, n2) do { \
|
#define g_check_inttype(fmt, typ, n1, op, n2) do { \
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue