merging in aegis' JNI bindings

This commit is contained in:
Meredith L. Patterson 2013-05-23 00:58:27 -07:00
commit 1c7e9947a4
42 changed files with 2445 additions and 712 deletions

5
.gitignore vendored
View file

@ -1,10 +1,15 @@
*.o
*~
*.a
*.class
*.so
jni/com*.h
src/test_suite
lib/hush
examples/dns
examples/base64
examples/base64_sem1
examples/base64_sem2
TAGS
*.swp
*.swo

View file

@ -3,7 +3,7 @@
# and kick off a recursive make
# Also, "make src/all" turns into "make -C src all"
SUBDIRS = src examples
SUBDIRS = src examples jni
include config.mk

View file

@ -48,3 +48,11 @@ Examples
The `examples/` directory contains some simple examples, currently including:
* base64
* DNS
Community
=========
Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing.
Contact
=======
You can also email us at <hammer@upstandinghackers.com>.

View file

@ -8,7 +8,7 @@ include $(TOPLEVEL)/config.mk
TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS
TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) -lrt
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -g
LDFLAGS :=
CC ?= gcc

View file

@ -2,7 +2,11 @@
OUTPUTS := dns.o \
dns \
base64.o \
base64
base64 \
base64_sem1.o \
base64_sem1 \
base64_sem2.o \
base64_sem2
TOPLEVEL := ../
@ -12,20 +16,26 @@ LDFLAGS += $(pkg-config --libs glib-2.0)
all: dns base64
all: dns base64 base64_sem1 base64_sem2
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
dns: dns.o rr.o dns_common.o
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
dns.o: ../src/hammer.h dns_common.h
rr.o: ../src/hammer.h rr.h dns_common.h
dns_common.o: ../src/hammer.h dns_common.h
dns.o: ../src/hammer.h dns_common.h ../src/glue.h
rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h
dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h
base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
base64: base64.o
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
base64.o: ../src/hammer.h
base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
base64_sem1: base64_sem1.o
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
base64_sem2: base64_sem2.o
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
base64%.o: ../src/hammer.h ../src/glue.h

View file

@ -1,3 +1,13 @@
// Example parser: Base64, syntax only.
//
// Demonstrates how to construct a Hammer parser that recognizes valid Base64
// sequences.
//
// Note that no semantic evaluation of the sequence is performed, i.e. the
// byte sequence being represented is not returned, or determined. See
// base64_sem1.c and base64_sem2.c for examples how to attach appropriate
// semantic actions to the grammar.
#include "../src/hammer.h"
const HParser* document = NULL;
@ -14,18 +24,17 @@ void init_parser(void)
const HParser *equals = h_ch('=');
const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
const HParser *bsfdig_4bit = h_choice(
h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'),
h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'),
h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL);
const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL);
const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4);
const HParser *base64_3 = h_repeat_n(bsfdig, 4);
const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
const HParser *base64 = h_choice(base64_2, base64_1, NULL);
// why does this parse "A=="?!
// why does this parse "aaA=" but not "aA=="?!
const HParser *base64 = h_sequence(h_many(base64_3),
h_optional(h_choice(base64_2,
base64_1, NULL)),
NULL);
document = base64;
document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL);
}

172
examples/base64_sem1.c Normal file
View file

@ -0,0 +1,172 @@
// Example parser: Base64, with fine-grained semantic actions
//
// Demonstrates how to attach semantic actions to grammar rules and piece by
// piece transform the parse tree into the desired semantic representation,
// in this case a sequence of 8-bit values.
//
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
// Those rules using ARULE get an attached action which must be declared (as
// a function of type HAction) with a standard name based on the rule name.
//
// This variant of the example uses fine-grained semantic actions that
// transform the parse tree in small steps in a bottom-up fashion. Compare
// base64_sem2.c for an alternative approach using a single top-level action.
#include "../src/hammer.h"
#include "../src/glue.h"
#include <assert.h>
///
// Semantic actions for the grammar below, each corresponds to an "ARULE".
// They must be named act_<rulename>.
///
// Action for a single Base64 digit: replaces the matched character by its
// 6-bit value in the Base64 alphabet
//   'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62, '/' -> 63.
// Any other character leaves the result at 0.
const HParsedToken *act_bsfdig(const HParseResult *p)
{
    HParsedToken *res = H_MAKE_UINT(0);

    uint8_t c = H_CAST_UINT(p->ast);

    // The letter ranges start at 'A' (0x41) and 'a' (0x61). Starting one
    // earlier would accept '@' and '`' and make the subtraction wrap.
    if(c >= 'A' && c <= 'Z')
        res->uint = c - 'A';
    else if(c >= 'a' && c <= 'z')
        res->uint = c - 'a' + 26;
    else if(c >= '0' && c <= '9')
        res->uint = c - '0' + 52;
    else if(c == '+')
        res->uint = 62;
    else if(c == '/')
        res->uint = 63;

    return res;
}
// act_index0: h_act_index bound to the fixed argument 0.
// NOTE(review): presumably this selects element 0 of a sequence result --
// confirm against the H_ACT_APPLY / h_act_index definitions in glue.h.
H_ACT_APPLY(act_index0, h_act_index, 0);
// The 4-bit and 2-bit digit rules share the action of the plain digit rule.
#define act_bsfdig_4bit act_bsfdig
#define act_bsfdig_2bit act_bsfdig
// The "equals" and "ws" rules are mapped to h_act_ignore.
#define act_equals h_act_ignore
#define act_ws h_act_ignore
// The "document" rule uses act_index0 as its action.
#define act_document act_index0
// Generic action: converts one block of Base64 digits into n bytes.
//
// Reads the first n+1 elements of p->ast (each already reduced to its 6-bit
// value), packs them into one integer, drops the surplus low-order bits and
// returns a sequence of n byte-valued tokens, most significant byte first.
const HParsedToken *act_base64_n(int n, const HParseResult *p)
{
    HParsedToken *out = H_MAKE_SEQN(n);
    HParsedToken **sixbits = h_seq_elements(p->ast);

    // Pack n+1 six-bit digits, first digit ending up most significant.
    uint32_t acc = 0;
    int nbits = 0;
    int k = 0;
    while(k <= n) {
        acc <<= 6;
        acc |= sixbits[k]->uint;
        nbits += 6;
        k++;
    }

    // Discard the padding bits so that acc holds exactly n whole bytes.
    acc >>= nbits % 8;

    // Peel bytes off the low end, filling the output from back to front.
    for(int pos = n - 1; pos >= 0; pos--) {
        HParsedToken *byte = H_MAKE_UINT(acc & 0xFF);
        out->seq->elements[pos] = byte;
        acc >>= 8;
    }
    out->seq->used = n;

    return out;
}
H_ACT_APPLY(act_base64_3, act_base64_n, 3);
H_ACT_APPLY(act_base64_2, act_base64_n, 2);
H_ACT_APPLY(act_base64_1, act_base64_n, 1);
// Action for the "base64" rule: builds one result sequence from the list of
// base64_3 blocks (element 0 of the AST) followed by the trailing
// base64_2/base64_1 block (element 1), if that element is a sequence.
const HParsedToken *act_base64(const HParseResult *p)
{
    assert(p->ast->token_type == TT_SEQUENCE);
    assert(p->ast->seq->used == 2);
    assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);

    HParsedToken *bytes = H_MAKE_SEQ();

    // Full blocks first, in input order.
    HCountedArray *blocks = H_FIELD_SEQ(0);
    size_t k = 0;
    while(k < blocks->used) {
        h_seq_append(bytes, blocks->elements[k]);
        k++;
    }

    // The trailing block is only present when element 1 is a sequence.
    const HParsedToken *tail = h_seq_index(p->ast, 1);
    if(tail->token_type != TT_SEQUENCE)
        return bytes;

    h_seq_append(bytes, tail);
    return bytes;
}
///
// Set up the parser with the grammar to be recognized.
///
// Builds the Base64 grammar and returns its top-level "document" parser.
// Rules declared with H_RULE carry no action; rules declared with H_ARULE
// get the action named act_<rulename> attached (see the note at the top of
// this file). Rules are declared bottom-up because later rules refer to
// earlier ones by name.
const HParser *init_parser(void)
{
// CORE: basic character classes
H_RULE (digit, h_ch_range(0x30, 0x39));
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
// AUX.: Base64-specific building blocks
H_RULE (plus, h_ch('+'));
H_RULE (slash, h_ch('/'));
H_ARULE(equals, h_ch('='));
H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL));
// Restricted digit sets for the last digit before the padding.
H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
// A full block is four digits; the two padded forms end in '=' or '=='.
H_ARULE(base64_3, h_repeat_n(bsfdig, 4));
H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
// Any number of full blocks, optionally followed by one padded block.
H_ARULE(base64, h_sequence(h_many(base64_3),
h_optional(h_choice(base64_2,
base64_1, NULL)),
NULL));
H_ARULE(ws, h_many(space));
// The whole input: optional whitespace around the Base64 data, then end of input.
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
// BUG: sometimes inputs that should parse just don't.
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
// Using fewer actions seemed to make it less likely.
return document;
}
///
// Main routine: print input, parse, print result, return success/failure.
///
#include <stdio.h>
// Reads up to sizeof(input) bytes from stdin, echoes the byte count and the
// raw input to stderr, runs the parser over the input and, on success,
// pretty-prints the resulting AST to stdout.
// Returns 0 if the input parsed, 1 otherwise.
int main(int argc, char **argv)
{
    uint8_t input[102400];
    size_t inputsize;
    const HParser *parser;
    const HParseResult *result;

    parser = init_parser();

    inputsize = fread(input, 1, sizeof(input), stdin);
    // size_t needs %zu; %lu is only correct on platforms where size_t
    // happens to be unsigned long.
    fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
    fwrite(input, 1, inputsize, stderr);
    result = h_parse(parser, input, inputsize);

    if(result) {
        // The cast keeps the argument in step with the %lld specifier.
        fprintf(stderr, "parsed=%lld bytes\n", (long long)(result->bit_length/8));
        h_pprint(stdout, result->ast, 0, 0);
        return 0;
    } else {
        return 1;
    }
}

176
examples/base64_sem2.c Normal file
View file

@ -0,0 +1,176 @@
// Example parser: Base64, with coarse-grained semantic actions
//
// Demonstrates how to attach semantic actions to a grammar and transform the
// parse tree into the desired semantic representation, in this case a sequence
// of 8-bit values.
//
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
// Those rules using ARULE get an attached action which must be declared (as
// a function of type HAction) with a standard name based on the rule name.
//
// This variant of the example uses coarse-grained semantic actions,
// transforming the entire parse tree in one big step. Compare base64_sem1.c
// for an alternative approach using a fine-grained piece-by-piece
// transformation.
#include "../src/hammer.h"
#include "../src/glue.h"
#include <assert.h>
///
// Semantic actions for the grammar below, each corresponds to an "ARULE".
// They must be named act_<rulename>.
///
// helper: return the numeric value of a parsed base64 digit
//
// Maps the character stored in a TT_UINT token to its 6-bit value in the
// Base64 alphabet ('A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61,
// '+' -> 62, '/' -> 63). Returns 0 for a NULL token, a token of another type,
// or a character outside the alphabet.
uint8_t bsfdig_value(const HParsedToken *p)
{
    uint8_t value = 0;

    if(p && p->token_type == TT_UINT) {
        uint8_t c = p->uint;
        // The letter ranges start at 'A' (0x41) and 'a' (0x61). Starting
        // one earlier would accept '@' and '`' and make the subtraction wrap.
        if(c >= 'A' && c <= 'Z')
            value = c - 'A';
        else if(c >= 'a' && c <= 'z')
            value = c - 'a' + 26;
        else if(c >= '0' && c <= '9')
            value = c - '0' + 52;
        else if(c == '+')
            value = 62;
        else if(c == '/')
            value = 63;
    }

    return value;
}
// helper: wrap a byte value in a fresh uint token and add it to sequence res
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))

// helper: pack the first `count` digit tokens into one integer, six bits per
// digit, with the first digit ending up most significant
static uint32_t b64_pack(HParsedToken **digits, int count)
{
    uint32_t x = 0;
    for(int k=0; k<count; k++)
        x = (x << 6) | bsfdig_value(digits[k]);
    return x;
}

// Action for the "base64" rule: decodes the whole parse tree in one step.
// Element 0 of the AST is the list of full 4-digit blocks; element 1 is the
// optional padded end block. Returns a flat sequence of byte-valued tokens.
const HParsedToken *act_base64(const HParseResult *p)
{
    assert(p->ast->token_type == TT_SEQUENCE);
    assert(p->ast->seq->used == 2);
    assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);

    const HParsedToken *b64_3 = p->ast->seq->elements[0];
    const HParsedToken *tail  = p->ast->seq->elements[1];

    // Classify the end block by the number of bytes it carries:
    // 0 = absent, 1 = "xx==" (third element is '='), 2 = "xxx=".
    int tail_bytes = 0;
    if(tail->token_type == TT_SEQUENCE)
        tail_bytes = (tail->seq->elements[2]->uint == '=') ? 1 : 2;

    HParsedToken *res = H_MAKE_SEQ();

    // Full blocks: four digits -> 24 bits -> three bytes.
    for(size_t i=0; i<b64_3->seq->used; i++) {
        assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE);
        uint32_t x = b64_pack(b64_3->seq->elements[i]->seq->elements, 4);
        for(int shift=16; shift>=0; shift-=8)
            seq_append_byte(res, (x >> shift) & 0xFF);
    }

    if(tail_bytes == 2) {
        // Three digits -> 18 bits; the low 2 bits are padding.
        uint32_t x = b64_pack(tail->seq->elements, 3);
        seq_append_byte(res, (x >> 10) & 0xFF);
        seq_append_byte(res, (x >> 2) & 0xFF);
    } else if(tail_bytes == 1) {
        // Two digits -> 12 bits; the low 4 bits are padding.
        uint32_t x = b64_pack(tail->seq->elements, 2);
        seq_append_byte(res, (x >> 4) & 0xFF);
    }

    return res;
}
// act_index0: h_act_index bound to the fixed argument 0.
// NOTE(review): presumably this selects element 0 of a sequence result --
// confirm against the H_ACT_APPLY / h_act_index definitions in glue.h.
H_ACT_APPLY(act_index0, h_act_index, 0);
// The "ws" rule is mapped to h_act_ignore; "document" uses act_index0.
#define act_ws h_act_ignore
#define act_document act_index0
///
// Set up the parser with the grammar to be recognized.
///
// Builds the Base64 grammar and returns its top-level "document" parser.
// Only base64, ws and document are declared with H_ARULE and therefore get
// an act_<rulename> action; every other rule is a plain H_RULE, so the raw
// parse tree reaches act_base64 unchanged.
const HParser *init_parser(void)
{
// CORE: basic character classes
H_RULE (digit, h_ch_range(0x30, 0x39));
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
// AUX.: Base64-specific building blocks
H_RULE (plus, h_ch('+'));
H_RULE (slash, h_ch('/'));
H_RULE (equals, h_ch('='));
H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL));
// Restricted digit sets for the last digit before the padding.
H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
// A full block is four digits; the two padded forms end in '=' or '=='.
H_RULE (base64_3, h_repeat_n(bsfdig, 4));
H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
// Any number of full blocks, optionally followed by one padded block.
H_ARULE(base64, h_sequence(h_many(base64_3),
h_optional(h_choice(base64_2,
base64_1, NULL)),
NULL));
H_ARULE(ws, h_many(space));
// The whole input: optional whitespace around the Base64 data, then end of input.
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
// BUG: sometimes inputs that should parse just don't.
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
// Using fewer actions seemed to make it less likely.
return document;
}
///
// Main routine: print input, parse, print result, return success/failure.
///
#include <stdio.h>
// Reads up to sizeof(input) bytes from stdin, echoes the byte count and the
// raw input to stderr, runs the parser over the input and, on success,
// pretty-prints the resulting AST to stdout.
// Returns 0 if the input parsed, 1 otherwise.
int main(int argc, char **argv)
{
    uint8_t input[102400];
    size_t inputsize;
    const HParser *parser;
    const HParseResult *result;

    parser = init_parser();

    inputsize = fread(input, 1, sizeof(input), stdin);
    // size_t needs %zu; %lu is only correct on platforms where size_t
    // happens to be unsigned long.
    fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
    fwrite(input, 1, inputsize, stderr);
    result = h_parse(parser, input, inputsize);

    if(result) {
        // The cast keeps the argument in step with the %lld specifier.
        fprintf(stderr, "parsed=%lld bytes\n", (long long)(result->bit_length/8));
        h_pprint(stdout, result->ast, 0, 0);
        return 0;
    } else {
        return 1;
    }
}

View file

@ -10,7 +10,12 @@
#define false 0
#define true 1
bool is_zero(HParseResult *p) {
///
// Validations
///
bool validate_hdzero(HParseResult *p) {
if (TT_UINT != p->ast->token_type)
return false;
return (0 == p->ast->uint);
@ -20,407 +25,243 @@ bool is_zero(HParseResult *p) {
* Every DNS message should have QDCOUNT entries in the question
* section, and ANCOUNT+NSCOUNT+ARCOUNT resource records.
*/
bool validate_dns(HParseResult *p) {
bool validate_message(HParseResult *p) {
if (TT_SEQUENCE != p->ast->token_type)
return false;
// The header holds the counts as its last 4 elements.
HParsedToken **elems = p->ast->seq->elements[0]->seq->elements;
size_t qd = elems[8]->uint;
size_t an = elems[9]->uint;
size_t ns = elems[10]->uint;
size_t ar = elems[11]->uint;
HParsedToken *questions = p->ast->seq->elements[1];
if (questions->seq->used != qd)
dns_header_t *header = H_FIELD(dns_header_t, 0);
size_t qd = header->question_count;
size_t an = header->answer_count;
size_t ns = header->authority_count;
size_t ar = header->additional_count;
if (H_FIELD_SEQ(1)->used != qd)
return false;
HParsedToken *rrs = p->ast->seq->elements[2];
if (an+ns+ar != rrs->seq->used)
if (an+ns+ar != H_FIELD_SEQ(2)->used)
return false;
return true;
}
struct dns_qname get_qname(const HParsedToken *t) {
// The qname parser parses at least 1 length-value pair, then a NULL.
// So, t->seq->elements[0] is a sequence of at least 1 such pair,
// and t->seq->elements[1] is the null.
const HParsedToken *labels = t->seq->elements[0];
struct dns_qname ret = {
.qlen = labels->seq->used,
.labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used)
};
// i is which label we're on
for (size_t i=0; i<labels->seq->used; ++i) {
ret.labels[i].len = labels->seq->elements[i]->seq->used;
ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1);
// j is which char of the label we're on
for (size_t j=0; j<ret.labels[i].len; ++j)
ret.labels[i].label[j] = labels->seq->elements[i]->seq->elements[j]->uint;
ret.labels[i].label[ret.labels[i].len] = 0;
}
return ret;
}
// Renders a parsed domain token as a dotted, NUL-terminated string.
//
//  - TT_UINT:     returns the constant string " ".
//  - TT_SEQUENCE: a sequence of labels, each a sequence of byte tokens; the
//                 labels are joined with '.' into an arena-allocated string.
//                 An empty sequence yields the empty string.
//  - otherwise:   returns NULL.
char* get_domain(const HParsedToken *t) {
  switch(t->token_type) {
  case TT_UINT:
    return " ";
  case TT_SEQUENCE:
    {
      // Size the buffer from the actual label lengths (one separator or
      // terminator per label, plus one byte so an empty sequence still has
      // room for the NUL) instead of assuming 64 bytes per label.
      size_t total = 1;
      for (size_t i=0; i<t->seq->used; ++i)
        total += t->seq->elements[i]->seq->used + 1;

      char *ret = h_arena_malloc(t->seq->arena, total);
      size_t count = 0;
      for (size_t i=0; i<t->seq->used; ++i) {
        HParsedToken *tmp = t->seq->elements[i];
        for (size_t j=0; j<tmp->seq->used; ++j) {
          // Index the characters of the label with j, not the label index i.
          ret[count] = tmp->seq->elements[j]->uint;
          ++count;
        }
        ret[count] = '.';
        ++count;
      }
      // Replace the trailing '.' with the terminator; with no labels at all
      // there is no '.' to overwrite and count stays at 0.
      if (count > 0)
        --count;
      ret[count] = '\x00';
      return ret;
    }
  default:
    return NULL;
  }
}
///
// Semantic Actions
///
// Copies the byte values of the tokens in arr into a freshly arena-allocated
// uint8_t array of arr->used elements. The result is not NUL-terminated.
uint8_t* get_cs(const HCountedArray *arr) {
  uint8_t *bytes = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);

  size_t k = 0;
  while (k < arr->used) {
    bytes[k] = arr->elements[k]->uint;
    ++k;
  }

  return bytes;
}
// Copies a sequence of character-strings into arena memory.
//
// arr holds arr->used tokens, each a sequence of byte tokens. Returns an
// array of arr->used pointers; entry i points to a copy of the bytes of
// string i (not NUL-terminated).
uint8_t** get_txt(const HCountedArray *arr) {
  uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
  for (size_t i=0; i<arr->used; ++i) {
    const HCountedArray *chars = arr->elements[i]->seq;
    uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*chars->used);
    for (size_t j=0; j<chars->used; ++j)
      tmp[j] = chars->elements[j]->uint;
    // Store the copy; without this the returned array holds only
    // uninitialised pointers.
    ret[i] = tmp;
  }
  return ret;
}
void set_rr(struct dns_rr rr, HCountedArray *rdata) {
// Helper: Parse and pack the RDATA field of a Resource Record.
void set_rdata(struct dns_rr *rr, HCountedArray *rdata) {
uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used);
for (size_t i=0; i<rdata->used; ++i)
data[i] = rdata->elements[i]->uint;
data[i] = H_CAST_UINT(rdata->elements[i]);
// Parse RDATA if possible.
const HParseResult *p = NULL;
const HParser *parser = init_rdata(rr->type);
if (parser)
p = h_parse(parser, (const uint8_t*)data, rdata->used);
// If the RR doesn't parse, set its type to 0.
switch(rr.type) {
case 1: // A
{
const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.a = r->ast->seq->elements[0]->uint;
break;
}
case 2: // NS
{
const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.ns = get_domain(r->ast->seq->elements[0]);
break;
}
case 3: // MD
{
const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.md = get_domain(r->ast->seq->elements[0]);
break;
}
case 4: // MF
{
const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.md = get_domain(r->ast->seq->elements[0]);
break;
}
case 5: // CNAME
{
const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.cname = get_domain(r->ast->seq->elements[0]);
break;
}
case 6: // SOA
{
const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.soa.mname = get_domain(r->ast->seq->elements[0]);
rr.soa.rname = get_domain(r->ast->seq->elements[1]);
rr.soa.serial = r->ast->seq->elements[2]->uint;
rr.soa.refresh = r->ast->seq->elements[3]->uint;
rr.soa.retry = r->ast->seq->elements[4]->uint;
rr.soa.expire = r->ast->seq->elements[5]->uint;
rr.soa.minimum = r->ast->seq->elements[6]->uint;
}
break;
}
case 7: // MB
{
const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.mb = get_domain(r->ast->seq->elements[0]);
break;
}
case 8: // MG
{
const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.mg = get_domain(r->ast->seq->elements[0]);
break;
}
case 9: // MR
{
const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.mr = get_domain(r->ast->seq->elements[0]);
break;
}
case 10: // NULL
{
const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used);
for (size_t i=0; i<r->ast->seq->used; ++i)
rr.null[i] = r->ast->seq->elements[i]->uint;
}
break;
}
case 11: // WKS
{
const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.wks.address = r->ast->seq->elements[0]->uint;
rr.wks.protocol = r->ast->seq->elements[1]->uint;
rr.wks.len = r->ast->seq->elements[2]->seq->used;
rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used);
for (size_t i=0; i<rr.wks.len; ++i)
rr.wks.bit_map[i] = r->ast->seq->elements[2]->seq->elements[i]->uint;
}
break;
}
case 12: // PTR
{
const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else
rr.ptr = get_domain(r->ast->seq->elements[0]);
break;
}
case 13: // HINFO
{
const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq);
rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq);
}
break;
}
case 14: // MINFO
{
const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]);
rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]);
}
break;
}
case 15: // MX
{
const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.mx.preference = r->ast->seq->elements[0]->uint;
rr.mx.exchange = get_domain(r->ast->seq->elements[1]);
}
break;
}
case 16: // TXT
{
const HParseResult *r = h_parse(init_txt(), (const uint8_t*)data, rdata->used);
if (!r)
rr.type = 0;
else {
rr.txt.count = r->ast->seq->elements[0]->seq->used;
rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq);
}
break;
}
default:
break;
if (!p)
rr->type = 0;
// Pack the parsed rdata into rr.
switch(rr->type) {
case 1: rr->a = H_CAST_UINT(p->ast); break;
case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break;
case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break;
case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break;
case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break;
case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break;
case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break;
case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break;
case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break;
case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break;
case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break;
case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break;
case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break;
case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break;
case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break;
case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break;
default: break;
}
}
const HParsedToken* pack_dns_struct(const HParseResult *p) {
h_pprint(stdout, p->ast, 0, 2);
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
ret->token_type = TT_USER;
dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t));
HParsedToken *hdr = p->ast->seq->elements[0];
struct dns_header header = {
.id = hdr->seq->elements[0]->uint,
.qr = hdr->seq->elements[1]->uint,
.opcode = hdr->seq->elements[2]->uint,
.aa = hdr->seq->elements[3]->uint,
.tc = hdr->seq->elements[4]->uint,
.rd = hdr->seq->elements[5]->uint,
.ra = hdr->seq->elements[6]->uint,
.rcode = hdr->seq->elements[7]->uint,
.question_count = hdr->seq->elements[8]->uint,
.answer_count = hdr->seq->elements[9]->uint,
.authority_count = hdr->seq->elements[10]->uint,
.additional_count = hdr->seq->elements[11]->uint
const HParsedToken* act_header(const HParseResult *p) {
HParsedToken **fields = h_seq_elements(p->ast);
dns_header_t header_ = {
.id = H_CAST_UINT(fields[0]),
.qr = H_CAST_UINT(fields[1]),
.opcode = H_CAST_UINT(fields[2]),
.aa = H_CAST_UINT(fields[3]),
.tc = H_CAST_UINT(fields[4]),
.rd = H_CAST_UINT(fields[5]),
.ra = H_CAST_UINT(fields[6]),
.rcode = H_CAST_UINT(fields[7]),
.question_count = H_CAST_UINT(fields[8]),
.answer_count = H_CAST_UINT(fields[9]),
.authority_count = H_CAST_UINT(fields[10]),
.additional_count = H_CAST_UINT(fields[11])
};
msg->header = header;
HParsedToken *qs = p->ast->seq->elements[1];
dns_header_t *header = H_ALLOC(dns_header_t);
*header = header_;
return H_MAKE(dns_header_t, header);
}
// Action for the "label" rule: packs the bytes of the parsed sequence into a
// dns_label_t with an arena-allocated, NUL-terminated copy of the text.
const HParsedToken* act_label(const HParseResult *p) {
  dns_label_t *lbl = H_ALLOC(dns_label_t);

  lbl->len = h_seq_len(p->ast);
  // One extra byte for the terminating NUL.
  lbl->label = h_arena_malloc(p->arena, lbl->len + 1);

  size_t k = 0;
  while (k < lbl->len) {
    lbl->label[k] = H_FIELD_UINT(k);
    ++k;
  }
  lbl->label[lbl->len] = 0;

  return H_MAKE(dns_label_t, lbl);
}
const HParsedToken* act_rr(const HParseResult *p) {
dns_rr_t *rr = H_ALLOC(dns_rr_t);
rr->name = *H_FIELD(dns_domain_t, 0);
rr->type = H_FIELD_UINT(1);
rr->class = H_FIELD_UINT(2);
rr->ttl = H_FIELD_UINT(3);
rr->rdlength = H_FIELD_SEQ(4)->used;
// Parse and pack RDATA.
set_rdata(rr, H_FIELD_SEQ(4));
return H_MAKE(dns_rr_t, rr);
}
// Action for the "question" rule: builds a dns_question_t from the parsed
// sequence (QNAME, QTYPE, QCLASS). The QNAME labels are copied by value into
// an arena-allocated array.
const HParsedToken* act_question(const HParseResult *p) {
  dns_question_t *question = H_ALLOC(dns_question_t);
  HParsedToken **parts = h_seq_elements(p->ast);
  HParsedToken *name = parts[0];

  question->qname.qlen = h_seq_len(name);
  question->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*question->qname.qlen);

  size_t k = 0;
  while (k < question->qname.qlen) {
    question->qname.labels[k] = *H_INDEX(dns_label_t, name, k);
    k++;
  }

  question->qtype  = H_CAST_UINT(parts[1]);
  question->qclass = H_CAST_UINT(parts[2]);

  return H_MAKE(dns_question_t, question);
}
const HParsedToken* act_message(const HParseResult *p) {
h_pprint(stdout, p->ast, 0, 2);
dns_message_t *msg = H_ALLOC(dns_message_t);
// Copy header into message struct.
dns_header_t *header = H_FIELD(dns_header_t, 0);
msg->header = *header;
// Copy questions into message struct.
HParsedToken *qs = h_seq_index(p->ast, 1);
struct dns_question *questions = h_arena_malloc(p->arena,
sizeof(struct dns_question)*(header.question_count));
for (size_t i=0; i<header.question_count; ++i) {
// QNAME is a sequence of labels. In the parser, it's defined as
// sequence(many1(length_value(...)), ch('\x00'), NULL).
questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]);
questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint;
questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint;
sizeof(struct dns_question)*(header->question_count));
for (size_t i=0; i<header->question_count; ++i) {
questions[i] = *H_INDEX(dns_question_t, qs, i);
}
msg->questions = questions;
HParsedToken *rrs = p->ast->seq->elements[2];
// Copy answer RRs into message struct.
HParsedToken *rrs = h_seq_index(p->ast, 2);
struct dns_rr *answers = h_arena_malloc(p->arena,
sizeof(struct dns_rr)*(header.answer_count));
for (size_t i=0; i<header.answer_count; ++i) {
answers[i].name = get_domain(rrs[i].seq->elements[0]);
answers[i].type = rrs[i].seq->elements[1]->uint;
answers[i].class = rrs[i].seq->elements[2]->uint;
answers[i].ttl = rrs[i].seq->elements[3]->uint;
answers[i].rdlength = rrs[i].seq->elements[4]->seq->used;
set_rr(answers[i], rrs[i].seq->elements[4]->seq);
sizeof(struct dns_rr)*(header->answer_count));
for (size_t i=0; i<header->answer_count; ++i) {
answers[i] = *H_INDEX(dns_rr_t, rrs, i);
}
msg->answers = answers;
// Copy authority RRs into message struct.
struct dns_rr *authority = h_arena_malloc(p->arena,
sizeof(struct dns_rr)*(header.authority_count));
for (size_t i=0, j=header.answer_count; i<header.authority_count; ++i, ++j) {
authority[i].name = get_domain(rrs[j].seq->elements[0]);
authority[i].type = rrs[j].seq->elements[1]->uint;
authority[i].class = rrs[j].seq->elements[2]->uint;
authority[i].ttl = rrs[j].seq->elements[3]->uint;
authority[i].rdlength = rrs[j].seq->elements[4]->seq->used;
set_rr(authority[i], rrs[j].seq->elements[4]->seq);
sizeof(struct dns_rr)*(header->authority_count));
for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) {
authority[i] = *H_INDEX(dns_rr_t, rrs, j);
}
msg->authority = authority;
// Copy additional RRs into message struct.
struct dns_rr *additional = h_arena_malloc(p->arena,
sizeof(struct dns_rr)*(header.additional_count));
for (size_t i=0, j=header.answer_count+header.authority_count; i<header.additional_count; ++i, ++j) {
additional[i].name = get_domain(rrs[j].seq->elements[0]);
additional[i].type = rrs[j].seq->elements[1]->uint;
additional[i].class = rrs[j].seq->elements[2]->uint;
additional[i].ttl = rrs[j].seq->elements[3]->uint;
additional[i].rdlength = rrs[j].seq->elements[4]->seq->used;
set_rr(additional[i], rrs[j].seq->elements[4]->seq);
sizeof(struct dns_rr)*(header->additional_count));
for (size_t i=0, j=header->answer_count+header->authority_count; i<header->additional_count; ++i, ++j) {
additional[i] = *H_INDEX(dns_rr_t, rrs, j);
}
msg->additional = additional;
ret->user = (void*)msg;
return H_MAKE(dns_message_t, msg);
}
// The "hdzero" rule is mapped to h_act_ignore.
#define act_hdzero h_act_ignore
// The "qname" rule uses act_index0 as its action.
// NOTE(review): act_index0 is not defined in the visible part of this file --
// confirm where it comes from.
#define act_qname act_index0
///
// Grammar
///
// Builds the DNS message grammar on first use and returns the cached
// top-level parser on every later call.
// NOTE(review): H_ARULE presumably attaches act_<rulename>, and H_AVRULE
// presumably attaches both act_<rulename> and validate_<rulename> -- confirm
// against glue.h.
const HParser* init_parser() {
// Cached result; non-NULL once the grammar has been built.
static const HParser *ret = NULL;
if (ret)
return ret;
H_RULE (domain, init_domain());
// Z field of the header: three bits.
H_AVRULE(hdzero, h_bits(3, false));
H_ARULE (header, h_sequence(h_bits(16, false), // ID
h_bits(1, false), // QR
h_bits(4, false), // opcode
h_bits(1, false), // AA
h_bits(1, false), // TC
h_bits(1, false), // RD
h_bits(1, false), // RA
hdzero, // Z
h_bits(4, false), // RCODE
h_uint16(), // QDCOUNT
h_uint16(), // ANCOUNT
h_uint16(), // NSCOUNT
h_uint16(), // ARCOUNT
NULL));
// TYPE is restricted to 1..16; QTYPE additionally allows 252..255.
H_RULE (type, h_int_range(h_uint16(), 1, 16));
H_RULE (qtype, h_choice(type,
h_int_range(h_uint16(), 252, 255),
NULL));
// CLASS is restricted to 1..4; QCLASS additionally allows 255.
H_RULE (class, h_int_range(h_uint16(), 1, 4));
H_RULE (qclass, h_choice(class,
h_int_range(h_uint16(), 255, 255),
NULL));
// A label is a length byte in 1..255 followed by that many bytes.
H_RULE (len, h_int_range(h_uint8(), 1, 255));
H_ARULE (label, h_length_value(len, h_uint8()));
// QNAME: one or more labels terminated by a zero byte.
H_ARULE (qname, h_sequence(h_many1(label),
h_ch('\x00'),
NULL));
H_ARULE (question, h_sequence(qname, qtype, qclass, NULL));
// RDATA: 16-bit length followed by that many bytes.
H_RULE (rdata, h_length_value(h_uint16(), h_uint8()));
H_ARULE (rr, h_sequence(domain, // NAME
type, // TYPE
class, // CLASS
h_uint32(), // TTL
rdata, // RDLENGTH+RDATA
NULL));
// A message: header, questions, resource records, then end of input.
H_AVRULE(message, h_sequence(header,
h_many(question),
h_many(rr),
h_end_p(),
NULL));
ret = message;
return ret;
}
// Builds the DNS message grammar on first use and returns the cached
// top-level parser on every later call. This variant assembles the grammar
// by hand: the whole message sequence is wrapped in h_attr_bool with
// validate_dns and then in h_action with pack_dns_struct.
const HParser* init_parser() {
// Cached result; non-NULL once the grammar has been built.
static HParser *dns_message = NULL;
if (dns_message)
return dns_message;
const HParser *domain = init_domain();
const HParser *dns_header = h_sequence(h_bits(16, false), // ID
h_bits(1, false), // QR
h_bits(4, false), // opcode
h_bits(1, false), // AA
h_bits(1, false), // TC
h_bits(1, false), // RD
h_bits(1, false), // RA
h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z
h_bits(4, false), // RCODE
h_uint16(), // QDCOUNT
h_uint16(), // ANCOUNT
h_uint16(), // NSCOUNT
h_uint16(), // ARCOUNT
NULL);
// TYPE is restricted to 1..16; QTYPE additionally allows 252..255.
const HParser *type = h_int_range(h_uint16(), 1, 16);
const HParser *qtype = h_choice(type,
h_int_range(h_uint16(), 252, 255),
NULL);
// CLASS is restricted to 1..4; QCLASS additionally allows 255.
const HParser *class = h_int_range(h_uint16(), 1, 4);
const HParser *qclass = h_choice(class,
h_int_range(h_uint16(), 255, 255),
NULL);
// QNAME is one or more length-prefixed labels terminated by a zero byte.
const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255),
h_uint8())),
h_ch('\x00'),
NULL), // QNAME
qtype, // QTYPE
qclass, // QCLASS
NULL);
const HParser *dns_rr = h_sequence(domain, // NAME
type, // TYPE
class, // CLASS
h_uint32(), // TTL
h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA
NULL);
// The cast drops const so the result can be stored in the non-const cache.
dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header,
h_many(dns_question),
h_many(dns_rr),
h_end_p(),
NULL),
validate_dns),
pack_dns_struct);
return dns_message;
}
///
// Main Program for a Dummy DNS Server
///
int start_listening() {
// return: fd
@ -442,7 +283,7 @@ int start_listening() {
const int TYPE_MAX = 16;
typedef const char* cstr;
const char* TYPE_STR[17] = {
static const char* TYPE_STR[17] = {
"nil", "A", "NS", "MD",
"MF", "CNAME", "SOA", "MB",
"MG", "MR", "NULL", "WKS",

View file

@ -1,6 +1,27 @@
#include "../src/hammer.h"
struct dns_header {
// Token type tags for the DNS-specific result structures. Numbering starts
// at TT_USER and increases by one per entry. Each tag is named TT_<type>.
// NOTE(review): the naming presumably lets H_MAKE/H_CAST/H_FIELD derive the
// tag from the type name by token pasting -- confirm against glue.h.
enum DNSTokenType_ {
TT_dns_message_t = TT_USER,
TT_dns_header_t,
TT_dns_label_t,
TT_dns_qname_t,
TT_dns_question_t,
TT_dns_rr_t,
TT_dns_rr_txt_t,
TT_dns_rr_hinfo_t,
TT_dns_rr_minfo_t,
TT_dns_rr_mx_t,
TT_dns_rr_soa_t,
TT_dns_rr_wks_t,
TT_dns_rr_null_t,
TT_dns_domain_t,
TT_dns_cstr_t
};
// A domain name is held as a char pointer, a character-string as a byte pointer.
typedef char *dns_domain_t;
typedef uint8_t *dns_cstr_t;
typedef struct dns_header {
uint16_t id;
bool qr, aa, tc, rd, ra;
char opcode, rcode;
@ -8,74 +29,93 @@ struct dns_header {
size_t answer_count;
size_t authority_count;
size_t additional_count;
};
struct dns_qname {
} dns_header_t;
typedef struct dns_label {
size_t len;
uint8_t *label;
} dns_label_t;
typedef struct dns_qname {
size_t qlen;
struct {
size_t len;
uint8_t *label;
} *labels;
};
struct dns_question {
struct dns_qname qname;
dns_label_t *labels;
} dns_qname_t;
typedef struct dns_question {
dns_qname_t qname;
uint16_t qtype;
uint16_t qclass;
};
struct dns_rr {
} dns_question_t;
// RDATA payload of an HINFO record: two character-strings.
typedef struct {
dns_cstr_t cpu; // first character-string of the RDATA
dns_cstr_t os; // second character-string of the RDATA
} dns_rr_hinfo_t;
// RDATA payload of a MINFO record: two domain names.
typedef struct {
char* rmailbx; // first domain name of the RDATA
char* emailbx; // second domain name of the RDATA
} dns_rr_minfo_t;
// RDATA payload of an MX record: a 16-bit preference followed by a domain name.
typedef struct {
uint16_t preference; // 16-bit value preceding the domain name
char* exchange; // domain name
} dns_rr_mx_t;
// RDATA payload of an SOA record: two domain names followed by five 32-bit values,
// in the order they appear on the wire.
typedef struct {
char* mname; // first domain name
char* rname; // second domain name
uint32_t serial;
uint32_t refresh;
uint32_t retry;
uint32_t expire;
uint32_t minimum;
} dns_rr_soa_t;
// RDATA payload of a TXT record: an array of byte strings.
typedef struct {
size_t count; // number of entries in txt_data
uint8_t** txt_data; // one byte buffer per character-string
} dns_rr_txt_t;
// RDATA payload of a WKS record: 32-bit address, 8-bit protocol and a trailing byte map.
typedef struct {
uint32_t address;
uint8_t protocol;
size_t len; // number of bytes in bit_map
uint8_t* bit_map; // remaining RDATA bytes after address and protocol
} dns_rr_wks_t;
typedef uint8_t *dns_rr_null_t;
typedef struct dns_rr {
char* name;
uint16_t type;
uint16_t class;
uint32_t ttl; // cmos is also acceptable.
uint16_t rdlength;
union {
char* cname;
struct {
uint8_t* cpu;
uint8_t* os;
} hinfo;
char* mb;
char* md;
char* mf;
char* mg;
struct {
char* rmailbx;
char* emailbx;
} minfo;
char* mr;
struct {
uint16_t preference;
char* exchange;
} mx;
uint8_t* null;
char* ns;
char* ptr;
struct {
char* mname;
char* rname;
uint32_t serial;
uint32_t refresh;
uint32_t retry;
uint32_t expire;
uint32_t minimum;
} soa;
struct {
size_t count;
uint8_t** txt_data;
} txt;
uint32_t a;
struct {
uint32_t address;
uint8_t protocol;
size_t len;
uint8_t* bit_map;
} wks;
uint32_t a;
char* ns;
char* md;
char* mf;
char* cname;
dns_rr_soa_t soa;
char* mb;
char* mg;
char* mr;
dns_rr_null_t null;
dns_rr_wks_t wks;
char* ptr;
dns_rr_hinfo_t hinfo;
dns_rr_minfo_t minfo;
dns_rr_mx_t mx;
dns_rr_txt_t txt;
};
};
} dns_rr_t;
typedef struct dns_message {
struct dns_header header;
struct dns_question *questions;
struct dns_rr *answers;
struct dns_rr *authority;
struct dns_rr *additional;
dns_header_t header;
dns_question_t *questions;
dns_rr_t *answers;
dns_rr_t *authority;
dns_rr_t *additional;
} dns_message_t;

View file

@ -1,9 +1,12 @@
#include "../src/hammer.h"
#include "dns_common.h"
#include "dns.h"
#define false 0
#define true 1
H_ACT_APPLY(act_index0, h_act_index, 0)
/**
* A label can't be more than 63 characters.
*/
@ -13,51 +16,64 @@ bool validate_label(HParseResult *p) {
return (64 > p->ast->seq->used);
}
#define act_label h_act_flatten
/**
 * Semantic action for the `domain` rule.
 *
 * Turns the parse result into a NUL-terminated C string and wraps it in a
 * dns_domain_t token:
 *  - a TT_UINT result (the single ' ' alternative) becomes the string " ";
 *  - a TT_SEQUENCE result (labels separated by '.') becomes "label.label...".
 * Any other token type yields NULL.
 *
 * @param p parse result whose AST is either a single uint or a sequence of
 *          labels, each label itself a sequence of uint characters.
 * @return  a dns_domain_t token, or NULL for an unexpected AST shape.
 */
const HParsedToken* act_domain(const HParseResult *p) {
  const HParsedToken *ret = NULL;
  char *arr = NULL;

  switch(p->ast->token_type) {
  case TT_UINT:
    arr = " ";
    break;
  case TT_SEQUENCE: {
    // Sequence of subdomains separated by "."
    // Each subdomain is a label, which can be no more than 63 chars, so
    // 64 bytes per label covers the characters plus one separator.
    // The extra byte keeps the terminator in bounds for an empty sequence.
    size_t labels = p->ast->seq->used;
    arr = h_arena_malloc(p->arena, 64*labels + 1);
    size_t count = 0;
    for (size_t i=0; i<labels; ++i) {
      const HParsedToken *label = p->ast->seq->elements[i];
      for (size_t j=0; j<label->seq->used; ++j) {
        // Index the label's characters with j; the previous code used i here,
        // which copied the wrong element (or read past the label).
        arr[count] = label->seq->elements[j]->uint;
        ++count;
      }
      arr[count] = '.';
      ++count;
    }
    // Overwrite the trailing '.' with the terminator (or write it at index 0
    // if nothing was copied).
    arr[count ? count-1 : 0] = '\x00';
    break;
  }
  default:
    arr = NULL;
    ret = NULL;
  }

  if(arr) {
    dns_domain_t *val = H_ALLOC(dns_domain_t);  // dns_domain_t is char*
    *val = arr;
    ret = H_MAKE(dns_domain_t, val);
  }

  return ret;
}
const HParser* init_domain() {
static const HParser *domain = NULL;
if (domain)
return domain;
static const HParser *ret = NULL;
if (ret)
return ret;
const HParser *letter = h_choice(h_ch_range('a', 'z'),
h_ch_range('A', 'Z'),
NULL);
H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL));
H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL));
H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL)));
H_VARULE(label, h_sequence(letter,
h_optional(h_sequence(h_optional(ldh_str),
let_dig,
NULL)),
NULL));
H_RULE (subdomain, h_sepBy1(label, h_ch('.')));
H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL));
const HParser *let_dig = h_choice(letter,
h_ch_range('0', '9'),
NULL);
const HParser *ldh_str = h_many1(h_choice(let_dig,
h_ch('-'),
NULL));
const HParser *label = h_attr_bool(h_sequence(letter,
h_optional(h_sequence(h_optional(ldh_str),
let_dig,
NULL)),
NULL),
validate_label);
/**
* You could write it like this ...
* HParser *indirect_subdomain = h_indirect();
* const HParser *subdomain = h_choice(label,
* h_sequence(indirect_subdomain,
* h_ch('.'),
* label,
* NULL),
* NULL);
* h_bind_indirect(indirect_subdomain, subdomain);
*
* ... but this is easier and equivalent
*/
const HParser *subdomain = h_sepBy1(label, h_ch('.'));
domain = h_choice(subdomain,
h_ch(' '),
NULL);
return domain;
ret = domain;
return ret;
}
const HParser* init_character_string() {

View file

@ -2,8 +2,11 @@
#define HAMMER_DNS_COMMON__H
#include "../src/hammer.h"
#include "../src/glue.h"
const HParser* init_domain();
const HParser* init_character_string();
const HParsedToken* act_index0(const HParseResult *p);
#endif

View file

@ -1,124 +1,15 @@
#include "../src/hammer.h"
#include "dns_common.h"
#include "dns.h"
#include "rr.h"
#define false 0
#define true 1
/**
 * Return the CNAME RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_cname() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(init_domain(), h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the HINFO RDATA parser (two character-strings followed by end of
 * input), building it on first use and reusing it afterwards.
 */
const HParser* init_hinfo() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    const HParser *str = init_character_string();
    cached = h_sequence(str, str, h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MB RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_mb() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(init_domain(), h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MD RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_md() {
  static const HParser *md = NULL;
  if (md)
    return md;

  // h_end_p must be *called*: passing the bare function name handed
  // h_sequence a function pointer instead of an end-of-input parser.
  md = h_sequence(init_domain(),
                  h_end_p(),
                  NULL);

  return md;
}
/**
 * Return the MF RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_mf() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(init_domain(), h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MG RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_mg() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(init_domain(), h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MINFO RDATA parser (two domains followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_minfo() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    const HParser *name = init_domain();
    cached = h_sequence(name, name, h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MR RDATA parser (a domain followed by end of input),
 * building it on first use and reusing it afterwards.
 */
const HParser* init_mr() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(init_domain(), h_end_p(), NULL);
  }
  return cached;
}
/**
 * Return the MX RDATA parser (a 16-bit unsigned integer, a domain, then end
 * of input), building it on first use and reusing it afterwards.
 */
const HParser* init_mx() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    cached = h_sequence(h_uint16(), init_domain(), h_end_p(), NULL);
  }
  return cached;
}
///
// Validations and Semantic Actions
///
bool validate_null(HParseResult *p) {
if (TT_SEQUENCE != p->ast->token_type)
@ -126,94 +17,177 @@ bool validate_null(HParseResult *p) {
return (65536 > p->ast->seq->used);
}
const HParser* init_null() {
static const HParser *null_ = NULL;
if (null_)
return null_;
const HParsedToken *act_null(const HParseResult *p) {
dns_rr_null_t *null = H_ALLOC(dns_rr_null_t);
null_ = h_attr_bool(h_many(h_uint8()), validate_null);
size_t len = h_seq_len(p->ast);
uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len);
for (size_t i=0; i<len; ++i)
buf[i] = H_FIELD_UINT(i);
return null_;
return H_MAKE(dns_rr_null_t, null);
}
const HParser* init_ns() {
static const HParser *ns = NULL;
if (ns)
return ns;
const HParsedToken *act_txt(const HParseResult *p) {
dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t);
ns = h_sequence(init_domain(),
h_end_p(),
NULL);
const HCountedArray *arr = H_CAST_SEQ(p->ast);
uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used);
for (size_t i=0; i<arr->used; ++i) {
size_t len = h_seq_len(arr->elements[i]);
uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len);
for (size_t j=0; j<len; ++j)
tmp[j] = H_INDEX_UINT(arr->elements[i], j);
ret[i] = tmp;
}
return ns;
txt->count = arr->used;
txt->txt_data = ret;
return H_MAKE(dns_rr_txt_t, txt);
}
const HParser* init_ptr() {
static const HParser *ptr = NULL;
if (ptr)
return ptr;
// Semantic action for a character-string: copies the uint elements of the
// parsed sequence into a freshly arena-allocated byte buffer and returns it
// wrapped in a dns_cstr_t token.
const HParsedToken* act_cstr(const HParseResult *p) {
dns_cstr_t *cs = H_ALLOC(dns_cstr_t);
const HCountedArray *arr = H_CAST_SEQ(p->ast);
uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used);
// Narrow each parsed element to one byte.
for (size_t i=0; i<arr->used; ++i)
ret[i] = H_CAST_UINT(arr->elements[i]);
// NOTE(review): for an empty sequence arr->used-1 wraps around and this reads out of bounds.
assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation?
*cs = ret;
return H_MAKE(dns_cstr_t, cs);
}
// Semantic action for SOA RDATA: fills a dns_rr_soa_t from the seven fields
// of the parsed sequence (two domain tokens, then five unsigned integers)
// and returns it as a dns_rr_soa_t token.
const HParsedToken* act_soa(const HParseResult *p) {
dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t);
// Fields 0 and 1 are dns_domain_t tokens; copy the string pointers they hold.
soa->mname = *H_FIELD(dns_domain_t, 0);
soa->rname = *H_FIELD(dns_domain_t, 1);
soa->serial = H_FIELD_UINT(2);
soa->refresh = H_FIELD_UINT(3);
soa->retry = H_FIELD_UINT(4);
soa->expire = H_FIELD_UINT(5);
soa->minimum = H_FIELD_UINT(6);
return H_MAKE(dns_rr_soa_t, soa);
}
// Semantic action for WKS RDATA: reads address (field 0) and protocol
// (field 1), then copies the byte sequence in field 2 into an arena-allocated
// bit_map buffer. Returns the result as a dns_rr_wks_t token.
const HParsedToken* act_wks(const HParseResult *p) {
dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t);
wks->address = H_FIELD_UINT(0);
wks->protocol = H_FIELD_UINT(1);
// The bitmap length is the number of elements parsed in field 2.
wks->len = H_FIELD_SEQ(2)->used;
wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len);
for (size_t i=0; i<wks->len; ++i)
wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i);
return H_MAKE(dns_rr_wks_t, wks);
}
// Semantic action for HINFO RDATA: takes the two dns_cstr_t tokens in
// fields 0 and 1 and returns them as a dns_rr_hinfo_t token.
const HParsedToken* act_hinfo(const HParseResult *p) {
dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t);
hinfo->cpu = *H_FIELD(dns_cstr_t, 0);
hinfo->os = *H_FIELD(dns_cstr_t, 1);
return H_MAKE(dns_rr_hinfo_t, hinfo);
}
// Semantic action for MINFO RDATA: takes the two dns_domain_t tokens in
// fields 0 and 1 and returns them as a dns_rr_minfo_t token.
const HParsedToken* act_minfo(const HParseResult *p) {
dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t);
minfo->rmailbx = *H_FIELD(dns_domain_t, 0);
minfo->emailbx = *H_FIELD(dns_domain_t, 1);
return H_MAKE(dns_rr_minfo_t, minfo);
}
// Semantic action for MX RDATA: reads the unsigned preference (field 0) and
// the dns_domain_t token (field 1) and returns them as a dns_rr_mx_t token.
const HParsedToken* act_mx(const HParseResult *p) {
dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t);
mx->preference = H_FIELD_UINT(0);
mx->exchange = *H_FIELD(dns_domain_t, 1);
return H_MAKE(dns_rr_mx_t, mx);
}
///
// Parsers for all types of RDATA
///
#define RDATA_TYPE_MAX 16
const HParser* init_rdata(uint16_t type) {
static const HParser *parsers[RDATA_TYPE_MAX+1];
static int inited = 0;
// parsers[] has RDATA_TYPE_MAX+1 slots. sizeof(parsers) is a byte count, not
// an element count, so comparing against it let out-of-range types through.
if (type > RDATA_TYPE_MAX)
  return NULL;
ptr = h_sequence(init_domain(),
h_end_p(),
NULL);
if (inited)
return parsers[type];
return ptr;
}
const HParser* init_soa() {
static const HParser *soa = NULL;
if (soa)
return soa;
const HParser *domain = init_domain();
soa = h_sequence(domain, // MNAME
domain, // RNAME
h_uint32(), // SERIAL
h_uint32(), // REFRESH
h_uint32(), // RETRY
h_uint32(), // EXPIRE
h_uint32(), // MINIMUM
h_end_p(),
NULL);
return soa;
}
const HParser* init_txt() {
static const HParser *txt = NULL;
if (txt)
return txt;
txt = h_sequence(h_many1(init_character_string()),
h_end_p(),
NULL);
return txt;
}
const HParser* init_a() {
static const HParser *a = NULL;
if (a)
return a;
a = h_sequence(h_uint32(),
h_end_p(),
NULL);
return a;
}
const HParser* init_wks() {
static const HParser *wks = NULL;
if (wks)
return wks;
wks = h_sequence(h_uint32(),
h_uint8(),
h_many(h_uint8()),
h_end_p(),
NULL);
return wks;
H_RULE (domain, init_domain());
H_ARULE(cstr, init_character_string());
H_RULE (a, h_uint32());
H_RULE (ns, domain);
H_RULE (md, domain);
H_RULE (mf, domain);
H_RULE (cname, domain);
H_ARULE(soa, h_sequence(domain, // MNAME
domain, // RNAME
h_uint32(), // SERIAL
h_uint32(), // REFRESH
h_uint32(), // RETRY
h_uint32(), // EXPIRE
h_uint32(), // MINIMUM
NULL));
H_RULE (mb, domain);
H_RULE (mg, domain);
H_RULE (mr, domain);
H_VRULE(null, h_many(h_uint8()));
H_RULE (wks, h_sequence(h_uint32(),
h_uint8(),
h_many(h_uint8()),
NULL));
H_RULE (ptr, domain);
H_RULE (hinfo, h_sequence(cstr, cstr, NULL));
H_RULE (minfo, h_sequence(domain, domain, NULL));
H_RULE (mx, h_sequence(h_uint16(), domain, NULL));
H_ARULE(txt, h_many1(cstr));
parsers[ 0] = NULL; // there is no type 0
parsers[ 1] = a;
parsers[ 2] = ns;
parsers[ 3] = md;
parsers[ 4] = mf;
parsers[ 5] = cname;
parsers[ 6] = soa;
parsers[ 7] = mb;
parsers[ 8] = mg;
parsers[ 9] = mr;
parsers[10] = null;
parsers[11] = wks;
parsers[12] = ptr;
parsers[13] = hinfo;
parsers[14] = minfo;
parsers[15] = mx;
parsers[16] = txt;
// All parsers must consume their input exactly.
for(uint16_t i; i<sizeof(parsers); i++) {
if(parsers[i]) {
parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
act_index0);
}
}
inited = 1;
return parsers[type];
}

View file

@ -3,21 +3,6 @@
#include "../src/hammer.h"
const HParser* init_cname();
const HParser* init_hinfo();
const HParser* init_mb();
const HParser* init_md();
const HParser* init_mf();
const HParser* init_mg();
const HParser* init_minfo();
const HParser* init_mr();
const HParser* init_mx();
const HParser* init_null();
const HParser* init_ns();
const HParser* init_ptr();
const HParser* init_soa();
const HParser* init_txt();
const HParser* init_a();
const HParser* init_wks();
const HParser* init_rdata(uint16_t type);
#endif

92
jni/Example.java Normal file
View file

@ -0,0 +1,92 @@
import com.upstandinghackers.hammer.*;
import java.util.Arrays;
/**
* Example JHammer usage
*/
public class Example
{
static {
System.loadLibrary("jhammer");
}
private static void handle(ParseResult result)
{
if(result == null)
{
System.out.println("FAIL");
}
else
{
System.out.println("PASS");
handleToken(result.getAst());
}
}
private static void handleToken(ParsedToken p)
{
if(p==null)
{
System.out.println("Empty AST");
return;
}
switch(p.getTokenType())
{
case NONE: out("NONE token type"); break;
case BYTES: out("BYTES token type, value: " + Arrays.toString(p.getBytesValue())); break;
case SINT: out("SINT token type, value: " + p.getSIntValue()); break;
case UINT: out("UINT token type, value: " + p.getUIntValue()); break;
case SEQUENCE: out("SEQUENCE token type"); for(ParsedToken tok : p.getSeqValue()) {handleToken(tok);} break;
case ERR: out("ERR token type"); break;
case USER: out("USER token type"); break;
}
}
private static void out(String msg)
{
System.out.println(">> " + msg);
}
public static void main(String args[])
{
out("chRange");
handle(Hammer.parse(Hammer.chRange((byte)0x30, (byte)0x39), "1".getBytes(), 1));
handle(Hammer.parse(Hammer.chRange((byte)0x30, (byte)0x39), "a".getBytes(), 1));
out("ch");
handle(Hammer.parse(Hammer.ch((byte)0x31), "1".getBytes(), 1));
handle(Hammer.parse(Hammer.ch((byte)0x31), "0".getBytes(), 1));
out("token");
handle(Hammer.parse(Hammer.token("herp".getBytes(), 4), "herp".getBytes(), 4));
handle(Hammer.parse(Hammer.token("herp".getBytes(), 4), "derp".getBytes(), 4));
out("intRange");
byte inbytes[] = {0x31, 0x31, 0x31, 0x31};
handle(Hammer.parse(Hammer.intRange(Hammer.uInt8(), 0L, 0x32), inbytes, inbytes.length));
handle(Hammer.parse(Hammer.intRange(Hammer.uInt8(), 0L, 0x30), inbytes, inbytes.length));
out("bits");
handle(Hammer.parse(Hammer.bits(7, false), inbytes, inbytes.length));
out("int64");
byte ints[] = {(byte)0x8F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF};
handle(Hammer.parse(Hammer.int64(), ints, ints.length));
handle(Hammer.parse(Hammer.int64(), inbytes, inbytes.length));
out("choice");
Parser two32s[] = {Hammer.intRange(Hammer.uInt32(), 0x00, 0x01), Hammer.int32()};
handle(Hammer.parse(Hammer.choice(Hammer.intRange(Hammer.uInt32(), 0x00, 0x01), Hammer.int32()), ints, ints.length));
out("sequence");
byte i3[] = {(byte)'i', (byte)3, (byte)0xFF};
Parser i3parsers[] = {Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()};
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()), i3, i3.length));
}
}

42
jni/Makefile Normal file
View file

@ -0,0 +1,42 @@
JSOURCES := Action.java Hammer.java ParsedToken.java ParseResult.java Parser.java Predicate.java
JSOURCES_NATIVE := Hammer ParsedToken Parser ParseResult
CSOURCES := com_upstandinghackers_hammer_Hammer.c com_upstandinghackers_hammer_ParsedToken.c com_upstandinghackers_hammer_Parser.c com_upstandinghackers_hammer_ParseResult.c
# ls *.h *.o *.so com/upstandinghackers/hammer/*.class | grep -v jhammer.h | tr '\n' ' '; replace single $ with $$
OUTPUTS := com/upstandinghackers/hammer/Action.class com/upstandinghackers/hammer/Hammer.class com_upstandinghackers_hammer_Hammer.h com_upstandinghackers_hammer_Hammer.o com/upstandinghackers/hammer/Hammer\$TokenType.class com_upstandinghackers_hammer_Hammer_TokenType.h com/upstandinghackers/hammer/ParsedToken.class com_upstandinghackers_hammer_ParsedToken.h com_upstandinghackers_hammer_ParsedToken.o com/upstandinghackers/hammer/Parser.class com/upstandinghackers/hammer/ParseResult.class com_upstandinghackers_hammer_ParseResult.h com_upstandinghackers_hammer_ParseResult.o com_upstandinghackers_hammer_Parser.h com_upstandinghackers_hammer_Parser.o com/upstandinghackers/hammer/Predicate.class libjhammer.so
TOPLEVEL := ../
JC=javac
JH=javah
CP=com/upstandinghackers/hammer
PACKAGE=com.upstandinghackers.hammer
include ../common.mk
JNI_INCLUDE := /usr/lib/jvm/java-6-openjdk/include/
CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
%.java: $(call ifsilent,| $(HUSH))
$(call hush, "Compiling Java source $@") $(JC) $(CP)/$@
all: javacc prepare compile link
link: compile
$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../src/*.o ../src/backends/*.o ../src/parsers/*.o
$(CSOURCES): prepare
$(call hush, "Compiling $@") $(CC) -c $(CFLAGS) $@
compile: prepare $(CSOURCES)
prepare: javacc $(JSOURCES_NATIVE)
$(JSOURCES_NATIVE): javacc
$(call hush, "Generating JNI headers for $@") $(JH) $(PACKAGE).$@
javacc: $(JSOURCES)
#TODO make this not-as-hardcoded
#clean:
# rm $(CP)/*.class && rm com_upstandinghackers_*.h && rm com_upstandinghackers_*.o && rm libjhammer.so

26
jni/NOTES Normal file
View file

@ -0,0 +1,26 @@
USING THE JNI BINDINGS:
1. import com.upstandinghackers.hammer.*;
2. Add a static initializer block that loads the correct library, like this: static { System.loadLibrary("jhammer"); }
3. Code stuff. Just look at Example.java for a few handy snippets (for walking the resulting syntax tree [AST] etc)
4. Compile your java sources like always
5. Add the folder containing libjhammer.so/dll/whatever to Java's library path to run it, for example: java -Djava.library.path=. <CLASS>
UNIMPLEMENTED:
User-defined types, predicates and actions are unimplemented.
Memory leaks because there is no reliable garbage collection.
TODO:
Testing
TYPE MAPPING:
Hammer Java JNI
uint8_t byte jbyte jbyte/byte is signed
char byte jbyte jchar would be 16 bit wide
size_t int jint signed as well; jsize == jint, actually
int64_t long jlong
uint64_t long jlong signed!
bool boolean jboolean JNI_TRUE / JNI_FALSE
float float jfloat
double double jdouble
void void void

View file

@ -0,0 +1,8 @@
package com.upstandinghackers.hammer;
import java.util.List;
/**
 * Callback interface for a semantic action written in Java.
 * NOTE(review): nothing in the bindings consumes this yet; the
 * Hammer.action(Parser, Action) declaration is commented out.
 */
public interface Action
{
// Receives a parse result and returns a list of tokens.
public List<ParsedToken> execute(ParseResult p);
}

View file

@ -0,0 +1,76 @@
package com.upstandinghackers.hammer;
import java.util.HashMap;
/**
 * Static entry points of the JNI bindings: one native factory method per
 * parser combinator plus {@link #parse}. The native library must be loaded
 * by the application (System.loadLibrary("jhammer")) before any call.
 */
public class Hammer
{
    public final static byte BYTE_BIG_ENDIAN = 0x1;
    public final static byte BIT_BIG_ENDIAN = 0x2;
    public final static byte BYTE_LITTLE_ENDIAN = 0x0;
    public final static byte BIT_LITTLE_ENDIAN = 0x0;

    // Maps the integer type code reported by the native side to its enum
    // constant. Must be declared before the static block below that fills it.
    static final HashMap<Integer, TokenType> tokenTypeMap = new HashMap<Integer, TokenType>();

    /**
     * Token types. The integer values are what ParsedToken's native
     * getTokenTypeInternal() returns, so they have to match the native constants.
     */
    public enum TokenType
    {
        NONE(1),
        BYTES(2),
        SINT(4),
        UINT(8),
        SEQUENCE(16),
        ERR(32),
        USER(64);

        private final int value;
        public int getValue() { return this.value; }
        private TokenType(int value) { this.value = value; }
    }

    static
    {
        for(TokenType tt : TokenType.values())
        {
            // Integer.valueOf reuses cached boxes instead of allocating one per key.
            Hammer.tokenTypeMap.put(Integer.valueOf(tt.getValue()), tt);
        }
    }

    /** Runs parser over the first length bytes of input; the native side returns null when the parse fails. */
    public static native ParseResult parse(Parser parser, byte[] input, int length);
    public static native Parser token(byte[] str, int length);
    public static native Parser ch(byte c);
    public static native Parser chRange(byte from, byte to);
    public static native Parser intRange(Parser p, long lower, long upper);
    public static native Parser bits(int len, boolean sign);
    public static native Parser int64();
    public static native Parser int32();
    public static native Parser int16();
    public static native Parser int8();
    public static native Parser uInt64();
    public static native Parser uInt32();
    public static native Parser uInt16();
    public static native Parser uInt8();
    public static native Parser whitespace(Parser p);
    public static native Parser left(Parser p, Parser q);
    public static native Parser right(Parser p, Parser q);
    public static native Parser middle(Parser p, Parser x, Parser q);
//    public static native Parser action(Parser p, Action a);
    public static native Parser in(byte[] charset, int length);
    public static native Parser endP();
    public static native Parser nothingP();
    public static native Parser sequence(Parser... parsers);
    public static native Parser choice(Parser... parsers);
    public static native Parser butNot(Parser p1, Parser p2);
    public static native Parser difference(Parser p1, Parser p2);
    public static native Parser xor(Parser p1, Parser p2);
    public static native Parser many(Parser p);
    public static native Parser many1(Parser p);
    public static native Parser repeatN(Parser p, int n);
    public static native Parser optional(Parser p);
    public static native Parser ignore(Parser p);
    public static native Parser sepBy(Parser p, Parser sep);
    public static native Parser sepBy1(Parser p, Parser sep);
    public static native Parser epsilonP();
    public static native Parser lengthValue(Parser length, Parser value);
//    public static native Parser attrBool(Parser p, Predicate pred);
    public static native Parser and(Parser p);
    public static native Parser not(Parser p);
    public static native Parser indirect();
}

View file

@ -0,0 +1,15 @@
package com.upstandinghackers.hammer;
import java.util.List;
/**
 * Java handle for a native parse result. Instances are created by native code
 * through the package-private constructor.
 */
public class ParseResult
{
// Root token of the parse; the native side returns null when the result has no AST.
public native ParsedToken getAst();
public native long getBitLength();
// NOTE: the native implementation is currently an empty stub.
public native void free();
public long getInner() {return this.inner;}
// Native pointer stored as a long; read by the native code via the field name "inner".
private long inner;
ParseResult(long inner) {this.inner=inner;}
}

View file

@ -0,0 +1,40 @@
package com.upstandinghackers.hammer;
/**
 * Java handle for a native parsed token. Instances are created by native code
 * through the package-private constructor.
 */
public class ParsedToken
{
    /**
     * Returns the token's type, or null when the native type code is 0 or has
     * no matching Hammer.TokenType constant.
     */
    public Hammer.TokenType getTokenType()
    {
        int tt = this.getTokenTypeInternal();
        if(0==tt)
            return null;
        // Integer.valueOf reuses cached boxes instead of allocating a new key per lookup.
        return Hammer.tokenTypeMap.get(Integer.valueOf(tt));
    }

    private native int getTokenTypeInternal();
    public native int getIndex();
    public native byte getBitOffset();
    public native byte[] getBytesValue();
    public native long getSIntValue();
    public native long getUIntValue();
    public native double getDoubleValue();
    public native float getFloatValue();
    public native ParsedToken[] getSeqValue();
//    public native Object getUserValue();

    native void setTokenType(Hammer.TokenType type);
    native void setIndex(int index);
    native void setBitOffset(byte offset);
    native void setBytesValue(byte[] value);
    native void setSIntValue(long value);
    native void setUIntValue(long value);
    native void setDoubleValue(double value);
    native void setFloatValue(float value);
    native void setSeqValue(ParsedToken value[]);
//    native void setUserValue(Object value);

//    public native void free();
    public long getInner() {return this.inner;}

    // Native pointer stored as a long; read by the native code via the field name "inner".
    private long inner;
    ParsedToken(long inner) {this.inner=inner;}
}

View file

@ -0,0 +1,11 @@
package com.upstandinghackers.hammer;
/**
 * Java handle for a native parser. Instances are created by native code
 * through the package-private constructor.
 */
public class Parser
{
public native void bindIndirect(Parser inner);
public native void free();
public long getInner() {return this.inner;}
// Native pointer stored as a long.
private long inner;
Parser(long inner) {this.inner=inner;}
}

View file

@ -0,0 +1,6 @@
package com.upstandinghackers.hammer;
/**
 * Callback interface for a boolean check on a parse result.
 * NOTE(review): nothing in the bindings consumes this yet; the
 * Hammer.attrBool(Parser, Predicate) declaration is commented out.
 */
public interface Predicate
{
public boolean apply(ParseResult p);
}

View file

@ -0,0 +1,335 @@
#include "jhammer.h"
#include "com_upstandinghackers_hammer_Hammer.h"
#include <stdlib.h>
/*
 * JNI implementation of Hammer.parse(Parser, byte[], int).
 *
 * Runs the native parser wrapped by obj over the first length_ bytes of
 * input_ and returns a new ParseResult object, or NULL (Java null) when the
 * parse fails or the arguments are unusable (null parser/input, or a length
 * that is negative or larger than the array).
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_parse
  (JNIEnv *env, jclass class, jobject obj, jbyteArray input_, jint length_)
{
    HParser *parser;
    uint8_t* input;
    size_t length;
    HParseResult *result;
    jclass resultClass;
    jobject retVal;

    // Reject arguments that would make the native code dereference NULL or
    // read past the end of the Java array.
    if(obj == NULL || input_ == NULL)
        return NULL;
    if(length_ < 0 || length_ > (*env)->GetArrayLength(env, input_))
        return NULL;

    parser = UNWRAP(env, obj);

    input = (uint8_t *) ((*env)->GetByteArrayElements(env, input_, NULL));
    if(input == NULL)
        return NULL; // the JVM could not provide the elements
    length = (size_t) length_;

    // NOTE(review): the elements are deliberately not released here, as in the
    // original; the parse result may still point into this buffer -- confirm
    // before adding a ReleaseByteArrayElements call.
    result = h_parse(parser, input, length);

    if(result==NULL)
        return NULL;

    FIND_CLASS(resultClass, env, "com/upstandinghackers/hammer/ParseResult");

    NEW_INSTANCE(retVal, env, resultClass, result);

    return retVal;
}
// JNI binding for Hammer.token(byte[], int): passes the array's elements and len to h_token().
// NOTE(review): the elements pointer is never released and len is not checked against
// the array length; h_token may keep the pointer, so confirm before releasing it.
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_token
(JNIEnv *env, jclass class, jbyteArray str, jint len)
{
RETURNWRAP(env, h_token((uint8_t *) ((*env)->GetByteArrayElements(env, str, NULL)), (size_t) len));
}
// JNI binding for Hammer.ch(byte): reinterprets the signed jbyte as uint8_t and passes it to h_ch().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_ch
(JNIEnv *env, jclass class, jbyte c)
{
RETURNWRAP(env, h_ch((uint8_t) c));
}
// JNI binding for Hammer.chRange(byte, byte): both bounds are reinterpreted as uint8_t for h_ch_range().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_chRange
(JNIEnv *env, jclass class, jbyte lower, jbyte upper)
{
RETURNWRAP(env, h_ch_range((uint8_t) lower, (uint8_t) upper));
}
// JNI binding for Hammer.intRange(Parser, long, long): unwraps the native parser from obj
// and passes it with both bounds (as int64_t) to h_int_range().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_intRange
(JNIEnv *env, jclass class, jobject obj, jlong lower, jlong upper)
{
HParser *parser;
parser = UNWRAP(env, obj);
RETURNWRAP(env, h_int_range(parser, (int64_t) lower, (int64_t) upper));
}
// JNI binding for Hammer.bits(int, boolean): passes len as size_t and the sign flag,
// masked with JNI_TRUE, to h_bits().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_bits
(JNIEnv *env, jclass class, jint len, jboolean sign)
{
RETURNWRAP(env, h_bits((size_t) len, (bool)(sign & JNI_TRUE)));
}
// JNI binding for Hammer.int64(): wraps the parser returned by h_int64().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int64
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_int64());
}
// JNI binding for Hammer.int32(): wraps the parser returned by h_int32().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int32
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_int32());
}
// JNI binding for Hammer.int16(): wraps the parser returned by h_int16().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int16
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_int16());
}
// JNI binding for Hammer.int8(): wraps the parser returned by h_int8().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int8
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_int8());
}
// JNI binding for Hammer.uInt64(): wraps the parser returned by h_uint64().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt64
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_uint64());
}
// JNI binding for Hammer.uInt32(): wraps the parser returned by h_uint32().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt32
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_uint32());
}
// JNI binding for Hammer.uInt16(): wraps the parser returned by h_uint16().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt16
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_uint16());
}
// JNI binding for Hammer.uInt8(): wraps the parser returned by h_uint8().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt8
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_uint8());
}
// JNI binding for Hammer.whitespace(Parser): unwraps the native parser and passes it to h_whitespace().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_whitespace
(JNIEnv *env, jclass class, jobject parser)
{
RETURNWRAP(env, h_whitespace(UNWRAP(env, parser)));
}
// JNI binding for Hammer.left(Parser, Parser): unwraps both parsers and passes them to h_left().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_left
(JNIEnv *env, jclass class, jobject p, jobject q)
{
RETURNWRAP(env, h_left(UNWRAP(env, p), UNWRAP(env, q)));
}
// JNI binding for Hammer.right(Parser, Parser): unwraps both parsers and passes them to h_right().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_right
(JNIEnv *env, jclass class, jobject p, jobject q)
{
RETURNWRAP(env, h_right(UNWRAP(env, p), UNWRAP(env, q)));
}
// JNI binding for Hammer.middle(Parser, Parser, Parser): unwraps the three parsers and passes them to h_middle().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_middle
(JNIEnv *env, jclass class, jobject p, jobject x, jobject q)
{
RETURNWRAP(env, h_middle(UNWRAP(env, p), UNWRAP(env, x), UNWRAP(env, q)));
}
// JNI binding for Hammer.in(byte[], int): passes the array's elements and length to h_in().
// NOTE(review): the elements pointer is never released and length is not checked
// against the actual array length.
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_in
(JNIEnv *env, jclass class, jbyteArray charset, jint length)
{
RETURNWRAP(env, h_in((uint8_t *) ((*env)->GetByteArrayElements(env, charset, NULL)), (size_t)length));
}
// JNI binding for Hammer.endP(): wraps the parser returned by h_end_p().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_endP
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_end_p());
}
// JNI binding for Hammer.nothingP(): wraps the parser returned by h_nothing_p().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_nothingP
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_nothing_p());
}
/*
 * JNI implementation of Hammer.sequence(Parser...).
 *
 * Unwraps every element of the Java array into a NULL-terminated array of
 * native parsers, builds the sequence parser with h_sequence__a() and returns
 * it wrapped. Returns NULL if the temporary array cannot be allocated.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sequence
  (JNIEnv *env, jclass class, jobjectArray sequence)
{
    jsize length;
    void **parsers;
    int i;
    jobject current;
    const HParser *result;

    length = (*env)->GetArrayLength(env, sequence);
    // One extra slot for the NULL terminator h_sequence__a() expects.
    parsers = malloc(sizeof(void *)*(length+1));
    if(NULL==parsers)
    {
        return NULL;
    }

    for(i=0; i<length; i++)
    {
        current = (*env)->GetObjectArrayElement(env, sequence, (jsize)i);
        parsers[i] = UNWRAP(env, current);
    }
    parsers[length] = NULL;

    result = h_sequence__a(parsers);
    // The argument array is only scaffolding for the call above; free it so it
    // is not leaked on every invocation.
    // NOTE(review): assumes h_sequence__a() copies the array rather than
    // keeping the pointer -- confirm against the hammer sources.
    free(parsers);
    RETURNWRAP(env, result);
}
/*
 * JNI implementation of Hammer.choice(Parser...).
 *
 * Unwraps every element of the Java array into a NULL-terminated array of
 * native parsers, builds the choice parser with h_choice__a() and returns it
 * wrapped. Returns NULL if the temporary array cannot be allocated.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_choice
  (JNIEnv *env, jclass class, jobjectArray choices)
{
    jsize length;
    void **parsers;
    int i;
    jobject current;
    const HParser *result;

    length = (*env)->GetArrayLength(env, choices);
    // One extra slot for the NULL terminator h_choice__a() expects.
    parsers = malloc(sizeof(HParser *)*(length+1));
    if(NULL==parsers)
    {
        return NULL;
    }

    for(i=0; i<length; i++)
    {
        current = (*env)->GetObjectArrayElement(env, choices, (jsize)i);
        parsers[i] = UNWRAP(env, current);
    }
    parsers[length] = NULL;

    result = h_choice__a(parsers);
    // The argument array is only scaffolding for the call above; free it so it
    // is not leaked on every invocation.
    // NOTE(review): assumes h_choice__a() copies the array rather than keeping
    // the pointer -- confirm against the hammer sources.
    free(parsers);
    RETURNWRAP(env, result);
}
// JNI binding for Hammer.butNot(Parser, Parser): unwraps both parsers and passes them to h_butnot().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_butNot
(JNIEnv *env, jclass class, jobject p, jobject q)
{
RETURNWRAP(env, h_butnot(UNWRAP(env, p), UNWRAP(env, q)));
}
// JNI binding for Hammer.difference(Parser, Parser): unwraps both parsers and passes them to h_difference().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_difference
(JNIEnv *env, jclass class, jobject p, jobject q)
{
RETURNWRAP(env, h_difference(UNWRAP(env, p), UNWRAP(env, q)));
}
// JNI binding for Hammer.xor(Parser, Parser): unwraps both parsers and passes them to h_xor().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_xor
(JNIEnv *env, jclass class, jobject p, jobject q)
{
RETURNWRAP(env, h_xor(UNWRAP(env, p), UNWRAP(env, q)));
}
// JNI binding for Hammer.many(Parser): unwraps the parser and passes it to h_many().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_many
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_many(UNWRAP(env, p)));
}
// JNI binding for Hammer.many1(Parser): unwraps the parser and passes it to h_many1().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_many1
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_many1(UNWRAP(env, p)));
}
// JNI binding for Hammer.repeatN(Parser, int): unwraps the parser and passes it with n (as size_t) to h_repeat_n().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_repeatN
(JNIEnv *env, jclass class, jobject p, jint n)
{
RETURNWRAP(env, h_repeat_n(UNWRAP(env, p), (size_t)n));
}
// JNI binding for Hammer.optional(Parser): unwraps the parser and passes it to h_optional().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_optional
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_optional(UNWRAP(env, p)));
}
// JNI binding for Hammer.ignore(Parser): unwraps the parser and passes it to h_ignore().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_ignore
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_ignore(UNWRAP(env, p)));
}
// JNI binding for Hammer.sepBy(Parser, Parser): unwraps the element and separator parsers and passes them to h_sepBy().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sepBy
(JNIEnv *env, jclass class, jobject p, jobject sep)
{
RETURNWRAP(env, h_sepBy(UNWRAP(env, p), UNWRAP(env, sep)));
}
// JNI binding for Hammer.sepBy1(Parser, Parser): unwraps the element and separator parsers and passes them to h_sepBy1().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sepBy1
(JNIEnv *env, jclass class, jobject p, jobject sep)
{
RETURNWRAP(env, h_sepBy1(UNWRAP(env, p), UNWRAP(env, sep)));
}
// JNI binding for Hammer.epsilonP(): wraps the parser returned by h_epsilon_p().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_epsilonP
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_epsilon_p());
}
// JNI binding for Hammer.lengthValue(Parser, Parser): unwraps the length and value parsers and passes them to h_length_value().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_lengthValue
(JNIEnv *env, jclass class, jobject length, jobject value)
{
RETURNWRAP(env, h_length_value(UNWRAP(env, length), UNWRAP(env, value)));
}
// JNI binding for Hammer.and(Parser): unwraps the parser and passes it to h_and().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_and
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_and(UNWRAP(env, p)));
}
// JNI binding for Hammer.not(Parser): unwraps the parser and passes it to h_not().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_not
(JNIEnv *env, jclass class, jobject p)
{
RETURNWRAP(env, h_not(UNWRAP(env, p)));
}
// JNI binding for Hammer.indirect(): wraps the parser returned by h_indirect().
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_indirect
(JNIEnv *env, jclass class)
{
RETURNWRAP(env, h_indirect());
}

View file

@ -0,0 +1,45 @@
#include "jhammer.h"
#include "com_upstandinghackers_hammer_ParseResult.h"
// Recovers the native HParseResult pointer from a Java ParseResult object by
// reading its long field "inner" and casting the value back to a pointer.
HParseResult *unwrap_parse_result(JNIEnv *env, jobject obj)
{
jclass parseResultClass;
jfieldID parseResultInner;
FIND_CLASS(parseResultClass, env, "com/upstandinghackers/hammer/ParseResult");
// "J" is the JNI type signature for a Java long.
parseResultInner = (*env)->GetFieldID(env, parseResultClass, "inner", "J");
return (HParseResult *)((*env)->GetLongField(env, obj, parseResultInner));
}
// JNI implementation of ParseResult.getAst(): returns a new ParsedToken object
// wrapping the result's ast pointer, or NULL when there is no AST.
// NOTE(review): inner itself is dereferenced without a NULL check.
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_ParseResult_getAst
(JNIEnv *env, jobject this)
{
HParseResult *inner;
jclass parsedTokenClass;
jobject retVal;
if(this == NULL)
return NULL; // parse unsuccessful
inner = unwrap_parse_result(env, this);
if(inner->ast == NULL)
return NULL; // parse successful, but empty
FIND_CLASS(parsedTokenClass, env, "com/upstandinghackers/hammer/ParsedToken");
NEW_INSTANCE(retVal, env, parsedTokenClass, inner->ast);
return retVal;
}
// JNI implementation of ParseResult.getBitLength(): returns the native
// result's bit_length field as a jlong.
// NOTE(review): inner is dereferenced without a NULL check.
JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParseResult_getBitLength
(JNIEnv *env, jobject this)
{
HParseResult *inner = unwrap_parse_result(env, this);
return (jlong) (inner->bit_length);
}
// JNI implementation of ParseResult.free(): currently a stub that does
// nothing, so the native parse result is never released.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParseResult_free
(JNIEnv *env, jobject this)
{
//XXX: NOT IMPLEMENTED
}

View file

@ -0,0 +1,195 @@
#include "jhammer.h"
#include "com_upstandinghackers_hammer_ParsedToken.h"
// Declares a local `HParsedToken *inner` holding the native token behind the
// Java object `this` and asserts that it is non-NULL.
// Expands to a declaration followed by a statement, so it must appear at block
// scope (never as the unbraced body of an if/else/loop) and at most once per scope.
#define HPT_UNWRAP(env, this) HParsedToken *inner = unwrap_parsed_token(env, this); assert(inner!=NULL)
// Recover the native HParsedToken pointer stored in a Java ParsedToken.
// The pointer lives in the object's long field named "inner".
// Aborts (assert) if the ParsedToken class cannot be found.
HParsedToken *unwrap_parsed_token(JNIEnv *env, jobject obj)
{
    jclass parsedTokenClass;
    FIND_CLASS(parsedTokenClass, env, "com/upstandinghackers/hammer/ParsedToken");

    jfieldID innerField = (*env)->GetFieldID(env, parsedTokenClass, "inner", "J");
    jlong handle = (*env)->GetLongField(env, obj, innerField);
    return (HParsedToken *) handle;
}
// JNI entry point for ParsedToken.getTokenTypeInternal().
// Returns the native token's token_type as a jint.
// HPT_UNWRAP already asserts that `inner` is non-NULL, so the explicit NULL
// check below is only reachable when assertions are compiled out (NDEBUG);
// in that case 0 is returned.
JNIEXPORT jint JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getTokenTypeInternal
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
if(inner==NULL)
return (jint)0;
return (jint)(inner->token_type);
}
// JNI entry point for ParsedToken.getIndex().
// Returns the native token's `index` field narrowed to a jint.
JNIEXPORT jint JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getIndex
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jint) (inner->index);
}
// JNI entry point for ParsedToken.getBitOffset().
// Returns the native token's `bit_offset` field as a jbyte.
JNIEXPORT jbyte JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getBitOffset
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jbyte) (inner->bit_offset);
}
// JNI entry point for ParsedToken.getBytesValue().
// Copies the native token's byte string (bytes.token / bytes.len) into a
// freshly allocated Java byte[] and returns it.
// Returns NULL if the Java array cannot be allocated; in that case the JVM
// already has an exception pending for the caller.
// The token's type is not checked here; the caller must only use this on
// byte-string tokens.
JNIEXPORT jbyteArray JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getBytesValue
  (JNIEnv *env, jobject this)
{
    jbyteArray outArray;
    jsize length;
    HPT_UNWRAP(env, this);

    length = (jsize) inner->bytes.len;
    outArray = (*env)->NewByteArray(env, length);
    if (outArray == NULL)
        return NULL; // must not touch the array region when allocation failed

    (*env)->SetByteArrayRegion(env, outArray, (jsize) 0, length, (jbyte *)(inner->bytes.token));
    return outArray;
}
// JNI entry point for ParsedToken.getSIntValue().
// Returns the token's `sint` member as a jlong. The token type is not checked.
JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getSIntValue
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jlong) (inner->sint);
}
// JNI entry point for ParsedToken.getUIntValue().
// Returns the token's `uint` member cast to a (signed) jlong, so values above
// the jlong maximum come out negative on the Java side. The token type is not
// checked.
JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getUIntValue
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jlong) (inner->uint);
}
// JNI entry point for ParsedToken.getDoubleValue().
// Returns the token's `dbl` member as a jdouble. The token type is not checked.
JNIEXPORT jdouble JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getDoubleValue
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jdouble) (inner->dbl);
}
// JNI entry point for ParsedToken.getFloatValue().
// Returns the token's `flt` member as a jfloat. The token type is not checked.
JNIEXPORT jfloat JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getFloatValue
(JNIEnv *env, jobject this)
{
HPT_UNWRAP(env, this);
return (jfloat) (inner->flt);
}
// JNI entry point for ParsedToken.getSeqValue().
// Builds a Java ParsedToken[] with one wrapper object per element of the
// native token's sequence (inner->seq), in order.
// Returns NULL if the Java array cannot be allocated; in that case the JVM
// already has an exception pending for the caller.
// The token's type is not checked here; the caller must only use this on
// sequence tokens.
JNIEXPORT jobjectArray JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getSeqValue
  (JNIEnv *env, jobject this)
{
    jsize i;
    HPT_UNWRAP(env, this);
    jsize returnSize = inner->seq->used;
    jobject currentObject;
    jclass returnClass;
    jobjectArray retVal;

    FIND_CLASS(returnClass, env, "com/upstandinghackers/hammer/ParsedToken");

    retVal = (*env)->NewObjectArray(env, returnSize, returnClass, NULL);
    if (retVal == NULL)
        return NULL;

    for (i = 0; i < returnSize; i++) {
        NEW_INSTANCE(currentObject, env, returnClass, inner->seq->elements[i]);
        (*env)->SetObjectArrayElement(env, retVal, i, currentObject);
        // The array now holds its own reference. Drop the local one so long
        // sequences do not exhaust the local reference table.
        (*env)->DeleteLocalRef(env, currentObject);
    }

    return retVal;
}
// JNI entry point for ParsedToken.setTokenType(tokenType).
// Calls getValue() on the given Hammer.TokenType object and stores the
// returned int in the native token's token_type.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setTokenType
  (JNIEnv *env, jobject this, jobject tokenType)
{
    HPT_UNWRAP(env, this);

    jclass tokenTypeClass;
    FIND_CLASS(tokenTypeClass, env, "com/upstandinghackers/hammer/Hammer$TokenType");

    jmethodID valueGetter = (*env)->GetMethodID(env, tokenTypeClass, "getValue", "()I");
    jint rawType = (*env)->CallIntMethod(env, tokenType, valueGetter);

    // unsafe cast, but enums should be of type int
    inner->token_type = (int32_t) rawType;
}
// JNI entry point for ParsedToken.setIndex(index).
// Stores `index`, converted to size_t, in the native token's `index` field.
// A negative jint becomes a very large size_t through this cast.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setIndex
(JNIEnv *env, jobject this, jint index)
{
HPT_UNWRAP(env, this);
inner->index = (size_t)index;
}
// JNI entry point for ParsedToken.setBitOffset(bit_offset).
// Stores `bit_offset`, cast to char, in the native token's `bit_offset` field.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setBitOffset
(JNIEnv *env, jobject this, jbyte bit_offset)
{
HPT_UNWRAP(env, this);
inner->bit_offset = (char)bit_offset;
}
// JNI entry point for ParsedToken.setBytesValue(bytes_).
// Points the native token's byte string at the elements of the Java array and
// marks the token as TT_BYTES.
// NOTE(review): the buffer obtained from GetByteArrayElements is stored in the
// token and never passed to ReleaseByteArrayElements, and its result is not
// checked for NULL. The buffer is therefore never released; confirm the
// intended ownership before changing this (copying into memory owned by the
// token looks like the likely fix).
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setBytesValue
(JNIEnv *env, jobject this, jbyteArray bytes_)
{
HBytes bytes;
HPT_UNWRAP(env, this);
bytes.token = (uint8_t *) ((*env)->GetByteArrayElements(env, bytes_, NULL));
bytes.len = (size_t) (*env)->GetArrayLength(env, bytes_);
inner->bytes = bytes;
inner->token_type = TT_BYTES;
}
// JNI entry point for ParsedToken.setSIntValue(sint).
// Marks the native token as TT_SINT and stores the value in its `sint` member.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setSIntValue
(JNIEnv *env, jobject this, jlong sint)
{
HPT_UNWRAP(env, this);
inner->token_type = TT_SINT;
inner->sint = (int64_t)sint;
}
// JNI entry point for ParsedToken.setUIntValue(uint).
// Marks the native token as TT_UINT and stores the value, reinterpreted as
// uint64_t, in its `uint` member (a negative jlong becomes a large unsigned value).
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setUIntValue
(JNIEnv *env, jobject this, jlong uint)
{
HPT_UNWRAP(env, this);
inner->token_type = TT_UINT;
inner->uint = (uint64_t)uint;
}
// JNI entry point for ParsedToken.setDoubleValue(dbl).
// Stores the value in the native token's `dbl` member.
// Unlike the integer and bytes setters, this does NOT update token_type.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setDoubleValue
(JNIEnv *env, jobject this, jdouble dbl)
{
HPT_UNWRAP(env, this);
// NOTE(review): open question from the original author - which token_type should be set here?
inner->dbl = (double)dbl;
}
// JNI entry point for ParsedToken.setFloatValue(flt).
// Stores the value in the native token's `flt` member.
// Unlike the integer and bytes setters, this does NOT update token_type.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setFloatValue
(JNIEnv *env, jobject this, jfloat flt)
{
HPT_UNWRAP(env, this);
// NOTE(review): open question from the original author - which token_type should be set here?
inner->flt = (float)flt;
}
// JNI entry point for ParsedToken.setSeqValue(values).
// Builds a new HCountedArray from the native tokens behind the given Java
// ParsedToken[] and installs it as this token's sequence, marking the token as
// TT_SEQUENCE. NULL array slots and wrappers with a NULL native pointer are
// skipped, so the resulting sequence may be shorter than `values`.
// NOTE(review): a fresh arena is created on every call and nothing in this
// function releases it.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setSeqValue
  (JNIEnv *env, jobject this, jobjectArray values)
{
    HPT_UNWRAP(env, this);

    const size_t count = (size_t) (*env)->GetArrayLength(env, values);
    HArena *seqArena = h_new_arena(&system_allocator, 0);
    HCountedArray *collected = h_carray_new_sized(seqArena, count);

    // Unwrap every non-NULL element and append it in order.
    for (size_t pos = 0; pos < count; pos++) {
        jobject element = (*env)->GetObjectArrayElement(env, values, (jsize) pos);
        if (element != NULL) {
            HParsedToken *elementInner = unwrap_parsed_token(env, element);
            if (elementInner != NULL)
                h_carray_append(collected, (void *) elementInner);
        }
    }

    inner->token_type = TT_SEQUENCE;
    inner->seq = collected;
}

View file

@ -0,0 +1,15 @@
#include "jhammer.h"
#include "com_upstandinghackers_hammer_Parser.h"
// JNI entry point for Parser.bindIndirect(parser).
// Calls h_bind_indirect() with the native parser behind `this` as the indirect
// parser and the native parser behind `parser` as its target.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_Parser_bindIndirect
(JNIEnv *env, jobject this, jobject parser)
{
h_bind_indirect(UNWRAP(env, this), UNWRAP(env, parser));
}
// JNI entry point for Parser.free().
// Currently a no-op: the native HParser behind `this` is NOT released.
// The commented-out call below is the original author's placeholder.
JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_Parser_free
(JNIEnv *env, jobject this)
{
//XXX NOT IMPLEMENTED
//h_free(UNWRAP(env, this));
}

31
jni/jhammer.h Normal file
View file

@ -0,0 +1,31 @@
#ifndef JHAMMER_H
#define JHAMMER_H

#include <jni.h>
#include "internal.h"
#include <assert.h>

// Shared helper macros for the JNI glue code.
// Every Java wrapper class used here stores its native pointer in a long field
// named "inner" and has a constructor taking that pointer as a long ("(J)V").

// Unsafe (non-asserting) helpers; each expands to a single expression.
#define FIND_CLASS_(env, class) ((*(env))->FindClass((env), (class)))
#define REFCONSTRUCTOR_(env, class) ((*(env))->GetMethodID((env), (class), "<init>", "(J)V"))
#define NEW_INSTANCE_(env, class, inner) ((*(env))->NewObject((env), (class), REFCONSTRUCTOR_(env, class), (jlong)(inner)))

// Safer versions, assert that the result is not NULL
// If one of those asserts fails, it most likely means that there's a typo (wrong class name or method signature) or big trouble (OOM)
// Each expands to exactly one statement (do/while(0)), so it is safe as the
// unbraced body of an if/else or loop. Use as: FIND_CLASS(var, env, "...");
#define FIND_CLASS(target, env, class) do { (target) = FIND_CLASS_(env, class); assert((target) != NULL); } while (0)
#define REFCONSTRUCTOR(target, env, class) do { (target) = REFCONSTRUCTOR_(env, class); assert((target) != NULL); } while (0)
#define NEW_INSTANCE(target, env, class, inner) do { (target) = NEW_INSTANCE_(env, class, inner); assert((target) != NULL); } while (0)

// Since there's a LOT of wrapping/unwrapping HParsers, these macros make it a bit more readable
#define PARSER_CLASS "com/upstandinghackers/hammer/Parser"
#define PARSER_REF(env) ((*(env))->GetFieldID((env), FIND_CLASS_(env, PARSER_CLASS), "inner", "J"))

// Wraps the native parser `inner` in a new Java Parser object and RETURNS it
// from the enclosing function. It declares two locals and ends in a `return`,
// so it must be used as a complete statement at function scope, at most once
// per scope.
#define RETURNWRAP(env, inner) jclass jhammer_cls_ = FIND_CLASS_(env, PARSER_CLASS); \
  assert(jhammer_cls_ != NULL); \
  jmethodID jhammer_ctor_ = REFCONSTRUCTOR_(env, jhammer_cls_); \
  assert(jhammer_ctor_ != NULL); \
  return (*(env))->NewObject((env), jhammer_cls_, jhammer_ctor_, (jlong)(inner))

// Reads the native HParser pointer out of a Java Parser object.
#define UNWRAP(env, object) ((HParser *)((*(env))->GetLongField((env), (object), PARSER_REF(env))))

#endif

View file

@ -42,6 +42,8 @@ HAMMER_PARTS := \
benchmark.o \
cfgrammar.o \
actions.o \
compile.o \
glue.o \
$(PARSERS:%=parsers/%.o) \
$(BACKENDS:%=backends/%.o)
@ -50,6 +52,7 @@ TESTS := t_benchmark.o \
t_bitwriter.o \
t_parser.o \
t_grammar.o \
t_misc.o \
test_suite.o
OUTPUTS := libhammer.a \
@ -65,12 +68,15 @@ include ../common.mk
$(TESTS): CFLAGS += $(TEST_CFLAGS)
$(TESTS): LDFLAGS += $(TEST_LDFLAGS)
CFLAGS += -fPIC
all: libhammer.a
libhammer.a: $(HAMMER_PARTS)
bitreader.o: test_suite.h
hammer.o: hammer.h
glue.o: hammer.h glue.h
all: libhammer.a

View file

@ -86,14 +86,18 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
if (!rec_detect->head) {
HRecursionHead *some = a_new(HRecursionHead, 1);
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
some->head_parser = p;
some->involved_set = h_slist_new(state->arena);
some->eval_set = NULL;
rec_detect->head = some;
}
assert(state->lr_stack->head != NULL);
HLeftRec *lr = state->lr_stack->head->elem;
while (lr && lr->rule != p) {
HSlistNode *head = state->lr_stack->head;
HLeftRec *lr;
while (head && (lr = head->elem)->rule != p) {
lr->head = rec_detect->head;
h_slist_push(lr->head->involved_set, (void*)lr->rule);
head = head->next;
}
}
@ -110,7 +114,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head)
HParseResult *old_res = old_cached->right->result;
// reset the eval_set of the head of the recursion at each beginning of growth
head->eval_set = head->involved_set;
head->eval_set = h_slist_copy(head->involved_set);
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
if (tmp_res) {

View file

@ -41,6 +41,26 @@ HSlist* h_slist_new(HArena *arena) {
return ret;
}
// Return a shallow copy of `slist`: a new list, allocated from the same arena,
// holding the same element pointers in the same order. The elements themselves
// are not duplicated.
HSlist* h_slist_copy(HSlist *slist) {
  HSlist *copy = h_slist_new(slist->arena);
  HSlistNode *last = NULL;  // last node of `copy`, NULL while it is still empty

  for (HSlistNode *src = slist->head; src != NULL; src = src->next) {
    if (last == NULL) {
      // first element: let the list create its own head node
      h_slist_push(copy, src->elem);
      last = copy->head;
    } else {
      // later elements: append a new node behind the current tail
      HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode));
      node->elem = src->elem;
      node->next = NULL;
      last->next = node;
      last = node;
    }
  }
  return copy;
}
void* h_slist_pop(HSlist *slist) {
HSlistNode *head = slist->head;
if (!head)

177
src/glue.c Normal file
View file

@ -0,0 +1,177 @@
#include "glue.h"
#include "../src/internal.h" // for h_carray_*
// The action equivalent of h_ignore.
// Ignores its argument and always returns NULL, i.e. it replaces the AST it is
// attached to with NULL.
const HParsedToken *h_act_ignore(const HParseResult *p)
{
return NULL;
}
// Helper to build HAction's that pick one index out of a sequence.
const HParsedToken *h_act_index(int i, const HParseResult *p)
{
if(!p) return NULL;
const HParsedToken *tok = p->ast;
if(!tok || tok->token_type != TT_SEQUENCE)
return NULL;
const HCountedArray *seq = tok->seq;
size_t n = seq->used;
if(i<0 || (size_t)i>=n)
return NULL;
else
return tok->seq->elements[i];
}
// Action version of h_seq_flatten.
// Flattens p->ast into a single sequence allocated from p->arena.
// `p` is dereferenced without a check, and h_seq_flatten asserts that the
// token it is given is non-NULL, so this must only be used on results that
// carry an AST.
const HParsedToken *h_act_flatten(const HParseResult *p) {
return h_seq_flatten(p->arena, p->ast);
}
// Low-level helper for the h_make family.
// Allocates a token from `arena` and sets only its token_type; the value
// member is left for the caller to fill in.
HParsedToken *h_make_(HArena *arena, HTokenType type)
{
  HParsedToken *tok = h_arena_malloc(arena, sizeof *tok);
  tok->token_type = type;
  return tok;
}
// Make a token of a custom (user-defined) type carrying `value` in its `user`
// member. Asserts that `type` is at least TT_USER.
HParsedToken *h_make(HArena *arena, HTokenType type, void *value)
{
  assert(type >= TT_USER);

  HParsedToken *tok = h_make_(arena, type);
  tok->user = value;
  return tok;
}
// Make a TT_SEQUENCE token backed by a new counted array from h_carray_new.
HParsedToken *h_make_seq(HArena *arena)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  HCountedArray *elems = h_carray_new(arena);
  tok->seq = elems;
  return tok;
}
// Make a TT_SEQUENCE token backed by a new counted array created with
// h_carray_new_sized(arena, n).
HParsedToken *h_make_seqn(HArena *arena, size_t n)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);
  HCountedArray *elems = h_carray_new_sized(arena, n);
  tok->seq = elems;
  return tok;
}
// Make a TT_BYTES token with a buffer of `len` bytes allocated from `arena`.
// The buffer contents are not initialized here.
HParsedToken *h_make_bytes(HArena *arena, size_t len)
{
  HParsedToken *tok = h_make_(arena, TT_BYTES);
  uint8_t *storage = h_arena_malloc(arena, len);

  tok->bytes.token = storage;
  tok->bytes.len = len;
  return tok;
}
// Make a TT_SINT token holding `val`.
HParsedToken *h_make_sint(HArena *arena, int64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_SINT);
  tok->sint = val;
  return tok;
}
// Make a TT_UINT token holding `val`.
HParsedToken *h_make_uint(HArena *arena, uint64_t val)
{
  HParsedToken *tok = h_make_(arena, TT_UINT);
  tok->uint = val;
  return tok;
}
// XXX -> internal
// Bounds-checked element access on a counted array: asserts that `i` is below
// the number of used slots and returns element `i`.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
{
assert(i < a->used);
return a->elements[i];
}
size_t h_seq_len(const HParsedToken *p)
{
assert(p != NULL);
assert(p->token_type == TT_SEQUENCE);
return p->seq->used;
}
HParsedToken **h_seq_elements(const HParsedToken *p)
{
assert(p != NULL);
assert(p->token_type == TT_SEQUENCE);
return p->seq->elements;
}
HParsedToken *h_seq_index(const HParsedToken *p, size_t i)
{
assert(p != NULL);
assert(p->token_type == TT_SEQUENCE);
return h_carray_index(p->seq, i);
}
// Variadic front end for h_seq_index_vpath.
// `i` is the first index; further indices follow as `int` arguments and the
// list MUST be terminated by a negative int (h_seq_index_vpath reads ints
// until it sees one).
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
{
va_list va;
va_start(va, i);
HParsedToken *ret = h_seq_index_vpath(p, i, va);
va_end(va);
return ret;
}
// Access an element in a nested sequence by a path of indices.
// `i` indexes into `p`; every following `int` taken from `va` indexes into the
// element selected by the previous step. The path ends at the first negative
// int. Every token along the path except the last must be a sequence
// (h_seq_index asserts this and range-checks each index).
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
{
  HParsedToken *ret = h_seq_index(p, i);
  int j;

  while((j = va_arg(va, int)) >= 0)
    ret = h_seq_index(ret, j);  // descend into the previous result, not back into p

  return ret;
}
// Append the single token `x` to the end of sequence token `xs`.
// Asserts that `xs` is non-NULL and of type TT_SEQUENCE. `x` is stored as is;
// its const qualifier is cast away for storage.
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);

  HCountedArray *dest = xs->seq;
  h_carray_append(dest, (HParsedToken *)x);
}
// Append all elements of sequence token `ys` to sequence token `xs`.
// Both arguments must be non-NULL sequence tokens (asserted). Only the element
// pointers are copied.
void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
{
  assert(xs != NULL);
  assert(xs->token_type == TT_SEQUENCE);
  assert(ys != NULL);
  assert(ys->token_type == TT_SEQUENCE);

  // Snapshot the count first: if xs and ys share the same array, re-reading
  // ys->seq->used each round would chase a growing bound and never terminate.
  const size_t n = ys->seq->used;

  // elements is re-read every iteration because appending may change it when
  // both tokens share the same array.
  for(size_t i=0; i<n; i++)
    h_carray_append(xs->seq, ys->seq->elements[i]);
}
// Flatten nested sequences. Always returns a sequence.
// If input element is not a sequence, returns it as a singleton sequence.
// The result (and every intermediate sequence) is allocated from `arena`.
// Asserts that `p` is non-NULL.
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p)
{
  assert(p != NULL);

  HParsedToken *ret = h_make_seq(arena);
  switch(p->token_type) {
  case TT_SEQUENCE:
    // Flatten and append all.
    // The index must start at 0; it was previously left uninitialized.
    for(size_t i = 0; i<p->seq->used; i++) {
      h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i)));
    }
    break;
  default:
    // Make singleton sequence.
    h_seq_snoc(ret, p);
    break;
  }

  return ret;
}

253
src/glue.h Normal file
View file

@ -0,0 +1,253 @@
//
// API additions for writing grammar and semantic actions more concisely
//
//
// Quick Overview:
//
// Grammars can be succinctly specified with the family of H_RULE macros.
// H_RULE defines a plain parser variable. H_ARULE additionally attaches a
// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE
// combine both.
//
// A few standard semantic actions are defined below. The H_ACT_APPLY macro
// allows semantic actions to be defined by "partial application" of
// a generic action to fixed parameters.
//
// The definition of more complex semantic actions will usually consist of
// extracting data from the given parse tree and constructing a token of custom
// type to represent the result. A number of functions and convenience macros
// are provided to capture the most common cases and idioms.
//
// See the leading comment blocks on the sections below for more details.
//
#ifndef HAMMER_GLUE__H
#define HAMMER_GLUE__H
#include <assert.h>
#include "hammer.h"
//
// Grammar specification
//
// H_RULE is simply a short-hand for the typical declaration and definition of
// a parser variable. See its plain definition below. The goal is to save
// horizontal space as well as to provide a clear and unified look together with
// the other macro variants that stays close to an abstract PEG or BNF grammar.
// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their
// combinations as they allow the definition of syntax to be given without
// intermingling it with the semantic specifications.
//
// H_ARULE defines a variable just like H_RULE but attaches a semantic action
// to the result of the parser via h_action. The action is expected to be
// named act_<rulename>.
//
// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool.
// The validation is expected to be named validate_<rulename>.
//
// H_VARULE combines H_RULE with both an action and a validation. The action is
// attached before the validation, i.e. the validation receives as input the
// result of the action.
//
// H_AVRULE is like H_VARULE but the action is attached outside the validation,
// i.e. the validation receives the uninterpreted AST as input.
//
#define H_RULE(rule, def) const HParser *rule = def
#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule)
#define H_VRULE(rule, def) const HParser *rule = \
h_attr_bool(def, validate_ ## rule)
#define H_VARULE(rule, def) const HParser *rule = \
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
#define H_AVRULE(rule, def) const HParser *rule = \
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
//
// Pre-fab semantic actions
//
// A collection of generally useful semantic actions is provided.
//
// h_act_ignore is the action equivalent of the parser combinator h_ignore. It
// simply causes the AST it is applied to to be replaced with NULL. This most
// importantly causes it to be elided from the result of a surrounding
// h_sequence.
//
// h_act_index is of note as it is not itself suitable to be passed to
// h_action. It is parameterized by an index to be picked from a sequence
// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY
// macro provides a concise way to define such a parameter-application wrapper.
//
// h_act_flatten acts on a token of possibly nested sequences by recursively
// flattening it into a single sequence. Cf. h_seq_flatten below.
//
// H_ACT_APPLY implements "partial application" for semantic actions. It
// defines a new action that supplies given parameters to a parameterized
// action such as h_act_index.
//
const HParsedToken *h_act_ignore(const HParseResult *p);
const HParsedToken *h_act_index(int i, const HParseResult *p);
const HParsedToken *h_act_flatten(const HParseResult *p);
// Define 'myaction' as a specialization of 'paction' by supplying the leading
// parameters.
#define H_ACT_APPLY(myaction, paction, ...) \
const HParsedToken *myaction(const HParseResult *p) { \
return paction(__VA_ARGS__, p); \
}
//
// Working with HParsedTokens
//
// The type HParsedToken represents a dynamically-typed universe of values.
// Declared below are constructors to turn ordinary values into their
// HParsedToken equivalents, extractors to retrieve the original values from
// inside an HParsedToken, and functions that inspect and modify tokens of
// sequence type directly.
//
// In addition, there are a number of short-hand macros that work with some
// conventions to eliminate common boilerplate. These conventions are listed
// below. Be sure to follow them if you want to use the respective macros.
//
// * The single argument to semantic actions should be called 'p'.
//
// The H_MAKE macros supply 'p->arena' to their underlying h_make
// counterparts. The H_FIELD macros supply 'p->ast' to their underlying
// H_INDEX counterparts.
//
// * For each custom token type, there should be a typedef for the
// corresponding value type.
//
// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to
// a pointer to the given type.
//
// * For each custom token type, say 'foo_t', there must be an integer
// constant 'TT_foo_t' to identify the token type. This constant must have a
// value greater than or equal to TT_USER.
//
// One idiom is to define an enum for all custom token types and to assign a
// value of TT_USER to the first element. This can be viewed as extending
// the HTokenType enum.
//
// The H_MAKE and H_ASSERT macros derive the name of the token type constant
// from the given type name.
//
//
// The H_ALLOC macro is useful for allocating values of custom token types.
//
// The H_MAKE family of macros construct tokens of a given type. The native
// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ.
// The form with no suffix is used for custom token types. This convention is
// also used for other macro and function families.
//
// The H_ASSERT family simply asserts that a given token has the expected type.
// It mainly serves as an implementation aid for H_CAST. Of note in that regard
// is that, unlike the standard 'assert' macro, these form _expressions_ that
// return the value of their token argument; thus they can be used in a
// "pass-through" fashion inside other expressions.
//
// The H_CAST family combines a type assertion with access to the
// statically-typed value inside a token.
//
// A number of functions h_seq_* operate on and inspect sequence tokens.
// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence.
// Therefore there are h_seq_snoc and h_seq_append to build up sequences.
//
// The macro families H_FIELD and H_INDEX combine index access on a sequence
// with a cast to the appropriate result type. H_FIELD is used to access the
// elements of the argument token 'p' in an action. H_INDEX allows any sequence
// token to be specified. Both macro families take an arbitrary number of index
// arguments, giving access to elements in nested sequences by path.
// These macros are very useful to avoid spaghetti chains of unchecked pointer
// dereferences.
//
// Standard short-hand for arena-allocating a variable in a semantic action.
#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP)))
// Token constructors...
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
HParsedToken *h_make_bytes(HArena *arena, size_t len);
HParsedToken *h_make_sint(HArena *arena, int64_t val);
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
// Standard short-hands to make tokens in an action.
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
#define H_MAKE_SEQ() h_make_seq(p->arena)
#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N)
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
// Extract (cast) type-specific value back from HParsedTokens...
// Pass-through assertion that a given token has the expected type.
// Evaluates to the token P itself so it can be nested inside other expressions.
// Both arguments are parenthesized so that arbitrary expressions (e.g. `toks + 1`
// or a conditional) can be passed; P is evaluated more than once.
#define h_assert_type(T,P) (assert((P)->token_type == (HTokenType)(T)), (P))
// Convenience short-hand forms of h_assert_type.
#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK)
#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK)
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
// Assert expected type and return contained value.
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq)
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
// Sequence access...
// Return the length of a sequence.
size_t h_seq_len(const HParsedToken *p);
// Access a sequence's element array.
HParsedToken **h_seq_elements(const HParsedToken *p);
// Access a sequence element by index.
HParsedToken *h_seq_index(const HParsedToken *p, size_t i);
// Access an element in a nested sequence by a path of indices.
HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...);
HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
// Convenience macros combining (nested) index access and h_cast.
#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
// Standard short-hand to access and cast elements on a sequence token.
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__)
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
// Lower-level helper for h_seq_index.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
// Sequence modification...
// Add elements to a sequence.
void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one
void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many
// XXX TODO: Remove elements from a sequence.
// Flatten nested sequences into one.
const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p);
#endif

View file

@ -42,14 +42,15 @@ typedef enum HParserBackend_ {
} HParserBackend;
typedef enum HTokenType_ {
TT_NONE,
TT_BYTES,
TT_SINT,
TT_UINT,
TT_SEQUENCE,
// Before you change the explicit values of these, think of the poor bindings ;_;
TT_NONE = 1,
TT_BYTES = 2,
TT_SINT = 4,
TT_UINT = 8,
TT_SEQUENCE = 16,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_ERR = 32,
TT_USER = 64,
TT_ERR,
TT_MAX
} HTokenType;
@ -60,13 +61,15 @@ typedef struct HCountedArray_ {
struct HParsedToken_ **elements;
} HCountedArray;
// A byte string: pointer to the first byte plus its length.
// Used as the `bytes` member of HParsedToken.
typedef struct HBytes_ {
const uint8_t *token;
size_t len;
} HBytes;
typedef struct HParsedToken_ {
HTokenType token_type;
union {
struct {
const uint8_t *token;
size_t len;
} bytes;
HBytes bytes;
int64_t sint;
uint64_t uint;
double dbl;
@ -175,14 +178,18 @@ typedef struct HBenchmarkResults_ {
rtype_t name(__VA_ARGS__, ...); \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
rtype_t name##__v(__VA_ARGS__, va_list ap)
rtype_t name##__v(__VA_ARGS__, va_list ap); \
rtype_t name##__a(void *args[]); \
rtype_t name##__ma(HAllocator *mm__, void *args[])
// Note: this drops the attributes on the floor for the __v versions
#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \
rtype_t name(__VA_ARGS__, ...) attr; \
rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \
rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \
rtype_t name##__v(__VA_ARGS__, va_list ap)
rtype_t name##__v(__VA_ARGS__, va_list ap); \
rtype_t name##__a(void *args[]); \
rtype_t name##__ma(HAllocator *mm__, void *args[])
// }}}

View file

@ -243,6 +243,7 @@ HCountedArray *h_carray_new(HArena * arena);
void h_carray_append(HCountedArray *array, void* item);
HSlist* h_slist_new(HArena *arena);
HSlist* h_slist_copy(HSlist *slist);
void* h_slist_pop(HSlist *slist);
void h_slist_push(HSlist *slist, void* item);
bool h_slist_find(HSlist *slist, const void* item);

View file

@ -126,3 +126,27 @@ HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) {
return h_new_parser(mm__, &choice_vt, s);
}
// Array-argument variant of h_choice using the system allocator.
// `args` is a NULL-terminated array of parser pointers (see h_choice__ma).
HParser* h_choice__a(void *args[]) {
return h_choice__ma(&system_allocator, args);
}
// Array-argument variant of h_choice with an explicit allocator.
// `args` is a NULL-terminated array of HParser pointers; the alternatives are
// copied into a new HSequence in the same order.
// The parser is built through h_new_parser(), exactly like h_choice__mv does,
// so the HParser is constructed the same way on every path instead of being
// hand-filled with only vtable and env.
HParser* h_choice__ma(HAllocator* mm__, void *args[]) {
  const HParser **parsers = (const HParser **)args;

  // count the entries up to (not including) the terminating NULL
  size_t len = 0;
  while (parsers[len] != NULL)
    len++;

  HSequence *s = h_new(HSequence, 1);
  s->p_array = h_new(const HParser *, len);

  for (size_t i = 0; i < len; i++) {
    s->p_array[i] = parsers[i];
  }

  s->len = len;
  return h_new_parser(mm__, &choice_vt, s);
}

View file

@ -144,3 +144,28 @@ HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) {
s->len = len;
return h_new_parser(mm__, &sequence_vt, s);
}
// Array-argument variant of h_sequence using the system allocator.
// `args` is a NULL-terminated array of parser pointers (see h_sequence__ma).
HParser* h_sequence__a(void *args[]) {
return h_sequence__ma(&system_allocator, args);
}
// Array-argument variant of h_sequence with an explicit allocator.
// `args` is a NULL-terminated array of HParser pointers; the parsers are
// copied into a new HSequence in the same order.
// The parser is built through h_new_parser(), exactly like h_sequence__mv
// does, so the HParser is constructed the same way on every path instead of
// being hand-filled with only vtable and env.
HParser* h_sequence__ma(HAllocator* mm__, void *args[]) {
  const HParser **parsers = (const HParser **)args;

  // count the entries up to (not including) the terminating NULL
  size_t len = 0;
  while (parsers[len] != NULL)
    len++;

  HSequence *s = h_new(HSequence, 1);
  s->p_array = h_new(const HParser *, len);

  for (size_t i = 0; i < len; i++) {
    s->p_array[i] = parsers[i];
  }

  s->len = len;
  return h_new_parser(mm__, &sequence_vt, s);
}

View file

@ -9,7 +9,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) {
bak = state->input_stream;
c = h_read_bits(&state->input_stream, 8, false);
if (state->input_stream.overrun)
return NULL;
break;
} while (isspace(c));
state->input_stream = bak;
return h_do_parse((HParser*)env, state);

View file

@ -69,7 +69,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
fprintf(stream, "%*sUSER\n", indent, "");
break;
default:
assert_message(0, "Should not reach here.");
if(tok->token_type > TT_USER) {
fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER);
} else {
assert_message(0, "Should not reach here.");
}
}
}

16
src/t_misc.c Normal file
View file

@ -0,0 +1,16 @@
#include <glib.h>
#include "test_suite.h"
#include "hammer.h"
// Checks that TT_USER compares greater than each of the built-in token type
// constants listed below, so custom token types numbered from TT_USER upward
// cannot collide with them.
static void test_tt_user(void) {
g_check_cmpint(TT_USER, >, TT_NONE);
g_check_cmpint(TT_USER, >, TT_BYTES);
g_check_cmpint(TT_USER, >, TT_SINT);
g_check_cmpint(TT_USER, >, TT_UINT);
g_check_cmpint(TT_USER, >, TT_SEQUENCE);
g_check_cmpint(TT_USER, >, TT_ERR);
}
// Registers the miscellaneous core tests with the GLib test framework.
void register_misc_tests(void) {
g_test_add_func("/core/misc/tt_user", test_tt_user);
}

View file

@ -116,12 +116,17 @@ static void test_float32(gconstpointer backend) {
// Tests h_whitespace with the backend passed in via `backend`.
// whitespace_ wraps h_ch('a'); whitespace_end wraps h_end_p().
// NOTE(review): the whitespace_end cases presumably cover input that ends while
// leading whitespace is still being skipped - confirm against parse_whitespace.
static void test_whitespace(gconstpointer backend) {
const HParser *whitespace_ = h_whitespace(h_ch('a'));
const HParser *whitespace_end = h_whitespace(h_end_p());
g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), " a", 2, "u0x61");
g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), " a", 3, "u0x61");
g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "\ta", 2, "u0x61");
g_check_parse_failed(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "_a", 2);
g_check_parse_ok(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL");
g_check_parse_ok(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend)," ", 2, "NULL");
g_check_parse_failed(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend)," x", 3);
}
static void test_left(gconstpointer backend) {
@ -395,6 +400,17 @@ static void test_not(gconstpointer backend) {
g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)");
}
// Tests a directly left-recursive grammar, lr := lr 'a' | 'a', built with
// h_indirect/h_bind_indirect. The expected results nest to the left:
// "aaa" parses as ((a a) a).
static void test_leftrec(gconstpointer backend) {
const HParser *a_ = h_ch('a');
HParser *lr_ = h_indirect();
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
}
void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@ -437,6 +453,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and);
g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
g_test_add_data_func("/core/parser/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
@ -473,6 +490,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/llk/epsilon_p", GINT_TO_POINTER(PB_LLk), test_epsilon_p);
g_test_add_data_func("/core/parser/llk/attr_bool", GINT_TO_POINTER(PB_LLk), test_attr_bool);
g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore);
g_test_add_data_func("/core/parser/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec);
g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);

View file

@ -23,6 +23,7 @@ extern void register_bitreader_tests();
extern void register_bitwriter_tests();
extern void register_parser_tests();
extern void register_grammar_tests();
extern void register_misc_tests();
extern void register_benchmark_tests();
int main(int argc, char** argv) {
@ -33,6 +34,7 @@ int main(int argc, char** argv) {
register_bitwriter_tests();
register_parser_tests();
register_grammar_tests();
register_misc_tests();
register_benchmark_tests();
g_test_run();