Merge pull request #29 from pesco/base64-example
Semantic base64 examples
This commit is contained in:
commit
45d28c36c6
13 changed files with 437 additions and 18 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -5,6 +5,8 @@ src/test_suite
|
||||||
lib/hush
|
lib/hush
|
||||||
examples/dns
|
examples/dns
|
||||||
examples/base64
|
examples/base64
|
||||||
|
examples/base64_sem1
|
||||||
|
examples/base64_sem2
|
||||||
TAGS
|
TAGS
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,11 @@
|
||||||
OUTPUTS := dns.o \
|
OUTPUTS := dns.o \
|
||||||
dns \
|
dns \
|
||||||
base64.o \
|
base64.o \
|
||||||
base64
|
base64 \
|
||||||
|
base64_sem1.o \
|
||||||
|
base64_sem1 \
|
||||||
|
base64_sem2.o \
|
||||||
|
base64_sem2
|
||||||
|
|
||||||
TOPLEVEL := ../
|
TOPLEVEL := ../
|
||||||
|
|
||||||
|
|
@ -12,19 +16,26 @@ LDFLAGS += $(pkg-config --libs glib-2.0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
all: dns base64
|
all: dns base64 base64_sem1 base64_sem2
|
||||||
|
|
||||||
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
dns: dns.o rr.o dns_common.o glue.o
|
dns: dns.o rr.o dns_common.o
|
||||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
dns.o: ../src/hammer.h dns_common.h
|
dns.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||||
rr.o: ../src/hammer.h rr.h dns_common.h
|
rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h
|
||||||
dns_common.o: ../src/hammer.h dns_common.h
|
dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h
|
||||||
glue.o: ../src/hammer.h glue.h
|
|
||||||
|
|
||||||
base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
base64: base64.o
|
base64: base64.o
|
||||||
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
base64.o: ../src/hammer.h
|
base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
|
base64_sem1: base64_sem1.o
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
|
||||||
|
base64_sem2: base64_sem2.o
|
||||||
|
$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
base64%.o: ../src/hammer.h ../src/glue.h
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,13 @@
|
||||||
|
// Example parser: Base64, syntax only.
|
||||||
|
//
|
||||||
|
// Demonstrates how to construct a Hammer parser that recognizes valid Base64
|
||||||
|
// sequences.
|
||||||
|
//
|
||||||
|
// Note that no semantic evaluation of the sequence is performed, i.e. the
|
||||||
|
// byte sequence being represented is not returned, or determined. See
|
||||||
|
// base64_sem1.c and base64_sem2.c for examples of how to attach appropriate
|
||||||
|
// semantic actions to the grammar.
|
||||||
|
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
|
|
||||||
const HParser* document = NULL;
|
const HParser* document = NULL;
|
||||||
|
|
@ -24,7 +34,7 @@ void init_parser(void)
|
||||||
base64_1, NULL)),
|
base64_1, NULL)),
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
document = base64;
|
document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
172
examples/base64_sem1.c
Normal file
172
examples/base64_sem1.c
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
// Example parser: Base64, with fine-grained semantic actions
|
||||||
|
//
|
||||||
|
// Demonstrates how to attach semantic actions to grammar rules and piece by
|
||||||
|
// piece transform the parse tree into the desired semantic representation,
|
||||||
|
// in this case a sequence of 8-bit values.
|
||||||
|
//
|
||||||
|
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||||
|
// Those rules using ARULE get an attached action which must be declared (as
|
||||||
|
// a function of type HAction) with a standard name based on the rule name.
|
||||||
|
//
|
||||||
|
// This variant of the example uses fine-grained semantic actions that
|
||||||
|
// transform the parse tree in small steps in a bottom-up fashion. Compare
|
||||||
|
// base64_sem2.c for an alternative approach using a single top-level action.
|
||||||
|
|
||||||
|
#include "../src/hammer.h"
|
||||||
|
#include "../src/glue.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
// They must be named act_<rulename>.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParsedToken *act_bsfdig(const HParseResult *p)
|
||||||
|
{
|
||||||
|
HParsedToken *res = H_MAKE_UINT(0);
|
||||||
|
|
||||||
|
uint8_t c = H_CAST_UINT(p->ast);
|
||||||
|
|
||||||
|
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||||
|
res->uint = c - 0x41;
|
||||||
|
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||||
|
res->uint = c - 0x61 + 26;
|
||||||
|
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||||
|
res->uint = c - 0x30 + 52;
|
||||||
|
else if(c == '+')
|
||||||
|
res->uint = 62;
|
||||||
|
else if(c == '/')
|
||||||
|
res->uint = 63;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||||
|
|
||||||
|
#define act_bsfdig_4bit act_bsfdig
|
||||||
|
#define act_bsfdig_2bit act_bsfdig
|
||||||
|
|
||||||
|
#define act_equals h_act_ignore
|
||||||
|
#define act_ws h_act_ignore
|
||||||
|
|
||||||
|
#define act_document act_index0
|
||||||
|
|
||||||
|
// General-form action to turn a block of base64 digits into bytes.
|
||||||
|
const HParsedToken *act_base64_n(int n, const HParseResult *p)
|
||||||
|
{
|
||||||
|
HParsedToken *res = H_MAKE_SEQN(n);
|
||||||
|
|
||||||
|
HParsedToken **digits = h_seq_elements(p->ast);
|
||||||
|
|
||||||
|
uint32_t x = 0;
|
||||||
|
int bits = 0;
|
||||||
|
for(int i=0; i<n+1; i++) {
|
||||||
|
x <<= 6; x |= digits[i]->uint;
|
||||||
|
bits += 6;
|
||||||
|
}
|
||||||
|
x >>= bits%8; // align, i.e. cut off extra bits
|
||||||
|
|
||||||
|
for(int i=0; i<n; i++) {
|
||||||
|
HParsedToken *item = H_MAKE_UINT(x & 0xFF);
|
||||||
|
|
||||||
|
res->seq->elements[n-1-i] = item; // output the last byte and
|
||||||
|
x >>= 8; // discard it
|
||||||
|
}
|
||||||
|
res->seq->used = n;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_base64_3, act_base64_n, 3);
|
||||||
|
H_ACT_APPLY(act_base64_2, act_base64_n, 2);
|
||||||
|
H_ACT_APPLY(act_base64_1, act_base64_n, 1);
|
||||||
|
|
||||||
|
const HParsedToken *act_base64(const HParseResult *p)
|
||||||
|
{
|
||||||
|
assert(p->ast->token_type == TT_SEQUENCE);
|
||||||
|
assert(p->ast->seq->used == 2);
|
||||||
|
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||||
|
|
||||||
|
HParsedToken *res = H_MAKE_SEQ();
|
||||||
|
|
||||||
|
// concatenate base64_3 blocks
|
||||||
|
HCountedArray *seq = H_FIELD_SEQ(0);
|
||||||
|
for(size_t i=0; i<seq->used; i++)
|
||||||
|
h_seq_append(res, seq->elements[i]);
|
||||||
|
|
||||||
|
// append one trailing base64_2 or _1 block
|
||||||
|
const HParsedToken *tok = h_seq_index(p->ast, 1);
|
||||||
|
if(tok->token_type == TT_SEQUENCE)
|
||||||
|
h_seq_append(res, tok);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Set up the parser with the grammar to be recognized.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParser *init_parser(void)
|
||||||
|
{
|
||||||
|
// CORE
|
||||||
|
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||||
|
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||||
|
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||||
|
|
||||||
|
// AUX.
|
||||||
|
H_RULE (plus, h_ch('+'));
|
||||||
|
H_RULE (slash, h_ch('/'));
|
||||||
|
H_ARULE(equals, h_ch('='));
|
||||||
|
|
||||||
|
H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||||
|
H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||||
|
H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||||
|
H_ARULE(base64_3, h_repeat_n(bsfdig, 4));
|
||||||
|
H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||||
|
H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||||
|
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||||
|
h_optional(h_choice(base64_2,
|
||||||
|
base64_1, NULL)),
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
H_ARULE(ws, h_many(space));
|
||||||
|
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||||
|
|
||||||
|
// BUG sometimes inputs that should just don't parse.
|
||||||
|
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||||
|
// Using less actions seemed to make it less likely.
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Main routine: print input, parse, print result, return success/failure.
|
||||||
|
///
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
uint8_t input[102400];
|
||||||
|
size_t inputsize;
|
||||||
|
const HParser *parser;
|
||||||
|
const HParseResult *result;
|
||||||
|
|
||||||
|
parser = init_parser();
|
||||||
|
|
||||||
|
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||||
|
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||||
|
fwrite(input, 1, inputsize, stderr);
|
||||||
|
result = h_parse(parser, input, inputsize);
|
||||||
|
|
||||||
|
if(result) {
|
||||||
|
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||||
|
h_pprint(stdout, result->ast, 0, 0);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
176
examples/base64_sem2.c
Normal file
176
examples/base64_sem2.c
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
// Example parser: Base64, with coarse-grained semantic actions
|
||||||
|
//
|
||||||
|
// Demonstrates how to attach semantic actions to a grammar and transform the
|
||||||
|
// parse tree into the desired semantic representation, in this case a sequence
|
||||||
|
// of 8-bit values.
|
||||||
|
//
|
||||||
|
// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
|
||||||
|
// Those rules using ARULE get an attached action which must be declared (as
|
||||||
|
// a function of type HAction) with a standard name based on the rule name.
|
||||||
|
//
|
||||||
|
// This variant of the example uses coarse-grained semantic actions,
|
||||||
|
// transforming the entire parse tree in one big step. Compare base64_sem1.c
|
||||||
|
// for an alternative approach using a fine-grained piece-by-piece
|
||||||
|
// transformation.
|
||||||
|
|
||||||
|
#include "../src/hammer.h"
|
||||||
|
#include "../src/glue.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
// They must be named act_<rulename>.
|
||||||
|
///
|
||||||
|
|
||||||
|
// helper: return the numeric value of a parsed base64 digit
|
||||||
|
uint8_t bsfdig_value(const HParsedToken *p)
|
||||||
|
{
|
||||||
|
uint8_t value = 0;
|
||||||
|
|
||||||
|
if(p && p->token_type == TT_UINT) {
|
||||||
|
uint8_t c = p->uint;
|
||||||
|
if(c >= 0x40 && c <= 0x5A) // A-Z
|
||||||
|
value = c - 0x41;
|
||||||
|
else if(c >= 0x60 && c <= 0x7A) // a-z
|
||||||
|
value = c - 0x61 + 26;
|
||||||
|
else if(c >= 0x30 && c <= 0x39) // 0-9
|
||||||
|
value = c - 0x30 + 52;
|
||||||
|
else if(c == '+')
|
||||||
|
value = 62;
|
||||||
|
else if(c == '/')
|
||||||
|
value = 63;
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper: append a byte value to a sequence
|
||||||
|
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
|
||||||
|
|
||||||
|
const HParsedToken *act_base64(const HParseResult *p)
|
||||||
|
{
|
||||||
|
assert(p->ast->token_type == TT_SEQUENCE);
|
||||||
|
assert(p->ast->seq->used == 2);
|
||||||
|
assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
|
||||||
|
|
||||||
|
// grab b64_3 block sequence
|
||||||
|
// grab and analyze b64 end block (_2 or _1)
|
||||||
|
const HParsedToken *b64_3 = p->ast->seq->elements[0];
|
||||||
|
const HParsedToken *b64_2 = p->ast->seq->elements[1];
|
||||||
|
const HParsedToken *b64_1 = p->ast->seq->elements[1];
|
||||||
|
|
||||||
|
if(b64_2->token_type != TT_SEQUENCE)
|
||||||
|
b64_1 = b64_2 = NULL;
|
||||||
|
else if(b64_2->seq->elements[2]->uint == '=')
|
||||||
|
b64_2 = NULL;
|
||||||
|
else
|
||||||
|
b64_1 = NULL;
|
||||||
|
|
||||||
|
// allocate result sequence
|
||||||
|
HParsedToken *res = H_MAKE_SEQ();
|
||||||
|
|
||||||
|
// concatenate base64_3 blocks
|
||||||
|
for(size_t i=0; i<b64_3->seq->used; i++) {
|
||||||
|
assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE);
|
||||||
|
HParsedToken **digits = b64_3->seq->elements[i]->seq->elements;
|
||||||
|
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[3]);
|
||||||
|
seq_append_byte(res, (x >> 16) & 0xFF);
|
||||||
|
seq_append_byte(res, (x >> 8) & 0xFF);
|
||||||
|
seq_append_byte(res, x & 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
// append one trailing base64_2 or _1 block
|
||||||
|
if(b64_2) {
|
||||||
|
HParsedToken **digits = b64_2->seq->elements;
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[2]);
|
||||||
|
seq_append_byte(res, (x >> 10) & 0xFF);
|
||||||
|
seq_append_byte(res, (x >> 2) & 0xFF);
|
||||||
|
} else if(b64_1) {
|
||||||
|
HParsedToken **digits = b64_1->seq->elements;
|
||||||
|
uint32_t x = bsfdig_value(digits[0]);
|
||||||
|
x <<= 6; x |= bsfdig_value(digits[1]);
|
||||||
|
seq_append_byte(res, (x >> 4) & 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
H_ACT_APPLY(act_index0, h_act_index, 0);
|
||||||
|
|
||||||
|
#define act_ws h_act_ignore
|
||||||
|
#define act_document act_index0
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Set up the parser with the grammar to be recognized.
|
||||||
|
///
|
||||||
|
|
||||||
|
const HParser *init_parser(void)
|
||||||
|
{
|
||||||
|
// CORE
|
||||||
|
H_RULE (digit, h_ch_range(0x30, 0x39));
|
||||||
|
H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
|
||||||
|
H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6));
|
||||||
|
|
||||||
|
// AUX.
|
||||||
|
H_RULE (plus, h_ch('+'));
|
||||||
|
H_RULE (slash, h_ch('/'));
|
||||||
|
H_RULE (equals, h_ch('='));
|
||||||
|
|
||||||
|
H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL));
|
||||||
|
H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
|
||||||
|
H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4));
|
||||||
|
H_RULE (base64_3, h_repeat_n(bsfdig, 4));
|
||||||
|
H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
|
||||||
|
H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
|
||||||
|
H_ARULE(base64, h_sequence(h_many(base64_3),
|
||||||
|
h_optional(h_choice(base64_2,
|
||||||
|
base64_1, NULL)),
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
H_ARULE(ws, h_many(space));
|
||||||
|
H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL));
|
||||||
|
|
||||||
|
// BUG sometimes inputs that should just don't parse.
|
||||||
|
// It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
||||||
|
// Using less actions seemed to make it less likely.
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
|
||||||
|
// Main routine: print input, parse, print result, return success/failure.
|
||||||
|
///
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
uint8_t input[102400];
|
||||||
|
size_t inputsize;
|
||||||
|
const HParser *parser;
|
||||||
|
const HParseResult *result;
|
||||||
|
|
||||||
|
parser = init_parser();
|
||||||
|
|
||||||
|
inputsize = fread(input, 1, sizeof(input), stdin);
|
||||||
|
fprintf(stderr, "inputsize=%lu\ninput=", inputsize);
|
||||||
|
fwrite(input, 1, inputsize, stderr);
|
||||||
|
result = h_parse(parser, input, inputsize);
|
||||||
|
|
||||||
|
if(result) {
|
||||||
|
fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
|
||||||
|
h_pprint(stdout, result->ast, 0, 0);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
#define HAMMER_DNS_COMMON__H
|
#define HAMMER_DNS_COMMON__H
|
||||||
|
|
||||||
#include "../src/hammer.h"
|
#include "../src/hammer.h"
|
||||||
#include "glue.h"
|
#include "../src/glue.h"
|
||||||
|
|
||||||
const HParser* init_domain();
|
const HParser* init_domain();
|
||||||
const HParser* init_character_string();
|
const HParser* init_character_string();
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,7 @@ HAMMER_PARTS := \
|
||||||
system_allocator.o \
|
system_allocator.o \
|
||||||
benchmark.o \
|
benchmark.o \
|
||||||
compile.o \
|
compile.o \
|
||||||
|
glue.o \
|
||||||
$(PARSERS:%=parsers/%.o) \
|
$(PARSERS:%=parsers/%.o) \
|
||||||
$(BACKENDS:%=backends/%.o)
|
$(BACKENDS:%=backends/%.o)
|
||||||
|
|
||||||
|
|
@ -67,6 +68,7 @@ libhammer.a: $(HAMMER_PARTS)
|
||||||
|
|
||||||
bitreader.o: test_suite.h
|
bitreader.o: test_suite.h
|
||||||
hammer.o: hammer.h
|
hammer.o: hammer.h
|
||||||
|
glue.o: hammer.h glue.h
|
||||||
|
|
||||||
all: libhammer.a
|
all: libhammer.a
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -77,14 +77,18 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
|
||||||
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
|
||||||
if (!rec_detect->head) {
|
if (!rec_detect->head) {
|
||||||
HRecursionHead *some = a_new(HRecursionHead, 1);
|
HRecursionHead *some = a_new(HRecursionHead, 1);
|
||||||
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
|
some->head_parser = p;
|
||||||
|
some->involved_set = h_slist_new(state->arena);
|
||||||
|
some->eval_set = NULL;
|
||||||
rec_detect->head = some;
|
rec_detect->head = some;
|
||||||
}
|
}
|
||||||
assert(state->lr_stack->head != NULL);
|
assert(state->lr_stack->head != NULL);
|
||||||
HLeftRec *lr = state->lr_stack->head->elem;
|
HSlistNode *head = state->lr_stack->head;
|
||||||
while (lr && lr->rule != p) {
|
HLeftRec *lr;
|
||||||
|
while (head && (lr = head->elem)->rule != p) {
|
||||||
lr->head = rec_detect->head;
|
lr->head = rec_detect->head;
|
||||||
h_slist_push(lr->head->involved_set, (void*)lr->rule);
|
h_slist_push(lr->head->involved_set, (void*)lr->rule);
|
||||||
|
head = head->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -101,7 +105,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head)
|
||||||
HParseResult *old_res = old_cached->right->result;
|
HParseResult *old_res = old_cached->right->result;
|
||||||
|
|
||||||
// reset the eval_set of the head of the recursion at each beginning of growth
|
// reset the eval_set of the head of the recursion at each beginning of growth
|
||||||
head->eval_set = head->involved_set;
|
head->eval_set = h_slist_copy(head->involved_set);
|
||||||
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
|
||||||
|
|
||||||
if (tmp_res) {
|
if (tmp_res) {
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,26 @@ HSlist* h_slist_new(HArena *arena) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a new list, allocated from the same arena as slist, holding the same
// element pointers in the same order. The elements themselves are not copied.
HSlist* h_slist_copy(HSlist *slist) {
  HSlist *copy = h_slist_new(slist->arena);
  HSlistNode *src = slist->head;

  // Nothing to copy: hand back the fresh empty list.
  if (src == NULL)
    return copy;

  // The first element goes in via push, which gives the copy its head node.
  h_slist_push(copy, src->elem);
  HSlistNode *last = copy->head;

  // Every further element is appended behind the current last node so the
  // original order is kept.
  for (src = src->next; src != NULL; src = src->next) {
    HSlistNode *fresh = h_arena_malloc(slist->arena, sizeof(HSlistNode));
    fresh->elem = src->elem;
    fresh->next = NULL;
    last->next = fresh;
    last = fresh;
  }

  return copy;
}
|
||||||
|
|
||||||
void* h_slist_pop(HSlist *slist) {
|
void* h_slist_pop(HSlist *slist) {
|
||||||
HSlistNode *head = slist->head;
|
HSlistNode *head = slist->head;
|
||||||
if (!head)
|
if (!head)
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,13 @@ HParsedToken *h_make_seq(HArena *arena)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Makes an empty TT_SEQUENCE token whose backing array is created with an
// expected size of n elements.
HParsedToken *h_make_seqn(HArena *arena, size_t n)
{
  HParsedToken *tok = h_make_(arena, TT_SEQUENCE);

  tok->seq = h_carray_new_sized(arena, n);

  return tok;
}
|
||||||
|
|
||||||
HParsedToken *h_make_bytes(HArena *arena, size_t len)
|
HParsedToken *h_make_bytes(HArena *arena, size_t len)
|
||||||
{
|
{
|
||||||
HParsedToken *ret = h_make_(arena, TT_BYTES);
|
HParsedToken *ret = h_make_(arena, TT_BYTES);
|
||||||
|
|
@ -142,7 +149,7 @@ void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
|
||||||
assert(ys != NULL);
|
assert(ys != NULL);
|
||||||
assert(ys->token_type == TT_SEQUENCE);
|
assert(ys->token_type == TT_SEQUENCE);
|
||||||
|
|
||||||
for(size_t i; i<ys->seq->used; i++)
|
for(size_t i=0; i<ys->seq->used; i++)
|
||||||
h_carray_append(xs->seq, ys->seq->elements[i]);
|
h_carray_append(xs->seq, ys->seq->elements[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -21,11 +21,11 @@
|
||||||
// See the leading comment blocks on the sections below for more details.
|
// See the leading comment blocks on the sections below for more details.
|
||||||
//
|
//
|
||||||
|
|
||||||
#ifndef HAMMER_EXAMPLES_GLUE__H
|
#ifndef HAMMER_GLUE__H
|
||||||
#define HAMMER_EXAMPLES_GLUE__H
|
#define HAMMER_GLUE__H
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "../src/hammer.h"
|
#include "hammer.h"
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -173,6 +173,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p);
|
||||||
|
|
||||||
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
|
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
|
||||||
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
|
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
|
||||||
|
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
|
||||||
HParsedToken *h_make_bytes(HArena *arena, size_t len);
|
HParsedToken *h_make_bytes(HArena *arena, size_t len);
|
||||||
HParsedToken *h_make_sint(HArena *arena, int64_t val);
|
HParsedToken *h_make_sint(HArena *arena, int64_t val);
|
||||||
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
|
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
|
||||||
|
|
@ -180,6 +181,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
|
||||||
// Standard short-hands to make tokens in an action.
|
// Standard short-hands to make tokens in an action.
|
||||||
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
|
#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL)
|
||||||
#define H_MAKE_SEQ() h_make_seq(p->arena)
|
#define H_MAKE_SEQ() h_make_seq(p->arena)
|
||||||
|
#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N)
|
||||||
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
|
#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
|
||||||
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
|
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
|
||||||
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
|
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
|
||||||
|
|
@ -209,6 +209,7 @@ HCountedArray *h_carray_new(HArena * arena);
|
||||||
void h_carray_append(HCountedArray *array, void* item);
|
void h_carray_append(HCountedArray *array, void* item);
|
||||||
|
|
||||||
HSlist* h_slist_new(HArena *arena);
|
HSlist* h_slist_new(HArena *arena);
|
||||||
|
HSlist* h_slist_copy(HSlist *slist);
|
||||||
void* h_slist_pop(HSlist *slist);
|
void* h_slist_pop(HSlist *slist);
|
||||||
void h_slist_push(HSlist *slist, void* item);
|
void h_slist_push(HSlist *slist, void* item);
|
||||||
bool h_slist_find(HSlist *slist, const void* item);
|
bool h_slist_find(HSlist *slist, const void* item);
|
||||||
|
|
|
||||||
|
|
@ -365,6 +365,17 @@ static void test_not(void) {
|
||||||
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_leftrec(void) {
|
||||||
|
const HParser *a_ = h_ch('a');
|
||||||
|
|
||||||
|
HParser *lr_ = h_indirect();
|
||||||
|
h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
|
||||||
|
|
||||||
|
g_check_parse_ok(lr_, "a", 1, "u0x61");
|
||||||
|
g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)");
|
||||||
|
g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)");
|
||||||
|
}
|
||||||
|
|
||||||
void register_parser_tests(void) {
|
void register_parser_tests(void) {
|
||||||
g_test_add_func("/core/parser/token", test_token);
|
g_test_add_func("/core/parser/token", test_token);
|
||||||
g_test_add_func("/core/parser/ch", test_ch);
|
g_test_add_func("/core/parser/ch", test_ch);
|
||||||
|
|
@ -406,4 +417,5 @@ void register_parser_tests(void) {
|
||||||
g_test_add_func("/core/parser/and", test_and);
|
g_test_add_func("/core/parser/and", test_and);
|
||||||
g_test_add_func("/core/parser/not", test_not);
|
g_test_add_func("/core/parser/not", test_not);
|
||||||
g_test_add_func("/core/parser/ignore", test_ignore);
|
g_test_add_func("/core/parser/ignore", test_ignore);
|
||||||
|
g_test_add_func("/core/parser/leftrec", test_leftrec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue