Merge remote-tracking branch 'mlp/master'
This commit is contained in:
commit
2dd687ea66
5 changed files with 463 additions and 63 deletions
85
examples/dns.c
Normal file
85
examples/dns.c
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
#include "../hammer.h"
|
||||||
|
|
||||||
|
/**
 * Predicate for attr_bool(): reports whether the unsigned integer held
 * by the parsed token is zero. Used below to require that the Z bits
 * of the DNS header are all clear.
 */
bool is_zero(parse_result_t *p) {
  const parsed_token_t *tok = p->ast;
  return tok->uint == 0;
}
|
||||||
|
|
||||||
|
/**
 * Semantic validation hook for a completely parsed DNS message; it is
 * attached to the top-level parser with attr_bool().
 *
 * No validation rules are implemented yet, so every message is
 * accepted. The explicit return is required: attr_bool() tests the
 * returned value, and using the result of a non-void function that
 * falls off its end is undefined behavior.
 *
 * Returns true if the message is acceptable, false to fail the parse.
 */
bool validate_dns(parse_result_t *p) {
  (void)p; // unused until real validation is written
  // TODO: add the actual cross-field checks for a DNS message here.
  return true;
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
|
const parser_t dns_header = sequence(bits(16, false), // ID
|
||||||
|
bits(1, false), // QR
|
||||||
|
bits(4, false), // opcode
|
||||||
|
bits(1, false), // AA
|
||||||
|
bits(1, false), // TC
|
||||||
|
bits(1, false), // RD
|
||||||
|
bits(1, false), // RA
|
||||||
|
ignore(attr_bool(bits(3, false), is_zero)), // Z
|
||||||
|
bits(4, false), // RCODE
|
||||||
|
uint16(), // QDCOUNT
|
||||||
|
uint16(), // ANCOUNT
|
||||||
|
uint16(), // NSCOUNT
|
||||||
|
uint16(), // ARCOUNT
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
const parser_t *dns_question = sequence(length_value(uint8(), uint8()), // QNAME
|
||||||
|
uint16(), // QTYPE
|
||||||
|
uint16(), // QCLASS
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
const parser_t *letter = choice(range('a', 'z'),
|
||||||
|
range('A', 'Z'),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
const parser_t *let_dig = choice(letter,
|
||||||
|
range('0', '9'),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
const parser_t *ldh_str = many1(choice(let_dig,
|
||||||
|
ch('-'),
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
const parser_t *label = sequence(letter,
|
||||||
|
optional(sequence(optional(ldh_str),
|
||||||
|
let_dig,
|
||||||
|
NULL)),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* You could write it like this ...
|
||||||
|
* parser_t *indirect_subdomain = indirect();
|
||||||
|
* const parser_t *subdomain = choice(label,
|
||||||
|
* sequence(indirect_subdomain,
|
||||||
|
* ch('.'),
|
||||||
|
* label,
|
||||||
|
* NULL),
|
||||||
|
* NULL);
|
||||||
|
* bind_indirect(indirect_subdomain, subdomain);
|
||||||
|
*
|
||||||
|
* ... but this is easier and equivalent
|
||||||
|
*/
|
||||||
|
|
||||||
|
parser_t *subdomain = sepBy1(label, ch('.'));
|
||||||
|
|
||||||
|
const parser_t *domain = choice(subdomain,
|
||||||
|
ch(' '),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
const parser_t *dns_rr = sequence(domain, // NAME
|
||||||
|
uint16(), // TYPE
|
||||||
|
uint16(), // CLASS
|
||||||
|
uint32(), // TTL
|
||||||
|
length_value(uint16(), uint8()) // RDLENGTH+RDATA
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
|
||||||
|
const parser_t *dns_message = attr_bool(sequence(dns_header,
|
||||||
|
dns_question,
|
||||||
|
many(dns_rr),
|
||||||
|
end_p(),
|
||||||
|
NULL),
|
||||||
|
validate_dns);
|
||||||
|
}
|
||||||
139
src/hammer.c
139
src/hammer.c
|
|
@ -336,7 +336,31 @@ const parser_t* whitespace(const parser_t* p) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const parser_t* action(const parser_t* p, const action_t a) { return &unimplemented; }
|
typedef struct {
|
||||||
|
const parser_t *p;
|
||||||
|
action_t action;
|
||||||
|
} parse_action_t;
|
||||||
|
|
||||||
|
static parse_result_t* parse_action(void *env, parse_state_t *state) {
|
||||||
|
parse_action_t *a = (parse_action_t*)env;
|
||||||
|
if (a->p && a->action) {
|
||||||
|
parse_result_t *tmp = do_parse(a->p, state);
|
||||||
|
//parsed_token_t *tok = a->action(do_parse(a->p, state));
|
||||||
|
const parsed_token_t *tok = a->action(tmp);
|
||||||
|
return make_result(state, (parsed_token_t*)tok);
|
||||||
|
} else // either the parser's missing or the action's missing
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const parser_t* action(const parser_t* p, const action_t a) {
|
||||||
|
parser_t *res = g_new(parser_t, 1);
|
||||||
|
res->fn = parse_action;
|
||||||
|
parse_action_t *env = g_new(parse_action_t, 1);
|
||||||
|
env->p = p;
|
||||||
|
env->action = a;
|
||||||
|
res->env = (void*)env;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
static parse_result_t* parse_charset(void *env, parse_state_t *state) {
|
static parse_result_t* parse_charset(void *env, parse_state_t *state) {
|
||||||
uint8_t in = read_bits(&state->input_stream, 8, false);
|
uint8_t in = read_bits(&state->input_stream, 8, false);
|
||||||
|
|
@ -783,7 +807,6 @@ const parser_t* epsilon_p() {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static parse_result_t* parse_indirect(void* env, parse_state_t* state) {
|
static parse_result_t* parse_indirect(void* env, parse_state_t* state) {
|
||||||
return do_parse(env, state);
|
return do_parse(env, state);
|
||||||
}
|
}
|
||||||
|
|
@ -798,7 +821,68 @@ parser_t* indirect() {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
const parser_t* attr_bool(const parser_t* p, attr_bool_t a) { return &unimplemented; }
|
typedef struct {
|
||||||
|
const parser_t *p;
|
||||||
|
predicate_t pred;
|
||||||
|
} attr_bool_t;
|
||||||
|
|
||||||
|
static parse_result_t* parse_attr_bool(void *env, parse_state_t *state) {
|
||||||
|
attr_bool_t *a = (attr_bool_t*)env;
|
||||||
|
parse_result_t *res = do_parse(a->p, state);
|
||||||
|
if (res) {
|
||||||
|
if (a->pred(res))
|
||||||
|
return res;
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
} else
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Builds a parser that applies p and succeeds only if pred returns
 * true for p's result. The pair is stored in a freshly allocated
 * attr_bool_t that becomes the new parser's environment.
 */
const parser_t* attr_bool(const parser_t* p, predicate_t pred) {
  parser_t *wrapper = g_new(parser_t, 1);
  attr_bool_t *ctx = g_new(attr_bool_t, 1);

  ctx->pred = pred;
  ctx->p = p;

  wrapper->env = (void*)ctx;
  wrapper->fn = parse_attr_bool;
  return wrapper;
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const parser_t *length;
|
||||||
|
const parser_t *value;
|
||||||
|
} lv_t;
|
||||||
|
|
||||||
|
static parse_result_t* parse_length_value(void *env, parse_state_t *state) {
|
||||||
|
lv_t *lv = (lv_t*)env;
|
||||||
|
parse_result_t *len = do_parse(lv->length, state);
|
||||||
|
if (!len)
|
||||||
|
return NULL;
|
||||||
|
if (len->ast->token_type != TT_UINT)
|
||||||
|
errx(1, "Length parser must return an unsigned integer");
|
||||||
|
parser_t epsilon_local = {
|
||||||
|
.fn = parse_epsilon,
|
||||||
|
.env = NULL
|
||||||
|
};
|
||||||
|
repeat_t repeat = {
|
||||||
|
.p = lv->value,
|
||||||
|
.sep = &epsilon_local,
|
||||||
|
.count = len->ast->uint,
|
||||||
|
.min_p = false
|
||||||
|
};
|
||||||
|
return parse_many(&repeat, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
const parser_t* length_value(const parser_t* length, const parser_t* value) {
|
||||||
|
parser_t *res = g_new(parser_t, 1);
|
||||||
|
res->fn = parse_length_value;
|
||||||
|
lv_t *env = g_new(lv_t, 1);
|
||||||
|
env->length = length;
|
||||||
|
env->value = value;
|
||||||
|
res->env = (void*)env;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
const parser_t* and(const parser_t* p) { return &unimplemented; }
|
const parser_t* and(const parser_t* p) { return &unimplemented; }
|
||||||
|
|
||||||
static parse_result_t* parse_not(void* env, parse_state_t* state) {
|
static parse_result_t* parse_not(void* env, parse_state_t* state) {
|
||||||
|
|
@ -881,7 +965,7 @@ static void test_range(void) {
|
||||||
static void test_int64(void) {
|
static void test_int64(void) {
|
||||||
const parser_t *int64_ = int64();
|
const parser_t *int64_ = int64();
|
||||||
|
|
||||||
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000");
|
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s0x200000000");
|
||||||
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -962,15 +1046,52 @@ static void test_whitespace(void) {
|
||||||
g_check_parse_failed(whitespace_, "_a", 2);
|
g_check_parse_failed(whitespace_, "_a", 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_result_t* upcase(parse_result_t *p) {
|
#include <ctype.h>
|
||||||
return NULL; // shut compiler up
|
|
||||||
|
/*
 * Allocates a TT_UINT token in p's arena that holds the upper-cased
 * value of src.
 */
static parsed_token_t* upcase_uint(parse_result_t *p, const parsed_token_t *src) {
  parsed_token_t *tok = a_new_(p->arena, parsed_token_t, 1);
  tok->token_type = TT_UINT;
  // toupper() is only defined for arguments representable as unsigned
  // char (or EOF), so values above 0xFF are passed through unchanged.
  if (src->uint <= 0xFF)
    tok->uint = toupper((int)src->uint);
  else
    tok->uint = src->uint;
  return tok;
}

/*
 * Test action: upper-cases character tokens.
 *
 * - TT_SEQUENCE: returns a new sequence in which every TT_UINT element
 *   is replaced by an upper-cased copy; other elements are carried over
 *   as they are.
 * - TT_UINT: returns a new upper-cased token.
 * - anything else: returns the original token unchanged.
 *
 * Returns NULL if p is NULL or p carries no AST.
 */
const parsed_token_t* upcase(parse_result_t *p) {
  if (!p || !p->ast)
    return NULL;

  switch(p->ast->token_type) {
  case TT_SEQUENCE:
    {
      parsed_token_t *ret = a_new_(p->arena, parsed_token_t, 1);
      counted_array_t *seq = carray_new_sized(p->arena, p->ast->seq->used);
      ret->token_type = TT_SEQUENCE;
      for (size_t i=0; i<p->ast->seq->used; ++i) {
        const parsed_token_t *elem = (parsed_token_t*)p->ast->seq->elements[i];
        if (TT_UINT == elem->token_type) {
          carray_append(seq, upcase_uint(p, elem));
        } else {
          carray_append(seq, p->ast->seq->elements[i]);
        }
      }
      ret->seq = seq;
      return (const parsed_token_t*)ret;
    }
  case TT_UINT:
    return (const parsed_token_t*)upcase_uint(p, p->ast);
  default:
    return p->ast;
  }
}
|
||||||
|
|
||||||
static void test_action(void) {
|
static void test_action(void) {
|
||||||
const parser_t *action_ = action(sequence(choice(ch('a'), ch('A'), NULL), choice(ch('b'), ch('B'), NULL), NULL), upcase);
|
const parser_t *action_ = action(sequence(choice(ch('a'),
|
||||||
|
ch('A'),
|
||||||
|
NULL),
|
||||||
|
choice(ch('b'),
|
||||||
|
ch('B'),
|
||||||
|
NULL),
|
||||||
|
NULL),
|
||||||
|
upcase);
|
||||||
|
|
||||||
g_check_parse_ok(action_, "ab", 2, "(u0x41, u0x42)");
|
g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)");
|
||||||
g_check_parse_ok(action_, "AB", 2, "(u0x41, u0x42)");
|
g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_not_in(void) {
|
static void test_not_in(void) {
|
||||||
|
|
|
||||||
296
src/hammer.h
296
src/hammer.h
|
|
@ -36,6 +36,7 @@ typedef enum token_type {
|
||||||
TT_SINT,
|
TT_SINT,
|
||||||
TT_UINT,
|
TT_UINT,
|
||||||
TT_SEQUENCE,
|
TT_SEQUENCE,
|
||||||
|
TT_USER = 64,
|
||||||
TT_ERR,
|
TT_ERR,
|
||||||
TT_MAX
|
TT_MAX
|
||||||
} token_type_t;
|
} token_type_t;
|
||||||
|
|
@ -59,173 +60,362 @@ typedef struct parsed_token {
|
||||||
double dbl;
|
double dbl;
|
||||||
float flt;
|
float flt;
|
||||||
counted_array_t *seq; // a sequence of parsed_token_t's
|
counted_array_t *seq; // a sequence of parsed_token_t's
|
||||||
|
void *user;
|
||||||
};
|
};
|
||||||
size_t index;
|
size_t index;
|
||||||
char bit_offset;
|
char bit_offset;
|
||||||
} parsed_token_t;
|
} parsed_token_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The result of a successful parse.
|
||||||
/* If a parse fails, the parse result will be NULL.
|
* If a parse fails, the parse result will be NULL.
|
||||||
* If a parse is successful but there's nothing there (i.e., if end_p succeeds) then there's a parse result but its ast is NULL.
|
* If a parse is successful but there's nothing there (i.e., if end_p
|
||||||
|
* succeeds) then there's a parse result but its ast is NULL.
|
||||||
*/
|
*/
|
||||||
typedef struct parse_result {
|
typedef struct parse_result {
|
||||||
const parsed_token_t *ast;
|
const parsed_token_t *ast;
|
||||||
arena_t arena;
|
arena_t arena;
|
||||||
} parse_result_t;
|
} parse_result_t;
|
||||||
|
|
||||||
/* Type of an action to apply to an AST, used in the action() parser. */
|
/**
|
||||||
typedef parse_result_t* (*action_t)(parse_result_t *p);
|
* Type of an action to apply to an AST, used in the action() parser.
|
||||||
|
* It can be any (user-defined) function that takes a parse_result_t*
|
||||||
|
* and returns a parsed_token_t*. (This is so that the user doesn't
|
||||||
|
* have to worry about memory allocation; action() does that for you.)
|
||||||
|
* Note that the tagged union in parsed_token_t* supports user-defined
|
||||||
|
* types, so you can create your own token types (corresponding to,
|
||||||
|
* say, structs) and stuff values for them into the void* in the
|
||||||
|
* tagged union in parsed_token_t.
|
||||||
|
*/
|
||||||
|
typedef const parsed_token_t* (*action_t)(parse_result_t *p);
|
||||||
|
|
||||||
/* Type of a boolean attribute-checking function, used in the attr_bool() parser. */
|
/**
|
||||||
typedef int (*attr_bool_t)(void *env);
|
* Type of a boolean attribute-checking function, used in the
|
||||||
|
* attr_bool() parser. It can be any (user-defined) function that takes
|
||||||
|
* a parse_result_t* and returns true or false.
|
||||||
|
*/
|
||||||
|
typedef bool (*predicate_t)(parse_result_t *p);
|
||||||
|
|
||||||
typedef struct parser {
|
typedef struct parser {
|
||||||
parse_result_t* (*fn)(void *env, parse_state_t *state);
|
parse_result_t* (*fn)(void *env, parse_state_t *state);
|
||||||
void *env;
|
void *env;
|
||||||
} parser_t;
|
} parser_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Top-level function to call a parser that has been built over some
|
||||||
|
* piece of input (of known size).
|
||||||
|
*/
|
||||||
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length);
|
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length);
|
||||||
|
|
||||||
/* Given a string, returns a parser that parses that string value. */
|
/**
|
||||||
|
* Given a string, returns a parser that parses that string value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_BYTES
|
||||||
|
*/
|
||||||
const parser_t* token(const uint8_t *str, const size_t len);
|
const parser_t* token(const uint8_t *str, const size_t len);
|
||||||
|
|
||||||
/* Given a single character, returns a parser that parses that character. */
|
/**
|
||||||
|
* Given a single character, returns a parser that parses that
|
||||||
|
* character.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* ch(const uint8_t c);
|
const parser_t* ch(const uint8_t c);
|
||||||
|
|
||||||
/* Given two single-character bounds, lower and upper, returns a parser that parses a single character within the range [lower, upper] (inclusive). */
|
/**
|
||||||
|
* Given two single-character bounds, lower and upper, returns a parser
|
||||||
|
* that parses a single character within the range [lower, upper]
|
||||||
|
* (inclusive).
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* range(const uint8_t lower, const uint8_t upper);
|
const parser_t* range(const uint8_t lower, const uint8_t upper);
|
||||||
|
|
||||||
/* Returns a parser that parses the specified number of bits. sign == true if signed, false if unsigned. */
|
/**
|
||||||
|
* Returns a parser that parses the specified number of bits. sign ==
|
||||||
|
* true if signed, false if unsigned.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
|
||||||
|
*/
|
||||||
const parser_t* bits(size_t len, bool sign);
|
const parser_t* bits(size_t len, bool sign);
|
||||||
|
|
||||||
/* Returns a parser that parses a signed 8-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses a signed 8-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SINT
|
||||||
|
*/
|
||||||
const parser_t* int64();
|
const parser_t* int64();
|
||||||
|
|
||||||
/* Returns a parser that parses a signed 4-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses a signed 4-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SINT
|
||||||
|
*/
|
||||||
const parser_t* int32();
|
const parser_t* int32();
|
||||||
|
|
||||||
/* Returns a parser that parses a signed 2-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses a signed 2-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SINT
|
||||||
|
*/
|
||||||
const parser_t* int16();
|
const parser_t* int16();
|
||||||
|
|
||||||
/* Returns a parser that parses a signed 1-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses a signed 1-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SINT
|
||||||
|
*/
|
||||||
const parser_t* int8();
|
const parser_t* int8();
|
||||||
|
|
||||||
/* Returns a parser that parses an unsigned 8-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses an unsigned 8-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* uint64();
|
const parser_t* uint64();
|
||||||
|
|
||||||
/* Returns a parser that parses an unsigned 4-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses an unsigned 4-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* uint32();
|
const parser_t* uint32();
|
||||||
|
|
||||||
/* Returns a parser that parses an unsigned 2-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses an unsigned 2-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* uint16();
|
const parser_t* uint16();
|
||||||
|
|
||||||
/* Returns a parser that parses an unsigned 1-byte integer value. */
|
/**
|
||||||
|
* Returns a parser that parses an unsigned 1-byte integer value.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* uint8();
|
const parser_t* uint8();
|
||||||
|
|
||||||
/* Given another parser, p, returns a parser that skips any whitespace and then applies p. */
|
/**
|
||||||
|
* Given another parser, p, returns a parser that skips any whitespace
|
||||||
|
* and then applies p.
|
||||||
|
*
|
||||||
|
* Result token type: p's result type
|
||||||
|
*/
|
||||||
const parser_t* whitespace(const parser_t* p);
|
const parser_t* whitespace(const parser_t* p);
|
||||||
|
|
||||||
/* Given another parser, p, and a function f, returns a parser that applies p, then applies f to everything in the AST of p's result. */
|
/**
|
||||||
|
* Given another parser, p, and a function f, returns a parser that
|
||||||
|
* applies p, then applies f to everything in the AST of p's result.
|
||||||
|
*
|
||||||
|
* Result token type: any
|
||||||
|
*/
|
||||||
const parser_t* action(const parser_t* p, const action_t a);
|
const parser_t* action(const parser_t* p, const action_t a);
|
||||||
|
|
||||||
/* Parse a single character *NOT* in charset */
|
/**
|
||||||
|
* Parse a single character *NOT* in the given charset.
|
||||||
|
*
|
||||||
|
* Result token type: TT_UINT
|
||||||
|
*/
|
||||||
const parser_t* not_in(const uint8_t *charset, int length);
|
const parser_t* not_in(const uint8_t *charset, int length);
|
||||||
|
|
||||||
/* A no-argument parser that succeeds if there is no more input to parse. */
|
/**
|
||||||
|
* A no-argument parser that succeeds if there is no more input to
|
||||||
|
* parse.
|
||||||
|
*
|
||||||
|
* Result token type: None. The parse_result_t exists but its AST is NULL.
|
||||||
|
*/
|
||||||
const parser_t* end_p();
|
const parser_t* end_p();
|
||||||
|
|
||||||
/* This parser always fails. */
|
/**
|
||||||
|
* This parser always fails.
|
||||||
|
*
|
||||||
|
* Result token type: NULL. Always.
|
||||||
|
*/
|
||||||
const parser_t* nothing_p();
|
const parser_t* nothing_p();
|
||||||
|
|
||||||
/* Given an null-terminated list of parsers, apply each parser in order. The parse succeeds only if all parsers succeed. */
|
/**
|
||||||
|
* Given a null-terminated list of parsers, apply each parser in order.
|
||||||
|
* The parse succeeds only if all parsers succeed.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
|
*/
|
||||||
const parser_t* sequence(const parser_t* p, ...) __attribute__((sentinel));
|
const parser_t* sequence(const parser_t* p, ...) __attribute__((sentinel));
|
||||||
|
|
||||||
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
|
/**
|
||||||
|
* Given a null-terminated list of parsers, apply each parser in order. The
|
||||||
|
* first parser to succeed is the result; if no parsers succeed, the
|
||||||
|
* parse fails.
|
||||||
|
*
|
||||||
|
* Result token type: The type of the first successful parser's result.
|
||||||
|
*/
|
||||||
const parser_t* choice(const parser_t* p, ...) __attribute__((sentinel));
|
const parser_t* choice(const parser_t* p, ...) __attribute__((sentinel));
|
||||||
|
|
||||||
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
/**
|
||||||
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
* cases:
|
||||||
* - if p1 succeeds and p2 fails
|
* - if p1 succeeds and p2 fails
|
||||||
* - if both succeed but p1's result is as long as or shorter than p2's
|
* - if both succeed but p1's result is as long as or shorter than p2's
|
||||||
|
*
|
||||||
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
|
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
/**
|
||||||
|
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||||
|
* cases:
|
||||||
* - if p1 succeeds and p2 fails
|
* - if p1 succeeds and p2 fails
|
||||||
* - if both succeed but p2's result is shorter than p1's
|
* - if both succeed but p2's result is shorter than p1's
|
||||||
|
*
|
||||||
|
* Result token type: p1's result type.
|
||||||
*/
|
*/
|
||||||
const parser_t* difference(const parser_t* p1, const parser_t* p2);
|
const parser_t* difference(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeed, but not if they both do.
|
/**
|
||||||
|
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
|
||||||
|
* p2 succeed, but not if they both do.
|
||||||
|
*
|
||||||
|
* Result token type: The type of the result of whichever parser succeeded.
|
||||||
*/
|
*/
|
||||||
const parser_t* xor(const parser_t* p1, const parser_t* p2);
|
const parser_t* xor(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
/* Given a parser, p, this parser succeeds for zero or more repetitions of p. */
|
/**
|
||||||
|
* Given a parser, p, this parser succeeds for zero or more repetitions
|
||||||
|
* of p.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
|
*/
|
||||||
const parser_t* many(const parser_t* p);
|
const parser_t* many(const parser_t* p);
|
||||||
|
|
||||||
/* Given a parser, p, this parser succeeds for one or more repetitions of p. */
|
/**
|
||||||
|
* Given a parser, p, this parser succeeds for one or more repetitions
|
||||||
|
* of p.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
|
*/
|
||||||
const parser_t* many1(const parser_t* p);
|
const parser_t* many1(const parser_t* p);
|
||||||
|
|
||||||
/* Given a parser, p, this parser succeeds for exactly N repetitions of p. */
|
/**
|
||||||
|
* Given a parser, p, this parser succeeds for exactly N repetitions
|
||||||
|
* of p.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
|
*/
|
||||||
const parser_t* repeat_n(const parser_t* p, const size_t n);
|
const parser_t* repeat_n(const parser_t* p, const size_t n);
|
||||||
|
|
||||||
/* Given a parser, p, this parser succeeds with the value p parsed or with an empty result. */
|
/**
|
||||||
|
* Given a parser, p, this parser succeeds with the value p parsed or
|
||||||
|
* with an empty result.
|
||||||
|
*
|
||||||
|
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
|
||||||
|
*/
|
||||||
const parser_t* optional(const parser_t* p);
|
const parser_t* optional(const parser_t* p);
|
||||||
|
|
||||||
/* Given a parser, p, this parser succeeds if p succeeds, but doesn't include p's result in the result. */
|
/**
|
||||||
|
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
|
||||||
|
* include p's result in the result.
|
||||||
|
*
|
||||||
|
* Result token type: None. The parse_result_t exists but its AST is NULL.
|
||||||
|
*/
|
||||||
const parser_t* ignore(const parser_t* p);
|
const parser_t* ignore(const parser_t* p);
|
||||||
|
|
||||||
/* Given a parser, p, and a parser for a separator, sep, this parser matches a (possibly empty) list of things that p can parse, separated by sep.
|
/**
|
||||||
* For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy(p, sep) will match a comma-separated list of integers.
|
* Given a parser, p, and a parser for a separator, sep, this parser
|
||||||
|
* matches a (possibly empty) list of things that p can parse,
|
||||||
|
* separated by sep.
|
||||||
|
* For example, if p is many1(range('0','9')) and sep is ch(','),
|
||||||
|
* sepBy(p, sep) will match a comma-separated list of integers.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const parser_t* sepBy(const parser_t* p, const parser_t* sep);
|
const parser_t* sepBy(const parser_t* p, const parser_t* sep);
|
||||||
|
|
||||||
/* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
/**
|
||||||
|
* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
|
||||||
* For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy1(p, sep) will match a comma-separated list of integers.
|
* For example, if p is many1(range('0','9')) and sep is ch(','), sepBy1(p, sep) will match a comma-separated list of integers.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const parser_t* sepBy1(const parser_t* p, const parser_t* sep);
|
const parser_t* sepBy1(const parser_t* p, const parser_t* sep);
|
||||||
|
|
||||||
/* This parser always returns a zero length match, i.e., empty string. */
|
/**
|
||||||
|
* This parser always returns a zero length match, i.e., empty string.
|
||||||
|
*
|
||||||
|
* Result token type: None. The parse_result_t exists but its AST is NULL.
|
||||||
|
*/
|
||||||
const parser_t* epsilon_p();
|
const parser_t* epsilon_p();
|
||||||
|
|
||||||
/* This parser attaches an attribute function, which returns true or false, to a parser. The function is evaluated over the parser's result AST.
|
/**
|
||||||
* The parse only succeeds if the attribute function returns true.
|
* This parser applies its first argument to read an unsigned integer
|
||||||
|
* value, then applies its second argument that many times. length
|
||||||
|
* should parse an unsigned integer value; this is checked at runtime.
|
||||||
|
* Specifically, the token_type of the returned token must be TT_UINT.
|
||||||
|
* In future we might relax this to include TT_USER but don't count on it.
|
||||||
|
*
|
||||||
|
* Result token type: TT_SEQUENCE
|
||||||
*/
|
*/
|
||||||
const parser_t* attr_bool(const parser_t* p, const attr_bool_t a);
|
const parser_t* length_value(const parser_t* length, const parser_t* value);
|
||||||
|
|
||||||
/* The 'and' parser is a predicate. It asserts that a conditional syntax is satisfied, but consumes no input.
|
/**
|
||||||
|
* This parser attaches a predicate function, which returns true or
|
||||||
|
* false, to a parser. The function is evaluated over the parser's
|
||||||
|
* result.
|
||||||
|
* The parse only succeeds if the predicate function returns true.
|
||||||
|
*
|
||||||
|
* Result token type: p's result type if pred succeeded, NULL otherwise.
|
||||||
|
*/
|
||||||
|
const parser_t* attr_bool(const parser_t* p, predicate_t pred);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The 'and' parser asserts that a conditional syntax is satisfied,
|
||||||
|
* but doesn't consume that conditional syntax.
|
||||||
* This is useful for lookahead. As an example:
|
* This is useful for lookahead. As an example:
|
||||||
*
|
*
|
||||||
* Suppose you already have a parser, hex_p, that parses numbers in hexadecimal format (including the leading '0x'). Then
|
* Suppose you already have a parser, hex_p, that parses numbers in
|
||||||
|
* hexadecimal format (including the leading '0x'). Then
|
||||||
* sequence(and(token((const uint8_t*)"0x", 2)), hex_p)
|
* sequence(and(token((const uint8_t*)"0x", 2)), hex_p, NULL)
|
||||||
* checks to see whether there is a leading "0x", *does not* consume the "0x", and then applies hex_p to parse the hex-formatted number.
|
* checks to see whether there is a leading "0x", *does not* consume
|
||||||
|
* the "0x", and then applies hex_p to parse the hex-formatted number.
|
||||||
*
|
*
|
||||||
* 'and' succeeds if p succeeds, and fails if p fails. Like 'ignore', 'and' does not attach a result to the AST.
|
* 'and' succeeds if p succeeds, and fails if p fails.
|
||||||
|
*
|
||||||
|
* Result token type: None. The parse_result_t exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const parser_t* and(const parser_t* p);
|
const parser_t* and(const parser_t* p);
|
||||||
|
|
||||||
/* The 'not' parser is a predicate. It asserts that a conditional syntax is *not* satisfied, and consumes no input.
|
/**
|
||||||
|
* The 'not' parser asserts that a conditional syntax is *not*
|
||||||
|
* satisfied, but doesn't consume that conditional syntax.
|
||||||
* As a somewhat contrived example:
|
* As a somewhat contrived example:
|
||||||
*
|
*
|
||||||
* Since 'choice' applies its arguments in order, the following parser:
|
* Since 'choice' applies its arguments in order, the following parser:
|
||||||
* sequence(ch('a'), choice(ch('+'), token((const uint8_t*)"++"), NULL), ch('b'), NULL)
|
* sequence(ch('a'), choice(ch('+'), token((const uint8_t*)"++", 2), NULL), ch('b'), NULL)
|
||||||
* will not parse "a++b", because once choice() has succeeded, it will not backtrack and try other alternatives if a later parser in the sequence
|
* will not parse "a++b", because once choice() has succeeded, it will
|
||||||
* fails.
|
* not backtrack and try other alternatives if a later parser in the
|
||||||
* Instead, you can force the use of the second alternative by turning the ch('+') alternative into a sequence with not:
|
* sequence fails.
|
||||||
|
* Instead, you can force the use of the second alternative by turning
|
||||||
|
* the ch('+') alternative into a sequence with not:
|
||||||
* sequence(ch('a'), choice(sequence(ch('+'), not(ch('+')), NULL), token((const uint8_t*)"++")), ch('b'), NULL)
|
* sequence(ch('a'), choice(sequence(ch('+'), not(ch('+')), NULL), token((const uint8_t*)"++", 2), NULL), ch('b'), NULL)
|
||||||
* If the input string is "a+b", the first alternative is applied; if the input string is "a++b", the second alternative is applied.
|
* If the input string is "a+b", the first alternative is applied; if
|
||||||
|
* the input string is "a++b", the second alternative is applied.
|
||||||
|
*
|
||||||
|
* Result token type: None. The parse_result_t exists but its AST is NULL.
|
||||||
*/
|
*/
|
||||||
const parser_t* not(const parser_t* p);
|
const parser_t* not(const parser_t* p);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a parser that just calls out to another, as yet unknown, parser.
|
* Create a parser that just calls out to another, as yet unknown,
|
||||||
|
* parser.
|
||||||
* Note that the inner parser gets bound later, with bind_indirect.
|
* Note that the inner parser gets bound later, with bind_indirect.
|
||||||
* This can be used to create recursive parsers.
|
* This can be used to create recursive parsers.
|
||||||
|
*
|
||||||
|
* Result token type: the type of whatever parser is bound to it with
|
||||||
|
* bind_indirect().
|
||||||
*/
|
*/
|
||||||
parser_t *indirect();
|
parser_t *indirect();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the inner parser of an indirect. See comments on indirect for details.
|
* Set the inner parser of an indirect. See comments on indirect for
|
||||||
|
* details.
|
||||||
*/
|
*/
|
||||||
void bind_indirect(parser_t* indirect, parser_t* inner);
|
void bind_indirect(parser_t* indirect, parser_t* inner);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#else
|
#else
|
||||||
#define assert_message(check, message) do { \
|
#define assert_message(check, message) do { \
|
||||||
if (!(check)) \
|
if (!(check)) \
|
||||||
errx(1, "Assertation failed (programmer error): %s", message); \
|
errx(1, "Assertion failed (programmer error): %s", message); \
|
||||||
} while(0)
|
} while(0)
|
||||||
#endif
|
#endif
|
||||||
#define false 0
|
#define false 0
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,10 @@ static inline void append_buf_c(struct result_buf *buf, char v) {
|
||||||
static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) {
|
static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) {
|
||||||
char* tmpbuf;
|
char* tmpbuf;
|
||||||
int len;
|
int len;
|
||||||
|
if (!tok) {
|
||||||
|
append_buf(buf, "NULL", 4);
|
||||||
|
return;
|
||||||
|
}
|
||||||
switch (tok->token_type) {
|
switch (tok->token_type) {
|
||||||
case TT_NONE:
|
case TT_NONE:
|
||||||
append_buf(buf, "null", 4);
|
append_buf(buf, "null", 4);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue