Finished attr_bool, cleaned up header a little.

This commit is contained in:
Meredith L. Patterson 2012-05-18 12:18:19 +02:00
parent 3d5e9399c4
commit b10a3d8ae9
2 changed files with 175 additions and 52 deletions

View file

@ -759,15 +759,31 @@ parser_t* indirect() {
}
/* Environment for the attr_bool() parser: the wrapped parser together with the predicate applied to its result. */
typedef struct {
/* Parser that is run first; its result is what the predicate inspects. */
const parser_t *p;
/* Predicate called on p's result; a true return keeps the result. */
predicate_t pred;
} attr_bool_t;
/**
 * Parse function for attr_bool(): runs the wrapped parser and keeps its
 * result only when the predicate accepts it.
 *
 * env is interpreted as an attr_bool_t*. Returns the wrapped parser's
 * result if it is non-NULL and the predicate returns true for it;
 * returns NULL otherwise. The predicate is never called on a NULL
 * result.
 */
static parse_result_t* parse_attr_bool(void *env, parse_state_t *state) {
  attr_bool_t *ctx = (attr_bool_t*)env;
  parse_result_t *inner = do_parse(ctx->p, state);

  /* Wrapped parser failed: nothing to test. */
  if (!inner)
    return NULL;

  return ctx->pred(inner) ? inner : NULL;
}
/* NOTE(review): stub that ignores both arguments and returns &unimplemented. It takes an attr_bool_t by value, whereas the definition that follows takes a predicate_t; this looks like the pre-change line shown by the diff view — confirm it is absent from the committed file. */
const parser_t* attr_bool(const parser_t* p, attr_bool_t a) { return &unimplemented; }
/**
 * Build a parser that applies p and then checks p's result with pred.
 *
 * Allocates an attr_bool_t holding p and pred, and a parser_t whose fn
 * is parse_attr_bool and whose env points at that attr_bool_t. Both
 * allocations are made with g_new; nothing in this function frees them.
 *
 * Returns the newly allocated parser.
 */
const parser_t* attr_bool(const parser_t* p, predicate_t pred) {
  /* Environment handed to parse_attr_bool. */
  attr_bool_t *ctx = g_new(attr_bool_t, 1);
  ctx->p = p;
  ctx->pred = pred;

  parser_t *wrapper = g_new(parser_t, 1);
  wrapper->fn = parse_attr_bool;
  wrapper->env = ctx;
  return wrapper;
}
/**
 * Placeholder for the 'and' parser: ignores p and returns the address
 * of `unimplemented` (defined outside this view).
 */
const parser_t* and(const parser_t* p) {
  return &unimplemented;
}

View file

@ -74,164 +74,271 @@ typedef struct parse_result {
arena_t arena;
} parse_result_t;
/* Type of an action to apply to an AST, used in the action() parser. */
/**
* Type of an action to apply to an AST, used in the action() parser.
*/
typedef parse_result_t* (*action_t)(parse_result_t *p);
/* Type of a boolean attribute-checking function, used in the attr_bool() parser. */
typedef int (*predicate_t)(parse_result_t *p);
/**
* Type of a boolean attribute-checking function, used in the
* attr_bool() parser. It can be any (user-defined) function that takes
* a parse_result_t and returns true or false.
*/
typedef bool (*predicate_t)(parse_result_t *p);
/**
 * A parser: a parse function plus an untyped environment pointer.
 */
typedef struct parser {
/* Parse function: takes an env pointer and the parse state, returns a parse_result_t*. */
parse_result_t* (*fn)(void *env, parse_state_t *state);
/* Untyped per-parser data. NOTE(review): presumably passed to fn as its env argument when the parser runs — confirm against do_parse. */
void *env;
} parser_t;
/**
* Top-level function to call a parser that has been built over some
* piece of input (of known size).
*/
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length);
/* Given a string, returns a parser that parses that string value. */
/**
* Given a string, returns a parser that parses that string value.
*/
const parser_t* token(const uint8_t *str, const size_t len);
/* Given a single character, returns a parser that parses that character. */
/**
* Given a single character, returns a parser that parses that
* character.
*/
const parser_t* ch(const uint8_t c);
/* Given two single-character bounds, lower and upper, returns a parser that parses a single character within the range [lower, upper] (inclusive). */
/**
* Given two single-character bounds, lower and upper, returns a parser
* that parses a single character within the range [lower, upper]
* (inclusive).
*/
const parser_t* range(const uint8_t lower, const uint8_t upper);
/* Returns a parser that parses the specified number of bits. sign == true if signed, false if unsigned. */
/**
* Returns a parser that parses the specified number of bits. sign ==
* true if signed, false if unsigned.
*/
const parser_t* bits(size_t len, bool sign);
/* Returns a parser that parses a signed 8-byte integer value. */
/**
* Returns a parser that parses a signed 8-byte integer value.
*/
const parser_t* int64();
/* Returns a parser that parses a signed 4-byte integer value. */
/**
* Returns a parser that parses a signed 4-byte integer value.
*/
const parser_t* int32();
/* Returns a parser that parses a signed 2-byte integer value. */
/**
* Returns a parser that parses a signed 2-byte integer value.
*/
const parser_t* int16();
/* Returns a parser that parses a signed 1-byte integer value. */
/**
* Returns a parser that parses a signed 1-byte integer value.
*/
const parser_t* int8();
/* Returns a parser that parses an unsigned 8-byte integer value. */
/**
* Returns a parser that parses an unsigned 8-byte integer value.
*/
const parser_t* uint64();
/* Returns a parser that parses an unsigned 4-byte integer value. */
/**
* Returns a parser that parses an unsigned 4-byte integer value.
*/
const parser_t* uint32();
/* Returns a parser that parses an unsigned 2-byte integer value. */
/**
* Returns a parser that parses an unsigned 2-byte integer value.
*/
const parser_t* uint16();
/* Returns a parser that parses an unsigned 1-byte integer value. */
/**
* Returns a parser that parses an unsigned 1-byte integer value.
*/
const parser_t* uint8();
/* Returns a parser that parses a double-precision floating-point value. */
/**
* Returns a parser that parses a double-precision floating-point
* value.
*/
const parser_t* float64();
/* Returns a parser that parses a single-precision floating-point value. */
/**
* Returns a parser that parses a single-precision floating-point
* value.
*/
const parser_t* float32();
/* Given another parser, p, returns a parser that skips any whitespace and then applies p. */
/**
* Given another parser, p, returns a parser that skips any whitespace
* and then applies p.
*/
const parser_t* whitespace(const parser_t* p);
/* Given another parser, p, and a function f, returns a parser that applies p, then applies f to everything in the AST of p's result. */
/**
* Given another parser, p, and an action function, a, returns a parser
* that applies p, then applies a to everything in the AST of p's result.
*/
const parser_t* action(const parser_t* p, const action_t a);
/* Parse a single character *NOT* in charset */
/**
* Parse a single character *NOT* in the given charset.
*/
const parser_t* not_in(const uint8_t *charset, int length);
/* A no-argument parser that succeeds if there is no more input to parse. */
/**
* A no-argument parser that succeeds if there is no more input to
* parse.
*/
const parser_t* end_p();
/* This parser always fails. */
/**
* This parser always fails.
*/
const parser_t* nothing_p();
/* Given an null-terminated list of parsers, apply each parser in order. The parse succeeds only if all parsers succeed. */
/**
* Given a null-terminated list of parsers, apply each parser in order.
* The parse succeeds only if all parsers succeed.
*/
const parser_t* sequence(const parser_t* p, ...) __attribute__((sentinel));
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
/**
* Given a null-terminated list of parsers, apply each parser in order.
* The first parser to succeed is the result; if no parsers succeed,
* the parse fails.
*/
const parser_t* choice(const parser_t* p, ...) __attribute__((sentinel));
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
* cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p1's result is as long as or shorter than p2's
*/
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
* cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p2's result is shorter than p1's
*/
const parser_t* difference(const parser_t* p1, const parser_t* p2);
/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeed, but not if they both do.
/**
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
* p2 succeed, but not if they both do.
*/
const parser_t* xor(const parser_t* p1, const parser_t* p2);
/* Given a parser, p, this parser succeeds for zero or more repetitions of p. */
/**
* Given a parser, p, this parser succeeds for zero or more repetitions
* of p.
*/
const parser_t* many(const parser_t* p);
/* Given a parser, p, this parser succeeds for one or more repetitions of p. */
/**
* Given a parser, p, this parser succeeds for one or more repetitions
* of p.
*/
const parser_t* many1(const parser_t* p);
/* Given a parser, p, this parser succeeds for exactly N repetitions of p. */
/**
* Given a parser, p, this parser succeeds for exactly N repetitions
* of p.
*/
const parser_t* repeat_n(const parser_t* p, const size_t n);
/* Given a parser, p, this parser succeeds with the value p parsed or with an empty result. */
/**
* Given a parser, p, this parser succeeds with the value p parsed or
* with an empty result.
*/
const parser_t* optional(const parser_t* p);
/* Given a parser, p, this parser succeeds if p succeeds, but doesn't include p's result in the result. */
/**
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
* include p's result in the result.
*/
const parser_t* ignore(const parser_t* p);
/* Given a parser, p, and a parser for a separator, sep, this parser matches a (possibly empty) list of things that p can parse, separated by sep.
* For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy(p, sep) will match a comma-separated list of integers.
/**
* Given a parser, p, and a parser for a separator, sep, this parser
* matches a (possibly empty) list of things that p can parse,
* separated by sep.
* For example, if p is many1(range('0','9')) and sep is ch(','),
* sepBy(p, sep) will match a comma-separated list of integers.
*/
const parser_t* sepBy(const parser_t* p, const parser_t* sep);
/* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element.
/**
* Given a parser, p, and a parser for a separator, sep, this parser
* matches a list of things that p can parse, separated by sep. Unlike
* sepBy, this ensures that the result has at least one element.
* For example, if p is many1(range('0','9')) and sep is ch(','),
* sepBy1(p, sep) will match a comma-separated list of integers.
*/
const parser_t* sepBy1(const parser_t* p, const parser_t* sep);
/* This parser always returns a zero length match, i.e., empty string. */
/**
* This parser always returns a zero length match, i.e., empty string.
*/
const parser_t* epsilon_p();
/* This parser attaches an attribute function, which returns true or false, to a parser. The function is evaluated over the parser's result AST.
/**
* This parser attaches a predicate function, which returns true or
* false, to a parser. The function is evaluated over the parser's
* result.
* The parse only succeeds if the predicate function returns true.
*/
const parser_t* attr_bool(const parser_t* p, const attr_bool_t a);
const parser_t* attr_bool(const parser_t* p, predicate_t pred);
/* The 'and' parser is a predicate. It asserts that a conditional syntax is satisfied, but consumes no input.
/**
* The 'and' parser is a predicate. It asserts that a conditional
* syntax is satisfied, but consumes no input.
* This is useful for lookahead. As an example:
*
* Suppose you already have a parser, hex_p, that parses numbers in hexadecimal format (including the leading '0x'). Then
* Suppose you already have a parser, hex_p, that parses numbers in
* hexadecimal format (including the leading '0x'). Then
* sequence(and(token((const uint8_t*)"0x", 2)), hex_p)
* checks to see whether there is a leading "0x", *does not* consume the "0x", and then applies hex_p to parse the hex-formatted number.
* checks to see whether there is a leading "0x", *does not* consume
* the "0x", and then applies hex_p to parse the hex-formatted number.
*
* 'and' succeeds if p succeeds, and fails if p fails. Like 'ignore', 'and' does not attach a result to the AST.
* 'and' succeeds if p succeeds, and fails if p fails. Like 'ignore',
* 'and' does not attach a result to the AST.
*/
const parser_t* and(const parser_t* p);
/* The 'not' parser is a predicate. It asserts that a conditional syntax is *not* satisfied, and consumes no input.
/**
* The 'not' parser is a predicate. It asserts that a conditional
* syntax is *not* satisfied, and consumes no input.
* As a somewhat contrived example:
*
* Since 'choice' applies its arguments in order, the following parser:
* sequence(ch('a'), choice(ch('+'), token((const uint8_t*)"++"), NULL), ch('b'), NULL)
* will not parse "a++b", because once choice() has succeeded, it will not backtrack and try other alternatives if a later parser in the sequence
* fails.
* Instead, you can force the use of the second alternative by turning the ch('+') alternative into a sequence with not:
* will not parse "a++b", because once choice() has succeeded, it will
* not backtrack and try other alternatives if a later parser in the
* sequence fails.
* Instead, you can force the use of the second alternative by turning
* the ch('+') alternative into a sequence with not:
* sequence(ch('a'), choice(sequence(ch('+'), not(ch('+')), NULL), token((const uint8_t*)"++"), NULL), ch('b'), NULL)
* If the input string is "a+b", the first alternative is applied; if the input string is "a++b", the second alternative is applied.
* If the input string is "a+b", the first alternative is applied; if
* the input string is "a++b", the second alternative is applied.
*/
const parser_t* not(const parser_t* p);
/**
* Create a parser that just calls out to another, as yet unknown, parser.
* Create a parser that just calls out to another, as yet unknown,
* parser.
* Note that the inner parser gets bound later, with bind_indirect.
* This can be used to create recursive parsers.
*/
parser_t *indirect();
/**
* Set the inner parser of an indirect. See comments on indirect for details.
* Set the inner parser of an indirect. See comments on indirect for
* details.
*/
void bind_indirect(parser_t* indirect, parser_t* inner);