Merge remote-tracking branch 'tq/master' into LL such that it compiles

Conflicts:
	src/Makefile
	src/backends/packrat.c
	src/compile.c
	src/hammer.h
	src/internal.h
	src/parsers/action.c
	src/parsers/and.c
	src/parsers/attr_bool.c
	src/parsers/bits.c
	src/parsers/butnot.c
	src/parsers/ch.c
	src/parsers/charset.c
	src/parsers/choice.c
	src/parsers/difference.c
	src/parsers/end.c
	src/parsers/epsilon.c
	src/parsers/ignore.c
	src/parsers/ignoreseq.c
	src/parsers/indirect.c
	src/parsers/int_range.c
	src/parsers/many.c
	src/parsers/not.c
	src/parsers/nothing.c
	src/parsers/optional.c
	src/parsers/sequence.c
	src/parsers/token.c
	src/parsers/unimplemented.c
	src/parsers/whitespace.c
	src/parsers/xor.c
This commit is contained in:
Sven M. Hallberg 2013-05-11 19:04:59 +02:00
commit c64a4e435e
46 changed files with 1289 additions and 263 deletions

View file

@ -34,8 +34,11 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_LL,
PB_MAX
PB_REGULAR, //
PB_LLk, //
PB_LALR, // Not Implemented
PB_GLR, // Not Implemented
PB_MAX = PB_LLk
} HParserBackend;
typedef enum HTokenType_ {
@ -44,6 +47,7 @@ typedef enum HTokenType_ {
TT_SINT,
TT_UINT,
TT_SEQUENCE,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_USER = 64,
TT_ERR,
TT_MAX
@ -75,7 +79,9 @@ typedef struct HParsedToken_ {
} HParsedToken;
/**
* The result of a successful parse.
* The result of a successful parse. Note that this may reference the
* input string.
*
* If a parse fails, the parse result will be NULL.
* If a parse is successful but there's nothing there (i.e., if end_p
* succeeds) then there's a parse result but its ast is NULL.
@ -111,12 +117,14 @@ typedef const HParsedToken* (*HAction)(const HParseResult *p);
*/
typedef bool (*HPredicate)(HParseResult *p);
typedef struct HParserVtable_ HParserVtable;
typedef struct HCFChoice_ HCFChoice;
typedef struct HRVMProg_ HRVMProg;
typedef struct HParserVtable_ HParserVtable;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
void* backend_data;
void *env;
void *data; /* e.g., parse tables */
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
@ -191,7 +199,7 @@ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* inp
*
* Result token type: TT_BYTES
*/
HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
/**
* Given a single character, returns a parser that parses that
@ -199,7 +207,7 @@ HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len);
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
HAMMER_FN_DECL(HParser*, h_ch, const uint8_t c);
/**
* Given two single-character bounds, lower and upper, returns a parser
@ -208,14 +216,14 @@ HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c);
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
HAMMER_FN_DECL(HParser*, h_ch_range, const uint8_t lower, const uint8_t upper);
/**
* Given an integer parser, p, and two integer bounds, lower and upper,
* returns a parser that parses an integral value within the range
* [lower, upper] (inclusive).
*/
HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper);
/**
* Returns a parser that parses the specified number of bits. sign ==
@ -223,63 +231,63 @@ HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lowe
*
* Result token type: TT_SINT if sign == true, TT_UINT if sign == false
*/
HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign);
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
/**
* Returns a parser that parses a signed 8-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int64);
HAMMER_FN_DECL_NOARG(HParser*, h_int64);
/**
* Returns a parser that parses a signed 4-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int32);
HAMMER_FN_DECL_NOARG(HParser*, h_int32);
/**
* Returns a parser that parses a signed 2-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int16);
HAMMER_FN_DECL_NOARG(HParser*, h_int16);
/**
* Returns a parser that parses a signed 1-byte integer value.
*
* Result token type: TT_SINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_int8);
HAMMER_FN_DECL_NOARG(HParser*, h_int8);
/**
* Returns a parser that parses an unsigned 8-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint64);
HAMMER_FN_DECL_NOARG(HParser*, h_uint64);
/**
* Returns a parser that parses an unsigned 4-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint32);
HAMMER_FN_DECL_NOARG(HParser*, h_uint32);
/**
* Returns a parser that parses an unsigned 2-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint16);
HAMMER_FN_DECL_NOARG(HParser*, h_uint16);
/**
* Returns a parser that parses an unsigned 1-byte integer value.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
HAMMER_FN_DECL_NOARG(HParser*, h_uint8);
/**
* Given another parser, p, returns a parser that skips any whitespace
@ -287,7 +295,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_uint8);
*
* Result token type: p's result type
*/
HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
HAMMER_FN_DECL(HParser*, h_whitespace, const HParser* p);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -295,7 +303,7 @@ HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p);
*
* Result token type: p's result type
*/
HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
HAMMER_FN_DECL(HParser*, h_left, const HParser* p, const HParser* q);
/**
* Given two parsers, p and q, returns a parser that parses them in
@ -303,7 +311,7 @@ HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q);
*
* Result token type: q's result type
*/
HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
HAMMER_FN_DECL(HParser*, h_right, const HParser* p, const HParser* q);
/**
* Given three parsers, p, x, and q, returns a parser that parses them in
@ -311,7 +319,7 @@ HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q);
*
* Result token type: x's result type
*/
HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
HAMMER_FN_DECL(HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q);
/**
* Given another parser, p, and a function f, returns a parser that
@ -319,21 +327,21 @@ HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, con
*
* Result token type: any
*/
HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a);
HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a);
/**
* Parse a single character in the given charset.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length);
HAMMER_FN_DECL(HParser*, h_in, const uint8_t *charset, size_t length);
/**
* Parse a single character *NOT* in the given charset.
*
* Result token type: TT_UINT
*/
HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
HAMMER_FN_DECL(HParser*, h_not_in, const uint8_t *charset, size_t length);
/**
* A no-argument parser that succeeds if there is no more input to
@ -341,14 +349,14 @@ HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_end_p);
HAMMER_FN_DECL_NOARG(HParser*, h_end_p);
/**
* This parser always fails.
*
* Result token type: NULL. Always.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
/**
* Given a null-terminated list of parsers, apply each parser in order.
@ -356,7 +364,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p);
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, const HParser* p);
/**
* Given an array of parsers, p_array, apply each parser in order. The
@ -365,7 +373,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequenc
*
* Result token type: The type of the first successful parser's result.
*/
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p);
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, const HParser* p);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -375,7 +383,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice,
*
* Result token type: p1's result type.
*/
HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_butnot, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds in the following
@ -385,7 +393,7 @@ HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2);
*
* Result token type: p1's result type.
*/
HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_difference, const HParser* p1, const HParser* p2);
/**
* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or
@ -393,7 +401,7 @@ HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p
*
* Result token type: The type of the result of whichever parser succeeded.
*/
HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
HAMMER_FN_DECL(HParser*, h_xor, const HParser* p1, const HParser* p2);
/**
* Given a parser, p, this parser succeeds for zero or more repetitions
@ -401,7 +409,7 @@ HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
HAMMER_FN_DECL(HParser*, h_many, const HParser* p);
/**
* Given a parser, p, this parser succeeds for one or more repetitions
@ -409,7 +417,7 @@ HAMMER_FN_DECL(const HParser*, h_many, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
HAMMER_FN_DECL(HParser*, h_many1, const HParser* p);
/**
* Given a parser, p, this parser succeeds for exactly N repetitions
@ -417,7 +425,7 @@ HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
HAMMER_FN_DECL(HParser*, h_repeat_n, const HParser* p, const size_t n);
/**
* Given a parser, p, this parser succeeds with the value p parsed or
@ -425,7 +433,7 @@ HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n);
*
* Result token type: If p succeeded, the type of its result; if not, TT_NONE.
*/
HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
HAMMER_FN_DECL(HParser*, h_optional, const HParser* p);
/**
* Given a parser, p, this parser succeeds if p succeeds, but doesn't
@ -433,7 +441,7 @@ HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
HAMMER_FN_DECL(HParser*, h_ignore, const HParser* p);
/**
* Given a parser, p, and a parser for a separator, sep, this parser
@ -444,7 +452,7 @@ HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
HAMMER_FN_DECL(HParser*, h_sepBy, const HParser* p, const HParser* sep);
/**
* Given a parser, p, and a parser for a separator, sep, this parser
* matches a list of things that p can parse, separated by sep. Unlike
* sepBy, this ensures that the result has at least one element.
@ -452,14 +460,14 @@ HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep);
HAMMER_FN_DECL(HParser*, h_sepBy1, const HParser* p, const HParser* sep);
/**
* This parser always returns a zero length match, i.e., empty string.
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
HAMMER_FN_DECL_NOARG(HParser*, h_epsilon_p);
/**
* This parser applies its first argument to read an unsigned integer
@ -470,7 +478,7 @@ HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p);
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value);
HAMMER_FN_DECL(HParser*, h_length_value, const HParser* length, const HParser* value);
/**
* This parser attaches a predicate function, which returns true or
@ -485,7 +493,7 @@ HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HPar
*
* Result token type: p's result type if pred succeeded, NULL otherwise.
*/
HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred);
/**
* The 'and' parser asserts that a conditional syntax is satisfied,
@ -502,7 +510,7 @@ HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
HAMMER_FN_DECL(HParser*, h_and, const HParser* p);
/**
* The 'not' parser asserts that a conditional syntax is *not*
@ -522,7 +530,7 @@ HAMMER_FN_DECL(const HParser*, h_and, const HParser* p);
*
* Result token type: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(const HParser*, h_not, const HParser* p);
HAMMER_FN_DECL(HParser*, h_not, const HParser* p);
/**
* Create a parser that just calls out to another, as yet unknown,
@ -565,7 +573,7 @@ HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent
*
* Returns -1 if the grammar cannot be compiled with the specified options; 0 otherwise.
*/
HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
/**
* TODO: Document me
@ -590,7 +598,7 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
void h_bit_writer_free(HBitWriter* w);
// {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);
void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
// }}}