2012-04-23 00:02:42 +01:00
/* Parser combinators for binary formats.
* Copyright ( C ) 2012 Meredith L . Patterson , Dan " TQ " Hirsch
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation , version 2.
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 , USA .
*/
2012-04-23 19:39:44 +01:00
# ifndef HAMMER_HAMMER__H
# define HAMMER_HAMMER__H
2012-04-22 04:47:08 +01:00
# include <glib.h>
# include <stdint.h>
/* The state of the parser.
 *
 * Members (input, index and length are held in the embedded input_stream_t):
 *   input  - the entire string being parsed
 *   index  - current position in input
 *   length - size of input
 *
 * THE FOLLOWING DESCRIBES HOW JSPARSE DOES IT. OUR MILEAGE MAY VARY.
 *   cache  - a hash table describing the state of the parse, including partial
 *            parse_results. It's actually a hash table of
 *            [parser_id, hash_table[index, parse_result]], where the parser id is
 *            incremented as the parse goes along (parsers that have already been
 *            applied once don't get a new parser_id... but the global variable
 *            still increments? Not sure why that is; needs some debugging), and
 *            the locations at which it's been applied are memoized.
 *
 */
2012-04-23 19:39:44 +01:00
/* Endianness flags for an input stream.
 *
 * The byte-order and bit-order big-endian flags occupy distinct bits (0x1 and
 * 0x2). Both little-endian constants are 0, i.e. the absence of the matching
 * big-endian bit.
 * NOTE(review): presumably these are OR-ed together and stored in
 * input_stream_t.endianness -- confirm against the implementation.
 */
#define BYTE_BIG_ENDIAN 0x1
#define BIT_BIG_ENDIAN 0x2
#define BIT_LITTLE_ENDIAN 0x0
#define BYTE_LITTLE_ENDIAN 0x0
2012-04-23 19:39:44 +01:00
2012-04-29 01:45:52 +01:00
/* A cursor over the bytes being parsed.
 *
 * This should be considered to be a really big value type.
 *
 * Members:
 *   input      - the entire string being parsed (read-only through this pointer)
 *   index      - current position in input
 *   length     - size of input
 *   bit_offset - NOTE(review): presumably the bit position within the current
 *                byte -- confirm against the bit-reading code
 *   endianness - NOTE(review): presumably a combination of the *_ENDIAN flags
 *                -- confirm against the implementation
 */
typedef struct input_stream {
  const uint8_t *input;
  size_t index;
  size_t length;
  char bit_offset;
  char endianness;
} input_stream_t;
/* The state of an in-progress parse.
 *
 * Members:
 *   cache        - a hash table describing the state of the parse, including
 *                  partial parse_results
 *   input_stream - the input cursor, embedded by value
 */
typedef struct parse_state {
  GHashTable *cache;
  input_stream_t input_stream;
} parse_state_t;
2012-04-22 04:47:08 +01:00
2012-05-01 00:33:47 +01:00
/* A token produced by a parse: a read-only byte pointer plus a size.
 * NOTE(review): presumably `len` is the number of bytes available at `token`
 * -- confirm against the code that fills this in.
 */
struct parsed_token {
  const uint8_t *token;
  size_t len;
};
typedef struct parsed_token parsed_token_t;
2012-04-22 23:40:25 +01:00
typedef struct parse_result {
2012-04-22 04:47:08 +01:00
const GSequence * ast ;
2012-04-22 23:40:25 +01:00
} parse_result_t ;
typedef struct parser {
2012-05-01 00:33:47 +01:00
parse_result_t * ( * fn ) ( void * env , parse_state_t * state ) ;
void * env ;
2012-04-22 23:40:25 +01:00
} parser_t ;
2012-04-30 03:44:10 +01:00
/* Top-level entry point (judging by the signature): takes a parser and the
 * input bytes and returns a parse_result_t*.
 * NOTE(review): no length is passed alongside `input`, although the input
 * stream tracks one -- confirm how the implementation determines the input
 * size (binary input may contain NUL bytes).
 * NOTE(review): ownership of the returned pointer and the value returned on
 * failure are not visible from this header.
 */
parse_result_t* parse(const parser_t* parser, const uint8_t* input);

/* Given a string, returns a parser that parses that string value. */
const parser_t* token(const uint8_t* str, const size_t len);

/* Given a single character, returns a parser that parses that character. */
const parser_t* ch(const uint8_t c);

/* Given two single-character bounds, lower and upper, returns a parser that
 * parses a single character within the range [lower, upper] (inclusive).
 */
const parser_t* range(const uint8_t lower, const uint8_t upper);

/* Given another parser, p, returns a parser that skips any whitespace and
 * then applies p.
 */
const parser_t* whitespace(const parser_t* p);
2012-05-01 00:33:47 +01:00
/* Given another parser, p, and a function f, returns a parser that applies p,
 * then applies f to everything in the AST of p's result.
 * (Not declared yet: the prototype below is disabled until the action
 * function-pointer type is settled.)
 */
//const parser_t* action(const parser_t* p, /* fptr to action on AST */);

/* Given another parser, p, and a separator, sep, returns a parser that applies
 * p, then joins everything in the AST of p's result with sep. For example, if
 * the AST of p's result is {"dog", "cat", "hedgehog"} and sep is "|", the AST
 * of this parser's result will be {"dog|cat|hedgehog"}.
 */
const parser_t* join_action(const parser_t* p, const uint8_t* sep, const size_t len);

/* NOTE(review): undocumented in this header -- confirm the semantics against
 * the implementation before relying on it.
 */
const parser_t* left_factor_action(const parser_t* p);

/* Given a single-character parser, p, returns a single-character parser that
 * will parse any character *other* than the character p would parse.
 */
const parser_t* negate(const parser_t* p);
2012-05-01 00:33:47 +01:00
/* A no-argument parser that succeeds if there is no more input to parse. */
2012-04-30 03:44:10 +01:00
const parser_t * end_p ( ) ;
2012-05-01 00:33:47 +01:00
/* This parser always fails. */
2012-04-30 03:44:10 +01:00
const parser_t * nothing_p ( ) ;
/* The combinators below are not documented in this header.
 * NOTE(review): presumably they mirror the JSParse combinators of the same
 * names -- confirm each against the implementation before relying on it.
 */

/* NOTE(review): p_array carries no length; presumably it is terminated by a
 * NULL sentinel -- confirm against the implementation.
 */
const parser_t* sequence(const parser_t* p_array[]);
const parser_t* choice(const parser_t* p_array[]);

const parser_t* butnot(const parser_t* p1, const parser_t* p2);
const parser_t* difference(const parser_t* p1, const parser_t* p2);
/* NOTE(review): `xor` (and `and` / `not` below) are macros in <iso646.h> and
 * operator tokens in C++; this header cannot be included after <iso646.h> or
 * from C++ without a rename. Names kept as-is to preserve the interface.
 */
const parser_t* xor(const parser_t* p1, const parser_t* p2);

const parser_t* repeat0(const parser_t* p);
const parser_t* repeat1(const parser_t* p);
const parser_t* repeat_n(const parser_t* p, const size_t n);
const parser_t* optional(const parser_t* p);
const parser_t* expect(const parser_t* p);

const parser_t* chain(const parser_t* p1, const parser_t* p2, const parser_t* p3);
const parser_t* chainl(const parser_t* p1, const parser_t* p2);
const parser_t* list(const parser_t* p1, const parser_t* p2);

/* Declared with (void) so that it is a real prototype in pre-C23 C. */
const parser_t* epsilon_p(void);
//const parser_t* semantic(/* fptr to nullary function? */);

const parser_t* and(const parser_t* p);
const parser_t* not(const parser_t* p);
2012-04-22 15:30:49 +01:00
2012-04-23 19:39:44 +01:00
# endif // #ifndef HAMMER_HAMMER__H