2012-05-12 22:26:59 +01:00
|
|
|
/* Internals for Hammer.
|
|
|
|
|
* Copyright (C) 2012 Meredith L. Patterson, Dan "TQ" Hirsch
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation, version 2.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
*/
|
|
|
|
|
|
2012-04-23 19:39:44 +01:00
|
|
|
#ifndef HAMMER_INTERNAL__H
|
|
|
|
|
#define HAMMER_INTERNAL__H
|
2012-05-04 21:23:56 +01:00
|
|
|
#include <glib.h>
|
2012-05-13 01:01:26 +01:00
|
|
|
#include <err.h>
|
2012-04-23 19:39:44 +01:00
|
|
|
#include "hammer.h"
|
|
|
|
|
|
2012-05-13 01:01:26 +01:00
|
|
|
/* assert_message(check, message)
 *
 * Debug-only assertion that carries a human-readable explanation.
 *
 *   check   - expression expected to be true.
 *   message - C string describing what went wrong; printed with "%s".
 *
 * When NDEBUG is defined the macro expands to an empty statement and neither
 * argument is evaluated.  Otherwise, if `check` is false, errx() reports
 * `message` and terminates the process with exit status 1.
 *
 * Both variants are wrapped in do { } while (0) so the macro acts as a single
 * statement, e.g. as the body of an unbraced if/else.
 */
#ifdef NDEBUG
#define assert_message(check, message) do { } while (0)
#else
#define assert_message(check, message) do {                             \
    if (!(check)) {                                                      \
      errx(1, "Assertion failed (programmer error): %s", (message));     \
    }                                                                    \
  } while (0)
#endif
|
2012-04-29 01:45:52 +01:00
|
|
|
/* Fallback boolean constants for code that does not include <stdbool.h>.
 *
 * Each definition is guarded so that a `true`/`false` macro supplied earlier
 * (by <stdbool.h> or by another header) is left untouched instead of being
 * redefined here.
 */
#ifndef false
#define false 0
#endif
#ifndef true
#define true 1
#endif
|
|
|
|
|
|
2012-05-17 13:22:56 +02:00
|
|
|
/* A cursor over the input buffer.
 *
 * This should be considered to be a really big value type: it is embedded by
 * value in parse_state_t and in parser_cache_key_t rather than referenced
 * through a pointer, so copying the struct copies the whole position.
 */
typedef struct input_stream {
  const uint8_t *input; /* buffer being read; not written through this pointer */
  size_t index;         /* position within `input` -- presumably a byte offset; TODO confirm */
  size_t length;        /* extent of `input` -- presumably in bytes; TODO confirm */
  char bit_offset;      /* presumably the bit position inside the current byte; TODO confirm */
  char endianness;      /* bit/byte order selector; the encoding is not visible in this header */
  char overrun;         /* presumably nonzero once a read ran past the end; TODO confirm */
} input_stream_t;
|
|
|
|
|
|
|
|
|
|
/* The state of the parser.
|
|
|
|
|
*
|
|
|
|
|
* Members:
|
|
|
|
|
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parser_cache_value_t.
|
|
|
|
|
* input_stream - the input stream at this state.
|
|
|
|
|
* arena - the arena that has been allocated for the parse this state is in.
|
|
|
|
|
* lr_stack - used in Warth's recursion
|
|
|
|
|
* recursion_heads - used in Warth's recursion
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
typedef struct parse_state {
|
|
|
|
|
GHashTable *cache;
|
|
|
|
|
input_stream_t input_stream;
|
|
|
|
|
arena_t arena;
|
|
|
|
|
GQueue *lr_stack;
|
|
|
|
|
GHashTable *recursion_heads;
|
|
|
|
|
} parse_state_t;
|
|
|
|
|
|
|
|
|
|
/* The (location, parser) tuple used to key the cache.
|
|
|
|
|
*/
|
|
|
|
|
|
2012-05-03 01:58:09 +01:00
|
|
|
typedef struct parser_cache_key {
|
|
|
|
|
input_stream_t input_pos;
|
|
|
|
|
const parser_t *parser;
|
|
|
|
|
} parser_cache_key_t;
|
|
|
|
|
|
2012-05-17 13:22:56 +02:00
|
|
|
/* Discriminator for a value in the cache.
 *
 * A cache value is either Left or Right (this is a holdover from Scala,
 * which used Either here):
 *   PC_LEFT  - the value is an LR_t, which is for left recursion.
 *   PC_RIGHT - the value is a parse_result_t.
 */
typedef enum parser_cache_value_type {
  PC_LEFT,
  PC_RIGHT
} parser_cache_value_type_t;
|
|
|
|
|
|
2012-05-17 13:22:56 +02:00
|
|
|
|
|
|
|
|
/* A recursion head.
|
|
|
|
|
*
|
|
|
|
|
* Members:
|
2012-05-17 14:05:10 +02:00
|
|
|
* head_parser - the parse rule that started this recursion
|
|
|
|
|
* involved_set - A list of rules (parser_t's) involved in the recursion
|
2012-05-17 13:22:56 +02:00
|
|
|
* eval_set -
|
|
|
|
|
*/
|
2012-05-13 01:18:18 +01:00
|
|
|
typedef struct head {
|
2012-05-17 13:22:56 +02:00
|
|
|
const parser_t *head_parser;
|
2012-05-13 01:18:18 +01:00
|
|
|
GSList *involved_set;
|
|
|
|
|
GSList *eval_set;
|
|
|
|
|
} head_t;
|
|
|
|
|
|
2012-05-17 13:22:56 +02:00
|
|
|
|
|
|
|
|
/* A left recursion.
|
|
|
|
|
*
|
|
|
|
|
* Members:
|
|
|
|
|
* seed -
|
|
|
|
|
* rule -
|
|
|
|
|
* head -
|
|
|
|
|
*/
|
2012-05-13 01:18:18 +01:00
|
|
|
typedef struct LR {
|
|
|
|
|
parse_result_t *seed;
|
|
|
|
|
const parser_t *rule;
|
|
|
|
|
head_t *head;
|
|
|
|
|
} LR_t;
|
|
|
|
|
|
2012-05-17 13:22:56 +02:00
|
|
|
/* Tagged union for values in the cache: either LR's (Left) or
|
|
|
|
|
* parse_result_t's (Right).
|
|
|
|
|
*/
|
2012-05-12 21:26:15 +01:00
|
|
|
typedef struct parser_cache_value {
|
|
|
|
|
parser_cache_value_type_t value_type;
|
|
|
|
|
union {
|
2012-05-13 01:18:18 +01:00
|
|
|
LR_t *left;
|
|
|
|
|
parse_result_t *right;
|
2012-05-12 21:26:15 +01:00
|
|
|
};
|
|
|
|
|
} parser_cache_value_t;
|
|
|
|
|
|
2012-05-04 21:23:56 +01:00
|
|
|
/* A set of byte values, stored as an array of unsigned int words.  The array
 * is allocated by new_charset() and accessed through charset_isset() and
 * charset_set(). */
typedef unsigned int *charset;
|
|
|
|
|
|
|
|
|
|
static inline charset new_charset() {
|
|
|
|
|
charset cs = g_new0(unsigned int, 256 / sizeof(unsigned int));
|
|
|
|
|
return cs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Test whether byte value `pos` is a member of `cs`.
 *
 *   cs  - charset to query.
 *   pos - byte value to look up.
 *
 * Returns 1 if `pos` is in the set, 0 otherwise.
 *
 * The word index is pos / sizeof(*cs) and the bit within that word is
 * pos % sizeof(*cs).  Both are derived from the word size in bytes, so each
 * word carries only sizeof(unsigned int) membership bits.  This must stay in
 * step with charset_set() and with the allocation size in new_charset().
 */
static inline int charset_isset(charset cs, uint8_t pos) {
  const size_t word = pos / sizeof(*cs);
  const unsigned int mask = 1U << (pos % sizeof(*cs));

  return (cs[word] & mask) != 0;
}
|
|
|
|
|
|
|
|
|
|
/* Add byte value `pos` to `cs`, or remove it.
 *
 *   cs  - charset to modify in place.
 *   pos - byte value whose membership is changed.
 *   val - nonzero to add `pos` to the set, zero to remove it.
 *
 * Uses the same addressing as charset_isset(): word pos / sizeof(*cs), bit
 * pos % sizeof(*cs).  Only that one bit of the word is changed.
 */
static inline void charset_set(charset cs, uint8_t pos, int val) {
  const size_t word = pos / sizeof(*cs);
  const unsigned int mask = 1U << (pos % sizeof(*cs));

  if (val) {
    cs[word] |= mask;
  } else {
    cs[word] &= ~mask;
  }
}
|
|
|
|
|
|
2012-05-03 01:58:09 +01:00
|
|
|
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
2012-04-23 19:39:44 +01:00
|
|
|
|
2012-05-03 01:58:09 +01:00
|
|
|
long long read_bits(input_stream_t* state, int count, char signed_p);
|
|
|
|
|
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
|
|
|
|
|
void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached);
|
|
|
|
|
guint djbhash(const uint8_t *buf, size_t len);
|
2012-05-11 23:46:29 +01:00
|
|
|
char* write_result_unamb(const parsed_token_t* tok);
|
2012-05-12 00:40:54 +01:00
|
|
|
void pprint(const parsed_token_t* tok, int indent, int delta);
|
2012-05-13 01:01:26 +01:00
|
|
|
|
2012-04-23 19:39:44 +01:00
|
|
|
#endif // #ifndef HAMMER_INTERNAL__H
|