Added do_parse function, and filled in parse driver

This commit is contained in:
Dan Hirsch 2012-05-03 01:58:09 +01:00
parent 37ff85eae8
commit 491c2ec2fa
3 changed files with 64 additions and 35 deletions

View file

@ -48,46 +48,37 @@ const gchar* to_string(parse_state_t *ps) {
return g_strescape((const gchar*)(ps->input_stream.input), NULL); return g_strescape((const gchar*)(ps->input_stream.input), NULL);
} }
uint8_t djbhash(size_t index, char bit_offset) { guint djbhash(const
unsigned int hash = 5381; uint8_t *buf, size_t len) {
for (uint8_t i = 0; i < sizeof(size_t); ++i) { guint hash = 5381;
hash = hash * 33 + (index & 0xFF); while (len--) {
index >>= 8; hash = hash * 33 + *buf++;
} }
hash = hash * 33 + bit_offset;
return hash; return hash;
} }
parse_result_t* get_cached(parse_state_t *ps, const parser_t *p) { parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
gpointer t = g_hash_table_lookup(ps->cache, p); // TODO(thequux): add caching here.
if (NULL != t) { parser_cache_key_t key = {
parse_result_t* ret = g_hash_table_lookup(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, .input_pos = state->input_stream,
ps->input_stream.length))); .parser = parser
if (NULL != ret) { };
return ret;
} else {
// TODO(mlp): need a return value for "this parser was in the cache but nothing was at this location"
return NULL;
}
} else {
// TODO(mlp): need a return value for "this parser wasn't in the cache"
return NULL;
}
}
void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) { // check to see if there is already a result for this object...
gpointer t = g_hash_table_lookup(ps->cache, p); if (g_hash_table_contains(state->cache, &key)) {
if (NULL != t) { // it exists!
g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached); // TODO(thequux): handle left recursion case
return g_hash_table_lookup(state->cache, &key);
} else { } else {
GHashTable *t = g_hash_table_new(g_direct_hash, g_direct_equal); // It doesn't exist... run the parser
g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached); parse_result_t *res;
g_hash_table_insert(ps->cache, (parser_t*)p, t); res = parser->fn(parser->env, state);
// update the cache
g_hash_table_replace(state->cache, &key, res);
return res;
} }
} }
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
/* Helper function, since these lines appear in every parser */ /* Helper function, since these lines appear in every parser */
inline parse_result_t* make_result(parsed_token_t *tok) { inline parse_result_t* make_result(parsed_token_t *tok) {
parse_result_t *ret = g_new(parse_result_t, 1); parse_result_t *ret = g_new(parse_result_t, 1);
@ -270,4 +261,29 @@ const parser_t* epsilon_p() { return NULL; }
const parser_t* and(const parser_t* p) { return NULL; } const parser_t* and(const parser_t* p) { return NULL; }
const parser_t* not(const parser_t* p) { return NULL; } const parser_t* not(const parser_t* p) { return NULL; }
parse_result_t* parse(const parser_t* parser, const uint8_t* input) { return NULL; } static guint cache_key_hash(gconstpointer key) {
return djbhash(key, sizeof(parser_cache_key_t));
}
/*
 * Equality callback for the parse cache: two keys match when their
 * parser_cache_key_t representations are byte-for-byte identical.
 */
static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) {
  const int byte_diff = memcmp(key1, key2, sizeof(parser_cache_key_t));
  return byte_diff == 0;
}
/*
 * Top-level parse driver: runs `parser` over the `length` bytes at `input`.
 *
 * A fresh parse state, with its own memoization cache, is created for the
 * call and released again before returning.
 *
 * parser: the parser to run; NULL yields a NULL result.
 * input:  the bytes to parse; the pointer is stored in the parse state for
 *         the duration of the call and is not freed here.
 * length: value stored as the input stream's length.
 *
 * Returns whatever do_parse() produced for `parser`, or NULL if `parser`
 * is NULL.
 */
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length) {
  // Parser constructors can hand back NULL (the unimplemented ones do);
  // do_parse would dereference it, so bail out early.
  if (parser == NULL) {
    return NULL;
  }

  // Set up a parse state...
  parse_state_t *parse_state = g_new0(parse_state_t, 1);
  parse_state->cache = g_hash_table_new(cache_key_hash,   // hash_func
                                        cache_key_equal); // key_equal_func
  parse_state->input_stream.input = input;
  parse_state->input_stream.bit_offset = 8; // bit big endian
  parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
  parse_state->input_stream.length = length;

  parse_result_t *res = do_parse(parser, parse_state);

  // Tear down the parse state: both the cache table and the state struct
  // were allocated above, so both are released here.
  // BUG: anything allocated while parsing is not reclaimed by this function.
  // TODO(thequux): Pull in the arena allocator.
  g_hash_table_destroy(parse_state->cache);
  g_free(parse_state);
  return res;
}

View file

@ -34,6 +34,8 @@
* still increments? not sure why that is, need to debug some), and the locations * still increments? not sure why that is, need to debug some), and the locations
* at which it's been applied are memoized. * at which it's been applied are memoized.
* *
* In our case, it's a hash table from parser_cache_key_t to parse_result_t.
*
*/ */
#define BYTE_BIG_ENDIAN 0x1 #define BYTE_BIG_ENDIAN 0x1
#define BIT_BIG_ENDIAN 0x2 #define BIT_BIG_ENDIAN 0x2
@ -85,7 +87,7 @@ typedef struct parser {
void *env; void *env;
} parser_t; } parser_t;
parse_result_t* parse(const parser_t* parser, const uint8_t* input); parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length);
/* Given a string, returns a parser that parses that string value. */ /* Given a string, returns a parser that parses that string value. */
const parser_t* token(const uint8_t *str, const size_t len); const parser_t* token(const uint8_t *str, const size_t len);
@ -132,4 +134,6 @@ const parser_t* epsilon_p();
const parser_t* and(const parser_t* p); const parser_t* and(const parser_t* p);
const parser_t* not(const parser_t* p); const parser_t* not(const parser_t* p);
const parser_t* ignore(const parser_t* p); // parse p, but return no ast.
#endif // #ifndef HAMMER_HAMMER__H #endif // #ifndef HAMMER_HAMMER__H

View file

@ -5,6 +5,15 @@
#define false 0 #define false 0
#define true 1 #define true 1
long long read_bits(input_stream_t* state, int count, char signed_p); typedef struct parser_cache_key {
input_stream_t input_pos;
const parser_t *parser;
} parser_cache_key_t;
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
// NOTE(review): presumably reads `count` bits from the stream, with `signed_p`
// selecting a signed interpretation -- confirm against the definition.
long long read_bits(input_stream_t* state, int count, char signed_p);
// Runs `parser` against `state`. A result already stored in state->cache for
// this (input position, parser) pair is returned as-is; otherwise the parser's
// function is invoked and its result is recorded in the cache before returning.
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
// NOTE(review): no definition of put_cached is visible next to do_parse --
// confirm this prototype is still needed.
void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached);
// Hashes the `len` bytes at `buf`: starts from 5381 and folds in each byte
// as hash * 33 + byte.
guint djbhash(const uint8_t *buf, size_t len);
#endif // #ifndef HAMMER_INTERNAL__H #endif // #ifndef HAMMER_INTERNAL__H