Added do_parse function, and filled in parse driver
This commit is contained in:
parent
37ff85eae8
commit
491c2ec2fa
3 changed files with 64 additions and 35 deletions
82
src/hammer.c
82
src/hammer.c
|
|
@ -48,46 +48,37 @@ const gchar* to_string(parse_state_t *ps) {
|
||||||
return g_strescape((const gchar*)(ps->input_stream.input), NULL);
|
return g_strescape((const gchar*)(ps->input_stream.input), NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t djbhash(size_t index, char bit_offset) {
|
guint djbhash(const
|
||||||
unsigned int hash = 5381;
|
uint8_t *buf, size_t len) {
|
||||||
for (uint8_t i = 0; i < sizeof(size_t); ++i) {
|
guint hash = 5381;
|
||||||
hash = hash * 33 + (index & 0xFF);
|
while (len--) {
|
||||||
index >>= 8;
|
hash = hash * 33 + *buf++;
|
||||||
}
|
}
|
||||||
hash = hash * 33 + bit_offset;
|
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_result_t* get_cached(parse_state_t *ps, const parser_t *p) {
|
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
|
||||||
gpointer t = g_hash_table_lookup(ps->cache, p);
|
// TODO(thequux): add caching here.
|
||||||
if (NULL != t) {
|
parser_cache_key_t key = {
|
||||||
parse_result_t* ret = g_hash_table_lookup(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index,
|
.input_pos = state->input_stream,
|
||||||
ps->input_stream.length)));
|
.parser = parser
|
||||||
if (NULL != ret) {
|
};
|
||||||
return ret;
|
|
||||||
} else {
|
// check to see if there is already a result for this object...
|
||||||
// TODO(mlp): need a return value for "this parser was in the cache but nothing was at this location"
|
if (g_hash_table_contains(state->cache, &key)) {
|
||||||
return NULL;
|
// it exists!
|
||||||
}
|
// TODO(thequux): handle left recursion case
|
||||||
|
return g_hash_table_lookup(state->cache, &key);
|
||||||
} else {
|
} else {
|
||||||
// TODO(mlp): need a return value for "this parser wasn't in the cache"
|
// It doesn't exist... run the parser
|
||||||
return NULL;
|
parse_result_t *res;
|
||||||
|
res = parser->fn(parser->env, state);
|
||||||
|
// update the cache
|
||||||
|
g_hash_table_replace(state->cache, &key, res);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) {
|
|
||||||
gpointer t = g_hash_table_lookup(ps->cache, p);
|
|
||||||
if (NULL != t) {
|
|
||||||
g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached);
|
|
||||||
} else {
|
|
||||||
GHashTable *t = g_hash_table_new(g_direct_hash, g_direct_equal);
|
|
||||||
g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached);
|
|
||||||
g_hash_table_insert(ps->cache, (parser_t*)p, t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
|
|
||||||
|
|
||||||
/* Helper function, since these lines appear in every parser */
|
/* Helper function, since these lines appear in every parser */
|
||||||
inline parse_result_t* make_result(parsed_token_t *tok) {
|
inline parse_result_t* make_result(parsed_token_t *tok) {
|
||||||
parse_result_t *ret = g_new(parse_result_t, 1);
|
parse_result_t *ret = g_new(parse_result_t, 1);
|
||||||
|
|
@ -270,4 +261,29 @@ const parser_t* epsilon_p() { return NULL; }
|
||||||
const parser_t* and(const parser_t* p) { return NULL; }
|
const parser_t* and(const parser_t* p) { return NULL; }
|
||||||
const parser_t* not(const parser_t* p) { return NULL; }
|
const parser_t* not(const parser_t* p) { return NULL; }
|
||||||
|
|
||||||
parse_result_t* parse(const parser_t* parser, const uint8_t* input) { return NULL; }
|
static guint cache_key_hash(gconstpointer key) {
|
||||||
|
return djbhash(key, sizeof(parser_cache_key_t));
|
||||||
|
}
|
||||||
|
static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) {
|
||||||
|
return memcmp(key1, key2, sizeof(parser_cache_key_t)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length) {
|
||||||
|
// Set up a parse state...
|
||||||
|
parse_state_t *parse_state = g_new0(parse_state_t, 1);
|
||||||
|
parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func
|
||||||
|
cache_key_equal);// key_equal_func
|
||||||
|
parse_state->input_stream.input = input;
|
||||||
|
parse_state->input_stream.bit_offset = 8; // bit big endian
|
||||||
|
parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;
|
||||||
|
parse_state->input_stream.length = length;
|
||||||
|
|
||||||
|
parse_result_t *res = do_parse(parser, parse_state);
|
||||||
|
// tear down the parse state. For now, leak like a sieve.
|
||||||
|
// BUG: Leaks like a sieve.
|
||||||
|
// TODO(thequux): Pull in the arena allocator.
|
||||||
|
g_hash_table_destroy(parse_state->cache);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,8 @@
|
||||||
* already been applied once don't get a new parser_id ... but the global variable
|
* already been applied once don't get a new parser_id ... but the global variable
|
||||||
* still increments? not sure why that is, need to debug some), and the locations
|
* still increments? not sure why that is, need to debug some), and the locations
|
||||||
* at which it's been applied are memoized.
|
* at which it's been applied are memoized.
|
||||||
|
*
|
||||||
|
* In our case, it's a hash table from parser_cache_key_t to parse_result_t.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#define BYTE_BIG_ENDIAN 0x1
|
#define BYTE_BIG_ENDIAN 0x1
|
||||||
|
|
@ -85,7 +87,7 @@ typedef struct parser {
|
||||||
void *env;
|
void *env;
|
||||||
} parser_t;
|
} parser_t;
|
||||||
|
|
||||||
parse_result_t* parse(const parser_t* parser, const uint8_t* input);
|
parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length);
|
||||||
|
|
||||||
/* Given a string, returns a parser that parses that string value. */
|
/* Given a string, returns a parser that parses that string value. */
|
||||||
const parser_t* token(const uint8_t *str, const size_t len);
|
const parser_t* token(const uint8_t *str, const size_t len);
|
||||||
|
|
@ -132,4 +134,6 @@ const parser_t* epsilon_p();
|
||||||
const parser_t* and(const parser_t* p);
|
const parser_t* and(const parser_t* p);
|
||||||
const parser_t* not(const parser_t* p);
|
const parser_t* not(const parser_t* p);
|
||||||
|
|
||||||
|
const parser_t* ignore(const parser_t* p); // parse p, but return no ast.
|
||||||
|
|
||||||
#endif // #ifndef HAMMER_HAMMER__H
|
#endif // #ifndef HAMMER_HAMMER__H
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,15 @@
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
long long read_bits(input_stream_t* state, int count, char signed_p);
|
typedef struct parser_cache_key {
|
||||||
|
input_stream_t input_pos;
|
||||||
|
const parser_t *parser;
|
||||||
|
} parser_cache_key_t;
|
||||||
|
|
||||||
|
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
|
||||||
|
|
||||||
|
long long read_bits(input_stream_t* state, int count, char signed_p);
|
||||||
|
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
|
||||||
|
void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached);
|
||||||
|
guint djbhash(const uint8_t *buf, size_t len);
|
||||||
#endif // #ifndef HAMMER_INTERNAL__H
|
#endif // #ifndef HAMMER_INTERNAL__H
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue