Moved parse_state_t into internal.h; had to add some things for Warth's recursion. More documentation comments.
This commit is contained in:
parent
bd6e7d1b63
commit
c6f2dcc257
3 changed files with 94 additions and 28 deletions
33
src/hammer.c
33
src/hammer.c
|
|
@ -35,7 +35,36 @@ guint djbhash(const uint8_t *buf, size_t len) {
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setupLR(const parser_t *p, GQueue *stack, LR_t *recDetect) {
|
parser_cache_value_t* recall(parser_cache_key_t *k, parse_state_t *state) {
|
||||||
|
parser_cache_value_t *cached = g_hash_table_lookup(state->cache, k);
|
||||||
|
head_t *head = g_hash_table_lookup(state->recursion_heads, &(state->input_stream));
|
||||||
|
if (!head) { // No heads found
|
||||||
|
return cached;
|
||||||
|
} else { // Some heads found
|
||||||
|
if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) {
|
||||||
|
// Nothing in the cache, and the key parser is not involved
|
||||||
|
return /* TODO(mlp): figure out what to return here instead of Some(MemoEntry(Right(Failure("dummy", in)))) */ NULL;
|
||||||
|
}
|
||||||
|
if (g_slist_find(head->eval_set, k->parser)) {
|
||||||
|
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
|
||||||
|
head->eval_set = g_slist_remove_all(head->eval_set, k->parser);
|
||||||
|
parse_result_t *tmp_res = k->parser->fn(k->parser->env, state);
|
||||||
|
if (tmp_res)
|
||||||
|
tmp_res->arena = state->arena;
|
||||||
|
// we know that cached has an entry here, modify it
|
||||||
|
cached->value_type = PC_RIGHT;
|
||||||
|
cached->right = tmp_res;
|
||||||
|
}
|
||||||
|
return cached;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setupLR(const parser_t *p, GQueue *stack, LR_t *rec_detect) {
|
||||||
|
if (!rec_detect->head) {
|
||||||
|
head_t *some = g_new(head_t, 1);
|
||||||
|
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
|
||||||
|
rec_detect->head = some;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -47,6 +76,7 @@ parse_result_t* grow(const parser_t *p, parse_state_t *state, head_t *head) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Warth's recursion. Hi Alessandro! */
|
||||||
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
|
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
|
||||||
// TODO(thequux): add caching here.
|
// TODO(thequux): add caching here.
|
||||||
parser_cache_key_t *key = a_new(parser_cache_key_t, 1);
|
parser_cache_key_t *key = a_new(parser_cache_key_t, 1);
|
||||||
|
|
@ -67,6 +97,7 @@ parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
|
||||||
parse_result_t *tmp_res;
|
parse_result_t *tmp_res;
|
||||||
if (parser) {
|
if (parser) {
|
||||||
tmp_res = parser->fn(parser->env, state);
|
tmp_res = parser->fn(parser->env, state);
|
||||||
|
if (tmp_res)
|
||||||
tmp_res->arena = state->arena;
|
tmp_res->arena = state->arena;
|
||||||
} else
|
} else
|
||||||
tmp_res = NULL;
|
tmp_res = NULL;
|
||||||
|
|
|
||||||
26
src/hammer.h
26
src/hammer.h
|
|
@ -20,37 +20,15 @@
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
/* The state of the parser.
|
|
||||||
*
|
|
||||||
* Members:
|
|
||||||
* input - the entire string being parsed
|
|
||||||
* index - current position in input
|
|
||||||
* length - size of input
|
|
||||||
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#define BYTE_BIG_ENDIAN 0x1
|
#define BYTE_BIG_ENDIAN 0x1
|
||||||
#define BIT_BIG_ENDIAN 0x2
|
#define BIT_BIG_ENDIAN 0x2
|
||||||
#define BIT_LITTLE_ENDIAN 0x0
|
#define BIT_LITTLE_ENDIAN 0x0
|
||||||
#define BYTE_LITTLE_ENDIAN 0x0
|
#define BYTE_LITTLE_ENDIAN 0x0
|
||||||
|
|
||||||
typedef int bool;
|
typedef int bool;
|
||||||
typedef struct input_stream {
|
|
||||||
// This should be considered to be a really big value type.
|
|
||||||
const uint8_t *input;
|
|
||||||
size_t index;
|
|
||||||
size_t length;
|
|
||||||
char bit_offset;
|
|
||||||
char endianness;
|
|
||||||
char overrun;
|
|
||||||
} input_stream_t;
|
|
||||||
|
|
||||||
typedef struct parse_state {
|
typedef struct parse_state parse_state_t;
|
||||||
GHashTable *cache;
|
|
||||||
input_stream_t input_stream;
|
|
||||||
arena_t arena;
|
|
||||||
GQueue *lr_stack;
|
|
||||||
} parse_state_t;
|
|
||||||
|
|
||||||
typedef enum token_type {
|
typedef enum token_type {
|
||||||
TT_NONE,
|
TT_NONE,
|
||||||
|
|
|
||||||
|
|
@ -32,28 +32,85 @@
|
||||||
#define false 0
|
#define false 0
|
||||||
#define true 1
|
#define true 1
|
||||||
|
|
||||||
|
typedef struct input_stream {
|
||||||
|
// This should be considered to be a really big value type.
|
||||||
|
const uint8_t *input;
|
||||||
|
size_t index;
|
||||||
|
size_t length;
|
||||||
|
char bit_offset;
|
||||||
|
char endianness;
|
||||||
|
char overrun;
|
||||||
|
} input_stream_t;
|
||||||
|
|
||||||
|
/* The state of the parser.
|
||||||
|
*
|
||||||
|
* Members:
|
||||||
|
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parser_cache_value_t.
|
||||||
|
* input_stream - the input stream at this state.
|
||||||
|
* arena - the arena that has been allocated for the parse this state is in.
|
||||||
|
* lr_stack - used in Warth's recursion
|
||||||
|
* recursion_heads - used in Warth's recursion
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct parse_state {
|
||||||
|
GHashTable *cache;
|
||||||
|
input_stream_t input_stream;
|
||||||
|
arena_t arena;
|
||||||
|
GQueue *lr_stack;
|
||||||
|
GHashTable *recursion_heads;
|
||||||
|
} parse_state_t;
|
||||||
|
|
||||||
|
/* The (location, parser) tuple used to key the cache.
|
||||||
|
*/
|
||||||
|
|
||||||
typedef struct parser_cache_key {
|
typedef struct parser_cache_key {
|
||||||
input_stream_t input_pos;
|
input_stream_t input_pos;
|
||||||
const parser_t *parser;
|
const parser_t *parser;
|
||||||
} parser_cache_key_t;
|
} parser_cache_key_t;
|
||||||
|
|
||||||
|
/* A value in the cache is either of value Left or Right (this is a
|
||||||
|
* holdover from Scala, which used Either here). Left corresponds to
|
||||||
|
* LR_t, which is for left recursion; Right corresponds to
|
||||||
|
* parse_result_t.
|
||||||
|
*/
|
||||||
|
|
||||||
typedef enum parser_cache_value_type {
|
typedef enum parser_cache_value_type {
|
||||||
PC_LEFT,
|
PC_LEFT,
|
||||||
PC_RIGHT
|
PC_RIGHT
|
||||||
} parser_cache_value_type_t;
|
} parser_cache_value_type_t;
|
||||||
|
|
||||||
|
|
||||||
|
/* A recursion head.
|
||||||
|
*
|
||||||
|
* Members:
|
||||||
|
* head_parser -
|
||||||
|
* involved_set -
|
||||||
|
* eval_set -
|
||||||
|
*/
|
||||||
typedef struct head {
|
typedef struct head {
|
||||||
parser_t *head_parser;
|
const parser_t *head_parser;
|
||||||
GSList *involved_set;
|
GSList *involved_set;
|
||||||
GSList *eval_set;
|
GSList *eval_set;
|
||||||
} head_t;
|
} head_t;
|
||||||
|
|
||||||
|
|
||||||
|
/* A left recursion.
|
||||||
|
*
|
||||||
|
* Members:
|
||||||
|
* seed -
|
||||||
|
* rule -
|
||||||
|
* head -
|
||||||
|
*/
|
||||||
typedef struct LR {
|
typedef struct LR {
|
||||||
parse_result_t *seed;
|
parse_result_t *seed;
|
||||||
const parser_t *rule;
|
const parser_t *rule;
|
||||||
head_t *head;
|
head_t *head;
|
||||||
} LR_t;
|
} LR_t;
|
||||||
|
|
||||||
|
/* Tagged union for values in the cache: either LR's (Left) or
|
||||||
|
* parse_result_t's (Right).
|
||||||
|
*/
|
||||||
typedef struct parser_cache_value {
|
typedef struct parser_cache_value {
|
||||||
parser_cache_value_type_t value_type;
|
parser_cache_value_type_t value_type;
|
||||||
union {
|
union {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue