Moved parse_state_t into internal.h, had to add some things for Warth's recursion. More documentary comments.

This commit is contained in:
Meredith L. Patterson 2012-05-17 13:22:56 +02:00
parent bd6e7d1b63
commit c6f2dcc257
3 changed files with 94 additions and 28 deletions

View file

@ -35,7 +35,36 @@ guint djbhash(const uint8_t *buf, size_t len) {
return hash; return hash;
} }
void setupLR(const parser_t *p, GQueue *stack, LR_t *recDetect) { parser_cache_value_t* recall(parser_cache_key_t *k, parse_state_t *state) {
parser_cache_value_t *cached = g_hash_table_lookup(state->cache, k);
head_t *head = g_hash_table_lookup(state->recursion_heads, &(state->input_stream));
if (!head) { // No heads found
return cached;
} else { // Some heads found
if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) {
// Nothing in the cache, and the key parser is not involved
return /* TODO(mlp): figure out what to return here instead of Some(MemoEntry(Right(Failure("dummy", in")))) */ NULL;
}
if (g_slist_find(head->eval_set, k->parser)) {
// Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
head->eval_set = g_slist_remove_all(head->eval_set, k->parser);
parse_result_t *tmp_res = k->parser->fn(k->parser->env, state);
if (tmp_res)
tmp_res->arena = state->arena;
// we know that cached has an entry here, modify it
cached->value_type = PC_RIGHT;
cached->right = tmp_res;
}
return cached;
}
}
void setupLR(const parser_t *p, GQueue *stack, LR_t *rec_detect) {
if (!rec_detect->head) {
head_t *some = g_new(head_t, 1);
some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL;
rec_detect->head = some;
}
} }
@ -47,6 +76,7 @@ parse_result_t* grow(const parser_t *p, parse_state_t *state, head_t *head) {
return NULL; return NULL;
} }
/* Warth's recursion. Hi Alessandro! */
parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) { parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
// TODO(thequux): add caching here. // TODO(thequux): add caching here.
parser_cache_key_t *key = a_new(parser_cache_key_t, 1); parser_cache_key_t *key = a_new(parser_cache_key_t, 1);
@ -67,7 +97,8 @@ parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
parse_result_t *tmp_res; parse_result_t *tmp_res;
if (parser) { if (parser) {
tmp_res = parser->fn(parser->env, state); tmp_res = parser->fn(parser->env, state);
tmp_res->arena = state->arena; if (tmp_res)
tmp_res->arena = state->arena;
} else } else
tmp_res = NULL; tmp_res = NULL;
if (state->input_stream.overrun) if (state->input_stream.overrun)

View file

@ -20,37 +20,15 @@
#include <glib.h> #include <glib.h>
#include <stdint.h> #include <stdint.h>
#include "allocator.h" #include "allocator.h"
/* The state of the parser.
*
* Members:
* input - the entire string being parsed
* index - current position in input
* length - size of input
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t.
*
*/
#define BYTE_BIG_ENDIAN 0x1 #define BYTE_BIG_ENDIAN 0x1
#define BIT_BIG_ENDIAN 0x2 #define BIT_BIG_ENDIAN 0x2
#define BIT_LITTLE_ENDIAN 0x0 #define BIT_LITTLE_ENDIAN 0x0
#define BYTE_LITTLE_ENDIAN 0x0 #define BYTE_LITTLE_ENDIAN 0x0
typedef int bool; typedef int bool;
typedef struct input_stream {
// This should be considered to be a really big value type. typedef struct parse_state parse_state_t;
const uint8_t *input;
size_t index;
size_t length;
char bit_offset;
char endianness;
char overrun;
} input_stream_t;
typedef struct parse_state {
GHashTable *cache;
input_stream_t input_stream;
arena_t arena;
GQueue *lr_stack;
} parse_state_t;
typedef enum token_type { typedef enum token_type {
TT_NONE, TT_NONE,

View file

@ -32,28 +32,85 @@
#define false 0 #define false 0
#define true 1 #define true 1
/* A position within the input being parsed.
 *
 * Members:
 *   input - the entire string being parsed
 *   index - current position in input
 *   length - size of input
 *   bit_offset - NOTE(review): presumably the bit position within the byte at index; confirm against the bit-reading code.
 *   endianness - NOTE(review): presumably a combination of the BYTE_*_ENDIAN / BIT_*_ENDIAN flags; confirm.
 *   overrun - flag tested by do_parse() after a parser has run. NOTE(review): presumably set when a read went past length; confirm.
 */
typedef struct input_stream {
// This should be considered to be a really big value type.
const uint8_t *input;
size_t index;
size_t length;
char bit_offset;
char endianness;
char overrun;
} input_stream_t;
/* The state of the parser.
 *
 * Members:
 *   cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parser_cache_value_t.
 *   input_stream - the input stream at this state. Stored by value, not as a pointer.
 *   arena - the arena that has been allocated for the parse this state is in. do_parse() and recall() copy it into each non-NULL parse_result_t they obtain.
 *   lr_stack - used in Warth's recursion
 *   recursion_heads - used in Warth's recursion. A hash table whose values are head_t; recall() looks it up using a pointer to input_stream as the key.
 *                     NOTE(review): confirm the table's hash/equality functions compare stream positions rather than pointer identity.
 *
 */
typedef struct parse_state {
GHashTable *cache;
input_stream_t input_stream;
arena_t arena;
GQueue *lr_stack;
GHashTable *recursion_heads;
} parse_state_t;
/* The (location, parser) tuple used to key the cache.
*/
typedef struct parser_cache_key { typedef struct parser_cache_key {
input_stream_t input_pos; input_stream_t input_pos;
const parser_t *parser; const parser_t *parser;
} parser_cache_key_t; } parser_cache_key_t;
/* A value in the cache is either of value Left or Right (this is a
* holdover from Scala, which used Either here). Left corresponds to
* LR_t, which is for left recursion; Right corresponds to
* parse_result_t.
*/
typedef enum parser_cache_value_type { typedef enum parser_cache_value_type {
PC_LEFT, PC_LEFT,
PC_RIGHT PC_RIGHT
} parser_cache_value_type_t; } parser_cache_value_type_t;
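/* A recursion head.
 *
 * Members:
 *   head_parser - the parser at the head of the recursion; recall() compares it against the parser in the cache key.
 *   involved_set - a GSList of parsers; recall() treats a parser found in this list as involved in the recursion.
 *   eval_set - a GSList of parsers; recall() re-runs a parser found in this list and removes it from the list.
 */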
typedef struct head { typedef struct head {
parser_t *head_parser; const parser_t *head_parser;
GSList *involved_set; GSList *involved_set;
GSList *eval_set; GSList *eval_set;
} head_t; } head_t;
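/* A left recursion.
 *
 * Members:
 *   seed - a parse result (presumably the seed parse that gets grown; TODO confirm).
 *   rule - a parser (presumably the rule in which the left recursion was detected; TODO confirm).
 *   head - the recursion head for this left recursion; setupLR() allocates one if it is still NULL.
 */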
typedef struct LR { typedef struct LR {
parse_result_t *seed; parse_result_t *seed;
const parser_t *rule; const parser_t *rule;
head_t *head; head_t *head;
} LR_t; } LR_t;
/* Tagged union for values in the cache: either LR's (Left) or
* parse_result_t's (Right).
*/
typedef struct parser_cache_value { typedef struct parser_cache_value {
parser_cache_value_type_t value_type; parser_cache_value_type_t value_type;
union { union {