Merge remote-tracking branch 'upstream/master'

Conflicts:
	NOTES
This commit is contained in:
Dan Hirsch 2012-05-04 21:27:09 +01:00
commit d6c0eaf3de
3 changed files with 103 additions and 16 deletions

20
NOTES
View file

@ -15,6 +15,20 @@ consistency checks.
TODO: Add consistency check to the bitreader
We should support the use of parse-table-based parse methods; add a
parse_compile method that must be called before the newly-created
parser is used.
TODO: We should support the use of parse-table-based parse methods; add a
parse_compile method that must be called before the newly-created
parser is used.
Regarding butnot and difference:
There's a "do what I say, not what I do" variation in how we
implemented these (versus how jsparse did it). His `butnot` succeeds
if p1 and p2 both match and p1's result is longer than p2's, though
the comments say it should succeed if p2's result is longer than
p1's. Also, his `difference` succeeds if p1 and p2 both match, full
stop, returning the result of p2 if p2's result is shorter than p1's
or the result of p1 otherwise, though the comments say it should
succeed if p2's result is shorter than p1's. Whatever; we're doing
what the comments say.

View file

@ -294,7 +294,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
// cache the initial state of the input stream
input_stream_t start_state = state->input_stream;
parse_result_t *r1 = do_parse(parsers->p1, state);
// if r1 is null, bail out early
// if p1 failed, bail out early
if (NULL == r1) {
return NULL;
}
@ -304,7 +304,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
parse_result_t *r2 = do_parse(parsers->p2, state);
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
state->input_stream = after_p1_state;
// if r2 is null, restore post-p1 state and bail out early
// if p2 failed, restore post-p1 state and bail out early
if (NULL == r2) {
return r1;
}
@ -326,8 +326,76 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) {
return ret;
}
const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
/*
 * Parse action behind difference(): run p1 and p2 from the same starting
 * position and decide whether p1's result survives.
 *
 * env   - the two_parsers_t holding p1 and p2
 * state - the parse state; its input_stream is rewound before p2 runs and
 *         is set back to the post-p1 position once p2 has been tried
 *
 * Returns NULL if p1 fails, or if both parsers match and token_length() of
 * p1's result is strictly less than that of p2's result. In every other
 * case (p2 fails, or p1's length is greater than or equal to p2's) p1's
 * result is returned.
 */
static parse_result_t* parse_difference(void *env, parse_state_t *state) {
  two_parsers_t *pair = (two_parsers_t*)env;
  // remember where we started so that p2 can be run from the same position
  input_stream_t initial = state->input_stream;
  parse_result_t *first = do_parse(pair->p1, state);
  if (NULL == first) {
    // p1 did not match, so there is nothing to take a difference from
    return NULL;
  }
  // remember where p1 stopped; the stream is put back here after p2 runs
  input_stream_t post_p1 = state->input_stream;
  // rewind and try p2 from the original position
  state->input_stream = initial;
  parse_result_t *second = do_parse(pair->p2, state);
  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
  state->input_stream = post_p1;
  if (NULL == second) {
    // only p1 matched
    return first;
  }
  size_t first_len = token_length(first);
  size_t second_len = token_length(second);
  // both matched: fail only when p1's text is strictly shorter than p2's
  return (first_len < second_len) ? NULL : first;
}
/*
 * Build a parser that applies parse_difference to the pair (p1, p2).
 *
 * Allocates a two_parsers_t that stores p1 and p2, then a parser_t whose fn
 * is parse_difference and whose env points at that pair, and returns the
 * new parser_t.
 */
const parser_t* difference(const parser_t* p1, const parser_t* p2) {
  two_parsers_t *pair = g_new(two_parsers_t, 1);
  pair->p1 = p1;
  pair->p2 = p2;

  parser_t *result = g_new(parser_t, 1);
  result->fn = parse_difference;
  result->env = (void*)pair;
  return result;
}
/*
 * Parse action behind xor(): run p1 and p2 from the same starting position
 * and succeed only when exactly one of them matches.
 *
 * env   - the two_parsers_t holding p1 and p2
 * state - the parse state; its input_stream is rewound to the starting
 *         position before p2 runs, and is moved back to the post-p1
 *         position when p1 is the parser that matched
 *
 * Returns p2's result if only p2 matched, p1's result if only p1 matched,
 * and NULL if both matched or neither did.
 */
static parse_result_t* parse_xor(void *env, parse_state_t *state) {
  two_parsers_t *pair = (two_parsers_t*)env;
  // remember where we started so that p2 can be run from the same position
  input_stream_t initial = state->input_stream;
  parse_result_t *first = do_parse(pair->p1, state);
  input_stream_t post_p1 = state->input_stream;
  // rewind and run p2 regardless of how p1 fared
  state->input_stream = initial;
  parse_result_t *second = do_parse(pair->p2, state);

  if (NULL == first) {
    // p1 failed: the outcome is whatever p2 produced (NULL if it failed too)
    return second;
  }
  if (NULL != second) {
    // both matched, which xor rejects
    return NULL;
  }
  // only p1 matched: continue from where p1 stopped
  state->input_stream = post_p1;
  return first;
}
/*
 * Build a parser that applies parse_xor to the pair (p1, p2).
 *
 * Allocates a two_parsers_t that stores p1 and p2, then a parser_t whose fn
 * is parse_xor and whose env points at that pair, and returns the new
 * parser_t.
 */
const parser_t* xor(const parser_t* p1, const parser_t* p2) {
  two_parsers_t *pair = g_new(two_parsers_t, 1);
  pair->p1 = p1;
  pair->p2 = p2;

  parser_t *result = g_new(parser_t, 1);
  result->fn = parse_xor;
  result->env = (void*)pair;
  return result;
}
/* repeat0 is not implemented yet: this stub ignores p and always returns NULL. */
const parser_t* repeat0(const parser_t* p) {
  return NULL;
}
/* repeat1 is not implemented yet: this stub ignores p and always returns NULL. */
const parser_t* repeat1(const parser_t* p) {
  return NULL;
}
/* repeat_n is not implemented yet: this stub ignores p and n and always returns NULL. */
const parser_t* repeat_n(const parser_t* p, const size_t n) {
  return NULL;
}

View file

@ -26,15 +26,7 @@
* input - the entire string being parsed
* index - current position in input
* length - size of input
* THE FOLLOWING DESCRIBES HOW JSPARSE DOES IT. OUR MILEAGE MAY VARY.
* cache - a hash table describing the state of the parse, including partial parse_results.
* It's actually a hash table of [parser_id, hash_table[index, parse_result]],
* where the parser id is incremented as the parse goes along (parsers that have
* already been applied once don't get a new parser_id ... but the global variable
* still increments? not sure why that is, need to debug some), and the locations
* at which it's been applied are memoized.
*
* In our case, it's a hash table from parser_cache_key_t to parse_state_t.
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t.
*
*/
#define BYTE_BIG_ENDIAN 0x1
@ -124,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]);
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
const parser_t* choice(const parser_t* p_array[]);
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p1's result is shorter than p2's
*/
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p2's result is shorter than p1's
*/
const parser_t* difference(const parser_t* p1, const parser_t* p2);
/* Given two parsers, p1 and p2, this parser succeeds if exactly one of p1 and p2 succeeds; it fails if both succeed or if both fail.
*/
const parser_t* xor(const parser_t* p1, const parser_t* p2);
const parser_t* repeat0(const parser_t* p);
const parser_t* repeat1(const parser_t* p);
const parser_t* repeat_n(const parser_t* p, const size_t n);