Merge remote-tracking branch 'upstream/master'
Conflicts: NOTES
This commit is contained in:
commit
d6c0eaf3de
3 changed files with 103 additions and 16 deletions
20
NOTES
20
NOTES
|
|
@ -15,6 +15,20 @@ consistency checks.
|
|||
|
||||
TODO: Add consistency check to the bitreader
|
||||
|
||||
We should support the use of parse-table-based parse methods; add a
|
||||
parse_compile method that must be called before the newly-created
|
||||
parser is used.
|
||||
TODO: We should support the use of parse-table-based parse methods; add a
|
||||
parse_compile method that must be called before the newly-created
|
||||
parser is used.
|
||||
|
||||
|
||||
Regarding butnot and difference:
|
||||
|
||||
There's a "do what I say, not what I do" variation in how we
|
||||
implemented these (versus how jsparse did it). His `butnot` succeeds
|
||||
if p1 and p2 both match and p1's result is longer than p2's, though
|
||||
the comments say it should succeed if p2's result is longer than
|
||||
p1's. Also, his `difference` succeeds if p1 and p2 both match, full
|
||||
stop, returning the result of p2 if p2's result is shorter than p1's
|
||||
or the result of p1 otherwise, though the comments say it should
|
||||
succeed if p2's result is shorter than p1's. Whatever; we're doing
|
||||
what the comments say.
|
||||
|
||||
|
|
|
|||
76
src/hammer.c
76
src/hammer.c
|
|
@ -294,7 +294,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
|
|||
// cache the initial state of the input stream
|
||||
input_stream_t start_state = state->input_stream;
|
||||
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||
// if r1 is null, bail out early
|
||||
// if p1 failed, bail out early
|
||||
if (NULL == r1) {
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -304,7 +304,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
|
|||
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
|
||||
state->input_stream = after_p1_state;
|
||||
// if r2 is null, restore post-p1 state and bail out early
|
||||
// if p2 failed, p1's result stands (the stream is already back at the post-p1 state)
|
||||
if (NULL == r2) {
|
||||
return r1;
|
||||
}
|
||||
|
|
@ -326,8 +326,76 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
|
||||
const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
|
||||
static parse_result_t* parse_difference(void *env, parse_state_t *state) {
|
||||
two_parsers_t *parsers = (two_parsers_t*)env;
|
||||
// cache the initial state of the input stream
|
||||
input_stream_t start_state = state->input_stream;
|
||||
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||
// if p1 failed, bail out early
|
||||
if (NULL == r1) {
|
||||
return NULL;
|
||||
}
|
||||
// cache the state after parse #1, since we might have to back up to it
|
||||
input_stream_t after_p1_state = state->input_stream;
|
||||
state->input_stream = start_state;
|
||||
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
|
||||
state->input_stream = after_p1_state;
|
||||
// if p2 failed, p1's result stands (the stream is already back at the post-p1 state)
|
||||
if (NULL == r2) {
|
||||
return r1;
|
||||
}
|
||||
size_t r1len = token_length(r1);
|
||||
size_t r2len = token_length(r2);
|
||||
// if both match but p1's text is shorter than p2's, fail
|
||||
if (r1len < r2len) {
|
||||
return NULL;
|
||||
} else {
|
||||
return r1;
|
||||
}
|
||||
}
|
||||
|
||||
const parser_t* difference(const parser_t* p1, const parser_t* p2) {
|
||||
two_parsers_t *env = g_new(two_parsers_t, 1);
|
||||
env->p1 = p1; env->p2 = p2;
|
||||
parser_t *ret = g_new(parser_t, 1);
|
||||
ret->fn = parse_difference; ret->env = (void*)env;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static parse_result_t* parse_xor(void *env, parse_state_t *state) {
|
||||
two_parsers_t *parsers = (two_parsers_t*)env;
|
||||
// cache the initial state of the input stream
|
||||
input_stream_t start_state = state->input_stream;
|
||||
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||
input_stream_t after_p1_state = state->input_stream;
|
||||
// reset input stream, parse again
|
||||
state->input_stream = start_state;
|
||||
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||
if (NULL == r1) {
|
||||
if (NULL != r2) {
|
||||
return r2;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
if (NULL == r2) {
|
||||
state->input_stream = after_p1_state;
|
||||
return r1;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const parser_t* xor(const parser_t* p1, const parser_t* p2) {
|
||||
two_parsers_t *env = g_new(two_parsers_t, 1);
|
||||
env->p1 = p1; env->p2 = p2;
|
||||
parser_t *ret = g_new(parser_t, 1);
|
||||
ret->fn = parse_xor; ret->env = (void*)env;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const parser_t* repeat0(const parser_t* p) { return NULL; }
|
||||
const parser_t* repeat1(const parser_t* p) { return NULL; }
|
||||
const parser_t* repeat_n(const parser_t* p, const size_t n) { return NULL; }
|
||||
|
|
|
|||
23
src/hammer.h
23
src/hammer.h
|
|
@ -26,15 +26,7 @@
|
|||
* input - the entire string being parsed
|
||||
* index - current position in input
|
||||
* length - size of input
|
||||
* THE FOLLOWING DESCRIBES HOW JSPARSE DOES IT. OUR MILEAGE MAY VARY.
|
||||
* cache - a hash table describing the state of the parse, including partial parse_results.
|
||||
* It's actually a hash table of [parser_id, hash_table[index, parse_result]],
|
||||
* where the parser id is incremented as the parse goes along (parsers that have
|
||||
* already been applied once don't get a new parser_id ... but the global variable
|
||||
* still increments? not sure why that is, need to debug some), and the locations
|
||||
* at which it's been applied are memoized.
|
||||
*
|
||||
* In our case, it's a hash table from parser_cache_key_t to parse_state_t.
|
||||
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t.
|
||||
*
|
||||
*/
|
||||
#define BYTE_BIG_ENDIAN 0x1
|
||||
|
|
@ -124,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]);
|
|||
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
|
||||
const parser_t* choice(const parser_t* p_array[]);
|
||||
|
||||
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
||||
* - if p1 succeeds and p2 fails
|
||||
* - if both succeed but p1's result is shorter than p2's
|
||||
*/
|
||||
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
|
||||
|
||||
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
||||
* - if p1 succeeds and p2 fails
|
||||
* - if both succeed but p2's result is shorter than p1's
|
||||
*/
|
||||
const parser_t* difference(const parser_t* p1, const parser_t* p2);
|
||||
|
||||
/* Given two parsers, p1 and p2, this parser succeeds if exactly one of p1 and p2 succeeds; it fails if both succeed or both fail.
|
||||
*/
|
||||
const parser_t* xor(const parser_t* p1, const parser_t* p2);
|
||||
|
||||
const parser_t* repeat0(const parser_t* p);
|
||||
const parser_t* repeat1(const parser_t* p);
|
||||
const parser_t* repeat_n(const parser_t* p, const size_t n);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue