Merge remote-tracking branch 'upstream/master'
Conflicts: NOTES
This commit is contained in:
commit
d6c0eaf3de
3 changed files with 103 additions and 16 deletions
20
NOTES
20
NOTES
|
|
@ -15,6 +15,20 @@ consistency checks.
|
||||||
|
|
||||||
TODO: Add consistency check to the bitreader
|
TODO: Add consistency check to the bitreader
|
||||||
|
|
||||||
We should support the use of parse-table-based parse methods; add a
|
TODO: We should support the use of parse-table-based parse methods; add a
|
||||||
parse_compile method that must be called before the newly-created
|
parse_compile method that must be called before the newly-created
|
||||||
parser is used.
|
parser is used.
|
||||||
|
|
||||||
|
|
||||||
|
Regarding butnot and difference:
|
||||||
|
|
||||||
|
There's a "do what I say, not what I do" variation in how we
|
||||||
|
implemented these (versus how jsparse did it). His `butnot` succeeds
|
||||||
|
if p1 and p2 both match and p1's result is longer than p2's, though
|
||||||
|
the comments say it should succeed if p2's result is longer than
|
||||||
|
p1's. Also, his `difference` succeeds if p1 and p2 both match, full
|
||||||
|
stop, returning the result of p2 if p2's result is shorter than p1's
|
||||||
|
or the result of p1 otherwise, though the comments say it should
|
||||||
|
succeed if p2's result is shorter than p1's. Whatever; we're doing
|
||||||
|
what the comments say.
|
||||||
|
|
||||||
|
|
|
||||||
76
src/hammer.c
76
src/hammer.c
|
|
@ -294,7 +294,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
|
||||||
// cache the initial state of the input stream
|
// cache the initial state of the input stream
|
||||||
input_stream_t start_state = state->input_stream;
|
input_stream_t start_state = state->input_stream;
|
||||||
parse_result_t *r1 = do_parse(parsers->p1, state);
|
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||||
// if r1 is null, bail out early
|
// if p1 failed, bail out early
|
||||||
if (NULL == r1) {
|
if (NULL == r1) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -304,7 +304,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
|
||||||
parse_result_t *r2 = do_parse(parsers->p2, state);
|
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||||
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
|
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
|
||||||
state->input_stream = after_p1_state;
|
state->input_stream = after_p1_state;
|
||||||
// if r2 is null, restore post-p1 state and bail out early
|
// if p2 failed, p1's result stands (the stream was already reset to the post-p1 state above)
|
||||||
if (NULL == r2) {
|
if (NULL == r2) {
|
||||||
return r1;
|
return r1;
|
||||||
}
|
}
|
||||||
|
|
@ -326,8 +326,76 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
|
static parse_result_t* parse_difference(void *env, parse_state_t *state) {
|
||||||
const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
|
two_parsers_t *parsers = (two_parsers_t*)env;
|
||||||
|
// cache the initial state of the input stream
|
||||||
|
input_stream_t start_state = state->input_stream;
|
||||||
|
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||||
|
// if p1 failed, bail out early
|
||||||
|
if (NULL == r1) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
// cache the state after parse #1, since we might have to back up to it
|
||||||
|
input_stream_t after_p1_state = state->input_stream;
|
||||||
|
state->input_stream = start_state;
|
||||||
|
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||||
|
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
|
||||||
|
state->input_stream = after_p1_state;
|
||||||
|
// if p2 failed, p1's result stands (the stream was already reset to the post-p1 state above)
|
||||||
|
if (NULL == r2) {
|
||||||
|
return r1;
|
||||||
|
}
|
||||||
|
size_t r1len = token_length(r1);
|
||||||
|
size_t r2len = token_length(r2);
|
||||||
|
// if both match but p1's text is shorter than p2's, fail
|
||||||
|
if (r1len < r2len) {
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
return r1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const parser_t* difference(const parser_t* p1, const parser_t* p2) {
|
||||||
|
two_parsers_t *env = g_new(two_parsers_t, 1);
|
||||||
|
env->p1 = p1; env->p2 = p2;
|
||||||
|
parser_t *ret = g_new(parser_t, 1);
|
||||||
|
ret->fn = parse_difference; ret->env = (void*)env;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static parse_result_t* parse_xor(void *env, parse_state_t *state) {
|
||||||
|
two_parsers_t *parsers = (two_parsers_t*)env;
|
||||||
|
// cache the initial state of the input stream
|
||||||
|
input_stream_t start_state = state->input_stream;
|
||||||
|
parse_result_t *r1 = do_parse(parsers->p1, state);
|
||||||
|
input_stream_t after_p1_state = state->input_stream;
|
||||||
|
// reset input stream, parse again
|
||||||
|
state->input_stream = start_state;
|
||||||
|
parse_result_t *r2 = do_parse(parsers->p2, state);
|
||||||
|
if (NULL == r1) {
|
||||||
|
if (NULL != r2) {
|
||||||
|
return r2;
|
||||||
|
} else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (NULL == r2) {
|
||||||
|
state->input_stream = after_p1_state;
|
||||||
|
return r1;
|
||||||
|
} else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const parser_t* xor(const parser_t* p1, const parser_t* p2) {
|
||||||
|
two_parsers_t *env = g_new(two_parsers_t, 1);
|
||||||
|
env->p1 = p1; env->p2 = p2;
|
||||||
|
parser_t *ret = g_new(parser_t, 1);
|
||||||
|
ret->fn = parse_xor; ret->env = (void*)env;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
const parser_t* repeat0(const parser_t* p) { return NULL; }
|
const parser_t* repeat0(const parser_t* p) { return NULL; }
|
||||||
const parser_t* repeat1(const parser_t* p) { return NULL; }
|
const parser_t* repeat1(const parser_t* p) { return NULL; }
|
||||||
const parser_t* repeat_n(const parser_t* p, const size_t n) { return NULL; }
|
const parser_t* repeat_n(const parser_t* p, const size_t n) { return NULL; }
|
||||||
|
|
|
||||||
23
src/hammer.h
23
src/hammer.h
|
|
@ -26,15 +26,7 @@
|
||||||
* input - the entire string being parsed
|
* input - the entire string being parsed
|
||||||
* index - current position in input
|
* index - current position in input
|
||||||
* length - size of input
|
* length - size of input
|
||||||
* THE FOLLOWING DESCRIBES HOW JSPARSE DOES IT. OUR MILEAGE MAY VARY.
|
* cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t.
|
||||||
* cache - a hash table describing the state of the parse, including partial parse_results.
|
|
||||||
* It's actually a hash table of [parser_id, hash_table[index, parse_result]],
|
|
||||||
* where the parser id is incremented as the parse goes along (parsers that have
|
|
||||||
* already been applied once don't get a new parser_id ... but the global variable
|
|
||||||
* still increments? not sure why that is, need to debug some), and the locations
|
|
||||||
* at which it's been applied are memoized.
|
|
||||||
*
|
|
||||||
* In our case, it's a hash table from parser_cache_key_t to parse_state_t.
|
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#define BYTE_BIG_ENDIAN 0x1
|
#define BYTE_BIG_ENDIAN 0x1
|
||||||
|
|
@ -124,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]);
|
||||||
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
|
/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
|
||||||
const parser_t* choice(const parser_t* p_array[]);
|
const parser_t* choice(const parser_t* p_array[]);
|
||||||
|
|
||||||
|
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
||||||
|
* - if p1 succeeds and p2 fails
|
||||||
|
* - if both succeed but p1's result is shorter than p2's
|
||||||
|
*/
|
||||||
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
|
const parser_t* butnot(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
|
/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
|
||||||
|
* - if p1 succeeds and p2 fails
|
||||||
|
* - if both succeed but p2's result is shorter than p1's
|
||||||
|
*/
|
||||||
const parser_t* difference(const parser_t* p1, const parser_t* p2);
|
const parser_t* difference(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
|
/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeeds, but not if they both do.
|
||||||
|
*/
|
||||||
const parser_t* xor(const parser_t* p1, const parser_t* p2);
|
const parser_t* xor(const parser_t* p1, const parser_t* p2);
|
||||||
|
|
||||||
const parser_t* repeat0(const parser_t* p);
|
const parser_t* repeat0(const parser_t* p);
|
||||||
const parser_t* repeat1(const parser_t* p);
|
const parser_t* repeat1(const parser_t* p);
|
||||||
const parser_t* repeat_n(const parser_t* p, const size_t n);
|
const parser_t* repeat_n(const parser_t* p, const size_t n);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue