Merge remote-tracking branch 'upstream/master'

Conflicts: NOTES
2012-05-04 21:27:09 +01:00 · 2012-05-04 21:27:09 +01:00 · d6c0eaf3de
commit d6c0eaf3de
parent b4aca3c992 709884faf5
3 changed files with 103 additions and 16 deletions
--- a/20
+++ b/20
@ -15,6 +15,20 @@ consistency checks.
 TODO: Add consistency check to the bitreader
-We should support the use of parse-table-based parse methods; add a
+TODO: We should support the use of parse-table-based parse methods; add a
-parse_compile method that must be called before the newly-created
+      parse_compile method that must be called before the newly-created
-parser is used.
+      parser is used.
 Regarding butnot and difference:
 There's a "do what I say, not what I do" variation in how we
 implemented these (versus how jsparse did it). His `butnot` succeeds
 if p1 and p2 both match and p1's result is longer than p2's, though
 the comments say it should succeed if p2's result is longer than
 p1's. Also, his `difference` succeeds if p1 and p2 both match, full
 stop, returning the result of p2 if p2's result is shorter than p1's
 or the result of p1 otherwise, though the comments say it should
 succeed if p2's result is shorter than p1's. Whatever; we're doing
 what the comments say.
--- a/src/hammer.c
+++ b/src/hammer.c
@ -294,7 +294,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
  // cache the initial state of the input stream
  input_stream_t start_state = state->input_stream;
  parse_result_t *r1 = do_parse(parsers->p1, state);
-  // if r1 is null, bail out early
+  // if p1 failed, bail out early
  if (NULL == r1) {
    return NULL;
  } 
@ -304,7 +304,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
  parse_result_t *r2 = do_parse(parsers->p2, state);
  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
  state->input_stream = after_p1_state;
-  // if r2 is null, restore post-p1 state and bail out early
+  // if p2 failed, restore post-p1 state and bail out early
  if (NULL == r2) {
    return r1;
  }
@ -326,8 +326,76 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) {
  return ret;
 }
-const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
+static parse_result_t* parse_difference(void *env, parse_state_t *state) {
-const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
+  two_parsers_t *parsers = (two_parsers_t*)env;
  // Parse function behind difference(): runs p1 and p2 from the same starting
  // position and returns p1's result, unless p2 also matches and
  // token_length(r1) < token_length(r2), in which case it returns NULL.
  // env is the two_parsers_t holding p1 and p2.
  // cache the initial state of the input stream
  input_stream_t start_state = state->input_stream;
  parse_result_t *r1 = do_parse(parsers->p1, state);
  // if p1 failed, bail out early
  if (NULL == r1) {
    return NULL;
  } 
  // cache the state after parse #1, since we might have to back up to it
  input_stream_t after_p1_state = state->input_stream;
  // rewind so that p2 is applied at the same position p1 started from
  state->input_stream = start_state;
  parse_result_t *r2 = do_parse(parsers->p2, state);
  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
  // from here on the stream is at the post-p1 position, whatever gets returned below
  state->input_stream = after_p1_state;
  // if p2 failed, the stream is already back at the post-p1 state; return p1's result
  if (NULL == r2) {
    return r1;
  }
  size_t r1len = token_length(r1);
  size_t r2len = token_length(r2);
  // if both match but p1's text is shorter than p2's, fail
  // NOTE(review): equal lengths fall through to the else branch and return r1,
  // while the comment on difference() in hammer.h says p2's result must be
  // shorter than p1's -- confirm which behaviour is intended.
  if (r1len < r2len) {
    return NULL;
  } else {
    return r1;
  }
 }
 const parser_t* difference(const parser_t* p1, const parser_t* p2) { 
  two_parsers_t *env = g_new(two_parsers_t, 1);
  env->p1 = p1; env->p2 = p2;
  parser_t *ret = g_new(parser_t, 1);
  ret->fn = parse_difference; ret->env = (void*)env;
  return ret;
 }
 // Parse function behind xor(): applies p1 and p2, each from the same starting
 // position, and returns the result of whichever one matched when exactly one
 // did. Returns NULL when both matched or neither matched.
 // env is the two_parsers_t holding p1 and p2.
 static parse_result_t* parse_xor(void *env, parse_state_t *state) {
  two_parsers_t *pair = (two_parsers_t*)env;
  // remember where we started so p2 can be run from the same position
  input_stream_t initial = state->input_stream;
  parse_result_t *first = do_parse(pair->p1, state);
  // remember where p1 left the stream, in case p1 is the one that wins
  input_stream_t past_first = state->input_stream;
  state->input_stream = initial;
  parse_result_t *second = do_parse(pair->p2, state);
  // p1 failed: the answer is p2's result, which is NULL if p2 failed as well;
  // the stream stays wherever the p2 parse left it
  if (NULL == first) {
    return second;
  }
  // both matched: xor fails; the stream stays wherever the p2 parse left it
  if (NULL != second) {
    return NULL;
  }
  // only p1 matched: put the stream back to the post-p1 position
  state->input_stream = past_first;
  return first;
 }
 const parser_t* xor(const parser_t* p1, const parser_t* p2) { 
  two_parsers_t *env = g_new(two_parsers_t, 1);
  env->p1 = p1; env->p2 = p2;
  parser_t *ret = g_new(parser_t, 1);
  ret->fn = parse_xor; ret->env = (void*)env;
  return ret;
 }
 // Stub: no parser is built yet; always returns NULL.
 const parser_t* repeat0(const parser_t* p) {
  return NULL;
 }
 // Stub: no parser is built yet; always returns NULL.
 const parser_t* repeat1(const parser_t* p) {
  return NULL;
 }
 // Stub: no parser is built yet; always returns NULL regardless of n.
 const parser_t* repeat_n(const parser_t* p, const size_t n) {
  return NULL;
 }
--- a/src/hammer.h
+++ b/src/hammer.h
@ -26,15 +26,7 @@
 *   input - the entire string being parsed
 *   index - current position in input
 *   length - size of input
- * THE FOLLOWING DESCRIBES HOW JSPARSE DOES IT. OUR MILEAGE MAY VARY.
+ *   cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t. 
 *   cache - a hash table describing the state of the parse, including partial parse_results. 
 *           It's actually a hash table of [parser_id, hash_table[index, parse_result]],
 *           where the parser id is incremented as the parse goes along (parsers that have
 *           already been applied once don't get a new parser_id ... but the global variable
 *           still increments? not sure why that is, need to debug some), and the locations
 *           at which it's been applied are memoized.
 * 
 *           In our case, it's a hash table from parser_cache_key_t to parse_state_t. 
 *
 */
 #define BYTE_BIG_ENDIAN 0x1
@ -124,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]);
 /* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */
 const parser_t* choice(const parser_t* p_array[]);
 /* Given two parsers, p1 and p2, this parser succeeds in the following cases: 
 * - if p1 succeeds and p2 fails
 * - if both succeed but p1's result is shorter than p2's
 */
 const parser_t* butnot(const parser_t* p1, const parser_t* p2);
 /* Given two parsers, p1 and p2, this parser succeeds in the following cases:
 * - if p1 succeeds and p2 fails
 * - if both succeed but p2's result is shorter than p1's
 */
 const parser_t* difference(const parser_t* p1, const parser_t* p2);
 /* Given two parsers, p1 and p2, this parser succeeds if exactly one of p1 and p2 succeeds; it fails if both succeed or if both fail.
 */
 const parser_t* xor(const parser_t* p1, const parser_t* p2);
 const parser_t* repeat0(const parser_t* p);
 const parser_t* repeat1(const parser_t* p);
 const parser_t* repeat_n(const parser_t* p, const size_t n);