Added benchmark skeleton, and at least stubs for all of the necessary functions

2012-11-02 17:16:53 -04:00 · 2012-11-02 17:16:53 -04:00 · d0d9a94fd0
commit d0d9a94fd0
parent 8eececcac8
10 changed files with 239 additions and 13 deletions
--- a/39
+++ b/39
@ -10,3 +10,42 @@ In particular, these names, and the macros that use them, are:
    Used by a_new and company. Should be an HParseState*
 - mm__:
    Used by h_new and h_free. Should be an HAllocator*
+
+Function suffixes
+=================
+
+Many functions come in several variants, to handle receiving optional
+parameters or parameters in multiple different forms.  For example,
+often, you have a global memory manager that is used for an entire
+program. In this case, you can leave the memory manager arguments
+off, letting them be implicit instead. Further, it is often convenient
+to pass an array or va_list to a function instead of listing the
+arguments inline (eg, for wrapping a function, generating the
+arguments programmatically, or writing bindings for another language).
+
+Because we have found that most variants fall into a fairly small set
+of forms, and to minimize the number of API calls that users need to
+remember, there is a consistent naming scheme for these function
+variants: the function name is followed by two underscores and a set
+of single-character "flags" indicating what optional features that
+particular variant has (in alphabetical order, of course):
+
+  __a: takes the variadic arguments as a void*[]
+  __m: takes a memory manager as the first argument, to override the system memory manager
+  __v: takes the variadic arguments as a va_list
+
+
+Memory managers
+===============
+
+If the __m function variants are used or system_allocator is
+overridden, some difficult questions arise, particularly regarding
+the behavior when multiple memory managers are combined. As a
+general rule of thumb (exceptions will be explicitly documented),
+assume that
+
+   If you have a function f, which is passed a memory manager m and
+   returns a value r, any function that uses r as a parameter must
+   also be told to use m as a memory manager.
+
+In other words, don't let the (memory manager) streams cross.
--- a/1
+++ b/1
@ -35,4 +35,3 @@ what the comments say.

 TODO: implement datastructure linearization func
 TODO: implement free func for parsers
-TODO: Remove glib dependency (i.e., GQueue and GHashtable)
--- a/3
+++ b/3
@ -0,0 +1,3 @@
+- Make h_action functions be called only after parse is complete.
+- Allow alternative input streams (eg, zlib, base64)
+  - Bonus points if layered...
--- a/src/Makefile
+++ b/src/Makefile
@ -25,17 +25,29 @@ PARSERS := \
 	attr_bool \
 	indirect

-OUTPUTS := bitreader.o \
-	   hammer.o \
-	   bitwriter.o \
-	   libhammer.a \
-	   pprint.o \
-	   allocator.o \
-	   datastructures.o \
-	   system_allocator.o \
+BACKENDS := \
+	packrat
+
+HAMMER_PARTS := \
+	bitreader.o \
+	hammer.o \
+	bitwriter.o \
+	pprint.o \
+	allocator.o \
+	datastructures.o \
+	system_allocator.o \
+	benchmark.o \
+	compile.o \
+	$(PARSERS:%=parsers/%.o) \
+	$(BACKENDS:%=backends/%.o)
+
+TESTS := t_benchmark.o
+
+OUTPUTS := libhammer.a \
 	   test_suite.o \
 	   test_suite \
-	   $(PARSERS:%=parsers/%.o)
+	   $(HAMMER_PARTS) \
+	   $(TESTS)

 TOPLEVEL := ../

@ -44,8 +56,7 @@ include ../common.mk

 all: libhammer.a

-libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \
-	$(PARSERS:%=parsers/%.o)
+libhammer.a: $(HAMMER_PARTS)

 bitreader.o: test_suite.h
 hammer.o: hammer.h
@ -56,6 +67,6 @@ all: test_suite
 test: test_suite
 	./test_suite -v

-test_suite: test_suite.o libhammer.a
+test_suite: test_suite.o $(TESTS) libhammer.a
 	$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
 endif
--- a/src/backends/packrat.c
+++ b/src/backends/packrat.c
@ -0,0 +1,15 @@
+#include "../internal.h"
+
+// Compile hook for the packrat backend. Packrat needs no precomputed
+// tables, so this always reports success (0). The arguments are accepted
+// only so the function fits the backend vtable's compile slot.
+int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
+  (void)mm__;
+  (void)parser;
+  (void)params;
+  return 0;
+}
+
+// Parse hook for the packrat backend. Not implemented yet: every call
+// returns NULL regardless of its arguments.
+HParseResult *h_packrat_parse(HAllocator* mm__, HParser* parser, HParseState* parse_state) {
+  HParseResult *result = NULL; // TODO: fill this in.
+  (void)mm__;
+  (void)parser;
+  (void)parse_state;
+  return result;
+}
+
+// Entry points of the packrat backend, bundled so other files can reach
+// them through a single exported object.
+HParserBackendVTable h__packrat_backend_vtable = {
+  .parse = h_packrat_parse,
+  .compile = h_packrat_compile,
+};
--- a/src/benchmark.c
+++ b/src/benchmark.c
@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "hammer.h"
+
+/*
+  Usage:
+  Create your parser (i.e., HParser*), and then call
+
+  HBenchmarkResults* results = h_benchmark(parser, testcases);
+
+  Then, you can format a report with:
+
+  h_benchmark_report(stdout, results);
+
+  or just generate code to make the parser run as fast as possible with:
+
+  h_benchmark_dump_optimized_code(stdout, results);
+
+*/
+
+
+/**
+ * Verify and time `parser` against `testcases` on every available backend.
+ *
+ * For each backend in [PB_MIN, PB_MAX) the parser is compiled with default
+ * parameters, every testcase is checked against its expected unambiguous
+ * output, and -- only if all of them pass -- each testcase is timed.
+ * Progress and timings are written to stderr.
+ *
+ * `testcases` is an array terminated by an entry whose `input` is NULL. An
+ * entry whose `output_unambiguous` is NULL expects the parse to produce no
+ * result. A NULL `testcases` pointer is treated as an empty list.
+ *
+ * Always returns NULL for now; no HBenchmarkResults is built yet.
+ */
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
+  // Each testcase is re-run in doubling batches until one batch takes at
+  // least MIN_RUN_NS; MAX_RUN_COUNT keeps the doubling from overflowing int
+  // if the clock never advances.
+  enum { MIN_RUN_NS = 100000000, MAX_RUN_COUNT = 1 << 30 };
+  HParserTestcase* tc;
+
+  if (testcases == NULL)
+    return NULL;
+
+  // For now, just output the results to stderr
+  for (HParserBackend backend = PB_MIN; backend < PB_MAX; backend++) {
+    fprintf(stderr, "Compiling for backend %d ... ", (int)backend);
+    // Step 1: Compile grammar for the backend under test (not always PB_MIN).
+    if (h_compile(parser, backend, NULL) == -1) {
+      // backend inappropriate for grammar...
+      fprintf(stderr, "failed\n");
+      continue;
+    }
+
+    // Step 2: verify all test cases.
+    int tc_failed = 0;
+    for (tc = testcases; tc->input != NULL; tc++) {
+      HParseResult *res = h_parse(parser, tc->input, tc->length);
+      // NOTE(review): res_unamb is never released; the matching deallocator
+      // for h_write_result_unamb is not visible here -- confirm and free it.
+      char *res_unamb = (res != NULL) ? h_write_result_unamb(res->ast) : NULL;
+      const char *expected = tc->output_unambiguous;
+      int matches;
+
+      if (res_unamb == NULL || expected == NULL) {
+        // Never hand NULL to strcmp: a missing side only matches a missing side.
+        matches = (res_unamb == NULL && expected == NULL);
+      } else {
+        matches = (strcmp(res_unamb, expected) == 0);
+      }
+
+      if (!matches) {
+        // test case failed...
+        fprintf(stderr, "failed\n");
+        // We want to run all testcases, for purposes of generating a
+        // report. (eg, if users are trying to fix a grammar for a
+        // faster backend)
+        tc_failed++;
+      }
+      if (res != NULL)
+        h_parse_result_free(res);
+    }
+
+    if (tc_failed > 0) {
+      // Can't use this parser; skip to the next
+      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
+      continue;
+    }
+
+    // Step 3: time each test case.
+    for (tc = testcases; tc->input != NULL; tc++) {
+      // The goal is to run each testcase for at least 100ms (MIN_RUN_NS)
+      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
+      int count = 1;
+      int clock_ok = 1;
+      long long time_diff = 0;
+
+      do {
+        struct timespec ts_start, ts_end;
+
+        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
+        if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start) != 0) {
+          clock_ok = 0;
+          break;
+        }
+        for (int cur = 0; cur < count; cur++) {
+          HParseResult *res = h_parse(parser, tc->input, tc->length);
+          if (res != NULL)
+            h_parse_result_free(res);
+        }
+        if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end) != 0) {
+          clock_ok = 0;
+          break;
+        }
+
+        // time_diff is in ns; widen before multiplying so a 32-bit time_t
+        // difference cannot overflow.
+        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
+                    + (long long)(ts_end.tv_nsec - ts_start.tv_nsec);
+      } while (time_diff < MIN_RUN_NS && count < MAX_RUN_COUNT);
+
+      if (!clock_ok) {
+        fprintf(stderr, "Case %d: clock_gettime failed\n", (int)(tc - testcases));
+        continue;
+      }
+      fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases), time_diff / count);
+    }
+  }
+  return NULL;
+}
+
+// Intended to write a report for `result` to `stream`; currently an empty
+// stub that ignores both arguments.
+void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
+  // TODO: fill in this function
+  (void)stream;
+  (void)result;
+}
--- a/src/compile.c
+++ b/src/compile.c
@ -0,0 +1,15 @@
+// This file contains functions related to managing multiple parse backends
+#include "hammer.h"
+#include "internal.h"
+
+// Backend dispatch table, indexed by HParserBackend value.
+static HParserBackendVTable *backends[PB_MAX] = {
+  [PB_PACKRAT] = &h__packrat_backend_vtable,
+};
+
+// Convenience form of h_compile__m that supplies the global
+// system_allocator as the memory manager.
+int h_compile(HParser* parser, HParserBackend backend, const void* params) {
+  HAllocator *mm__ = &system_allocator;
+  return h_compile__m(mm__, parser, backend, params);
+}
+
+/**
+ * Compile `parser` for `backend`, using `mm__` as the memory manager.
+ *
+ * Dispatches to the selected backend's compile hook and returns its result.
+ * Returns -1 without calling anything if `backend` is outside
+ * [PB_MIN, PB_MAX) or if no compile hook is registered for it.
+ */
+int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
+  // Whether the enum is signed is up to the compiler; comparing as unsigned
+  // rejects both negative values and values >= PB_MAX with a single test.
+  if ((unsigned int)backend >= (unsigned int)PB_MAX)
+    return -1;
+
+  const HParserBackendVTable *vtable = backends[backend];
+  if (vtable == NULL || vtable->compile == NULL)
+    return -1;
+
+  return vtable->compile(mm__, parser, params);
+}
--- a/src/hammer.h
+++ b/src/hammer.h
@ -30,6 +30,12 @@ typedef int bool;

 typedef struct HParseState_ HParseState;

+typedef enum HParserBackend_ { // Selects a parse backend; compile.c indexes its backend table with these values.
+  PB_MIN = 0, // Lowest valid backend value; h_benchmark starts iterating here.
+  PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_MAX // One past the last backend: used as loop bound and table size, not a backend itself.
+} HParserBackend;
+
 typedef enum HTokenType_ {
  TT_NONE,
  TT_BYTES,
@ -112,6 +118,17 @@ typedef struct HParser_ {
  void *env;
 } HParser;

+// {{{ Stuff for benchmarking
+typedef struct HParserTestcase_ { // One h_benchmark testcase; h_benchmark stops at the first entry whose input is NULL.
+  unsigned char* input; // Bytes passed to h_parse; NULL marks the end of a testcase array.
+  size_t length; // Length passed to h_parse together with input.
+  char* output_unambiguous; // Expected text, compared against h_write_result_unamb() of the parse result.
+} HParserTestcase;
+
+typedef struct HBenchmarkResults_ { // Placeholder with no fields yet. NOTE(review): a member-less struct is a GNU extension, not ISO C -- confirm all target compilers accept it.
+} HBenchmarkResults;
+// }}}
+
 // {{{ Preprocessor definitions
 #define HAMMER_FN_DECL_NOARG(rtype_t, name)		\
  rtype_t name(void);					\
@ -519,6 +536,15 @@ HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
 */
 HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);

+/**
+ * Build parse tables for the given parser backend. See the
+ * documentation for the parser backend in question for information
+ * about the [params] parameter, or just pass in NULL for the defaults.
+ *
+ * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
+ */
+HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
+
 /**
 * TODO: Document me
 */
@ -541,4 +567,10 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
 */
 void h_bit_writer_free(HBitWriter* w);

+// {{{ Benchmark functions
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases); // Checks, then times, each testcase per backend; currently logs to stderr and returns NULL.
+void h_benchmark_report(FILE* stream, HBenchmarkResults* results); // Currently an empty stub.
+void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); // NOTE(review): declared here, but no definition is added in this change.
+// }}}
+
 #endif // #ifndef HAMMER_HAMMER__H
--- a/src/internal.h
+++ b/src/internal.h
@ -109,6 +109,12 @@ struct HParseState_ {
  HHashTable *recursion_heads;
 };

+typedef struct HParserBackendVTable_ { // Entry points a parse backend provides; h_compile__m dispatches through compile.
+  int (*compile)(HAllocator *mm__, HParser* parser, const void* params); // Prepare parser for this backend; h_compile documents -1 as failure, 0 otherwise.
+  HParseResult* (*parse)(HAllocator *mm__, HParser* parser, HParseState* parse_state); // Run a parse. NOTE(review): no caller of this slot is visible in this change.
+} HParserBackendVTable;
+
+
 /* The (location, parser) tuple used to key the cache.
 */

@ -173,6 +179,10 @@ typedef struct HParserCacheValue_t {
  };
 } HParserCacheValue;

+// Backends {{{
+extern HParserBackendVTable h__packrat_backend_vtable; // Defined in backends/packrat.c.
+// }}}
+
 // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.

 long long h_read_bits(HInputStream* state, int count, char signed_p);
--- a/src/t_benchmark.c
+++ b/src/t_benchmark.c
@ -0,0 +1,14 @@
+// At this point, this is just a compile/link test.
+#include "hammer.h"
+
+// Testcase list for the link test: only the terminating entry, whose
+// NULL input marks the end of the array.
+HParserTestcase testcases[] = {
+  { .input = NULL, .length = 0, .output_unambiguous = NULL },
+};
+
+// Compile/link smoke test: pushes a NULL parser and the terminator-only
+// testcase list through h_benchmark, then hands the result to
+// h_benchmark_report on stderr.
+void test_benchmark_1() {
+  HParser *parser = NULL; // TODO: fill this in.
+
+  h_benchmark_report(stderr, h_benchmark(parser, testcases));
+}