From 195e50a7f18b86fa8d17aaaa632a454ed67a2d7e Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" Date: Sun, 17 Feb 2013 15:24:13 +0100 Subject: [PATCH 1/2] move glue.[ch] into the library proper --- examples/Makefile | 3 +- examples/dns_common.h | 2 +- examples/glue.c | 170 ---------------------------- examples/glue.h | 251 ------------------------------------------ src/Makefile | 2 + 5 files changed, 4 insertions(+), 424 deletions(-) delete mode 100644 examples/glue.c delete mode 100644 examples/glue.h diff --git a/examples/Makefile b/examples/Makefile index 786af44..9c6ac1b 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -15,13 +15,12 @@ LDFLAGS += $(pkg-config --libs glib-2.0) all: dns base64 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) -dns: dns.o rr.o dns_common.o glue.o +dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) dns.o: ../src/hammer.h dns_common.h rr.o: ../src/hammer.h rr.h dns_common.h dns_common.o: ../src/hammer.h dns_common.h -glue.o: ../src/hammer.h glue.h base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) base64: base64.o diff --git a/examples/dns_common.h b/examples/dns_common.h index 6b04519..c1d8d7e 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -2,7 +2,7 @@ #define HAMMER_DNS_COMMON__H #include "../src/hammer.h" -#include "glue.h" +#include "../src/glue.h" const HParser* init_domain(); const HParser* init_character_string(); diff --git a/examples/glue.c b/examples/glue.c deleted file mode 100644 index 7f9c6fa..0000000 --- a/examples/glue.c +++ /dev/null @@ -1,170 +0,0 @@ -#include "glue.h" -#include "../src/internal.h" // for h_carray_* - - -// The action equivalent of h_ignore. -const HParsedToken *h_act_ignore(const HParseResult *p) -{ - return NULL; -} - -// Helper to build HAction's that pick one index out of a sequence. 
-const HParsedToken *h_act_index(int i, const HParseResult *p) -{ - if(!p) return NULL; - - const HParsedToken *tok = p->ast; - - if(!tok || tok->token_type != TT_SEQUENCE) - return NULL; - - const HCountedArray *seq = tok->seq; - size_t n = seq->used; - - if(i<0 || (size_t)i>=n) - return NULL; - else - return tok->seq->elements[i]; -} - -// Action version of h_seq_flatten. -const HParsedToken *h_act_flatten(const HParseResult *p) { - return h_seq_flatten(p->arena, p->ast); -} - -// Low-level helper for the h_make family. -HParsedToken *h_make_(HArena *arena, HTokenType type) -{ - HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); - ret->token_type = type; - return ret; -} - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value) -{ - assert(type >= TT_USER); - HParsedToken *ret = h_make_(arena, type); - ret->user = value; - return ret; -} - -HParsedToken *h_make_seq(HArena *arena) -{ - HParsedToken *ret = h_make_(arena, TT_SEQUENCE); - ret->seq = h_carray_new(arena); - return ret; -} - -HParsedToken *h_make_bytes(HArena *arena, size_t len) -{ - HParsedToken *ret = h_make_(arena, TT_BYTES); - ret->bytes.len = len; - ret->bytes.token = h_arena_malloc(arena, len); - return ret; -} - -HParsedToken *h_make_sint(HArena *arena, int64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_SINT); - ret->sint = val; - return ret; -} - -HParsedToken *h_make_uint(HArena *arena, uint64_t val) -{ - HParsedToken *ret = h_make_(arena, TT_UINT); - ret->uint = val; - return ret; -} - -// XXX -> internal -HParsedToken *h_carray_index(const HCountedArray *a, size_t i) -{ - assert(i < a->used); - return a->elements[i]; -} - -size_t h_seq_len(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->used; -} - -HParsedToken **h_seq_elements(const HParsedToken *p) -{ - assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return p->seq->elements; -} - -HParsedToken *h_seq_index(const HParsedToken *p, size_t i) -{ - 
assert(p != NULL); - assert(p->token_type == TT_SEQUENCE); - return h_carray_index(p->seq, i); -} - -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) -{ - va_list va; - - va_start(va, i); - HParsedToken *ret = h_seq_index_vpath(p, i, va); - va_end(va); - - return ret; -} - -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) -{ - HParsedToken *ret = h_seq_index(p, i); - int j; - - while((j = va_arg(va, int)) >= 0) - ret = h_seq_index(p, j); - - return ret; -} - -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - - h_carray_append(xs->seq, (HParsedToken *)x); -} - -void h_seq_append(HParsedToken *xs, const HParsedToken *ys) -{ - assert(xs != NULL); - assert(xs->token_type == TT_SEQUENCE); - assert(ys != NULL); - assert(ys->token_type == TT_SEQUENCE); - - for(size_t i; iseq->used; i++) - h_carray_append(xs->seq, ys->seq->elements[i]); -} - -// Flatten nested sequences. Always returns a sequence. -// If input element is not a sequence, returns it as a singleton sequence. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) -{ - assert(p != NULL); - - HParsedToken *ret = h_make_seq(arena); - switch(p->token_type) { - case TT_SEQUENCE: - // Flatten and append all. - for(size_t i; iseq->used; i++) { - h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); - } - break; - default: - // Make singleton sequence. - h_seq_snoc(ret, p); - break; - } - - return ret; -} diff --git a/examples/glue.h b/examples/glue.h deleted file mode 100644 index ccb488e..0000000 --- a/examples/glue.h +++ /dev/null @@ -1,251 +0,0 @@ -// -// API additions for writing grammar and semantic actions more concisely -// -// -// Quick Overview: -// -// Grammars can be succinctly specified with the family of H_RULE macros. -// H_RULE defines a plain parser variable. H_ARULE additionally attaches a -// semantic action; H_VRULE attaches a validation. 
H_AVRULE and H_VARULE -// combine both. -// -// A few standard semantic actions are defined below. The H_ACT_APPLY macro -// allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. -// -// The definition of more complex semantic actions will usually consist of -// extracting data from the given parse tree and constructing a token of custom -// type to represent the result. A number of functions and convenience macros -// are provided to capture the most common cases and idioms. -// -// See the leading comment blocks on the sections below for more details. -// - -#ifndef HAMMER_EXAMPLES_GLUE__H -#define HAMMER_EXAMPLES_GLUE__H - -#include -#include "../src/hammer.h" - - -// -// Grammar specification -// -// H_RULE is simply a short-hand for the typical declaration and definition of -// a parser variable. See its plain definition below. The goal is to save -// horizontal space as well as to provide a clear and unified look together with -// the other macro variants that stays close to an abstract PEG or BNF grammar. -// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their -// combinations as they allow the definition of syntax to be given without -// intermingling it with the semantic specifications. -// -// H_ARULE defines a variable just like H_RULE but attaches a semantic action -// to the result of the parser via h_action. The action is expected to be -// named act_. -// -// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. -// The validation is expected to be named validate_. -// -// H_VARULE combines H_RULE with both an action and a validation. The action is -// attached before the validation, i.e. the validation receives as input the -// result of the action. -// -// H_AVRULE is like H_VARULE but the action is attached outside the validation, -// i.e. the validation receives the uninterpreted AST as input. 
-// - - -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) -#define H_VRULE(rule, def) const HParser *rule = \ - h_attr_bool(def, validate_ ## rule) -#define H_VARULE(rule, def) const HParser *rule = \ - h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) -#define H_AVRULE(rule, def) const HParser *rule = \ - h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) - - -// -// Pre-fab semantic actions -// -// A collection of generally useful semantic actions is provided. -// -// h_act_ignore is the action equivalent of the parser combinator h_ignore. It -// simply causes the AST it is applied to to be replaced with NULL. This most -// importantly causes it to be elided from the result of a surrounding -// h_sequence. -// -// h_act_index is of note as it is not itself suitable to be passed to -// h_action. It is parameterized by an index to be picked from a sequence -// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY -// macro provides a concise way to define such a parameter-application wrapper. -// -// h_act_flatten acts on a token of possibly nested sequences by recursively -// flattening it into a single sequence. Cf. h_seq_flatten below. -// -// H_ACT_APPLY implements "partial application" for semantic actions. It -// defines a new action that supplies given parameters to a parameterized -// action such as h_act_index. -// - -const HParsedToken *h_act_ignore(const HParseResult *p); -const HParsedToken *h_act_index(int i, const HParseResult *p); -const HParsedToken *h_act_flatten(const HParseResult *p); - -// Define 'myaction' as a specialization of 'paction' by supplying the leading -// parameters. -#define H_ACT_APPLY(myaction, paction, ...) 
\ - const HParsedToken *myaction(const HParseResult *p) { \ - return paction(__VA_ARGS__, p); \ - } - - -// -// Working with HParsedTokens -// -// The type HParsedToken represents a dynamically-typed universe of values. -// Declared below are constructors to turn ordinary values into their -// HParsedToken equivalents, extractors to retrieve the original values from -// inside an HParsedToken, and functions that inspect and modify tokens of -// sequence type directly. -// -// In addition, there are a number of short-hand macros that work with some -// conventions to eliminate common boilerplate. These conventions are listed -// below. Be sure to follow them if you want to use the respective macros. -// -// * The single argument to semantic actions should be called 'p'. -// -// The H_MAKE macros suppy 'p->arena' to their underlying h_make -// counterparts. The H_FIELD macros supply 'p->ast' to their underlying -// H_INDEX counterparts. -// -// * For each custom token type, there should be a typedef for the -// corresponding value type. -// -// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to -// a pointer to the given type. -// -// * For each custom token type, say 'foo_t', there must be an integer -// constant 'TT_foo_t' to identify the token type. This constant must have a -// value greater or equal than TT_USER. -// -// One idiom is to define an enum for all custom token types and to assign a -// value of TT_USER to the first element. This can be viewed as extending -// the HTokenType enum. -// -// The H_MAKE and H_ASSERT macros derive the name of the token type constant -// from the given type name. -// -// -// The H_ALLOC macro is useful for allocating values of custom token types. -// -// The H_MAKE family of macros construct tokens of a given type. The native -// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. -// The form with no suffix is used for custom token types. 
This convention is -// also used for other macro and function families. -// -// The H_ASSERT family simply asserts that a given token has the expected type. -// It mainly serves as an implementation aid for H_CAST. Of note in that regard -// is that, unlike the standard 'assert' macro, these form _expressions_ that -// return the value of their token argument; thus they can be used in a -// "pass-through" fashion inside other expressions. -// -// The H_CAST family combines a type assertion with access to the -// statically-typed value inside a token. -// -// A number of functions h_seq_* operate on and inspect sequence tokens. -// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. -// Therefore there are h_seq_snoc and h_seq_append to build up sequences. -// -// The macro families H_FIELD and H_INDEX combine index access on a sequence -// with a cast to the appropriate result type. H_FIELD is used to access the -// elements of the argument token 'p' in an action. H_INDEX allows any sequence -// token to be specified. Both macro families take an arbitrary number of index -// arguments, giving access to elements in nested sequences by path. -// These macros are very useful to avoid spaghetti chains of unchecked pointer -// dereferences. -// - -// Standard short-hand for arena-allocating a variable in a semantic action. -#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) - -// Token constructors... - -HParsedToken *h_make(HArena *arena, HTokenType type, void *value); -HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. -HParsedToken *h_make_bytes(HArena *arena, size_t len); -HParsedToken *h_make_sint(HArena *arena, int64_t val); -HParsedToken *h_make_uint(HArena *arena, uint64_t val); - -// Standard short-hands to make tokens in an action. 
-#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) -#define H_MAKE_SEQ() h_make_seq(p->arena) -#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) -#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) -#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) - -// Extract (cast) type-specific value back from HParsedTokens... - -// Pass-through assertion that a given token has the expected type. -#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) - -// Convenience short-hand forms of h_assert_type. -#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) -#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) -#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) -#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) -#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) - -// Assert expected type and return contained value. -#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) -#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) -#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) -#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) -#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) - -// Sequence access... - -// Return the length of a sequence. -size_t h_seq_len(const HParsedToken *p); - -// Access a sequence's element array. -HParsedToken **h_seq_elements(const HParsedToken *p); - -// Access a sequence element by index. -HParsedToken *h_seq_index(const HParsedToken *p, size_t i); - -// Access an element in a nested sequence by a path of indices. -HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); -HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); - -// Convenience macros combining (nested) index access and h_cast. -#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_SINT(SEQ, ...) 
H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) - -// Standard short-hand to access and cast elements on a sequence token. -#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) -#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) -#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) -#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) -#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) - -// Lower-level helper for h_seq_index. -HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal - -// Sequence modification... - -// Add elements to a sequence. -void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one -void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many - -// XXX TODO: Remove elements from a sequence. - -// Flatten nested sequences into one. -const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); - - -#endif diff --git a/src/Makefile b/src/Makefile index 47e136d..ead0516 100644 --- a/src/Makefile +++ b/src/Makefile @@ -38,6 +38,7 @@ HAMMER_PARTS := \ system_allocator.o \ benchmark.o \ compile.o \ + glue.o \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) @@ -67,6 +68,7 @@ libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h +glue.o: hammer.h glue.h all: libhammer.a From 25df438832be00de17400404efecd4b1939951b9 Mon Sep 17 00:00:00 2001 From: "Sven M. 
Hallberg"
Date: Sun, 17 Feb 2013 15:25:19 +0100
Subject: [PATCH 2/2] oops, moved files without adding

---
 src/glue.c | 190 ++++++++++++++++++++++++++++++++++++++++
 src/glue.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 441 insertions(+)
 create mode 100644 src/glue.c
 create mode 100644 src/glue.h

diff --git a/src/glue.c b/src/glue.c
new file mode 100644
index 0000000..7f9c6fa
--- /dev/null
+++ b/src/glue.c
@@ -0,0 +1,190 @@
+#include "glue.h"
+#include "internal.h" // for h_carray_*
+
+
+// The action equivalent of h_ignore: always yields NULL, whatever 'p' is.
+const HParsedToken *h_act_ignore(const HParseResult *p)
+{
+  return NULL;
+}
+
+// Helper to build HAction's that pick one index out of a sequence.
+// Returns element 'i' of the sequence in p->ast. Returns NULL if 'p' or its
+// AST is NULL, if the AST is not a TT_SEQUENCE, or if 'i' is out of range.
+const HParsedToken *h_act_index(int i, const HParseResult *p)
+{
+  if(!p) return NULL;
+
+  const HParsedToken *tok = p->ast;
+
+  if(!tok || tok->token_type != TT_SEQUENCE)
+    return NULL;
+
+  const HCountedArray *seq = tok->seq;
+  size_t n = seq->used;
+
+  if(i<0 || (size_t)i>=n)
+    return NULL;
+  else
+    return tok->seq->elements[i];
+}
+
+// Action version of h_seq_flatten. 'p' and p->ast must not be NULL.
+const HParsedToken *h_act_flatten(const HParseResult *p) {
+  return h_seq_flatten(p->arena, p->ast);
+}
+
+// Low-level helper for the h_make family: arena-allocates a token and sets
+// only its type. The caller must fill in the value field for that type.
+HParsedToken *h_make_(HArena *arena, HTokenType type)
+{
+  HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken));
+  ret->token_type = type;
+  return ret;
+}
+
+// Make a token of custom type (type >= TT_USER) carrying the pointer 'value'.
+HParsedToken *h_make(HArena *arena, HTokenType type, void *value)
+{
+  assert(type >= TT_USER);
+  HParsedToken *ret = h_make_(arena, type);
+  ret->user = value;
+  return ret;
+}
+
+// Make an empty sequence token.
+HParsedToken *h_make_seq(HArena *arena)
+{
+  HParsedToken *ret = h_make_(arena, TT_SEQUENCE);
+  ret->seq = h_carray_new(arena);
+  return ret;
+}
+
+// Make a bytes token with an arena-allocated buffer of 'len' bytes. This
+// function does not write to the buffer; the caller fills it in.
+HParsedToken *h_make_bytes(HArena *arena, size_t len)
+{
+  HParsedToken *ret = h_make_(arena, TT_BYTES);
+  ret->bytes.len = len;
+  ret->bytes.token = h_arena_malloc(arena, len);
+  return ret;
+}
+
+// Make a signed integer token holding 'val'.
+HParsedToken *h_make_sint(HArena *arena, int64_t val)
+{
+  HParsedToken *ret = h_make_(arena, TT_SINT);
+  ret->sint = val;
+  return ret;
+}
+
+// Make an unsigned integer token holding 'val'.
+HParsedToken *h_make_uint(HArena *arena, uint64_t val)
+{
+  HParsedToken *ret = h_make_(arena, TT_UINT);
+  ret->uint = val;
+  return ret;
+}
+
+// Bounds-asserted element access on a counted array.
+// XXX -> internal
+HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
+{
+  assert(i < a->used);
+  return a->elements[i];
+}
+
+// Number of elements in sequence token 'p'. Asserts 'p' is a TT_SEQUENCE.
+size_t h_seq_len(const HParsedToken *p)
+{
+  assert(p != NULL);
+  assert(p->token_type == TT_SEQUENCE);
+  return p->seq->used;
+}
+
+// The element array of sequence token 'p'. Asserts 'p' is a TT_SEQUENCE.
+HParsedToken **h_seq_elements(const HParsedToken *p)
+{
+  assert(p != NULL);
+  assert(p->token_type == TT_SEQUENCE);
+  return p->seq->elements;
+}
+
+// Element 'i' of sequence token 'p'. Asserts 'p' is a TT_SEQUENCE and that
+// 'i' is in range.
+HParsedToken *h_seq_index(const HParsedToken *p, size_t i)
+{
+  assert(p != NULL);
+  assert(p->token_type == TT_SEQUENCE);
+  return h_carray_index(p->seq, i);
+}
+
+// Follow a path of indices through nested sequences, starting at 'p'. The
+// indices after 'i' are read as int and must end with a negative value.
+HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...)
+{
+  va_list va;
+
+  va_start(va, i);
+  HParsedToken *ret = h_seq_index_vpath(p, i, va);
+  va_end(va);
+
+  return ret;
+}
+
+// va_list version of h_seq_index_path.
+HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va)
+{
+  HParsedToken *ret = h_seq_index(p, i);
+  int j;
+
+  // Each further index descends into the element found so far, not into 'p'.
+  while((j = va_arg(va, int)) >= 0)
+    ret = h_seq_index(ret, j);
+
+  return ret;
+}
+
+// Append the single token 'x' to sequence 'xs'.
+void h_seq_snoc(HParsedToken *xs, const HParsedToken *x)
+{
+  assert(xs != NULL);
+  assert(xs->token_type == TT_SEQUENCE);
+
+  h_carray_append(xs->seq, (HParsedToken *)x);
+}
+
+// Append every element of sequence 'ys' to sequence 'xs'.
+void h_seq_append(HParsedToken *xs, const HParsedToken *ys)
+{
+  assert(xs != NULL);
+  assert(xs->token_type == TT_SEQUENCE);
+  assert(ys != NULL);
+  assert(ys->token_type == TT_SEQUENCE);
+
+  for(size_t i=0; i<ys->seq->used; i++)
+    h_carray_append(xs->seq, ys->seq->elements[i]);
+}
+
+// Flatten nested sequences. Always returns a sequence.
+// If input element is not a sequence, returns it as a singleton sequence.
+const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p)
+{
+  assert(p != NULL);
+
+  HParsedToken *ret = h_make_seq(arena);
+  switch(p->token_type) {
+  case TT_SEQUENCE:
+    // Flatten and append all.
+    for(size_t i=0; i<p->seq->used; i++) {
+      h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i)));
+    }
+    break;
+  default:
+    // Make singleton sequence.
+    h_seq_snoc(ret, p);
+    break;
+  }
+
+  return ret;
+}
diff --git a/src/glue.h b/src/glue.h
new file mode 100644
index 0000000..90944ea
--- /dev/null
+++ b/src/glue.h
@@ -0,0 +1,251 @@
+//
+// API additions for writing grammar and semantic actions more concisely
+//
+//
+// Quick Overview:
+//
+// Grammars can be succinctly specified with the family of H_RULE macros.
+// H_RULE defines a plain parser variable. H_ARULE additionally attaches a
+// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE
+// combine both.
+//
+// A few standard semantic actions are defined below.
The H_ACT_APPLY macro
+// allows semantic actions to be defined by "partial application" of
+// a generic action to fixed parameters.
+//
+// The definition of more complex semantic actions will usually consist of
+// extracting data from the given parse tree and constructing a token of custom
+// type to represent the result. A number of functions and convenience macros
+// are provided to capture the most common cases and idioms.
+//
+// See the leading comment blocks on the sections below for more details.
+//
+
+#ifndef HAMMER_GLUE__H
+#define HAMMER_GLUE__H
+
+#include <assert.h>
+#include <stdarg.h>
+#include "hammer.h"
+
+//
+// Grammar specification
+//
+// H_RULE is simply a short-hand for the typical declaration and definition of
+// a parser variable. See its plain definition below. The goal is to save
+// horizontal space as well as to provide a clear and unified look together with
+// the other macro variants that stays close to an abstract PEG or BNF grammar.
+// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their
+// combinations as they allow the definition of syntax to be given without
+// intermingling it with the semantic specifications.
+//
+// H_ARULE defines a variable just like H_RULE but attaches a semantic action
+// to the result of the parser via h_action. The action is expected to be
+// named act_<rule>.
+//
+// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool.
+// The validation is expected to be named validate_<rule>.
+//
+// H_VARULE combines H_RULE with both an action and a validation. The action is
+// attached before the validation, i.e. the validation receives as input the
+// result of the action.
+//
+// H_AVRULE is like H_VARULE but the action is attached outside the validation,
+// i.e. the validation receives the uninterpreted AST as input.
+//
+
+
+#define H_RULE(rule, def)   const HParser *rule = def
+#define H_ARULE(rule, def)  const HParser *rule = h_action(def, act_ ## rule)
+#define H_VRULE(rule, def)  const HParser *rule = \
+  h_attr_bool(def, validate_ ## rule)
+#define H_VARULE(rule, def) const HParser *rule = \
+  h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
+#define H_AVRULE(rule, def) const HParser *rule = \
+  h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
+
+
+//
+// Pre-fab semantic actions
+//
+// A collection of generally useful semantic actions is provided.
+//
+// h_act_ignore is the action equivalent of the parser combinator h_ignore. It
+// simply causes the AST it is applied to to be replaced with NULL. This most
+// importantly causes it to be elided from the result of a surrounding
+// h_sequence.
+//
+// h_act_index is of note as it is not itself suitable to be passed to
+// h_action. It is parameterized by an index to be picked from a sequence
+// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY
+// macro provides a concise way to define such a parameter-application wrapper.
+//
+// h_act_flatten acts on a token of possibly nested sequences by recursively
+// flattening it into a single sequence. Cf. h_seq_flatten below.
+//
+// H_ACT_APPLY implements "partial application" for semantic actions. It
+// defines a new action that supplies given parameters to a parameterized
+// action such as h_act_index.
+//
+
+const HParsedToken *h_act_ignore(const HParseResult *p);
+const HParsedToken *h_act_index(int i, const HParseResult *p);
+const HParsedToken *h_act_flatten(const HParseResult *p);
+
+// Define 'myaction' as a specialization of 'paction' by supplying the leading
+// parameters.
+#define H_ACT_APPLY(myaction, paction, ...) \
+  const HParsedToken *myaction(const HParseResult *p) { \
+    return paction(__VA_ARGS__, p); \
+  }
+
+
+//
+// Working with HParsedTokens
+//
+// The type HParsedToken represents a dynamically-typed universe of values.
+// Declared below are constructors to turn ordinary values into their
+// HParsedToken equivalents, extractors to retrieve the original values from
+// inside an HParsedToken, and functions that inspect and modify tokens of
+// sequence type directly.
+//
+// In addition, there are a number of short-hand macros that work with some
+// conventions to eliminate common boilerplate. These conventions are listed
+// below. Be sure to follow them if you want to use the respective macros.
+//
+//  * The single argument to semantic actions should be called 'p'.
+//
+//    The H_MAKE macros supply 'p->arena' to their underlying h_make
+//    counterparts. The H_FIELD macros supply 'p->ast' to their underlying
+//    H_INDEX counterparts.
+//
+//  * For each custom token type, there should be a typedef for the
+//    corresponding value type.
+//
+//    H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to
+//    a pointer to the given type.
+//
+//  * For each custom token type, say 'foo_t', there must be an integer
+//    constant 'TT_foo_t' to identify the token type. This constant must have a
+//    value greater than or equal to TT_USER.
+//
+//    One idiom is to define an enum for all custom token types and to assign a
+//    value of TT_USER to the first element. This can be viewed as extending
+//    the HTokenType enum.
+//
+//    The H_MAKE and H_ASSERT macros derive the name of the token type constant
+//    from the given type name.
+//
+//
+// The H_ALLOC macro is useful for allocating values of custom token types.
+//
+// The H_MAKE family of macros construct tokens of a given type. The native
+// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ.
+// The form with no suffix is used for custom token types. This convention is
+// also used for other macro and function families.
+//
+// The H_ASSERT family simply asserts that a given token has the expected type.
+// It mainly serves as an implementation aid for H_CAST. Of note in that regard
+// is that, unlike the standard 'assert' macro, these form _expressions_ that
+// return the value of their token argument; thus they can be used in a
+// "pass-through" fashion inside other expressions.
+//
+// The H_CAST family combines a type assertion with access to the
+// statically-typed value inside a token.
+//
+// A number of functions h_seq_* operate on and inspect sequence tokens.
+// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence.
+// Therefore there are h_seq_snoc and h_seq_append to build up sequences.
+//
+// The macro families H_FIELD and H_INDEX combine index access on a sequence
+// with a cast to the appropriate result type. H_FIELD is used to access the
+// elements of the argument token 'p' in an action. H_INDEX allows any sequence
+// token to be specified. Both macro families take an arbitrary number of index
+// arguments, giving access to elements in nested sequences by path.
+// These macros are very useful to avoid spaghetti chains of unchecked pointer
+// dereferences.
+//
+
+// Standard short-hand for arena-allocating a variable in a semantic action.
+#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP)))
+
+// Token constructors...
+
+HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
+HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
+HParsedToken *h_make_bytes(HArena *arena, size_t len);
+HParsedToken *h_make_sint(HArena *arena, int64_t val);
+HParsedToken *h_make_uint(HArena *arena, uint64_t val);
+
+// Standard short-hands to make tokens in an action.
+#define H_MAKE(TYP, VAL)  h_make(p->arena, TT_ ## TYP, VAL)
+#define H_MAKE_SEQ()      h_make_seq(p->arena)
+#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN)
+#define H_MAKE_SINT(VAL)  h_make_sint(p->arena, VAL)
+#define H_MAKE_UINT(VAL)  h_make_uint(p->arena, VAL)
+
+// Extract (cast) type-specific value back from HParsedTokens...
+
+// Pass-through assertion that a given token has the expected type.
+#define h_assert_type(T,P) (assert((P)->token_type == (HTokenType)(T)), (P))
+
+// Convenience short-hand forms of h_assert_type.
+#define H_ASSERT(TYP, TOK)  h_assert_type(TT_ ## TYP, TOK)
+#define H_ASSERT_SEQ(TOK)   h_assert_type(TT_SEQUENCE, TOK)
+#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
+#define H_ASSERT_SINT(TOK)  h_assert_type(TT_SINT, TOK)
+#define H_ASSERT_UINT(TOK)  h_assert_type(TT_UINT, TOK)
+
+// Assert expected type and return contained value.
+#define H_CAST(TYP, TOK)  ((TYP *) H_ASSERT(TYP, TOK)->user)
+#define H_CAST_SEQ(TOK)   (H_ASSERT_SEQ(TOK)->seq)
+#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
+#define H_CAST_SINT(TOK)  (H_ASSERT_SINT(TOK)->sint)
+#define H_CAST_UINT(TOK)  (H_ASSERT_UINT(TOK)->uint)
+
+// Sequence access...
+
+// Return the length of a sequence.
+size_t h_seq_len(const HParsedToken *p);
+
+// Access a sequence's element array.
+HParsedToken **h_seq_elements(const HParsedToken *p);
+
+// Access a sequence element by index.
+HParsedToken *h_seq_index(const HParsedToken *p, size_t i);
+
+// Access an element in a nested sequence by a path of indices.
+HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...);
+HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
+
+// Convenience macros combining (nested) index access and h_cast.
+#define H_INDEX(TYP, SEQ, ...)   H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_SEQ(SEQ, ...)    H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_BYTES(SEQ, ...)  H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_SINT(SEQ, ...)   H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_UINT(SEQ, ...)   H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_TOKEN(SEQ, ...)  h_seq_index_path(SEQ, __VA_ARGS__, -1)
+
+// Standard short-hand to access and cast elements on a sequence token.
+#define H_FIELD(TYP, ...)  H_INDEX(TYP, p->ast, __VA_ARGS__)
+#define H_FIELD_SEQ(...)   H_INDEX_SEQ(p->ast, __VA_ARGS__)
+#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
+#define H_FIELD_SINT(...)  H_INDEX_SINT(p->ast, __VA_ARGS__)
+#define H_FIELD_UINT(...)  H_INDEX_UINT(p->ast, __VA_ARGS__)
+
+// Lower-level helper for h_seq_index.
+HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
+
+// Sequence modification...
+
+// Add elements to a sequence.
+void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one
+void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many
+
+// XXX TODO: Remove elements from a sequence.
+
+// Flatten nested sequences into one.
+const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p);
+
+
+#endif