add h_permutation

This commit is contained in:
Sven M. Hallberg 2014-06-18 21:54:52 +02:00
parent c7161663c0
commit 41dca83631
4 changed files with 260 additions and 0 deletions

View file

@ -39,6 +39,7 @@ parsers = ['parsers/%s.c'%s for s in
'not', 'not',
'nothing', 'nothing',
'optional', 'optional',
'permutation',
'sequence', 'sequence',
'token', 'token',
'unimplemented', 'unimplemented',

View file

@ -437,6 +437,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa
*/ */
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p); HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p);
/**
* Given a null-terminated list of parsers, match a permutation phrase of these
* parsers, i.e. match all parsers exactly once in any order.
*
* If multiple orders would match, the lexicographically smallest permutation
* is used; in other words, at any step the remaining available parsers are
* tried in the order in which they appear in the arguments.
*
* As an exception, 'h_optional' parsers (actually those that return a result
* of token type TT_NONE) are detected and the algorithm will try to match them
* with a non-empty result. Specifically, a result of TT_NONE is treated as a
* non-match as long as any other argument matches.
*
* Other parsers that succeed on any input (e.g. h_many), that match the same
* input as others, or that match input which is a prefix of another match can
* lead to unexpected results and should probably not be used as arguments.
*
* The result is a sequence of the same length as the argument list.
* Each parser's result is placed at that parser's index in the arguments.
* The permutation itself (the order in which the arguments were matched) is
* not returned.
*
* The argument list must be terminated by a NULL pointer (the declaration
* carries the 'sentinel' attribute).
*
* Result token type: TT_SEQUENCE
*/
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p);
/** /**
* Given two parsers, p1 and p2, this parser succeeds in the following * Given two parsers, p1 and p2, this parser succeeds in the following
* cases: * cases:

179
src/parsers/permutation.c Normal file
View file

@ -0,0 +1,179 @@
#include <stdarg.h>
#include "parser_internal.h"
// Environment of a permutation parser: the argument parsers in the order in
// which they were passed to the constructor.
typedef struct {
size_t len;        // number of entries in p_array
HParser **p_array; // the argument parsers, indexed by argument position
} HSequence;
// main recursion, used by parse_permutation below
//
// Tries to match the remaining (still available) parsers of the permutation
// phrase at the current input position, backtracking over the choice of which
// parser comes next.
//
//   s     - the argument parsers of the permutation
//   seq   - result array; the AST of parser i is stored at seq->elements[i]
//   k     - number of parsers already matched on the path to this call
//   set   - set[i] is nonzero while parser i is still available (unmatched)
//   state - parse state; its input_stream is saved on entry and restored
//           after every attempt that does not lead to overall success
//
// Returns 1 if the tail could be matched, 0 otherwise.
static int parse_permutation_tail(const HSequence *s,
HCountedArray *seq,
const size_t k, char *set,
HParseState *state)
{
// shorthands
const size_t n = s->len;
HParser **ps = s->p_array;
// trivial base case: every parser has been matched
if(k >= n)
return 1;
// remember the input position so each attempt starts from the same place
HInputStream bak = state->input_stream;
// try available parsers as first element of the permutation tail
// (in argument order, so the first ordering that works is the one taken)
HParseResult *match = NULL;
size_t i;
for(i=0; i<n; i++) {
if(set[i]) {
match = h_do_parse(ps[i], state);
// save result
// NOTE(review): the slot is only written when the parser succeeds; after a
// failure it keeps whatever an earlier attempt stored there. The TT_NONE
// check at the end of this function reads these slots - confirm that a
// leftover value cannot be mistaken for a fresh empty-optional result.
if(match)
seq->elements[i] = (void *)match->ast;
// treat empty optionals (TT_NONE) like failure here
// (the TT_NONE token stays recorded in seq for the fallback check below)
if(match && match->ast && match->ast->token_type == TT_NONE)
match = NULL;
if(match) {
// remove parser from active set
set[i] = 0;
// parse the rest of the permutation phrase
if(parse_permutation_tail(s, seq, k+1, set, state)) {
// success
return 1;
} else {
// place parser back in active set and try the next
set[i] = 1;
}
}
state->input_stream = bak; // rewind input
}
}
// if all available parsers were empty optionals (TT_NONE), still succeed
// (the loop stops early at the first available parser whose recorded result
// is missing or is not TT_NONE)
for(i=0; i<n; i++) {
if(set[i]) {
HParsedToken *tok = seq->elements[i];
if(!(tok && tok->token_type == TT_NONE))
break;
}
}
if(i==n) // all were TT_NONE
return 1;
// permutations exhausted
return 0;
}
// Parse callback of the permutation parser.
//
// env is the HSequence holding the argument parsers. All arguments are marked
// as available, the backtracking search in parse_permutation_tail is run, and
// on success the collected results are wrapped in a TT_SEQUENCE token with
// one slot per argument. Returns NULL if no ordering matches.
static HParseResult *parse_permutation(void *env, HParseState *state)
{
  const HSequence *perm = env;
  const size_t count = perm->len;

  // one flag per argument parser; nonzero means "not yet matched"
  char *available = h_arena_malloc(state->arena, sizeof(char) * count);
  memset(available, 1, sizeof(char) * count);

  // result slots, indexed by argument position
  HCountedArray *results = h_carray_new_sized(state->arena, count);

  if(!parse_permutation_tail(perm, results, 0, available, state)) {
    // no parse
    // XXX free seq
    return NULL;
  }

  // success: every slot counts as used, whatever order the arguments matched in
  results->used = count;

  HParsedToken *tok = a_new(HParsedToken, 1);
  tok->token_type = TT_SEQUENCE;
  tok->seq = results;
  return make_result(state->arena, tok);
}
// Vtable shared by all permutation parsers. Only the parse callback is
// implemented here; no desugaring is provided.
// NOTE(review): h_false / h_not_regular presumably mark the parser as unusable
// by the regular and context-free backends - confirm against their definitions.
static const HParserVtable permutation_vt = {
.parse = parse_permutation,
.isValidRegular = h_false,
.isValidCF = h_false,
.desugar = NULL,
.compile_to_rvm = h_not_regular,
};
// Variadic constructor using the system allocator.
// Takes a NULL-terminated list of parsers and forwards it to
// h_permutation__mv.
HParser* h_permutation(HParser* p, ...) {
  va_list rest;
  HParser *parser;

  va_start(rest, p);
  parser = h_permutation__mv(&system_allocator, p, rest);
  va_end(rest);

  return parser;
}
// Variadic constructor with a caller-supplied allocator.
// Takes a NULL-terminated list of parsers and forwards it to
// h_permutation__mv.
HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) {
  va_list rest;
  HParser *parser;

  va_start(rest, p);
  parser = h_permutation__mv(mm__, p, rest);
  va_end(rest);

  return parser;
}
// va_list constructor using the system allocator; forwards to
// h_permutation__mv.
HParser* h_permutation__v(HParser* p, va_list ap) {
  HParser *parser = h_permutation__mv(&system_allocator, p, ap);
  return parser;
}
// va_list constructor with a caller-supplied allocator; every other variadic
// and va_list variant ends up here.
//
//   mm__ - allocator used for the environment and the parser object
//   p    - first argument parser, or NULL for an empty argument list
//   ap_  - remaining argument parsers, terminated by a NULL pointer
//
// The arguments are copied into a freshly allocated HSequence which becomes
// the parser's environment. ap_ itself is not consumed; it is walked twice
// through copies (once to count, once to fill the array).
//
// If p is NULL it is itself the sentinel, so ap_ is not read at all and the
// resulting parser has zero arguments.
HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) {
  HSequence *s = h_new(HSequence, 1);
  size_t len = 0;

  s->p_array = NULL;

  if (p) {
    va_list ap;

    // first pass: count p plus everything up to the NULL sentinel
    va_copy(ap, ap_);
    do {
      len++;
    } while (va_arg(ap, HParser *));
    va_end(ap);

    // second pass: copy the parsers in argument order
    s->p_array = h_new(HParser *, len);
    s->p_array[0] = p;
    va_copy(ap, ap_);
    for (size_t i = 1; i < len; i++) {
      s->p_array[i] = va_arg(ap, HParser *);
    }
    va_end(ap);
  }

  s->len = len;
  return h_new_parser(mm__, &permutation_vt, s);
}
// Array constructor using the system allocator; args is a NULL-terminated
// array of parsers. Forwards to h_permutation__ma.
HParser* h_permutation__a(void *args[]) {
  HParser *parser = h_permutation__ma(&system_allocator, args);
  return parser;
}
// Array constructor with a caller-supplied allocator.
//
//   mm__ - allocator used for the environment and the parser object
//   args - array of HParser pointers, terminated by a NULL entry
//
// The parsers are copied into a freshly allocated HSequence which becomes the
// parser's environment. The parser object is created through h_new_parser,
// the same helper h_permutation__mv uses, so both constructors produce
// identically set-up parsers instead of this one filling in only some fields
// by hand.
// NOTE(review): relies on h_new_parser initialising the fields not set here -
// confirm against its definition.
HParser* h_permutation__ma(HAllocator* mm__, void *args[]) {
  HParser **parsers = (HParser **)args;

  // count entries up to (not including) the NULL terminator
  size_t len = 0;
  while (parsers[len]) {
    len++;
  }

  HSequence *s = h_new(HSequence, 1);
  s->p_array = h_new(HParser *, len);
  for (size_t i = 0; i < len; i++) {
    s->p_array[i] = parsers[i];
  }
  s->len = len;

  HParser *ret = h_new_parser(mm__, &permutation_vt, s);
  ret->backend = PB_MIN; // kept from the hand-built version of this constructor
  return ret;
}

View file

@ -515,6 +515,59 @@ static void test_put_get(gconstpointer backend) {
g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9); g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9);
} }
// Exercises h_permutation on the backend passed in as test data.
//
// Three parsers are checked:
//   p   - three mandatory characters 'a', 'b', 'c'
//   po  - 'a', 'b' mandatory, 'c' optional (optional listed last)
//   po2 - same as po but with the optional listed first
// In every matching case the expected result lists the tokens in argument
// order, regardless of the order in which they appear in the input.
static void test_permutation(gconstpointer backend) {
HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
const HParser *p = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL);
// all six orderings of the three characters match
g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(p, be, "acb", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(p, be, "bac", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(p, be, "bca", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(p, be, "cab", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(p, be, "cba", 3, "(u0x61 u0x62 u0x63)");
// missing or repeated elements are rejected
g_check_parse_failed(p, be, "a", 1);
g_check_parse_failed(p, be, "ab", 2);
g_check_parse_failed(p, be, "abb", 3);
const HParser *po = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL);
// with 'c' present, every ordering matches and 'c' is reported
g_check_parse_match(po, be, "abc", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(po, be, "acb", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(po, be, "bac", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(po, be, "bca", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(po, be, "cab", 3, "(u0x61 u0x62 u0x63)");
g_check_parse_match(po, be, "cba", 3, "(u0x61 u0x62 u0x63)");
// with 'c' absent, its slot is reported as null
g_check_parse_match(po, be, "ab", 2, "(u0x61 u0x62 null)");
g_check_parse_match(po, be, "ba", 2, "(u0x61 u0x62 null)");
// a mandatory element missing, or 'c' repeated, is rejected
g_check_parse_failed(po, be, "a", 1);
g_check_parse_failed(po, be, "b", 1);
g_check_parse_failed(po, be, "c", 1);
g_check_parse_failed(po, be, "ca", 2);
g_check_parse_failed(po, be, "cb", 2);
g_check_parse_failed(po, be, "cc", 2);
g_check_parse_failed(po, be, "ccab", 4);
g_check_parse_failed(po, be, "ccc", 3);
const HParser *po2 = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL);
// same inputs as for po; only the slot order of the result differs
g_check_parse_match(po2, be, "abc", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "acb", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "bac", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "bca", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "cab", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "cba", 3, "(u0x63 u0x61 u0x62)");
g_check_parse_match(po2, be, "ab", 2, "(null u0x61 u0x62)");
g_check_parse_match(po2, be, "ba", 2, "(null u0x61 u0x62)");
g_check_parse_failed(po2, be, "a", 1);
g_check_parse_failed(po2, be, "b", 1);
g_check_parse_failed(po2, be, "c", 1);
g_check_parse_failed(po2, be, "ca", 2);
g_check_parse_failed(po2, be, "cb", 2);
g_check_parse_failed(po2, be, "cc", 2);
g_check_parse_failed(po2, be, "ccab", 4);
g_check_parse_failed(po2, be, "ccc", 3);
}
void register_parser_tests(void) { void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@ -563,6 +616,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness); g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);