Merge pull request #106 from pesco/98-permutation-combinator
add h_permutation
This commit is contained in:
commit
0dff293f6a
4 changed files with 260 additions and 0 deletions
|
|
@ -39,6 +39,7 @@ parsers = ['parsers/%s.c'%s for s in
|
|||
'not',
|
||||
'nothing',
|
||||
'optional',
|
||||
'permutation',
|
||||
'sequence',
|
||||
'token',
|
||||
'unimplemented',
|
||||
|
|
|
|||
26
src/hammer.h
26
src/hammer.h
|
|
@ -437,6 +437,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa
|
|||
*/
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p);
|
||||
|
||||
/**
|
||||
* Given a null-terminated list of parsers, match a permutation phrase of these
|
||||
* parsers, i.e. match all parsers exactly once in any order.
|
||||
*
|
||||
* If multiple orders would match, the lexicographically smallest permutation is used;
|
||||
* in other words, at any step the remaining available parsers are tried in
|
||||
* the order in which they appear in the arguments.
|
||||
*
|
||||
* As an exception, 'h_optional' parsers (actually those that return a result
|
||||
* of token type TT_NONE) are detected and the algorithm will try to match them
|
||||
* with a non-empty result. Specifically, a result of TT_NONE is treated as a
|
||||
* non-match as long as any other argument matches.
|
||||
*
|
||||
* Other parsers that succeed on any input (e.g. h_many), that match the same
|
||||
* input as others, or that match input which is a prefix of another match can
|
||||
* lead to unexpected results and should probably not be used as arguments.
|
||||
*
|
||||
* The result is a sequence of the same length as the argument list.
|
||||
* Each parser's result is placed at that parser's index in the arguments.
|
||||
* The permutation itself (the order in which the arguments were matched) is
|
||||
* not returned.
|
||||
*
|
||||
* Result token type: TT_SEQUENCE
|
||||
*/
|
||||
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p);
|
||||
|
||||
/**
|
||||
* Given two parsers, p1 and p2, this parser succeeds in the following
|
||||
* cases:
|
||||
|
|
|
|||
179
src/parsers/permutation.c
Normal file
179
src/parsers/permutation.c
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
#include <stdarg.h>
|
||||
#include "parser_internal.h"
|
||||
|
||||
typedef struct {
|
||||
size_t len;
|
||||
HParser **p_array;
|
||||
} HSequence;
|
||||
|
||||
// main recursion, used by parse_permutation below
|
||||
static int parse_permutation_tail(const HSequence *s,
|
||||
HCountedArray *seq,
|
||||
const size_t k, char *set,
|
||||
HParseState *state)
|
||||
{
|
||||
// shorthands
|
||||
const size_t n = s->len;
|
||||
HParser **ps = s->p_array;
|
||||
|
||||
// trivial base case
|
||||
if(k >= n)
|
||||
return 1;
|
||||
|
||||
HInputStream bak = state->input_stream;
|
||||
|
||||
// try available parsers as first element of the permutation tail
|
||||
HParseResult *match = NULL;
|
||||
size_t i;
|
||||
for(i=0; i<n; i++) {
|
||||
if(set[i]) {
|
||||
match = h_do_parse(ps[i], state);
|
||||
|
||||
// save result
|
||||
if(match)
|
||||
seq->elements[i] = (void *)match->ast;
|
||||
|
||||
// treat empty optionals (TT_NONE) like failure here
|
||||
if(match && match->ast && match->ast->token_type == TT_NONE)
|
||||
match = NULL;
|
||||
|
||||
if(match) {
|
||||
// remove parser from active set
|
||||
set[i] = 0;
|
||||
|
||||
// parse the rest of the permutation phrase
|
||||
if(parse_permutation_tail(s, seq, k+1, set, state)) {
|
||||
// success
|
||||
return 1;
|
||||
} else {
|
||||
// place parser back in active set and try the next
|
||||
set[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
state->input_stream = bak; // rewind input
|
||||
}
|
||||
}
|
||||
|
||||
// if all available parsers were empty optionals (TT_NONE), still succeed
|
||||
for(i=0; i<n; i++) {
|
||||
if(set[i]) {
|
||||
HParsedToken *tok = seq->elements[i];
|
||||
if(!(tok && tok->token_type == TT_NONE))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(i==n) // all were TT_NONE
|
||||
return 1;
|
||||
|
||||
// permutations exhausted
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parse callback of the permutation parser.
//
// 'env' is the HSequence holding the argument parsers. On success the result
// is a TT_SEQUENCE token with one slot per argument parser, each parser's
// result stored at that parser's index. Returns NULL if no permutation of the
// arguments matches.
static HParseResult *parse_permutation(void *env, HParseState *state)
{
  const HSequence *perm = env;
  const size_t count = perm->len;

  // one flag per parser, nonzero while the parser has not been matched yet
  char *avail = h_arena_malloc(state->arena, sizeof(char) * count);
  memset(avail, 1, sizeof(char) * count);

  // result slots, filled in by parse_permutation_tail
  HCountedArray *results = h_carray_new_sized(state->arena, count);

  if(!parse_permutation_tail(perm, results, 0, avail, state)) {
    // no parse
    // XXX free seq
    return NULL;
  }

  // success: wrap the result slots in a sequence token
  results->used = count;
  HParsedToken *tok = a_new(HParsedToken, 1);
  tok->token_type = TT_SEQUENCE;
  tok->seq = results;
  return make_result(state->arena, tok);
}
|
||||
|
||||
|
||||
static const HParserVtable permutation_vt = {
|
||||
.parse = parse_permutation,
|
||||
.isValidRegular = h_false,
|
||||
.isValidCF = h_false,
|
||||
.desugar = NULL,
|
||||
.compile_to_rvm = h_not_regular,
|
||||
};
|
||||
|
||||
// Variadic entry point: forwards the NULL-terminated argument list to
// h_permutation__mv together with the system allocator.
HParser* h_permutation(HParser* p, ...) {
  va_list rest;
  va_start(rest, p);
  HParser* parser = h_permutation__mv(&system_allocator, p, rest);
  va_end(rest);
  return parser;
}
|
||||
|
||||
// Variadic entry point with a caller-supplied allocator: forwards the
// NULL-terminated argument list to h_permutation__mv.
HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) {
  va_list rest;
  va_start(rest, p);
  HParser* parser = h_permutation__mv(mm__, p, rest);
  va_end(rest);
  return parser;
}
|
||||
|
||||
// va_list entry point: same as h_permutation__mv, using the system allocator.
HParser* h_permutation__v(HParser* p, va_list ap) {
  HAllocator *mm = &system_allocator;
  return h_permutation__mv(mm, p, ap);
}
|
||||
|
||||
// Build a permutation parser from a first parser 'p' followed by a
// NULL-terminated va_list of further parsers.
//
//   mm__  allocator handed to h_new_parser (and referenced by name because
//         the h_new calls below are written against it)
//   p     first argument parser, or NULL for an empty argument list
//   ap_   remaining arguments, terminated by a NULL pointer; only read
//         through copies, so the caller's va_list is left untouched
//
// Returns the new parser whose environment is an HSequence holding the
// arguments in the order given.
HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) {
  va_list ap;
  size_t len = 0;
  HSequence *s = h_new(HSequence, 1);

  // First pass: count the arguments. If p is already the terminating NULL
  // there is nothing after it, and va_arg must not be called at all.
  if (p != NULL) {
    HParser *arg;
    va_copy(ap, ap_);
    do {
      len++;
      arg = va_arg(ap, HParser *);
    } while (arg);
    va_end(ap);
  }
  s->p_array = h_new(HParser *, len);

  // Second pass: copy the arguments; slot 0 is p, the rest come from the list.
  if (len > 0) {
    va_copy(ap, ap_);
    s->p_array[0] = p;
    for (size_t i = 1; i < len; i++) {
      s->p_array[i] = va_arg(ap, HParser *);
    }
    va_end(ap);
  }

  s->len = len;
  return h_new_parser(mm__, &permutation_vt, s);
}
|
||||
|
||||
HParser* h_permutation__a(void *args[]) {
|
||||
return h_permutation__ma(&system_allocator, args);
|
||||
}
|
||||
|
||||
// Build a permutation parser from a NULL-terminated array of parsers.
//
//   mm__  allocator (referenced by name because the h_new calls below are
//         written against it)
//   args  array of HParser pointers, terminated by a NULL entry
//
// Returns the new parser whose environment is an HSequence holding a copy of
// the array (without the terminator).
HParser* h_permutation__ma(HAllocator* mm__, void *args[]) {
  HParser **parsers = (HParser **)args;

  // count the entries in front of the NULL terminator
  size_t len = 0;
  while (parsers[len] != NULL) {
    len++;
  }

  HSequence *s = h_new(HSequence, 1);
  s->p_array = h_new(HParser *, len);
  for (size_t i = 0; i < len; i++) {
    s->p_array[i] = parsers[i];
  }
  s->len = len;

  HParser *ret = h_new(HParser, 1);
  // Zero the whole structure first so that any field not assigned explicitly
  // below does not hold an indeterminate value.
  // NOTE(review): h_permutation__mv builds its parser with h_new_parser;
  // consider doing the same here - confirm it sets an equivalent backend.
  memset(ret, 0, sizeof *ret);
  ret->vtable = &permutation_vt;
  ret->env = (void*)s;
  ret->backend = PB_MIN;
  return ret;
}
|
||||
|
|
@ -515,6 +515,59 @@ static void test_put_get(gconstpointer backend) {
|
|||
g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9);
|
||||
}
|
||||
|
||||
// Exercises h_permutation with three mandatory parsers, and with an optional
// parser given as the last and as the first argument.
static void test_permutation(gconstpointer backend) {
  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);

  // all three arguments mandatory
  const HParser *strict = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL);

  // every order matches; results are reported in argument order
  g_check_parse_match(strict, be, "abc", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(strict, be, "acb", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(strict, be, "bac", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(strict, be, "bca", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(strict, be, "cab", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(strict, be, "cba", 3, "(u0x61 u0x62 u0x63)");
  // missing or repeated elements fail
  g_check_parse_failed(strict, be, "a", 1);
  g_check_parse_failed(strict, be, "ab", 2);
  g_check_parse_failed(strict, be, "abb", 3);

  // optional parser as the last argument
  const HParser *opt_last = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL);

  g_check_parse_match(opt_last, be, "abc", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "acb", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "bac", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "bca", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "cab", 3, "(u0x61 u0x62 u0x63)");
  g_check_parse_match(opt_last, be, "cba", 3, "(u0x61 u0x62 u0x63)");
  // the optional element may be absent
  g_check_parse_match(opt_last, be, "ab", 2, "(u0x61 u0x62 null)");
  g_check_parse_match(opt_last, be, "ba", 2, "(u0x61 u0x62 null)");
  // the mandatory elements may not
  g_check_parse_failed(opt_last, be, "a", 1);
  g_check_parse_failed(opt_last, be, "b", 1);
  g_check_parse_failed(opt_last, be, "c", 1);
  g_check_parse_failed(opt_last, be, "ca", 2);
  g_check_parse_failed(opt_last, be, "cb", 2);
  g_check_parse_failed(opt_last, be, "cc", 2);
  g_check_parse_failed(opt_last, be, "ccab", 4);
  g_check_parse_failed(opt_last, be, "ccc", 3);

  // optional parser as the first argument
  const HParser *opt_first = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL);

  g_check_parse_match(opt_first, be, "abc", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "acb", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "bac", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "bca", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "cab", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "cba", 3, "(u0x63 u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "ab", 2, "(null u0x61 u0x62)");
  g_check_parse_match(opt_first, be, "ba", 2, "(null u0x61 u0x62)");
  g_check_parse_failed(opt_first, be, "a", 1);
  g_check_parse_failed(opt_first, be, "b", 1);
  g_check_parse_failed(opt_first, be, "c", 1);
  g_check_parse_failed(opt_first, be, "ca", 2);
  g_check_parse_failed(opt_first, be, "cb", 2);
  g_check_parse_failed(opt_first, be, "cc", 2);
  g_check_parse_failed(opt_first, be, "ccab", 4);
  g_check_parse_failed(opt_first, be, "ccc", 3);
}
|
||||
|
||||
void register_parser_tests(void) {
|
||||
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
|
||||
g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
|
||||
|
|
@ -563,6 +616,7 @@ void register_parser_tests(void) {
|
|||
g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
|
||||
g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
|
||||
g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
|
||||
g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
|
||||
|
||||
g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
|
||||
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue