Merge pull request #82 from thequux/perl-bindings
Perl bindings (minus travis-ci)
This commit is contained in:
commit
df7a7b467a
12 changed files with 755 additions and 493 deletions
|
|
@ -7,7 +7,7 @@ import sys
|
|||
vars = Variables(None, ARGUMENTS)
|
||||
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
|
||||
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
|
||||
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['python']))
|
||||
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['python', 'perl']))
|
||||
|
||||
env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, tools=['default', 'scanreplace'], toolpath=['tools'])
|
||||
|
||||
|
|
@ -114,6 +114,7 @@ else:
|
|||
lib = env.SConscript(["src/SConscript"])
|
||||
env.Alias(env.SConscript(["examples/SConscript"]))
|
||||
|
||||
env.Alias("test", testruns)
|
||||
for testrun in testruns:
|
||||
env.Alias("test", testrun)
|
||||
|
||||
env.Alias("install", targets)
|
||||
|
|
|
|||
1
src/bindings/.gitignore
vendored
Normal file
1
src/bindings/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
hammer_wrap.c
|
||||
1
src/bindings/perl/.gitignore
vendored
Normal file
1
src/bindings/perl/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
hammer.pm
|
||||
15
src/bindings/perl/Makefile.PL
Normal file
15
src/bindings/perl/Makefile.PL
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
use ExtUtils::MakeMaker;
use File::Basename;
use Config;

# Scons hack: the SCons build runs this script as "perl <path>/Makefile.PL",
# presumably from a different working directory, so switch to the script's
# own directory before generating the Makefile. Fail loudly if that is not
# possible; otherwise the Makefile would be written somewhere unexpected.
my $script_dir = dirname($0);
chdir($script_dir) or die "Cannot chdir to $script_dir: $!";

# Describe the XS module: the SWIG-generated wrapper object is linked
# against libhammer, with hammer's headers looked up two directories up.
WriteMakefile(
    NAME => "hammer",
    LIBS => ["-lhammer"],
    OBJECT => 'hammer_wrap.o',
    INC => '-I../..',
    CCFLAGS => "$Config{ccflags} -DSWIG -DHAMMER_INTERNAL__NO_STDARG_H -std=gnu99",
);
|
||||
|
||||
9
src/bindings/perl/README.md
Normal file
9
src/bindings/perl/README.md
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
Perl Hammer bindings
|
||||
====================
|
||||
|
||||
To build and run these bindings, you will need to have
|
||||
ExtUtils::MakeMaker and make installed. On a Debian system, this just
|
||||
means that you need perl installed. On a lesser Linux distribution,
|
||||
this may be all you need, but you're on your own. On Windows or
|
||||
another UNIX, you're *really* on your own (until we get PRs with
|
||||
better instructions).
|
||||
42
src/bindings/perl/SConscript
Normal file
42
src/bindings/perl/SConscript
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# -*- python -*-
|
||||
import os.path
|
||||
Import("env libhammer_shared testruns targets")
|
||||
|
||||
# Dedicated construction environment for the Perl bindings, derived from the
# imported top-level environment so the settings below stay local to it.
perlenv = env.Clone()

# Flags for compiling the SWIG-generated wrapper; warnings are disabled.
perl_ccflags = [
    "-fpic",
    '-DSWIG',
    '-Wno-all',
    '-Wno-extra',
    '-Wno-error',
    '-DHAMMER_INTERNAL__NO_STDARG_H',
]

# Flags handed to the swig command line ("-perl" selects the Perl backend).
perl_swigflags = [
    "-DHAMMER_INTERNAL__NO_STDARG_H",
    "-Isrc/",
    "-perl",
]

perlenv.Append(
    CCFLAGS=perl_ccflags,
    CPPPATH=["../.."],
    LIBS=['hammer'],
    LIBPATH=["../.."],
    SWIGFLAGS=perl_swigflags,
)
|
||||
import os

# Forward the caller's Perl settings (if present) into the environment used
# to run the build commands, so perl/MakeMaker see the same configuration.
for passthrough_var in ('PERL_MM_OPT', 'PERL5LIB'):
    if passthrough_var in os.environ:
        perlenv['ENV'][passthrough_var] = os.environ[passthrough_var]
|
||||
|
||||
swig = ['hammer.i']

# Step 1: run swig on the interface file to produce the C wrapper and the
# Perl module.
hammer_wrap = perlenv.Command(
    ['hammer_wrap.c', 'hammer.pm'],
    swig,
    "swig $SWIGFLAGS $SOURCE",
)

# Step 2: run Makefile.PL to generate the Makefile.
makefile = perlenv.Command(['Makefile'], ['Makefile.PL'], "perl $SOURCE")

# Directory containing the generated wrapper; every make invocation is
# pointed at it with -C.
targetdir = os.path.dirname(str(hammer_wrap[0].path))

# Step 3: build the extension with make once both the Makefile and the
# wrapper sources exist.
libhammer_perl = perlenv.Command(
    ['hammer.so'],
    makefile + hammer_wrap,
    "make -C %s" % targetdir,
)

Default(libhammer_perl)
|
||||
|
||||
# Test environment: same as the build environment, but with LD_LIBRARY_PATH
# pointing at the directory of the shared libhammer.
perltestenv = perlenv.Clone()
shared_lib_dir = os.path.dirname(str(libhammer_shared[0]))
perltestenv['ENV']['LD_LIBRARY_PATH'] = shared_lib_dir

perltests = ['t/hammer.t']

# "make test" depends on the test script, the Perl extension and the shared
# library.
perltest_sources = perltests + libhammer_perl + libhammer_shared
perltestexec = perltestenv.Command(
    None,
    perltest_sources,
    "make test -C " + targetdir,
)
perltest = Alias("testperl", [perltestexec], perltestexec)

# Tests are re-run on every invocation rather than being cached.
AlwaysBuild(perltestexec)
testruns.append(perltest)
|
||||
|
||||
# Installation is delegated to the MakeMaker-generated Makefile and exposed
# as the "installperl" alias, which is added to the shared install targets.
perl_install_cmd = "make install -C " + targetdir
perlinstallexec = perlenv.Command(None, libhammer_perl, perl_install_cmd)
perlinstall = Alias("installperl", [perlinstallexec], perlinstallexec)
targets.append(perlinstall)
|
||||
287
src/bindings/perl/hammer.i
Normal file
287
src/bindings/perl/hammer.i
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
%module hammer;
|
||||
%begin %{
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
%}
|
||||
|
||||
%inline %{
|
||||
static int h_tt_perl;
|
||||
%}
|
||||
%init %{
|
||||
h_tt_perl = h_allocate_token_type("com.upstandinghackers.hammer.perl");
|
||||
%}
|
||||
|
||||
|
||||
%apply (char *STRING, size_t LENGTH) {(uint8_t* str, size_t len)}
|
||||
%apply (uint8_t* str, size_t len) {(const uint8_t* input, size_t length)}
|
||||
%apply (uint8_t* str, size_t len) {(const uint8_t* str, const size_t len)}
|
||||
%apply (uint8_t* str, size_t len) {(const uint8_t* charset, size_t length)}
|
||||
|
||||
%typemap(out) struct HParseResult_* {
  // Convert a parse result to a Perl value: a NULL result becomes undef,
  // anything else is the Perl form of its AST.
  // NOTE(review): the SWIG Perl5 examples for "out" typemaps also increment
  // argvi after assigning $result; confirm whether that is needed here.
  SV* hpt_to_perl(const struct HParsedToken_ *token);
  // TODO: raise parse failure
  $result = ($1 != NULL) ? hpt_to_perl($1->ast) : newSV(0);
}
|
||||
|
||||
%typemap(in) void*[] {
  // Convert a Perl array reference of parsers into a NULL-terminated
  // C array of pointers.
  if (!SvROK($input) || SvTYPE(SvRV($input)) != SVt_PVAV)
    SWIG_exception_fail(SWIG_TypeError, "Expected array ref");

  AV* av = (AV*) SvRV($input);
  size_t amax = av_top_index(av) + 1; // I want the length, not the top index...
  // TODO: is this array copied?
  // NOTE(review): the buffer is not released after a successful call because
  // it is unclear whether the callee keeps the pointer; add a freearg
  // typemap once that is confirmed.
  $1 = malloc((amax+1) * sizeof(*$1));
  if ($1 == NULL)
    SWIG_exception_fail(SWIG_MemoryError, "Out of memory");
  $1[amax] = NULL;
  for (size_t i = 0; i < amax; i++) {
    // av_fetch returns NULL for a slot that holds no value; treat that the
    // same as an element of the wrong type instead of dereferencing it.
    SV **elem = av_fetch(av, i, 0);
    int res = (elem != NULL)
      ? SWIG_ConvertPtr(*elem, &($1[i]), SWIGTYPE_p_HParser_, 0|0)
      : SWIG_TypeError;
    if (!SWIG_IsOK(res)) {
      // Do not leak the partially filled array on the failure path.
      free($1);
      $1 = NULL;
      SWIG_exception_fail(SWIG_ArgError(res), "Expected a list of parsers and only parsers");
    }
  }
}
|
||||
|
||||
%typemap(in) uint8_t {
  // Accept either an integer (used as the byte value) or a non-empty
  // string (its first byte is used).
  if (SvIOKp($input)) {
    $1 = SvIV($input);
  } else if (SvPOKp($input)) {
    // SvPV stores the length into a STRLEN, not an IV.
    STRLEN len;
    uint8_t* ival = (uint8_t*)SvPV($input, len);
    if (len < 1) {
      %type_error("Expected string with at least one character");
      SWIG_fail;
    }
    $1 = ival[0];
  } else {
    %type_error("Expected int or string");
    SWIG_fail;
  }
}
|
||||
|
||||
|
||||
%typemap(newfree) struct HParseResult_* {
  // Release a newly created parse result once it has been converted.
  // In a newfree typemap the C object is $1 ($input names the incoming
  // target-language argument of "in" typemaps).
  h_parse_result_free($1);
}
|
||||
|
||||
%rename("token") h_token;
|
||||
%rename("%(regex:/^h_(.*)/\\1/)s", regextarget=1) "^h_u?int(64|32|16|8)";
|
||||
|
||||
%define %combinator %rename("%(regex:/^h_(.*)$/\\1/)s") %enddef
|
||||
|
||||
%combinator h_end_p;
|
||||
%combinator h_left;
|
||||
%combinator h_middle;
|
||||
%combinator h_right;
|
||||
%combinator h_int_range;
|
||||
%combinator h_whitespace;
|
||||
%combinator h_nothing_p;
|
||||
|
||||
%combinator h_butnot;
|
||||
%combinator h_difference;
|
||||
%combinator h_xor;
|
||||
%combinator h_many;
|
||||
%combinator h_many1;
|
||||
%combinator h_sepBy;
|
||||
%combinator h_sepBy1;
|
||||
%combinator h_repeat_n;
|
||||
%combinator h_ignore;
|
||||
%combinator h_optional;
|
||||
%combinator h_epsilon_p;
|
||||
%combinator h_and;
|
||||
%combinator h_not;
|
||||
%combinator h_indirect;
|
||||
%combinator h_bind_indirect;
|
||||
|
||||
%include "../swig/hammer.i";
|
||||
|
||||
|
||||
%{
|
||||
SV* hpt_to_perl(const HParsedToken *token) {
|
||||
// All values that this function returns have a refcount of exactly 1.
|
||||
SV *ret;
|
||||
if (token == NULL) {
|
||||
return newSV(0); // Same as TT_NONE
|
||||
}
|
||||
switch (token->token_type) {
|
||||
case TT_NONE:
|
||||
return newSV(0);
|
||||
break;
|
||||
case TT_BYTES:
|
||||
return newSVpvn((char*)token->token_data.bytes.token, token->token_data.bytes.len);
|
||||
case TT_SINT:
|
||||
// TODO: return PyINT if appropriate
|
||||
return newSViv(token->token_data.sint);
|
||||
case TT_UINT:
|
||||
// TODO: return PyINT if appropriate
|
||||
return newSVuv(token->token_data.uint);
|
||||
case TT_SEQUENCE: {
|
||||
AV* aret = newAV();
|
||||
av_extend(aret, token->token_data.seq->used);
|
||||
for (int i = 0; i < token->token_data.seq->used; i++) {
|
||||
av_store(aret, i, hpt_to_perl(token->token_data.seq->elements[i]));
|
||||
}
|
||||
return newRV_noinc((SV*)aret);
|
||||
}
|
||||
default:
|
||||
if (token->token_type == h_tt_perl) {
|
||||
return SvREFCNT_inc((SV*)token->token_data.user);
|
||||
} else {
|
||||
return SWIG_NewPointerObj((void*)token, SWIGTYPE_p_HParsedToken_, 0 | 0);
|
||||
// TODO: support registry
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
HParser* ch(uint8_t chr) {
|
||||
return h_action(h_ch(chr), h__to_dual_char, NULL);
|
||||
}
|
||||
HParser* in(const uint8_t *charset, size_t length) {
|
||||
return h_action(h_in(charset, length), h__to_dual_char, NULL);
|
||||
}
|
||||
HParser* not_in(const uint8_t *charset, size_t length) {
|
||||
return h_action(h_not_in(charset, length), h__to_dual_char, NULL);
|
||||
}
|
||||
*/
|
||||
HParsedToken* h__to_char(const HParseResult* result, void* user_data) {
|
||||
assert(result != NULL);
|
||||
assert(result->ast != NULL);
|
||||
assert(result->ast->token_type == TT_UINT);
|
||||
|
||||
uint8_t buf = result->ast->token_data.uint;
|
||||
SV *sv = newSVpvn(&buf, 1);
|
||||
// This was a failed experiment; for now, you'll have to use ord yourself.
|
||||
//sv_setuv(sv, buf);
|
||||
//SvPOK_on(sv);
|
||||
|
||||
HParsedToken *res = h_arena_malloc(result->arena, sizeof(HParsedToken));
|
||||
res->token_type = h_tt_perl;
|
||||
res->token_data.user = sv;
|
||||
return res;
|
||||
}
|
||||
|
||||
static HParsedToken* call_action(const HParseResult *p, void* user_data ) {
|
||||
SV *func = (SV*)user_data;
|
||||
|
||||
dSP;
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
PUSHMARK(SP);
|
||||
if (p->ast != NULL) {
|
||||
mXPUSHs(hpt_to_perl(p->ast));
|
||||
} else {
|
||||
mXPUSHs(newSV(0));
|
||||
}
|
||||
PUTBACK;
|
||||
|
||||
int nret = call_sv(func, G_SCALAR);
|
||||
|
||||
SPAGAIN;
|
||||
if (nret != 1)
|
||||
croak("Expected 1 return value, got %d", nret);
|
||||
|
||||
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(*ret));
|
||||
memset(ret, 0, sizeof(*ret));
|
||||
ret->token_type = h_tt_perl;
|
||||
ret->token_data.user = SvREFCNT_inc(POPs);
|
||||
if (p->ast != NULL) {
|
||||
ret->index = p->ast->index;
|
||||
ret->bit_offset = p->ast->bit_offset;
|
||||
}
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int call_predicate(HParseResult *p, void* user_data) {
  // Predicate trampoline: calls the Perl sub passed as user_data with the
  // Perl form of the AST (undef when there is none) and returns the truth
  // value of the sub's single return value.
  SV *callback = (SV*)user_data;

  dSP;
  ENTER;
  SAVETMPS;
  PUSHMARK(SP);
  // One mortal argument: the converted AST, or undef.
  mXPUSHs(p->ast != NULL ? hpt_to_perl(p->ast) : newSV(0));
  PUTBACK;

  int count = call_sv(callback, G_SCALAR);

  SPAGAIN;
  if (count != 1)
    croak("Expected 1 return value, got %d", count);

  // Evaluate truthiness before the temporaries are released.
  SV* verdict = POPs;
  int accepted = SvTRUE(verdict);
  PUTBACK;
  FREETMPS;
  LEAVE;

  return accepted;
}
|
||||
|
||||
%}
|
||||
%inline {
  // Character-level parsers: each wraps the hammer primitive in an action
  // (h__to_char) that converts the matched value into a one-byte Perl string.
  HParser* ch(uint8_t chr) {
    HParser *matcher = h_ch(chr);
    return h_action(matcher, h__to_char, NULL);
  }
  HParser* ch_range(uint8_t c0, uint8_t c1) {
    HParser *matcher = h_ch_range(c0,c1);
    return h_action(matcher, h__to_char, NULL);
  }
  HParser* h__in(const uint8_t *charset, size_t length) {
    HParser *matcher = h_in(charset, length);
    return h_action(matcher, h__to_char, NULL);
  }
  HParser* h__not_in(const uint8_t *charset, size_t length) {
    HParser *matcher = h_not_in(charset, length);
    return h_action(matcher, h__to_char, NULL);
  }

  // Attach a Perl sub as a semantic action / validation predicate.
  // The sub's refcount is incremented here and nothing in this file
  // decrements it again.
  HParser* action(HParser *parser, SV* sub) {
    SV *callback = SvREFCNT_inc(sub);
    return h_action(parser, call_action, callback);
  }
  HParser* attr_bool(HParser *parser, SV* sub) {
    SV *callback = SvREFCNT_inc(sub);
    return h_attr_bool(parser, call_predicate, callback);
  }
}
|
||||
|
||||
%extend HParser_ {
    // $parser->parse($input): run the parser over the given bytes and return
    // the Perl form of the AST; croaks with "Parse failure" when the parse
    // does not succeed.
    // NOTE(review): the HParseResult is not freed here -- confirm whether
    // that is intentional.
    SV* parse(const uint8_t* input, size_t length) {
      SV* hpt_to_perl(const struct HParsedToken_ *token);
      HParseResult *res = h_parse($self, input, length);
      if (!res) {
        croak("Parse failure");
      }
      return hpt_to_perl(res->ast);
    }

    // $parser->compile($backend): true when h_compile reports 0.
    bool compile(HParserBackend backend) {
        return h_compile($self, backend, NULL) == 0;
    }
}
|
||||
|
||||
%perlcode %{
  # sequence(@parsers) / choice(@parsers): variadic front ends that pass
  # their arguments to the array-taking C entry points as one array ref.
  sub sequence {
    my @parsers = @_;
    return hammerc::h_sequence__a(\@parsers);
  }
  sub choice {
    my @parsers = @_;
    return hammerc::h_choice__a(\@parsers);
  }

  # in(@chars) / not_in(@chars): all arguments are concatenated into a
  # single character set string.
  sub in {
    my $charset = join('', @_);
    return h__in($charset);
  }
  sub not_in {
    my $charset = join('', @_);
    return h__not_in($charset);
  }
%}
|
||||
390
src/bindings/perl/t/hammer.t
Normal file
390
src/bindings/perl/t/hammer.t
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
# -*- cperl -*-
|
||||
use warnings;
|
||||
use strict;
|
||||
use Data::Dumper;
|
||||
use Test::More tests => 41;
|
||||
use hammer;
|
||||
|
||||
# differences from C version:
|
||||
|
||||
# - in takes any number of arguments, which are concatenated. This
|
||||
# makes ch_range irrelevant.
|
||||
#
|
||||
# - foo
|
||||
|
||||
|
||||
sub check_parse_eq {
  # Assert that $parser accepts $input and produces a structure deeply equal
  # to $expected. If parsing dies, the error is printed as a diagnostic and
  # a failing test is recorded.
  my ($parser, $input, $expected) = @_;
  my $actual = eval { $parser->parse($input) };
  if (my $error = $@) {
    diag($error);
    return ok($error eq "");  # always false here: records the failure
  }
  #diag(Dumper($actual));
  return is_deeply($actual, $expected);
}
|
||||
|
||||
sub check_parse_failed {
  # Assert that $parser rejects $input, i.e. that parse() dies.
  my ($parser, $input) = @_;
  my $ignored = eval { $parser->parse($input) };
  my $error = $@;
  return ok($error ne "");
}
|
||||
|
||||
subtest "token" => sub {
|
||||
my $parser = hammer::token("95\xa2");
|
||||
|
||||
check_parse_eq($parser, "95\xa2", "95\xa2");
|
||||
check_parse_failed($parser, "95");
|
||||
};
|
||||
|
||||
subtest "ch" => sub {
|
||||
my $parser = hammer::ch("\xa2");
|
||||
#check_parse_eq($parser, "\xa2", 0xa2);
|
||||
check_parse_eq($parser, "\xa2", "\xa2");
|
||||
check_parse_failed($parser, "\xa3");
|
||||
};
|
||||
|
||||
subtest "ch_range" => sub {
|
||||
# ch_range doesn't need to be part of hammer-perl; the equivalent
|
||||
# effect can be achieved with hammer::in('a'..'z')
|
||||
#
|
||||
# However, the function is provided just in case.
|
||||
my $parser = hammer::ch_range('a','c');
|
||||
check_parse_eq($parser, 'b', 'b');
|
||||
#check_parse_eq($parser, 'b', 0x62);
|
||||
check_parse_failed($parser, 'd');
|
||||
};
|
||||
|
||||
SKIP: {
  use integer;
  no warnings 'portable'; # I know the hex constants are not portable. that's why this test is skipped on <64 bit systems.
  # Ask perl directly how wide its integers are; the previous arithmetic
  # check (0x4000000 * 2 eq -1) could never be true, so nothing was skipped.
  require Config;
  skip "Needs 64-bit support", 2 if $Config::Config{ivsize} < 8;
  subtest "int64" => sub {
    my $parser = hammer::int64();
    check_parse_eq($parser, "\xff\xff\xff\xfe\x00\x00\x00\x00", -0x200000000);
    check_parse_failed($parser, "\xff\xff\xff\xfe\x00\x00\x00");
  };
  subtest "uint64" => sub {
    my $parser = hammer::uint64();
    check_parse_eq($parser, "\x00\x00\x00\x02\x00\x00\x00\x00", 0x200000000);
    check_parse_failed($parser, "\x00\x00\x00\x02\x00\x00\x00");
  };
}
|
||||
|
||||
subtest "int32" => sub {
|
||||
my $parser = hammer::int32();
|
||||
check_parse_eq($parser, "\xff\xfe\x00\x00", -0x20000);
|
||||
check_parse_eq($parser, "\x00\x02\x00\x00", 0x20000);
|
||||
check_parse_failed($parser, "\xff\xfe\x00");
|
||||
check_parse_failed($parser, "\x00\x02\x00");
|
||||
};
|
||||
|
||||
subtest "uint32" => sub {
|
||||
my $parser = hammer::uint32();
|
||||
check_parse_eq($parser, "\x00\x02\x00\x00", 0x20000);
|
||||
check_parse_failed($parser, "\x00\x02\x00")
|
||||
};
|
||||
|
||||
subtest "int16" => sub {
|
||||
my $parser = hammer::int16();
|
||||
check_parse_eq($parser, "\xfe\x00", -0x200);
|
||||
check_parse_eq($parser, "\x02\x00", 0x200);
|
||||
check_parse_failed($parser, "\xfe");
|
||||
check_parse_failed($parser, "\x02");
|
||||
};
|
||||
|
||||
subtest "uint16" => sub {
|
||||
my $parser = hammer::uint16();
|
||||
check_parse_eq($parser, "\x02\x00", 0x200);
|
||||
check_parse_failed($parser, "\x02");
|
||||
};
|
||||
|
||||
subtest "int8" => sub {
|
||||
my $parser = hammer::int8();
|
||||
check_parse_eq($parser, "\x88", -0x78);
|
||||
check_parse_failed($parser, "");
|
||||
};
|
||||
|
||||
subtest "uint8" => sub {
|
||||
my $parser = hammer::uint8();
|
||||
check_parse_eq($parser, "\x78", 0x78);
|
||||
check_parse_failed($parser, "");
|
||||
};
|
||||
|
||||
subtest "int_range" => sub { # test 12
|
||||
my $parser = hammer::int_range(hammer::uint8(), 3, 10);
|
||||
check_parse_eq($parser, "\x05", 5);
|
||||
check_parse_failed($parser, "\x0b");
|
||||
};
|
||||
|
||||
subtest "whitespace" => sub {
|
||||
my $parser = hammer::whitespace(hammer::ch('a'));
|
||||
check_parse_eq($parser, "a", "a");
|
||||
check_parse_eq($parser, " a", "a");
|
||||
check_parse_eq($parser, " a", "a");
|
||||
check_parse_eq($parser, "\t\n\ra", "a");
|
||||
};
|
||||
|
||||
subtest "whitespace-end" => sub {
|
||||
my $parser = hammer::whitespace(hammer::end_p());
|
||||
check_parse_eq($parser, "", undef);
|
||||
check_parse_eq($parser, " ", undef);
|
||||
check_parse_failed($parser, " x", undef)
|
||||
};
|
||||
|
||||
subtest "left" => sub { # test 15
|
||||
my $parser = hammer::left(hammer::ch('a'),
|
||||
hammer::ch(' '));
|
||||
check_parse_eq($parser, "a ", "a");
|
||||
check_parse_failed($parser, "a");
|
||||
check_parse_failed($parser, " ");
|
||||
};
|
||||
|
||||
subtest "right" => sub {
|
||||
my $parser = hammer::right(hammer::ch(' '),
|
||||
hammer::ch('a'));
|
||||
check_parse_eq($parser, " a", "a");
|
||||
check_parse_failed($parser, "a");
|
||||
check_parse_failed($parser, " ");
|
||||
};
|
||||
|
||||
subtest "middle" => sub {
|
||||
my $parser = hammer::middle(hammer::ch(' '),
|
||||
hammer::ch('a'),
|
||||
hammer::ch(' '));
|
||||
check_parse_eq($parser, " a ", "a");
|
||||
for my $test_string (split('/', "a/ / a/a / b /ba / ab")) {
|
||||
check_parse_failed($parser, $test_string);
|
||||
}
|
||||
};
|
||||
|
||||
subtest "action" => sub {
|
||||
my $parser = hammer::action(hammer::sequence(hammer::choice(hammer::ch('a'),
|
||||
hammer::ch('A')),
|
||||
hammer::choice(hammer::ch('b'),
|
||||
hammer::ch('B'))),
|
||||
sub { [map(uc, @{+shift})]; });
|
||||
check_parse_eq($parser, "ab", ['A', 'B']);
|
||||
check_parse_eq($parser, "AB", ['A', 'B']);
|
||||
check_parse_eq($parser, 'Ab', ['A', 'B']);
|
||||
check_parse_failed($parser, "XX");
|
||||
};
|
||||
|
||||
|
||||
subtest "in" => sub {
|
||||
my $parser = hammer::in('a'..'c');
|
||||
check_parse_eq($parser, 'a', 'a');
|
||||
check_parse_eq($parser, 'b', 'b');
|
||||
check_parse_eq($parser, 'c', 'c');
|
||||
check_parse_failed($parser, 'd');
|
||||
};
|
||||
|
||||
subtest "not_in" => sub { # test 20
|
||||
my $parser = hammer::not_in('a'..'c');
|
||||
check_parse_failed($parser, 'a');
|
||||
check_parse_failed($parser, 'b');
|
||||
check_parse_failed($parser, 'c');
|
||||
check_parse_eq($parser, 'd', 'd');
|
||||
};
|
||||
|
||||
subtest "end_p" => sub {
|
||||
my $parser = hammer::sequence(hammer::ch('a'), hammer::end_p());
|
||||
check_parse_eq($parser, 'a', ['a']);
|
||||
check_parse_failed($parser, 'aa');
|
||||
};
|
||||
|
||||
subtest "nothing_p" => sub {
|
||||
my $parser = hammer::nothing_p();
|
||||
check_parse_failed($parser, "");
|
||||
check_parse_failed($parser, "foo");
|
||||
};
|
||||
|
||||
subtest "sequence" => sub {
|
||||
my $parser = hammer::sequence(hammer::ch('a'), hammer::ch('b'));
|
||||
check_parse_eq($parser, "ab", ['a','b']);
|
||||
check_parse_failed($parser, 'a');
|
||||
check_parse_failed($parser, 'b');
|
||||
};
|
||||
|
||||
subtest "sequence-whitespace" => sub {
|
||||
my $parser = hammer::sequence(hammer::ch('a'),
|
||||
hammer::whitespace(hammer::ch('b')));
|
||||
check_parse_eq($parser, "ab", ['a', 'b']);
|
||||
check_parse_eq($parser, "a b", ['a', 'b']);
|
||||
check_parse_eq($parser, "a b", ['a', 'b']);
|
||||
check_parse_failed($parser, "a c");
|
||||
};
|
||||
|
||||
subtest "choice" => sub { # test 25
|
||||
my $parser = hammer::choice(hammer::ch('a'),
|
||||
hammer::ch('b'));
|
||||
check_parse_eq($parser, 'a', 'a');
|
||||
check_parse_eq($parser, 'b', 'b');
|
||||
check_parse_failed($parser, 'c');
|
||||
};
|
||||
|
||||
subtest "butnot" => sub {
|
||||
my $parser = hammer::butnot(hammer::ch('a'), hammer::token('ab'));
|
||||
check_parse_eq($parser, 'a', 'a');
|
||||
check_parse_eq($parser, 'aa', 'a');
|
||||
check_parse_failed($parser, 'ab');
|
||||
};
|
||||
|
||||
subtest "butnot-range" => sub {
|
||||
my $parser = hammer::butnot(hammer::ch_range('0', '9'), hammer::ch('6'));
|
||||
check_parse_eq($parser, '4', '4');
|
||||
check_parse_failed($parser, '6');
|
||||
};
|
||||
|
||||
subtest "difference" => sub {
|
||||
my $parser = hammer::difference(hammer::token('ab'),
|
||||
hammer::ch('a'));
|
||||
check_parse_eq($parser, 'ab', 'ab');
|
||||
check_parse_failed($parser, 'a');
|
||||
};
|
||||
|
||||
subtest "xor" => sub {
|
||||
my $parser = hammer::xor(hammer::in('0'..'6'),
|
||||
hammer::in('5'..'9'));
|
||||
check_parse_eq($parser, '0', '0');
|
||||
check_parse_eq($parser, '9', '9');
|
||||
check_parse_failed($parser, '5');
|
||||
check_parse_failed($parser, 'a');
|
||||
};
|
||||
|
||||
subtest "many" => sub { # test 30
|
||||
my $parser = hammer::many(hammer::in('ab'));
|
||||
check_parse_eq($parser, '', []);
|
||||
check_parse_eq($parser, 'a', ['a']);
|
||||
check_parse_eq($parser, 'b', ['b']);
|
||||
check_parse_eq($parser, 'aabbaba', [qw/a a b b a b a/]);
|
||||
};
|
||||
|
||||
subtest "many1" => sub {
|
||||
my $parser = hammer::many1(hammer::in('ab'));
|
||||
check_parse_eq($parser, 'a', ['a']);
|
||||
check_parse_eq($parser, 'b', ['b']);
|
||||
check_parse_eq($parser, 'aabbaba', [qw/a a b b a b a/]);
|
||||
check_parse_failed($parser, '');
|
||||
check_parse_failed($parser, 'daabbabadef');
|
||||
};
|
||||
subtest "repeat_n" => sub {
|
||||
my $parser = hammer::repeat_n(hammer::in('ab'), 2);
|
||||
check_parse_eq($parser, 'abdef', ['a','b']);
|
||||
check_parse_failed($parser, 'adef');
|
||||
};
|
||||
|
||||
subtest "optional" => sub {
  # 'a', then optionally one of 'b'/'c', then 'd'. A missing optional
  # element shows up as undef in the result.
  my $parser = hammer::sequence(hammer::ch('a'),
                                hammer::optional(hammer::in('bc')),
                                hammer::ch('d'));
  check_parse_eq($parser, 'abd', [qw/a b d/]);
  # Exercise the second alternative too (this line used to repeat 'abd').
  check_parse_eq($parser, 'acd', [qw/a c d/]);
  check_parse_eq($parser, 'ad', ['a',undef,'d']);
  check_parse_failed($parser, 'aed');
  check_parse_failed($parser, 'ab');
  check_parse_failed($parser, 'ac');
};
|
||||
|
||||
subtest "ignore" => sub {
|
||||
my $parser = hammer::sequence(hammer::ch('a'),
|
||||
hammer::ignore(hammer::ch('b')),
|
||||
hammer::ch('c'));
|
||||
check_parse_eq($parser, "abc", ['a','c']);
|
||||
check_parse_failed($parser, 'ac');
|
||||
};
|
||||
|
||||
subtest "sepBy" => sub { # Test 35
|
||||
my $parser = hammer::sepBy(hammer::in('1'..'3'),
|
||||
hammer::ch(','));
|
||||
check_parse_eq($parser, '1,2,3', ['1','2','3']);
|
||||
check_parse_eq($parser, '1,3,2', ['1','3','2']);
|
||||
check_parse_eq($parser, '1,3', ['1','3']);
|
||||
check_parse_eq($parser, '3', ['3']);
|
||||
check_parse_eq($parser, '', []);
|
||||
};
|
||||
|
||||
subtest "sepBy1" => sub {
|
||||
my $parser = hammer::sepBy1(hammer::in("123"),
|
||||
hammer::ch(','));
|
||||
check_parse_eq($parser, '1,2,3', ['1','2','3']);
|
||||
check_parse_eq($parser, '1,3,2', ['1','3','2']);
|
||||
check_parse_eq($parser, '1,3', ['1','3']);
|
||||
check_parse_eq($parser, '3', ['3']);
|
||||
check_parse_failed($parser, '');
|
||||
};
|
||||
|
||||
subtest "epsilon" => sub {
|
||||
check_parse_eq(hammer::sequence(hammer::ch('a'),
|
||||
hammer::epsilon_p(),
|
||||
hammer::ch('b')),
|
||||
'ab', ['a','b']);
|
||||
check_parse_eq(hammer::sequence(hammer::epsilon_p(),
|
||||
hammer::ch('a')),
|
||||
'a', ['a']);
|
||||
check_parse_eq(hammer::sequence(hammer::ch('a'),
|
||||
hammer::epsilon_p()),
|
||||
'a', ['a']);
|
||||
};
|
||||
|
||||
|
||||
subtest "attr_bool" => sub {
|
||||
my $parser = hammer::attr_bool(hammer::many1(hammer::in('ab')),
|
||||
sub { my ($a, $b) = @{+shift}; $a eq $b });
|
||||
check_parse_eq($parser, "aa", ['a','a']);
|
||||
check_parse_eq($parser, "bb", ['b','b']);
|
||||
check_parse_failed($parser, "ab");
|
||||
};
|
||||
|
||||
subtest "and" => sub {
|
||||
check_parse_eq(hammer::sequence(hammer::and(hammer::ch('0')),
|
||||
hammer::ch('0')),
|
||||
'0', ['0']);
|
||||
check_parse_failed(hammer::sequence(hammer::and(hammer::ch('0')),
|
||||
hammer::ch('1')),
|
||||
'0');
|
||||
my $parser = hammer::sequence(hammer::ch('1'),
|
||||
hammer::and(hammer::ch('2')));
|
||||
check_parse_eq($parser, '12', ['1']);
|
||||
check_parse_failed($parser, '1');
|
||||
check_parse_failed($parser, '13');
|
||||
};
|
||||
|
||||
subtest "not" => sub { # test 40
|
||||
# This is not how you'd *actually* write the parser for this
|
||||
# language; in case of Packrat, it's better to swap the order of the
|
||||
# arguments, and for other backends, the problem doesn't appear at
|
||||
# all.
|
||||
my $parser = hammer::sequence(hammer::ch('a'),
|
||||
hammer::choice(hammer::ch('+'),
|
||||
hammer::token('++')),
|
||||
hammer::ch('b'));
|
||||
check_parse_eq($parser, 'a+b', ['a','+','b']);
|
||||
check_parse_failed($parser, 'a++b'); # ordered choice
|
||||
|
||||
$parser = hammer::sequence(hammer::ch('a'),
|
||||
hammer::choice(hammer::sequence(hammer::ch('+'),
|
||||
hammer::not(hammer::ch('+'))),
|
||||
hammer::token('++')),
|
||||
hammer::ch('b'));
|
||||
check_parse_eq($parser, 'a+b', ['a',['+'],'b']);
|
||||
check_parse_eq($parser, 'a++b', ['a', '++', 'b']);
|
||||
};
|
||||
|
||||
subtest "rightrec" => sub {
|
||||
my $parser = hammer::indirect();
|
||||
hammer::bind_indirect($parser,
|
||||
hammer::choice(hammer::sequence(hammer::ch('a'),
|
||||
$parser),
|
||||
hammer::epsilon_p));
|
||||
check_parse_eq($parser, 'a', ['a']);
|
||||
check_parse_eq($parser, 'aa', ['a', ['a']]);
|
||||
check_parse_eq($parser, 'aaa', ['a', ['a', ['a']]]);
|
||||
};
|
||||
|
||||
|
|
@ -15,8 +15,8 @@ pytestenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0]))
|
|||
pytests = ['hammer_tests.py']
|
||||
pytestexec = pytestenv.Command(['hammer.pyc', 'hammer_tests.pyc'], pytests + libhammer_python, "LD_LIBRARY_PATH=" + os.path.dirname(str(libhammer_shared[0])) + " nosetests -vv $SOURCE")
|
||||
pytest = Alias("testpython", [pytestexec], pytestexec)
|
||||
AlwaysBuild(pytest)
|
||||
testruns.append(pytest)
|
||||
AlwaysBuild(pytestexec)
|
||||
testruns.extend(pytest)
|
||||
|
||||
pyinstallexec = pythonenv.Command(None, libhammer_python, 'python ' + os.path.join(pydir, 'setup.py ') + ' install')
|
||||
pyinstall = Alias("installpython", [pyinstallexec], pyinstallexec)
|
||||
|
|
|
|||
|
|
@ -1,488 +0,0 @@
|
|||
from cffi import FFI
|
||||
import threading
|
||||
import sys
|
||||
|
||||
_ffi = FFI()
|
||||
|
||||
# {{{ Types
|
||||
|
||||
_ffi.cdef("typedef struct HAllocator_ HAllocator;")
|
||||
_ffi.cdef("typedef struct HArena_ HArena;")
|
||||
_ffi.cdef("typedef int bool;")
|
||||
_ffi.cdef("typedef struct HParseState_ HParseState;")
|
||||
_ffi.cdef("""
|
||||
typedef enum HParserBackend_ {
|
||||
PB_MIN = 0,
|
||||
PB_PACKRAT = 0, // PB_MIN is always the default.
|
||||
PB_REGULAR,
|
||||
PB_LLk,
|
||||
PB_LALR,
|
||||
PB_GLR
|
||||
// TODO: support PB_MAX
|
||||
} HParserBackend;
|
||||
""")
|
||||
_ffi.cdef("""
|
||||
typedef enum HTokenType_ {
|
||||
// Before you change the explicit values of these, think of the poor bindings ;_;
|
||||
TT_NONE = 1,
|
||||
TT_BYTES = 2,
|
||||
TT_SINT = 4,
|
||||
TT_UINT = 8,
|
||||
TT_SEQUENCE = 16,
|
||||
TT_RESERVED_1, // reserved for backend-specific internal use
|
||||
TT_ERR = 32,
|
||||
TT_USER = 64,
|
||||
TT_MAX
|
||||
} HTokenType;
|
||||
""")
|
||||
_ffi.cdef("""
|
||||
typedef struct HCountedArray_ {
|
||||
size_t capacity;
|
||||
size_t used;
|
||||
HArena * arena;
|
||||
struct HParsedToken_ **elements;
|
||||
} HCountedArray;
|
||||
""")
|
||||
_ffi.cdef("""
|
||||
typedef struct HBytes_ {
|
||||
const uint8_t *token;
|
||||
size_t len;
|
||||
} HBytes;
|
||||
""")
|
||||
_ffi.cdef("""
|
||||
typedef struct HParsedToken_ {
|
||||
HTokenType token_type;
|
||||
union {
|
||||
HBytes bytes;
|
||||
int64_t sint;
|
||||
uint64_t uint;
|
||||
double dbl;
|
||||
float flt;
|
||||
HCountedArray *seq; // a sequence of HParsedToken's
|
||||
void *user;
|
||||
};
|
||||
size_t index;
|
||||
char bit_offset;
|
||||
} HParsedToken;
|
||||
""")
|
||||
_ffi.cdef("""
|
||||
typedef struct HParseResult_ {
|
||||
const HParsedToken *ast;
|
||||
long long bit_length;
|
||||
HArena * arena;
|
||||
} HParseResult;
|
||||
""")
|
||||
|
||||
_ffi.cdef("""typedef HParsedToken* (*HAction)(const HParseResult *p);""")
|
||||
_ffi.cdef("""typedef bool (*HPredicate)(HParseResult *p);""")
|
||||
_ffi.cdef("""
|
||||
typedef struct HCFChoice_ HCFChoice;
|
||||
typedef struct HRVMProg_ HRVMProg;
|
||||
typedef struct HParserVtable_ HParserVtable;
|
||||
""")
|
||||
|
||||
_ffi.cdef("typedef struct HParser_ HParser;")
|
||||
_ffi.cdef("""
|
||||
typedef struct HParserTestcase_ {
|
||||
unsigned char* input;
|
||||
size_t length;
|
||||
char* output_unambiguous;
|
||||
} HParserTestcase;
|
||||
|
||||
typedef struct HCaseResult_ {
|
||||
bool success;
|
||||
union {
|
||||
const char* actual_results; // on failure, filled in with the results of h_write_result_unamb
|
||||
size_t parse_time; // on success, filled in with time for a single parse, in nsec
|
||||
};
|
||||
} HCaseResult;
|
||||
|
||||
typedef struct HBackendResults_ {
|
||||
HParserBackend backend;
|
||||
bool compile_success;
|
||||
size_t n_testcases;
|
||||
size_t failed_testcases; // actually a count...
|
||||
HCaseResult *cases;
|
||||
} HBackendResults;
|
||||
|
||||
typedef struct HBenchmarkResults_ {
|
||||
size_t len;
|
||||
HBackendResults *results;
|
||||
} HBenchmarkResults;
|
||||
""")
|
||||
|
||||
# }}}
|
||||
# {{{ Arena functions
|
||||
_ffi.cdef("void* h_arena_malloc(HArena *arena, size_t count);")
|
||||
_ffi.cdef("void h_arena_free(HArena *arena, void* ptr);")
|
||||
# }}}
|
||||
# {{{ cdefs
|
||||
## The following section was generated by
|
||||
## $ perl ../desugar-header.pl <../../hammer.h |sed -e 's/.*/_ffi.cdef("&")/'
|
||||
_ffi.cdef("HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);")
|
||||
_ffi.cdef("HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length);")
|
||||
_ffi.cdef("HParser* h_token(const uint8_t *str, const size_t len);")
|
||||
_ffi.cdef("HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len);")
|
||||
_ffi.cdef("HParser* h_ch(const uint8_t c);")
|
||||
_ffi.cdef("HParser* h_ch__m(HAllocator* mm__, const uint8_t c);")
|
||||
_ffi.cdef("HParser* h_ch_range(const uint8_t lower, const uint8_t upper);")
|
||||
_ffi.cdef("HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper);")
|
||||
_ffi.cdef("HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);")
|
||||
_ffi.cdef("HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper);")
|
||||
_ffi.cdef("HParser* h_bits(size_t len, bool sign);")
|
||||
_ffi.cdef("HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign);")
|
||||
_ffi.cdef("HParser* h_int64(void);")
|
||||
_ffi.cdef("HParser* h_int64__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_int32(void);")
|
||||
_ffi.cdef("HParser* h_int32__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_int16(void);")
|
||||
_ffi.cdef("HParser* h_int16__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_int8(void);")
|
||||
_ffi.cdef("HParser* h_int8__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_uint64(void);")
|
||||
_ffi.cdef("HParser* h_uint64__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_uint32(void);")
|
||||
_ffi.cdef("HParser* h_uint32__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_uint16(void);")
|
||||
_ffi.cdef("HParser* h_uint16__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_uint8(void);")
|
||||
_ffi.cdef("HParser* h_uint8__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_whitespace(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_whitespace__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_left(const HParser* p, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_right(const HParser* p, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q);")
|
||||
_ffi.cdef("HParser* h_action(const HParser* p, const HAction a);")
|
||||
_ffi.cdef("HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a);")
|
||||
_ffi.cdef("HParser* h_in(const uint8_t *charset, size_t length);")
|
||||
_ffi.cdef("HParser* h_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);")
|
||||
_ffi.cdef("HParser* h_not_in(const uint8_t *charset, size_t length);")
|
||||
_ffi.cdef("HParser* h_not_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);")
|
||||
_ffi.cdef("HParser* h_end_p(void);")
|
||||
_ffi.cdef("HParser* h_end_p__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_nothing_p(void);")
|
||||
_ffi.cdef("HParser* h_nothing_p__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_sequence(HParser* p, ...);")
|
||||
_ffi.cdef("HParser* h_sequence__m(HAllocator *mm__, HParser* p, ...);")
|
||||
_ffi.cdef("HParser* h_sequence__a(void* args);")
|
||||
_ffi.cdef("HParser* h_sequence__ma(HAllocator* mm__, void* args);")
|
||||
_ffi.cdef("HParser* h_choice(HParser* p, ...);")
|
||||
_ffi.cdef("HParser* h_choice__m(HAllocator *mm__, HParser* p, ...);")
|
||||
_ffi.cdef("HParser* h_choice__a(void* args);")
|
||||
_ffi.cdef("HParser* h_choice__ma(HAllocator* mm__, void* args);")
|
||||
_ffi.cdef("HParser* h_butnot(const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_difference(const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_xor(const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
|
||||
_ffi.cdef("HParser* h_many(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_many__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_many1(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_many1__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_repeat_n(const HParser* p, const size_t n);")
|
||||
_ffi.cdef("HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n);")
|
||||
_ffi.cdef("HParser* h_optional(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_optional__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_ignore(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_ignore__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_sepBy(const HParser* p, const HParser* sep);")
|
||||
_ffi.cdef("HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep);")
|
||||
_ffi.cdef("HParser* h_sepBy1(const HParser* p, const HParser* sep);")
|
||||
_ffi.cdef("HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep);")
|
||||
_ffi.cdef("HParser* h_epsilon_p(void);")
|
||||
_ffi.cdef("HParser* h_epsilon_p__m(HAllocator* mm__);")
|
||||
_ffi.cdef("HParser* h_length_value(const HParser* length, const HParser* value);")
|
||||
_ffi.cdef("HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value);")
|
||||
_ffi.cdef("HParser* h_attr_bool(const HParser* p, HPredicate pred);")
|
||||
_ffi.cdef("HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred);")
|
||||
_ffi.cdef("HParser* h_and(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_and__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_not(const HParser* p);")
|
||||
_ffi.cdef("HParser* h_not__m(HAllocator* mm__, const HParser* p);")
|
||||
_ffi.cdef("HParser* h_indirect(void);")
|
||||
_ffi.cdef("HParser* h_indirect__m(HAllocator* mm__);")
|
||||
_ffi.cdef("void h_bind_indirect(HParser* indirect, const HParser* inner);")
|
||||
_ffi.cdef("void h_bind_indirect__m(HAllocator* mm__, HParser* indirect, const HParser* inner);")
|
||||
_ffi.cdef("void h_parse_result_free(HParseResult *result);")
|
||||
_ffi.cdef("void h_parse_result_free__m(HAllocator* mm__, HParseResult *result);")
|
||||
_ffi.cdef("void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);")
|
||||
_ffi.cdef("int h_compile(HParser* parser, HParserBackend backend, const void* params);")
|
||||
_ffi.cdef("int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params);")
|
||||
_ffi.cdef("HBenchmarkResults * h_benchmark(HParser* parser, HParserTestcase* testcases);")
|
||||
_ffi.cdef("HBenchmarkResults * h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases);")
|
||||
|
||||
_lib = _ffi.verify("#include <hammer/hammer.h>",
|
||||
libraries=['hammer'])
|
||||
|
||||
_lib.TT_PYTHON = _lib.TT_USER # TODO: Use the token type allocator from #45
|
||||
# }}}
|
||||
class _DynamicScopeHolder(threading.local):
|
||||
"""A dynamically-scoped holder of python objects, which may or may not
|
||||
otherwise appear in the object graph. Intended for use with CFFI """
|
||||
def __init__(self):
|
||||
self._ctxstack = []
|
||||
def __enter__(self):
|
||||
self._ctxstack.append([])
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self._ctxstack.pop()
|
||||
return False
|
||||
def stash(self, *objs):
|
||||
if len(self._ctxstack) < 1:
|
||||
raise Exception("Not in any dynamic scope")
|
||||
for obj in objs:
|
||||
self._ctxstack[-1].append(obj)
|
||||
def _fromHParsedToken(cobj):
    """Convert a C ``HParsedToken*`` into the corresponding Python value.

    Returns a byte string for TT_BYTES, an integer for TT_SINT and TT_UINT,
    a list of recursively converted elements for TT_SEQUENCE, and the
    original Python object for TT_PYTHON. A NULL pointer, TT_NONE, TT_ERR
    and any other token type all yield None.
    """
    # TODO: Free the toplevel parser
    if cobj == _ffi.NULL:
        # There is no token to inspect; reading token_type would
        # dereference a NULL pointer.
        return None

    tt = cobj.token_type
    if tt == _lib.TT_BYTES:
        # Slicing the buffer copies the bytes into a Python string.
        return _ffi.buffer(cobj.bytes.token, cobj.bytes.len)[:]
    if tt == _lib.TT_SEQUENCE:
        seq = cobj.seq
        return [_fromHParsedToken(seq.elements[i]) for i in range(seq.used)]
    if tt == _lib.TT_SINT:
        return cobj.sint
    if tt == _lib.TT_UINT:
        return cobj.uint
    if tt == _lib.TT_PYTHON:
        return _ffi.from_handle(cobj.user)
    # TT_NONE, TT_ERR (its payload is not interpreted here) and any token
    # type this module does not know about.
    return None
|
||||
|
||||
# Scope holder that Parser.parse enters for the duration of a parse;
# _toHParsedToken stashes the CFFI handles it creates in it.
_parser_result_holder = _DynamicScopeHolder()
|
||||
def _toHParsedToken(arena, pyobj):
    """Wrap *pyobj* in a TT_PYTHON ``HParsedToken`` allocated from *arena*.

    None is mapped to a NULL pointer. For any other object a CFFI handle is
    created, stashed in the current dynamic scope, and stored in the token's
    ``user`` field.
    """
    if pyobj is None:
        return _ffi.NULL

    handle = _ffi.new_handle(pyobj)
    # Stash the handle so that it stays referenced while the scope is open.
    _parser_result_holder.stash(handle)

    raw = _lib.h_arena_malloc(arena, _ffi.sizeof("HParsedToken"))
    token = _ffi.cast("HParsedToken*", raw)
    token.token_type = _lib.TT_PYTHON
    token.user = handle
    token.bit_offset = chr(127)
    token.index = 0
    return token
|
||||
|
||||
def _fromParseResult(cobj):
    """Convert the AST of an ``HParseResult*`` to Python, then free the result."""
    value = _fromHParsedToken(cobj.ast)
    # The C result is released only after its AST has been converted.
    _lib.h_parse_result_free(cobj)
    return value
|
||||
|
||||
def _to_haction(fn):
    """Turn a function that transforms a parsed value into an HAction.

    *fn* is called with the Python form of the parse result's AST (None when
    there is no AST). Its return value is wrapped in a new token allocated
    from the result's arena; returning None produces a NULL token. The new
    token inherits ``index`` and ``bit_offset`` from the original AST.
    """
    def action(parse_result):
        ast = parse_result.ast
        # Convert the AST without freeing parse_result: its arena and AST
        # are still used below.
        value = None if ast == _ffi.NULL else _fromHParsedToken(ast)
        res = _toHParsedToken(parse_result.arena, fn(value))
        if res != _ffi.NULL and ast != _ffi.NULL:
            res.index = ast.index
            res.bit_offset = ast.bit_offset
        return res
    return _ffi.callback("HParsedToken*(HParseResult*)", action)
|
||||
|
||||
def _to_hpredicate(fn):
    """Turn a function that tests a parsed value into an HPredicate.

    *fn* is called with the Python form of the parse result's AST (None when
    there is no AST) and must return a bool; any other return type raises
    TypeError inside the callback.
    """
    def predicate(parse_result):
        ast = parse_result.ast
        # NOTE(review): the result is not freed here -- presumably hammer
        # still uses it after the predicate returns; confirm against
        # h_attr_bool.
        value = None if ast == _ffi.NULL else _fromHParsedToken(ast)
        res = fn(value)
        # TODO: Handle exceptions; parse should fail.
        if not isinstance(res, bool):
            raise TypeError("Predicates should return a bool")
        return res
    return _ffi.callback("bool(HParseResult*)", predicate)
|
||||
|
||||
class Parser(object):
    """Python wrapper around a C ``HParser*``.

    ``_parser`` is the FFI object handed to the C library. ``_deps`` is
    stored but never read by this class; the constructors in this module
    pass the sub-parsers and callbacks the C parser was built from.
    """
    # TODO: Map these to individually garbage-collected blocks of
    # memory. Perhaps with an arena allocator with block size of 1?
    # There has to be something more efficient than that, though.

    # TODO: How do we handle encodings? By default, we're using UTF-8
    def __init__(self, internal, deps):
        """Create a new parser from an FFI object. Not for user code"""
        self._parser = internal
        self._deps = deps

    def parse(self, string):
        """Parse *string* and return the converted result.

        Unicode input is encoded as UTF-8 first, the same way token()
        treats its argument. Returns None when the parse fails.
        """
        if isinstance(string, unicode):
            string = string.encode("utf-8")
        # Handles created by actions during the parse are stashed in this
        # scope and released when it exits.
        with _parser_result_holder:
            pres = _lib.h_parse(self._parser, string, len(string))
            if not pres:
                return None
            return _fromParseResult(pres)

    def __mul__(self, count):
        """``parser * count`` is shorthand for ``repeat_n(parser, count)``."""
        return repeat_n(self, count)
|
||||
|
||||
|
||||
|
||||
class IndirectParser(Parser):
    """Parser whose inner parser is attached after construction via bind()."""

    def bind(self, inner):
        """Bind *inner* to this parser and record it as the only dependency."""
        target = inner._parser
        _lib.h_bind_indirect(self._parser, target)
        self._deps = (inner,)
|
||||
|
||||
class BitsParser(Parser):
    """Marker subclass for parsers created by bits(); int_range() checks for it."""
|
||||
|
||||
def token(token):
    """Return a parser built by h_token for the byte string *token*.

    Unicode input is encoded as UTF-8 before being handed to the C library.
    """
    if isinstance(token, unicode):
        token = token.encode("utf-8")
    # Give C a private copy of the bytes and keep that copy in the parser's
    # deps, so the pointer stays valid even though the argument is not
    # cloned on the C side.
    buf = _ffi.new("char[]", token)
    return Parser(_lib.h_token(_ffi.cast("uint8_t*", buf), len(token)), (buf,))
|
||||
|
||||
def ch(char):
    """Build a parser from a single character.

    An int is passed to h_ch; any other argument is delegated to token().
    """
    if not isinstance(char, int):
        return token(char)
    return Parser(_lib.h_ch(char), ())
|
||||
|
||||
def ch_range(chr1, chr2):
    """Build an h_ch_range parser from the ordinals of *chr1* and *chr2*.

    Both bounds must be ``str`` instances, otherwise TypeError is raised.
    The range parser is wrapped in an action that returns its value unchanged.
    """
    if not (isinstance(chr1, str) and isinstance(chr2, str)):
        raise TypeError("ch_range can't handle unicode")

    def my_action(pr):
        # Identity action: hands the parsed value straight back.
        return pr

    range_parser = Parser(_lib.h_ch_range(ord(chr1), ord(chr2)), ())
    return action(range_parser, my_action)
|
||||
|
||||
def int_range(parser, i1, i2):
    """Build an h_int_range parser from *parser* and the bounds *i1*, *i2*.

    *parser* must be a BitsParser (subclasses included); anything else
    raises TypeError.
    """
    if not isinstance(parser, BitsParser):
        raise TypeError("int_range is only valid when used with a bits parser")
    return Parser(_lib.h_int_range(parser._parser, i1, i2), (parser,))
|
||||
|
||||
def bits(length, signedp):
    """Return a BitsParser built by h_bits from *length* and *signedp*."""
    raw = _lib.h_bits(length, signedp)
    return BitsParser(raw, ())
|
||||
|
||||
def int64():
    """Shorthand for bits(64, True)."""
    return bits(64, True)


def int32():
    """Shorthand for bits(32, True)."""
    return bits(32, True)


def int16():
    """Shorthand for bits(16, True)."""
    return bits(16, True)


def int8():
    """Shorthand for bits(8, True)."""
    return bits(8, True)


def uint64():
    """Shorthand for bits(64, False)."""
    return bits(64, False)


def uint32():
    """Shorthand for bits(32, False)."""
    return bits(32, False)


def uint16():
    """Shorthand for bits(16, False)."""
    return bits(16, False)


def uint8():
    """Shorthand for bits(8, False)."""
    return bits(8, False)
|
||||
|
||||
def whitespace(p):
    """Return the h_whitespace parser built from *p*."""
    raw = _lib.h_whitespace(p._parser)
    return Parser(raw, (p,))


def left(p1, p2):
    """Return the h_left parser built from *p1* and *p2*."""
    raw = _lib.h_left(p1._parser, p2._parser)
    return Parser(raw, (p1, p2))


def right(p1, p2):
    """Return the h_right parser built from *p1* and *p2*."""
    raw = _lib.h_right(p1._parser, p2._parser)
    return Parser(raw, (p1, p2))


def middle(p1, p2, p3):
    """Return the h_middle parser built from *p1*, *p2* and *p3*."""
    raw = _lib.h_middle(p1._parser, p2._parser, p3._parser)
    return Parser(raw, (p1, p2, p3))
|
||||
def action(parser, action):
    """Return the h_action parser that applies the callable *action* to *parser*.

    The callable is converted to a C callback, which is stored in the new
    parser's deps together with *parser*.
    """
    callback = _to_haction(action)
    raw = _lib.h_action(parser._parser, callback)
    return Parser(raw, (parser, callback))
|
||||
|
||||
def in_(charset):
    """Return the h_in parser for *charset*, which must be a ``str``."""
    if isinstance(charset, str):
        return Parser(_lib.h_in(charset, len(charset)), ())
    # TODO/Python3: change str to bytes
    raise TypeError("in_ can't deal with unicode")
|
||||
def not_in(charset):
    """Return the h_not_in parser for *charset*, which must be a ``str``.

    Raises TypeError for any other argument type.
    """
    if not isinstance(charset, str):
        # TODO/Python3: change str to bytes
        raise TypeError("not_in can't deal with unicode")
    return Parser(_lib.h_not_in(charset, len(charset)), ())
|
||||
def end_p():
    """Return a parser built by h_end_p."""
    raw = _lib.h_end_p()
    return Parser(raw, ())


def nothing_p():
    """Return a parser built by h_nothing_p."""
    raw = _lib.h_nothing_p()
    return Parser(raw, ())
|
||||
def sequence(*parsers):
    """Return the h_sequence parser built from *parsers*, in order.

    The Python wrappers themselves are kept as the new parser's deps, so the
    sub-parsers and whatever they hold (for example action callbacks) stay
    referenced for as long as the sequence does.
    """
    plist = [p._parser for p in parsers]
    # The variadic C call receives a NULL after the last parser.
    plist.append(_ffi.NULL)
    return Parser(_lib.h_sequence(*plist), parsers)
|
||||
def choice(*parsers):
    """Return the h_choice parser built from *parsers*, in order.

    The Python wrappers themselves are kept as the new parser's deps, so the
    alternatives and whatever they hold (for example action callbacks) stay
    referenced for as long as the choice does.
    """
    plist = [p._parser for p in parsers]
    # The variadic C call receives a NULL after the last parser.
    plist.append(_ffi.NULL)
    return Parser(_lib.h_choice(*plist), parsers)
|
||||
def butnot(p1, p2):
    """Return the h_butnot parser built from *p1* and *p2*."""
    raw = _lib.h_butnot(p1._parser, p2._parser)
    return Parser(raw, (p1, p2))


def difference(p1, p2):
    """Return the h_difference parser built from *p1* and *p2*."""
    raw = _lib.h_difference(p1._parser, p2._parser)
    return Parser(raw, (p1, p2))


def xor(p1, p2):
    """Return the h_xor parser built from *p1* and *p2*."""
    raw = _lib.h_xor(p1._parser, p2._parser)
    return Parser(raw, (p1, p2))
|
||||
def many(p1):
    """Return the h_many parser built from *p1*."""
    raw = _lib.h_many(p1._parser)
    return Parser(raw, (p1,))


def many1(p1):
    """Return the h_many1 parser built from *p1*."""
    raw = _lib.h_many1(p1._parser)
    return Parser(raw, (p1,))


def repeat_n(p1, n):
    """Return the h_repeat_n parser built from *p1* and the count *n*."""
    raw = _lib.h_repeat_n(p1._parser, n)
    return Parser(raw, (p1,))


def optional(p1):
    """Return the h_optional parser built from *p1*."""
    raw = _lib.h_optional(p1._parser)
    return Parser(raw, (p1,))


def ignore(p1):
    """Return the h_ignore parser built from *p1*."""
    raw = _lib.h_ignore(p1._parser)
    return Parser(raw, (p1,))
|
||||
def sepBy(p, sep):
    """Return the h_sepBy parser built from *p* and the separator *sep*."""
    raw = _lib.h_sepBy(p._parser, sep._parser)
    return Parser(raw, (p, sep))


def sepBy1(p, sep):
    """Return the h_sepBy1 parser built from *p* and the separator *sep*."""
    raw = _lib.h_sepBy1(p._parser, sep._parser)
    return Parser(raw, (p, sep))


def epsilon_p():
    """Return a parser built by h_epsilon_p."""
    raw = _lib.h_epsilon_p()
    return Parser(raw, ())


def length_value(p_len, p_value):
    """Return the h_length_value parser built from *p_len* and *p_value*."""
    raw = _lib.h_length_value(p_len._parser, p_value._parser)
    return Parser(raw, (p_len, p_value))
|
||||
def attr_bool(parser, predicate):
    """Return the h_attr_bool parser that checks *parser* with *predicate*.

    The callable is converted to a C callback, which is stored in the new
    parser's deps together with *parser*.
    """
    callback = _to_hpredicate(predicate)
    raw = _lib.h_attr_bool(parser._parser, callback)
    return Parser(raw, (parser, callback))
|
||||
def and_(parser):
    """Return the h_and parser built from *parser*."""
    raw = _lib.h_and(parser._parser)
    return Parser(raw, (parser,))


def not_(parser):
    """Return the h_not parser built from *parser*."""
    raw = _lib.h_not(parser._parser)
    return Parser(raw, (parser,))


def indirect():
    """Return a new, still unbound IndirectParser built by h_indirect."""
    raw = _lib.h_indirect()
    return IndirectParser(raw, ())


def bind_indirect(indirect, inner):
    """Functional spelling of ``indirect.bind(inner)``."""
    indirect.bind(inner)
|
||||
|
||||
def parse(parser, *args, **kwargs):
    """Functional spelling of ``parser.parse(...)``.

    Every argument after *parser* is forwarded unchanged, so
    ``parse(p, string)`` is the same as ``p.parse(string)``. Returns
    whatever ``parser.parse`` returns.
    """
    return parser.parse(*args, **kwargs)
|
||||
|
||||
# "in", "and" and "not" are Python keywords, so the functions are defined
# under the mangled names in_, and_ and not_. The unmangled names are also
# published in the module namespace for the use of automated tools. Do not
# attempt to use them by hand; only use the mangled forms (with the '_').
sys.modules[__name__].__dict__.update({
    "in": in_,
    "and": and_,
    "not": not_,
})
|
||||
|
||||
def run_test():
    """Parse "1,2,3" with a sepBy1 of the choices '1', '2', '3' separated by ','."""
    digit = choice(ch('1'), ch('2'), ch('3'))
    comma = ch(',')
    p_test = sepBy1(digit, comma)
    return p_test.parse("1,2,3")
|
||||
|
||||
# {{{ Automatic parser construction... python specific
|
||||
|
||||
# TODO: Implement Parsable metaclass, which requires the existence of
|
||||
# a "parse" method.
|
||||
|
||||
# Maps an exact argument type to the constructor that auto_1() calls with a
# value of that type to turn it into a parser.
# This is expected to be extended by user code. As a general rule,
# only provide auto-parsers for your own types.
AUTO_PARSERS = {
    str: token,
    unicode: token,
}
|
||||
|
||||
def _auto_seq(lst):
    """Build a sequence() from *lst*; nested containers become choices."""
    members = [auto_1(item, default_method=_auto_choice) for item in lst]
    return sequence(*members)


def _auto_choice(lst):
    """Build a choice() from *lst*; nested containers become sequences."""
    members = [auto_1(item, default_method=_auto_seq) for item in lst]
    return choice(*members)
|
||||
|
||||
def auto_1(arg, default_method=_auto_choice):
    """Turn *arg* into a parser.

    A Parser is returned unchanged. A value whose exact type is registered
    in AUTO_PARSERS is passed to the registered constructor. Everything else
    is handed to *default_method*.
    """
    if isinstance(arg, Parser):
        return arg
    arg_type = type(arg)
    if arg_type in AUTO_PARSERS:
        constructor = AUTO_PARSERS[arg_type]
        return constructor(arg)
    return default_method(arg)
|
||||
|
||||
def auto(*args):
    """Build a parser from *args*, treating the top-level arguments as a choice."""
    alternatives = args
    return auto_1(alternatives, default_method=_auto_choice)
|
||||
|
||||
# }}}
|
||||
|
|
@ -135,7 +135,10 @@
|
|||
%{
|
||||
#include "allocator.h"
|
||||
#include "hammer.h"
|
||||
#ifndef SWIGPERL
|
||||
// Perl's embed.h conflicts with err.h, which internal.h includes. Ugh.
|
||||
#include "internal.h"
|
||||
#endif
|
||||
#include "glue.h"
|
||||
%}
|
||||
%include "allocator.h"
|
||||
|
|
|
|||
|
|
@ -35,7 +35,8 @@ int main(int argc, char** argv) {
|
|||
register_parser_tests();
|
||||
register_grammar_tests();
|
||||
register_misc_tests();
|
||||
register_benchmark_tests();
|
||||
if (g_test_slow() || g_test_perf())
|
||||
register_benchmark_tests();
|
||||
|
||||
g_test_run();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue