Merge branch 'python-bindings' of https://github.com/thequux/hammer into python-bindings

Conflicts:
	src/bindings/python/SConscript
	src/bindings/python/hammer_tests.py
	src/bindings/swig/hammer.i
	src/hammer.h
This commit is contained in:
Meredith L. Patterson 2013-11-26 11:25:22 -08:00
commit 2ee82f3ac7
33 changed files with 822 additions and 328 deletions

2
.gitignore vendored
View file

@ -21,3 +21,5 @@ Session.vim
cscope.out
build/
.sconsign.dblite
*.os
*.pyc

View file

@ -8,4 +8,9 @@ before_install:
script:
- scons
notifications:
irc: "irc.upstandinghackers.com#hammer"
irc:
channels:
- "irc.upstandinghackers.com#hammer"
use_notice: true
skip_join: true

View file

@ -8,7 +8,7 @@ vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars)
env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, tools=['default', 'scanreplace'], toolpath=['tools'])
def calcInstallPath(*elements):
path = os.path.abspath(os.path.join(*map(env.subst, elements)))
@ -28,11 +28,16 @@ if 'DESTDIR' in env:
env['libpath'] = calcInstallPath("$prefix", "lib")
env['incpath'] = calcInstallPath("$prefix", "include", "hammer")
# TODO: Add pkgconfig
env['parsersincpath'] = calcInstallPath("$prefix", "include", "hammer", "parsers")
env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backends")
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env.ScanReplace('libhammer.pc.in')
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes")
if not env['PLATFORM'] == 'darwin':
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = ['-install_name', '$TARGET'])
else:
env.MergeFlags("-lrt")
AddOption("--variant",
@ -49,7 +54,12 @@ AddOption("--coverage",
action="store_true",
help="Build with coverage instrumentation")
env['BUILDDIR'] = 'build/$VARIANT'
AddOption("--in-place",
dest="in_place",
default=False,
action="store_true",
help="Build in-place, rather than in the build/<variant> tree")
dbg = env.Clone(VARIANT='debug')
dbg.Append(CCFLAGS=['-g'])
@ -68,19 +78,33 @@ if GetOption("coverage"):
LDFLAGS=["-fprofile-arcs", "-ftest-coverage"],
LIBS=['gcov'])
env["CC"] = os.getenv("CC") or env["CC"]
env["CXX"] = os.getenv("CXX") or env["CXX"]
if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin':
env.Replace(CC="clang",
CXX="clang++")
env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_"))
#rootpath = env['ROOTPATH'] = os.path.abspath('.')
#env.Append(CPPPATH=os.path.join('#', "hammer"))
Export('env')
env.SConscript(["src/SConscript"], variant_dir='build/$VARIANT/src')
env.SConscript(["examples/SConscript"], variant_dir='build/$VARIANT/examples')
if not GetOption("in_place"):
env['BUILD_BASE'] = 'build/$VARIANT'
env.SConscript(["src/SConscript"], variant_dir='$BUILD_BASE/src')
env.SConscript(["examples/SConscript"], variant_dir='$BUILD_BASE/examples')
else:
env['BUILD_BASE'] = '.'
env.SConscript(["src/SConscript"])
env.SConscript(["examples/SConscript"])
env.Command('test', 'build/$VARIANT/src/test_suite', 'env LD_LIBRARY_PATH=build/$VARIANT/src $SOURCE')
env.Command('test', '$BUILD_BASE/src/test_suite', 'env LD_LIBRARY_PATH=$BUILD_BASE/src $SOURCE')
env.Alias("install", "$libpath")
env.Alias("install", "$incpath")
env.Alias("install", "$parsersincpath")
env.Alias("install", "$backendsincpath")
env.Alias("install", "$pkgconfigpath")

View file

@ -23,7 +23,7 @@
// They must be named act_<rulename>.
///
HParsedToken *act_bsfdig(const HParseResult *p)
HParsedToken *act_bsfdig(const HParseResult *p, void* user_data)
{
HParsedToken *res = H_MAKE_UINT(0);
@ -54,7 +54,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0);
#define act_document act_index0
// General-form action to turn a block of base64 digits into bytes.
HParsedToken *act_base64_n(int n, const HParseResult *p)
HParsedToken *act_base64_n(int n, const HParseResult *p, void* user_data)
{
HParsedToken *res = H_MAKE_SEQN(n);
@ -83,7 +83,7 @@ H_ACT_APPLY(act_base64_3, act_base64_n, 3);
H_ACT_APPLY(act_base64_2, act_base64_n, 2);
H_ACT_APPLY(act_base64_1, act_base64_n, 1);
HParsedToken *act_base64(const HParseResult *p)
HParsedToken *act_base64(const HParseResult *p, void* user_data)
{
assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used == 2);

View file

@ -49,7 +49,7 @@ uint8_t bsfdig_value(const HParsedToken *p)
// helper: append a byte value to a sequence
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
HParsedToken *act_base64(const HParseResult *p)
HParsedToken *act_base64(const HParseResult *p, void* user_data)
{
assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used == 2);

View file

@ -15,7 +15,7 @@
// Validations
///
bool validate_hdzero(HParseResult *p) {
bool validate_hdzero(HParseResult *p, void* user_data) {
if (TT_UINT != p->ast->token_type)
return false;
return (0 == p->ast->uint);
@ -25,7 +25,7 @@ bool validate_hdzero(HParseResult *p) {
* Every DNS message should have QDCOUNT entries in the question
* section, and ANCOUNT+NSCOUNT+ARCOUNT resource records.
*/
bool validate_message(HParseResult *p) {
bool validate_message(HParseResult *p, void* user_data) {
if (TT_SEQUENCE != p->ast->token_type)
return false;
@ -86,7 +86,7 @@ void set_rdata(struct dns_rr *rr, HCountedArray *rdata) {
}
}
HParsedToken* act_header(const HParseResult *p) {
HParsedToken* act_header(const HParseResult *p, void* user_data) {
HParsedToken **fields = h_seq_elements(p->ast);
dns_header_t header_ = {
.id = H_CAST_UINT(fields[0]),
@ -109,7 +109,7 @@ HParsedToken* act_header(const HParseResult *p) {
return H_MAKE(dns_header_t, header);
}
HParsedToken* act_label(const HParseResult *p) {
HParsedToken* act_label(const HParseResult *p, void* user_data) {
dns_label_t *r = H_ALLOC(dns_label_t);
r->len = h_seq_len(p->ast);
@ -121,7 +121,7 @@ HParsedToken* act_label(const HParseResult *p) {
return H_MAKE(dns_label_t, r);
}
HParsedToken* act_rr(const HParseResult *p) {
HParsedToken* act_rr(const HParseResult *p, void* user_data) {
dns_rr_t *rr = H_ALLOC(dns_rr_t);
rr->name = *H_FIELD(dns_domain_t, 0);
@ -136,7 +136,7 @@ HParsedToken* act_rr(const HParseResult *p) {
return H_MAKE(dns_rr_t, rr);
}
HParsedToken* act_question(const HParseResult *p) {
HParsedToken* act_question(const HParseResult *p, void* user_data) {
dns_question_t *q = H_ALLOC(dns_question_t);
HParsedToken **fields = h_seq_elements(p->ast);
@ -153,7 +153,7 @@ HParsedToken* act_question(const HParseResult *p) {
return H_MAKE(dns_question_t, q);
}
HParsedToken* act_message(const HParseResult *p) {
HParsedToken* act_message(const HParseResult *p, void* user_data) {
h_pprint(stdout, p->ast, 0, 2);
dns_message_t *msg = H_ALLOC(dns_message_t);

View file

@ -10,7 +10,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0)
/**
* A label can't be more than 63 characters.
*/
bool validate_label(HParseResult *p) {
bool validate_label(HParseResult *p, void* user_data) {
if (TT_SEQUENCE != p->ast->token_type)
return false;
return (64 > p->ast->seq->used);
@ -18,7 +18,7 @@ bool validate_label(HParseResult *p) {
#define act_label h_act_flatten
HParsedToken* act_domain(const HParseResult *p) {
HParsedToken* act_domain(const HParseResult *p, void* user_data) {
HParsedToken *ret = NULL;
char *arr = NULL;

View file

@ -7,6 +7,6 @@
HParser* init_domain();
HParser* init_character_string();
HParsedToken* act_index0(const HParseResult *p);
HParsedToken* act_index0(const HParseResult *p, void* user_data);
#endif

View file

@ -11,13 +11,13 @@
// Validations and Semantic Actions
///
bool validate_null(HParseResult *p) {
bool validate_null(HParseResult *p, void* user_data) {
if (TT_SEQUENCE != p->ast->token_type)
return false;
return (65536 > p->ast->seq->used);
}
HParsedToken *act_null(const HParseResult *p) {
HParsedToken *act_null(const HParseResult *p, void* user_data) {
dns_rr_null_t *null = H_ALLOC(dns_rr_null_t);
size_t len = h_seq_len(p->ast);
@ -28,7 +28,7 @@ HParsedToken *act_null(const HParseResult *p) {
return H_MAKE(dns_rr_null_t, null);
}
HParsedToken *act_txt(const HParseResult *p) {
HParsedToken *act_txt(const HParseResult *p, void* user_data) {
dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t);
const HCountedArray *arr = H_CAST_SEQ(p->ast);
@ -47,7 +47,7 @@ HParsedToken *act_txt(const HParseResult *p) {
return H_MAKE(dns_rr_txt_t, txt);
}
HParsedToken* act_cstr(const HParseResult *p) {
HParsedToken* act_cstr(const HParseResult *p, void* user_data) {
dns_cstr_t *cs = H_ALLOC(dns_cstr_t);
const HCountedArray *arr = H_CAST_SEQ(p->ast);
@ -60,7 +60,7 @@ HParsedToken* act_cstr(const HParseResult *p) {
return H_MAKE(dns_cstr_t, cs);
}
HParsedToken* act_soa(const HParseResult *p) {
HParsedToken* act_soa(const HParseResult *p, void* user_data) {
dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t);
soa->mname = *H_FIELD(dns_domain_t, 0);
@ -74,7 +74,7 @@ HParsedToken* act_soa(const HParseResult *p) {
return H_MAKE(dns_rr_soa_t, soa);
}
HParsedToken* act_wks(const HParseResult *p) {
HParsedToken* act_wks(const HParseResult *p, void* user_data) {
dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t);
wks->address = H_FIELD_UINT(0);
@ -87,7 +87,7 @@ HParsedToken* act_wks(const HParseResult *p) {
return H_MAKE(dns_rr_wks_t, wks);
}
HParsedToken* act_hinfo(const HParseResult *p) {
HParsedToken* act_hinfo(const HParseResult *p, void* user_data) {
dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t);
hinfo->cpu = *H_FIELD(dns_cstr_t, 0);
@ -96,7 +96,7 @@ HParsedToken* act_hinfo(const HParseResult *p) {
return H_MAKE(dns_rr_hinfo_t, hinfo);
}
HParsedToken* act_minfo(const HParseResult *p) {
HParsedToken* act_minfo(const HParseResult *p, void* user_data) {
dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t);
minfo->rmailbx = *H_FIELD(dns_domain_t, 0);
@ -105,7 +105,7 @@ HParsedToken* act_minfo(const HParseResult *p) {
return H_MAKE(dns_rr_minfo_t, minfo);
}
HParsedToken* act_mx(const HParseResult *p) {
HParsedToken* act_mx(const HParseResult *p, void* user_data) {
dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t);
mx->preference = H_FIELD_UINT(0);
@ -184,7 +184,7 @@ HParser* init_rdata(uint16_t type) {
for(uint16_t i = 0; i<RDATA_TYPE_MAX+1; i++) {
if(parsers[i]) {
parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL),
act_index0);
act_index0, NULL);
}
}

10
libhammer.pc.in Normal file
View file

@ -0,0 +1,10 @@
prefix=/usr
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib
Name: libhammer
Description: The Hammer parsing library
Version: 0.9.0
Cflags: -I${includedir}/hammer
Libs: -L${libdir} -lhammer

View file

@ -6,7 +6,17 @@ bindings = ['python']
dist_headers = [
"hammer.h",
"allocator.h",
"glue.h"
"glue.h",
"internal.h"
]
parsers_headers = [
"parsers/parser_internal.h"
]
backends_headers = [
"backends/regex.h",
"backends/contextfree.h"
]
parsers = ['parsers/%s.c'%s for s in
@ -49,6 +59,7 @@ misc_hammer_parts = [
'glue.c',
'hammer.c',
'pprint.c',
'registry.c',
'system_allocator.c']
tests = ['t_benchmark.c',
@ -63,6 +74,9 @@ libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_
env.Install("$libpath", [libhammer_static, libhammer_shared])
env.Install("$incpath", dist_headers)
env.Install("$parsersincpath", parsers_headers)
env.Install("$backendsincpath", backends_headers)
env.Install("$pkgconfigpath", "../../../libhammer.pc")
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')

View file

@ -1,3 +1,8 @@
/*
* NOTE: This is an internal header and installed for use by extensions. The
* API is not guaranteed stable.
*/
// This is an internal header; it provides macros to make desugaring cleaner.
#include <assert.h>
#include "../internal.h"

View file

@ -372,13 +372,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
// perform token reshape if indicated
if(x->reshape)
tok = (HParsedToken *)x->reshape(make_result(arena, tok));
tok = (HParsedToken *)x->reshape(make_result(arena, tok), x->user_data);
// call validation and semantic action, if present
if(x->pred && !x->pred(make_result(tarena, tok)))
if(x->pred && !x->pred(make_result(tarena, tok), x->user_data))
goto no_parse; // validation failed -> no parse
if(x->action)
tok = (HParsedToken *)x->action(make_result(arena, tok));
tok = (HParsedToken *)x->action(make_result(arena, tok), x->user_data);
// append to result sequence
h_carray_append(seq, tok);

View file

@ -307,13 +307,13 @@ bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
// perform token reshape if indicated
if(symbol->reshape)
value = (HParsedToken *)symbol->reshape(make_result(arena, value));
value = (HParsedToken *)symbol->reshape(make_result(arena, value), symbol->user_data);
// call validation and semantic action, if present
if(symbol->pred && !symbol->pred(make_result(tarena, value)))
if(symbol->pred && !symbol->pred(make_result(tarena, value), symbol->user_data))
return false; // validation failed -> no parse; terminate
if(symbol->action)
value = (HParsedToken *)symbol->action(make_result(arena, value));
value = (HParsedToken *)symbol->action(make_result(arena, value), symbol->user_data);
// this is LR, building a right-most derivation bottom-up, so no reduce can
// follow a reduce. we can also assume no conflict follows for GLR if we

View file

@ -1,3 +1,8 @@
/*
* NOTE: This is an internal header and installed for use by extensions. The
* API is not guaranteed stable.
*/
// Internal defs
#ifndef HAMMER_BACKEND_REGEX__H
#define HAMMER_BACKEND_REGEX__H

View file

@ -18,6 +18,6 @@ libhammer_python = pythonenv.SharedLibrary('hammer', swig, SHLIBPREFIX='_')
pytestenv = pythonenv.Clone()
pytestenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0]))
pytestenv.Command(None, ['hammer_tests.py', libhammer_python], "nosetests -vv $SOURCE")
pytestenv.Command(None, ['hammer_tests.py'] + libhammer_python, "nosetests -vv $SOURCE")
Clean('.', ['hammer.pyc', 'hammer_tests.py', 'hammer_tests.pyc'])

View file

@ -4,462 +4,461 @@ import hammer as h
class TestTokenParser(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_token("95\xa2", 3)
cls.parser = h.token("95\xa2")
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "95\xa2", 3).ast.token_data.bytes, "95\xa2")
self.assertEqual(self.parser.parse("95\xa2"), "95\xa2")
def test_partial_fails(self):
self.assertEqual(h.h_parse(self.parser, "95", 2), None)
self.assertEqual(self.parser.parse("95"), None)
class TestChParser(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser_int = h.h_ch(0xa2)
cls.parser_chr = h.h_ch("\xa2")
cls.parser_int = h.ch(0xa2)
cls.parser_chr = h.ch("\xa2")
def test_success(self):
self.assertEqual(h.h_parse(self.parser_int, "\xa2", 1).ast.token_data.uint, 0xa2)
self.assertEqual(h.h_parse(self.parser_chr, "\xa2", 1).ast.token_data.uint, ord("\xa2"))
self.assertEqual(self.parser_int.parse("\xa2"), 0xa2)
self.assertEqual(self.parser_chr.parse("\xa2"), "\xa2")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser_int, "\xa3", 1), None)
self.assertEqual(h.h_parse(self.parser_chr, "\xa3", 1), None)
self.assertEqual(self.parser_int.parse("\xa3"), None)
self.assertEqual(self.parser_chr.parse("\xa3"), None)
class TestChRange(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_ch_range("a", "c")
cls.parser = h.ch_range("a", "c")
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "b", 1).ast.token_data.uint, ord("b"))
self.assertEqual(self.parser.parse("b"), "b")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "d", 1), None)
self.assertEqual(self.parser.parse("d"), None)
class TestInt64(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_int64()
cls.parser = h.int64()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8).ast.token_data.sint, -0x200000000)
self.assertEqual(self.parser.parse("\xff\xff\xff\xfe\x00\x00\x00\x00"), -0x200000000)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\xff\xff\xff\xfe\x00\x00\x00", 7), None)
self.assertEqual(self.parser.parse("\xff\xff\xff\xfe\x00\x00\x00"), None)
class TestInt32(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_int32()
cls.parser = h.int32()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\xff\xfe\x00\x00", 4).ast.token_data.sint, -0x20000)
self.assertEqual(h.h_parse(self.parser, "\x00\x02\x00\x00", 4).ast.token_data.sint, 0x20000)
self.assertEqual(self.parser.parse("\xff\xfe\x00\x00"), -0x20000)
self.assertEqual(self.parser.parse("\x00\x02\x00\x00"), 0x20000)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\xff\xfe\x00", 3), None)
self.assertEqual(h.h_parse(self.parser, "\x00\x02\x00", 3), None)
self.assertEqual(self.parser.parse("\xff\xfe\x00"), None)
self.assertEqual(self.parser.parse("\x00\x02\x00"), None)
class TestInt16(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_int16()
cls.parser = h.int16()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\xfe\x00", 2).ast.token_data.sint, -0x200)
self.assertEqual(h.h_parse(self.parser, "\x02\x00", 2).ast.token_data.sint, 0x200)
self.assertEqual(self.parser.parse("\xfe\x00"), -0x200)
self.assertEqual(self.parser.parse("\x02\x00"), 0x200)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\xfe", 1), None)
self.assertEqual(h.h_parse(self.parser, "\x02", 1), None)
self.assertEqual(self.parser.parse("\xfe"), None)
self.assertEqual(self.parser.parse("\x02"), None)
class TestInt8(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_int8()
cls.parser = h.int8()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x88", 1).ast.token_data.sint, -0x78)
self.assertEqual(self.parser.parse("\x88"), -0x78)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "", 0), None)
self.assertEqual(self.parser.parse(""), None)
class TestUint64(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_uint64()
cls.parser = h.uint64()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x00\x00\x00\x02\x00\x00\x00\x00", 8).ast.token_data.uint, 0x200000000)
self.assertEqual(self.parser.parse("\x00\x00\x00\x02\x00\x00\x00\x00"), 0x200000000)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\x00\x00\x00\x02\x00\x00\x00", 7), None)
self.assertEqual(self.parser.parse("\x00\x00\x00\x02\x00\x00\x00"), None)
class TestUint32(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_uint32()
cls.parser = h.uint32()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x00\x02\x00\x00", 4).ast.token_data.uint, 0x20000)
self.assertEqual(self.parser.parse("\x00\x02\x00\x00"), 0x20000)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\x00\x02\x00", 3), None)
self.assertEqual(self.parser.parse("\x00\x02\x00"), None)
class TestUint16(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_uint16()
cls.parser = h.uint16()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x02\x00", 2).ast.token_data.uint, 0x200)
self.assertEqual(self.parser.parse("\x02\x00"), 0x200)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\x02", 1), None)
self.assertEqual(self.parser.parse("\x02"), None)
class TestUint8(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_uint8()
cls.parser = h.uint8()
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x78", 1).ast.token_data.uint, 0x78)
self.assertEqual(self.parser.parse("\x78"), 0x78)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "", 0), None)
self.assertEqual(self.parser.parse(""), None)
class TestIntRange(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_int_range(h.h_uint8(), 3, 10)
cls.parser = h.int_range(h.uint8(), 3, 10)
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "\x05", 1).ast.token_data.uint, 5)
self.assertEqual(self.parser.parse("\x05"), 5)
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "\x0b", 1), None)
self.assertEqual(self.parser.parse("\x0b"), None)
class TestWhitespace(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_whitespace(h.h_ch("a"))
cls.parser = h.whitespace(h.ch("a"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "a", 1).ast.token_data.uint, ord("a"))
self.assertEqual(h.h_parse(self.parser, " a", 2).ast.token_data.uint, ord("a"))
self.assertEqual(h.h_parse(self.parser, "  a", 3).ast.token_data.uint, ord("a"))
self.assertEqual(h.h_parse(self.parser, "\ta", 2).ast.token_data.uint, ord("a"))
self.assertEqual(self.parser.parse("a"), "a")
self.assertEqual(self.parser.parse(" a"), "a")
self.assertEqual(self.parser.parse("  a"), "a")
self.assertEqual(self.parser.parse("\ta"), "a")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "_a", 2), None)
self.assertEqual(self.parser.parse("_a"), None)
class TestWhitespaceEnd(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_whitespace(h.h_end_p())
cls.parser = h.whitespace(h.end_p())
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "", 0).ast, None) # empty string
self.assertEqual(h.h_parse(self.parser, "  ", 2).ast, None) # empty string
self.assertEqual(self.parser.parse(""), None) # empty string
self.assertEqual(self.parser.parse("  "), None) # empty string
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "  x", 3), None)
self.assertEqual(self.parser.parse("  x"), None)
class TestLeft(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_left(h.h_ch("a"), h.h_ch(" "))
cls.parser = h.left(h.ch("a"), h.ch(" "))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "a ", 2).ast.token_data.uint, ord("a"))
self.assertEqual(self.parser.parse("a "), "a")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(h.h_parse(self.parser, " ", 1), None)
self.assertEqual(h.h_parse(self.parser, "ab", 2), None)
self.assertEqual(self.parser.parse("a"), None)
self.assertEqual(self.parser.parse(" "), None)
self.assertEqual(self.parser.parse("ab"), None)
class TestRight(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_right(h.h_ch(" "), h.h_ch("a"))
cls.parser = h.right(h.ch(" "), h.ch("a"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, " a", 2).ast.token_data.uint, ord("a"))
self.assertEqual(self.parser.parse(" a"), "a")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(h.h_parse(self.parser, " ", 1), None)
self.assertEqual(h.h_parse(self.parser, "ba", 2), None)
self.assertEqual(self.parser.parse("a"), None)
self.assertEqual(self.parser.parse(" "), None)
self.assertEqual(self.parser.parse("ba"), None)
class TestMiddle(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_middle(h.h_ch(" "), h.h_ch("a"), h.h_ch(" "))
cls.parser = h.middle(h.ch(" "), h.ch("a"), h.ch(" "))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, " a ", 3).ast.token_data.uint, ord("a"))
self.assertEqual(self.parser.parse(" a "), "a")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(h.h_parse(self.parser, " ", 1), None)
self.assertEqual(h.h_parse(self.parser, " a", 2), None)
self.assertEqual(h.h_parse(self.parser, "a ", 2), None)
self.assertEqual(h.h_parse(self.parser, " b ", 3), None)
self.assertEqual(h.h_parse(self.parser, "ba ", 3), None)
self.assertEqual(h.h_parse(self.parser, " ab", 3), None)
self.assertEqual(self.parser.parse("a"), None)
self.assertEqual(self.parser.parse(" "), None)
self.assertEqual(self.parser.parse(" a"), None)
self.assertEqual(self.parser.parse("a "), None)
self.assertEqual(self.parser.parse(" b "), None)
self.assertEqual(self.parser.parse("ba "), None)
self.assertEqual(self.parser.parse(" ab"), None)
# class TestAction(unittest.TestCase):
# @classmethod
# def setUpClass(cls):
# cls.parser = h.h_action(h.h_sequence__a([h.h_choice__a([h.h_ch("a"), h.h_ch("A"), None]), h.h_choice__a([h.h_ch("b"), h.h_ch("B"), None]), None]), lambda x: [y.upper() for y in x])
# def test_success(self):
# self.assertEqual(h.h_parse(self.parser, "ab", 2).ast.token_data.seq, ["A", "B"])
# self.assertEqual(h.h_parse(self.parser, "AB", 2).ast.token_data.seq, ["A", "B"])
# def test_failure(self):
# self.assertEqual(h.h_parse(self.parser, "XX", 2), None)
class TestAction(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.action(h.sequence(h.choice(h.ch("a"), h.ch("A")),
h.choice(h.ch("b"), h.ch("B"))),
lambda x: [y.upper() for y in x])
def test_success(self):
self.assertEqual(self.parser.parse("ab"), ["A", "B"])
self.assertEqual(self.parser.parse("AB"), ["A", "B"])
def test_failure(self):
self.assertEqual(self.parser.parse("XX"), None)
class TestIn(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_in("abc", 3)
cls.parser = h.in_("abc")
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "b", 1).ast.token_data.uint, ord("b"))
self.assertEqual(self.parser.parse("b"), "b")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "d", 1), None)
self.assertEqual(self.parser.parse("d"), None)
class TestNotIn(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_not_in("abc", 3)
cls.parser = h.not_in("abc")
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "d", 1).ast.token_data.uint, ord("d"))
self.assertEqual(self.parser.parse("d"), "d")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(self.parser.parse("a"), None)
class TestEndP(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_end_p(), None])
cls.parser = h.sequence(h.ch("a"), h.end_p())
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a", 1).ast.token_data.seq], [ord(y) for y in ["a"]])
self.assertEqual(self.parser.parse("a"), ("a",))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "aa", 2), None)
self.assertEqual(self.parser.parse("aa"), None)
class TestNothingP(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_nothing_p()
cls.parser = h.nothing_p()
def test_success(self):
pass
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(self.parser.parse("a"), None)
class TestSequence(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_ch("b"), None])
cls.parser = h.sequence(h.ch("a"), h.ch("b"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "ab", 2).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual(self.parser.parse("ab"), ('a','b'))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(h.h_parse(self.parser, "b", 1), None)
self.assertEqual(self.parser.parse("a"), None)
self.assertEqual(self.parser.parse("b"), None)
class TestSequenceWhitespace(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_whitespace(h.h_ch("b")), None])
cls.parser = h.sequence(h.ch("a"), h.whitespace(h.ch("b")))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "ab", 2).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a b", 3).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a  b", 4).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual(self.parser.parse("ab"), ('a','b'))
self.assertEqual(self.parser.parse("a b"), ('a','b'))
self.assertEqual(self.parser.parse("a  b"), ('a','b'))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a  c", 4), None)
self.assertEqual(self.parser.parse("a  c"), None)
class TestChoice(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_choice__a([h.h_ch("a"), h.h_ch("b"), None])
cls.parser = h.choice(h.ch("a"), h.ch("b"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "a", 1).ast.token_data.uint, ord("a"))
self.assertEqual(h.h_parse(self.parser, "b", 1).ast.token_data.uint, ord("b"))
self.assertEqual(self.parser.parse("a"), "a")
self.assertEqual(self.parser.parse("b"), "b")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "c", 1), None)
self.assertEqual(self.parser.parse("c"), None)
class TestButNot(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_butnot(h.h_ch("a"), h.h_token("ab", 2))
cls.parser = h.butnot(h.ch("a"), h.token("ab"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "a", 1).ast.token_data.uint, ord("a"))
self.assertEqual(h.h_parse(self.parser, "aa", 2).ast.token_data.uint, ord("a"))
self.assertEqual(self.parser.parse("a"), "a")
self.assertEqual(self.parser.parse("aa"), "a")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "ab", 2), None)
self.assertEqual(self.parser.parse("ab"), None)
class TestButNotRange(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_butnot(h.h_ch_range("0", "9"), h.h_ch("6"))
cls.parser = h.butnot(h.ch_range("0", "9"), h.ch("6"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "4", 1).ast.token_data.uint, ord("4"))
self.assertEqual(self.parser.parse("4"), "4")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "6", 1), None)
self.assertEqual(self.parser.parse("6"), None)
class TestDifference(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_difference(h.h_token("ab", 2), h.h_ch("a"))
cls.parser = h.difference(h.token("ab"), h.ch("a"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "ab", 2).ast.token_data.bytes, "ab")
self.assertEqual(self.parser.parse("ab"), "ab")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(self.parser.parse("a"), None)
class TestXor(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_xor(h.h_ch_range("0", "6"), h.h_ch_range("5", "9"))
cls.parser = h.xor(h.ch_range("0", "6"), h.ch_range("5", "9"))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "0", 1).ast.token_data.uint, ord("0"))
self.assertEqual(h.h_parse(self.parser, "9", 1).ast.token_data.uint, ord("9"))
self.assertEqual(self.parser.parse("0"), "0")
self.assertEqual(self.parser.parse("9"), "9")
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "5", 1), None)
self.assertEqual(h.h_parse(self.parser, "a", 1), None)
self.assertEqual(self.parser.parse("5"), None)
self.assertEqual(self.parser.parse("a"), None)
class TestMany(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_many(h.h_choice__a([h.h_ch("a"), h.h_ch("b"), None]))
cls.parser = h.many(h.choice(h.ch("a"), h.ch("b")))
def test_success(self):
self.assertEqual(h.h_parse(self.parser, "", 0).ast.token_data.seq, [])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a", 1).ast.token_data.seq], [ord(y) for y in ["a"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "b", 1).ast.token_data.seq], [ord(y) for y in ["b"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "aabbaba", 7).ast.token_data.seq], [ord(y) for y in ["a", "a", "b", "b", "a", "b", "a"]])
self.assertEqual(self.parser.parse(""), ())
self.assertEqual(self.parser.parse("a"), ('a',))
self.assertEqual(self.parser.parse("b"), ('b',))
self.assertEqual(self.parser.parse("aabbaba"), ('a','a','b','b','a','b','a'))
def test_failure(self):
pass
class TestMany1(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_many1(h.h_choice__a([h.h_ch("a"), h.h_ch("b"), None]))
cls.parser = h.many1(h.choice(h.ch("a"), h.ch("b")))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a", 1).ast.token_data.seq], [ord(y) for y in ["a"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "b", 1).ast.token_data.seq], [ord(y) for y in ["b"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "aabbaba", 7).ast.token_data.seq], [ord(y) for y in ["a", "a", "b", "b", "a", "b", "a"]])
self.assertEqual(self.parser.parse("a"), ("a",))
self.assertEqual(self.parser.parse("b"), ("b",))
self.assertEqual(self.parser.parse("aabbaba"), ("a", "a", "b", "b", "a", "b", "a"))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "", 0), None)
self.assertEqual(h.h_parse(self.parser, "daabbabadef", 11), None)
self.assertEqual(self.parser.parse(""), None)
self.assertEqual(self.parser.parse("daabbabadef"), None)
class TestRepeatN(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_repeat_n(h.h_choice__a([h.h_ch("a"), h.h_ch("b"), None]), 2)
cls.parser = h.repeat_n(h.choice(h.ch("a"), h.ch("b")), 2)
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "abdef", 5).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual(self.parser.parse("abdef"), ('a', 'b'))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "adef", 4), None)
self.assertEqual(h.h_parse(self.parser, "dabdef", 5), None)
self.assertEqual(self.parser.parse("adef"), None)
self.assertEqual(self.parser.parse("dabdef"), None)
class TestOptional(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_optional(h.h_choice__a([h.h_ch("b"), h.h_ch("c"), None])), h.h_ch("d"), None])
cls.parser = h.sequence(h.ch("a"), h.optional(h.choice(h.ch("b"), h.ch("c"))), h.ch("d"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "abd", 3).ast.token_data.seq], [ord(y) for y in ["a", "b", "d"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "acd", 3).ast.token_data.seq], [ord(y) for y in ["a", "c", "d"]])
### FIXME check this out in repl, what does tree look like
#self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "ad", 2).ast.token_data.seq], [ord(y)["a", None, "d"]])
self.assertEqual(self.parser.parse("abd"), ('a','b','d'))
self.assertEqual(self.parser.parse("acd"), ('a','c','d'))
self.assertEqual(self.parser.parse("ad"), ('a',h.Placeholder(), 'd'))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "aed", 3), None)
self.assertEqual(h.h_parse(self.parser, "ab", 2), None)
self.assertEqual(h.h_parse(self.parser, "ac", 2), None)
self.assertEqual(self.parser.parse("aed"), None)
self.assertEqual(self.parser.parse("ab"), None)
self.assertEqual(self.parser.parse("ac"), None)
class TestIgnore(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_ignore(h.h_ch("b")), h.h_ch("c"), None])
cls.parser = h.sequence(h.ch("a"), h.ignore(h.ch("b")), h.ch("c"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "abc", 3).ast.token_data.seq], [ord(y) for y in ["a", "c"]])
self.assertEqual(self.parser.parse("abc"), ("a","c"))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "ac", 2), None)
self.assertEqual(self.parser.parse("ac"), None)
class TestSepBy(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sepBy(h.h_choice__a([h.h_ch("1"), h.h_ch("2"), h.h_ch("3"), None]), h.h_ch(","))
cls.parser = h.sepBy(h.choice(h.ch("1"), h.ch("2"), h.ch("3")), h.ch(","))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,2,3", 5).ast.token_data.seq], [ord(y) for y in ["1", "2", "3"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,3,2", 5).ast.token_data.seq], [ord(y) for y in ["1", "3", "2"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,3", 3).ast.token_data.seq], [ord(y) for y in ["1", "3"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "3", 1).ast.token_data.seq], [ord(y) for y in ["3"]])
self.assertEqual(h.h_parse(self.parser, "", 0).ast.token_data.seq, [])
self.assertEqual(self.parser.parse("1,2,3"), ('1','2','3'))
self.assertEqual(self.parser.parse("1,3,2"), ('1','3','2'))
self.assertEqual(self.parser.parse("1,3"), ('1','3'))
self.assertEqual(self.parser.parse("3"), ('3',))
self.assertEqual(self.parser.parse(""), ())
def test_failure(self):
pass
class TestSepBy1(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sepBy1(h.h_choice__a([h.h_ch("1"), h.h_ch("2"), h.h_ch("3"), None]), h.h_ch(","))
cls.parser = h.sepBy1(h.choice(h.ch("1"), h.ch("2"), h.ch("3")), h.ch(","))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,2,3", 5).ast.token_data.seq], [ord(y) for y in ["1", "2", "3"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,3,2", 5).ast.token_data.seq], [ord(y) for y in ["1", "3", "2"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "1,3", 3).ast.token_data.seq], [ord(y) for y in ["1", "3"]])
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "3", 1).ast.token_data.seq], [ord(y) for y in ["3"]])
self.assertEqual(self.parser.parse("1,2,3"), ('1','2','3'))
self.assertEqual(self.parser.parse("1,3,2"), ('1','3','2'))
self.assertEqual(self.parser.parse("1,3"), ('1','3'))
self.assertEqual(self.parser.parse("3"), ('3',))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "", 0), None)
self.assertEqual(self.parser.parse(""), None)
### segfaults
class TestEpsilonP1(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_epsilon_p(), h.h_ch("b"), None])
cls.parser = h.sequence(h.ch("a"), h.epsilon_p(), h.ch("b"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "ab", 2).ast.token_data.seq], [ord(y) for y in ["a", "b"]])
self.assertEqual(self.parser.parse("ab"), ("a", "b"))
def test_failure(self):
pass
class TestEpsilonP2(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_epsilon_p(), h.h_ch("a"), None])
cls.parser = h.sequence(h.epsilon_p(), h.ch("a"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a", 1).ast.token_data.seq], [ord(y) for y in ["a"]])
self.assertEqual(self.parser.parse("a"), ("a",))
def test_failure(self):
pass
class TestEpsilonP3(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_epsilon_p(), None])
cls.parser = h.sequence(h.ch("a"), h.epsilon_p())
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a", 1).ast.token_data.seq], [ord(y) for y in ["a"]])
self.assertEqual(self.parser.parse("a"), ("a",))
def test_failure(self):
pass
# class TestAttrBool(unittest.TestCase):
# @classmethod
# def setUpClass(cls):
# cls.parser = h.h_attr_bool(h.h_many1(h.h_choice__a([h.h_ch("a"), h.h_ch("b"), None])), lambda x: x[0] == x[1])
# def test_success(self):
# self.assertEqual(h.h_parse(self.parser, "aa", 2).ast.token_data.seq, ["a", "a"])
# self.assertEqual(h.h_parse(self.parser, "bb", 2).ast.token_data.seq, ["b", "b"])
# def test_failure(self):
# self.assertEqual(h.h_parse(self.parser, "ab", 2), None)
class TestAttrBool(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.attr_bool(h.many1(h.choice(h.ch("a"), h.ch("b"))),
lambda x: x[0] == x[1])
def test_success(self):
self.assertEqual(self.parser.parse("aa"), ("a", "a"))
self.assertEqual(self.parser.parse("bb"), ("b", "b"))
def test_failure(self):
self.assertEqual(self.parser.parse("ab"), None)
class TestAnd1(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_and(h.h_ch("0")), h.h_ch("0"), None])
cls.parser = h.sequence(h.and_(h.ch("0")), h.ch("0"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "0", 1).ast.token_data.seq], [ord(y) for y in ["0"]])
self.assertEqual(self.parser.parse("0"), ("0",))
def test_failure(self):
pass
class TestAnd2(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_and(h.h_ch("0")), h.h_ch("1"), None])
cls.parser = h.sequence(h.and_(h.ch("0")), h.ch("1"))
def test_success(self):
pass
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "0", 1), None)
self.assertEqual(self.parser.parse("0"), None)
class TestAnd3(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("1"), h.h_and(h.h_ch("2")), None])
cls.parser = h.sequence(h.ch("1"), h.and_(h.ch("2")))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "12", 2).ast.token_data.seq], [ord(y) for y in ["1"]])
self.assertEqual(self.parser.parse("12"), ('1',))
def test_failure(self):
pass
class TestNot1(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_choice__a([h.h_ch("+"), h.h_token("++", 2), None]), h.h_ch("b"), None])
cls.parser = h.sequence(h.ch("a"),
h.choice(h.ch("+"), h.token("++")),
h.ch("b"))
def test_success(self):
self.assertEqual([x.token_data.uint for x in h.h_parse(self.parser, "a+b", 3).ast.token_data.seq], [ord(y) for y in ["a", "+", "b"]])
self.assertEqual(self.parser.parse("a+b"), ("a", "+", "b"))
def test_failure(self):
self.assertEqual(h.h_parse(self.parser, "a++b", 4), None)
self.assertEqual(self.parser.parse("a++b"), None)
class TestNot2(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_sequence__a([h.h_ch("a"), h.h_choice__a([h.h_sequence__a([h.h_ch("+"), h.h_not(h.h_ch("+")), None]), h.h_token("++", 2), None]), h.h_ch("b"), None])
cls.parser = h.sequence(h.ch("a"), h.choice(h.sequence(h.ch("+"), h.not_(h.ch("+"))),
h.token("++")),
h.ch("b"))
def test_success(self):
tree = h.h_parse(self.parser, "a+b", 3).ast.token_data.seq
tree[1] = tree[1].token_data.seq[0]
self.assertEqual([x.token_data.uint for x in tree], [ord(y) for y in ["a", "+", "b"]])
tree = h.h_parse(self.parser, "a++b", 4).ast.token_data.seq
tree[0] = chr(tree[0].token_data.uint)
tree[1] = tree[1].token_data.bytes
tree[2] = chr(tree[2].token_data.uint)
self.assertEqual(tree, ["a", "++", "b"])
self.assertEqual(self.parser.parse("a+b"), ('a', ('+',), 'b'))
self.assertEqual(self.parser.parse("a++b"), ('a', "++", 'b'))
def test_failure(self):
pass
@ -467,32 +466,29 @@ class TestNot2(unittest.TestCase):
# #class TestLeftrec(unittest.TestCase):
# # @classmethod
# # def setUpClass(cls):
# # cls.parser = h.h_indirect()
# # a = h.h_ch("a")
# # h.h_bind_indirect(cls.parser, h.h_choice(h.h_sequence(cls.parser, a), a))
# # cls.parser = h.indirect()
# # a = h.ch("a")
# # h.bind_indirect(cls.parser, h.choice(h.sequence(cls.parser, a), a))
# # def test_success(self):
# # self.assertEqual(h.h_parse(self.parser, "a", 1).ast.token_data.bytes, "a")
# # self.assertEqual(h.h_parse(self.parser, "aa", 2).ast.token_data.seq, ["a", "a"])
# # self.assertEqual(h.h_parse(self.parser, "aaa", 3).ast.token_data.seq, ["a", "a", "a"])
# # self.assertEqual(self.parser.parse("a"), "a")
# # self.assertEqual(self.parser.parse("aa"), ["a", "a"])
# # self.assertEqual(self.parser.parse("aaa"), ["a", "a", "a"])
# # def test_failure(self):
# # pass
class TestRightrec(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.parser = h.h_indirect()
a = h.h_ch("a")
h.h_bind_indirect(cls.parser, h.h_choice__a([h.h_sequence__a([a, cls.parser, None]), h.h_epsilon_p(), None]))
#raise unittest.SkipTest("Bind doesn't work right now")
cls.parser = h.indirect()
a = h.ch("a")
cls.parser.bind(h.choice(h.sequence(a, cls.parser),
h.epsilon_p()))
def test_success(self):
tree = h.h_parse(self.parser, "a", 1).ast.token_data.seq
self.assertEqual(tree[0].token_data.uint, ord("a"))
tree = h.h_parse(self.parser, "aa", 2).ast.token_data.seq
self.assertEqual(tree[0].token_data.uint, ord("a"))
self.assertEqual(tree[1].token_data.seq[0].token_data.uint, ord("a"))
tree = h.h_parse(self.parser, "aaa", 3).ast.token_data.seq
self.assertEqual(tree[0].token_data.uint, ord("a"))
self.assertEqual(tree[1].token_data.seq[0].token_data.uint, ord("a"))
self.assertEqual(tree[1].token_data.seq[1].token_data.seq[0].token_data.uint, ord("a"))
self.assertEqual(self.parser.parse("a"), ('a',))
self.assertEqual(self.parser.parse("aa"), ('a', ('a',)))
self.assertEqual(self.parser.parse("aaa"), ('a', ('a', ('a',))))
def test_failure(self):
pass
@ -500,15 +496,15 @@ class TestRightrec(unittest.TestCase):
# #class TestAmbiguous(unittest.TestCase):
# # @classmethod
# # def setUpClass(cls):
# # cls.parser = h.h_indirect()
# # d = h.h_ch("d")
# # p = h.h_ch("+")
# # h.h_bind_indirect(cls.parser, h.h_choice(h.h_sequence(cls.parser, p, cls.parser), d))
# # cls.parser = h.indirect()
# # d = h.ch("d")
# # p = h.ch("+")
# # h.bind_indirect(cls.parser, h.choice(h.sequence(cls.parser, p, cls.parser), d))
# # # this is supposed to be flattened
# # def test_success(self):
# # self.assertEqual(h.h_parse(self.parser, "d", 1).ast.token_data.seq, ["d"])
# # self.assertEqual(h.h_parse(self.parser, "d+d", 3).ast.token_data.seq, ["d", "+", "d"])
# # self.assertEqual(h.h_parse(self.parser, "d+d+d", 5).ast.token_data.seq, ["d", "+", "d", "+", "d"])
# # self.assertEqual(self.parser.parse("d"), ["d"])
# # self.assertEqual(self.parser.parse("d+d"), ["d", "+", "d"])
# # self.assertEqual(self.parser.parse("d+d+d"), ["d", "+", "d", "+", "d"])
# # def test_failure(self):
# # self.assertEqual(h.h_parse(self.parser, "d+", 2), None)
# # self.assertEqual(self.parser.parse("d+"), None)

View file

@ -1,26 +1,53 @@
%module hammer
%nodefaultctor;
%include "stdint.i"
//%include "typemaps.i"
//%apply char [ANY] { uint8_t [ANY] };
#if defined(SWIGPYTHON)
%ignore HCountedArray_;
%typemap(in) uint8_t* {
Py_INCREF($input);
$1 = (uint8_t*)PyString_AsString($input);
}
%typemap(out) uint8_t* {
$result = PyString_FromString((char*)$1);
%apply (char *STRING, size_t LENGTH) {(uint8_t* str, size_t len)}
%apply (uint8_t* str, size_t len) {(const uint8_t* input, size_t length)}
%apply (uint8_t* str, size_t len) {(const uint8_t* str, const size_t len)}
%apply (uint8_t* str, size_t len) {(const uint8_t* charset, size_t length)}
%rename("_%s") "";
// %rename(_h_ch) h_ch;
%inline {
static PyObject *_helper_Placeholder = NULL, *_helper_ParseError = NULL;
static void register_helpers(PyObject* parse_error, PyObject *placeholder) {
_helper_ParseError = parse_error;
_helper_Placeholder = placeholder;
}
}
%pythoncode %{
class Placeholder(object):
    """The python equivalent of TT_NONE.

    All instances of the same class compare equal, so a parse result can be
    checked with ``result == Placeholder()``.
    """

    def __str__(self):
        return "Placeholder"

    def __repr__(self):
        return "Placeholder"

    def __eq__(self, other):
        # Equality is by exact type only; instances carry no state.
        return type(self) is type(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__: equal instances hash alike.
        return hash(type(self))
class ParseError(Exception):
"""The parse failed; the message may have more information"""
pass
_hammer._register_helpers(ParseError,
Placeholder)
%}
%typemap(in) void*[] {
if (PyList_Check($input)) {
Py_INCREF($input);
int size = PyList_Size($input);
int i = 0;
int res = 0;
$1 = (void**)malloc(size*sizeof(HParser*));
$1 = (void**)malloc((size+1)*sizeof(HParser*));
for (i=0; i<size; i++) {
PyObject *o = PyList_GetItem($input, i);
res = SWIG_ConvertPtr(o, &($1[i]), SWIGTYPE_p_HParser_, 0 | 0);
@ -28,6 +55,7 @@
SWIG_exception_fail(SWIG_ArgError(res), "that wasn't an HParser" );
}
}
$1[size] = NULL;
} else {
PyErr_SetString(PyExc_TypeError, "__a functions take lists of parsers as their argument");
return NULL;
@ -56,6 +84,49 @@
PyList_SetItem($result, i, o);
}
}
%typemap(out) struct HParseResult_* {
if ($1 == NULL) {
// TODO: raise parse failure
Py_INCREF(Py_None);
$result = Py_None;
} else {
$result = hpt_to_python($1->ast);
}
}
%typemap(newfree) struct HParseResult_* {
  // Release the C parse result once the "out" typemap has converted it.
  // In a newfree typemap the returned object is $1, not $input.
  h_parse_result_free($1);
}
%inline %{
static int h_tt_python;
%}
%init %{
h_tt_python = h_allocate_token_type("com.upstandinghackers.hammer.python");
%}
%typemap(in) (HPredicate pred, void* user_data) {
Py_INCREF($input);
$2 = $input;
$1 = call_predicate;
}
%typemap(in) (const HAction a, void* user_data) {
Py_INCREF($input);
$2 = $input;
$1 = call_action;
}
%inline %{
struct HParsedToken_;
struct HParseResult_;
static PyObject* hpt_to_python(const struct HParsedToken_ *token);
static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data);
static int call_predicate(const struct HParseResult_ *p, void* user_data);
%}
#else
#warning no uint8_t* typemaps defined
#endif
@ -65,7 +136,208 @@
#include "allocator.h"
#include "hammer.h"
#include "internal.h"
#include "glue.h"
%}
%include "allocator.h"
%include "hammer.h"
%extend HArena_ {
~HArena_() {
h_delete_arena($self);
}
};
%extend HParseResult_ {
~HParseResult_() {
h_parse_result_free($self);
}
};
%newobject h_parse;
%delobject h_parse_result_free;
%newobject h_new_arena;
%delobject h_delete_arena;
#ifdef SWIGPYTHON
%inline {
static PyObject* hpt_to_python(const HParsedToken *token) {
// Caller holds a reference to returned object
PyObject *ret;
if (token == NULL) {
Py_RETURN_NONE;
}
switch (token->token_type) {
case TT_NONE:
return PyObject_CallFunctionObjArgs(_helper_Placeholder, NULL);
break;
case TT_BYTES:
return PyString_FromStringAndSize((char*)token->token_data.bytes.token, token->token_data.bytes.len);
case TT_SINT:
// TODO: return PyINT if appropriate
return PyLong_FromLong(token->token_data.sint);
case TT_UINT:
// TODO: return PyINT if appropriate
return PyLong_FromUnsignedLong(token->token_data.uint);
case TT_SEQUENCE:
ret = PyTuple_New(token->token_data.seq->used);
for (int i = 0; i < token->token_data.seq->used; i++) {
PyTuple_SET_ITEM(ret, i, hpt_to_python(token->token_data.seq->elements[i]));
}
return ret;
default:
if (token->token_type == h_tt_python) {
ret = (PyObject*)token->token_data.user;
Py_INCREF(ret);
return ret;
} else {
return SWIG_NewPointerObj((void*)token, SWIGTYPE_p_HParsedToken_, 0 | 0);
// TODO: support registry
}
}
}
static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data) {
  // Trampoline used as the HAction for Python callables: user_data is the
  // callable, which is invoked with the Python form of p->ast. Its return
  // value is wrapped in an h_tt_python token.
  PyObject *callable = user_data;
  PyObject *arg = hpt_to_python(p->ast);
  if (arg == NULL) {
    // A NULL argument would be read as the end of the argument list.
    PyErr_Print();
    return NULL;
  }
  PyObject *ret = PyObject_CallFunctionObjArgs(callable, arg, NULL);
  // The call does not steal the argument reference; release it here.
  Py_DECREF(arg);
  if (ret == NULL) {
    PyErr_Print();
    assert(ret != NULL);
    // With assertions compiled out, do not wrap a NULL object in a token.
    return NULL;
  }
  // TODO: add reference to ret to parse-local data
  // For now, just hold onto reference
  HParsedToken *tok = h_make(p->arena, h_tt_python, ret);
  return tok;
}
static int call_predicate(const struct HParseResult_ *p, void* user_data) {
  // Trampoline used as the HPredicate for Python callables: user_data is the
  // callable, which is invoked with the Python form of p->ast. Returns the
  // truth value of its result, or 0 if the call or the truth test fails.
  PyObject *callable = user_data;
  PyObject *arg = hpt_to_python(p->ast);
  if (arg == NULL) {
    // A NULL argument would be read as the end of the argument list.
    PyErr_Print();
    return 0;
  }
  PyObject *ret = PyObject_CallFunctionObjArgs(callable, arg, NULL);
  // The call does not steal the argument reference; release it here.
  Py_DECREF(arg);
  if (ret == NULL) {
    // TODO: throw exception
    PyErr_Print();
    assert(ret != NULL);
    // With assertions compiled out, fail the predicate rather than
    // dereferencing NULL below.
    return 0;
  }
  // TODO: add reference to ret to parse-local data
  int rret = PyObject_IsTrue(ret);
  Py_DECREF(ret);
  if (rret < 0) {
    // PyObject_IsTrue reports errors as -1, which would otherwise read as true.
    PyErr_Print();
    rret = 0;
  }
  return rret;
}
}
%rename("%s") "";
%extend HParser_ {
HParseResult* parse(const uint8_t* input, size_t length) {
return h_parse($self, input, length);
}
bool compile(HParserBackend backend) {
return h_compile($self, backend, NULL) == 0;
}
PyObject* __dir__() {
PyObject* ret = PyList_New(2);
PyList_SET_ITEM(ret, 0, PyString_FromString("parse"));
PyList_SET_ITEM(ret, 1, PyString_FromString("compile"));
return ret;
}
}
%pythoncode %{
def action(p, act):
return _h_action(p, act)
def attr_bool(p, pred):
return _h_attr_bool(p, pred)
def ch(ch):
    """Build a parser for ch: strings go through token(), anything else through _h_ch()."""
    if isinstance(ch, (str, unicode)):
        return token(ch)
    return _h_ch(ch)
def ch_range(c1, c2):
    """Build a range parser over c1..c2.

    Both bounds must be of the same kind. When they are ``str``, the result
    of the underlying parser is passed through ``chr``; otherwise the raw
    ``_h_ch_range`` parser is returned.

    Raises:
        TypeError: if either bound is ``unicode``, or if only one of the
            bounds is a ``str``.
    """
    if isinstance(c1, unicode) or isinstance(c2, unicode):
        raise TypeError("ch_range only works on bytes")
    lower_is_str = isinstance(c1, str)
    if lower_is_str != isinstance(c2, str):
        raise TypeError("Both arguments to ch_range must be the same type")
    if not lower_is_str:
        return _h_ch_range(c1, c2)
    return action(_h_ch_range(c1, c2), chr)
def epsilon_p(): return _h_epsilon_p()
def end_p():
return _h_end_p()
def in_(charset):
return action(_h_in(charset), chr)
def not_in(charset):
return action(_h_not_in(charset), chr)
def not_(p): return _h_not(p)
def int_range(p, i1, i2):
return _h_int_range(p, i1, i2)
def token(string):
return _h_token(string)
def whitespace(p):
return _h_whitespace(p)
def xor(p1, p2):
return _h_xor(p1, p2)
def butnot(p1, p2):
return _h_butnot(p1, p2)
def and_(p1):
return _h_and(p1)
def difference(p1, p2):
return _h_difference(p1, p2)
def sepBy(p, sep): return _h_sepBy(p, sep)
def sepBy1(p, sep): return _h_sepBy1(p, sep)
def many(p): return _h_many(p)
def many1(p): return _h_many1(p)
def repeat_n(p, n): return _h_repeat_n(p, n)
def choice(*args): return _h_choice__a(list(args))
def sequence(*args): return _h_sequence__a(list(args))
def optional(p): return _h_optional(p)
def nothing_p(): return _h_nothing_p()
def ignore(p): return _h_ignore(p)
def left(p1, p2): return _h_left(p1, p2)
def middle(p1, p2, p3): return _h_middle(p1, p2, p3)
def right(p1, p2): return _h_right(p1, p2)
class HIndirectParser(_HParser_):
    """An _HParser_ created by _h_indirect() that can be bound to another parser later."""

    def __init__(self):
        # Adopt the state of a freshly created indirect parser: replace this
        # instance's attributes wholesale with those of the _h_indirect() result.
        backing = _h_indirect()
        attrs = self.__dict__
        attrs.clear()
        attrs.update(backing.__dict__)

    def __dir__(self):
        # Advertise 'bind' in addition to whatever the base class lists.
        inherited = super(HIndirectParser, self).__dir__()
        return inherited + ['bind']

    def bind(self, parser):
        """Bind this indirect parser to ``parser`` via _h_bind_indirect()."""
        _h_bind_indirect(self, parser)
def indirect():
return HIndirectParser()
def bind_indirect(indirect, new_parser):
indirect.bind(new_parser)
def uint8(): return _h_uint8()
def uint16(): return _h_uint16()
def uint32(): return _h_uint32()
def uint64(): return _h_uint64()
def int8(): return _h_int8()
def int16(): return _h_int16()
def int32(): return _h_int32()
def int64(): return _h_int64()
%}
#endif

View file

@ -5,7 +5,7 @@
#include "parsers/parser_internal.h"
// Helper to build HAction's that pick one index out of a sequence.
HParsedToken *h_act_index(int i, const HParseResult *p)
HParsedToken *h_act_index(int i, const HParseResult *p, void* user_data)
{
if(!p) return NULL;
@ -23,7 +23,7 @@ HParsedToken *h_act_index(int i, const HParseResult *p)
return tok->seq->elements[i];
}
HParsedToken *h_act_first(const HParseResult *p) {
HParsedToken *h_act_first(const HParseResult *p, void* user_data) {
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0);
@ -31,7 +31,7 @@ HParsedToken *h_act_first(const HParseResult *p) {
return p->ast->seq->elements[0];
}
HParsedToken *h_act_second(const HParseResult *p) {
HParsedToken *h_act_second(const HParseResult *p, void* user_data) {
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0);
@ -39,7 +39,7 @@ HParsedToken *h_act_second(const HParseResult *p) {
return p->ast->seq->elements[1];
}
HParsedToken *h_act_last(const HParseResult *p) {
HParsedToken *h_act_last(const HParseResult *p, void* user_data) {
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0);
@ -59,7 +59,7 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
}
}
HParsedToken *h_act_flatten(const HParseResult *p) {
HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) {
HCountedArray *seq = h_carray_new(p->arena);
act_flatten_(seq, p->ast);
@ -72,7 +72,7 @@ HParsedToken *h_act_flatten(const HParseResult *p) {
return res;
}
HParsedToken *h_act_ignore(const HParseResult *p) {
HParsedToken *h_act_ignore(const HParseResult *p, void* user_data) {
return NULL;
}

View file

@ -53,16 +53,27 @@
// H_AVRULE is like H_VARULE but the action is attached outside the validation,
// i.e. the validation receives the uninterpreted AST as input.
//
// H_ADRULE, H_VDRULE, H_AVDRULE, and H_VADRULE are the same as the
// equivalent non-D variants, except that they also allow you to set
// the user_data pointer. In cases where both an attr_bool and an
// action are used, the same userdata pointer is given to both.
#define H_RULE(rule, def) HParser *rule = def
#define H_ARULE(rule, def) HParser *rule = h_action(def, act_ ## rule)
#define H_ARULE(rule, def) HParser *rule = h_action(def, act_ ## rule, NULL)
#define H_VRULE(rule, def) HParser *rule = \
h_attr_bool(def, validate_ ## rule)
h_attr_bool(def, validate_ ## rule, NULL)
#define H_VARULE(rule, def) HParser *rule = \
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL)
#define H_AVRULE(rule, def) HParser *rule = \
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL)
#define H_ADRULE(rule, def, data) HParser *rule = \
h_action(def, act_ ## rule, data)
#define H_VDRULE(rule, def, data) HParser *rule = \
h_attr_bool(def, validate_ ## rule, data)
#define H_VADRULE(rule, def, data) HParser *rule = \
h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data)
#define H_AVDRULE(rule, def, data) HParser *rule = \
h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data)
//
@ -88,18 +99,18 @@
// action such as h_act_index.
//
HParsedToken *h_act_index(int i, const HParseResult *p);
HParsedToken *h_act_first(const HParseResult *p);
HParsedToken *h_act_second(const HParseResult *p);
HParsedToken *h_act_last(const HParseResult *p);
HParsedToken *h_act_flatten(const HParseResult *p);
HParsedToken *h_act_ignore(const HParseResult *p);
HParsedToken *h_act_index(int i, const HParseResult *p, void* user_data);
HParsedToken *h_act_first(const HParseResult *p, void* user_data);
HParsedToken *h_act_second(const HParseResult *p, void* user_data);
HParsedToken *h_act_last(const HParseResult *p, void* user_data);
HParsedToken *h_act_flatten(const HParseResult *p, void* user_data);
HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
// Define 'myaction' as a specialization of 'paction' by supplying the leading
// parameters.
#define H_ACT_APPLY(myaction, paction, ...) \
HParsedToken *myaction(const HParseResult *p) { \
return paction(__VA_ARGS__, p); \
HParsedToken *myaction(const HParseResult *p, void* user_data) { \
return paction(__VA_ARGS__, p, user_data); \
}

View file

@ -137,14 +137,14 @@ typedef struct HBitWriter_ HBitWriter;
* say, structs) and stuff values for them into the void* in the
* tagged union in HParsedToken.
*/
typedef HParsedToken* (*HAction)(const HParseResult *p);
typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
/**
* Type of a boolean attribute-checking function, used in the
* attr_bool() parser. It can be any (user-defined) function that takes
* a HParseResult* and returns true or false.
*/
typedef bool (*HPredicate)(HParseResult *p);
typedef bool (*HPredicate)(HParseResult *p, void* user_data);
typedef struct HCFChoice_ HCFChoice;
typedef struct HRVMProg_ HRVMProg;
@ -385,7 +385,7 @@ HAMMER_FN_DECL(HParser*, h_middle, const HParser* p, const HParser* x, const HPa
*
* Result token type: any
*/
HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a);
HAMMER_FN_DECL(HParser*, h_action, const HParser* p, const HAction a, void* user_data);
/**
* Parse a single character in the given charset.
@ -551,7 +551,7 @@ HAMMER_FN_DECL(HParser*, h_length_value, const HParser* length, const HParser* v
*
* Result token type: p's result type if pred succeeded, NULL otherwise.
*/
HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred);
HAMMER_FN_DECL(HParser*, h_attr_bool, const HParser* p, HPredicate pred, void* user_data);
/**
* The 'and' parser asserts that a conditional syntax is satisfied,
@ -657,11 +657,11 @@ void h_bit_writer_free(HBitWriter* w);
// General-purpose actions for use with h_action
// XXX to be consolidated with glue.h when merged upstream
HParsedToken *h_act_first(const HParseResult *p);
HParsedToken *h_act_second(const HParseResult *p);
HParsedToken *h_act_last(const HParseResult *p);
HParsedToken *h_act_flatten(const HParseResult *p);
HParsedToken *h_act_ignore(const HParseResult *p);
HParsedToken *h_act_first(const HParseResult *p, void* userdata);
HParsedToken *h_act_second(const HParseResult *p, void* userdata);
HParsedToken *h_act_last(const HParseResult *p, void* userdata);
HParsedToken *h_act_flatten(const HParseResult *p, void* userdata);
HParsedToken *h_act_ignore(const HParseResult *p, void* userdata);
// {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);
@ -669,6 +669,17 @@ void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
//void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
// }}}
// {{{ Token type registry
/// Allocate a new, unused (as far as this function knows) token type.
int h_allocate_token_type(const char* name);
/// Get the token type associated with name. Returns -1 if name is unknown.
int h_get_token_type_number(const char* name);
/// Get the name associated with token_type. Returns NULL if the token type is unknown.
const char* h_get_token_type_name(int token_type);
// }}}
#ifdef __cplusplus
}
#endif

View file

@ -15,6 +15,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/*
* NOTE: This is an internal header and installed for use by extensions. The
* API is not guaranteed stable.
*/
#ifndef HAMMER_INTERNAL__H
#define HAMMER_INTERNAL__H
#include <stdint.h>
@ -360,6 +365,7 @@ struct HCFChoice_ {
// to execute before action and pred are applied.
HAction action;
HPredicate pred;
void* user_data;
};
struct HCFSequence_ {

View file

@ -4,6 +4,7 @@
typedef struct {
const HParser *p;
HAction action;
void* user_data;
} HParseAction;
static HParseResult* parse_action(void *env, HParseState *state) {
@ -12,8 +13,8 @@ static HParseResult* parse_action(void *env, HParseState *state) {
HParseResult *tmp = h_do_parse(a->p, state);
//HParsedToken *tok = a->action(h_do_parse(a->p, state));
if(tmp) {
const HParsedToken *tok = a->action(tmp);
return make_result(state->arena, (HParsedToken*)tok);
const HParsedToken *tok = a->action(tmp, a->user_data);
return make_result(state->arena, (HParsedToken*)tok);
} else
return NULL;
} else // either the parser's missing or the action's missing
@ -27,6 +28,7 @@ static void desugar_action(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_BEGIN_SEQ() {
HCFS_DESUGAR(a->p);
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->user_data = a->user_data;
HCFS_THIS_CHOICE->action = a->action;
HCFS_THIS_CHOICE->reshape = h_act_first;
} HCFS_END_CHOICE();
@ -44,7 +46,7 @@ static bool action_isValidCF(void *env) {
static bool h_svm_action_action(HArena *arena, HSVMContext *ctx, void* arg) {
HParseResult res;
HAction action = arg;
HParseAction *a = arg;
assert(ctx->stack_count >= 1);
if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) {
assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK);
@ -56,7 +58,7 @@ static bool h_svm_action_action(HArena *arena, HSVMContext *ctx, void* arg) {
}
res.arena = arena;
HParsedToken *tok = action(&res);
HParsedToken *tok = a->action(&res, a->user_data);
if (tok != NULL)
ctx->stack[ctx->stack_count-1] = tok;
else
@ -69,7 +71,7 @@ static bool action_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, a->p))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_action, a->action));
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_action, a));
return true;
}
@ -81,13 +83,14 @@ static const HParserVtable action_vt = {
.compile_to_rvm = action_ctrvm,
};
HParser* h_action(const HParser* p, const HAction a) {
return h_action__m(&system_allocator, p, a);
HParser* h_action(const HParser* p, const HAction a, void* user_data) {
return h_action__m(&system_allocator, p, a, user_data);
}
HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) {
HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a, void* user_data) {
HParseAction *env = h_new(HParseAction, 1);
env->p = p;
env->action = a;
env->user_data = user_data;
return h_new_parser(mm__, &action_vt, env);
}

View file

@ -4,13 +4,14 @@
typedef struct {
const HParser *p;
HPredicate pred;
void* user_data;
} HAttrBool;
static HParseResult* parse_attr_bool(void *env, HParseState *state) {
HAttrBool *a = (HAttrBool*)env;
HParseResult *res = h_do_parse(a->p, state);
if (res && res->ast) {
if (a->pred(res))
if (a->pred(res, a->user_data))
return res;
else
return NULL;
@ -42,12 +43,13 @@ static void desugar_ab(HAllocator *mm__, HCFStack *stk__, void *env) {
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->pred = a->pred;
HCFS_THIS_CHOICE->reshape = h_act_first;
HCFS_THIS_CHOICE->user_data = a->user_data;
} HCFS_END_CHOICE();
}
static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) {
HParseResult res;
HPredicate pred = arg;
HAttrBool *ab = arg;
assert(ctx->stack_count >= 1);
if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) {
assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK);
@ -59,7 +61,7 @@ static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) {
res.ast = NULL;
}
res.arena = arena;
return pred(&res);
return ab->pred(&res, ab->user_data);
}
static bool ab_ctrvm(HRVMProg *prog, void *env) {
@ -67,7 +69,7 @@ static bool ab_ctrvm(HRVMProg *prog, void *env) {
h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, ab->p))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_attr_bool, ab->pred));
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_attr_bool, ab));
return true;
}
@ -80,12 +82,13 @@ static const HParserVtable attr_bool_vt = {
};
HParser* h_attr_bool(const HParser* p, HPredicate pred) {
return h_attr_bool__m(&system_allocator, p, pred);
HParser* h_attr_bool(const HParser* p, HPredicate pred, void* user_data) {
return h_attr_bool__m(&system_allocator, p, pred, user_data);
}
HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) {
HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred, void* user_data) {
HAttrBool *env = h_new(HAttrBool, 1);
env->p = p;
env->pred = pred;
env->user_data = user_data;
return h_new_parser(mm__, &attr_bool_vt, env);
}

View file

@ -17,7 +17,9 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
return make_result(state->arena, result);
}
static HParsedToken *reshape_bits(const HParseResult *p, bool signedp) {
static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
// signedp_p == NULL iff unsigned
bool signedp = (signedp_p != NULL);
// XXX works only for whole bytes
// XXX assumes big-endian
assert(p->ast);
@ -45,12 +47,6 @@ static HParsedToken *reshape_bits(const HParseResult *p, bool signedp) {
return ret;
}
static HParsedToken *reshape_bits_unsigned(const HParseResult *p) {
return reshape_bits(p, false);
}
static HParsedToken *reshape_bits_signed(const HParseResult *p) {
return reshape_bits(p, true);
}
static void desugar_bits(HAllocator *mm__, HCFStack *stk__, void *env) {
struct bits_env *bits = (struct bits_env*)env;
@ -67,9 +63,9 @@ static void desugar_bits(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_ADD_CHARSET(match_all);
}
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->reshape = bits->signedp
? reshape_bits_signed
: reshape_bits_unsigned;
HCFS_THIS_CHOICE->reshape = reshape_bits;
HCFS_THIS_CHOICE->user_data = bits->signedp ? HCFS_THIS_CHOICE : NULL; // HCFS_THIS_CHOICE is an arbitrary non-null pointer
} HCFS_END_CHOICE();
}

View file

@ -22,7 +22,7 @@ static bool opt_isValidCF(void *env) {
return p->vtable->isValidCF(p->env);
}
static HParsedToken* reshape_optional(const HParseResult *p) {
static HParsedToken* reshape_optional(const HParseResult *p, void* user_data) {
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
@ -52,6 +52,7 @@ static void desugar_optional(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_BEGIN_SEQ() {
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->reshape = reshape_optional;
HCFS_THIS_CHOICE->user_data = NULL;
} HCFS_END_CHOICE();
}

View file

@ -1,3 +1,8 @@
/*
* NOTE: This is an internal header; it is installed for use by extensions.
* The API is not guaranteed to be stable.
*/
#ifndef HAMMER_PARSE_INTERNAL__H
#define HAMMER_PARSE_INTERNAL__H
#include "../hammer.h"

View file

@ -43,7 +43,7 @@ static bool sequence_isValidCF(void *env) {
return true;
}
static HParsedToken *reshape_sequence(const HParseResult *p) {
static HParsedToken *reshape_sequence(const HParseResult *p, void* user_data) {
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
@ -72,6 +72,7 @@ static void desugar_sequence(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_DESUGAR(s->p_array[i]);
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->reshape = reshape_sequence;
HCFS_THIS_CHOICE->user_data = NULL;
} HCFS_END_CHOICE();
}

View file

@ -20,7 +20,7 @@ static HParseResult* parse_token(void *env, HParseState *state) {
}
static HParsedToken *reshape_token(const HParseResult *p) {
static HParsedToken *reshape_token(const HParseResult *p, void* user_data) {
// fetch sequence of uints from p
assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE);
@ -52,6 +52,7 @@ static void desugar_token(HAllocator *mm__, HCFStack *stk__, void *env) {
HCFS_ADD_CHAR(tok->str[i]);
} HCFS_END_SEQ();
HCFS_THIS_CHOICE->reshape = reshape_token;
HCFS_THIS_CHOICE->user_data = NULL;
} HCFS_END_CHOICE();
}

87
src/registry.c Normal file
View file

@ -0,0 +1,87 @@
/* Parser combinators for binary formats.
* Copyright (C) 2012 Meredith L. Patterson, Dan "TQ" Hirsch
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <search.h>
#include <stdlib.h>
#include "hammer.h"
#include "internal.h"
// One registry record: pairs a token-type name with its numeric id.
typedef struct Entry_ {
// Lookup key; compare_entries() orders records by strcmp() on this field.
const char* name;
// Numeric id. h_allocate_token_type() uses -1 as the "no id yet" placeholder
// before assigning the real id from tt_next.
int value;
} Entry;
// Root of the search tree handed to tsearch()/tfind(); records are keyed by name.
static void *tt_registry = NULL;
// Table of registered records, indexed by (value - TT_START).
static Entry** tt_by_id = NULL;
// Number of slots currently allocated in tt_by_id.
static int tt_by_id_sz = 0;
// First id handed out by the registry.
#define TT_START TT_USER
// Next id to hand out; h_allocate_token_type() post-increments it.
static int tt_next = TT_START;
/*
// TODO: These are for the extension registry, which does not yet have a good name.
static void *ext_registry = NULL;
static Entry** ext_by_id = NULL;
static int ext_by_id_sz = 0;
static int ext_next = 0;
*/
/*
 * Ordering callback for tsearch()/tfind(): both arguments point at Entry
 * records, which are ordered by strcmp() on their names.
 */
static int compare_entries(const void* v1, const void* v2) {
  const Entry *lhs = v1;
  const Entry *rhs = v2;
  return strcmp(lhs->name, rhs->name);
}
int h_allocate_token_type(const char* name) {
Entry* new_entry = malloc(sizeof(*new_entry));
new_entry->name = name;
new_entry->value = -1;
Entry* probe = *(Entry**)tsearch(new_entry, &tt_registry, compare_entries);
if (probe->value != -1) {
// Token type already exists...
// TODO: treat this as a bug?
free(new_entry);
return probe->value;
} else {
// new value
probe->name = strdup(probe->name); // drop ownership of name
probe->value = tt_next++;
if ((probe->value - TT_START) >= tt_by_id_sz) {
if (tt_by_id_sz == 0)
tt_by_id = malloc(sizeof(*tt_by_id) * ((tt_by_id_sz = (tt_next - TT_START) * 16)));
else
tt_by_id = realloc(tt_by_id, sizeof(*tt_by_id) * ((tt_by_id_sz *= 2)));
}
assert(probe->value - TT_START < tt_by_id_sz);
tt_by_id[probe->value - TT_START] = probe;
return probe->value;
}
}
int h_get_token_type_number(const char* name) {
Entry e;
e.name = name;
Entry **ret = (Entry**)tfind(&e, &tt_registry, compare_entries);
if (ret == NULL)
return -1;
else
return (*ret)->value;
}
/*
 * Look up the name registered for the id `token_type`.
 * Returns NULL for ids outside the range handed out so far
 * (below TT_START, or at/after tt_next).
 */
const char* h_get_token_type_name(int token_type) {
  if (token_type < TT_START || token_type >= tt_next)
    return NULL;
  return tt_by_id[token_type - TT_START]->name;
}

View file

@ -1,4 +1,5 @@
#include <glib.h>
#include <string.h>
#include "test_suite.h"
#include "hammer.h"
@ -11,6 +12,24 @@ static void test_tt_user(void) {
g_check_cmp_int32(TT_USER, >, TT_ERR);
}
// Exercises the token-type registry: allocation, lookup by name, lookup by id,
// and the failure results for unknown ids and names.
static void test_tt_registry(void) {
// A newly allocated type must receive an id in the user range.
int id = h_allocate_token_type("com.upstandinghackers.test.token_type");
g_check_cmp_int32(id, >=, TT_USER);
// A second, differently named type must receive a distinct user-range id.
int id2 = h_allocate_token_type("com.upstandinghackers.test.token_type_2");
g_check_cmp_int32(id2, !=, id);
g_check_cmp_int32(id2, >=, TT_USER);
// Name -> id lookups must return the ids handed out above.
g_check_cmp_int32(id, ==, h_get_token_type_number("com.upstandinghackers.test.token_type"));
g_check_cmp_int32(id2, ==, h_get_token_type_number("com.upstandinghackers.test.token_type_2"));
// Id -> name lookups must return the registered names.
g_check_string("com.upstandinghackers.test.token_type", ==, h_get_token_type_name(id));
g_check_string("com.upstandinghackers.test.token_type_2", ==, h_get_token_type_name(id2));
// Id 0 was never handed out by the registry, so it must have no name.
if (h_get_token_type_name(0) != NULL) {
g_test_message("Unknown token type should not return a name");
g_test_fail();
}
// A name that was never registered must map to -1.
g_check_cmp_int32(h_get_token_type_number("com.upstandinghackers.test.unkown_token_type"), ==, -1);
}
// Registers the miscellaneous core tests with the GLib test harness.
void register_misc_tests(void) {
  // Test path -> test function, registered in the order listed.
  static const struct {
    const char *path;
    void (*func)(void);
  } misc_tests[] = {
    { "/core/misc/tt_user", test_tt_user },
    { "/core/misc/tt_registry", test_tt_registry },
  };

  for (size_t i = 0; i < sizeof(misc_tests) / sizeof(misc_tests[0]); i++)
    g_test_add_func(misc_tests[i].path, misc_tests[i].func);
}

View file

@ -162,7 +162,7 @@ static void test_middle(gconstpointer backend) {
#include <ctype.h>
HParsedToken* upcase(const HParseResult *p) {
HParsedToken* upcase(const HParseResult *p, void* user_data) {
switch(p->ast->token_type) {
case TT_SEQUENCE:
{
@ -202,7 +202,8 @@ static void test_action(gconstpointer backend) {
h_ch('B'),
NULL),
NULL),
upcase);
upcase,
NULL);
g_check_parse_match(action_, (HParserBackend)GPOINTER_TO_INT(backend), "ab", 2, "(u0x41 u0x42)");
g_check_parse_match(action_, (HParserBackend)GPOINTER_TO_INT(backend), "AB", 2, "(u0x41 u0x42)");
@ -364,7 +365,7 @@ static void test_epsilon_p(gconstpointer backend) {
g_check_parse_match(epsilon_p_3, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
}
bool validate_test_ab(HParseResult *p) {
bool validate_test_ab(HParseResult *p, void* user_data) {
if (TT_SEQUENCE != p->ast->token_type)
return false;
if (TT_UINT != p->ast->seq->elements[0]->token_type)
@ -376,7 +377,8 @@ bool validate_test_ab(HParseResult *p) {
static void test_attr_bool(gconstpointer backend) {
const HParser *ab_ = h_attr_bool(h_many1(h_choice(h_ch('a'), h_ch('b'), NULL)),
validate_test_ab);
validate_test_ab,
NULL);
g_check_parse_match(ab_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
g_check_parse_match(ab_, (HParserBackend)GPOINTER_TO_INT(backend), "bb", 2, "(u0x62 u0x62)");
@ -433,7 +435,7 @@ static void test_ambiguous(gconstpointer backend) {
HParser *p_ = h_ch('+');
HParser *E_ = h_indirect();
h_bind_indirect(E_, h_choice(h_sequence(E_, p_, E_, NULL), d_, NULL));
HParser *expr_ = h_action(E_, h_act_flatten);
HParser *expr_ = h_action(E_, h_act_flatten, NULL);
g_check_parse_match(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "(u0x64)");
g_check_parse_match(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+d", 3, "(u0x64 u0x2b u0x64)");

15
tools/scanreplace.py Normal file
View file

@ -0,0 +1,15 @@
from string import Template
def replace_action(target, source, env):
    """Write ``target[0]`` as ``source[0]`` with ``$name`` placeholders filled.

    The source file is read as a ``string.Template`` and substituted using
    *env* as the mapping. ``safe_substitute`` is used, so placeholders with
    no matching key are left in the output unchanged rather than raising.

    The source is read in full before the target is opened, so a missing or
    unreadable source does not truncate an existing target. Both files are
    closed deterministically.

    Returns 0, the value the original implementation returned on completion.
    """
    with open(str(source[0]), 'r') as infile:
        template = Template(infile.read())
    with open(str(target[0]), 'w') as outfile:
        outfile.write(template.safe_substitute(env))
    return 0
def replace_string(target, source, env):
    """Return the progress message naming the first target and first source."""
    names = (str(target[0]), str(source[0]))
    return "building '%s' from '%s'" % names
def generate(env, **kw):
    """Install the ``ScanReplace`` builder into *env*.

    The builder runs ``replace_action`` (reported via ``replace_string``) on
    a single source whose default suffix is ``.in``.
    """
    substitute = env.Action(replace_action, replace_string)
    builder = env.Builder(
        action=substitute,
        src_suffix='.in',
        single_source=True,
    )
    env['BUILDERS']['ScanReplace'] = builder
def exists(env):
    """Report that this tool is always available (returns 1 for any *env*)."""
    return 1