These have no effect in Python 3.x, they are the default. Enabling them in Python 2.x, enabling them in Python 2.x allows single source compatiblity.
169 lines
4.7 KiB
Python
169 lines
4.7 KiB
Python
#!/usr/bin/env python2
|
|
|
|
# Example parser: Base64, with fine-grained semantic actions
|
|
#
|
|
# Demonstrates how to attach semantic actions to grammar rules and piece by
|
|
# piece transform the parse tree into the desired semantic representation,
|
|
# in this case a sequence of 8-bit values.
|
|
#
|
|
# Those rules using h.action get an attached action, which must be declared
|
|
# (as a function).
|
|
#
|
|
# This variant of the example uses fine-grained semantic actions that
|
|
# transform the parse tree in small steps in a bottom-up fashion. Compare
|
|
# base64_sem2.py for an alternative approach using a single top-level action.
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
|
|
import functools
|
|
import sys
|
|
|
|
import hammer as h
|
|
|
|
|
|
# Semantic actions for the grammar below, each corresponds to an "ARULE".
|
|
# They must be named act_<rulename>.
|
|
|
|
def act_bsfdig(p, user_data=None):
|
|
# FIXME See the note in init_parser()
|
|
c = p if isinstance(p, (int, long)) else ord(p)
|
|
|
|
if 0x41 <= c <= 0x5A: # A-Z
|
|
return c - 0x41
|
|
elif 0x61 <= c <= 0x7A: # a-z
|
|
return c - 0x61 + 26
|
|
elif 0x30 <= c <= 0x39: # 0-9
|
|
return c - 0x30 + 52
|
|
elif c == '+':
|
|
return 62
|
|
elif c == '/':
|
|
return 63
|
|
else:
|
|
raise ValueError
|
|
|
|
# Hammer's Python bindings don't currently expose h_act_index or hact_ignore
|
|
|
|
def act_index0(p, user_data=None):
|
|
return p[0]
|
|
|
|
def act_ignore(p, user_data=None):
|
|
return None
|
|
|
|
act_bsfdig_4bit = act_bsfdig
|
|
act_bsfdig_2bit = act_bsfdig
|
|
|
|
act_equals = act_ignore
|
|
act_ws = act_ignore
|
|
|
|
act_document = act_index0
|
|
|
|
|
|
def act_base64_n(n, p, user_data=None):
|
|
"""General-form action to turn a block of base64 digits into bytes.
|
|
"""
|
|
res = [0]*n
|
|
|
|
x = 0
|
|
bits = 0
|
|
for i in xrange(0, n+1):
|
|
x <<= 6
|
|
x |= p[i] or 0
|
|
bits += 6
|
|
|
|
x >>= bits % 8 # align, i.e. cut off extra bits
|
|
|
|
for i in xrange(n):
|
|
item = x & 0xFF
|
|
|
|
res[n-1-i] = item # output the last byte and
|
|
x >>= 8 # discard it
|
|
|
|
return tuple(res)
|
|
|
|
|
|
act_base64_3 = functools.partial(act_base64_n, 3)
|
|
act_base64_2 = functools.partial(act_base64_n, 2)
|
|
act_base64_1 = functools.partial(act_base64_n, 1)
|
|
|
|
|
|
def act_base64(p, user_data=None):
|
|
assert isinstance(p, tuple)
|
|
assert len(p) == 2
|
|
assert isinstance(p[0], tuple)
|
|
|
|
res = []
|
|
|
|
# concatenate base64_3 blocks
|
|
for elem in p[0]:
|
|
res.extend(elem)
|
|
|
|
# append one trailing base64_2 or _1 block
|
|
tok = p[1]
|
|
if isinstance(tok, tuple):
|
|
res.extend(tok)
|
|
|
|
return tuple(res)
|
|
|
|
|
|
def init_parser():
|
|
"""Return a parser with the grammar to be recognized.
|
|
"""
|
|
# CORE
|
|
|
|
# This is a direct translation of the C example. In C the literal 0x30
|
|
# is interchangable with the char literal '0' (note the single quotes).
|
|
# This is not the case in Python.
|
|
|
|
# TODO In the interests of being more Pythonic settle on either string
|
|
# literals, or integers
|
|
digit = h.ch_range(0x30, 0x39)
|
|
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
|
|
space = h.in_(" \t\n\r\f\v")
|
|
|
|
# AUX.
|
|
plus = h.ch('+')
|
|
slash = h.ch('/')
|
|
equals = h.action(h.ch('='), act_equals)
|
|
|
|
bsfdig = h.action(h.choice(alpha, digit, plus, slash), act_bsfdig)
|
|
bsfdig_4bit = h.action(h.in_("AEIMQUYcgkosw048"), act_bsfdig_4bit)
|
|
bsfdig_2bit = h.action(h.in_("AQgw"), act_bsfdig_2bit)
|
|
base64_3 = h.action(h.repeat_n(bsfdig, 4), act_base64_3)
|
|
base64_2 = h.action(h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals),
|
|
act_base64_2)
|
|
base64_1 = h.action(h.sequence(bsfdig, bsfdig_2bit, equals, equals),
|
|
act_base64_1)
|
|
base64 = h.action(h.sequence(h.many(base64_3),
|
|
h.optional(h.choice(base64_2,
|
|
base64_1))),
|
|
act_base64)
|
|
|
|
# TODO This is not quite the same as the C example, with uses act_ignore.
|
|
# But I can't get hammer to filter any value returned by act_ignore.
|
|
ws = h.ignore(h.many(space))
|
|
document = h.action(h.sequence(ws, base64, ws, h.end_p()),
|
|
act_document)
|
|
|
|
# BUG sometimes inputs that should just don't parse.
|
|
# It *seemed* to happen mostly with things like "bbbbaaaaBA==".
|
|
# Using less actions seemed to make it less likely.
|
|
|
|
return document
|
|
|
|
def main():
|
|
parser = init_parser()
|
|
|
|
s = sys.stdin.read()
|
|
inputsize = len(s)
|
|
print('inputsize=%i' % inputsize, file=sys.stderr)
|
|
print('input=%s' % s, file=sys.stderr, end='')
|
|
|
|
result = parser.parse(s)
|
|
|
|
if result:
|
|
#print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
|
|
print(result)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|