Added Python versions of base64 examples
These are transliterations of the existing C files. They're not particularly Pythonic or performant, but they're a start. Example of usage ``` $ echo ' YW55IGNhcm5hbCBwbGVhcw==' | PYTHONPATH=../build/opt/src/bindings/python/ python base64.py inputsize=27 input= YW55IGNhcm5hbCBwbGVhcw== ((((89L, 87L, 53L, 53L), (73L, 71L, 78L, 104L), (99L, 109L, 53L, 104L), (98L, 67L, 66L, 119L), (98L, 71L, 86L, 104L)), (99L, 'w', '=', '=')),) $ echo ' YW55IGNhcm5hbCBwbGVhcw==' | PYTHONPATH=../build/opt/src/bindings/python/ python base64_sem1.py inputsize=27 input= YW55IGNhcm5hbCBwbGVhcw== (97L, 110L, 121L, 32L, 99L, 97L, 114L, 110L, 97L, 108L, 32L, 112L, 108L, 101L, 97L, 115L) $ echo ' YW55IGNhcm5hbCBwbGVhcw==' | PYTHONPATH=../build/opt/src/bindings/python/ python base64_sem2.py inputsize=27 input= YW55IGNhcm5hbCBwbGVhcw== (97L, 110L, 121L, 32L, 99L, 97L, 114L, 110L, 97L, 108L, 32L, 112L, 108L, 101L, 97L, 115L) ```
This commit is contained in:
parent
7017ea6d91
commit
c6280a98bc
3 changed files with 388 additions and 0 deletions
60
examples/base64.py
Normal file
60
examples/base64.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
# Example parser: Base64, syntax only.
|
||||||
|
#
|
||||||
|
# Demonstrates how to construct a Hammer parser that recognizes valid Base64
|
||||||
|
# sequences.
|
||||||
|
#
|
||||||
|
# Note that no semantic evaluation of the sequence is performed, i.e. the
|
||||||
|
# byte sequence being represented is not returned, or determined. See
|
||||||
|
# base64_sem1.py and base64_sem2.py for examples of how to attach appropriate
|
||||||
|
# semantic actions to the grammar.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import hammer as h
|
||||||
|
|
||||||
|
|
||||||
|
def init_parser():
    """Build the Hammer parser for a Base64 document (syntax only).

    The returned parser is a sequence of the Base64 grammar and an
    end-of-input check, each wrapped in ``h.whitespace()``.  No semantic
    actions are attached, so the result is the raw parse tree.
    """
    # CORE: character classes, given as byte-value ranges
    dec_digit = h.ch_range(0x30, 0x39)
    upper = h.ch_range(0x41, 0x5a)
    lower = h.ch_range(0x61, 0x7a)
    letter = h.choice(upper, lower)

    # AUX: the two remaining alphabet characters and the padding character
    plus_sign = h.ch('+')
    solidus = h.ch('/')
    pad = h.ch('=')

    # Any Base64 digit, and the restricted digit sets that may directly
    # precede one or two padding characters.
    any_digit = h.choice(letter, dec_digit, plus_sign, solidus)
    digit_before_one_pad = h.in_('AEIMQUYcgkosw048')
    digit_before_two_pads = h.in_('AQgw')

    # Block shapes: four digits, three digits + '=', two digits + '=='
    full_block = h.repeat_n(any_digit, 4)
    block_one_pad = h.sequence(any_digit, any_digit, digit_before_one_pad,
                               pad)
    block_two_pads = h.sequence(any_digit, digit_before_two_pads, pad, pad)

    # Any number of full blocks, then at most one padded block
    full_blocks = h.many(full_block)
    padded_tail = h.optional(h.choice(block_one_pad, block_two_pads))
    body = h.sequence(full_blocks, padded_tail)

    return h.sequence(h.whitespace(body), h.whitespace(h.end_p()))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse Base64 text from stdin and print the resulting parse tree.

    The input size and the input itself are echoed to stderr first.  The
    parse result is printed to stdout only when it is truthy.
    """
    parser = init_parser()

    text = sys.stdin.read()
    print('inputsize=%i' % len(text), file=sys.stderr)
    # No newline is appended: the input is echoed exactly as it was read.
    print('input=%s' % text, file=sys.stderr, end='')

    tree = parser.parse(text)
    if not tree:
        return

    print(tree)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||||
169
examples/base64_sem1.py
Normal file
169
examples/base64_sem1.py
Normal file
|
|
@ -0,0 +1,169 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
# Example parser: Base64, with fine-grained semantic actions
|
||||||
|
#
|
||||||
|
# Demonstrates how to attach semantic actions to grammar rules and piece by
|
||||||
|
# piece transform the parse tree into the desired semantic representation,
|
||||||
|
# in this case a sequence of 8-bit values.
|
||||||
|
#
|
||||||
|
# Those rules using h.action get an attached action, which must be declared
|
||||||
|
# (as a function).
|
||||||
|
#
|
||||||
|
# This variant of the example uses fine-grained semantic actions that
|
||||||
|
# transform the parse tree in small steps in a bottom-up fashion. Compare
|
||||||
|
# base64_sem2.py for an alternative approach using a single top-level action.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import hammer as h
|
||||||
|
|
||||||
|
|
||||||
|
# Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
# They are named act_<rulename> to mirror the C example; here each one is
# passed explicitly to h.action().
|
||||||
|
|
||||||
|
def act_bsfdig(p, user_data=None):
    """Semantic action: map one Base64 digit to its 6-bit value.

    Args:
        p: The parsed digit, either as an integer character code or as a
            one-character string; both forms are accepted.
        user_data: Unused; accepted so the function has the action
            signature.

    Returns:
        The digit's value in the range 0..63.

    Raises:
        ValueError: If ``p`` is not a character of the Base64 alphabet.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import numbers

    # Normalise to an integer character code so every comparison below is
    # int-to-int.
    c = p if isinstance(p, numbers.Integral) else ord(p)

    if 0x41 <= c <= 0x5A:    # A-Z
        return c - 0x41
    elif 0x61 <= c <= 0x7A:  # a-z
        return c - 0x61 + 26
    elif 0x30 <= c <= 0x39:  # 0-9
        return c - 0x30 + 52
    elif c == 0x2B:          # '+'
        return 62
    elif c == 0x2F:          # '/'
        return 63

    raise ValueError('not a Base64 digit: %r' % (p,))
|
||||||
|
|
||||||
|
# Hammer's Python bindings don't currently expose h_act_index or h_act_ignore
|
||||||
|
|
||||||
|
def act_index0(p, user_data=None):
    """Semantic action: reduce a sequence result to its first element.

    ``user_data`` is accepted for the action signature and is not used.
    """
    first = p[0]
    return first
|
||||||
|
|
||||||
|
def act_ignore(p, user_data=None):
    """Semantic action: discard the parsed value.

    Always returns None, whatever ``p`` and ``user_data`` are.
    """
    return
|
||||||
|
|
||||||
|
# The restricted digit sets are converted exactly like ordinary digits.
act_bsfdig_4bit = act_bsfdig
act_bsfdig_2bit = act_bsfdig

# Padding and whitespace carry no value.
# NOTE(review): act_ws is not referenced by init_parser(), which uses
# h.ignore() for whitespace instead.
act_equals = act_ignore
act_ws = act_ignore

# The document's value is the first element of its top-level sequence.
act_document = act_index0
|
||||||
|
|
||||||
|
|
||||||
|
def act_base64_n(n, p, user_data=None):
    """General-form action to turn a block of Base64 digits into bytes.

    Args:
        n: Number of bytes the block encodes.  The first ``n + 1`` items
            of ``p`` are read as digit values.
        p: Indexable sequence of 6-bit digit values; an item that is None
            (or otherwise falsy) counts as 0.
        user_data: Unused; accepted so the function has the action
            signature.

    Returns:
        A tuple of ``n`` integers in the range 0..255, most significant
        byte first.
    """
    digit_count = n + 1

    # Pack the digits into one integer, six bits each, first digit highest.
    x = 0
    for i in range(digit_count):
        x = (x << 6) | (p[i] or 0)

    # Align, i.e. cut off the extra low bits that do not fill a whole byte.
    x >>= (6 * digit_count) % 8

    # Slice the aligned value into bytes, most significant byte first.
    return tuple((x >> (8 * (n - 1 - i))) & 0xFF for i in range(n))
|
||||||
|
|
||||||
|
|
||||||
|
# One action per block shape, with the byte count n bound in advance:
# four digits -> 3 bytes, three digits + '=' -> 2 bytes,
# two digits + '==' -> 1 byte.
act_base64_3 = functools.partial(act_base64_n, 3)
act_base64_2 = functools.partial(act_base64_n, 2)
act_base64_1 = functools.partial(act_base64_n, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def act_base64(p, user_data=None):
    """Semantic action: flatten the decoded blocks into one byte tuple.

    ``p`` is a 2-tuple: ``p[0]`` is a tuple of already-decoded blocks and
    ``p[1]`` is the optional trailing block.  The trailing block is
    appended only when it is a tuple.  ``user_data`` is not used.
    """
    assert isinstance(p, tuple)
    assert len(p) == 2
    assert isinstance(p[0], tuple)

    # Concatenate the full blocks in order.
    decoded = [byte for block in p[0] for byte in block]

    # Append the trailing padded block, if there is one.
    tail = p[1]
    if isinstance(tail, tuple):
        decoded += tail

    return tuple(decoded)
|
||||||
|
|
||||||
|
|
||||||
|
def init_parser():
    """Build and return the Base64 parser with per-rule semantic actions.
    """
    # CORE
    #
    # This is a direct translation of the C example.  In C the literal 0x30
    # is interchangeable with the char literal '0' (note the single
    # quotes); this is not the case in Python.
    #
    # TODO: In the interests of being more Pythonic, settle on either
    # string literals or integers.
    dec_digit = h.ch_range(0x30, 0x39)
    upper = h.ch_range(0x41, 0x5a)
    lower = h.ch_range(0x61, 0x7a)
    letter = h.choice(upper, lower)
    blank = h.in_(" \t\n\r\f\v")

    # AUX.
    plus_sign = h.ch('+')
    solidus = h.ch('/')
    pad = h.action(h.ch('='), act_equals)

    # Digits, each converted to its numeric value by its action
    any_digit = h.action(h.choice(letter, dec_digit, plus_sign, solidus),
                         act_bsfdig)
    digit_before_one_pad = h.action(h.in_("AEIMQUYcgkosw048"),
                                    act_bsfdig_4bit)
    digit_before_two_pads = h.action(h.in_("AQgw"), act_bsfdig_2bit)

    # Blocks, each converted to a tuple of bytes by its action
    full_block = h.action(h.repeat_n(any_digit, 4), act_base64_3)
    block_one_pad = h.action(
        h.sequence(any_digit, any_digit, digit_before_one_pad, pad),
        act_base64_2)
    block_two_pads = h.action(
        h.sequence(any_digit, digit_before_two_pads, pad, pad),
        act_base64_1)

    full_blocks = h.many(full_block)
    padded_tail = h.optional(h.choice(block_one_pad, block_two_pads))
    body = h.action(h.sequence(full_blocks, padded_tail), act_base64)

    # TODO: This is not quite the same as the C example, which uses
    # act_ignore.  But I can't get hammer to filter any value returned by
    # act_ignore.
    skip_blanks = h.ignore(h.many(blank))
    document = h.action(
        h.sequence(skip_blanks, body, skip_blanks, h.end_p()),
        act_document)

    # BUG: sometimes inputs that should parse just don't.
    # It *seemed* to happen mostly with things like "bbbbaaaaBA==".
    # Using fewer actions seemed to make it less likely.

    return document
|
||||||
|
|
||||||
|
def main():
    """Decode Base64 text from stdin and print the decoded byte values.

    The input size and the input itself are echoed to stderr first.  The
    result is printed to stdout whenever it is not None, so input that
    decodes to an empty tuple is still reported.
    """
    parser = init_parser()

    s = sys.stdin.read()
    print('inputsize=%i' % len(s), file=sys.stderr)
    # No newline is appended: the input is echoed exactly as it was read.
    print('input=%s' % s, file=sys.stderr, end='')

    result = parser.parse(s)

    # A plain truthiness test would also hide the empty tuple.
    # NOTE(review): assumes the bindings return None when parsing fails;
    # confirm against the Hammer Python bindings.
    if result is not None:
        print(result)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||||
159
examples/base64_sem2.py
Normal file
159
examples/base64_sem2.py
Normal file
|
|
@ -0,0 +1,159 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
# Example parser: Base64, with coarse-grained semantic actions
|
||||||
|
#
|
||||||
|
# Demonstrates how to attach semantic actions to a grammar and transform the
|
||||||
|
# parse tree into the desired semantic representation, in this case a sequence
|
||||||
|
# of 8-bit values.
|
||||||
|
#
|
||||||
|
# Those rules using h.action get an attached action, which must be declared
|
||||||
|
# (as a function).
|
||||||
|
#
|
||||||
|
# This variant of the example uses coarse-grained semantic actions,
|
||||||
|
# transforming the entire parse tree in one big step. Compare base64_sem1.py
|
||||||
|
# for an alternative approach using a fine-grained piece-by-piece
|
||||||
|
# transformation.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import hammer as h
|
||||||
|
|
||||||
|
|
||||||
|
# Semantic actions for the grammar below, each corresponds to an "ARULE".
|
||||||
|
# They are named act_<rulename> to mirror the C example; here each one is
# passed explicitly to h.action().
|
||||||
|
|
||||||
|
def bsfdig_value(p):
    """Return the numeric value of a parsed Base64 digit.

    Args:
        p: The parsed digit, either as an integer character code or as a
            one-character string; both forms are accepted.

    Returns:
        The digit's value in the range 0..63, or 0 when ``p`` is not a
        character of the Base64 alphabet (for example the padding '=').
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import numbers

    # Normalise to an integer character code so every comparison below is
    # int-to-int.
    c = p if isinstance(p, numbers.Integral) else ord(p)

    if 0x41 <= c <= 0x5A:    # A-Z
        return c - 0x41
    elif 0x61 <= c <= 0x7A:  # a-z
        return c - 0x61 + 26
    elif 0x30 <= c <= 0x39:  # 0-9
        return c - 0x30 + 52
    elif c == 0x2B:          # '+'
        return 62
    elif c == 0x2F:          # '/'
        return 63

    return 0
|
||||||
|
|
||||||
|
def act_base64(p, user_data=None):
    """Semantic action: decode the whole Base64 parse tree in one step.

    ``p`` is a 2-tuple: ``p[0]`` is a tuple of four-digit blocks and
    ``p[1]`` is the optional trailing padded block (anything that is not a
    tuple means there is none).  Returns the decoded bytes as a tuple of
    integers.  ``user_data`` is not used.
    """
    assert isinstance(p, tuple)
    assert len(p) == 2
    assert isinstance(p[0], tuple)

    def pack(digits, count):
        # Fold the first `count` digits into one integer, six bits each.
        acc = 0
        for i in range(count):
            acc = (acc << 6) | bsfdig_value(digits[i])
        return acc

    full_blocks = p[0]
    tail = p[1]
    out = []

    # Each full block of four digits yields three bytes.
    for quad in full_blocks:
        assert isinstance(quad, tuple)

        word = pack(quad, 4)
        out.append((word >> 16) & 0xFF)
        out.append((word >> 8) & 0xFF)
        out.append(word & 0xFF)

    # The trailing block is told apart by its third element: '=' there
    # means two padding characters, hence two digits and one byte.
    if isinstance(tail, tuple):
        if tail[2] == '=':
            word = pack(tail, 2)
            out.append((word >> 4) & 0xFF)
        else:
            word = pack(tail, 3)
            out.append((word >> 10) & 0xFF)
            out.append((word >> 2) & 0xFF)

    return tuple(out)
|
||||||
|
|
||||||
|
# Hammer's Python bindings don't currently expose h_act_index or h_act_ignore
|
||||||
|
|
||||||
|
def act_index0(p, user_data=None):
    """Semantic action: reduce a sequence result to its first element.

    ``user_data`` is accepted for the action signature and is not used.
    """
    first = p[0]
    return first
|
||||||
|
|
||||||
|
def act_ignore(p, user_data=None):
    """Semantic action: discard the parsed value.

    Always returns None, whatever ``p`` and ``user_data`` are.
    """
    return
|
||||||
|
|
||||||
|
# NOTE(review): act_ws is not referenced by init_parser(), which uses
# h.ignore() for whitespace instead.
act_ws = act_ignore
# The document's value is the first element of its top-level sequence.
act_document = act_index0
|
||||||
|
|
||||||
|
|
||||||
|
def init_parser():
    """Build and return the Base64 parser with one top-level action.

    Only the Base64 rule and the document rule carry actions; digits and
    blocks are left as raw parse results for act_base64 to decode.
    """
    # CORE
    dec_digit = h.ch_range(0x30, 0x39)
    upper = h.ch_range(0x41, 0x5a)
    lower = h.ch_range(0x61, 0x7a)
    letter = h.choice(upper, lower)
    blank = h.in_(" \t\n\r\f\v")

    # AUX.
    plus_sign = h.ch('+')
    solidus = h.ch('/')
    pad = h.ch('=')

    any_digit = h.choice(letter, dec_digit, plus_sign, solidus)
    digit_before_one_pad = h.in_("AEIMQUYcgkosw048")
    digit_before_two_pads = h.in_("AQgw")

    # Block shapes: four digits, three digits + '=', two digits + '=='
    full_block = h.repeat_n(any_digit, 4)
    block_one_pad = h.sequence(any_digit, any_digit, digit_before_one_pad,
                               pad)
    block_two_pads = h.sequence(any_digit, digit_before_two_pads, pad, pad)

    full_blocks = h.many(full_block)
    padded_tail = h.optional(h.choice(block_one_pad, block_two_pads))
    body = h.action(h.sequence(full_blocks, padded_tail), act_base64)

    # TODO: This is not quite the same as the C example, which uses
    # act_ignore.  But I can't get hammer to filter any value returned by
    # act_ignore.
    skip_blanks = h.ignore(h.many(blank))
    document = h.action(
        h.sequence(skip_blanks, body, skip_blanks, h.end_p()),
        act_document)

    # BUG: sometimes inputs that should parse just don't.
    # It *seemed* to happen mostly with things like "bbbbaaaaBA==".
    # Using fewer actions seemed to make it less likely.

    return document
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode Base64 text from stdin and print the decoded byte values.

    The input size and the input itself are echoed to stderr first.  The
    result is printed to stdout whenever it is not None, so input that
    decodes to an empty tuple is still reported.
    """
    parser = init_parser()

    s = sys.stdin.read()
    print('inputsize=%i' % len(s), file=sys.stderr)
    # No newline is appended: the input is echoed exactly as it was read.
    print('input=%s' % s, file=sys.stderr, end='')

    result = parser.parse(s)

    # A plain truthiness test would also hide the empty tuple.
    # NOTE(review): assumes the bindings return None when parsing fails;
    # confirm against the Hammer Python bindings.
    if result is not None:
        print(result)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue