Merge pull request #89 from thequux/ruby-bindings

Ruby bindings
This commit is contained in:
Meredith L. Patterson 2014-01-10 21:12:52 -08:00
commit 61f79252a5
19 changed files with 2120 additions and 17 deletions

View file

@ -6,6 +6,30 @@ env:
- BINDINGS=none - BINDINGS=none
matrix: matrix:
include: include:
- compiler: gcc
language: ruby
rvm: ruby-1.9.3-p484
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-1.9.3-p484
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.0.0-p353
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.0.0-p353
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.1.0
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.1.0
env: BINDINGS=ruby CC=clang
- compiler: gcc - compiler: gcc
language: python language: python
python: "2.7" python: "2.7"
@ -66,7 +90,6 @@ before_install:
- if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi - if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi - if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi
install: true install: true
before_script: before_script:
- if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi

View file

@ -20,7 +20,7 @@ Features
* C++ (not yet implemented) * C++ (not yet implemented)
* Java (not currently building; give us a few days) * Java (not currently building; give us a few days)
* Python * Python
* Ruby (not yet implemented) * Ruby
* Perl * Perl
* [Go](https://github.com/prevoty/hammer) * [Go](https://github.com/prevoty/hammer)
* PHP * PHP
@ -39,6 +39,7 @@ Installing
* python2.7-dev (for Python bindings) * python2.7-dev (for Python bindings)
* a JDK (for Java bindings) * a JDK (for Java bindings)
* a working [phpenv](https://github.com/CHH/phpenv) configuration (for PHP bindings) * a working [phpenv](https://github.com/CHH/phpenv) configuration (for PHP bindings)
* Ruby >= 1.9.3 and bundler, for the Ruby bindings
* mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings) * mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings)
* nunit (for testing .NET bindings) * nunit (for testing .NET bindings)

View file

@ -7,7 +7,7 @@ import sys
vars = Variables(None, ARGUMENTS) vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate)) vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept)) vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python'])) vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python', 'ruby']))
env = Environment(ENV = {'PATH' : os.environ['PATH']}, env = Environment(ENV = {'PATH' : os.environ['PATH']},
variables = vars, variables = vars,

View file

@ -19,7 +19,7 @@
token { token {
parser token("95\xa2"); parser token("95\xa2");
test "95\xa2" --> "95\xa2"; test "95\xa2" --> "95\xa2";
test "95\xa2" --> fail; test "95\xa3" --> fail;
} }
ch { ch {
@ -87,7 +87,7 @@ uint8 {
} }
int_range { int_range {
parser int_range(uint8(), 0x3, 0x10); parser int_range(uint8(), 0x3, 0xa);
test <05> --> u0x05; test <05> --> u0x05;
test <0b> --> fail; test <0b> --> fail;
} }
@ -299,17 +299,17 @@ rightrec {
test "aa" --> ['a',['a']]; test "aa" --> ['a',['a']];
test "aaa" --> ['a',['a',['a']]]; test "aaa" --> ['a',['a',['a']]];
} }
## Only for GLR
ambiguous { #ambiguous {
subparser $d = ch('d'); # subparser $d = ch('d');
subparser $p = ch('+'); # subparser $p = ch('+');
subparser $e = choice(sequence($e, $p, $e), $d); # subparser $e = choice(sequence($e, $p, $e), $d);
# TODO: implement action/h_act_flatten # # TODO: implement action/h_act_flatten
parser $e; # parser $e;
#
test "d" --> 'd'; # test "d" --> 'd';
test "d+d" --> ['d','+','d']; # test "d+d" --> ['d','+','d'];
test "d+d+d" --> [['d','+','d'],'+','d']; # test "d+d+d" --> [['d','+','d'],'+','d'];
} #}

259
lib/tsgenruby.pl Normal file
View file

@ -0,0 +1,259 @@
% -*- prolog -*-
% Run with:
% $ swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl >output-file
% Note: this needs to be run from the lib/ directory.
% So, from the ruby directory
% (cd ../../../lib && swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl ) >test/autogen_test.rb
:- module(tsgenruby,
[gen_ts/2]).
:- expects_dialect(swi).
:- use_module(tsparser).
:- use_module(library(record)).
:- record testsuite_state(parser_no:integer = 0, test_no:integer=0).
% TODO: build a Box-like pretty-printer
% to_title_case(+Codes, -Result)
% Walks a code list. A separator ("_" or "-") is dropped and the code
% that follows it is replaced by R0, which is related to it through
% code_type(R0, to_upper(S0)). Every other code is copied unchanged.
% NOTE(review): memberchk/2 on "_-" requires double-quoted text to be a
% code list; confirm the double_quotes flag on the SWI-Prolog in use.
to_title_case([], []) :- !.
to_title_case([WSep,S0|Ss], [R0|Rs]) :-
memberchk(WSep, "_-"), !,
code_type(R0, to_upper(S0)),
to_title_case(Ss,Rs).
to_title_case([S0|Ss], [S0|Rs]) :-
\+ memberchk(S0, "_-"),
!, to_title_case(Ss,Rs).
% format_parser_name(+Name, -Result)
% Result is the code list "h." followed by the codes of atom Name, i.e.
% a call on the `h` local that the generated setup method defines.
format_parser_name(Name, Result) :-
atom_codes(Name, CName),
append("h.", CName, Result), !.
% format_test_name(+Name, -Result)
% Result is "Test" followed by the title-cased Name. A leading "_"
% (0x5f) is prepended so the first letter goes through the separator
% clause of to_title_case/2 as well.
format_test_name(Name, Result) :-
atom_codes(Name, CName),
to_title_case([0x5f|CName], RName),
append("Test", RName, Result), !.
% indent(+N)//
% Emits the indent-unit literal N times (nothing for N = 0).
indent(0) --> "", !.
indent(N) -->
{N > 0},
" ",
{Np is N - 1},
indent(Np).
% pp_char_guts(+Code)//
% Emits one byte as it must appear inside a Ruby double-quoted string
% literal:
%   - `"` and `'` are backslash-escaped;
%   - `\` is doubled, otherwise it would start an escape sequence in the
%     generated Ruby source;
%   - `#` is escaped so that "#{", "#$" and "#@" cannot trigger string
%     interpolation;
%   - any other printable ASCII code (0x20..0x7E) is emitted as-is;
%   - everything else is written as a two-digit \xHH escape.
pp_char_guts(0x22) -->
    "\\\"", !.
pp_char_guts(0x27) -->
    "\\'", !.
pp_char_guts(0x5c) -->
    "\\\\", !.
pp_char_guts(0x23) -->
    "\\#", !.
pp_char_guts(A) -->
    { A >= 0x20, A < 0x7F } ->
    [A];
    "\\x",
    { H is A >> 4, L is A /\ 0xF,
      code_type(Hc, xdigit(H)),
      code_type(Lc, xdigit(L)) },
    [Hc,Lc].
% pp_hexnum_guts(+N)//
% Emits the hexadecimal digits of N, most significant first, by
% recursing on N >> 4 before emitting the low nibble. Emits nothing for
% 0, which is why pp_parser//1 special-cases num(0).
pp_hexnum_guts(0) --> !.
pp_hexnum_guts(A) -->
{ L is A /\ 0xF,
H is A >> 4,
code_type(Lc, xdigit(L)) },
pp_hexnum_guts(H),
[Lc], !.
% pp_string_guts(+Codes)//
% Emits every code of the list through pp_char_guts//1.
pp_string_guts([]) --> !.
pp_string_guts([X|Xs]) -->
pp_char_guts(X),
pp_string_guts(Xs), !.
% pp_parser_args(+Args)//
% Emits the argument terms separated by ", ". The first argument is
% printed bare; pp_parser_args_rest//1 prefixes each later one with the
% separator.
pp_parser_args([]) --> !.
pp_parser_args([X|Rest]) -->
pp_parser(X),
pp_parser_args_rest(Rest).
pp_parser_args_rest([]) --> !.
pp_parser_args_rest([X|Xs]) -->
", ",
pp_parser(X),
pp_parser_args_rest(Xs).
% pp_parser(+Term)//
% Emits the Ruby expression for one test-suite term:
%   parser(Name, Args) -> "h.Name", plus "(Args...)" when Args is non-empty
%   string(Codes)      -> a double-quoted Ruby string literal
%   num(N)             -> "0" or a (possibly negative) 0x... hex literal
%   char(C)            -> the numeric literal followed by ".chr"
%   ref(Name)          -> the instance variable "@sp_Name"
% Any other term prints a diagnostic with writef/2 and fails.
pp_parser(parser(Name, Args)) -->
!,
{format_parser_name(Name,Fname)},
Fname,
({Args \= []} ->
"(", pp_parser_args(Args), ")"
; "") .
pp_parser(string(Str)) --> !,
"\"",
pp_string_guts(Str),
"\"", !.
pp_parser(num(0)) --> "0", !.
pp_parser(num(Num)) --> !,
( {Num < 0} ->
"-0x", {RNum is -Num}; "0x", {RNum = Num} ),
pp_hexnum_guts(RNum).
pp_parser(char(C)) --> !,
pp_parser(num(C)), ".chr". % Ruby is encoding-aware; this is a
% more reasonable implementation
pp_parser(ref(Name)) -->
{atom_codes(Name,CName)},
"@sp_", CName, !.
pp_parser(A) -->
{ writef("WTF is a %w?\n", [A]),
!, fail
}.
% upd_state_test_elem(+Elem, +OldSt, -NewSt)
% Advances the testsuite_state counters for one test-case element:
% parser(_) increments parser_no, test(_, _) and testFail(_) increment
% test_no, and any other element leaves the state unchanged.
upd_state_test_elem(parser(_), OldSt, NewSt) :- !,
testsuite_state_parser_no(OldSt, OldRNo),
NewRNo is OldRNo + 1,
set_parser_no_of_testsuite_state(NewRNo, OldSt, NewSt).
upd_state_test_elem(test(_, _), OldSt, NewSt) :- !,
testsuite_state_test_no(OldSt, OldTNo),
NewTNo is OldTNo + 1,
set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt).
upd_state_test_elem(testFail(_), OldSt, NewSt) :- !,
testsuite_state_test_no(OldSt, OldTNo),
NewTNo is OldTNo + 1,
set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt).
upd_state_test_elem(_, St, St).
% curparser_name(+St)//
% Emits "@parser_<N>", where N is the parser counter stored in St.
curparser_name(St) --> !,
{ testsuite_state_parser_no(St, RNo),
format(string(X), "@parser_~w", RNo) },
X.
% curtest_name(+St)//
% Emits "test_<N>", where N is the test counter stored in St.
curtest_name(St) --> !,
{ testsuite_state_test_no(St, RNo),
format(string(X), "test_~w", RNo) },
X.
% pp_test_elem(+Phase, +Elem, +St)//
% Emits the Ruby code one test-case element contributes in a phase:
%   decl - subparser: "@sp_Name = h.indirect"; nothing for other elements
%   init - parser: "@parser_N = <expr>"; subparser: "@sp_Name.bind <expr>"
%   exec - test: a test_N method calling assert_parse_ok with the input
%          and the expected result; testFail: a test_N method calling
%          refute_parse_ok with the input
% Element/phase combinations that produce no output match the empty
% clauses.
pp_test_elem(decl, parser(_), _) --> !.
pp_test_elem(init, parser(P), St) -->
!, indent(2),
curparser_name(St), " = ",
pp_parser(P),
"\n".
pp_test_elem(exec, parser(_), _) --> !.
pp_test_elem(decl, subparser(Name,_), _) -->
!, indent(2),
pp_parser(ref(Name)),
" = ",
pp_parser(parser(indirect,[])),
"\n".
pp_test_elem(init, subparser(Name, Parser), _) -->
!, indent(2),
pp_parser(ref(Name)), ".bind ",
pp_parser(Parser),
"\n".
pp_test_elem(exec, subparser(_,_), _) --> !.
pp_test_elem(decl, test(_,_), _) --> !.
pp_test_elem(init, test(_,_), _) --> !.
pp_test_elem(decl, testFail(_), _) --> !.
pp_test_elem(init, testFail(_), _) --> !.
pp_test_elem(exec, test(Str, Result), St) -->
!,
"\n",
indent(1), "def ", curtest_name(St), "\n",
indent(2), "assert_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)),
", ",
pp_parse_result(Result),
"\n",
indent(1), "end\n".
pp_test_elem(exec, testFail(Str), St) -->
!,
"\n",
indent(1), "def ", curtest_name(St), "\n",
indent(2), "refute_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), "\n",
indent(1), "end\n".
% pp_test_elem(_, _) --> !.
% pp_result_seq(+Results)//
% Emits expected-result terms separated by ", " via pp_parse_result//1;
% pp_result_seq_r//1 handles every element after the first.
pp_result_seq([]) --> !.
pp_result_seq([X|Xs]) --> !,
pp_parse_result(X),
pp_result_seq_r(Xs).
pp_result_seq_r([]) --> !.
pp_result_seq_r([X|Xs]) --> !,
", ",
pp_parse_result(X),
pp_result_seq_r(Xs).
% pp_byte_seq(+Bytes)//
% Emits the numbers of the list as numeric literals separated by ", ".
% NOTE(review): not referenced by any other rule visible in this file.
pp_byte_seq([]) --> !.
pp_byte_seq([X|Xs]) --> !,
pp_parser(num(X)),
pp_byte_seq_r(Xs).
pp_byte_seq_r([]) --> !.
pp_byte_seq_r([X|Xs]) --> !,
", ",
pp_parser(num(X)),
pp_byte_seq_r(Xs).
% pp_parse_result(+Result)//
% Emits the Ruby literal for an expected parse result: char/1 as
% "<num>.chr", seq/1 as a bracketed list, none as "nil", uint/1 and
% sint/1 as numeric literals, and string/1 as a quoted string. There is
% no catch-all clause (the one below is commented out), so an
% unrecognised result term makes the rule fail.
pp_parse_result(char(C)) --> !,
%"(System.UInt64)",
pp_parser(char(C)).
pp_parse_result(seq(Args)) --> !,
"[", pp_result_seq(Args), "]".
pp_parse_result(none) --> !,
"nil".
pp_parse_result(uint(V)) --> !,
pp_parser(num(V)).
pp_parse_result(sint(V)) --> !,
pp_parser(num(V)).
pp_parse_result(string(A)) --> !,
pp_parser(string(A)).
%pp_parse_result(A) -->
% "\x1b[1;31m",
% {with_output_to(codes(C), write(A))},
% C,
% "\x1b[0m".
% pp_test_elems(+Phase, +Elems)//
% Emits the output of every element for one phase. Each phase starts
% from a fresh default testsuite_state, and the state is advanced
% *before* an element is printed, so a parser or test is named after
% the counter value that already includes it (numbering starts at 1).
pp_test_elems(Phase, Elems) -->
{ default_testsuite_state(State) },
pp_test_elems(Phase, Elems, State).
pp_test_elems(_, [], _) --> !.
pp_test_elems(Phase, [X|Xs], St) -->
!,
{ upd_state_test_elem(X, St, NewSt) },
%{NewSt = St},
pp_test_elem(Phase,X, NewSt),
pp_test_elems(Phase,Xs, NewSt).
% pp_test_case(+TestCase)//
% Emits one Minitest::Test subclass for testcase(Name, Elems): a setup
% method holding the decl and init phases (after binding the local `h`
% to Hammer::Parser), followed by the test methods of the exec phase.
pp_test_case(testcase(Name, Elems)) -->
!,
{ format_test_name(Name, TName) },
indent(0), "class ", TName, " < Minitest::Test\n",
indent(1), "def setup\n",
indent(2), "super\n",
indent(2), "h = Hammer::Parser\n",
pp_test_elems(decl, Elems),
pp_test_elems(init, Elems),
indent(1), "end\n",
pp_test_elems(exec, Elems),
indent(0), "end\n\n".
% pp_test_cases(+TestCases)//
% Emits every test case of the list in order.
pp_test_cases([]) --> !.
pp_test_cases([A|As]) -->
pp_test_case(A),
pp_test_cases(As).
% pp_test_suite(+Suite)//
% Emits the whole generated Ruby file: the require lines followed by
% every test case.
pp_test_suite(Suite) -->
"require 'bundler/setup'\n",
"require 'minitest/autorun'\n",
"require 'hammer'\n",
pp_test_cases(Suite).
% gen_ts(+Suite, -Codes)
% Codes is the generated Ruby source for Suite (exported entry point).
gen_ts(Foo,Str) :-
phrase(pp_test_suite(Foo),Str).
% prolog/0
% Command-line entry point (see the run instructions at the top of the
% file): reads the test suite with read_tc/1, generates the Ruby source
% and writes it to the current output.
prolog :-
read_tc(A),
gen_ts(A, Res),
writef("%s", [Res]).

2
src/bindings/ruby/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/Gemfile.lock
.bundle

View file

@ -0,0 +1,8 @@
# Bundler manifest for the Ruby bindings.
source 'https://rubygems.org'
# Runtime dependencies are taken from the gemspec in this directory.
gemspec
# Gems needed only to run the test suite.
group :test do
gem 'minitest', '~> 5.2'
gem 'rake', '>10'
end

View file

@ -0,0 +1,76 @@
# hammer-parser
Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
## Notes
* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
## Development
1. `cd src/bindings/ruby`.
2. Run `bundle install` to install dependencies.
3. Run `bundle console` to open `irb` with hammer loaded.
4. To run tests, just run `bundle exec rake test`.
## Installation
TODO
## Examples
### Building a parser
```ruby
parser = Hammer::Parser.build {
token 'Hello '
choice {
token 'Mom'
token 'Dad'
}
token '!'
}
```
Also possible:
```ruby
parser = Hammer::ParserBuilder.new
.token('Hello ')
.choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad'))
.token('!')
.build
```
More like hammer in C:
```ruby
h = Hammer::Parser
parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
```
### Parsing
```ruby
result = parser.parse 'Hello Mom!'
=> #<HParseResult>
result = parser.parse 'Hello Someone!'
=> nil
```
The `parse` method returns an `HParseResult` object, which needs to be
kept around until you're entirely done with the parse tree, which can
be accessed with `result.ast`.
While the AST can be accessed using the same interface as the C
HParsedToken type, we recommend using `result.ast.unmarshal` instead.
This converts the entire parse tree into a standalone, Ruby-native
data structure, which will likely be much easier to work with.

View file

@ -0,0 +1,8 @@
require 'rake/testtask'
# Defines the `test` task, which runs every test/*_test.rb file.
Rake::TestTask.new do |t|
#t.pattern = "test/*_test.rb"
t.test_files = FileList['test/*_test.rb']
end
# Running plain `rake` runs the tests.
task :default => [:test]

View file

@ -0,0 +1,33 @@
# -*- python -*-
# SCons build script for the Ruby bindings: installs the gem
# dependencies with bundler and registers a `testruby` test target.
import os.path
Import("env libhammer_shared testruns targets")
# Files whose changes should re-trigger the bundler setup and the tests.
rubysources = [
Glob("test/*.rb"),
Glob("lib/hammer.rb"),
Glob("lib/*/*.rb"),
"hammer-parser.gemspec",
"Rakefile",
"Gemfile",
"README.md",
]
rubyenv = env.Clone()
# Forward Ruby-related variables (names containing RUBY, GEM or rvm)
# from the calling environment into the commands' environment.
for k,v in os.environ.items():
if "RUBY" in k or "GEM" in k or "rvm" in k:
rubyenv['ENV'][k] = v
# Point LD_LIBRARY_PATH at the directory of the freshly built shared
# library (this replaces any previous value).
rubyenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0]))
# Directory containing the Gemfile; the commands below cd into it.
rubyenv['RBDIR'] = os.path.dirname(str(rubyenv.File("Gemfile").path))
setup = rubyenv.Command(Dir(".bundle"), rubysources, "cd $RBDIR && bundle install")
AlwaysBuild(setup)
rubytestexec = rubyenv.Command(None, [setup] + rubysources, "cd $RBDIR && bundle exec rake test")
rubytest = Alias("testruby", [rubytestexec], rubytestexec)
AlwaysBuild(rubytestexec)
testruns.append(rubytest)
# No need for an install target; everybody just uses gems for that.

View file

@ -0,0 +1,31 @@
#encoding: UTF-8
# Gem specification for the Ruby bindings to hammer.
Gem::Specification.new do |s|
s.name = 'hammer-parser'
s.version = '0.1.0'
s.summary = 'Ruby bindings to the hammer parsing library.'
s.description = s.summary # TODO: longer description?
s.authors = ['Meredith L. Patterson', 'TQ Hirsch', 'Jakob Rath']
# TODO:
# s.email = ...
# s.homepage = ...
# The packaged files are listed explicitly rather than globbed.
files = []
files << 'README.md'
# NOTE(review): this pushes the list as one nested element; it relies on
# the files being flattened when read back through s.files - confirm.
# NOTE(review): "hamer-parser_plugin.rb" is spelled with a single "m";
# confirm it matches the file name on disk before changing it.
files << [
"lib/hammer/internal.rb",
"lib/hammer/parser.rb",
"lib/hammer/parser_builder.rb",
"lib/hammer.rb",
"lib/minitest/hamer-parser_plugin.rb",
"test/autogen_test.rb",
"test/parser_test.rb"
]
s.files = files
# Test files are the packaged files under test/ whose name ends in _test.rb.
s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ }
s.require_paths = %w[lib]
s.add_dependency 'ffi', '~> 1.9'
s.add_dependency 'docile', '~> 1.1' # TODO: Find a way to make this optional
end

View file

@ -0,0 +1,52 @@
require 'hammer/internal'
require 'hammer/parser'
require 'hammer/parser_builder'
# TODO:
# Probably need to rename this file to 'hammer-parser.rb', so
# people can use "require 'hammer-parser'" in their code.
# Leave this in for now to be able to play around with HParseResult in irb.
# NOTE: everything below is scratch code that runs every time this file
# is required. It builds parsers, runs parses and assigns the global $r.

# Example 1: a sequence built with the DSL that contains an indirect
# parser, which is bound to a concrete parser after the sequence exists.
x = nil
parser = Hammer::Parser.build {
token 'abc'
x = indirect
end_p
}
x.bind(Hammer::Parser.token('abd'))
#$p = parser
$r = parser.parse 'abcabd'
#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token }
# Example 2: a semantic action attached to uint8 that doubles each value.
h = Hammer::Parser
parser =
h.many(
h.action(h.uint8) { |r|
#p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}"
r.data * 2
})
#parser = Hammer::Parser.build {
# many {
# uint8
# action { |r|
# p r
# r[:ast]
# }
# }
#}
$r = parser.parse 'abcdefgh'
#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]}
# or:
#p $r.ast.data.map(&:data)
# Example 3: a predicate (attr_bool) that accepts only bytes <= 100.
h = Hammer::Parser
parser = h.many(h.attr_bool(h.uint8) { |r| r.data <= 100 })
#p parser.parse('abcdefgh').ast.data.map(&:data)

View file

@ -0,0 +1,346 @@
require 'ffi'
module Hammer
module Internal
extend FFI::Library
ffi_lib 'hammer'
# A dynamically scoped variable whose value is stored per thread under a
# generated key in Thread.current.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0

  # Accepts a default value, a block, or both.
  #
  # * default only: every thread starts out with that very object.
  # * block only: the block is called lazily, once per thread, to
  #   produce the initial value.
  # * both: the block is called with the default value as its argument.
  #
  # +name+ is accepted but not used.
  def initialize(default=nil, name=nil, &block)
    @default = default
    @block = block.nil? ? Proc.new { |initial| initial } : block
    @@current_symbol += 1
    @sym = "#{SYMBOL_PREFIX}#{@@current_symbol}".to_sym
  end

  # Current thread's value, initialising it on first access.
  def value
    store = Thread.current
    store[@sym] = @block.call(@default) unless store.key?(@sym)
    store[@sym]
  end

  # Overwrites the current thread's value.
  def value=(new_value)
    Thread.current[@sym] = new_value
  end

  # Runs the block with the variable temporarily set to +new_value+ and
  # returns the block's result. The previous value is restored even if
  # the block raises.
  def with(new_value, &block)
    saved = value
    begin
      self.value = new_value
      block.call
    ensure
      self.value = saved
    end
  end
end
# Maybe we can implement Hammer::Parser with FFI::DataConverter.
# That way, most hammer functions won't need to be wrapped.
# (Probably need to wrap token, sequence and choice only).
# See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
typedef :pointer, :h_parser
# FFI data converter between hammer's numeric token-type ids and Ruby
# symbols. It also keeps, per token type, the proc that turns an
# HParsedToken of that type into a Ruby value (used by
# HParsedToken#unmarshal).
class HTokenType
  extend FFI::DataConverter

  # Symbol => numeric id for the token types known up front.
  @@known_type_map = {
    :none => 1,
    :bytes => 2,
    :sint => 4,
    :uint => 8,
    :sequence => 16,
  }
  # Numeric id => symbol.
  @@inverse_type_map = @@known_type_map.invert
  # Symbol => proc converting a parsed token into a Ruby value.
  @@from_hpt = {
    :none => Proc.new { nil },
    :bytes => Proc.new { |tok| tok[:data][:bytes].token },
    :sint => Proc.new { |tok| tok[:data][:sint] },
    :uint => Proc.new { |tok| tok[:data][:uint] },
    :sequence => Proc.new { |tok| tok[:data][:seq].map { |child| child.unmarshal } },
  }

  # Registers a new token type called +name+ (Symbol or String) with
  # hammer and records +block+ as its unmarshalling proc.
  # Note that this replaces the usual constructor: it returns the block,
  # not an HTokenType instance.
  def self.new(name, &block)
    name_str = name.to_s
    name_sym = name.to_sym
    num = Hammer::Internal.h_allocate_token_type(name_str)
    @@known_type_map[name_sym] = num
    @@inverse_type_map[num] = name_sym
    @@from_hpt[name_sym] = block
  end

  # Numeric id for the token type +name+, asking hammer (and caching the
  # answer) when it is not known yet.
  # Raises ArgumentError when hammer reports a non-positive id.
  def self.from_name(name)
    return @@known_type_map[name] if @@known_type_map.key?(name)

    num = Hammer::Internal.h_get_token_type_number(name.to_s)
    raise ArgumentError, "Unknown token type #{name}" if num <= 0
    @@inverse_type_map[num] = name
    @@known_type_map[name] = num
  end

  # Symbol for the numeric id +num+, asking hammer (and caching the
  # answer) when it is not known yet. Returns nil when hammer has no name
  # for it.
  def self.from_num(num)
    return @@inverse_type_map[num] if @@inverse_type_map.key?(num)

    raw_name = Hammer::Internal.h_get_token_type_name(num)
    return nil if raw_name.nil?
    sym = raw_name.to_sym
    @@known_type_map[sym] = num
    @@inverse_type_map[num] = sym
  end

  # FFI::DataConverter hook: token types travel as C ints.
  def self.native_type
    FFI::Type::INT
  end

  # FFI::DataConverter hook: integers pass through, names are looked up.
  def self.to_native(val, ctx)
    val.is_a?(Integer) ? val : from_name(val)
  end

  # FFI::DataConverter hook: yields the symbol when one is known,
  # otherwise the raw number.
  def self.from_native(val, ctx)
    from_num(val) || val
  end
end
# Define these as soon as possible, so that they can be used
# without fear elsewhere
# (HTokenType calls all three of them, and token types are registered
# with HTokenType.new while this module body is still being evaluated.)
attach_function :h_allocate_token_type, [:string], :int
attach_function :h_get_token_type_number, [:string], :int
attach_function :h_get_token_type_name, [:int], :string
# Ruby view of hammer's HCountedArray: a counted array of pointers to
# parsed tokens.
class HCountedArray < FFI::Struct
  layout :capacity, :size_t,
         :used, :size_t,
         :arena, :pointer,
         :elements, :pointer # HParsedToken**

  # Number of slots in use.
  def length
    self[:used]
  end

  # The used slots, each wrapped in an HParsedToken.
  def elements
    base = FFI::Pointer.new(:pointer, self[:elements])
    Array.new(self[:used]) { |idx| HParsedToken.new(base[idx].read_pointer) }
  end

  #def [](idx)
  # raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length
  # elem_array = FFI::Pointer.new(:pointer, self[:elements])
  # return HParsedToken.new(elem_array[i].read_pointer)
  #end

  # Maps the block over the wrapped elements.
  def map(&code)
    elements.map { |tok| code.call(tok) }
  end

  # Calls the block for every wrapped element.
  def each(&code)
    elements.each { |tok| code.call(tok) }
  end
end
# Ruby view of hammer's HBytes: a pointer to bytes plus a length.
class HBytes < FFI::Struct
  layout :token, :pointer, # uint8_t*
         :len, :size_t

  # The referenced bytes as a Ruby String of exactly +len+ bytes.
  #
  # TODO: Encoding?
  # Should be the same encoding as the string the token was created with.
  # But how do we get to this knowledge at this point?
  # Cheap solution: Just ask the user (additional parameter with default value of UTF-8).
  def token
    byte_count = self[:len]
    self[:token].read_string(byte_count)
  end

  # Length in bytes.
  # TODO: Probably should rename this to match ruby conventions: length, count, size
  def len
    self[:len]
  end
end
# A byte token together with the object id of the Ruby Encoding it
# should be tagged with.
class HString < FFI::Struct
  layout :content, HBytes.by_ref,
         :encoding, :uint64

  # The token's bytes, tagged with the encoding looked up from the
  # stored object id.
  def token
    raw = self[:content].token
    raw.force_encoding(ObjectSpace._id2ref(self[:encoding]))
  end
end
# Token type whose user pointer refers to an HString; it unmarshals to
# the string tagged with its recorded encoding.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt|
hpt.user(HString).token
}
# Token type whose uint field holds the object id of a Ruby object; it
# unmarshals to that object.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt|
ObjectSpace._id2ref(hpt[:data][:uint])
}
# Payload union of an HParsedToken. Which member is meaningful depends
# on the token's type (see HParsedToken#data).
# NOTE(review): the member list presumably has to mirror the C
# declaration of HParsedToken's data union - verify against hammer.h.
class HParsedTokenDataUnion < FFI::Union
layout :bytes, HBytes.by_value,
:sint, :int64,
:uint, :uint64,
:dbl, :double,
:flt, :float,
:seq, HCountedArray.by_ref,
:user, :pointer
end
# Ruby view of hammer's HParsedToken, one node of a parse tree.
class HParsedToken < FFI::Struct
  layout :token_type, HTokenType,
         :data, HParsedTokenDataUnion.by_value,
         :index, :size_t,
         :bit_offset, :char

  # nil when this struct wraps a NULL pointer, otherwise the struct.
  def normalize
    null? ? nil : self
  end

  # The token type as converted by HTokenType.
  def token_type
    self[:token_type]
  end

  # Payload for the token types handled here; nil for any other type.
  # TODO: Is this name ok?
  def data
    case token_type
    when :bytes then self[:data][:bytes].token
    when :sint then self[:data][:sint]
    when :uint then self[:data][:uint]
    when :sequence then self[:data][:seq].elements
    when :user then self[:data][:user]
    end
  end

  # The raw HBytes payload. Raises ArgumentError for non-bytes tokens.
  def bytes
    raise ArgumentError, 'wrong token type' if token_type != :bytes
    self[:data][:bytes]
  end

  # The raw HCountedArray payload. Raises ArgumentError for non-sequence
  # tokens.
  def seq
    raise ArgumentError, 'wrong token type' if token_type != :sequence
    self[:data][:seq]
  end

  # The +index+ field of the token.
  def index
    self[:index]
  end

  # The +bit_offset+ field of the token.
  def bit_offset
    self[:bit_offset]
  end

  # Interprets the user pointer as an instance of the given struct class.
  def user(struct)
    user_ptr = self[:data][:user]
    struct.by_ref.from_native(user_ptr, nil)
  end

  # Converts this token into a Ruby value using the proc registered for
  # its token type in HTokenType.
  def unmarshal
    converters = Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)
    converters[token_type].call self
  end
end
# Ruby view of hammer's HParseResult.
class HParseResult < FFI::Struct
  layout :ast, HParsedToken.by_ref,
         :bit_length, :long_long,
         :arena, :pointer

  # Root token of the parse tree, or nil when the AST pointer is NULL.
  def ast
    root = self[:ast]
    root.normalize
  end

  # The +bit_length+ field of the result.
  def bit_length
    self[:bit_length]
  end

  # Release hook: frees the result through hammer unless the pointer is
  # NULL.
  def self.release(ptr)
    return if ptr.null?
    Hammer::Internal.h_parse_result_free(ptr)
  end

  # Allocates an instance of the FFI struct class +type+ in this
  # result's arena.
  def arena_alloc(type)
    arena = self[:arena]
    Hammer::Internal.arena_alloc(arena, type)
  end
end
# Allocates +type.size+ bytes in the given hammer arena and returns them
# wrapped as an instance of the FFI struct class +type+.
def self.arena_alloc(arena, type)
  raw = h_arena_malloc(arena, type.size)
  type.by_ref.from_native(raw, nil)
end
# FFI declarations for the hammer C API. Parsers are passed around as
# opaque :h_parser pointers (see the typedef above).

# run a parser
# The result is declared as an auto_ptr, so HParseResult.release is the
# hook that frees it.
attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string?
# build a parser
attach_function :h_token, [:buffer_in, :size_t], :h_parser
attach_function :h_ch, [:uint8], :h_parser
attach_function :h_ch_range, [:uint8, :uint8], :h_parser
attach_function :h_int_range, [:h_parser, :int64, :int64], :h_parser
attach_function :h_bits, [:size_t, :bool], :h_parser
attach_function :h_int64, [], :h_parser
attach_function :h_int32, [], :h_parser
attach_function :h_int16, [], :h_parser
attach_function :h_int8, [], :h_parser
attach_function :h_uint64, [], :h_parser
attach_function :h_uint32, [], :h_parser
attach_function :h_uint16, [], :h_parser
attach_function :h_uint8, [], :h_parser
attach_function :h_whitespace, [:h_parser], :h_parser
attach_function :h_left, [:h_parser, :h_parser], :h_parser
attach_function :h_right, [:h_parser, :h_parser], :h_parser
attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser
attach_function :h_in, [:pointer, :size_t], :h_parser
attach_function :h_not_in, [:pointer, :size_t], :h_parser
attach_function :h_end_p, [], :h_parser
attach_function :h_nothing_p, [], :h_parser
# Variadic: Hammer::Parser passes the sub-parsers as :pointer arguments
# followed by a terminating nil pointer.
attach_function :h_sequence, [:varargs], :h_parser
attach_function :h_choice, [:varargs], :h_parser
attach_function :h_butnot, [:h_parser, :h_parser], :h_parser
attach_function :h_difference, [:h_parser, :h_parser], :h_parser
attach_function :h_xor, [:h_parser, :h_parser], :h_parser
attach_function :h_many, [:h_parser], :h_parser
attach_function :h_many1, [:h_parser], :h_parser
attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser
attach_function :h_optional, [:h_parser], :h_parser
attach_function :h_ignore, [:h_parser], :h_parser
attach_function :h_sepBy, [:h_parser, :h_parser], :h_parser
attach_function :h_sepBy1, [:h_parser, :h_parser], :h_parser
attach_function :h_epsilon_p, [], :h_parser
attach_function :h_length_value, [:h_parser, :h_parser], :h_parser
attach_function :h_and, [:h_parser], :h_parser
attach_function :h_not, [:h_parser], :h_parser
attach_function :h_indirect, [], :h_parser
attach_function :h_bind_indirect, [:h_parser, :h_parser], :void
# Semantic actions: a callback from a parse result to a new token.
callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref
attach_function :h_action, [:h_parser, :HAction], :h_parser
# Predicates: a callback from a parse result to a boolean.
callback :HPredicate, [HParseResult.by_ref], :bool
attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser
# free the parse result
attach_function :h_parse_result_free, [HParseResult.by_ref], :void
# TODO: Does the HParser* need to be freed?
# Add the arena
attach_function :h_arena_malloc, [:pointer, :size_t], :pointer
end
end

View file

@ -0,0 +1,222 @@
require 'hammer/internal'
module Hammer
class Parser
@@saved_objects = Hammer::Internal::DynamicVariable.new nil, "Hammer parse-time pins"
# Not meant to be called directly: build parsers through the
# constructor methods instead (Hammer::Parser.int64 and friends).
#
# name: Name of the parser. Should be a symbol.
# h_parser: The pointer to the parser as returned by hammer.
# dont_gc: Extra objects the parser depends on; they are referenced from
#          this object so the garbage collector keeps them alive at least
#          as long as it lives. A non-Array value is wrapped in an array.
def initialize(name, h_parser, dont_gc=[])
  @name = name
  @h_parser = h_parser
  # Always keep a private array, so more pins can be appended later
  # without touching the caller's object.
  @dont_gc = dont_gc.is_a?(Array) ? dont_gc.dup : [dont_gc]
end
attr_reader :name
attr_reader :h_parser
# Parse the given data. Returns the parse result if successful, nil otherwise.
#
# data: A string containing the data to parse.
#
# Raises RuntimeError when there is no underlying parser and
# ArgumentError when data is not a String.
def parse(data)
  raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
  raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.

  ibuf = FFI::MemoryPointer.from_string(data)
  # Objects produced by actions during this parse are collected in a
  # fresh pin list for the duration of the call.
  @@saved_objects.with([]) do
    # data.bytesize: don't include the trailing null
    result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize)
    next nil if result.null?

    # NOTE:
    # The parse result *must* hold a reference to the parser that created it!
    # Otherwise, the parser might get garbage-collected while the result is still valid.
    # Any pointers to token strings will then be invalid.
    result.instance_variable_set :@parser, self
    result.instance_variable_set :@pins, @@saved_objects.value
    result
  end
end
# Binds an indirect parser to +other_parser+ and keeps a reference to
# it. Raises RuntimeError when called on anything but an :indirect
# parser.
def bind(other_parser)
  unless name == :indirect
    raise RuntimeError, 'can only bind indirect parsers'
  end
  Hammer::Internal.h_bind_indirect(h_parser, other_parser.h_parser)
  @dont_gc.push(other_parser)
end
# Wraps +parser+ in a semantic action.
# Can pass the action either as a Proc in second parameter, or as block.
# The action receives the AST of the wrapped parser's result; whatever
# it returns becomes the payload of a "ruby.object" token.
# Raises ArgumentError when neither a Proc nor a block is given.
def self.action(parser, action=nil, &block)
  action = action.nil? ? block : action
  raise ArgumentError, 'no action' if action.nil?

  real_action = Proc.new do |hpr|
    ret = action.call(hpr.ast)
    # Pin the result: the token only carries its object id.
    @@saved_objects.value << ret

    hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken)
    unless hpr.ast.nil?
      hpt[:index] = hpr[:ast][:index]
      hpt[:bit_offset] = hpr[:ast][:bit_offset]
    end
    hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.object"
    hpt[:data][:uint] = ret.object_id
    hpt
  end

  wrapped = Hammer::Internal.h_action(parser.h_parser, real_action)
  Hammer::Parser.new(:action, wrapped, [parser, action, real_action])
end
# Wraps +parser+ in a predicate check.
# Can pass the predicate either as a Proc in second parameter, or as block.
# The predicate receives the AST of the wrapped parser's result.
# Raises ArgumentError when neither a Proc nor a block is given.
def self.attr_bool(parser, predicate=nil, &block)
  predicate = predicate.nil? ? block : predicate
  raise ArgumentError, 'no predicate' if predicate.nil?

  real_pred = Proc.new { |hpr| predicate.call(hpr.ast) }
  wrapped = Hammer::Internal.h_attr_bool(parser.h_parser, real_pred)
  Hammer::Parser.new(:attr_bool, wrapped, [parser, predicate, real_pred])
end
# Builds a parser for the literal +string+. The raw token parser is
# wrapped in an action that re-tags each match as an "encodedStr" token
# carrying the encoding of +string+.
def self.token(string)
  # Need to copy string to a memory buffer (not just string.dup)
  # * Original string might be modified, this must not affect existing tokens
  # * We need a constant memory address (Ruby string might be moved around by the Ruby VM)
  buffer = FFI::MemoryPointer.from_string(string)
  # buffer.size includes the null byte at the end
  h_parser = Hammer::Internal.h_token(buffer, buffer.size-1)
  encoding = string.encoding

  wrapping_action = Proc.new do |hpr|
    matched = hpr[:ast]
    hstr = hpr.arena_alloc(Hammer::Internal::HString)
    hstr[:content] = matched[:data][:bytes]
    hstr[:encoding] = encoding.object_id

    hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken)
    hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.encodedStr"
    hpt[:data][:user] = hstr.to_ptr
    hpt[:bit_offset] = matched[:bit_offset]
    hpt[:index] = matched[:index]
    hpt
  end

  wrapped_parser = Hammer::Internal.h_action(h_parser, wrapping_action)
  # Everything the native parser refers to is pinned alongside it.
  Hammer::Parser.new(:token, wrapped_parser, [buffer, string, encoding, wrapping_action, h_parser])
end
# Normalises a "character" argument to a byte value.
#
# num: either an Integer in 0..255 or a String of exactly one byte.
#
# Returns the byte value as an Integer.
# Raises ArgumentError for anything else.
def self.marshal_ch_arg(num)
  if num.is_a?(String)
    raise ArgumentError, "Expecting either a fixnum in 0..255 or a single-byte String" unless num.bytesize == 1
    num = num.bytes.first
  end
  # Integer rather than Fixnum: Fixnum is deprecated since Ruby 2.4 and no
  # longer defined in Ruby 3.2+. The range check rejects large values
  # either way.
  raise ArgumentError, 'Expecting a Fixnum in 0..255 or a single-byte String' unless num.is_a?(Integer) && num.between?(0, 255)
  num
end
private_class_method :marshal_ch_arg
# Wraps a byte-level parser in an action that turns the matched numeric
# value into a one-character String (via #chr).
def self.ch_parser_wrapper(parser)
  Hammer::Parser.action(parser) { |matched| matched.data.chr }
end
# Parser for a single byte, given either as an Integer in 0..255 or as a
# single-byte String. Its result is delivered as a one-character String.
def self.ch(ch)
  byte = marshal_ch_arg(ch)
  raw = Hammer::Parser.new(:ch, Hammer::Internal.h_ch(byte), nil)
  ch_parser_wrapper(raw)
end
# Parser built from hammer's h_ch_range for the two given bounds, each
# given either as an Integer in 0..255 or as a single-byte String. Its
# result is delivered as a one-character String.
def self.ch_range(ch1, ch2)
  low = marshal_ch_arg(ch1)
  high = marshal_ch_arg(ch2)
  raw = Hammer::Parser.new(:ch_range, Hammer::Internal.h_ch_range(low, high), nil)
  ch_parser_wrapper(raw)
end
# Wraps the integer parser +parser+ with hammer's h_int_range, passing
# +i1+ and +i2+ as the bounds. The wrapped parser is kept referenced.
def self.int_range(parser, i1, i2)
  bounded = Hammer::Internal.h_int_range(parser.h_parser, i1, i2)
  Hammer::Parser.new(:int_range, bounded, [parser])
end
# Parser built from hammer's h_in for the bytes of +charset+. Its result
# is delivered as a one-character String.
# Raises ArgumentError unless +charset+ is a String.
def self.in(charset)
  unless charset.is_a?(String)
    raise ArgumentError, "Expected a String"
  end
  ibuf = FFI::MemoryPointer.from_string(charset)
  raw = Hammer::Parser.new(:in, Hammer::Internal.h_in(ibuf, charset.bytesize), nil)
  ch_parser_wrapper(raw)
end
# Wraps +parser+ with hammer's h_repeat_n, passing +count+ as the
# repetition count.
#
# The wrapped parser is pinned in the new parser's dont_gc list, like
# every other combinator does. Without the pin the Ruby object for
# +parser+ (and any callbacks it holds for the native parser) could be
# garbage-collected while the native repeat_n parser still refers to it.
def self.repeat_n(parser, count)
  h_parser = Hammer::Internal.h_repeat_n(parser.h_parser, count)
  Hammer::Parser.new(:repeat_n, h_parser, [parser])
end
# Parser built from hammer's h_not_in for the bytes of +charset+. Its
# result is delivered as a one-character String.
# Raises ArgumentError unless +charset+ is a String.
def self.not_in(charset)
  unless charset.is_a?(String)
    raise ArgumentError, "Expected a String"
  end
  ibuf = FFI::MemoryPointer.from_string(charset)
  raw = Hammer::Parser.new(:not_in, Hammer::Internal.h_not_in(ibuf, charset.bytesize), nil)
  ch_parser_wrapper(raw)
end
# Defines a parser constructor with the given name.
# The generated class method takes the sub-parsers as arguments, calls
# the hammer function and returns a Hammer::Parser that keeps the
# sub-parsers referenced.
# Options:
# hammer_function: name of the hammer function to call (default: 'h_'+name)
# varargs: Whether the function is taking a variable number of arguments (default: false)
def self.define_parser(name, options = {})
  hammer_function = options[:hammer_function] || "h_#{name}".to_sym
  varargs = options[:varargs] || false

  # Define a new class method
  define_singleton_method(name) do |*parsers|
    args =
      if varargs
        # Variadic FFI calls take explicit (type, value) pairs and are
        # terminated by a nil pointer.
        parsers.flat_map { |p| [:pointer, p.h_parser] } + [:pointer, nil]
      else
        parsers.map(&:h_parser)
      end
    raw = Hammer::Internal.send(hammer_function, *args)
    Hammer::Parser.new(name, raw, parsers)
  end
end
private_class_method :define_parser
# Constructors generated by define_parser. Each one becomes a class
# method (Hammer::Parser.many, Hammer::Parser.int64, ...) that calls the
# hammer function of the same name prefixed with "h_".
# sequence and choice take a variable number of sub-parsers.
define_parser :sequence, varargs: true
define_parser :choice, varargs: true
define_parser :int64
define_parser :int32
define_parser :int16
define_parser :int8
define_parser :uint64
define_parser :uint32
define_parser :uint16
define_parser :uint8
define_parser :whitespace
define_parser :left
define_parser :right
define_parser :middle
define_parser :end_p
define_parser :nothing_p
define_parser :butnot
define_parser :difference
define_parser :xor
define_parser :many
define_parser :many1
define_parser :optional
define_parser :ignore
define_parser :sepBy
define_parser :sepBy1
define_parser :epsilon_p
define_parser :length_value
define_parser :and
define_parser :not
define_parser :indirect
end
end

View file

@ -0,0 +1,124 @@
# TODO: Find a way to make docile an optional dependency
# (autoload for this file? and throw some informative error when docile isn't available.
# should also check gem version with a 'gem' call and appropriate version specifier.)
require 'docile'
module Hammer
class Parser
  # Builds a parser from a DSL block; the block's parsers form a sequence.
  def self.build(&block)
    builder = ParserBuilder.new
    builder.sequence(&block).build
  end

  # Builds a parser from a DSL block; the block's parsers form a choice.
  def self.build_choice(&block)
    builder = ParserBuilder.new
    builder.choice(&block).build
  end
end # class Parser
class ParserBuilder
attr_reader :parsers
# Starts with an empty list of collected parsers.
def initialize
@parsers = []
end
def build
if @parsers.length > 1
Hammer::Parser.sequence(*@parsers)
else
@parsers.first
end
end
# can call it either as ParserBuiler.new.sequence(parser1, parser2, parser3)
# or as Parser.build { sequence { call parser1; call parser2; call parser3 } }
def sequence(*parsers, &block)
@parsers += parsers
@parsers << Docile.dsl_eval(ParserBuilder.new, &block).build if block_given?
return self
end
def choice(*parsers, &block)
if block_given?
parsers += Docile.dsl_eval(ParserBuilder.new, &block).parsers
end
@parsers << Hammer::Parser.choice(*parsers)
return self
end
def call(parser)
@parsers << parser
return self
end
# modifies previous parser
def action(&block)
parser = @parsers.last
raise RuntimeError, 'need a parser before action' if parser.nil?
@parsers << Hammer::Parser.action(parser, &block)
return self
end
# Defines a parser constructor with the given name.
def self.define_parser(name, options = {})
define_method name do |*args|
# TODO: This is wrong!! Needs to accept a block for nested parsers!
@parsers << Hammer::Parser.send(name, *args)
return self
end
end
private_class_method :define_parser
define_parser :token
define_parser :ch
define_parser :int64
define_parser :int32
define_parser :int16
define_parser :int8
define_parser :uint64
define_parser :uint32
define_parser :uint16
define_parser :uint8
define_parser :whitespace
define_parser :left
define_parser :right
define_parser :middle
define_parser :end_p
define_parser :nothing_p
define_parser :butnot
define_parser :difference
define_parser :xor
define_parser :many
define_parser :many1
define_parser :optional
define_parser :ignore
define_parser :sepBy
define_parser :sepBy1
define_parser :epsilon_p
define_parser :length_value
define_parser :and
define_parser :not
# At least indirect must return the parser instead of the builder, so it can be stored in a variable.
# Other possible solution:
# Make indirect take a name parameter, and use the name to bind it later.
# Example:
# p = Hammer::Parser.build { indirect(:the_name) }
# p.bind(:the_name, inner_parser)
# (store names and parsers in hash in the builder,
# when building merge hashes from sub builders and store everything in the resulting sequence or choice.
# make Parser#bind take and optional symbol. if it is given, the name is looked up in the table.)
# TODO:
# Think about this more.
# Do we need to be able to build parsers by chaining function calls? DSL should be sufficient.
# If yes, the parser methods in this class should not return "self", but the Hammer::Parser object they create.
def indirect
parser = Hammer::Parser.indirect
@parsers << parser
return parser
end
end # class ParserBuilder
end # module Hammer

View file

@ -0,0 +1,31 @@
module Minitest
  # Hammer-specific assertions added to Minitest's assertion mixin.
  module Assertions
    # Sentinel default for +expected+: "only check that the parse succeeds".
    # A dedicated object is used so that nil remains a valid expected AST.
    HAMMER_JUST_PARSE = Object.new

    # Asserts that +parser+ parses +probe+ successfully.
    #
    # parser   - object responding to #parse; must not be nil.
    # probe    - input handed to parser.parse.
    # expected - optional expected value of parse_result.ast.unmarshal
    #            (or nil when the parse result's AST is expected to be nil).
    def assert_parse_ok(parser, probe, expected=HAMMER_JUST_PARSE)
      refute_nil parser, "Parser must not be nil (this is a problem with your test)"
      parse_result = parser.parse(probe)
      refute_nil parse_result, "Parse failed"
      unless HAMMER_JUST_PARSE == expected
        # NOTE(review): `== nil` is kept on purpose instead of `.nil?`; the AST
        # object may define its own equality with nil -- confirm before changing.
        if parse_result.ast == nil
          assert_nil expected, "Parser returned nil AST; expected #{expected}"
        else
          # Minitest's argument order is (expected, actual); keeping that order
          # makes the failure message label both sides correctly.
          assert_equal expected, parse_result.ast.unmarshal
        end
      end
    end

    # Asserts that +parser+ fails to parse +probe+, i.e. parser.parse returns nil.
    def refute_parse_ok(parser, probe)
      refute_nil parser, "Parser must not be nil (this is a problem with your test)"
      parse_result = parser.parse(probe)
      # The failure message needs parse_result.ast, so it is only built (and the
      # assertion only made) when a result actually came back.
      unless parse_result.nil?
        assert_nil parse_result, "Parse succeeded unexpectedly with " + parse_result.ast.inspect
      end
    end
  end
  #def self.plugin_hammer-parser_init(options)
end

View file

@ -0,0 +1,755 @@
require 'bundler/setup'
require 'minitest/autorun'
require 'hammer'
# Checks Hammer::Parser.token against a three-byte literal.
class TestToken < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.token("95\xa2")
  end

  # The exact byte string parses and comes back unchanged.
  def test_1
    assert_parse_ok(@parser_1, "95\xa2", "95\xa2")
  end

  # A different final byte is rejected.
  def test_2
    refute_parse_ok(@parser_1, "95\xa3")
  end
end
# Checks Hammer::Parser.ch built from the integer 0xa2.
class TestCh < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.ch(0xa2)
  end

  # The matching byte parses to a one-character string.
  def test_1
    assert_parse_ok(@parser_1, "\xa2", 0xa2.chr)
  end

  # Any other byte is rejected.
  def test_2
    refute_parse_ok(@parser_1, "\xa3")
  end
end
# Checks Hammer::Parser.ch_range over 0x61..0x63.
class TestChRange < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.ch_range(0x61, 0x63)
  end

  # A character inside the range is accepted.
  def test_1
    assert_parse_ok(@parser_1, "b", 0x62.chr)
  end

  # A character just past the range is rejected.
  def test_2
    refute_parse_ok(@parser_1, "d")
  end
end
# Checks Hammer::Parser.int64.
class TestInt64 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.int64
  end

  # Eight bytes decode to the expected negative value.
  def test_1
    assert_parse_ok(@parser_1, "\xff\xff\xff\xfe\x00\x00\x00\x00", -0x200000000)
  end

  # Seven bytes are not enough.
  def test_2
    refute_parse_ok(@parser_1, "\xff\xff\xff\xfe\x00\x00\x00")
  end
end
# Checks Hammer::Parser.int32 with negative and positive values.
class TestInt32 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.int32
  end

  # Four bytes decode to a negative value.
  def test_1
    assert_parse_ok(@parser_1, "\xff\xfe\x00\x00", -0x20000)
  end

  # Three bytes are not enough.
  def test_2
    refute_parse_ok(@parser_1, "\xff\xfe\x00")
  end

  # Four bytes decode to a positive value.
  def test_3
    assert_parse_ok(@parser_1, "\x00\x02\x00\x00", 0x20000)
  end

  # Three bytes are not enough.
  def test_4
    refute_parse_ok(@parser_1, "\x00\x02\x00")
  end
end
# Checks Hammer::Parser.int16 with negative and positive values.
class TestInt16 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.int16
  end

  # Two bytes decode to a negative value.
  def test_1
    assert_parse_ok(@parser_1, "\xfe\x00", -0x200)
  end

  # One byte is not enough.
  def test_2
    refute_parse_ok(@parser_1, "\xfe")
  end

  # Two bytes decode to a positive value.
  def test_3
    assert_parse_ok(@parser_1, "\x02\x00", 0x200)
  end

  # One byte is not enough.
  def test_4
    refute_parse_ok(@parser_1, "\x02")
  end
end
# Checks Hammer::Parser.int8.
class TestInt8 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.int8
  end

  # One byte decodes to a negative value.
  def test_1
    assert_parse_ok(@parser_1, "\x88", -0x78)
  end

  # Empty input is rejected.
  def test_2
    refute_parse_ok(@parser_1, "")
  end
end
# Checks Hammer::Parser.uint64.
class TestUint64 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.uint64
  end

  # Eight bytes decode to the expected value.
  def test_1
    assert_parse_ok(@parser_1, "\x00\x00\x00\x02\x00\x00\x00\x00", 0x200000000)
  end

  # Seven bytes are not enough.
  def test_2
    refute_parse_ok(@parser_1, "\x00\x00\x00\x02\x00\x00\x00")
  end
end
# Checks Hammer::Parser.uint32.
class TestUint32 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.uint32
  end

  # Four bytes decode to the expected value.
  def test_1
    assert_parse_ok(@parser_1, "\x00\x02\x00\x00", 0x20000)
  end

  # Three bytes are not enough.
  def test_2
    refute_parse_ok(@parser_1, "\x00\x02\x00")
  end
end
# Checks Hammer::Parser.uint16.
class TestUint16 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.uint16
  end

  # Two bytes decode to the expected value.
  def test_1
    assert_parse_ok(@parser_1, "\x02\x00", 0x200)
  end

  # One byte is not enough.
  def test_2
    refute_parse_ok(@parser_1, "\x02")
  end
end
# Checks Hammer::Parser.uint8.
class TestUint8 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.uint8
  end

  # One byte decodes to its numeric value.
  def test_1
    assert_parse_ok(@parser_1, "x", 0x78)
  end

  # Empty input is rejected.
  def test_2
    refute_parse_ok(@parser_1, "")
  end
end
# Checks Hammer::Parser.int_range wrapped around uint8 with bounds 0x3 and 0xa.
class TestIntRange < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.int_range(hammer.uint8, 0x3, 0xa)
  end

  # A value between the bounds is accepted.
  def test_1
    assert_parse_ok(@parser_1, "\x05", 0x5)
  end

  # A value above the upper bound is rejected.
  def test_2
    refute_parse_ok(@parser_1, "\x0b")
  end
end
# Checks Hammer::Parser.whitespace wrapped around a character parser and
# around end_p.
# NOTE(review): test_2 and test_3 use the same input as seen here; verify
# against the test generator whether one of them was meant to differ.
class TestWhitespace < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.whitespace(hammer.ch(0x61))
    @parser_2 = hammer.whitespace(hammer.end_p)
  end

  def test_1
    assert_parse_ok(@parser_1, "a", 0x61.chr)
  end

  def test_2
    assert_parse_ok(@parser_1, " a", 0x61.chr)
  end

  def test_3
    assert_parse_ok(@parser_1, " a", 0x61.chr)
  end

  # A leading tab is accepted as well.
  def test_4
    assert_parse_ok(@parser_1, "\x09a", 0x61.chr)
  end

  # An underscore before the character is rejected.
  def test_5
    refute_parse_ok(@parser_1, "_a")
  end

  # With end_p inside, the expected AST is nil.
  def test_6
    assert_parse_ok(@parser_2, "", nil)
  end

  def test_7
    assert_parse_ok(@parser_2, " ", nil)
  end

  # Trailing non-whitespace input makes end_p fail.
  def test_8
    refute_parse_ok(@parser_2, " x")
  end
end
# Checks Hammer::Parser.left over "a" followed by a space.
class TestLeft < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.left(hammer.ch(0x61), hammer.ch(0x20))
  end

  # Both parts present: only the left result is expected.
  def test_1
    assert_parse_ok(@parser_1, "a ", 0x61.chr)
  end

  def test_2
    refute_parse_ok(@parser_1, "a")
  end

  def test_3
    refute_parse_ok(@parser_1, " ")
  end

  def test_4
    refute_parse_ok(@parser_1, "ba")
  end
end
# Checks Hammer::Parser.middle over space, "a", space.
class TestMiddle < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.middle(
      hammer.ch(0x20.chr),
      hammer.ch(0x61.chr),
      hammer.ch(0x20.chr)
    )
  end

  # All three parts present: only the middle result is expected.
  def test_1
    assert_parse_ok(@parser_1, " a ", 0x61.chr)
  end

  def test_2
    refute_parse_ok(@parser_1, "a")
  end

  def test_3
    refute_parse_ok(@parser_1, " a")
  end

  def test_4
    refute_parse_ok(@parser_1, "a ")
  end

  def test_5
    refute_parse_ok(@parser_1, " b ")
  end

  def test_6
    refute_parse_ok(@parser_1, "ba ")
  end

  def test_7
    refute_parse_ok(@parser_1, " ab")
  end
end
# Checks Hammer::Parser.in with the character set "abc".
class TestIn < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.in("abc")
  end

  # A member of the set is accepted.
  def test_1
    assert_parse_ok(@parser_1, "b", 0x62.chr)
  end

  # A character outside the set is rejected.
  def test_2
    refute_parse_ok(@parser_1, "d")
  end
end
# Checks Hammer::Parser.not_in with the character set "abc".
class TestNotIn < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.not_in("abc")
  end

  # A character outside the set is accepted.
  def test_1
    assert_parse_ok(@parser_1, "d", 0x64.chr)
  end

  # A member of the set is rejected.
  def test_2
    refute_parse_ok(@parser_1, "a")
  end
end
# Checks Hammer::Parser.end_p as the last element of a sequence.
class TestEndP < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(hammer.ch(0x61.chr), hammer.end_p)
  end

  # Input ending right after "a" is accepted.
  def test_1
    assert_parse_ok(@parser_1, "a", [0x61.chr])
  end

  # Extra input after "a" is rejected.
  def test_2
    refute_parse_ok(@parser_1, "aa")
  end
end
# Checks Hammer::Parser.nothing_p.
class TestNothingP < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.nothing_p
  end

  # The parser is expected to reject the input "a".
  def test_1
    refute_parse_ok(@parser_1, "a")
  end
end
# Checks Hammer::Parser.sequence, plain and with a whitespace-wrapped element.
# NOTE(review): test_5 and test_6 use the same input as seen here; verify
# against the test generator whether one of them was meant to differ.
class TestSequence < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(hammer.ch(0x61.chr), hammer.ch(0x62.chr))
    @parser_2 = hammer.sequence(
      hammer.ch(0x61.chr),
      hammer.whitespace(hammer.ch(0x62.chr))
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "ab", [0x61.chr, 0x62.chr])
  end

  def test_2
    refute_parse_ok(@parser_1, "a")
  end

  def test_3
    refute_parse_ok(@parser_1, "b")
  end

  def test_4
    assert_parse_ok(@parser_2, "ab", [0x61.chr, 0x62.chr])
  end

  def test_5
    assert_parse_ok(@parser_2, "a b", [0x61.chr, 0x62.chr])
  end

  def test_6
    assert_parse_ok(@parser_2, "a b", [0x61.chr, 0x62.chr])
  end
end
# Checks Hammer::Parser.choice between "a" and "b".
class TestChoice < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.choice(hammer.ch(0x61.chr), hammer.ch(0x62.chr))
  end

  def test_1
    assert_parse_ok(@parser_1, "a", 0x61.chr)
  end

  def test_2
    assert_parse_ok(@parser_1, "b", 0x62.chr)
  end

  # Only the first character of "ab" ends up in the result.
  def test_3
    assert_parse_ok(@parser_1, "ab", 0x61.chr)
  end

  def test_4
    refute_parse_ok(@parser_1, "c")
  end
end
# Checks Hammer::Parser.butnot with a token exclusion and a digit exclusion.
class TestButnot < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.butnot(hammer.ch(0x61.chr), hammer.token("ab"))
    @parser_2 = hammer.butnot(
      hammer.ch_range(0x30.chr, 0x39.chr),
      hammer.ch(0x36.chr)
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "a", 0x61.chr)
  end

  # "ab" matches the excluded token, so the parse is rejected.
  def test_2
    refute_parse_ok(@parser_1, "ab")
  end

  def test_3
    assert_parse_ok(@parser_1, "aa", 0x61.chr)
  end

  def test_4
    assert_parse_ok(@parser_2, "5", 0x35.chr)
  end

  # "6" is the excluded digit.
  def test_5
    refute_parse_ok(@parser_2, "6")
  end
end
# Checks Hammer::Parser.difference of token "ab" and character "a".
class TestDifference < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.difference(hammer.token("ab"), hammer.ch(0x61.chr))
  end

  def test_1
    assert_parse_ok(@parser_1, "ab", "ab")
  end

  def test_2
    refute_parse_ok(@parser_1, "a")
  end
end
# Checks Hammer::Parser.xor over the overlapping ranges "0".."6" and "5".."9".
class TestXor < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.xor(
      hammer.ch_range(0x30.chr, 0x36.chr),
      hammer.ch_range(0x35.chr, 0x39.chr)
    )
  end

  # Only in the first range.
  def test_1
    assert_parse_ok(@parser_1, "0", 0x30.chr)
  end

  # Only in the second range.
  def test_2
    assert_parse_ok(@parser_1, "9", 0x39.chr)
  end

  # In both ranges.
  def test_3
    refute_parse_ok(@parser_1, "5")
  end

  # In neither range.
  def test_4
    refute_parse_ok(@parser_1, "a")
  end
end
# Checks Hammer::Parser.many over a choice of "a" and "b".
class TestMany < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.many(
      hammer.choice(hammer.ch(0x61.chr), hammer.ch(0x62.chr))
    )
  end

  # Empty input yields an empty list.
  def test_1
    assert_parse_ok(@parser_1, "", [])
  end

  def test_2
    assert_parse_ok(@parser_1, "a", [0x61.chr])
  end

  def test_3
    assert_parse_ok(@parser_1, "b", [0x62.chr])
  end

  def test_4
    assert_parse_ok(
      @parser_1,
      "aabbaba",
      [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr]
    )
  end
end
# Checks Hammer::Parser.many1 over a choice of "a" and "b".
class TestMany1 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.many1(
      hammer.choice(hammer.ch(0x61.chr), hammer.ch(0x62.chr))
    )
  end

  # Empty input is rejected.
  def test_1
    refute_parse_ok(@parser_1, "")
  end

  def test_2
    assert_parse_ok(@parser_1, "a", [0x61.chr])
  end

  def test_3
    assert_parse_ok(@parser_1, "b", [0x62.chr])
  end

  def test_4
    assert_parse_ok(
      @parser_1,
      "aabbaba",
      [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr]
    )
  end

  # Input starting with a non-matching character is rejected.
  def test_5
    refute_parse_ok(@parser_1, "daabbabadef")
  end
end
# Checks Hammer::Parser.repeat_n with a count of 2 over a choice of "a" and "b".
class TestRepeatN < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.repeat_n(
      hammer.choice(hammer.ch(0x61.chr), hammer.ch(0x62.chr)),
      0x2
    )
  end

  # Only one matching character before "d".
  def test_1
    refute_parse_ok(@parser_1, "adef")
  end

  # Two matching characters at the start are accepted.
  def test_2
    assert_parse_ok(@parser_1, "abdef", [0x61.chr, 0x62.chr])
  end

  # Input starting with a non-matching character is rejected.
  def test_3
    refute_parse_ok(@parser_1, "dabdef")
  end
end
# Checks Hammer::Parser.optional in the middle of a sequence:
# "a", optionally "b" or "c", then "d".
class TestOptional < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(
      hammer.ch(0x61.chr),
      hammer.optional(hammer.choice(hammer.ch(0x62.chr), hammer.ch(0x63.chr))),
      hammer.ch(0x64.chr)
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "abd", [0x61.chr, 0x62.chr, 0x64.chr])
  end

  def test_2
    assert_parse_ok(@parser_1, "acd", [0x61.chr, 0x63.chr, 0x64.chr])
  end

  # When the optional part is absent, nil is expected in its slot.
  def test_3
    assert_parse_ok(@parser_1, "ad", [0x61.chr, nil, 0x64.chr])
  end

  def test_4
    refute_parse_ok(@parser_1, "aed")
  end

  def test_5
    refute_parse_ok(@parser_1, "ab")
  end

  def test_6
    refute_parse_ok(@parser_1, "ac")
  end
end
# Checks Hammer::Parser.ignore in the middle of a sequence.
class TestIgnore < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(
      hammer.ch(0x61.chr),
      hammer.ignore(hammer.ch(0x62.chr)),
      hammer.ch(0x63.chr)
    )
  end

  # The ignored "b" must be present but is absent from the expected result.
  def test_1
    assert_parse_ok(@parser_1, "abc", [0x61.chr, 0x63.chr])
  end

  def test_2
    refute_parse_ok(@parser_1, "ac")
  end
end
# Checks Hammer::Parser.sepBy: digits "1".."3" separated by commas.
class TestSepBy < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sepBy(
      hammer.choice(hammer.ch(0x31.chr), hammer.ch(0x32.chr), hammer.ch(0x33.chr)),
      hammer.ch(0x2c.chr)
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr])
  end

  def test_2
    assert_parse_ok(@parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr])
  end

  def test_3
    assert_parse_ok(@parser_1, "1,3", [0x31.chr, 0x33.chr])
  end

  def test_4
    assert_parse_ok(@parser_1, "3", [0x33.chr])
  end

  # Empty input yields an empty list.
  def test_5
    assert_parse_ok(@parser_1, "", [])
  end
end
# Checks Hammer::Parser.sepBy1: digits "1".."3" separated by commas.
class TestSepBy1 < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sepBy1(
      hammer.choice(hammer.ch(0x31.chr), hammer.ch(0x32.chr), hammer.ch(0x33.chr)),
      hammer.ch(0x2c.chr)
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr])
  end

  def test_2
    assert_parse_ok(@parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr])
  end

  def test_3
    assert_parse_ok(@parser_1, "1,3", [0x31.chr, 0x33.chr])
  end

  def test_4
    assert_parse_ok(@parser_1, "3", [0x33.chr])
  end

  # Empty input is rejected.
  def test_5
    refute_parse_ok(@parser_1, "")
  end
end
# Checks Hammer::Parser.and at the start and at the end of a sequence.
class TestAnd < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(
      hammer.and(hammer.ch(0x30.chr)),
      hammer.ch(0x30.chr)
    )
    @parser_2 = hammer.sequence(
      hammer.and(hammer.ch(0x30.chr)),
      hammer.ch(0x31.chr)
    )
    @parser_3 = hammer.sequence(
      hammer.ch(0x31.chr),
      hammer.and(hammer.ch(0x32.chr))
    )
  end

  # The "and" element contributes nothing to the expected result.
  def test_1
    assert_parse_ok(@parser_1, "0", [0x30.chr])
  end

  def test_2
    refute_parse_ok(@parser_1, "1")
  end

  def test_3
    refute_parse_ok(@parser_2, "0")
  end

  def test_4
    refute_parse_ok(@parser_2, "1")
  end

  def test_5
    assert_parse_ok(@parser_3, "12", [0x31.chr])
  end

  def test_6
    refute_parse_ok(@parser_3, "13")
  end
end
# Checks Hammer::Parser.not by comparing a plain "+" / "++" choice with one
# whose "+" alternative is followed by not("+").
class TestNot < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @parser_1 = hammer.sequence(
      hammer.ch(0x61.chr),
      hammer.choice(hammer.token("+"), hammer.token("++")),
      hammer.ch(0x62.chr)
    )
    @parser_2 = hammer.sequence(
      hammer.ch(0x61.chr),
      hammer.choice(
        hammer.sequence(hammer.token("+"), hammer.not(hammer.ch(0x2b.chr))),
        hammer.token("++")
      ),
      hammer.ch(0x62.chr)
    )
  end

  def test_1
    assert_parse_ok(@parser_1, "a+b", [0x61.chr, "+", 0x62.chr])
  end

  # The plain choice is expected to reject "a++b".
  def test_2
    refute_parse_ok(@parser_1, "a++b")
  end

  def test_3
    assert_parse_ok(@parser_2, "a+b", [0x61.chr, ["+"], 0x62.chr])
  end

  # With the not-guard, "a++b" is expected to parse via the "++" token.
  def test_4
    assert_parse_ok(@parser_2, "a++b", [0x61.chr, "++", 0x62.chr])
  end
end
# Checks a right-recursive grammar built with Hammer::Parser.indirect:
# the indirect parser is bound to choice(sequence("a", itself), epsilon_p).
class TestRightrec < Minitest::Test
  def setup
    super
    hammer = Hammer::Parser
    @sp_rr = hammer.indirect
    @sp_rr.bind(
      hammer.choice(
        hammer.sequence(hammer.ch(0x61.chr), @sp_rr),
        hammer.epsilon_p
      )
    )
    @parser_1 = @sp_rr
  end

  def test_1
    assert_parse_ok(@parser_1, "a", [0x61.chr])
  end

  # Each further "a" nests one level deeper in the expected result.
  def test_2
    assert_parse_ok(@parser_1, "aa", [0x61.chr, [0x61.chr]])
  end

  def test_3
    assert_parse_ok(@parser_1, "aaa", [0x61.chr, [0x61.chr, [0x61.chr]]])
  end
end

View file

@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
require 'bundler/setup'
require 'hammer'
require 'minitest/autorun'
# Tests for the builder DSL, chained builder calls, direct combinator calls,
# indirect parsers and token handling of non-ASCII strings.
class ParserTest < Minitest::Test
  # Block DSL with a nested choice containing a nested sequence.
  def test_builder_1
    parser = Hammer::Parser.build do
      token 'blah'
      ch 'a'.ord
      choice do
        sequence do
          token 'abc'
        end
        token 'def'
      end
    end

    refute_nil(parser)
    refute_nil(parser.parse('blahaabcd'))
    refute_nil(parser.parse('blahadefd'))
    assert_nil(parser.parse('blahablad'))
    assert_nil(parser.parse('blaha'))
    assert_nil(parser.parse('blah'))
  end

  # Chained calls on a ParserBuilder instance.
  def test_builder_2
    parser = Hammer::ParserBuilder.new.
      token('Hello ').
      choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')).
      token('!').
      build

    refute_nil(parser)
    refute_nil(parser.parse('Hello Mom!'))
  end

  # The same grammar written with the class-level constructors.
  def test_builder_3
    hammer = Hammer::Parser
    parser = hammer.sequence(
      hammer.token('Hello '),
      hammer.choice(hammer.token('Mom'), hammer.token('Dad')),
      hammer.token('!')
    )

    refute_nil(parser)
    refute_nil(parser.parse('Hello Mom!'))
  end

  def test_string_copied
    s = 'blah'
    parser = Hammer::Parser.token(s)
    refute_equal(s, 'BLAH')
    assert_nil(parser.parse('BLAH'))
    # parser still shouldn't match, even if we modify the string in-place
    s.upcase!
    assert_equal(s, 'BLAH')
    assert_nil(parser.parse('BLAH'))
  end

  # The indirect parser is captured from inside the DSL block and bound afterwards.
  def test_indirect
    x = nil
    parser = Hammer::Parser.build do
      token 'abc'
      x = indirect
      end_p
    end
    x.bind(Hammer::Parser.token('abd'))

    assert_nil(parser.parse('abcabdabd'))
    refute_nil(parser.parse('abcabd'))
    assert_nil(parser.parse('abdabd'))
    assert_nil(parser.parse('abc'))
  end

  def test_multibyte_token
    parser = Hammer::Parser.build do
      token '今日'
      token 'a'
      end_p
    end

    refute_nil(parser.parse('今日a'))
  end

  # Parses a token re-encoded to +encoding+ and expects the same string back.
  # Runs with UTF-8 when Minitest invokes it without arguments.
  def test_token_encoding(encoding='UTF-8')
    string = '今日'.encode(encoding)
    parser = Hammer::Parser.token(string)
    assert_equal(string, parser.parse(string).ast.unmarshal)
  end

  # Same check with EUC-JP.
  def test_token_encoding_2
    test_token_encoding('EUC-JP')
  end
end
# Checks Hammer::Parser.attr_bool: one or more "a"/"b" characters, accepted
# only when the predicate block returns a truthy value.
class AttrBoolTest < Minitest::Test
  def setup
    hammer = Hammer::Parser
    letters = hammer.many1(hammer.choice(hammer.ch('a'), hammer.ch('b')))
    # Predicate: more than one element and the first two are equal.
    @parser = hammer.attr_bool(letters) do |x|
      data = x.unmarshal
      data.length > 1 && data[0] == data[1]
    end
  end

  def test_1
    assert_parse_ok(@parser, "aa", ['a','a'])
  end

  def test_2
    assert_parse_ok(@parser, "bb", ['b','b'])
  end

  def test_3
    refute_parse_ok(@parser, "ab")
  end
end
# Checks Hammer::Parser.action: a two-letter sequence whose result is
# transformed by the block into a comma-joined string.
class ActionTest < Minitest::Test
  def setup
    hammer = Hammer::Parser
    letters = hammer.sequence(
      hammer.choice(hammer.ch('a'), hammer.ch('A')),
      hammer.choice(hammer.ch('b'), hammer.ch('B'))
    )
    @parser = hammer.action(letters) do |x|
      x.unmarshal.join(",")
    end
  end

  def test_1
    assert_parse_ok(@parser, "ab", "a,b")
  end

  def test_2
    assert_parse_ok(@parser, "AB", "A,B")
  end

  def test_3
    refute_parse_ok(@parser, "XX")
  end
end