From 6368214fcedd65b189e7e8af9438fe5111777a7f Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Sun, 10 Nov 2013 15:13:15 +0100 Subject: [PATCH 01/35] Add first version of ruby bindings. --- src/bindings/ruby/.gitignore | 3 + src/bindings/ruby/Gemfile | 9 ++ src/bindings/ruby/README.md | 72 +++++++++ src/bindings/ruby/Rakefile | 8 + src/bindings/ruby/ext/hammer_ext/extconf.rb | 9 ++ src/bindings/ruby/ext/hammer_ext/hammer_ext.c | 6 + src/bindings/ruby/ext/hammer_ext/hammer_ext.h | 6 + src/bindings/ruby/ext/hammer_ext/token_type.c | 20 +++ src/bindings/ruby/ext/hammer_ext/token_type.h | 6 + src/bindings/ruby/hammer-parser.gemspec | 23 +++ src/bindings/ruby/lib/hammer.rb | 65 ++++++++ src/bindings/ruby/lib/hammer/internal.rb | 60 +++++++ src/bindings/ruby/lib/hammer/parser.rb | 146 ++++++++++++++++++ .../ruby/lib/hammer/parser_builder.rb | 75 +++++++++ 14 files changed, 508 insertions(+) create mode 100644 src/bindings/ruby/.gitignore create mode 100644 src/bindings/ruby/Gemfile create mode 100644 src/bindings/ruby/README.md create mode 100644 src/bindings/ruby/Rakefile create mode 100644 src/bindings/ruby/ext/hammer_ext/extconf.rb create mode 100644 src/bindings/ruby/ext/hammer_ext/hammer_ext.c create mode 100644 src/bindings/ruby/ext/hammer_ext/hammer_ext.h create mode 100644 src/bindings/ruby/ext/hammer_ext/token_type.c create mode 100644 src/bindings/ruby/ext/hammer_ext/token_type.h create mode 100644 src/bindings/ruby/hammer-parser.gemspec create mode 100644 src/bindings/ruby/lib/hammer.rb create mode 100644 src/bindings/ruby/lib/hammer/internal.rb create mode 100644 src/bindings/ruby/lib/hammer/parser.rb create mode 100644 src/bindings/ruby/lib/hammer/parser_builder.rb diff --git a/src/bindings/ruby/.gitignore b/src/bindings/ruby/.gitignore new file mode 100644 index 0000000..0f57d62 --- /dev/null +++ b/src/bindings/ruby/.gitignore @@ -0,0 +1,3 @@ +/tmp/ +/lib/hammer/hammer_ext.bundle +/Gemfile.lock diff --git a/src/bindings/ruby/Gemfile 
b/src/bindings/ruby/Gemfile new file mode 100644 index 0000000..6f6178d --- /dev/null +++ b/src/bindings/ruby/Gemfile @@ -0,0 +1,9 @@ +source 'https://rubygems.org' + +gemspec + +gem 'rake' + +group :test do + # ... +end diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md new file mode 100644 index 0000000..5ed26ae --- /dev/null +++ b/src/bindings/ruby/README.md @@ -0,0 +1,72 @@ +# hammer-parser + +Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library. + + +## Notes + +* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer). + +* C extension not really needed at the moment, if we don't mind hardcoding the token types in the ruby code. + + +## Development + +1. `cd src/bindings/ruby`. + +2. Run `bundle install` to install dependencies. + +3. Run `rake compile` to compile the C extension. + +4. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded. + + +## Installation + +TODO + + + +## Examples + +### Building a parser + +```ruby +parser = Hammer::Parser.build { + token 'Hello ' + choice { + token 'Mom' + token 'Dad' + } + token '!' +} +``` + +Also possible: + +```ruby +parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad')) + .token('!') + .build +``` + +More like hammer in C: + +```ruby +h = Hammer::Parser +parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +``` + +### Parsing + +```ruby +parser.parse 'Hello Mom!' +=> true +parser.parse 'Hello Someone!' +=> false +``` + +Currently you only get `true` or `false` depending on whether the parse succeeded or failed. +There's no way to access the parsed data yet. 
diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile new file mode 100644 index 0000000..646654d --- /dev/null +++ b/src/bindings/ruby/Rakefile @@ -0,0 +1,8 @@ +require 'rake/extensiontask' + +#spec = Gem::Specification.load('hammer-parser-ruby.gemspec') +#Rake::ExtensionTask.new('hammer_ext', spec) + +Rake::ExtensionTask.new 'hammer_ext' do |ext| + ext.lib_dir = 'lib/hammer' +end diff --git a/src/bindings/ruby/ext/hammer_ext/extconf.rb b/src/bindings/ruby/ext/hammer_ext/extconf.rb new file mode 100644 index 0000000..d5158a7 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/extconf.rb @@ -0,0 +1,9 @@ +require 'mkmf' + +extension_name = 'hammer_ext' +dir_config extension_name + +abort 'ERROR: missing hammer library' unless have_library 'hammer' +abort 'ERROR: missing hammer.h' unless have_header 'hammer.h' + +create_makefile extension_name diff --git a/src/bindings/ruby/ext/hammer_ext/hammer_ext.c b/src/bindings/ruby/ext/hammer_ext/hammer_ext.c new file mode 100644 index 0000000..6b461c6 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/hammer_ext.c @@ -0,0 +1,6 @@ +#include "token_type.h" + +void Init_hammer_ext(void) +{ + Init_token_type(); +} diff --git a/src/bindings/ruby/ext/hammer_ext/hammer_ext.h b/src/bindings/ruby/ext/hammer_ext/hammer_ext.h new file mode 100644 index 0000000..98fc2da --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/hammer_ext.h @@ -0,0 +1,6 @@ +#ifndef HAMMER_EXT__H +#define HAMMER_EXT__H + +// ... 
+ +#endif diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.c b/src/bindings/ruby/ext/hammer_ext/token_type.c new file mode 100644 index 0000000..a154d7e --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/token_type.c @@ -0,0 +1,20 @@ +#include +#include + +#include "token_type.h" + +#define DefineHammerInternalConst(name) rb_define_const(mHammerInternal, #name, INT2FIX(name)); + +void Init_token_type(void) +{ + VALUE mHammer = rb_define_module("Hammer"); + VALUE mHammerInternal = rb_define_module_under(mHammer, "Internal"); + + DefineHammerInternalConst(TT_NONE); + DefineHammerInternalConst(TT_BYTES); + DefineHammerInternalConst(TT_SINT); + DefineHammerInternalConst(TT_UINT); + DefineHammerInternalConst(TT_SEQUENCE); + DefineHammerInternalConst(TT_ERR); + DefineHammerInternalConst(TT_USER); +} diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.h b/src/bindings/ruby/ext/hammer_ext/token_type.h new file mode 100644 index 0000000..5652ce6 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/token_type.h @@ -0,0 +1,6 @@ +#ifndef HAMMER_EXT_TOKEN_TYPE__H +#define HAMMER_EXT_TOKEN_TYPE__H + +void Init_token_type(void); + +#endif diff --git a/src/bindings/ruby/hammer-parser.gemspec b/src/bindings/ruby/hammer-parser.gemspec new file mode 100644 index 0000000..80b7529 --- /dev/null +++ b/src/bindings/ruby/hammer-parser.gemspec @@ -0,0 +1,23 @@ +#encoding: UTF-8 +Gem::Specification.new do |s| + s.name = 'hammer-parser' + s.version = '0.1.0' + s.summary = 'Ruby bindings to the hammer parsing library.' + s.description = s.summary # TODO: longer description? + s.authors = ['Meredith L. Patterson', 'TQ Hirsch', 'Jakob Rath'] + # TODO: + # s.email = ... + # s.homepage = ... 
+ + files = [] + files << 'README.md' + files << Dir['{lib,test}/**/*.rb'] + s.files = files + s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ } + + s.require_paths = %w[lib] + + s.add_dependency 'ffi', '~> 1.9' + s.add_dependency 'docile', '~> 1.1' # TODO: Find a way to make this optional +end + diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb new file mode 100644 index 0000000..2699d96 --- /dev/null +++ b/src/bindings/ruby/lib/hammer.rb @@ -0,0 +1,65 @@ +require 'hammer/hammer_ext' +require 'hammer/internal' +require 'hammer/parser' +require 'hammer/parser_builder' + +# TODO: +# Probably need to rename this file to 'hammer-parser.rb', so +# people can use "require 'hammer-parser'" in their code. + + + +# TODO: Put tests in test/ directory. + +parser = Hammer::Parser.build do + token 'blah' + ch 'a' + choice { + sequence { + token 'abc' + } + token 'def' + } +end + +p parser + +if parser + p parser.parse 'blahaabcd' + p parser.parse 'blahadefd' + p parser.parse 'blahablad' + p parser.parse 'blaha' + p parser.parse 'blah' +end + +parser = Hammer::Parser::Sequence.new( + Hammer::Parser::Token.new('Hello '), + Hammer::Parser::Choice.new( + Hammer::Parser::Token.new('Mom'), + Hammer::Parser::Token.new('Dad') + ), + Hammer::Parser::Token.new('!') +) +p parser.parse 'Hello Mom!' + +parser = Hammer::Parser.build { + token 'Hello ' + choice { + token 'Mom' + token 'Dad' + } + token '!' +} +p parser.parse 'Hello Mom!' + +parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad')) + .token('!') + .build +p parser.parse 'Hello Mom!' + +# not yet working +#h = Hammer::Parser +#parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +#p parser.parse 'Hello Mom!' 
diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb new file mode 100644 index 0000000..0083ebd --- /dev/null +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -0,0 +1,60 @@ +require 'ffi' + +module Hammer + module Internal + extend FFI::Library + + ffi_lib 'libhammer.dylib' + + # run a parser + attach_function :h_parse, [:pointer, :string, :size_t], :pointer + + # build a parser + attach_function :h_token, [:string, :size_t], :pointer + attach_function :h_ch, [:uint8], :pointer + attach_function :h_ch_range, [:uint8, :uint8], :pointer + attach_function :h_int_range, [:int64, :int64], :pointer + attach_function :h_bits, [:size_t, :bool], :pointer + attach_function :h_int64, [], :pointer + attach_function :h_int32, [], :pointer + attach_function :h_int16, [], :pointer + attach_function :h_int8, [], :pointer + attach_function :h_uint64, [], :pointer + attach_function :h_uint32, [], :pointer + attach_function :h_uint16, [], :pointer + attach_function :h_uint8, [], :pointer + attach_function :h_whitespace, [:pointer], :pointer + attach_function :h_left, [:pointer, :pointer], :pointer + attach_function :h_right, [:pointer, :pointer], :pointer + attach_function :h_middle, [:pointer, :pointer, :pointer], :pointer + # h_action + # h_in + # h_not_in + attach_function :h_end_p, [], :pointer + attach_function :h_nothing_p, [], :pointer + attach_function :h_sequence, [:varargs], :pointer + attach_function :h_choice, [:varargs], :pointer + attach_function :h_butnot, [:pointer, :pointer], :pointer + attach_function :h_difference, [:pointer, :pointer], :pointer + attach_function :h_xor, [:pointer, :pointer], :pointer + attach_function :h_many, [:pointer], :pointer + attach_function :h_many1, [:pointer], :pointer + # h_repeat_n + # h_optional + # h_ignore + # h_sepBy + # h_sepBy1 + # h_epsilon_p + # h_length_value + # h_attr_bool + # h_and + # h_not + # h_indirect + # h_bind_indirect + + # free the parse result + # h_parse_result_free + 
+ # TODO: Does the HParser* need to be freed? + end +end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb new file mode 100644 index 0000000..a7b75e2 --- /dev/null +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -0,0 +1,146 @@ +module Hammer + class Parser + + # Don't create new instances with Hammer::Parser.new, + # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.) + def initialize + end + + def parse(data) + raise RuntimeError, '@h_parser is nil' if @h_parser.nil? + raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. + result = Hammer::Internal.h_parse(@h_parser, data, data.length); + # TODO: Do something with the data + !result.null? + end + + class Token < Parser + def initialize(string) + @h_parser = Hammer::Internal.h_token(string, string.length) + end + end + + class Ch < Parser + def initialize(char) + # TODO: Really? Should probably accept Fixnum in appropriate range + # Also, char.ord gives unexptected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true + # Not really unexpected though, since 20170 & 255 == 202. + # But probably it's better to use Ch for Fixnum in 0..255 only, and only Token for strings. + raise ArgumentError, 'expecting a one-character String' unless char.is_a?(String) && char.length == 1 + @h_parser = Hammer::Internal.h_ch(char.ord) + end + end + + class Sequence < Parser + def initialize(*parsers) + #args = [] + #parsers.each { |p| args += [:pointer, p.h_parser] } + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + @h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) + @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + # TODO: Use (managed?) 
FFI struct instead of void pointers + end + end + + class Choice < Parser + def initialize(*parsers) + #args = [] + #parsers.each { |p| args += [:pointer, p.h_parser] } + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + @h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) + @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + # TODO: Use (managed?) FFI struct instead of void pointers + end + end + + # Define parsers that take some number of other parsers + # TODO: Maybe use -1 for variable number, and use this for Sequence and Choice too + # TODO: Refactor this code as a method? And call it like: define_parser :Int64, :h_int64, 0 + [ + [:Int64, :h_int64, 0], + [:Int32, :h_int32, 0], + [:Int16, :h_int16, 0], + [:Int8, :h_int8, 0], + [:UInt64, :h_uint64, 0], + [:UInt32, :h_uint32, 0], + [:UInt16, :h_uint16, 0], + [:UInt8, :h_uint8, 0], + [:Whitespace, :h_whitespace, 1], + [:Left, :h_left, 2], + [:Right, :h_right, 2], + [:Middle, :h_middle, 3], + [:End, :h_end_p, 0], + [:Nothing, :h_nothing_p, 0], + [:ButNot, :h_butnot, 2], + [:Difference, :h_difference, 2], + [:Xor, :h_xor, 2], + [:Many, :h_many, 1], + [:Many1, :h_many1, 1] + ].each do |class_name, h_function_name, parameter_count| + # Create new subclass of Hammer::Parser + klass = Class.new(Hammer::Parser) do + # Need to use define_method instead of def to be able to access h_function_name in the method's body + define_method :initialize do |*parsers| + # Checking parameter_count is not really needed, since the h_* methods will complain anyways + @h_parser = Hammer::Internal.send(h_function_name, *parsers.map(&:h_parser)) + # TODO: Do we need to store sub-parsers to prevent them from getting garbage-collected? + end + end + # Register class with name Hammer::Parser::ClassName + Hammer::Parser.const_set class_name, klass + end + + # TODO: + # Hammer::Parser::Token.new('...') is a bit too long. Find a shorter way to use the parsers. 
+ # Maybe: + # class Hammer::Parser + # def self.token(*args) + # Hammer::Parser::Token.new(*args) + # end + # end + # Can create functions like that automatically. Usage: + # h = Hammer::Parser + # parser = h.sequence(h.token('blah'), h.token('other_token')) + # Looks almost like hammer in C! + + # Defines a parser constructor with the given name. + # Options: + # hammer_function: name of the hammer function to call (default: 'h_'+name) + def self.define_parser(name, options = {}) + hammer_function = options[:hammer_function] || ('h_' + name.to_s) + + # Define a new class method + define_singleton_method name do |*parsers| + #args = parsers.map { |p| p.instance_variable_get :@h_parser } + h_parser = Hammer::Internal.send hammer_function, *parsers.map(&:h_parser) + + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + return parser + end + end + private_class_method :define_parser + + define_parser :int64 + define_parser :int32 + define_parser :int16 + define_parser :int8 + define_parser :uint64 + define_parser :uint32 + define_parser :uint16 + define_parser :uint8 + define_parser :whitespace + define_parser :left + define_parser :right + define_parser :middle + define_parser :end + define_parser :nothing + define_parser :butnot + define_parser :difference + define_parser :xor + define_parser :many + define_parser :many1 + + attr_reader :h_parser + end +end diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb new file mode 100644 index 0000000..2f36c84 --- /dev/null +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -0,0 +1,75 @@ +# TODO: Find a way to make docile an optional dependency +# (autoload for this file? and throw some informative error when docile isn't available. +# should also check gem version with a 'gem' call and appropriate version specifier.) 
+require 'docile' + +module Hammer + + class Parser + def self.build(&block) + ParserBuilder.new.sequence(&block).build + end + end + + # TODO: Is this even useful for "real" usage? + class ParserBuilder + attr_reader :parsers + + def initialize + @parsers = [] + # TODO: Store an aggregator, e.g.: + # @aggregator = Hammer::Parser::Sequence + # Sequence is the default, set to Hammer::Parser::Choice for choice() calls + # In the build method, use @aggregator.new(*@parsers) to build the final parser. + end + + def build + if @parsers.length > 1 + Hammer::Parser::Sequence.new(*@parsers) + else + @parsers.first + end + end + + + # TODO: Need to check if that's really needed + def call(parser) + @parsers << parser + return self + end + + + def token(str) + #@h_parsers << Hammer::Internal.h_token(str, str.length) + @parsers << Hammer::Parser::Token.new(str) + return self + end + + def ch(char) + #@h_parsers << Hammer::Internal.h_ch(char.ord) + @parsers << Hammer::Parser::Ch.new(char) + return self + end + + # can call it either as ParserBuiler.new.sequence(parser1, parser2, parser3) + # or as Parser.build { sequence { call parser1; call parser2; call parser3 } } + def sequence(*parsers, &block) + @parsers += parsers + @parsers << Docile.dsl_eval(ParserBuilder.new, &block).build if block_given? + return self + #builder = Hammer::ParserBuilder.new + #builder.instance_eval &block + #@parsers << Hammer::Parser::Sequence.new(*builder.parsers) + ## TODO: Save original receiver and redirect missing methods! + end + + def choice(*parsers, &block) + if block_given? + parsers += Docile.dsl_eval(ParserBuilder.new, &block).parsers + end + @parsers << Hammer::Parser::Choice.new(*parsers) + return self + end + end + +end From daeabb587dc60490156009fdbaf70084ebaa16b3 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Fri, 15 Nov 2013 14:00:09 +0100 Subject: [PATCH 02/35] Need rake-compiler gem. 
--- src/bindings/ruby/Gemfile | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile index 6f6178d..96ca089 100644 --- a/src/bindings/ruby/Gemfile +++ b/src/bindings/ruby/Gemfile @@ -3,6 +3,7 @@ source 'https://rubygems.org' gemspec gem 'rake' +gem 'rake-compiler' group :test do # ... From 6e34e0c8fbfcbbe1784f4629652b6c1e391239a9 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Fri, 15 Nov 2013 14:21:20 +0100 Subject: [PATCH 03/35] Remove Hammer::Parser subclasses. --- src/bindings/ruby/README.md | 4 +- src/bindings/ruby/lib/hammer.rb | 19 +--- src/bindings/ruby/lib/hammer/parser.rb | 100 +++++------------- .../ruby/lib/hammer/parser_builder.rb | 14 +-- 4 files changed, 38 insertions(+), 99 deletions(-) diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md index 5ed26ae..1876fc7 100644 --- a/src/bindings/ruby/README.md +++ b/src/bindings/ruby/README.md @@ -47,7 +47,7 @@ Also possible: ```ruby parser = Hammer::ParserBuilder.new .token('Hello ') - .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad')) + .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) .token('!') .build ``` @@ -56,7 +56,7 @@ More like hammer in C: ```ruby h = Hammer::Parser -parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) ``` ### Parsing diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 2699d96..0f10ab1 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -32,16 +32,6 @@ if parser p parser.parse 'blah' end -parser = Hammer::Parser::Sequence.new( - Hammer::Parser::Token.new('Hello '), - Hammer::Parser::Choice.new( - Hammer::Parser::Token.new('Mom'), - Hammer::Parser::Token.new('Dad') - ), - Hammer::Parser::Token.new('!') -) -p parser.parse 'Hello Mom!' 
- parser = Hammer::Parser.build { token 'Hello ' choice { @@ -54,12 +44,11 @@ p parser.parse 'Hello Mom!' parser = Hammer::ParserBuilder.new .token('Hello ') - .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad')) + .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) .token('!') .build p parser.parse 'Hello Mom!' -# not yet working -#h = Hammer::Parser -#parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) -#p parser.parse 'Hello Mom!' +h = Hammer::Parser +parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +p parser.parse 'Hello Mom!' diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index a7b75e2..5d1e8e5 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -14,95 +14,51 @@ module Hammer !result.null? end - class Token < Parser - def initialize(string) - @h_parser = Hammer::Internal.h_token(string, string.length) - end + def self.token(string) + h_parser = Hammer::Internal.h_token(string, string.length) + + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + return parser end - class Ch < Parser - def initialize(char) + def self.ch(char) # TODO: Really? Should probably accept Fixnum in appropriate range - # Also, char.ord gives unexptected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true + # Also, char.ord gives unexpected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true # Not really unexpected though, since 20170 & 255 == 202. # But probably it's better to use Ch for Fixnum in 0..255 only, and only Token for strings. 
raise ArgumentError, 'expecting a one-character String' unless char.is_a?(String) && char.length == 1 - @h_parser = Hammer::Internal.h_ch(char.ord) - end + h_parser = Hammer::Internal.h_ch(char.ord) + + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + return parser end - class Sequence < Parser - def initialize(*parsers) - #args = [] - #parsers.each { |p| args += [:pointer, p.h_parser] } + def self.sequence(*parsers) args = parsers.flat_map { |p| [:pointer, p.h_parser] } - @h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) - @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) + sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) # TODO: Use (managed?) FFI struct instead of void pointers - end + + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + parser.instance_variable_set :@sub_parsers, sub_parsers + return parser end - class Choice < Parser - def initialize(*parsers) - #args = [] - #parsers.each { |p| args += [:pointer, p.h_parser] } + def self.choice(*parsers) args = parsers.flat_map { |p| [:pointer, p.h_parser] } - @h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) - @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) + sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) # TODO: Use (managed?) FFI struct instead of void pointers - end - end - # Define parsers that take some number of other parsers - # TODO: Maybe use -1 for variable number, and use this for Sequence and Choice too - # TODO: Refactor this code as a method? 
And call it like: define_parser :Int64, :h_int64, 0 - [ - [:Int64, :h_int64, 0], - [:Int32, :h_int32, 0], - [:Int16, :h_int16, 0], - [:Int8, :h_int8, 0], - [:UInt64, :h_uint64, 0], - [:UInt32, :h_uint32, 0], - [:UInt16, :h_uint16, 0], - [:UInt8, :h_uint8, 0], - [:Whitespace, :h_whitespace, 1], - [:Left, :h_left, 2], - [:Right, :h_right, 2], - [:Middle, :h_middle, 3], - [:End, :h_end_p, 0], - [:Nothing, :h_nothing_p, 0], - [:ButNot, :h_butnot, 2], - [:Difference, :h_difference, 2], - [:Xor, :h_xor, 2], - [:Many, :h_many, 1], - [:Many1, :h_many1, 1] - ].each do |class_name, h_function_name, parameter_count| - # Create new subclass of Hammer::Parser - klass = Class.new(Hammer::Parser) do - # Need to use define_method instead of def to be able to access h_function_name in the method's body - define_method :initialize do |*parsers| - # Checking parameter_count is not really needed, since the h_* methods will complain anyways - @h_parser = Hammer::Internal.send(h_function_name, *parsers.map(&:h_parser)) - # TODO: Do we need to store sub-parsers to prevent them from getting garbage-collected? - end - end - # Register class with name Hammer::Parser::ClassName - Hammer::Parser.const_set class_name, klass + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + parser.instance_variable_set :@sub_parsers, sub_parsers + return parser end - # TODO: - # Hammer::Parser::Token.new('...') is a bit too long. Find a shorter way to use the parsers. - # Maybe: - # class Hammer::Parser - # def self.token(*args) - # Hammer::Parser::Token.new(*args) - # end - # end - # Can create functions like that automatically. Usage: - # h = Hammer::Parser - # parser = h.sequence(h.token('blah'), h.token('other_token')) - # Looks almost like hammer in C! - # Defines a parser constructor with the given name. 
# Options: # hammer_function: name of the hammer function to call (default: 'h_'+name) diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index 2f36c84..de8ce0d 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -25,7 +25,7 @@ module Hammer def build if @parsers.length > 1 - Hammer::Parser::Sequence.new(*@parsers) + Hammer::Parser.sequence(*@parsers) else @parsers.first end @@ -40,14 +40,12 @@ module Hammer def token(str) - #@h_parsers << Hammer::Internal.h_token(str, str.length) - @parsers << Hammer::Parser::Token.new(str) + @parsers << Hammer::Parser.token(str) return self end def ch(char) - #@h_parsers << Hammer::Internal.h_ch(char.ord) - @parsers << Hammer::Parser::Ch.new(char) + @parsers << Hammer::Parser.ch(char) return self end @@ -57,17 +55,13 @@ module Hammer @parsers += parsers @parsers << Docile.dsl_eval(ParserBuilder.new, &block).build if block_given? return self - #builder = Hammer::ParserBuilder.new - #builder.instance_eval &block - #@parsers << Hammer::Parser::Sequence.new(*builder.parsers) - ## TODO: Save original receiver and redirect missing methods! end def choice(*parsers, &block) if block_given? parsers += Docile.dsl_eval(ParserBuilder.new, &block).parsers end - @parsers << Hammer::Parser::Choice.new(*parsers) + @parsers << Hammer::Parser.choice(*parsers) return self end end From 7bdd8b7ce2396a5bb9616cb50fd2c23fa83b8b2f Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Fri, 15 Nov 2013 14:26:00 +0100 Subject: [PATCH 04/35] Free parse result. 
--- src/bindings/ruby/lib/hammer/internal.rb | 2 +- src/bindings/ruby/lib/hammer/parser.rb | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 0083ebd..4fadb50 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -53,7 +53,7 @@ module Hammer # h_bind_indirect # free the parse result - # h_parse_result_free + attach_function :h_parse_result_free, [:pointer], :void # TODO: Does the HParser* need to be freed? end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 5d1e8e5..79e0370 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -9,8 +9,10 @@ module Hammer def parse(data) raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. - result = Hammer::Internal.h_parse(@h_parser, data, data.length); + result = Hammer::Internal.h_parse(@h_parser, data, data.length) # TODO: Do something with the data + # (wrap in garbage-collected object, call h_parse_result_free when destroyed by GC) + Hammer::Internal.h_parse_result_free(result) !result.null? end From 3f661b91e3eaaa51fa1dbc11a0345407a1862cba Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Fri, 15 Nov 2013 14:49:18 +0100 Subject: [PATCH 05/35] Duplicate string argument to token parser. --- src/bindings/ruby/lib/hammer.rb | 6 ++++++ src/bindings/ruby/lib/hammer/parser.rb | 11 +++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 0f10ab1..4b86690 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -52,3 +52,9 @@ p parser.parse 'Hello Mom!' 
h = Hammer::Parser parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) p parser.parse 'Hello Mom!' + +s = 'blah' +parser = h.token(s) +p parser.parse 'BLAH' # => false +s.upcase! +p parser.parse 'BLAH' # => false diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 79e0370..defb776 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -17,11 +17,14 @@ module Hammer end def self.token(string) - h_parser = Hammer::Internal.h_token(string, string.length) + h_string = string.dup + h_parser = Hammer::Internal.h_token(h_string, h_string.length) - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - return parser + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + # prevent string from getting garbage-collected + parser.instance_variable_set :@h_string, h_string + return parser end def self.ch(char) From 049a64946b5240fa75bb19a8846427a27d264bb0 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Fri, 15 Nov 2013 14:50:31 +0100 Subject: [PATCH 06/35] Fix indentation. --- src/bindings/ruby/lib/hammer/parser.rb | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index defb776..b5d95c8 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -28,40 +28,40 @@ module Hammer end def self.ch(char) - # TODO: Really? Should probably accept Fixnum in appropriate range - # Also, char.ord gives unexpected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true - # Not really unexpected though, since 20170 & 255 == 202. - # But probably it's better to use Ch for Fixnum in 0..255 only, and only Token for strings. 
- raise ArgumentError, 'expecting a one-character String' unless char.is_a?(String) && char.length == 1 - h_parser = Hammer::Internal.h_ch(char.ord) + # TODO: Really? Should probably accept Fixnum in appropriate range + # Also, char.ord gives unexpected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true + # Not really unexpected though, since 20170 & 255 == 202. + # But probably it's better to use Ch for Fixnum in 0..255 only, and only Token for strings. + raise ArgumentError, 'expecting a one-character String' unless char.is_a?(String) && char.length == 1 + h_parser = Hammer::Internal.h_ch(char.ord) - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - return parser + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + return parser end def self.sequence(*parsers) - args = parsers.flat_map { |p| [:pointer, p.h_parser] } - h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) - sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) - # TODO: Use (managed?) FFI struct instead of void pointers + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) + sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + # TODO: Use (managed?) 
FFI struct instead of void pointers - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - parser.instance_variable_set :@sub_parsers, sub_parsers - return parser + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + parser.instance_variable_set :@sub_parsers, sub_parsers + return parser end def self.choice(*parsers) - args = parsers.flat_map { |p| [:pointer, p.h_parser] } - h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) - sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) - # TODO: Use (managed?) FFI struct instead of void pointers + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) + sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) + # TODO: Use (managed?) FFI struct instead of void pointers - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - parser.instance_variable_set :@sub_parsers, sub_parsers - return parser + parser = Hammer::Parser.new + parser.instance_variable_set :@h_parser, h_parser + parser.instance_variable_set :@sub_parsers, sub_parsers + return parser end # Defines a parser constructor with the given name. From 5ea0b727d016df9686a3122b67b699c69a60b57e Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Tue, 26 Nov 2013 18:20:02 +0100 Subject: [PATCH 07/35] Don't use C extensions (JRuby compatibility). 
--- src/bindings/ruby/.gitignore | 2 -- src/bindings/ruby/Gemfile | 3 --- src/bindings/ruby/README.md | 6 +----- src/bindings/ruby/Rakefile | 8 -------- src/bindings/ruby/ext/hammer_ext/extconf.rb | 9 --------- src/bindings/ruby/ext/hammer_ext/hammer_ext.c | 6 ------ src/bindings/ruby/ext/hammer_ext/hammer_ext.h | 6 ------ src/bindings/ruby/ext/hammer_ext/token_type.c | 20 ------------------- src/bindings/ruby/ext/hammer_ext/token_type.h | 6 ------ src/bindings/ruby/lib/hammer.rb | 1 - 10 files changed, 1 insertion(+), 66 deletions(-) delete mode 100644 src/bindings/ruby/Rakefile delete mode 100644 src/bindings/ruby/ext/hammer_ext/extconf.rb delete mode 100644 src/bindings/ruby/ext/hammer_ext/hammer_ext.c delete mode 100644 src/bindings/ruby/ext/hammer_ext/hammer_ext.h delete mode 100644 src/bindings/ruby/ext/hammer_ext/token_type.c delete mode 100644 src/bindings/ruby/ext/hammer_ext/token_type.h diff --git a/src/bindings/ruby/.gitignore b/src/bindings/ruby/.gitignore index 0f57d62..66f8ed3 100644 --- a/src/bindings/ruby/.gitignore +++ b/src/bindings/ruby/.gitignore @@ -1,3 +1 @@ -/tmp/ -/lib/hammer/hammer_ext.bundle /Gemfile.lock diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile index 96ca089..9e7a9be 100644 --- a/src/bindings/ruby/Gemfile +++ b/src/bindings/ruby/Gemfile @@ -2,9 +2,6 @@ source 'https://rubygems.org' gemspec -gem 'rake' -gem 'rake-compiler' - group :test do # ... end diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md index 1876fc7..e69a504 100644 --- a/src/bindings/ruby/README.md +++ b/src/bindings/ruby/README.md @@ -7,8 +7,6 @@ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsi * I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer). -* C extension not really needed at the moment, if we don't mind hardcoding the token types in the ruby code. 
- ## Development @@ -16,9 +14,7 @@ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsi 2. Run `bundle install` to install dependencies. -3. Run `rake compile` to compile the C extension. - -4. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded. +3. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded. ## Installation diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile deleted file mode 100644 index 646654d..0000000 --- a/src/bindings/ruby/Rakefile +++ /dev/null @@ -1,8 +0,0 @@ -require 'rake/extensiontask' - -#spec = Gem::Specification.load('hammer-parser-ruby.gemspec') -#Rake::ExtensionTask.new('hammer_ext', spec) - -Rake::ExtensionTask.new 'hammer_ext' do |ext| - ext.lib_dir = 'lib/hammer' -end diff --git a/src/bindings/ruby/ext/hammer_ext/extconf.rb b/src/bindings/ruby/ext/hammer_ext/extconf.rb deleted file mode 100644 index d5158a7..0000000 --- a/src/bindings/ruby/ext/hammer_ext/extconf.rb +++ /dev/null @@ -1,9 +0,0 @@ -require 'mkmf' - -extension_name = 'hammer_ext' -dir_config extension_name - -abort 'ERROR: missing hammer library' unless have_library 'hammer' -abort 'ERROR: missing hammer.h' unless have_header 'hammer.h' - -create_makefile extension_name diff --git a/src/bindings/ruby/ext/hammer_ext/hammer_ext.c b/src/bindings/ruby/ext/hammer_ext/hammer_ext.c deleted file mode 100644 index 6b461c6..0000000 --- a/src/bindings/ruby/ext/hammer_ext/hammer_ext.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "token_type.h" - -void Init_hammer_ext(void) -{ - Init_token_type(); -} diff --git a/src/bindings/ruby/ext/hammer_ext/hammer_ext.h b/src/bindings/ruby/ext/hammer_ext/hammer_ext.h deleted file mode 100644 index 98fc2da..0000000 --- a/src/bindings/ruby/ext/hammer_ext/hammer_ext.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef HAMMER_EXT__H -#define HAMMER_EXT__H - -// ... 
- -#endif diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.c b/src/bindings/ruby/ext/hammer_ext/token_type.c deleted file mode 100644 index a154d7e..0000000 --- a/src/bindings/ruby/ext/hammer_ext/token_type.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include - -#include "token_type.h" - -#define DefineHammerInternalConst(name) rb_define_const(mHammerInternal, #name, INT2FIX(name)); - -void Init_token_type(void) -{ - VALUE mHammer = rb_define_module("Hammer"); - VALUE mHammerInternal = rb_define_module_under(mHammer, "Internal"); - - DefineHammerInternalConst(TT_NONE); - DefineHammerInternalConst(TT_BYTES); - DefineHammerInternalConst(TT_SINT); - DefineHammerInternalConst(TT_UINT); - DefineHammerInternalConst(TT_SEQUENCE); - DefineHammerInternalConst(TT_ERR); - DefineHammerInternalConst(TT_USER); -} diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.h b/src/bindings/ruby/ext/hammer_ext/token_type.h deleted file mode 100644 index 5652ce6..0000000 --- a/src/bindings/ruby/ext/hammer_ext/token_type.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef HAMMER_EXT_TOKEN_TYPE__H -#define HAMMER_EXT_TOKEN_TYPE__H - -void Init_token_type(void); - -#endif diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 4b86690..2032b36 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -1,4 +1,3 @@ -require 'hammer/hammer_ext' require 'hammer/internal' require 'hammer/parser' require 'hammer/parser_builder' From d3ff5f5b1e4f6487b988081e42f0ad5973107ddc Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 2 Dec 2013 17:00:34 +0100 Subject: [PATCH 08/35] Define choice and sequence with define_parser method. 
--- src/bindings/ruby/lib/hammer/parser.rb | 39 +++++++++----------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index b5d95c8..665b870 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -40,48 +40,35 @@ module Hammer return parser end - def self.sequence(*parsers) - args = parsers.flat_map { |p| [:pointer, p.h_parser] } - h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil) - sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) - # TODO: Use (managed?) FFI struct instead of void pointers - - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - parser.instance_variable_set :@sub_parsers, sub_parsers - return parser - end - - def self.choice(*parsers) - args = parsers.flat_map { |p| [:pointer, p.h_parser] } - h_parser = Hammer::Internal.h_choice(*args, :pointer, nil) - sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though) - # TODO: Use (managed?) FFI struct instead of void pointers - - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - parser.instance_variable_set :@sub_parsers, sub_parsers - return parser - end - # Defines a parser constructor with the given name. 
# Options: # hammer_function: name of the hammer function to call (default: 'h_'+name) + # varargs: Whether the function is taking a variable number of arguments (default: false) def self.define_parser(name, options = {}) hammer_function = options[:hammer_function] || ('h_' + name.to_s) + varargs = options[:varargs] || false # Define a new class method define_singleton_method name do |*parsers| - #args = parsers.map { |p| p.instance_variable_get :@h_parser } - h_parser = Hammer::Internal.send hammer_function, *parsers.map(&:h_parser) + if varargs + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + args += [:pointer, nil] + else + args = parsers.map(&:h_parser) + end + h_parser = Hammer::Internal.send hammer_function, *args parser = Hammer::Parser.new parser.instance_variable_set :@h_parser, h_parser + parser.instance_variable_set :@sub_parsers, parsers # store sub parsers to prevent them from being garbage-collected return parser end end private_class_method :define_parser + define_parser :sequence, varargs: true + define_parser :choice, varargs: true + define_parser :int64 define_parser :int32 define_parser :int16 From 676799fbbc13a550f8259c4e23a0edf6615a0af0 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 2 Dec 2013 17:03:45 +0100 Subject: [PATCH 09/35] Add Parser.build_choice method. --- src/bindings/ruby/lib/hammer/parser_builder.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index de8ce0d..95dd4c9 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -9,9 +9,12 @@ module Hammer def self.build(&block) ParserBuilder.new.sequence(&block).build end - end - # TODO: Is this even useful for "real" usage? 
+ def self.build_choice(&block) + ParserBuilder.new.choice(&block).build + end + end # class Parser + class ParserBuilder attr_reader :parsers From cfff00f8ed22c9f129c0bfd4473dbe9ec397f82d Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 2 Dec 2013 17:04:21 +0100 Subject: [PATCH 10/35] Add more parsers to ParserBuilder. --- .../ruby/lib/hammer/parser_builder.rb | 61 ++++++++++++------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index 95dd4c9..9547c6e 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -20,10 +20,6 @@ module Hammer def initialize @parsers = [] - # TODO: Store an aggregator, e.g.: - # @aggregator = Hammer::Parser::Sequence - # Sequence is the default, set to Hammer::Parser::Choice for choice() calls - # In the build method, use @aggregator.new(*@parsers) to build the final parser. end def build @@ -35,23 +31,6 @@ module Hammer end - # TODO: Need to check if that's really needed - def call(parser) - @parsers << parser - return self - end - - - def token(str) - @parsers << Hammer::Parser.token(str) - return self - end - - def ch(char) - @parsers << Hammer::Parser.ch(char) - return self - end - # can call it either as ParserBuiler.new.sequence(parser1, parser2, parser3) # or as Parser.build { sequence { call parser1; call parser2; call parser3 } } def sequence(*parsers, &block) @@ -67,6 +46,42 @@ module Hammer @parsers << Hammer::Parser.choice(*parsers) return self end - end -end + def call(parser) + @parsers << parser + return self + end + + # Defines a parser constructor with the given name. 
+ def self.define_parser(name, options = {}) + define_method name do |*args| + @parsers << Hammer::Parser.send(name, *args) + return self + end + end + private_class_method :define_parser + + define_parser :token + define_parser :ch + define_parser :int64 + define_parser :int32 + define_parser :int16 + define_parser :int8 + define_parser :uint64 + define_parser :uint32 + define_parser :uint16 + define_parser :uint8 + define_parser :whitespace + define_parser :left + define_parser :right + define_parser :middle + define_parser :end + define_parser :nothing + define_parser :butnot + define_parser :difference + define_parser :xor + define_parser :many + define_parser :many1 + end # class ParserBuilder + +end # module Hammer From 8048f2e7312d98170f49548ba1fefd4806efa44f Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Wed, 11 Dec 2013 13:58:22 +0100 Subject: [PATCH 11/35] Use Fixnum for Parser.ch() --- src/bindings/ruby/lib/hammer/parser.rb | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 665b870..5e4e3c0 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -27,13 +27,9 @@ module Hammer return parser end - def self.ch(char) - # TODO: Really? Should probably accept Fixnum in appropriate range - # Also, char.ord gives unexpected results if you pass e.g. Japanese characters: '今'.ord == 20170; Hammer::Parser::Ch.new('今').parse(202.chr) == true - # Not really unexpected though, since 20170 & 255 == 202. - # But probably it's better to use Ch for Fixnum in 0..255 only, and only Token for strings. 
- raise ArgumentError, 'expecting a one-character String' unless char.is_a?(String) && char.length == 1 - h_parser = Hammer::Internal.h_ch(char.ord) + def self.ch(num) + raise ArgumentError, 'expecting a Fixnum in 0..255', unless num.is_a?(Fixnum) and num.between?(0, 255) + h_parser = Hammer::Internal.h_ch(num) parser = Hammer::Parser.new parser.instance_variable_set :@h_parser, h_parser From 8ff5e181cefa4afa7b197221ce39aa575c2ce363 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Wed, 11 Dec 2013 14:01:43 +0100 Subject: [PATCH 12/35] Add more parsers. --- src/bindings/ruby/lib/hammer/internal.rb | 38 +++++++++++-------- src/bindings/ruby/lib/hammer/parser.rb | 11 ++++++ .../ruby/lib/hammer/parser_builder.rb | 10 +++++ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 4fadb50..8132567 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -27,9 +27,8 @@ module Hammer attach_function :h_left, [:pointer, :pointer], :pointer attach_function :h_right, [:pointer, :pointer], :pointer attach_function :h_middle, [:pointer, :pointer, :pointer], :pointer - # h_action - # h_in - # h_not_in + #attach_function :h_in, [:string, :size_t], :pointer + #attach_function :h_not_in, [:string, :size_t], :pointer attach_function :h_end_p, [], :pointer attach_function :h_nothing_p, [], :pointer attach_function :h_sequence, [:varargs], :pointer @@ -39,18 +38,27 @@ module Hammer attach_function :h_xor, [:pointer, :pointer], :pointer attach_function :h_many, [:pointer], :pointer attach_function :h_many1, [:pointer], :pointer - # h_repeat_n - # h_optional - # h_ignore - # h_sepBy - # h_sepBy1 - # h_epsilon_p - # h_length_value - # h_attr_bool - # h_and - # h_not - # h_indirect - # h_bind_indirect + #attach_function :h_repeat_n, [:pointer, :size_t], :pointer + attach_function :h_optional, [:pointer], :pointer + attach_function :h_ignore, 
[:pointer], :pointer + attach_function :h_sepBy, [:pointer, :pointer], :pointer + attach_function :h_sepBy1, [:pointer, :pointer], :pointer + attach_function :h_epsilon_p, [], :pointer + attach_function :h_length_value, [:pointer, :pointer], :pointer + attach_function :h_and, [:pointer], :pointer + attach_function :h_not, [:pointer], :pointer + + attach_function :h_indirect, [], :pointer + attach_function :h_bind_indirect, [:pointer, :pointer], :void + + #attach_function :h_action, [:pointer, ...], :pointer + #attach_function :h_attr_bool, [:pointer, ...], :pointer + + #class HParseResult < FFI::Struct + # layout :ast, :pointer, + # :bit_length, :longlong, + # :arena, :pointer + #end # free the parse result attach_function :h_parse_result_free, [:pointer], :void diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 5e4e3c0..12d0476 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -84,6 +84,17 @@ module Hammer define_parser :xor define_parser :many define_parser :many1 + define_parser :optional + define_parser :ignore + define_parser :sepBy + define_parser :sepBy1 + define_parser :epsilon_p + define_parser :length_value + define_parser :and + define_parser :not + + # TODO: If indirect, add a bind method that calls h_bind_indirect + define_parser :indirect attr_reader :h_parser end diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index 9547c6e..d1618c5 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -82,6 +82,16 @@ module Hammer define_parser :xor define_parser :many define_parser :many1 + define_parser :optional + define_parser :ignore + define_parser :sepBy + define_parser :sepBy1 + define_parser :epsilon_p + define_parser :length_value + define_parser :and + define_parser :not + define_parser :indirect + end # class ParserBuilder end # module 
Hammer From 76782bfa4a2c30509d8d7511096eb36a56064399 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Wed, 11 Dec 2013 14:37:36 +0100 Subject: [PATCH 13/35] Implement indirect parser and fix some bugs. --- src/bindings/ruby/lib/hammer.rb | 16 ++++++- src/bindings/ruby/lib/hammer/parser.rb | 47 +++++++++++-------- .../ruby/lib/hammer/parser_builder.rb | 24 ++++++++-- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 2032b36..54b2501 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -12,7 +12,7 @@ require 'hammer/parser_builder' parser = Hammer::Parser.build do token 'blah' - ch 'a' + ch 'a'.ord choice { sequence { token 'abc' @@ -57,3 +57,17 @@ parser = h.token(s) p parser.parse 'BLAH' # => false s.upcase! p parser.parse 'BLAH' # => false + + +x = nil +parser = Hammer::Parser.build { + token 'abc' + x = indirect + end_p +} +x.bind(h.token('abd')) + +p parser.parse 'abcabdabd' +p parser.parse 'abcabd' +p parser.parse 'abdabd' +p parser.parse 'abd' diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 12d0476..8ae6ee8 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -3,9 +3,22 @@ module Hammer # Don't create new instances with Hammer::Parser.new, # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.) - def initialize + # + # name: Name of the parser. Should be a symbol. + # h_parser: The pointer to the parser as returned by hammer. + # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector. + def initialize(name, h_parser, dont_gc) + @name = name + @h_parser = h_parser + @dont_gc = dont_gc end + attr_reader :name + attr_reader :h_parser + + # Parse the given data. Returns true if successful, false otherwise. + # + # data: A string containing the data to parse. 
def parse(data) raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. @@ -16,24 +29,24 @@ module Hammer !result.null? end + # Binds an indirect parser. + def bind(other_parser) + raise RuntimeError, 'can only bind indirect parsers' unless self.name == :indirect + Hammer::Internal.h_bind_indirect(self.h_parser, other_parser.h_parser) + end + def self.token(string) h_string = string.dup h_parser = Hammer::Internal.h_token(h_string, h_string.length) - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - # prevent string from getting garbage-collected - parser.instance_variable_set :@h_string, h_string - return parser + return Hammer::Parser.new(:token, h_parser, h_string) end def self.ch(num) - raise ArgumentError, 'expecting a Fixnum in 0..255', unless num.is_a?(Fixnum) and num.between?(0, 255) + raise ArgumentError, 'expecting a Fixnum in 0..255' unless num.is_a?(Fixnum) and num.between?(0, 255) h_parser = Hammer::Internal.h_ch(num) - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - return parser + return Hammer::Parser.new(:ch, h_parser, nil) end # Defines a parser constructor with the given name. 
@@ -41,7 +54,7 @@ module Hammer # hammer_function: name of the hammer function to call (default: 'h_'+name) # varargs: Whether the function is taking a variable number of arguments (default: false) def self.define_parser(name, options = {}) - hammer_function = options[:hammer_function] || ('h_' + name.to_s) + hammer_function = options[:hammer_function] || ('h_' + name.to_s).to_sym varargs = options[:varargs] || false # Define a new class method @@ -54,10 +67,7 @@ module Hammer end h_parser = Hammer::Internal.send hammer_function, *args - parser = Hammer::Parser.new - parser.instance_variable_set :@h_parser, h_parser - parser.instance_variable_set :@sub_parsers, parsers # store sub parsers to prevent them from being garbage-collected - return parser + return Hammer::Parser.new(name, h_parser, parsers) end end private_class_method :define_parser @@ -77,8 +87,8 @@ module Hammer define_parser :left define_parser :right define_parser :middle - define_parser :end - define_parser :nothing + define_parser :end_p + define_parser :nothing_p define_parser :butnot define_parser :difference define_parser :xor @@ -92,10 +102,7 @@ module Hammer define_parser :length_value define_parser :and define_parser :not - - # TODO: If indirect, add a bind method that calls h_bind_indirect define_parser :indirect - attr_reader :h_parser end end diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index d1618c5..d610db9 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -75,8 +75,8 @@ module Hammer define_parser :left define_parser :right define_parser :middle - define_parser :end - define_parser :nothing + define_parser :end_p + define_parser :nothing_p define_parser :butnot define_parser :difference define_parser :xor @@ -90,7 +90,25 @@ module Hammer define_parser :length_value define_parser :and define_parser :not - define_parser :indirect + + # At least indirect must 
return the parser instead of the builder, so it can be stored in a variable. + # Other possible solution: + # Make indirect take a name parameter, and use the name to bind it later. + # Example: + # p = Hammer::Parser.build { indirect(:the_name) } + # p.bind(:the_name, inner_parser) + # (store names and parsers in hash in the builder, + # when building merge hashes from sub builders and store everything in the resulting sequence or choice. + # make Parser#bind take and optional symbol. if it is given, the name is looked up in the table.) + # TODO: + # Think about this more. + # Do we need to be able to build parsers by chaining function calls? DSL should be sufficient. + # If yes, the parser methods in this class should not return "self", but the Hammer::Parser object they create. + def indirect + parser = Hammer::Parser.indirect + @parsers << parser + return parser + end end # class ParserBuilder From 6d38b8e82ab0e46e1acdddc93de4efb4fee9a55e Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Sun, 15 Dec 2013 11:29:04 +0100 Subject: [PATCH 14/35] Use typedefs for pointers. 
--- src/bindings/ruby/lib/hammer/internal.rb | 89 ++++++++++++------------ 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 8132567..3ef39a1 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -6,53 +6,56 @@ module Hammer ffi_lib 'libhammer.dylib' + typedef :pointer, :h_parser + typedef :pointer, :h_parse_result + # run a parser - attach_function :h_parse, [:pointer, :string, :size_t], :pointer + attach_function :h_parse, [:h_parser, :string, :size_t], :h_parse_result # build a parser - attach_function :h_token, [:string, :size_t], :pointer - attach_function :h_ch, [:uint8], :pointer - attach_function :h_ch_range, [:uint8, :uint8], :pointer - attach_function :h_int_range, [:int64, :int64], :pointer - attach_function :h_bits, [:size_t, :bool], :pointer - attach_function :h_int64, [], :pointer - attach_function :h_int32, [], :pointer - attach_function :h_int16, [], :pointer - attach_function :h_int8, [], :pointer - attach_function :h_uint64, [], :pointer - attach_function :h_uint32, [], :pointer - attach_function :h_uint16, [], :pointer - attach_function :h_uint8, [], :pointer - attach_function :h_whitespace, [:pointer], :pointer - attach_function :h_left, [:pointer, :pointer], :pointer - attach_function :h_right, [:pointer, :pointer], :pointer - attach_function :h_middle, [:pointer, :pointer, :pointer], :pointer - #attach_function :h_in, [:string, :size_t], :pointer - #attach_function :h_not_in, [:string, :size_t], :pointer - attach_function :h_end_p, [], :pointer - attach_function :h_nothing_p, [], :pointer - attach_function :h_sequence, [:varargs], :pointer - attach_function :h_choice, [:varargs], :pointer - attach_function :h_butnot, [:pointer, :pointer], :pointer - attach_function :h_difference, [:pointer, :pointer], :pointer - attach_function :h_xor, [:pointer, :pointer], :pointer - attach_function :h_many, 
[:pointer], :pointer - attach_function :h_many1, [:pointer], :pointer - #attach_function :h_repeat_n, [:pointer, :size_t], :pointer - attach_function :h_optional, [:pointer], :pointer - attach_function :h_ignore, [:pointer], :pointer - attach_function :h_sepBy, [:pointer, :pointer], :pointer - attach_function :h_sepBy1, [:pointer, :pointer], :pointer - attach_function :h_epsilon_p, [], :pointer - attach_function :h_length_value, [:pointer, :pointer], :pointer - attach_function :h_and, [:pointer], :pointer - attach_function :h_not, [:pointer], :pointer + attach_function :h_token, [:string, :size_t], :h_parser + attach_function :h_ch, [:uint8], :h_parser + attach_function :h_ch_range, [:uint8, :uint8], :h_parser + attach_function :h_int_range, [:int64, :int64], :h_parser + attach_function :h_bits, [:size_t, :bool], :h_parser + attach_function :h_int64, [], :h_parser + attach_function :h_int32, [], :h_parser + attach_function :h_int16, [], :h_parser + attach_function :h_int8, [], :h_parser + attach_function :h_uint64, [], :h_parser + attach_function :h_uint32, [], :h_parser + attach_function :h_uint16, [], :h_parser + attach_function :h_uint8, [], :h_parser + attach_function :h_whitespace, [:h_parser], :h_parser + attach_function :h_left, [:h_parser, :h_parser], :h_parser + attach_function :h_right, [:h_parser, :h_parser], :h_parser + attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser + #attach_function :h_in, [:string, :size_t], :h_parser + #attach_function :h_not_in, [:string, :size_t], :h_parser + attach_function :h_end_p, [], :h_parser + attach_function :h_nothing_p, [], :h_parser + attach_function :h_sequence, [:varargs], :h_parser + attach_function :h_choice, [:varargs], :h_parser + attach_function :h_butnot, [:h_parser, :h_parser], :h_parser + attach_function :h_difference, [:h_parser, :h_parser], :h_parser + attach_function :h_xor, [:h_parser, :h_parser], :h_parser + attach_function :h_many, [:h_parser], :h_parser + attach_function 
:h_many1, [:h_parser], :h_parser + #attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser + attach_function :h_optional, [:h_parser], :h_parser + attach_function :h_ignore, [:h_parser], :h_parser + attach_function :h_sepBy, [:h_parser, :h_parser], :h_parser + attach_function :h_sepBy1, [:h_parser, :h_parser], :h_parser + attach_function :h_epsilon_p, [], :h_parser + attach_function :h_length_value, [:h_parser, :h_parser], :h_parser + attach_function :h_and, [:h_parser], :h_parser + attach_function :h_not, [:h_parser], :h_parser - attach_function :h_indirect, [], :pointer - attach_function :h_bind_indirect, [:pointer, :pointer], :void + attach_function :h_indirect, [], :h_parser + attach_function :h_bind_indirect, [:h_parser, :h_parser], :void - #attach_function :h_action, [:pointer, ...], :pointer - #attach_function :h_attr_bool, [:pointer, ...], :pointer + #attach_function :h_action, [:h_parser, ...], :h_parser + #attach_function :h_attr_bool, [:h_parser, ...], :h_parser #class HParseResult < FFI::Struct # layout :ast, :pointer, @@ -61,7 +64,7 @@ module Hammer #end # free the parse result - attach_function :h_parse_result_free, [:pointer], :void + attach_function :h_parse_result_free, [:h_parse_result], :void # TODO: Does the HParser* need to be freed? end From c08818e5a5a912f224a7ffa290a1c8544612ab2d Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Sun, 15 Dec 2013 12:01:41 +0100 Subject: [PATCH 15/35] Implement first version of HParseResult. 
--- src/bindings/ruby/lib/hammer.rb | 2 + src/bindings/ruby/lib/hammer/internal.rb | 61 ++++++++++++++++++++---- src/bindings/ruby/lib/hammer/parser.rb | 11 +++-- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 54b2501..0ae353c 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -71,3 +71,5 @@ p parser.parse 'abcabdabd' p parser.parse 'abcabd' p parser.parse 'abdabd' p parser.parse 'abd' + +$r = parser.parse 'abcabd' diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 3ef39a1..9cd82fa 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -7,10 +7,59 @@ module Hammer ffi_lib 'libhammer.dylib' typedef :pointer, :h_parser - typedef :pointer, :h_parse_result + + HTokenType = enum(:none, 1, + :bytes, 2, + :sint, 4, + :uint, 8, + :sequence, 16, + :reserved_1, + :err, 32, + :user, 64, + :max) + + class HCountedArray < FFI::Struct + layout :capacity, :size_t, + :used, :size_t, + :arena, :pointer, + :elements, :pointer # TODO + end + + class HBytes < FFI::Struct + layout :token, :uint8, + :len, :size_t + end + + class HParsedTokenDataUnion < FFI::Union + layout :bytes, HBytes.by_value, + :sint, :int64, + :uint, :uint64, + :dbl, :double, + :flt, :float, + :seq, HCountedArray.by_ref, + :user, :pointer + end + + class HParsedToken < FFI::Struct + layout :token_type, HTokenType, + :data, HParsedTokenDataUnion.by_value, + :index, :size_t, + :bit_offset, :char + end + + class HParseResult < FFI::Struct + layout :ast, HParsedToken.by_ref, + :bit_length, :long_long, + :arena, :pointer + + def self.release(ptr) + p "freeing #{ptr}" + Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? 
+ end + end # run a parser - attach_function :h_parse, [:h_parser, :string, :size_t], :h_parse_result + attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # build a parser attach_function :h_token, [:string, :size_t], :h_parser @@ -57,14 +106,8 @@ module Hammer #attach_function :h_action, [:h_parser, ...], :h_parser #attach_function :h_attr_bool, [:h_parser, ...], :h_parser - #class HParseResult < FFI::Struct - # layout :ast, :pointer, - # :bit_length, :longlong, - # :arena, :pointer - #end - # free the parse result - attach_function :h_parse_result_free, [:h_parse_result], :void + attach_function :h_parse_result_free, [HParseResult.by_ref], :void # TODO: Does the HParser* need to be freed? end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 8ae6ee8..a7e175d 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -16,17 +16,15 @@ module Hammer attr_reader :name attr_reader :h_parser - # Parse the given data. Returns true if successful, false otherwise. + # Parse the given data. Returns the parse result if successful, nil otherwise. # # data: A string containing the data to parse. def parse(data) raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. + result = Hammer::Internal.h_parse(@h_parser, data, data.length) - # TODO: Do something with the data - # (wrap in garbage-collected object, call h_parse_result_free when destroyed by GC) - Hammer::Internal.h_parse_result_free(result) - !result.null? + return result unless result.null? end # Binds an indirect parser. @@ -36,6 +34,9 @@ module Hammer end def self.token(string) + # TODO: + # This might fail in JRuby. 
+ # See "String Memory Allocation" at https://github.com/ffi/ffi/wiki/Core-Concepts h_string = string.dup h_parser = Hammer::Internal.h_token(h_string, h_string.length) From d53ee0749984eafbf85464dded43012d42f9b1a9 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Sun, 15 Dec 2013 14:49:12 +0100 Subject: [PATCH 16/35] Allow access to HCountedArray's elements. --- src/bindings/ruby/lib/hammer/internal.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 9cd82fa..badac10 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -6,6 +6,10 @@ module Hammer ffi_lib 'libhammer.dylib' + # Maybe we can implement Hammer::Parser with FFI::DataConverter. + # That way, most hammer functions won't need to be wrapped. + # (Probably need to wrap token, sequence and choice only). + # See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi typedef :pointer, :h_parser HTokenType = enum(:none, 1, @@ -22,11 +26,16 @@ module Hammer layout :capacity, :size_t, :used, :size_t, :arena, :pointer, - :elements, :pointer # TODO + :elements, :pointer # HParsedToken** + + def elements + elem_array = FFI::Pointer.new(:pointer, self[:elements]) + return (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) } + end end class HBytes < FFI::Struct - layout :token, :uint8, + layout :token, :pointer, # uint8_t* :len, :size_t end @@ -53,16 +62,15 @@ module Hammer :arena, :pointer def self.release(ptr) - p "freeing #{ptr}" Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? end end # run a parser - attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr + attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? 
# build a parser - attach_function :h_token, [:string, :size_t], :h_parser + attach_function :h_token, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser attach_function :h_int_range, [:int64, :int64], :h_parser @@ -79,8 +87,8 @@ module Hammer attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser - #attach_function :h_in, [:string, :size_t], :h_parser - #attach_function :h_not_in, [:string, :size_t], :h_parser + #attach_function :h_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + #attach_function :h_not_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? attach_function :h_end_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser attach_function :h_sequence, [:varargs], :h_parser From 96dfad01789d1c780c496b66c79115e3f25adb33 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 17:07:50 +0100 Subject: [PATCH 17/35] h_token: Copy string correctly. See https://github.com/ffi/ffi/wiki/Core-Concepts#string-memory-allocation for additional information. 
--- src/bindings/ruby/lib/hammer.rb | 12 +++++++++++- src/bindings/ruby/lib/hammer/internal.rb | 6 +++--- src/bindings/ruby/lib/hammer/parser.rb | 15 ++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 0ae353c..db987b4 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -72,4 +72,14 @@ p parser.parse 'abcabd' p parser.parse 'abdabd' p parser.parse 'abd' -$r = parser.parse 'abcabd' +#$r = parser.parse 'abcabd' + + +# Test multibyte characters +parser = Hammer::Parser.build { + token '今日' + end_p +} + +p ($r = parser.parse('今日')) # should succeed + diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index badac10..8f673d4 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -70,7 +70,7 @@ module Hammer attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? # build a parser - attach_function :h_token, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + attach_function :h_token, [:buffer_in, :size_t], :h_parser attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser attach_function :h_int_range, [:int64, :int64], :h_parser @@ -87,8 +87,8 @@ module Hammer attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser - #attach_function :h_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? - #attach_function :h_not_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? 
+ #attach_function :h_in, [:buffer_in, :size_t], :h_parser + #attach_function :h_not_in, [:buffer_in, :size_t], :h_parser attach_function :h_end_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser attach_function :h_sequence, [:varargs], :h_parser diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index a7e175d..8722cc6 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -6,7 +6,7 @@ module Hammer # # name: Name of the parser. Should be a symbol. # h_parser: The pointer to the parser as returned by hammer. - # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector. + # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long this object lives). def initialize(name, h_parser, dont_gc) @name = name @h_parser = h_parser @@ -34,13 +34,14 @@ module Hammer end def self.token(string) - # TODO: - # This might fail in JRuby. - # See "String Memory Allocation" at https://github.com/ffi/ffi/wiki/Core-Concepts - h_string = string.dup - h_parser = Hammer::Internal.h_token(h_string, h_string.length) + # Need to copy string to a memory buffer (not just string.dup) + # * Original string might be modified, this must not affect existing tokens + # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) + # * Use string.length instead of h_string.size to handle multibyte characters correctly. + buffer = FFI::MemoryPointer.from_string(string) + h_parser = Hammer::Internal.h_token(buffer, string.length) - return Hammer::Parser.new(:token, h_parser, h_string) + return Hammer::Parser.new(:token, h_parser, buffer) end def self.ch(num) From 3aa2ac263499967f70353e148195332dbe7dc5db Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 17:56:49 +0100 Subject: [PATCH 18/35] Add real tests. 
--- src/bindings/ruby/Gemfile | 2 +- src/bindings/ruby/README.md | 2 + src/bindings/ruby/Rakefile | 7 +++ src/bindings/ruby/lib/hammer.rb | 72 +---------------------- src/bindings/ruby/test/parser_test.rb | 82 +++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 70 deletions(-) create mode 100644 src/bindings/ruby/Rakefile create mode 100644 src/bindings/ruby/test/parser_test.rb diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile index 9e7a9be..df42814 100644 --- a/src/bindings/ruby/Gemfile +++ b/src/bindings/ruby/Gemfile @@ -3,5 +3,5 @@ source 'https://rubygems.org' gemspec group :test do - # ... + gem 'minitest', '~> 5.2' end diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md index e69a504..61ce0b3 100644 --- a/src/bindings/ruby/README.md +++ b/src/bindings/ruby/README.md @@ -16,6 +16,8 @@ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsi 3. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded. +4. To run tests, just run `rake`. + ## Installation diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile new file mode 100644 index 0000000..70b8662 --- /dev/null +++ b/src/bindings/ruby/Rakefile @@ -0,0 +1,7 @@ +require 'rake/testtask' + +Rake::TestTask.new do |t| + t.pattern = "test/*_test.rb" +end + +task :default => [:test] diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index db987b4..bb0e2a2 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -7,79 +7,13 @@ require 'hammer/parser_builder' # people can use "require 'hammer-parser'" in their code. - -# TODO: Put tests in test/ directory. 
- -parser = Hammer::Parser.build do - token 'blah' - ch 'a'.ord - choice { - sequence { - token 'abc' - } - token 'def' - } -end - -p parser - -if parser - p parser.parse 'blahaabcd' - p parser.parse 'blahadefd' - p parser.parse 'blahablad' - p parser.parse 'blaha' - p parser.parse 'blah' -end - -parser = Hammer::Parser.build { - token 'Hello ' - choice { - token 'Mom' - token 'Dad' - } - token '!' -} -p parser.parse 'Hello Mom!' - -parser = Hammer::ParserBuilder.new - .token('Hello ') - .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) - .token('!') - .build -p parser.parse 'Hello Mom!' - -h = Hammer::Parser -parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) -p parser.parse 'Hello Mom!' - -s = 'blah' -parser = h.token(s) -p parser.parse 'BLAH' # => false -s.upcase! -p parser.parse 'BLAH' # => false - - +# Leave this in for now to be able to play around with HParseResult in irb. x = nil parser = Hammer::Parser.build { token 'abc' x = indirect end_p } -x.bind(h.token('abd')) - -p parser.parse 'abcabdabd' -p parser.parse 'abcabd' -p parser.parse 'abdabd' -p parser.parse 'abd' - -#$r = parser.parse 'abcabd' - - -# Test multibyte characters -parser = Hammer::Parser.build { - token '今日' - end_p -} - -p ($r = parser.parse('今日')) # should succeed +x.bind(Hammer::Parser.token('abd')) +$r = parser.parse 'abcabd' diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb new file mode 100644 index 0000000..d3c4bbb --- /dev/null +++ b/src/bindings/ruby/test/parser_test.rb @@ -0,0 +1,82 @@ +require 'bundler/setup' +require 'hammer' +require 'minitest/autorun' + +class ParserTest < Minitest::Test + def test_builder_1 + parser = Hammer::Parser.build { + token 'blah' + ch 'a'.ord + choice { + sequence { + token 'abc' + } + token 'def' + } + } + + refute_nil parser + + refute_nil parser.parse('blahaabcd') + refute_nil parser.parse('blahadefd') + assert_nil parser.parse('blahablad') + 
assert_nil parser.parse('blaha') + assert_nil parser.parse('blah') + end + + def test_builder_2 + parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) + .token('!') + .build + + refute_nil parser + refute_nil parser.parse('Hello Mom!') + end + + def test_builder_3 + h = Hammer::Parser + parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) + + refute_nil parser + refute_nil parser.parse('Hello Mom!') + end + + def test_string_copied + s = 'blah' + parser = Hammer::Parser.token(s) + + refute_equal s, 'BLAH' + assert_nil parser.parse('BLAH') + + # parser still shouldn't match, even if we modify the string in-place + s.upcase! + assert_equal s, 'BLAH' + assert_nil parser.parse('BLAH') + end + + def test_indirect + x = nil + parser = Hammer::Parser.build { + token 'abc' + x = indirect + end_p + } + x.bind(Hammer::Parser.token('abd')) + + assert_nil parser.parse('abcabdabd') + refute_nil parser.parse('abcabd') + assert_nil parser.parse('abdabd') + assert_nil parser.parse('abc') + end + + def test_multibyte_token + parser = Hammer::Parser.build { + token '今日' + end_p + } + + refute_nil parser.parse('今日') + end +end From cf59ec83edc99cc249759fcb799d55dcb492312d Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 20:20:27 +0100 Subject: [PATCH 19/35] Fix parsing of strings with multibyte characters. --- src/bindings/ruby/lib/hammer/parser.rb | 5 ++--- src/bindings/ruby/test/parser_test.rb | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 8722cc6..1d7a76e 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -23,7 +23,7 @@ module Hammer raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. 
- result = Hammer::Internal.h_parse(@h_parser, data, data.length) + result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize) return result unless result.null? end @@ -37,9 +37,8 @@ module Hammer # Need to copy string to a memory buffer (not just string.dup) # * Original string might be modified, this must not affect existing tokens # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) - # * Use string.length instead of h_string.size to handle multibyte characters correctly. buffer = FFI::MemoryPointer.from_string(string) - h_parser = Hammer::Internal.h_token(buffer, string.length) + h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end return Hammer::Parser.new(:token, h_parser, buffer) end diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index d3c4bbb..f5f12f2 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ b/src/bindings/ruby/test/parser_test.rb @@ -74,9 +74,10 @@ class ParserTest < Minitest::Test def test_multibyte_token parser = Hammer::Parser.build { token '今日' + token 'a' end_p } - refute_nil parser.parse('今日') + refute_nil parser.parse('今日a') end end From b16eab8f335a4c17850b77f1f896c29b8632cf6e Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 21:00:59 +0100 Subject: [PATCH 20/35] Fix memory issues, add HBytes#token. 
--- src/bindings/ruby/lib/hammer.rb | 3 +++ src/bindings/ruby/lib/hammer/internal.rb | 5 +++++ src/bindings/ruby/lib/hammer/parser.rb | 18 +++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index bb0e2a2..cfb62dc 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -16,4 +16,7 @@ parser = Hammer::Parser.build { } x.bind(Hammer::Parser.token('abd')) +#$p = parser $r = parser.parse 'abcabd' + +p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token } diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 8f673d4..0c462fe 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -37,6 +37,11 @@ module Hammer class HBytes < FFI::Struct layout :token, :pointer, # uint8_t* :len, :size_t + + def token + # TODO: Encoding? Should probably be the same encoding as the string the token was created with. + return self[:token].read_string(self[:len]) #.force_encoding('UTF-8') + end end class HParsedTokenDataUnion < FFI::Union diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 1d7a76e..c0c1cfc 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -7,10 +7,12 @@ module Hammer # name: Name of the parser. Should be a symbol. # h_parser: The pointer to the parser as returned by hammer. # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long this object lives). - def initialize(name, h_parser, dont_gc) + def initialize(name, h_parser, dont_gc=[]) @name = name @h_parser = h_parser - @dont_gc = dont_gc + # Always store as array, so we can easily add stuff later on + dont_gc = [dont_gc] unless dont_gc.is_a? 
Array + @dont_gc = dont_gc.dup end attr_reader :name @@ -24,13 +26,23 @@ module Hammer raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize) - return result unless result.null? + if result.null? + return nil + else + # NOTE: + # The parse result *must* hold a reference to the parser that created it! + # Otherwise, the parser might get garbage-collected while the result is still valid. + # Any pointers to token strings will then be invalid. + result.instance_variable_set :@parser, self + return result + end end # Binds an indirect parser. def bind(other_parser) raise RuntimeError, 'can only bind indirect parsers' unless self.name == :indirect Hammer::Internal.h_bind_indirect(self.h_parser, other_parser.h_parser) + @dont_gc << other_parser end def self.token(string) From 8c653b519e4874e3b7868f91dbaf4c4ef3eb362a Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 21:28:23 +0100 Subject: [PATCH 21/35] Add tests about token encoding (failing for now). --- src/bindings/ruby/lib/hammer/internal.rb | 7 +++++-- src/bindings/ruby/test/parser_test.rb | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 0c462fe..12d797f 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -39,8 +39,11 @@ module Hammer :len, :size_t def token - # TODO: Encoding? Should probably be the same encoding as the string the token was created with. - return self[:token].read_string(self[:len]) #.force_encoding('UTF-8') + # TODO: Encoding? + # Should be the same encoding as the string the token was created with. + # But how do we get to this knowledge at this point? + # Cheap solution: Just ask the user (additional parameter with default value of UTF-8). 
+ return self[:token].read_string(self[:len]).force_encoding('UTF-8') end end diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index f5f12f2..abbd1c1 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ b/src/bindings/ruby/test/parser_test.rb @@ -80,4 +80,14 @@ class ParserTest < Minitest::Test refute_nil parser.parse('今日a') end + + def test_token_encoding(encoding='UTF-8') + string = '今日'.encode(encoding) + parser = Hammer::Parser.token(string) + assert_equal string, parser.parse(string)[:ast][:data][:bytes].token + end + + def test_token_encoding_2 + test_token_encoding('EUC-JP') + end end From a302953dfd734a368bddf2032710a7c15f141209 Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 23:30:50 +0100 Subject: [PATCH 22/35] Implement h_action. --- src/bindings/ruby/lib/hammer.rb | 22 +++++++++++++++++++ src/bindings/ruby/lib/hammer/internal.rb | 3 ++- src/bindings/ruby/lib/hammer/parser.rb | 9 ++++++++ .../ruby/lib/hammer/parser_builder.rb | 9 ++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index cfb62dc..63ac2c7 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -20,3 +20,25 @@ x.bind(Hammer::Parser.token('abd')) $r = parser.parse 'abcabd' p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token } + + +h = Hammer::Parser +parser = + h.many( + h.action(h.uint8) { |r| + p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}" + r[:ast][:data][:uint] *= 2 + r[:ast] if r[:ast][:data][:uint] % 3 == 0 + }) + +#parser = Hammer::Parser.build { +# many { +# uint8 +# action { |r| +# p r +# r[:ast] +# } +# } +#} +$r = parser.parse 'abcdefgh' +p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 12d797f..bceff32 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ 
b/src/bindings/ruby/lib/hammer/internal.rb @@ -119,7 +119,8 @@ module Hammer attach_function :h_indirect, [], :h_parser attach_function :h_bind_indirect, [:h_parser, :h_parser], :void - #attach_function :h_action, [:h_parser, ...], :h_parser + callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref + attach_function :h_action, [:h_parser, :HAction], :h_parser #attach_function :h_attr_bool, [:h_parser, ...], :h_parser # free the parse result diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index c0c1cfc..f9ff4c7 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -45,6 +45,15 @@ module Hammer @dont_gc << other_parser end + # Can pass the action either as a Proc in second parameter, or as block. + def self.action(parser, action=nil, &block) + action = block if action.nil? + raise ArgumentError, 'no action' if action.nil? + + h_parser = Hammer::Internal.h_action(parser.h_parser, action) + return Hammer::Parser.new(:action, h_parser, [parser, action]) + end + def self.token(string) # Need to copy string to a memory buffer (not just string.dup) # * Original string might be modified, this must not affect existing tokens diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb index d610db9..6756314 100644 --- a/src/bindings/ruby/lib/hammer/parser_builder.rb +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -52,9 +52,18 @@ module Hammer return self end + # modifies previous parser + def action(&block) + parser = @parsers.last + raise RuntimeError, 'need a parser before action' if parser.nil? + @parsers << Hammer::Parser.action(parser, &block) + return self + end + # Defines a parser constructor with the given name. def self.define_parser(name, options = {}) define_method name do |*args| + # TODO: This is wrong!! Needs to accept a block for nested parsers! 
@parsers << Hammer::Parser.send(name, *args) return self end From d343b0c8f0a9aeb750bb85c32a908e60a4f2cf1c Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Mon, 16 Dec 2013 23:51:17 +0100 Subject: [PATCH 23/35] Add accessor methods. --- src/bindings/ruby/lib/hammer.rb | 4 ++ src/bindings/ruby/lib/hammer/internal.rb | 50 +++++++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 63ac2c7..18c693a 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -40,5 +40,9 @@ parser = # } # } #} + $r = parser.parse 'abcdefgh' + p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} +# or: +p $r.ast.data.map(&:data) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index bceff32..214145a 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -28,6 +28,10 @@ module Hammer :arena, :pointer, :elements, :pointer # HParsedToken** + def used + self[:used] + end + def elements elem_array = FFI::Pointer.new(:pointer, self[:elements]) return (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) } @@ -43,7 +47,12 @@ module Hammer # Should be the same encoding as the string the token was created with. # But how do we get to this knowledge at this point? # Cheap solution: Just ask the user (additional parameter with default value of UTF-8). - return self[:token].read_string(self[:len]).force_encoding('UTF-8') + self[:token].read_string(self[:len]).force_encoding('UTF-8') + end + + # TODO: Probably should rename this to match ruby conventions: length, count, size + def len + self[:len] end end @@ -62,6 +71,37 @@ module Hammer :data, HParsedTokenDataUnion.by_value, :index, :size_t, :bit_offset, :char + + def token_type + self[:token_type] + end + + # TODO: Is this name ok? 
+ def data + return self[:data][:bytes].token if token_type == :bytes + return self[:data][:sint] if token_type == :sint + return self[:data][:uint] if token_type == :uint + return self[:data][:seq].elements if token_type == :sequence + return self[:data][:user] if token_type == :user + end + + def bytes + raise ArgumentError, 'wrong token type' unless token_type == :bytes + self[:data][:bytes] + end + + def seq + raise ArgumentError, 'wrong token type' unless token_type == :sequence + self[:data][:seq] + end + + def index + self[:index] + end + + def bit_offset + self[:bit_offset] + end end class HParseResult < FFI::Struct @@ -69,6 +109,14 @@ module Hammer :bit_length, :long_long, :arena, :pointer + def ast + self[:ast] + end + + def bit_length + self[:bit_length] + end + def self.release(ptr) Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? end From 905183cddc5557da45387e99dac1f5134d975efb Mon Sep 17 00:00:00 2001 From: Jakob Rath Date: Tue, 17 Dec 2013 00:06:29 +0100 Subject: [PATCH 24/35] Implement h_attr_bool. 
--- src/bindings/ruby/lib/hammer.rb | 5 +++++ src/bindings/ruby/lib/hammer/internal.rb | 4 +++- src/bindings/ruby/lib/hammer/parser.rb | 9 +++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 18c693a..ebc75be 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -46,3 +46,8 @@ $r = parser.parse 'abcdefgh' p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} # or: p $r.ast.data.map(&:data) + + +h = Hammer::Parser +parser = h.many(h.attr_bool(h.uint8) { |r| r.ast.data <= 100 }) +p parser.parse('abcdefgh').ast.data.map(&:data) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 214145a..e799b27 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -169,7 +169,9 @@ module Hammer callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref attach_function :h_action, [:h_parser, :HAction], :h_parser - #attach_function :h_attr_bool, [:h_parser, ...], :h_parser + + callback :HPredicate, [HParseResult.by_ref], :bool + attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser # free the parse result attach_function :h_parse_result_free, [HParseResult.by_ref], :void diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index f9ff4c7..b496558 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -54,6 +54,15 @@ module Hammer return Hammer::Parser.new(:action, h_parser, [parser, action]) end + # Can pass the predicate either as a Proc in second parameter, or as block. + def self.attr_bool(parser, predicate=nil, &block) + predicate = block if predicate.nil? + raise ArgumentError, 'no predicate' if predicate.nil? 
+ + h_parser = Hammer::Internal.h_attr_bool(parser.h_parser, predicate) + return Hammer::Parser.new(:attr_bool, h_parser, [parser, predicate]) + end + def self.token(string) # Need to copy string to a memory buffer (not just string.dup) # * Original string might be modified, this must not affect existing tokens From 8bd6671f9001042ec347445a14e58d2ef4f27d12 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Wed, 8 Jan 2014 20:17:56 +0100 Subject: [PATCH 25/35] Made library name cross-platform --- src/bindings/ruby/lib/hammer/internal.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index e799b27..63d2527 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -4,7 +4,7 @@ module Hammer module Internal extend FFI::Library - ffi_lib 'libhammer.dylib' + ffi_lib 'hammer' # Maybe we can implement Hammer::Parser with FFI::DataConverter. # That way, most hammer functions won't need to be wrapped. 
From 6a35872470d859075b24802776a8be5b068eb341 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Fri, 10 Jan 2014 21:24:50 +0100 Subject: [PATCH 26/35] Added test suite generator for ruby --- lib/tsgenruby.pl | 258 ++++++ lib/{testgen.pl => tsparser.pl} | 0 src/bindings/ruby/Rakefile | 3 +- src/bindings/ruby/lib/hammer.rb | 10 +- src/bindings/ruby/lib/hammer/internal.rb | 17 + .../ruby/lib/minitest/hamer-parser_plugin.rb | 28 + src/bindings/ruby/test/autogen_test.rb | 781 ++++++++++++++++++ 7 files changed, 1091 insertions(+), 6 deletions(-) create mode 100644 lib/tsgenruby.pl rename lib/{testgen.pl => tsparser.pl} (100%) create mode 100644 src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb create mode 100644 src/bindings/ruby/test/autogen_test.rb diff --git a/lib/tsgenruby.pl b/lib/tsgenruby.pl new file mode 100644 index 0000000..ebcc86a --- /dev/null +++ b/lib/tsgenruby.pl @@ -0,0 +1,258 @@ +% -*- prolog -*- +% Run with: +% $ swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl >output-file +% Note: this needs to be run from the lib/ directory. + +% So, from the ruby directory +% (cd ../../../lib && swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl ) >test/autogen_test.rb + + + +:- module(tsgenruby, + [gen_ts/2]). + +:- expects_dialect(swi). +:- use_module(tsparser). +:- use_module(library(record)). + +:- record testsuite_state(parser_no:integer = 0, test_no:integer=0). +% TODO: build a Box-like pretty-printer + +to_title_case([], []) :- !. +to_title_case([WSep,S0|Ss], [R0|Rs]) :- + memberchk(WSep, "_-"), !, + code_type(R0, to_upper(S0)), + to_title_case(Ss,Rs). +to_title_case([S0|Ss], [S0|Rs]) :- + \+ memberchk(S0, "_-"), + !, to_title_case(Ss,Rs). + +format_parser_name(Name, Result) :- + atom_codes(Name, CName), + append("h.", CName, Result), !. + +format_test_name(Name, Result) :- + atom_codes(Name, CName), + to_title_case([0x5f|CName], RName), + append("Test", RName, Result), !. + +indent(0) --> "", !. 
+indent(N) --> + {N > 0}, + " ", + {Np is N - 1}, + indent(Np). + +pp_char_guts(0x22) --> + "\\\"", !. +pp_char_guts(0x27) --> + "\\'", !. +pp_char_guts(A) --> + { A >= 0x20, A < 0x7F } -> + [A]; + "\\x", + { H is A >> 4, L is A /\ 0xF, + code_type(Hc, xdigit(H)), + code_type(Lc, xdigit(L)) }, + [Hc,Lc]. + +pp_hexnum_guts(0) --> !. +pp_hexnum_guts(A) --> + { L is A /\ 0xF, + H is A >> 4, + code_type(Lc, xdigit(L)) }, + pp_hexnum_guts(H), + [Lc], !. +pp_string_guts([]) --> !. +pp_string_guts([X|Xs]) --> + pp_char_guts(X), + pp_string_guts(Xs), !. + +pp_parser_args([]) --> !. +pp_parser_args([X|Rest]) --> + pp_parser(X), + pp_parser_args_rest(Rest). +pp_parser_args_rest([]) --> !. +pp_parser_args_rest([X|Xs]) --> + ", ", + pp_parser(X), + pp_parser_args_rest(Xs). + +pp_parser(parser(Name, Args)) --> + !, + {format_parser_name(Name,Fname)}, + Fname, + ({Args \= []} -> + + "(", pp_parser_args(Args), ")" + ; "") . +pp_parser(string(Str)) --> !, + "\"", + pp_string_guts(Str), + "\"", !. +pp_parser(num(0)) --> "0", !. +pp_parser(num(Num)) --> !, + ( {Num < 0} -> + "-0x", {RNum is -Num}; "0x", {RNum = Num} ), + pp_hexnum_guts(RNum). +pp_parser(char(C)) --> !, + "'", pp_char_guts(C), "'", !. + +pp_parser(ref(Name)) --> + {atom_codes(Name,CName)}, + "@sp_", CName, !. + + +pp_parser(A) --> + { writef("WTF is a %w?\n", [A]), + !, fail + }. + +upd_state_test_elem(parser(_), OldSt, NewSt) :- !, + testsuite_state_parser_no(OldSt, OldRNo), + NewRNo is OldRNo + 1, + set_parser_no_of_testsuite_state(NewRNo, OldSt, NewSt). +upd_state_test_elem(test(_, _), OldSt, NewSt) :- !, + testsuite_state_test_no(OldSt, OldTNo), + NewTNo is OldTNo + 1, + set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt). +upd_state_test_elem(testFail(_), OldSt, NewSt) :- !, + testsuite_state_test_no(OldSt, OldTNo), + NewTNo is OldTNo + 1, + set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt). +upd_state_test_elem(_, St, St). 
+ +curparser_name(St) --> !, + { testsuite_state_parser_no(St, RNo), + format(string(X), "@parser_~w", RNo) }, + X. +curtest_name(St) --> !, + { testsuite_state_test_no(St, RNo), + format(string(X), "test_~w", RNo) }, + X. + +pp_test_elem(decl, parser(_), _) --> !. +pp_test_elem(init, parser(P), St) --> + !, indent(2), + curparser_name(St), " = ", + pp_parser(P), + "\n". +pp_test_elem(exec, parser(_), _) --> !. +pp_test_elem(decl, subparser(Name,_), _) --> + !, indent(2), + pp_parser(ref(Name)), + " = ", + pp_parser(parser(indirect,[])), + "\n". +pp_test_elem(init, subparser(Name, Parser), _) --> + !, indent(2), + pp_parser(ref(Name)), ".bind ", + pp_parser(Parser), + "\n". +pp_test_elem(exec, subparser(_,_), _) --> !. +pp_test_elem(decl, test(_,_), _) --> !. +pp_test_elem(init, test(_,_), _) --> !. +pp_test_elem(decl, testFail(_), _) --> !. +pp_test_elem(init, testFail(_), _) --> !. +pp_test_elem(exec, test(Str, Result), St) --> + !, + "\n", + indent(1), "def ", curtest_name(St), "\n", + indent(2), "assert_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), + ", ", + pp_parse_result(Result), + "\n", + indent(1), "end\n". +pp_test_elem(exec, testFail(Str), St) --> + !, + "\n", + indent(1), "def ", curtest_name(St), "\n", + indent(2), "refute_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), "\n", + indent(1), "end\n". + +% pp_test_elem(_, _) --> !. + +pp_result_seq([]) --> !. +pp_result_seq([X|Xs]) --> !, + pp_parse_result(X), + pp_result_seq_r(Xs). +pp_result_seq_r([]) --> !. +pp_result_seq_r([X|Xs]) --> !, + ", ", + pp_parse_result(X), + pp_result_seq_r(Xs). + +pp_byte_seq([]) --> !. +pp_byte_seq([X|Xs]) --> !, + pp_parser(num(X)), + pp_byte_seq_r(Xs). +pp_byte_seq_r([]) --> !. +pp_byte_seq_r([X|Xs]) --> !, + ", ", + pp_parser(num(X)), + pp_byte_seq_r(Xs). + +pp_parse_result(char(C)) --> !, + %"(System.UInt64)", + pp_parser(char(C)). +pp_parse_result(seq(Args)) --> !, + "[", pp_result_seq(Args), "]". +pp_parse_result(none) --> !, + "null". 
+pp_parse_result(uint(V)) --> !, + pp_parser(num(V)). +pp_parse_result(sint(V)) --> !, + pp_parser(num(V)). +pp_parse_result(string(A)) --> !, + pp_parser(string(A)). + +%pp_parse_result(A) --> +% "\x1b[1;31m", +% {with_output_to(codes(C), write(A))}, +% C, +% "\x1b[0m". + + +pp_test_elems(Phase, Elems) --> + { default_testsuite_state(State) }, + pp_test_elems(Phase, Elems, State). +pp_test_elems(_, [], _) --> !. +pp_test_elems(Phase, [X|Xs], St) --> + !, + { upd_state_test_elem(X, St, NewSt) }, + %{NewSt = St}, + pp_test_elem(Phase,X, NewSt), + pp_test_elems(Phase,Xs, NewSt). + +pp_test_case(testcase(Name, Elems)) --> + !, + { format_test_name(Name, TName) }, + indent(0), "class ", TName, " < Minitest::Test\n", + indent(1), "def setup\n", + indent(2), "super\n", + indent(2), "h = Hammer::Parser\n", + pp_test_elems(decl, Elems), + pp_test_elems(init, Elems), + indent(1), "end\n", + pp_test_elems(exec, Elems), + indent(0), "end\n\n". + + +pp_test_cases([]) --> !. +pp_test_cases([A|As]) --> + pp_test_case(A), + pp_test_cases(As). + +pp_test_suite(Suite) --> + "require 'bundler/setup'\n", + "require 'minitest/autorun'\n", + "require 'hammer'\n", + pp_test_cases(Suite). + +gen_ts(Foo,Str) :- + phrase(pp_test_suite(Foo),Str). + +prolog :- + read_tc(A), + gen_ts(A, Res), + writef("%s", [Res]). 
diff --git a/lib/testgen.pl b/lib/tsparser.pl similarity index 100% rename from lib/testgen.pl rename to lib/tsparser.pl diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile index 70b8662..c738470 100644 --- a/src/bindings/ruby/Rakefile +++ b/src/bindings/ruby/Rakefile @@ -1,7 +1,8 @@ require 'rake/testtask' Rake::TestTask.new do |t| - t.pattern = "test/*_test.rb" + #t.pattern = "test/*_test.rb" + t.test_files = FileList['test/*_test.rb'] end task :default => [:test] diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index ebc75be..cec33fc 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -19,14 +19,14 @@ x.bind(Hammer::Parser.token('abd')) #$p = parser $r = parser.parse 'abcabd' -p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token } +#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token } h = Hammer::Parser parser = h.many( h.action(h.uint8) { |r| - p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}" + #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}" r[:ast][:data][:uint] *= 2 r[:ast] if r[:ast][:data][:uint] % 3 == 0 }) @@ -43,11 +43,11 @@ parser = $r = parser.parse 'abcdefgh' -p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} +#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} # or: -p $r.ast.data.map(&:data) +#p $r.ast.data.map(&:data) h = Hammer::Parser parser = h.many(h.attr_bool(h.uint8) { |r| r.ast.data <= 100 }) -p parser.parse('abcdefgh').ast.data.map(&:data) +#p parser.parse('abcdefgh').ast.data.map(&:data) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 63d2527..03bc45d 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -102,6 +102,23 @@ module Hammer def bit_offset self[:bit_offset] end + + def unmarshal + case token_type + when :sequence + self[:data][:seq].each {|x| x.unmarshal} + when :bytes + 
self[:data][:bytes].token + when :uint + self[:data][:uint] + when :sint + self[:data][:sint] + when :none + nil + end + end + + end class HParseResult < FFI::Struct diff --git a/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb new file mode 100644 index 0000000..393a540 --- /dev/null +++ b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb @@ -0,0 +1,28 @@ +module Minitest + + module Assertions + HAMMER_JUST_PARSE = Object.new + def assert_parse_ok(parser, probe, expected=HAMMER_JUST_PARSE) + refute_nil parser, "Parser must not be nil (this is a problem with your test)" + parse_result = parser.parse(probe) + refute_nil parse_result, "Parse failed" + if HAMMER_JUST_PARSE != expected + if parse_result.ast == nil + assert_nil expected, "Parser returned nil AST; expected #{expected}" + else + assert_equal parse_result.ast.unmarshal, expected + end + end + end + + def refute_parse_ok(parser, probe) + refute_nil parser, "Parser must not be nil (this is a problem with your test)" + parse_result = parser.parse(probe) + assert_nil parse_result, "Parse succeeded unexpectedly with " + parse_result.ast.inspect + end + end + + + #def self.plugin_hammer-parser_init(options) +end + diff --git a/src/bindings/ruby/test/autogen_test.rb b/src/bindings/ruby/test/autogen_test.rb new file mode 100644 index 0000000..a2566bc --- /dev/null +++ b/src/bindings/ruby/test/autogen_test.rb @@ -0,0 +1,781 @@ +require 'bundler/setup' +require 'minitest/autorun' +require 'hammer' +class TestToken < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.token("95\xa2") + end + + def test_1 + assert_parse_ok @parser_1, "95\xa2", "95\xa2" + end + + def test_2 + refute_parse_ok @parser_1, "95\xa2" + end +end + +class TestCh < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.ch(0xa2) + end + + def test_1 + assert_parse_ok @parser_1, "\xa2", '\xa2' + end + + def test_2 + refute_parse_ok @parser_1, 
"\xa3" + end +end + +class TestChRange < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.ch_range(0x61, 0x63) + end + + def test_1 + assert_parse_ok @parser_1, "b", 'b' + end + + def test_2 + refute_parse_ok @parser_1, "d" + end +end + +class TestInt64 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int64 + end + + def test_1 + assert_parse_ok @parser_1, "\xff\xff\xff\xfe\x00\x00\x00\x00", -0x200000000 + end + + def test_2 + refute_parse_ok @parser_1, "\xff\xff\xff\xfe\x00\x00\x00" + end +end + +class TestInt32 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int32 + end + + def test_1 + assert_parse_ok @parser_1, "\xff\xfe\x00\x00", -0x20000 + end + + def test_2 + refute_parse_ok @parser_1, "\xff\xfe\x00" + end + + def test_3 + assert_parse_ok @parser_1, "\x00\x02\x00\x00", 0x20000 + end + + def test_4 + refute_parse_ok @parser_1, "\x00\x02\x00" + end +end + +class TestInt16 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int16 + end + + def test_1 + assert_parse_ok @parser_1, "\xfe\x00", -0x200 + end + + def test_2 + refute_parse_ok @parser_1, "\xfe" + end + + def test_3 + assert_parse_ok @parser_1, "\x02\x00", 0x200 + end + + def test_4 + refute_parse_ok @parser_1, "\x02" + end +end + +class TestInt8 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int8 + end + + def test_1 + assert_parse_ok @parser_1, "\x88", -0x78 + end + + def test_2 + refute_parse_ok @parser_1, "" + end +end + +class TestUint64 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint64 + end + + def test_1 + assert_parse_ok @parser_1, "\x00\x00\x00\x02\x00\x00\x00\x00", 0x200000000 + end + + def test_2 + refute_parse_ok @parser_1, "\x00\x00\x00\x02\x00\x00\x00" + end +end + +class TestUint32 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint32 + end + + def test_1 + assert_parse_ok @parser_1, "\x00\x02\x00\x00", 
0x20000 + end + + def test_2 + refute_parse_ok @parser_1, "\x00\x02\x00" + end +end + +class TestUint16 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint16 + end + + def test_1 + assert_parse_ok @parser_1, "\x02\x00", 0x200 + end + + def test_2 + refute_parse_ok @parser_1, "\x02" + end +end + +class TestUint8 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint8 + end + + def test_1 + assert_parse_ok @parser_1, "x", 0x78 + end + + def test_2 + refute_parse_ok @parser_1, "" + end +end + +class TestIntRange < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int_range(h.uint8, 0x3, 0x10) + end + + def test_1 + assert_parse_ok @parser_1, "\x05", 0x5 + end + + def test_2 + refute_parse_ok @parser_1, "\x0b" + end +end + +class TestWhitespace < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.whitespace(h.ch(0x61)) + @parser_2 = h.whitespace(h.end_p) + end + + def test_1 + assert_parse_ok @parser_1, "a", 'a' + end + + def test_2 + assert_parse_ok @parser_1, " a", 'a' + end + + def test_3 + assert_parse_ok @parser_1, " a", 'a' + end + + def test_4 + assert_parse_ok @parser_1, "\x09a", 'a' + end + + def test_5 + refute_parse_ok @parser_1, "_a" + end + + def test_6 + assert_parse_ok @parser_2, "", null + end + + def test_7 + assert_parse_ok @parser_2, " ", null + end + + def test_8 + refute_parse_ok @parser_2, " x" + end +end + +class TestLeft < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.left(h.ch(0x61), h.ch(0x20)) + end + + def test_1 + assert_parse_ok @parser_1, "a ", 'a' + end + + def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, " " + end + + def test_4 + refute_parse_ok @parser_1, "ba" + end +end + +class TestMiddle < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.middle(h.ch(' '), h.ch('a'), h.ch(' ')) + end + + def test_1 + assert_parse_ok @parser_1, " a ", 'a' + end + + 
def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, " a" + end + + def test_4 + refute_parse_ok @parser_1, "a " + end + + def test_5 + refute_parse_ok @parser_1, " b " + end + + def test_6 + refute_parse_ok @parser_1, "ba " + end + + def test_7 + refute_parse_ok @parser_1, " ab" + end +end + +class TestIn < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.in("abc") + end + + def test_1 + assert_parse_ok @parser_1, "b", 'b' + end + + def test_2 + refute_parse_ok @parser_1, "d" + end +end + +class TestNotIn < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.not_in("abc") + end + + def test_1 + assert_parse_ok @parser_1, "d", 'd' + end + + def test_2 + refute_parse_ok @parser_1, "a" + end +end + +class TestEndP < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch('a'), h.end_p) + end + + def test_1 + assert_parse_ok @parser_1, "a", ['a'] + end + + def test_2 + refute_parse_ok @parser_1, "aa" + end +end + +class TestNothingP < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.nothing_p + end + + def test_1 + refute_parse_ok @parser_1, "a" + end +end + +class TestSequence < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch('a'), h.ch('b')) + @parser_2 = h.sequence(h.ch('a'), h.whitespace(h.ch('b'))) + end + + def test_1 + assert_parse_ok @parser_1, "ab", ['a', 'b'] + end + + def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, "b" + end + + def test_4 + assert_parse_ok @parser_2, "ab", ['a', 'b'] + end + + def test_5 + assert_parse_ok @parser_2, "a b", ['a', 'b'] + end + + def test_6 + assert_parse_ok @parser_2, "a b", ['a', 'b'] + end +end + +class TestChoice < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.choice(h.ch('a'), h.ch('b')) + end + + def test_1 + assert_parse_ok @parser_1, "a", 'a' + end + + def test_2 + 
assert_parse_ok @parser_1, "b", 'b' + end + + def test_3 + assert_parse_ok @parser_1, "ab", 'a' + end + + def test_4 + refute_parse_ok @parser_1, "c" + end +end + +class TestButnot < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.butnot(h.ch('a'), h.token("ab")) + @parser_2 = h.butnot(h.ch_range('0', '9'), h.ch('6')) + end + + def test_1 + assert_parse_ok @parser_1, "a", 'a' + end + + def test_2 + refute_parse_ok @parser_1, "ab" + end + + def test_3 + assert_parse_ok @parser_1, "aa", 'a' + end + + def test_4 + assert_parse_ok @parser_2, "5", '5' + end + + def test_5 + refute_parse_ok @parser_2, "6" + end +end + +class TestDifference < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.difference(h.token("ab"), h.ch('a')) + end + + def test_1 + assert_parse_ok @parser_1, "ab", "ab" + end + + def test_2 + refute_parse_ok @parser_1, "a" + end +end + +class TestXor < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.xor(h.ch_range('0', '6'), h.ch_range('5', '9')) + end + + def test_1 + assert_parse_ok @parser_1, "0", '0' + end + + def test_2 + assert_parse_ok @parser_1, "9", '9' + end + + def test_3 + refute_parse_ok @parser_1, "5" + end + + def test_4 + refute_parse_ok @parser_1, "a" + end +end + +class TestMany < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.many(h.choice(h.ch('a'), h.ch('b'))) + end + + def test_1 + assert_parse_ok @parser_1, "", [] + end + + def test_2 + assert_parse_ok @parser_1, "a", ['a'] + end + + def test_3 + assert_parse_ok @parser_1, "b", ['b'] + end + + def test_4 + assert_parse_ok @parser_1, "aabbaba", ['a', 'a', 'b', 'b', 'a', 'b', 'a'] + end +end + +class TestMany1 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.many1(h.choice(h.ch('a'), h.ch('b'))) + end + + def test_1 + refute_parse_ok @parser_1, "" + end + + def test_2 + assert_parse_ok @parser_1, "a", ['a'] + end + + def test_3 + assert_parse_ok @parser_1, "b", 
['b'] + end + + def test_4 + assert_parse_ok @parser_1, "aabbaba", ['a', 'a', 'b', 'b', 'a', 'b', 'a'] + end + + def test_5 + refute_parse_ok @parser_1, "daabbabadef" + end +end + +class TestRepeatN < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.repeat_n(h.choice(h.ch('a'), h.ch('b')), 0x2) + end + + def test_1 + refute_parse_ok @parser_1, "adef" + end + + def test_2 + assert_parse_ok @parser_1, "abdef", ['a', 'b'] + end + + def test_3 + refute_parse_ok @parser_1, "dabdef" + end +end + +class TestOptional < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch('a'), h.optional(h.choice(h.ch('b'), h.ch('c'))), h.ch('d')) + end + + def test_1 + assert_parse_ok @parser_1, "abd", ['a', 'b', 'd'] + end + + def test_2 + assert_parse_ok @parser_1, "acd", ['a', 'c', 'd'] + end + + def test_3 + assert_parse_ok @parser_1, "ad", ['a', null, 'd'] + end + + def test_4 + refute_parse_ok @parser_1, "aed" + end + + def test_5 + refute_parse_ok @parser_1, "ab" + end + + def test_6 + refute_parse_ok @parser_1, "ac" + end +end + +class TestIgnore < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch('a'), h.ignore(h.ch('b')), h.ch('c')) + end + + def test_1 + assert_parse_ok @parser_1, "abc", ['a', 'c'] + end + + def test_2 + refute_parse_ok @parser_1, "ac" + end +end + +class TestSepBy < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sepBy(h.choice(h.ch('1'), h.ch('2'), h.ch('3')), h.ch(',')) + end + + def test_1 + assert_parse_ok @parser_1, "1,2,3", ['1', '2', '3'] + end + + def test_2 + assert_parse_ok @parser_1, "1,3,2", ['1', '3', '2'] + end + + def test_3 + assert_parse_ok @parser_1, "1,3", ['1', '3'] + end + + def test_4 + assert_parse_ok @parser_1, "3", ['3'] + end + + def test_5 + assert_parse_ok @parser_1, "", [] + end +end + +class TestSepBy1 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sepBy1(h.choice(h.ch('1'), h.ch('2'), 
h.ch('3')), h.ch(',')) + end + + def test_1 + assert_parse_ok @parser_1, "1,2,3", ['1', '2', '3'] + end + + def test_2 + assert_parse_ok @parser_1, "1,3,2", ['1', '3', '2'] + end + + def test_3 + assert_parse_ok @parser_1, "1,3", ['1', '3'] + end + + def test_4 + assert_parse_ok @parser_1, "3", ['3'] + end + + def test_5 + refute_parse_ok @parser_1, "" + end +end + +class TestAnd < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.and(h.ch('0')), h.ch('0')) + @parser_2 = h.sequence(h.and(h.ch('0')), h.ch('1')) + @parser_3 = h.sequence(h.ch('1'), h.and(h.ch('2'))) + end + + def test_1 + assert_parse_ok @parser_1, "0", ['0'] + end + + def test_2 + refute_parse_ok @parser_1, "1" + end + + def test_3 + refute_parse_ok @parser_2, "0" + end + + def test_4 + refute_parse_ok @parser_2, "1" + end + + def test_5 + assert_parse_ok @parser_3, "12", ['1'] + end + + def test_6 + refute_parse_ok @parser_3, "13" + end +end + +class TestNot < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch('a'), h.choice(h.token("+"), h.token("++")), h.ch('b')) + @parser_2 = h.sequence(h.ch('a'), h.choice(h.sequence(h.token("+"), h.not(h.ch('+'))), h.token("++")), h.ch('b')) + end + + def test_1 + assert_parse_ok @parser_1, "a+b", ['a', "+", 'b'] + end + + def test_2 + refute_parse_ok @parser_1, "a++b" + end + + def test_3 + assert_parse_ok @parser_2, "a+b", ['a', ["+"], 'b'] + end + + def test_4 + assert_parse_ok @parser_2, "a++b", ['a', "++", 'b'] + end +end + +class TestRightrec < Minitest::Test + def setup + super + h = Hammer::Parser + @sp_rr = h.indirect + @sp_rr.bind h.choice(h.sequence(h.ch('a'), @sp_rr), h.epsilon_p) + @parser_1 = @sp_rr + end + + def test_1 + assert_parse_ok @parser_1, "a", ['a'] + end + + def test_2 + assert_parse_ok @parser_1, "aa", ['a', ['a']] + end + + def test_3 + assert_parse_ok @parser_1, "aaa", ['a', ['a', ['a']]] + end +end + +class TestAmbiguous < Minitest::Test + def setup + super + h = 
Hammer::Parser + @sp_d = h.indirect + @sp_p = h.indirect + @sp_e = h.indirect + @sp_d.bind h.ch('d') + @sp_p.bind h.ch('+') + @sp_e.bind h.choice(h.sequence(@sp_e, @sp_p, @sp_e), @sp_d) + @parser_1 = @sp_e + end + + def test_1 + assert_parse_ok @parser_1, "d", 'd' + end + + def test_2 + assert_parse_ok @parser_1, "d+d", ['d', '+', 'd'] + end + + def test_3 + assert_parse_ok @parser_1, "d+d+d", [['d', '+', 'd'], '+', 'd'] + end +end + From 753120f27ee806d1d2d703df93718d7ba51887a6 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Fri, 10 Jan 2014 22:30:57 +0100 Subject: [PATCH 27/35] All of the main test suite works --- lib/test-suite | 28 +-- lib/tsgenruby.pl | 5 +- src/bindings/ruby/lib/hammer/internal.rb | 35 +++- src/bindings/ruby/lib/hammer/parser.rb | 48 ++++- .../ruby/lib/minitest/hamer-parser_plugin.rb | 5 +- src/bindings/ruby/test/autogen_test.rb | 178 ++++++++---------- 6 files changed, 169 insertions(+), 130 deletions(-) diff --git a/lib/test-suite b/lib/test-suite index 6c15b3d..7f00b8e 100644 --- a/lib/test-suite +++ b/lib/test-suite @@ -19,7 +19,7 @@ token { parser token("95\xa2"); test "95\xa2" --> "95\xa2"; - test "95\xa2" --> fail; + test "95\xa3" --> fail; } ch { @@ -87,7 +87,7 @@ uint8 { } int_range { - parser int_range(uint8(), 0x3, 0x10); + parser int_range(uint8(), 0x3, 0xa); test <05> --> u0x05; test <0b> --> fail; } @@ -299,17 +299,17 @@ rightrec { test "aa" --> ['a',['a']]; test "aaa" --> ['a',['a',['a']]]; } - -ambiguous { - subparser $d = ch('d'); - subparser $p = ch('+'); - subparser $e = choice(sequence($e, $p, $e), $d); - # TODO: implement action/h_act_flatten - parser $e; - - test "d" --> 'd'; - test "d+d" --> ['d','+','d']; - test "d+d+d" --> [['d','+','d'],'+','d']; -} +## Only for GLR +#ambiguous { +# subparser $d = ch('d'); +# subparser $p = ch('+'); +# subparser $e = choice(sequence($e, $p, $e), $d); +# # TODO: implement action/h_act_flatten +# parser $e; +# +# test "d" --> 'd'; +# test "d+d" --> ['d','+','d']; +# test "d+d+d" --> 
[['d','+','d'],'+','d']; +#} diff --git a/lib/tsgenruby.pl b/lib/tsgenruby.pl index ebcc86a..ad83cf1 100644 --- a/lib/tsgenruby.pl +++ b/lib/tsgenruby.pl @@ -96,7 +96,8 @@ pp_parser(num(Num)) --> !, "-0x", {RNum is -Num}; "0x", {RNum = Num} ), pp_hexnum_guts(RNum). pp_parser(char(C)) --> !, - "'", pp_char_guts(C), "'", !. + pp_parser(num(C)). + %"'", pp_char_guts(C), "'", !. pp_parser(ref(Name)) --> {atom_codes(Name,CName)}, @@ -198,7 +199,7 @@ pp_parse_result(char(C)) --> !, pp_parse_result(seq(Args)) --> !, "[", pp_result_seq(Args), "]". pp_parse_result(none) --> !, - "null". + "nil". pp_parse_result(uint(V)) --> !, pp_parser(num(V)). pp_parse_result(sint(V)) --> !, diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 03bc45d..469dd73 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -28,7 +28,7 @@ module Hammer :arena, :pointer, :elements, :pointer # HParsedToken** - def used + def length self[:used] end @@ -36,6 +36,19 @@ module Hammer elem_array = FFI::Pointer.new(:pointer, self[:elements]) return (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) } end + + #def [](idx) + # raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length + # elem_array = FFI::Pointer.new(:pointer, self[:elements]) + # return HParsedToken.new(elem_array[i].read_pointer) + #end + + def map(&code) + elements.map {|x| code.call x} + end + def each(&code) + elements.each {|x| code.call x} + end end class HBytes < FFI::Struct @@ -72,6 +85,12 @@ module Hammer :index, :size_t, :bit_offset, :char + def normalize + # If I'm null, return nil. + return nil if null? 
+ return self + end + def token_type self[:token_type] end @@ -106,7 +125,7 @@ module Hammer def unmarshal case token_type when :sequence - self[:data][:seq].each {|x| x.unmarshal} + self[:data][:seq].map {|x| x.unmarshal} when :bytes self[:data][:bytes].token when :uint @@ -127,7 +146,7 @@ module Hammer :arena, :pointer def ast - self[:ast] + self[:ast].normalize end def bit_length @@ -140,13 +159,13 @@ module Hammer end # run a parser - attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? + attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? # build a parser attach_function :h_token, [:buffer_in, :size_t], :h_parser attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser - attach_function :h_int_range, [:int64, :int64], :h_parser + attach_function :h_int_range, [:h_parser, :int64, :int64], :h_parser attach_function :h_bits, [:size_t, :bool], :h_parser attach_function :h_int64, [], :h_parser attach_function :h_int32, [], :h_parser @@ -160,8 +179,8 @@ module Hammer attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser - #attach_function :h_in, [:buffer_in, :size_t], :h_parser - #attach_function :h_not_in, [:buffer_in, :size_t], :h_parser + attach_function :h_in, [:pointer, :size_t], :h_parser + attach_function :h_not_in, [:pointer, :size_t], :h_parser attach_function :h_end_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser attach_function :h_sequence, [:varargs], :h_parser @@ -171,7 +190,7 @@ module Hammer attach_function :h_xor, [:h_parser, :h_parser], :h_parser attach_function :h_many, [:h_parser], :h_parser attach_function :h_many1, [:h_parser], :h_parser - #attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser + attach_function 
:h_repeat_n, [:h_parser, :size_t], :h_parser attach_function :h_optional, [:h_parser], :h_parser attach_function :h_ignore, [:h_parser], :h_parser attach_function :h_sepBy, [:h_parser, :h_parser], :h_parser diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index b496558..4d9f432 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -25,7 +25,8 @@ module Hammer raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. - result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize) + ibuf = FFI::MemoryPointer.from_string(data) + result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null if result.null? return nil else @@ -73,13 +74,54 @@ module Hammer return Hammer::Parser.new(:token, h_parser, buffer) end - def self.ch(num) - raise ArgumentError, 'expecting a Fixnum in 0..255' unless num.is_a?(Fixnum) and num.between?(0, 255) + def self.marshal_ch_arg(num) + if num.is_a?(String) + raise ArgumentError, "Expecting either a fixnum in 0..255 or a single-byte String" unless num.bytes.length == 1 + num = num.bytes[0] + end + raise ArgumentError, 'Expecting a Fixnum in 0..255 or a single-byte String' unless num.is_a?(Fixnum) and num.between?(0, 255) + return num + end + private_class_method :marshal_ch_arg + + def self.ch(ch) + num = marshal_ch_arg(ch) h_parser = Hammer::Internal.h_ch(num) return Hammer::Parser.new(:ch, h_parser, nil) end + def self.ch_range(ch1, ch2) + ch1 = marshal_ch_arg(ch1) + ch2 = marshal_ch_arg(ch2) + h_parser = Hammer::Internal.h_ch_range(ch1, ch2) + return Hammer::Parser.new(:ch_range, h_parser, nil) + end + + def self.int_range(parser, i1, i2) + h_parser = Hammer::Internal.h_int_range(parser.h_parser, i1, i2) + return Hammer::Parser.new(:int_range, h_parser, nil) + end + + def self.in(charset) + raise 
ArgumentError, "Expected a String" unless charset.is_a?(String) + ibuf = FFI::MemoryPointer.from_string(charset) + h_parser = Hammer::Internal.h_in(ibuf, charset.bytesize) + return Hammer::Parser.new(:in, h_parser, nil) + end + + def self.repeat_n(parser, count) + h_parser = Hammer::Internal.h_repeat_n(parser.h_parser, count) + return Hammer::Parser.new(:repeat_n, h_parser, nil) + end + + def self.not_in(charset) + raise ArgumentError, "Expected a String" unless charset.is_a?(String) + ibuf = FFI::MemoryPointer.from_string(charset) + h_parser = Hammer::Internal.h_not_in(ibuf, charset.bytesize) + return Hammer::Parser.new(:not_in, h_parser, nil) + end + # Defines a parser constructor with the given name. # Options: # hammer_function: name of the hammer function to call (default: 'h_'+name) diff --git a/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb index 393a540..a23d8b9 100644 --- a/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb +++ b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb @@ -18,7 +18,10 @@ module Minitest def refute_parse_ok(parser, probe) refute_nil parser, "Parser must not be nil (this is a problem with your test)" parse_result = parser.parse(probe) - assert_nil parse_result, "Parse succeeded unexpectedly with " + parse_result.ast.inspect + + if not parse_result.nil? 
+ assert_nil parse_result, "Parse succeeded unexpectedly with " + parse_result.ast.inspect + end end end diff --git a/src/bindings/ruby/test/autogen_test.rb b/src/bindings/ruby/test/autogen_test.rb index a2566bc..93a0c7a 100644 --- a/src/bindings/ruby/test/autogen_test.rb +++ b/src/bindings/ruby/test/autogen_test.rb @@ -13,7 +13,7 @@ class TestToken < Minitest::Test end def test_2 - refute_parse_ok @parser_1, "95\xa2" + refute_parse_ok @parser_1, "95\xa3" end end @@ -25,7 +25,7 @@ class TestCh < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "\xa2", '\xa2' + assert_parse_ok @parser_1, "\xa2", 0xa2 end def test_2 @@ -41,7 +41,7 @@ class TestChRange < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "b", 'b' + assert_parse_ok @parser_1, "b", 0x62 end def test_2 @@ -197,7 +197,7 @@ class TestIntRange < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.int_range(h.uint8, 0x3, 0x10) + @parser_1 = h.int_range(h.uint8, 0x3, 0xa) end def test_1 @@ -218,19 +218,19 @@ class TestWhitespace < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "a", 'a' + assert_parse_ok @parser_1, "a", 0x61 end def test_2 - assert_parse_ok @parser_1, " a", 'a' + assert_parse_ok @parser_1, " a", 0x61 end def test_3 - assert_parse_ok @parser_1, " a", 'a' + assert_parse_ok @parser_1, " a", 0x61 end def test_4 - assert_parse_ok @parser_1, "\x09a", 'a' + assert_parse_ok @parser_1, "\x09a", 0x61 end def test_5 @@ -238,11 +238,11 @@ class TestWhitespace < Minitest::Test end def test_6 - assert_parse_ok @parser_2, "", null + assert_parse_ok @parser_2, "", nil end def test_7 - assert_parse_ok @parser_2, " ", null + assert_parse_ok @parser_2, " ", nil end def test_8 @@ -258,7 +258,7 @@ class TestLeft < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "a ", 'a' + assert_parse_ok @parser_1, "a ", 0x61 end def test_2 @@ -278,11 +278,11 @@ class TestMiddle < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.middle(h.ch(' '), h.ch('a'), 
h.ch(' ')) + @parser_1 = h.middle(h.ch(0x20), h.ch(0x61), h.ch(0x20)) end def test_1 - assert_parse_ok @parser_1, " a ", 'a' + assert_parse_ok @parser_1, " a ", 0x61 end def test_2 @@ -318,7 +318,7 @@ class TestIn < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "b", 'b' + assert_parse_ok @parser_1, "b", 0x62 end def test_2 @@ -334,7 +334,7 @@ class TestNotIn < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "d", 'd' + assert_parse_ok @parser_1, "d", 0x64 end def test_2 @@ -346,11 +346,11 @@ class TestEndP < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch('a'), h.end_p) + @parser_1 = h.sequence(h.ch(0x61), h.end_p) end def test_1 - assert_parse_ok @parser_1, "a", ['a'] + assert_parse_ok @parser_1, "a", [0x61] end def test_2 @@ -374,12 +374,12 @@ class TestSequence < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch('a'), h.ch('b')) - @parser_2 = h.sequence(h.ch('a'), h.whitespace(h.ch('b'))) + @parser_1 = h.sequence(h.ch(0x61), h.ch(0x62)) + @parser_2 = h.sequence(h.ch(0x61), h.whitespace(h.ch(0x62))) end def test_1 - assert_parse_ok @parser_1, "ab", ['a', 'b'] + assert_parse_ok @parser_1, "ab", [0x61, 0x62] end def test_2 @@ -391,15 +391,15 @@ class TestSequence < Minitest::Test end def test_4 - assert_parse_ok @parser_2, "ab", ['a', 'b'] + assert_parse_ok @parser_2, "ab", [0x61, 0x62] end def test_5 - assert_parse_ok @parser_2, "a b", ['a', 'b'] + assert_parse_ok @parser_2, "a b", [0x61, 0x62] end def test_6 - assert_parse_ok @parser_2, "a b", ['a', 'b'] + assert_parse_ok @parser_2, "a b", [0x61, 0x62] end end @@ -407,19 +407,19 @@ class TestChoice < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.choice(h.ch('a'), h.ch('b')) + @parser_1 = h.choice(h.ch(0x61), h.ch(0x62)) end def test_1 - assert_parse_ok @parser_1, "a", 'a' + assert_parse_ok @parser_1, "a", 0x61 end def test_2 - assert_parse_ok @parser_1, "b", 'b' + assert_parse_ok @parser_1, "b", 0x62 end def 
test_3 - assert_parse_ok @parser_1, "ab", 'a' + assert_parse_ok @parser_1, "ab", 0x61 end def test_4 @@ -431,12 +431,12 @@ class TestButnot < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.butnot(h.ch('a'), h.token("ab")) - @parser_2 = h.butnot(h.ch_range('0', '9'), h.ch('6')) + @parser_1 = h.butnot(h.ch(0x61), h.token("ab")) + @parser_2 = h.butnot(h.ch_range(0x30, 0x39), h.ch(0x36)) end def test_1 - assert_parse_ok @parser_1, "a", 'a' + assert_parse_ok @parser_1, "a", 0x61 end def test_2 @@ -444,11 +444,11 @@ class TestButnot < Minitest::Test end def test_3 - assert_parse_ok @parser_1, "aa", 'a' + assert_parse_ok @parser_1, "aa", 0x61 end def test_4 - assert_parse_ok @parser_2, "5", '5' + assert_parse_ok @parser_2, "5", 0x35 end def test_5 @@ -460,7 +460,7 @@ class TestDifference < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.difference(h.token("ab"), h.ch('a')) + @parser_1 = h.difference(h.token("ab"), h.ch(0x61)) end def test_1 @@ -476,15 +476,15 @@ class TestXor < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.xor(h.ch_range('0', '6'), h.ch_range('5', '9')) + @parser_1 = h.xor(h.ch_range(0x30, 0x36), h.ch_range(0x35, 0x39)) end def test_1 - assert_parse_ok @parser_1, "0", '0' + assert_parse_ok @parser_1, "0", 0x30 end def test_2 - assert_parse_ok @parser_1, "9", '9' + assert_parse_ok @parser_1, "9", 0x39 end def test_3 @@ -500,7 +500,7 @@ class TestMany < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.many(h.choice(h.ch('a'), h.ch('b'))) + @parser_1 = h.many(h.choice(h.ch(0x61), h.ch(0x62))) end def test_1 @@ -508,15 +508,15 @@ class TestMany < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "a", ['a'] + assert_parse_ok @parser_1, "a", [0x61] end def test_3 - assert_parse_ok @parser_1, "b", ['b'] + assert_parse_ok @parser_1, "b", [0x62] end def test_4 - assert_parse_ok @parser_1, "aabbaba", ['a', 'a', 'b', 'b', 'a', 'b', 'a'] + assert_parse_ok @parser_1, "aabbaba", [0x61, 
0x61, 0x62, 0x62, 0x61, 0x62, 0x61] end end @@ -524,7 +524,7 @@ class TestMany1 < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.many1(h.choice(h.ch('a'), h.ch('b'))) + @parser_1 = h.many1(h.choice(h.ch(0x61), h.ch(0x62))) end def test_1 @@ -532,15 +532,15 @@ class TestMany1 < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "a", ['a'] + assert_parse_ok @parser_1, "a", [0x61] end def test_3 - assert_parse_ok @parser_1, "b", ['b'] + assert_parse_ok @parser_1, "b", [0x62] end def test_4 - assert_parse_ok @parser_1, "aabbaba", ['a', 'a', 'b', 'b', 'a', 'b', 'a'] + assert_parse_ok @parser_1, "aabbaba", [0x61, 0x61, 0x62, 0x62, 0x61, 0x62, 0x61] end def test_5 @@ -552,7 +552,7 @@ class TestRepeatN < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.repeat_n(h.choice(h.ch('a'), h.ch('b')), 0x2) + @parser_1 = h.repeat_n(h.choice(h.ch(0x61), h.ch(0x62)), 0x2) end def test_1 @@ -560,7 +560,7 @@ class TestRepeatN < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "abdef", ['a', 'b'] + assert_parse_ok @parser_1, "abdef", [0x61, 0x62] end def test_3 @@ -572,19 +572,19 @@ class TestOptional < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch('a'), h.optional(h.choice(h.ch('b'), h.ch('c'))), h.ch('d')) + @parser_1 = h.sequence(h.ch(0x61), h.optional(h.choice(h.ch(0x62), h.ch(0x63))), h.ch(0x64)) end def test_1 - assert_parse_ok @parser_1, "abd", ['a', 'b', 'd'] + assert_parse_ok @parser_1, "abd", [0x61, 0x62, 0x64] end def test_2 - assert_parse_ok @parser_1, "acd", ['a', 'c', 'd'] + assert_parse_ok @parser_1, "acd", [0x61, 0x63, 0x64] end def test_3 - assert_parse_ok @parser_1, "ad", ['a', null, 'd'] + assert_parse_ok @parser_1, "ad", [0x61, nil, 0x64] end def test_4 @@ -604,11 +604,11 @@ class TestIgnore < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch('a'), h.ignore(h.ch('b')), h.ch('c')) + @parser_1 = h.sequence(h.ch(0x61), h.ignore(h.ch(0x62)), h.ch(0x63)) 
end def test_1 - assert_parse_ok @parser_1, "abc", ['a', 'c'] + assert_parse_ok @parser_1, "abc", [0x61, 0x63] end def test_2 @@ -620,23 +620,23 @@ class TestSepBy < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sepBy(h.choice(h.ch('1'), h.ch('2'), h.ch('3')), h.ch(',')) + @parser_1 = h.sepBy(h.choice(h.ch(0x31), h.ch(0x32), h.ch(0x33)), h.ch(0x2c)) end def test_1 - assert_parse_ok @parser_1, "1,2,3", ['1', '2', '3'] + assert_parse_ok @parser_1, "1,2,3", [0x31, 0x32, 0x33] end def test_2 - assert_parse_ok @parser_1, "1,3,2", ['1', '3', '2'] + assert_parse_ok @parser_1, "1,3,2", [0x31, 0x33, 0x32] end def test_3 - assert_parse_ok @parser_1, "1,3", ['1', '3'] + assert_parse_ok @parser_1, "1,3", [0x31, 0x33] end def test_4 - assert_parse_ok @parser_1, "3", ['3'] + assert_parse_ok @parser_1, "3", [0x33] end def test_5 @@ -648,23 +648,23 @@ class TestSepBy1 < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sepBy1(h.choice(h.ch('1'), h.ch('2'), h.ch('3')), h.ch(',')) + @parser_1 = h.sepBy1(h.choice(h.ch(0x31), h.ch(0x32), h.ch(0x33)), h.ch(0x2c)) end def test_1 - assert_parse_ok @parser_1, "1,2,3", ['1', '2', '3'] + assert_parse_ok @parser_1, "1,2,3", [0x31, 0x32, 0x33] end def test_2 - assert_parse_ok @parser_1, "1,3,2", ['1', '3', '2'] + assert_parse_ok @parser_1, "1,3,2", [0x31, 0x33, 0x32] end def test_3 - assert_parse_ok @parser_1, "1,3", ['1', '3'] + assert_parse_ok @parser_1, "1,3", [0x31, 0x33] end def test_4 - assert_parse_ok @parser_1, "3", ['3'] + assert_parse_ok @parser_1, "3", [0x33] end def test_5 @@ -676,13 +676,13 @@ class TestAnd < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.and(h.ch('0')), h.ch('0')) - @parser_2 = h.sequence(h.and(h.ch('0')), h.ch('1')) - @parser_3 = h.sequence(h.ch('1'), h.and(h.ch('2'))) + @parser_1 = h.sequence(h.and(h.ch(0x30)), h.ch(0x30)) + @parser_2 = h.sequence(h.and(h.ch(0x30)), h.ch(0x31)) + @parser_3 = h.sequence(h.ch(0x31), h.and(h.ch(0x32))) end def 
test_1 - assert_parse_ok @parser_1, "0", ['0'] + assert_parse_ok @parser_1, "0", [0x30] end def test_2 @@ -698,7 +698,7 @@ class TestAnd < Minitest::Test end def test_5 - assert_parse_ok @parser_3, "12", ['1'] + assert_parse_ok @parser_3, "12", [0x31] end def test_6 @@ -710,12 +710,12 @@ class TestNot < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch('a'), h.choice(h.token("+"), h.token("++")), h.ch('b')) - @parser_2 = h.sequence(h.ch('a'), h.choice(h.sequence(h.token("+"), h.not(h.ch('+'))), h.token("++")), h.ch('b')) + @parser_1 = h.sequence(h.ch(0x61), h.choice(h.token("+"), h.token("++")), h.ch(0x62)) + @parser_2 = h.sequence(h.ch(0x61), h.choice(h.sequence(h.token("+"), h.not(h.ch(0x2b))), h.token("++")), h.ch(0x62)) end def test_1 - assert_parse_ok @parser_1, "a+b", ['a', "+", 'b'] + assert_parse_ok @parser_1, "a+b", [0x61, "+", 0x62] end def test_2 @@ -723,11 +723,11 @@ class TestNot < Minitest::Test end def test_3 - assert_parse_ok @parser_2, "a+b", ['a', ["+"], 'b'] + assert_parse_ok @parser_2, "a+b", [0x61, ["+"], 0x62] end def test_4 - assert_parse_ok @parser_2, "a++b", ['a', "++", 'b'] + assert_parse_ok @parser_2, "a++b", [0x61, "++", 0x62] end end @@ -736,46 +736,20 @@ class TestRightrec < Minitest::Test super h = Hammer::Parser @sp_rr = h.indirect - @sp_rr.bind h.choice(h.sequence(h.ch('a'), @sp_rr), h.epsilon_p) + @sp_rr.bind h.choice(h.sequence(h.ch(0x61), @sp_rr), h.epsilon_p) @parser_1 = @sp_rr end def test_1 - assert_parse_ok @parser_1, "a", ['a'] + assert_parse_ok @parser_1, "a", [0x61] end def test_2 - assert_parse_ok @parser_1, "aa", ['a', ['a']] + assert_parse_ok @parser_1, "aa", [0x61, [0x61]] end def test_3 - assert_parse_ok @parser_1, "aaa", ['a', ['a', ['a']]] - end -end - -class TestAmbiguous < Minitest::Test - def setup - super - h = Hammer::Parser - @sp_d = h.indirect - @sp_p = h.indirect - @sp_e = h.indirect - @sp_d.bind h.ch('d') - @sp_p.bind h.ch('+') - @sp_e.bind h.choice(h.sequence(@sp_e, @sp_p, 
@sp_e), @sp_d) - @parser_1 = @sp_e - end - - def test_1 - assert_parse_ok @parser_1, "d", 'd' - end - - def test_2 - assert_parse_ok @parser_1, "d+d", ['d', '+', 'd'] - end - - def test_3 - assert_parse_ok @parser_1, "d+d+d", [['d', '+', 'd'], '+', 'd'] + assert_parse_ok @parser_1, "aaa", [0x61, [0x61, [0x61]]] end end From 482e89adf8e2b1aaaf27648714f2abbe1dccf8c5 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 01:09:45 +0100 Subject: [PATCH 28/35] Added support for the token type registry --- src/bindings/ruby/hammer-parser.gemspec | 10 +- src/bindings/ruby/lib/hammer/internal.rb | 121 +++++++++++++++++++++-- src/bindings/ruby/lib/hammer/parser.rb | 33 ++++--- 3 files changed, 141 insertions(+), 23 deletions(-) diff --git a/src/bindings/ruby/hammer-parser.gemspec b/src/bindings/ruby/hammer-parser.gemspec index 80b7529..18b4db7 100644 --- a/src/bindings/ruby/hammer-parser.gemspec +++ b/src/bindings/ruby/hammer-parser.gemspec @@ -11,7 +11,15 @@ Gem::Specification.new do |s| files = [] files << 'README.md' - files << Dir['{lib,test}/**/*.rb'] + files << [ + "lib/hammer/internal.rb", + "lib/hammer/parser.rb", + "lib/hammer/parser_builder.rb", + "lib/hammer.rb", + "lib/minitest/hamer-parser_plugin.rb", + "test/autogen_test.rb", + "test/parser_test.rb" + ] s.files = files s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ } diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 469dd73..62a4bc6 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -6,22 +6,123 @@ module Hammer ffi_lib 'hammer' + class DynamicVariable + SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym " + @@current_symbol = 0 + + def initialize(default=nil, name=nil, &block) + # This can take either a default value or a block. If a + # default value is given, all threads' dynvars are initialized + # to that object. 
If a block is given, the block is lazily + # called on each thread to generate the initial value. If + # both a block and a default value are passed, the block is + # called with the literal value. + @default = default + @block = block || Proc.new{|x| x} + @@current_symbol += 1 + @sym = (SYMBOL_PREFIX + @@current_symbol.to_s).to_sym + end + + def value + if Thread.current.key? @sym + return Thread.current[@sym] + else + return Thread.current[@sym] = @block.call(@default) + end + end + + def value=(new_value) + Thread.current[@sym] = new_value + end + + def with(new_value, &block) + old_value = value + begin + self.value = new_value + return block.call + ensure + self.value = old_value + end + end + end + # Maybe we can implement Hammer::Parser with FFI::DataConverter. # That way, most hammer functions won't need to be wrapped. # (Probably need to wrap token, sequence and choice only). # See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi typedef :pointer, :h_parser - HTokenType = enum(:none, 1, - :bytes, 2, - :sint, 4, - :uint, 8, - :sequence, 16, - :reserved_1, - :err, 32, - :user, 64, - :max) + class HTokenType + extend FFI::DataConverter + @@known_type_map = { + :none => 1, + :bytes => 2, + :sint => 4, + :uint => 8, + :sequence => 16, + } + + @@inverse_type_map = @@known_type_map.invert + + def self.new(name) + if name.is_a?(Symbol) + name_sym = name + name_str = name.to_s + else + name_str = name.to_s + name_sym = name.to_sym + end + num = h_allocate_token_type(name_str) + @@known_type_map[name_sym] = num + @@inverse_type_map[num] = name + end + + def self.from_name(name) + unless @@known_type_map.key? name + num = h_get_token_type_number(name.to_s) + if num <= 0 + raise ArgumentError, "Unknown token type #{name}" + end + @@known_type_map[name] = num + @@inverse_type_map[num] = name + end + return @@known_type_map[name] + end + + def self.from_num(num) + unless @@inverse_type_map.key? num + name = h_get_token_type_name(num) + if name.nil? 
+ return nil + end + name = name.to_sym + @@known_type_map[name] = num + @@inverse_type_map_type_map[num] = name + end + return @@inverse_type_map[num] + end + + def self.native_type + FFI::Type::INT + end + + def self.to_native(val, ctx) + return val if val.is_a?(Integer) + return from_name(val) + end + + def self.from_native(val, ctx) + return from_num(val) || val + end + end + + # Define these as soon as possible, so that they can be used + # without fear elsewhere + attach_function :h_allocate_token_type, [:string], HTokenType + attach_function :h_get_token_type_number, [:string], HTokenType + attach_function :h_get_token_type_name, [HTokenType], :string + class HCountedArray < FFI::Struct layout :capacity, :size_t, :used, :size_t, @@ -213,5 +314,7 @@ module Hammer attach_function :h_parse_result_free, [HParseResult.by_ref], :void # TODO: Does the HParser* need to be freed? + + # Token type registry end end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 4d9f432..09f2ff4 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -1,6 +1,10 @@ +require 'hammer/internal' + module Hammer class Parser + @@saved_objects = Hammer::Internal::DynamicVariable.new nil, "Hammer parse-time pins" + # Don't create new instances with Hammer::Parser.new, # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.) # @@ -26,17 +30,20 @@ module Hammer raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. ibuf = FFI::MemoryPointer.from_string(data) - result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null - if result.null? - return nil - else - # NOTE: - # The parse result *must* hold a reference to the parser that created it! - # Otherwise, the parser might get garbage-collected while the result is still valid. - # Any pointers to token strings will then be invalid. 
- result.instance_variable_set :@parser, self - return result - end + @@saved_objects.with([]) do + result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null + if result.null? + return nil + else + # NOTE: + # The parse result *must* hold a reference to the parser that created it! + # Otherwise, the parser might get garbage-collected while the result is still valid. + # Any pointers to token strings will then be invalid. + result.instance_variable_set :@parser, self + result.instance_variable_set :@pins, @@saved_objects.value + return result + end + end end # Binds an indirect parser. @@ -71,7 +78,7 @@ module Hammer buffer = FFI::MemoryPointer.from_string(string) h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end - return Hammer::Parser.new(:token, h_parser, buffer) + return Hammer::Parser.new(:token, h_parser, [buffer, string]) end def self.marshal_ch_arg(num) @@ -100,7 +107,7 @@ module Hammer def self.int_range(parser, i1, i2) h_parser = Hammer::Internal.h_int_range(parser.h_parser, i1, i2) - return Hammer::Parser.new(:int_range, h_parser, nil) + return Hammer::Parser.new(:int_range, h_parser, [parser]) end def self.in(charset) From 0da5867a8174406e503bd0ff19a2ebedb92d4866 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 02:55:02 +0100 Subject: [PATCH 29/35] Action is now completely working --- src/bindings/ruby/lib/hammer.rb | 3 +- src/bindings/ruby/lib/hammer/internal.rb | 78 ++++++++++++++++-------- src/bindings/ruby/lib/hammer/parser.rb | 34 ++++++++++- src/bindings/ruby/test/parser_test.rb | 3 +- 4 files changed, 86 insertions(+), 32 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index cec33fc..79fb52d 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -27,8 +27,7 @@ parser = h.many( h.action(h.uint8) { |r| #p "TT=#{r[:ast][:token_type]}, 
value=#{r[:ast][:data][:uint]}" - r[:ast][:data][:uint] *= 2 - r[:ast] if r[:ast][:data][:uint] % 3 == 0 + r.data * 2 }) #parser = Hammer::Parser.build { diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 62a4bc6..bac3b3e 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -65,7 +65,15 @@ module Hammer @@inverse_type_map = @@known_type_map.invert - def self.new(name) + @@from_hpt = { + :none => Proc.new { nil }, + :bytes => Proc.new {|hpt| hpt[:data][:bytes].token}, + :sint => Proc.new {|hpt| hpt[:data][:sint]}, + :uint => Proc.new {|hpt| hpt[:data][:uint]}, + :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}}, + } + + def self.new(name, &block) if name.is_a?(Symbol) name_sym = name name_str = name.to_s @@ -73,14 +81,15 @@ module Hammer name_str = name.to_s name_sym = name.to_sym end - num = h_allocate_token_type(name_str) + num = Hammer::Internal.h_allocate_token_type(name_str) @@known_type_map[name_sym] = num - @@inverse_type_map[num] = name + @@inverse_type_map[num] = name_sym + @@from_hpt[name_sym] = block end def self.from_name(name) unless @@known_type_map.key? name - num = h_get_token_type_number(name.to_s) + num = Hammer::Internal.h_get_token_type_number(name.to_s) if num <= 0 raise ArgumentError, "Unknown token type #{name}" end @@ -92,13 +101,13 @@ module Hammer def self.from_num(num) unless @@inverse_type_map.key? num - name = h_get_token_type_name(num) + name = Hammer::Internal.h_get_token_type_name(num) if name.nil? 
return nil end name = name.to_sym @@known_type_map[name] = num - @@inverse_type_map_type_map[num] = name + @@inverse_type_map[num] = name end return @@inverse_type_map[num] end @@ -119,10 +128,10 @@ module Hammer # Define these as soon as possible, so that they can be used # without fear elsewhere - attach_function :h_allocate_token_type, [:string], HTokenType - attach_function :h_get_token_type_number, [:string], HTokenType - attach_function :h_get_token_type_name, [HTokenType], :string - + attach_function :h_allocate_token_type, [:string], :int + attach_function :h_get_token_type_number, [:string], :int + attach_function :h_get_token_type_name, [:int], :string + class HCountedArray < FFI::Struct layout :capacity, :size_t, :used, :size_t, @@ -161,7 +170,7 @@ module Hammer # Should be the same encoding as the string the token was created with. # But how do we get to this knowledge at this point? # Cheap solution: Just ask the user (additional parameter with default value of UTF-8). - self[:token].read_string(self[:len]).force_encoding('UTF-8') + self[:token].read_string(self[:len]) end # TODO: Probably should rename this to match ruby conventions: length, count, size @@ -170,6 +179,22 @@ module Hammer end end + class HString < FFI::Struct + layout :content, HBytes.by_ref, + :encoding, :uint64 + def token + return self[:content].token.force_encoding( + ObjectSpace._id2ref(self[:encoding])) + end + end + + HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt| + hpt.user(HString).token + } + HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt| + ObjectSpace._id2ref(hpt[:data][:uint]) + } + class HParsedTokenDataUnion < FFI::Union layout :bytes, HBytes.by_value, :sint, :int64, @@ -223,22 +248,13 @@ module Hammer self[:bit_offset] end - def unmarshal - case token_type - when :sequence - self[:data][:seq].map {|x| x.unmarshal} - when :bytes - self[:data][:bytes].token - when :uint - self[:data][:uint] - when :sint - self[:data][:sint] - 
when :none - nil - end + def user(struct) + struct.by_ref.from_native(self[:data][:user], nil) end - + def unmarshal + Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)[token_type].call self + end end class HParseResult < FFI::Struct @@ -257,6 +273,15 @@ module Hammer def self.release(ptr) Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? end + + def arena_alloc(type) + Hammer::Internal.arena_alloc(self[:arena], type) + end + end + + def self.arena_alloc(arena, type) + ptr = h_arena_malloc(arena, type.size) + return type.by_ref.from_native(ptr, nil) end # run a parser @@ -315,6 +340,7 @@ module Hammer # TODO: Does the HParser* need to be freed? - # Token type registry + # Add the arena + attach_function :h_arena_malloc, [:pointer, :size_t], :pointer end end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 09f2ff4..cdd2c34 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -58,8 +58,22 @@ module Hammer action = block if action.nil? raise ArgumentError, 'no action' if action.nil? - h_parser = Hammer::Internal.h_action(parser.h_parser, action) - return Hammer::Parser.new(:action, h_parser, [parser, action]) + real_action = Proc.new {|hpr| + ret = action.call(hpr.ast) + # Pin the result + @@saved_objects.value << ret + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + unless hpr.ast.nil? + hpt[:index] = hpr[:ast][:index] + hpt[:bit_offset] = hpr[:ast][:bit_offset] + end + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.object" + hpt[:data][:uint] = ret.object_id + hpt + } + + h_parser = Hammer::Internal.h_action(parser.h_parser, real_action) + return Hammer::Parser.new(:action, h_parser, [parser, action, real_action]) end # Can pass the predicate either as a Proc in second parameter, or as block. 
@@ -77,8 +91,22 @@ module Hammer # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) buffer = FFI::MemoryPointer.from_string(string) h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end + encoding = string.encoding - return Hammer::Parser.new(:token, h_parser, [buffer, string]) + wrapping_action = Proc.new {|hpr| + hstr = hpr.arena_alloc(Hammer::Internal::HString) + hstr[:content] = hpr[:ast][:data][:bytes] + hstr[:encoding] = encoding.object_id + + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.encodedStr" + hpt[:data][:user] = hstr.to_ptr + hpt[:bit_offset] = hpr[:ast][:bit_offset] + hpt[:index] = hpr[:ast][:index] + hpt + } + wrapped_parser = Hammer::Internal.h_action(h_parser, wrapping_action) + return Hammer::Parser.new(:token, wrapped_parser, [buffer, string, encoding, wrapping_action, h_parser]) end def self.marshal_ch_arg(num) diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index abbd1c1..b9fb37f 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ b/src/bindings/ruby/test/parser_test.rb @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- require 'bundler/setup' require 'hammer' require 'minitest/autorun' @@ -84,7 +85,7 @@ class ParserTest < Minitest::Test def test_token_encoding(encoding='UTF-8') string = '今日'.encode(encoding) parser = Hammer::Parser.token(string) - assert_equal string, parser.parse(string)[:ast][:data][:bytes].token + assert_equal string, parser.parse(string).ast.unmarshal end def test_token_encoding_2 From 3d791412f0eff46e6a052c79a30e5213153361a1 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 03:24:39 +0100 Subject: [PATCH 30/35] Fixed character parsing --- lib/tsgenruby.pl | 4 +- src/bindings/ruby/lib/hammer.rb | 2 +- src/bindings/ruby/lib/hammer/parser.rb | 20 ++-- src/bindings/ruby/test/autogen_test.rb | 144 
++++++++++++------------- src/bindings/ruby/test/parser_test.rb | 20 ++++ 5 files changed, 108 insertions(+), 82 deletions(-) diff --git a/lib/tsgenruby.pl b/lib/tsgenruby.pl index ad83cf1..d866eee 100644 --- a/lib/tsgenruby.pl +++ b/lib/tsgenruby.pl @@ -96,8 +96,8 @@ pp_parser(num(Num)) --> !, "-0x", {RNum is -Num}; "0x", {RNum = Num} ), pp_hexnum_guts(RNum). pp_parser(char(C)) --> !, - pp_parser(num(C)). - %"'", pp_char_guts(C), "'", !. + pp_parser(num(C)), ".chr". % Ruby is encoding-aware; this is a + % more reasonable implementation pp_parser(ref(Name)) --> {atom_codes(Name,CName)}, diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 79fb52d..916a0a5 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -48,5 +48,5 @@ $r = parser.parse 'abcdefgh' h = Hammer::Parser -parser = h.many(h.attr_bool(h.uint8) { |r| r.ast.data <= 100 }) +parser = h.many(h.attr_bool(h.uint8) { |r| r.data <= 100 }) #p parser.parse('abcdefgh').ast.data.map(&:data) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index cdd2c34..3a11d2e 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -81,8 +81,10 @@ module Hammer predicate = block if predicate.nil? raise ArgumentError, 'no predicate' if predicate.nil? 
- h_parser = Hammer::Internal.h_attr_bool(parser.h_parser, predicate) - return Hammer::Parser.new(:attr_bool, h_parser, [parser, predicate]) + real_pred = Proc.new {|hpr| predicate.call hpr.ast} + + h_parser = Hammer::Internal.h_attr_bool(parser.h_parser, real_pred) + return Hammer::Parser.new(:attr_bool, h_parser, [parser, predicate, real_pred]) end def self.token(string) @@ -118,19 +120,23 @@ module Hammer return num end private_class_method :marshal_ch_arg - + + def self.ch_parser_wrapper(parser) + return Hammer::Parser.action(parser) {|x| x.data.chr} + end + def self.ch(ch) num = marshal_ch_arg(ch) h_parser = Hammer::Internal.h_ch(num) - return Hammer::Parser.new(:ch, h_parser, nil) + return ch_parser_wrapper(Hammer::Parser.new(:ch, h_parser, nil)) end def self.ch_range(ch1, ch2) ch1 = marshal_ch_arg(ch1) ch2 = marshal_ch_arg(ch2) h_parser = Hammer::Internal.h_ch_range(ch1, ch2) - return Hammer::Parser.new(:ch_range, h_parser, nil) + return ch_parser_wrapper(Hammer::Parser.new(:ch_range, h_parser, nil)) end def self.int_range(parser, i1, i2) @@ -142,7 +148,7 @@ module Hammer raise ArgumentError, "Expected a String" unless charset.is_a?(String) ibuf = FFI::MemoryPointer.from_string(charset) h_parser = Hammer::Internal.h_in(ibuf, charset.bytesize) - return Hammer::Parser.new(:in, h_parser, nil) + return ch_parser_wrapper(Hammer::Parser.new(:in, h_parser, nil)) end def self.repeat_n(parser, count) @@ -154,7 +160,7 @@ module Hammer raise ArgumentError, "Expected a String" unless charset.is_a?(String) ibuf = FFI::MemoryPointer.from_string(charset) h_parser = Hammer::Internal.h_not_in(ibuf, charset.bytesize) - return Hammer::Parser.new(:not_in, h_parser, nil) + return ch_parser_wrapper(Hammer::Parser.new(:not_in, h_parser, nil)) end # Defines a parser constructor with the given name. 
diff --git a/src/bindings/ruby/test/autogen_test.rb b/src/bindings/ruby/test/autogen_test.rb index 93a0c7a..0600c0f 100644 --- a/src/bindings/ruby/test/autogen_test.rb +++ b/src/bindings/ruby/test/autogen_test.rb @@ -25,7 +25,7 @@ class TestCh < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "\xa2", 0xa2 + assert_parse_ok @parser_1, "\xa2", 0xa2.chr end def test_2 @@ -41,7 +41,7 @@ class TestChRange < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "b", 0x62 + assert_parse_ok @parser_1, "b", 0x62.chr end def test_2 @@ -218,19 +218,19 @@ class TestWhitespace < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "a", 0x61 + assert_parse_ok @parser_1, "a", 0x61.chr end def test_2 - assert_parse_ok @parser_1, " a", 0x61 + assert_parse_ok @parser_1, " a", 0x61.chr end def test_3 - assert_parse_ok @parser_1, " a", 0x61 + assert_parse_ok @parser_1, " a", 0x61.chr end def test_4 - assert_parse_ok @parser_1, "\x09a", 0x61 + assert_parse_ok @parser_1, "\x09a", 0x61.chr end def test_5 @@ -258,7 +258,7 @@ class TestLeft < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "a ", 0x61 + assert_parse_ok @parser_1, "a ", 0x61.chr end def test_2 @@ -278,11 +278,11 @@ class TestMiddle < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.middle(h.ch(0x20), h.ch(0x61), h.ch(0x20)) + @parser_1 = h.middle(h.ch(0x20.chr), h.ch(0x61.chr), h.ch(0x20.chr)) end def test_1 - assert_parse_ok @parser_1, " a ", 0x61 + assert_parse_ok @parser_1, " a ", 0x61.chr end def test_2 @@ -318,7 +318,7 @@ class TestIn < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "b", 0x62 + assert_parse_ok @parser_1, "b", 0x62.chr end def test_2 @@ -334,7 +334,7 @@ class TestNotIn < Minitest::Test end def test_1 - assert_parse_ok @parser_1, "d", 0x64 + assert_parse_ok @parser_1, "d", 0x64.chr end def test_2 @@ -346,11 +346,11 @@ class TestEndP < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch(0x61), h.end_p) + @parser_1 = 
h.sequence(h.ch(0x61.chr), h.end_p) end def test_1 - assert_parse_ok @parser_1, "a", [0x61] + assert_parse_ok @parser_1, "a", [0x61.chr] end def test_2 @@ -374,12 +374,12 @@ class TestSequence < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch(0x61), h.ch(0x62)) - @parser_2 = h.sequence(h.ch(0x61), h.whitespace(h.ch(0x62))) + @parser_1 = h.sequence(h.ch(0x61.chr), h.ch(0x62.chr)) + @parser_2 = h.sequence(h.ch(0x61.chr), h.whitespace(h.ch(0x62.chr))) end def test_1 - assert_parse_ok @parser_1, "ab", [0x61, 0x62] + assert_parse_ok @parser_1, "ab", [0x61.chr, 0x62.chr] end def test_2 @@ -391,15 +391,15 @@ class TestSequence < Minitest::Test end def test_4 - assert_parse_ok @parser_2, "ab", [0x61, 0x62] + assert_parse_ok @parser_2, "ab", [0x61.chr, 0x62.chr] end def test_5 - assert_parse_ok @parser_2, "a b", [0x61, 0x62] + assert_parse_ok @parser_2, "a b", [0x61.chr, 0x62.chr] end def test_6 - assert_parse_ok @parser_2, "a b", [0x61, 0x62] + assert_parse_ok @parser_2, "a b", [0x61.chr, 0x62.chr] end end @@ -407,19 +407,19 @@ class TestChoice < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.choice(h.ch(0x61), h.ch(0x62)) + @parser_1 = h.choice(h.ch(0x61.chr), h.ch(0x62.chr)) end def test_1 - assert_parse_ok @parser_1, "a", 0x61 + assert_parse_ok @parser_1, "a", 0x61.chr end def test_2 - assert_parse_ok @parser_1, "b", 0x62 + assert_parse_ok @parser_1, "b", 0x62.chr end def test_3 - assert_parse_ok @parser_1, "ab", 0x61 + assert_parse_ok @parser_1, "ab", 0x61.chr end def test_4 @@ -431,12 +431,12 @@ class TestButnot < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.butnot(h.ch(0x61), h.token("ab")) - @parser_2 = h.butnot(h.ch_range(0x30, 0x39), h.ch(0x36)) + @parser_1 = h.butnot(h.ch(0x61.chr), h.token("ab")) + @parser_2 = h.butnot(h.ch_range(0x30.chr, 0x39.chr), h.ch(0x36.chr)) end def test_1 - assert_parse_ok @parser_1, "a", 0x61 + assert_parse_ok @parser_1, "a", 0x61.chr end def test_2 @@ -444,11 
+444,11 @@ class TestButnot < Minitest::Test end def test_3 - assert_parse_ok @parser_1, "aa", 0x61 + assert_parse_ok @parser_1, "aa", 0x61.chr end def test_4 - assert_parse_ok @parser_2, "5", 0x35 + assert_parse_ok @parser_2, "5", 0x35.chr end def test_5 @@ -460,7 +460,7 @@ class TestDifference < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.difference(h.token("ab"), h.ch(0x61)) + @parser_1 = h.difference(h.token("ab"), h.ch(0x61.chr)) end def test_1 @@ -476,15 +476,15 @@ class TestXor < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.xor(h.ch_range(0x30, 0x36), h.ch_range(0x35, 0x39)) + @parser_1 = h.xor(h.ch_range(0x30.chr, 0x36.chr), h.ch_range(0x35.chr, 0x39.chr)) end def test_1 - assert_parse_ok @parser_1, "0", 0x30 + assert_parse_ok @parser_1, "0", 0x30.chr end def test_2 - assert_parse_ok @parser_1, "9", 0x39 + assert_parse_ok @parser_1, "9", 0x39.chr end def test_3 @@ -500,7 +500,7 @@ class TestMany < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.many(h.choice(h.ch(0x61), h.ch(0x62))) + @parser_1 = h.many(h.choice(h.ch(0x61.chr), h.ch(0x62.chr))) end def test_1 @@ -508,15 +508,15 @@ class TestMany < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "a", [0x61] + assert_parse_ok @parser_1, "a", [0x61.chr] end def test_3 - assert_parse_ok @parser_1, "b", [0x62] + assert_parse_ok @parser_1, "b", [0x62.chr] end def test_4 - assert_parse_ok @parser_1, "aabbaba", [0x61, 0x61, 0x62, 0x62, 0x61, 0x62, 0x61] + assert_parse_ok @parser_1, "aabbaba", [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr] end end @@ -524,7 +524,7 @@ class TestMany1 < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.many1(h.choice(h.ch(0x61), h.ch(0x62))) + @parser_1 = h.many1(h.choice(h.ch(0x61.chr), h.ch(0x62.chr))) end def test_1 @@ -532,15 +532,15 @@ class TestMany1 < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "a", [0x61] + assert_parse_ok @parser_1, "a", [0x61.chr] 
end def test_3 - assert_parse_ok @parser_1, "b", [0x62] + assert_parse_ok @parser_1, "b", [0x62.chr] end def test_4 - assert_parse_ok @parser_1, "aabbaba", [0x61, 0x61, 0x62, 0x62, 0x61, 0x62, 0x61] + assert_parse_ok @parser_1, "aabbaba", [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr] end def test_5 @@ -552,7 +552,7 @@ class TestRepeatN < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.repeat_n(h.choice(h.ch(0x61), h.ch(0x62)), 0x2) + @parser_1 = h.repeat_n(h.choice(h.ch(0x61.chr), h.ch(0x62.chr)), 0x2) end def test_1 @@ -560,7 +560,7 @@ class TestRepeatN < Minitest::Test end def test_2 - assert_parse_ok @parser_1, "abdef", [0x61, 0x62] + assert_parse_ok @parser_1, "abdef", [0x61.chr, 0x62.chr] end def test_3 @@ -572,19 +572,19 @@ class TestOptional < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch(0x61), h.optional(h.choice(h.ch(0x62), h.ch(0x63))), h.ch(0x64)) + @parser_1 = h.sequence(h.ch(0x61.chr), h.optional(h.choice(h.ch(0x62.chr), h.ch(0x63.chr))), h.ch(0x64.chr)) end def test_1 - assert_parse_ok @parser_1, "abd", [0x61, 0x62, 0x64] + assert_parse_ok @parser_1, "abd", [0x61.chr, 0x62.chr, 0x64.chr] end def test_2 - assert_parse_ok @parser_1, "acd", [0x61, 0x63, 0x64] + assert_parse_ok @parser_1, "acd", [0x61.chr, 0x63.chr, 0x64.chr] end def test_3 - assert_parse_ok @parser_1, "ad", [0x61, nil, 0x64] + assert_parse_ok @parser_1, "ad", [0x61.chr, nil, 0x64.chr] end def test_4 @@ -604,11 +604,11 @@ class TestIgnore < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch(0x61), h.ignore(h.ch(0x62)), h.ch(0x63)) + @parser_1 = h.sequence(h.ch(0x61.chr), h.ignore(h.ch(0x62.chr)), h.ch(0x63.chr)) end def test_1 - assert_parse_ok @parser_1, "abc", [0x61, 0x63] + assert_parse_ok @parser_1, "abc", [0x61.chr, 0x63.chr] end def test_2 @@ -620,23 +620,23 @@ class TestSepBy < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sepBy(h.choice(h.ch(0x31), 
h.ch(0x32), h.ch(0x33)), h.ch(0x2c)) + @parser_1 = h.sepBy(h.choice(h.ch(0x31.chr), h.ch(0x32.chr), h.ch(0x33.chr)), h.ch(0x2c.chr)) end def test_1 - assert_parse_ok @parser_1, "1,2,3", [0x31, 0x32, 0x33] + assert_parse_ok @parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr] end def test_2 - assert_parse_ok @parser_1, "1,3,2", [0x31, 0x33, 0x32] + assert_parse_ok @parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr] end def test_3 - assert_parse_ok @parser_1, "1,3", [0x31, 0x33] + assert_parse_ok @parser_1, "1,3", [0x31.chr, 0x33.chr] end def test_4 - assert_parse_ok @parser_1, "3", [0x33] + assert_parse_ok @parser_1, "3", [0x33.chr] end def test_5 @@ -648,23 +648,23 @@ class TestSepBy1 < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sepBy1(h.choice(h.ch(0x31), h.ch(0x32), h.ch(0x33)), h.ch(0x2c)) + @parser_1 = h.sepBy1(h.choice(h.ch(0x31.chr), h.ch(0x32.chr), h.ch(0x33.chr)), h.ch(0x2c.chr)) end def test_1 - assert_parse_ok @parser_1, "1,2,3", [0x31, 0x32, 0x33] + assert_parse_ok @parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr] end def test_2 - assert_parse_ok @parser_1, "1,3,2", [0x31, 0x33, 0x32] + assert_parse_ok @parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr] end def test_3 - assert_parse_ok @parser_1, "1,3", [0x31, 0x33] + assert_parse_ok @parser_1, "1,3", [0x31.chr, 0x33.chr] end def test_4 - assert_parse_ok @parser_1, "3", [0x33] + assert_parse_ok @parser_1, "3", [0x33.chr] end def test_5 @@ -676,13 +676,13 @@ class TestAnd < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.and(h.ch(0x30)), h.ch(0x30)) - @parser_2 = h.sequence(h.and(h.ch(0x30)), h.ch(0x31)) - @parser_3 = h.sequence(h.ch(0x31), h.and(h.ch(0x32))) + @parser_1 = h.sequence(h.and(h.ch(0x30.chr)), h.ch(0x30.chr)) + @parser_2 = h.sequence(h.and(h.ch(0x30.chr)), h.ch(0x31.chr)) + @parser_3 = h.sequence(h.ch(0x31.chr), h.and(h.ch(0x32.chr))) end def test_1 - assert_parse_ok @parser_1, "0", [0x30] + assert_parse_ok @parser_1, "0", [0x30.chr] end def 
test_2 @@ -698,7 +698,7 @@ class TestAnd < Minitest::Test end def test_5 - assert_parse_ok @parser_3, "12", [0x31] + assert_parse_ok @parser_3, "12", [0x31.chr] end def test_6 @@ -710,12 +710,12 @@ class TestNot < Minitest::Test def setup super h = Hammer::Parser - @parser_1 = h.sequence(h.ch(0x61), h.choice(h.token("+"), h.token("++")), h.ch(0x62)) - @parser_2 = h.sequence(h.ch(0x61), h.choice(h.sequence(h.token("+"), h.not(h.ch(0x2b))), h.token("++")), h.ch(0x62)) + @parser_1 = h.sequence(h.ch(0x61.chr), h.choice(h.token("+"), h.token("++")), h.ch(0x62.chr)) + @parser_2 = h.sequence(h.ch(0x61.chr), h.choice(h.sequence(h.token("+"), h.not(h.ch(0x2b.chr))), h.token("++")), h.ch(0x62.chr)) end def test_1 - assert_parse_ok @parser_1, "a+b", [0x61, "+", 0x62] + assert_parse_ok @parser_1, "a+b", [0x61.chr, "+", 0x62.chr] end def test_2 @@ -723,11 +723,11 @@ class TestNot < Minitest::Test end def test_3 - assert_parse_ok @parser_2, "a+b", [0x61, ["+"], 0x62] + assert_parse_ok @parser_2, "a+b", [0x61.chr, ["+"], 0x62.chr] end def test_4 - assert_parse_ok @parser_2, "a++b", [0x61, "++", 0x62] + assert_parse_ok @parser_2, "a++b", [0x61.chr, "++", 0x62.chr] end end @@ -736,20 +736,20 @@ class TestRightrec < Minitest::Test super h = Hammer::Parser @sp_rr = h.indirect - @sp_rr.bind h.choice(h.sequence(h.ch(0x61), @sp_rr), h.epsilon_p) + @sp_rr.bind h.choice(h.sequence(h.ch(0x61.chr), @sp_rr), h.epsilon_p) @parser_1 = @sp_rr end def test_1 - assert_parse_ok @parser_1, "a", [0x61] + assert_parse_ok @parser_1, "a", [0x61.chr] end def test_2 - assert_parse_ok @parser_1, "aa", [0x61, [0x61]] + assert_parse_ok @parser_1, "aa", [0x61.chr, [0x61.chr]] end def test_3 - assert_parse_ok @parser_1, "aaa", [0x61, [0x61, [0x61]]] + assert_parse_ok @parser_1, "aaa", [0x61.chr, [0x61.chr, [0x61.chr]]] end end diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index b9fb37f..a8b9c7b 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ 
b/src/bindings/ruby/test/parser_test.rb @@ -92,3 +92,23 @@ class ParserTest < Minitest::Test test_token_encoding('EUC-JP') end end + +class AttrBoolTest < Minitest::Test + def setup + h = Hammer::Parser + @parser = h.attr_bool(h.many1(h.choice(h.ch('a'), h.ch('b')))) {|x| + data = x.unmarshal + data.length > 1 && data[0] == data[1] + } + end + + def test_1 + assert_parse_ok @parser, "aa", ['a','a'] + end + def test_2 + assert_parse_ok @parser, "bb", ['b','b'] + end + def test_3 + refute_parse_ok @parser, "ab" + end +end From cafa9adb1a1d2dc1fdfecf2be8d407bb2c44b083 Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 03:28:31 +0100 Subject: [PATCH 31/35] Action works, too --- src/bindings/ruby/test/parser_test.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index a8b9c7b..6bbdc36 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ b/src/bindings/ruby/test/parser_test.rb @@ -112,3 +112,21 @@ class AttrBoolTest < Minitest::Test refute_parse_ok @parser, "ab" end end + +class ActionTest < Minitest::Test + def setup + h = Hammer::Parser + @parser = h.action(h.sequence(h.choice(h.ch('a'), h.ch('A')), + h.choice(h.ch('b'), h.ch('B')))) {|x| + x.unmarshal.join(",")} + end + def test_1 + assert_parse_ok @parser, "ab", "a,b" + end + def test_2 + assert_parse_ok @parser, "AB", "A,B" + end + def test_3 + refute_parse_ok @parser, "XX" + end +end From df196aa34652eb4c5c20a8b20c8656d8ab0f51eb Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 04:26:47 +0100 Subject: [PATCH 32/35] Added sconscript --- .travis.yml | 15 ++++++++++++++- SConstruct | 2 +- src/bindings/ruby/.gitignore | 1 + src/bindings/ruby/SConscript | 29 +++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 src/bindings/ruby/SConscript diff --git a/.travis.yml b/.travis.yml index 2328d03..dddf760 100644 --- a/.travis.yml +++ b/.travis.yml @@ 
-60,13 +60,26 @@ matrix: - compiler: clang language: dotnet env: BINDINGS=dotnet CC=clang + - compiler: gcc + language: ruby + rvm: + - ruby-1.9.3-p484 + - ruby-2.0.0-p353 + - ruby-2.1.0 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: + - ruby-1.9.3-p484 + - ruby-2.0.0-p353 + - ruby-2.1.0 + env: BINDINGS=ruby CC=clang before_install: - sudo apt-get update -qq - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi - if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi - if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi - install: true before_script: - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi diff --git a/SConstruct b/SConstruct index 17b1009..fe1c78e 100644 --- a/SConstruct +++ b/SConstruct @@ -7,7 +7,7 @@ import sys vars = Variables(None, ARGUMENTS) vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate)) vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept)) -vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python'])) +vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python', 'ruby'])) env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, diff --git a/src/bindings/ruby/.gitignore b/src/bindings/ruby/.gitignore index 66f8ed3..4ceda1b 100644 --- a/src/bindings/ruby/.gitignore +++ b/src/bindings/ruby/.gitignore @@ -1 +1,2 @@ /Gemfile.lock +.bundle diff --git a/src/bindings/ruby/SConscript b/src/bindings/ruby/SConscript new 
file mode 100644 index 0000000..290eb16 --- /dev/null +++ b/src/bindings/ruby/SConscript @@ -0,0 +1,29 @@ +# -*- python -*- +import os.path +Import("env libhammer_shared testruns targets") + +rubysources = [ + Glob("test/*.rb"), + Glob("lib/hammer.rb"), + Glob("lib/*/*.rb"), + "hammer-parser.gemspec", + "Rakefile", + "Gemfile", + "Gemfile.lock", + "README.md", +] + +rubyenv = env.Clone() +rubyenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0])) +rubyenv['RBDIR'] = os.path.dirname(str(rubyenv.File("Gemfile").path)) + +setup = rubyenv.Command(Dir(".bundle"), rubysources, "cd $RBDIR && bundle install") +AlwaysBuild(setup) + +rubytestexec = rubyenv.Command(None, [setup] + rubysources, "cd $RBDIR && bundle exec rake test") + +rubytest = Alias("testruby", [rubytestexec], rubytestexec) +AlwaysBuild(rubytestexec) +testruns.append(rubytest) + +# No need for an install target; everybody just uses gems for that. From 982665aafa36aa7ca900f6ccc6fd4142637805ca Mon Sep 17 00:00:00 2001 From: Dan Hirsch Date: Sat, 11 Jan 2014 04:27:40 +0100 Subject: [PATCH 33/35] Got travis working, fixed ruby 1.9.3 compatibility --- .travis.yml | 38 ++++++++++++++++---------- src/bindings/ruby/Gemfile | 1 + src/bindings/ruby/SConscript | 6 +++- src/bindings/ruby/lib/hammer/parser.rb | 4 +-- 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index dddf760..e483b5f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,30 @@ env: - BINDINGS=none matrix: include: + - compiler: gcc + language: ruby + rvm: ruby-1.9.3-p484 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-1.9.3-p484 + env: BINDINGS=ruby CC=clang + - compiler: gcc + language: ruby + rvm: ruby-2.0.0-p353 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-2.0.0-p353 + env: BINDINGS=ruby CC=clang + - compiler: gcc + language: ruby + rvm: ruby-2.1.0 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-2.1.0 + env: BINDINGS=ruby 
CC=clang - compiler: gcc language: python python: "2.7" @@ -60,20 +84,6 @@ matrix: - compiler: clang language: dotnet env: BINDINGS=dotnet CC=clang - - compiler: gcc - language: ruby - rvm: - - ruby-1.9.3-p484 - - ruby-2.0.0-p353 - - ruby-2.1.0 - env: BINDINGS=ruby - - compiler: clang - language: ruby - rvm: - - ruby-1.9.3-p484 - - ruby-2.0.0-p353 - - ruby-2.1.0 - env: BINDINGS=ruby CC=clang before_install: - sudo apt-get update -qq - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile index df42814..c5029fc 100644 --- a/src/bindings/ruby/Gemfile +++ b/src/bindings/ruby/Gemfile @@ -4,4 +4,5 @@ gemspec group :test do gem 'minitest', '~> 5.2' + gem 'rake', '>10' end diff --git a/src/bindings/ruby/SConscript b/src/bindings/ruby/SConscript index 290eb16..6d85a93 100644 --- a/src/bindings/ruby/SConscript +++ b/src/bindings/ruby/SConscript @@ -9,11 +9,15 @@ rubysources = [ "hammer-parser.gemspec", "Rakefile", "Gemfile", - "Gemfile.lock", "README.md", ] rubyenv = env.Clone() + +for k,v in os.environ.items(): + if "RUBY" in k or "GEM" in k or "rvm" in k: + rubyenv['ENV'][k] = v + rubyenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0])) rubyenv['RBDIR'] = os.path.dirname(str(rubyenv.File("Gemfile").path)) diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 3a11d2e..d1177c5 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -113,8 +113,8 @@ module Hammer def self.marshal_ch_arg(num) if num.is_a?(String) - raise ArgumentError, "Expecting either a fixnum in 0..255 or a single-byte String" unless num.bytes.length == 1 - num = num.bytes[0] + raise ArgumentError, "Expecting either a fixnum in 0..255 or a single-byte String" unless num.bytesize == 1 + num = num.bytes.first end raise ArgumentError, 'Expecting a Fixnum in 0..255 or a single-byte String' unless num.is_a?(Fixnum) and 
num.between?(0, 255) return num From 06f7060b3b96b17f54b707f1dd3c648673e7a5dd Mon Sep 17 00:00:00 2001 From: TQ Hirsch Date: Sat, 11 Jan 2014 05:51:25 +0100 Subject: [PATCH 34/35] Updated README to reflect the new Ruby bindings. Closes #38 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e1dee9..4334e68 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Features * C++ (not yet implemented) * Java (not currently building; give us a few days) * Python - * Ruby (not yet implemented) + * Ruby * Perl * [Go](https://github.com/prevoty/hammer) * PHP @@ -39,6 +39,7 @@ Installing * python2.7-dev (for Python bindings) * a JDK (for Java bindings) * a working [phpenv](https://github.com/CHH/phpenv) configuration (for PHP bindings) +* Ruby >= 1.9.3 and bundler, for the Ruby bindings * mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings) * nunit (for testing .NET bindings) From f751400e624b2c3f45caf03f9bbf895479c32858 Mon Sep 17 00:00:00 2001 From: TQ Hirsch Date: Sat, 11 Jan 2014 06:05:23 +0100 Subject: [PATCH 35/35] Updated Ruby-specific README --- src/bindings/ruby/README.md | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md index 61ce0b3..ae29459 100644 --- a/src/bindings/ruby/README.md +++ b/src/bindings/ruby/README.md @@ -14,9 +14,9 @@ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsi 2. Run `bundle install` to install dependencies. -3. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded. +3. Run `bundle console` to open `irb` with hammer loaded. -4. To run tests, just run `rake`. +4. To run tests, just run `bundle exec rake test`. ## Installation @@ -60,11 +60,17 @@ parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), ### Parsing ```ruby -parser.parse 'Hello Mom!' -=> true -parser.parse 'Hello Someone!' 
-=> false +result = parser.parse 'Hello Mom!' +=> #<HParseResult> +result = parser.parse 'Hello Someone!' +=> nil ``` -Currently you only get `true` or `false` depending on whether the parse succeeded or failed. -There's no way to access the parsed data yet. +The `parse` method returns an `HParseResult` object, which needs to be +kept around until you're entirely done with the parse tree, which can +be accessed with `result.ast`. + +While the AST can be accessed using the same interface as the C +HParsedToken type, we recommend using `result.ast.unmarshal` instead. +This converts the entire parse tree into a standalone Ruby-native +datastructure which will likely be much easier to work with.