Add first version of ruby bindings.

This commit is contained in:
Jakob Rath 2013-11-10 15:13:15 +01:00 committed by Dan Hirsch
parent ae0158a1cd
commit 6368214fce
14 changed files with 508 additions and 0 deletions

3
src/bindings/ruby/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/tmp/
/lib/hammer/hammer_ext.bundle
/Gemfile.lock

View file

@ -0,0 +1,9 @@
# Development dependencies for the hammer-parser Ruby bindings.
source 'https://rubygems.org'

# Runtime dependencies are taken from the gemspec in this directory.
gemspec

gem 'rake'
# The Rakefile requires 'rake/extensiontask', which is shipped by rake-compiler.
gem 'rake-compiler'

group :test do
  # ...
end

View file

@ -0,0 +1,72 @@
# hammer-parser
Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
## Notes
* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
* The C extension is not strictly needed at the moment: it only exposes hammer's token type constants to Ruby, which could instead be hard-coded in the Ruby code.
## Development
1. `cd src/bindings/ruby`.
2. Run `bundle install` to install dependencies.
3. Run `rake compile` to compile the C extension.
4. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded.
## Installation
TODO
## Examples
### Building a parser
```ruby
parser = Hammer::Parser.build {
token 'Hello '
choice {
token 'Mom'
token 'Dad'
}
token '!'
}
```
Also possible:
```ruby
parser = Hammer::ParserBuilder.new
.token('Hello ')
.choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad'))
.token('!')
.build
```
More like hammer in C:
```ruby
h = Hammer::Parser
parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
```
### Parsing
```ruby
parser.parse 'Hello Mom!'
=> true
parser.parse 'Hello Someone!'
=> false
```
Currently you only get `true` or `false` depending on whether the parse succeeded or failed.
There's no way to access the parsed data yet.

View file

@ -0,0 +1,8 @@
require 'rake/extensiontask'
#spec = Gem::Specification.load('hammer-parser-ruby.gemspec')
#Rake::ExtensionTask.new('hammer_ext', spec)
# Defines the rake tasks for the `hammer_ext` C extension and sets
# lib/hammer as the extension's lib_dir.
Rake::ExtensionTask.new('hammer_ext') { |task| task.lib_dir = 'lib/hammer' }

View file

@ -0,0 +1,9 @@
require 'mkmf'
# Generates the Makefile for the `hammer_ext` extension.
# Aborts with an error message when the hammer library or its header
# cannot be found.
ext = 'hammer_ext'
dir_config(ext)

have_library('hammer') || abort('ERROR: missing hammer library')
have_header('hammer.h') || abort('ERROR: missing hammer.h')

create_makefile(ext)

View file

@ -0,0 +1,6 @@
#include "token_type.h"
/*
 * Initialization function of the hammer_ext extension.
 * It only delegates to Init_token_type(), declared in token_type.h.
 */
void
Init_hammer_ext(void)
{
    Init_token_type();
}

View file

@ -0,0 +1,6 @@
#ifndef HAMMER_EXT__H
#define HAMMER_EXT__H
// ...
#endif

View file

@ -0,0 +1,20 @@
#include <ruby.h>
#include <hammer.h>
#include "token_type.h"
/*
 * Defines the constant Hammer::Internal::<name> with the integer value of the
 * C identifier <name>. Expects a VALUE called mHammerInternal to be in scope.
 *
 * The macro intentionally does not end in a semicolon: the call site supplies
 * it, so `DefineHammerInternalConst(X);` expands to exactly one statement
 * (a trailing `;` in the macro would leave a stray empty statement behind).
 */
#define DefineHammerInternalConst(name) \
    rb_define_const(mHammerInternal, #name, INT2FIX(name))

/*
 * Creates (or reopens) the Hammer and Hammer::Internal modules and exports
 * hammer's token type constants (TT_NONE ... TT_USER) as constants of
 * Hammer::Internal.
 */
void Init_token_type(void)
{
    VALUE mHammer = rb_define_module("Hammer");
    VALUE mHammerInternal = rb_define_module_under(mHammer, "Internal");

    DefineHammerInternalConst(TT_NONE);
    DefineHammerInternalConst(TT_BYTES);
    DefineHammerInternalConst(TT_SINT);
    DefineHammerInternalConst(TT_UINT);
    DefineHammerInternalConst(TT_SEQUENCE);
    DefineHammerInternalConst(TT_ERR);
    DefineHammerInternalConst(TT_USER);
}

View file

@ -0,0 +1,6 @@
#ifndef HAMMER_EXT_TOKEN_TYPE__H
#define HAMMER_EXT_TOKEN_TYPE__H
void Init_token_type(void);
#endif

View file

@ -0,0 +1,23 @@
#encoding: UTF-8
# Gem specification for the hammer-parser Ruby bindings.
Gem::Specification.new do |s|
  s.name = 'hammer-parser'
  s.version = '0.1.0'
  s.summary = 'Ruby bindings to the hammer parsing library.'
  s.description = s.summary # TODO: longer description?
  s.authors = ['Meredith L. Patterson', 'TQ Hirsch', 'Jakob Rath']
  # TODO:
  # s.email = ...
  # s.homepage = ...

  # README plus every Ruby source below lib/ and test/, as one flat list
  # of paths (no nested arrays).
  # NOTE(review): lib/hammer.rb requires 'hammer/hammer_ext', but neither the
  # C sources nor `s.extensions` are listed here -- confirm before releasing.
  s.files = ['README.md'] + Dir['{lib,test}/**/*.rb']

  # Test files are the *_test.rb files below test/. The pattern is anchored at
  # both ends and the dot is escaped so only real "_test.rb" suffixes match.
  s.test_files = s.files.select { |path| path =~ %r{\Atest/.*_test\.rb\z} }
  s.require_paths = %w[lib]

  s.add_dependency 'ffi', '~> 1.9'
  s.add_dependency 'docile', '~> 1.1' # TODO: Find a way to make this optional
end

View file

@ -0,0 +1,65 @@
require 'hammer/hammer_ext'
require 'hammer/internal'
require 'hammer/parser'
require 'hammer/parser_builder'
# TODO:
# Probably need to rename this file to 'hammer-parser.rb', so
# people can use "require 'hammer-parser'" in their code.
# TODO: Put tests in test/ directory.
# Ad-hoc smoke test of the bindings.
# Guarded so that it only runs when this file is executed directly (with lib/
# on the load path); a plain `require 'hammer'` must not build parsers or
# print anything.
if __FILE__ == $PROGRAM_NAME
  # DSL form, mixing `do ... end` and brace blocks.
  parser = Hammer::Parser.build do
    token 'blah'
    ch 'a'
    choice {
      sequence {
        token 'abc'
      }
      token 'def'
    }
  end
  p parser

  if parser
    p parser.parse 'blahaabcd'
    p parser.parse 'blahadefd'
    p parser.parse 'blahablad'
    p parser.parse 'blaha'
    p parser.parse 'blah'
  end

  # Explicit construction through the parser classes.
  parser = Hammer::Parser::Sequence.new(
    Hammer::Parser::Token.new('Hello '),
    Hammer::Parser::Choice.new(
      Hammer::Parser::Token.new('Mom'),
      Hammer::Parser::Token.new('Dad')
    ),
    Hammer::Parser::Token.new('!')
  )
  p parser.parse 'Hello Mom!'

  # The same parser written with the block DSL.
  parser = Hammer::Parser.build {
    token 'Hello '
    choice {
      token 'Mom'
      token 'Dad'
    }
    token '!'
  }
  p parser.parse 'Hello Mom!'

  # The same parser written with chained ParserBuilder calls.
  parser = Hammer::ParserBuilder.new
    .token('Hello ')
    .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad'))
    .token('!')
    .build
  p parser.parse 'Hello Mom!'

  # not yet working
  #h = Hammer::Parser
  #parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
  #p parser.parse 'Hello Mom!'
end

View file

@ -0,0 +1,60 @@
require 'ffi'
module Hammer
  # Low-level FFI declarations for the functions of the native hammer library.
  # Parsers and parse results are passed around as plain pointers.
  module Internal
    extend FFI::Library

    # Use the bare library name so that FFI maps it to the platform-specific
    # file name (libhammer.so, libhammer.dylib, ...) instead of hard-coding
    # the OS X one.
    ffi_lib 'hammer'

    # run a parser
    # arguments: parser, input, input length
    attach_function :h_parse, [:pointer, :string, :size_t], :pointer

    # build a parser
    attach_function :h_token, [:string, :size_t], :pointer
    attach_function :h_ch, [:uint8], :pointer
    attach_function :h_ch_range, [:uint8, :uint8], :pointer
    # h_int_range takes the integer parser to constrain as its first
    # argument, followed by the lower and upper bound.
    attach_function :h_int_range, [:pointer, :int64, :int64], :pointer
    attach_function :h_bits, [:size_t, :bool], :pointer
    attach_function :h_int64, [], :pointer
    attach_function :h_int32, [], :pointer
    attach_function :h_int16, [], :pointer
    attach_function :h_int8, [], :pointer
    attach_function :h_uint64, [], :pointer
    attach_function :h_uint32, [], :pointer
    attach_function :h_uint16, [], :pointer
    attach_function :h_uint8, [], :pointer
    attach_function :h_whitespace, [:pointer], :pointer
    attach_function :h_left, [:pointer, :pointer], :pointer
    attach_function :h_right, [:pointer, :pointer], :pointer
    attach_function :h_middle, [:pointer, :pointer, :pointer], :pointer
    # h_action
    # h_in
    # h_not_in
    attach_function :h_end_p, [], :pointer
    attach_function :h_nothing_p, [], :pointer
    # Variadic: callers pass (type, value) pairs and terminate the list with
    # `:pointer, nil`.
    attach_function :h_sequence, [:varargs], :pointer
    attach_function :h_choice, [:varargs], :pointer
    attach_function :h_butnot, [:pointer, :pointer], :pointer
    attach_function :h_difference, [:pointer, :pointer], :pointer
    attach_function :h_xor, [:pointer, :pointer], :pointer
    attach_function :h_many, [:pointer], :pointer
    attach_function :h_many1, [:pointer], :pointer
    # h_repeat_n
    # h_optional
    # h_ignore
    # h_sepBy
    # h_sepBy1
    # h_epsilon_p
    # h_length_value
    # h_attr_bool
    # h_and
    # h_not
    # h_indirect
    # h_bind_indirect

    # free the parse result
    # h_parse_result_free

    # TODO: Does the HParser* need to be freed?
  end
end

View file

@ -0,0 +1,146 @@
module Hammer
  # Ruby wrapper around a native hammer parser.
  #
  # Don't create new instances with Hammer::Parser.new; use one of the
  # subclasses (Hammer::Parser::Token etc.) or the constructor methods
  # (Hammer::Parser.int64, Hammer::Parser.token etc.) instead.
  class Parser
    # The native parser handle as returned by the Hammer::Internal.h_* call.
    attr_reader :h_parser

    def initialize
    end

    # Runs the parser on +data+.
    #
    # Returns true if the parse succeeded and false otherwise. There is no way
    # to access the parsed data yet.
    #
    # Raises RuntimeError if no native parser has been set and ArgumentError
    # if +data+ is not a String.
    def parse(data)
      raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
      raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.

      # The length handed to hammer is a byte count, so use bytesize: length
      # counts characters and would cut multi-byte input short.
      result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize)
      # TODO: Do something with the data (the result is also never freed yet)
      !result.null?
    end

    # Matches the exact byte sequence of +string+.
    class Token < Parser
      def initialize(string)
        # Byte count, not character count (see Parser#parse).
        @h_parser = Hammer::Internal.h_token(string, string.bytesize)
      end
    end

    # Matches a single byte.
    class Ch < Parser
      # +char+ is either an Integer in 0..255 or a String that is exactly one
      # byte long. Multi-byte characters are rejected with ArgumentError:
      # '今'.ord == 20170 does not fit into the uint8 that h_ch takes, so the
      # resulting parser would match an unrelated byte. Use Token for those.
      def initialize(char)
        byte =
          case char
          when Integer then char
          when String then char.getbyte(0) if char.bytesize == 1
          end
        unless byte && byte >= 0 && byte <= 255
          raise ArgumentError, 'expecting a single-byte String or an Integer in 0..255'
        end

        @h_parser = Hammer::Internal.h_ch(byte)
      end
    end

    # Matches all of the given parsers, one after the other.
    class Sequence < Parser
      def initialize(*parsers)
        # Varargs are passed as (type, value) pairs, terminated by a NULL pointer.
        args = parsers.flat_map { |p| [:pointer, p.h_parser] }
        @h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil)
        @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though)
        # TODO: Use (managed?) FFI struct instead of void pointers
      end
    end

    # Matches the first of the given parsers that succeeds.
    class Choice < Parser
      def initialize(*parsers)
        # Varargs are passed as (type, value) pairs, terminated by a NULL pointer.
        args = parsers.flat_map { |p| [:pointer, p.h_parser] }
        @h_parser = Hammer::Internal.h_choice(*args, :pointer, nil)
        @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though)
        # TODO: Use (managed?) FFI struct instead of void pointers
      end
    end

    # Define parser classes that take some number of other parsers.
    # Each entry is [class name, hammer function, number of sub-parsers].
    # TODO: Maybe use -1 for variable number, and use this for Sequence and Choice too
    # TODO: Refactor this code as a method? And call it like: define_parser :Int64, :h_int64, 0
    [
      [:Int64, :h_int64, 0],
      [:Int32, :h_int32, 0],
      [:Int16, :h_int16, 0],
      [:Int8, :h_int8, 0],
      [:UInt64, :h_uint64, 0],
      [:UInt32, :h_uint32, 0],
      [:UInt16, :h_uint16, 0],
      [:UInt8, :h_uint8, 0],
      [:Whitespace, :h_whitespace, 1],
      [:Left, :h_left, 2],
      [:Right, :h_right, 2],
      [:Middle, :h_middle, 3],
      [:End, :h_end_p, 0],
      [:Nothing, :h_nothing_p, 0],
      [:ButNot, :h_butnot, 2],
      [:Difference, :h_difference, 2],
      [:Xor, :h_xor, 2],
      [:Many, :h_many, 1],
      [:Many1, :h_many1, 1]
    ].each do |class_name, h_function_name, _parameter_count|
      # Create new subclass of Hammer::Parser
      klass = Class.new(Hammer::Parser) do
        # Need to use define_method instead of def to be able to access h_function_name in the method's body
        define_method :initialize do |*parsers|
          # The parameter count is not checked here; the h_* methods complain about a wrong number of arguments.
          @h_parser = Hammer::Internal.send(h_function_name, *parsers.map(&:h_parser))
          # TODO: Do we need to store sub-parsers to prevent them from getting garbage-collected?
        end
      end
      # Register class with name Hammer::Parser::ClassName
      Hammer::Parser.const_set class_name, klass
    end

    # Short constructor methods, so parsers can be written almost like in C:
    #   h = Hammer::Parser
    #   parser = h.sequence(h.token('blah'), h.token('other_token'))

    # Same as Hammer::Parser::Token.new(string).
    def self.token(string)
      Token.new(string)
    end

    # Same as Hammer::Parser::Ch.new(char).
    def self.ch(char)
      Ch.new(char)
    end

    # Same as Hammer::Parser::Sequence.new(*parsers).
    def self.sequence(*parsers)
      Sequence.new(*parsers)
    end

    # Same as Hammer::Parser::Choice.new(*parsers).
    def self.choice(*parsers)
      Choice.new(*parsers)
    end

    # Defines a parser constructor with the given name.
    # Options:
    #   hammer_function: name of the hammer function to call (default: 'h_'+name)
    def self.define_parser(name, options = {})
      hammer_function = options[:hammer_function] || ('h_' + name.to_s)

      # Define a new class method
      define_singleton_method name do |*parsers|
        h_parser = Hammer::Internal.send hammer_function, *parsers.map(&:h_parser)
        parser = Hammer::Parser.new
        parser.instance_variable_set :@h_parser, h_parser
        parser
      end
    end
    private_class_method :define_parser

    [
      :int64, :int32, :int16, :int8,
      :uint64, :uint32, :uint16, :uint8,
      :whitespace, :left, :right, :middle,
      :butnot, :difference, :xor, :many, :many1
    ].each { |name| define_parser name }

    # These two hammer functions carry a `_p` suffix, so the default
    # 'h_' + name lookup would call functions that do not exist.
    define_parser :end, hammer_function: :h_end_p
    define_parser :nothing, hammer_function: :h_nothing_p
  end
end

View file

@ -0,0 +1,75 @@
# TODO: Find a way to make docile an optional dependency
# (autoload for this file? and throw some informative error when docile isn't available.
# should also check gem version with a 'gem' call and appropriate version specifier.)
require 'docile'
module Hammer
  class Parser
    # Builds a parser from a DSL block. The block is evaluated against a
    # ParserBuilder, and the parsers it lists are combined as a sequence.
    def self.build(&block)
      builder = ParserBuilder.new
      builder.sequence(&block)
      builder.build
    end
  end

  # Collects parsers and combines them into a single parser.
  # Every DSL method returns the builder itself so that calls can be chained.
  # TODO: Is this even useful for "real" usage?
  class ParserBuilder
    # The parsers collected so far, in the order they were added.
    attr_reader :parsers

    def initialize
      @parsers = []
      # TODO: Store an aggregator, e.g.:
      #   @aggregator = Hammer::Parser::Sequence
      # Sequence is the default, set to Hammer::Parser::Choice for choice() calls
      # In the build method, use @aggregator.new(*@parsers) to build the final parser.
    end

    # Returns the resulting parser: a Sequence when more than one parser was
    # collected, otherwise the only collected parser (nil if there is none).
    def build
      return @parsers.first unless @parsers.length > 1

      Hammer::Parser::Sequence.new(*@parsers)
    end

    # Adds an already constructed parser.
    # TODO: Need to check if that's really needed
    def call(parser)
      @parsers << parser
      self
    end

    # Adds a Token parser for +str+.
    def token(str)
      @parsers << Hammer::Parser::Token.new(str)
      self
    end

    # Adds a Ch parser for +char+.
    def ch(char)
      @parsers << Hammer::Parser::Ch.new(char)
      self
    end

    # Can be called either as ParserBuilder.new.sequence(parser1, parser2, parser3)
    # or as Parser.build { sequence { call parser1; call parser2; call parser3 } }.
    # Parsers given as arguments are appended as they are; a block is evaluated
    # against a fresh builder whose build result is appended as one parser.
    def sequence(*parsers, &block)
      @parsers = @parsers + parsers
      if block_given?
        @parsers << Docile.dsl_eval(ParserBuilder.new, &block).build
      end
      self
    end

    # Adds a Choice parser over the parsers given as arguments plus the
    # parsers collected by evaluating the optional block against a fresh builder.
    def choice(*parsers, &block)
      alternatives = parsers
      alternatives += Docile.dsl_eval(ParserBuilder.new, &block).parsers if block_given?
      @parsers << Hammer::Parser::Choice.new(*alternatives)
      self
    end
  end
end