Added support for the token type registry

This commit is contained in:
Dan Hirsch 2014-01-11 01:09:45 +01:00
parent 753120f27e
commit 482e89adf8
3 changed files with 141 additions and 23 deletions

View file

@ -11,7 +11,15 @@ Gem::Specification.new do |s|
files = [] files = []
files << 'README.md' files << 'README.md'
files << Dir['{lib,test}/**/*.rb'] files << [
"lib/hammer/internal.rb",
"lib/hammer/parser.rb",
"lib/hammer/parser_builder.rb",
"lib/hammer.rb",
"lib/minitest/hamer-parser_plugin.rb",
"test/autogen_test.rb",
"test/parser_test.rb"
]
s.files = files s.files = files
s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ } s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ }

View file

@ -6,21 +6,122 @@ module Hammer
ffi_lib 'hammer' ffi_lib 'hammer'
# A variable whose value is looked up through Thread.current, under a
# symbol generated uniquely for each DynamicVariable instance.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0

  # Accepts a default value, a block, or both.
  #
  # * Default only: the first read seen through Thread.current yields
  #   that same object.
  # * Block only: the block is called lazily on first read to produce
  #   the initial value (it receives nil).
  # * Both: the block is called with the default value.
  #
  # +name+ is accepted but is not used by this class.
  def initialize(default=nil, name=nil, &block)
    @@current_symbol += 1
    @sym = "#{SYMBOL_PREFIX}#{@@current_symbol}".to_sym
    @default = default
    @block = block.nil? ? Proc.new { |x| x } : block
  end

  # Returns the current value, computing and storing the initial value
  # the first time it is read.
  def value
    locals = Thread.current
    locals[@sym] = @block.call(@default) unless locals.key?(@sym)
    locals[@sym]
  end

  # Overwrites the current value.
  def value=(new_value)
    Thread.current[@sym] = new_value
  end

  # Runs the block with the variable set to +new_value+ and returns the
  # block's result. The previous value is put back afterwards, even if
  # the block raises.
  def with(new_value, &block)
    saved = value
    begin
      self.value = new_value
      block.call
    ensure
      self.value = saved
    end
  end
end
# Maybe we can implement Hammer::Parser with FFI::DataConverter. # Maybe we can implement Hammer::Parser with FFI::DataConverter.
# That way, most hammer functions won't need to be wrapped. # That way, most hammer functions won't need to be wrapped.
# (Probably need to wrap token, sequence and choice only). # (Probably need to wrap token, sequence and choice only).
# See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi # See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
typedef :pointer, :h_parser typedef :pointer, :h_parser
HTokenType = enum(:none, 1, class HTokenType
:bytes, 2, extend FFI::DataConverter
:sint, 4,
:uint, 8, @@known_type_map = {
:sequence, 16, :none => 1,
:reserved_1, :bytes => 2,
:err, 32, :sint => 4,
:user, 64, :uint => 8,
:max) :sequence => 16,
}
@@inverse_type_map = @@known_type_map.invert
# Registers a new token type called +name+ (String or Symbol) with the
# hammer library and records the number it was given in both lookup maps.
#
# The FFI function is called on Hammer::Internal explicitly: it is
# attached to that module, not to HTokenType, so an implicit-receiver
# call from this class method would not find it.
#
# Both maps are keyed/valued by Symbol so that entries created here look
# the same as the ones from_num creates.
#
# Returns +name+ exactly as it was passed in (the original return value).
#
# NOTE(review): h_allocate_token_type is attached with HTokenType as its
# return type, so FFI may hand back a converted value instead of a raw
# Integer -- confirm that +num+ is numeric here.
def self.new(name)
  name_str = name.to_s
  name_sym = name_str.to_sym

  num = Hammer::Internal.h_allocate_token_type(name_str)
  @@known_type_map[name_sym] = num
  @@inverse_type_map[num] = name_sym
  return name
end
# Translates a token type name into its number.
#
# Names already present in the map are answered from it; otherwise the
# hammer library is asked and the answer is cached in both maps.
#
# The FFI function is called on Hammer::Internal explicitly: it is
# attached to that module, not to HTokenType, so an implicit-receiver
# call from this class method would not find it.
#
# Raises ArgumentError when the library reports a number <= 0.
#
# NOTE(review): h_get_token_type_number is attached with HTokenType as
# its return type, so FFI may hand back a converted value instead of a
# raw Integer -- confirm that the +num <= 0+ comparison sees a number.
def self.from_name(name)
  unless @@known_type_map.key? name
    num = Hammer::Internal.h_get_token_type_number(name.to_s)
    if num <= 0
      raise ArgumentError, "Unknown token type #{name}"
    end
    @@known_type_map[name] = num
    @@inverse_type_map[num] = name
  end
  return @@known_type_map[name]
end
# Translates a token type number into its name (a Symbol).
#
# Numbers already present in the inverse map are answered from it;
# otherwise the hammer library is asked, and a non-nil answer is
# converted to a Symbol and cached in both maps.
#
# The FFI function is called on Hammer::Internal explicitly: it is
# attached to that module, not to HTokenType, so an implicit-receiver
# call from this class method would not find it.
#
# Returns nil when the library has no name for +num+.
def self.from_num(num)
  unless @@inverse_type_map.key? num
    name = Hammer::Internal.h_get_token_type_name(num)
    return nil if name.nil?

    name = name.to_sym
    @@known_type_map[name] = num
    # Was written to a misspelled, never-initialized class variable,
    # which raised NameError instead of caching the result.
    @@inverse_type_map[num] = name
  end
  return @@inverse_type_map[num]
end
# FFI::DataConverter hook: the native type this converter maps to.
def self.native_type
  return FFI::Type::INT
end
# FFI::DataConverter hook: Integers pass through unchanged; any other
# value is resolved to a number with from_name. +ctx+ is not used.
def self.to_native(val, ctx)
  val.is_a?(Integer) ? val : from_name(val)
end
# FFI::DataConverter hook: returns what from_num gives for +val+, or
# +val+ itself when from_num gives nothing. +ctx+ is not used.
def self.from_native(val, ctx)
  resolved = from_num(val)
  return resolved if resolved
  val
end
end
# Define these as soon as possible, so that they can be used
# without fear elsewhere.
#
# The two functions that hand back a token type number are declared as
# returning a plain :int rather than HTokenType. With HTokenType as the
# return type, FFI would pass the result through HTokenType.from_native,
# and HTokenType.new / HTokenType.from_name -- which compare and store
# the number -- could receive a Symbol instead of an Integer.
attach_function :h_allocate_token_type, [:string], :int
attach_function :h_get_token_type_number, [:string], :int
attach_function :h_get_token_type_name, [HTokenType], :string
class HCountedArray < FFI::Struct class HCountedArray < FFI::Struct
layout :capacity, :size_t, layout :capacity, :size_t,
@ -213,5 +314,7 @@ module Hammer
attach_function :h_parse_result_free, [HParseResult.by_ref], :void attach_function :h_parse_result_free, [HParseResult.by_ref], :void
# TODO: Does the HParser* need to be freed? # TODO: Does the HParser* need to be freed?
# Token type registry
end end
end end

View file

@ -1,6 +1,10 @@
require 'hammer/internal'
module Hammer module Hammer
class Parser class Parser
@@saved_objects = Hammer::Internal::DynamicVariable.new nil, "Hammer parse-time pins"
# Don't create new instances with Hammer::Parser.new, # Don't create new instances with Hammer::Parser.new,
# use the constructor methods instead (i.e. Hammer::Parser.int64 etc.) # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.)
# #
@ -26,17 +30,20 @@ module Hammer
raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.
ibuf = FFI::MemoryPointer.from_string(data) ibuf = FFI::MemoryPointer.from_string(data)
result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null @@saved_objects.with([]) do
if result.null? result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null
return nil if result.null?
else return nil
# NOTE: else
# The parse result *must* hold a reference to the parser that created it! # NOTE:
# Otherwise, the parser might get garbage-collected while the result is still valid. # The parse result *must* hold a reference to the parser that created it!
# Any pointers to token strings will then be invalid. # Otherwise, the parser might get garbage-collected while the result is still valid.
result.instance_variable_set :@parser, self # Any pointers to token strings will then be invalid.
return result result.instance_variable_set :@parser, self
end result.instance_variable_set :@pins, @@saved_objects.value
return result
end
end
end end
# Binds an indirect parser. # Binds an indirect parser.
@ -71,7 +78,7 @@ module Hammer
buffer = FFI::MemoryPointer.from_string(string) buffer = FFI::MemoryPointer.from_string(string)
h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end
return Hammer::Parser.new(:token, h_parser, buffer) return Hammer::Parser.new(:token, h_parser, [buffer, string])
end end
def self.marshal_ch_arg(num) def self.marshal_ch_arg(num)
@ -100,7 +107,7 @@ module Hammer
def self.int_range(parser, i1, i2) def self.int_range(parser, i1, i2)
h_parser = Hammer::Internal.h_int_range(parser.h_parser, i1, i2) h_parser = Hammer::Internal.h_int_range(parser.h_parser, i1, i2)
return Hammer::Parser.new(:int_range, h_parser, nil) return Hammer::Parser.new(:int_range, h_parser, [parser])
end end
def self.in(charset) def self.in(charset)