h_token: Copy string correctly.

See https://github.com/ffi/ffi/wiki/Core-Concepts#string-memory-allocation for additional information.
This commit is contained in:
Jakob Rath 2013-12-16 17:07:50 +01:00 committed by Dan Hirsch
parent d53ee07499
commit 96dfad0178
3 changed files with 22 additions and 11 deletions

View file

@ -72,4 +72,14 @@ p parser.parse 'abcabd'
p parser.parse 'abdabd' p parser.parse 'abdabd'
p parser.parse 'abd' p parser.parse 'abd'
$r = parser.parse 'abcabd' #$r = parser.parse 'abcabd'
# Test multibyte characters
parser = Hammer::Parser.build {
token '今日'
end_p
}
p ($r = parser.parse('今日')) # should succeed

View file

@ -70,7 +70,7 @@ module Hammer
attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string?
# build a parser # build a parser
attach_function :h_token, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? attach_function :h_token, [:buffer_in, :size_t], :h_parser
attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch, [:uint8], :h_parser
attach_function :h_ch_range, [:uint8, :uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser
attach_function :h_int_range, [:int64, :int64], :h_parser attach_function :h_int_range, [:int64, :int64], :h_parser
@ -87,8 +87,8 @@ module Hammer
attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_left, [:h_parser, :h_parser], :h_parser
attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser
attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser
#attach_function :h_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? #attach_function :h_in, [:buffer_in, :size_t], :h_parser
#attach_function :h_not_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? #attach_function :h_not_in, [:buffer_in, :size_t], :h_parser
attach_function :h_end_p, [], :h_parser attach_function :h_end_p, [], :h_parser
attach_function :h_nothing_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser
attach_function :h_sequence, [:varargs], :h_parser attach_function :h_sequence, [:varargs], :h_parser

View file

@ -6,7 +6,7 @@ module Hammer
# #
# name: Name of the parser. Should be a symbol. # name: Name of the parser. Should be a symbol.
# h_parser: The pointer to the parser as returned by hammer. # h_parser: The pointer to the parser as returned by hammer.
# dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector. # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long as this object lives).
def initialize(name, h_parser, dont_gc) def initialize(name, h_parser, dont_gc)
@name = name @name = name
@h_parser = h_parser @h_parser = h_parser
@ -34,13 +34,14 @@ module Hammer
end end
def self.token(string) def self.token(string)
# TODO: # Need to copy string to a memory buffer (not just string.dup)
# This might fail in JRuby. # * Original string might be modified, this must not affect existing tokens
# See "String Memory Allocation" at https://github.com/ffi/ffi/wiki/Core-Concepts # * We need a constant memory address (Ruby string might be moved around by the Ruby VM)
h_string = string.dup # * Use string.length instead of h_string.size to handle multibyte characters correctly.
h_parser = Hammer::Internal.h_token(h_string, h_string.length) buffer = FFI::MemoryPointer.from_string(string)
h_parser = Hammer::Internal.h_token(buffer, string.length)
return Hammer::Parser.new(:token, h_parser, h_string) return Hammer::Parser.new(:token, h_parser, buffer)
end end
def self.ch(num) def self.ch(num)