Fix parsing of strings with multibyte characters.

This commit is contained in:
Jakob Rath 2013-12-16 20:20:27 +01:00 committed by Dan Hirsch
parent 3aa2ac2634
commit cf59ec83ed
2 changed files with 4 additions and 4 deletions

View file

@@ -23,7 +23,7 @@ module Hammer
raise RuntimeError, '@h_parser is nil' if @h_parser.nil? raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.
result = Hammer::Internal.h_parse(@h_parser, data, data.length) result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize)
return result unless result.null? return result unless result.null?
end end
@@ -37,9 +37,8 @@ module Hammer
# Need to copy string to a memory buffer (not just string.dup) # Need to copy string to a memory buffer (not just string.dup)
# * Original string might be modified, this must not affect existing tokens # * Original string might be modified, this must not affect existing tokens
# * We need a constant memory address (Ruby string might be moved around by the Ruby VM) # * We need a constant memory address (Ruby string might be moved around by the Ruby VM)
# * Use string.length instead of h_string.size to handle multibyte characters correctly.
buffer = FFI::MemoryPointer.from_string(string) buffer = FFI::MemoryPointer.from_string(string)
h_parser = Hammer::Internal.h_token(buffer, string.length) h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end
return Hammer::Parser.new(:token, h_parser, buffer) return Hammer::Parser.new(:token, h_parser, buffer)
end end

View file

@@ -74,9 +74,10 @@ class ParserTest < Minitest::Test
def test_multibyte_token def test_multibyte_token
parser = Hammer::Parser.build { parser = Hammer::Parser.build {
token '今日' token '今日'
token 'a'
end_p end_p
} }
refute_nil parser.parse('今日') refute_nil parser.parse('今日a')
end end
end end