Fix parsing of strings with multibyte characters.
This commit is contained in:
parent
3aa2ac2634
commit
cf59ec83ed
2 changed files with 4 additions and 4 deletions
|
|
@@ -23,7 +23,7 @@ module Hammer
|
||||||
raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
|
raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
|
||||||
raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.
|
raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.
|
||||||
|
|
||||||
result = Hammer::Internal.h_parse(@h_parser, data, data.length)
|
result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize)
|
||||||
return result unless result.null?
|
return result unless result.null?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@@ -37,9 +37,8 @@ module Hammer
|
||||||
# Need to copy string to a memory buffer (not just string.dup)
|
# Need to copy string to a memory buffer (not just string.dup)
|
||||||
# * Original string might be modified, this must not affect existing tokens
|
# * Original string might be modified, this must not affect existing tokens
|
||||||
# * We need a constant memory address (Ruby string might be moved around by the Ruby VM)
|
# * We need a constant memory address (Ruby string might be moved around by the Ruby VM)
|
||||||
# * Use string.length instead of h_string.size to handle multibyte characters correctly.
|
|
||||||
buffer = FFI::MemoryPointer.from_string(string)
|
buffer = FFI::MemoryPointer.from_string(string)
|
||||||
h_parser = Hammer::Internal.h_token(buffer, string.length)
|
h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end
|
||||||
|
|
||||||
return Hammer::Parser.new(:token, h_parser, buffer)
|
return Hammer::Parser.new(:token, h_parser, buffer)
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@@ -74,9 +74,10 @@ class ParserTest < Minitest::Test
|
||||||
def test_multibyte_token
|
def test_multibyte_token
|
||||||
parser = Hammer::Parser.build {
|
parser = Hammer::Parser.build {
|
||||||
token '今日'
|
token '今日'
|
||||||
|
token 'a'
|
||||||
end_p
|
end_p
|
||||||
}
|
}
|
||||||
|
|
||||||
refute_nil parser.parse('今日')
|
refute_nil parser.parse('今日a')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue