From 2268efb49e5bcfb1208e2b6c5ebc67712eef2694 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Mon, 23 Feb 2015 09:38:12 +0900 Subject: [PATCH] Add a test for strings that contain surrogate pairs. --- tests/test_string.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/test_string.cpp b/tests/test_string.cpp index 866acabb..becce47b 100644 --- a/tests/test_string.cpp +++ b/tests/test_string.cpp @@ -38,6 +38,7 @@ class TestString : public CppUnit::TestFixture CPPUNIT_TEST(testUTF16Decode); CPPUNIT_TEST(testUTF16DecodeInvalidBOM); CPPUNIT_TEST(testUTF16DecodeEmptyWithBOM); + CPPUNIT_TEST(testSurrogatePair); CPPUNIT_TEST(testAppendCharDetach); CPPUNIT_TEST(testAppendStringDetach); CPPUNIT_TEST(testToInt); @@ -119,12 +120,10 @@ public: CPPUNIT_ASSERT(memcmp(String("foo").data(String::Latin1).data(), "foo", 3) == 0); CPPUNIT_ASSERT(memcmp(String("f").data(String::Latin1).data(), "f", 1) == 0); - ByteVector utf16 = unicode.data(String::UTF16); - - // Check to make sure that the BOM is there and that the data size is correct + // Check to make sure that the BOM is there and that the data size is correct + const ByteVector utf16 = unicode.data(String::UTF16); CPPUNIT_ASSERT(utf16.size() == 2 + (unicode.size() * 2)); - CPPUNIT_ASSERT(unicode == String(utf16, String::UTF16)); } @@ -171,6 +170,21 @@ public: CPPUNIT_ASSERT_EQUAL(String(), String(b, String::UTF16)); } + void testSurrogatePair() + { + // Make sure that a surrogate pair is converted into single UTF-8 char + // and vice versa. + + const ByteVector v1("\xff\xfe\x42\xd8\xb7\xdf\xce\x91\x4b\x5c"); + const ByteVector v2("\xf0\xa0\xae\xb7\xe9\x87\x8e\xe5\xb1\x8b"); + + const String s1(v1, String::UTF16); + CPPUNIT_ASSERT_EQUAL(s1.data(String::UTF8), v2); + + const String s2(v2, String::UTF8); + CPPUNIT_ASSERT_EQUAL(s2.data(String::UTF16), v1); + } + void testAppendStringDetach() { String a("a");