Check for an invalid UTF-8 sequence consisting of a single char.

A single char can be an invalid UTF-8 sequence. For example, { 0x80 } is invalid.
This commit is contained in:
Tsuda Kageyu 2017-01-30 11:35:39 +09:00
parent 922fd611ae
commit 038b52ae01
2 changed files with 13 additions and 8 deletions

View File

@ -231,10 +231,6 @@ public:
StringPrivate() :
RefCounter() {}
StringPrivate(unsigned int n, wchar_t c) :
RefCounter(),
data(static_cast<size_t>(n), c) {}
/*!
* Stores string in UTF-16. The byte order depends on the CPU endian.
*/
@ -334,9 +330,13 @@ String::String(wchar_t c, Type t) :
}
String::String(char c, Type t) :
d(new StringPrivate(1, static_cast<unsigned char>(c)))
d(new StringPrivate())
{
if(t != Latin1 && t != UTF8) {
if(t == Latin1)
copyFromLatin1(d->data, &c, 1);
else if(t == String::UTF8)
copyFromUTF8(d->data, &c, 1);
else {
debug("String::String() -- char should not contain UTF16.");
}
}

View File

@ -50,7 +50,7 @@ class TestString : public CppUnit::TestFixture
CPPUNIT_TEST(testEncodeNonLatin1);
CPPUNIT_TEST(testEncodeEmpty);
CPPUNIT_TEST(testIterator);
CPPUNIT_TEST(testRedundantUTF8);
CPPUNIT_TEST(testInvalidUTF8);
CPPUNIT_TEST_SUITE_END();
public:
@ -331,12 +331,17 @@ public:
CPPUNIT_ASSERT_EQUAL(L'I', *it2);
}
void testRedundantUTF8()
void testInvalidUTF8()
{
CPPUNIT_ASSERT_EQUAL(String("/"), String(ByteVector("\x2F"), String::UTF8));
CPPUNIT_ASSERT(String(ByteVector("\xC0\xAF"), String::UTF8).isEmpty());
CPPUNIT_ASSERT(String(ByteVector("\xE0\x80\xAF"), String::UTF8).isEmpty());
CPPUNIT_ASSERT(String(ByteVector("\xF0\x80\x80\xAF"), String::UTF8).isEmpty());
CPPUNIT_ASSERT(String(ByteVector("\xF8\x80\x80\x80\x80"), String::UTF8).isEmpty());
CPPUNIT_ASSERT(String(ByteVector("\xFC\x80\x80\x80\x80\x80"), String::UTF8).isEmpty());
CPPUNIT_ASSERT(String('\x80', String::UTF8).isEmpty());
}
};