mirror of
https://github.com/taglib/taglib.git
synced 2025-05-27 21:20:26 -04:00
Fixed conversion from empty String to ByteVector
This commit is contained in:
parent
f733077917
commit
584bbc7c78
@ -45,7 +45,7 @@
|
||||
# include "unicode.h"
|
||||
#endif
|
||||
|
||||
namespace
|
||||
namespace
|
||||
{
|
||||
|
||||
inline unsigned short combine(unsigned char c1, unsigned char c2)
|
||||
@ -142,7 +142,7 @@ namespace
|
||||
debug("String::copyFromUTF8() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,25 +151,25 @@ namespace TagLib {
|
||||
class String::StringPrivate : public RefCounter
|
||||
{
|
||||
public:
|
||||
StringPrivate()
|
||||
: RefCounter()
|
||||
StringPrivate()
|
||||
: RefCounter()
|
||||
{
|
||||
}
|
||||
|
||||
StringPrivate(const wstring &s)
|
||||
StringPrivate(const wstring &s)
|
||||
: RefCounter()
|
||||
, data(s)
|
||||
, data(s)
|
||||
{
|
||||
}
|
||||
|
||||
StringPrivate(uint n, wchar_t c)
|
||||
|
||||
StringPrivate(uint n, wchar_t c)
|
||||
: RefCounter()
|
||||
, data(static_cast<size_t>(n), c)
|
||||
, data(static_cast<size_t>(n), c)
|
||||
{
|
||||
}
|
||||
|
||||
/*!
|
||||
* Stores string in UTF-16. The byte order depends on the CPU endian.
|
||||
* Stores string in UTF-16. The byte order depends on the CPU endian.
|
||||
*/
|
||||
TagLib::wstring data;
|
||||
|
||||
@ -183,12 +183,12 @@ String String::null;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
String::String()
|
||||
String::String()
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
}
|
||||
|
||||
String::String(const String &s)
|
||||
String::String(const String &s)
|
||||
: d(s.d)
|
||||
{
|
||||
d->ref();
|
||||
@ -210,7 +210,7 @@ String::String(const wstring &s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
|
||||
// This looks ugly but needed for the compatibility with TagLib1.8.
|
||||
// This looks ugly but needed for the compatibility with TagLib1.8.
|
||||
// Should be removed in TagLib2.0.
|
||||
if (t == UTF16BE)
|
||||
t = WCharByteOrder;
|
||||
@ -228,7 +228,7 @@ String::String(const wchar_t *s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
|
||||
// This looks ugly but needed for the compatibility with TagLib1.8.
|
||||
// This looks ugly but needed for the compatibility with TagLib1.8.
|
||||
// Should be removed in TagLib2.0.
|
||||
if (t == UTF16BE)
|
||||
t = WCharByteOrder;
|
||||
@ -278,11 +278,11 @@ String::String(const ByteVector &v, Type t)
|
||||
if(v.isEmpty())
|
||||
return;
|
||||
|
||||
if(t == Latin1)
|
||||
if(t == Latin1)
|
||||
copyFromLatin1(v.data(), v.size());
|
||||
else if(t == UTF8)
|
||||
else if(t == UTF8)
|
||||
copyFromUTF8(v.data(), v.size());
|
||||
else
|
||||
else
|
||||
copyFromUTF16(v.data(), v.size(), t);
|
||||
|
||||
// If we hit a null in the ByteVector, shrink the string again.
|
||||
@ -299,25 +299,8 @@ String::~String()
|
||||
|
||||
std::string String::to8Bit(bool unicode) const
|
||||
{
|
||||
std::string s;
|
||||
|
||||
if(!unicode) {
|
||||
s.resize(d->data.size());
|
||||
|
||||
std::string::iterator targetIt = s.begin();
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*targetIt = static_cast<char>(*it);
|
||||
++targetIt;
|
||||
}
|
||||
}
|
||||
else {
|
||||
s.resize(d->data.size() * 4 + 1);
|
||||
|
||||
UTF16toUTF8(&d->data[0], d->data.size(), &s[0], s.size());
|
||||
s.resize(::strlen(s.c_str()));
|
||||
}
|
||||
|
||||
return s;
|
||||
const ByteVector v = data(unicode ? UTF8 : Latin1);
|
||||
return std::string(v.data(), v.size());
|
||||
}
|
||||
|
||||
TagLib::wstring String::toWString() const
|
||||
@ -444,7 +427,7 @@ bool String::isNull() const
|
||||
|
||||
ByteVector String::data(Type t) const
|
||||
{
|
||||
switch(t)
|
||||
switch(t)
|
||||
{
|
||||
case Latin1:
|
||||
{
|
||||
@ -457,14 +440,18 @@ ByteVector String::data(Type t) const
|
||||
return v;
|
||||
}
|
||||
case UTF8:
|
||||
if(!d->data.empty())
|
||||
{
|
||||
ByteVector v(size() * 4 + 1, 0);
|
||||
|
||||
UTF16toUTF8(&d->data[0], d->data.size(), v.data(), v.size());
|
||||
UTF16toUTF8(d->data.c_str(), d->data.size(), v.data(), v.size());
|
||||
v.resize(::strlen(v.data()));
|
||||
|
||||
return v;
|
||||
}
|
||||
else {
|
||||
return ByteVector::null;
|
||||
}
|
||||
case UTF16:
|
||||
{
|
||||
ByteVector v(2 + size() * 2, 0);
|
||||
@ -510,7 +497,7 @@ ByteVector String::data(Type t) const
|
||||
default:
|
||||
{
|
||||
debug("String::data() - Invalid Type value.");
|
||||
return ByteVector();
|
||||
return ByteVector::null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -799,9 +786,9 @@ void String::copyFromUTF16(const wchar_t *s, size_t length, Type t)
|
||||
{
|
||||
bool swap;
|
||||
if(t == UTF16) {
|
||||
if(length >= 1 && s[0] == 0xfeff)
|
||||
if(length >= 1 && s[0] == 0xfeff)
|
||||
swap = false; // Same as CPU endian. No need to swap bytes.
|
||||
else if(length >= 1 && s[0] == 0xfffe)
|
||||
else if(length >= 1 && s[0] == 0xfffe)
|
||||
swap = true; // Not same as CPU endian. Need to swap bytes.
|
||||
else {
|
||||
debug("String::copyFromUTF16() - Invalid UTF16 string.");
|
||||
@ -811,7 +798,7 @@ void String::copyFromUTF16(const wchar_t *s, size_t length, Type t)
|
||||
s++;
|
||||
length--;
|
||||
}
|
||||
else
|
||||
else
|
||||
swap = (t != WCharByteOrder);
|
||||
|
||||
d->data.resize(length);
|
||||
@ -836,9 +823,9 @@ void String::copyFromUTF16(const char *s, size_t length, Type t)
|
||||
ushort bom;
|
||||
::memcpy(&bom, s, 2);
|
||||
|
||||
if(bom == 0xfeff)
|
||||
if(bom == 0xfeff)
|
||||
swap = false; // Same as CPU endian. No need to swap bytes.
|
||||
else if(bom == 0xfffe)
|
||||
else if(bom == 0xfffe)
|
||||
swap = true; // Not same as CPU endian. Need to swap bytes.
|
||||
else {
|
||||
debug("String::copyFromUTF16() - Invalid UTF16 string.");
|
||||
@ -848,7 +835,7 @@ void String::copyFromUTF16(const char *s, size_t length, Type t)
|
||||
s += 2;
|
||||
length -= 2;
|
||||
}
|
||||
else
|
||||
else
|
||||
swap = (t != WCharByteOrder);
|
||||
|
||||
d->data.resize(length / 2);
|
||||
@ -858,7 +845,7 @@ void String::copyFromUTF16(const char *s, size_t length, Type t)
|
||||
}
|
||||
}
|
||||
|
||||
const String::Type String::WCharByteOrder
|
||||
const String::Type String::WCharByteOrder
|
||||
= (Utils::SystemByteOrder == Utils::BigEndian) ? String::UTF16BE : String::UTF16LE;
|
||||
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ namespace TagLib {
|
||||
/*!
|
||||
* Makes a deep copy of the data in \a s.
|
||||
*
|
||||
* \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
|
||||
* \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
|
||||
* of the CPU byte order. If UTF16BE, it will not be swapped. This behavior
|
||||
* will be changed in TagLib2.0.
|
||||
*/
|
||||
@ -144,7 +144,7 @@ namespace TagLib {
|
||||
/*!
|
||||
* Makes a deep copy of the data in \a s.
|
||||
*
|
||||
* \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
|
||||
* \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
|
||||
* of the CPU byte order. If UTF16BE, it will not be swapped. This behavior
|
||||
* will be changed in TagLib2.0.
|
||||
*/
|
||||
@ -186,7 +186,7 @@ namespace TagLib {
|
||||
virtual ~String();
|
||||
|
||||
/*!
|
||||
* Returns a deep copy of this String as an std::string. The returned string
|
||||
* Returns a deep copy of this String as an std::string. The returned string
|
||||
* is encoded in UTF8 if \a unicode is true, otherwise Latin1.
|
||||
*
|
||||
* \see toCString()
|
||||
@ -194,7 +194,7 @@ namespace TagLib {
|
||||
std::string to8Bit(bool unicode = false) const;
|
||||
|
||||
/*!
|
||||
* Returns a deep copy of this String as a wstring. The returned string is
|
||||
* Returns a deep copy of this String as a wstring. The returned string is
|
||||
* encoded in UTF-16 (without BOM/CPU byte order).
|
||||
*
|
||||
* \see toCWString()
|
||||
@ -202,43 +202,43 @@ namespace TagLib {
|
||||
wstring toWString() const;
|
||||
|
||||
/*!
|
||||
* Creates and returns a standard C-style (null-terminated) version of this
|
||||
* String. The returned string is encoded in UTF8 if \a unicode is true,
|
||||
* Creates and returns a standard C-style (null-terminated) version of this
|
||||
* String. The returned string is encoded in UTF8 if \a unicode is true,
|
||||
* otherwise Latin1.
|
||||
*
|
||||
* The returned string is still owned by this String and should not be deleted
|
||||
*
|
||||
* The returned string is still owned by this String and should not be deleted
|
||||
* by the user.
|
||||
*
|
||||
* The returned pointer remains valid until this String instance is destroyed
|
||||
* The returned pointer remains valid until this String instance is destroyed
|
||||
* or toCString() is called again.
|
||||
*
|
||||
* \warning This however has the side effect that the returned string will remain
|
||||
* in memory <b>in addition to</b> other memory that is consumed by this
|
||||
* in memory <b>in addition to</b> other memory that is consumed by this
|
||||
* String instance. So, this method should not be used on large strings or
|
||||
* where memory is critical. Consider using to8Bit() instead to avoid it.
|
||||
*
|
||||
* \see to8Bit()
|
||||
*/
|
||||
const char *toCString(bool unicode = false) const;
|
||||
|
||||
|
||||
/*!
|
||||
* Returns a standard C-style (null-terminated) wide character version of
|
||||
* this String. The returned string is encoded in UTF-16 (without BOM/CPU byte
|
||||
* Returns a standard C-style (null-terminated) wide character version of
|
||||
* this String. The returned string is encoded in UTF-16 (without BOM/CPU byte
|
||||
* order).
|
||||
*
|
||||
* The returned string is still owned by this String and should not be deleted
|
||||
*
|
||||
* The returned string is still owned by this String and should not be deleted
|
||||
* by the user.
|
||||
*
|
||||
* The returned pointer remains valid until this String instance is destroyed
|
||||
* The returned pointer remains valid until this String instance is destroyed
|
||||
* or any other method of this String is called.
|
||||
*
|
||||
* \note This returns a pointer to the String's internal data without any
|
||||
* \note This returns a pointer to the String's internal data without any
|
||||
* conversions.
|
||||
*
|
||||
* \see toWString()
|
||||
*/
|
||||
const wchar_t *toCWString() const;
|
||||
|
||||
|
||||
/*!
|
||||
* Returns an iterator pointing to the beginning of the string.
|
||||
*/
|
||||
@ -333,6 +333,8 @@ namespace TagLib {
|
||||
* Returns a ByteVector containing the string's data. If \a t is Latin1 or
|
||||
* UTF8, this will return a vector of 8 bit characters, otherwise it will use
|
||||
* 16 bit characters.
|
||||
*
|
||||
* \note The returned data is not null terminated.
|
||||
*/
|
||||
ByteVector data(Type t) const;
|
||||
|
||||
@ -484,31 +486,31 @@ namespace TagLib {
|
||||
|
||||
private:
|
||||
/*!
|
||||
* Converts a \e Latin-1 string into \e UTF-16(without BOM/CPU byte order)
|
||||
* Converts a \e Latin-1 string into \e UTF-16(without BOM/CPU byte order)
|
||||
* and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromLatin1(const char *s, size_t length);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-8 string into \e UTF-16(without BOM/CPU byte order)
|
||||
* Converts a \e UTF-8 string into \e UTF-16(without BOM/CPU byte order)
|
||||
* and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF8(const char *s, size_t length);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into
|
||||
* \e UTF-16(without BOM/CPU byte order) and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF16(const wchar_t *s, size_t length, Type t);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into
|
||||
* \e UTF-16(without BOM/CPU byte order) and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF16(const char *s, size_t length, Type t);
|
||||
|
||||
|
||||
/*!
|
||||
* Indicates which byte order of UTF-16 is used to store strings internally.
|
||||
* Indicates which byte order of UTF-16 is used to store strings internally.
|
||||
*
|
||||
* \note \e String::UTF16BE or \e String::UTF16LE
|
||||
*/
|
||||
|
@ -43,6 +43,7 @@ class TestString : public CppUnit::TestFixture
|
||||
CPPUNIT_TEST(testToInt);
|
||||
CPPUNIT_TEST(testSubstr);
|
||||
CPPUNIT_TEST(testNewline);
|
||||
CPPUNIT_TEST(testEncode);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
|
||||
public:
|
||||
@ -242,6 +243,43 @@ public:
|
||||
CPPUNIT_ASSERT_EQUAL(L'\x0a', String(crlf)[4]);
|
||||
}
|
||||
|
||||
void testEncode()
|
||||
{
|
||||
String jpn(L"\u65E5\u672C\u8A9E");
|
||||
ByteVector jpn1 = jpn.data(String::Latin1);
|
||||
ByteVector jpn2 = jpn.data(String::UTF8);
|
||||
ByteVector jpn3 = jpn.data(String::UTF16);
|
||||
ByteVector jpn4 = jpn.data(String::UTF16LE);
|
||||
ByteVector jpn5 = jpn.data(String::UTF16BE);
|
||||
std::string jpn6 = jpn.to8Bit(false);
|
||||
std::string jpn7 = jpn.to8Bit(true);
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\xE5\x2C\x9E"), jpn1);
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"), jpn2);
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\xFF\xFE\xE5\x65\x2C\x67\x9E\x8A"), jpn3);
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\xE5\x65\x2C\x67\x9E\x8A"), jpn4);
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\x65\xE5\x67\x2C\x8A\x9E"), jpn5);
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("\xE5\x2C\x9E"), jpn6);
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"), jpn7);
|
||||
|
||||
String empty;
|
||||
ByteVector empty1 = empty.data(String::Latin1);
|
||||
ByteVector empty2 = empty.data(String::UTF8);
|
||||
ByteVector empty3 = empty.data(String::UTF16);
|
||||
ByteVector empty4 = empty.data(String::UTF16LE);
|
||||
ByteVector empty5 = empty.data(String::UTF16BE);
|
||||
std::string empty6 = empty.to8Bit(false);
|
||||
std::string empty7 = empty.to8Bit(true);
|
||||
|
||||
CPPUNIT_ASSERT(empty1.isEmpty());
|
||||
CPPUNIT_ASSERT(empty2.isEmpty());
|
||||
CPPUNIT_ASSERT_EQUAL(ByteVector("\xFF\xFE"), empty3);
|
||||
CPPUNIT_ASSERT(empty4.isEmpty());
|
||||
CPPUNIT_ASSERT(empty5.isEmpty());
|
||||
CPPUNIT_ASSERT(empty6.empty());
|
||||
CPPUNIT_ASSERT(empty7.empty());
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION(TestString);
|
||||
|
Loading…
Reference in New Issue
Block a user