diff --git a/taglib/toolkit/tstring.cpp b/taglib/toolkit/tstring.cpp index 479b4069..d15fbad7 100644 --- a/taglib/toolkit/tstring.cpp +++ b/taglib/toolkit/tstring.cpp @@ -26,7 +26,7 @@ // This class assumes that std::basic_string has a contiguous and null-terminated buffer. #ifdef HAVE_CONFIG_H -#include +# include #endif #include "tstring.h" @@ -36,6 +36,8 @@ #include "tutils.h" #include +#include +#include #include #include @@ -114,6 +116,114 @@ namespace return len; } + + // Returns the native format of std::wstring. + inline String::Type wcharByteOrder() + { + if(Utils::systemByteOrder() == Utils::LittleEndian) + return String::UTF16LE; + else + return String::UTF16BE; + } + + // Converts a Latin-1 string into UTF-16(without BOM/CPU byte order) + // and copies it to the internal buffer. + inline void copyFromLatin1(std::wstring &data, const char *s, size_t length) + { + data.resize(length); + + for(size_t i = 0; i < length; ++i) + data[i] = static_cast(s[i]); + } + + // Converts a UTF-8 string into UTF-16(without BOM/CPU byte order) + // and copies it to the internal buffer. + inline void copyFromUTF8(std::wstring &data, const char *s, size_t length) + { + data.resize(length); + + if(length > 0) { + const size_t len = UTF8toUTF16(s, length, &data[0], data.size()); + data.resize(len); + } + } + + // Converts a UTF-16 (with BOM), UTF-16LE or UTF16-BE string into + // UTF-16(without BOM/CPU byte order) and copies it to the internal buffer. + inline void copyFromUTF16(std::wstring &data, const wchar_t *s, size_t length, String::Type t) + { + bool swap; + if(t == String::UTF16) { + if(length >= 1 && s[0] == 0xfeff) + swap = false; // Same as CPU endian. No need to swap bytes. + else if(length >= 1 && s[0] == 0xfffe) + swap = true; // Not same as CPU endian. Need to swap bytes. + else { + debug("String::copyFromUTF16() - Invalid UTF16 string."); + return; + } + + s++; + length--; + } + else { + swap = (t != wcharByteOrder()); + } + + data.resize(length); + if(length > 0) { + if(swap) { + for(size_t i = 0; i < length; ++i) + data[i] = Utils::byteSwap(static_cast(s[i])); + } + else { + ::wmemcpy(&data[0], s, length); + } + } + } + + // Converts a UTF-16 (with BOM), UTF-16LE or UTF16-BE string into + // UTF-16(without BOM/CPU byte order) and copies it to the internal buffer. + inline void copyFromUTF16(std::wstring &data, const char *s, size_t length, String::Type t) + { + bool swap; + if(t == String::UTF16) { + if(length < 2) { + debug("String::copyFromUTF16() - Invalid UTF16 string."); + return; + } + + // Uses memcpy instead of reinterpret_cast to avoid an alignment exception. + ushort bom; + ::memcpy(&bom, s, 2); + + if(bom == 0xfeff) + swap = false; // Same as CPU endian. No need to swap bytes. + else if(bom == 0xfffe) + swap = true; // Not same as CPU endian. Need to swap bytes. + else { + debug("String::copyFromUTF16() - Invalid UTF16 string."); + return; + } + + s += 2; + length -= 2; + } + else { + swap = (t != wcharByteOrder()); + } + + data.resize(length / 2); + for(size_t i = 0; i < length / 2; ++i) { + ushort c; + ::memcpy(&c, s, 2); + if(swap) + c = Utils::byteSwap(c); + + data[i] = static_cast(c); + s += 2; + } + } } namespace TagLib { @@ -121,22 +231,12 @@ namespace TagLib { class String::StringPrivate : public RefCounter { public: - StringPrivate() - : RefCounter() - { - } + StringPrivate() : + RefCounter() {} - StringPrivate(const wstring &s) - : RefCounter() - , data(s) - { - } - - StringPrivate(uint n, wchar_t c) - : RefCounter() - , data(static_cast(n), c) - { - } + StringPrivate(uint n, wchar_t c) : + RefCounter(), + data(static_cast(n), c) {} /*! * Stores string in UTF-16. The byte order depends on the CPU endian. @@ -151,109 +251,111 @@ public: String String::null; +//////////////////////////////////////////////////////////////////////////////// +// public members //////////////////////////////////////////////////////////////////////////////// -String::String() - : d(new StringPrivate()) +String::String() : + d(new StringPrivate()) { } -String::String(const String &s) - : d(s.d) +String::String(const String &s) : + d(s.d) { d->ref(); } -String::String(const std::string &s, Type t) - : d(new StringPrivate()) +String::String(const std::string &s, Type t) : + d(new StringPrivate()) { if(t == Latin1) - copyFromLatin1(s.c_str(), s.length()); + copyFromLatin1(d->data, s.c_str(), s.length()); else if(t == String::UTF8) - copyFromUTF8(s.c_str(), s.length()); + copyFromUTF8(d->data, s.c_str(), s.length()); else { debug("String::String() -- std::string should not contain UTF16."); } } -String::String(const wstring &s, Type t) - : d(new StringPrivate()) +String::String(const wstring &s, Type t) : + d(new StringPrivate()) { if(t == UTF16 || t == UTF16BE || t == UTF16LE) { // This looks ugly but needed for the compatibility with TagLib1.8. // Should be removed in TabLib2.0. if (t == UTF16BE) - t = WCharByteOrder; + t = wcharByteOrder(); else if (t == UTF16LE) - t = (WCharByteOrder == UTF16LE ? UTF16BE : UTF16LE); + t = (wcharByteOrder() == UTF16LE ? UTF16BE : UTF16LE); - copyFromUTF16(s.c_str(), s.length(), t); + copyFromUTF16(d->data, s.c_str(), s.length(), t); } else { debug("String::String() -- TagLib::wstring should not contain Latin1 or UTF-8."); } } -String::String(const wchar_t *s, Type t) - : d(new StringPrivate()) +String::String(const wchar_t *s, Type t) : + d(new StringPrivate()) { if(t == UTF16 || t == UTF16BE || t == UTF16LE) { // This looks ugly but needed for the compatibility with TagLib1.8. // Should be removed in TabLib2.0. if (t == UTF16BE) - t = WCharByteOrder; + t = wcharByteOrder(); else if (t == UTF16LE) - t = (WCharByteOrder == UTF16LE ? UTF16BE : UTF16LE); + t = (wcharByteOrder() == UTF16LE ? UTF16BE : UTF16LE); - copyFromUTF16(s, ::wcslen(s), t); + copyFromUTF16(d->data, s, ::wcslen(s), t); } else { debug("String::String() -- const wchar_t * should not contain Latin1 or UTF-8."); } } -String::String(const char *s, Type t) - : d(new StringPrivate()) +String::String(const char *s, Type t) : + d(new StringPrivate()) { if(t == Latin1) - copyFromLatin1(s, ::strlen(s)); + copyFromLatin1(d->data, s, ::strlen(s)); else if(t == String::UTF8) - copyFromUTF8(s, ::strlen(s)); + copyFromUTF8(d->data, s, ::strlen(s)); else { debug("String::String() -- const char * should not contain UTF16."); } } -String::String(wchar_t c, Type t) - : d(new StringPrivate()) +String::String(wchar_t c, Type t) : + d(new StringPrivate()) { if(t == UTF16 || t == UTF16BE || t == UTF16LE) - copyFromUTF16(&c, 1, t); + copyFromUTF16(d->data, &c, 1, t); else { debug("String::String() -- wchar_t should not contain Latin1 or UTF-8."); } } -String::String(char c, Type t) - : d(new StringPrivate(1, static_cast(c))) +String::String(char c, Type t) : + d(new StringPrivate(1, static_cast(c))) { if(t != Latin1 && t != UTF8) { debug("String::String() -- char should not contain UTF16."); } } -String::String(const ByteVector &v, Type t) - : d(new StringPrivate()) +String::String(const ByteVector &v, Type t) : + d(new StringPrivate()) { if(v.isEmpty()) return; if(t == Latin1) - copyFromLatin1(v.data(), v.size()); + copyFromLatin1(d->data, v.data(), v.size()); else if(t == UTF8) - copyFromUTF8(v.data(), v.size()); + copyFromUTF8(d->data, v.data(), v.size()); else - copyFromUTF16(v.data(), v.size(), t); + copyFromUTF16(d->data, v.data(), v.size(), t); // If we hit a null in the ByteVector, shrink the string again. d->data.resize(::wcslen(d->data.c_str())); @@ -484,49 +586,29 @@ int String::toInt() const int String::toInt(bool *ok) const { - int value = 0; + const wchar_t *begin = d->data.c_str(); + wchar_t *end; + errno = 0; + const long value = ::wcstol(begin, &end, 10); - uint size = d->data.size(); - bool negative = size > 0 && d->data[0] == '-'; - uint start = negative ? 1 : 0; - uint i = start; + // Has wcstol() consumed the entire string and not overflowed? + if(ok) { + *ok = (errno == 0 && end > begin && *end == L'\0'); + *ok = (*ok && value > INT_MIN && value < INT_MAX); + } - for(; i < size && d->data[i] >= '0' && d->data[i] <= '9'; i++) - value = value * 10 + (d->data[i] - '0'); - - if(negative) - value = value * -1; - - if(ok) - *ok = (size > start && i == size); - - return value; -} + return static_cast(value);} String String::stripWhiteSpace() const { - wstring::const_iterator begin = d->data.begin(); - wstring::const_iterator end = d->data.end(); + static const wchar_t *WhiteSpaceChars = L"\t\n\f\r "; - while(begin != end && - (*begin == '\t' || *begin == '\n' || *begin == '\f' || - *begin == '\r' || *begin == ' ')) - { - ++begin; - } + const size_t pos1 = d->data.find_first_not_of(WhiteSpaceChars); + if(pos1 == std::wstring::npos) + return String(); - if(begin == end) - return null; - - // There must be at least one non-whitespace character here for us to have - // gotten this far, so we should be safe not doing bounds checking. - - do { - --end; - } while(*end == '\t' || *end == '\n' || - *end == '\f' || *end == '\r' || *end == ' '); - - return String(wstring(begin, end + 1)); + const size_t pos2 = d->data.find_last_not_of(WhiteSpaceChars); + return substr(pos1, pos2 - pos1 + 1); } bool String::isLatin1() const @@ -642,90 +724,62 @@ String &String::operator+=(char c) String &String::operator=(const String &s) { - if(&s == this) - return *this; - - if(d->deref()) - delete d; - d = s.d; - d->ref(); + String(s).swap(*this); return *this; } String &String::operator=(const std::string &s) { - if(d->deref()) - delete d; - - d = new StringPrivate; - copyFromLatin1(s.c_str(), s.length()); - + String(s).swap(*this); return *this; } String &String::operator=(const wstring &s) { - if(d->deref()) - delete d; - d = new StringPrivate(s); + String(s).swap(*this); return *this; } String &String::operator=(const wchar_t *s) { - if(d->deref()) - delete d; - - d = new StringPrivate(s); + String(s).swap(*this); return *this; } String &String::operator=(char c) { - if(d->deref()) - delete d; - - d = new StringPrivate(1, static_cast(c)); + String(c).swap(*this); return *this; } String &String::operator=(wchar_t c) { - if(d->deref()) - delete d; - - d = new StringPrivate(1, c); + String(c, wcharByteOrder()).swap(*this); return *this; } String &String::operator=(const char *s) { - if(d->deref()) - delete d; - - d = new StringPrivate; - copyFromLatin1(s, ::strlen(s)); - + String(s).swap(*this); return *this; } String &String::operator=(const ByteVector &v) { - if(d->deref()) - delete d; - - d = new StringPrivate; - copyFromLatin1(v.data(), v.size()); - - // If we hit a null in the ByteVector, shrink the string again. - d->data.resize(::wcslen(d->data.c_str())); - + String(v).swap(*this); return *this; } +void String::swap(String &s) +{ + using std::swap; + + swap(d, s.d); +} + bool String::operator<(const String &s) const { - return d->data < s.d->data; + return (d->data < s.d->data); } //////////////////////////////////////////////////////////////////////////////// @@ -734,112 +788,13 @@ bool String::operator<(const String &s) const void String::detach() { - if(d->count() > 1) { - d->deref(); - d = new StringPrivate(d->data); - } + if(d->count() > 1) + String(d->data.c_str()).swap(*this); +} } //////////////////////////////////////////////////////////////////////////////// -// private members -//////////////////////////////////////////////////////////////////////////////// - -void String::copyFromLatin1(const char *s, size_t length) -{ - d->data.resize(length); - - for(size_t i = 0; i < length; ++i) - d->data[i] = static_cast(s[i]); -} - -void String::copyFromUTF8(const char *s, size_t length) -{ - d->data.resize(length); - - if(length > 0) { - const size_t len = UTF8toUTF16(s, length, &d->data[0], d->data.size()); - d->data.resize(len); - } -} - -void String::copyFromUTF16(const wchar_t *s, size_t length, Type t) -{ - bool swap; - if(t == UTF16) { - if(length >= 1 && s[0] == 0xfeff) - swap = false; // Same as CPU endian. No need to swap bytes. - else if(length >= 1 && s[0] == 0xfffe) - swap = true; // Not same as CPU endian. Need to swap bytes. - else { - debug("String::copyFromUTF16() - Invalid UTF16 string."); - return; - } - - s++; - length--; - } - else - swap = (t != WCharByteOrder); - - d->data.resize(length); - if(length > 0) { - if(swap) { - for(size_t i = 0; i < length; ++i) - d->data[i] = Utils::byteSwap(static_cast(s[i])); - } - else { - ::wmemcpy(&d->data[0], s, length); - } - } -} - -void String::copyFromUTF16(const char *s, size_t length, Type t) -{ - bool swap; - if(t == UTF16) { - if(length < 2) { - debug("String::copyFromUTF16() - Invalid UTF16 string."); - return; - } - - // Uses memcpy instead of reinterpret_cast to avoid an alignment exception. - ushort bom; - ::memcpy(&bom, s, 2); - - if(bom == 0xfeff) - swap = false; // Same as CPU endian. No need to swap bytes. - else if(bom == 0xfffe) - swap = true; // Not same as CPU endian. Need to swap bytes. - else { - debug("String::copyFromUTF16() - Invalid UTF16 string."); - return; - } - - s += 2; - length -= 2; - } - else - swap = (t != WCharByteOrder); - - d->data.resize(length / 2); - for(size_t i = 0; i < length / 2; ++i) { - ushort c; - ::memcpy(&c, s, 2); - if(swap) - c = Utils::byteSwap(c); - - d->data[i] = static_cast(c); - s += 2; - } -} - -const String::Type String::WCharByteOrder - = (Utils::systemByteOrder() == Utils::BigEndian) ? String::UTF16BE : String::UTF16LE; - -} - -//////////////////////////////////////////////////////////////////////////////// -// related functions +// related non-member functions //////////////////////////////////////////////////////////////////////////////// const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2) diff --git a/taglib/toolkit/tstring.h b/taglib/toolkit/tstring.h index 2263e5e4..d00d4947 100644 --- a/taglib/toolkit/tstring.h +++ b/taglib/toolkit/tstring.h @@ -504,6 +504,11 @@ namespace TagLib { */ String &operator=(const ByteVector &v); + /*! + * Exchanges the content of the String by the content of \a s. + */ + void swap(String &s); + /*! * To be able to use this class in a Map, this operator needed to be * implemented. Returns true if \a s is less than this string in a byte-wise @@ -531,37 +536,6 @@ namespace TagLib { void detach(); private: - /*! - * Converts a \e Latin-1 string into \e UTF-16(without BOM/CPU byte order) - * and copies it to the internal buffer. - */ - void copyFromLatin1(const char *s, size_t length); - - /*! - * Converts a \e UTF-8 string into \e UTF-16(without BOM/CPU byte order) - * and copies it to the internal buffer. - */ - void copyFromUTF8(const char *s, size_t length); - - /*! - * Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into - * \e UTF-16(without BOM/CPU byte order) and copies it to the internal buffer. - */ - void copyFromUTF16(const wchar_t *s, size_t length, Type t); - - /*! - * Converts a \e UTF-16 (with BOM), UTF-16LE or UTF16-BE string into - * \e UTF-16(without BOM/CPU byte order) and copies it to the internal buffer. - */ - void copyFromUTF16(const char *s, size_t length, Type t); - - /*! - * Indicates which byte order of UTF-16 is used to store strings internally. - * - * \note \e String::UTF16BE or \e String::UTF16LE - */ - static const Type WCharByteOrder; - class StringPrivate; StringPrivate *d; }; diff --git a/tests/test_string.cpp b/tests/test_string.cpp index 27839618..73b08073 100644 --- a/tests/test_string.cpp +++ b/tests/test_string.cpp @@ -245,6 +245,12 @@ public: CPPUNIT_ASSERT_EQUAL(String("-123").toInt(), -123); CPPUNIT_ASSERT_EQUAL(String("123aa").toInt(), 123); CPPUNIT_ASSERT_EQUAL(String("-123aa").toInt(), -123); + + String("2147483648").toInt(&ok); + CPPUNIT_ASSERT_EQUAL(ok, false); + + String("-2147483649").toInt(&ok); + CPPUNIT_ASSERT_EQUAL(ok, false); } void testSubstr()