mirror of
https://github.com/taglib/taglib.git
synced 2025-06-03 00:58:12 -04:00
Some improvements of String
This commit is contained in:
parent
40997e7fc9
commit
b52cd44c25
@ -63,6 +63,18 @@
|
||||
# define TAGLIB_ATOMIC_GCC
|
||||
#endif
|
||||
|
||||
// Detect CPU endian at compile time rather than run time if possible.
|
||||
// This list is far from complete; additions for other compilers and CPUs are welcome.
|
||||
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) \
|
||||
|| (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) \
|
||||
|| (defined(__clang__) && (defined(__i386__) || defined(__x86_64__)))
|
||||
# define TAGLIB_LITTLE_ENDIAN
|
||||
/*
|
||||
#elif ....
|
||||
# define TAGLIB_BIG_ENDIAN
|
||||
*/
|
||||
#endif
|
||||
|
||||
//! A namespace for all TagLib related classes and functions
|
||||
|
||||
/*!
|
||||
|
@ -23,8 +23,10 @@
|
||||
* http://www.mozilla.org/MPL/ *
|
||||
***************************************************************************/
|
||||
|
||||
// This class assumes that std::basic_string<T> has a contiguous and null-terminated buffer.
|
||||
//
|
||||
|
||||
#include "tstring.h"
|
||||
#include "unicode.h"
|
||||
#include "tdebug.h"
|
||||
#include "tstringlist.h"
|
||||
|
||||
@ -32,167 +34,170 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace TagLib {
|
||||
// Determine if the compiler supports codecvt.
|
||||
|
||||
#if (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||
# define TAGLIB_USE_CODECVT
|
||||
#endif
|
||||
|
||||
#ifdef TAGLIB_USE_CODECVT
|
||||
# include <codecvt>
|
||||
typedef std::codecvt_utf8_utf16<wchar_t> utf8_utf16_t;
|
||||
#else
|
||||
# include "unicode.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
/*!
 * Returns \a x with its two bytes exchanged, i.e. converts a 16-bit value
 * between little-endian and big-endian representation.
 */
inline unsigned short byteSwap(unsigned short x)
{
#if defined(_MSC_VER) && (_MSC_VER >= 1400)

  // Use the compiler-provided byte swap routine on VC++2005 or later.
  return _byteswap_ushort(x);

#else

  // Portable fallback: move the high byte down and the low byte up.
  const unsigned int highByte = (x >> 8) & 0xffU;
  const unsigned int lowByte  = x & 0xffU;
  return static_cast<unsigned short>((lowByte << 8) | highByte);

#endif
}
|
||||
|
||||
/*!
 * Builds a 16-bit value from two bytes: \a c1 becomes the high-order byte
 * and \a c2 the low-order byte.
 */
inline unsigned short combine(unsigned char c1, unsigned char c2)
{
  const unsigned int high = static_cast<unsigned int>(c1) << 8;
  const unsigned int low  = c2;
  return static_cast<unsigned short>(high | low);
}
|
||||
|
||||
#if !defined(TAGLIB_LITTLE_ENDIAN) && !defined(TAGLIB_BIG_ENDIAN)
|
||||
|
||||
/*!
 * Detects the CPU byte order at run time.
 *
 * Returns String::UTF16LE when the least significant byte of a 16-bit value
 * is stored first in memory, String::UTF16BE otherwise.
 */
TagLib::String::Type wcharByteOrder()
{
  // Inspect the first byte of a known 16-bit pattern.  Reading an object
  // through an unsigned char pointer is well defined, unlike reading an
  // inactive union member.
  const TagLib::ushort probe = 0x1234;
  const unsigned char firstByte = *reinterpret_cast<const unsigned char *>(&probe);

  if(firstByte == 0x34)
    return TagLib::String::UTF16LE;
  else
    return TagLib::String::UTF16BE;
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
using namespace TagLib;
|
||||
namespace TagLib {
|
||||
|
||||
class String::StringPrivate : public RefCounter
|
||||
{
|
||||
public:
|
||||
StringPrivate(const wstring &s) :
|
||||
RefCounter(),
|
||||
data(s),
|
||||
CString(0) {}
|
||||
|
||||
StringPrivate() :
|
||||
RefCounter(),
|
||||
CString(0) {}
|
||||
|
||||
~StringPrivate() {
|
||||
delete [] CString;
|
||||
}
|
||||
|
||||
wstring data;
|
||||
StringPrivate(const wstring &s) : RefCounter(), data(s) {}
|
||||
StringPrivate() : RefCounter() {}
|
||||
|
||||
/*!
|
||||
* This is only used to hold a pointer to the most recent value of
|
||||
* toCString.
|
||||
* Stores string in UTF-16. The byte order depends on the CPU endian.
|
||||
*/
|
||||
char *CString;
|
||||
TagLib::wstring data;
|
||||
|
||||
/*!
|
||||
* This is only used to hold the most recent value of toCString().
|
||||
*/
|
||||
std::string cstring;
|
||||
};
|
||||
|
||||
String String::null;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
String::String()
|
||||
String::String()
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
}
|
||||
|
||||
String::String(const String &s) : d(s.d)
|
||||
String::String(const String &s)
|
||||
: d(s.d)
|
||||
{
|
||||
d->ref();
|
||||
}
|
||||
|
||||
String::String(const std::string &s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
|
||||
if(t == Latin1)
|
||||
copyFromLatin1(&s[0], s.length());
|
||||
else if(t == String::UTF8)
|
||||
copyFromUTF8(&s[0], s.length());
|
||||
else {
|
||||
debug("String::String() -- A std::string should not contain UTF16.");
|
||||
return;
|
||||
}
|
||||
|
||||
int length = s.length();
|
||||
d->data.resize(length);
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
|
||||
for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
|
||||
*targetIt = uchar(*it);
|
||||
++targetIt;
|
||||
}
|
||||
|
||||
prepare(t);
|
||||
}
|
||||
|
||||
String::String(const wstring &s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate(s);
|
||||
prepare(t);
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE)
|
||||
copyFromUTF16(s.c_str(), s.length(), t);
|
||||
else {
|
||||
debug("String::String() -- A TagLib::wstring should not contain Latin1 or UTF-8.");
|
||||
}
|
||||
}
|
||||
|
||||
String::String(const wchar_t *s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate(s);
|
||||
prepare(t);
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE)
|
||||
copyFromUTF16(s, ::wcslen(s), t);
|
||||
else {
|
||||
debug("String::String() -- A const wchar_t * should not contain Latin1 or UTF-8.");
|
||||
}
|
||||
}
|
||||
|
||||
String::String(const char *s, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
|
||||
if(t == Latin1)
|
||||
copyFromLatin1(s, ::strlen(s));
|
||||
else if(t == String::UTF8)
|
||||
copyFromUTF8(s, ::strlen(s));
|
||||
else {
|
||||
debug("String::String() -- A const char * should not contain UTF16.");
|
||||
return;
|
||||
}
|
||||
|
||||
int length = ::strlen(s);
|
||||
d->data.resize(length);
|
||||
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
|
||||
for(int i = 0; i < length; i++) {
|
||||
*targetIt = uchar(s[i]);
|
||||
++targetIt;
|
||||
}
|
||||
|
||||
prepare(t);
|
||||
}
|
||||
|
||||
String::String(wchar_t c, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
d->data += c;
|
||||
prepare(t);
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE)
|
||||
copyFromUTF16(&c, 1, t);
|
||||
else {
|
||||
debug("String::String() -- A const wchar_t should not contain Latin1 or UTF-8.");
|
||||
}
|
||||
}
|
||||
|
||||
String::String(char c, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
|
||||
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
|
||||
debug("String::String() -- A std::string should not contain UTF16.");
|
||||
return;
|
||||
if(t == Latin1 || t == UTF8) {
|
||||
d->data.resize(1);
|
||||
d->data[0] = static_cast<uchar>(c);
|
||||
}
|
||||
else {
|
||||
debug("String::String() -- A char should not contain UTF16.");
|
||||
}
|
||||
|
||||
d->data += uchar(c);
|
||||
prepare(t);
|
||||
}
|
||||
|
||||
String::String(const ByteVector &v, Type t)
|
||||
: d(new StringPrivate())
|
||||
{
|
||||
d = new StringPrivate;
|
||||
|
||||
if(v.isEmpty())
|
||||
return;
|
||||
|
||||
if(t == Latin1 || t == UTF8) {
|
||||
|
||||
int length = 0;
|
||||
d->data.resize(v.size());
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
|
||||
*targetIt = uchar(*it);
|
||||
++targetIt;
|
||||
++length;
|
||||
}
|
||||
d->data.resize(length);
|
||||
}
|
||||
else {
|
||||
d->data.resize(v.size() / 2);
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
|
||||
for(ByteVector::ConstIterator it = v.begin();
|
||||
it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
|
||||
it += 2)
|
||||
{
|
||||
*targetIt = combine(*it, *(it + 1));
|
||||
++targetIt;
|
||||
}
|
||||
}
|
||||
prepare(t);
|
||||
if(t == Latin1)
|
||||
copyFromLatin1(v.data(), v.size());
|
||||
else if(t == UTF8)
|
||||
copyFromUTF8(v.data(), v.size());
|
||||
else
|
||||
copyFromUTF16(v.data(), v.size(), t);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -206,46 +211,50 @@ String::~String()
|
||||
std::string String::to8Bit(bool unicode) const
|
||||
{
|
||||
std::string s;
|
||||
s.resize(d->data.size());
|
||||
|
||||
if(!unicode) {
|
||||
s.resize(d->data.size());
|
||||
|
||||
std::string::iterator targetIt = s.begin();
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*targetIt = char(*it);
|
||||
*targetIt = static_cast<char>(*it);
|
||||
++targetIt;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
else {
|
||||
s.resize(d->data.size() * 4 + 1);
|
||||
|
||||
const int outputBufferSize = d->data.size() * 3 + 1;
|
||||
#ifdef TAGLIB_USE_CODECVT
|
||||
|
||||
Unicode::UTF16 *sourceBuffer = new Unicode::UTF16[d->data.size() + 1];
|
||||
Unicode::UTF8 *targetBuffer = new Unicode::UTF8[outputBufferSize];
|
||||
std::mbstate_t st = 0;
|
||||
const wchar_t *source;
|
||||
char *target;
|
||||
std::codecvt_base::result result = utf8_utf16_t().out(
|
||||
st, &d->data[0], &d->data[d->data.size()], source, &s[0], &s[s.size()], target);
|
||||
|
||||
for(unsigned int i = 0; i < d->data.size(); i++)
|
||||
sourceBuffer[i] = Unicode::UTF16(d->data[i]);
|
||||
if(result != utf8_utf16_t::ok) {
|
||||
debug("String::copyFromUTF8() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
const Unicode::UTF16 *source = sourceBuffer;
|
||||
Unicode::UTF8 *target = targetBuffer;
|
||||
#else
|
||||
|
||||
Unicode::ConversionResult result =
|
||||
Unicode::ConvertUTF16toUTF8(&source, sourceBuffer + d->data.size(),
|
||||
&target, targetBuffer + outputBufferSize,
|
||||
Unicode::lenientConversion);
|
||||
const Unicode::UTF16 *source = &d->data[0];
|
||||
Unicode::UTF8 *target = reinterpret_cast<Unicode::UTF8*>(&s[0]);
|
||||
|
||||
if(result != Unicode::conversionOK) {
|
||||
debug("String::to8Bit() - Unicode conversion error.");
|
||||
Unicode::ConversionResult result = Unicode::ConvertUTF16toUTF8(
|
||||
&source, source + d->data.size(),
|
||||
&target, target + s.size(),
|
||||
Unicode::lenientConversion);
|
||||
|
||||
if(result != Unicode::conversionOK) {
|
||||
debug("String::to8Bit() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
s.resize(::strlen(s.c_str()));
|
||||
}
|
||||
|
||||
int newSize = target - targetBuffer;
|
||||
s.resize(newSize);
|
||||
targetBuffer[newSize] = 0;
|
||||
|
||||
s = (char *) targetBuffer;
|
||||
|
||||
delete [] sourceBuffer;
|
||||
delete [] targetBuffer;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -256,22 +265,8 @@ TagLib::wstring String::toWString() const
|
||||
|
||||
const char *String::toCString(bool unicode) const
|
||||
{
|
||||
delete [] d->CString;
|
||||
|
||||
std::string buffer = to8Bit(unicode);
|
||||
d->CString = new char[buffer.size() + 1];
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1400) // VC++2005 or later
|
||||
|
||||
strcpy_s(d->CString, buffer.size() + 1, buffer.c_str());
|
||||
|
||||
#else
|
||||
|
||||
strcpy(d->CString, buffer.c_str());
|
||||
|
||||
#endif
|
||||
|
||||
return d->CString;
|
||||
d->cstring = to8Bit(unicode);
|
||||
return d->cstring.c_str();
|
||||
}
|
||||
|
||||
String::Iterator String::begin()
|
||||
@ -296,23 +291,12 @@ String::ConstIterator String::end() const
|
||||
|
||||
int String::find(const String &s, int offset) const
|
||||
{
|
||||
wstring::size_type position = d->data.find(s.d->data, offset);
|
||||
|
||||
if(position != wstring::npos)
|
||||
return position;
|
||||
else
|
||||
return -1;
|
||||
return d->data.find(s.d->data, offset);
|
||||
}
|
||||
|
||||
int String::rfind(const String &s, int offset) const
|
||||
{
|
||||
wstring::size_type position =
|
||||
d->data.rfind(s.d->data, offset == -1 ? wstring::npos : offset);
|
||||
|
||||
if(position != wstring::npos)
|
||||
return position;
|
||||
else
|
||||
return -1;
|
||||
return d->data.rfind(s.d->data, offset);
|
||||
}
|
||||
|
||||
StringList String::split(const String &separator) const
|
||||
@ -345,9 +329,7 @@ bool String::startsWith(const String &s) const
|
||||
|
||||
String String::substr(uint position, uint n) const
|
||||
{
|
||||
String s;
|
||||
s.d->data = d->data.substr(position, n);
|
||||
return s;
|
||||
return String(d->data.substr(position, n));
|
||||
}
|
||||
|
||||
String &String::append(const String &s)
|
||||
@ -395,67 +377,102 @@ bool String::isNull() const
|
||||
|
||||
ByteVector String::data(Type t) const
|
||||
{
|
||||
ByteVector v;
|
||||
|
||||
switch(t) {
|
||||
|
||||
switch(t)
|
||||
{
|
||||
case Latin1:
|
||||
{
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
|
||||
v.append(char(*it));
|
||||
break;
|
||||
}
|
||||
{
|
||||
ByteVector v(size(), 0);
|
||||
char *p = v.data();
|
||||
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
|
||||
*p++ = static_cast<char>(*it);
|
||||
|
||||
return v;
|
||||
}
|
||||
case UTF8:
|
||||
{
|
||||
std::string s = to8Bit(true);
|
||||
v.setData(s.c_str(), s.length());
|
||||
break;
|
||||
}
|
||||
{
|
||||
ByteVector v(size() * 4 + 1, 0);
|
||||
|
||||
#ifdef TAGLIB_USE_CODECVT
|
||||
|
||||
std::mbstate_t st = 0;
|
||||
const wchar_t *source;
|
||||
char *target;
|
||||
std::codecvt_base::result result = utf8_utf16_t().out(
|
||||
st, &d->data[0], &d->data[d->data.size()], source, v.data(), v.data() + v.size(), target);
|
||||
|
||||
if(result != utf8_utf16_t::ok) {
|
||||
debug("String::data() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
const Unicode::UTF16 *source = &d->data[0];
|
||||
Unicode::UTF8 *target = reinterpret_cast<Unicode::UTF8*>(v.data());
|
||||
|
||||
Unicode::ConversionResult result = Unicode::ConvertUTF16toUTF8(
|
||||
&source, source + d->data.size(),
|
||||
&target, target + v.size(),
|
||||
Unicode::lenientConversion);
|
||||
|
||||
if(result != Unicode::conversionOK) {
|
||||
debug("String::data() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
v.resize(::strlen(v.data()));
|
||||
|
||||
return v;
|
||||
}
|
||||
case UTF16:
|
||||
{
|
||||
// Assume that if we're doing UTF16 and not UTF16BE that we want little
|
||||
// endian encoding. (Byte Order Mark)
|
||||
{
|
||||
ByteVector v(2 + size() * 2, 0);
|
||||
char *p = v.data();
|
||||
|
||||
v.append(char(0xff));
|
||||
v.append(char(0xfe));
|
||||
// Assume that if we're doing UTF16 and not UTF16BE that we want little
|
||||
// endian encoding. (Byte Order Mark)
|
||||
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*p++ = '\xff';
|
||||
*p++ = '\xfe';
|
||||
|
||||
char c1 = *it & 0xff;
|
||||
char c2 = *it >> 8;
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*p++ = static_cast<char>(*it & 0xff);
|
||||
*p++ = static_cast<char>(*it >> 8);
|
||||
}
|
||||
|
||||
v.append(c1);
|
||||
v.append(c2);
|
||||
return v;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case UTF16BE:
|
||||
{
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
{
|
||||
ByteVector v(size() * 2, 0);
|
||||
char *p = v.data();
|
||||
|
||||
char c1 = *it >> 8;
|
||||
char c2 = *it & 0xff;
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*p++ = static_cast<char>(*it >> 8);
|
||||
*p++ = static_cast<char>(*it & 0xff);
|
||||
}
|
||||
|
||||
v.append(c1);
|
||||
v.append(c2);
|
||||
return v;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case UTF16LE:
|
||||
{
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
{
|
||||
ByteVector v(size() * 2, 0);
|
||||
char *p = v.data();
|
||||
|
||||
char c1 = *it & 0xff;
|
||||
char c2 = *it >> 8;
|
||||
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
|
||||
*p++ = static_cast<char>(*it & 0xff);
|
||||
*p++ = static_cast<char>(*it >> 8);
|
||||
}
|
||||
|
||||
v.append(c1);
|
||||
v.append(c2);
|
||||
return v;
|
||||
}
|
||||
default:
|
||||
{
|
||||
debug("String::data() - Invalid Type value.");
|
||||
return ByteVector();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
int String::toInt() const
|
||||
@ -560,7 +577,6 @@ String String::number(int n) // static
|
||||
TagLib::wchar &String::operator[](int i)
|
||||
{
|
||||
detach();
|
||||
|
||||
return d->data[i];
|
||||
}
|
||||
|
||||
@ -638,14 +654,7 @@ String &String::operator=(const std::string &s)
|
||||
delete d;
|
||||
|
||||
d = new StringPrivate;
|
||||
|
||||
d->data.resize(s.size());
|
||||
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
|
||||
*targetIt = uchar(*it);
|
||||
++targetIt;
|
||||
}
|
||||
copyFromLatin1(s.c_str(), s.length());
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -690,15 +699,7 @@ String &String::operator=(const char *s)
|
||||
delete d;
|
||||
|
||||
d = new StringPrivate;
|
||||
|
||||
int length = ::strlen(s);
|
||||
d->data.resize(length);
|
||||
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
for(int i = 0; i < length; i++) {
|
||||
*targetIt = uchar(s[i]);
|
||||
++targetIt;
|
||||
}
|
||||
copyFromLatin1(s, ::strlen(s));
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -709,20 +710,10 @@ String &String::operator=(const ByteVector &v)
|
||||
delete d;
|
||||
|
||||
d = new StringPrivate;
|
||||
d->data.resize(v.size());
|
||||
wstring::iterator targetIt = d->data.begin();
|
||||
|
||||
uint i = 0;
|
||||
|
||||
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
|
||||
*targetIt = uchar(*it);
|
||||
++targetIt;
|
||||
++i;
|
||||
}
|
||||
copyFromLatin1(v.data(), v.size());
|
||||
|
||||
// If we hit a null in the ByteVector, shrink the string again.
|
||||
|
||||
d->data.resize(i);
|
||||
d->data.resize(::wcslen(d->data.c_str()));
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -748,68 +739,132 @@ void String::detach()
|
||||
// private members
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void String::prepare(Type t)
|
||||
void String::copyFromLatin1(const char *s, size_t length)
|
||||
{
|
||||
switch(t) {
|
||||
case UTF16:
|
||||
{
|
||||
if(d->data.size() >= 1 && (d->data[0] == 0xfeff || d->data[0] == 0xfffe)) {
|
||||
bool swap = d->data[0] != 0xfeff;
|
||||
d->data.erase(d->data.begin(), d->data.begin() + 1);
|
||||
if(swap) {
|
||||
for(uint i = 0; i < d->data.size(); i++)
|
||||
d->data[i] = byteSwap((unsigned short)d->data[i]);
|
||||
}
|
||||
}
|
||||
d->data.resize(length);
|
||||
|
||||
for(size_t i = 0; i < length; ++i)
|
||||
d->data[i] = static_cast<uchar>(s[i]);
|
||||
}
|
||||
|
||||
void String::copyFromUTF8(const char *s, size_t length)
|
||||
{
|
||||
d->data.resize(length);
|
||||
|
||||
#ifdef TAGLIB_USE_CODECVT
|
||||
|
||||
std::mbstate_t st = 0;
|
||||
const char *source;
|
||||
wchar_t *target;
|
||||
std::codecvt_base::result result = utf8_utf16_t().in(
|
||||
st, s, s + length, source, &d->data[0], &d->data[d->data.size()], target);
|
||||
|
||||
if(result != utf8_utf16_t::ok) {
|
||||
debug("String::copyFromUTF8() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
const Unicode::UTF8 *source = reinterpret_cast<const Unicode::UTF8 *>(s);
|
||||
Unicode::UTF16 *target = &d->data[0];
|
||||
|
||||
Unicode::ConversionResult result = Unicode::ConvertUTF8toUTF16(
|
||||
&source, source + length,
|
||||
&target, target + length,
|
||||
Unicode::lenientConversion);
|
||||
|
||||
if(result != Unicode::conversionOK) {
|
||||
debug("String::copyFromUTF8() - Unicode conversion error.");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
d->data.resize(::wcslen(d->data.c_str()));
|
||||
}
|
||||
|
||||
void String::copyFromUTF16(const wchar_t *s, size_t length, Type t)
|
||||
{
|
||||
bool swap;
|
||||
if(t == UTF16) {
|
||||
if(length >= 1 && s[0] == 0xfeff)
|
||||
swap = false; // Same as CPU endian. No need to swap bytes.
|
||||
else if(length >= 1 && s[0] == 0xfffe)
|
||||
swap = true; // Not same as CPU endian. Need to swap bytes.
|
||||
else {
|
||||
debug("String::prepare() - Invalid UTF16 string.");
|
||||
d->data.erase(d->data.begin(), d->data.end());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case UTF8:
|
||||
{
|
||||
int bufferSize = d->data.size() + 1;
|
||||
Unicode::UTF8 *sourceBuffer = new Unicode::UTF8[bufferSize];
|
||||
Unicode::UTF16 *targetBuffer = new Unicode::UTF16[bufferSize];
|
||||
|
||||
unsigned int i = 0;
|
||||
for(; i < d->data.size(); i++)
|
||||
sourceBuffer[i] = Unicode::UTF8(d->data[i]);
|
||||
sourceBuffer[i] = 0;
|
||||
|
||||
const Unicode::UTF8 *source = sourceBuffer;
|
||||
Unicode::UTF16 *target = targetBuffer;
|
||||
|
||||
Unicode::ConversionResult result =
|
||||
Unicode::ConvertUTF8toUTF16(&source, sourceBuffer + bufferSize,
|
||||
&target, targetBuffer + bufferSize,
|
||||
Unicode::lenientConversion);
|
||||
|
||||
if(result != Unicode::conversionOK) {
|
||||
debug("String::prepare() - Unicode conversion error.");
|
||||
debug("String::copyFromUTF16() - Invalid UTF16 string.");
|
||||
return;
|
||||
}
|
||||
|
||||
int newSize = target != targetBuffer ? target - targetBuffer - 1 : 0;
|
||||
d->data.resize(newSize);
|
||||
|
||||
for(int i = 0; i < newSize; i++)
|
||||
d->data[i] = targetBuffer[i];
|
||||
|
||||
delete [] sourceBuffer;
|
||||
delete [] targetBuffer;
|
||||
|
||||
break;
|
||||
s++;
|
||||
length--;
|
||||
}
|
||||
case UTF16LE:
|
||||
{
|
||||
for(uint i = 0; i < d->data.size(); i++)
|
||||
d->data[i] = byteSwap((unsigned short)d->data[i]);
|
||||
break;
|
||||
else
|
||||
swap = (t != WCharByteOrder);
|
||||
|
||||
d->data.resize(length);
|
||||
memcpy(&d->data[0], s, length * sizeof(wchar_t));
|
||||
|
||||
if(swap) {
|
||||
for(size_t i = 0; i < length; ++i)
|
||||
d->data[i] = byteSwap(static_cast<unsigned short>(s[i]));
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
template <size_t sizeOfWcharT>
|
||||
void String::internalCopyFromUTF16(const char *s, size_t length, Type t)
|
||||
{
|
||||
// Non specialized version. Used where sizeof(wchar_t) != 2.
|
||||
|
||||
bool swap;
|
||||
if(t == UTF16) {
|
||||
if(length >= 2 && *reinterpret_cast<const TagLib::ushort*>(s) == 0xfeff)
|
||||
swap = false; // Same as CPU endian. No need to swap bytes.
|
||||
else if(length >= 2 && *reinterpret_cast<const TagLib::ushort*>(s) == 0xfffe)
|
||||
swap = true; // Not same as CPU endian. Need to swap bytes.
|
||||
else {
|
||||
debug("String::copyFromUTF16() - Invalid UTF16 string.");
|
||||
return;
|
||||
}
|
||||
|
||||
s += 2;
|
||||
length -= 2;
|
||||
}
|
||||
else
|
||||
swap = (t != WCharByteOrder);
|
||||
|
||||
d->data.resize(length / 2);
|
||||
for(size_t i = 0; i < length / 2; ++i) {
|
||||
d->data[i] = swap ? combine(*s, *(s + 1)) : combine(*(s + 1), *s);
|
||||
s += 2;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void String::internalCopyFromUTF16<2>(const char *s, size_t length, Type t)
|
||||
{
|
||||
// Specialized version for where sizeof(wchar_t) == 2.
|
||||
|
||||
copyFromUTF16(reinterpret_cast<const wchar_t*>(s), length / 2, t);
|
||||
}
|
||||
|
||||
void String::copyFromUTF16(const char *s, size_t length, Type t)
|
||||
{
|
||||
internalCopyFromUTF16<sizeof(wchar_t)>(s, length, t);
|
||||
}
|
||||
|
||||
#if defined(TAGLIB_LITTLE_ENDIAN)
|
||||
|
||||
const String::Type String::WCharByteOrder = String::UTF16LE;
|
||||
|
||||
#elif defined(TAGLIB_BIG_ENDIAN)
|
||||
|
||||
const String::Type String::WCharByteOrder = String::UTF16BE;
|
||||
|
||||
#else
|
||||
|
||||
const String::Type String::WCharByteOrder = wcharByteOrder();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -818,27 +873,28 @@ void String::prepare(Type t)
|
||||
|
||||
const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2)
|
||||
{
|
||||
String s(s1);
|
||||
TagLib::String s(s1);
|
||||
s.append(s2);
|
||||
return s;
|
||||
}
|
||||
|
||||
const TagLib::String operator+(const char *s1, const TagLib::String &s2)
|
||||
{
|
||||
String s(s1);
|
||||
TagLib::String s(s1);
|
||||
s.append(s2);
|
||||
return s;
|
||||
}
|
||||
|
||||
const TagLib::String operator+(const TagLib::String &s1, const char *s2)
|
||||
{
|
||||
String s(s1);
|
||||
TagLib::String s(s1);
|
||||
s.append(s2);
|
||||
return s;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const String &str)
|
||||
std::ostream &operator<<(std::ostream &s, const TagLib::String &str)
|
||||
{
|
||||
s << str.to8Bit();
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -135,12 +135,12 @@ namespace TagLib {
|
||||
/*!
|
||||
* Makes a deep copy of the data in \a s.
|
||||
*/
|
||||
String(const wstring &s, Type t = UTF16BE);
|
||||
String(const wstring &s, Type t = WCharByteOrder);
|
||||
|
||||
/*!
|
||||
* Makes a deep copy of the data in \a s.
|
||||
*/
|
||||
String(const wchar_t *s, Type t = UTF16BE);
|
||||
String(const wchar_t *s, Type t = WCharByteOrder);
|
||||
|
||||
/*!
|
||||
* Makes a deep copy of the data in \a c.
|
||||
@ -451,17 +451,42 @@ namespace TagLib {
|
||||
|
||||
private:
|
||||
/*!
|
||||
* This checks to see if the string is in \e UTF-16 (with BOM) or \e UTF-8
|
||||
* format and if so converts it to \e UTF-16BE for internal use. \e Latin1
|
||||
* does not require conversion since it is a subset of \e UTF-16BE and
|
||||
* \e UTF16-BE requires no conversion since it is used internally.
|
||||
* Converts a \e Latin-1 string into \e UTF-16 (without BOM, in CPU byte order)
|
||||
* and copies it to the internal buffer.
|
||||
*/
|
||||
void prepare(Type t);
|
||||
void copyFromLatin1(const char *s, size_t length);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-8 string into \e UTF-16 (without BOM, in CPU byte order)
|
||||
* and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF8(const char *s, size_t length);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF-16BE string into
|
||||
* \e UTF-16 (without BOM, in CPU byte order) and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF16(const wchar_t *s, size_t length, Type t);
|
||||
|
||||
/*!
|
||||
* Converts a \e UTF-16 (with BOM), UTF-16LE or UTF-16BE string into
|
||||
* \e UTF-16 (without BOM, in CPU byte order) and copies it to the internal buffer.
|
||||
*/
|
||||
void copyFromUTF16(const char *s, size_t length, Type t);
|
||||
|
||||
template <size_t sizeOfWcharT>
|
||||
void internalCopyFromUTF16(const char *s, size_t length, Type t);
|
||||
|
||||
/*!
|
||||
* Indicates which byte order of UTF-16 is used to store strings internally.
|
||||
*
|
||||
* \note \e String::UTF16BE or \e String::UTF16LE
|
||||
*/
|
||||
static const Type WCharByteOrder;
|
||||
|
||||
class StringPrivate;
|
||||
StringPrivate *d;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/*!
|
||||
|
@ -115,8 +115,8 @@
|
||||
namespace Unicode {
|
||||
|
||||
typedef unsigned long UTF32; /* at least 32 bits */
|
||||
typedef unsigned short UTF16; /* at least 16 bits */
|
||||
typedef unsigned char UTF8; /* typically 8 bits */
|
||||
typedef wchar_t UTF16; /* TagLib assumes that wchar_t is sufficient for UTF-16. */
|
||||
typedef unsigned char UTF8; /* typically 8 bits */
|
||||
typedef unsigned char Boolean; /* 0 or 1 */
|
||||
|
||||
typedef enum {
|
||||
|
Loading…
x
Reference in New Issue
Block a user