taglib/toolkit/tstring.cpp
Scott Wheeler f8d1b7bb4f Update the TCON (genre) frame as we create it so that we don't keep ID3v2.3
formatted data sitting around waiting to be written to 2.4 tags.

BUG:132018


git-svn-id: svn://anonsvn.kde.org/home/kde/trunk/kdesupport/taglib@578687 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
2006-08-29 23:06:14 +00:00

762 lines
15 KiB
C++

/***************************************************************************
copyright : (C) 2002, 2003 by Scott Wheeler
email : wheeler@kde.org
***************************************************************************/
/***************************************************************************
* This library is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License version *
* 2.1 as published by the Free Software Foundation. *
* *
* This library is distributed in the hope that it will be useful, but *
* WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public *
* License along with this library; if not, write to the Free Software *
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *
* USA *
***************************************************************************/
#include "tstring.h"
#include "unicode.h"
#include "tdebug.h"
#include <iostream>
namespace TagLib {
inline unsigned short byteSwap(unsigned short x)
{
return ((x) >> 8) & 0xff | ((x) & 0xff) << 8;
}
inline unsigned short combine(unsigned char c1, unsigned char c2)
{
return (c1 << 8) | c2;
}
}
using namespace TagLib;
class String::StringPrivate : public RefCounter
{
public:
StringPrivate(const wstring &s) :
RefCounter(),
data(s),
CString(0) {}
StringPrivate() :
RefCounter(),
CString(0) {}
~StringPrivate() {
delete [] CString;
}
wstring data;
/*!
* This is only used to hold the a pointer to the most recent value of
* toCString.
*/
char *CString;
};
String String::null;
////////////////////////////////////////////////////////////////////////////////
String::String()
{
d = new StringPrivate;
}
String::String(const String &s) : d(s.d)
{
d->ref();
}
String::String(const std::string &s, Type t)
{
d = new StringPrivate;
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
debug("String::String() -- A std::string should not contain UTF16.");
return;
}
int length = s.length();
d->data.resize(length);
wstring::iterator targetIt = d->data.begin();
for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
*targetIt = uchar(*it);
++targetIt;
}
prepare(t);
}
String::String(const wstring &s, Type t)
{
d = new StringPrivate(s);
prepare(t);
}
String::String(const wchar_t *s, Type t)
{
d = new StringPrivate(s);
prepare(t);
}
String::String(const char *s, Type t)
{
d = new StringPrivate;
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
debug("String::String() -- A const char * should not contain UTF16.");
return;
}
int length = ::strlen(s);
d->data.resize(length);
wstring::iterator targetIt = d->data.begin();
for(int i = 0; i < length; i++) {
*targetIt = uchar(s[i]);
++targetIt;
}
prepare(t);
}
String::String(wchar_t c, Type t)
{
d = new StringPrivate;
d->data += c;
prepare(t);
}
String::String(char c, Type t)
{
d = new StringPrivate;
if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
debug("String::String() -- A std::string should not contain UTF16.");
return;
}
d->data += uchar(c);
prepare(t);
}
String::String(const ByteVector &v, Type t)
{
d = new StringPrivate;
if(v.isEmpty())
return;
if(t == Latin1 || t == UTF8) {
int length = 0;
d->data.resize(v.size());
wstring::iterator targetIt = d->data.begin();
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
*targetIt = uchar(*it);
++targetIt;
++length;
}
d->data.resize(length);
}
else {
d->data.resize(v.size() / 2);
wstring::iterator targetIt = d->data.begin();
for(ByteVector::ConstIterator it = v.begin();
it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
it += 2)
{
*targetIt = combine(*it, *(it + 1));
++targetIt;
}
}
prepare(t);
}
////////////////////////////////////////////////////////////////////////////////
String::~String()
{
if(d->deref())
delete d;
}
std::string String::to8Bit(bool unicode) const
{
std::string s;
s.resize(d->data.size());
if(!unicode) {
std::string::iterator targetIt = s.begin();
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
*targetIt = char(*it);
++targetIt;
}
return s;
}
const int outputBufferSize = d->data.size() * 3 + 1;
Unicode::UTF16 *sourceBuffer = new Unicode::UTF16[d->data.size() + 1];
Unicode::UTF8 *targetBuffer = new Unicode::UTF8[outputBufferSize];
for(unsigned int i = 0; i < d->data.size(); i++)
sourceBuffer[i] = Unicode::UTF16(d->data[i]);
const Unicode::UTF16 *source = sourceBuffer;
Unicode::UTF8 *target = targetBuffer;
Unicode::ConversionResult result =
Unicode::ConvertUTF16toUTF8(&source, sourceBuffer + d->data.size(),
&target, targetBuffer + outputBufferSize,
Unicode::lenientConversion);
if(result != Unicode::conversionOK)
debug("String::to8Bit() - Unicode conversion error.");
int newSize = target - targetBuffer;
s.resize(newSize);
targetBuffer[newSize] = 0;
s = (char *) targetBuffer;
delete [] sourceBuffer;
delete [] targetBuffer;
return s;
}
TagLib::wstring String::to32Bit() const
{
return d->data;
}
const char *String::toCString(bool unicode) const
{
delete [] d->CString;
std::string buffer = to8Bit(unicode);
d->CString = new char[buffer.size() + 1];
strcpy(d->CString, buffer.c_str());
return d->CString;
}
String::Iterator String::begin()
{
return d->data.begin();
}
String::ConstIterator String::begin() const
{
return d->data.begin();
}
String::Iterator String::end()
{
return d->data.end();
}
String::ConstIterator String::end() const
{
return d->data.end();
}
int String::find(const String &s, int offset) const
{
wstring::size_type position = d->data.find(s.d->data, offset);
if(position != wstring::npos)
return position;
else
return -1;
}
bool String::startsWith(const String &s) const
{
if(s.length() > length())
return false;
return substr(0, s.length()) == s;
}
String String::substr(uint position, uint n) const
{
if(n > position + d->data.size())
n = d->data.size() - position;
String s;
s.d->data = d->data.substr(position, n);
return s;
}
String &String::append(const String &s)
{
detach();
d->data += s.d->data;
return *this;
}
String String::upper() const
{
String s;
static int shift = 'A' - 'a';
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); ++it) {
if(*it >= 'a' && *it <= 'z')
s.d->data.push_back(*it + shift);
else
s.d->data.push_back(*it);
}
return s;
}
TagLib::uint String::size() const
{
return d->data.size();
}
TagLib::uint String::length() const
{
return size();
}
bool String::isEmpty() const
{
return d->data.size() == 0;
}
bool String::isNull() const
{
return d == null.d;
}
ByteVector String::data(Type t) const
{
ByteVector v;
switch(t) {
case Latin1:
{
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
v.append(char(*it));
break;
}
case UTF8:
{
std::string s = to8Bit(true);
v.setData(s.c_str(), s.length());
break;
}
case UTF16:
{
// Assume that if we're doing UTF16 and not UTF16BE that we want little
// endian encoding. (Byte Order Mark)
v.append(char(0xff));
v.append(char(0xfe));
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
char c1 = *it & 0xff;
char c2 = *it >> 8;
v.append(c1);
v.append(c2);
}
break;
}
case UTF16BE:
{
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
char c1 = *it >> 8;
char c2 = *it & 0xff;
v.append(c2);
v.append(c1);
}
break;
}
case UTF16LE:
{
for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
char c1 = *it & 0xff;
char c2 = *it >> 8;
v.append(c1);
v.append(c2);
}
break;
}
}
return v;
}
int String::toInt() const
{
int value = 0;
bool negative = d->data[0] == '-';
uint i = negative ? 1 : 0;
for(; i < d->data.size() && d->data[i] >= '0' && d->data[i] <= '9'; i++)
value = value * 10 + (d->data[i] - '0');
if(negative)
value = value * -1;
return value;
}
String String::stripWhiteSpace() const
{
wstring::const_iterator begin = d->data.begin();
wstring::const_iterator end = d->data.end();
while(*begin == '\t' || *begin == '\n' || *begin == '\f' ||
*begin == '\r' || *begin == ' ' && begin != end)
{
++begin;
}
if(begin == end)
return null;
// There must be at least one non-whitespace charater here for us to have
// gotten this far, so we should be safe not doing bounds checking.
do {
--end;
} while(*end == '\t' || *end == '\n' ||
*end == '\f' || *end == '\r' || *end == ' ');
return String(wstring(begin, end + 1));
}
String String::number(int n) // static
{
if(n == 0)
return String("0");
String charStack;
bool negative = n < 0;
if(negative)
n = n * -1;
while(n > 0) {
int remainder = n % 10;
charStack += char(remainder + '0');
n = (n - remainder) / 10;
}
String s;
if(negative)
s += '-';
for(int i = charStack.d->data.size() - 1; i >= 0; i--)
s += charStack.d->data[i];
return s;
}
TagLib::wchar &String::operator[](int i)
{
return d->data[i];
}
const TagLib::wchar &String::operator[](int i) const
{
return d->data[i];
}
bool String::operator==(const String &s) const
{
return d == s.d || d->data == s.d->data;
}
String &String::operator+=(const String &s)
{
detach();
d->data += s.d->data;
return *this;
}
String &String::operator+=(const wchar_t *s)
{
detach();
d->data += s;
return *this;
}
String &String::operator+=(const char *s)
{
detach();
for(int i = 0; s[i] != 0; i++)
d->data += uchar(s[i]);
return *this;
}
String &String::operator+=(wchar_t c)
{
detach();
d->data += c;
return *this;
}
String &String::operator+=(char c)
{
d->data += uchar(c);
return *this;
}
String &String::operator=(const String &s)
{
if(&s == this)
return *this;
if(d->deref())
delete d;
d = s.d;
d->ref();
return *this;
}
String &String::operator=(const std::string &s)
{
if(d->deref())
delete d;
d = new StringPrivate;
d->data.resize(s.size());
wstring::iterator targetIt = d->data.begin();
for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
*targetIt = uchar(*it);
++targetIt;
}
return *this;
}
String &String::operator=(const wstring &s)
{
if(d->deref())
delete d;
d = new StringPrivate(s);
return *this;
}
String &String::operator=(const wchar_t *s)
{
if(d->deref())
delete d;
d = new StringPrivate(s);
return *this;
}
String &String::operator=(char c)
{
if(d->deref())
delete d;
d = new StringPrivate;
d->data += uchar(c);
return *this;
}
String &String::operator=(wchar_t c)
{
if(d->deref())
delete d;
d = new StringPrivate;
d->data += c;
return *this;
}
String &String::operator=(const char *s)
{
if(d->deref())
delete d;
d = new StringPrivate;
int length = ::strlen(s);
d->data.resize(length);
wstring::iterator targetIt = d->data.begin();
for(int i = 0; i < length; i++) {
*targetIt = uchar(s[i]);
++targetIt;
}
return *this;
}
String &String::operator=(const ByteVector &v)
{
if(d->deref())
delete d;
d = new StringPrivate;
d->data.resize(v.size());
wstring::iterator targetIt = d->data.begin();
uint i = 0;
for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
*targetIt = uchar(*it);
++targetIt;
++i;
}
// If we hit a null in the ByteVector, shrink the string again.
d->data.resize(i);
return *this;
}
bool String::operator<(const String &s) const
{
return d->data < s.d->data;
}
////////////////////////////////////////////////////////////////////////////////
// protected members
////////////////////////////////////////////////////////////////////////////////
void String::detach()
{
if(d->count() > 1) {
d->deref();
d = new StringPrivate(d->data);
}
}
////////////////////////////////////////////////////////////////////////////////
// private members
////////////////////////////////////////////////////////////////////////////////
void String::prepare(Type t)
{
switch(t) {
case UTF16:
{
if(d->data.size() > 1) {
bool swap = d->data[0] != 0xfeff;
d->data.erase(d->data.begin(), d->data.begin() + 1);
if(swap) {
for(uint i = 0; i < d->data.size(); i++)
d->data[i] = byteSwap((unsigned short)d->data[i]);
}
}
else {
debug("String::prepare() - Invalid UTF16 string.");
d->data.erase(d->data.begin(), d->data.end());
}
break;
}
case UTF8:
{
int bufferSize = d->data.size() + 1;
Unicode::UTF8 *sourceBuffer = new Unicode::UTF8[bufferSize];
Unicode::UTF16 *targetBuffer = new Unicode::UTF16[bufferSize];
unsigned int i = 0;
for(; i < d->data.size(); i++)
sourceBuffer[i] = Unicode::UTF8(d->data[i]);
sourceBuffer[i] = 0;
const Unicode::UTF8 *source = sourceBuffer;
Unicode::UTF16 *target = targetBuffer;
Unicode::ConversionResult result =
Unicode::ConvertUTF8toUTF16(&source, sourceBuffer + bufferSize,
&target, targetBuffer + bufferSize,
Unicode::lenientConversion);
if(result != Unicode::conversionOK)
debug("String::prepare() - Unicode conversion error.");
int newSize = target != targetBuffer ? target - targetBuffer - 1 : 0;
d->data.resize(newSize);
for(int i = 0; i < newSize; i++)
d->data[i] = targetBuffer[i];
delete [] sourceBuffer;
delete [] targetBuffer;
break;
}
case UTF16LE:
{
for(uint i = 0; i < d->data.size(); i++)
d->data[i] = byteSwap((unsigned short)d->data[i]);
break;
}
default:
break;
}
}
////////////////////////////////////////////////////////////////////////////////
// related functions
////////////////////////////////////////////////////////////////////////////////
const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2)
{
String s(s1);
s.append(s2);
return s;
}
const TagLib::String operator+(const char *s1, const TagLib::String &s2)
{
String s(s1);
s.append(s2);
return s;
}
const TagLib::String operator+(const TagLib::String &s1, const char *s2)
{
String s(s1);
s.append(s2);
return s;
}
std::ostream &operator<<(std::ostream &s, const String &str)
{
s << str.to8Bit();
return s;
}