mirror of
https://github.com/taglib/taglib.git
synced 2025-06-04 01:28:21 -04:00
Correctly parse ID3v2.4.0 multiple strings with single BOM (#1055)
Some ID3v2.4.0 frames, such as text information frames, support multiple strings separated by the termination code of the character encoding. If the encoding is $01 (UTF-16 with BOM), all strings shall have the same byte order. In multi-string values written by TagLib, every string element carries its own BOM. However, I have often seen tags where a BOM exists only at the beginning, i.e. at the start of the first string. In such a case, TagLib would return only a list containing the first string and a second, empty string. This commit detects such cases and parses the strings without a BOM according to the BOM of the first string.
This commit is contained in:
parent
50b89ad19a
commit
4e7f844ea6
@ -218,12 +218,32 @@ void TextIdentificationFrame::parseFields(const ByteVector &data)
|
||||
// append those split values to the list and make sure that the new string's
|
||||
// type is the same specified for this frame
|
||||
|
||||
unsigned short firstBom = 0;
|
||||
for(ByteVectorList::ConstIterator it = l.begin(); it != l.end(); it++) {
|
||||
if(!(*it).isEmpty()) {
|
||||
if(d->textEncoding == String::Latin1)
|
||||
if(d->textEncoding == String::Latin1) {
|
||||
d->fieldList.append(Tag::latin1StringHandler()->parse(*it));
|
||||
else
|
||||
d->fieldList.append(String(*it, d->textEncoding));
|
||||
}
|
||||
else {
|
||||
String::Type textEncoding = d->textEncoding;
|
||||
if(textEncoding == String::UTF16) {
|
||||
if(it == l.begin()) {
|
||||
firstBom = it->mid(0, 2).toUShort();
|
||||
}
|
||||
else {
|
||||
unsigned short subsequentBom = it->mid(0, 2).toUShort();
|
||||
if(subsequentBom != 0xfeff && subsequentBom != 0xfffe) {
|
||||
if(firstBom == 0xfeff) {
|
||||
textEncoding = String::UTF16BE;
|
||||
}
|
||||
else if(firstBom == 0xfffe) {
|
||||
textEncoding = String::UTF16LE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
d->fieldList.append(String(*it, textEncoding));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -199,7 +199,14 @@ public:
|
||||
sl.append("Foo");
|
||||
sl.append("Bar");
|
||||
f.setText(sl);
|
||||
CPPUNIT_ASSERT_EQUAL((unsigned int)(4+4+2+1+6+2+6), f.render().size());
|
||||
ByteVector data = f.render();
|
||||
CPPUNIT_ASSERT_EQUAL((unsigned int)(4+4+2+1+6+2+6), data.size());
|
||||
ByteVector noBomBeData("TPE1\x00\x00\x00\x0f\x00\x00\x02"
|
||||
"\0F\0o\0o\0\0"
|
||||
"\0B\0a\0r", 25);
|
||||
CPPUNIT_ASSERT_EQUAL(noBomBeData, data);
|
||||
f.setData(data);
|
||||
CPPUNIT_ASSERT_EQUAL(String("Foo Bar"), f.toString());
|
||||
}
|
||||
|
||||
void testUTF16Delimiter()
|
||||
@ -209,7 +216,32 @@ public:
|
||||
sl.append("Foo");
|
||||
sl.append("Bar");
|
||||
f.setText(sl);
|
||||
CPPUNIT_ASSERT_EQUAL((unsigned int)(4+4+2+1+8+2+8), f.render().size());
|
||||
ByteVector data = f.render();
|
||||
CPPUNIT_ASSERT_EQUAL((unsigned int)(4+4+2+1+8+2+8), data.size());
|
||||
ByteVector multiBomLeData("TPE1\x00\x00\x00\x13\x00\x00\x01\xff\xfe"
|
||||
"F\0o\0o\0\0\0" "\xff\xfe"
|
||||
"B\0a\0r\0", 29);
|
||||
CPPUNIT_ASSERT_EQUAL(multiBomLeData, data);
|
||||
f.setData(data);
|
||||
CPPUNIT_ASSERT_EQUAL(String("Foo Bar"), f.toString());
|
||||
|
||||
ByteVector multiBomBeData("TPE1\x00\x00\x00\x13\x00\x00\x01\xfe\xff"
|
||||
"\0F\0o\0o\0\0" "\xfe\xff"
|
||||
"\0B\0a\0r", 29);
|
||||
f.setData(multiBomBeData);
|
||||
CPPUNIT_ASSERT_EQUAL(String("Foo Bar"), f.toString());
|
||||
|
||||
ByteVector singleBomLeData("TPE1\x00\x00\x00\x13\x00\x00\x01\xff\xfe"
|
||||
"F\0o\0o\0\0\0"
|
||||
"B\0a\0r\0", 27);
|
||||
f.setData(singleBomLeData);
|
||||
CPPUNIT_ASSERT_EQUAL(String("Foo Bar"), f.toString());
|
||||
|
||||
ByteVector singleBomBeData("TPE1\x00\x00\x00\x13\x00\x00\x01\xfe\xff"
|
||||
"\0F\0o\0o\0\0"
|
||||
"\0B\0a\0r", 27);
|
||||
f.setData(singleBomBeData);
|
||||
CPPUNIT_ASSERT_EQUAL(String("Foo Bar"), f.toString());
|
||||
}
|
||||
|
||||
void testBrokenFrame1()
|
||||
|
Loading…
x
Reference in New Issue
Block a user