From 7e85d9b2020194427b509b0b73aaa5fc4c77dfd6 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Tue, 6 Jan 2015 11:32:31 +0900 Subject: [PATCH 1/5] Simplify overly complicated ByteVector::mid() implementation. Especially remove the useless nested RefCounters. --- taglib/toolkit/tbytevector.cpp | 246 ++++++++++++--------------------- taglib/toolkit/tbytevector.h | 10 ++ tests/test_bytevector.cpp | 19 +++ 3 files changed, 119 insertions(+), 156 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index 946c1c14..1e47ee2e 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -49,7 +49,7 @@ // // http://www.informit.com/isapi/product_id~{9C84DAB4-FE6E-49C5-BB0A-FB50331233EA}/content/index.asp -#define DATA(x) (&(x->data->data[0])) +#define DATA(x) (&(*(x)->data)[0]) namespace TagLib { @@ -275,6 +275,8 @@ ByteVector fromFloat(TFloat value) template long double toFloat80(const ByteVector &v, size_t offset) { + using std::swap; + if(offset > v.size() - 10) { debug("toFloat80() - offset is out of range. Returning 0."); return 0.0; @@ -284,11 +286,11 @@ long double toFloat80(const ByteVector &v, size_t offset) ::memcpy(bytes, v.data() + offset, 10); if(ENDIAN == Utils::LittleEndian) { - std::swap(bytes[0], bytes[9]); - std::swap(bytes[1], bytes[8]); - std::swap(bytes[2], bytes[7]); - std::swap(bytes[3], bytes[6]); - std::swap(bytes[4], bytes[5]); + swap(bytes[0], bytes[9]); + swap(bytes[1], bytes[8]); + swap(bytes[2], bytes[7]); + swap(bytes[3], bytes[6]); + swap(bytes[4], bytes[5]); } // 1-bit sign @@ -326,108 +328,42 @@ long double toFloat80(const ByteVector &v, size_t offset) return val; } -class DataPrivate : public RefCounter +class ByteVector::ByteVectorPrivate { public: - DataPrivate() - { - } + ByteVectorPrivate(uint l, char c) : + counter(new RefCounter()), + data(new std::vector(l, c)), + offset(0), + length(l) {} - DataPrivate(const std::vector &v, uint offset, uint length) - : data(v.begin() + offset, v.begin() + offset + length) - { - } + ByteVectorPrivate(const char *s, uint l) : + counter(new RefCounter()), + data(new std::vector(s, s + l)), + offset(0), + length(l) {} - // A char* can be an iterator. - DataPrivate(const char *begin, const char *end) - : data(begin, end) + ByteVectorPrivate(const ByteVectorPrivate &d, uint o, uint l) : + counter(d.counter), + data(d.data), + offset(d.offset + o), + length(l) { - } - - DataPrivate(uint len, char c) - : data(len, c) - { - } - - std::vector data; -}; - -class ByteVector::ByteVectorPrivate : public RefCounter -{ -public: - ByteVectorPrivate() - : RefCounter() - , data(new DataPrivate()) - , offset(0) - , length(0) - { - } - - ByteVectorPrivate(ByteVectorPrivate *d, uint o, uint l) - : RefCounter() - , data(d->data) - , offset(d->offset + o) - , length(l) - { - data->ref(); - } - - ByteVectorPrivate(const std::vector &v, uint o, uint l) - : RefCounter() - , data(new DataPrivate(v, o, l)) - , offset(0) - , length(l) - { - } - - ByteVectorPrivate(uint l, char c) - : RefCounter() - , data(new DataPrivate(l, c)) - , offset(0) - , length(l) - { - } - - ByteVectorPrivate(const char *s, uint l) - : RefCounter() - , data(new DataPrivate(s, s + l)) - , offset(0) - , length(l) - { - } - - void detach() - { - if(data->count() > 1) { - data->deref(); - data = new DataPrivate(data->data, offset, length); - offset = 0; - } + counter->ref(); } ~ByteVectorPrivate() { - if(data->deref()) + if(counter->deref()) { + delete counter; delete data; - } - - ByteVectorPrivate &operator=(const ByteVectorPrivate &x) - { - if(&x != this) - { - if(data->deref()) - delete data; - - data = x.data; - data->ref(); } - - return *this; } - DataPrivate *data; - uint offset; - uint length; + RefCounter *counter; + std::vector *data; + uint offset; + uint length; }; //////////////////////////////////////////////////////////////////////////////// @@ -483,69 +419,67 @@ ByteVector ByteVector::fromFloat64BE(double value) // public members //////////////////////////////////////////////////////////////////////////////// -ByteVector::ByteVector() - : d(new ByteVectorPrivate()) +ByteVector::ByteVector() : + d(new ByteVectorPrivate(0, '\0')) { } -ByteVector::ByteVector(uint size, char value) - : d(new ByteVectorPrivate(size, value)) +ByteVector::ByteVector(uint size, char value) : + d(new ByteVectorPrivate(size, value)) { } -ByteVector::ByteVector(const ByteVector &v) - : d(v.d) -{ - d->ref(); -} - -ByteVector::ByteVector(const ByteVector &v, uint offset, uint length) - : d(new ByteVectorPrivate(v.d, offset, length)) +ByteVector::ByteVector(const ByteVector &v) : + d(new ByteVectorPrivate(*v.d, 0, v.d->length)) { } -ByteVector::ByteVector(char c) - : d(new ByteVectorPrivate(1, c)) +ByteVector::ByteVector(const ByteVector &v, uint offset, uint length) : + d(new ByteVectorPrivate(*v.d, offset, length)) { } -ByteVector::ByteVector(const char *data, uint length) - : d(new ByteVectorPrivate(data, length)) +ByteVector::ByteVector(char c) : + d(new ByteVectorPrivate(1, c)) { } -ByteVector::ByteVector(const char *data) - : d(new ByteVectorPrivate(data, ::strlen(data))) +ByteVector::ByteVector(const char *data, uint length) : + d(new ByteVectorPrivate(data, length)) +{ +} + +ByteVector::ByteVector(const char *data) : + d(new ByteVectorPrivate(data, ::strlen(data))) { } ByteVector::~ByteVector() { - if(d->deref()) - delete d; + delete d; } ByteVector &ByteVector::setData(const char *s, uint length) { - *this = ByteVector(s, length); + ByteVector(s, length).swap(*this); return *this; } ByteVector &ByteVector::setData(const char *data) { - *this = ByteVector(data); + ByteVector(data).swap(*this); return *this; } char *ByteVector::data() { detach(); - return size() > 0 ? (DATA(d) + d->offset) : 0; + return (size() > 0) ? (DATA(d) + d->offset) : 0; } const char *ByteVector::data() const { - return size() > 0 ? (DATA(d) + d->offset) : 0; + return (size() > 0) ? (DATA(d) + d->offset) : 0; } ByteVector ByteVector::mid(uint index, uint length) const @@ -558,7 +492,7 @@ ByteVector ByteVector::mid(uint index, uint length) const char ByteVector::at(uint index) const { - return index < size() ? DATA(d)[d->offset + index] : 0; + return (index < size()) ? DATA(d)[d->offset + index] : 0; } int ByteVector::find(const ByteVector &pattern, uint offset, int byteAlign) const @@ -675,10 +609,8 @@ int ByteVector::endsWithPartialMatch(const ByteVector &pattern) const ByteVector &ByteVector::append(const ByteVector &v) { - if(v.d->length != 0) - { + if(v.d->length != 0) { detach(); - uint originalSize = size(); resize(originalSize + v.size()); ::memcpy(data() + originalSize, v.data(), v.size()); @@ -687,9 +619,15 @@ ByteVector &ByteVector::append(const ByteVector &v) return *this; } +ByteVector &ByteVector::append(char c) +{ + resize(size() + 1, c); + return *this; +} + ByteVector &ByteVector::clear() { - *this = ByteVector(); + ByteVector().swap(*this); return *this; } @@ -707,8 +645,8 @@ ByteVector &ByteVector::resize(uint size, char padding) // This doesn't reallocate the buffer, since std::vector::resize() doesn't // reallocate the buffer when shrinking. - d->data->data.resize(d->offset + d->length); - d->data->data.resize(d->offset + size, padding); + d->data->resize(d->offset + d->length); + d->data->resize(d->offset + size, padding); d->length = size; } @@ -719,29 +657,29 @@ ByteVector &ByteVector::resize(uint size, char padding) ByteVector::Iterator ByteVector::begin() { detach(); - return d->data->data.begin() + d->offset; + return d->data->begin() + d->offset; } ByteVector::ConstIterator ByteVector::begin() const { - return d->data->data.begin() + d->offset; + return d->data->begin() + d->offset; } ByteVector::Iterator ByteVector::end() { detach(); - return d->data->data.begin() + d->offset + d->length; + return d->data->begin() + d->offset + d->length; } ByteVector::ConstIterator ByteVector::end() const { - return d->data->data.begin() + d->offset + d->length; + return d->data->begin() + d->offset + d->length; } ByteVector::ReverseIterator ByteVector::rbegin() { detach(); - return d->data->data.rbegin() + (d->data->data.size() - (d->offset + d->length)); + return d->data->rbegin() + (d->data->size() - (d->offset + d->length)); } ByteVector::ConstReverseIterator ByteVector::rbegin() const @@ -754,7 +692,7 @@ ByteVector::ConstReverseIterator ByteVector::rbegin() const ByteVector::ReverseIterator ByteVector::rend() { detach(); - return d->data->data.rbegin() + (d->data->data.size() - d->offset); + return d->data->rbegin() + (d->data->size() - d->offset); } ByteVector::ConstReverseIterator ByteVector::rend() const @@ -859,13 +797,13 @@ long double ByteVector::toFloat80BE(size_t offset) const const char &ByteVector::operator[](int index) const { - return d->data->data[d->offset + index]; + return DATA(d)[d->offset + index]; } char &ByteVector::operator[](int index) { detach(); - return d->data->data[d->offset + index]; + return DATA(d)[d->offset + index]; } bool ByteVector::operator==(const ByteVector &v) const @@ -878,7 +816,7 @@ bool ByteVector::operator==(const ByteVector &v) const bool ByteVector::operator!=(const ByteVector &v) const { - return !operator==(v); + return !(*this == v); } bool ByteVector::operator==(const char *s) const @@ -891,7 +829,7 @@ bool ByteVector::operator==(const char *s) const bool ByteVector::operator!=(const char *s) const { - return !operator==(s); + return !(*this == s); } bool ByteVector::operator<(const ByteVector &v) const @@ -905,7 +843,7 @@ bool ByteVector::operator<(const ByteVector &v) const bool ByteVector::operator>(const ByteVector &v) const { - return v < *this; + return (v < *this); } ByteVector ByteVector::operator+(const ByteVector &v) const @@ -917,29 +855,29 @@ ByteVector ByteVector::operator+(const ByteVector &v) const ByteVector &ByteVector::operator=(const ByteVector &v) { - if(&v == this) - return *this; - - if(d->deref()) - delete d; - - d = v.d; - d->ref(); + ByteVector(v).swap(*this); return *this; } ByteVector &ByteVector::operator=(char c) { - *this = ByteVector(c); + ByteVector(c).swap(*this); return *this; } ByteVector &ByteVector::operator=(const char *data) { - *this = ByteVector(data); + ByteVector(data).swap(*this); return *this; } +void ByteVector::swap(ByteVector &v) +{ + using std::swap; + + swap(d, v.d); +} + ByteVector ByteVector::toHex() const { ByteVector encoded(size() * 2); @@ -960,15 +898,11 @@ ByteVector ByteVector::toHex() const void ByteVector::detach() { - if(d->data->count() > 1) { - d->data->deref(); - d->data = new DataPrivate(d->data->data, d->offset, d->length); - d->offset = 0; - } - - if(d->count() > 1) { - d->deref(); - d = new ByteVectorPrivate(d->data->data, d->offset, d->length); + if(d->counter->count() > 1) { + if(!isEmpty()) + ByteVector(DATA(d) + d->offset, d->length).swap(*this); + else + ByteVector().swap(*this); } } } diff --git a/taglib/toolkit/tbytevector.h b/taglib/toolkit/tbytevector.h index 00b833f5..e04d338d 100644 --- a/taglib/toolkit/tbytevector.h +++ b/taglib/toolkit/tbytevector.h @@ -201,6 +201,11 @@ namespace TagLib { */ ByteVector &append(const ByteVector &v); + /*! + * Appends \a c to the end of the ByteVector. + */ + ByteVector &append(char c); + /*! * Clears the data. */ @@ -567,6 +572,11 @@ namespace TagLib { */ ByteVector &operator=(const char *data); + /*! + * Exchanges the content of the ByteVector by the content of \a v. + */ + void swap(ByteVector &v); + /*! * A static, empty ByteVector which is convenient and fast (since returning * an empty or "null" value does not require instantiating a new ByteVector). diff --git a/tests/test_bytevector.cpp b/tests/test_bytevector.cpp index 4b0c098f..0ab68921 100644 --- a/tests/test_bytevector.cpp +++ b/tests/test_bytevector.cpp @@ -43,6 +43,7 @@ class TestByteVector : public CppUnit::TestFixture CPPUNIT_TEST(testReplace); CPPUNIT_TEST(testIterator); CPPUNIT_TEST(testResize); + CPPUNIT_TEST(testAppend); CPPUNIT_TEST_SUITE_END(); public: @@ -312,6 +313,8 @@ public: *it2 = 'I'; CPPUNIT_ASSERT_EQUAL('i', *it1); CPPUNIT_ASSERT_EQUAL('I', *it2); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglib"), v1); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglIb"), v2); ByteVector::ReverseIterator it3 = v1.rbegin(); ByteVector::ReverseIterator it4 = v2.rbegin(); @@ -324,6 +327,8 @@ public: *it4 = 'A'; CPPUNIT_ASSERT_EQUAL('a', *it3); CPPUNIT_ASSERT_EQUAL('A', *it4); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglib"), v1); + CPPUNIT_ASSERT_EQUAL(ByteVector("tAglIb"), v2); ByteVector v3; v3 = ByteVector("0123456789").mid(3, 4); @@ -383,6 +388,20 @@ public: CPPUNIT_ASSERT_EQUAL(-1, c.find('C')); } + void testAppend() + { + ByteVector v1("taglib"); + ByteVector v2 = v1; + + v1.append("ABC"); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglibABC"), v1); + v1.append('1'); + v1.append('2'); + v1.append('3'); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglibABC123"), v1); + CPPUNIT_ASSERT_EQUAL(ByteVector("taglib"), v2); + } + }; CPPUNIT_TEST_SUITE_REGISTRATION(TestByteVector); From 0ffd2e8ab96051d6db3bd8cc6da46fab5754eb20 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Mon, 18 May 2015 01:12:21 +0900 Subject: [PATCH 2/5] Replace DATA macro with more straightforward notations. --- taglib/toolkit/tbytevector.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index 1e47ee2e..70939dda 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -49,8 +49,6 @@ // // http://www.informit.com/isapi/product_id~{9C84DAB4-FE6E-49C5-BB0A-FB50331233EA}/content/index.asp -#define DATA(x) (&(*(x)->data)[0]) - namespace TagLib { static const char hexTable[17] = "0123456789abcdef"; @@ -474,12 +472,12 @@ ByteVector &ByteVector::setData(const char *data) char *ByteVector::data() { detach(); - return (size() > 0) ? (DATA(d) + d->offset) : 0; + return (size() > 0) ? (&d->data->front() + d->offset) : 0; } const char *ByteVector::data() const { - return (size() > 0) ? (DATA(d) + d->offset) : 0; + return (size() > 0) ? (&d->data->front() + d->offset) : 0; } ByteVector ByteVector::mid(uint index, uint length) const @@ -492,7 +490,7 @@ ByteVector ByteVector::mid(uint index, uint length) const char ByteVector::at(uint index) const { - return (index < size()) ? DATA(d)[d->offset + index] : 0; + return (index < size()) ? (*d->data)[d->offset + index] : 0; } int ByteVector::find(const ByteVector &pattern, uint offset, int byteAlign) const @@ -797,13 +795,13 @@ long double ByteVector::toFloat80BE(size_t offset) const const char &ByteVector::operator[](int index) const { - return DATA(d)[d->offset + index]; + return (*d->data)[d->offset + index]; } char &ByteVector::operator[](int index) { detach(); - return DATA(d)[d->offset + index]; + return (*d->data)[d->offset + index]; } bool ByteVector::operator==(const ByteVector &v) const @@ -900,7 +898,7 @@ void ByteVector::detach() { if(d->counter->count() > 1) { if(!isEmpty()) - ByteVector(DATA(d) + d->offset, d->length).swap(*this); + ByteVector(&d->data->front() + d->offset, d->length).swap(*this); else ByteVector().swap(*this); } From 3bce77a3591f012d924d001a9fe7d263e797f1a6 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Mon, 22 Jun 2015 10:37:20 +0900 Subject: [PATCH 3/5] Use linear search instead of the Knuth-Morris-Pratt algorithm in ByteVector::find(). In almost all cases, it handles too small data for KMP to work effectively. --- taglib/toolkit/tbytevector.cpp | 53 ++++++++++------------------------ 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index 70939dda..636a1a34 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -99,10 +99,6 @@ static const uint crcTable[256] = { 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; -/*! - * A templatized straightforward find that works with the types - * std::vector::iterator and std::vector::reverse_iterator. - */ template int findChar( const TIterator dataBegin, const TIterator dataEnd, @@ -125,10 +121,6 @@ int findChar( return -1; } -/*! - * A templatized KMP find that works with the types - * std::vector::iterator and std::vector::reverse_iterator. - */ template int findVector( const TIterator dataBegin, const TIterator dataEnd, @@ -140,46 +132,33 @@ int findVector( if(patternSize == 0 || offset + patternSize > dataSize) return -1; - // n % 0 is invalid - - if(byteAlign == 0) - return -1; - // Special case that pattern contains just single char. if(patternSize == 1) return findChar(dataBegin, dataEnd, *patternBegin, offset, byteAlign); - size_t lastOccurrence[256]; + // n % 0 is invalid - for(size_t i = 0; i < 256; ++i) - lastOccurrence[i] = patternSize; + if(byteAlign == 0) + return -1; - for(size_t i = 0; i < patternSize - 1; ++i) - lastOccurrence[static_cast(*(patternBegin + i))] = patternSize - i - 1; + // We don't use sophisticated algorithms like Knuth-Morris-Pratt here. - TIterator it = dataBegin + patternSize - 1 + offset; - while(true) { - TIterator itBuffer = it; - TIterator itPattern = patternBegin + patternSize - 1; + // In the current implementation of TagLib, data and patterns are too small + // for such algorithms to work effectively. - while(*itBuffer == *itPattern) { - if(itPattern == patternBegin) { - if((itBuffer - dataBegin - offset) % byteAlign == 0) - return (itBuffer - dataBegin); - else - break; - } + for(TIterator it = dataBegin + offset; it < dataEnd - patternSize + 1; it += byteAlign) { - --itBuffer; - --itPattern; + TIterator itData = it; + TIterator itPattern = patternBegin; + + while(*itData == *itPattern) { + ++itData; + ++itPattern; + + if(itPattern == patternEnd) + return (it - dataBegin); } - - const size_t step = lastOccurrence[static_cast(*it)]; - if(dataEnd - step <= it) - break; - - it += step; } return -1; From 10e1fcd686e4aebc008d1046b65d603a9af3353c Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Wed, 26 Aug 2015 16:38:06 +0900 Subject: [PATCH 4/5] Consistent notations between ByteVector::data() and at(). --- taglib/toolkit/tbytevector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index 636a1a34..d2464974 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -451,12 +451,12 @@ ByteVector &ByteVector::setData(const char *data) char *ByteVector::data() { detach(); - return (size() > 0) ? (&d->data->front() + d->offset) : 0; + return (size() > 0) ? (&(*d->data)[d->offset]) : 0; } const char *ByteVector::data() const { - return (size() > 0) ? (&d->data->front() + d->offset) : 0; + return (size() > 0) ? (&(*d->data)[d->offset]) : 0; } ByteVector ByteVector::mid(uint index, uint length) const From e0f1151c5cdb4cc7b9125a70b019997b6e089382 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Thu, 19 Nov 2015 09:27:15 +0900 Subject: [PATCH 5/5] Resolve some conflicts before merging. --- taglib/toolkit/tbytevector.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index d2464974..c65962da 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -661,8 +661,9 @@ ByteVector::ReverseIterator ByteVector::rbegin() ByteVector::ConstReverseIterator ByteVector::rbegin() const { - // we need a const reference to the data vector so we can ensure the const version of rbegin() is called - const std::vector &v = d->data->data; + // Workaround for the Solaris Studio 12.4 compiler. + // We need a const reference to the data vector so we can ensure the const version of rbegin() is called. + const std::vector &v = *d->data; return v.rbegin() + (v.size() - (d->offset + d->length)); } @@ -674,8 +675,9 @@ ByteVector::ReverseIterator ByteVector::rend() ByteVector::ConstReverseIterator ByteVector::rend() const { - // we need a const reference to the data vector so we can ensure the const version of rbegin() is called - const std::vector &v = d->data->data; + // Workaround for the Solaris Studio 12.4 compiler. + // We need a const reference to the data vector so we can ensure the const version of rbegin() is called. + const std::vector &v = *d->data; return v.rbegin() + (v.size() - d->offset); }