From b1dcdc5bd81658e0a38162112795b146e6049c0d Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Thu, 21 Mar 2013 19:50:35 +0900 Subject: [PATCH] Refactor ByteVector --- taglib/toolkit/tbytevector.cpp | 279 ++++++++++++++++++++------------- taglib/toolkit/tbytevector.h | 8 + 2 files changed, 181 insertions(+), 106 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index ee29185c..b982609f 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -30,6 +30,16 @@ #include +#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_IX86) || defined(_M_X64)) +# define TAGLIB_MSC_BYTESWAP +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +# define TAGLIB_GCC_BYTESWAP +#endif + +#ifdef TAGLIB_GCC_BYTESWAP +# include +#endif + #include "tbytevector.h" // This is a bit ugly to keep writing over and over again. @@ -91,128 +101,163 @@ namespace TagLib { }; /*! - * A templatized KMP find that works both with a ByteVector and a ByteVectorMirror. + * A templatized straightforward find that works with the types + * std::vector::iterator and std::vector::reverse_iterator. */ - - template - int vectorFind(const Vector &v, const Vector &pattern, uint offset, int byteAlign) + template + int findChar( + const TIterator dataBegin, const TIterator dataEnd, + char c, size_t offset, size_t byteAlign) { - if(pattern.size() > v.size() || offset > v.size() - 1) + const size_t dataSize = dataEnd - dataBegin; + if(dataSize == 0 || offset > dataSize - 1) return -1; - // Let's go ahead and special case a pattern of size one since that's common - // and easy to make fast. 
+ // n % 0 is invalid - if(pattern.size() == 1) { - char p = pattern[0]; - for(uint i = offset; i < v.size(); i++) { - if(v[i] == p && (i - offset) % byteAlign == 0) - return i; - } + if(byteAlign == 0) return -1; - } - uchar lastOccurrence[256]; - - for(uint i = 0; i < 256; ++i) - lastOccurrence[i] = uchar(pattern.size()); - - for(uint i = 0; i < pattern.size() - 1; ++i) - lastOccurrence[uchar(pattern[i])] = uchar(pattern.size() - i - 1); - - for(uint i = pattern.size() - 1 + offset; i < v.size(); i += lastOccurrence[uchar(v.at(i))]) { - int iBuffer = i; - int iPattern = pattern.size() - 1; - - while(iPattern >= 0 && v.at(iBuffer) == pattern[iPattern]) { - --iBuffer; - --iPattern; - } - - if(-1 == iPattern && (iBuffer + 1 - offset) % byteAlign == 0) - return iBuffer + 1; + for(TIterator it = dataBegin + offset; it < dataEnd; it += byteAlign) { + if(*it == c) + return (it - dataBegin); } return -1; } /*! - * Wraps the accessors to a ByteVector to make the search algorithm access the - * elements in reverse. - * - * \see vectorFind() - * \see ByteVector::rfind() + * A templatized Boyer-Moore-Horspool find that works with the types + * std::vector::iterator and std::vector::reverse_iterator. */ - - class ByteVectorMirror + template + int findVector( + const TIterator dataBegin, const TIterator dataEnd, + const TIterator patternBegin, const TIterator patternEnd, + size_t offset, size_t byteAlign) { - public: - ByteVectorMirror(const ByteVector &source) : v(source) {} + const size_t dataSize = dataEnd - dataBegin; + const size_t patternSize = patternEnd - patternBegin; + if(patternSize > dataSize || offset > dataSize - 1) + return -1; - char operator[](int index) const + // n % 0 is invalid + + if(byteAlign == 0) + return -1; + + // Special case that pattern has single char. 
+ + if(patternSize == 1) + return findChar(dataBegin, dataEnd, *patternBegin, offset, byteAlign); + + size_t lastOccurrence[256]; + + for(size_t i = 0; i < 256; ++i) + lastOccurrence[i] = patternSize; + + for(size_t i = 0; i < patternSize - 1; ++i) + lastOccurrence[static_cast(*(patternBegin + i))] = patternSize - i - 1; + + for(TIterator it = dataBegin + patternSize - 1 + offset; + it < dataEnd; + it += lastOccurrence[static_cast(*it)]) { - return v[v.size() - index - 1]; - } + TIterator itBuffer = it; + TIterator itPattern = patternBegin + patternSize - 1; - char at(int index) const - { - return v.at(v.size() - index - 1); - } - - ByteVectorMirror mid(uint index, uint length = 0xffffffff) const - { - return length == 0xffffffff ? v.mid(0, index) : v.mid(index - length, length); - } - - uint size() const - { - return v.size(); - } - - int find(const ByteVectorMirror &pattern, uint offset = 0, int byteAlign = 1) const - { - ByteVectorMirror v(*this); - - if(offset > 0) { - offset = size() - offset - pattern.size(); - if(offset >= size()) - offset = 0; + while(itPattern >= patternBegin && *itBuffer == *itPattern) { + --itBuffer; + --itPattern; } - const int pos = vectorFind(v, pattern, offset, byteAlign); - - // If the offset is zero then we need to adjust the location in the search - // to be appropriately reversed. If not we need to account for the fact - // that the recursive call (called from the above line) has already ajusted - // for this but that the normal templatized find above will add the offset - // to the returned value. - // - // This is a little confusing at first if you don't first stop to think - // through the logic involved in the forward search. 
- - if(pos == -1) - return -1; - - return size() - pos - pattern.size(); + if(itPattern < patternBegin && (itBuffer - dataBegin + 1 - offset) % byteAlign == 0) + return (itBuffer - dataBegin + 1); } - private: - const ByteVector &v; - }; + return -1; + } + +#if defined(TAGLIB_MSC_BYTESWAP) || defined(TAGLIB_GCC_BYTESWAP) + + template + T byteSwap(T x) + { + // There should be all counterparts of to*() and from*() overloads for integral types. + debug("byteSwap() -- Non specialized version should not be called"); + return 0; + } + +#endif + +#ifdef TAGLIB_MSC_BYTESWAP + + template <> + unsigned short byteSwap(unsigned short x) + { + return _byteswap_ushort(x); + } + + template <> + unsigned int byteSwap(unsigned int x) + { + return _byteswap_ulong(x); + } + + template <> + unsigned long long byteSwap(unsigned long long x) + { + return _byteswap_uint64(x); + } + +#endif + +#ifdef TAGLIB_GCC_BYTESWAP + + template <> + unsigned short byteSwap(unsigned short x) + { + return __bswap_16(x); + } + + template <> + unsigned int byteSwap(unsigned int x) + { + return __bswap_32(x); + } + + template <> + unsigned long long byteSwap(unsigned long long x) + { + return __bswap_64(x); + } + +#endif template T toNumber(const std::vector &data, bool mostSignificantByteFirst) { - T sum = 0; - - if(data.size() <= 0) { - debug("ByteVectorMirror::toNumber() -- data is empty, returning 0"); - return sum; + if(data.empty()) { + debug("toNumber() -- data is empty, returning 0"); + return 0; } const size_t size = sizeof(T); - const size_t last = data.size() > size ? size - 1 : data.size() - 1; +#if defined(TAGLIB_MSC_BYTESWAP) || defined(TAGLIB_GCC_BYTESWAP) + + if(data.size() >= size) + { + if(mostSignificantByteFirst) + return byteSwap(*reinterpret_cast(&data[0])); + else + return *reinterpret_cast(&data[0]); + } + +#endif + + const size_t last = data.size() > size ? 
size - 1 : data.size() - 1; + T sum = 0; for(size_t i = 0; i <= last; i++) sum |= (T) uchar(data[i]) << ((mostSignificantByteFirst ? last - i : i) * 8); @@ -222,14 +267,24 @@ namespace TagLib { template ByteVector fromNumber(T value, bool mostSignificantByteFirst) { - const TagLib::uint size = sizeof(T); + const size_t size = sizeof(T); + +#if defined(TAGLIB_MSC_BYTESWAP) || defined(TAGLIB_GCC_BYTESWAP) + + if(mostSignificantByteFirst) + value = byteSwap(value); + + return ByteVector(reinterpret_cast(&value), size); + +#else ByteVector v(size, 0); - - for(TagLib::uint i = 0; i < size; i++) + for(size_t i = 0; i < size; i++) v[i] = uchar(value >> ((mostSignificantByteFirst ? size - 1 - i : i) * 8) & 0xff); return v; + +#endif } } @@ -281,17 +336,17 @@ ByteVector ByteVector::fromCString(const char *s, uint length) ByteVector ByteVector::fromUInt(uint value, bool mostSignificantByteFirst) { - return fromNumber(value, mostSignificantByteFirst); + return fromNumber(value, mostSignificantByteFirst); } ByteVector ByteVector::fromShort(short value, bool mostSignificantByteFirst) { - return fromNumber(value, mostSignificantByteFirst); + return fromNumber(value, mostSignificantByteFirst); } ByteVector ByteVector::fromLongLong(long long value, bool mostSignificantByteFirst) { - return fromNumber(value, mostSignificantByteFirst); + return fromNumber(value, mostSignificantByteFirst); } //////////////////////////////////////////////////////////////////////////////// @@ -411,19 +466,31 @@ char ByteVector::at(uint index) const int ByteVector::find(const ByteVector &pattern, uint offset, int byteAlign) const { - return vectorFind(*this, pattern, offset, byteAlign); + return findVector::iterator>( + d->data.begin(), d->data.end(), pattern.d->data.begin(), pattern.d->data.end(), offset, byteAlign); +} + +int ByteVector::find(char c, uint offset, int byteAlign) const +{ + return findChar::iterator>( + d->data.begin(), d->data.end(), c, offset, byteAlign); } int ByteVector::rfind(const 
ByteVector &pattern, uint offset, int byteAlign) const { - // Ok, this is a little goofy, but pretty cool after it sinks in. Instead of - // reversing the find method's Boyer-Moore search algorithm I created a "mirror" - // for a ByteVector to reverse the behavior of the accessors. + if(offset > 0) { + offset = size() - offset - pattern.size(); + if(offset >= size()) + offset = 0; + } - ByteVectorMirror v(*this); - ByteVectorMirror p(pattern); + const int pos = findVector::reverse_iterator>( + d->data.rbegin(), d->data.rend(), pattern.d->data.rbegin(), pattern.d->data.rend(), offset, byteAlign); - return v.find(p, offset, byteAlign); + if(pos == -1) + return -1; + else + return size() - pos - pattern.size(); } bool ByteVector::containsAt(const ByteVector &pattern, uint offset, uint patternOffset, uint patternLength) const @@ -629,7 +696,7 @@ TagLib::uint ByteVector::checksum() const TagLib::uint ByteVector::toUInt(bool mostSignificantByteFirst) const { - return toNumber(d->data, mostSignificantByteFirst); + return toNumber(d->data, mostSignificantByteFirst); } short ByteVector::toShort(bool mostSignificantByteFirst) const diff --git a/taglib/toolkit/tbytevector.h b/taglib/toolkit/tbytevector.h index 5c6a0fa9..039b47ed 100644 --- a/taglib/toolkit/tbytevector.h +++ b/taglib/toolkit/tbytevector.h @@ -146,6 +146,14 @@ namespace TagLib { */ int find(const ByteVector &pattern, uint offset = 0, int byteAlign = 1) const; + /*! + * Searches the ByteVector for the character \a c starting at \a offset and + * returns the offset. Returns -1 if the character was not found. If \a byteAlign + * is specified the character will only be matched if it is on a byte divisible + * by \a byteAlign (starting from \a offset). + */ + int find(char c, uint offset = 0, int byteAlign = 1) const; + /*! * Searches the ByteVector for \a pattern starting from either the end of the * vector or \a offset and returns the offset. Returns -1 if the pattern was