From 7e1f3545cd13ce3d90f11cd2583fd5fbd4379228 Mon Sep 17 00:00:00 2001
From: Tsuda Kageyu <tsuda.kageyu@gmail.com>
Date: Thu, 2 May 2013 13:36:14 +0900
Subject: [PATCH] IEEE754 numbers support

---
 ConfigureChecks.cmake               |  13 ++-
 config-taglib.h.cmake               |  11 +-
 taglib/ebml/ebmlelement.cpp         |  33 ++----
 taglib/riff/aiff/aiffproperties.cpp |  60 +---------
 taglib/toolkit/taglib.h             |   1 -
 taglib/toolkit/tbytevector.cpp      | 168 +++++++++++++++++++++++++++-
 taglib/toolkit/tbytevector.h        |  20 ++++
 tests/test_bytevector.cpp           |  10 ++
 8 files changed, 227 insertions(+), 89 deletions(-)
diff --git a/ConfigureChecks.cmake b/ConfigureChecks.cmake
index fa525f7e..1b6f4310 100644
--- a/ConfigureChecks.cmake
+++ b/ConfigureChecks.cmake
@@ -14,11 +14,14 @@ if(NOT TAGLIB_BIG_ENDIAN)
   set(TAGLIB_LITTLE_ENDIAN 1)
 endif()
 
-# Determine the size of integral types.
-check_type_size("short"   SIZEOF_SHORT)
-check_type_size("int"     SIZEOF_INT)
-check_type_size("long long" SIZEOF_LONGLONG)
-check_type_size("wchar_t"   SIZEOF_WCHAR_T)
+# Determine the size of numeric types.
+check_type_size("short"       SIZEOF_SHORT)
+check_type_size("int"         SIZEOF_INT)
+check_type_size("long long"   SIZEOF_LONGLONG)
+check_type_size("wchar_t"     SIZEOF_WCHAR_T)
+check_type_size("float"       SIZEOF_FLOAT)
+check_type_size("double"      SIZEOF_DOUBLE)
+check_type_size("long double" SIZEOF_LONGDOUBLE)
 
 # Determine whether or not your compiler supports move semantics.
 check_cxx_source_compiles("
diff --git a/config-taglib.h.cmake b/config-taglib.h.cmake
index 58c1ff14..39b53400 100644
--- a/config-taglib.h.cmake
+++ b/config-taglib.h.cmake
@@ -5,10 +5,13 @@
 #cmakedefine   TAGLIB_BIG_ENDIAN 1
 
 /* Size of integral types */
-#cmakedefine   SIZEOF_SHORT    ${SIZEOF_SHORT}
-#cmakedefine   SIZEOF_INT      ${SIZEOF_INT}
-#cmakedefine   SIZEOF_LONGLONG ${SIZEOF_LONGLONG}
-#cmakedefine   SIZEOF_WCHAR_T  ${SIZEOF_WCHAR_T}
+#cmakedefine   SIZEOF_SHORT      ${SIZEOF_SHORT}
+#cmakedefine   SIZEOF_INT        ${SIZEOF_INT}
+#cmakedefine   SIZEOF_LONGLONG   ${SIZEOF_LONGLONG}
+#cmakedefine   SIZEOF_WCHAR_T    ${SIZEOF_WCHAR_T}
+#cmakedefine   SIZEOF_FLOAT      ${SIZEOF_FLOAT}
+#cmakedefine   SIZEOF_DOUBLE     ${SIZEOF_DOUBLE}
+#cmakedefine   SIZEOF_LONGDOUBLE ${SIZEOF_LONGDOUBLE}
 
 /* Defined if your compiler supports the move semantics */
 #cmakedefine   SUPPORT_MOVE_SEMANTICS 1
diff --git a/taglib/ebml/ebmlelement.cpp b/taglib/ebml/ebmlelement.cpp
index 930c1a4b..1419aa92 100644
--- a/taglib/ebml/ebmlelement.cpp
+++ b/taglib/ebml/ebmlelement.cpp
@@ -340,27 +340,18 @@ EBML::ulli EBML::Element::getAsUnsigned()
 
 long double EBML::Element::getAsFloat()
 {
-  // Very dirty implementation!
-  ByteVector bin = getAsBinary();
-  size_t size = bin.size();
-  ulli sum = 0;
-  
-  // For 0 byte floats and any float that is not defined in the ebml spec.
-  if (size != 4 && size != 8 /*&& size() != 10*/) // XXX: Currently no support for 10 bit floats.
-    return static_cast<long double>(sum);
-  
-  // From toNumber; Might not be portable, since it requires IEEE floats.
-  size_t last = size - 1;
-  for(size_t i = 0; i <= last; i++)
-    sum |= (ulli) uchar(bin[i]) << ((last - i) * 8);
-  
-  if (size == 4) {
-    float result = *reinterpret_cast<float *>(&sum);
-    return result;
-  }
-  else {
-    double result = *reinterpret_cast<double *>(&sum);
-    return result;
+  // Pick the decoder from the payload width; each reads a big-endian value.
+  const ByteVector payload = getAsBinary();
+  switch(payload.size()) {
+  case 4:
+    return payload.toFloat32BE(0);
+  case 8:
+    return payload.toFloat64BE(0);
+  case 10:
+    return payload.toFloat80BE(0);
+  default:
+    debug("EBML::Element::getAsFloat() - Invalid data size. Returning 0.");
+    return 0.0;
   }
 }
 
diff --git a/taglib/riff/aiff/aiffproperties.cpp b/taglib/riff/aiff/aiffproperties.cpp
index 20c810ec..7c26de18 100644
--- a/taglib/riff/aiff/aiffproperties.cpp
+++ b/taglib/riff/aiff/aiffproperties.cpp
@@ -25,64 +25,10 @@
 
 #include <tstring.h>
 #include <tdebug.h>
-#include <cmath>
-// ldexp is a c99 function, which might not be defined in <cmath>
-// so we pull in math.h too and hope it does the right (wrong) thing
-// wrt. c99 functions in C++
-#include <math.h>
-
 #include "aiffproperties.h"
 
-////////////////////////////////////////////////////////////////////////////////
-// nasty 80-bit float helpers
-////////////////////////////////////////////////////////////////////////////////
-
-#define UnsignedToFloat(u) (((double)((long)(u - 2147483647L - 1))) + 2147483648.0)
-
 using namespace TagLib;
 
-static double ConvertFromIeeeExtended(const ByteVector &v, size_t offset)
-{
-  if(offset > v.size() - 10) {
-    debug("ConvertFromIeeeExtended() - offset is out of range. Returning 0.");
-    return 0.0;
-  }
-
-  const uchar *bytes = reinterpret_cast<const uchar*>(v.data() + offset);
-  double f;
-  int expon;
-  unsigned long hiMant, loMant;
-
-  expon  = ((bytes[0] & 0x7F) << 8) | (bytes[1] & 0xFF);
-
-  hiMant = ((unsigned long)(bytes[2] & 0xFF) << 24) |
-           ((unsigned long)(bytes[3] & 0xFF) << 16) |
-           ((unsigned long)(bytes[4] & 0xFF) << 8)  |
-           ((unsigned long)(bytes[5] & 0xFF));
-
-  loMant = ((unsigned long)(bytes[6] & 0xFF) << 24) |
-           ((unsigned long)(bytes[7] & 0xFF) << 16) |
-           ((unsigned long)(bytes[8] & 0xFF) << 8)  |
-           ((unsigned long)(bytes[9] & 0xFF));
-
-  if (expon == 0 && hiMant == 0 && loMant == 0)
-    f = 0;
-  else {
-    if(expon == 0x7FFF) /* Infinity or NaN */
-      f = HUGE_VAL;
-    else {
-      expon -= 16383;
-      f  = ldexp(UnsignedToFloat(hiMant), expon -= 31);
-      f += ldexp(UnsignedToFloat(loMant), expon -= 32);
-    }
-  }
-
-  if(bytes[0] & 0x80)
-    return -f;
-  else
-    return f;
-}
-
 class RIFF::AIFF::Properties::PropertiesPrivate
 {
 public:
@@ -159,8 +105,8 @@ void RIFF::AIFF::Properties::read(const ByteVector &data)
   d->channels       = data.toInt16BE(0);
   d->sampleFrames   = data.toUInt32BE(2);
   d->sampleWidth    = data.toInt16BE(6);
-  double sampleRate = ConvertFromIeeeExtended(data, 8);
-  d->sampleRate     = (int)sampleRate;
-  d->bitrate        = (int)((sampleRate * d->sampleWidth * d->channels) / 1000.0);
+  const long double sampleRate = data.toFloat80BE(8);
+  d->sampleRate     = static_cast<int>(sampleRate);
+  d->bitrate        = static_cast<int>((sampleRate * d->sampleWidth * d->channels) / 1000.0);
   d->length         = d->sampleRate > 0 ? d->sampleFrames / d->sampleRate : 0;
 }
diff --git a/taglib/toolkit/taglib.h b/taglib/toolkit/taglib.h
index e3b770ad..567ca723 100644
--- a/taglib/toolkit/taglib.h
+++ b/taglib/toolkit/taglib.h
@@ -39,7 +39,6 @@
 #endif
 
 #include <string>
-#include <climits>
 
 #ifdef _WIN32
 # if !defined(NOMINMAX)
diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp
index d1d714d6..b4f732f8 100644
--- a/taglib/toolkit/tbytevector.cpp
+++ b/taglib/toolkit/tbytevector.cpp
@@ -24,6 +24,8 @@
  ***************************************************************************/
 
 #include <iostream>
+#include <limits>
+#include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <tstring.h>
@@ -177,7 +179,7 @@ size_t findVector(
 template <class T>
 inline T byteSwap(T x)
 {
-  // There should be all counterparts of to*() and from*() overloads for integral types.
+  // There should be 16, 32 and 64-bit specializations; this fallback only logs and returns 0.
   debug("byteSwap<T>() -- Non specialized version should not be called");
   return 0;
 }
@@ -789,6 +791,170 @@ long long ByteVector::toInt64LE(size_t offset) const
 long long ByteVector::toInt64BE(size_t offset) const
 {
   return static_cast<long long>(toNumber<ulonglong, 8, BigEndian>(*this, offset));
+}
+
+float ByteVector::toFloat32BE(size_t offset) const
+{
+  // Test size() first: "size() - 4" wraps around when the vector is shorter.
+  if(size() < 4 || offset > size() - 4) {
+    debug("ByteVector::toFloat32BE() - offset is out of range. Returning 0.");
+    return 0.0;
+  }
+
+#if defined(SIZEOF_FLOAT) && SIZEOF_FLOAT == 4
+
+  if(std::numeric_limits<float>::is_iec559)
+  {
+    // float is 32-bit wide and IEEE754 compliant: reuse the bit pattern as is.
+    uint tmp;
+    ::memcpy(&tmp, data() + offset, 4);
+
+# ifdef TAGLIB_LITTLE_ENDIAN
+    tmp = byteSwap<uint>(tmp);
+# endif
+
+    // Copy the bits instead of casting pointers to avoid an aliasing violation.
+    float val;
+    ::memcpy(&val, &tmp, 4);
+    return val;
+  }
+
+#endif
+
+  // Portable fallback: decode the sign, exponent and fraction by hand.
+  const uchar *bytes = reinterpret_cast<const uchar*>(data() + offset);
+
+  // 1-bit sign
+  const bool negative = ((bytes[0] & 0x80) != 0);
+
+  // 8-bit exponent
+  const int exponent = ((bytes[0] & 0x7F) << 1) | (bytes[1] >> 7);
+
+  // 23-bit fraction; the implicit integer bit is added below when it applies.
+  const uint fraction
+    = (static_cast<uint>(bytes[1] & 0x7f) << 16)
+    | (static_cast<uint>(bytes[2]) <<  8)
+    | (static_cast<uint>(bytes[3]));
+
+  if(exponent == 0xFF) {
+    debug("ByteVector::toFloat32BE() - can't handle the infinity or NaN. Returning 0.");
+    return 0.0;
+  }
+
+  // Zero and subnormals (exponent 0) have no implicit bit and scale by 2^-126.
+  float val;
+  if(exponent == 0)
+    val = std::ldexp(static_cast<float>(fraction), -126 - 23);
+  else
+    val = std::ldexp(static_cast<float>(fraction | (1U << 23)), exponent - 127 - 23);
+
+  return negative ? -val : val;
+}
+
+double ByteVector::toFloat64BE(size_t offset) const
+{
+  // Test size() first: "size() - 8" wraps around when the vector is shorter.
+  if(size() < 8 || offset > size() - 8) {
+    debug("ByteVector::toFloat64BE() - offset is out of range. Returning 0.");
+    return 0.0;
+  }
+
+#if defined(SIZEOF_DOUBLE) && SIZEOF_DOUBLE == 8
+
+  if(std::numeric_limits<double>::is_iec559)
+  {
+    // double is 64-bit wide and IEEE754 compliant: reuse the bit pattern as is.
+    ulonglong tmp;
+    ::memcpy(&tmp, data() + offset, 8);
+
+# ifdef TAGLIB_LITTLE_ENDIAN
+    tmp = byteSwap<ulonglong>(tmp);
+# endif
+
+    // Copy the bits instead of casting pointers to avoid an aliasing violation.
+    double val;
+    ::memcpy(&val, &tmp, 8);
+    return val;
+  }
+
+#endif
+
+  // Portable fallback: decode the sign, exponent and fraction by hand.
+  const uchar *bytes = reinterpret_cast<const uchar*>(data() + offset);
+
+  // 1-bit sign
+  const bool negative = ((bytes[0] & 0x80) != 0);
+
+  // 11-bit exponent
+  const int exponent = ((bytes[0] & 0x7F) << 4) | (bytes[1] >> 4);
+
+  // 52-bit fraction; the implicit integer bit is added below when it applies.
+  const ulonglong fraction
+    = (static_cast<ulonglong>(bytes[1] & 0x0F) << 48)
+    | (static_cast<ulonglong>(bytes[2]) << 40)
+    | (static_cast<ulonglong>(bytes[3]) << 32)
+    | (static_cast<ulonglong>(bytes[4]) << 24)
+    | (static_cast<ulonglong>(bytes[5]) << 16)
+    | (static_cast<ulonglong>(bytes[6]) <<  8)
+    | (static_cast<ulonglong>(bytes[7]));
+
+  if(exponent == 0x7FF) {
+    debug("ByteVector::toFloat64BE() - can't handle the infinity or NaN. Returning 0.");
+    return 0.0;
+  }
+
+  // Zero and subnormals (exponent 0) have no implicit bit and scale by 2^-1022.
+  double val;
+  if(exponent == 0)
+    val = std::ldexp(static_cast<double>(fraction), -1022 - 52);
+  else
+    val = std::ldexp(static_cast<double>(fraction | (1ULL << 52)), exponent - 1023 - 52);
+
+  return negative ? -val : val;
+}
+
+long double ByteVector::toFloat80BE(size_t offset) const
+{
+  // Test size() first: "size() - 10" wraps around when the vector is shorter.
+  if(size() < 10 || offset > size() - 10) {
+    debug("ByteVector::toFloat80BE() - offset is out of range. Returning 0.");
+    return 0.0;
+  }
+
+  // The 80-bit layout stores its integer bit explicitly, so none is implied below.
+  const uchar *bytes = reinterpret_cast<const uchar*>(data() + offset);
+
+  // 1-bit sign
+  const bool negative = ((bytes[0] & 0x80) != 0);
+
+  // 15-bit exponent
+  const int exponent = ((bytes[0] & 0x7F) << 8) | bytes[1];
+
+  // 1-bit integer part and 63-bit fraction.
+  const ulonglong fraction
+    = (static_cast<ulonglong>(bytes[2]) << 56)
+    | (static_cast<ulonglong>(bytes[3]) << 48)
+    | (static_cast<ulonglong>(bytes[4]) << 40)
+    | (static_cast<ulonglong>(bytes[5]) << 32)
+    | (static_cast<ulonglong>(bytes[6]) << 24)
+    | (static_cast<ulonglong>(bytes[7]) << 16)
+    | (static_cast<ulonglong>(bytes[8]) <<  8)
+    | (static_cast<ulonglong>(bytes[9]));
+
+  // Infinity and NaN are not converted; they are reported and mapped to 0.
+  if(exponent == 0x7FFF) {
+    debug("ByteVector::toFloat80BE() - can't handle the infinity or NaN. Returning 0.");
+    return 0.0;
+  }
+
+  // A zero exponent field (zero, denormals) uses the same scale as exponent 1.
+  // std::ldexp is called rather than ::ldexp because only the std:: overload
+  // set is guaranteed to include a long double version; the global C function
+  // takes a double and would silently drop the low bits of the significand.
+  const int scale = (exponent == 0 ? 1 : exponent) - 16383 - 63;
+  const long double val = std::ldexp(static_cast<long double>(fraction), scale);
+
+  return negative ? -val : val;
 }
 
 const char &ByteVector::operator[](size_t index) const
diff --git a/taglib/toolkit/tbytevector.h b/taglib/toolkit/tbytevector.h
index f470c071..1075afbf 100644
--- a/taglib/toolkit/tbytevector.h
+++ b/taglib/toolkit/tbytevector.h
@@ -371,6 +371,26 @@ namespace TagLib {
      */
     long long toInt64BE(size_t offset) const;
 
+    /*!
+     * Converts the 4 bytes at \a offset of the vector to a float as an IEEE754
+     * 32-bit big-endian floating point number.
+     */
+    float toFloat32BE(size_t offset) const;
+
+    /*!
+     * Converts the 8 bytes at \a offset of the vector to a double as an IEEE754
+     * 64-bit big-endian floating point number.
+     */
+    double toFloat64BE(size_t offset) const;
+
+    /*!
+     * Converts the 10 bytes at \a offset of the vector to a long double as an
+     * IEEE754 80-bit big-endian floating point number.
+     *
+     * \note Precision may be lost, depending on the size of long double.
+     */
+    long double toFloat80BE(size_t offset) const;
+
     /*!
      * Creates a 2 byte ByteVector based on \a value as an unsigned 16-bit
      * little-endian integer.
diff --git a/tests/test_bytevector.cpp b/tests/test_bytevector.cpp
index 7dccf598..2d31bfa9 100644
--- a/tests/test_bytevector.cpp
+++ b/tests/test_bytevector.cpp
@@ -177,6 +177,16 @@ public:
     CPPUNIT_ASSERT(ByteVector::fromUInt16LE(4386) == ByteVector::fromUInt16BE(8721));
     CPPUNIT_ASSERT(ByteVector::fromUInt32LE(287454020) == ByteVector::fromUInt32BE(1144201745));
     CPPUNIT_ASSERT(ByteVector::fromUInt64LE(1234605615291183940) == ByteVector::fromUInt64BE(4914309075945333265));
+    // Each buffer holds pi after one padding byte, so a non-zero offset is exercised.
+    const uchar PI32[] = { 0x00, 0x40, 0x49, 0x0f, 0xdb };
+    const uchar PI64[] = { 0x00, 0x40, 0x09, 0x21, 0xfb, 0x54, 0x44, 0x2d, 0x18 };
+    const uchar PI80[] = { 0x00, 0x40, 0x00, 0xc9, 0x0f, 0xda, 0xa2, 0x21, 0x68, 0xc2, 0x35 };
+    ByteVector pi32(reinterpret_cast<const char*>(PI32), 5);
+    ByteVector pi64(reinterpret_cast<const char*>(PI64), 9);
+    ByteVector pi80(reinterpret_cast<const char*>(PI80), 11);
+    CPPUNIT_ASSERT(static_cast<int>(pi32.toFloat32BE(1) * 100) == 314);
+    CPPUNIT_ASSERT(static_cast<int>(pi64.toFloat64BE(1) * 100) == 314);
+    CPPUNIT_ASSERT(static_cast<int>(pi80.toFloat80BE(1) * 100) == 314);
   }
 
   void testReplace()