diff --git a/autotests/read/psd/laba_16bit.png b/autotests/read/psd/laba_16bit.png
index d0db793..d3d317c 100644
Binary files a/autotests/read/psd/laba_16bit.png and b/autotests/read/psd/laba_16bit.png differ
diff --git a/autotests/read/psd/laba_8bit.png b/autotests/read/psd/laba_8bit.png
index 1fd7e71..c691745 100644
Binary files a/autotests/read/psd/laba_8bit.png and b/autotests/read/psd/laba_8bit.png differ
diff --git a/src/imageformats/fastmath_p.h b/src/imageformats/fastmath_p.h
new file mode 100644
index 0000000..17bd2dd
--- /dev/null
+++ b/src/imageformats/fastmath_p.h
@@ -0,0 +1,35 @@
+/*
+    Approximated math functions used into conversions.
+
+    SPDX-FileCopyrightText: Edward Kmett
+    SPDX-FileCopyrightText: 2023 Mirco Miranda <mircomir@outlook.com>
+
+    SPDX-License-Identifier: BSD-3-Clause
+*/
+#ifndef FASTMATH_P_H
+#define FASTMATH_P_H
+
+#include <QtGlobal>
+
+/*!
+ * \brief fastPow
+ * Based on Edward Kmett code released into the public domain.
+ * See also: https://github.com/ekmett/approximate
+ */
+inline double fastPow(double x, double y)
+{
+    union {
+        double d;
+        qint32 i[2];
+    } u = {x};
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+    u.i[1] = qint32(y * (u.i[1] - 1072632447) + 1072632447);
+    u.i[0] = 0;
+#else // never tested
+    u.i[0] = qint32(y * (u.i[0] - 1072632447) + 1072632447);
+    u.i[1] = 0;
+#endif
+    return u.d;
+}
+
+#endif // FASTMATH_P_H
diff --git a/src/imageformats/psd.cpp b/src/imageformats/psd.cpp
index 942a300..d1a2484 100644
--- a/src/imageformats/psd.cpp
+++ b/src/imageformats/psd.cpp
@@ -3,7 +3,7 @@
 
     SPDX-FileCopyrightText: 2003 Ignacio Castaño <castano@ludicon.com>
     SPDX-FileCopyrightText: 2015 Alex Merry <alex.merry@kde.org>
-    SPDX-FileCopyrightText: 2022 Mirco Miranda <mircomir@outlook.com>
+    SPDX-FileCopyrightText: 2022-2023 Mirco Miranda <mircomir@outlook.com>
 
     SPDX-License-Identifier: LGPL-2.0-or-later
 */
@@ -21,7 +21,6 @@
 /*
  * Limitations of the current code:
  * - 32-bit float image are converted to 16-bit integer image.
- *   NOTE: Qt 6.2 allow 32-bit float images (RGB only)
  * - Other color spaces cannot directly be read due to lack of QImage support for
  *   color spaces other than RGB (and Grayscale). Where possible, a conversion
  *   to RGB is done:
@@ -33,6 +32,7 @@
  *   color management engine (e.g. LittleCMS).
  */
 
+#include "fastmath_p.h"
 #include "psd_p.h"
 #include "util_p.h"
 
@@ -51,7 +51,7 @@ typedef quint8 uchar;
  * This should not be a problem because the Qt's QColorSpace supports the linear
  * sRgb colorspace.
  *
- * Using linear conversion, the loading speed is improved by 4x. Anyway, if you are using
+ * Using linear conversion, the loading speed is slightly improved. Anyway, if you are using
  * an software that discard color info, you should comment it.
  *
  * At the time I'm writing (07/2022), Gwenview and Krita supports linear sRgb but KDE
@@ -845,6 +845,7 @@ inline void cmykToRgb(uchar *target, qint32 targetChannels, const char *source,
     auto s = reinterpret_cast<const T*>(source);
     auto t = reinterpret_cast<T*>(target);
     auto max = double(std::numeric_limits<T>::max());
+    auto invmax = 1.0 / max; // speed improvements by ~10%
 
     if (sourceChannels < 4) {
         qDebug() << "cmykToRgb: image is not a valid CMYK!";
@@ -853,10 +854,10 @@ inline void cmykToRgb(uchar *target, qint32 targetChannels, const char *source,
 
     for (qint32 w = 0; w < width; ++w) {
         auto ps = s + sourceChannels * w;
-        auto C = 1 - *(ps + 0) / max;
-        auto M = 1 - *(ps + 1) / max;
-        auto Y = 1 - *(ps + 2) / max;
-        auto K = 1 - *(ps + 3) / max;
+        auto C = 1 - *(ps + 0) * invmax;
+        auto M = 1 - *(ps + 1) * invmax;
+        auto Y = 1 - *(ps + 2) * invmax;
+        auto K = 1 - *(ps + 3) * invmax;
 
         auto pt = t + targetChannels * w;
         *(pt + 0) = T(std::min(max - (C * (1 - K) + K) * max + 0.5, max));
@@ -881,8 +882,9 @@ inline double gammaCorrection(double linear)
 #ifdef PSD_FAST_LAB_CONVERSION
     return linear;
 #else
-    // NOTE: pow() slow down the performance by a 4 factor :(
-    return (linear > 0.0031308 ? 1.055 * std::pow(linear, 1.0 / 2.4) - 0.055 : 12.92 * linear);
+    // Replacing fastPow with std::pow the conversion time is 2/3 times longer: using fastPow
+    // there are minimal differences in the conversion that are not visually noticeable.
+    return (linear > 0.0031308 ? 1.055 * fastPow(linear, 1.0 / 2.4) - 0.055 : 12.92 * linear);
 #endif
 }
 
@@ -892,6 +894,7 @@ inline void labToRgb(uchar *target, qint32 targetChannels, const char *source, q
     auto s = reinterpret_cast<const T*>(source);
     auto t = reinterpret_cast<T*>(target);
     auto max = double(std::numeric_limits<T>::max());
+    auto invmax = 1.0 / max;
 
     if (sourceChannels < 3) {
         qDebug() << "labToRgb: image is not a valid LAB!";
@@ -900,14 +903,14 @@ inline void labToRgb(uchar *target, qint32 targetChannels, const char *source, q
 
     for (qint32 w = 0; w < width; ++w) {
         auto ps = s + sourceChannels * w;
-        auto L = (*(ps + 0) / max) * 100.0;
-        auto A = (*(ps + 1) / max) * 255.0 - 128.0;
-        auto B = (*(ps + 2) / max) * 255.0 - 128.0;
+        auto L = (*(ps + 0) * invmax) * 100.0;
+        auto A = (*(ps + 1) * invmax) * 255.0 - 128.0;
+        auto B = (*(ps + 2) * invmax) * 255.0 - 128.0;
 
         // converting LAB to XYZ (D65 illuminant)
-        auto Y = (L + 16.0) / 116.0;
-        auto X = A / 500.0 + Y;
-        auto Z = Y - B / 200.0;
+        auto Y = (L + 16.0) * (1.0 / 116.0);
+        auto X = A * (1.0 / 500.0) + Y;
+        auto Z = Y - B * (1.0 / 200.0);
 
         // NOTE: use the constants of the illuminant of the target RGB color space
         X = finv(X) * 0.9504;   // D50: * 0.9642