From 819f456d6e1da74659a9d7c360f72addfe662b14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?=
 <luisangelsm@gmail.com>
Date: Sat, 31 Aug 2024 13:04:39 +0200
Subject: [PATCH] Implement lanczos for upscaling to get crispier images

---
 YACReader/YACReader.pro               |  67 ++++--
 YACReader/viewer.cpp                  |  16 +-
 image_processing/image_processing.pri |   8 +
 image_processing/resize_image.cpp     | 317 ++++++++++++++++++++++++++
 image_processing/resize_image.h       |  20 ++
 5 files changed, 401 insertions(+), 27 deletions(-)
 create mode 100644 image_processing/image_processing.pri
 create mode 100644 image_processing/resize_image.cpp
 create mode 100644 image_processing/resize_image.h

diff --git a/YACReader/YACReader.pro b/YACReader/YACReader.pro
index 08502677..08ebf150 100644
--- a/YACReader/YACReader.pro
+++ b/YACReader/YACReader.pro
@@ -10,7 +10,23 @@ DEFINES += YACREADER
 
 #load default build flags
 include (../config.pri)
+
+CONFIG(7zip) {
+include(../compressed_archive/wrapper.pri)
+} else:CONFIG(unarr) {
+include(../compressed_archive/unarr/unarr-wrapper.pri)
+} else:CONFIG(libarchive) {
+include(../compressed_archive/libarchive/libarchive-wrapper.pri)
+} else {
+  error(No compression backend specified. Did you mess with the build system?)
+}
+
+include(../custom_widgets/custom_widgets_yacreader.pri)
+include(../image_processing/image_processing.pri)
 include (../dependencies/pdf_backend.pri)
+include(../shortcuts_management/shortcuts_management.pri)
+
+include(../third_party/QsLog/QsLog.pri)
 
 CONFIG(force_angle) {
     contains(QMAKE_TARGET.arch, x86_64) {
@@ -30,7 +46,7 @@ CONFIG(force_angle) {
     }
 }
 
-SOURCES += main.cpp
+SOURCES += main.cpp \
 
 INCLUDEPATH += ../common \
                ../custom_widgets
@@ -55,6 +71,14 @@ win32 {
     msvc {
         QMAKE_CXXFLAGS_RELEASE += /MP /Ob2 /Oi /Ot /GT /GL
         QMAKE_LFLAGS_RELEASE += /LTCG
+
+        # SIMD code generation used by image_processing/resize_image.cpp.
+        # /arch:AVX2 includes AVX, so a separate /arch:AVX flag is redundant.
+        # With /arch:AVX2 the compiler predefines __AVX__ and __AVX2__ itself,
+        # so those reserved macro names are not added to DEFINES by hand.
+        # NOTE: binaries built with this flag only run on CPUs that support AVX2.
+        QMAKE_CXXFLAGS += /arch:AVX2
+
     }
     CONFIG -= embed_manifest_exe
 }
@@ -67,7 +91,27 @@ macx {
     lessThan(QT_MAJOR_VERSION, 6): QT += macextras
 }
 
-QT += network widgets core multimedia svg
+unix|mingw {
+    # Added to every unix/mingw build, whatever the target architecture is
+    QMAKE_CXXFLAGS += -msse2  # Baseline for x86
+
+    # NOTE(review): confirm QMAKE_TARGET.arch is populated by the mkspecs used here; when it is empty the else branch is taken
+    contains(QMAKE_TARGET.arch, x86_64) {
+        QMAKE_CXXFLAGS += -mavx2 -mfma
+        DEFINES += __AVX__ __AVX2__ # resize_image.cpp selects its AVX code path on these macros
+    } else { # Assuming x86 (32-bit)
+        QMAKE_CXXFLAGS += -msse4.2
+        DEFINES += __SSE4_2__
+    }
+
+    # ARM. NOTE(review): this tests the build host (QMAKE_HOST), not the target - confirm that is intended
+    contains(QMAKE_HOST.arch, arm) {
+        QMAKE_CXXFLAGS += -mfpu=neon -mfloat-abi=hard
+        DEFINES += __ARM_NEON__ # resize_image.cpp selects its NEON code path on this macro
+    }
+}
+
+QT += network widgets core multimedia svg concurrent
 
 greaterThan(QT_MAJOR_VERSION, 5): QT += openglwidgets core5compat
 
@@ -106,7 +150,7 @@ HEADERS +=  ../common/comic.h \
             ../common/exit_check.h \
             ../common/scroll_management.h \
             ../common/opengl_checker.h \
-            ../common/pdf_comic.h
+            ../common/pdf_comic.h \
 
 !CONFIG(no_opengl) {
     HEADERS += ../common/gl/yacreader_flow_gl.h \
@@ -143,31 +187,16 @@ SOURCES +=  ../common/comic.cpp \
             ../common/yacreader_global_gui.cpp \
             ../common/exit_check.cpp \
             ../common/scroll_management.cpp \
-            ../common/opengl_checker.cpp
+            ../common/opengl_checker.cpp \
 
 !CONFIG(no_opengl) {
         SOURCES += ../common/gl/yacreader_flow_gl.cpp \
                     goto_flow_gl.cpp
 }
 
-include(../custom_widgets/custom_widgets_yacreader.pri)
-
-CONFIG(7zip) {
-include(../compressed_archive/wrapper.pri)
-} else:CONFIG(unarr) {
-include(../compressed_archive/unarr/unarr-wrapper.pri)
-} else:CONFIG(libarchive) {
-include(../compressed_archive/libarchive/libarchive-wrapper.pri)
-} else {
-  error(No compression backend specified. Did you mess with the build system?)
-}
-include(../shortcuts_management/shortcuts_management.pri)
-
 RESOURCES += yacreader_images.qrc \
              yacreader_files.qrc
 
-include(../third_party/QsLog/QsLog.pri)
-
 RC_FILE = icon.rc
 
 macx {
diff --git a/YACReader/viewer.cpp b/YACReader/viewer.cpp
index 7d6fe242..82f857a3 100644
--- a/YACReader/viewer.cpp
+++ b/YACReader/viewer.cpp
@@ -16,6 +16,7 @@
 #include "notifications_label_widget.h"
 #include "comic_db.h"
 #include "shortcuts_manager.h"
+#include "resize_image.h"
 
 #include "opengl_checker.h"
 
@@ -387,16 +388,15 @@ void Viewer::updateContentSize()
         if (zoom != 100) {
             pagefit.scale(floor(pagefit.width() * zoom / 100.0f), 0, Qt::KeepAspectRatioByExpanding);
         }
-        // apply scaling
+        // apply size to the container
         content->resize(pagefit);
 
-        // TODO: updtateContentSize should only scale the pixmap once
-        if (devicePixelRatioF() > 1) // only in HDPI displays
-        {
-            QPixmap page = currentPage->scaled(content->width() * devicePixelRatioF(), content->height() * devicePixelRatioF(), Qt::KeepAspectRatio, Qt::SmoothTransformation);
-            page.setDevicePixelRatio(devicePixelRatioF());
-            content->setPixmap(page);
-        }
+        // scale the image to fit the container
+        auto devicePixelRatioF = content->devicePixelRatioF();
+        QLOG_ERROR() << "src size: " << currentPage->size() << " content size: " << content->size() << " target size " << QSize(content->width() * devicePixelRatioF, content->height() * devicePixelRatioF);
+        QPixmap page = smartScalePixmap(*currentPage, content->width() * devicePixelRatioF, content->height() * devicePixelRatioF); // currentPage->scaled(content->width() * devicePixelRatioF(), content->height() * devicePixelRatioF(), Qt::KeepAspectRatio, Qt::SmoothTransformation);
+        page.setDevicePixelRatio(devicePixelRatioF);
+        content->setPixmap(page);
 
         emit backgroundChanges();
     }
diff --git a/image_processing/image_processing.pri b/image_processing/image_processing.pri
new file mode 100644
index 00000000..d2248f4d
--- /dev/null
+++ b/image_processing/image_processing.pri
@@ -0,0 +1,8 @@
+INCLUDEPATH += $$PWD
+DEPENDPATH += $$PWD
+
+SOURCES += \
+    $$PWD/resize_image.cpp
+
+HEADERS += \
+    $$PWD/resize_image.h
diff --git a/image_processing/resize_image.cpp b/image_processing/resize_image.cpp
new file mode 100644
index 00000000..a7ca7b61
--- /dev/null
+++ b/image_processing/resize_image.cpp
@@ -0,0 +1,317 @@
+#include "resize_image.h"
+
+#include <QtConcurrent>
+#include <QImage>
+#include <QColor>
+
+QPixmap scalePixmapBicubic(const QPixmap &pixmap, int width, int height);
+QPixmap scalePixmapLanczos(const QPixmap &pixmap, int width, int height);
+QPixmap scalePixmapArea(const QPixmap &pixmap, int width, int height);
+QPixmap scalePixmapLanczosQt(const QPixmap &pixmap, int targetWidth, int targetHeight, int a = 3);
+
+// Scales pixmap to width x height, but only when that is an upscale; see the notes below.
+QPixmap smartScalePixmap(const QPixmap &pixmap, int width, int height)
+{
+    // Null pixmaps and pixmaps that already have the requested size are returned untouched.
+    const bool alreadySized = pixmap.width() == width && pixmap.height() == height;
+    if (alreadySized || pixmap.isNull()) {
+        return pixmap;
+    }
+
+    // Lanczos is only applied when upscaling, i.e. when neither source dimension exceeds
+    // the target. Any other combination currently returns the source without scaling.
+    const bool upscaling = pixmap.width() <= width && pixmap.height() <= height;
+    return upscaling ? scalePixmapLanczos(pixmap, width, height) : pixmap;
+}
+
+// Scales pixmap to width x height with the given method; a pixmap already of that size is returned as is.
+QPixmap scalePixmap(const QPixmap &pixmap, int width, int height, ScaleMethod method)
+{
+    if (pixmap.width() == width && pixmap.height() == height) {
+        return pixmap;
+    }
+    switch (method) {
+    case ScaleMethod::QtFast:
+        return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::FastTransformation);
+    case ScaleMethod::QtSmooth:
+        return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
+    case ScaleMethod::Bicubic:
+        return scalePixmapBicubic(pixmap, width, height);
+    case ScaleMethod::Lanczos:
+        return scalePixmapLanczos(pixmap, width, height);
+    case ScaleMethod::Area:
+        return scalePixmapArea(pixmap, width, height);
+    }
+    // A value outside the enum must not run off the end of a non-void function (UB): use smooth scaling.
+    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
+}
+
+QPixmap scalePixmapBicubic(const QPixmap &pixmap, int width, int height)
+{
+    // TODO: implement bicubic interpolation; until then this is Qt's smooth scaling with the aspect ratio kept
+    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
+}
+
+QPixmap scalePixmapLanczos(const QPixmap &pixmap, int width, int height)
+{
+    return scalePixmapLanczosQt(pixmap, width, height); // kernel radius left at its default (a = 3)
+}
+
+QPixmap scalePixmapArea(const QPixmap &pixmap, int width, int height)
+{
+    // TODO: implement area averaging; until then this is Qt's smooth scaling with the aspect ratio kept
+    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
+}
+
+// Platform-specific SIMD includes and checks
+#if defined(__AVX__) || defined(__AVX2__)
+#include <immintrin.h> // For x86 SSE/AVX
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h> // For ARM NEON
+#else
+#warning "No SIMD instructions detected, falling back to scalar implementation."
+#endif
+
+// Define SIMD intrinsics for different platforms
+#if defined(__AVX__) || defined(__AVX2__)
+
+// Lanczos weight a*sin(pi*x)*sin(pi*x/a)/(pi*x)^2 for each lane; lanes where x == 0 yield 1.
+inline __m256d lanczosKernelAVX(const __m256d &x, const __m256d &a_val)
+{
+    // _mm256_sin_pd is an Intel SVML intrinsic that GCC and Clang lack, so std::sin is used per lane.
+    double offsets[4], radii[4], weights[4];
+    _mm256_storeu_pd(offsets, x);
+    _mm256_storeu_pd(radii, a_val);
+    for (int lane = 0; lane < 4; ++lane) {
+        const double pix = M_PI * offsets[lane];
+        weights[lane] = offsets[lane] == 0.0 ? 1.0 : radii[lane] * std::sin(pix) * std::sin(pix / radii[lane]) / (pix * pix);
+    }
+    return _mm256_loadu_pd(weights);
+}
+
+QVector<QRgb> processRow(int y, int targetWidth, int targetHeight, const QImage &sourceImage, int a)
+{
+    // Builds target row y: every pixel is the weight-normalised Lanczos sum of the source pixels around it.
+    QVector<QRgb> resultRow(targetWidth);
+    const int sourceWidth = sourceImage.width();
+    const int sourceHeight = sourceImage.height();
+    const __m256d a_vec = _mm256_set1_pd(a);
+    const double gy = (static_cast<double>(y) / targetHeight) * (sourceHeight - 1); // same for the whole row
+
+    for (int x = 0; x < targetWidth; ++x) {
+        const double gx = (static_cast<double>(x) / targetWidth) * (sourceWidth - 1);
+
+        __m256d red_vec = _mm256_setzero_pd();
+        __m256d green_vec = _mm256_setzero_pd();
+        __m256d blue_vec = _mm256_setzero_pd();
+        __m256d alpha_vec = _mm256_setzero_pd();
+        __m256d weight_vec = _mm256_setzero_pd();
+
+        for (int ix = static_cast<int>(gx) - a + 1; ix <= static_cast<int>(gx) + a; ++ix) {
+            for (int iy = static_cast<int>(gy) - a + 1; iy <= static_cast<int>(gy) + a; ++iy) {
+                if (ix >= 0 && ix < sourceWidth && iy >= 0 && iy < sourceHeight) {
+                    __m256d gx_vec = _mm256_set1_pd(gx - ix);
+                    __m256d gy_vec = _mm256_set1_pd(gy - iy);
+                    __m256d weight_x = lanczosKernelAVX(gx_vec, a_vec);
+                    __m256d weight_y = lanczosKernelAVX(gy_vec, a_vec);
+                    __m256d weight = _mm256_mul_pd(weight_x, weight_y);
+                    // QColor(QRgb) ignores the alpha channel, so the components are read from the QRgb directly.
+                    const QRgb pixel = sourceImage.pixel(ix, iy);
+                    __m256d color_red = _mm256_set1_pd(qRed(pixel));
+                    __m256d color_green = _mm256_set1_pd(qGreen(pixel));
+                    __m256d color_blue = _mm256_set1_pd(qBlue(pixel));
+                    __m256d color_alpha = _mm256_set1_pd(qAlpha(pixel));
+
+                    red_vec = _mm256_add_pd(red_vec, _mm256_mul_pd(weight, color_red));
+                    green_vec = _mm256_add_pd(green_vec, _mm256_mul_pd(weight, color_green));
+                    blue_vec = _mm256_add_pd(blue_vec, _mm256_mul_pd(weight, color_blue));
+                    alpha_vec = _mm256_add_pd(alpha_vec, _mm256_mul_pd(weight, color_alpha));
+                    weight_vec = _mm256_add_pd(weight_vec, weight);
+                }
+            }
+        }
+        // All lanes hold the same value, so hadd yields twice that value; the factor cancels in the division below.
+        double red = _mm256_cvtsd_f64(_mm256_hadd_pd(red_vec, red_vec));
+        double green = _mm256_cvtsd_f64(_mm256_hadd_pd(green_vec, green_vec));
+        double blue = _mm256_cvtsd_f64(_mm256_hadd_pd(blue_vec, blue_vec));
+        double alpha = _mm256_cvtsd_f64(_mm256_hadd_pd(alpha_vec, alpha_vec));
+        double sumWeights = _mm256_cvtsd_f64(_mm256_hadd_pd(weight_vec, weight_vec));
+
+        if (sumWeights > 0.0) {
+            red = std::clamp(red / sumWeights, 0.0, 255.0);
+            green = std::clamp(green / sumWeights, 0.0, 255.0);
+            blue = std::clamp(blue / sumWeights, 0.0, 255.0);
+            alpha = std::clamp(alpha / sumWeights, 0.0, 255.0);
+        }
+
+        resultRow[x] = qRgba(static_cast<int>(red), static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
+    }
+
+    return resultRow;
+}
+
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+
+// Lanczos weight a*sin(pi*x)*sin(pi*x/a)/(pi*x)^2 for both lanes of x; a lane equal to 0 yields 1.
+inline float64x2_t lanczosKernelNEON(const float64x2_t &x, int a)
+{
+    // NEON has no sine intrinsic (arm_neon.h declares no vsin_f64), so each lane is
+    // written to memory and evaluated with std::sin.
+    double offsets[2];
+    double weights[2];
+    vst1q_f64(offsets, x);
+    for (int lane = 0; lane < 2; ++lane) {
+        const double pix = M_PI * offsets[lane];
+        const double windowed = a * std::sin(pix) * std::sin(pix / a) / (pix * pix);
+        weights[lane] = offsets[lane] == 0.0 ? 1.0 : windowed;
+    }
+    return vld1q_f64(weights);
+}
+
+QVector<QRgb> processRow(int y, int targetWidth, int targetHeight, const QImage &sourceImage, int a)
+{
+    // Builds target row y: every pixel is the weight-normalised Lanczos sum of the source pixels around it.
+    QVector<QRgb> resultRow(targetWidth);
+    const int sourceWidth = sourceImage.width();
+    const int sourceHeight = sourceImage.height();
+    const double gy = (static_cast<double>(y) / targetHeight) * (sourceHeight - 1); // same for the whole row
+
+    for (int x = 0; x < targetWidth; ++x) {
+        const double gx = (static_cast<double>(x) / targetWidth) * (sourceWidth - 1);
+
+        float64x2_t red_vec = vdupq_n_f64(0.0);
+        float64x2_t green_vec = vdupq_n_f64(0.0);
+        float64x2_t blue_vec = vdupq_n_f64(0.0);
+        float64x2_t alpha_vec = vdupq_n_f64(0.0);
+        float64x2_t weight_vec = vdupq_n_f64(0.0);
+
+        for (int ix = static_cast<int>(gx) - a + 1; ix <= static_cast<int>(gx) + a; ++ix) {
+            for (int iy = static_cast<int>(gy) - a + 1; iy <= static_cast<int>(gy) + a; ++iy) {
+                if (ix >= 0 && ix < sourceWidth && iy >= 0 && iy < sourceHeight) {
+                    float64x2_t gx_vec = vdupq_n_f64(gx - ix);
+                    float64x2_t gy_vec = vdupq_n_f64(gy - iy);
+                    float64x2_t weight_x = lanczosKernelNEON(gx_vec, a);
+                    float64x2_t weight_y = lanczosKernelNEON(gy_vec, a);
+                    float64x2_t weight = vmulq_f64(weight_x, weight_y);
+                    // QColor(QRgb) ignores the alpha channel, so the components are read from the QRgb directly.
+                    const QRgb pixel = sourceImage.pixel(ix, iy);
+                    float64x2_t color_red = vdupq_n_f64(qRed(pixel));
+                    float64x2_t color_green = vdupq_n_f64(qGreen(pixel));
+                    float64x2_t color_blue = vdupq_n_f64(qBlue(pixel));
+                    float64x2_t color_alpha = vdupq_n_f64(qAlpha(pixel));
+                    red_vec = vmlaq_f64(red_vec, weight, color_red);
+                    green_vec = vmlaq_f64(green_vec, weight, color_green);
+                    blue_vec = vmlaq_f64(blue_vec, weight, color_blue);
+                    alpha_vec = vmlaq_f64(alpha_vec, weight, color_alpha);
+                    weight_vec = vaddq_f64(weight_vec, weight);
+                }
+            }
+        }
+
+        double red = vgetq_lane_f64(red_vec, 0) + vgetq_lane_f64(red_vec, 1);
+        double green = vgetq_lane_f64(green_vec, 0) + vgetq_lane_f64(green_vec, 1);
+        double blue = vgetq_lane_f64(blue_vec, 0) + vgetq_lane_f64(blue_vec, 1);
+        double alpha = vgetq_lane_f64(alpha_vec, 0) + vgetq_lane_f64(alpha_vec, 1);
+        double sumWeights = vgetq_lane_f64(weight_vec, 0) + vgetq_lane_f64(weight_vec, 1);
+
+        if (sumWeights > 0.0) {
+            red = std::clamp(red / sumWeights, 0.0, 255.0);
+            green = std::clamp(green / sumWeights, 0.0, 255.0);
+            blue = std::clamp(blue / sumWeights, 0.0, 255.0);
+            alpha = std::clamp(alpha / sumWeights, 0.0, 255.0);
+        }
+
+        resultRow[x] = qRgba(static_cast<int>(red), static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
+    }
+
+    return resultRow;
+}
+
+#else
+
+// Scalar fallback for unsupported platforms: Lanczos window of radius a evaluated at offset x
+double lanczosKernel(double x, int a)
+{
+    if (x == 0.0)
+        return 1.0; // the formula below would divide 0 by 0 here; the kernel's value at the origin is 1
+    if (x < -a || x > a)
+        return 0.0; // outside the window's support
+    double pix = M_PI * x;
+    return a * std::sin(pix) * std::sin(pix / a) / (pix * pix);
+}
+
+QVector<QRgb> processRow(int y, int targetWidth, int targetHeight, const QImage &sourceImage, int a)
+{
+    // Builds target row y: every pixel is the weight-normalised Lanczos sum of the source pixels around it.
+    QVector<QRgb> resultRow(targetWidth);
+    const int sourceWidth = sourceImage.width();
+    const int sourceHeight = sourceImage.height();
+    const double gy = (static_cast<double>(y) / targetHeight) * (sourceHeight - 1); // same for the whole row
+
+    for (int x = 0; x < targetWidth; ++x) {
+        const double gx = (static_cast<double>(x) / targetWidth) * (sourceWidth - 1);
+        double red = 0.0, green = 0.0, blue = 0.0, alpha = 0.0, sumWeights = 0.0;
+
+        for (int ix = static_cast<int>(gx) - a + 1; ix <= static_cast<int>(gx) + a; ++ix) {
+            for (int iy = static_cast<int>(gy) - a + 1; iy <= static_cast<int>(gy) + a; ++iy) {
+                if (ix >= 0 && ix < sourceWidth && iy >= 0 && iy < sourceHeight) {
+                    const double weight = lanczosKernel(gx - ix, a) * lanczosKernel(gy - iy, a);
+                    // QColor(QRgb) ignores the alpha channel, so the components are read from the QRgb directly.
+                    const QRgb pixel = sourceImage.pixel(ix, iy);
+
+                    red += weight * qRed(pixel);
+                    green += weight * qGreen(pixel);
+                    blue += weight * qBlue(pixel);
+                    alpha += weight * qAlpha(pixel);
+                    sumWeights += weight;
+                }
+            }
+        }
+
+        if (sumWeights > 0.0) {
+            red = std::clamp(red / sumWeights, 0.0, 255.0);
+            green = std::clamp(green / sumWeights, 0.0, 255.0);
+            blue = std::clamp(blue / sumWeights, 0.0, 255.0);
+            alpha = std::clamp(alpha / sumWeights, 0.0, 255.0);
+        }
+
+        resultRow[x] = qRgba(static_cast<int>(red), static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
+    }
+
+    return resultRow;
+}
+
+#endif
+
+// Scales sourceImage to targetWidth x targetHeight with a Lanczos kernel of radius a.
+QImage scaleImageLanczos(const QImage &sourceImage, int targetWidth, int targetHeight, int a = 3)
+{
+    QImage targetImage(targetWidth, targetHeight, QImage::Format_ARGB32);
+    // A null target means a non-positive size or a failed allocation; without pixels to read or fill, give up.
+    if (sourceImage.isNull() || targetImage.isNull()) {
+        return QImage();
+    }
+
+    QVector<int> rows(targetHeight);
+    for (int i = 0; i < targetHeight; ++i) {
+        rows[i] = i;
+    }
+    // Rows are computed concurrently; the reference capture is safe because waitForFinished() blocks below.
+    QFuture<QVector<QRgb>> future = QtConcurrent::mapped(rows, [targetWidth, targetHeight, &sourceImage, a](int y) {
+        return processRow(y, targetWidth, targetHeight, sourceImage, a);
+    });
+    future.waitForFinished();
+    for (int y = 0; y < targetHeight; ++y) {
+        // Format_ARGB32 keeps one QRgb per pixel, so a finished row is copied straight into its scan line.
+        const QVector<QRgb> row = future.resultAt(y);
+        std::copy(row.cbegin(), row.cend(), reinterpret_cast<QRgb *>(targetImage.scanLine(y)));
+    }
+    return targetImage;
+}
+
+// Lanczos-scales a pixmap by converting it to a QImage and back; a is the kernel radius.
+QPixmap scalePixmapLanczosQt(const QPixmap &pixmap, int targetWidth, int targetHeight, int a)
+{
+    const QImage scaled = scaleImageLanczos(pixmap.toImage(), targetWidth, targetHeight, a);
+    return QPixmap::fromImage(scaled);
+}
diff --git a/image_processing/resize_image.h b/image_processing/resize_image.h
new file mode 100644
index 00000000..17ea04a3
--- /dev/null
+++ b/image_processing/resize_image.h
@@ -0,0 +1,20 @@
+#ifndef RESIZE_IMAGE_H
+#define RESIZE_IMAGE_H
+
+#include <cmath>
+#include <QPixmap>
+#include <QImage>
+
+// Algorithms selectable through scalePixmap().
+enum class ScaleMethod {
+    QtFast, // QPixmap::scaled with Qt::FastTransformation
+    QtSmooth, // Bilinear: QPixmap::scaled with Qt::SmoothTransformation
+    Bicubic, // not implemented yet; currently scaled with Qt::SmoothTransformation
+    Lanczos, // Lanczos resampling implemented in resize_image.cpp
+    Area // not implemented yet; currently scaled with Qt::SmoothTransformation
+};
+
+QPixmap smartScalePixmap(const QPixmap &pixmap, int width, int height);
+QPixmap scalePixmap(const QPixmap &pixmap, int width, int height, ScaleMethod method = ScaleMethod::QtSmooth);
+
+#endif // RESIZE_IMAGE_H