#include "resize_image.h"
#include "qmath.h"

// NOTE(review): the names of four angle-bracket includes were lost from this
// file; the Qt headers below are reconstructed from what the code uses.
// Confirm against the original include list.
#include <QColor>
#include <QImage>
#include <QPixmap>
#include <QVector>
#include <QtConcurrent>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <vector>

QPixmap scalePixmapBicubic(const QPixmap &pixmap, int width, int height);
QPixmap scalePixmapLanczos(const QPixmap &pixmap, int width, int height);
QPixmap scalePixmapArea(const QPixmap &pixmap, int width, int height);
QPixmap scalePixmapLanczosQt(const QPixmap &pixmap, int targetWidth, int targetHeight, int a = 3);

/**
 * @brief Scales a pixmap with the Lanczos filter, but only when upscaling.
 *
 * Returns @p pixmap unchanged when it is null, already has the requested
 * size, or is larger than the target in at least one dimension (the
 * downscaling and mixed cases are not handled here).
 *
 * @param pixmap Source pixmap.
 * @param width  Requested width in pixels.
 * @param height Requested height in pixels.
 * @return The Lanczos-upscaled pixmap, or @p pixmap itself.
 */
QPixmap smartScalePixmap(const QPixmap &pixmap, int width, int height)
{
    const int w = pixmap.width();
    const int h = pixmap.height();
    if ((w == width && h == height) || pixmap.isNull()) {
        return pixmap;
    }

    if (w <= width && h <= height) {
        // upscaling
        return scalePixmapLanczos(pixmap, width, height);
    }

    return pixmap;
}

/**
 * @brief Scales a pixmap to the requested size with the selected method.
 *
 * The Qt-backed methods keep the aspect ratio (Qt::KeepAspectRatio); the
 * Lanczos method resamples to exactly @p width x @p height.
 *
 * @param pixmap Source pixmap; returned unchanged when null or already at
 *               the requested size.
 * @param width  Requested width in pixels.
 * @param height Requested height in pixels.
 * @param method Scaling algorithm to use.
 * @return The scaled pixmap.
 */
QPixmap scalePixmap(const QPixmap &pixmap, int width, int height, ScaleMethod method)
{
    if (pixmap.isNull() || (pixmap.width() == width && pixmap.height() == height)) {
        return pixmap;
    }

    switch (method) {
    case ScaleMethod::QtFast:
        return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::FastTransformation);
    case ScaleMethod::QtSmooth:
        return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
    case ScaleMethod::Bicubic:
        return scalePixmapBicubic(pixmap, width, height);
    case ScaleMethod::Lanczos:
        return scalePixmapLanczos(pixmap, width, height);
    case ScaleMethod::Area:
        return scalePixmapArea(pixmap, width, height);
    }

    // Reached only for a ScaleMethod value without a case above. Without this
    // return, control would flow off the end of a non-void function, which is
    // undefined behaviour.
    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
}

/// Bicubic scaling placeholder: currently delegates to Qt's smooth scaling.
QPixmap scalePixmapBicubic(const QPixmap &pixmap, int width, int height)
{
    // TODO: implement
    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
}

/// Lanczos scaling using the resampler in this file with its default radius.
QPixmap scalePixmapLanczos(const QPixmap &pixmap, int width, int height)
{
    return scalePixmapLanczosQt(pixmap, width, height);
}

/// Area scaling placeholder: currently delegates to Qt's smooth scaling.
QPixmap scalePixmapArea(const QPixmap &pixmap, int width, int height)
{
    // TODO: implement
    return pixmap.scaled(width, height, Qt::KeepAspectRatio, Qt::SmoothTransformation);
}

// Platform-specific SIMD includes and checks.
// The NEON path uses float64x2_t / vdivq_f64, which exist only on AArch64, so
// 32-bit ARM builds take the scalar fallback instead of failing to compile.
#if defined(__AVX__) || defined(__AVX2__)
#define RESIZE_IMAGE_SIMD_AVX 1
#include <immintrin.h> // For x86 SSE/AVX
#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && (defined(__aarch64__) || defined(_M_ARM64))
#define RESIZE_IMAGE_SIMD_NEON 1
#include <arm_neon.h> // For ARM NEON
#else
#warning "No SIMD instructions detected, falling back to scalar implementation."
#endif

// Define the Lanczos kernel for the different platforms
#if defined(RESIZE_IMAGE_SIMD_AVX)

/**
 * @brief Reduces angles in radians to one period centred on zero.
 *
 * Subtracts floor(x / 2pi) * 2pi, then subtracts another 2pi from every lane
 * whose remainder is greater than pi.
 */
__m256d normalize_angle(__m256d x)
{
    const __m256d pi = _mm256_set1_pd(M_PI);
    const __m256d two_pi = _mm256_set1_pd(2 * M_PI);

    // Quotient of x / (2*PI), rounded down to the nearest integer
    __m256d quotient = _mm256_div_pd(x, two_pi);
    quotient = _mm256_floor_pd(quotient);

    // Remainder in [0, 2*PI)
    __m256d remainder = _mm256_sub_pd(x, _mm256_mul_pd(quotient, two_pi));

    // Shift the upper half of the period down by 2*PI
    const __m256d adjust = _mm256_cmp_pd(remainder, pi, _CMP_GT_OS);
    remainder = _mm256_sub_pd(remainder, _mm256_and_pd(adjust, two_pi));

    return remainder;
}

/**
 * @brief Polynomial sine approximation for four doubles.
 *
 * The argument is normalized with normalize_angle() and then folded into
 * [-pi/2, pi/2] using sin(pi - y) = sin(y) before the Taylor series is
 * evaluated. Evaluating the series directly at +-pi is far less accurate
 * (a 7th-order series yields about -0.075 there instead of 0), which leaks
 * weight into neighbouring samples of pixel-aligned positions.
 */
__m256d sin_pd_approx(__m256d x)
{
    x = normalize_angle(x);

    const __m256d sign_mask = _mm256_set1_pd(-0.0);
    const __m256d pi = _mm256_set1_pd(M_PI);
    const __m256d half_pi = _mm256_set1_pd(M_PI / 2);

    // Work on |x| and re-apply the sign at the end (sine is odd).
    const __m256d sign = _mm256_and_pd(x, sign_mask);
    __m256d magnitude = _mm256_andnot_pd(sign_mask, x);

    // Fold (pi/2, pi] onto [0, pi/2).
    const __m256d needs_fold = _mm256_cmp_pd(magnitude, half_pi, _CMP_GT_OQ);
    magnitude = _mm256_blendv_pd(magnitude, _mm256_sub_pd(pi, magnitude), needs_fold);
    x = _mm256_xor_pd(magnitude, sign);

    // Taylor coefficients of sin(x) / x in powers of x^2, up to x^10.
    const __m256d one = _mm256_set1_pd(1.0);
    const __m256d c3 = _mm256_set1_pd(-1.0 / 6.0);
    const __m256d c5 = _mm256_set1_pd(1.0 / 120.0);
    const __m256d c7 = _mm256_set1_pd(-1.0 / 5040.0);
    const __m256d c9 = _mm256_set1_pd(1.0 / 362880.0);
    const __m256d c11 = _mm256_set1_pd(-1.0 / 39916800.0);

    // Horner evaluation
    const __m256d x2 = _mm256_mul_pd(x, x);
    __m256d poly = c11;
    poly = _mm256_add_pd(c9, _mm256_mul_pd(poly, x2));
    poly = _mm256_add_pd(c7, _mm256_mul_pd(poly, x2));
    poly = _mm256_add_pd(c5, _mm256_mul_pd(poly, x2));
    poly = _mm256_add_pd(c3, _mm256_mul_pd(poly, x2));
    poly = _mm256_add_pd(one, _mm256_mul_pd(poly, x2));

    return _mm256_mul_pd(x, poly);
}

/**
 * @brief Lanczos kernel a * sin(pi x) * sin(pi x / a) / (pi x)^2 for four doubles.
 *
 * Lanes with x == 0 yield 1; lanes with |x| > a yield 0, matching the scalar
 * kernel.
 *
 * @param x     Sample offsets.
 * @param a_val Kernel radius broadcast to all lanes.
 */
inline __m256d lanczosKernelAVX(const __m256d &x, const __m256d &a_val)
{
    const __m256d zero = _mm256_setzero_pd();
    const __m256d one = _mm256_set1_pd(1.0);

    const __m256d pix = _mm256_mul_pd(_mm256_set1_pd(M_PI), x);
    const __m256d sin_pix = sin_pd_approx(pix);
    const __m256d sin_pix_a = sin_pd_approx(_mm256_div_pd(pix, a_val));

    const __m256d numerator = _mm256_mul_pd(_mm256_mul_pd(a_val, sin_pix), sin_pix_a);
    const __m256d denominator = _mm256_mul_pd(pix, pix);
    __m256d result = _mm256_div_pd(numerator, denominator);

    // Zero the kernel outside its support.
    const __m256d magnitude = _mm256_andnot_pd(_mm256_set1_pd(-0.0), x);
    result = _mm256_and_pd(result, _mm256_cmp_pd(magnitude, a_val, _CMP_LE_OQ));

    // x == 0 evaluates to 0/0 above; the kernel value there is 1.
    result = _mm256_blendv_pd(result, one, _mm256_cmp_pd(x, zero, _CMP_EQ_OQ));

    return result;
}

#elif defined(RESIZE_IMAGE_SIMD_NEON)

/**
 * @brief Lanczos kernel a * sin(pi x) * sin(pi x / a) / (pi x)^2 for two doubles.
 *
 * The sines are evaluated per lane with std::sin. Lanes with x == 0 yield 1;
 * lanes with |x| > a yield 0, matching the scalar kernel.
 *
 * @param x Sample offsets.
 * @param a Kernel radius.
 */
inline float64x2_t lanczosKernelNEON(const float64x2_t &x, int a)
{
    const float64x2_t zero = vdupq_n_f64(0.0);
    const float64x2_t one = vdupq_n_f64(1.0);
    const float64x2_t a_val = vdupq_n_f64(static_cast<double>(a));

    const float64x2_t pix = vmulq_f64(vdupq_n_f64(M_PI), x);
    const float64x2_t pix_div_a = vdivq_f64(pix, a_val);

    // Compute sin(x * pi) and sin(x * pi / a) lane by lane
    double pix_lanes[2];
    double pix_div_a_lanes[2];
    vst1q_f64(pix_lanes, pix);
    vst1q_f64(pix_div_a_lanes, pix_div_a);

    const double sin_pix_lanes[2] = { std::sin(pix_lanes[0]), std::sin(pix_lanes[1]) };
    const double sin_pix_a_lanes[2] = { std::sin(pix_div_a_lanes[0]), std::sin(pix_div_a_lanes[1]) };
    const float64x2_t sin_pix = vld1q_f64(sin_pix_lanes);
    const float64x2_t sin_pix_a = vld1q_f64(sin_pix_a_lanes);

    // Compute Lanczos kernel
    const float64x2_t numerator = vmulq_f64(vmulq_f64(a_val, sin_pix), sin_pix_a);
    const float64x2_t denominator = vmulq_f64(pix, pix);
    float64x2_t result = vdivq_f64(numerator, denominator);

    // Zero the kernel outside its support.
    const uint64x2_t inside = vcleq_f64(vabsq_f64(x), a_val);
    result = vbslq_f64(inside, result, zero);

    // x == 0 evaluates to 0/0 above; the kernel value there is 1.
    const uint64x2_t is_zero = vceqq_f64(x, zero);
    result = vbslq_f64(is_zero, one, result);

    return result;
}

#else

/**
 * @brief Scalar Lanczos kernel for platforms without a SIMD path.
 *
 * @param x Sample offset.
 * @param a Kernel radius.
 * @return 1 at x == 0, 0 for |x| > a, otherwise
 *         a * sin(pi x) * sin(pi x / a) / (pi x)^2.
 */
double lanczosKernel(double x, int a)
{
    if (x == 0.0)
        return 1.0;
    if (x < -a || x > a)
        return 0.0;
    const double pix = M_PI * x;
    return a * std::sin(pix) * std::sin(pix / a) / (pix * pix);
}

#endif

namespace {

/// Evaluates the platform's Lanczos kernel for a single offset.
inline double lanczosWeight(double x, int a)
{
#if defined(RESIZE_IMAGE_SIMD_AVX)
    return _mm256_cvtsd_f64(lanczosKernelAVX(_mm256_set1_pd(x), _mm256_set1_pd(a)));
#elif defined(RESIZE_IMAGE_SIMD_NEON)
    return vgetq_lane_f64(lanczosKernelNEON(vdupq_n_f64(x), a), 0);
#else
    return lanczosKernel(x, a);
#endif
}

/// Clamps a channel value to [0, 255] and rounds it to the nearest integer.
inline int toChannel(double value)
{
    return static_cast<int>(std::lround(std::clamp(value, 0.0, 255.0)));
}

} // namespace

/**
 * @brief Resamples one row of the target image with a Lanczos filter.
 *
 * Target pixel (x, y) is mapped to the source position
 * (x / targetWidth * (sourceWidth - 1), y / targetHeight * (sourceHeight - 1))
 * and computed as the normalized weighted sum of the 2a x 2a source pixels
 * around that position; taps outside the source image are skipped.
 *
 * @param y            Row index in the target image.
 * @param targetWidth  Width of the target image.
 * @param targetHeight Height of the target image.
 * @param sourceImage  Image to sample from.
 * @param a            Lanczos kernel radius.
 * @return @p targetWidth pixels as non-premultiplied QRgb values. The row is
 *         empty when @p targetWidth is not positive and fully transparent
 *         when the source image is empty or @p targetHeight is not positive.
 */
QVector<QRgb> processRow(int y, int targetWidth, int targetHeight, const QImage &sourceImage, int a)
{
    if (targetWidth <= 0) {
        return QVector<QRgb>();
    }

    QVector<QRgb> resultRow(targetWidth, qRgba(0, 0, 0, 0));

    const int sourceWidth = sourceImage.width();
    const int sourceHeight = sourceImage.height();
    if (sourceWidth <= 0 || sourceHeight <= 0 || targetHeight <= 0) {
        // Nothing to sample from, or the row mapping would divide by zero.
        return resultRow;
    }

    const int taps = std::max(2 * a, 0);

    // The vertical position and its weights are the same for the whole row.
    const double gy = (static_cast<double>(y) / targetHeight) * (sourceHeight - 1);
    const int firstRow = static_cast<int>(gy) - a + 1;
    std::vector<double> weightsY(static_cast<std::size_t>(taps));
    for (int t = 0; t < taps; ++t) {
        weightsY[static_cast<std::size_t>(t)] = lanczosWeight(gy - (firstRow + t), a);
    }

    std::vector<double> weightsX(static_cast<std::size_t>(taps));

    for (int x = 0; x < targetWidth; ++x) {
        const double gx = (static_cast<double>(x) / targetWidth) * (sourceWidth - 1);
        const int firstCol = static_cast<int>(gx) - a + 1;
        for (int t = 0; t < taps; ++t) {
            weightsX[static_cast<std::size_t>(t)] = lanczosWeight(gx - (firstCol + t), a);
        }

        double red = 0.0;
        double green = 0.0;
        double blue = 0.0;
        double alpha = 0.0;
        double sumWeights = 0.0;

        for (int ty = 0; ty < taps; ++ty) {
            const int iy = firstRow + ty;
            if (iy < 0 || iy >= sourceHeight) {
                continue;
            }
            for (int tx = 0; tx < taps; ++tx) {
                const int ix = firstCol + tx;
                if (ix < 0 || ix >= sourceWidth) {
                    continue;
                }

                const double weight = weightsX[static_cast<std::size_t>(tx)]
                                      * weightsY[static_cast<std::size_t>(ty)];

                // Read the channels straight from the QRgb: constructing a
                // QColor from a QRgb discards the alpha component.
                const QRgb pixel = sourceImage.pixel(ix, iy);
                red += weight * qRed(pixel);
                green += weight * qGreen(pixel);
                blue += weight * qBlue(pixel);
                alpha += weight * qAlpha(pixel);
                sumWeights += weight;
            }
        }

        if (sumWeights > 0.0) {
            resultRow[x] = qRgba(toChannel(red / sumWeights),
                                 toChannel(green / sumWeights),
                                 toChannel(blue / sumWeights),
                                 toChannel(alpha / sumWeights));
        } else {
            // No usable taps (for example a < 1): use the nearest source pixel
            // rather than emitting unnormalized sums.
            const int nx = std::clamp(static_cast<int>(std::lround(gx)), 0, sourceWidth - 1);
            const int ny = std::clamp(static_cast<int>(std::lround(gy)), 0, sourceHeight - 1);
            resultRow[x] = sourceImage.pixel(nx, ny);
        }
    }

    return resultRow;
}

/**
 * @brief Resamples an image to the requested size with a Lanczos filter.
 *
 * Rows are computed with processRow() through QtConcurrent::blockingMap and
 * copied into a QImage::Format_ARGB32 image.
 *
 * @param sourceImage  Image to resample.
 * @param targetWidth  Width of the result in pixels.
 * @param targetHeight Height of the result in pixels.
 * @param a            Lanczos kernel radius.
 * @return The resampled image, or a null QImage when the source is null, a
 *         target dimension is not positive, or the target image could not
 *         be allocated.
 */
QImage scaleImageLanczos(const QImage &sourceImage, int targetWidth, int targetHeight, int a = 3)
{
    if (sourceImage.isNull() || targetWidth <= 0 || targetHeight <= 0) {
        return QImage();
    }

    QImage targetImage(targetWidth, targetHeight, QImage::Format_ARGB32);
    if (targetImage.isNull()) {
        return QImage();
    }

    QVector<int> rows(targetHeight);
    for (int i = 0; i < targetHeight; ++i) {
        rows[i] = i;
    }

    uchar *imgBits = targetImage.bits();
    const auto bytesPerLine = targetImage.bytesPerLine();

    // Each task writes only its own scan line, so the rows need no locking.
    QtConcurrent::blockingMap(rows, [targetWidth, targetHeight, &sourceImage, a, imgBits, bytesPerLine](int y) {
        const QVector<QRgb> rowPixels = processRow(y, targetWidth, targetHeight, sourceImage, a);
        // Compute the offset in pointer width so large images cannot overflow int.
        uchar *rowPtr = imgBits + static_cast<qptrdiff>(y) * bytesPerLine;
        std::memcpy(rowPtr, rowPixels.constData(), static_cast<std::size_t>(targetWidth) * sizeof(QRgb));
    });

    return targetImage;
}

/**
 * @brief Lanczos-resamples a pixmap to exactly targetWidth x targetHeight.
 *
 * Converts the pixmap to a QImage, resamples it with scaleImageLanczos() and
 * converts the result back.
 *
 * @param pixmap       Source pixmap.
 * @param targetWidth  Width of the result in pixels.
 * @param targetHeight Height of the result in pixels.
 * @param a            Lanczos kernel radius.
 */
QPixmap scalePixmapLanczosQt(const QPixmap &pixmap, int targetWidth, int targetHeight, int a)
{
    const QImage sourceImage = pixmap.toImage();
    const QImage scaledImage = scaleImageLanczos(sourceImage, targetWidth, targetHeight, a);
    return QPixmap::fromImage(scaledImage);
}