From 35c88edf2d010c8ea4b9846a12c7e6e56486f4f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Sat, 31 Aug 2024 14:27:27 +0200 Subject: [PATCH] More portable code --- image_processing/resize_image.cpp | 44 +++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/image_processing/resize_image.cpp b/image_processing/resize_image.cpp index 167117e2..a121cca3 100644 --- a/image_processing/resize_image.cpp +++ b/image_processing/resize_image.cpp @@ -76,13 +76,53 @@ QPixmap scalePixmapArea(const QPixmap &pixmap, int width, int height) // Define SIMD intrinsics for different platforms #if defined(__AVX__) || defined(__AVX2__) +// Function to normalize angles in radians to the range [-PI, PI] +__m256d normalize_angle(__m256d x) +{ + const __m256d pi = _mm256_set1_pd(M_PI); + const __m256d two_pi = _mm256_set1_pd(2 * M_PI); + // Calculate the quotient of x / (2*PI) + __m256d quotient = _mm256_div_pd(x, two_pi); + // Use floor to get the nearest lower integer + quotient = _mm256_floor_pd(quotient); + // Calculate the remainder + __m256d remainder = _mm256_sub_pd(x, _mm256_mul_pd(quotient, two_pi)); + // Adjust the range to [-PI, PI] + __m256d adjust = _mm256_cmp_pd(remainder, pi, _CMP_GT_OS); + remainder = _mm256_sub_pd(remainder, _mm256_and_pd(adjust, two_pi)); + return remainder; +} + +// Improved sine approximation function for __m256d using the normalized angle +__m256d sin_pd_approx(__m256d x) +{ + x = normalize_angle(x); // Normalize x to the range [-PI, PI] + + // Sine approximation coefficients + const __m256d a0 = _mm256_set1_pd(-0.16666666666666666); + const __m256d a1 = _mm256_set1_pd(0.008333333333333333); + const __m256d a2 = _mm256_set1_pd(-0.0001984126984126984); + + __m256d x2 = _mm256_mul_pd(x, x); + __m256d x3 = _mm256_mul_pd(x2, x); + __m256d x5 = _mm256_mul_pd(x3, x2); + __m256d x7 = _mm256_mul_pd(x5, x2); + + // Compute the polynomial approximation + __m256d result = _mm256_add_pd(x, _mm256_mul_pd(a0, x3)); + result = _mm256_add_pd(result, _mm256_mul_pd(a1, x5)); + result = _mm256_add_pd(result, _mm256_mul_pd(a2, x7)); + + return result; +} + inline __m256d lanczosKernelAVX(const __m256d &x, const __m256d &a_val) { __m256d zero = _mm256_setzero_pd(); __m256d one = _mm256_set1_pd(1.0); __m256d pix = _mm256_mul_pd(_mm256_set1_pd(M_PI), x); - __m256d sin_pix = _mm256_sin_pd(pix); - __m256d sin_pix_a = _mm256_sin_pd(_mm256_div_pd(pix, a_val)); + __m256d sin_pix = sin_pd_approx(pix); + __m256d sin_pix_a = sin_pd_approx(_mm256_div_pd(pix, a_val)); __m256d numerator = _mm256_mul_pd(_mm256_mul_pd(a_val, sin_pix), sin_pix_a); __m256d denominator = _mm256_mul_pd(pix, pix); __m256d result = _mm256_div_pd(numerator, denominator);