From c94458d8f2f860a1c1178c3c13351777e3e3918a Mon Sep 17 00:00:00 2001
From: yuanhecai
Date: Tue, 26 Aug 2025 14:21:45 +0800
Subject: [PATCH] loongarch: add lsx simd opt

Add a LoongArch LSX code path for KISSFFT_DATATYPE=simd next to the
existing SSE one.  It is selected by the HAVE_LSX define.

* CMakeLists.txt: on loongarch32/loongarch64, check that the compiler
  defines __loongarch_sx when given -mlsx.  If it does, add HAVE_LSX to
  the compile definitions and use -mlsx where -msse was used (library,
  executables, pkg-config flags).
* Makefile, test/Makefile: on loongarch64 set HAVE_LSX=lsx and build the
  simd datatype with -DHAVE_LSX=1 -mlsx instead of -msse.
* kiss_fft.h: include <lsxintrin.h>; default KISS_FFT_MALLOC/FREE to
  aligned_alloc(16, ...)/free.
* _kiss_fft_guts.h, kiss_fftr.c, tools/kiss_fastfir.c, test/*.c: provide
  LSX replacements for the _mm_set1_ps()/_mm_malloc() uses.
---
Review notes (ignored by git-am):
 - kiss_fft.h: both "# include" directives had lost their header name;
   restored as <lsxintrin.h> (LSX branch) and <xmmintrin.h> (SSE branch).
 - _kiss_fft_guts.h: the LSX KISS_FFT_COS/KISS_FFT_SIN now evaluate
   cos()/sin() in double and narrow the result, like the SSE branch,
   instead of calling cosf()/sinf() on the double phase.
 - Line breaks were checked against every hunk header and the diffstat.
   Leading whitespace of context lines and the author's e-mail address
   could not be recovered; regenerate with git format-patch from the
   real tree before applying.

 CMakeLists.txt       | 28 ++++++++++++++++++++++++++--
 Makefile             | 27 ++++++++++++++++++++++++---
 _kiss_fft_guts.h     | 10 ++++++++++
 kiss_fft.h           | 15 ++++++++++++++-
 kiss_fftr.c          | 10 ++++++++++
 test/CMakeLists.txt  |  8 +++++++-
 test/Makefile        |  4 ++++
 test/test_real.c     |  5 +++++
 test/test_simd.c     | 14 ++++++++++++++
 test/twotonetest.c   |  6 ++++++
 tools/kiss_fastfir.c | 15 +++++++++++++++
 11 files changed, 135 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0213798..8f120a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,6 +109,21 @@ if(CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU)$" AND NOT CMAKE_CROSSCOMPILI
     include(GNUInstallDirs)
 endif()
 
+include(CheckCXXSourceCompiles)
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch32|loongarch64")
+    set (CMAKE_REQUIRED_FLAGS -mlsx)
+    check_cxx_source_compiles(
+        "int main() {
+            #if !defined(__loongarch_sx)
+            static_assert(false, \"__loongarch_sx is not defined\");
+            #endif
+            return 0;
+        }"
+        COMPILER_SUPPORT_LSX_FLAG
+    )
+endif()
+
 #
 # Declare PKGINCLUDEDIR for kissfft include path
 #
@@ -167,7 +182,10 @@ else()
 
     if(KISSFFT_DATATYPE MATCHES "^simd$")
         list(APPEND KISSFFT_COMPILE_DEFINITIONS USE_SIMD)
-        if (NOT MSVC)
+        if(COMPILER_SUPPORT_LSX_FLAG)
+            list(APPEND KISSFFT_COMPILE_DEFINITIONS HAVE_LSX)
+            target_compile_options(kissfft PRIVATE -mlsx)
+        elseif(NOT MSVC)
             target_compile_options(kissfft PRIVATE -msse)
         else()
             target_compile_options(kissfft PRIVATE "/arch:SSE")
@@ -262,6 +280,10 @@ function(add_kissfft_executable NAME)
         target_link_libraries(${NAME} PRIVATE m)
     endif()
 
+    if(COMPILER_SUPPORT_LSX_FLAG)
+        target_compile_options(${NAME} PRIVATE -mlsx)
+    endif()
+
     if (NOT KISSFFT_OPENMP)
         set_target_properties(${NAME} PROPERTIES
             OUTPUT_NAME "${NAME}-${KISSFFT_DATATYPE}")
@@ -329,7 +351,9 @@ if (KISSFFT_PKGCONFIG)
     join_paths(PKGCONFIG_KISSFFT_INCLUDEDIR "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
     if(KISSFFT_DATATYPE MATCHES "^simd$")
         list(APPEND KISSFFT_COMPILE_DEFINITIONS USE_SIMD)
-        if (NOT MSVC)
+        if(COMPILER_SUPPORT_LSX_FLAG)
+            set(PKG_KISSFFT_DEFS "${PKG_KISSFFT_DEFS} -mlsx")
+        elseif(NOT MSVC)
             set(PKG_KISSFFT_DEFS "${PKG_KISSFFT_DEFS} -msse")
         else()
             set(PKG_KISSFFT_DEFS "${PKG_KISSFFT_DEFS} /ARCH:SSE")
diff --git a/Makefile b/Makefile
index 17101f9..93f540f 100644
--- a/Makefile
+++ b/Makefile
@@ -72,6 +72,11 @@ ifneq ($(MAKECMDGOALS),clean)
         ifeq ($(_UNAME_ARCH),x86_64)
             CANDIDATE_LIBDIR_NAME = lib64
         endif
+        ifeq ($(_UNAME_ARCH),loongarch64)
+            CANDIDATE_LIBDIR_NAME = lib64
+            HAVE_LSX=lsx
+            export HAVE_LSX
+        endif
     endif
 endif
 
@@ -127,7 +132,11 @@ ifeq "$(KISSFFT_DATATYPE)" "int32_t"
 else ifeq "$(KISSFFT_DATATYPE)" "int16_t"
     TYPEFLAGS += -DFIXED_POINT=16
 else ifeq "$(KISSFFT_DATATYPE)" "simd"
+  ifeq "$(HAVE_LSX)" "lsx"
+    TYPEFLAGS += -DUSE_SIMD=1 -DHAVE_LSX=1 -mlsx
+  else
     TYPEFLAGS += -DUSE_SIMD=1 -msse
+  endif
 else ifeq "$(KISSFFT_DATATYPE)" "float"
     TYPEFLAGS += -Dkiss_fft_scalar=$(KISSFFT_DATATYPE)
 else ifeq "$(KISSFFT_DATATYPE)" "double"
@@ -273,21 +282,33 @@ testall:
 	$(MAKE) KISSFFT_DATATYPE=int16_t testsingle
 	# The simd and int32_t types may or may not work on your machine
 	$(MAKE) KISSFFT_DATATYPE=int32_t testsingle
-	$(MAKE) KISSFFT_DATATYPE=simd testsingle
+	@if [ "$(HAVE_LSX)" = "lsx" ]; then \
+		$(MAKE) KISSFFT_DATATYPE=simd HAVE_LSX=lsx testsingle; \
+	else \
+		$(MAKE) KISSFFT_DATATYPE=simd testsingle; \
+	fi
 	# Static libraries
 	$(MAKE) KISSFFT_DATATYPE=double KISSFFT_STATIC=1 testsingle
 	$(MAKE) KISSFFT_DATATYPE=float KISSFFT_STATIC=1 testsingle
 	$(MAKE) KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 testsingle
 	# The simd and int32_t types may or may not work on your machine
 	$(MAKE) KISSFFT_DATATYPE=int32_t KISSFFT_STATIC=1 testsingle
-	$(MAKE) KISSFFT_DATATYPE=simd KISSFFT_STATIC=1 testsingle
+	@if [ "$(HAVE_LSX)" = "lsx" ]; then \
+		$(MAKE) KISSFFT_DATATYPE=simd HAVE_LSX=lsx KISSFFT_STATIC=1 testsingle; \
+	else \
+		$(MAKE) KISSFFT_DATATYPE=simd KISSFFT_STATIC=1 testsingle; \
+	fi
 	# OpenMP libraries
 	$(MAKE) KISSFFT_DATATYPE=double KISSFFT_OPENMP=1 testsingle
 	$(MAKE) KISSFFT_DATATYPE=float KISSFFT_OPENMP=1 testsingle
 	$(MAKE) KISSFFT_DATATYPE=int16_t KISSFFT_OPENMP=1 testsingle
 	# The simd and int32_t types may or may not work on your machine
 	$(MAKE) KISSFFT_DATATYPE=int32_t KISSFFT_OPENMP=1 testsingle
-	$(MAKE) KISSFFT_DATATYPE=simd KISSFFT_OPENMP=1 testsingle
+	@if [ "$(HAVE_LSX)" = "lsx" ]; then \
+		$(MAKE) KISSFFT_DATATYPE=simd HAVE_LSX=lsx KISSFFT_OPENMP=1 testsingle; \
+	else \
+		$(MAKE) KISSFFT_DATATYPE=simd KISSFFT_OPENMP=1 testsingle; \
+	fi
 	$(warning All tests passed!)
 
 #
diff --git a/_kiss_fft_guts.h b/_kiss_fft_guts.h
index 4bd8d1c..4f18089 100644
--- a/_kiss_fft_guts.h
+++ b/_kiss_fft_guts.h
@@ -128,6 +128,16 @@ struct kiss_fft_state{
 #  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
 #  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
 #  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD) && defined(HAVE_LSX)
+#define KISS_FFT_COS(phase) ({ \
+    float __cos_val = (float)cos(phase); \
+    (__m128)(__lsx_vldrepl_w(&__cos_val, 0)); \
+})
+#define KISS_FFT_SIN(phase) ({ \
+    float __sin_val = (float)sin(phase); \
+    (__m128)(__lsx_vldrepl_w(&__sin_val, 0)); \
+})
+#define HALF_OF(x) ((x) * (__m128)(__lsx_vreplgr2vr_w(0x3F000000))) // 0.5f
 #elif defined(USE_SIMD)
 #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
 #  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
diff --git a/kiss_fft.h b/kiss_fft.h
index dce1034..dc54844 100644
--- a/kiss_fft.h
+++ b/kiss_fft.h
@@ -48,16 +48,29 @@ extern "C" {
 
 /* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+# include <lsxintrin.h>
+# define kiss_fft_scalar __m128
+# ifndef KISS_FFT_MALLOC
+#  define KISS_FFT_MALLOC(nbytes) aligned_alloc(16, KISS_FFT_ALIGN_SIZE_UP(nbytes))
+#  define KISS_FFT_ALIGN_CHECK(ptr)
+#  define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
+# endif
+# ifndef KISS_FFT_FREE
+#  define KISS_FFT_FREE free
+# endif
+#else
 # include <xmmintrin.h>
 # define kiss_fft_scalar __m128
 # ifndef KISS_FFT_MALLOC
 #  define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
-#  define KISS_FFT_ALIGN_CHECK(ptr) 
+#  define KISS_FFT_ALIGN_CHECK(ptr)
 #  define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
 # endif
 # ifndef KISS_FFT_FREE
 #  define KISS_FFT_FREE _mm_free
 # endif
+#endif
 #else
 # define KISS_FFT_ALIGN_CHECK(ptr)
 # define KISS_FFT_ALIGN_SIZE_UP(size) (size)
diff --git a/kiss_fftr.c b/kiss_fftr.c
index 778a9a6..6214a75 100644
--- a/kiss_fftr.c
+++ b/kiss_fftr.c
@@ -93,7 +93,12 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
     freqdata[0].r = tdc.r + tdc.i;
     freqdata[ncfft].r = tdc.r - tdc.i;
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+    freqdata[0].i = (__m128)(__lsx_vreplgr2vr_w(0));
+    freqdata[ncfft].i = freqdata[0].i;
+#else
     freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#endif
 #else
     freqdata[ncfft].i = freqdata[0].i = 0;
 #endif
@@ -146,7 +151,12 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t
         C_ADD (st->tmpbuf[k], fek, fok);
         C_SUB (st->tmpbuf[ncfft - k], fek, fok);
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+        __m128 neg_one = (__m128)__lsx_vreplgr2vr_w(0xBF800000); // -1.0f
+        st->tmpbuf[ncfft - k].i = __lsx_vfmul_s(st->tmpbuf[ncfft - k].i, neg_one);
+#else
         st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#endif
 #else
         st->tmpbuf[ncfft - k].i *= -1;
 #endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0a0e403..1e89f38 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,6 +2,10 @@ function(add_kissfft_test_executable NAME)
     add_kissfft_executable(${NAME} ${ARGN})
     target_include_directories(${NAME} PRIVATE ..)
 
+    if(COMPILER_SUPPORT_LSX_FLAG)
+        target_compile_options(${NAME} PRIVATE -mlsx)
+    endif()
+
     add_test(NAME ${NAME} COMMAND ${NAME})
     set_tests_properties(${NAME} PROPERTIES TIMEOUT 3600)
 endfunction()
@@ -46,7 +50,9 @@ add_kissfft_test_executable(testcpp testcpp.cc)
 if(KISSFFT_DATATYPE MATCHES "^simd$")
     add_kissfft_test_executable(tsimd test_simd.c)
     target_compile_definitions(tsimd PRIVATE USE_SIMD)
-    if (NOT MSVC)
+    if(COMPILER_SUPPORT_LSX_FLAG)
+        target_compile_options(kissfft PRIVATE -mlsx)
+    elseif(NOT MSVC)
         target_compile_options(kissfft PRIVATE -msse)
     else()
         target_compile_options(kissfft PRIVATE "/arch:SSE")
diff --git a/test/Makefile b/test/Makefile
index 1c5b87c..5c3449d 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -143,7 +143,11 @@ endif
 
 $(TESTSIMD): test_simd.c
 ifeq "$(KISSFFT_DATATYPE)" "simd"
+ifeq "$(HAVE_LSX)" "lsx"
+	$(CC) -o $@ -g $(CFLAGS) -DUSE_SIMD=1 -DHAVE_LSX=1 -mlsx $< -L.. -l$(KISSFFTLIB_SHORTNAME) -lm
+else
 	$(CC) -o $@ -g $(CFLAGS) -DUSE_SIMD=1 -msse $< -L.. -l$(KISSFFTLIB_SHORTNAME) -lm
+endif
 else
 	$(error ERROR: This test makes sense only with KISSFFT_DATATYPE=simd)
 endif
diff --git a/test/test_real.c b/test/test_real.c
index 9e4bd58..9412305 100644
--- a/test/test_real.c
+++ b/test/test_real.c
@@ -22,7 +22,12 @@ static
 kiss_fft_scalar rand_scalar(void)
 {
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+    float tmp = rand()-RAND_MAX/2;
+    return (__m128)(__lsx_vldrepl_w(&tmp, 0));
+#else
     return _mm_set1_ps(rand()-RAND_MAX/2);
+#endif
 #else
     kiss_fft_scalar s = (kiss_fft_scalar)(rand() -RAND_MAX/2);
     return s/2;
diff --git a/test/test_simd.c b/test/test_simd.c
index d9c6790..bb8ffd3 100644
--- a/test/test_simd.c
+++ b/test/test_simd.c
@@ -6,16 +6,30 @@ static void test1(void)
     int n[2] = {256,256};
     size_t nbytes = sizeof(kiss_fft_cpx)*n[0]*n[1];
 
+#ifdef HAVE_LSX
+    kiss_fft_cpx * inbuf = NULL;
+    kiss_fft_cpx * outbuf = NULL;
+    if (posix_memalign((void**)&inbuf, 16, nbytes) ||
+        posix_memalign((void**)&outbuf, 16, nbytes))
+        return;
+#else
     kiss_fft_cpx * inbuf = _mm_malloc(nbytes,16);
     kiss_fft_cpx * outbuf = _mm_malloc(nbytes,16);
+#endif
     memset(inbuf,0,nbytes);
     memset(outbuf,0,nbytes);
 
     kiss_fftnd_cfg cfg = kiss_fftnd_alloc(n,2,is_inverse,0,0);
     kiss_fftnd(cfg,inbuf,outbuf);
     kiss_fft_free(cfg);
+
+#ifdef HAVE_LSX
+    free(inbuf);
+    free(outbuf);
+#else
     _mm_free(inbuf);
     _mm_free(outbuf);
+#endif
 }
 
 int main(void)
diff --git a/test/twotonetest.c b/test/twotonetest.c
index 5f08daf..4f896bb 100644
--- a/test/twotonetest.c
+++ b/test/twotonetest.c
@@ -38,8 +38,14 @@ double two_tone_test( int nfft, int bin1,int bin2)
     /* generate a signal with two tones*/
     for (i = 0; i < nfft; i++) {
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+        float tmp = (maxrange>>1)*cos(f1*i) + (maxrange>>1)*cos(f2*i);
+        tbuf[i] = (__m128)__lsx_vldrepl_w(&tmp, 0);
+
+#else
         tbuf[i] = _mm_set1_ps( (maxrange>>1)*cos(f1*i)
                         + (maxrange>>1)*cos(f2*i) );
+#endif
 #else
         tbuf[i] = (maxrange>>1)*cos(f1*i)
                 + (maxrange>>1)*cos(f2*i);
diff --git a/tools/kiss_fastfir.c b/tools/kiss_fastfir.c
index d4e666c..6967589 100644
--- a/tools/kiss_fastfir.c
+++ b/tools/kiss_fastfir.c
@@ -154,8 +154,14 @@ kiss_fastfir_cfg kiss_fastfir_alloc(
 
     for ( i=0; i < st->n_freq_bins; ++i ) {
 #ifdef USE_SIMD
+#ifdef HAVE_LSX
+        __m128 tmp = (__m128)__lsx_vldrepl_w(&scale, 0);
+        st->fir_freq_resp[i].r = __lsx_vfmul_s(tmp, st->fir_freq_resp[i].r);
+        st->fir_freq_resp[i].i = __lsx_vfmul_s(tmp, st->fir_freq_resp[i].i);
+#else
         st->fir_freq_resp[i].r *= _mm_set1_ps(scale);
         st->fir_freq_resp[i].i *= _mm_set1_ps(scale);
+#endif
 #else
         st->fir_freq_resp[i].r *= scale;
         st->fir_freq_resp[i].i *= scale;
@@ -286,7 +292,11 @@ void direct_file_filter(
             tmph = imp_resp+nlag;
 #ifdef REAL_FASTFIR
 # ifdef USE_SIMD
+# ifdef HAVE_LSX
+            outval = (__m128)(__lsx_vreplgr2vr_w(0));
+#else
             outval = _mm_set1_ps(0);
+#endif
 #else
             outval = 0;
 #endif
@@ -297,7 +307,12 @@ void direct_file_filter(
                 outval += buf[k] * *tmph;
 #else
 # ifdef USE_SIMD
+# ifdef HAVE_LSX
+            outval.i = (__m128)(__lsx_vreplgr2vr_w(0));
+            outval.r = outval.i;
+#else
             outval.r = outval.i = _mm_set1_ps(0);
+#endif
 #else
             outval.r = outval.i = 0;
 #endif