loongarch: add LASX SIMD optimization

This commit is contained in:
yuanhecai
2025-08-27 11:17:54 +08:00
parent c94458d8f2
commit eeb2e98ff6
12 changed files with 99 additions and 42 deletions

View File

@@ -2,7 +2,11 @@ function(add_kissfft_test_executable NAME)
add_kissfft_executable(${NAME} ${ARGN})
target_include_directories(${NAME} PRIVATE ..)
if(COMPILER_SUPPORT_LSX_FLAG)
if(HAVE_LASX)
target_compile_options(${NAME} PRIVATE -mlasx)
endif()
if(HAVE_LSX)
target_compile_options(${NAME} PRIVATE -mlsx)
endif()
@@ -50,7 +54,9 @@ add_kissfft_test_executable(testcpp testcpp.cc)
if(KISSFFT_DATATYPE MATCHES "^simd$")
add_kissfft_test_executable(tsimd test_simd.c)
target_compile_definitions(tsimd PRIVATE USE_SIMD)
if(COMPILER_SUPPORT_LSX_FLAG)
if(HAVE_LASX)
target_compile_options(kissfft PRIVATE -mlasx)
elseif(HAVE_LSX)
target_compile_options(kissfft PRIVATE -mlsx)
elseif(NOT MSVC)
target_compile_options(kissfft PRIVATE -msse)

View File

@@ -143,7 +143,9 @@ endif
$(TESTSIMD): test_simd.c
ifeq "$(KISSFFT_DATATYPE)" "simd"
ifeq "$(HAVE_LSX)" "lsx"
ifeq "$(HAVE_LASX)" "lasx"
$(CC) -o $@ -g $(CFLAGS) -DUSE_SIMD=1 -DHAVE_LASX=1 -mlasx $< -L.. -l$(KISSFFTLIB_SHORTNAME) -lm
else ifeq "$(HAVE_LSX)" "lsx"
$(CC) -o $@ -g $(CFLAGS) -DUSE_SIMD=1 -DHAVE_LSX=1 -mlsx $< -L.. -l$(KISSFFTLIB_SHORTNAME) -lm
else
$(CC) -o $@ -g $(CFLAGS) -DUSE_SIMD=1 -msse $< -L.. -l$(KISSFFTLIB_SHORTNAME) -lm

View File

@@ -70,8 +70,13 @@ int main(int argc,char ** argv)
nbytes *= nfft[k];
#ifdef USE_SIMD
#ifdef HAVE_LASX
numffts /= 8;
fprintf(stderr,"since SIMD implementation does 8 ffts at a time, numffts is being reduced to %d\n",numffts);
#else
numffts /= 4;
fprintf(stderr,"since SIMD implementation does 4 ffts at a time, numffts is being reduced to %d\n",numffts);
#endif
#endif
buf=(kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);

View File

@@ -22,7 +22,10 @@ static
kiss_fft_scalar rand_scalar(void)
{
#ifdef USE_SIMD
#ifdef HAVE_LSX
#ifdef HAVE_LASX
float tmp = rand()-RAND_MAX/2;
return (__m256)(__lasx_xvldrepl_w(&tmp, 0));
#elif defined(HAVE_LSX)
float tmp = rand()-RAND_MAX/2;
return (__m128)(__lsx_vldrepl_w(&tmp, 0));
#else

View File

@@ -6,7 +6,7 @@ static void test1(void)
int n[2] = {256,256};
size_t nbytes = sizeof(kiss_fft_cpx)*n[0]*n[1];
#ifdef HAVE_LSX
#if defined(HAVE_LSX) || defined(HAVE_LASX)
kiss_fft_cpx * inbuf = NULL;
kiss_fft_cpx * outbuf = NULL;
if (posix_memalign((void**)&inbuf, 16, nbytes) ||
@@ -23,7 +23,7 @@ static void test1(void)
kiss_fftnd(cfg,inbuf,outbuf);
kiss_fft_free(cfg);
#ifdef HAVE_LSX
#if defined(HAVE_LSX) || defined(HAVE_LASX)
free(inbuf);
free(outbuf);
#else

View File

@@ -38,10 +38,12 @@ double two_tone_test( int nfft, int bin1,int bin2)
/* generate a signal with two tones*/
for (i = 0; i < nfft; i++) {
#ifdef USE_SIMD
#ifdef HAVE_LSX
#ifdef HAVE_LASX
float tmp = (maxrange>>1)*cos(f1*i) + (maxrange>>1)*cos(f2*i);
tbuf[i] = (__m256)__lasx_xvldrepl_w(&tmp, 0);
#elif defined(HAVE_LSX)
float tmp = (maxrange>>1)*cos(f1*i) + (maxrange>>1)*cos(f2*i);
tbuf[i] = (__m128)__lsx_vldrepl_w(&tmp, 0);
#else
tbuf[i] = _mm_set1_ps( (maxrange>>1)*cos(f1*i)
+ (maxrange>>1)*cos(f2*i) );