Added SIMD capability (SSE build selected with DATATYPE=simd)

This commit is contained in:
Mark Borgerding
2005-06-24 03:03:31 +00:00
parent 24be1c5850
commit 7f6cbeab2e
10 changed files with 112 additions and 9 deletions

View File

@ -23,6 +23,7 @@ TESTKFC=tkfc_$(DATATYPE)
FASTFILTREAL=ffr_$(DATATYPE)
SELFTESTSRC=twotonetest.c
TYPEFLAGS=-Dkiss_fft_scalar=$(DATATYPE)
ifeq "$(DATATYPE)" "short"
@ -31,7 +32,12 @@ endif
ifeq "$(DATATYPE)" "long"
TYPEFLAGS=-DFIXED_POINT=32
endif
endif
# DATATYPE=simd selects the SIMD build: the sources are compiled with
# USE_SIMD defined to 1 and with the compiler's -msse option enabled.
ifeq "$(DATATYPE)" "simd"
TYPEFLAGS=-DUSE_SIMD=1 -msse
endif
ifeq "$(DATATYPE)" "float"
# fftw needs to be built with --enable-float to build this lib
@ -61,6 +67,9 @@ $(TESTKFC): $(SRCFILES)
# Real-FFT test program: compiles test_real.c together with $(SRCFILES),
# using the datatype-specific $(TYPEFLAGS). $+ expands to all prerequisites.
$(TESTREAL): test_real.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
# SIMD benchmark binary. Unlike the neighbouring rules it does not use
# $(TYPEFLAGS); it hard-codes -DUSE_SIMD -msse -m3dnow, so it is built the
# same way regardless of the current $(DATATYPE) setting.
bm_simd: benchkiss.c ../kiss_fft.c pstats.c ../tools/kfc.c
$(CC) -o $@ $(CFLAGS) -DUSE_SIMD -msse -m3dnow -lm $+
# Benchmark program for the currently selected datatype: compiles benchkiss.c
# together with $(SRCFILES) using $(TYPEFLAGS).
$(BENCHKISS): benchkiss.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+

View File

@ -6,6 +6,8 @@
#include "pstats.h"
#define CHK fprintf(stderr,"line %d\n" , __LINE__ )
int main(int argc,char ** argv)
{
int nfft=1024;
@ -31,20 +33,41 @@ int main(int argc,char ** argv)
break;
}
}
CHK;
#ifdef USE_SIMD
buf=(kiss_fft_cpx*)memalign(sizeof(kiss_fft_cpx),sizeof(kiss_fft_cpx) * nfft);
bufout=(kiss_fft_cpx*)memalign(sizeof(kiss_fft_cpx),sizeof(kiss_fft_cpx) * nfft);
numffts /= 4;
fprintf(stderr,"since SIMD implementation does 4 ffts at a time, numffts is being reduced to %d\n",numffts);
#else
buf=(kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft);
bufout=(kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft);
#endif
fprintf(stderr,"buf at %p, bufout at %p\n",buf,bufout);
CHK;
for (i=0;i<nfft;++i ) {
#ifdef USE_SIMD
buf[i].r = _mm_set_ps1((float)( rand() - RAND_MAX/2));
buf[i].i = _mm_set_ps1((float)( rand() - RAND_MAX/2));
#else
buf[i].r = rand() - RAND_MAX/2;
buf[i].i = rand() - RAND_MAX/2;
#endif
}
CHK;
pstats_init();
CHK;
st = kiss_fft_alloc( nfft ,isinverse ,0,0);
CHK;
for (i=0;i<numffts;++i)
kiss_fft( st ,buf,bufout );
CHK;
free(st);

View File

@ -5,8 +5,6 @@
#include "kiss_fftr.h"
#include <limits.h>
#define pcpx(c)\
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
static
double two_tone_test( int nfft, int bin1,int bin2)
@ -27,20 +25,25 @@ double two_tone_test( int nfft, int bin1,int bin2)
#endif
cfg = kiss_fftr_alloc(nfft , 0, NULL, NULL);
tbuf = malloc(nfft * sizeof(kiss_fft_scalar));
kout = malloc(nfft * sizeof(kiss_fft_cpx));
tbuf = memalign(sizeof(kiss_fft_scalar),nfft * sizeof(kiss_fft_scalar));
kout = memalign(sizeof(kiss_fft_scalar),nfft * sizeof(kiss_fft_cpx));
/* generate a signal with two tones*/
for (i = 0; i < nfft; i++) {
#ifdef USE_SIMD
tbuf[i] = _mm_set1_ps( (maxrange>>1)*cos(f1*i)
+ (maxrange>>1)*cos(f2*i) );
#else
tbuf[i] = (maxrange>>1)*cos(f1*i)
+ (maxrange>>1)*cos(f2*i);
#endif
}
kiss_fftr(cfg, tbuf, kout);
for (i=0;i < (nfft/2+1);++i) {
double tmpr = (double)kout[i].r / (double)maxrange;
double tmpi = (double)kout[i].i / (double)maxrange;
double tmpr = (double)*(float*)&kout[i].r / (double)maxrange;
double tmpi = (double)*(float*)&kout[i].i / (double)maxrange;
double mag2 = tmpr*tmpr + tmpi*tmpi;
if (i!=0 && i!= nfft/2)
mag2 *= 2; /* all bins except DC and Nyquist have symmetric counterparts implied*/