update benchmark tool to allow multi-dimensional and/or real FFTs

This commit is contained in:
Mark Borgerding 2006-11-14 19:40:58 +00:00
parent 97ce553a94
commit 1922ba0d4e
3 changed files with 98 additions and 59 deletions

View File

@ -48,7 +48,7 @@ endif
FFTWLIBDIR=-L/usr/local/lib/ FFTWLIBDIR=-L/usr/local/lib/
SRCFILES=../kiss_fft.c ../tools/kiss_fftnd.c ../tools/kiss_fftr.c pstats.c ../tools/kfc.c SRCFILES=../kiss_fft.c ../tools/kiss_fftnd.c ../tools/kiss_fftr.c pstats.c ../tools/kfc.c ../tools/kiss_fftndr.c
all: tools $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(TESTKFC) all: tools $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(TESTKFC)

View File

@ -3,72 +3,117 @@
#include <sys/times.h> #include <sys/times.h>
#include <unistd.h> #include <unistd.h>
#include "kiss_fft.h" #include "kiss_fft.h"
#include "kiss_fftr.h"
#include "kiss_fftnd.h"
#include "kiss_fftndr.h"
#include "pstats.h" #include "pstats.h"
static
int getdims(int * dims, char * arg)
{
char *s;
int ndims=0;
while ( (s=strtok( arg , ",") ) ) {
dims[ndims++] = atoi(s);
//printf("%s=%d\n",s,dims[ndims-1]);
arg=NULL;
}
return ndims;
}
int main(int argc,char ** argv) int main(int argc,char ** argv)
{ {
int nfft=1024; int k;
int nfft[32];
int ndims = 1;
int isinverse=0; int isinverse=0;
int numffts=1000,i; int numffts=1000,i;
kiss_fft_cpx * buf; kiss_fft_cpx * buf;
kiss_fft_cpx * bufout; kiss_fft_cpx * bufout;
kiss_fft_cfg st; int real = 0;
nfft[0] = 1024;// default
while (1) { while (1) {
int c = getopt (argc, argv, "n:ix:"); int c = getopt (argc, argv, "n:ix:r");
if (c == -1) if (c == -1)
break; break;
switch (c) { switch (c) {
case 'n': case 'r':
nfft = atoi (optarg); real = 1;
if (nfft != kiss_fft_next_fast_size(nfft) ) { break;
int ng = kiss_fft_next_fast_size(nfft); case 'n':
fprintf(stderr,"warning: %d might be a better choice for speed than %d\n",ng,nfft); ndims = getdims(nfft, optarg );
if (nfft[0] != kiss_fft_next_fast_size(nfft[0]) ) {
int ng = kiss_fft_next_fast_size(nfft[0]);
fprintf(stderr,"warning: %d might be a better choice for speed than %d\n",ng,nfft[0]);
}
break;
case 'x':
numffts = atoi (optarg);
break;
case 'i':
isinverse = 1;
break;
} }
break;
case 'x':
numffts = atoi (optarg);
break;
case 'i':
isinverse = 1;
break;
}
} }
#ifdef USE_SIMD int nbytes = sizeof(kiss_fft_cpx);
buf=(kiss_fft_cpx*)memalign(sizeof(kiss_fft_cpx),sizeof(kiss_fft_cpx) * nfft); for (k=0;k<ndims;++k)
bufout=(kiss_fft_cpx*)memalign(sizeof(kiss_fft_cpx),sizeof(kiss_fft_cpx) * nfft); nbytes *= nfft[k];
#ifdef USE_SIMD
numffts /= 4; numffts /= 4;
nbytes /= 4;
fprintf(stderr,"since SIMD implementation does 4 ffts at a time, numffts is being reduced to %d\n",numffts); fprintf(stderr,"since SIMD implementation does 4 ffts at a time, numffts is being reduced to %d\n",numffts);
#else
buf=(kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft);
bufout=(kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft);
#endif #endif
for (i=0;i<nfft;++i ) { buf=(kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
#ifdef USE_SIMD bufout=(kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
buf[i].r = _mm_set_ps1((float)( rand() - RAND_MAX/2)); memset(buf,0,nbytes);
buf[i].i = _mm_set_ps1((float)( rand() - RAND_MAX/2));
#else
buf[i].r = rand() - RAND_MAX/2;
buf[i].i = rand() - RAND_MAX/2;
#endif
}
pstats_init(); pstats_init();
st = kiss_fft_alloc( nfft ,isinverse ,0,0); if (ndims==1) {
if (real) {
for (i=0;i<numffts;++i) kiss_fftr_cfg st = kiss_fftr_alloc( nfft[0] ,isinverse ,0,0);
kiss_fft( st ,buf,bufout ); if (isinverse)
for (i=0;i<numffts;++i)
free(st); kiss_fftri( st ,(kiss_fft_cpx*)buf,(kiss_fft_scalar*)bufout );
else
for (i=0;i<numffts;++i)
kiss_fftr( st ,(kiss_fft_scalar*)buf,(kiss_fft_cpx*)bufout );
free(st);
}else{
kiss_fft_cfg st = kiss_fft_alloc( nfft[0] ,isinverse ,0,0);
for (i=0;i<numffts;++i)
kiss_fft( st ,buf,bufout );
free(st);
}
}else{
if (real) {
kiss_fftndr_cfg st = kiss_fftndr_alloc( nfft,ndims ,isinverse ,0,0);
if (isinverse)
for (i=0;i<numffts;++i)
kiss_fftndri( st ,(kiss_fft_cpx*)buf,(kiss_fft_scalar*)bufout );
else
for (i=0;i<numffts;++i)
kiss_fftndr( st ,(kiss_fft_scalar*)buf,(kiss_fft_cpx*)bufout );
free(st);
}else{
kiss_fftnd_cfg st= kiss_fftnd_alloc(nfft,ndims,isinverse ,0,0);
for (i=0;i<numffts;++i)
kiss_fftnd( st ,buf,bufout );
free(st);
}
}
free(buf); free(bufout); free(buf); free(bufout);
fprintf(stderr,"KISS\tnfft=%d\tnumffts=%d\n" ,nfft,numffts); fprintf(stderr,"KISS\tnfft=");
for (k=0;k<ndims;++k)
fprintf(stderr, "%d,",nfft[k]);
fprintf(stderr,"\tnumffts=%d\n" ,numffts);
pstats_report(); pstats_report();
kiss_fft_cleanup(); kiss_fft_cleanup();

View File

@ -73,27 +73,24 @@ void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx
int k1,k2; int k1,k2;
int dimReal = st->dimReal; int dimReal = st->dimReal;
int dimOther = st->dimOther; int dimOther = st->dimOther;
int nrbins = dimReal/2+1;
kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
kiss_fft_cpx * tmp2; kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
if (dimReal/2+1 > dimOther)
tmp2 = tmp1+dimReal/2+1;
else
tmp2 = tmp1+dimOther;
// timedata is N0 x N1 x ... x Nk real // timedata is N0 x N1 x ... x Nk real
// take a real chunk of data, fft it and place the output at correct intervals // take a real chunk of data, fft it and place the output at correct intervals
for (k1=0;k1<dimOther;++k1) { for (k1=0;k1<dimOther;++k1) {
kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds dimReal/2+1 complex points kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points
for (k2=0;k2<dimReal/2+1;++k2) for (k2=0;k2<nrbins;++k2)
tmp2[ k2*dimOther+k1 ] = tmp1[k2]; tmp2[ k2*dimOther+k1 ] = tmp1[k2];
} }
for (k2=0;k2<dimReal/2+1;++k2) { for (k2=0;k2<nrbins;++k2) {
kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
for (k1=0;k1<dimOther;++k1) for (k1=0;k1<dimOther;++k1)
freqdata[ k1*(dimReal/2+1) + k2] = tmp1[k1]; freqdata[ k1*(nrbins) + k2] = tmp1[k1];
} }
} }
@ -102,21 +99,18 @@ void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scala
int k1,k2; int k1,k2;
int dimReal = st->dimReal; int dimReal = st->dimReal;
int dimOther = st->dimOther; int dimOther = st->dimOther;
int nrbins = dimReal/2+1;
kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
kiss_fft_cpx * tmp2; kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
if (dimReal/2+1 > dimOther)
tmp2 = tmp1+dimReal/2+1;
else
tmp2 = tmp1+dimOther;
for (k2=0;k2<dimReal/2+1;++k2) { for (k2=0;k2<nrbins;++k2) {
for (k1=0;k1<dimOther;++k1) for (k1=0;k1<dimOther;++k1)
tmp1[k1] = freqdata[ k1*(dimReal/2+1) + k2 ]; tmp1[k1] = freqdata[ k1*(nrbins) + k2 ];
kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther); kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther);
} }
for (k1=0;k1<dimOther;++k1) { for (k1=0;k1<dimOther;++k1) {
for (k2=0;k2<dimReal/2+1;++k2) for (k2=0;k2<nrbins;++k2)
tmp1[k2] = tmp2[ k2*dimOther+k1 ]; tmp1[k2] = tmp2[ k2*dimOther+k1 ];
kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal); kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal);
} }