From 30c4ee30f550f18daa2573671cf69e6b705e9829 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 11 Oct 2003 02:21:48 +0000 Subject: [PATCH] Dog slow, but does mixed radix! 'make test' output : ### testing SNR for 1024 point FFTs #### DOUBLE snr_t2f = 295.52 snr_f2t = 307.98 #### FLOAT snr_t2f = 144.62 snr_f2t = 143.23 #### SHORT snr_t2f = -31.515 snr_f2t = -60.836 #### timing 10000 x 1024 point FFTs #### DOUBLE Elapsed:0:44.17 user:35.11 sys:0.27 #### FLOAT Elapsed:0:24.22 user:19.66 sys:0.16 #### SHORT Elapsed:0:30.39 user:25.07 sys:0.09 --- fft.py | 6 +- kiss_fft.c | 294 ++++++++++++++++------------------------------------- 2 files changed, 94 insertions(+), 206 deletions(-) diff --git a/fft.py b/fft.py index 980dd0f..a119651 100644 --- a/fft.py +++ b/fft.py @@ -15,18 +15,22 @@ def fft(f): else: raise Exception('%s not factorable ' % n) + print 'n=%d,p=%d' % (n,p) + print f,' << fin' m = n/p Fout=[] for q in range(p): # 0,1 fp = f[q::p] + print fp,'<< fp' Fp = fft( fp ) Fout.extend( Fp ) for u in range(m): scratch = Fout[u::m] # u to end in strides of m + print scratch for q1 in range(p): k = q1*m + u # indices to Fout above that became scratch - Fout[ k ] = scratch[0] + Fout[ k ] = scratch[0] # cuz e**0==1 in loop below for q in range(1,p): t = e ** ( j*2*pi*k*q/n ) Fout[ k ] += scratch[q] * t diff --git a/kiss_fft.c b/kiss_fft.c index 0649865..280347d 100644 --- a/kiss_fft.c +++ b/kiss_fft.c @@ -27,66 +27,21 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * }kiss_fft_cpx; */ -typedef struct { - int nr; // N remaining - int radix; // radix of the stage - kiss_fft_cpx * twiddle; -}kiss_fft_stage; - const double pi=3.14159265358979323846264338327; - #define MAX_STAGES 20 typedef struct { int nfft; - int nstages; - int allocsize; - kiss_fft_stage stages[MAX_STAGES]; - int * unscramble_indices; - kiss_fft_cpx * buf1; + int inverse; }kiss_fft_state; - - -#ifdef FIXED_POINT - -# define TWIDDLE_RANGE ( (1<> 1 );\ - (x).i = ( ( (a).i+(b).i +1) >> 1 ); }while(0) -# define C_SUB(x,a,b) \ - do{ (x).r = ( ( (a).r-(b).r +1) >> 1 );\ - (x).i = ( ( (a).i-(b).i +1) >> 1 ); }while(0) - - /* We don't have to worry about overflow from multiplying by twiddle factors since they - * all have unity magnitude. Still need to shift away fractional bits after adding 1/2 for - * rounding. - * */ -# define C_MUL(m,a,b) \ - do{ (m).r = ( ( (a).r*(b).r - (a).i*(b).i) + (TWIDDLE_RANGE>>1) ) >> FIXED_POINT_FRAC_BITS;\ - (m).i = ( ( (a).r*(b).i + (a).i*(b).r) + (TWIDDLE_RANGE>>1) ) >> FIXED_POINT_FRAC_BITS;\ - }while(0) -#else // not FIXED_POINT - #define C_ADD(x,a,b) \ do{ (x).r = (a).r+(b).r;\ (x).i = (a).i+(b).i;}while(0) -#define C_SUB(x,a,b) \ - do{ (x).r = (a).r-(b).r;\ - (x).i = (a).i-(b).i;}while(0) #define C_MUL(m,a,b) \ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) -#endif - - static kiss_fft_cpx cexp(double phase) { @@ -96,69 +51,6 @@ kiss_fft_cpx cexp(double phase) return x; } -// create the twiddle factors -static -void make_twiddles(kiss_fft_cpx * twiddle,int ntwid,int symmetry,int inverse_fft) -{ - int n; - int denom = ntwid*symmetry; - - for (n=0;n>= 1 ) { - rev = rev*2 + (i&1); - i>>=1; - } - return rev; -} - -// make a list of index swaps that need to be done for bit-reversed addressing -static -int make_bit_reverse_indices(int *swap_indices ,int nfft) -{ - int n,nswap; - nswap=0; - // set up swap indices to unwrap bit-reversed addressing - for ( n=0; nradix * stage->nr); - Ny = stage->nr; - Nx = stage->radix; + int m,p=0,q,q1,u,k; + kiss_fft_cpx t; + double two_pi_divN; - for (ny=0;nynr ; ++n ) - { - C_ADD( csum, *f1 , *f2 ); - C_SUB( cdiff, *f1 , *f2 ); - C_MUL(*f2, cdiff, *twid); - *f1++ = csum; - ++f2; - ++twid; - } -*/ - } - if ( nstages == 1 ) + if (n==1) { + *Fout = *f; return; + } - for (n=0;nradix;++n) { - int offset = n * stage->nr; - fft_work( h2 + offset , h + offset ,stage+1,nstages-1); + two_pi_divN = 2*pi/n; + if (inverse) + two_pi_divN *= -1; + + if (n%2 == 0) p=2; + else if(n%3 == 0) p=3; + else if(n%5 == 0) p=5; + else if(n%7 == 0) p=7; + else { + fprintf(stderr,"%d is not divisible by %d\n",n,p); + p=n; + exit(1); + } + m = n/p; + /* n = stage->n; m = stage->m; p = stage->p; */ + +#ifdef LOUD + printf("n=%d,p=%d\n",n,p); + for (k=0;k1 && (nfft%radix)==0) { - int newsize; - int nstages=st->nstages; - nfft /= radix; - fprintf(stderr,"%d ",radix); - - newsize = st->allocsize + nfft * sizeof(kiss_fft_cpx); - - st=(kiss_fft_state*)realloc(st,newsize); - - st->stages[nstages].radix=radix; - st->stages[nstages].nr=nfft; - st->stages[nstages].twiddle = (kiss_fft_cpx*)((char*)st + st->allocsize); - - make_twiddles( st->stages[nstages].twiddle,nfft,radix,inverse_fft ); - st->allocsize = newsize; - st->nstages = nstages+1; - } - *pnfft = nfft; - return st; -} - -/* +/* * void * kiss_fft_alloc(int nfft,int inverse_fft) - * + * * User-callable function to allocate all necessary scratch space for the fft. * * The return value is a contiguous block of memory, allocated with malloc. As such, @@ -259,41 +161,23 @@ kiss_fft_state * decompose( kiss_fft_state * st, int * pnfft,int radix,int inver void * kiss_fft_alloc(int nfft,int inverse_fft) { kiss_fft_state * st=NULL; - int tmp; - int i; - int allocsize=sizeof(kiss_fft_state) + nfft*( sizeof(int) + sizeof(kiss_fft_cpx) ); - - st = ( kiss_fft_state *)malloc( allocsize ); + st = ( kiss_fft_state *)malloc( sizeof(kiss_fft_state) ); st->nfft=nfft; - st->nstages=0; - st->allocsize=allocsize; - st->unscramble_indices = (int*)(st+1);// just past end of buffer - st->buf1 = (kiss_fft_cpx*)( st->unscramble_indices + nfft); - - for (i=0;iunscramble_indices[i] = i; - - fprintf(stderr,"factoring %d: ",nfft); - - tmp=nfft; - st = decompose( st,&tmp,2,inverse_fft); - fprintf(stderr,"\n"); - - // TODO factor 3,5,7,11 ??? - if ( tmp > 1 ) { - fprintf(stderr,"%d not factorable by 2,3 (%d remaining)\n",nfft,tmp); - free(st); - return NULL; - } + st->inverse = inverse_fft; return st; } void kiss_fft(const void * cfg,kiss_fft_cpx *f) { + int i,n; const kiss_fft_state * st = cfg; - fft_work( f,st->buf1, st->stages , st->nstages ); - if (st->nstages&1) { - memcpy( st->buf1 , f,sizeof(kiss_fft_cpx) * st->nfft ); - } + kiss_fft_cpx tmpbuf[1024]; + kiss_fft_cpx scratch[1024]; + n = st->nfft; + + for (i=0;iinverse, scratch ); }