diff --git a/kiss_fft.c b/kiss_fft.c index 5a36f2b..1db9cf7 100644 --- a/kiss_fft.c +++ b/kiss_fft.c @@ -17,6 +17,14 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include #include #include "kiss_fft.h" + +/*try this, you might get a speed improvement*/ +#if 0 +# define FUNCDECL static inline +#else +# define FUNCDECL +#endif + /* * kiss_fft.h * defines kiss_fft_scalar as either short or a float type @@ -27,6 +35,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * }kiss_fft_cpx; */ + typedef struct { int nfft; int inverse; @@ -37,14 +46,25 @@ typedef struct { }kiss_fft_state; #ifdef FIXED_POINT - /* We don't have to worry about overflow from multiplying by twiddle factors since they - * all have unity magnitude. Still need to shift away fractional bits after adding 1/2 for - * rounding. */ # define C_MUL(m,a,b) \ do{ (m).r = ( ( (a).r*(b).r - (a).i*(b).i) + (1<<14) ) >> 15;\ (m).i = ( ( (a).r*(b).i + (a).i*(b).r) + (1<<14) ) >> 15;\ }while(0) -#else // not FIXED_POINT + +# define C_FIXDIV(c,div) \ + do{ (c).r /= div; (c).i /=div; }while(0) + +#define C_MUL_SCALAR(m,s) \ + do{ (m).r = ( (m).r * (s) + (1<<14) ) >> 15;\ + (m).i = ( (m).i * (s) + (1<<14) ) >> 15;\ + }while(0) + +#else /* not FIXED_POINT*/ +#define C_MUL_SCALAR(m,s) \ + do{ (m).r *= (s);\ + (m).i *= (s);\ + }while(0) +# define C_FIXDIV(c,div) /* NOOP */ #define C_MUL(m,a,b) \ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) @@ -58,8 +78,14 @@ typedef struct { do { (res).r += (a).r; (res).i += (a).i; }while(0) #define C_SUBFROM( res , a)\ do { (res).r -= (a).r; (res).i -= (a).i; }while(0) +#define C_ROTADDTO(sum,c,q) \ + do{ switch (q) {\ + case 0: (sum).r += (c).r; (sum).i += (c).i; break;\ + case 1: (sum).r += (c).i; (sum).i -= (c).r; break;\ + case 2: (sum).r -= (c).r; (sum).i -= (c).i; break;\ + case 3: (sum).r -= (c).i; (sum).i += (c).r; break;\ + } }while(0) -static kiss_fft_cpx cexp(double phase) { kiss_fft_cpx x; @@ -73,16 +99,72 @@ kiss_fft_cpx cexp(double phase) return x; } -#define C_ROTADDTO(sum,c,q) \ - do{ switch (q) {\ - case 0: (sum).r += (c).r; (sum).i += (c).i; break;\ - case 1: (sum).r += (c).i; (sum).i -= (c).r; break;\ - case 2: (sum).r -= (c).r; (sum).i -= (c).i; break;\ - case 3: (sum).r -= (c).i; (sum).i += (c).r; break;\ - } }while(0) +FUNCDECL +void bfly2( + kiss_fft_cpx * Fout, + int fstride, + const kiss_fft_state * st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_MUL (t, *Fout2 , *twiddles); + twiddles += fstride; + C_FIXDIV(*Fout,2); C_FIXDIV(t,2); + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} -static -inline +FUNCDECL +void bfly3( + kiss_fft_cpx * Fout, + int fstride, + const kiss_fft_state * st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2; + + int u; + kiss_fft_cpx * scratch = st->scratch; + kiss_fft_cpx * twiddles = st->twiddles; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + + scratch[3] = twiddles[ fstride*m ]; + + for ( u=0; utwiddles; do { -#ifdef FIXED_POINT - Fout->r >>=2; Fout->i >>=2; - Fout1->r >>=2; Fout1->i >>=2; - Fout2->r >>=2; Fout2->i >>=2; - Fout3->r >>=2; Fout3->i >>=2; -#endif + C_FIXDIV(*Fout,4); C_FIXDIV(*Fout1,4); C_FIXDIV(*Fout2,4); C_FIXDIV(*Fout3,4); + C_MUL(t1,*Fout1 , *tw1 ); tw1 += fstride; C_MUL(t2,*Fout2 , *tw2 ); @@ -138,95 +216,8 @@ void bfly4( ++Fout; ++Fout1; ++Fout2; ++Fout3; }while(--m); } - -static -inline -void bfly2( - kiss_fft_cpx * Fout, - int fstride, - const kiss_fft_state * st, - int m - ) -{ - kiss_fft_cpx * Fout2; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx t; - Fout2 = Fout + m; - do{ - C_MUL (t, *Fout2 , *twiddles); - twiddles += fstride; -#ifdef FIXED_POINT - Fout->r>>=1; Fout->i>>=1; - t.r>>=1; t.i>>=1; -#endif - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; - }while (--m); -} - -static -inline -void bfly3( - kiss_fft_cpx * Fout, - int fstride, - const kiss_fft_state * st, - int m - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2; - - int u; - kiss_fft_cpx * scratch = st->scratch; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx t[2]; - kiss_fft_cpx epi3; - epi3 = twiddles[fstride*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - - for ( u=0; ur /= 3; Fout0->i /= 3; - Fout1->r /= 3; Fout1->i /= 3; - Fout2->r /= 3; Fout2->i /= 3; -#endif - scratch[0] = *Fout0; - - C_MUL(t[0],*Fout1 , twiddles[fstride*u ] ); - C_MUL(t[1],*Fout2 , twiddles[fstride*u*2] ); - - C_ADD(t0pt1,t[0],t[1]); - C_ADD(*Fout0,scratch[0],t0pt1); - - t0pt1.r /= -2; - t0pt1.i /= -2; - - C_SUB(t0mt1,t[0],t[1]); - t0mt1.r *= epi3.i; - t0mt1.i *= epi3.i; - - sum23.r = t0pt1.r - t0mt1.i; - sum23.i = t0pt1.i + t0mt1.r; - - C_ADD( *Fout1, scratch[0] , sum23 ); - - sum23.r = t0pt1.r + t0mt1.i; - sum23.i = t0pt1.i - t0mt1.r; - C_ADD( *Fout2, scratch[0] , sum23 ); - - ++Fout0;++Fout1;++Fout2; - } -} - -static -inline +FUNCDECL void bflyp( kiss_fft_cpx * Fout, int fstride, @@ -244,10 +235,7 @@ void bflyp( k=u; for ( q1=0 ; q1

=Norig) twidx-=Norig; C_MUL(t,scratch[q] , twiddles[twidx] ); - Fout[ k ].r += t.r; - Fout[ k ].i += t.i; + C_ADDTO( Fout[ k ] ,t); } k += m; } } } -static inline +FUNCDECL void fft_work( kiss_fft_cpx * Fout, const kiss_fft_cpx * f, @@ -313,10 +300,10 @@ void * kiss_fft_alloc(int nfft,int inverse_fft) kiss_fft_state * st=NULL; allocsize = sizeof(kiss_fft_state) - + sizeof(kiss_fft_cpx)*nfft // twiddle factors - + sizeof(kiss_fft_cpx)*nfft // tmpbuf - + sizeof(int)*nfft // factors - + sizeof(kiss_fft_cpx)*nfft; // scratch + + sizeof(kiss_fft_cpx)*nfft /* twiddle factors*/ + + sizeof(kiss_fft_cpx)*nfft /* tmpbuf*/ + + sizeof(int)*nfft /* factors*/ + + sizeof(kiss_fft_cpx)*nfft; /* scratch*/ st = ( kiss_fft_state *)malloc( allocsize ); if (!st) @@ -324,10 +311,10 @@ void * kiss_fft_alloc(int nfft,int inverse_fft) st->nfft=nfft; st->inverse = inverse_fft; - st->twiddles = (kiss_fft_cpx*)(st+1); // just beyond struct - st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);// just after twiddles + st->twiddles = (kiss_fft_cpx*)(st+1); /* just beyond struct*/ + st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);/* just after twiddles*/ st->scratch = (kiss_fft_cpx*)(st->tmpbuf + nfft); - st->factors = (int*)(st->scratch + nfft); // just after tmpbuf + st->factors = (int*)(st->scratch + nfft); /* just after tmpbuf*/ for (i=0;infft; for ( i=0 ; i< nstages ;i+=2 ) { int p; diff --git a/kiss_fft.h b/kiss_fft.h index c601cee..03214e3 100644 --- a/kiss_fft.h +++ b/kiss_fft.h @@ -29,7 +29,7 @@ typedef struct { * Call free() on it when done using it to avoid memory leaks. * */ void* kiss_fft_alloc(int nfft,int inverse_fft); -// free() the state when done using it +/* free() the state when done using it */ /* * kiss_fft(cfg,in_out_buf) @@ -40,9 +40,9 @@ void* kiss_fft_alloc(int nfft,int inverse_fft); * the output will be F[0] , F[1] , ... ,F[nfft-1] * Note that each element is complex. * */ -void kiss_fft( const void* cfg_from_alloc , kiss_fft_cpx *f ); // call for each buffer +void kiss_fft( const void* cfg_from_alloc , kiss_fft_cpx *f ); /* call for each buffer */ -// when done with the cfg for a given fft size and direction, simply free it +/* when done with the cfg for a given fft size and direction, simply free it*/ #define kiss_fft_free free #endif diff --git a/test/Makefile b/test/Makefile index d8588aa..062732a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,16 +1,19 @@ -NFFT=2048 +NFFT=840 ALLUTILS=kfft kffts kfftd NUMFFTS=10000 UTILSRC=../kiss_fft.c fftutil.c +CFLAGS=-Wall -O3 -lm -I.. -ansi -DFUNCDECL= -pedantic + all: $(ALLUTILS) kfft: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. $(UTILSRC) + echo $(FUNCDECL) + gcc -o $@ $(CFLAGS) $(UTILSRC) kffts: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. -DFIXED_POINT $(UTILSRC) + gcc -o $@ $(CFLAGS) -DFIXED_POINT $(UTILSRC) kfftd: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. -Dkiss_fft_scalar=double $(UTILSRC) + gcc -o $@ $(CFLAGS) -Dkiss_fft_scalar=double $(UTILSRC) time: all @echo diff --git a/tools/Makefile b/tools/Makefile index d8588aa..062732a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,16 +1,19 @@ -NFFT=2048 +NFFT=840 ALLUTILS=kfft kffts kfftd NUMFFTS=10000 UTILSRC=../kiss_fft.c fftutil.c +CFLAGS=-Wall -O3 -lm -I.. -ansi -DFUNCDECL= -pedantic + all: $(ALLUTILS) kfft: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. $(UTILSRC) + echo $(FUNCDECL) + gcc -o $@ $(CFLAGS) $(UTILSRC) kffts: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. -DFIXED_POINT $(UTILSRC) + gcc -o $@ $(CFLAGS) -DFIXED_POINT $(UTILSRC) kfftd: $(UTILSRC) - gcc -Wall -O3 -o $@ -lm -I.. -Dkiss_fft_scalar=double $(UTILSRC) + gcc -o $@ $(CFLAGS) -Dkiss_fft_scalar=double $(UTILSRC) time: all @echo