diff --git a/README b/README index c6b7381..2660033 100644 --- a/README +++ b/README @@ -17,13 +17,14 @@ USAGE: ... // transformed free(cfg); - Note: frequency-domain data is stored from dc to 2pi. + Note: frequency-domain data is stored from dc up to 2pi. so cx_out[0] is the dc bin of the FFT - and cx_out[nfft/2] is the Nyquist bin (if present) + and cx_out[nfft/2] is the Nyquist bin (if even length FFT) Declarations are in "kiss_fft.h", along with a brief description of the functions you'll need to use. Code definitions for 1d complex FFTs are in kiss_fft.c. -with sample usage code. For more functionality, like 2d FFTs you may need to add other source files to your project. +with sample usage code. For more functionality, like 2d FFTs you may need to add +other source files to your project. The code can be compiled to use float, double or 16bit short samples. The default is float. @@ -41,8 +42,7 @@ a well respected and highly optimized fft library. I don't want to criticize this great library, so let's call it FFT_BRANDX. During this process, I learned: - 1. FFT_BRANDX has 500 times as many lines of code as Kiss - (and that's just the C code). + 1. FFT_BRANDX has more than 100K lines of code. KISS has less than 1k. 2. It took me an embarrassingly long time to get FFT_BRANDX working. 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB. 4. FFT_BRANDX is roughly twice as fast as KISS FFT. @@ -72,6 +72,9 @@ LICENSE: BSD, see COPYING for details. Basically, "free to use, give credit where due, no guarantees" TODO: + *) Add 2d real optimized FFT + *) Make a better self-test program(s). Should report snr & timing for short,float, + or double. *) Add simple windowing function, e.g. Hamming : w(i)=.54-.46*cos(2pi*i/(n-1)) *) Make the fixed point scaling and bit shifts more easily configurable. *) Document/revisit the input/output fft scaling diff --git a/_kiss_fft_guts.h b/_kiss_fft_guts.h index 37c814d..be4c4f9 100644 --- a/_kiss_fft_guts.h +++ b/_kiss_fft_guts.h @@ -70,11 +70,19 @@ typedef struct { #define C_SUBFROM( res , a)\ do { (res).r -= (a).r; (res).i -= (a).i; }while(0) -kiss_fft_cpx kf_cexp(double phase); - -int kf_allocsize(int nfft); - -void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft); +static +kiss_fft_cpx kf_cexp(double phase) /* returns e ** (j*phase) */ +{ + kiss_fft_cpx x; +#ifdef FIXED_POINT + x.r = (kiss_fft_scalar) (32767 * cos (phase)); + x.i = (kiss_fft_scalar) (32767 * sin (phase)); +#else + x.r = cos (phase); + x.i = sin (phase); +#endif + return x; +} void kf_work( kiss_fft_cpx * Fout, @@ -84,3 +92,8 @@ void kf_work( const kiss_fft_state * st ); +/* a debugging function */ +static void pcpx( kiss_fft_cpx * c) +{ + fprintf(stderr,"%g + %gi\n",c->r,c->i); +} diff --git a/kiss_fft.c b/kiss_fft.c index 4f6ffe2..9fb793e 100644 --- a/kiss_fft.c +++ b/kiss_fft.c @@ -18,19 +18,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND fixed or floating point complex numbers. It also delares the kf_ internal functions. */ -kiss_fft_cpx kf_cexp(double phase) /* returns e ** (j*phase) */ -{ - kiss_fft_cpx x; -#ifdef FIXED_POINT - x.r = (kiss_fft_scalar) ( 32767*cos(phase) ); - x.i = (kiss_fft_scalar) ( 32767*sin(phase) ); -#else - x.r = cos(phase); - x.i = sin(phase); -#endif - return x; -} - static void kf_bfly2( kiss_fft_cpx * Fout, int fstride, @@ -276,16 +263,6 @@ void kf_work( } } -int kf_allocsize(int nfft) -{ - int allocsize = sizeof(kiss_fft_state) - + sizeof(kiss_fft_cpx)*nfft /* twiddle factors*/ - + sizeof(kiss_fft_cpx)*nfft /* tmpbuf*/ - + sizeof(int)*nfft /* factors*/ - + sizeof(kiss_fft_cpx)*nfft; /* scratch*/ - return allocsize; -} - /* facbuf is populated by p1,m1,p2,m2, ... where p[i] * m[i] = m[i-1] @@ -312,27 +289,6 @@ void kf_factor(int n,int * facbuf) } while (n > 1); } -void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft) -{ - int i; - st->nfft=nfft; - st->inverse = inverse_fft; - st->twiddles = (kiss_fft_cpx*)(st+1); /* just beyond struct*/ - st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);/* just after twiddles*/ - st->scratch = (kiss_fft_cpx*)(st->tmpbuf + nfft); - st->factors = (int*)(st->scratch + nfft); - - for (i=0;iinverse) - phase *= -1; - st->twiddles[i] = kf_cexp( phase ); - } - - kf_factor(nfft,st->factors); -} - /* * void * kiss_fft_alloc(int nfft,int inverse_fft) * @@ -341,14 +297,41 @@ void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft) * The return value is a contiguous block of memory, allocated with malloc. As such, * It can be freed with free(), rather than a kiss_fft-specific function. * */ -void * kiss_fft_alloc(int nfft,int inverse_fft) +void * kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) { kiss_fft_state * st=NULL; + size_t memneeded = sizeof(kiss_fft_state) + + sizeof(kiss_fft_cpx)*nfft /* twiddle factors*/ + + sizeof(kiss_fft_cpx)*nfft /* tmpbuf*/ + + sizeof(int)*nfft /* factors*/ + + sizeof(kiss_fft_cpx)*nfft; /* scratch*/ - st = ( kiss_fft_state *)malloc( kf_allocsize(nfft) ); - if (!st) - return NULL; - kf_init_state( st ,nfft,inverse_fft ); + if ( lenmem==NULL ) { + st = ( kiss_fft_state *)malloc( memneeded ); + }else{ + if (*lenmem >= memneeded) + st = ( kiss_fft_state *)mem; + *lenmem = memneeded; + } + if (st){ + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + st->twiddles = (kiss_fft_cpx*)(st+1); /* just beyond struct*/ + st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);/* just after twiddles*/ + st->scratch = (kiss_fft_cpx*)(st->tmpbuf + nfft); + st->factors = (int*)(st->scratch + nfft); + + for (i=0;iinverse) + phase *= -1; + st->twiddles[i] = kf_cexp( phase ); + } + + kf_factor(nfft,st->factors); + } return st; } diff --git a/kiss_fft.h b/kiss_fft.h index f08b5b9..86b881f 100644 --- a/kiss_fft.h +++ b/kiss_fft.h @@ -15,24 +15,34 @@ # endif #endif - typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; /* - * fft_alloc + * kiss_fft_alloc * - * Initialize a FFT (or IFFT) algorithm. + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: void * mycfg=kiss_fft_alloc(1024,0,NULL,NULL); * * The return value from fft_alloc is a cfg buffer used internally - * by the fft routine. + * by the fft routine or NULL. * - * Call free() on it when done using it to avoid memory leaks. + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. * */ -void* kiss_fft_alloc(int nfft,int inverse_fft); -/* free() the state when done using it */ +void* kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); /* * kiss_fft(cfg,in_out_buf) @@ -43,11 +53,7 @@ void* kiss_fft_alloc(int nfft,int inverse_fft); * fout will be F[0] , F[1] , ... ,F[nfft-1] * Note that each element is complex and can be accessed like f[k].r and f[k].i - - Apologies to previous users of KISS FFT, this function has changed from 2 args to 3 args. - To maintain the original behavior , use fout == fin * */ - void kiss_fft(const void * cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); @@ -55,15 +61,22 @@ void kiss_fft(const void * cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); the data should be stored rowwise, in other words, an array made up of row[0], then row[1], etc */ -void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft); +void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft,void * mem,size_t * lenmem); void kiss_fft2d(const void* cfg_from_alloc , const kiss_fft_cpx *fin,kiss_fft_cpx *fout ); /* Real optimized version can save about 45% cpu time vs. complex fft of a real seq. */ -void * kiss_fftr_alloc(int nfft,int inverse_fft); +void * kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); void kiss_fftri(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* 2d Real optimized version + */ +void * kiss_fftr2d_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +void kiss_fftr2d(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +void kiss_fftr2di(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); + + /* when done with the cfg for a given fft size and direction, simply free it*/ #define kiss_fft_free free diff --git a/kiss_fft2d.c b/kiss_fft2d.c index ce0cd40..b422500 100644 --- a/kiss_fft2d.c +++ b/kiss_fft2d.c @@ -21,24 +21,31 @@ typedef struct { kiss_fft_cpx * tmpbuf; }kiss_fft2d_state; -void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft) +void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft,void*mem,size_t*lenmem) { kiss_fft2d_state *st = NULL; - int size1,size2,sizetmp; - size1 = kf_allocsize(ncols); - size2 = kf_allocsize(nrows); - sizetmp = sizeof(kiss_fft_cpx)*(ncols > nrows ? ncols : nrows); + size_t size1, size2, sizetmp, memneeded; + kiss_fft_alloc (ncols, inverse_fft, NULL, &size1); + kiss_fft_alloc (nrows, inverse_fft, NULL, &size2); + sizetmp = sizeof (kiss_fft_cpx) * (ncols > nrows ? ncols : nrows); + memneeded = sizeof (kiss_fft2d_state) + size1 + size2 + sizetmp; - st = (kiss_fft2d_state *) malloc ( sizeof(kiss_fft2d_state) + size1 + size2 + sizetmp ); + if (lenmem == NULL) { + st = (kiss_fft2d_state *) malloc (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fft2d_state *) mem; + *lenmem = memneeded; + } if (!st) return NULL; st->minus2 = -2; - st->rowst = (kiss_fft_state *)(st+1); /*just beyond kiss_fft2d_state struct */ - st->colst = (kiss_fft_state *)( (char*)(st->rowst) + size1 ); - st->tmpbuf = (kiss_fft_cpx *)( (char*)(st->rowst) + size1 + size2 ); - kf_init_state (st->rowst, ncols, inverse_fft); - kf_init_state (st->colst, nrows, inverse_fft); + st->rowst = (kiss_fft_state *) (st + 1); /*just beyond kiss_fft2d_state struct */ + st->colst = (kiss_fft_state *) ((char *) (st->rowst) + size1); + st->tmpbuf = (kiss_fft_cpx *) ((char *) (st->rowst) + size1 + size2); + kiss_fft_alloc (ncols, inverse_fft, st->rowst, &size1); + kiss_fft_alloc (nrows, inverse_fft, st->colst, &size2); return st; } diff --git a/kiss_fftr.c b/kiss_fftr.c index b52539a..3520cea 100644 --- a/kiss_fftr.c +++ b/kiss_fftr.c @@ -21,45 +21,47 @@ typedef struct { kiss_fft_cpx * super_twiddles; }kiss_fftr_state; -void * kiss_fftr_alloc(int nfft,int inverse_fft) +void * kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) { int i; kiss_fftr_state *st = NULL; - int subsize; - - if (nfft&1) { + size_t subsize, memneeded; + + if (nfft & 1) { /*fprintf(stderr,"Real FFT optimization must be even.\n"); */ return NULL; } nfft >>= 1; - subsize = kf_allocsize(nfft); + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2); - st = (kiss_fftr_state *) malloc ( sizeof(kiss_fftr_state) - + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2) ); + if (lenmem == NULL) { + st = (kiss_fftr_state *) malloc (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_state *) mem; + *lenmem = memneeded; + } if (!st) return NULL; st->minus3 = -3; - st->substate = (kiss_fft_state *)(st+1); /*just beyond kiss_fftr_state struct */ - st->tmpbuf = (kiss_fft_cpx*)(((char*)st->substate) + subsize); - st->super_twiddles = st->tmpbuf + nfft; - kf_init_state (st->substate, nfft, inverse_fft); + st->substate = (kiss_fft_state *) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); - for (i=0;isuper_twiddles[i] = kf_cexp( phase ); + st->super_twiddles[i] = kf_cexp (phase); } return st; } -static void pcpx( kiss_fft_cpx * c) -{ - printf("%g + %gi\n",c->r,c->i); -} - void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) { /* input buffer timedata is stored row-wise */ diff --git a/test/Makefile b/test/Makefile index f6181f4..700e777 100644 --- a/test/Makefile +++ b/test/Makefile @@ -56,9 +56,10 @@ mtime: all $(BENCHFFTW) [ -x ./$(BENCHFFTW) ] && ./$(BENCHFFTW) -x $(NUMFFTS) -n $$n || true ; \ done -snr: all +snr: all $(TESTREAL) @echo "testkiss( $(NFFT) , 1, '$(DATATYPE)' );" | octave -q @echo "testkiss( $(NFFT) , $(NROWS), '$(DATATYPE)' );" | octave -q + ./$(TESTREAL) test: snr time fftw diff --git a/test/benchkiss.c b/test/benchkiss.c index 9e288a1..032d765 100644 --- a/test/benchkiss.c +++ b/test/benchkiss.c @@ -42,7 +42,7 @@ int main(int argc,char ** argv) pstats_init(); - st = kiss_fft_alloc( nfft ,isinverse ); + st = kiss_fft_alloc( nfft ,isinverse ,0,0); for (i=0;i 0 ) { for (i=0;i