changed alloc functions to allow for initialization of user-supplied buffer

This commit is contained in:
Mark Borgerding 2003-12-14 03:02:30 +00:00
parent ab32979a47
commit 559c14b49b
11 changed files with 135 additions and 112 deletions

13
README
View File

@ -17,13 +17,14 @@ USAGE:
... // transformed ... // transformed
free(cfg); free(cfg);
Note: frequency-domain data is stored from dc to 2pi. Note: frequency-domain data is stored from dc up to 2pi.
so cx_out[0] is the dc bin of the FFT so cx_out[0] is the dc bin of the FFT
and cx_out[nfft/2] is the Nyquist bin (if present) and cx_out[nfft/2] is the Nyquist bin (if even length FFT)
Declarations are in "kiss_fft.h", along with a brief description of the Declarations are in "kiss_fft.h", along with a brief description of the
functions you'll need to use. Code definitions for 1d complex FFTs are in kiss_fft.c. functions you'll need to use. Code definitions for 1d complex FFTs are in kiss_fft.c.
with sample usage code. For more functionality, like 2d FFTs you may need to add other source files to your project. with sample usage code. For more functionality, like 2d FFTs you may need to add
other source files to your project.
The code can be compiled to use float, double or 16bit short samples. The code can be compiled to use float, double or 16bit short samples.
The default is float. The default is float.
@ -41,8 +42,7 @@ a well respected and highly optimized fft library. I don't want to criticize
this great library, so let's call it FFT_BRANDX. this great library, so let's call it FFT_BRANDX.
During this process, I learned: During this process, I learned:
1. FFT_BRANDX has 500 times as many lines of code as Kiss 1. FFT_BRANDX has more than 100K lines of code. KISS has less than 1k.
(and that's just the C code).
2. It took me an embarrassingly long time to get FFT_BRANDX working. 2. It took me an embarrassingly long time to get FFT_BRANDX working.
3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB. 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB.
4. FFT_BRANDX is roughly twice as fast as KISS FFT. 4. FFT_BRANDX is roughly twice as fast as KISS FFT.
@ -72,6 +72,9 @@ LICENSE:
BSD, see COPYING for details. Basically, "free to use, give credit where due, no guarantees" BSD, see COPYING for details. Basically, "free to use, give credit where due, no guarantees"
TODO: TODO:
*) Add 2d real optimized FFT
*) Make a better self-test program(s). Should report snr & timing for short,float,
or double.
*) Add simple windowing function, e.g. Hamming : w(i)=.54-.46*cos(2pi*i/(n-1)) *) Add simple windowing function, e.g. Hamming : w(i)=.54-.46*cos(2pi*i/(n-1))
*) Make the fixed point scaling and bit shifts more easily configurable. *) Make the fixed point scaling and bit shifts more easily configurable.
*) Document/revisit the input/output fft scaling *) Document/revisit the input/output fft scaling

View File

@ -70,11 +70,19 @@ typedef struct {
#define C_SUBFROM( res , a)\ #define C_SUBFROM( res , a)\
do { (res).r -= (a).r; (res).i -= (a).i; }while(0) do { (res).r -= (a).r; (res).i -= (a).i; }while(0)
kiss_fft_cpx kf_cexp(double phase); static
kiss_fft_cpx kf_cexp(double phase) /* returns e ** (j*phase) */
int kf_allocsize(int nfft); {
kiss_fft_cpx x;
void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft); #ifdef FIXED_POINT
x.r = (kiss_fft_scalar) (32767 * cos (phase));
x.i = (kiss_fft_scalar) (32767 * sin (phase));
#else
x.r = cos (phase);
x.i = sin (phase);
#endif
return x;
}
void kf_work( void kf_work(
kiss_fft_cpx * Fout, kiss_fft_cpx * Fout,
@ -84,3 +92,8 @@ void kf_work(
const kiss_fft_state * st const kiss_fft_state * st
); );
/* a debugging function */
static void pcpx( kiss_fft_cpx * c)
{
fprintf(stderr,"%g + %gi\n",c->r,c->i);
}

View File

@ -18,19 +18,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
fixed or floating point complex numbers. It also delares the kf_ internal functions. fixed or floating point complex numbers. It also delares the kf_ internal functions.
*/ */
kiss_fft_cpx kf_cexp(double phase) /* returns e ** (j*phase) */
{
kiss_fft_cpx x;
#ifdef FIXED_POINT
x.r = (kiss_fft_scalar) ( 32767*cos(phase) );
x.i = (kiss_fft_scalar) ( 32767*sin(phase) );
#else
x.r = cos(phase);
x.i = sin(phase);
#endif
return x;
}
static void kf_bfly2( static void kf_bfly2(
kiss_fft_cpx * Fout, kiss_fft_cpx * Fout,
int fstride, int fstride,
@ -276,16 +263,6 @@ void kf_work(
} }
} }
int kf_allocsize(int nfft)
{
int allocsize = sizeof(kiss_fft_state)
+ sizeof(kiss_fft_cpx)*nfft /* twiddle factors*/
+ sizeof(kiss_fft_cpx)*nfft /* tmpbuf*/
+ sizeof(int)*nfft /* factors*/
+ sizeof(kiss_fft_cpx)*nfft; /* scratch*/
return allocsize;
}
/* facbuf is populated by p1,m1,p2,m2, ... /* facbuf is populated by p1,m1,p2,m2, ...
where where
p[i] * m[i] = m[i-1] p[i] * m[i] = m[i-1]
@ -312,27 +289,6 @@ void kf_factor(int n,int * facbuf)
} while (n > 1); } while (n > 1);
} }
void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft)
{
int i;
st->nfft=nfft;
st->inverse = inverse_fft;
st->twiddles = (kiss_fft_cpx*)(st+1); /* just beyond struct*/
st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);/* just after twiddles*/
st->scratch = (kiss_fft_cpx*)(st->tmpbuf + nfft);
st->factors = (int*)(st->scratch + nfft);
for (i=0;i<nfft;++i) {
const double pi=3.14159265358979323846264338327;
double phase = ( -2*pi /nfft ) * i;
if (st->inverse)
phase *= -1;
st->twiddles[i] = kf_cexp( phase );
}
kf_factor(nfft,st->factors);
}
/* /*
* void * kiss_fft_alloc(int nfft,int inverse_fft) * void * kiss_fft_alloc(int nfft,int inverse_fft)
* *
@ -341,14 +297,41 @@ void kf_init_state(kiss_fft_state * st,int nfft,int inverse_fft)
* The return value is a contiguous block of memory, allocated with malloc. As such, * The return value is a contiguous block of memory, allocated with malloc. As such,
* It can be freed with free(), rather than a kiss_fft-specific function. * It can be freed with free(), rather than a kiss_fft-specific function.
* */ * */
void * kiss_fft_alloc(int nfft,int inverse_fft) void * kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
{ {
kiss_fft_state * st=NULL; kiss_fft_state * st=NULL;
size_t memneeded = sizeof(kiss_fft_state)
+ sizeof(kiss_fft_cpx)*nfft /* twiddle factors*/
+ sizeof(kiss_fft_cpx)*nfft /* tmpbuf*/
+ sizeof(int)*nfft /* factors*/
+ sizeof(kiss_fft_cpx)*nfft; /* scratch*/
st = ( kiss_fft_state *)malloc( kf_allocsize(nfft) ); if ( lenmem==NULL ) {
if (!st) st = ( kiss_fft_state *)malloc( memneeded );
return NULL; }else{
kf_init_state( st ,nfft,inverse_fft ); if (*lenmem >= memneeded)
st = ( kiss_fft_state *)mem;
*lenmem = memneeded;
}
if (st){
int i;
st->nfft=nfft;
st->inverse = inverse_fft;
st->twiddles = (kiss_fft_cpx*)(st+1); /* just beyond struct*/
st->tmpbuf = (kiss_fft_cpx*)(st->twiddles + nfft);/* just after twiddles*/
st->scratch = (kiss_fft_cpx*)(st->tmpbuf + nfft);
st->factors = (int*)(st->scratch + nfft);
for (i=0;i<nfft;++i) {
const double pi=3.14159265358979323846264338327;
double phase = ( -2*pi /nfft ) * i;
if (st->inverse)
phase *= -1;
st->twiddles[i] = kf_cexp( phase );
}
kf_factor(nfft,st->factors);
}
return st; return st;
} }

View File

@ -15,24 +15,34 @@
# endif # endif
#endif #endif
typedef struct { typedef struct {
kiss_fft_scalar r; kiss_fft_scalar r;
kiss_fft_scalar i; kiss_fft_scalar i;
}kiss_fft_cpx; }kiss_fft_cpx;
/* /*
* fft_alloc * kiss_fft_alloc
* *
* Initialize a FFT (or IFFT) algorithm. * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
*
* typical usage: void * mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
* *
* The return value from fft_alloc is a cfg buffer used internally * The return value from fft_alloc is a cfg buffer used internally
* by the fft routine. * by the fft routine or NULL.
* *
* Call free() on it when done using it to avoid memory leaks. * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
* The returned value should be free()d when done to avoid memory leaks.
*
* The state can be placed in a user supplied buffer 'mem':
* If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
* then the function places the cfg in mem and the size used in *lenmem
* and returns mem.
*
* If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
* then the function returns NULL and places the minimum cfg
* buffer size in *lenmem.
* */ * */
void* kiss_fft_alloc(int nfft,int inverse_fft); void* kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
/* free() the state when done using it */
/* /*
* kiss_fft(cfg,in_out_buf) * kiss_fft(cfg,in_out_buf)
@ -43,11 +53,7 @@ void* kiss_fft_alloc(int nfft,int inverse_fft);
* fout will be F[0] , F[1] , ... ,F[nfft-1] * fout will be F[0] , F[1] , ... ,F[nfft-1]
* Note that each element is complex and can be accessed like * Note that each element is complex and can be accessed like
f[k].r and f[k].i f[k].r and f[k].i
Apologies to previous users of KISS FFT, this function has changed from 2 args to 3 args.
To maintain the original behavior , use fout == fin
* */ * */
void kiss_fft(const void * cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); void kiss_fft(const void * cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
@ -55,15 +61,22 @@ void kiss_fft(const void * cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
the data should be stored rowwise, the data should be stored rowwise,
in other words, an array made up of row[0], then row[1], etc in other words, an array made up of row[0], then row[1], etc
*/ */
void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft); void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft,void * mem,size_t * lenmem);
void kiss_fft2d(const void* cfg_from_alloc , const kiss_fft_cpx *fin,kiss_fft_cpx *fout ); void kiss_fft2d(const void* cfg_from_alloc , const kiss_fft_cpx *fin,kiss_fft_cpx *fout );
/* Real optimized version can save about 45% cpu time vs. complex fft of a real seq. /* Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
*/ */
void * kiss_fftr_alloc(int nfft,int inverse_fft); void * kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
void kiss_fftri(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); void kiss_fftri(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
/* 2d Real optimized version
*/
void * kiss_fftr2d_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
void kiss_fftr2d(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
void kiss_fftr2di(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
/* when done with the cfg for a given fft size and direction, simply free it*/ /* when done with the cfg for a given fft size and direction, simply free it*/
#define kiss_fft_free free #define kiss_fft_free free

View File

@ -21,24 +21,31 @@ typedef struct {
kiss_fft_cpx * tmpbuf; kiss_fft_cpx * tmpbuf;
}kiss_fft2d_state; }kiss_fft2d_state;
void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft) void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft,void*mem,size_t*lenmem)
{ {
kiss_fft2d_state *st = NULL; kiss_fft2d_state *st = NULL;
int size1,size2,sizetmp; size_t size1, size2, sizetmp, memneeded;
size1 = kf_allocsize(ncols); kiss_fft_alloc (ncols, inverse_fft, NULL, &size1);
size2 = kf_allocsize(nrows); kiss_fft_alloc (nrows, inverse_fft, NULL, &size2);
sizetmp = sizeof(kiss_fft_cpx)*(ncols > nrows ? ncols : nrows); sizetmp = sizeof (kiss_fft_cpx) * (ncols > nrows ? ncols : nrows);
memneeded = sizeof (kiss_fft2d_state) + size1 + size2 + sizetmp;
st = (kiss_fft2d_state *) malloc ( sizeof(kiss_fft2d_state) + size1 + size2 + sizetmp ); if (lenmem == NULL) {
st = (kiss_fft2d_state *) malloc (memneeded);
} else {
if (*lenmem >= memneeded)
st = (kiss_fft2d_state *) mem;
*lenmem = memneeded;
}
if (!st) if (!st)
return NULL; return NULL;
st->minus2 = -2; st->minus2 = -2;
st->rowst = (kiss_fft_state *)(st+1); /*just beyond kiss_fft2d_state struct */ st->rowst = (kiss_fft_state *) (st + 1); /*just beyond kiss_fft2d_state struct */
st->colst = (kiss_fft_state *)( (char*)(st->rowst) + size1 ); st->colst = (kiss_fft_state *) ((char *) (st->rowst) + size1);
st->tmpbuf = (kiss_fft_cpx *)( (char*)(st->rowst) + size1 + size2 ); st->tmpbuf = (kiss_fft_cpx *) ((char *) (st->rowst) + size1 + size2);
kf_init_state (st->rowst, ncols, inverse_fft); kiss_fft_alloc (ncols, inverse_fft, st->rowst, &size1);
kf_init_state (st->colst, nrows, inverse_fft); kiss_fft_alloc (nrows, inverse_fft, st->colst, &size2);
return st; return st;
} }

View File

@ -21,45 +21,47 @@ typedef struct {
kiss_fft_cpx * super_twiddles; kiss_fft_cpx * super_twiddles;
}kiss_fftr_state; }kiss_fftr_state;
void * kiss_fftr_alloc(int nfft,int inverse_fft) void * kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
{ {
int i; int i;
kiss_fftr_state *st = NULL; kiss_fftr_state *st = NULL;
int subsize; size_t subsize, memneeded;
if (nfft&1) { if (nfft & 1) {
/*fprintf(stderr,"Real FFT optimization must be even.\n"); */ /*fprintf(stderr,"Real FFT optimization must be even.\n"); */
return NULL; return NULL;
} }
nfft >>= 1; nfft >>= 1;
subsize = kf_allocsize(nfft); kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
memneeded = sizeof(kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
st = (kiss_fftr_state *) malloc ( sizeof(kiss_fftr_state) if (lenmem == NULL) {
+ subsize + sizeof(kiss_fft_cpx) * ( nfft * 2) ); st = (kiss_fftr_state *) malloc (memneeded);
} else {
if (*lenmem >= memneeded)
st = (kiss_fftr_state *) mem;
*lenmem = memneeded;
}
if (!st) if (!st)
return NULL; return NULL;
st->minus3 = -3; st->minus3 = -3;
st->substate = (kiss_fft_state *)(st+1); /*just beyond kiss_fftr_state struct */ st->substate = (kiss_fft_state *) (st + 1); /*just beyond kiss_fftr_state struct */
st->tmpbuf = (kiss_fft_cpx*)(((char*)st->substate) + subsize); st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
st->super_twiddles = st->tmpbuf + nfft; st->super_twiddles = st->tmpbuf + nfft;
kf_init_state (st->substate, nfft, inverse_fft); kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
for (i=0;i<nfft;++i) { for (i = 0; i < nfft; ++i) {
double phase = -3.14159265358979323846264338327 * ( (double)i / nfft + .5); double phase =
-3.14159265358979323846264338327 * ((double) i / nfft + .5);
if (inverse_fft) if (inverse_fft)
phase *= -1; phase *= -1;
st->super_twiddles[i] = kf_cexp( phase ); st->super_twiddles[i] = kf_cexp (phase);
} }
return st; return st;
} }
static void pcpx( kiss_fft_cpx * c)
{
printf("%g + %gi\n",c->r,c->i);
}
void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
{ {
/* input buffer timedata is stored row-wise */ /* input buffer timedata is stored row-wise */

View File

@ -56,9 +56,10 @@ mtime: all $(BENCHFFTW)
[ -x ./$(BENCHFFTW) ] && ./$(BENCHFFTW) -x $(NUMFFTS) -n $$n || true ; \ [ -x ./$(BENCHFFTW) ] && ./$(BENCHFFTW) -x $(NUMFFTS) -n $$n || true ; \
done done
snr: all snr: all $(TESTREAL)
@echo "testkiss( $(NFFT) , 1, '$(DATATYPE)' );" | octave -q @echo "testkiss( $(NFFT) , 1, '$(DATATYPE)' );" | octave -q
@echo "testkiss( $(NFFT) , $(NROWS), '$(DATATYPE)' );" | octave -q @echo "testkiss( $(NFFT) , $(NROWS), '$(DATATYPE)' );" | octave -q
./$(TESTREAL)
test: snr time fftw test: snr time fftw

View File

@ -42,7 +42,7 @@ int main(int argc,char ** argv)
pstats_init(); pstats_init();
st = kiss_fft_alloc( nfft ,isinverse ); st = kiss_fft_alloc( nfft ,isinverse ,0,0);
for (i=0;i<numffts;++i) for (i=0;i<numffts;++i)
kiss_fft( st ,buf,bufout ); kiss_fft( st ,buf,bufout );

View File

@ -42,7 +42,7 @@ double snr_compare( kiss_fft_cpx * vec1,kiss_fft_cpx * vec2, int n)
#endif #endif
#ifndef NUMFFTS #ifndef NUMFFTS
#define NUMFFTS 10000 #define NUMFFTS 1000
#endif #endif
void pcpx(const char * msg, kiss_fft_cpx * c) void pcpx(const char * msg, kiss_fft_cpx * c)
@ -72,8 +72,8 @@ int main()
/* printf("in[%d]",i);pcpx("",cin+i); */ /* printf("in[%d]",i);pcpx("",cin+i); */
} }
kiss_fft_state = kiss_fft_alloc(NFFT,0); kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,0); kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0);
kiss_fft(kiss_fft_state,cin,cout); kiss_fft(kiss_fft_state,cin,cout);
kiss_fftr(kiss_fftr_state,sin,sout); kiss_fftr(kiss_fftr_state,sin,sout);
printf( "nfft=%d, inverse=%d, snr=%g\n", printf( "nfft=%d, inverse=%d, snr=%g\n",
@ -97,8 +97,8 @@ int main()
free(kiss_fft_state); free(kiss_fft_state);
free(kiss_fftr_state); free(kiss_fftr_state);
kiss_fft_state = kiss_fft_alloc(NFFT,1); kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,1); kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0);
kiss_fft(kiss_fft_state,cout,cin); kiss_fft(kiss_fft_state,cout,cin);
kiss_fftri(kiss_fftr_state,cout,sin); kiss_fftri(kiss_fftr_state,cout,sin);

View File

@ -56,9 +56,10 @@ mtime: all $(BENCHFFTW)
[ -x ./$(BENCHFFTW) ] && ./$(BENCHFFTW) -x $(NUMFFTS) -n $$n || true ; \ [ -x ./$(BENCHFFTW) ] && ./$(BENCHFFTW) -x $(NUMFFTS) -n $$n || true ; \
done done
snr: all snr: all $(TESTREAL)
@echo "testkiss( $(NFFT) , 1, '$(DATATYPE)' );" | octave -q @echo "testkiss( $(NFFT) , 1, '$(DATATYPE)' );" | octave -q
@echo "testkiss( $(NFFT) , $(NROWS), '$(DATATYPE)' );" | octave -q @echo "testkiss( $(NFFT) , $(NROWS), '$(DATATYPE)' );" | octave -q
./$(TESTREAL)
test: snr time fftw test: snr time fftw

View File

@ -37,9 +37,9 @@ void fft_file(FILE * fin,FILE * fout,int nfft,int nrows,int isinverse,int useasc
buf = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft *nrows ); buf = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft *nrows );
bufout = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft *nrows); bufout = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx) * nfft *nrows);
if (nrows!=1) if (nrows!=1)
st = kiss_fft2d_alloc( nrows,nfft ,isinverse ); st = kiss_fft2d_alloc( nrows,nfft ,isinverse ,0,0);
else else
st = kiss_fft_alloc( nfft ,isinverse ); st = kiss_fft_alloc( nfft ,isinverse ,0,0);
while ( fread( buf , sizeof(kiss_fft_cpx) * nfft * nrows ,1, fin ) > 0 ) { while ( fread( buf , sizeof(kiss_fft_cpx) * nfft * nrows ,1, fin ) > 0 ) {
for (i=0;i<times;++i) for (i=0;i<times;++i)