diff --git a/Makefile b/Makefile index a840e8d..ee8f99e 100644 --- a/Makefile +++ b/Makefile @@ -7,5 +7,5 @@ tarball: clean clean: cd sample_code && make clean - rm -f kiss_fft.tar.gz *~ *.pyc kiss_fft.zip fft.py + rm -f kiss_fft.tar.gz *~ *.pyc kiss_fft.zip diff --git a/kiss_fft.h b/kiss_fft.h index 56acd87..4a89929 100644 --- a/kiss_fft.h +++ b/kiss_fft.h @@ -59,6 +59,14 @@ void * kiss_fft2d_alloc(int nrows,int ncols,int inverse_fft); void kiss_fft2d(const void* cfg_from_alloc , const kiss_fft_cpx *fin,kiss_fft_cpx *fout ); +/* Real optimized version can save about 40% cpu time vs. complex fft of a real seq. + */ +void * kiss_fftr_alloc(int nfft,int inverse_fft); +void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +void kiss_fftri(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); + + + /* when done with the cfg for a given fft size and direction, simply free it*/ #define kiss_fft_free free diff --git a/kiss_fftr.c b/kiss_fftr.c index 285caa7..c76cca7 100644 --- a/kiss_fftr.c +++ b/kiss_fftr.c @@ -48,6 +48,8 @@ void * kiss_fftr_alloc(int nfft,int inverse_fft) for (i=0;isuper_twiddles[i] = kf_cexp( phase ); } return st; @@ -58,13 +60,13 @@ static void pcpx( kiss_fft_cpx * c) printf("%g + %gi\n",c->r,c->i); } -void kiss_fftr(const void * cfg,const kiss_fft_scalar *fin,kiss_fft_cpx *fout) +void kiss_fftr(const void * cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) { - /* input buffer fin is stored row-wise */ + /* input buffer timedata is stored row-wise */ kiss_fftr_state *st = ( kiss_fftr_state *)cfg; int k,N; - if (st->minus3 != -3 || st->substate->inverse ) { + if ( st->minus3 != -3 || st->substate->inverse) { fprintf(stderr,"kiss fft usage error: improper alloc\n"); exit(1); } @@ -72,10 +74,10 @@ void kiss_fftr(const void * cfg,const kiss_fft_scalar *fin,kiss_fft_cpx *fout) N = st->substate->nfft; /*perform the parallel fft of two real signals packed in real,imag*/ - kiss_fft( st->substate , (const kiss_fft_cpx*)fin, st->tmpbuf ); + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); - fout[0].r = st->tmpbuf[0].r + st->tmpbuf[0].i; - fout[0].i = 0; + freqdata[0].r = st->tmpbuf[0].r + st->tmpbuf[0].i; + freqdata[0].i = 0; for (k=1;ksuper_twiddles[k] ); - C_ADDTO(fout[k],f1k); + C_MUL( freqdata[k], fpk , st->super_twiddles[k] ); + C_ADDTO(freqdata[k],f1k); - fout[k].r /= 2; - fout[k].i /= 2; + freqdata[k].r /= 2; + freqdata[k].i /= 2; } + freqdata[N].r = st->tmpbuf[0].r - st->tmpbuf[0].i; + freqdata[N].i = 0; } + +void kiss_fftri(const void * cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + kiss_fftr_state *st = (kiss_fftr_state *) cfg; + int k, N; + if (st->minus3 != -3 || st->substate->inverse == 0) { + fprintf (stderr, "kiss fft usage error: improper alloc\n"); + exit (1); + } + N = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[N].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[N].r; + for (k = 1; k <= N / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmpbuf; + fk = freqdata[k]; + fnkc.r = freqdata[N - k].r; + fnkc.i = -freqdata[N - k].i; + + C_ADD (fek, fk, fnkc); + C_SUB (tmpbuf, fk, fnkc); + C_MUL (fok, tmpbuf, st->super_twiddles[k]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[N - k], fek, fok); + st->tmpbuf[N - k].i *= -1; + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/test/test_real.c b/test/test_real.c index fa4c85d..5ccf4d2 100644 --- a/test/test_real.c +++ b/test/test_real.c @@ -1,24 +1,24 @@ #include "kiss_fft.h" -#include +#include +#include -static double timesnap() +static double cputime() { - struct timeval tv; - gettimeofday(&tv,NULL); - return (double)tv.tv_sec + (double)tv.tv_usec/1000000; + struct tms t; + times(&t); + return (double)(t.tms_utime + t.tms_stime)/ sysconf(_SC_CLK_TCK) ; } -void * kiss_fftr_alloc(int nfft,int inverse_fft); -void kiss_fftr(const void * cfg,const kiss_fft_scalar *fin,kiss_fft_cpx *fout); + double snr_compare( kiss_fft_cpx * vec1,kiss_fft_cpx * vec2, int n) { int k; double sigpow,noisepow,err,snr,scale=0; - sigpow = noisepow = .000000000000000000000000000001; + sigpow = noisepow = .00000000000000000001; for (k=0;k