real inverse fixed point fft scaling was broken. Now fixed is fixed.

This commit is contained in:
Mark Borgerding 2005-10-27 18:56:45 +00:00
parent 18d0ad1604
commit f088e415b4
2 changed files with 100 additions and 59 deletions

View File

@ -11,6 +11,17 @@ static double cputime(void)
return (double)(t.tms_utime + t.tms_stime)/ sysconf(_SC_CLK_TCK) ; return (double)(t.tms_utime + t.tms_stime)/ sysconf(_SC_CLK_TCK) ;
} }
static
kiss_fft_scalar rand_scalar(void)
{
#ifdef USE_SIMD
return _mm_set1_ps(rand()-RAND_MAX/2);
#else
kiss_fft_scalar s = (kiss_fft_scalar)(rand() -RAND_MAX/2);
return s/2;
#endif
}
static static
double snr_compare( kiss_fft_cpx * vec1,kiss_fft_cpx * vec2, int n) double snr_compare( kiss_fft_cpx * vec1,kiss_fft_cpx * vec2, int n)
{ {
@ -65,25 +76,34 @@ int main(void)
kiss_fft_cfg kiss_fft_state; kiss_fft_cfg kiss_fft_state;
kiss_fftr_cfg kiss_fftr_state; kiss_fftr_cfg kiss_fftr_state;
kiss_fft_scalar rin[NFFT]; kiss_fft_scalar rin[NFFT+2];
kiss_fft_scalar rout[NFFT+2];
kiss_fft_scalar zero;
memset(&zero,0,sizeof(zero) ); // ugly way of setting short,int,float,double, or __m128 to zero
srand(time(0)); srand(time(0));
for (i=0;i<NFFT;++i) { for (i=0;i<NFFT;++i) {
#ifdef USE_SIMD rin[i] = rand_scalar();
rin[i] = _mm_set1_ps(rand()-RAND_MAX/2);
cin[i].i = _mm_set1_ps(0);
#else
rin[i] = (kiss_fft_scalar)(rand()-RAND_MAX/2);
cin[i].i = 0;
#endif
cin[i].r = rin[i]; cin[i].r = rin[i];
cin[i].i = zero;
} }
kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0); kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0); kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0);
kiss_fft(kiss_fft_state,cin,cout); kiss_fft(kiss_fft_state,cin,cout);
kiss_fftr(kiss_fftr_state,rin,sout); kiss_fftr(kiss_fftr_state,rin,sout);
/*
printf(" results from kiss_fft : (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)cout[0].r , (float)cout[0].i
, (float)cout[1].r , (float)cout[1].i
, (float)cout[2].r , (float)cout[2].i);
printf(" results from kiss_fftr: (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)sout[0].r , (float)sout[0].i
, (float)sout[1].r , (float)sout[1].i
, (float)sout[2].r , (float)sout[2].i);
*/
printf( "nfft=%d, inverse=%d, snr=%g\n", printf( "nfft=%d, inverse=%d, snr=%g\n",
NFFT,0, snr_compare(cout,sout,(NFFT/2)+1) ); NFFT,0, snr_compare(cout,sout,(NFFT/2)+1) );
ts = cputime(); ts = cputime();
@ -107,23 +127,44 @@ int main(void)
kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0); kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0); kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0);
kiss_fft(kiss_fft_state,cout,cin); memset(cin,0,sizeof(cin));
kiss_fftri(kiss_fftr_state,cout,rin); #if 1
for (i=1;i< NFFT/2;++i) {
for (i=0;i<NFFT;++i) { //cin[i].r = (kiss_fft_scalar)(rand()-RAND_MAX/2);
sout[i].r = rin[i]; cin[i].r = rand_scalar();
#ifdef USE_SIMD cin[i].i = rand_scalar();
sout[i].i ^= sout[i].i;
#else
sout[i].i = 0;
#endif
} }
#else
cin[0].r = 12000;
cin[3].r = 12000;
cin[NFFT/2].r = 12000;
#endif
// conjugate symmetry of real signal
for (i=1;i< NFFT/2;++i) {
cin[NFFT-i].r = cin[i].r;
cin[NFFT-i].i = - cin[i].i;
}
kiss_fft(kiss_fft_state,cin,cout);
kiss_fftri(kiss_fftr_state,cin,rout);
/*
printf(" results from inverse kiss_fft : (%f,%f), (%f,%f), (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)cout[0].r , (float)cout[0].i , (float)cout[1].r , (float)cout[1].i , (float)cout[2].r , (float)cout[2].i , (float)cout[3].r , (float)cout[3].i , (float)cout[4].r , (float)cout[4].i
);
printf(" results from inverse kiss_fftr: %f,%f,%f,%f,%f ... \n"
,(float)rout[0] ,(float)rout[1] ,(float)rout[2] ,(float)rout[3] ,(float)rout[4]);
*/
for (i=0;i<NFFT;++i) {
sout[i].r = rout[i];
sout[i].i = zero;
}
printf( "nfft=%d, inverse=%d, snr=%g\n", printf( "nfft=%d, inverse=%d, snr=%g\n",
NFFT,1, snr_compare(cin,cin,NFFT/2) ); NFFT,1, snr_compare(cout,sout,NFFT/2) );
free(kiss_fft_state); free(kiss_fft_state);
free(kiss_fftr_state); free(kiss_fftr_state);
return 0; return 0;
} }

View File

@ -67,7 +67,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme
void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
{ {
/* input buffer timedata is stored row-wise */ /* input buffer timedata is stored row-wise */
int k,N; int k,ncfft;
kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
if ( st->substate->inverse) { if ( st->substate->inverse) {
@ -75,28 +75,37 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
exit(1); exit(1);
} }
N = st->substate->nfft; ncfft = st->substate->nfft;
/*perform the parallel fft of two real signals packed in real,imag*/ /*perform the parallel fft of two real signals packed in real,imag*/
kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
* contains the sum of the even-numbered elements of the input time sequence
* The imag part is the sum of the odd-numbered elements
*
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
* yielding DC of input time sequence
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
* yielding Nyquist bin of input time sequence
*/
tdc.r = st->tmpbuf[0].r; tdc.r = st->tmpbuf[0].r;
tdc.i = st->tmpbuf[0].i; tdc.i = st->tmpbuf[0].i;
C_FIXDIV(tdc,2); C_FIXDIV(tdc,2);
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
freqdata[0].r = tdc.r + tdc.i; freqdata[0].r = tdc.r + tdc.i;
freqdata[ncfft].r = tdc.r - tdc.i;
#ifdef USE_SIMD #ifdef USE_SIMD
freqdata[0].i = _mm_set1_ps(0); freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
#else #else
freqdata[0].i = 0; freqdata[ncfft].i = freqdata[0].i = 0;
#endif #endif
for (k=1;k <= N/2 ; ++k ) { for ( k=1;k <= ncfft/2 ; ++k ) {
fpk = st->tmpbuf[k];
fpk = st->tmpbuf[k]; fpnk.r = st->tmpbuf[ncfft-k].r;
fpnk.r = st->tmpbuf[N-k].r; fpnk.i = - st->tmpbuf[ncfft-k].i;
fpnk.i = -st->tmpbuf[N-k].i;
C_FIXDIV(fpk,2); C_FIXDIV(fpk,2);
C_FIXDIV(fpnk,2); C_FIXDIV(fpnk,2);
@ -104,55 +113,46 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
C_SUB( f2k, fpk , fpnk ); C_SUB( f2k, fpk , fpnk );
C_MUL( tw , f2k , st->super_twiddles[k]); C_MUL( tw , f2k , st->super_twiddles[k]);
C_ADD( freqdata[k] , f1k ,tw);
freqdata[k].r = HALF_OF(f1k.r + tw.r); freqdata[k].r = HALF_OF(f1k.r + tw.r);
freqdata[k].i = HALF_OF(f1k.i + tw.i); freqdata[k].i = HALF_OF(f1k.i + tw.i);
freqdata[N-k].r = HALF_OF(f1k.r - tw.r); freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
freqdata[N-k].i = HALF_OF(tw.i - f1k.i); freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
} }
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
freqdata[N].r = tdc.r - tdc.i;
#ifdef USE_SIMD
freqdata[N].i = _mm_set1_ps(0);
#else
freqdata[N].i = 0;
#endif
} }
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
{ {
/* input buffer timedata is stored row-wise */ /* input buffer timedata is stored row-wise */
int k, N; int k, ncfft;
if (st->substate->inverse == 0) { if (st->substate->inverse == 0) {
fprintf (stderr, "kiss fft usage error: improper alloc\n"); fprintf (stderr, "kiss fft usage error: improper alloc\n");
exit (1); exit (1);
} }
N = st->substate->nfft; ncfft = st->substate->nfft;
st->tmpbuf[0].r = freqdata[0].r + freqdata[N].r; st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
st->tmpbuf[0].i = freqdata[0].r - freqdata[N].r; st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
C_FIXDIV(st->tmpbuf[0],2);
for (k = 1; k <= N / 2; ++k) { for (k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fk, fnkc, fek, fok, tmpbuf; kiss_fft_cpx fk, fnkc, fek, fok, tmp;
fk = freqdata[k]; fk = freqdata[k];
fnkc.r = freqdata[N - k].r; fnkc.r = freqdata[ncfft - k].r;
fnkc.i = -freqdata[N - k].i; fnkc.i = -freqdata[ncfft - k].i;
C_FIXDIV( fk , 2 ); C_FIXDIV( fk , 2 );
C_FIXDIV( fnkc , 2 ); C_FIXDIV( fnkc , 2 );
C_ADD (fek, fk, fnkc); C_ADD (fek, fk, fnkc);
C_SUB (tmpbuf, fk, fnkc); C_SUB (tmp, fk, fnkc);
C_MUL (fok, tmpbuf, st->super_twiddles[k]); C_MUL (fok, tmp, st->super_twiddles[k]);
C_ADD (st->tmpbuf[k], fek, fok); C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[N - k], fek, fok); C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD #ifdef USE_SIMD
st->tmpbuf[N - k].i *= _mm_set1_ps(-1.0); st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
#else #else
st->tmpbuf[N - k].i *= -1; st->tmpbuf[ncfft - k].i *= -1;
#endif #endif
} }
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);