real inverse fixed point fft scaling was broken. Now fixed is fixed.

This commit is contained in:
Mark Borgerding 2005-10-27 18:56:45 +00:00
parent 18d0ad1604
commit f088e415b4
2 changed files with 100 additions and 59 deletions

View File

@ -11,6 +11,17 @@ static double cputime(void)
return (double)(t.tms_utime + t.tms_stime)/ sysconf(_SC_CLK_TCK) ;
}
static
kiss_fft_scalar rand_scalar(void)
{
#ifdef USE_SIMD
return _mm_set1_ps(rand()-RAND_MAX/2);
#else
kiss_fft_scalar s = (kiss_fft_scalar)(rand() -RAND_MAX/2);
return s/2;
#endif
}
static
double snr_compare( kiss_fft_cpx * vec1,kiss_fft_cpx * vec2, int n)
{
@ -65,25 +76,34 @@ int main(void)
kiss_fft_cfg kiss_fft_state;
kiss_fftr_cfg kiss_fftr_state;
kiss_fft_scalar rin[NFFT];
kiss_fft_scalar rin[NFFT+2];
kiss_fft_scalar rout[NFFT+2];
kiss_fft_scalar zero;
memset(&zero,0,sizeof(zero) ); // ugly way of setting short,int,float,double, or __m128 to zero
srand(time(0));
for (i=0;i<NFFT;++i) {
#ifdef USE_SIMD
rin[i] = _mm_set1_ps(rand()-RAND_MAX/2);
cin[i].i = _mm_set1_ps(0);
#else
rin[i] = (kiss_fft_scalar)(rand()-RAND_MAX/2);
cin[i].i = 0;
#endif
rin[i] = rand_scalar();
cin[i].r = rin[i];
cin[i].i = zero;
}
kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0);
kiss_fft(kiss_fft_state,cin,cout);
kiss_fftr(kiss_fftr_state,rin,sout);
/*
printf(" results from kiss_fft : (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)cout[0].r , (float)cout[0].i
, (float)cout[1].r , (float)cout[1].i
, (float)cout[2].r , (float)cout[2].i);
printf(" results from kiss_fftr: (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)sout[0].r , (float)sout[0].i
, (float)sout[1].r , (float)sout[1].i
, (float)sout[2].r , (float)sout[2].i);
*/
printf( "nfft=%d, inverse=%d, snr=%g\n",
NFFT,0, snr_compare(cout,sout,(NFFT/2)+1) );
ts = cputime();
@ -107,23 +127,44 @@ int main(void)
kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0);
kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0);
kiss_fft(kiss_fft_state,cout,cin);
kiss_fftri(kiss_fftr_state,cout,rin);
for (i=0;i<NFFT;++i) {
sout[i].r = rin[i];
#ifdef USE_SIMD
sout[i].i ^= sout[i].i;
#else
sout[i].i = 0;
#endif
memset(cin,0,sizeof(cin));
#if 1
for (i=1;i< NFFT/2;++i) {
//cin[i].r = (kiss_fft_scalar)(rand()-RAND_MAX/2);
cin[i].r = rand_scalar();
cin[i].i = rand_scalar();
}
#else
cin[0].r = 12000;
cin[3].r = 12000;
cin[NFFT/2].r = 12000;
#endif
// conjugate symmetry of real signal
for (i=1;i< NFFT/2;++i) {
cin[NFFT-i].r = cin[i].r;
cin[NFFT-i].i = - cin[i].i;
}
kiss_fft(kiss_fft_state,cin,cout);
kiss_fftri(kiss_fftr_state,cin,rout);
/*
printf(" results from inverse kiss_fft : (%f,%f), (%f,%f), (%f,%f), (%f,%f), (%f,%f) ...\n "
, (float)cout[0].r , (float)cout[0].i , (float)cout[1].r , (float)cout[1].i , (float)cout[2].r , (float)cout[2].i , (float)cout[3].r , (float)cout[3].i , (float)cout[4].r , (float)cout[4].i
);
printf(" results from inverse kiss_fftr: %f,%f,%f,%f,%f ... \n"
,(float)rout[0] ,(float)rout[1] ,(float)rout[2] ,(float)rout[3] ,(float)rout[4]);
*/
for (i=0;i<NFFT;++i) {
sout[i].r = rout[i];
sout[i].i = zero;
}
printf( "nfft=%d, inverse=%d, snr=%g\n",
NFFT,1, snr_compare(cin,cin,NFFT/2) );
NFFT,1, snr_compare(cout,sout,NFFT/2) );
free(kiss_fft_state);
free(kiss_fftr_state);
return 0;
}

View File

@ -67,7 +67,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme
void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
{
/* input buffer timedata is stored row-wise */
int k,N;
int k,ncfft;
kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
if ( st->substate->inverse) {
@ -75,28 +75,37 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
exit(1);
}
N = st->substate->nfft;
ncfft = st->substate->nfft;
/*perform the parallel fft of two real signals packed in real,imag*/
kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
* contains the sum of the even-numbered elements of the input time sequence
* The imag part is the sum of the odd-numbered elements
*
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
* yielding DC of input time sequence
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
* yielding Nyquist bin of input time sequence
*/
tdc.r = st->tmpbuf[0].r;
tdc.i = st->tmpbuf[0].i;
C_FIXDIV(tdc,2);
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
freqdata[0].r = tdc.r + tdc.i;
freqdata[ncfft].r = tdc.r - tdc.i;
#ifdef USE_SIMD
freqdata[0].i = _mm_set1_ps(0);
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
#else
freqdata[0].i = 0;
#endif
freqdata[ncfft].i = freqdata[0].i = 0;
#endif
for (k=1;k <= N/2 ; ++k ) {
fpk = st->tmpbuf[k];
fpnk.r = st->tmpbuf[N-k].r;
fpnk.i = -st->tmpbuf[N-k].i;
for ( k=1;k <= ncfft/2 ; ++k ) {
fpk = st->tmpbuf[k];
fpnk.r = st->tmpbuf[ncfft-k].r;
fpnk.i = - st->tmpbuf[ncfft-k].i;
C_FIXDIV(fpk,2);
C_FIXDIV(fpnk,2);
@ -104,55 +113,46 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
C_SUB( f2k, fpk , fpnk );
C_MUL( tw , f2k , st->super_twiddles[k]);
C_ADD( freqdata[k] , f1k ,tw);
freqdata[k].r = HALF_OF(f1k.r + tw.r);
freqdata[k].i = HALF_OF(f1k.i + tw.i);
freqdata[N-k].r = HALF_OF(f1k.r - tw.r);
freqdata[N-k].i = HALF_OF(tw.i - f1k.i);
freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
}
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
freqdata[N].r = tdc.r - tdc.i;
#ifdef USE_SIMD
freqdata[N].i = _mm_set1_ps(0);
#else
freqdata[N].i = 0;
#endif
}
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
{
/* input buffer timedata is stored row-wise */
int k, N;
int k, ncfft;
if (st->substate->inverse == 0) {
fprintf (stderr, "kiss fft usage error: improper alloc\n");
exit (1);
}
N = st->substate->nfft;
ncfft = st->substate->nfft;
st->tmpbuf[0].r = freqdata[0].r + freqdata[N].r;
st->tmpbuf[0].i = freqdata[0].r - freqdata[N].r;
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
C_FIXDIV(st->tmpbuf[0],2);
for (k = 1; k <= N / 2; ++k) {
kiss_fft_cpx fk, fnkc, fek, fok, tmpbuf;
for (k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fk, fnkc, fek, fok, tmp;
fk = freqdata[k];
fnkc.r = freqdata[N - k].r;
fnkc.i = -freqdata[N - k].i;
fnkc.r = freqdata[ncfft - k].r;
fnkc.i = -freqdata[ncfft - k].i;
C_FIXDIV( fk , 2 );
C_FIXDIV( fnkc , 2 );
C_ADD (fek, fk, fnkc);
C_SUB (tmpbuf, fk, fnkc);
C_MUL (fok, tmpbuf, st->super_twiddles[k]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[N - k], fek, fok);
C_SUB (tmp, fk, fnkc);
C_MUL (fok, tmp, st->super_twiddles[k]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD
st->tmpbuf[N - k].i *= _mm_set1_ps(-1.0);
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
#else
st->tmpbuf[N - k].i *= -1;
st->tmpbuf[ncfft - k].i *= -1;
#endif
}
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);