Use += directly on the complex components instead of calling cadd()

Dramatic speedup. Output of 'make test':
### testing SNR for  1024 point FFTs
#### DOUBLE
snr_t2f = 295.63
snr_f2t = 307.82
#### FLOAT
snr_t2f = 146.25
snr_f2t = 143.37
#### SHORT
snr_t2f = 54.694
snr_f2t = 24.470

#### timing 10000 x 1024 point FFTs
#### DOUBLE
Elapsed:0:16.06 user:12.72 sys:0.25
#### FLOAT
Elapsed:0:04.63 user:3.79 sys:0.13
#### SHORT
Elapsed:0:05.77 user:4.56 sys:0.07
This commit is contained in:
Mark Borgerding 2003-10-11 22:39:40 +00:00
parent 043da3b65d
commit 11983e5056

View File

@@ -27,9 +27,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* }kiss_fft_cpx; * }kiss_fft_cpx;
*/ */
const double pi=3.14159265358979323846264338327;
#define MAX_STAGES 20
typedef struct { typedef struct {
int nfft; int nfft;
int inverse; int inverse;
@@ -46,15 +44,12 @@ typedef struct {
#ifdef FIXED_POINT #ifdef FIXED_POINT
/* We don't have to worry about overflow from multiplying by twiddle factors since they /* We don't have to worry about overflow from multiplying by twiddle factors since they
* all have unity magnitude. Still need to shift away fractional bits after adding 1/2 for * all have unity magnitude. Still need to shift away fractional bits after adding 1/2 for
* rounding. * rounding. */
* */
# define C_MUL(m,a,b) \ # define C_MUL(m,a,b) \
do{ (m).r = ( ( (a).r*(b).r - (a).i*(b).i) + (1<<14) ) >> 15;\ do{ (m).r = ( ( (a).r*(b).r - (a).i*(b).i) + (1<<14) ) >> 15;\
(m).i = ( ( (a).r*(b).i + (a).i*(b).r) + (1<<14) ) >> 15;\ (m).i = ( ( (a).r*(b).i + (a).i*(b).r) + (1<<14) ) >> 15;\
}while(0) }while(0)
#else // not FIXED_POINT #else // not FIXED_POINT
#define C_MUL(m,a,b) \ #define C_MUL(m,a,b) \
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0) (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
@@ -74,13 +69,6 @@ kiss_fft_cpx cexp(double phase)
return x; return x;
} }
static kiss_fft_cpx cadd(kiss_fft_cpx a,kiss_fft_cpx b)
{
kiss_fft_cpx c;
C_ADD(c,a,b);
return c;
}
static kiss_fft_cpx cmul(kiss_fft_cpx a,kiss_fft_cpx b) static kiss_fft_cpx cmul(kiss_fft_cpx a,kiss_fft_cpx b)
{ {
kiss_fft_cpx c; kiss_fft_cpx c;
@@ -105,12 +93,6 @@ void fft_work(
int m,p=0,q,q1,u,k; int m,p=0,q,q1,u,k;
kiss_fft_cpx t; kiss_fft_cpx t;
/*
if (n==1) {
*Fout = *f;
return;
}
*/
p=*factors++; p=*factors++;
m=*factors++;//m = n/p; m=*factors++;//m = n/p;
@@ -139,9 +121,9 @@ void fft_work(
twidx += fstride * k; twidx += fstride * k;
if (twidx>=Norig) if (twidx>=Norig)
twidx-=Norig; twidx-=Norig;
t = twiddles[twidx]; t = cmul(scratch[q] , twiddles[twidx] );
Fout[ k ] = cadd( Fout[ k ] , Fout[ k ].r += t.r;
cmul( scratch[q] , t ) ); Fout[ k ].i += t.i;
} }
} }
} }
@@ -170,6 +152,7 @@ void * kiss_fft_alloc(int nfft,int inverse_fft)
st->factors = (int*)malloc( sizeof(int)*nfft ); st->factors = (int*)malloc( sizeof(int)*nfft );
for (i=0;i<nfft;++i) { for (i=0;i<nfft;++i) {
const double pi=3.14159265358979323846264338327;
double phase = ( 2*pi /nfft ) * i; double phase = ( 2*pi /nfft ) * i;
if (st->inverse) if (st->inverse)
phase *= -1; phase *= -1;
@@ -197,7 +180,7 @@ void * kiss_fft_alloc(int nfft,int inverse_fft)
void kiss_fft(const void * cfg,kiss_fft_cpx *f) void kiss_fft(const void * cfg,kiss_fft_cpx *f)
{ {
int i,n; int n;
const kiss_fft_state * st = cfg; const kiss_fft_state * st = cfg;
n = st->nfft; n = st->nfft;