Added SIMD capability

This commit is contained in:
Mark Borgerding
2005-06-24 03:03:31 +00:00
parent 24be1c5850
commit 7f6cbeab2e
10 changed files with 112 additions and 9 deletions

View File

@ -19,6 +19,9 @@ struct kiss_fftr_state{
kiss_fft_cfg substate;
kiss_fft_cpx * tmpbuf;
kiss_fft_cpx * super_twiddles;
#ifdef USE_SIMD
long pad;
#endif
};
kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
@ -37,7 +40,11 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme
memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
if (lenmem == NULL) {
#ifdef USE_SIMD
st = (kiss_fftr_cfg) memalign (sizeof(kiss_fft_cpx),memneeded);
#else
st = (kiss_fftr_cfg) malloc (memneeded);
#endif
} else {
if (*lenmem >= memneeded)
st = (kiss_fftr_cfg) mem;
@ -83,7 +90,11 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
freqdata[0].r = tdc.r + tdc.i;
#ifdef USE_SIMD
freqdata[0].i = _mm_set1_ps(0);
#else
freqdata[0].i = 0;
#endif
for (k=1;k <= N/2 ; ++k ) {
@ -98,15 +109,28 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
C_MUL( tw , f2k , st->super_twiddles[k]);
C_ADD( freqdata[k] , f1k ,tw);
#ifdef USE_SIMD
freqdata[k].r = (f1k.r + tw.r) * _mm_set1_ps(.5);
freqdata[k].i = (f1k.i + tw.i) * _mm_set1_ps(.5);
freqdata[N-k].r = (f1k.r - tw.r) * _mm_set1_ps(.5);
freqdata[N-k].i = - (f1k.i - tw.i) * _mm_set1_ps(.5);
#else
freqdata[k].r = (f1k.r + tw.r) / 2;
freqdata[k].i = (f1k.i + tw.i) / 2;
freqdata[N-k].r = (f1k.r - tw.r)/2;
freqdata[N-k].i = - (f1k.i - tw.i)/2;
#endif
}
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
freqdata[N].r = tdc.r - tdc.i;
#ifdef USE_SIMD
freqdata[N].i = _mm_set1_ps(0);
#else
freqdata[N].i = 0;
#endif
}
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
@ -137,7 +161,11 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t
C_MUL (fok, tmpbuf, st->super_twiddles[k]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[N - k], fek, fok);
#ifdef USE_SIMD
st->tmpbuf[N - k].i *= _mm_set1_ps(-1.0);
#else
st->tmpbuf[N - k].i *= -1;
#endif
}
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
}