mirror of
https://github.com/mborgerding/kissfft.git
synced 2025-06-03 17:18:11 -04:00
Dog slow, but does mixed radix!
'make test' output : ### testing SNR for 1024 point FFTs #### DOUBLE snr_t2f = 295.52 snr_f2t = 307.98 #### FLOAT snr_t2f = 144.62 snr_f2t = 143.23 #### SHORT snr_t2f = -31.515 snr_f2t = -60.836 #### timing 10000 x 1024 point FFTs #### DOUBLE Elapsed:0:44.17 user:35.11 sys:0.27 #### FLOAT Elapsed:0:24.22 user:19.66 sys:0.16 #### SHORT Elapsed:0:30.39 user:25.07 sys:0.09
This commit is contained in:
parent
08be1d86b4
commit
30c4ee30f5
6
fft.py
6
fft.py
@ -15,18 +15,22 @@ def fft(f):
|
|||||||
else:
|
else:
|
||||||
raise Exception('%s not factorable ' % n)
|
raise Exception('%s not factorable ' % n)
|
||||||
|
|
||||||
|
print 'n=%d,p=%d' % (n,p)
|
||||||
|
print f,' << fin'
|
||||||
m = n/p
|
m = n/p
|
||||||
Fout=[]
|
Fout=[]
|
||||||
for q in range(p): # 0,1
|
for q in range(p): # 0,1
|
||||||
fp = f[q::p]
|
fp = f[q::p]
|
||||||
|
print fp,'<< fp'
|
||||||
Fp = fft( fp )
|
Fp = fft( fp )
|
||||||
Fout.extend( Fp )
|
Fout.extend( Fp )
|
||||||
|
|
||||||
for u in range(m):
|
for u in range(m):
|
||||||
scratch = Fout[u::m] # u to end in strides of m
|
scratch = Fout[u::m] # u to end in strides of m
|
||||||
|
print scratch
|
||||||
for q1 in range(p):
|
for q1 in range(p):
|
||||||
k = q1*m + u # indices to Fout above that became scratch
|
k = q1*m + u # indices to Fout above that became scratch
|
||||||
Fout[ k ] = scratch[0]
|
Fout[ k ] = scratch[0] # cuz e**0==1 in loop below
|
||||||
for q in range(1,p):
|
for q in range(1,p):
|
||||||
t = e ** ( j*2*pi*k*q/n )
|
t = e ** ( j*2*pi*k*q/n )
|
||||||
Fout[ k ] += scratch[q] * t
|
Fout[ k ] += scratch[q] * t
|
||||||
|
294
kiss_fft.c
294
kiss_fft.c
@ -27,66 +27,21 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|||||||
* }kiss_fft_cpx;
|
* }kiss_fft_cpx;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int nr; // N remaining
|
|
||||||
int radix; // radix of the stage
|
|
||||||
kiss_fft_cpx * twiddle;
|
|
||||||
}kiss_fft_stage;
|
|
||||||
|
|
||||||
const double pi=3.14159265358979323846264338327;
|
const double pi=3.14159265358979323846264338327;
|
||||||
|
|
||||||
|
|
||||||
#define MAX_STAGES 20
|
#define MAX_STAGES 20
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int nfft;
|
int nfft;
|
||||||
int nstages;
|
int inverse;
|
||||||
int allocsize;
|
|
||||||
kiss_fft_stage stages[MAX_STAGES];
|
|
||||||
int * unscramble_indices;
|
|
||||||
kiss_fft_cpx * buf1;
|
|
||||||
}kiss_fft_state;
|
}kiss_fft_state;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef FIXED_POINT
|
|
||||||
|
|
||||||
# define TWIDDLE_RANGE ( (1<<FIXED_POINT_FRAC_BITS) - 1 )
|
|
||||||
|
|
||||||
/* The fixed point versions of C_ADD and C_SUB divide by two
|
|
||||||
* after the add/sub.
|
|
||||||
* This is to prevent overflow.
|
|
||||||
* The cumulative effect is to multiply the sequence by 1/Nfft.
|
|
||||||
* */
|
|
||||||
# define C_ADD(x,a,b) \
|
|
||||||
do{ (x).r = ( ( (a).r+(b).r +1) >> 1 );\
|
|
||||||
(x).i = ( ( (a).i+(b).i +1) >> 1 ); }while(0)
|
|
||||||
# define C_SUB(x,a,b) \
|
|
||||||
do{ (x).r = ( ( (a).r-(b).r +1) >> 1 );\
|
|
||||||
(x).i = ( ( (a).i-(b).i +1) >> 1 ); }while(0)
|
|
||||||
|
|
||||||
/* We don't have to worry about overflow from multiplying by twiddle factors since they
|
|
||||||
* all have unity magnitude. Still need to shift away fractional bits after adding 1/2 for
|
|
||||||
* rounding.
|
|
||||||
* */
|
|
||||||
# define C_MUL(m,a,b) \
|
|
||||||
do{ (m).r = ( ( (a).r*(b).r - (a).i*(b).i) + (TWIDDLE_RANGE>>1) ) >> FIXED_POINT_FRAC_BITS;\
|
|
||||||
(m).i = ( ( (a).r*(b).i + (a).i*(b).r) + (TWIDDLE_RANGE>>1) ) >> FIXED_POINT_FRAC_BITS;\
|
|
||||||
}while(0)
|
|
||||||
#else // not FIXED_POINT
|
|
||||||
|
|
||||||
#define C_ADD(x,a,b) \
|
#define C_ADD(x,a,b) \
|
||||||
do{ (x).r = (a).r+(b).r;\
|
do{ (x).r = (a).r+(b).r;\
|
||||||
(x).i = (a).i+(b).i;}while(0)
|
(x).i = (a).i+(b).i;}while(0)
|
||||||
#define C_SUB(x,a,b) \
|
|
||||||
do{ (x).r = (a).r-(b).r;\
|
|
||||||
(x).i = (a).i-(b).i;}while(0)
|
|
||||||
#define C_MUL(m,a,b) \
|
#define C_MUL(m,a,b) \
|
||||||
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
|
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
|
||||||
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
|
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
static
|
static
|
||||||
kiss_fft_cpx cexp(double phase)
|
kiss_fft_cpx cexp(double phase)
|
||||||
{
|
{
|
||||||
@ -96,69 +51,6 @@ kiss_fft_cpx cexp(double phase)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the twiddle factors
|
|
||||||
static
|
|
||||||
void make_twiddles(kiss_fft_cpx * twiddle,int ntwid,int symmetry,int inverse_fft)
|
|
||||||
{
|
|
||||||
int n;
|
|
||||||
int denom = ntwid*symmetry;
|
|
||||||
|
|
||||||
for (n=0;n<ntwid;++n) {
|
|
||||||
twiddle[n] = cexp( 2*pi*n/denom );
|
|
||||||
//twiddle[n].r = cos(2*pi*n/denom);
|
|
||||||
//twiddle[n].i = -sin(2*pi*n/denom);
|
|
||||||
// inverse fft uses complex conjugate twiddle factors
|
|
||||||
if (inverse_fft)
|
|
||||||
twiddle[n].i *= -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// reverse the bits for numbers from 0 to N-1
|
|
||||||
static
|
|
||||||
int bit_reverse(int i,int N)
|
|
||||||
{
|
|
||||||
int rev=0;
|
|
||||||
while ( N >>= 1 ) {
|
|
||||||
rev = rev*2 + (i&1);
|
|
||||||
i>>=1;
|
|
||||||
}
|
|
||||||
return rev;
|
|
||||||
}
|
|
||||||
|
|
||||||
// make a list of index swaps that need to be done for bit-reversed addressing
|
|
||||||
static
|
|
||||||
int make_bit_reverse_indices(int *swap_indices ,int nfft)
|
|
||||||
{
|
|
||||||
int n,nswap;
|
|
||||||
nswap=0;
|
|
||||||
// set up swap indices to unwrap bit-reversed addressing
|
|
||||||
for ( n=0; n<nfft; ++n ) {
|
|
||||||
swap_indices[nswap*2] = bit_reverse(n,nfft);
|
|
||||||
if ( n < swap_indices[nswap*2] ) {
|
|
||||||
swap_indices[nswap*2+1] = n;
|
|
||||||
++nswap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nswap;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void undo_bit_rev( kiss_fft_cpx * f, const int * swap_indices,int nswap)
|
|
||||||
{
|
|
||||||
int i,i0,i1;
|
|
||||||
kiss_fft_cpx tmp;
|
|
||||||
|
|
||||||
// unwrap bit-reverse indexing
|
|
||||||
for ( i=0 ; i<nswap ; ++i ) {
|
|
||||||
i0= *swap_indices++;
|
|
||||||
i1= *swap_indices++;
|
|
||||||
tmp = f[i0];
|
|
||||||
f[i0] = f[i1];
|
|
||||||
f[i1] = tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static kiss_fft_cpx cadd(kiss_fft_cpx a,kiss_fft_cpx b)
|
static kiss_fft_cpx cadd(kiss_fft_cpx a,kiss_fft_cpx b)
|
||||||
{
|
{
|
||||||
kiss_fft_cpx c;
|
kiss_fft_cpx c;
|
||||||
@ -173,84 +65,94 @@ static kiss_fft_cpx cmul(kiss_fft_cpx a,kiss_fft_cpx b)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// the heart of the fft
|
// the heart of the fft
|
||||||
static
|
static
|
||||||
void fft_work( kiss_fft_cpx * h,kiss_fft_cpx * h2,const kiss_fft_stage * stage , int nstages )
|
void fft_work(
|
||||||
|
kiss_fft_cpx * Fout,
|
||||||
|
const kiss_fft_cpx * f,
|
||||||
|
int fstride,
|
||||||
|
int n,
|
||||||
|
int inverse,
|
||||||
|
kiss_fft_cpx * scratch
|
||||||
|
)
|
||||||
{
|
{
|
||||||
int n;
|
int m,p=0,q,q1,u,k;
|
||||||
{ // declare automatic variables in here to reduce stack usage
|
kiss_fft_cpx t;
|
||||||
int nx,ny,Nx,Ny;
|
double two_pi_divN;
|
||||||
kiss_fft_cpx twid;
|
|
||||||
double inc1 = -2*pi/(stage->radix * stage->nr);
|
|
||||||
Ny = stage->nr;
|
|
||||||
Nx = stage->radix;
|
|
||||||
|
|
||||||
for (ny=0;ny<Ny; ++ny) {
|
|
||||||
for ( nx=0 ; nx<Nx ; ++nx ) {
|
|
||||||
h2[ny] = h[ny];
|
|
||||||
|
|
||||||
h2[ny] = cadd( h2[ny] , cmul( twid ,h[ nx * nr + ny ] ) );
|
if (n==1) {
|
||||||
}
|
*Fout = *f;
|
||||||
|
|
||||||
twid = cexp();
|
|
||||||
cmul(h2[ny]);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
h2[n] = h[n] + h[n+N/2]
|
|
||||||
h2[n+N/2] = ( h[n] - h[n+N/2] ) * exp ( n * j*2*pi/N)
|
|
||||||
|
|
||||||
for ( n = 0 ; n < stage->nr ; ++n )
|
|
||||||
{
|
|
||||||
C_ADD( csum, *f1 , *f2 );
|
|
||||||
C_SUB( cdiff, *f1 , *f2 );
|
|
||||||
C_MUL(*f2, cdiff, *twid);
|
|
||||||
*f1++ = csum;
|
|
||||||
++f2;
|
|
||||||
++twid;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
if ( nstages == 1 )
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (n=0;n<stage->radix;++n) {
|
two_pi_divN = 2*pi/n;
|
||||||
int offset = n * stage->nr;
|
if (inverse)
|
||||||
fft_work( h2 + offset , h + offset ,stage+1,nstages-1);
|
two_pi_divN *= -1;
|
||||||
|
|
||||||
|
if (n%2 == 0) p=2;
|
||||||
|
else if(n%3 == 0) p=3;
|
||||||
|
else if(n%5 == 0) p=5;
|
||||||
|
else if(n%7 == 0) p=7;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,"%d is not divisible by %d\n",n,p);
|
||||||
|
p=n;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
m = n/p;
|
||||||
|
/* n = stage->n; m = stage->m; p = stage->p; */
|
||||||
|
|
||||||
|
#ifdef LOUD
|
||||||
|
printf("n=%d,p=%d\n",n,p);
|
||||||
|
for (k=0;k<n;++k) {
|
||||||
|
t=f[k*fstride];
|
||||||
|
printf("(%.3f,%.3f) ",t.r,t.i);
|
||||||
|
}
|
||||||
|
printf(" <<< fin \n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (q=0;q<p;++q) {
|
||||||
|
#ifdef LOUD
|
||||||
|
for (k=0;k<m;++k) {
|
||||||
|
t = (f+q*fstride)[fstride*p*k];
|
||||||
|
printf("(%.3f,%.3f) ",t.r,t.i);
|
||||||
|
}
|
||||||
|
printf(" <<< f[fstride*k+q] \n");
|
||||||
|
#endif
|
||||||
|
fft_work( Fout + m*q, f+q*fstride, fstride*p, m,inverse, scratch );
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef LOUD
|
||||||
|
printf("twiddling n=%d,p=%d\n",n,p);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for ( u=0; u<m; ++u ) {
|
||||||
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||||
|
scratch[q1] = Fout[ u+q1*m ];
|
||||||
|
#ifdef LOUD
|
||||||
|
printf("(%.3f,%.3f) ",scratch[q1].r,scratch[q1].i);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#ifdef LOUD
|
||||||
|
printf(" <<< scratch \n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||||
|
k=q1*m+u;
|
||||||
|
Fout[ k ] = scratch[0];
|
||||||
|
for (q=1;q<p;++q ) {
|
||||||
|
//t = e ** ( j*2*pi*k*q/n );
|
||||||
|
t = cexp( two_pi_divN * k * q );
|
||||||
|
Fout[ k ] = cadd( Fout[ k ] ,
|
||||||
|
cmul( scratch[q] , t ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
/*
|
||||||
kiss_fft_state * decompose( kiss_fft_state * st, int * pnfft,int radix,int inverse_fft)
|
|
||||||
{
|
|
||||||
int nfft = *pnfft;
|
|
||||||
|
|
||||||
while (nfft>1 && (nfft%radix)==0) {
|
|
||||||
int newsize;
|
|
||||||
int nstages=st->nstages;
|
|
||||||
nfft /= radix;
|
|
||||||
fprintf(stderr,"%d ",radix);
|
|
||||||
|
|
||||||
newsize = st->allocsize + nfft * sizeof(kiss_fft_cpx);
|
|
||||||
|
|
||||||
st=(kiss_fft_state*)realloc(st,newsize);
|
|
||||||
|
|
||||||
st->stages[nstages].radix=radix;
|
|
||||||
st->stages[nstages].nr=nfft;
|
|
||||||
st->stages[nstages].twiddle = (kiss_fft_cpx*)((char*)st + st->allocsize);
|
|
||||||
|
|
||||||
make_twiddles( st->stages[nstages].twiddle,nfft,radix,inverse_fft );
|
|
||||||
st->allocsize = newsize;
|
|
||||||
st->nstages = nstages+1;
|
|
||||||
}
|
|
||||||
*pnfft = nfft;
|
|
||||||
return st;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* void * kiss_fft_alloc(int nfft,int inverse_fft)
|
* void * kiss_fft_alloc(int nfft,int inverse_fft)
|
||||||
*
|
*
|
||||||
* User-callable function to allocate all necessary scratch space for the fft.
|
* User-callable function to allocate all necessary scratch space for the fft.
|
||||||
*
|
*
|
||||||
* The return value is a contiguous block of memory, allocated with malloc. As such,
|
* The return value is a contiguous block of memory, allocated with malloc. As such,
|
||||||
@ -259,41 +161,23 @@ kiss_fft_state * decompose( kiss_fft_state * st, int * pnfft,int radix,int inver
|
|||||||
void * kiss_fft_alloc(int nfft,int inverse_fft)
|
void * kiss_fft_alloc(int nfft,int inverse_fft)
|
||||||
{
|
{
|
||||||
kiss_fft_state * st=NULL;
|
kiss_fft_state * st=NULL;
|
||||||
int tmp;
|
st = ( kiss_fft_state *)malloc( sizeof(kiss_fft_state) );
|
||||||
int i;
|
|
||||||
int allocsize=sizeof(kiss_fft_state) + nfft*( sizeof(int) + sizeof(kiss_fft_cpx) );
|
|
||||||
|
|
||||||
st = ( kiss_fft_state *)malloc( allocsize );
|
|
||||||
st->nfft=nfft;
|
st->nfft=nfft;
|
||||||
st->nstages=0;
|
st->inverse = inverse_fft;
|
||||||
st->allocsize=allocsize;
|
|
||||||
st->unscramble_indices = (int*)(st+1);// just past end of buffer
|
|
||||||
st->buf1 = (kiss_fft_cpx*)( st->unscramble_indices + nfft);
|
|
||||||
|
|
||||||
for (i=0;i<nfft;++i)
|
|
||||||
st->unscramble_indices[i] = i;
|
|
||||||
|
|
||||||
fprintf(stderr,"factoring %d: ",nfft);
|
|
||||||
|
|
||||||
tmp=nfft;
|
|
||||||
st = decompose( st,&tmp,2,inverse_fft);
|
|
||||||
fprintf(stderr,"\n");
|
|
||||||
|
|
||||||
// TODO factor 3,5,7,11 ???
|
|
||||||
if ( tmp > 1 ) {
|
|
||||||
fprintf(stderr,"%d not factorable by 2,3 (%d remaining)\n",nfft,tmp);
|
|
||||||
free(st);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return st;
|
return st;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kiss_fft(const void * cfg,kiss_fft_cpx *f)
|
void kiss_fft(const void * cfg,kiss_fft_cpx *f)
|
||||||
{
|
{
|
||||||
|
int i,n;
|
||||||
const kiss_fft_state * st = cfg;
|
const kiss_fft_state * st = cfg;
|
||||||
fft_work( f,st->buf1, st->stages , st->nstages );
|
kiss_fft_cpx tmpbuf[1024];
|
||||||
if (st->nstages&1) {
|
kiss_fft_cpx scratch[1024];
|
||||||
memcpy( st->buf1 , f,sizeof(kiss_fft_cpx) * st->nfft );
|
n = st->nfft;
|
||||||
}
|
|
||||||
|
for (i=0;i<n;++i)
|
||||||
|
tmpbuf[i] = f[i];
|
||||||
|
|
||||||
|
fft_work( f, tmpbuf, 1, n, st->inverse, scratch );
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user