diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fe6a9dc --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.o +*.so +*.a +*.dylib \ No newline at end of file diff --git a/Makefile b/Makefile index 96f43d3..d11c51a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,19 @@ KFVER=130 +ifeq ($(shell uname -s),Darwin) + SHARED := -Wl,-install_name,libkissfft.dylib -o libkissfft.dylib +else + SHARED := -Wl,-soname,libkissfft.so -o libkissfft.so +endif + +all: + gcc -Wall -fPIC -c *.c -Dkiss_fft_scalar=float -o kiss_fft.o + ar crus libkissfft.a kiss_fft.o + gcc -shared $(SHARED) kiss_fft.o + +install: all + cp libkissfft.so /usr/local/lib/ + doc: @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not." diff --git a/kissfft.hh b/kissfft.hh index a586cb1..afa7b78 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -1,201 +1,256 @@ #ifndef KISSFFT_CLASS_HH +#define KISSFFT_CLASS_HH #include +#include #include -namespace kissfft_utils { -template -struct traits -{ - typedef T_scalar scalar_type; - typedef std::complex cpx_type; - void fill_twiddles( std::complex * dst ,int nfft,bool inverse) - { - T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; - for (int i=0;i(0,i*phinc) ); - } - - void prepare( - std::vector< std::complex > & dst, - int nfft,bool inverse, - std::vector & stageRadix, - std::vector & stageRemainder ) - { - _twiddles.resize(nfft); - fill_twiddles( &_twiddles[0],nfft,inverse); - dst = _twiddles; - - //factorize - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p=n;// no more factors - } - n /= p; - stageRadix.push_back(p); - stageRemainder.push_back(n); - }while(n>1); - } - std::vector _twiddles; - - - const cpx_type twiddle(int i) { return _twiddles[i]; } -}; - -} - -template - > +template class kissfft { public: - typedef T_traits traits_type; - typedef typename traits_type::scalar_type scalar_type; - typedef typename traits_type::cpx_type cpx_type; - kissfft(int nfft,bool inverse,const traits_type & traits=traits_type() ) - :_nfft(nfft),_inverse(inverse),_traits(traits) + using cpx_t = std::complex; + + kissfft( const std::size_t nfft, + const bool inverse ) + :_nfft(nfft) + ,_inverse(inverse) { - _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); + // fill twiddle factors + _twiddles.resize(_nfft); + const scalar_t phinc = (_inverse?2:-2)* acos( (scalar_t) -1) / _nfft; + for (std::size_t i=0;i<_nfft;++i) + _twiddles[i] = exp( cpx_t(0,i*phinc) ); + + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + std::size_t n= _nfft; + std::size_t p=4; + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p*p>n) + p = n;// no more factors + } + n /= p; + _stageRadix.push_back(p); + _stageRemainder.push_back(n); + }while(n>1); } - void transform(const cpx_type * src , cpx_type * dst) + + /// Changes the FFT-length and/or the transform direction. + /// + /// @post The @c kissfft object will be in the same state as if it + /// had been newly constructed with the passed arguments. + /// However, the implementation may be faster than constructing a + /// new fft object. + void assign( const std::size_t nfft, + const bool inverse ) { - kf_work(0, dst, src, 1,1); + if ( nfft != _nfft ) + { + kissfft tmp( nfft, inverse ); // O(n) time. + std::swap( tmp, *this ); // this is O(1) in C++11, O(n) otherwise. + } + else if ( inverse != _inverse ) + { + // conjugate the twiddle factors. + for ( typename std::vector::iterator it = _twiddles.begin(); + it != _twiddles.end(); ++it ) + it->imag( -it->imag() ); + } } - private: - void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride) + /// Calculates the complex Discrete Fourier Transform. + /// + /// The size of the passed arrays must be passed in the constructor. + /// The sum of the squares of the absolute values in the @c dst + /// array will be @c N times the sum of the squares of the absolute + /// values in the @c src array, where @c N is the size of the array. + /// In other words, the l_2 norm of the resulting array will be + /// @c sqrt(N) times as big as the l_2 norm of the input array. + /// This is also the case when the inverse flag is set in the + /// constructor. Hence when applying the same transform twice, but with + /// the inverse flag changed the second time, then the result will + /// be equal to the original input times @c N. + void transform(const cpx_t * fft_in, cpx_t * fft_out, const std::size_t stage = 0, const std::size_t fstride = 1, const std::size_t in_stride = 1) const { - int p = _stageRadix[stage]; - int m = _stageRemainder[stage]; - cpx_type * Fout_beg = Fout; - cpx_type * Fout_end = Fout + p*m; + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_t * const Fout_beg = fft_out; + cpx_t * const Fout_end = fft_out + p*m; if (m==1) { do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); + *fft_out = *fft_in; + fft_in += fstride*in_stride; + }while(++fft_out != Fout_end ); }else{ do{ // recursive call: // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, + // p instances of smaller DFTs of size m, // each one takes a decimated version of the input - kf_work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); + transform(fft_in, fft_out, stage+1, fstride*p,in_stride); + fft_in += fstride*in_stride; + }while( (fft_out += m) != Fout_end ); } - Fout=Fout_beg; + fft_out=Fout_beg; - // recombine the p smaller DFTs + // recombine the p smaller DFTs switch (p) { - case 2: kf_bfly2(Fout,fstride,m); break; - case 3: kf_bfly3(Fout,fstride,m); break; - case 4: kf_bfly4(Fout,fstride,m); break; - case 5: kf_bfly5(Fout,fstride,m); break; - default: kf_bfly_generic(Fout,fstride,m,p); break; + case 2: kf_bfly2(fft_out,fstride,m); break; + case 3: kf_bfly3(fft_out,fstride,m); break; + case 4: kf_bfly4(fft_out,fstride,m); break; + case 5: kf_bfly5(fft_out,fstride,m); break; + default: kf_bfly_generic(fft_out,fstride,m,p); break; } } - // these were #define macros in the original kiss_fft - void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;} - void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;} - void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;} - void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;} - void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types - scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;} - scalar_type HALF_OF( const scalar_type & a) { return a*.5;} - void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;} - - void kf_bfly2( cpx_type * Fout, const size_t fstride, int m) + /// Calculates the Discrete Fourier Transform (DFT) of a real input + /// of size @c 2*N. + /// + /// The 0-th and N-th value of the DFT are real numbers. These are + /// stored in @c dst[0].real() and @c dst[1].imag() respectively. + /// The remaining DFT values up to the index N-1 are stored in + /// @c dst[1] to @c dst[N-1]. + /// The other half of the DFT values can be calculated from the + /// symmetry relation + /// @code + /// DFT(src)[2*N-k] == conj( DFT(src)[k] ); + /// @endcode + /// The same scaling factors as in @c transform() apply. + /// + /// @note For this to work, the types @c scalar_t and @c cpx_t + /// must fulfill the following requirements: + /// + /// For any object @c z of type @c cpx_t, + /// @c reinterpret_cast(z)[0] is the real part of @c z and + /// @c reinterpret_cast(z)[1] is the imaginary part of @c z. + /// For any pointer to an element of an array of @c cpx_t named @c p + /// and any valid array index @c i, @c reinterpret_cast(p)[2*i] + /// is the real part of the complex number @c p[i], and + /// @c reinterpret_cast(p)[2*i+1] is the imaginary part of the + /// complex number @c p[i]. + /// + /// Since C++11, these requirements are guaranteed to be satisfied for + /// @c scalar_ts being @c float, @c double or @c long @c double + /// together with @c cpx_t being @c std::complex. + void transform_real( const scalar_t * const src, + cpx_t * const dst ) const { - for (int k=0;k(src), dst ); + + // post processing for k = 0 and k = N + dst[0] = cpx_t( dst[0].real() + dst[0].imag(), + dst[0].real() - dst[0].imag() ); + + // post processing for all the other k = 1, 2, ..., N-1 + const scalar_t pi = acos( (scalar_t) -1); + const scalar_t half_phi_inc = ( _inverse ? pi : -pi ) / N; + const cpx_t twiddle_mul = exp( cpx_t(0, half_phi_inc) ); + for ( std::size_t k = 1; 2*k < N; ++k ) + { + const cpx_t w = (scalar_t)0.5 * cpx_t( + dst[k].real() + dst[N-k].real(), + dst[k].imag() - dst[N-k].imag() ); + const cpx_t z = (scalar_t)0.5 * cpx_t( + dst[k].imag() + dst[N-k].imag(), + -dst[k].real() + dst[N-k].real() ); + const cpx_t twiddle = + k % 2 == 0 ? + _twiddles[k/2] : + _twiddles[k/2] * twiddle_mul; + dst[ k] = w + twiddle * z; + dst[N-k] = conj( w - twiddle * z ); + } + if ( N % 2 == 0 ) + dst[N/2] = conj( dst[N/2] ); + } + + private: + + void kf_bfly2( cpx_t * Fout, const size_t fstride, const std::size_t m) const + { + for (std::size_t k=0;kreal() - HALF_OF(scratch[3].real() ) , Fout->imag() - HALF_OF(scratch[3].imag() ) ); - - C_MULBYSCALAR( scratch[0] , epi3.imag() ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2] = cpx_type( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - - C_ADDTO( Fout[m] , cpx_type( -scratch[0].imag(),scratch[0].real() ) ); - ++Fout; - }while(--k); - } - - void kf_bfly5( cpx_type * Fout, const size_t fstride, const size_t m) - { - cpx_type *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - cpx_type scratch[13]; - cpx_type * twiddles = &_twiddles[0]; - cpx_type *tw; - cpx_type ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; + cpx_t *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + cpx_t scratch[13]; + const cpx_t ya = _twiddles[fstride*m]; + const cpx_t yb = _twiddles[fstride*2*m]; Fout0=Fout; Fout1=Fout0+m; @@ -203,97 +258,94 @@ class kissfft Fout3=Fout0+3*m; Fout4=Fout0+4*m; - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - C_MUL(t,scratchbuf[q] , twiddles[twidx] ); - C_ADDTO( Fout[ k ] ,t); + if (twidx>=_nfft) + twidx-=_nfft; + Fout[ k ] += scratchbuf[q] * twiddles[twidx]; } k += m; } } } - int _nfft; + std::size_t _nfft; bool _inverse; - std::vector _twiddles; - std::vector _stageRadix; - std::vector _stageRemainder; - traits_type _traits; + std::vector _twiddles; + std::vector _stageRadix; + std::vector _stageRemainder; }; #endif diff --git a/kissfft_i32.hh b/kissfft_i32.hh new file mode 100644 index 0000000..5871e00 --- /dev/null +++ b/kissfft_i32.hh @@ -0,0 +1,304 @@ +#ifndef KISSFFT_I32_CLASS_HH +#define KISSFFT_I32_CLASS_HH + +#include +#include +#include + +// TODO1: substitute complex (behaviour not defined for nonfloats), should be faster +// TODO2: use std:: namespace +// TODO3: make unittests for all ffts (c, cpp, i32) + +template +struct complex_s +{ + DType real; + DType imag; +}; + +class kissfft_i32 +{ +private: + + using scalar_type = int32_t; + using cpx_type = complex; + + scalar_type _scale_factor; + std::size_t _nfft; + bool _inverse; + std::vector _twiddles; + std::vector _stageRadix; + std::vector _stageRemainder; + +public: + + // scale_factor: upscale twiddle-factors otherwise they lie between 0..1 (out of range for integer) --> fixed point math + kissfft_i32(const std::size_t nfft, const bool inverse, const double scale_factor = 1024.0) + : _scale_factor(scalar_type(scale_factor)), _nfft(nfft), _inverse(inverse) + { + // fill twiddle factors + _twiddles.resize(_nfft); + const double phinc = (_inverse ? 2 : -2) * acos(-1.0) / _nfft; + for (std::size_t i = 0; i < _nfft; ++i) + { + _twiddles[i] = scale_factor * exp(complex(0, i * phinc)); + } + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + std::size_t n = _nfft; + std::size_t p = 4; + do + { + while (n % p) + { + switch (p) + { + case 4: + p = 2; + break; + case 2: + p = 3; + break; + default: + p += 2; + break; + } + if (p * p > n) p = n;// no more factors + } + n /= p; + _stageRadix.push_back(p); + _stageRemainder.push_back(n); + } while (n > 1); + } + + /// Calculates the complex Discrete Fourier Transform. + /// + /// The size of the passed arrays must be passed in the constructor. + /// The sum of the squares of the absolute values in the @c dst + /// array will be @c N times the sum of the squares of the absolute + /// values in the @c src array, where @c N is the size of the array. + /// In other words, the l_2 norm of the resulting array will be + /// @c sqrt(N) times as big as the l_2 norm of the input array. + /// This is also the case when the inverse flag is set in the + /// constructor. Hence when applying the same transform twice, but with + /// the inverse flag changed the second time, then the result will + /// be equal to the original input times @c N. + void transform(const cpx_type * FSrc, + cpx_type * FDst, + const std::size_t stage = 0, + const std::size_t fstride = 1, + const std::size_t in_stride = 1) const + { + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_type *const Fout_beg = FDst; + cpx_type *const Fout_end = FDst + p * m; + + if (m == 1) + { + do + { + *FDst = *FSrc; + FSrc += fstride * in_stride; + } while (++FDst != Fout_end); + } + else + { + do + { + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + transform(FSrc, FDst, stage + 1, fstride * p, in_stride); + FSrc += fstride * in_stride; + } while ((FDst += m) != Fout_end); + } + + FDst = Fout_beg; + + // recombine the p smaller DFTs + switch (p) + { + case 2: + kf_bfly2(FDst, fstride, m); + break; + case 3: + kf_bfly3(FDst, fstride, m); + break; + case 4: + kf_bfly4(FDst, fstride, m); + break; + case 5: + kf_bfly5(FDst, fstride, m); + break; + default: + kf_bfly_generic(FDst, fstride, m, p); + break; + } + } + +private: + + void kf_bfly2(cpx_type *const Fout, const size_t fstride, const std::size_t m) const + { + for (std::size_t k = 0; k < m; ++k) + { + const cpx_type t = (Fout[m + k] * _twiddles[k * fstride]) / _scale_factor; + Fout[m + k] = Fout[k] - t; + Fout[k] += t; + } + } + + void kf_bfly3(cpx_type *Fout, const std::size_t fstride, const std::size_t m) const + { + std::size_t k = m; + const std::size_t m2 = 2 * m; + const cpx_type *tw1, *tw2; + cpx_type scratch[5]; + const cpx_type epi3 = _twiddles[fstride * m]; + + tw1 = tw2 = &_twiddles[0]; + + do + { + scratch[1] = (Fout[m] * *tw1) / _scale_factor; + scratch[2] = (Fout[m2] * *tw2) / _scale_factor; + + scratch[3] = scratch[1] + scratch[2]; + scratch[0] = scratch[1] - scratch[2]; + tw1 += fstride; + tw2 += fstride * 2; + + Fout[m] = Fout[0] - (scratch[3] / 2); + scratch[0] *= epi3.imag(); + scratch[0] /= _scale_factor; + + Fout[0] += scratch[3]; + + Fout[m2] = cpx_type(Fout[m].real() + scratch[0].imag(), Fout[m].imag() - scratch[0].real()); + + Fout[m] += cpx_type(-scratch[0].imag(), scratch[0].real()); + ++Fout; + } while (--k); + } + + void kf_bfly4(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const + { + cpx_type scratch[7]; + const scalar_type negative_if_inverse = _inverse ? -1 : +1; + + for (std::size_t k = 0; k < m; ++k) + { + scratch[0] = (Fout[k + m] * _twiddles[k * fstride]) / _scale_factor; + scratch[1] = (Fout[k + 2 * m] * _twiddles[k * fstride * 2]) / _scale_factor; + scratch[2] = (Fout[k + 3 * m] * _twiddles[k * fstride * 3]) / _scale_factor; + scratch[5] = Fout[k] - scratch[1]; + + Fout[k] += scratch[1]; + scratch[3] = scratch[0] + scratch[2]; + scratch[4] = scratch[0] - scratch[2]; + scratch[4] = cpx_type(scratch[4].imag() * negative_if_inverse, + -scratch[4].real() * negative_if_inverse); + + Fout[k + 2 * m] = Fout[k] - scratch[3]; + Fout[k] += scratch[3]; + Fout[k + m] = scratch[5] + scratch[4]; + Fout[k + 3 * m] = scratch[5] - scratch[4]; + } + } + + void kf_bfly5(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const + { + cpx_type *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; + cpx_type scratch[13]; + const cpx_type ya = _twiddles[fstride * m]; + const cpx_type yb = _twiddles[fstride * 2 * m]; + + Fout0 = Fout; + Fout1 = Fout0 + m; + Fout2 = Fout0 + 2 * m; + Fout3 = Fout0 + 3 * m; + Fout4 = Fout0 + 4 * m; + + for (std::size_t u = 0; u < m; ++u) + { + scratch[0] = *Fout0; + + scratch[1] = (*Fout1 * _twiddles[u * fstride]) / _scale_factor; + scratch[2] = (*Fout2 * _twiddles[2 * u * fstride]) / _scale_factor; + scratch[3] = (*Fout3 * _twiddles[3 * u * fstride]) / _scale_factor; + scratch[4] = (*Fout4 * _twiddles[4 * u * fstride]) / _scale_factor; + + scratch[7] = scratch[1] + scratch[4]; + scratch[10] = scratch[1] - scratch[4]; + scratch[8] = scratch[2] + scratch[3]; + scratch[9] = scratch[2] - scratch[3]; + + *Fout0 += scratch[7]; + *Fout0 += scratch[8]; + + scratch[5] = scratch[0] + (cpx_type( + scratch[7].real() * ya.real() + scratch[8].real() * yb.real(), + scratch[7].imag() * ya.real() + scratch[8].imag() * yb.real() ) / _scale_factor); + + scratch[6] = cpx_type( + scratch[10].imag() * ya.imag() + scratch[9].imag() * yb.imag(), + -scratch[10].real() * ya.imag() - scratch[9].real() * yb.imag() ) / _scale_factor; + + *Fout1 = scratch[5] - scratch[6]; + *Fout4 = scratch[5] + scratch[6]; + + scratch[11] = scratch[0] + (cpx_type( + scratch[7].real() * yb.real() + scratch[8].real() * ya.real(), + scratch[7].imag() * yb.real() + scratch[8].imag() * ya.real() ) / _scale_factor); + + scratch[12] = cpx_type( + -scratch[10].imag() * yb.imag() + scratch[9].imag() * ya.imag(), + scratch[10].real() * yb.imag() - scratch[9].real() * ya.imag() ) / _scale_factor; + + *Fout2 = scratch[11] + scratch[12]; + *Fout3 = scratch[11] - scratch[12]; + + ++Fout0; + ++Fout1; + ++Fout2; + ++Fout3; + ++Fout4; + } + } + + /* perform the butterfly for one stage of a mixed radix FFT */ + void kf_bfly_generic(cpx_type * const Fout, const size_t fstride, const std::size_t m, const std::size_t p) const + { + const cpx_type *twiddles = &_twiddles[0]; + cpx_type scratchbuf[p]; + + for (std::size_t u = 0; u < m; ++u) + { + std::size_t k = u; + for (std::size_t q1 = 0; q1 < p; ++q1) + { + scratchbuf[q1] = Fout[k]; + k += m; + } + + k = u; + for (std::size_t q1 = 0; q1 < p; ++q1) + { + std::size_t twidx = 0; + Fout[k] = scratchbuf[0]; + for (std::size_t q = 1; q < p; ++q) + { + twidx += fstride * k; + if (twidx >= _nfft) + twidx -= _nfft; + Fout[k] += (scratchbuf[q] * twiddles[twidx]) / _scale_factor; + } + k += m; + } + } + } +}; + +#endif