From 67b6c541e246dc10a17be7238b23ae1522710ac7 Mon Sep 17 00:00:00 2001 From: Ian Daniher Date: Mon, 20 Oct 2014 23:57:18 -0400 Subject: [PATCH 01/18] add target to build libkissfft.so --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 96f43d3..aaad644 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,9 @@ KFVER=130 +all: + gcc -Wall -fPIC -c *.c -Dkiss_fft_scalar=float -o kiss_fft.o + gcc -shared -Wl,-soname,libkissfft.so -o libkissfft.so kiss_fft.o + doc: @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not." From dcf3d68b55e4c545952c5c79fff7b1cfb1455b36 Mon Sep 17 00:00:00 2001 From: Ian Daniher Date: Tue, 21 Oct 2014 00:06:34 -0400 Subject: [PATCH 02/18] add .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9d22eb4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +*.so From e3420f1731bb7acbe952092d4313b0b743a79944 Mon Sep 17 00:00:00 2001 From: Ian Daniher Date: Wed, 8 Apr 2015 16:59:46 -0400 Subject: [PATCH 03/18] build archive for static linking --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index aaad644..e6e499e 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ KFVER=130 all: gcc -Wall -fPIC -c *.c -Dkiss_fft_scalar=float -o kiss_fft.o + ar crus libkissfft.a kiss_fft.o gcc -shared -Wl,-soname,libkissfft.so -o libkissfft.so kiss_fft.o doc: From cf0a8088e0c27cdd137e5bcf7a71747e67548c0d Mon Sep 17 00:00:00 2001 From: Ian Daniher Date: Thu, 9 Apr 2015 21:31:37 -0400 Subject: [PATCH 04/18] add install target for shared lib --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index e6e499e..b507dcd 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,9 @@ all: ar crus libkissfft.a kiss_fft.o gcc -shared -Wl,-soname,libkissfft.so -o libkissfft.so kiss_fft.o +install: all + cp libkissfft.so /usr/local/lib/ + doc: @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not." From 5e87358ea849dae84b63924326eaf9ddb20d1829 Mon Sep 17 00:00:00 2001 From: Jonti Olds Date: Fri, 7 Aug 2015 08:30:42 +1200 Subject: [PATCH 05/18] missing include guard added Fixed broken include guard #define KISSFFT_CLASS_HH --- kissfft.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/kissfft.hh b/kissfft.hh index a586cb1..4f6ac92 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -1,4 +1,5 @@ #ifndef KISSFFT_CLASS_HH +#define KISSFFT_CLASS_HH #include #include From 3c96a24710253c55161b3324d26bb4f40cab4303 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Thu, 21 Apr 2016 10:42:20 +0200 Subject: [PATCH 06/18] Made member functions of kissfft class const-correct. Also made member functions static where appropriate. --- kissfft.hh | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 4f6ac92..3ad8554 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -49,7 +49,7 @@ struct traits std::vector _twiddles; - const cpx_type twiddle(int i) { return _twiddles[i]; } + const cpx_type twiddle(int i) const { return _twiddles[i]; } }; } @@ -70,13 +70,13 @@ class kissfft _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); } - void transform(const cpx_type * src , cpx_type * dst) + void transform(const cpx_type * src , cpx_type * dst) const { kf_work(0, dst, src, 1,1); } private: - void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride) + void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride) const { int p = _stageRadix[stage]; int m = _stageRemainder[stage]; @@ -112,16 +112,16 @@ class kissfft } // these were #define macros in the original kiss_fft - void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;} - void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;} - void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;} - void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;} - void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types - scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;} - scalar_type HALF_OF( const scalar_type & a) { return a*.5;} - void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;} + static void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;} + static void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;} + static void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;} + static void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;} + static void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types + static scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;} + static scalar_type HALF_OF( const scalar_type & a) { return a*.5;} + static void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;} - void kf_bfly2( cpx_type * Fout, const size_t fstride, int m) + void kf_bfly2( cpx_type * Fout, const size_t fstride, int m) const { for (int k=0;k Date: Thu, 21 Apr 2016 11:41:53 +0200 Subject: [PATCH 07/18] Improved code quality. * Replaced int by std::size_t where appropriate. * Made traits member functions static. * Removed redundant data from traits class. * Made local variables const where possible. * Made mutable variables as local as possible. * Removed redundant variables. * Improved white-space formatting for readability (sparingly). --- kissfft.hh | 150 +++++++++++++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 74 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 3ad8554..a38ed73 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -10,27 +10,29 @@ struct traits { typedef T_scalar scalar_type; typedef std::complex cpx_type; - void fill_twiddles( std::complex * dst ,int nfft,bool inverse) + static void fill_twiddles( cpx_type * dst, + std::size_t nfft, + bool inverse ) { T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; - for (int i=0;i(0,i*phinc) ); + for (std::size_t i=0;i > & dst, - int nfft,bool inverse, - std::vector & stageRadix, - std::vector & stageRemainder ) + static void prepare( + std::vector< cpx_type > & _twiddles, + std::size_t nfft, + bool inverse, + std::vector & stageRadix, + std::vector & stageRemainder ) { _twiddles.resize(nfft); fill_twiddles( &_twiddles[0],nfft,inverse); - dst = _twiddles; //factorize //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; + std::size_t n= nfft; + std::size_t p=4; do { while (n % p) { switch (p) { @@ -39,17 +41,13 @@ struct traits default: p += 2; break; } if (p*p>n) - p=n;// no more factors + p = n;// no more factors } n /= p; stageRadix.push_back(p); stageRemainder.push_back(n); }while(n>1); } - std::vector _twiddles; - - - const cpx_type twiddle(int i) const { return _twiddles[i]; } }; } @@ -64,24 +62,31 @@ class kissfft typedef typename traits_type::scalar_type scalar_type; typedef typename traits_type::cpx_type cpx_type; - kissfft(int nfft,bool inverse,const traits_type & traits=traits_type() ) - :_nfft(nfft),_inverse(inverse),_traits(traits) + kissfft( std::size_t nfft, + bool inverse ) + :_nfft(nfft) + ,_inverse(inverse) { - _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); + T_traits::prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); } - void transform(const cpx_type * src , cpx_type * dst) const + void transform( const cpx_type * src, + cpx_type * dst ) const { kf_work(0, dst, src, 1,1); } private: - void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride) const + void kf_work( std::size_t stage, + cpx_type * Fout, + const cpx_type * f, + std::size_t fstride, + std::size_t in_stride) const { - int p = _stageRadix[stage]; - int m = _stageRemainder[stage]; - cpx_type * Fout_beg = Fout; - cpx_type * Fout_end = Fout + p*m; + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_type * const Fout_beg = Fout; + cpx_type * const Fout_end = Fout + p*m; if (m==1) { do{ @@ -121,45 +126,45 @@ class kissfft static scalar_type HALF_OF( const scalar_type & a) { return a*.5;} static void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;} - void kf_bfly2( cpx_type * Fout, const size_t fstride, int m) const + void kf_bfly2( cpx_type * Fout, const size_t fstride, std::size_t m) const { - for (int k=0;k=Norig) twidx-=Norig; + if (twidx>=_nfft) + twidx-=_nfft; + cpx_type t; C_MUL(t,scratchbuf[q] , twiddles[twidx] ); C_ADDTO( Fout[ k ] ,t); } @@ -290,11 +293,10 @@ class kissfft } } - int _nfft; + std::size_t _nfft; bool _inverse; std::vector _twiddles; - std::vector _stageRadix; - std::vector _stageRemainder; - traits_type _traits; + std::vector _stageRadix; + std::vector _stageRemainder; }; #endif From b10fb43644ae417860eb2c06297737ccd4dc9a05 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Thu, 21 Apr 2016 12:11:45 +0200 Subject: [PATCH 08/18] Removed macro-like looking private methods of kissfft class. All uses of these function were replaced by their implementation (which is mostly easier to read than the functions themselves). --- kissfft.hh | 83 ++++++++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index a38ed73..9fdded3 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -14,7 +14,7 @@ struct traits std::size_t nfft, bool inverse ) { - T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; + const T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; for (std::size_t i=0;ireal() - HALF_OF(scratch[3].real() ) , Fout->imag() - HALF_OF(scratch[3].imag() ) ); + Fout[m] = Fout[0] - scratch[3]*scalar_type(0.5); + scratch[0] *= epi3.imag(); - C_MULBYSCALAR( scratch[0] , epi3.imag() ); - - C_ADDTO(*Fout,scratch[3]); + Fout[0] += scratch[3]; Fout[m2] = cpx_type( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - C_ADDTO( Fout[m] , cpx_type( -scratch[0].imag(),scratch[0].real() ) ); + Fout[m] += cpx_type( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); } @@ -206,48 +193,47 @@ class kissfft Fout4=Fout0+4*m; for ( std::size_t u=0; u=_nfft) twidx-=_nfft; - cpx_type t; - C_MUL(t,scratchbuf[q] , twiddles[twidx] ); - C_ADDTO( Fout[ k ] ,t); + Fout[ k ] += scratchbuf[q] * twiddles[twidx]; } k += m; } From 0395af753ecb6962bf21d87855061a6b485b677a Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Thu, 21 Apr 2016 13:20:29 +0200 Subject: [PATCH 09/18] Removed the needless use of a traits class. --- kissfft.hh | 80 ++++++++++++++++++------------------------------------ 1 file changed, 27 insertions(+), 53 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 9fdded3..b098d16 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -3,71 +3,45 @@ #include #include -namespace kissfft_utils { - -template -struct traits -{ - typedef T_scalar scalar_type; - typedef std::complex cpx_type; - static void fill_twiddles( cpx_type * dst, - std::size_t nfft, - bool inverse ) - { - const T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; - for (std::size_t i=0;i & _twiddles, - std::size_t nfft, - bool inverse, - std::vector & stageRadix, - std::vector & stageRemainder ) - { - _twiddles.resize(nfft); - fill_twiddles( &_twiddles[0],nfft,inverse); - - //factorize - //start factoring out 4's, then 2's, then 3,5,7,9,... - std::size_t n= nfft; - std::size_t p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p = n;// no more factors - } - n /= p; - stageRadix.push_back(p); - stageRemainder.push_back(n); - }while(n>1); - } -}; - -} template + typename T_Complex=std::complex > class kissfft { public: - typedef T_traits traits_type; - typedef typename traits_type::scalar_type scalar_type; - typedef typename traits_type::cpx_type cpx_type; + typedef T_Scalar scalar_type; + typedef T_Complex cpx_type; kissfft( std::size_t nfft, bool inverse ) :_nfft(nfft) ,_inverse(inverse) { - T_traits::prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); + // fill twiddle factors + _twiddles.resize(_nfft); + const scalar_type phinc = (_inverse?2:-2)* acos( (scalar_type) -1) / _nfft; + for (std::size_t i=0;i<_nfft;++i) + _twiddles[i] = std::exp( cpx_type(0,i*phinc) ); + + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + std::size_t n= _nfft; + std::size_t p=4; + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p*p>n) + p = n;// no more factors + } + n /= p; + _stageRadix.push_back(p); + _stageRemainder.push_back(n); + }while(n>1); } void transform( const cpx_type * src, From 1fbc2b6ab46aa17a972fde4f00d871e29be83455 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Sat, 23 Apr 2016 19:09:42 +0200 Subject: [PATCH 10/18] Added FFT from real input. Improved documentation. --- kissfft.hh | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/kissfft.hh b/kissfft.hh index b098d16..8e11e41 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -22,7 +22,7 @@ class kissfft _twiddles.resize(_nfft); const scalar_type phinc = (_inverse?2:-2)* acos( (scalar_type) -1) / _nfft; for (std::size_t i=0;i<_nfft;++i) - _twiddles[i] = std::exp( cpx_type(0,i*phinc) ); + _twiddles[i] = exp( cpx_type(0,i*phinc) ); //factorize //start factoring out 4's, then 2's, then 3,5,7,9,... @@ -44,12 +44,90 @@ class kissfft }while(n>1); } + /// Calculates the complex Discrete Fourier Transform. + /// + /// The size of the passed arrays must be passed in the constructor. + /// The sum of the squares of the absolute values in the @c dst + /// array will be @c N times the sum of the squares of the absolute + /// values in the @c src array, where @c N is the size of the array. + /// In other words, the l_2 norm of the resulting array will be + /// @c sqrt(N) times as big as the l_2 norm of the input array. + /// This is also the case when the inverse flag is set in the + /// constructor. Hence when applying the same transform twice, but with + /// the inverse flag changed the second time, then the result will + /// be equal to the original input times @c N. void transform( const cpx_type * src, cpx_type * dst ) const { kf_work(0, dst, src, 1,1); } + /// Calculates the Discrete Fourier Transform (DFT) of a real input + /// of size @c 2*N. + /// + /// The 0-th and N-th value of the DFT are real numbers. These are + /// stored in @c dst[0].real() and @c dst[1].imag() respectively. + /// The remaining DFT values up to the index N-1 are stored in + /// @c dst[1] to @c dst[N-1]. + /// The other half of the DFT values can be calculated from the + /// symmetry relation + /// @code + /// DFT(src)[2*N-k] == conj( DFT(src)[k] ); + /// @endcode + /// The same scaling factors as in @c transform() apply. + /// + /// @note For this to work, the types @c scalar_type and @c cpx_type + /// must fulfill the following requirements: + /// + /// For any object @c z of type @c cpx_type, + /// @c reinterpret_cast(z)[0] is the real part of @c z and + /// @c reinterpret_cast(z)[1] is the imaginary part of @c z. + /// For any pointer to an element of an array of @c cpx_type named @c p + /// and any valid array index @c i, @c reinterpret_cast(p)[2*i] + /// is the real part of the complex number @c p[i], and + /// @c reinterpret_cast(p)[2*i+1] is the imaginary part of the + /// complex number @c p[i]. + /// + /// Since C++11, these requirements are guaranteed to be satisfied for + /// @c scalar_types being @c float, @c double or @c long @c double + /// together with @c cpx_type being @c std::complex. + void transform_real( const scalar_type * src, + cpx_type * dst ) const + { + const std::size_t N = _nfft; + if ( N == 0 ) + return; + + // perform complex FFT + transform( reinterpret_cast(src), dst ); + + // post processing for k = 0 and k = N + dst[0] = cpx_type( dst[0].real() + dst[0].imag(), + dst[0].real() - dst[0].imag() ); + + // post processing for all the other k = 1, 2, ..., N-1 + const scalar_type pi = acos( (scalar_type) -1); + const scalar_type half_phi_inc = ( _inverse ? pi : -pi ) / N; + const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); + for ( std::size_t k = 1; 2*k < N; ++k ) + { + const cpx_type w = 0.5 * cpx_type( + dst[k].real() + dst[N-k].real(), + dst[k].imag() - dst[N-k].imag() ); + const cpx_type z = 0.5 * cpx_type( + dst[k].imag() + dst[N-k].imag(), + -dst[k].real() + dst[N-k].real() ); + const cpx_type twiddle = + k % 2 == 0 ? + _twiddles[k/2] : + _twiddles[k/2] * twiddle_mul; + dst[ k] = w + twiddle * z; + dst[N-k] = conj( w - twiddle * z ); + } + if ( N % 2 == 0 ) + dst[N/2] = conj( dst[N/2] ); + } + private: void kf_work( std::size_t stage, cpx_type * Fout, From 6e0d8bbcd2a32d8fc028734449de5fdc37b424a0 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Mon, 25 Apr 2016 10:17:41 +0200 Subject: [PATCH 11/18] Normalized identation in new code. --- kissfft.hh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 8e11e41..213e4e2 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -96,7 +96,7 @@ class kissfft { const std::size_t N = _nfft; if ( N == 0 ) - return; + return; // perform complex FFT transform( reinterpret_cast(src), dst ); @@ -111,21 +111,21 @@ class kissfft const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); for ( std::size_t k = 1; 2*k < N; ++k ) { - const cpx_type w = 0.5 * cpx_type( - dst[k].real() + dst[N-k].real(), - dst[k].imag() - dst[N-k].imag() ); - const cpx_type z = 0.5 * cpx_type( - dst[k].imag() + dst[N-k].imag(), - -dst[k].real() + dst[N-k].real() ); - const cpx_type twiddle = - k % 2 == 0 ? - _twiddles[k/2] : - _twiddles[k/2] * twiddle_mul; - dst[ k] = w + twiddle * z; - dst[N-k] = conj( w - twiddle * z ); + const cpx_type w = 0.5 * cpx_type( + dst[k].real() + dst[N-k].real(), + dst[k].imag() - dst[N-k].imag() ); + const cpx_type z = 0.5 * cpx_type( + dst[k].imag() + dst[N-k].imag(), + -dst[k].real() + dst[N-k].real() ); + const cpx_type twiddle = + k % 2 == 0 ? + _twiddles[k/2] : + _twiddles[k/2] * twiddle_mul; + dst[ k] = w + twiddle * z; + dst[N-k] = conj( w - twiddle * z ); } if ( N % 2 == 0 ) - dst[N/2] = conj( dst[N/2] ); + dst[N/2] = conj( dst[N/2] ); } private: From 6a8798c4539e3d9772196bba2b87f69475467a31 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Tue, 26 Apr 2016 08:40:31 +0200 Subject: [PATCH 12/18] Added the method kissfft::assign(). --- kissfft.hh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kissfft.hh b/kissfft.hh index 213e4e2..9a7fb7c 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -1,6 +1,7 @@ #ifndef KISSFFT_CLASS_HH #define KISSFFT_CLASS_HH #include +#include #include @@ -44,6 +45,30 @@ class kissfft }while(n>1); } + + /// Changes the FFT-length and/or the transform direction. + /// + /// @post The @c kissfft object will be in the same state as if it + /// had been newly constructed with the passed arguments. + /// However, the implementation may be faster than constructing a + /// new fft object. + void assign( std::size_t nfft, + bool inverse ) + { + if ( nfft != _nfft ) + { + kissfft tmp( nfft, inverse ); // O(n) time. + std::swap( tmp, *this ); // this is O(1) in C++11, O(n) otherwise. + } + else if ( inverse != _inverse ) + { + // conjugate the twiddle factors. + for ( typename std::vector::iterator it = _twiddles.begin(); + it != _twiddles.end(); ++it ) + it->imag( -it->imag() ); + } + } + /// Calculates the complex Discrete Fourier Transform. /// /// The size of the passed arrays must be passed in the constructor. From 64800e61d842be23a543c7d55eb3465a9476aba9 Mon Sep 17 00:00:00 2001 From: Ralph Tandetzky Date: Tue, 20 Sep 2016 14:02:49 +0200 Subject: [PATCH 13/18] Fix: Made FFT work for T = float. Also removed some trailing spaces from line ends. --- kissfft.hh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 9a7fb7c..3c41213 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -136,10 +136,10 @@ class kissfft const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); for ( std::size_t k = 1; 2*k < N; ++k ) { - const cpx_type w = 0.5 * cpx_type( + const cpx_type w = (scalar_type)0.5 * cpx_type( dst[k].real() + dst[N-k].real(), dst[k].imag() - dst[N-k].imag() ); - const cpx_type z = 0.5 * cpx_type( + const cpx_type z = (scalar_type)0.5 * cpx_type( dst[k].imag() + dst[N-k].imag(), -dst[k].real() + dst[N-k].real() ); const cpx_type twiddle = @@ -174,7 +174,7 @@ class kissfft do{ // recursive call: // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, + // p instances of smaller DFTs of size m, // each one takes a decimated version of the input kf_work(stage+1, Fout , f, fstride*p,in_stride); f += fstride*in_stride; @@ -183,7 +183,7 @@ class kissfft Fout=Fout_beg; - // recombine the p smaller DFTs + // recombine the p smaller DFTs switch (p) { case 2: kf_bfly2(Fout,fstride,m); break; case 3: kf_bfly3(Fout,fstride,m); break; @@ -290,7 +290,7 @@ class kissfft scratch[7].imag()*ya.real() + scratch[8].imag()*yb.real() ); - scratch[6] = cpx_type( + scratch[6] = cpx_type( scratch[10].imag()*ya.imag() + scratch[9].imag()*yb.imag(), -scratch[10].real()*ya.imag() - scratch[9].real()*yb.imag() ); @@ -298,7 +298,7 @@ class kissfft *Fout1 = scratch[5] - scratch[6]; *Fout4 = scratch[5] + scratch[6]; - scratch[11] = scratch[0] + + scratch[11] = scratch[0] + cpx_type( scratch[7].real()*yb.real() + scratch[8].real()*ya.real(), scratch[7].imag()*yb.real() + scratch[8].imag()*ya.real() From 82d7f4cb8a4a0dc1abe7e9cdb3ad0d61a2d01528 Mon Sep 17 00:00:00 2001 From: Greg Thornton Date: Thu, 3 Nov 2016 06:46:07 -0500 Subject: [PATCH 14/18] Use dylib for macOS and ignore built libs --- .gitignore | 2 ++ Makefile | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9d22eb4..fe6a9dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.o *.so +*.a +*.dylib \ No newline at end of file diff --git a/Makefile b/Makefile index b507dcd..d11c51a 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,15 @@ KFVER=130 +ifeq ($(shell uname -s),Darwin) + SHARED := -Wl,-install_name,libkissfft.dylib -o libkissfft.dylib +else + SHARED := -Wl,-soname,libkissfft.so -o libkissfft.so +endif + all: gcc -Wall -fPIC -c *.c -Dkiss_fft_scalar=float -o kiss_fft.o ar crus libkissfft.a kiss_fft.o - gcc -shared -Wl,-soname,libkissfft.so -o libkissfft.so kiss_fft.o + gcc -shared $(SHARED) kiss_fft.o install: all cp libkissfft.so /usr/local/lib/ From daca3f4c069c40d3da8f4ca6988b9cd7cfa4d71f Mon Sep 17 00:00:00 2001 From: orgua Date: Thu, 15 Dec 2016 22:07:35 +0100 Subject: [PATCH 15/18] fix type-system, use overload for tranform() and reorder butterfly-fn --- kissfft.hh | 235 +++++++++++++++++++++++++---------------------------- 1 file changed, 112 insertions(+), 123 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 3c41213..96fdcb3 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -5,14 +5,12 @@ #include -template - > +template class kissfft { public: - typedef T_Scalar scalar_type; - typedef T_Complex cpx_type; + + using cpx_t = std::complex; kissfft( std::size_t nfft, bool inverse ) @@ -21,9 +19,9 @@ class kissfft { // fill twiddle factors _twiddles.resize(_nfft); - const scalar_type phinc = (_inverse?2:-2)* acos( (scalar_type) -1) / _nfft; + const scalar_t phinc = (_inverse?2:-2)* acos( (scalar_t) -1) / _nfft; for (std::size_t i=0;i<_nfft;++i) - _twiddles[i] = exp( cpx_type(0,i*phinc) ); + _twiddles[i] = exp( cpx_t(0,i*phinc) ); //factorize //start factoring out 4's, then 2's, then 3,5,7,9,... @@ -43,7 +41,7 @@ class kissfft _stageRadix.push_back(p); _stageRemainder.push_back(n); }while(n>1); - } + }; /// Changes the FFT-length and/or the transform direction. @@ -63,11 +61,11 @@ class kissfft else if ( inverse != _inverse ) { // conjugate the twiddle factors. - for ( typename std::vector::iterator it = _twiddles.begin(); + for ( typename std::vector::iterator it = _twiddles.begin(); it != _twiddles.end(); ++it ) it->imag( -it->imag() ); } - } + }; /// Calculates the complex Discrete Fourier Transform. /// @@ -81,11 +79,40 @@ class kissfft /// constructor. Hence when applying the same transform twice, but with /// the inverse flag changed the second time, then the result will /// be equal to the original input times @c N. - void transform( const cpx_type * src, - cpx_type * dst ) const + void transform(const cpx_t * fft_in, cpx_t * fft_out, std::size_t stage = 0, std::size_t fstride = 1, std::size_t in_stride = 1) const { - kf_work(0, dst, src, 1,1); - } + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_t * const Fout_beg = fft_out; + cpx_t * const Fout_end = fft_out + p*m; + + if (m==1) { + do{ + *fft_out = *fft_in; + fft_in += fstride*in_stride; + }while(++fft_out != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + transform(fft_in, fft_out, stage+1, fstride*p,in_stride); + fft_in += fstride*in_stride; + }while( (fft_out += m) != Fout_end ); + } + + fft_out=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(fft_out,fstride,m); break; + case 3: kf_bfly3(fft_out,fstride,m); break; + case 4: kf_bfly4(fft_out,fstride,m); break; + case 5: kf_bfly5(fft_out,fstride,m); break; + default: kf_bfly_generic(fft_out,fstride,m,p); break; + } + }; /// Calculates the Discrete Fourier Transform (DFT) of a real input /// of size @c 2*N. @@ -101,48 +128,48 @@ class kissfft /// @endcode /// The same scaling factors as in @c transform() apply. /// - /// @note For this to work, the types @c scalar_type and @c cpx_type + /// @note For this to work, the types @c scalar_t and @c cpx_t /// must fulfill the following requirements: /// - /// For any object @c z of type @c cpx_type, - /// @c reinterpret_cast(z)[0] is the real part of @c z and - /// @c reinterpret_cast(z)[1] is the imaginary part of @c z. - /// For any pointer to an element of an array of @c cpx_type named @c p + /// For any object @c z of type @c cpx_t, + /// @c reinterpret_cast(z)[0] is the real part of @c z and + /// @c reinterpret_cast(z)[1] is the imaginary part of @c z. + /// For any pointer to an element of an array of @c cpx_t named @c p /// and any valid array index @c i, @c reinterpret_cast(p)[2*i] /// is the real part of the complex number @c p[i], and /// @c reinterpret_cast(p)[2*i+1] is the imaginary part of the /// complex number @c p[i]. /// /// Since C++11, these requirements are guaranteed to be satisfied for - /// @c scalar_types being @c float, @c double or @c long @c double - /// together with @c cpx_type being @c std::complex. - void transform_real( const scalar_type * src, - cpx_type * dst ) const + /// @c scalar_ts being @c float, @c double or @c long @c double + /// together with @c cpx_t being @c std::complex. + void transform_real( const scalar_t * src, + cpx_t * dst ) const { const std::size_t N = _nfft; if ( N == 0 ) return; // perform complex FFT - transform( reinterpret_cast(src), dst ); + transform( reinterpret_cast(src), dst ); // post processing for k = 0 and k = N - dst[0] = cpx_type( dst[0].real() + dst[0].imag(), + dst[0] = cpx_t( dst[0].real() + dst[0].imag(), dst[0].real() - dst[0].imag() ); // post processing for all the other k = 1, 2, ..., N-1 - const scalar_type pi = acos( (scalar_type) -1); - const scalar_type half_phi_inc = ( _inverse ? pi : -pi ) / N; - const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); + const scalar_t pi = acos( (scalar_t) -1); + const scalar_t half_phi_inc = ( _inverse ? pi : -pi ) / N; + const cpx_t twiddle_mul = exp( cpx_t(0, half_phi_inc) ); for ( std::size_t k = 1; 2*k < N; ++k ) { - const cpx_type w = (scalar_type)0.5 * cpx_type( + const cpx_t w = (scalar_t)0.5 * cpx_t( dst[k].real() + dst[N-k].real(), dst[k].imag() - dst[N-k].imag() ); - const cpx_type z = (scalar_type)0.5 * cpx_type( + const cpx_t z = (scalar_t)0.5 * cpx_t( dst[k].imag() + dst[N-k].imag(), -dst[k].real() + dst[N-k].real() ); - const cpx_type twiddle = + const cpx_t twiddle = k % 2 == 0 ? _twiddles[k/2] : _twiddles[k/2] * twiddle_mul; @@ -151,87 +178,26 @@ class kissfft } if ( N % 2 == 0 ) dst[N/2] = conj( dst[N/2] ); - } + }; private: - void kf_work( std::size_t stage, - cpx_type * Fout, - const cpx_type * f, - std::size_t fstride, - std::size_t in_stride) const - { - const std::size_t p = _stageRadix[stage]; - const std::size_t m = _stageRemainder[stage]; - cpx_type * const Fout_beg = Fout; - cpx_type * const Fout_end = Fout + p*m; - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - kf_work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); - } - - Fout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: kf_bfly2(Fout,fstride,m); break; - case 3: kf_bfly3(Fout,fstride,m); break; - case 4: kf_bfly4(Fout,fstride,m); break; - case 5: kf_bfly5(Fout,fstride,m); break; - default: kf_bfly_generic(Fout,fstride,m,p); break; - } - } - - void kf_bfly2( cpx_type * Fout, const size_t fstride, std::size_t m) const + void kf_bfly2( cpx_t * Fout, const size_t fstride, std::size_t m) const { for (std::size_t k=0;k _twiddles; - std::vector _stageRadix; - std::vector _stageRemainder; + std::size_t _nfft; + bool _inverse; + std::vector _twiddles; + std::vector _stageRadix; + std::vector _stageRemainder; }; #endif From c225efda5a2def45c1ec75f1f56fa22674dd9476 Mon Sep 17 00:00:00 2001 From: orgua Date: Thu, 15 Dec 2016 22:35:01 +0100 Subject: [PATCH 16/18] fix indentation and remove not needed semicolons --- kissfft.hh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 96fdcb3..8843ca0 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -41,7 +41,7 @@ class kissfft _stageRadix.push_back(p); _stageRemainder.push_back(n); }while(n>1); - }; + } /// Changes the FFT-length and/or the transform direction. @@ -65,7 +65,7 @@ class kissfft it != _twiddles.end(); ++it ) it->imag( -it->imag() ); } - }; + } /// Calculates the complex Discrete Fourier Transform. /// @@ -112,7 +112,7 @@ class kissfft case 5: kf_bfly5(fft_out,fstride,m); break; default: kf_bfly_generic(fft_out,fstride,m,p); break; } - }; + } /// Calculates the Discrete Fourier Transform (DFT) of a real input /// of size @c 2*N. @@ -178,7 +178,7 @@ class kissfft } if ( N % 2 == 0 ) dst[N/2] = conj( dst[N/2] ); - }; + } private: @@ -189,7 +189,7 @@ class kissfft Fout[m+k] = Fout[k] - t; Fout[k] += t; } - }; + } void kf_bfly3( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const { @@ -220,7 +220,7 @@ class kissfft Fout[m] += cpx_t( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); - }; + } void kf_bfly4( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const { @@ -243,7 +243,7 @@ class kissfft Fout[k+ m] = scratch[5] + scratch[4]; Fout[k+3*m] = scratch[5] - scratch[4]; } - }; + } void kf_bfly5( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const { @@ -307,7 +307,7 @@ class kissfft ++Fout3; ++Fout4; } - }; + } /* perform the butterfly for one stage of a mixed radix FFT */ void kf_bfly_generic( @@ -340,12 +340,12 @@ class kissfft k += m; } } - }; + } - std::size_t _nfft; - bool _inverse; - std::vector _twiddles; - std::vector _stageRadix; - std::vector _stageRemainder; + std::size_t _nfft; + bool _inverse; + std::vector _twiddles; + std::vector _stageRadix; + std::vector _stageRemainder; }; #endif From 68cca025658c32f03d9ccbe17a24e2f5e1b8ab40 Mon Sep 17 00:00:00 2001 From: Ingmar Splitt Date: Fri, 6 Jan 2017 12:20:33 +0100 Subject: [PATCH 17/18] add constness --- kissfft.hh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kissfft.hh b/kissfft.hh index 8843ca0..afa7b78 100644 --- a/kissfft.hh +++ b/kissfft.hh @@ -12,8 +12,8 @@ class kissfft using cpx_t = std::complex; - kissfft( std::size_t nfft, - bool inverse ) + kissfft( const std::size_t nfft, + const bool inverse ) :_nfft(nfft) ,_inverse(inverse) { @@ -50,8 +50,8 @@ class kissfft /// had been newly constructed with the passed arguments. /// However, the implementation may be faster than constructing a /// new fft object. - void assign( std::size_t nfft, - bool inverse ) + void assign( const std::size_t nfft, + const bool inverse ) { if ( nfft != _nfft ) { @@ -79,7 +79,7 @@ class kissfft /// constructor. Hence when applying the same transform twice, but with /// the inverse flag changed the second time, then the result will /// be equal to the original input times @c N. - void transform(const cpx_t * fft_in, cpx_t * fft_out, std::size_t stage = 0, std::size_t fstride = 1, std::size_t in_stride = 1) const + void transform(const cpx_t * fft_in, cpx_t * fft_out, const std::size_t stage = 0, const std::size_t fstride = 1, const std::size_t in_stride = 1) const { const std::size_t p = _stageRadix[stage]; const std::size_t m = _stageRemainder[stage]; @@ -143,8 +143,8 @@ class kissfft /// Since C++11, these requirements are guaranteed to be satisfied for /// @c scalar_ts being @c float, @c double or @c long @c double /// together with @c cpx_t being @c std::complex. - void transform_real( const scalar_t * src, - cpx_t * dst ) const + void transform_real( const scalar_t * const src, + cpx_t * const dst ) const { const std::size_t N = _nfft; if ( N == 0 ) @@ -182,7 +182,7 @@ class kissfft private: - void kf_bfly2( cpx_t * Fout, const size_t fstride, std::size_t m) const + void kf_bfly2( cpx_t * Fout, const size_t fstride, const std::size_t m) const { for (std::size_t k=0;k Date: Fri, 6 Jan 2017 14:02:35 +0100 Subject: [PATCH 18/18] integer cpp-version --- kissfft_i32.hh | 304 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 kissfft_i32.hh diff --git a/kissfft_i32.hh b/kissfft_i32.hh new file mode 100644 index 0000000..5871e00 --- /dev/null +++ b/kissfft_i32.hh @@ -0,0 +1,304 @@ +#ifndef KISSFFT_I32_CLASS_HH +#define KISSFFT_I32_CLASS_HH + +#include +#include +#include + +// TODO1: substitute complex (behaviour not defined for nonfloats), should be faster +// TODO2: use std:: namespace +// TODO3: make unittests for all ffts (c, cpp, i32) + +template +struct complex_s +{ + DType real; + DType imag; +}; + +class kissfft_i32 +{ +private: + + using scalar_type = int32_t; + using cpx_type = complex; + + scalar_type _scale_factor; + std::size_t _nfft; + bool _inverse; + std::vector _twiddles; + std::vector _stageRadix; + std::vector _stageRemainder; + +public: + + // scale_factor: upscale twiddle-factors otherwise they lie between 0..1 (out of range for integer) --> fixed point math + kissfft_i32(const std::size_t nfft, const bool inverse, const double scale_factor = 1024.0) + : _scale_factor(scalar_type(scale_factor)), _nfft(nfft), _inverse(inverse) + { + // fill twiddle factors + _twiddles.resize(_nfft); + const double phinc = (_inverse ? 2 : -2) * acos(-1.0) / _nfft; + for (std::size_t i = 0; i < _nfft; ++i) + { + _twiddles[i] = scale_factor * exp(complex(0, i * phinc)); + } + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + std::size_t n = _nfft; + std::size_t p = 4; + do + { + while (n % p) + { + switch (p) + { + case 4: + p = 2; + break; + case 2: + p = 3; + break; + default: + p += 2; + break; + } + if (p * p > n) p = n;// no more factors + } + n /= p; + _stageRadix.push_back(p); + _stageRemainder.push_back(n); + } while (n > 1); + } + + /// Calculates the complex Discrete Fourier Transform. + /// + /// The size of the passed arrays must be passed in the constructor. + /// The sum of the squares of the absolute values in the @c dst + /// array will be @c N times the sum of the squares of the absolute + /// values in the @c src array, where @c N is the size of the array. + /// In other words, the l_2 norm of the resulting array will be + /// @c sqrt(N) times as big as the l_2 norm of the input array. + /// This is also the case when the inverse flag is set in the + /// constructor. Hence when applying the same transform twice, but with + /// the inverse flag changed the second time, then the result will + /// be equal to the original input times @c N. + void transform(const cpx_type * FSrc, + cpx_type * FDst, + const std::size_t stage = 0, + const std::size_t fstride = 1, + const std::size_t in_stride = 1) const + { + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_type *const Fout_beg = FDst; + cpx_type *const Fout_end = FDst + p * m; + + if (m == 1) + { + do + { + *FDst = *FSrc; + FSrc += fstride * in_stride; + } while (++FDst != Fout_end); + } + else + { + do + { + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + transform(FSrc, FDst, stage + 1, fstride * p, in_stride); + FSrc += fstride * in_stride; + } while ((FDst += m) != Fout_end); + } + + FDst = Fout_beg; + + // recombine the p smaller DFTs + switch (p) + { + case 2: + kf_bfly2(FDst, fstride, m); + break; + case 3: + kf_bfly3(FDst, fstride, m); + break; + case 4: + kf_bfly4(FDst, fstride, m); + break; + case 5: + kf_bfly5(FDst, fstride, m); + break; + default: + kf_bfly_generic(FDst, fstride, m, p); + break; + } + } + +private: + + void kf_bfly2(cpx_type *const Fout, const size_t fstride, const std::size_t m) const + { + for (std::size_t k = 0; k < m; ++k) + { + const cpx_type t = (Fout[m + k] * _twiddles[k * fstride]) / _scale_factor; + Fout[m + k] = Fout[k] - t; + Fout[k] += t; + } + } + + void kf_bfly3(cpx_type *Fout, const std::size_t fstride, const std::size_t m) const + { + std::size_t k = m; + const std::size_t m2 = 2 * m; + const cpx_type *tw1, *tw2; + cpx_type scratch[5]; + const cpx_type epi3 = _twiddles[fstride * m]; + + tw1 = tw2 = &_twiddles[0]; + + do + { + scratch[1] = (Fout[m] * *tw1) / _scale_factor; + scratch[2] = (Fout[m2] * *tw2) / _scale_factor; + + scratch[3] = scratch[1] + scratch[2]; + scratch[0] = scratch[1] - scratch[2]; + tw1 += fstride; + tw2 += fstride * 2; + + Fout[m] = Fout[0] - (scratch[3] / 2); + scratch[0] *= epi3.imag(); + scratch[0] /= _scale_factor; + + Fout[0] += scratch[3]; + + Fout[m2] = cpx_type(Fout[m].real() + scratch[0].imag(), Fout[m].imag() - scratch[0].real()); + + Fout[m] += cpx_type(-scratch[0].imag(), scratch[0].real()); + ++Fout; + } while (--k); + } + + void kf_bfly4(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const + { + cpx_type scratch[7]; + const scalar_type negative_if_inverse = _inverse ? -1 : +1; + + for (std::size_t k = 0; k < m; ++k) + { + scratch[0] = (Fout[k + m] * _twiddles[k * fstride]) / _scale_factor; + scratch[1] = (Fout[k + 2 * m] * _twiddles[k * fstride * 2]) / _scale_factor; + scratch[2] = (Fout[k + 3 * m] * _twiddles[k * fstride * 3]) / _scale_factor; + scratch[5] = Fout[k] - scratch[1]; + + Fout[k] += scratch[1]; + scratch[3] = scratch[0] + scratch[2]; + scratch[4] = scratch[0] - scratch[2]; + scratch[4] = cpx_type(scratch[4].imag() * negative_if_inverse, + -scratch[4].real() * negative_if_inverse); + + Fout[k + 2 * m] = Fout[k] - scratch[3]; + Fout[k] += scratch[3]; + Fout[k + m] = scratch[5] + scratch[4]; + Fout[k + 3 * m] = scratch[5] - scratch[4]; + } + } + + void kf_bfly5(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const + { + cpx_type *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; + cpx_type scratch[13]; + const cpx_type ya = _twiddles[fstride * m]; + const cpx_type yb = _twiddles[fstride * 2 * m]; + + Fout0 = Fout; + Fout1 = Fout0 + m; + Fout2 = Fout0 + 2 * m; + Fout3 = Fout0 + 3 * m; + Fout4 = Fout0 + 4 * m; + + for (std::size_t u = 0; u < m; ++u) + { + scratch[0] = *Fout0; + + scratch[1] = (*Fout1 * _twiddles[u * fstride]) / _scale_factor; + scratch[2] = (*Fout2 * _twiddles[2 * u * fstride]) / _scale_factor; + scratch[3] = (*Fout3 * _twiddles[3 * u * fstride]) / _scale_factor; + scratch[4] = (*Fout4 * _twiddles[4 * u * fstride]) / _scale_factor; + + scratch[7] = scratch[1] + scratch[4]; + scratch[10] = scratch[1] - scratch[4]; + scratch[8] = scratch[2] + scratch[3]; + scratch[9] = scratch[2] - scratch[3]; + + *Fout0 += scratch[7]; + *Fout0 += scratch[8]; + + scratch[5] = scratch[0] + (cpx_type( + scratch[7].real() * ya.real() + scratch[8].real() * yb.real(), + scratch[7].imag() * ya.real() + scratch[8].imag() * yb.real() ) / _scale_factor); + + scratch[6] = cpx_type( + scratch[10].imag() * ya.imag() + scratch[9].imag() * yb.imag(), + -scratch[10].real() * ya.imag() - scratch[9].real() * yb.imag() ) / _scale_factor; + + *Fout1 = scratch[5] - scratch[6]; + *Fout4 = scratch[5] + scratch[6]; + + scratch[11] = scratch[0] + (cpx_type( + scratch[7].real() * yb.real() + scratch[8].real() * ya.real(), + scratch[7].imag() * yb.real() + scratch[8].imag() * ya.real() ) / _scale_factor); + + scratch[12] = cpx_type( + -scratch[10].imag() * yb.imag() + scratch[9].imag() * ya.imag(), + scratch[10].real() * yb.imag() - scratch[9].real() * ya.imag() ) / _scale_factor; + + *Fout2 = scratch[11] + scratch[12]; + *Fout3 = scratch[11] - scratch[12]; + + ++Fout0; + ++Fout1; + ++Fout2; + ++Fout3; + ++Fout4; + } + } + + /* perform the butterfly for one stage of a mixed radix FFT */ + void kf_bfly_generic(cpx_type * const Fout, const size_t fstride, const std::size_t m, const std::size_t p) const + { + const cpx_type *twiddles = &_twiddles[0]; + cpx_type scratchbuf[p]; + + for (std::size_t u = 0; u < m; ++u) + { + std::size_t k = u; + for (std::size_t q1 = 0; q1 < p; ++q1) + { + scratchbuf[q1] = Fout[k]; + k += m; + } + + k = u; + for (std::size_t q1 = 0; q1 < p; ++q1) + { + std::size_t twidx = 0; + Fout[k] = scratchbuf[0]; + for (std::size_t q = 1; q < p; ++q) + { + twidx += fstride * k; + if (twidx >= _nfft) + twidx -= _nfft; + Fout[k] += (scratchbuf[q] * twiddles[twidx]) / _scale_factor; + } + k += m; + } + } + } +}; + +#endif