first addition of kissfft.hh the C++ template fft engine

2026-02-27 01:12:56 -05:00 · 2009-05-17 23:57:26 +00:00
parent 9dbaf860f2
commit 2b5477d54c
3 changed files with 407 additions and 0 deletions
--- a/kissfft.hh
+++ b/kissfft.hh
@ -0,0 +1,325 @@
+#ifndef KISSFFT_CLASS_HH
+#define KISSFFT_CLASS_HH
+#include <cmath>
+#include <complex>
+#include <vector>
+
+namespace kissfft_utils {
+
+
+/// Default planning policy: builds the twiddle table and the radix
+/// factorization that describe a transform of a given length.
+///
+/// @tparam T_twid  scalar type of the twiddle factors.
+template <class T_twid>
+struct traits
+{
+    /// Fill dst[0..nfft-1] with the twiddle factors for the chosen direction.
+    /// Only declared here; the definition lives outside the struct body.
+    void fill_twiddles( std::complex<T_twid> * dst ,int nfft,bool inverse);
+
+    /// Build the tables for a transform of length nfft.
+    ///
+    /// @param dst             receives the nfft twiddle factors.
+    /// @param nfft            transform length.
+    /// @param inverse         direction flag, forwarded to fill_twiddles().
+    /// @param stageRadix      receives one radix per stage, in the order found.
+    /// @param stageRemainder  receives, per stage, the length left after
+    ///                        dividing out that stage's radix and all earlier ones.
+    ///
+    /// All three outputs are rebuilt from scratch. For nfft < 1 they are
+    /// left empty.
+    void prepare(
+            std::vector< std::complex<T_twid> > & dst,
+            int nfft,bool inverse,
+            std::vector<int> & stageRadix,
+            std::vector<int> & stageRemainder )
+    {
+        // Start clean so that calling prepare() again cannot append to a stale plan.
+        dst.clear();
+        stageRadix.clear();
+        stageRemainder.clear();
+        if (nfft < 1)
+            return; // nothing to plan; also avoids &dst[0] on an empty vector
+
+        dst.resize(nfft);
+        fill_twiddles( &dst[0],nfft,inverse);
+
+        //factorize
+        //start factoring out 4's, then 2's, then 3,5,7,9,...
+        int n= nfft;
+        int p=4;
+        do {
+            while (n % p) {
+                // p does not divide n: move on to the next candidate radix
+                switch (p) {
+                    case 4: p = 2; break;
+                    case 2: p = 3; break;
+                    default: p += 2; break;
+                }
+                if (p*p>n)
+                    p=n;// no more factors
+            }
+            n /= p;
+            stageRadix.push_back(p);
+            stageRemainder.push_back(n);
+        }while(n>1);
+    }
+};
+
+/// Write the nfft twiddle factors exp(j*i*phinc), i = 0..nfft-1, into dst.
+/// phinc is -2*pi/nfft when inverse is false and +2*pi/nfft when it is true.
+///
+/// @param dst      output array; must have room for nfft elements.
+/// @param nfft     number of factors to generate.
+/// @param inverse  selects the sign of the phase step.
+template <class T_twid>
+void traits<T_twid>::fill_twiddles( std::complex<T_twid> * dst ,int nfft,bool inverse)
+{
+    // Bring the std overloads into scope so pi is computed in T_twid's own
+    // precision (float / double / long double) instead of through the
+    // double-only C acos(); the unqualified calls still allow ADL.
+    using std::acos;
+    using std::exp;
+
+    const T_twid pi = acos( static_cast<T_twid>(-1) );
+    const T_twid phinc = (inverse?2:-2) * pi / nfft;
+    for (int i=0;i<nfft;++i)
+        dst[i] = exp( std::complex<T_twid>(0,i*phinc) );
+}
+/*
+template <>
+void traits<long double>::fill_twiddles(std::complex<long double> * dst ,int nfft,bool inverse)
+{
+    long double phinc = (inverse?2:-2)*3.14159265358979323846264338327950288419716939937510L / nfft;
+    for (int i=0;i<nfft;++i)
+        dst[i] = std::complex<long double>(cosl(i*phinc),sinl(i*phinc));
+}
+*/
+}
+
+/// Mixed-radix complex FFT engine; the butterflies follow the original
+/// kiss_fft implementation.
+///
+/// The constructor asks the traits object for the twiddle table and the
+/// per-stage radix/remainder plan. transform() then recursively splits the
+/// input into decimated sub-sequences and recombines them stage by stage.
+/// No scaling is applied to the output.
+///
+/// @tparam T_Data    scalar type; samples are std::complex<T_Data>.
+/// @tparam T_traits  planner type providing prepare().
+template <typename T_Data,typename T_traits=kissfft_utils::traits<T_Data> >
+class kissfft
+{
+    public:
+        typedef T_traits traits_type;
+        typedef T_Data scalar_type;
+        typedef std::complex<scalar_type> cpx_type;
+
+        /// @param nfft     transform length in complex samples.
+        /// @param inverse  true for the inverse transform, false for the forward one.
+        /// @param traits   planner; a copy is stored and its prepare() fills the tables.
+        kissfft(int nfft,bool inverse,const traits_type & traits=traits_type() )
+            :_nfft(nfft),_inverse(inverse),_traits(traits)
+        {
+            _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder);
+        }
+
+        /// Transform the nfft samples at src into dst.
+        /// The input is read with a stride while the output is written
+        /// sequentially, so src and dst must be distinct buffers.
+        void transform(const cpx_type * src , cpx_type * dst)
+        {
+            if (_nfft < 1 || _stageRadix.empty())
+                return; // no usable plan: nothing to transform
+            kf_work(0, dst, src, 1);
+        }
+
+    private:
+        /// Compute one stage: a DFT of size p*m written to Fout, reading the
+        /// input f with step fstride. p and m come from the plan entry `stage`.
+        void kf_work( int stage,cpx_type * Fout, const cpx_type * f, const size_t fstride)
+        {
+            int p = _stageRadix[stage];
+            int m = _stageRemainder[stage];
+            cpx_type * Fout_beg = Fout;
+            cpx_type * Fout_end = Fout + p*m;
+
+            if (m==1) {
+                // last stage: just gather the strided input samples
+                do{
+                    *Fout = *f;
+                    f += fstride;
+                }while(++Fout != Fout_end );
+            }else{
+                do{
+                    // recursive call:
+                    // DFT of size m*p performed by doing
+                    // p instances of smaller DFTs of size m, 
+                    // each one takes a decimated version of the input
+                    kf_work(stage+1, Fout , f, fstride*p);
+                    f += fstride;
+                }while( (Fout += m) != Fout_end );
+            }
+
+            Fout=Fout_beg;
+
+            // recombine the p smaller DFTs 
+            switch (p) {
+                case 2: kf_bfly2(Fout,fstride,m); break;
+                case 3: kf_bfly3(Fout,fstride,m); break;
+                case 4: kf_bfly4(Fout,fstride,m); break;
+                case 5: kf_bfly5(Fout,fstride,m); break;
+                default: kf_bfly_generic(Fout,fstride,m,p); break;
+            }
+        }
+
+        // these were #define macros in the original kiss_fft
+        void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;}
+        void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;}
+        void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;}
+        void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;}
+        // parameters left unnamed on purpose: they are unused here
+        void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types
+        scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;}
+        scalar_type HALF_OF( const scalar_type & a) { return a*.5;}
+        void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;}
+
+        /// Radix-2 butterfly over m pairs (Fout[i], Fout[i+m]); m must be >= 1.
+        void kf_bfly2( cpx_type * Fout, const size_t fstride, int m)
+        {
+            cpx_type * Fout2;
+            cpx_type * tw1 = &_twiddles[0];
+            cpx_type t;
+            Fout2 = Fout + m;
+            do{
+                C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+                C_MUL (t,  *Fout2 , *tw1);
+                tw1 += fstride;
+                C_SUB( *Fout2 ,  *Fout , t );
+                C_ADDTO( *Fout ,  t );
+                ++Fout2;
+                ++Fout;
+            }while (--m);
+        }
+
+        /// Radix-4 butterfly over m groups (Fout[i], Fout[i+m], Fout[i+2m], Fout[i+3m]).
+        /// The only butterfly that consults _inverse directly; m must be >= 1.
+        void kf_bfly4( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            cpx_type *tw1,*tw2,*tw3;
+            cpx_type scratch[6];
+            size_t k=m;
+            const size_t m2=2*m;
+            const size_t m3=3*m;
+
+            tw3 = tw2 = tw1 = &_twiddles[0];
+
+            do {
+                C_MUL(scratch[0],Fout[m] , *tw1 );
+                C_MUL(scratch[1],Fout[m2] , *tw2 );
+                C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+                C_SUB( scratch[5] , *Fout, scratch[1] );
+                C_ADDTO(*Fout, scratch[1]);
+                C_ADD( scratch[3] , scratch[0] , scratch[2] );
+                C_SUB( scratch[4] , scratch[0] , scratch[2] );
+                C_SUB( Fout[m2], *Fout, scratch[3] );
+                tw1 += fstride;
+                tw2 += fstride*2;
+                tw3 += fstride*3;
+                C_ADDTO( *Fout , scratch[3] );
+
+                // the two odd outputs swap roles between forward and inverse
+                if(_inverse) {
+                    Fout[m] = cpx_type( scratch[5].real() - scratch[4].imag() , scratch[5].imag() + scratch[4].real() );
+                    Fout[m3] = cpx_type( scratch[5].real() + scratch[4].imag() , scratch[5].imag() - scratch[4].real() );
+                }else{
+                    Fout[m] = cpx_type( scratch[5].real() + scratch[4].imag() , scratch[5].imag() - scratch[4].real() );
+                    Fout[m3] = cpx_type( scratch[5].real() - scratch[4].imag() , scratch[5].imag() + scratch[4].real() );
+                }
+                ++Fout;
+            }while(--k);
+        }
+
+        /// Radix-3 butterfly over m groups (Fout[i], Fout[i+m], Fout[i+2m]); m must be >= 1.
+        void kf_bfly3( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            size_t k=m;
+            const size_t m2 = 2*m;
+            cpx_type *tw1,*tw2;
+            cpx_type scratch[5];
+            cpx_type epi3;
+            // only the imaginary part of this table entry is used below
+            epi3 = _twiddles[fstride*m];
+
+            tw1=tw2=&_twiddles[0];
+
+            do{
+                C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+                C_MUL(scratch[1],Fout[m] , *tw1);
+                C_MUL(scratch[2],Fout[m2] , *tw2);
+
+                C_ADD(scratch[3],scratch[1],scratch[2]);
+                C_SUB(scratch[0],scratch[1],scratch[2]);
+                tw1 += fstride;
+                tw2 += fstride*2;
+
+                Fout[m] = cpx_type( Fout->real() - HALF_OF(scratch[3].real() ) , Fout->imag() - HALF_OF(scratch[3].imag() ) );
+
+                C_MULBYSCALAR( scratch[0] , epi3.imag() );
+
+                C_ADDTO(*Fout,scratch[3]);
+
+                Fout[m2] = cpx_type(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+
+                C_ADDTO( Fout[m] , cpx_type( -scratch[0].imag(),scratch[0].real() ) );
+                ++Fout;
+            }while(--k);
+        }
+
+        /// Radix-5 butterfly over m groups of five outputs spaced m apart.
+        void kf_bfly5( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            cpx_type *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+            cpx_type scratch[13];
+            cpx_type * twiddles = &_twiddles[0];
+            cpx_type *tw;
+            cpx_type ya,yb;
+            ya = twiddles[fstride*m];
+            yb = twiddles[fstride*2*m];
+
+            Fout0=Fout;
+            Fout1=Fout0+m;
+            Fout2=Fout0+2*m;
+            Fout3=Fout0+3*m;
+            Fout4=Fout0+4*m;
+
+            tw=twiddles;
+            // u is size_t like m, so the loop bound is compared without a sign mismatch
+            for ( size_t u=0; u<m; ++u ) {
+                C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+                scratch[0] = *Fout0;
+
+                C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+                C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+                C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+                C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+                C_ADD( scratch[7],scratch[1],scratch[4]);
+                C_SUB( scratch[10],scratch[1],scratch[4]);
+                C_ADD( scratch[8],scratch[2],scratch[3]);
+                C_SUB( scratch[9],scratch[2],scratch[3]);
+
+                C_ADDTO( *Fout0, scratch[7]);
+                C_ADDTO( *Fout0, scratch[8]);
+
+                scratch[5] = scratch[0] + cpx_type(
+                        S_MUL(scratch[7].real(),ya.real() ) + S_MUL(scratch[8].real() ,yb.real() ),
+                        S_MUL(scratch[7].imag(),ya.real()) + S_MUL(scratch[8].imag(),yb.real())
+                        );
+
+                scratch[6] =  cpx_type( 
+                        S_MUL(scratch[10].imag(),ya.imag()) + S_MUL(scratch[9].imag(),yb.imag()),
+                        -S_MUL(scratch[10].real(),ya.imag()) - S_MUL(scratch[9].real(),yb.imag()) 
+                        );
+
+                C_SUB(*Fout1,scratch[5],scratch[6]);
+                C_ADD(*Fout4,scratch[5],scratch[6]);
+
+                scratch[11] = scratch[0] + 
+                    cpx_type(
+                            S_MUL(scratch[7].real(),yb.real()) + S_MUL(scratch[8].real(),ya.real()),
+                            S_MUL(scratch[7].imag(),yb.real()) + S_MUL(scratch[8].imag(),ya.real())
+                            );
+
+                scratch[12] = cpx_type(
+                        -S_MUL(scratch[10].imag(),yb.imag()) + S_MUL(scratch[9].imag(),ya.imag()),
+                        S_MUL(scratch[10].real(),yb.imag()) - S_MUL(scratch[9].real(),ya.imag())
+                        );
+
+                C_ADD(*Fout2,scratch[11],scratch[12]);
+                C_SUB(*Fout3,scratch[11],scratch[12]);
+
+                ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+            }
+        }
+
+        /* perform the butterfly for one stage of a mixed radix FFT */
+        /// Used for every radix p without a dedicated butterfly above.
+        void kf_bfly_generic(
+                cpx_type * Fout,
+                const size_t fstride,
+                int m,
+                int p
+                )
+        {
+            int u,k,q1,q;
+            cpx_type * twiddles = &_twiddles[0];
+            cpx_type t;
+            const int Norig = _nfft;
+            const int step = static_cast<int>(fstride);
+            // Heap-backed scratch: a variable-length array (cpx_type buf[p])
+            // is a compiler extension, not standard C++.
+            std::vector<cpx_type> scratchbuf(p);
+
+            for ( u=0; u<m; ++u ) {
+                // save the p inputs of this group before they are overwritten
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    scratchbuf[q1] = Fout[ k  ];
+                    C_FIXDIV(scratchbuf[q1],p);
+                    k += m;
+                }
+
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    int twidx=0;
+                    Fout[ k ] = scratchbuf[0];
+                    for (q=1;q<p;++q ) {
+                        // walk the twiddle table modulo its length
+                        twidx += step * k;
+                        if (twidx>=Norig) twidx-=Norig;
+                        C_MUL(t,scratchbuf[q] , twiddles[twidx] );
+                        C_ADDTO( Fout[ k ] ,t);
+                    }
+                    k += m;
+                }
+            }
+        }
+
+        int _nfft;                          // transform length
+        bool _inverse;                      // direction flag
+        std::vector<cpx_type> _twiddles;    // twiddle table filled by the traits
+        std::vector<int> _stageRadix;       // radix p of each stage
+        std::vector<int> _stageRemainder;   // sub-transform size m of each stage
+        traits_type _traits;
+};
+#endif
--- a/test/Makefile
+++ b/test/Makefile
@ -95,5 +95,11 @@ selftest.c:
 selftest_short.c:
 	./mk_test.py -s 10 12 14 > selftest_short.c

+
+CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer  -I.. -I../tools -W -Wall
+# Test program for the C++ template engine. -lm comes after the source file:
+# the linker processes inputs in order, so a library named first may go unused.
+testcpp: testcpp.cc ../kissfft.hh
+	$(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm
+
+
 clean:
 	rm -f *~ bm_* st_* tr_* kf_* tkfc_* ff_* ffr_* *.pyc *.pyo *.dat
--- a/test/testcpp.cc
+++ b/test/testcpp.cc
@ -0,0 +1,76 @@
+#include "kissfft.hh"
+#include <iostream>
+#include <cstdlib>
+#include <typeinfo>
+
+#include <sys/time.h>
+// Current time in seconds as reported by gettimeofday(), with the
+// microsecond field folded in as the fractional part.
+static inline
+double curtime(void)
+{
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    const double whole_seconds = (double)now.tv_sec;
+    const double fraction = (double)now.tv_usec*.000001;
+    return whole_seconds + fraction;
+}
+
+using namespace std;
+
+/// Check kissfft<T> against a directly evaluated long double DFT, then time it.
+///
+/// Prints the scalar type name and nfft, the RMS error of the forward
+/// transform relative to the reference ("RMSE:"), and the throughput in
+/// millions of complex samples per second ("MSPS:").
+///
+/// @param nfft  transform length to test.
+template <class T>
+void dotest(int nfft)
+{
+    typedef kissfft<T> FFT;
+    typedef std::complex<T> cpx_type;
+
+    // M_PIl is a GNU extension; derive pi at long double precision portably.
+    const long double pi = std::acos(-1.0L);
+
+    std::cout << typeid(T).name() << ":nfft=" << nfft << std::endl;
+
+    FFT fft(nfft,false);
+
+    std::vector<cpx_type> inbuf(nfft);
+    std::vector<cpx_type> outbuf(nfft);
+#if 0
+    for (int k=0;k<nfft;++k) 
+        inbuf[k]= cpx_type(
+               cosl(2*k* pi / nfft ),
+               sinl(2*k* pi / nfft ) );
+#else
+    // pseudo-random input with both components in [-0.5, 0.5]
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= cpx_type( 
+                (T)(rand()/(double)RAND_MAX - .5),
+                (T)(rand()/(double)RAND_MAX - .5) );
+#endif
+    fft.transform( &inbuf[0] , &outbuf[0] );
+
+    // reference: evaluate every output bin directly in long double
+    long double totalpower=0;
+    long double difpower=0;
+    for (int k0=0;k0<nfft;++k0) {
+        std::complex<long double> acc = 0;
+        long double phinc = 2*k0* pi / nfft;
+        for (int k1=0;k1<nfft;++k1) {
+            std::complex<long double> x(inbuf[k1].real(),inbuf[k1].imag()); 
+            acc += x * std::exp( std::complex<long double>(0,-k1*phinc) );
+        }
+        totalpower += std::norm(acc);
+        std::complex<long double> x(outbuf[k0].real(),outbuf[k0].imag()); 
+        std::complex<long double> dif = acc - x;
+        difpower += std::norm(dif);
+    }
+    std::cout << "RMSE:" << std::sqrt(difpower/totalpower) << "\t";
+
+    // timing: repeat the transform so that about 20e6 samples are processed
+    double t0 = curtime();
+    const int nits = static_cast<int>(20e6/nfft);
+    for (int k=0;k<nits;++k) {
+        fft.transform( &inbuf[0] , &outbuf[0] );
+    }
+    double t1 = curtime();
+    std::cout << "MSPS:" << ( static_cast<double>(nits)*nfft*1e-6/ (t1-t0) ) << std::endl;
+}
+
+// Run the accuracy/speed test for float, double and long double at each
+// transform length, in that order. No command-line arguments are used, so
+// main takes none (avoids unused-parameter warnings under -W -Wall).
+int main()
+{
+    static const int sizes[] = { 32, 1024, 1800 };
+    const unsigned count = sizeof(sizes)/sizeof(sizes[0]);
+
+    for (unsigned i=0;i<count;++i) {
+        dotest<float>(sizes[i]);
+        dotest<double>(sizes[i]);
+        dotest<long double>(sizes[i]);
+    }
+    return 0;
+}