made threadsafe

2025-07-21 06:24:20 -04:00 · 2010-05-27 22:54:01 -04:00
parent 583019e074
commit 57925fd126
9 changed files with 70 additions and 68 deletions
--- a/.hgignore
+++ b/.hgignore
@ -0,0 +1,8 @@
 syntax:glob
 test/bm_*
 test/st_*
 test/tkfc_*
 test/tr_*
 tools/fastconv_*
 tools/fastconvr_*
 tools/fft_*
--- a/35
+++ b/35
@ -1,8 +1,4 @@
-KFVER=1_2_9
+KFVER=129
 DISTDIR=kiss_fft_v$(KFVER)
 TARBALL=kiss_fft_v$(KFVER).tar.gz
 ZIPFILE=kiss_fft_v$(KFVER).zip
 doc:
 	@echo "Start by reading the README file.  If you want to build and test lots of stuff, do a 'make testall'"
@ -10,36 +6,25 @@ doc:
 testall:
 	# The simd and int32_t types may or may not work on your machine 
-	export DATATYPE=simd && cd test && make test
+	$(MAKE) -C test DATATYPE=simd CFLAGADD="$(CFLAGADD)" test
-	export DATATYPE=int32_t && cd test && make test
+	$(MAKE) -C test DATATYPE=int32_t CFLAGADD="$(CFLAGADD)" test
-	export DATATYPE=int16_t && cd test && make test
+	$(MAKE) -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
-	export DATATYPE=float && cd test && make test
+	$(MAKE) -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
-	export DATATYPE=double && cd test && make test
+	$(MAKE) -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
 tarball: clean
-	tar --exclude CVS --exclude .cvsignore --exclude $(TARBALL) -cvzf $(TARBALL) .
+	hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz 
 	hg archive -r v$(KFVER) -t zip kiss_fft$(KFVER).zip
 clean:
 	$(MAKE) -C test clean
 	$(MAKE) -C tools clean
 	rm -f kiss_fft*.tar.gz *~ *.pyc kiss_fft*.zip 
 	rm -rf $(DISTDIR)
 dist: tarball
 	mkdir $(DISTDIR)
 	cd $(DISTDIR) && tar -zxf ../$(TARBALL)
 	rm $(TARBALL)
 	tar -czf $(TARBALL) $(DISTDIR)
 	zip -r $(ZIPFILE) $(DISTDIR)
 	rm -rf $(DISTDIR)
 upload: dist
 	ncftpput upload.sourceforge.net incoming $(ZIPFILE) $(TARBALL)
 asm: kiss_fft.s
 kiss_fft.s: kiss_fft.c kiss_fft.h _kiss_fft_guts.h
 	[ -e kiss_fft.s ] && mv kiss_fft.s kiss_fft.s~ || true
-	gcc -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm 
+	gcc -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -funroll-loops -dA -fverbose-asm 
-	gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
+	gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
 	[ -e kiss_fft.s~ ] && diff kiss_fft.s~ kiss_fft.s || true
--- a/22
+++ b/22
@ -36,8 +36,8 @@ Code definitions for 1d complex FFTs are in kiss_fft.c.
 You can do other cool stuff with the extras you'll find in tools/
    * multi-dimensional FFTs 
-    * real-optimized FFTs
+    * real-optimized FFTs  (returns the positive half-spectrum: (nfft/2+1) complex frequency bins)
-    * fast convolution FIR filtering
+    * fast convolution FIR filtering (not available for fixed point)
    * spectrum image creation
 The core fft and most tools/ code can be compiled to use float, double 
@ -59,7 +59,7 @@ During this process, I learned:
    1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
    2. It took me an embarrassingly long time to get FFT_BRANDX working.
-    3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB.
+    3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
    4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
    It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
@ -78,6 +78,11 @@ FREQUENTLY ASKED QUESTIONS:
 		2) mixed build environment -- all code must be compiled with same preprocessor 
 		definitions for FIXED_POINT and kiss_fft_scalar
 	Q: Will you write/debug my code for me?
 	A: Probably not unless you pay me.  I am happy to answer pointed and topical questions, but 
 	I may refer you to a book, a forum, or some other resource.
 PERFORMANCE:
    (on Athlon XP 2100+, with gcc 2.96, float data type)
@ -92,7 +97,10 @@ DO NOT:
 UNDER THE HOOD:
-    Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. 
+    Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer  
    and output buffer that are the same, a temporary buffer will be created to hold the data.
    No static data is used.  The core routines of kiss_fft are thread-safe (but not all of the tools directory).
    No scaling is done for the floating point version (for speed).  
    Scaling is done both ways for the fixed-point version (for overflow prevention).
@ -100,7 +108,8 @@ UNDER THE HOOD:
    Optimized butterflies are used for factors 2,3,4, and 5. 
    The real (i.e. not complex) optimization code only works for even length ffts.  It does two half-length
-    FFTs in parallel (packed into real&imag), and then combines them via twiddling.
+    FFTs in parallel (packed into real&imag), and then combines them via twiddling.  The result is 
    nfft/2+1 complex frequency bins from DC to Nyquist.  If you don't know what this means, search the web.
    The fast convolution filtering uses the overlap-scrap method, slightly 
    modified to put the scrap at the tail.
@ -111,6 +120,9 @@ LICENSE:
    Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at 
    the other end.  See http://www.fsf.org/licensing/licenses
    A commercial license is available which removes the requirement for attribution.  Contact me for details.
 TODO:
    *) Add real optimization for odd length FFTs 
    *) Document/revisit the input/output fft scaling
--- a/_kiss_fft_guts.h
+++ b/_kiss_fft_guts.h
@ -148,3 +148,17 @@ struct kiss_fft_state{
 /* a debugging function */
 #define pcpx(c)\
    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
 #ifdef KISS_FFT_USE_ALLOCA
 // define this to allow use of alloca instead of malloc for temporary buffers
 // Temporary buffers are used in two cases: 
 // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
 // 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
 #include <alloca.h>
 #define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
 #define  KISS_FFT_TMP_FREE(ptr) 
 #else
 #define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
 #define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
 #endif
--- a/kiss_fft.c
+++ b/kiss_fft.c
@ -1,5 +1,5 @@
 /*
-Copyright (c) 2003-2004, Mark Borgerding
+Copyright (c) 2003-2010, Mark Borgerding
 All rights reserved.
@ -14,27 +14,10 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 #include "_kiss_fft_guts.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
 fixed or floating point complex numbers.  It also declares the kf_ internal functions.
 */
 static kiss_fft_cpx *scratchbuf=NULL;
 static size_t nscratchbuf=0;
 static kiss_fft_cpx *tmpbuf=NULL;
 static size_t ntmpbuf=0;
 #define CHECKBUF(buf,nbuf,n) \
    do { \
        if ( nbuf < (size_t)(n) ) {\
            free(buf); \
            buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
            nbuf = (size_t)(n); \
        } \
   }while(0)
 static void kf_bfly2(
        kiss_fft_cpx * Fout,
        const size_t fstride,
@ -225,7 +208,7 @@ static void kf_bfly_generic(
    kiss_fft_cpx t;
    int Norig = st->nfft;
-    kiss_fft_cpx * scratch = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx)*p);
+    kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
    for ( u=0; u<m; ++u ) {
        k=u;
@ -248,7 +231,7 @@ static void kf_bfly_generic(
            k += m;
        }
    }
-    free(scratch);
+    KISS_FFT_TMP_FREE(scratch);
 }
 static
@ -385,14 +368,15 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem
 }
 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
 {
    if (fin == fout) {
-        CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
+        //NOTE: this is not really an in-place FFT algorithm.
        //It just performs an out-of-place FFT into a temp buffer
        kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
        kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
        memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
        KISS_FFT_TMP_FREE(tmpbuf);
    }else{
        kf_work( fout, fin, 1,in_stride, st->factors,st );
    }
@ -404,17 +388,9 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 }
 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the 
   buffers from CHECKBUF
 */ 
 void kiss_fft_cleanup(void)
 {
-    free(scratchbuf);
+    // nothing needed any more
    scratchbuf = NULL;
    nscratchbuf=0;
    free(tmpbuf);
    tmpbuf=NULL;
    ntmpbuf=0;
 }
 int kiss_fft_next_fast_size(int n)
--- a/kiss_fft.h
+++ b/kiss_fft.h
@ -27,11 +27,11 @@ extern "C" {
 #ifdef USE_SIMD
 # include <xmmintrin.h>
 # define kiss_fft_scalar __m128
 //#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
 #define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
 #define KISS_FFT_FREE _mm_free
 #else	
 #define KISS_FFT_MALLOC malloc
 #define KISS_FFT_FREE free
 #endif	
--- a/test/Makefile
+++ b/test/Makefile
@ -5,10 +5,12 @@ WARNINGS=-W -Wall -Wstrict-prototypes -Wmissing-prototypes -Waggregate-return \
 CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
 CFLAGS+=-ffast-math -fomit-frame-pointer 
-CFLAGS+=-march=prescott 
+#CFLAGS+=-funroll-loops
-#CFLAGS+= -mtune=native
+#CFLAGS+=-march=prescott 
 CFLAGS+= -mtune=native 
 # TIP: try adding -openmp or -fopenmp  to enable OPENMP directives and use of multiple cores
-CFLAGS+=-fopenmp
+#CFLAGS+=-fopenmp
 CFLAGS+= $(CFLAGADD)
 ifeq "$(NFFT)" ""
--- a/tools/Makefile
+++ b/tools/Makefile
@ -22,6 +22,11 @@ ifeq "$(TYPEFLAGS)"  ""
 TYPEFLAGS=-Dkiss_fft_scalar=$(DATATYPE)
 endif
 ifneq ("$(KISS_FFT_USE_ALLOCA)","")
   CFLAGS+= -DKISS_FFT_USE_ALLOCA=1
 endif 
 CFLAGS+= $(CFLAGADD)
 FFTUTIL=fft_$(DATATYPE)
 FASTFILT=fastconv_$(DATATYPE)
--- a/tools/kiss_fftr.c
+++ b/tools/kiss_fftr.c
@ -20,7 +20,7 @@ struct kiss_fftr_state{
    kiss_fft_cpx * tmpbuf;
    kiss_fft_cpx * super_twiddles;
 #ifdef USE_SIMD    
-    long pad;
+    void * pad;
 #endif    
 };