mirror of
https://github.com/mborgerding/kissfft.git
synced 2025-05-27 21:20:27 -04:00
made threadsafe
This commit is contained in:
parent
583019e074
commit
57925fd126
8
.hgignore
Normal file
8
.hgignore
Normal file
@ -0,0 +1,8 @@
|
||||
syntax:glob
|
||||
test/bm_*
|
||||
test/st_*
|
||||
test/tkfc_*
|
||||
test/tr_*
|
||||
tools/fastconv_*
|
||||
tools/fastconvr_*
|
||||
tools/fft_*
|
35
Makefile
35
Makefile
@ -1,8 +1,4 @@
|
||||
KFVER=1_2_9
|
||||
|
||||
DISTDIR=kiss_fft_v$(KFVER)
|
||||
TARBALL=kiss_fft_v$(KFVER).tar.gz
|
||||
ZIPFILE=kiss_fft_v$(KFVER).zip
|
||||
KFVER=129
|
||||
|
||||
doc:
|
||||
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
||||
@ -10,36 +6,25 @@ doc:
|
||||
|
||||
testall:
|
||||
# The simd and int32_t types may or may not work on your machine
|
||||
export DATATYPE=simd && cd test && make test
|
||||
export DATATYPE=int32_t && cd test && make test
|
||||
export DATATYPE=int16_t && cd test && make test
|
||||
export DATATYPE=float && cd test && make test
|
||||
export DATATYPE=double && cd test && make test
|
||||
make -C test DATATYPE=simd CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=int32_t CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
|
||||
|
||||
tarball: clean
|
||||
tar --exclude CVS --exclude .cvsignore --exclude $(TARBALL) -cvzf $(TARBALL) .
|
||||
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz
|
||||
hg archive -r v$(KFVER) -t zip kiss_fft$(KFVER).zip
|
||||
|
||||
clean:
|
||||
cd test && make clean
|
||||
cd tools && make clean
|
||||
rm -f kiss_fft*.tar.gz *~ *.pyc kiss_fft*.zip
|
||||
rm -rf $(DISTDIR)
|
||||
|
||||
dist: tarball
|
||||
mkdir $(DISTDIR)
|
||||
cd $(DISTDIR) && tar -zxf ../$(TARBALL)
|
||||
rm $(TARBALL)
|
||||
tar -czf $(TARBALL) $(DISTDIR)
|
||||
zip -r $(ZIPFILE) $(DISTDIR)
|
||||
rm -rf $(DISTDIR)
|
||||
|
||||
upload: dist
|
||||
ncftpput upload.sourceforge.net incoming $(ZIPFILE) $(TARBALL)
|
||||
|
||||
asm: kiss_fft.s
|
||||
|
||||
kiss_fft.s: kiss_fft.c kiss_fft.h _kiss_fft_guts.h
|
||||
[ -e kiss_fft.s ] && mv kiss_fft.s kiss_fft.s~ || true
|
||||
gcc -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm
|
||||
gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
|
||||
gcc -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -unroll-loops -dA -fverbose-asm
|
||||
gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
|
||||
[ -e kiss_fft.s~ ] && diff kiss_fft.s~ kiss_fft.s || true
|
||||
|
22
README
22
README
@ -36,8 +36,8 @@ Code definitions for 1d complex FFTs are in kiss_fft.c.
|
||||
You can do other cool stuff with the extras you'll find in tools/
|
||||
|
||||
* multi-dimensional FFTs
|
||||
* real-optimized FFTs
|
||||
* fast convolution FIR filtering
|
||||
* real-optimized FFTs (returns the positive half-spectrum: (nfft/2+1) complex frequency bins)
|
||||
* fast convolution FIR filtering (not available for fixed point)
|
||||
* spectrum image creation
|
||||
|
||||
The core fft and most tools/ code can be compiled to use float, double
|
||||
@ -59,7 +59,7 @@ During this process, I learned:
|
||||
|
||||
1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
|
||||
2. It took me an embarrassingly long time to get FFT_BRANDX working.
|
||||
3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB.
|
||||
3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
|
||||
4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
|
||||
|
||||
It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
|
||||
@ -78,6 +78,11 @@ FREQUENTLY ASKED QUESTIONS:
|
||||
2) mixed build environment -- all code must be compiled with same preprocessor
|
||||
definitions for FIXED_POINT and kiss_fft_scalar
|
||||
|
||||
Q: Will you write/debug my code for me?
|
||||
A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but
|
||||
I may refer you to a book, a forum, or some other resource.
|
||||
|
||||
|
||||
PERFORMANCE:
|
||||
(on Athlon XP 2100+, with gcc 2.96, float data type)
|
||||
|
||||
@ -92,7 +97,10 @@ DO NOT:
|
||||
|
||||
UNDER THE HOOD:
|
||||
|
||||
Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT.
|
||||
Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer
|
||||
and output buffer that are the same, a temporary buffer will be created to hold the data.
|
||||
|
||||
No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).
|
||||
|
||||
No scaling is done for the floating point version (for speed).
|
||||
Scaling is done both ways for the fixed-point version (for overflow prevention).
|
||||
@ -100,7 +108,8 @@ UNDER THE HOOD:
|
||||
Optimized butterflies are used for factors 2,3,4, and 5.
|
||||
|
||||
The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length
|
||||
FFTs in parallel (packed into real&imag), and then combines them via twiddling.
|
||||
FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is
|
||||
nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web.
|
||||
|
||||
The fast convolution filtering uses the overlap-scrap method, slightly
|
||||
modified to put the scrap at the tail.
|
||||
@ -111,6 +120,9 @@ LICENSE:
|
||||
Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at
|
||||
the other end. See http://www.fsf.org/licensing/licenses
|
||||
|
||||
A commercial license is available which removes the requirement for attribution. Contact me for details.
|
||||
|
||||
|
||||
TODO:
|
||||
*) Add real optimization for odd length FFTs
|
||||
*) Document/revisit the input/output fft scaling
|
||||
|
@ -148,3 +148,17 @@ struct kiss_fft_state{
|
||||
/* a debugging function */
|
||||
#define pcpx(c)\
|
||||
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
|
||||
|
||||
|
||||
#ifdef KISS_FFT_USE_ALLOCA
|
||||
// Define KISS_FFT_USE_ALLOCA to allow use of alloca instead of malloc for temporary buffers.
|
||||
// Temporary buffers are used in two cases:
|
||||
// 1. FFT sizes that have "bad" factors, i.e. not 2, 3 and 5
|
||||
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
|
||||
#include <alloca.h>
|
||||
#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
|
||||
#define KISS_FFT_TMP_FREE(ptr)
|
||||
#else
|
||||
#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
|
||||
#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
|
||||
#endif
|
||||
|
40
kiss_fft.c
40
kiss_fft.c
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2003-2004, Mark Borgerding
|
||||
Copyright (c) 2003-2010, Mark Borgerding
|
||||
|
||||
All rights reserved.
|
||||
|
||||
@ -14,27 +14,10 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
|
||||
|
||||
#include "_kiss_fft_guts.h"
|
||||
|
||||
|
||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||
fixed or floating point complex numbers. It also declares the kf_ internal functions.
|
||||
*/
|
||||
|
||||
static kiss_fft_cpx *scratchbuf=NULL;
|
||||
static size_t nscratchbuf=0;
|
||||
static kiss_fft_cpx *tmpbuf=NULL;
|
||||
static size_t ntmpbuf=0;
|
||||
|
||||
#define CHECKBUF(buf,nbuf,n) \
|
||||
do { \
|
||||
if ( nbuf < (size_t)(n) ) {\
|
||||
free(buf); \
|
||||
buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
|
||||
nbuf = (size_t)(n); \
|
||||
} \
|
||||
}while(0)
|
||||
|
||||
|
||||
static void kf_bfly2(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
@ -225,7 +208,7 @@ static void kf_bfly_generic(
|
||||
kiss_fft_cpx t;
|
||||
int Norig = st->nfft;
|
||||
|
||||
kiss_fft_cpx * scratch = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx)*p);
|
||||
kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
|
||||
|
||||
for ( u=0; u<m; ++u ) {
|
||||
k=u;
|
||||
@ -248,7 +231,7 @@ static void kf_bfly_generic(
|
||||
k += m;
|
||||
}
|
||||
}
|
||||
free(scratch);
|
||||
KISS_FFT_TMP_FREE(scratch);
|
||||
}
|
||||
|
||||
static
|
||||
@ -385,14 +368,15 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
||||
{
|
||||
if (fin == fout) {
|
||||
CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
|
||||
//NOTE: this is not really an in-place FFT algorithm.
|
||||
//It just performs an out-of-place FFT into a temp buffer
|
||||
kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
|
||||
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
||||
memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
||||
KISS_FFT_TMP_FREE(tmpbuf);
|
||||
}else{
|
||||
kf_work( fout, fin, 1,in_stride, st->factors,st );
|
||||
}
|
||||
@ -404,17 +388,9 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
||||
}
|
||||
|
||||
|
||||
/* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
|
||||
buffers from CHECKBUF
|
||||
*/
|
||||
void kiss_fft_cleanup(void)
|
||||
{
|
||||
free(scratchbuf);
|
||||
scratchbuf = NULL;
|
||||
nscratchbuf=0;
|
||||
free(tmpbuf);
|
||||
tmpbuf=NULL;
|
||||
ntmpbuf=0;
|
||||
// nothing needed any more
|
||||
}
|
||||
|
||||
int kiss_fft_next_fast_size(int n)
|
||||
|
@ -27,11 +27,11 @@ extern "C" {
|
||||
#ifdef USE_SIMD
|
||||
# include <xmmintrin.h>
|
||||
# define kiss_fft_scalar __m128
|
||||
|
||||
//#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
|
||||
#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
|
||||
#define KISS_FFT_FREE _mm_free
|
||||
#else
|
||||
#define KISS_FFT_MALLOC malloc
|
||||
#define KISS_FFT_FREE free
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -5,10 +5,12 @@ WARNINGS=-W -Wall -Wstrict-prototypes -Wmissing-prototypes -Waggregate-return \
|
||||
|
||||
CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
|
||||
CFLAGS+=-ffast-math -fomit-frame-pointer
|
||||
CFLAGS+=-march=prescott
|
||||
#CFLAGS+= -mtune=native
|
||||
#CFLAGS+=-funroll-loops
|
||||
#CFLAGS+=-march=prescott
|
||||
CFLAGS+= -mtune=native
|
||||
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
||||
CFLAGS+=-fopenmp
|
||||
#CFLAGS+=-fopenmp
|
||||
CFLAGS+= $(CFLAGADD)
|
||||
|
||||
|
||||
ifeq "$(NFFT)" ""
|
||||
|
@ -22,6 +22,11 @@ ifeq "$(TYPEFLAGS)" ""
|
||||
TYPEFLAGS=-Dkiss_fft_scalar=$(DATATYPE)
|
||||
endif
|
||||
|
||||
ifneq ("$(KISS_FFT_USE_ALLOCA)","")
|
||||
CFLAGS+= -DKISS_FFT_USE_ALLOCA=1
|
||||
endif
|
||||
CFLAGS+= $(CFLAGADD)
|
||||
|
||||
|
||||
FFTUTIL=fft_$(DATATYPE)
|
||||
FASTFILT=fastconv_$(DATATYPE)
|
||||
|
@ -20,7 +20,7 @@ struct kiss_fftr_state{
|
||||
kiss_fft_cpx * tmpbuf;
|
||||
kiss_fft_cpx * super_twiddles;
|
||||
#ifdef USE_SIMD
|
||||
long pad;
|
||||
void * pad;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user