mirror of
https://github.com/mborgerding/kissfft.git
synced 2025-05-27 21:20:27 -04:00
made threadsafe
This commit is contained in:
parent
583019e074
commit
57925fd126
8
.hgignore
Normal file
8
.hgignore
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
syntax:glob
|
||||||
|
test/bm_*
|
||||||
|
test/st_*
|
||||||
|
test/tkfc_*
|
||||||
|
test/tr_*
|
||||||
|
tools/fastconv_*
|
||||||
|
tools/fastconvr_*
|
||||||
|
tools/fft_*
|
35
Makefile
35
Makefile
@ -1,8 +1,4 @@
|
|||||||
KFVER=1_2_9
|
KFVER=129
|
||||||
|
|
||||||
DISTDIR=kiss_fft_v$(KFVER)
|
|
||||||
TARBALL=kiss_fft_v$(KFVER).tar.gz
|
|
||||||
ZIPFILE=kiss_fft_v$(KFVER).zip
|
|
||||||
|
|
||||||
doc:
|
doc:
|
||||||
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
||||||
@ -10,36 +6,25 @@ doc:
|
|||||||
|
|
||||||
testall:
|
testall:
|
||||||
# The simd and int32_t types may or may not work on your machine
|
# The simd and int32_t types may or may not work on your machine
|
||||||
export DATATYPE=simd && cd test && make test
|
make -C test DATATYPE=simd CFLAGADD="$(CFLAGADD)" test
|
||||||
export DATATYPE=int32_t && cd test && make test
|
make -C test DATATYPE=int32_t CFLAGADD="$(CFLAGADD)" test
|
||||||
export DATATYPE=int16_t && cd test && make test
|
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
|
||||||
export DATATYPE=float && cd test && make test
|
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
|
||||||
export DATATYPE=double && cd test && make test
|
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
|
||||||
|
|
||||||
tarball: clean
|
tarball: clean
|
||||||
tar --exclude CVS --exclude .cvsignore --exclude $(TARBALL) -cvzf $(TARBALL) .
|
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz
|
||||||
|
hg archive -r v$(KFVER) -t zip kiss_fft$(KFVER).zip
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
cd test && make clean
|
cd test && make clean
|
||||||
cd tools && make clean
|
cd tools && make clean
|
||||||
rm -f kiss_fft*.tar.gz *~ *.pyc kiss_fft*.zip
|
rm -f kiss_fft*.tar.gz *~ *.pyc kiss_fft*.zip
|
||||||
rm -rf $(DISTDIR)
|
|
||||||
|
|
||||||
dist: tarball
|
|
||||||
mkdir $(DISTDIR)
|
|
||||||
cd $(DISTDIR) && tar -zxf ../$(TARBALL)
|
|
||||||
rm $(TARBALL)
|
|
||||||
tar -czf $(TARBALL) $(DISTDIR)
|
|
||||||
zip -r $(ZIPFILE) $(DISTDIR)
|
|
||||||
rm -rf $(DISTDIR)
|
|
||||||
|
|
||||||
upload: dist
|
|
||||||
ncftpput upload.sourceforge.net incoming $(ZIPFILE) $(TARBALL)
|
|
||||||
|
|
||||||
asm: kiss_fft.s
|
asm: kiss_fft.s
|
||||||
|
|
||||||
kiss_fft.s: kiss_fft.c kiss_fft.h _kiss_fft_guts.h
|
kiss_fft.s: kiss_fft.c kiss_fft.h _kiss_fft_guts.h
|
||||||
[ -e kiss_fft.s ] && mv kiss_fft.s kiss_fft.s~ || true
|
[ -e kiss_fft.s ] && mv kiss_fft.s kiss_fft.s~ || true
|
||||||
gcc -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm
|
gcc -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -funroll-loops -dA -fverbose-asm
|
||||||
gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -march=pentiumpro -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
|
gcc -o kiss_fft_short.s -S kiss_fft.c -O3 -mtune=native -ffast-math -fomit-frame-pointer -dA -fverbose-asm -DFIXED_POINT
|
||||||
[ -e kiss_fft.s~ ] && diff kiss_fft.s~ kiss_fft.s || true
|
[ -e kiss_fft.s~ ] && diff kiss_fft.s~ kiss_fft.s || true
|
||||||
|
22
README
22
README
@ -36,8 +36,8 @@ Code definitions for 1d complex FFTs are in kiss_fft.c.
|
|||||||
You can do other cool stuff with the extras you'll find in tools/
|
You can do other cool stuff with the extras you'll find in tools/
|
||||||
|
|
||||||
* multi-dimensional FFTs
|
* multi-dimensional FFTs
|
||||||
* real-optimized FFTs
|
* real-optimized FFTs (returns the positive half-spectrum: (nfft/2+1) complex frequency bins)
|
||||||
* fast convolution FIR filtering
|
* fast convolution FIR filtering (not available for fixed point)
|
||||||
* spectrum image creation
|
* spectrum image creation
|
||||||
|
|
||||||
The core fft and most tools/ code can be compiled to use float, double
|
The core fft and most tools/ code can be compiled to use float, double
|
||||||
@ -59,7 +59,7 @@ During this process, I learned:
|
|||||||
|
|
||||||
1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
|
1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
|
||||||
2. It took me an embarrassingly long time to get FFT_BRANDX working.
|
2. It took me an embarrassingly long time to get FFT_BRANDX working.
|
||||||
3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB.
|
3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
|
||||||
4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
|
4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
|
||||||
|
|
||||||
It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
|
It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
|
||||||
@ -78,6 +78,11 @@ FREQUENTLY ASKED QUESTIONS:
|
|||||||
2) mixed build environment -- all code must be compiled with same preprocessor
|
2) mixed build environment -- all code must be compiled with same preprocessor
|
||||||
definitions for FIXED_POINT and kiss_fft_scalar
|
definitions for FIXED_POINT and kiss_fft_scalar
|
||||||
|
|
||||||
|
Q: Will you write/debug my code for me?
|
||||||
|
A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but
|
||||||
|
I may refer you to a book, a forum, or some other resource.
|
||||||
|
|
||||||
|
|
||||||
PERFORMANCE:
|
PERFORMANCE:
|
||||||
(on Athlon XP 2100+, with gcc 2.96, float data type)
|
(on Athlon XP 2100+, with gcc 2.96, float data type)
|
||||||
|
|
||||||
@ -92,7 +97,10 @@ DO NOT:
|
|||||||
|
|
||||||
UNDER THE HOOD:
|
UNDER THE HOOD:
|
||||||
|
|
||||||
Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT.
|
Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer
|
||||||
|
and output buffer that are the same, a temporary buffer will be created to hold the data.
|
||||||
|
|
||||||
|
No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).
|
||||||
|
|
||||||
No scaling is done for the floating point version (for speed).
|
No scaling is done for the floating point version (for speed).
|
||||||
Scaling is done both ways for the fixed-point version (for overflow prevention).
|
Scaling is done both ways for the fixed-point version (for overflow prevention).
|
||||||
@ -100,7 +108,8 @@ UNDER THE HOOD:
|
|||||||
Optimized butterflies are used for factors 2,3,4, and 5.
|
Optimized butterflies are used for factors 2,3,4, and 5.
|
||||||
|
|
||||||
The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length
|
The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length
|
||||||
FFTs in parallel (packed into real&imag), and then combines them via twiddling.
|
FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is
|
||||||
|
nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web.
|
||||||
|
|
||||||
The fast convolution filtering uses the overlap-scrap method, slightly
|
The fast convolution filtering uses the overlap-scrap method, slightly
|
||||||
modified to put the scrap at the tail.
|
modified to put the scrap at the tail.
|
||||||
@ -111,6 +120,9 @@ LICENSE:
|
|||||||
Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at
|
Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at
|
||||||
the other end. See http://www.fsf.org/licensing/licenses
|
the other end. See http://www.fsf.org/licensing/licenses
|
||||||
|
|
||||||
|
A commercial license is available which removes the requirement for attribution. Contact me for details.
|
||||||
|
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
*) Add real optimization for odd length FFTs
|
*) Add real optimization for odd length FFTs
|
||||||
*) Document/revisit the input/output fft scaling
|
*) Document/revisit the input/output fft scaling
|
||||||
|
@ -148,3 +148,17 @@ struct kiss_fft_state{
|
|||||||
/* a debugging function */
|
/* a debugging function */
|
||||||
#define pcpx(c)\
|
#define pcpx(c)\
|
||||||
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
|
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef KISS_FFT_USE_ALLOCA
|
||||||
|
// define this to allow use of alloca instead of malloc for temporary buffers
|
||||||
|
// Temporary buffers are used in two cases:
|
||||||
|
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
|
||||||
|
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
|
||||||
|
#include <alloca.h>
|
||||||
|
#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
|
||||||
|
#define KISS_FFT_TMP_FREE(ptr)
|
||||||
|
#else
|
||||||
|
#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
|
||||||
|
#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
|
||||||
|
#endif
|
||||||
|
40
kiss_fft.c
40
kiss_fft.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2003-2004, Mark Borgerding
|
Copyright (c) 2003-2010, Mark Borgerding
|
||||||
|
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
@ -14,27 +14,10 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|||||||
|
|
||||||
|
|
||||||
#include "_kiss_fft_guts.h"
|
#include "_kiss_fft_guts.h"
|
||||||
|
|
||||||
|
|
||||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||||
fixed or floating point complex numbers. It also declares the kf_ internal functions.
|
fixed or floating point complex numbers. It also declares the kf_ internal functions.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static kiss_fft_cpx *scratchbuf=NULL;
|
|
||||||
static size_t nscratchbuf=0;
|
|
||||||
static kiss_fft_cpx *tmpbuf=NULL;
|
|
||||||
static size_t ntmpbuf=0;
|
|
||||||
|
|
||||||
#define CHECKBUF(buf,nbuf,n) \
|
|
||||||
do { \
|
|
||||||
if ( nbuf < (size_t)(n) ) {\
|
|
||||||
free(buf); \
|
|
||||||
buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
|
|
||||||
nbuf = (size_t)(n); \
|
|
||||||
} \
|
|
||||||
}while(0)
|
|
||||||
|
|
||||||
|
|
||||||
static void kf_bfly2(
|
static void kf_bfly2(
|
||||||
kiss_fft_cpx * Fout,
|
kiss_fft_cpx * Fout,
|
||||||
const size_t fstride,
|
const size_t fstride,
|
||||||
@ -225,7 +208,7 @@ static void kf_bfly_generic(
|
|||||||
kiss_fft_cpx t;
|
kiss_fft_cpx t;
|
||||||
int Norig = st->nfft;
|
int Norig = st->nfft;
|
||||||
|
|
||||||
kiss_fft_cpx * scratch = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx)*p);
|
kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
|
||||||
|
|
||||||
for ( u=0; u<m; ++u ) {
|
for ( u=0; u<m; ++u ) {
|
||||||
k=u;
|
k=u;
|
||||||
@ -248,7 +231,7 @@ static void kf_bfly_generic(
|
|||||||
k += m;
|
k += m;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(scratch);
|
KISS_FFT_TMP_FREE(scratch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -385,14 +368,15 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
||||||
{
|
{
|
||||||
if (fin == fout) {
|
if (fin == fout) {
|
||||||
CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
|
//NOTE: this is not really an in-place FFT algorithm.
|
||||||
|
//It just performs an out-of-place FFT into a temp buffer
|
||||||
|
kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
|
||||||
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
||||||
memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
||||||
|
KISS_FFT_TMP_FREE(tmpbuf);
|
||||||
}else{
|
}else{
|
||||||
kf_work( fout, fin, 1,in_stride, st->factors,st );
|
kf_work( fout, fin, 1,in_stride, st->factors,st );
|
||||||
}
|
}
|
||||||
@ -404,17 +388,9 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
|
|
||||||
buffers from CHECKBUF
|
|
||||||
*/
|
|
||||||
void kiss_fft_cleanup(void)
|
void kiss_fft_cleanup(void)
|
||||||
{
|
{
|
||||||
free(scratchbuf);
|
// nothing needed any more
|
||||||
scratchbuf = NULL;
|
|
||||||
nscratchbuf=0;
|
|
||||||
free(tmpbuf);
|
|
||||||
tmpbuf=NULL;
|
|
||||||
ntmpbuf=0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int kiss_fft_next_fast_size(int n)
|
int kiss_fft_next_fast_size(int n)
|
||||||
|
@ -27,11 +27,11 @@ extern "C" {
|
|||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
# include <xmmintrin.h>
|
# include <xmmintrin.h>
|
||||||
# define kiss_fft_scalar __m128
|
# define kiss_fft_scalar __m128
|
||||||
|
|
||||||
//#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
|
|
||||||
#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
|
#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
|
||||||
|
#define KISS_FFT_FREE _mm_free
|
||||||
#else
|
#else
|
||||||
#define KISS_FFT_MALLOC malloc
|
#define KISS_FFT_MALLOC malloc
|
||||||
|
#define KISS_FFT_FREE free
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,10 +5,12 @@ WARNINGS=-W -Wall -Wstrict-prototypes -Wmissing-prototypes -Waggregate-return \
|
|||||||
|
|
||||||
CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
|
CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
|
||||||
CFLAGS+=-ffast-math -fomit-frame-pointer
|
CFLAGS+=-ffast-math -fomit-frame-pointer
|
||||||
CFLAGS+=-march=prescott
|
#CFLAGS+=-funroll-loops
|
||||||
#CFLAGS+= -mtune=native
|
#CFLAGS+=-march=prescott
|
||||||
|
CFLAGS+= -mtune=native
|
||||||
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
||||||
CFLAGS+=-fopenmp
|
#CFLAGS+=-fopenmp
|
||||||
|
CFLAGS+= $(CFLAGADD)
|
||||||
|
|
||||||
|
|
||||||
ifeq "$(NFFT)" ""
|
ifeq "$(NFFT)" ""
|
||||||
|
@ -22,6 +22,11 @@ ifeq "$(TYPEFLAGS)" ""
|
|||||||
TYPEFLAGS=-Dkiss_fft_scalar=$(DATATYPE)
|
TYPEFLAGS=-Dkiss_fft_scalar=$(DATATYPE)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ("$(KISS_FFT_USE_ALLOCA)","")
|
||||||
|
CFLAGS+= -DKISS_FFT_USE_ALLOCA=1
|
||||||
|
endif
|
||||||
|
CFLAGS+= $(CFLAGADD)
|
||||||
|
|
||||||
|
|
||||||
FFTUTIL=fft_$(DATATYPE)
|
FFTUTIL=fft_$(DATATYPE)
|
||||||
FASTFILT=fastconv_$(DATATYPE)
|
FASTFILT=fastconv_$(DATATYPE)
|
||||||
|
@ -20,7 +20,7 @@ struct kiss_fftr_state{
|
|||||||
kiss_fft_cpx * tmpbuf;
|
kiss_fft_cpx * tmpbuf;
|
||||||
kiss_fft_cpx * super_twiddles;
|
kiss_fft_cpx * super_twiddles;
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
long pad;
|
void * pad;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user