From e2baa5e270959ffe8eb0163d4e4a2ca2a340020a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 26 Mar 2011 21:25:10 -0400 Subject: [PATCH 1/5] fixed alignment issue with SIMD --- tools/kfc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/kfc.c b/tools/kfc.c index 424e119..d94d124 100644 --- a/tools/kfc.c +++ b/tools/kfc.c @@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse) if (cur== NULL) { /* no cached node found, need to create a new one*/ kiss_fft_alloc(nfft,inverse,0,&len); +#ifdef USE_SIMD + int padding = (16-sizeof(struct cached_fft)) & 15; + // make sure the cfg aligns on a 16 byte boundary + len += padding; +#endif cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); if (cur == NULL) return NULL; cur->cfg = (kiss_fft_cfg)(cur+1); +#ifdef USE_SIMD + cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding); +#endif kiss_fft_alloc(nfft,inverse,cur->cfg,&len); cur->nfft=nfft; cur->inverse=inverse; From dc6bfad0ab680eb7436adb20dc34649646f54090 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sun, 15 Jul 2012 22:35:28 -0400 Subject: [PATCH 2/5] previous gcc versions might've been silently adding -lm at the end of the link line. --- test/Makefile | 12 ++++++------ tools/Makefile | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/Makefile b/test/Makefile index 6483207..ac839ad 100644 --- a/test/Makefile +++ b/test/Makefile @@ -66,20 +66,20 @@ tools: $(SELFTEST): $(SELFTESTSRC) $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(TESTKFC): $(SRCFILES) - $(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm $(TESTREAL): test_real.c $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(BENCHKISS): benchkiss.c $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(BENCHFFTW): benchfftw.c pstats.c @echo "======attempting to build FFTW benchmark" - @$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison" + @$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison" test: all @./$(TESTKFC) @@ -101,7 +101,7 @@ selftest_short.c: CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall testcpp: testcpp.cc ../kissfft.hh - $(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc + $(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm clean: diff --git a/tools/Makefile b/tools/Makefile index bf52220..ae7646b 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS) # tip: try -openmp or -fopenmp to use multiple cores $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm $(FASTFILT): ../kiss_fft.c kiss_fastfir.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm $(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm $(DUMPHDR): ../kiss_fft.c dumphdr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm clean: rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_* From 0c1d22a9747dccfe1082ececb667cf92f555111b Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sun, 15 Jul 2012 22:36:18 -0400 Subject: [PATCH 3/5] minor documentation tweaks --- Makefile | 4 +++- README.simd | 6 ++++-- TIPS | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e501c5c..dd31333 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,9 @@ KFVER=129 doc: @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" - @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not" + @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not." + @echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings" + @echo "of kissfft and would like to make use of its regression tests." testall: # The simd and int32_t types may or may not work on your machine diff --git a/README.simd b/README.simd index 915541d..b0fdac5 100644 --- a/README.simd +++ b/README.simd @@ -1,4 +1,5 @@ -If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft. +If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft +to do 4 *separate* FFTs at once. Beware! Beyond here there be dragons! @@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info. Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft. -I have not run it -- use it at your own risk. +I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions +(out of place). void SSETools::pack128(float* target, float* source, unsigned long size128) { diff --git a/TIPS b/TIPS index cf7ac2a..6a9579d 100644 --- a/TIPS +++ b/TIPS @@ -21,7 +21,7 @@ Speed: Reducing code size: * remove some of the butterflies. There are currently butterflies optimized for radices 2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain - these factors, they just won't be quite as fast. You can decide for yourself + other factors, they just won't be quite as fast. You can decide for yourself whether to keep radix 2 or 4. If you do some work in this area, let me know what you find. From 8fedba4d91ece59375e7f8cc8746564e0f0c3d26 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 17 Jul 2012 23:30:31 -0400 Subject: [PATCH 4/5] fixed warnings about ignored return value and wrong format code in printf --- tools/kiss_fastfir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/kiss_fastfir.c b/tools/kiss_fastfir.c index 1c96216..4560aa3 100644 --- a/tools/kiss_fastfir.c +++ b/tools/kiss_fastfir.c @@ -362,7 +362,7 @@ void do_file_filter( n_samps_buf = 8*4096/sizeof(kffsamp_t); n_samps_buf = nfft + 4*(nfft-n_imp_resp+1); - if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf ); + if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) ); /*allocate space and initialize pointers */ @@ -449,10 +449,12 @@ int main(int argc,char**argv) } fseek(filtfile,0,SEEK_END); nh = ftell(filtfile) / sizeof(kffsamp_t); - if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh); + if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh); h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh); fseek(filtfile,0,SEEK_SET); - fread(h,sizeof(kffsamp_t),nh,filtfile); + if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh) + fprintf(stderr,"short read on filter file\n"); + fclose(filtfile); if (use_direct) From 4faaa8307503fa769ed749e398711910c551ff97 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 17 Jul 2012 23:31:00 -0400 Subject: [PATCH 5/5] converted testkiss.py from Numeric to numpy --- Makefile | 1 + test/testkiss.py | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index dd31333..d71b925 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ testall: make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test + echo "all tests passed" tarball: clean hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz diff --git a/test/testkiss.py b/test/testkiss.py index a5f7005..5749c7c 100755 --- a/test/testkiss.py +++ b/test/testkiss.py @@ -7,8 +7,7 @@ import random import struct import popen2 import getopt -import Numeric -import FFT +import numpy pi=math.pi e=math.e @@ -26,7 +25,7 @@ elif datatype=='int16_t': fmt='h' minsnr=10 elif datatype=='int32_t': - fmt='l' + fmt='i' elif datatype=='simd': fmt='4f' sys.stderr.write('testkiss.py does not yet test simd') @@ -39,21 +38,21 @@ else: def dopack(x,cpx=1): - x = Numeric.reshape( x, ( Numeric.size(x),) ) + x = numpy.reshape( x, ( numpy.size(x),) ) if cpx: s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] ) else: - s = ''.join( [ struct.pack(fmt,c) for c in x ] ) + s = ''.join( [ struct.pack(fmt,c.real) for c in x ] ) return s def dounpack(x,cpx): uf = fmt * ( len(x) / struct.calcsize(fmt) ) s = struct.unpack(uf,x) if cpx: - return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j + return numpy.array(s[::2]) + numpy.array( s[1::2] )*j else: - return Numeric.array(s ) + return numpy.array(s ) def make_random(dims=[1]): res = [] @@ -67,11 +66,11 @@ def make_random(dims=[1]): res.append( complex(r,i) ) else: res.append( make_random( dims[1:] ) ) - return Numeric.array(res) + return numpy.array(res) def flatten(x): - ntotal = Numeric.product(Numeric.shape(x)) - return Numeric.reshape(x,(ntotal,)) + ntotal = numpy.size(x) + return numpy.reshape(x,(ntotal,)) def randmat( ndims ): dims=[] @@ -85,11 +84,11 @@ def randmat( ndims ): def test_fft(ndims): x=randmat( ndims ) - print 'dimensions=%s' % str( Numeric.shape(x) ), + if doreal: - xver = FFT.real_fftnd(x) + xver = numpy.fft.rfftn(x) else: - xver = FFT.fftnd(x) + xver = numpy.fft.fftn(x) open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) ) @@ -97,8 +96,8 @@ def test_fft(ndims): err = xver - x2 errf = flatten(err) xverf = flatten(xver) - errpow = Numeric.vdot(errf,errf)+1e-10 - sigpow = Numeric.vdot(xverf,xverf)+1e-10 + errpow = numpy.vdot(errf,errf)+1e-10 + sigpow = numpy.vdot(xverf,xverf)+1e-10 snr = 10*math.log10(abs(sigpow/errpow) ) print 'SNR (compared to NumPy) : %.1fdB' % float(snr) @@ -109,9 +108,9 @@ def test_fft(ndims): sys.exit(1) def dofft(x): - dims=list( Numeric.shape(x) ) + dims=list( numpy.shape(x) ) x = flatten(x) - iscomp = (type(x[0]) == complex) + iscomp = (all(x.conj()==x)==False) scale=1 if datatype=='int16_t': @@ -126,6 +125,7 @@ def dofft(x): if doreal: cmd += ' -R ' + print cmd p = popen2.Popen3(cmd ) open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) ) @@ -141,7 +141,7 @@ def dofft(x): res = scale * res p.wait() - return Numeric.reshape(res,dims) + return numpy.reshape(res,dims) def main(): opts,args = getopt.getopt(sys.argv[1:],'r')