diff --git a/Makefile b/Makefile index e501c5c..d71b925 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,9 @@ KFVER=129 doc: @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" - @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not" + @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not." + @echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings" + @echo "of kissfft and would like to make use of its regression tests." testall: # The simd and int32_t types may or may not work on your machine @@ -11,6 +13,7 @@ testall: make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test + echo "all tests passed" tarball: clean hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz diff --git a/README.simd b/README.simd index 915541d..b0fdac5 100644 --- a/README.simd +++ b/README.simd @@ -1,4 +1,5 @@ -If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft. +If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft +to do 4 *separate* FFTs at once. Beware! Beyond here there be dragons! @@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info. Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft. -I have not run it -- use it at your own risk. +I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions +(out of place). void SSETools::pack128(float* target, float* source, unsigned long size128) { diff --git a/TIPS b/TIPS index cf7ac2a..6a9579d 100644 --- a/TIPS +++ b/TIPS @@ -21,7 +21,7 @@ Speed: Reducing code size: * remove some of the butterflies. There are currently butterflies optimized for radices 2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain - these factors, they just won't be quite as fast. You can decide for yourself + other factors, they just won't be quite as fast. You can decide for yourself whether to keep radix 2 or 4. If you do some work in this area, let me know what you find. diff --git a/test/Makefile b/test/Makefile index 6483207..c204511 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS) CFLAGS+=-ffast-math -fomit-frame-pointer #CFLAGS+=-funroll-loops #CFLAGS+=-march=prescott -CFLAGS+= -mtune=native +#CFLAGS+= -mtune=native # TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores #CFLAGS+=-fopenmp CFLAGS+= $(CFLAGADD) @@ -66,20 +66,20 @@ tools: $(SELFTEST): $(SELFTESTSRC) $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(TESTKFC): $(SRCFILES) - $(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm $(TESTREAL): test_real.c $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(BENCHKISS): benchkiss.c $(SRCFILES) - $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm $(BENCHFFTW): benchfftw.c pstats.c @echo "======attempting to build FFTW benchmark" - @$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison" + @$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison" test: all @./$(TESTKFC) @@ -101,7 +101,7 @@ selftest_short.c: CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall testcpp: testcpp.cc ../kissfft.hh - $(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc + $(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm clean: diff --git a/test/testkiss.py b/test/testkiss.py index a5f7005..af75065 100755 --- a/test/testkiss.py +++ b/test/testkiss.py @@ -7,8 +7,7 @@ import random import struct import popen2 import getopt -import Numeric -import FFT +import numpy pi=math.pi e=math.e @@ -26,7 +25,7 @@ elif datatype=='int16_t': fmt='h' minsnr=10 elif datatype=='int32_t': - fmt='l' + fmt='i' elif datatype=='simd': fmt='4f' sys.stderr.write('testkiss.py does not yet test simd') @@ -39,21 +38,21 @@ else: def dopack(x,cpx=1): - x = Numeric.reshape( x, ( Numeric.size(x),) ) + x = numpy.reshape( x, ( numpy.size(x),) ) if cpx: s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] ) else: - s = ''.join( [ struct.pack(fmt,c) for c in x ] ) + s = ''.join( [ struct.pack(fmt,c.real) for c in x ] ) return s def dounpack(x,cpx): uf = fmt * ( len(x) / struct.calcsize(fmt) ) s = struct.unpack(uf,x) if cpx: - return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j + return numpy.array(s[::2]) + numpy.array( s[1::2] )*j else: - return Numeric.array(s ) + return numpy.array(s ) def make_random(dims=[1]): res = [] @@ -67,11 +66,11 @@ def make_random(dims=[1]): res.append( complex(r,i) ) else: res.append( make_random( dims[1:] ) ) - return Numeric.array(res) + return numpy.array(res) def flatten(x): - ntotal = Numeric.product(Numeric.shape(x)) - return Numeric.reshape(x,(ntotal,)) + ntotal = numpy.size(x) + return numpy.reshape(x,(ntotal,)) def randmat( ndims ): dims=[] @@ -85,20 +84,20 @@ def randmat( ndims ): def test_fft(ndims): x=randmat( ndims ) - print 'dimensions=%s' % str( Numeric.shape(x) ), + if doreal: - xver = FFT.real_fftnd(x) + xver = numpy.fft.rfftn(x) else: - xver = FFT.fftnd(x) + xver = numpy.fft.fftn(x) open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) ) - x2=dofft(x) + x2=dofft(x,doreal) err = xver - x2 errf = flatten(err) xverf = flatten(xver) - errpow = Numeric.vdot(errf,errf)+1e-10 - sigpow = Numeric.vdot(xverf,xverf)+1e-10 + errpow = numpy.vdot(errf,errf)+1e-10 + sigpow = numpy.vdot(xverf,xverf)+1e-10 snr = 10*math.log10(abs(sigpow/errpow) ) print 'SNR (compared to NumPy) : %.1fdB' % float(snr) @@ -108,10 +107,9 @@ def test_fft(ndims): print 'err',err sys.exit(1) -def dofft(x): - dims=list( Numeric.shape(x) ) +def dofft(x,isreal): + dims=list( numpy.shape(x) ) x = flatten(x) - iscomp = (type(x[0]) == complex) scale=1 if datatype=='int16_t': @@ -126,11 +124,12 @@ def dofft(x): if doreal: cmd += ' -R ' + print cmd p = popen2.Popen3(cmd ) - open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) ) + open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) ) - p.tochild.write( dopack( x , iscomp ) ) + p.tochild.write( dopack( x , isreal==False ) ) p.tochild.close() res = dounpack( p.fromchild.read() , 1 ) @@ -141,7 +140,7 @@ def dofft(x): res = scale * res p.wait() - return Numeric.reshape(res,dims) + return numpy.reshape(res,dims) def main(): opts,args = getopt.getopt(sys.argv[1:],'r') diff --git a/test/twotonetest.c b/test/twotonetest.c index 2b26283..7cc7190 100644 --- a/test/twotonetest.c +++ b/test/twotonetest.c @@ -89,6 +89,6 @@ int main(int argc,char ** argv) if (snr>maxsnr) maxsnr=snr; printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr); - printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) ); + printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) ); return 0; } diff --git a/tools/Makefile b/tools/Makefile index bf52220..ae7646b 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS) # tip: try -openmp or -fopenmp to use multiple cores $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm $(FASTFILT): ../kiss_fft.c kiss_fastfir.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm $(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm $(DUMPHDR): ../kiss_fft.c dumphdr.c - $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ + $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm clean: rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_* diff --git a/tools/kfc.c b/tools/kfc.c index 424e119..d94d124 100644 --- a/tools/kfc.c +++ b/tools/kfc.c @@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse) if (cur== NULL) { /* no cached node found, need to create a new one*/ kiss_fft_alloc(nfft,inverse,0,&len); +#ifdef USE_SIMD + int padding = (16-sizeof(struct cached_fft)) & 15; + // make sure the cfg aligns on a 16 byte boundary + len += padding; +#endif cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); if (cur == NULL) return NULL; cur->cfg = (kiss_fft_cfg)(cur+1); +#ifdef USE_SIMD + cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding); +#endif kiss_fft_alloc(nfft,inverse,cur->cfg,&len); cur->nfft=nfft; cur->inverse=inverse; diff --git a/tools/kiss_fastfir.c b/tools/kiss_fastfir.c index 1c96216..4560aa3 100644 --- a/tools/kiss_fastfir.c +++ b/tools/kiss_fastfir.c @@ -362,7 +362,7 @@ void do_file_filter( n_samps_buf = 8*4096/sizeof(kffsamp_t); n_samps_buf = nfft + 4*(nfft-n_imp_resp+1); - if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf ); + if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) ); /*allocate space and initialize pointers */ @@ -449,10 +449,12 @@ int main(int argc,char**argv) } fseek(filtfile,0,SEEK_END); nh = ftell(filtfile) / sizeof(kffsamp_t); - if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh); + if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh); h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh); fseek(filtfile,0,SEEK_SET); - fread(h,sizeof(kffsamp_t),nh,filtfile); + if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh) + fprintf(stderr,"short read on filter file\n"); + fclose(filtfile); if (use_direct) diff --git a/tools/kiss_fftnd.c b/tools/kiss_fftnd.c index 22b54b0..d6c9124 100644 --- a/tools/kiss_fftnd.c +++ b/tools/kiss_fftnd.c @@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m size_t memneeded = sizeof(struct kiss_fftnd_state); char * ptr; - size_t pad = memneeded % sizeof(DATATYPE); - for (i=0;i