merge and fixes for RedHat 5.5 gcc 64bit

2025-07-18 13:04:19 -04:00 · 2012-07-18 00:19:37 -04:00
parent d9906a3efb 4faaa83075
commit 8a01c6085d
10 changed files with 56 additions and 44 deletions
--- a/5
+++ b/5
@ -2,7 +2,9 @@ KFVER=129

 doc:
 	@echo "Start by reading the README file.  If you want to build and test lots of stuff, do a 'make testall'"
-	@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not"
+	@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not."
+	@echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings"
+	@echo "of kissfft and would like to make use of its regression tests."

 testall:
 	# The simd and int32_t types may or may not work on your machine 
@ -11,6 +13,7 @@ testall:
 	make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
 	make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
 	make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
+	echo "all tests passed"

 tarball: clean
 	hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz 
--- a/README.simd
+++ b/README.simd
@ -1,4 +1,5 @@
-If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft.
+If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft 
+to do 4 *separate* FFTs at once.

 Beware! Beyond here there be dragons!

@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info.


 Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.  
-I have not run it -- use it at your own risk.
+I have not run it -- use it at your own risk.  It appears to do 4xN and Nx4 transpositions 
+(out of place).

 void SSETools::pack128(float* target, float* source, unsigned long size128)
 {
--- a/2
+++ b/2
@ -21,7 +21,7 @@ Speed:
 Reducing code size:
    * remove some of the butterflies. There are currently butterflies optimized for radices
        2,3,4,5.  It is worth mentioning that you can still use FFT sizes that contain 
-        these factors, they just won't be quite as fast.  You can decide for yourself 
+        other factors; they just won't be quite as fast.  You can decide for yourself 
        whether to keep radix 2 or 4.  If you do some work in this area, let me 
        know what you find.

--- a/test/Makefile
+++ b/test/Makefile
@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
 CFLAGS+=-ffast-math -fomit-frame-pointer 
 #CFLAGS+=-funroll-loops
 #CFLAGS+=-march=prescott 
-CFLAGS+= -mtune=native 
+#CFLAGS+= -mtune=native 
 # TIP: try adding -openmp or -fopenmp  to enable OPENMP directives and use of multiple cores
 #CFLAGS+=-fopenmp
 CFLAGS+= $(CFLAGADD)
@ -66,20 +66,20 @@ tools:


 $(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
-	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
+	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm 

 $(TESTKFC): $(SRCFILES) 
-	$(CC) -o $@ $(CFLAGS)  -DKFC_TEST $(TYPEFLAGS) -lm $+
+	$(CC) -o $@ $(CFLAGS)  -DKFC_TEST $(TYPEFLAGS) $+ -lm
 	
 $(TESTREAL): test_real.c $(SRCFILES)
-	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
+	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm

 $(BENCHKISS): benchkiss.c $(SRCFILES)
-	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm  $+
+	$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS)  $+ -lm

 $(BENCHFFTW): benchfftw.c pstats.c
 	@echo "======attempting to build FFTW benchmark"
-	@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison"
+	@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison"

 test: all
 	@./$(TESTKFC)
@ -101,7 +101,7 @@ selftest_short.c:

 CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer  -I.. -I../tools -W -Wall
 testcpp: testcpp.cc ../kissfft.hh
-	$(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc
+	$(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm


 clean:
--- a/test/testkiss.py
+++ b/test/testkiss.py
@ -7,8 +7,7 @@ import random
 import struct
 import popen2
 import getopt
-import Numeric
-import FFT
+import numpy

 pi=math.pi
 e=math.e
@ -26,7 +25,7 @@ elif datatype=='int16_t':
    fmt='h'
    minsnr=10
 elif datatype=='int32_t':
-    fmt='l'
+    fmt='i'
 elif datatype=='simd':
    fmt='4f'
    sys.stderr.write('testkiss.py does not yet test simd')
@ -39,21 +38,21 @@ else:
 

 def dopack(x,cpx=1):
-    x = Numeric.reshape( x, ( Numeric.size(x),) )
+    x = numpy.reshape( x, ( numpy.size(x),) )
    
    if cpx:
        s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
    else:
-        s = ''.join( [ struct.pack(fmt,c) for c in x ] )
+        s = ''.join( [ struct.pack(fmt,c.real) for c in x ] )
    return s

 def dounpack(x,cpx):
    uf = fmt * ( len(x) / struct.calcsize(fmt) )
    s = struct.unpack(uf,x)
    if cpx:
-        return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j
+        return numpy.array(s[::2]) + numpy.array( s[1::2] )*j
    else:
-        return Numeric.array(s )
+        return numpy.array(s )

 def make_random(dims=[1]):
    res = []
@ -67,11 +66,11 @@ def make_random(dims=[1]):
                res.append( complex(r,i) )
        else:
            res.append( make_random( dims[1:] ) )
-    return Numeric.array(res)
+    return numpy.array(res)

 def flatten(x):
-    ntotal = Numeric.product(Numeric.shape(x))
-    return Numeric.reshape(x,(ntotal,))
+    ntotal = numpy.size(x)
+    return numpy.reshape(x,(ntotal,))

 def randmat( ndims ):
    dims=[]
@ -85,20 +84,20 @@ def randmat( ndims ):
 def test_fft(ndims):
    x=randmat( ndims )

-    print 'dimensions=%s' % str( Numeric.shape(x) ),
+
    if doreal:
-        xver = FFT.real_fftnd(x)
+        xver = numpy.fft.rfftn(x)
    else:
-        xver = FFT.fftnd(x)
+        xver = numpy.fft.fftn(x)
    
    open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )

-    x2=dofft(x)
+    x2=dofft(x,doreal)
    err = xver - x2
    errf = flatten(err)
    xverf = flatten(xver)
-    errpow = Numeric.vdot(errf,errf)+1e-10
-    sigpow = Numeric.vdot(xverf,xverf)+1e-10
+    errpow = numpy.vdot(errf,errf)+1e-10
+    sigpow = numpy.vdot(xverf,xverf)+1e-10
    snr = 10*math.log10(abs(sigpow/errpow) )
    print 'SNR (compared to NumPy) : %.1fdB' % float(snr)

@ -108,10 +107,9 @@ def test_fft(ndims):
        print 'err',err
        sys.exit(1)
 
-def dofft(x):
-    dims=list( Numeric.shape(x) )
+def dofft(x,isreal):
+    dims=list( numpy.shape(x) )
    x = flatten(x)
-    iscomp = (type(x[0]) == complex)

    scale=1
    if datatype=='int16_t':
@ -126,11 +124,12 @@ def dofft(x):
    if doreal:
        cmd += ' -R '

+    print cmd
    p = popen2.Popen3(cmd )

-    open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) )
+    open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) )

-    p.tochild.write( dopack( x , iscomp ) )
+    p.tochild.write( dopack( x , isreal==False ) )
    p.tochild.close()

    res = dounpack( p.fromchild.read() , 1 )
@ -141,7 +140,7 @@ def dofft(x):
    res = scale * res

    p.wait()
-    return Numeric.reshape(res,dims)
+    return numpy.reshape(res,dims)

 def main():
    opts,args = getopt.getopt(sys.argv[1:],'r')
--- a/test/twotonetest.c
+++ b/test/twotonetest.c
@ -89,6 +89,6 @@ int main(int argc,char ** argv)
    if (snr>maxsnr) maxsnr=snr;

    printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
-    printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) );
+    printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) );
    return 0;
 }
--- a/tools/Makefile
+++ b/tools/Makefile
@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS)
 # tip: try -openmp or -fopenmp to use multiple cores

 $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
-	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
+	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR  $+ -DFAST_FILT_UTIL -lm 

 $(FASTFILT): ../kiss_fft.c kiss_fastfir.c
-	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
+	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+  -DFAST_FILT_UTIL -lm

 $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
-	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
+	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm

 $(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
-	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+
+	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm

 $(DUMPHDR): ../kiss_fft.c dumphdr.c
-	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
+	$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm

 clean:
 	rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*
--- a/tools/kfc.c
+++ b/tools/kfc.c
@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
    if (cur== NULL) {
        /* no cached node found, need to create a new one*/
        kiss_fft_alloc(nfft,inverse,0,&len);
+#ifdef USE_SIMD
+        int padding = (16-sizeof(struct cached_fft)) & 15;
+        // make sure the cfg aligns on a 16 byte boundary
+        len += padding;
+#endif
        cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
        if (cur == NULL)
            return NULL;
        cur->cfg = (kiss_fft_cfg)(cur+1);
+#ifdef USE_SIMD
+        cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
+#endif
        kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
        cur->nfft=nfft;
        cur->inverse=inverse;
--- a/tools/kiss_fastfir.c
+++ b/tools/kiss_fastfir.c
@ -362,7 +362,7 @@ void do_file_filter(
    n_samps_buf = 8*4096/sizeof(kffsamp_t); 
    n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);

-    if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
+    if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
     

    /*allocate space and initialize pointers */
@ -449,10 +449,12 @@ int main(int argc,char**argv)
    }
    fseek(filtfile,0,SEEK_END);
    nh = ftell(filtfile) / sizeof(kffsamp_t);
-    if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh);
+    if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
    h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
    fseek(filtfile,0,SEEK_SET);
-    fread(h,sizeof(kffsamp_t),nh,filtfile);
+    if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
+        fprintf(stderr,"short read on filter file\n");
+
    fclose(filtfile);
 
    if (use_direct)
--- a/tools/kiss_fftnd.c
+++ b/tools/kiss_fftnd.c
@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m
    size_t memneeded = sizeof(struct kiss_fftnd_state);
    char * ptr;

-    size_t pad = memneeded % sizeof(DATATYPE);
-
    for (i=0;i<ndims;++i) {
        size_t sublen=0;
        kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);