merge and fixes for RedHat 5.5 gcc 64bit

This commit is contained in:
Mark Borgerding 2012-07-18 00:19:37 -04:00
commit 8a01c6085d
10 changed files with 56 additions and 44 deletions

View File

@ -2,7 +2,9 @@ KFVER=129
doc: doc:
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'" @echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not" @echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not."
@echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings"
@echo "of kissfft and would like to make use of its regression tests."
testall: testall:
# The simd and int32_t types may or may not work on your machine # The simd and int32_t types may or may not work on your machine
@ -11,6 +13,7 @@ testall:
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
echo "all tests passed"
tarball: clean tarball: clean
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz

View File

@ -1,4 +1,5 @@
If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft. If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft
to do 4 *separate* FFTs at once.
Beware! Beyond here there be dragons! Beware! Beyond here there be dragons!
@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info.
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft. Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.
I have not run it -- use it at your own risk. I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions
(out of place).
void SSETools::pack128(float* target, float* source, unsigned long size128) void SSETools::pack128(float* target, float* source, unsigned long size128)
{ {

2
TIPS
View File

@ -21,7 +21,7 @@ Speed:
Reducing code size: Reducing code size:
* remove some of the butterflies. There are currently butterflies optimized for radices * remove some of the butterflies. There are currently butterflies optimized for radices
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain 2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain
these factors, they just won't be quite as fast. You can decide for yourself other factors, they just won't be quite as fast. You can decide for yourself
whether to keep radix 2 or 4. If you do some work in this area, let me whether to keep radix 2 or 4. If you do some work in this area, let me
know what you find. know what you find.

View File

@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
CFLAGS+=-ffast-math -fomit-frame-pointer CFLAGS+=-ffast-math -fomit-frame-pointer
#CFLAGS+=-funroll-loops #CFLAGS+=-funroll-loops
#CFLAGS+=-march=prescott #CFLAGS+=-march=prescott
CFLAGS+= -mtune=native #CFLAGS+= -mtune=native
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores # TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
#CFLAGS+=-fopenmp #CFLAGS+=-fopenmp
CFLAGS+= $(CFLAGADD) CFLAGS+= $(CFLAGADD)
@ -66,20 +66,20 @@ tools:
$(SELFTEST): $(SELFTESTSRC) $(SRCFILES) $(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(TESTKFC): $(SRCFILES) $(TESTKFC): $(SRCFILES)
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm
$(TESTREAL): test_real.c $(SRCFILES) $(TESTREAL): test_real.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(BENCHKISS): benchkiss.c $(SRCFILES) $(BENCHKISS): benchkiss.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(BENCHFFTW): benchfftw.c pstats.c $(BENCHFFTW): benchfftw.c pstats.c
@echo "======attempting to build FFTW benchmark" @echo "======attempting to build FFTW benchmark"
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison" @$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison"
test: all test: all
@./$(TESTKFC) @./$(TESTKFC)
@ -101,7 +101,7 @@ selftest_short.c:
CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall
testcpp: testcpp.cc ../kissfft.hh testcpp: testcpp.cc ../kissfft.hh
$(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc $(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm
clean: clean:

View File

@ -7,8 +7,7 @@ import random
import struct import struct
import popen2 import popen2
import getopt import getopt
import Numeric import numpy
import FFT
pi=math.pi pi=math.pi
e=math.e e=math.e
@ -26,7 +25,7 @@ elif datatype=='int16_t':
fmt='h' fmt='h'
minsnr=10 minsnr=10
elif datatype=='int32_t': elif datatype=='int32_t':
fmt='l' fmt='i'
elif datatype=='simd': elif datatype=='simd':
fmt='4f' fmt='4f'
sys.stderr.write('testkiss.py does not yet test simd') sys.stderr.write('testkiss.py does not yet test simd')
@ -39,21 +38,21 @@ else:
def dopack(x,cpx=1): def dopack(x,cpx=1):
x = Numeric.reshape( x, ( Numeric.size(x),) ) x = numpy.reshape( x, ( numpy.size(x),) )
if cpx: if cpx:
s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] ) s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
else: else:
s = ''.join( [ struct.pack(fmt,c) for c in x ] ) s = ''.join( [ struct.pack(fmt,c.real) for c in x ] )
return s return s
def dounpack(x,cpx): def dounpack(x,cpx):
uf = fmt * ( len(x) / struct.calcsize(fmt) ) uf = fmt * ( len(x) / struct.calcsize(fmt) )
s = struct.unpack(uf,x) s = struct.unpack(uf,x)
if cpx: if cpx:
return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j return numpy.array(s[::2]) + numpy.array( s[1::2] )*j
else: else:
return Numeric.array(s ) return numpy.array(s )
def make_random(dims=[1]): def make_random(dims=[1]):
res = [] res = []
@ -67,11 +66,11 @@ def make_random(dims=[1]):
res.append( complex(r,i) ) res.append( complex(r,i) )
else: else:
res.append( make_random( dims[1:] ) ) res.append( make_random( dims[1:] ) )
return Numeric.array(res) return numpy.array(res)
def flatten(x): def flatten(x):
ntotal = Numeric.product(Numeric.shape(x)) ntotal = numpy.size(x)
return Numeric.reshape(x,(ntotal,)) return numpy.reshape(x,(ntotal,))
def randmat( ndims ): def randmat( ndims ):
dims=[] dims=[]
@ -85,20 +84,20 @@ def randmat( ndims ):
def test_fft(ndims): def test_fft(ndims):
x=randmat( ndims ) x=randmat( ndims )
print 'dimensions=%s' % str( Numeric.shape(x) ),
if doreal: if doreal:
xver = FFT.real_fftnd(x) xver = numpy.fft.rfftn(x)
else: else:
xver = FFT.fftnd(x) xver = numpy.fft.fftn(x)
open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) ) open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )
x2=dofft(x) x2=dofft(x,doreal)
err = xver - x2 err = xver - x2
errf = flatten(err) errf = flatten(err)
xverf = flatten(xver) xverf = flatten(xver)
errpow = Numeric.vdot(errf,errf)+1e-10 errpow = numpy.vdot(errf,errf)+1e-10
sigpow = Numeric.vdot(xverf,xverf)+1e-10 sigpow = numpy.vdot(xverf,xverf)+1e-10
snr = 10*math.log10(abs(sigpow/errpow) ) snr = 10*math.log10(abs(sigpow/errpow) )
print 'SNR (compared to NumPy) : %.1fdB' % float(snr) print 'SNR (compared to NumPy) : %.1fdB' % float(snr)
@ -108,10 +107,9 @@ def test_fft(ndims):
print 'err',err print 'err',err
sys.exit(1) sys.exit(1)
def dofft(x): def dofft(x,isreal):
dims=list( Numeric.shape(x) ) dims=list( numpy.shape(x) )
x = flatten(x) x = flatten(x)
iscomp = (type(x[0]) == complex)
scale=1 scale=1
if datatype=='int16_t': if datatype=='int16_t':
@ -126,11 +124,12 @@ def dofft(x):
if doreal: if doreal:
cmd += ' -R ' cmd += ' -R '
print cmd
p = popen2.Popen3(cmd ) p = popen2.Popen3(cmd )
open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) ) open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) )
p.tochild.write( dopack( x , iscomp ) ) p.tochild.write( dopack( x , isreal==False ) )
p.tochild.close() p.tochild.close()
res = dounpack( p.fromchild.read() , 1 ) res = dounpack( p.fromchild.read() , 1 )
@ -141,7 +140,7 @@ def dofft(x):
res = scale * res res = scale * res
p.wait() p.wait()
return Numeric.reshape(res,dims) return numpy.reshape(res,dims)
def main(): def main():
opts,args = getopt.getopt(sys.argv[1:],'r') opts,args = getopt.getopt(sys.argv[1:],'r')

View File

@ -89,6 +89,6 @@ int main(int argc,char ** argv)
if (snr>maxsnr) maxsnr=snr; if (snr>maxsnr) maxsnr=snr;
printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr); printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) ); printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) );
return 0; return 0;
} }

View File

@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS)
# tip: try -openmp or -fopenmp to use multiple cores # tip: try -openmp or -fopenmp to use multiple cores
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c $(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
$(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c $(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+ $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm
$(DUMPHDR): ../kiss_fft.c dumphdr.c $(DUMPHDR): ../kiss_fft.c dumphdr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
clean: clean:
rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_* rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*

View File

@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
if (cur== NULL) { if (cur== NULL) {
/* no cached node found, need to create a new one*/ /* no cached node found, need to create a new one*/
kiss_fft_alloc(nfft,inverse,0,&len); kiss_fft_alloc(nfft,inverse,0,&len);
#ifdef USE_SIMD
int padding = (16-sizeof(struct cached_fft)) & 15;
// make sure the cfg aligns on a 16 byte boundary
len += padding;
#endif
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
if (cur == NULL) if (cur == NULL)
return NULL; return NULL;
cur->cfg = (kiss_fft_cfg)(cur+1); cur->cfg = (kiss_fft_cfg)(cur+1);
#ifdef USE_SIMD
cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
#endif
kiss_fft_alloc(nfft,inverse,cur->cfg,&len); kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
cur->nfft=nfft; cur->nfft=nfft;
cur->inverse=inverse; cur->inverse=inverse;

View File

@ -362,7 +362,7 @@ void do_file_filter(
n_samps_buf = 8*4096/sizeof(kffsamp_t); n_samps_buf = 8*4096/sizeof(kffsamp_t);
n_samps_buf = nfft + 4*(nfft-n_imp_resp+1); n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf ); if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
/*allocate space and initialize pointers */ /*allocate space and initialize pointers */
@ -449,10 +449,12 @@ int main(int argc,char**argv)
} }
fseek(filtfile,0,SEEK_END); fseek(filtfile,0,SEEK_END);
nh = ftell(filtfile) / sizeof(kffsamp_t); nh = ftell(filtfile) / sizeof(kffsamp_t);
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh); if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh); h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
fseek(filtfile,0,SEEK_SET); fseek(filtfile,0,SEEK_SET);
fread(h,sizeof(kffsamp_t),nh,filtfile); if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
fprintf(stderr,"short read on filter file\n");
fclose(filtfile); fclose(filtfile);
if (use_direct) if (use_direct)

View File

@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m
size_t memneeded = sizeof(struct kiss_fftnd_state); size_t memneeded = sizeof(struct kiss_fftnd_state);
char * ptr; char * ptr;
size_t pad = memneeded % sizeof(DATATYPE);
for (i=0;i<ndims;++i) { for (i=0;i<ndims;++i) {
size_t sublen=0; size_t sublen=0;
kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen); kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);