merge and fixes for RedHat 5.5 gcc 64bit

This commit is contained in:
Mark Borgerding 2012-07-18 00:19:37 -04:00
commit 8a01c6085d
10 changed files with 56 additions and 44 deletions

View File

@ -2,7 +2,9 @@ KFVER=129
doc:
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not"
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not."
@echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings"
@echo "of kissfft and would like to make use of its regression tests."
testall:
# The simd and int32_t types may or may not work on your machine
@ -11,6 +13,7 @@ testall:
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
echo "all tests passed"
tarball: clean
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz

View File

@ -1,4 +1,5 @@
If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft.
If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft
to do 4 *separate* FFTs at once.
Beware! Beyond here there be dragons!
@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info.
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.
I have not run it -- use it at your own risk.
I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions
(out of place).
void SSETools::pack128(float* target, float* source, unsigned long size128)
{

2
TIPS
View File

@ -21,7 +21,7 @@ Speed:
Reducing code size:
* remove some of the butterflies. There are currently butterflies optimized for radices
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain
these factors, they just won't be quite as fast. You can decide for yourself
other factors; they just won't be quite as fast. You can decide for yourself
whether to keep radix 2 or 4. If you do some work in this area, let me
know what you find.

View File

@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
CFLAGS+=-ffast-math -fomit-frame-pointer
#CFLAGS+=-funroll-loops
#CFLAGS+=-march=prescott
CFLAGS+= -mtune=native
#CFLAGS+= -mtune=native
# TIP: try adding -openmp or -fopenmp (whichever your compiler accepts) to enable OpenMP directives and use multiple cores
#CFLAGS+=-fopenmp
CFLAGS+= $(CFLAGADD)
@ -66,20 +66,20 @@ tools:
$(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(TESTKFC): $(SRCFILES)
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm
$(TESTREAL): test_real.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(BENCHKISS): benchkiss.c $(SRCFILES)
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
$(BENCHFFTW): benchfftw.c pstats.c
@echo "======attempting to build FFTW benchmark"
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison"
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison"
test: all
@./$(TESTKFC)
@ -101,7 +101,7 @@ selftest_short.c:
CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall
testcpp: testcpp.cc ../kissfft.hh
$(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc
$(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm
clean:

View File

@ -7,8 +7,7 @@ import random
import struct
import popen2
import getopt
import Numeric
import FFT
import numpy
pi=math.pi
e=math.e
@ -26,7 +25,7 @@ elif datatype=='int16_t':
fmt='h'
minsnr=10
elif datatype=='int32_t':
fmt='l'
fmt='i'
elif datatype=='simd':
fmt='4f'
sys.stderr.write('testkiss.py does not yet test simd')
@ -39,21 +38,21 @@ else:
def dopack(x,cpx=1):
x = Numeric.reshape( x, ( Numeric.size(x),) )
x = numpy.reshape( x, ( numpy.size(x),) )
if cpx:
s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
else:
s = ''.join( [ struct.pack(fmt,c) for c in x ] )
s = ''.join( [ struct.pack(fmt,c.real) for c in x ] )
return s
def dounpack(x,cpx):
uf = fmt * ( len(x) / struct.calcsize(fmt) )
s = struct.unpack(uf,x)
if cpx:
return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j
return numpy.array(s[::2]) + numpy.array( s[1::2] )*j
else:
return Numeric.array(s )
return numpy.array(s )
def make_random(dims=[1]):
res = []
@ -67,11 +66,11 @@ def make_random(dims=[1]):
res.append( complex(r,i) )
else:
res.append( make_random( dims[1:] ) )
return Numeric.array(res)
return numpy.array(res)
def flatten(x):
ntotal = Numeric.product(Numeric.shape(x))
return Numeric.reshape(x,(ntotal,))
ntotal = numpy.size(x)
return numpy.reshape(x,(ntotal,))
def randmat( ndims ):
dims=[]
@ -85,20 +84,20 @@ def randmat( ndims ):
def test_fft(ndims):
x=randmat( ndims )
print 'dimensions=%s' % str( Numeric.shape(x) ),
if doreal:
xver = FFT.real_fftnd(x)
xver = numpy.fft.rfftn(x)
else:
xver = FFT.fftnd(x)
xver = numpy.fft.fftn(x)
open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )
x2=dofft(x)
x2=dofft(x,doreal)
err = xver - x2
errf = flatten(err)
xverf = flatten(xver)
errpow = Numeric.vdot(errf,errf)+1e-10
sigpow = Numeric.vdot(xverf,xverf)+1e-10
errpow = numpy.vdot(errf,errf)+1e-10
sigpow = numpy.vdot(xverf,xverf)+1e-10
snr = 10*math.log10(abs(sigpow/errpow) )
print 'SNR (compared to NumPy) : %.1fdB' % float(snr)
@ -108,10 +107,9 @@ def test_fft(ndims):
print 'err',err
sys.exit(1)
def dofft(x):
dims=list( Numeric.shape(x) )
def dofft(x,isreal):
dims=list( numpy.shape(x) )
x = flatten(x)
iscomp = (type(x[0]) == complex)
scale=1
if datatype=='int16_t':
@ -126,11 +124,12 @@ def dofft(x):
if doreal:
cmd += ' -R '
print cmd
p = popen2.Popen3(cmd )
open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) )
open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) )
p.tochild.write( dopack( x , iscomp ) )
p.tochild.write( dopack( x , isreal==False ) )
p.tochild.close()
res = dounpack( p.fromchild.read() , 1 )
@ -141,7 +140,7 @@ def dofft(x):
res = scale * res
p.wait()
return Numeric.reshape(res,dims)
return numpy.reshape(res,dims)
def main():
opts,args = getopt.getopt(sys.argv[1:],'r')

View File

@ -89,6 +89,6 @@ int main(int argc,char ** argv)
if (snr>maxsnr) maxsnr=snr;
printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) );
printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) );
return 0;
}

View File

@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS)
# tip: try -openmp or -fopenmp to use multiple cores
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
$(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm
$(DUMPHDR): ../kiss_fft.c dumphdr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
clean:
rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*

View File

@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
if (cur== NULL) {
/* no cached node found, need to create a new one*/
kiss_fft_alloc(nfft,inverse,0,&len);
#ifdef USE_SIMD
int padding = (16-sizeof(struct cached_fft)) & 15;
// make sure the cfg aligns on a 16 byte boundary
len += padding;
#endif
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
if (cur == NULL)
return NULL;
cur->cfg = (kiss_fft_cfg)(cur+1);
#ifdef USE_SIMD
cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
#endif
kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
cur->nfft=nfft;
cur->inverse=inverse;

View File

@ -362,7 +362,7 @@ void do_file_filter(
n_samps_buf = 8*4096/sizeof(kffsamp_t);
n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
/*allocate space and initialize pointers */
@ -449,10 +449,12 @@ int main(int argc,char**argv)
}
fseek(filtfile,0,SEEK_END);
nh = ftell(filtfile) / sizeof(kffsamp_t);
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh);
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
fseek(filtfile,0,SEEK_SET);
fread(h,sizeof(kffsamp_t),nh,filtfile);
if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
fprintf(stderr,"short read on filter file\n");
fclose(filtfile);
if (use_direct)

View File

@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m
size_t memneeded = sizeof(struct kiss_fftnd_state);
char * ptr;
size_t pad = memneeded % sizeof(DATATYPE);
for (i=0;i<ndims;++i) {
size_t sublen=0;
kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);