mirror of
https://github.com/mborgerding/kissfft.git
synced 2025-05-27 21:20:27 -04:00
merge and fixes for RedHat 5.5 gcc 64bit
This commit is contained in:
commit
8a01c6085d
5
Makefile
5
Makefile
@ -2,7 +2,9 @@ KFVER=129
|
|||||||
|
|
||||||
doc:
|
doc:
|
||||||
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
||||||
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not"
|
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not."
|
||||||
|
@echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings"
|
||||||
|
@echo "of kissfft and would like to make use of its regression tests."
|
||||||
|
|
||||||
testall:
|
testall:
|
||||||
# The simd and int32_t types may or may not work on your machine
|
# The simd and int32_t types may or may not work on your machine
|
||||||
@ -11,6 +13,7 @@ testall:
|
|||||||
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
|
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
|
||||||
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
|
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
|
||||||
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
|
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
|
||||||
|
echo "all tests passed"
|
||||||
|
|
||||||
tarball: clean
|
tarball: clean
|
||||||
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz
|
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft.
|
If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft
|
||||||
|
to do 4 *separate* FFTs at once.
|
||||||
|
|
||||||
Beware! Beyond here there be dragons!
|
Beware! Beyond here there be dragons!
|
||||||
|
|
||||||
@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info.
|
|||||||
|
|
||||||
|
|
||||||
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.
|
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.
|
||||||
I have not run it -- use it at your own risk.
|
I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions
|
||||||
|
(out of place).
|
||||||
|
|
||||||
void SSETools::pack128(float* target, float* source, unsigned long size128)
|
void SSETools::pack128(float* target, float* source, unsigned long size128)
|
||||||
{
|
{
|
||||||
|
2
TIPS
2
TIPS
@ -21,7 +21,7 @@ Speed:
|
|||||||
Reducing code size:
|
Reducing code size:
|
||||||
* remove some of the butterflies. There are currently butterflies optimized for radices
|
* remove some of the butterflies. There are currently butterflies optimized for radices
|
||||||
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain
|
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain
|
||||||
these factors, they just won't be quite as fast. You can decide for yourself
|
other factors, they just won't be quite as fast. You can decide for yourself
|
||||||
whether to keep radix 2 or 4. If you do some work in this area, let me
|
whether to keep radix 2 or 4. If you do some work in this area, let me
|
||||||
know what you find.
|
know what you find.
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
|
|||||||
CFLAGS+=-ffast-math -fomit-frame-pointer
|
CFLAGS+=-ffast-math -fomit-frame-pointer
|
||||||
#CFLAGS+=-funroll-loops
|
#CFLAGS+=-funroll-loops
|
||||||
#CFLAGS+=-march=prescott
|
#CFLAGS+=-march=prescott
|
||||||
CFLAGS+= -mtune=native
|
#CFLAGS+= -mtune=native
|
||||||
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
||||||
#CFLAGS+=-fopenmp
|
#CFLAGS+=-fopenmp
|
||||||
CFLAGS+= $(CFLAGADD)
|
CFLAGS+= $(CFLAGADD)
|
||||||
@ -66,20 +66,20 @@ tools:
|
|||||||
|
|
||||||
|
|
||||||
$(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
|
$(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
|
||||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
$(TESTKFC): $(SRCFILES)
|
$(TESTKFC): $(SRCFILES)
|
||||||
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
$(TESTREAL): test_real.c $(SRCFILES)
|
$(TESTREAL): test_real.c $(SRCFILES)
|
||||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
$(BENCHKISS): benchkiss.c $(SRCFILES)
|
$(BENCHKISS): benchkiss.c $(SRCFILES)
|
||||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
$(BENCHFFTW): benchfftw.c pstats.c
|
$(BENCHFFTW): benchfftw.c pstats.c
|
||||||
@echo "======attempting to build FFTW benchmark"
|
@echo "======attempting to build FFTW benchmark"
|
||||||
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison"
|
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison"
|
||||||
|
|
||||||
test: all
|
test: all
|
||||||
@./$(TESTKFC)
|
@./$(TESTKFC)
|
||||||
@ -101,7 +101,7 @@ selftest_short.c:
|
|||||||
|
|
||||||
CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall
|
CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall
|
||||||
testcpp: testcpp.cc ../kissfft.hh
|
testcpp: testcpp.cc ../kissfft.hh
|
||||||
$(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc
|
$(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm
|
||||||
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
@ -7,8 +7,7 @@ import random
|
|||||||
import struct
|
import struct
|
||||||
import popen2
|
import popen2
|
||||||
import getopt
|
import getopt
|
||||||
import Numeric
|
import numpy
|
||||||
import FFT
|
|
||||||
|
|
||||||
pi=math.pi
|
pi=math.pi
|
||||||
e=math.e
|
e=math.e
|
||||||
@ -26,7 +25,7 @@ elif datatype=='int16_t':
|
|||||||
fmt='h'
|
fmt='h'
|
||||||
minsnr=10
|
minsnr=10
|
||||||
elif datatype=='int32_t':
|
elif datatype=='int32_t':
|
||||||
fmt='l'
|
fmt='i'
|
||||||
elif datatype=='simd':
|
elif datatype=='simd':
|
||||||
fmt='4f'
|
fmt='4f'
|
||||||
sys.stderr.write('testkiss.py does not yet test simd')
|
sys.stderr.write('testkiss.py does not yet test simd')
|
||||||
@ -39,21 +38,21 @@ else:
|
|||||||
|
|
||||||
|
|
||||||
def dopack(x,cpx=1):
|
def dopack(x,cpx=1):
|
||||||
x = Numeric.reshape( x, ( Numeric.size(x),) )
|
x = numpy.reshape( x, ( numpy.size(x),) )
|
||||||
|
|
||||||
if cpx:
|
if cpx:
|
||||||
s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
|
s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
|
||||||
else:
|
else:
|
||||||
s = ''.join( [ struct.pack(fmt,c) for c in x ] )
|
s = ''.join( [ struct.pack(fmt,c.real) for c in x ] )
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def dounpack(x,cpx):
|
def dounpack(x,cpx):
|
||||||
uf = fmt * ( len(x) / struct.calcsize(fmt) )
|
uf = fmt * ( len(x) / struct.calcsize(fmt) )
|
||||||
s = struct.unpack(uf,x)
|
s = struct.unpack(uf,x)
|
||||||
if cpx:
|
if cpx:
|
||||||
return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j
|
return numpy.array(s[::2]) + numpy.array( s[1::2] )*j
|
||||||
else:
|
else:
|
||||||
return Numeric.array(s )
|
return numpy.array(s )
|
||||||
|
|
||||||
def make_random(dims=[1]):
|
def make_random(dims=[1]):
|
||||||
res = []
|
res = []
|
||||||
@ -67,11 +66,11 @@ def make_random(dims=[1]):
|
|||||||
res.append( complex(r,i) )
|
res.append( complex(r,i) )
|
||||||
else:
|
else:
|
||||||
res.append( make_random( dims[1:] ) )
|
res.append( make_random( dims[1:] ) )
|
||||||
return Numeric.array(res)
|
return numpy.array(res)
|
||||||
|
|
||||||
def flatten(x):
|
def flatten(x):
|
||||||
ntotal = Numeric.product(Numeric.shape(x))
|
ntotal = numpy.size(x)
|
||||||
return Numeric.reshape(x,(ntotal,))
|
return numpy.reshape(x,(ntotal,))
|
||||||
|
|
||||||
def randmat( ndims ):
|
def randmat( ndims ):
|
||||||
dims=[]
|
dims=[]
|
||||||
@ -85,20 +84,20 @@ def randmat( ndims ):
|
|||||||
def test_fft(ndims):
|
def test_fft(ndims):
|
||||||
x=randmat( ndims )
|
x=randmat( ndims )
|
||||||
|
|
||||||
print 'dimensions=%s' % str( Numeric.shape(x) ),
|
|
||||||
if doreal:
|
if doreal:
|
||||||
xver = FFT.real_fftnd(x)
|
xver = numpy.fft.rfftn(x)
|
||||||
else:
|
else:
|
||||||
xver = FFT.fftnd(x)
|
xver = numpy.fft.fftn(x)
|
||||||
|
|
||||||
open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )
|
open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )
|
||||||
|
|
||||||
x2=dofft(x)
|
x2=dofft(x,doreal)
|
||||||
err = xver - x2
|
err = xver - x2
|
||||||
errf = flatten(err)
|
errf = flatten(err)
|
||||||
xverf = flatten(xver)
|
xverf = flatten(xver)
|
||||||
errpow = Numeric.vdot(errf,errf)+1e-10
|
errpow = numpy.vdot(errf,errf)+1e-10
|
||||||
sigpow = Numeric.vdot(xverf,xverf)+1e-10
|
sigpow = numpy.vdot(xverf,xverf)+1e-10
|
||||||
snr = 10*math.log10(abs(sigpow/errpow) )
|
snr = 10*math.log10(abs(sigpow/errpow) )
|
||||||
print 'SNR (compared to NumPy) : %.1fdB' % float(snr)
|
print 'SNR (compared to NumPy) : %.1fdB' % float(snr)
|
||||||
|
|
||||||
@ -108,10 +107,9 @@ def test_fft(ndims):
|
|||||||
print 'err',err
|
print 'err',err
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def dofft(x):
|
def dofft(x,isreal):
|
||||||
dims=list( Numeric.shape(x) )
|
dims=list( numpy.shape(x) )
|
||||||
x = flatten(x)
|
x = flatten(x)
|
||||||
iscomp = (type(x[0]) == complex)
|
|
||||||
|
|
||||||
scale=1
|
scale=1
|
||||||
if datatype=='int16_t':
|
if datatype=='int16_t':
|
||||||
@ -126,11 +124,12 @@ def dofft(x):
|
|||||||
if doreal:
|
if doreal:
|
||||||
cmd += ' -R '
|
cmd += ' -R '
|
||||||
|
|
||||||
|
print cmd
|
||||||
p = popen2.Popen3(cmd )
|
p = popen2.Popen3(cmd )
|
||||||
|
|
||||||
open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) )
|
open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) )
|
||||||
|
|
||||||
p.tochild.write( dopack( x , iscomp ) )
|
p.tochild.write( dopack( x , isreal==False ) )
|
||||||
p.tochild.close()
|
p.tochild.close()
|
||||||
|
|
||||||
res = dounpack( p.fromchild.read() , 1 )
|
res = dounpack( p.fromchild.read() , 1 )
|
||||||
@ -141,7 +140,7 @@ def dofft(x):
|
|||||||
res = scale * res
|
res = scale * res
|
||||||
|
|
||||||
p.wait()
|
p.wait()
|
||||||
return Numeric.reshape(res,dims)
|
return numpy.reshape(res,dims)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
opts,args = getopt.getopt(sys.argv[1:],'r')
|
opts,args = getopt.getopt(sys.argv[1:],'r')
|
||||||
|
@ -89,6 +89,6 @@ int main(int argc,char ** argv)
|
|||||||
if (snr>maxsnr) maxsnr=snr;
|
if (snr>maxsnr) maxsnr=snr;
|
||||||
|
|
||||||
printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
|
printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
|
||||||
printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) );
|
printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS)
|
|||||||
# tip: try -openmp or -fopenmp to use multiple cores
|
# tip: try -openmp or -fopenmp to use multiple cores
|
||||||
|
|
||||||
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
|
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
|
||||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
|
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm
|
||||||
|
|
||||||
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
|
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
|
||||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
|
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm
|
||||||
|
|
||||||
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
|
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
|
||||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
$(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
|
$(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
|
||||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+
|
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm
|
||||||
|
|
||||||
$(DUMPHDR): ../kiss_fft.c dumphdr.c
|
$(DUMPHDR): ../kiss_fft.c dumphdr.c
|
||||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
|
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*
|
rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*
|
||||||
|
@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
|
|||||||
if (cur== NULL) {
|
if (cur== NULL) {
|
||||||
/* no cached node found, need to create a new one*/
|
/* no cached node found, need to create a new one*/
|
||||||
kiss_fft_alloc(nfft,inverse,0,&len);
|
kiss_fft_alloc(nfft,inverse,0,&len);
|
||||||
|
#ifdef USE_SIMD
|
||||||
|
int padding = (16-sizeof(struct cached_fft)) & 15;
|
||||||
|
// make sure the cfg aligns on a 16 byte boundary
|
||||||
|
len += padding;
|
||||||
|
#endif
|
||||||
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
|
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
|
||||||
if (cur == NULL)
|
if (cur == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
cur->cfg = (kiss_fft_cfg)(cur+1);
|
cur->cfg = (kiss_fft_cfg)(cur+1);
|
||||||
|
#ifdef USE_SIMD
|
||||||
|
cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
|
||||||
|
#endif
|
||||||
kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
|
kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
|
||||||
cur->nfft=nfft;
|
cur->nfft=nfft;
|
||||||
cur->inverse=inverse;
|
cur->inverse=inverse;
|
||||||
|
@ -362,7 +362,7 @@ void do_file_filter(
|
|||||||
n_samps_buf = 8*4096/sizeof(kffsamp_t);
|
n_samps_buf = 8*4096/sizeof(kffsamp_t);
|
||||||
n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
|
n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
|
||||||
|
|
||||||
if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
|
if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
|
||||||
|
|
||||||
|
|
||||||
/*allocate space and initialize pointers */
|
/*allocate space and initialize pointers */
|
||||||
@ -449,10 +449,12 @@ int main(int argc,char**argv)
|
|||||||
}
|
}
|
||||||
fseek(filtfile,0,SEEK_END);
|
fseek(filtfile,0,SEEK_END);
|
||||||
nh = ftell(filtfile) / sizeof(kffsamp_t);
|
nh = ftell(filtfile) / sizeof(kffsamp_t);
|
||||||
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh);
|
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
|
||||||
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
|
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
|
||||||
fseek(filtfile,0,SEEK_SET);
|
fseek(filtfile,0,SEEK_SET);
|
||||||
fread(h,sizeof(kffsamp_t),nh,filtfile);
|
if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
|
||||||
|
fprintf(stderr,"short read on filter file\n");
|
||||||
|
|
||||||
fclose(filtfile);
|
fclose(filtfile);
|
||||||
|
|
||||||
if (use_direct)
|
if (use_direct)
|
||||||
|
@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m
|
|||||||
size_t memneeded = sizeof(struct kiss_fftnd_state);
|
size_t memneeded = sizeof(struct kiss_fftnd_state);
|
||||||
char * ptr;
|
char * ptr;
|
||||||
|
|
||||||
size_t pad = memneeded % sizeof(DATATYPE);
|
|
||||||
|
|
||||||
for (i=0;i<ndims;++i) {
|
for (i=0;i<ndims;++i) {
|
||||||
size_t sublen=0;
|
size_t sublen=0;
|
||||||
kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
|
kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
|
||||||
|
Loading…
Reference in New Issue
Block a user