mirror of
https://github.com/mborgerding/kissfft.git
synced 2025-05-27 21:20:27 -04:00
merge and fixes for RedHat 5.5 gcc 64bit
This commit is contained in:
commit
8a01c6085d
5
Makefile
5
Makefile
@ -2,7 +2,9 @@ KFVER=129
|
||||
|
||||
doc:
|
||||
@echo "Start by reading the README file. If you want to build and test lots of stuff, do a 'make testall'"
|
||||
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not"
|
||||
@echo "but be aware that 'make testall' has dependencies that the basic kissfft software does not."
|
||||
@echo "It is generally unneeded to run these tests yourself, unless you plan on changing the inner workings"
|
||||
@echo "of kissfft and would like to make use of its regression tests."
|
||||
|
||||
testall:
|
||||
# The simd and int32_t types may or may not work on your machine
|
||||
@ -11,6 +13,7 @@ testall:
|
||||
make -C test DATATYPE=int16_t CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=float CFLAGADD="$(CFLAGADD)" test
|
||||
make -C test DATATYPE=double CFLAGADD="$(CFLAGADD)" test
|
||||
echo "all tests passed"
|
||||
|
||||
tarball: clean
|
||||
hg archive -r v$(KFVER) -t tgz kiss_fft$(KFVER).tar.gz
|
||||
|
@ -1,4 +1,5 @@
|
||||
If you are reading this, it means you think you may be interested in using the SIMD extensions within kissfft.
|
||||
If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft
|
||||
to do 4 *separate* FFTs at once.
|
||||
|
||||
Beware! Beyond here there be dragons!
|
||||
|
||||
@ -29,7 +30,8 @@ Search on "SIMD alignment" for more info.
|
||||
|
||||
|
||||
Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.
|
||||
I have not run it -- use it at your own risk.
|
||||
I have not run it -- use it at your own risk. It appears to do 4xN and Nx4 transpositions
|
||||
(out of place).
|
||||
|
||||
void SSETools::pack128(float* target, float* source, unsigned long size128)
|
||||
{
|
||||
|
2
TIPS
2
TIPS
@ -21,7 +21,7 @@ Speed:
|
||||
Reducing code size:
|
||||
* remove some of the butterflies. There are currently butterflies optimized for radices
|
||||
2,3,4,5. It is worth mentioning that you can still use FFT sizes that contain
|
||||
these factors, they just won't be quite as fast. You can decide for yourself
|
||||
other factors, they just won't be quite as fast. You can decide for yourself
|
||||
whether to keep radix 2 or 4. If you do some work in this area, let me
|
||||
know what you find.
|
||||
|
||||
|
@ -7,7 +7,7 @@ CFLAGS=-O3 -I.. -I../tools $(WARNINGS)
|
||||
CFLAGS+=-ffast-math -fomit-frame-pointer
|
||||
#CFLAGS+=-funroll-loops
|
||||
#CFLAGS+=-march=prescott
|
||||
CFLAGS+= -mtune=native
|
||||
#CFLAGS+= -mtune=native
|
||||
# TIP: try adding -openmp or -fopenmp to enable OPENMP directives and use of multiple cores
|
||||
#CFLAGS+=-fopenmp
|
||||
CFLAGS+= $(CFLAGADD)
|
||||
@ -66,20 +66,20 @@ tools:
|
||||
|
||||
|
||||
$(SELFTEST): $(SELFTESTSRC) $(SRCFILES)
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||
|
||||
$(TESTKFC): $(SRCFILES)
|
||||
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) -DKFC_TEST $(TYPEFLAGS) $+ -lm
|
||||
|
||||
$(TESTREAL): test_real.c $(SRCFILES)
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||
|
||||
$(BENCHKISS): benchkiss.c $(SRCFILES)
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) $(TYPEFLAGS) $+ -lm
|
||||
|
||||
$(BENCHFFTW): benchfftw.c pstats.c
|
||||
@echo "======attempting to build FFTW benchmark"
|
||||
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ -lm $(FFTWLIB) $(FFTWLIBDIR) || echo "FFTW not available for comparison"
|
||||
@$(CC) -o $@ $(CFLAGS) -DDATATYPE$(DATATYPE) $+ $(FFTWLIB) $(FFTWLIBDIR) -lm || echo "FFTW not available for comparison"
|
||||
|
||||
test: all
|
||||
@./$(TESTKFC)
|
||||
@ -101,7 +101,7 @@ selftest_short.c:
|
||||
|
||||
CXXFLAGS=-O3 -ffast-math -fomit-frame-pointer -I.. -I../tools -W -Wall
|
||||
testcpp: testcpp.cc ../kissfft.hh
|
||||
$(CXX) -o $@ $(CXXFLAGS) -lm testcpp.cc
|
||||
$(CXX) -o $@ $(CXXFLAGS) testcpp.cc -lm
|
||||
|
||||
|
||||
clean:
|
||||
|
@ -7,8 +7,7 @@ import random
|
||||
import struct
|
||||
import popen2
|
||||
import getopt
|
||||
import Numeric
|
||||
import FFT
|
||||
import numpy
|
||||
|
||||
pi=math.pi
|
||||
e=math.e
|
||||
@ -26,7 +25,7 @@ elif datatype=='int16_t':
|
||||
fmt='h'
|
||||
minsnr=10
|
||||
elif datatype=='int32_t':
|
||||
fmt='l'
|
||||
fmt='i'
|
||||
elif datatype=='simd':
|
||||
fmt='4f'
|
||||
sys.stderr.write('testkiss.py does not yet test simd')
|
||||
@ -39,21 +38,21 @@ else:
|
||||
|
||||
|
||||
def dopack(x,cpx=1):
|
||||
x = Numeric.reshape( x, ( Numeric.size(x),) )
|
||||
x = numpy.reshape( x, ( numpy.size(x),) )
|
||||
|
||||
if cpx:
|
||||
s = ''.join( [ struct.pack(fmt*2,c.real,c.imag) for c in x ] )
|
||||
else:
|
||||
s = ''.join( [ struct.pack(fmt,c) for c in x ] )
|
||||
s = ''.join( [ struct.pack(fmt,c.real) for c in x ] )
|
||||
return s
|
||||
|
||||
def dounpack(x,cpx):
|
||||
uf = fmt * ( len(x) / struct.calcsize(fmt) )
|
||||
s = struct.unpack(uf,x)
|
||||
if cpx:
|
||||
return Numeric.array(s[::2]) + Numeric.array( s[1::2] )*j
|
||||
return numpy.array(s[::2]) + numpy.array( s[1::2] )*j
|
||||
else:
|
||||
return Numeric.array(s )
|
||||
return numpy.array(s )
|
||||
|
||||
def make_random(dims=[1]):
|
||||
res = []
|
||||
@ -67,11 +66,11 @@ def make_random(dims=[1]):
|
||||
res.append( complex(r,i) )
|
||||
else:
|
||||
res.append( make_random( dims[1:] ) )
|
||||
return Numeric.array(res)
|
||||
return numpy.array(res)
|
||||
|
||||
def flatten(x):
|
||||
ntotal = Numeric.product(Numeric.shape(x))
|
||||
return Numeric.reshape(x,(ntotal,))
|
||||
ntotal = numpy.size(x)
|
||||
return numpy.reshape(x,(ntotal,))
|
||||
|
||||
def randmat( ndims ):
|
||||
dims=[]
|
||||
@ -85,20 +84,20 @@ def randmat( ndims ):
|
||||
def test_fft(ndims):
|
||||
x=randmat( ndims )
|
||||
|
||||
print 'dimensions=%s' % str( Numeric.shape(x) ),
|
||||
|
||||
if doreal:
|
||||
xver = FFT.real_fftnd(x)
|
||||
xver = numpy.fft.rfftn(x)
|
||||
else:
|
||||
xver = FFT.fftnd(x)
|
||||
xver = numpy.fft.fftn(x)
|
||||
|
||||
open('/tmp/fftexp.dat','w').write(dopack( flatten(xver) , True ) )
|
||||
|
||||
x2=dofft(x)
|
||||
x2=dofft(x,doreal)
|
||||
err = xver - x2
|
||||
errf = flatten(err)
|
||||
xverf = flatten(xver)
|
||||
errpow = Numeric.vdot(errf,errf)+1e-10
|
||||
sigpow = Numeric.vdot(xverf,xverf)+1e-10
|
||||
errpow = numpy.vdot(errf,errf)+1e-10
|
||||
sigpow = numpy.vdot(xverf,xverf)+1e-10
|
||||
snr = 10*math.log10(abs(sigpow/errpow) )
|
||||
print 'SNR (compared to NumPy) : %.1fdB' % float(snr)
|
||||
|
||||
@ -108,10 +107,9 @@ def test_fft(ndims):
|
||||
print 'err',err
|
||||
sys.exit(1)
|
||||
|
||||
def dofft(x):
|
||||
dims=list( Numeric.shape(x) )
|
||||
def dofft(x,isreal):
|
||||
dims=list( numpy.shape(x) )
|
||||
x = flatten(x)
|
||||
iscomp = (type(x[0]) == complex)
|
||||
|
||||
scale=1
|
||||
if datatype=='int16_t':
|
||||
@ -126,11 +124,12 @@ def dofft(x):
|
||||
if doreal:
|
||||
cmd += ' -R '
|
||||
|
||||
print cmd
|
||||
p = popen2.Popen3(cmd )
|
||||
|
||||
open('/tmp/fftin.dat','w').write(dopack( x , iscomp ) )
|
||||
open('/tmp/fftin.dat','w').write(dopack( x , isreal==False ) )
|
||||
|
||||
p.tochild.write( dopack( x , iscomp ) )
|
||||
p.tochild.write( dopack( x , isreal==False ) )
|
||||
p.tochild.close()
|
||||
|
||||
res = dounpack( p.fromchild.read() , 1 )
|
||||
@ -141,7 +140,7 @@ def dofft(x):
|
||||
res = scale * res
|
||||
|
||||
p.wait()
|
||||
return Numeric.reshape(res,dims)
|
||||
return numpy.reshape(res,dims)
|
||||
|
||||
def main():
|
||||
opts,args = getopt.getopt(sys.argv[1:],'r')
|
||||
|
@ -89,6 +89,6 @@ int main(int argc,char ** argv)
|
||||
if (snr>maxsnr) maxsnr=snr;
|
||||
|
||||
printf("TwoToneTest: snr ranges from %ddB to %ddB\n",(int)minsnr,(int)maxsnr);
|
||||
printf("sizeof(kiss_fft_scalar) = %d\n",sizeof(kiss_fft_scalar) );
|
||||
printf("sizeof(kiss_fft_scalar) = %d\n",(int)sizeof(kiss_fft_scalar) );
|
||||
return 0;
|
||||
}
|
||||
|
@ -44,19 +44,19 @@ CFLAGS=-Wall -O3 $(WARNINGS)
|
||||
# tip: try -openmp or -fopenmp to use multiple cores
|
||||
|
||||
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR $+ -DFAST_FILT_UTIL -lm
|
||||
|
||||
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -DFAST_FILT_UTIL -lm
|
||||
|
||||
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c kiss_fftndr.c
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
|
||||
|
||||
$(PSDPNG): ../kiss_fft.c psdpng.c kiss_fftr.c
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm -lpng $+
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lpng -lm
|
||||
|
||||
$(DUMPHDR): ../kiss_fft.c dumphdr.c
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+
|
||||
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) $+ -lm
|
||||
|
||||
clean:
|
||||
rm -f *~ fft fft_* fastconv fastconv_* fastconvr fastconvr_* psdpng psdpng_*
|
||||
|
@ -42,10 +42,18 @@ static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
|
||||
if (cur== NULL) {
|
||||
/* no cached node found, need to create a new one*/
|
||||
kiss_fft_alloc(nfft,inverse,0,&len);
|
||||
#ifdef USE_SIMD
|
||||
int padding = (16-sizeof(struct cached_fft)) & 15;
|
||||
// make sure the cfg aligns on a 16 byte boundary
|
||||
len += padding;
|
||||
#endif
|
||||
cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
|
||||
if (cur == NULL)
|
||||
return NULL;
|
||||
cur->cfg = (kiss_fft_cfg)(cur+1);
|
||||
#ifdef USE_SIMD
|
||||
cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
|
||||
#endif
|
||||
kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
|
||||
cur->nfft=nfft;
|
||||
cur->inverse=inverse;
|
||||
|
@ -362,7 +362,7 @@ void do_file_filter(
|
||||
n_samps_buf = 8*4096/sizeof(kffsamp_t);
|
||||
n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
|
||||
|
||||
if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
|
||||
if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
|
||||
|
||||
|
||||
/*allocate space and initialize pointers */
|
||||
@ -449,10 +449,12 @@ int main(int argc,char**argv)
|
||||
}
|
||||
fseek(filtfile,0,SEEK_END);
|
||||
nh = ftell(filtfile) / sizeof(kffsamp_t);
|
||||
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh);
|
||||
if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
|
||||
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
|
||||
fseek(filtfile,0,SEEK_SET);
|
||||
fread(h,sizeof(kffsamp_t),nh,filtfile);
|
||||
if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
|
||||
fprintf(stderr,"short read on filter file\n");
|
||||
|
||||
fclose(filtfile);
|
||||
|
||||
if (use_direct)
|
||||
|
@ -33,8 +33,6 @@ kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*m
|
||||
size_t memneeded = sizeof(struct kiss_fftnd_state);
|
||||
char * ptr;
|
||||
|
||||
size_t pad = memneeded % sizeof(DATATYPE);
|
||||
|
||||
for (i=0;i<ndims;++i) {
|
||||
size_t sublen=0;
|
||||
kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
|
||||
|
Loading…
Reference in New Issue
Block a user