real fast convolution filtering works mostly, sometimes it does not

make all the samples ( obeserved with ./fastfir.py -n 1024 -r -l 100000 )
This commit is contained in:
Mark Borgerding 2004-01-01 18:51:17 +00:00
parent 0243552944
commit e749a114cb
4 changed files with 113 additions and 65 deletions

View File

@ -16,6 +16,7 @@ TESTREAL=tr_$(DATATYPE)
TESTKFC=tkfc_$(DATATYPE) TESTKFC=tkfc_$(DATATYPE)
FFTUTIL=kf_$(DATATYPE) FFTUTIL=kf_$(DATATYPE)
FASTFILT=ff_$(DATATYPE) FASTFILT=ff_$(DATATYPE)
FASTFILTREAL=ffr_$(DATATYPE)
ifeq "$(DATATYPE)" "short" ifeq "$(DATATYPE)" "short"
TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short
@ -34,14 +35,17 @@ endif
all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \ all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \
$(TESTKFC) $(FASTFILT) $(TESTKFC) $(FASTFILT) $(FASTFILTREAL)
CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer
#-DUSE_SKIP #-DUSE_SKIP
# If the above flags do not work, try the following # If the above flags do not work, try the following
#CFLAGS=-Wall -O3 #CFLAGS=-Wall -O3
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c
@ -83,4 +87,4 @@ selftest_short.c:
./mk_test.py -s 10 12 14 > selftest_short.c ./mk_test.py -s 10 12 14 > selftest_short.c
clean: clean:
rm -f *~ bm_* st_* tr_* kf_* tkfc_* rm -f *~ bm_* st_* tr_* kf_* tkfc_* ff_* *.pyc *.pyo

View File

@ -44,21 +44,32 @@ def fastfilter(sig,h,nfft=None):
return concatenate( res ) return concatenate( res )
def main(): def main():
siglen = 1e4 import sys
from getopt import getopt
opts,args = getopt(sys.argv[1:],'rn:l:')
opts=dict(opts)
siglen = int(opts.get('-l',1e4 ) )
hlen = 50 hlen = 50
nfft = 128
nfft = int(opts.get('-n',128) )
usereal = opts.has_key('-r')
print 'nfft=%d'%nfft print 'nfft=%d'%nfft
# make a signal # make a signal
sig = make_random( siglen ) sig = make_random( siglen )
# make an impulse response # make an impulse response
h = make_random( hlen ) h = make_random( hlen )
#h=[1]*2+[0]*3 #h=[1]*2+[0]*3
if usereal:
sig=[c.real for c in sig]
h=[c.real for c in h]
# perform MAC filtering # perform MAC filtering
yslow = slowfilter(sig,h) yslow = slowfilter(sig,h)
#print '<YSLOW>',yslow,'</YSLOW>' #print '<YSLOW>',yslow,'</YSLOW>'
#yfast = fastfilter(sig,h,nfft) #yfast = fastfilter(sig,h,nfft)
yfast = utilfastfilter(sig,h,nfft) yfast = utilfastfilter(sig,h,nfft,usereal)
#print yfast #print yfast
print 'len(yslow)=%d'%len(yslow) print 'len(yslow)=%d'%len(yslow)
print 'len(yfast)=%d'%len(yfast) print 'len(yfast)=%d'%len(yfast)
@ -69,16 +80,20 @@ def main():
print yslow[:5] print yslow[:5]
print yfast[:5] print yfast[:5]
def utilfastfilter(sig,h,nfft): def utilfastfilter(sig,h,nfft,usereal):
import compfft import compfft
import os import os
open( 'sig.dat','w').write( compfft.dopack(sig,'f',1) ) open( 'sig.dat','w').write( compfft.dopack(sig,'f',not usereal) )
open( 'h.dat','w').write( compfft.dopack(h,'f',1) ) open( 'h.dat','w').write( compfft.dopack(h,'f',not usereal) )
cmd = './ff_float -n %d -i sig.dat -h h.dat -o out.dat' % nfft if usereal:
util = './ffr_float'
else:
util = './ff_float'
cmd = 'time %s -n %d -i sig.dat -h h.dat -o out.dat' % (util, nfft)
print cmd print cmd
ec = os.system(cmd) ec = os.system(cmd)
print 'exited->',ec print 'exited->',ec
return compfft.dounpack(open('out.dat').read(),'f',1) return compfft.dounpack(open('out.dat').read(),'f',not usereal)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -16,6 +16,7 @@ TESTREAL=tr_$(DATATYPE)
TESTKFC=tkfc_$(DATATYPE) TESTKFC=tkfc_$(DATATYPE)
FFTUTIL=kf_$(DATATYPE) FFTUTIL=kf_$(DATATYPE)
FASTFILT=ff_$(DATATYPE) FASTFILT=ff_$(DATATYPE)
FASTFILTREAL=ffr_$(DATATYPE)
ifeq "$(DATATYPE)" "short" ifeq "$(DATATYPE)" "short"
TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short
@ -34,14 +35,17 @@ endif
all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \ all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \
$(TESTKFC) $(FASTFILT) $(TESTKFC) $(FASTFILT) $(FASTFILTREAL)
CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer
#-DUSE_SKIP #-DUSE_SKIP
# If the above flags do not work, try the following # If the above flags do not work, try the following
#CFLAGS=-Wall -O3 #CFLAGS=-Wall -O3
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c $(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL $(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c $(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c
@ -83,4 +87,4 @@ selftest_short.c:
./mk_test.py -s 10 12 14 > selftest_short.c ./mk_test.py -s 10 12 14 > selftest_short.c
clean: clean:
rm -f *~ bm_* st_* tr_* kf_* tkfc_* rm -f *~ bm_* st_* tr_* kf_* tkfc_* ff_* *.pyc *.pyo

View File

@ -14,12 +14,25 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#include "_kiss_fft_guts.h" #include "_kiss_fft_guts.h"
void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp, #ifdef REAL_FASTFIR
#include "kiss_fftr.h"
typedef kiss_fft_scalar kffsamp_t;
#define FFT_ALLOC kiss_fftr_alloc
#define FFTFWD kiss_fftr
#define FFTINV kiss_fftri
#else
typedef kiss_fft_cpx kffsamp_t;
#define FFT_ALLOC kiss_fft_alloc
#define FFTFWD kiss_fft
#define FFTINV kiss_fft
#endif
void * kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
int nfft,void * mem,size_t*lenmem); int nfft,void * mem,size_t*lenmem);
size_t kiss_fastfir(const void * cfg, size_t kiss_fastfir(const void * cfg,
const kiss_fft_cpx *in, size_t nin, const kffsamp_t *in, size_t nin,
kiss_fft_cpx *out, size_t nout); kffsamp_t *out, size_t nout);
typedef struct { typedef struct {
int minus5; /*magic */ int minus5; /*magic */
@ -28,15 +41,16 @@ typedef struct {
void * fftcfg; void * fftcfg;
void * ifftcfg; void * ifftcfg;
kiss_fft_cpx * fir_freq_resp; kiss_fft_cpx * fir_freq_resp;
size_t n_freq_bins;
size_t bufin_idx; size_t bufin_idx;
kiss_fft_cpx * bufin; kffsamp_t * bufin;
size_t bufout_idx; size_t bufout_idx;
kiss_fft_cpx * bufout; kffsamp_t * bufout;
kiss_fft_cpx * tmpbuf; kiss_fft_cpx * tmpbuf;
}kiss_fastfir_state; }kiss_fastfir_state;
void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp, void * kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
int nfft,void * mem,size_t*lenmem) int nfft,void * mem,size_t*lenmem)
{ {
kiss_fastfir_state *st = NULL; kiss_fastfir_state *st = NULL;
@ -45,6 +59,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
char * ptr; char * ptr;
size_t i; size_t i;
float scale; float scale;
int n_freq_bins;
if (nfft<=0) { if (nfft<=0) {
/* determine fft size as next power of two at least 2x /* determine fft size as next power of two at least 2x
@ -55,21 +70,27 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
nfft<<=1; nfft<<=1;
}while (i>>=1); }while (i>>=1);
} }
#ifdef REAL_FASTFIR
n_freq_bins = nfft/2 + 1;
#else
n_freq_bins = nfft;
#endif
/*fftcfg*/ /*fftcfg*/
kiss_fft_alloc (nfft, 0, NULL, &len_fftcfg); FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
memneeded += len_fftcfg; memneeded += len_fftcfg;
/*ifftcfg*/ /*ifftcfg*/
kiss_fft_alloc (nfft, 1, NULL, &len_ifftcfg); FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
memneeded += len_ifftcfg; memneeded += len_ifftcfg;
/* fir_freq_resp */ /* fir_freq_resp */
memneeded += sizeof(kiss_fft_cpx) * nfft; memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
/* bufin */ /* bufin */
memneeded += sizeof(kiss_fft_cpx) * nfft; memneeded += sizeof(kffsamp_t) * nfft;
/* bufout */ /* bufout */
memneeded += sizeof(kiss_fft_cpx) * nfft; memneeded += sizeof(kffsamp_t) * nfft;
/* tmpbuf */ /* tmpbuf */
memneeded += sizeof(kiss_fft_cpx) * nfft; memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
if (lenmem == NULL) { if (lenmem == NULL) {
st = (kiss_fastfir_state *) malloc (memneeded); st = (kiss_fastfir_state *) malloc (memneeded);
@ -84,6 +105,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
st->minus5 = -5; st->minus5 = -5;
st->nfft = nfft; st->nfft = nfft;
st->n_scrap = n_imp_resp-1; st->n_scrap = n_imp_resp-1;
st->n_freq_bins = n_freq_bins;
st->bufin_idx = 0; st->bufin_idx = 0;
st->bufout_idx = nfft; st->bufout_idx = nfft;
ptr=(char*)(st+1); ptr=(char*)(st+1);
@ -95,27 +117,27 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
ptr += len_ifftcfg; ptr += len_ifftcfg;
st->fir_freq_resp = (kiss_fft_cpx*)ptr; st->fir_freq_resp = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft; ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
st->bufin = (kiss_fft_cpx*)ptr; st->bufin = (kffsamp_t*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft; ptr += sizeof(kffsamp_t) * nfft;
st->bufout = (kiss_fft_cpx*)ptr; st->bufout = (kffsamp_t*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft; ptr += sizeof(kffsamp_t) * nfft;
st->tmpbuf = (kiss_fft_cpx*)ptr; st->tmpbuf = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft; ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
kiss_fft_alloc (nfft,0,st->fftcfg , &len_fftcfg); FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
kiss_fft_alloc (nfft,1,st->ifftcfg , &len_ifftcfg); FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
memset(st->fir_freq_resp,0,sizeof(kiss_fft_cpx)*nfft); memset(st->bufin,0,sizeof(kffsamp_t)*nfft);
memcpy(st->fir_freq_resp,imp_resp,sizeof(kiss_fft_cpx)*n_imp_resp); memcpy(st->bufin,imp_resp,sizeof(kffsamp_t)*n_imp_resp);
kiss_fft(st->fftcfg,st->fir_freq_resp,st->fir_freq_resp); FFTFWD(st->fftcfg,st->bufin,st->fir_freq_resp);
scale = 1.0 / st->nfft; scale = 1.0 / st->nfft;
for (i=0;i < st->nfft;++i) { for ( i=0; i < st->n_freq_bins; ++i ) {
st->fir_freq_resp[i].r *= scale; st->fir_freq_resp[i].r *= scale;
st->fir_freq_resp[i].i *= scale; st->fir_freq_resp[i].i *= scale;
} }
@ -125,7 +147,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
static static
size_t write_output(kiss_fastfir_state *st, size_t write_output(kiss_fastfir_state *st,
kiss_fft_cpx *out,size_t * pnout,size_t zpadded) kffsamp_t *out,size_t * pnout,size_t zpadded)
{ {
size_t nout = *pnout; size_t nout = *pnout;
size_t n2flush = st->nfft - st->bufout_idx; size_t n2flush = st->nfft - st->bufout_idx;
@ -133,7 +155,7 @@ size_t write_output(kiss_fastfir_state *st,
n2flush -= zpadded; n2flush -= zpadded;
if ( nout < n2flush ) if ( nout < n2flush )
n2flush=nout; n2flush=nout;
memcpy(out,st->bufout + st->bufout_idx, sizeof(kiss_fft_cpx)*n2flush ); memcpy(out,st->bufout + st->bufout_idx, sizeof(kffsamp_t)*n2flush );
st->bufout_idx += n2flush; st->bufout_idx += n2flush;
*pnout = nout - n2flush; *pnout = nout - n2flush;
return n2flush; return n2flush;
@ -147,31 +169,34 @@ static void do_fastconv(kiss_fastfir_state *st)
" output buffer size must be >= input buffer size," " output buffer size must be >= input buffer size,"
" %d samples lost\n",st->nfft - st->bufout_idx ); " %d samples lost\n",st->nfft - st->bufout_idx );
} }
//FFT st->bufin to st->bufout /*FFT st->bufin to st->tmpbuf*/
kiss_fft(st->fftcfg,st->bufin,st->bufout); FFTFWD(st->fftcfg,st->bufin,st->tmpbuf);
// shift tail to front of input buffer /* shift tail to front of input buffer*/
memcpy( st->bufin, memcpy( st->bufin,
st->bufin + st->nfft - st->n_scrap, st->bufin + st->nfft - st->n_scrap,
sizeof(kiss_fft_cpx)*st->n_scrap); sizeof(kiss_fft_cpx)*st->n_scrap);
//set input idx to the next input spot /*set input idx to the next input spot*/
st->bufin_idx = st->n_scrap; st->bufin_idx = st->n_scrap;
// multiply the frequency response of the input signal by /* multiply the frequency response of the input signal by*/
// that of the fir filter /* that of the fir filter*/
for (i=0;i<st->nfft;++i) for ( i=0; i<st->n_freq_bins; ++i ) {
C_MUL(st->tmpbuf[i],st->bufout[i],st->fir_freq_resp[i]); kiss_fft_cpx tmpsamp;
C_MUL(tmpsamp,st->tmpbuf[i],st->fir_freq_resp[i]);
st->tmpbuf[i] = tmpsamp;
}
// perform the inverse fft /* perform the inverse fft*/
kiss_fft(st->ifftcfg,st->tmpbuf,st->bufout); FFTINV(st->ifftcfg,st->tmpbuf,st->bufout);
// need to skip over junk caused by circular convolution /* need to skip over junk caused by circular convolution*/
st->bufout_idx = st->n_scrap; st->bufout_idx = st->n_scrap;
} }
size_t kiss_fastfir(const void * cfg, size_t kiss_fastfir(const void * cfg,
const kiss_fft_cpx *in, size_t nin, const kffsamp_t *in, size_t nin,
kiss_fft_cpx *out, size_t nout_avail) kffsamp_t *out, size_t nout_avail)
{ {
size_t nout_orig=nout_avail; size_t nout_orig=nout_avail;
kiss_fastfir_state *st = ( kiss_fastfir_state *)cfg; kiss_fastfir_state *st = ( kiss_fastfir_state *)cfg;
@ -188,13 +213,13 @@ size_t kiss_fastfir(const void * cfg,
} }
while (nin--) { while (nin--) {
// copy the input sample to bufin /* copy the input sample to bufin*/
st->bufin[st->bufin_idx++] = *in++; st->bufin[st->bufin_idx++] = *in++;
// when the input buffer is full, perform fast convolution /* when the input buffer is full, perform fast convolution*/
if ( st->bufin_idx == st->nfft ) { if ( st->bufin_idx == st->nfft ) {
do_fastconv(st); do_fastconv(st);
// write the output buffer /* write the output buffer*/
out += write_output(st,out,&nout_avail,0); out += write_output(st,out,&nout_avail,0);
} }
} }
@ -208,18 +233,18 @@ size_t kiss_fastfir(const void * cfg,
void do_filter( void do_filter(
FILE * fin, FILE * fin,
FILE * fout, FILE * fout,
const kiss_fft_cpx * imp_resp, const kffsamp_t * imp_resp,
size_t n_imp_resp, size_t n_imp_resp,
size_t nfft) size_t nfft)
{ {
void * cfg = kiss_fastfir_alloc(imp_resp,n_imp_resp,nfft,0,0); void * cfg = kiss_fastfir_alloc(imp_resp,n_imp_resp,nfft,0,0);
kiss_fft_cpx inbuf[BUFLEN],outbuf[BUFLEN]; kffsamp_t inbuf[BUFLEN],outbuf[BUFLEN];
size_t ninbuf,noutbuf; size_t ninbuf,noutbuf;
do{ do{
ninbuf = fread(inbuf,sizeof(kiss_fft_cpx),BUFLEN,fin ); ninbuf = fread(inbuf,sizeof(inbuf[0]),BUFLEN,fin );
// when ninbuf <= 0, that signals a flush /* when ninbuf <= 0, that signals a flush*/
noutbuf = kiss_fastfir(cfg,inbuf,ninbuf,outbuf,BUFLEN); noutbuf = kiss_fastfir(cfg,inbuf,ninbuf,outbuf,BUFLEN);
if ( fwrite(outbuf,sizeof(kiss_fft_cpx),noutbuf,fout) != noutbuf ) { if ( fwrite(outbuf,sizeof(outbuf[0]),noutbuf,fout) != noutbuf ) {
fprintf(stderr,"short write\n"); fprintf(stderr,"short write\n");
exit(1); exit(1);
} }
@ -231,7 +256,7 @@ void do_filter(
#include <unistd.h> #include <unistd.h>
int main(int argc,char**argv) int main(int argc,char**argv)
{ {
kiss_fft_cpx * h; kffsamp_t * h;
size_t nh,nfft=0; size_t nh,nfft=0;
FILE *fin=stdin; FILE *fin=stdin;
FILE *fout=stdout; FILE *fout=stdout;
@ -278,11 +303,11 @@ int main(int argc,char**argv)
exit(1); exit(1);
} }
fseek(filtfile,0,SEEK_END); fseek(filtfile,0,SEEK_END);
nh = ftell(filtfile) / sizeof(kiss_fft_cpx); nh = ftell(filtfile) / sizeof(kffsamp_t);
fprintf(stderr,"%d samples in FIR filter\n",nh); fprintf(stderr,"%d samples in FIR filter\n",nh);
h = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx)*nh); h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
fseek(filtfile,0,SEEK_SET); fseek(filtfile,0,SEEK_SET);
fread(h,sizeof(kiss_fft_cpx),nh,filtfile); fread(h,sizeof(kffsamp_t),nh,filtfile);
fclose(filtfile); fclose(filtfile);
do_filter( fin, fout, h,nh,nfft); do_filter( fin, fout, h,nh,nfft);