real fast convolution filtering works mostly, sometimes it does not

make all the samples ( obeserved with ./fastfir.py -n 1024 -r -l 100000 )
This commit is contained in:
Mark Borgerding 2004-01-01 18:51:17 +00:00
parent 0243552944
commit e749a114cb
4 changed files with 113 additions and 65 deletions

View File

@ -16,6 +16,7 @@ TESTREAL=tr_$(DATATYPE)
TESTKFC=tkfc_$(DATATYPE)
FFTUTIL=kf_$(DATATYPE)
FASTFILT=ff_$(DATATYPE)
FASTFILTREAL=ffr_$(DATATYPE)
ifeq "$(DATATYPE)" "short"
TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short
@ -34,14 +35,17 @@ endif
all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \
$(TESTKFC) $(FASTFILT)
$(TESTKFC) $(FASTFILT) $(FASTFILTREAL)
CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer
#-DUSE_SKIP
# If the above flags do not work, try the following
#CFLAGS=-Wall -O3
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c
@ -83,4 +87,4 @@ selftest_short.c:
./mk_test.py -s 10 12 14 > selftest_short.c
clean:
rm -f *~ bm_* st_* tr_* kf_* tkfc_*
rm -f *~ bm_* st_* tr_* kf_* tkfc_* ff_* *.pyc *.pyo

View File

@ -44,21 +44,32 @@ def fastfilter(sig,h,nfft=None):
return concatenate( res )
def main():
siglen = 1e4
import sys
from getopt import getopt
opts,args = getopt(sys.argv[1:],'rn:l:')
opts=dict(opts)
siglen = int(opts.get('-l',1e4 ) )
hlen = 50
nfft = 128
nfft = int(opts.get('-n',128) )
usereal = opts.has_key('-r')
print 'nfft=%d'%nfft
# make a signal
sig = make_random( siglen )
# make an impulse response
h = make_random( hlen )
#h=[1]*2+[0]*3
if usereal:
sig=[c.real for c in sig]
h=[c.real for c in h]
# perform MAC filtering
yslow = slowfilter(sig,h)
#print '<YSLOW>',yslow,'</YSLOW>'
#yfast = fastfilter(sig,h,nfft)
yfast = utilfastfilter(sig,h,nfft)
yfast = utilfastfilter(sig,h,nfft,usereal)
#print yfast
print 'len(yslow)=%d'%len(yslow)
print 'len(yfast)=%d'%len(yfast)
@ -69,16 +80,20 @@ def main():
print yslow[:5]
print yfast[:5]
def utilfastfilter(sig,h,nfft):
def utilfastfilter(sig,h,nfft,usereal):
import compfft
import os
open( 'sig.dat','w').write( compfft.dopack(sig,'f',1) )
open( 'h.dat','w').write( compfft.dopack(h,'f',1) )
cmd = './ff_float -n %d -i sig.dat -h h.dat -o out.dat' % nfft
open( 'sig.dat','w').write( compfft.dopack(sig,'f',not usereal) )
open( 'h.dat','w').write( compfft.dopack(h,'f',not usereal) )
if usereal:
util = './ffr_float'
else:
util = './ff_float'
cmd = 'time %s -n %d -i sig.dat -h h.dat -o out.dat' % (util, nfft)
print cmd
ec = os.system(cmd)
print 'exited->',ec
return compfft.dounpack(open('out.dat').read(),'f',1)
return compfft.dounpack(open('out.dat').read(),'f',not usereal)
if __name__ == "__main__":
main()

View File

@ -16,6 +16,7 @@ TESTREAL=tr_$(DATATYPE)
TESTKFC=tkfc_$(DATATYPE)
FFTUTIL=kf_$(DATATYPE)
FASTFILT=ff_$(DATATYPE)
FASTFILTREAL=ffr_$(DATATYPE)
ifeq "$(DATATYPE)" "short"
TYPEFLAGS=-DFIXED_POINT -Dkiss_fft_scalar=short
@ -34,14 +35,17 @@ endif
all: $(BENCHKISS) $(SELFTEST) $(BENCHFFTW) $(TESTREAL) $(FFTUTIL) \
$(TESTKFC) $(FASTFILT)
$(TESTKFC) $(FASTFILT) $(FASTFILTREAL)
CFLAGS=-Wall -O3 -pedantic -march=pentiumpro -ffast-math -fomit-frame-pointer
#-DUSE_SKIP
# If the above flags do not work, try the following
#CFLAGS=-Wall -O3
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(FASTFILTREAL): ../kiss_fft.c kiss_fastfir.c kiss_fftr.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -DREAL_FASTFIR -lm $+ -DFAST_FILT_UTIL
$(FASTFILT): ../kiss_fft.c kiss_fastfir.c
$(CC) -o $@ $(CFLAGS) -I.. $(TYPEFLAGS) -lm $+ -DFAST_FILT_UTIL
$(FFTUTIL): ../kiss_fft.c fftutil.c kiss_fftnd.c kiss_fftr.c
@ -83,4 +87,4 @@ selftest_short.c:
./mk_test.py -s 10 12 14 > selftest_short.c
clean:
rm -f *~ bm_* st_* tr_* kf_* tkfc_*
rm -f *~ bm_* st_* tr_* kf_* tkfc_* ff_* *.pyc *.pyo

View File

@ -14,12 +14,25 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#include "_kiss_fft_guts.h"
void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
#ifdef REAL_FASTFIR
#include "kiss_fftr.h"
typedef kiss_fft_scalar kffsamp_t;
#define FFT_ALLOC kiss_fftr_alloc
#define FFTFWD kiss_fftr
#define FFTINV kiss_fftri
#else
typedef kiss_fft_cpx kffsamp_t;
#define FFT_ALLOC kiss_fft_alloc
#define FFTFWD kiss_fft
#define FFTINV kiss_fft
#endif
void * kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
int nfft,void * mem,size_t*lenmem);
size_t kiss_fastfir(const void * cfg,
const kiss_fft_cpx *in, size_t nin,
kiss_fft_cpx *out, size_t nout);
const kffsamp_t *in, size_t nin,
kffsamp_t *out, size_t nout);
typedef struct {
int minus5; /*magic */
@ -28,15 +41,16 @@ typedef struct {
void * fftcfg;
void * ifftcfg;
kiss_fft_cpx * fir_freq_resp;
size_t n_freq_bins;
size_t bufin_idx;
kiss_fft_cpx * bufin;
kffsamp_t * bufin;
size_t bufout_idx;
kiss_fft_cpx * bufout;
kffsamp_t * bufout;
kiss_fft_cpx * tmpbuf;
}kiss_fastfir_state;
void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
void * kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
int nfft,void * mem,size_t*lenmem)
{
kiss_fastfir_state *st = NULL;
@ -45,6 +59,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
char * ptr;
size_t i;
float scale;
int n_freq_bins;
if (nfft<=0) {
/* determine fft size as next power of two at least 2x
@ -55,21 +70,27 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
nfft<<=1;
}while (i>>=1);
}
#ifdef REAL_FASTFIR
n_freq_bins = nfft/2 + 1;
#else
n_freq_bins = nfft;
#endif
/*fftcfg*/
kiss_fft_alloc (nfft, 0, NULL, &len_fftcfg);
FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
memneeded += len_fftcfg;
/*ifftcfg*/
kiss_fft_alloc (nfft, 1, NULL, &len_ifftcfg);
FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
memneeded += len_ifftcfg;
/* fir_freq_resp */
memneeded += sizeof(kiss_fft_cpx) * nfft;
memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
/* bufin */
memneeded += sizeof(kiss_fft_cpx) * nfft;
memneeded += sizeof(kffsamp_t) * nfft;
/* bufout */
memneeded += sizeof(kiss_fft_cpx) * nfft;
memneeded += sizeof(kffsamp_t) * nfft;
/* tmpbuf */
memneeded += sizeof(kiss_fft_cpx) * nfft;
memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
if (lenmem == NULL) {
st = (kiss_fastfir_state *) malloc (memneeded);
@ -84,6 +105,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
st->minus5 = -5;
st->nfft = nfft;
st->n_scrap = n_imp_resp-1;
st->n_freq_bins = n_freq_bins;
st->bufin_idx = 0;
st->bufout_idx = nfft;
ptr=(char*)(st+1);
@ -95,27 +117,27 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
ptr += len_ifftcfg;
st->fir_freq_resp = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft;
ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
st->bufin = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft;
st->bufin = (kffsamp_t*)ptr;
ptr += sizeof(kffsamp_t) * nfft;
st->bufout = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft;
st->bufout = (kffsamp_t*)ptr;
ptr += sizeof(kffsamp_t) * nfft;
st->tmpbuf = (kiss_fft_cpx*)ptr;
ptr += sizeof(kiss_fft_cpx) * nfft;
ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
kiss_fft_alloc (nfft,0,st->fftcfg , &len_fftcfg);
kiss_fft_alloc (nfft,1,st->ifftcfg , &len_ifftcfg);
FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
memset(st->fir_freq_resp,0,sizeof(kiss_fft_cpx)*nfft);
memcpy(st->fir_freq_resp,imp_resp,sizeof(kiss_fft_cpx)*n_imp_resp);
kiss_fft(st->fftcfg,st->fir_freq_resp,st->fir_freq_resp);
memset(st->bufin,0,sizeof(kffsamp_t)*nfft);
memcpy(st->bufin,imp_resp,sizeof(kffsamp_t)*n_imp_resp);
FFTFWD(st->fftcfg,st->bufin,st->fir_freq_resp);
scale = 1.0 / st->nfft;
for (i=0;i < st->nfft;++i) {
for ( i=0; i < st->n_freq_bins; ++i ) {
st->fir_freq_resp[i].r *= scale;
st->fir_freq_resp[i].i *= scale;
}
@ -125,7 +147,7 @@ void * kiss_fastfir_alloc(const kiss_fft_cpx * imp_resp,size_t n_imp_resp,
static
size_t write_output(kiss_fastfir_state *st,
kiss_fft_cpx *out,size_t * pnout,size_t zpadded)
kffsamp_t *out,size_t * pnout,size_t zpadded)
{
size_t nout = *pnout;
size_t n2flush = st->nfft - st->bufout_idx;
@ -133,7 +155,7 @@ size_t write_output(kiss_fastfir_state *st,
n2flush -= zpadded;
if ( nout < n2flush )
n2flush=nout;
memcpy(out,st->bufout + st->bufout_idx, sizeof(kiss_fft_cpx)*n2flush );
memcpy(out,st->bufout + st->bufout_idx, sizeof(kffsamp_t)*n2flush );
st->bufout_idx += n2flush;
*pnout = nout - n2flush;
return n2flush;
@ -147,31 +169,34 @@ static void do_fastconv(kiss_fastfir_state *st)
" output buffer size must be >= input buffer size,"
" %d samples lost\n",st->nfft - st->bufout_idx );
}
//FFT st->bufin to st->bufout
kiss_fft(st->fftcfg,st->bufin,st->bufout);
/*FFT st->bufin to st->tmpbuf*/
FFTFWD(st->fftcfg,st->bufin,st->tmpbuf);
// shift tail to front of input buffer
/* shift tail to front of input buffer*/
memcpy( st->bufin,
st->bufin + st->nfft - st->n_scrap,
sizeof(kiss_fft_cpx)*st->n_scrap);
//set input idx to the next input spot
/*set input idx to the next input spot*/
st->bufin_idx = st->n_scrap;
// multiply the frequency response of the input signal by
// that of the fir filter
for (i=0;i<st->nfft;++i)
C_MUL(st->tmpbuf[i],st->bufout[i],st->fir_freq_resp[i]);
/* multiply the frequency response of the input signal by*/
/* that of the fir filter*/
for ( i=0; i<st->n_freq_bins; ++i ) {
kiss_fft_cpx tmpsamp;
C_MUL(tmpsamp,st->tmpbuf[i],st->fir_freq_resp[i]);
st->tmpbuf[i] = tmpsamp;
}
// perform the inverse fft
kiss_fft(st->ifftcfg,st->tmpbuf,st->bufout);
/* perform the inverse fft*/
FFTINV(st->ifftcfg,st->tmpbuf,st->bufout);
// need to skip over junk caused by circular convolution
/* need to skip over junk caused by circular convolution*/
st->bufout_idx = st->n_scrap;
}
size_t kiss_fastfir(const void * cfg,
const kiss_fft_cpx *in, size_t nin,
kiss_fft_cpx *out, size_t nout_avail)
const kffsamp_t *in, size_t nin,
kffsamp_t *out, size_t nout_avail)
{
size_t nout_orig=nout_avail;
kiss_fastfir_state *st = ( kiss_fastfir_state *)cfg;
@ -188,13 +213,13 @@ size_t kiss_fastfir(const void * cfg,
}
while (nin--) {
// copy the input sample to bufin
/* copy the input sample to bufin*/
st->bufin[st->bufin_idx++] = *in++;
// when the input buffer is full, perform fast convolution
/* when the input buffer is full, perform fast convolution*/
if ( st->bufin_idx == st->nfft ) {
do_fastconv(st);
// write the output buffer
/* write the output buffer*/
out += write_output(st,out,&nout_avail,0);
}
}
@ -208,18 +233,18 @@ size_t kiss_fastfir(const void * cfg,
void do_filter(
FILE * fin,
FILE * fout,
const kiss_fft_cpx * imp_resp,
const kffsamp_t * imp_resp,
size_t n_imp_resp,
size_t nfft)
{
void * cfg = kiss_fastfir_alloc(imp_resp,n_imp_resp,nfft,0,0);
kiss_fft_cpx inbuf[BUFLEN],outbuf[BUFLEN];
kffsamp_t inbuf[BUFLEN],outbuf[BUFLEN];
size_t ninbuf,noutbuf;
do{
ninbuf = fread(inbuf,sizeof(kiss_fft_cpx),BUFLEN,fin );
// when ninbuf <= 0, that signals a flush
ninbuf = fread(inbuf,sizeof(inbuf[0]),BUFLEN,fin );
/* when ninbuf <= 0, that signals a flush*/
noutbuf = kiss_fastfir(cfg,inbuf,ninbuf,outbuf,BUFLEN);
if ( fwrite(outbuf,sizeof(kiss_fft_cpx),noutbuf,fout) != noutbuf ) {
if ( fwrite(outbuf,sizeof(outbuf[0]),noutbuf,fout) != noutbuf ) {
fprintf(stderr,"short write\n");
exit(1);
}
@ -231,7 +256,7 @@ void do_filter(
#include <unistd.h>
int main(int argc,char**argv)
{
kiss_fft_cpx * h;
kffsamp_t * h;
size_t nh,nfft=0;
FILE *fin=stdin;
FILE *fout=stdout;
@ -278,11 +303,11 @@ int main(int argc,char**argv)
exit(1);
}
fseek(filtfile,0,SEEK_END);
nh = ftell(filtfile) / sizeof(kiss_fft_cpx);
nh = ftell(filtfile) / sizeof(kffsamp_t);
fprintf(stderr,"%d samples in FIR filter\n",nh);
h = (kiss_fft_cpx*)malloc(sizeof(kiss_fft_cpx)*nh);
h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
fseek(filtfile,0,SEEK_SET);
fread(h,sizeof(kiss_fft_cpx),nh,filtfile);
fread(h,sizeof(kffsamp_t),nh,filtfile);
fclose(filtfile);
do_filter( fin, fout, h,nh,nfft);