From 7974f0ff82eedc467fa7eda6ac2067f4026b0d0d Mon Sep 17 00:00:00 2001 From: Veikko Sariola Date: Wed, 30 Dec 2020 19:50:38 +0200 Subject: [PATCH] fix(x86): denormalize delay damp filters the damp filters, after input was switched off, cause the CPU to spike and the tracker audio to start chopping --- README.md | 8 ++++++++ templates/amd64-386/effects.asm | 7 +++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3802dfc..9c7ded9 100644 --- a/README.md +++ b/README.md @@ -274,6 +274,14 @@ Future goals combining multiple signals into one sync. Oh, and we probably should dump the whole thing also as a texture to the shader; to fly through the song, in a very literal way. + - **Find a solution for denormalized signals**. Denormalized floating point + numbers (floating point numbers that are very very small) can result in a 100x + CPU slowdown. We got hit by this already: the damp filters in delay units + were denormalizing, resulting in the synth being unusable in real time. Need + to investigate a) where denormalization can happen; b) how to prevent it: + add & subtract a small value; c) make this optional to the user. For a quick + explanation about the potential massive CPU hit, see + https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch Crazy ideas ----------- diff --git a/templates/amd64-386/effects.asm b/templates/amd64-386/effects.asm index 4efff3b..1eed166 100644 --- a/templates/amd64-386/effects.asm +++ b/templates/amd64-386/effects.asm @@ -315,6 +315,9 @@ su_op_delay_loop: fld dword [{{.Input "delay" "damp"}}] ; da s*(1-da) dr*y+s p*p*x fmul dword [{{.CX}}+su_delayline_wrk.filtstate] ; o*da s*(1-da) dr*y+s p*p*x, where o is stored faddp st1, st0 ; o*da+s*(1-da) dr*y+s p*p*x + {{- .Float 0.5 | .Prepare | indent 4}} + fadd dword [{{.Float 0.5 | .Use}}] ; add and sub small offset to prevent denormalization.
WARNING: this is highly important, as the damp filters might denormalize and give 100x CPU penalty + fsub dword [{{.Float 0.5 | .Use}}] ; See for example: https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch fst dword [{{.CX}}+su_delayline_wrk.filtstate] ; o'=o*da+s*(1-da), o' dr*y+s p*p*x fmul dword [{{.Input "delay" "feedback"}}] ; f*o' dr*y+s p*p*x fadd st0, st2 ; f*o'+p*p*x dr*y+s p*p*x @@ -333,8 +336,8 @@ su_op_delay_loop: fst dword [{{.CX}}+su_delayline_wrk.dcin] ; i'=s, s c*o-i faddp st1 ; s+c*o-i {{- .Float 0.5 | .Prepare | indent 4}} - fadd dword [{{.Float 0.5 | .Use}}] ; add and sub small offset to prevent denormalization - fsub dword [{{.Float 0.5 | .Use}}] + fadd dword [{{.Float 0.5 | .Use}}] ; add and sub small offset to prevent denormalization. WARNING: this is highly important, as low pass filters might denormalize and give 100x CPU penalty + fsub dword [{{.Float 0.5 | .Use}}] ; See for example: https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch fst dword [{{.CX}}+su_delayline_wrk.dcout] ; o'=s+c*o-i ret {{end}}