From db2d9cac9dc938621b8bc05b540c233c78c0dde8 Mon Sep 17 00:00:00 2001
From: "5684185+vsariola@users.noreply.github.com"
 <5684185+vsariola@users.noreply.github.com>
Date: Wed, 19 Jun 2024 18:58:20 +0300
Subject: [PATCH] fix(vm): x87 native filter unit was denormalizing and eating
 up CPU

When a voice was silent, the exponential decays in the filter unit
caused the high-pass component to eventually become denormal
(subnormal), which resulted in high CPU load. The solution is the same
as in the delay unit: add a constant (0.5) to the value and then
subtract it again, which rounds the tiny value away and is essentially
a flush to zero.
https://en.wikipedia.org/wiki/Subnormal_number

Fixes #68.
---
 CHANGELOG.md                                | 2 ++
 vm/compiler/templates/amd64-386/effects.asm | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e3e3ab..8f91a73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
   x87 stack.
 
 ### Fixed
+- The x87 native filter unit was denormalizing and eating up a lot of CPU ([#68][i68])
 - Modulating delaytime in wasm could crash, because delay time was converted to
   int with i32.trunc_f32_u. Using i32.trunc_f32_s fixed this.
 - When recording notes from VSTI, no track was created for instruments that had
@@ -153,6 +154,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 [0.2.0]: https://github.com/vsariola/sointu/compare/v0.1.0...v0.2.0
 [0.1.0]: https://github.com/vsariola/sointu/compare/4klang-3.11...v0.1.0
 [i65]: https://github.com/vsariola/sointu/issues/65
+[i68]: https://github.com/vsariola/sointu/issues/68
 [i112]: https://github.com/vsariola/sointu/issues/112
 [i116]: https://github.com/vsariola/sointu/issues/116
 [i120]: https://github.com/vsariola/sointu/issues/120
diff --git a/vm/compiler/templates/amd64-386/effects.asm b/vm/compiler/templates/amd64-386/effects.asm
index 6d7d371..747331f 100644
--- a/vm/compiler/templates/amd64-386/effects.asm
+++ b/vm/compiler/templates/amd64-386/effects.asm
@@ -156,6 +156,9 @@ su_op_dbgain_mono:
     fsubp   st2, st0                        ; r x-l'
     fmul    dword [{{.WRK}}+8]  ; r*b x-l'
     fsubp   st1, st0                        ; x-l'-r*b
+    {{- .Float 0.5 | .Prepare | indent 4}}
+    fadd    dword [{{.Float 0.5 | .Use}}]           ; add and sub small offset to prevent denormalization
+    fsub    dword [{{.Float 0.5 | .Use}}]           ; See for example: https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch
     fst     dword [{{.WRK}}+4]  ; h'=x-l'-r*b
     fmul    dword [{{.WRK}}+12]                   ; f2*h'
     fadd    dword [{{.WRK}}+8]  ; f2*h'+b