refactor(tracker): split Volume to PeakVolume and AverageVolume

2025-07-18 21:14:31 -04:00 · 2023-10-19 22:16:13 +03:00
parent 1a8a317464
commit 50ccfe03da
5 changed files with 65 additions and 47 deletions
--- a/tracker/volume.go
+++ b/tracker/volume.go
@ -7,53 +7,61 @@ import (
 	"github.com/vsariola/sointu"
 )

-// Volume represents an average and peak volume measurement, in decibels. 0 dB =
-// signal level of +-1.
-type Volume struct {
-	Average [2]float64
-	Peak    [2]float64
-}
+type (
+	Volume [2]float64

-// Analyze updates Average and Peak fields, by analyzing the given buffer.
+	// VolumeAnalyzer measures the volume in an AudioBuffer, in decibels relative to
+	// full scale (0 dB = signal level of +-1)
+	VolumeAnalyzer struct {
+		Level   Volume  // current volume level of left and right channels
+		Attack  float64 // attack time constant in seconds
+		Release float64 // release time constant in seconds
+		Min     float64 // minimum volume in decibels
+		Max     float64 // maximum volume in decibels
+	}
+)
+
+var nanError = errors.New("NaN detected in master output")
+
+// Update updates the Level field, by analyzing the given buffer.
 //
 // Internally, it first converts the signal to decibels (0 dB = +-1). Then, the
 // average volume level is computed by smoothing the decibel values with a
-// exponentially decaying average, with a time constant tau (in seconds).
-// Typical value could be 0.3 (seconds).
+// exponentially decaying average, with a time constant Attack (in seconds) if
+// the decibel value is greater than current level and time constant Release (in
+// seconds) if the decibel value is less than current level.
 //
-// Peak volume detection is similar exponential smoothing, but the time
-// constants for attack and release are different. Generally attack << release.
-// Typical values could be attack 1.5e-3 and release 1.5 (seconds)
+// Typical time constants for average level detection would be 0.3 seconds for
+// both attack and release. For peak level detection, attack could be 1.5e-3 and
+// release 1.5 (seconds)
 //
-// minVolume and maxVolume are hard limits in decibels to prevent negative
+// Min and Max are hard limits in decibels to prevent negative
 // infinities for volumes
-func (v *Volume) Analyze(buffer sointu.AudioBuffer, tau float64, attack float64, release float64, minVolume float64, maxVolume float64) error {
-	alpha := 1 - math.Exp(-1.0/(tau*44100)) // from https://en.wikipedia.org/wiki/Exponential_smoothing
-	alphaAttack := 1 - math.Exp(-1.0/(attack*44100))
-	alphaRelease := 1 - math.Exp(-1.0/(release*44100))
-	var err error
+func (v *VolumeAnalyzer) Update(buffer sointu.AudioBuffer) (err error) {
+	// from https://en.wikipedia.org/wiki/Exponential_smoothing
+	alphaAttack := 1 - math.Exp(-1.0/(v.Attack*44100))
+	alphaRelease := 1 - math.Exp(-1.0/(v.Release*44100))
 	for j := 0; j < 2; j++ {
 		for i := 0; i < len(buffer); i++ {
 			sample2 := float64(buffer[i][j] * buffer[i][j])
 			if math.IsNaN(sample2) {
 				if err == nil {
-					err = errors.New("NaN detected in master output")
+					err = nanError
 				}
 				continue
 			}
-			dB := 10 * math.Log10(float64(sample2))
-			if dB < minVolume || math.IsNaN(dB) {
-				dB = minVolume
+			dB := 10 * math.Log10(sample2)
+			if dB < v.Min || math.IsNaN(dB) {
+				dB = v.Min
 			}
-			if dB > maxVolume {
-				dB = maxVolume
+			if dB > v.Max {
+				dB = v.Max
 			}
-			v.Average[j] += (dB - v.Average[j]) * alpha
-			alphaPeak := alphaAttack
-			if dB < v.Peak[j] {
-				alphaPeak = alphaRelease
+			a := alphaAttack
+			if dB < v.Level[j] {
+				a = alphaRelease
 			}
-			v.Peak[j] += (dB - v.Peak[j]) * alphaPeak
+			v.Level[j] += (dB - v.Level[j]) * a
 		}
 	}
 	return err