mirror of
https://github.com/vsariola/sointu.git
synced 2025-05-27 10:50:23 -04:00
424 lines
13 KiB
Go
424 lines
13 KiB
Go
package tracker
|
|
|
|
import (
|
|
"math"
|
|
|
|
"github.com/viterin/vek/vek32"
|
|
"github.com/vsariola/sointu"
|
|
)
|
|
|
|
type (
|
|
Detector struct {
|
|
broker *Broker
|
|
loudnessDetector loudnessDetector
|
|
peakDetector peakDetector
|
|
}
|
|
|
|
WeightingType int
|
|
LoudnessType int
|
|
PeakType int
|
|
|
|
Decibel float32
|
|
|
|
LoudnessResult [NumLoudnessTypes]Decibel
|
|
PeakResult [NumPeakTypes][2]Decibel
|
|
|
|
DetectorResult struct {
|
|
Loudness LoudnessResult
|
|
Peaks PeakResult
|
|
}
|
|
|
|
loudnessDetector struct {
|
|
weighting weighting
|
|
states [2][3]biquadState
|
|
powers [2]RingBuffer[float32] // 0 = momentary, 1 = short-term
|
|
averagedPowers [2][]float32
|
|
maxPowers [2]float32
|
|
integratedPower float32
|
|
tmp, tmp2 []float32
|
|
tmpbool []bool
|
|
}
|
|
|
|
biquadState struct {
|
|
x1, x2, y1, y2 float32
|
|
}
|
|
|
|
biquadCoeff struct {
|
|
b0, b1, b2, a1, a2 float32
|
|
}
|
|
|
|
weighting []biquadCoeff
|
|
|
|
peakDetector struct {
|
|
oversampling bool
|
|
states [2]oversamplerState
|
|
windows [2][2]RingBuffer[float32]
|
|
maxPower [2]float32
|
|
tmp, tmp2 []float32
|
|
}
|
|
|
|
oversamplerState struct {
|
|
history [11]float32
|
|
tmp, tmp2 []float32
|
|
}
|
|
)
|
|
|
|
const (
|
|
LoudnessMomentary LoudnessType = iota
|
|
LoudnessShortTerm
|
|
LoudnessMaxMomentary
|
|
LoudnessMaxShortTerm
|
|
LoudnessIntegrated
|
|
NumLoudnessTypes
|
|
)
|
|
|
|
const MAX_INTEGRATED_DATA = 10 * 60 * 60 // 1 hour of samples at 10 Hz (100 ms per sample)
|
|
|
|
const (
|
|
PeakMomentary PeakType = iota
|
|
PeakShortTerm
|
|
PeakIntegrated
|
|
NumPeakTypes
|
|
)
|
|
|
|
const (
|
|
KWeighting WeightingType = iota
|
|
AWeighting
|
|
CWeighting
|
|
NoWeighting
|
|
NumWeightingTypes
|
|
)
|
|
|
|
func NewDetector(b *Broker) *Detector {
|
|
return &Detector{
|
|
broker: b,
|
|
loudnessDetector: makeLoudnessDetector(KWeighting),
|
|
peakDetector: makePeakDetector(true),
|
|
}
|
|
}
|
|
|
|
func (s *Detector) Run() {
|
|
var chunkHistory sointu.AudioBuffer
|
|
for msg := range s.broker.ToDetector {
|
|
if msg.Reset {
|
|
s.loudnessDetector.reset()
|
|
s.peakDetector.reset()
|
|
}
|
|
if msg.Quit {
|
|
return
|
|
}
|
|
if msg.HasWeightingType {
|
|
s.loudnessDetector.weighting = weightings[WeightingType(msg.WeightingType)]
|
|
s.loudnessDetector.reset()
|
|
}
|
|
if msg.HasOversampling {
|
|
s.peakDetector.oversampling = msg.Oversampling
|
|
s.peakDetector.reset()
|
|
}
|
|
|
|
switch data := msg.Data.(type) {
|
|
case *sointu.AudioBuffer:
|
|
buf := *data
|
|
for {
|
|
var chunk sointu.AudioBuffer
|
|
if len(chunkHistory) > 0 && len(chunkHistory) < 4410 {
|
|
l := min(len(buf), 4410-len(chunkHistory))
|
|
chunkHistory = append(chunkHistory, buf[:l]...)
|
|
if len(chunkHistory) < 4410 {
|
|
break
|
|
}
|
|
chunk = chunkHistory
|
|
buf = buf[l:]
|
|
} else {
|
|
if len(buf) >= 4410 {
|
|
chunk = buf[:4410]
|
|
buf = buf[4410:]
|
|
} else {
|
|
chunkHistory = chunkHistory[:0]
|
|
chunkHistory = append(chunkHistory, buf...)
|
|
break
|
|
}
|
|
}
|
|
TrySend(s.broker.ToModel, MsgToModel{
|
|
HasDetectorResult: true,
|
|
DetectorResult: DetectorResult{
|
|
Loudness: s.loudnessDetector.update(chunk),
|
|
Peaks: s.peakDetector.update(chunk),
|
|
},
|
|
})
|
|
}
|
|
s.broker.PutAudioBuffer(data)
|
|
case func():
|
|
data()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close may theoretically block if the broker is full, but it should not happen in practice
|
|
func (s *Detector) Close() {
|
|
s.broker.ToDetector <- MsgToDetector{Quit: true}
|
|
}
|
|
|
|
func makeLoudnessDetector(weighting WeightingType) loudnessDetector {
|
|
return loudnessDetector{
|
|
weighting: weightings[weighting],
|
|
powers: [2]RingBuffer[float32]{
|
|
{Buffer: make([]float32, 4)}, // momentary loudness
|
|
{Buffer: make([]float32, 30)}, // short-term loudness
|
|
},
|
|
}
|
|
}
|
|
|
|
func makePeakDetector(oversampling bool) peakDetector {
|
|
return peakDetector{
|
|
oversampling: oversampling,
|
|
windows: [2][2]RingBuffer[float32]{
|
|
{{Buffer: make([]float32, 4)}, {Buffer: make([]float32, 4)}}, // momentary peaks
|
|
{{Buffer: make([]float32, 30)}, {Buffer: make([]float32, 30)}}, // short-term peaks
|
|
},
|
|
}
|
|
}
|
|
|
|
/*
|
|
From matlab: (we bake in the scale values to the numerator coefficients)
|
|
weightings = {'A-weighting','C-weighting','k-weighting'}
|
|
for j = 1:3
|
|
disp(weightings{j})
|
|
f = getFilter(weightingFilter(weightings{j},'SampleRate',44100)); f.Numerator, f.Denominator, f.ScaleValues
|
|
if j == 3 % k-weighting has non-zero gain at 1 kHz, so normalize it to 0 dB by scaling the first filter
|
|
[h,w] = freqz(f,[1000,1000],44100);
|
|
g = abs(h(1));
|
|
fprintf("Gain %f dB\n", 20*log10(abs(h(1))));
|
|
f.Numerator(1,:) = f.Numerator(1,:)/g;
|
|
end
|
|
for i = 1:size(f.Numerator,1); fprintf("b0: %.16f, b1: %.16f, b2: %.16f, a1: %.16f, a2: %.16f\n",f.Numerator(i,:)*f.ScaleValues(i),f.Denominator(i,2:end)); end
|
|
end
|
|
*/
|
|
var weightings = map[WeightingType]weighting{
|
|
AWeighting: {
|
|
{b0: 0.2556115104436430, b1: 0.5112230208872860, b2: 0.2556115104436430, a1: -0.1405360824207108, a2: 0.0049375976155402},
|
|
{b0: 1, b1: -2, b2: 1, a1: -1.8849012174287920, a2: 0.8864214718161675},
|
|
{b0: 1, b1: -2, b2: 1, a1: -1.9941388812663283, a2: 0.9941474694445309},
|
|
},
|
|
CWeighting: {
|
|
{b0: 0.2170124955461332, b1: 0.4340249910922664, b2: 0.2170124955461332, a1: -0.1405360824207108, a2: 0.0049375976155402},
|
|
{b0: 1, b1: -2, b2: 1, a1: -1.9941388812663283, a2: 0.9941474694445309},
|
|
},
|
|
KWeighting: {
|
|
{b0: 1.4128568659906546, b1: -2.4466647580657646, b2: 1.0789762991286349, a1: -1.6636551132560204, a2: 0.7125954280732254},
|
|
{b0: 0.9995600645425144, b1: -1.9991201290850289, b2: 0.9995600645425144, a1: -1.9891696736297957, a2: 0.9891990357870394},
|
|
},
|
|
NoWeighting: {},
|
|
}
|
|
|
|
// according to https://tech.ebu.ch/docs/tech/tech3341.pdf
|
|
// we have two sliding windows: momentary loudness = last 400 ms, short-term loudness = last 3 s
|
|
// display:
|
|
//
|
|
// momentary loudness = last analyzed 400 ms blcok
|
|
// short-term loudness = last analyzed 3 s block
|
|
//
|
|
// every 100 ms, we collect one data point of the momentary loudness (starting to play song again resets the data blocks)
|
|
// then:
|
|
//
|
|
// integrated loudness = the blocks are gated, and the average loudness of the gated blocks is calculated
|
|
// maximum momentary loudness = maximum of all the momentary blocks
|
|
// maximum short-term loudness = maximum of all the short-term blocks
|
|
func (d *loudnessDetector) update(chunk sointu.AudioBuffer) LoudnessResult {
|
|
l := max(len(chunk), MAX_INTEGRATED_DATA)
|
|
setSliceLength(&d.tmp, l)
|
|
setSliceLength(&d.tmp2, l)
|
|
setSliceLength(&d.tmpbool, l)
|
|
var total float32
|
|
for chn := range 2 {
|
|
// deinterleave the channels
|
|
for i := range chunk {
|
|
d.tmp[i] = chunk[i][chn]
|
|
}
|
|
// filter the signal with the weighting filter
|
|
for k := range d.weighting {
|
|
d.states[chn][k].Filter(d.tmp[:len(chunk)], d.weighting[k])
|
|
}
|
|
// square the samples
|
|
res := vek32.Mul_Into(d.tmp2, d.tmp[:len(chunk)], d.tmp[:len(chunk)])
|
|
// calculate the mean and add it to the total
|
|
total += vek32.Mean(res)
|
|
}
|
|
var ret [NumLoudnessTypes]Decibel
|
|
for i := range d.powers {
|
|
d.powers[i].WriteWrapSingle(total) // these are sliding windows of 4 and 30 power measurements (400 ms and 3 s aka momentary and short-term windows)
|
|
mean := vek32.Mean(d.powers[i].Buffer)
|
|
if len(d.averagedPowers[i]) < MAX_INTEGRATED_DATA { // we need to have some limit on how much data we keep
|
|
d.averagedPowers[i] = append(d.averagedPowers[i], mean)
|
|
}
|
|
if d.maxPowers[i] < mean {
|
|
d.maxPowers[i] = mean
|
|
}
|
|
ret[i+int(LoudnessMomentary)] = powerToDecibel(mean) // we assume the LoudnessMomentary is followed by LoudnessShortTerm
|
|
ret[i+int(LoudnessMaxMomentary)] = powerToDecibel(d.maxPowers[i])
|
|
}
|
|
if len(d.averagedPowers[0])%10 == 0 { // every 10 samples of 100 ms i.e. every 1 s, we recalculate the integrated power
|
|
absThreshold := decibelToPower(-70) // -70 dB is the first threshold
|
|
b := vek32.GtNumber_Into(d.tmpbool, d.averagedPowers[0], absThreshold)
|
|
m2 := vek32.Select_Into(d.tmp, d.averagedPowers[0], b)
|
|
if len(m2) > 0 {
|
|
relThreshold := vek32.Mean(m2) / 10 // the relative threshold is 10 dB below the mean of the values above the absolute threshold
|
|
b2 := vek32.GtNumber_Into(d.tmpbool, m2, relThreshold)
|
|
m3 := vek32.Select_Into(d.tmp2, m2, b2)
|
|
if len(m3) > 0 {
|
|
d.integratedPower = vek32.Mean(m3)
|
|
}
|
|
}
|
|
}
|
|
ret[LoudnessIntegrated] = powerToDecibel(d.integratedPower)
|
|
return ret
|
|
}
|
|
|
|
func (d *loudnessDetector) reset() {
|
|
for i := range d.powers {
|
|
d.powers[i].Cursor = 0
|
|
l := len(d.powers[i].Buffer)
|
|
d.powers[i].Buffer = d.powers[i].Buffer[:0]
|
|
d.powers[i].Buffer = append(d.powers[i].Buffer, make([]float32, l)...)
|
|
d.averagedPowers[i] = d.averagedPowers[i][:0]
|
|
d.maxPowers[i] = 0
|
|
}
|
|
// reset the biquad states
|
|
d.states = [2][3]biquadState{}
|
|
d.integratedPower = 0
|
|
}
|
|
|
|
func powerToDecibel(power float32) Decibel {
|
|
return Decibel(float32(10 * math.Log10(float64(power))))
|
|
}
|
|
|
|
func amplitudeToDecibel(amplitude float32) Decibel {
|
|
return Decibel(float32(20 * math.Log10(float64(amplitude))))
|
|
}
|
|
|
|
func decibelToPower(loudness Decibel) float32 {
|
|
return (float32)(math.Pow(10, (float64(loudness))/10))
|
|
}
|
|
|
|
func (state *biquadState) Filter(buffer []float32, coeff biquadCoeff) {
|
|
s := *state
|
|
for i := range buffer {
|
|
x := buffer[i]
|
|
y := coeff.b0*x + coeff.b1*s.x1 + coeff.b2*s.x2 - coeff.a1*s.y1 - coeff.a2*s.y2
|
|
s.x2, s.x1 = s.x1, x
|
|
s.y2, s.y1 = s.y1, y
|
|
buffer[i] = y
|
|
}
|
|
*state = s
|
|
}
|
|
|
|
func setSliceLength[T any](slice *[]T, length int) {
|
|
if len(*slice) < length {
|
|
*slice = append(*slice, make([]T, length-len(*slice))...)
|
|
}
|
|
*slice = (*slice)[:length]
|
|
}
|
|
|
|
// ref: https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.1770-5-202311-I!!PDF-E.pdf
|
|
var oversamplingCoeffs = [4][12]float32{
|
|
{0.0017089843750, 0.0109863281250, -0.0196533203125, 0.0332031250000, -0.0594482421875, 0.1373291015625, 0.9721679687500, -0.1022949218750, 0.0476074218750, -0.0266113281250, 0.0148925781250, -0.0083007812500},
|
|
{-0.0291748046875, 0.0292968750000, -0.0517578125000, 0.0891113281250, -0.1665039062500, 0.4650878906250, 0.7797851562500, -0.2003173828125, 0.1015625000000, -0.0582275390625, 0.0330810546875, -0.0189208984375},
|
|
{-0.0189208984375, 0.0330810546875, -0.058227539062, 0.1015625000000, -0.200317382812, 0.7797851562500, 0.4650878906250, -0.166503906250, 0.0891113281250, -0.051757812500, 0.0292968750000, -0.0291748046875},
|
|
{-0.0083007812500, 0.0148925781250, -0.0266113281250, 0.0476074218750, -0.1022949218750, 0.9721679687500, 0.1373291015625, -0.0594482421875, 0.0332031250000, -0.0196533203125, 0.0109863281250, 0.0017089843750},
|
|
}
|
|
|
|
// u[k] = x[k/4] if k%4 == 0, 0 otherwise
|
|
// y[k] = sum_{i=0}^{47} h[i] * u[k-i]
|
|
// h[i] = o[i%4][i/4]
|
|
// k = p*4+q, q=0..3
|
|
// y[p*4+q] = sum_{j=0}^{11} sum_{i=0}^{3} h[j*4+i] * u[p*4+q-j*4-i] = ...
|
|
// (q-i)%4 == 0 ==> i = q
|
|
// ... = sum_{j=0}^{11} o[q][j] * x[p-j]
|
|
// y should be at least 4 times the length of x
|
|
func (s *oversamplerState) Oversample(x []float32, y []float32) []float32 {
|
|
if len(s.tmp) < len(x) {
|
|
s.tmp = append(s.tmp, make([]float32, len(x)-len(s.tmp))...)
|
|
}
|
|
if len(s.tmp2) < len(x) {
|
|
s.tmp2 = append(s.tmp2, make([]float32, len(x)-len(s.tmp2))...)
|
|
}
|
|
for q, coeffs := range oversamplingCoeffs {
|
|
// tmp2 will be conv(o[q],x)
|
|
r := vek32.Zeros_Into(s.tmp2, len(x))
|
|
for j, c := range coeffs {
|
|
vek32.MulNumber_Into(s.tmp[:j], s.history[11-j:11], c) // convolution might pull values before x[0], so we need to use history for that
|
|
vek32.MulNumber_Into(s.tmp[j:], x[:len(x)-j], c)
|
|
vek32.Add_Inplace(r, s.tmp[:len(x)])
|
|
}
|
|
// interleave the phases
|
|
for p, v := range r {
|
|
y[p*4+q] = v
|
|
}
|
|
}
|
|
z := min(len(x), 11)
|
|
copy(s.history[:11-z], s.history[z:11])
|
|
copy(s.history[11-z:], x[len(x)-z:])
|
|
return y[:len(x)*4]
|
|
}
|
|
|
|
// we should perform the peak detection also momentary (last 400 ms), short term
|
|
// (last 3 s), and integrated (whole song) for display purposes, we can use
|
|
// always last arrived data for the integrated peak, we can use the maximum of
|
|
// all the peaks so far (there is no need show "maximum short term true peak" or
|
|
// "maximum momentary true peak" because they are same as the maximum for entire song)
|
|
//
|
|
// display:
|
|
//
|
|
// momentary true peak
|
|
// short-term true peak
|
|
// integrated true peak
|
|
func (d *peakDetector) update(buf sointu.AudioBuffer) (ret PeakResult) {
|
|
if len(d.tmp) < len(buf) {
|
|
d.tmp = append(d.tmp, make([]float32, len(buf)-len(d.tmp))...)
|
|
}
|
|
len4 := 4 * len(buf)
|
|
if len(d.tmp2) < len4 {
|
|
d.tmp2 = append(d.tmp2, make([]float32, len4-len(d.tmp2))...)
|
|
}
|
|
for chn := range 2 {
|
|
// deinterleave the channels
|
|
for i := range buf {
|
|
d.tmp[i] = buf[i][chn]
|
|
}
|
|
// 4x oversample the signal
|
|
var o []float32
|
|
if d.oversampling {
|
|
o = d.states[chn].Oversample(d.tmp[:len(buf)], d.tmp2)
|
|
} else {
|
|
o = d.tmp[:len(buf)]
|
|
}
|
|
// take absolute value of the oversampled signal
|
|
vek32.Abs_Inplace(o)
|
|
p := vek32.Max(o)
|
|
// find the maximum value in the window
|
|
for i := range d.windows {
|
|
d.windows[i][chn].WriteWrapSingle(p)
|
|
windowPeak := vek32.Max(d.windows[i][chn].Buffer)
|
|
ret[i+int(PeakMomentary)][chn] = amplitudeToDecibel(windowPeak)
|
|
}
|
|
if d.maxPower[chn] < p {
|
|
d.maxPower[chn] = p
|
|
}
|
|
ret[int(PeakIntegrated)][chn] = amplitudeToDecibel(d.maxPower[chn])
|
|
}
|
|
return
|
|
}
|
|
|
|
func (d *peakDetector) reset() {
|
|
for chn := range 2 {
|
|
d.states[chn].history = [11]float32{}
|
|
for i := range d.windows[chn] {
|
|
d.windows[i][chn].Cursor = 0
|
|
l := len(d.windows[i][chn].Buffer)
|
|
d.windows[i][chn].Buffer = d.windows[i][chn].Buffer[:0]
|
|
d.windows[i][chn].Buffer = append(d.windows[i][chn].Buffer, make([]float32, l)...)
|
|
}
|
|
d.maxPower[chn] = 0
|
|
}
|
|
}
|