mirror of
https://github.com/vsariola/sointu.git
synced 2025-05-28 03:10:24 -04:00
the damp filters, after input was switched off, cause the CPU to spike up and causing the tracker audio to start chopping
398 lines
19 KiB
NASM
398 lines
19 KiB
NASM
{{- if .HasOp "distort"}}
|
|
;-------------------------------------------------------------------------------
|
|
; DISTORT opcode: apply distortion on the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> x*a/(1-a+(2*a-1)*abs(x)) where x is clamped first
|
|
; Stereo: l r -> l*a/(1-a+(2*a-1)*abs(l)) r*a/(1-a+(2*a-1)*abs(r))
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_distort" "Opcode"}}
|
|
{{- if .Stereo "distort" -}}
|
|
{{.Call "su_effects_stereohelper"}}
|
|
{{- end}}
|
|
fld dword [{{.Input "distort" "drive"}}]
|
|
{{.TailCall "su_waveshaper"}}
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "hold"}}
|
|
;-------------------------------------------------------------------------------
|
|
; HOLD opcode: sample and hold the signal, reducing sample rate
|
|
;-------------------------------------------------------------------------------
|
|
; Mono version: holds the signal at a rate defined by the freq parameter
|
|
; Stereo version: holds both channels
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_hold" "Opcode"}}
|
|
{{- if .Stereo "hold"}}
|
|
{{.Call "su_effects_stereohelper"}}
|
|
{{- end}}
|
|
fld dword [{{.Input "hold" "holdfreq"}}] ; f x
|
|
fmul st0, st0 ; f^2 x
|
|
fchs ; -f^2 x
|
|
fadd dword [{{.WRK}}] ; p-f^2 x
|
|
fst dword [{{.WRK}}] ; p <- p-f^2
|
|
fldz ; 0 p x
|
|
fucomip st1 ; p x
|
|
fstp dword [{{.SP}}-4] ; t=p, x
|
|
jc short su_op_hold_holding ; if (0 < p) goto holding
|
|
fld1 ; 1 x
|
|
fadd dword [{{.SP}}-4] ; 1+t x
|
|
fstp dword [{{.WRK}}] ; x
|
|
fst dword [{{.WRK}}+4] ; save holded value
|
|
ret ; x
|
|
su_op_hold_holding:
|
|
fstp st0 ;
|
|
fld dword [{{.WRK}}+4] ; x
|
|
ret
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "crush"}}
|
|
;-------------------------------------------------------------------------------
|
|
; CRUSH opcode: quantize the signal to finite number of levels
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> e*int(x/e)
|
|
; Stereo: l r -> e*int(l/e) e*int(r/e)
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_crush" "Opcode"}}
|
|
{{- if .Stereo "crush"}}
|
|
{{.Call "su_effects_stereohelper"}}
|
|
{{- end}}
|
|
fdiv dword [{{.Input "crush" "resolution"}}]
|
|
frndint
|
|
fmul dword [{{.Input "crush" "resolution"}}]
|
|
ret
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "gain"}}
|
|
;-------------------------------------------------------------------------------
|
|
; GAIN opcode: apply gain on the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> x*g
|
|
; Stereo: l r -> l*g r*g
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_gain" "Opcode"}}
|
|
{{- if .Stereo "gain"}}
|
|
fld dword [{{.Input "gain" "gain"}}] ; g l (r)
|
|
{{- if .Mono "invgain"}}
|
|
jnc su_op_gain_mono
|
|
{{- end}}
|
|
fmul st2, st0 ; g l r/g
|
|
su_op_gain_mono:
|
|
fmulp st1, st0 ; l/g (r/)
|
|
ret
|
|
{{- else}}
|
|
fmul dword [{{.Input "gain" "gain"}}]
|
|
ret
|
|
{{- end}}
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "invgain"}}
|
|
;-------------------------------------------------------------------------------
|
|
; INVGAIN opcode: apply inverse gain on the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> x/g
|
|
; Stereo: l r -> l/g r/g
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_invgain" "Opcode"}}
|
|
{{- if .Stereo "invgain"}}
|
|
fld dword [{{.Input "invgain" "invgain"}}] ; g l (r)
|
|
{{- if .Mono "invgain"}}
|
|
jnc su_op_invgain_mono
|
|
{{- end}}
|
|
fdiv st2, st0 ; g l r/g
|
|
su_op_invgain_mono:
|
|
fdivp st1, st0 ; l/g (r/)
|
|
ret
|
|
{{- else}}
|
|
fdiv dword [{{.Input "invgain" "invgain"}}]
|
|
ret
|
|
{{- end}}
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "filter"}}
|
|
;-------------------------------------------------------------------------------
|
|
; FILTER opcode: perform low/high/band-pass/notch etc. filtering on the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> filtered(x)
|
|
; Stereo: l r -> filtered(l) filtered(r)
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_filter" "Opcode"}}
|
|
lodsb ; load the flags to al
|
|
{{- if .Stereo "filter"}}
|
|
{{.Call "su_effects_stereohelper"}}
|
|
{{- end}}
|
|
fld dword [{{.Input "filter" "resonance"}}] ; r x
|
|
fld dword [{{.Input "filter" "frequency"}}]; f r x
|
|
fmul st0, st0 ; f2 x (square the input so we never get negative and also have a smoother behaviour in the lower frequencies)
|
|
fst dword [{{.SP}}-4] ; f2 r x
|
|
fmul dword [{{.WRK}}+8] ; f2*b r x
|
|
fadd dword [{{.WRK}}] ; f2*b+l r x
|
|
fst dword [{{.WRK}}] ; l'=f2*b+l r x
|
|
fsubp st2, st0 ; r x-l'
|
|
fmul dword [{{.WRK}}+8] ; r*b x-l'
|
|
fsubp st1, st0 ; x-l'-r*b
|
|
fst dword [{{.WRK}}+4] ; h'=x-l'-r*b
|
|
fmul dword [{{.SP}}-4] ; f2*h'
|
|
fadd dword [{{.WRK}}+8] ; f2*h'+b
|
|
fstp dword [{{.WRK}}+8] ; b'=f2*h'+b
|
|
fldz ; 0
|
|
{{- if .SupportsParamValue "filter" "lowpass" 1}}
|
|
test al, byte 0x40
|
|
jz short su_op_filter_skiplowpass
|
|
fadd dword [{{.WRK}}]
|
|
su_op_filter_skiplowpass:
|
|
{{- end}}
|
|
{{- if .SupportsParamValue "filter" "bandpass" 1}}
|
|
test al, byte 0x20
|
|
jz short su_op_filter_skipbandpass
|
|
fadd dword [{{.WRK}}+8]
|
|
su_op_filter_skipbandpass:
|
|
{{- end}}
|
|
{{- if .SupportsParamValue "filter" "highpass" 1}}
|
|
test al, byte 0x10
|
|
jz short su_op_filter_skiphighpass
|
|
fadd dword [{{.WRK}}+4]
|
|
su_op_filter_skiphighpass:
|
|
{{- end}}
|
|
{{- if .SupportsParamValue "filter" "negbandpass" 1}}
|
|
test al, byte 0x08
|
|
jz short su_op_filter_skipnegbandpass
|
|
fsub dword [{{.WRK}}+8]
|
|
su_op_filter_skipnegbandpass:
|
|
{{- end}}
|
|
{{- if .SupportsParamValue "filter" "neghighpass" 1}}
|
|
test al, byte 0x04
|
|
jz short su_op_filter_skipneghighpass
|
|
fsub dword [{{.WRK}}+4]
|
|
su_op_filter_skipneghighpass:
|
|
{{- end}}
|
|
ret
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "clip"}}
|
|
;-------------------------------------------------------------------------------
|
|
; CLIP opcode: clips the signal into [-1,1] range
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: x -> min(max(x,-1),1)
|
|
; Stereo: l r -> min(max(l,-1),1) min(max(r,-1),1)
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_clip" "Opcode"}}
|
|
{{- if .Stereo "clip"}}
|
|
{{.Call "su_effects_stereohelper"}}
|
|
{{- end}}
|
|
{{.TailCall "su_clip"}}
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "pan" -}}
|
|
;-------------------------------------------------------------------------------
|
|
; PAN opcode: pan the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: s -> s*(1-p) s*p
|
|
; Stereo: l r -> l*(1-p) r*p
|
|
;
|
|
; where p is the panning in [0,1] range
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_pan" "Opcode"}}
|
|
{{- if .Stereo "pan"}}
|
|
jc su_op_pan_do ; this time, if this is mono op...
|
|
fld st0 ; ...we duplicate the mono into stereo first
|
|
su_op_pan_do:
|
|
fld dword [{{.Input "pan" "panning"}}] ; p l r
|
|
fld1 ; 1 p l r
|
|
fsub st1 ; 1-p p l r
|
|
fmulp st2 ; p (1-p)*l r
|
|
fmulp st2 ; (1-p)*l p*r
|
|
ret
|
|
{{- else}}
|
|
fld dword [{{.Input "pan" "panning"}}] ; p s
|
|
fmul st1 ; p*s s
|
|
fsub st1, st0 ; p*s s-p*s
|
|
; Equal to
|
|
; s*p s*(1-p)
|
|
fxch ; s*(1-p) s*p SHOULD PROBABLY DELETE, WHY BOTHER
|
|
ret
|
|
{{- end}}
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "delay"}}
|
|
;-------------------------------------------------------------------------------
|
|
; DELAY opcode: adds delay effect to the signal
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: perform delay on ST0, using delaycount delaylines starting
|
|
; at delayindex from the delaytable
|
|
; Stereo: perform delay on ST1, using delaycount delaylines starting
|
|
; at delayindex + delaycount from the delaytable (so the right delays
|
|
; can be different)
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_delay" "Opcode"}}
|
|
lodsw ; al = delay index, ah = delay count
|
|
{{- .PushRegs .VAL "DelayVal" .COM "DelayCom" | indent 4}}
|
|
movzx ebx, al
|
|
{{- if .Library}}
|
|
mov {{.SI}}, [{{.Stack "DelayTable"}}] ; when using runtime tables, delaytimes is pulled from the stack so can be a pointer to heap
|
|
lea {{.BX}}, [{{.SI}} + {{.BX}}*2]
|
|
{{- else}}
|
|
{{- .Prepare "su_delay_times" | indent 4}}
|
|
lea {{.BX}},[{{.Use "su_delay_times"}} + {{.BX}}*2] ; BX now points to the right position within delay time table
|
|
{{- end}}
|
|
movzx esi, word [{{.Stack "GlobalTick"}}] ; notice that we load word, so we wrap at 65536
|
|
mov {{.CX}}, {{.PTRWORD}} [{{.Stack "DelayWorkSpace"}}] ; {{.WRK}} is now the separate delay workspace, as they require a lot more space
|
|
{{- if .StereoAndMono "delay"}}
|
|
jnc su_op_delay_mono
|
|
{{- end}}
|
|
{{- if .Stereo "delay"}}
|
|
push {{.AX}} ; save _ah (delay count)
|
|
fxch ; r l
|
|
call su_op_delay_do ; D(r) l process delay for the right channel
|
|
pop {{.AX}} ; restore the count for second run
|
|
fxch ; l D(r)
|
|
su_op_delay_mono: ; flow into mono delay
|
|
{{- end}}
|
|
call su_op_delay_do ; when stereo delay is not enabled, we could inline this to save 5 bytes, but I expect stereo delay to be farely popular so maybe not worth the hassle
|
|
mov {{.PTRWORD}} [{{.Stack "DelayWorkSpace"}}],{{.CX}} ; move delay workspace pointer back to stack.
|
|
{{- .PopRegs .VAL .COM | indent 4}}
|
|
{{- if .SupportsModulation "delay" "delaytime"}}
|
|
xor eax, eax
|
|
mov dword [{{.Modulation "delay" "delaytime"}}], eax
|
|
{{- end}}
|
|
ret
|
|
|
|
;-------------------------------------------------------------------------------
|
|
; su_op_delay_do: executes the actual delay
|
|
;-------------------------------------------------------------------------------
|
|
; Pseudocode:
|
|
; q = dr*x
|
|
; for (i = 0;i < count;i++)
|
|
; s = b[(t-delaytime[i+offset])&65535]
|
|
; q += s
|
|
; o[i] = o[i]*da+s*(1-da)
|
|
; b[t] = f*o[i] +p^2*x
|
|
; Perform dc-filtering q and output q
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_delay_do"}} ; x y
|
|
fld st0
|
|
fmul dword [{{.Input "delay" "pregain"}}] ; p*x y
|
|
fmul dword [{{.Input "delay" "pregain"}}] ; p*p*x y
|
|
fxch ; y p*p*x
|
|
fmul dword [{{.Input "delay" "dry"}}] ; dr*y p*p*x
|
|
su_op_delay_loop:
|
|
{{- if or (.SupportsModulation "delay" "delaytime") (.SupportsParamValue "delay" "notetracking" 1)}} ; delaytime modulation or note syncing require computing the delay time in floats
|
|
fild word [{{.BX}}] ; k dr*y p*p*x, where k = delay time
|
|
{{- if .SupportsParamValue "delay" "notetracking" 1}}
|
|
test ah, 1 ; note syncing is the least significant bit of ah, 0 = ON, 1 = OFF
|
|
jne su_op_delay_skipnotesync
|
|
fild dword [{{.INP}}-su_voice.inputs+su_voice.note]
|
|
{{.Int 0x3DAAAAAA | .Prepare | indent 8}}
|
|
fmul dword [{{.Int 0x3DAAAAAA | .Use}}]
|
|
{{.Call "su_power"}}
|
|
fdivp st1, st0 ; use 10787 for delaytime to have neutral transpose
|
|
su_op_delay_skipnotesync:
|
|
{{- end}}
|
|
{{- if .SupportsModulation "delay" "delaytime"}}
|
|
fld dword [{{.Modulation "delay" "delaytime"}}]
|
|
{{- .Float 32767.0 | .Prepare | indent 8}}
|
|
fmul dword [{{.Float 32767.0 | .Use}}] ; scale it up, as the modulations would be too small otherwise
|
|
faddp st1, st0
|
|
{{- end}}
|
|
fistp dword [{{.SP}}-4] ; dr*y p*p*x, dword [{{.SP}}-4] = integer amount of delay (samples)
|
|
mov edi, esi ; edi = esi = current time
|
|
sub di, word [{{.SP}}-4] ; we perform the math in 16-bit to wrap around
|
|
{{- else}}
|
|
mov edi, esi
|
|
sub di, word [{{.BX}}] ; we perform the math in 16-bit to wrap around
|
|
{{- end}}
|
|
fld dword [{{.CX}}+su_delayline_wrk.buffer+{{.DI}}*4]; s dr*y p*p*x, where s is the sample from delay buffer
|
|
fadd st1, st0 ; s dr*y+s p*p*x (add comb output to current output)
|
|
fld1 ; 1 s dr*y+s p*p*x
|
|
fsub dword [{{.Input "delay" "damp"}}] ; 1-da s dr*y+s p*p*x
|
|
fmulp st1, st0 ; s*(1-da) dr*y+s p*p*x
|
|
fld dword [{{.Input "delay" "damp"}}] ; da s*(1-da) dr*y+s p*p*x
|
|
fmul dword [{{.CX}}+su_delayline_wrk.filtstate] ; o*da s*(1-da) dr*y+s p*p*x, where o is stored
|
|
faddp st1, st0 ; o*da+s*(1-da) dr*y+s p*p*x
|
|
{{- .Float 0.5 | .Prepare | indent 4}}
|
|
fadd dword [{{.Float 0.5 | .Use}}] ; add and sub small offset to prevent denormalization. WARNING: this is highly important, as the damp filters might denormalize and give 100x CPU penalty
|
|
fsub dword [{{.Float 0.5 | .Use}}] ; See for example: https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch
|
|
fst dword [{{.CX}}+su_delayline_wrk.filtstate] ; o'=o*da+s*(1-da), o' dr*y+s p*p*x
|
|
fmul dword [{{.Input "delay" "feedback"}}] ; f*o' dr*y+s p*p*x
|
|
fadd st0, st2 ; f*o'+p*p*x dr*y+s p*p*x
|
|
fstp dword [{{.CX}}+su_delayline_wrk.buffer+{{.SI}}*4]; save f*o'+p*p*x to delay buffer
|
|
add {{.BX}},2 ; move to next index
|
|
add {{.CX}}, su_delayline_wrk.size ; go to next delay delay workspace
|
|
sub ah, 2
|
|
jg su_op_delay_loop ; if ah > 0, goto loop
|
|
fstp st1 ; dr*y+s1+s2+s3+...
|
|
; DC-filtering
|
|
fld dword [{{.CX}}+su_delayline_wrk.dcout] ; o s
|
|
{{- .Float 0.99609375 | .Prepare | indent 4}}
|
|
fmul dword [{{.Float 0.99609375 | .Use}}] ; c*o s
|
|
fsub dword [{{.CX}}+su_delayline_wrk.dcin] ; c*o-i s
|
|
fxch ; s c*o-i
|
|
fst dword [{{.CX}}+su_delayline_wrk.dcin] ; i'=s, s c*o-i
|
|
faddp st1 ; s+c*o-i
|
|
{{- .Float 0.5 | .Prepare | indent 4}}
|
|
fadd dword [{{.Float 0.5 | .Use}}] ; add and sub small offset to prevent denormalization. WARNING: this is highly important, as low pass filters might denormalize and give 100x CPU penalty
|
|
fsub dword [{{.Float 0.5 | .Use}}] ; See for example: https://stackoverflow.com/questions/36781881/why-denormalized-floats-are-so-much-slower-than-other-floats-from-hardware-arch
|
|
fst dword [{{.CX}}+su_delayline_wrk.dcout] ; o'=s+c*o-i
|
|
ret
|
|
{{end}}
|
|
|
|
|
|
{{- if .HasOp "compressor"}}
|
|
;-------------------------------------------------------------------------------
|
|
; COMPRES opcode: push compressor gain to stack
|
|
;-------------------------------------------------------------------------------
|
|
; Mono: push g on stack, where g is a suitable gain for the signal
|
|
; you can either MULP to compress the signal or SEND it to a GAIN
|
|
; somewhere else for compressor side-chaining.
|
|
; Stereo: push g g on stack, where g is calculated using l^2 + r^2
|
|
;-------------------------------------------------------------------------------
|
|
{{.Func "su_op_compressor" "Opcode"}}
|
|
fdiv dword [{{.Input "compressor" "invgain"}}]; l/g, we'll call this pre inverse gained signal x from now on
|
|
fld st0 ; x x
|
|
fmul st0, st0 ; x^2 x
|
|
{{- if .StereoAndMono "compressor"}}
|
|
jnc su_op_compressor_mono
|
|
{{- end}}
|
|
{{- if .Stereo "compressor"}}
|
|
fld st2 ; r x^2 l/g r
|
|
fdiv dword [{{.Input "compressor" "invgain"}}]; r/g, we'll call this pre inverse gained signal y from now on
|
|
fst st3 ; y x^2 l/g r/g
|
|
fmul st0, st0 ; y^2 x^2 l/g r/g
|
|
faddp st1, st0 ; y^2+x^2 l/g r/g
|
|
call su_op_compressor_mono ; So, for stereo, we square both left & right and add them up
|
|
fld st0 ; and return the computed gain two times, ready for MULP STEREO
|
|
ret
|
|
su_op_compressor_mono:
|
|
{{- end}}
|
|
fld dword [{{.WRK}}] ; l x^2 x
|
|
fucomi st0, st1
|
|
setnb al ; if (st0 >= st1) al = 1; else al = 0;
|
|
fsubp st1, st0 ; x^2-l x
|
|
{{.Call "su_nonlinear_map"}} ; c x^2-l x, c is either attack or release parameter mapped in a nonlinear way
|
|
fmulp st1, st0 ; c*(x^2-l) x
|
|
fadd dword [{{.WRK}}] ; l+c*(x^2-l) x // we could've kept level in the stack and save a few bytes, but su_env_map uses 3 stack (c + 2 temp), so the stack was getting quite big.
|
|
fst dword [{{.WRK}}] ; l'=l+c*(x^2-l), l' x
|
|
fld dword [{{.Input "compressor" "threshold"}}] ; t l' x
|
|
fmul st0, st0 ; t*t l' x
|
|
fxch ; l' t*t x
|
|
fucomi st0, st1 ; if l' < t*t
|
|
fcmovb st0, st1 ; l'=t*t
|
|
fdivp st1, st0 ; t*t/l' x
|
|
fld dword [{{.Input "compressor" "ratio"}}] ; r t*t/l' x
|
|
{{.Float 0.5 | .Prepare | indent 4}}
|
|
fmul dword [{{.Float 0.5 | .Use}}] ; p=r/2 t*t/l' x
|
|
fxch ; t*t/l' p x
|
|
fyl2x ; p*log2(t*t/l') x
|
|
{{.TailCall "su_power"}} ; 2^(p*log2(t*t/l')) x
|
|
; tail call ; Equal to:
|
|
; (t*t/l')^p x
|
|
; if ratio is at minimum => p=0 => 1 x
|
|
; if ratio is at maximum => p=0.5 => t/x => t/x*x=t
|
|
{{- end}}
|