feat(asm&go4k): Preprocess asm code using go text/template

The preprocessing is done sointu-cli and (almost) nothing is done by the NASM preprocessor anymore (some .strucs are still there. Now, sointu-cli loads the .yml song, defines bunch of macros (go functions / variables) and passes the struct to text/template parses. This a lot more powerful way to generate .asm code than trying to fight with the nasm preprocessor. At the moment, tests pass but the repository is a bit of monster, as the library is still compiled using the old approach. Go should generate the library also from the templates.
2025-07-25 08:24:50 -04:00 · 2020-12-14 15:44:16 +02:00
parent 92c8b70fd2
commit 2ad61ff6b2
19 changed files with 2934 additions and 212 deletions
--- a/templates/effects.asm
+++ b/templates/effects.asm
@ -0,0 +1,393 @@
+{{- if .Opcode "distort"}}
+;-------------------------------------------------------------------------------
+;   DISTORT opcode: apply distortion on the signal
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  x*a/(1-a+(2*a-1)*abs(x))            where x is clamped first
+;   Stereo: l r ->  l*a/(1-a+(2*a-1)*abs(l)) r*a/(1-a+(2*a-1)*abs(r))
+;-------------------------------------------------------------------------------
+{{.Func "su_op_distort" "Opcode"}}
+{{- if .Stereo "distort" -}}
+    {{.Call "su_effects_stereohelper"}}
+{{- end}}
+    fld     dword [{{.Input "distort" "drive"}}]
+    {{.TailCall "su_waveshaper"}}
+{{end}}
+
+
+{{- if .Opcode "hold"}}
+;-------------------------------------------------------------------------------
+;   HOLD opcode: sample and hold the signal, reducing sample rate
+;-------------------------------------------------------------------------------
+;   Mono version:   holds the signal at a rate defined by the freq parameter
+;   Stereo version: holds both channels
+;-------------------------------------------------------------------------------
+{{.Func "su_op_hold" "Opcode"}}
+{{- if .Stereo "hold"}}
+    {{.Call "su_effects_stereohelper"}}
+{{- end}}
+    fld     dword [{{.Input "hold" "holdfreq"}}]    ; f x
+    fmul    st0, st0                        ; f^2 x
+    fchs                                    ; -f^2 x
+    fadd    dword [{{.WRK}}]              ; p-f^2 x
+    fst     dword [{{.WRK}}]              ; p <- p-f^2
+    fldz                                    ; 0 p x
+    fucomip st1                             ; p x
+    fstp    dword [{{.SP}}-4]                   ; t=p, x
+    jc      short su_op_hold_holding        ; if (0 < p) goto holding
+    fld1                                    ; 1 x
+    fadd    dword [{{.SP}}-4]                   ; 1+t x
+    fstp    dword [{{.WRK}}]   ; x
+    fst     dword [{{.WRK}}+4] ; save holded value
+    ret                                     ; x
+su_op_hold_holding:
+    fstp    st0                             ;
+    fld     dword [{{.WRK}}+4] ; x
+    ret
+{{end}}
+
+
+{{- if .Opcode "crush"}}
+;-------------------------------------------------------------------------------
+;   CRUSH opcode: quantize the signal to finite number of levels
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  e*int(x/e)
+;   Stereo: l r ->  e*int(l/e) e*int(r/e)
+;-------------------------------------------------------------------------------
+{{.Func "su_op_crush" "Opcode"}}
+{{- if .Stereo "crush"}}
+    {{.Call "su_effects_stereohelper"}}
+{{- end}}
+    fdiv    dword [{{.Input "crush" "resolution"}}]
+    frndint
+    fmul    dword [{{.Input "crush" "resolution"}}]
+    ret
+{{end}}
+
+
+{{- if .Opcode "gain"}}
+;-------------------------------------------------------------------------------
+;   GAIN opcode: apply gain on the signal
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  x*g
+;   Stereo: l r ->  l*g r*g
+;-------------------------------------------------------------------------------
+{{.Func "su_op_gain" "Opcode"}}
+{{- if .Stereo "gain"}}
+    fld     dword [{{.Input "gain" "gain"}}] ; g l (r)
+{{- if .Mono "invgain"}}
+    jnc     su_op_gain_mono
+{{- end}}
+    fmul    st2, st0                             ; g l r/g
+su_op_gain_mono:
+    fmulp   st1, st0                             ; l/g (r/)
+    ret
+{{- else}}
+    fmul    dword [{{.Input "gain" "gain"}}]
+    ret
+{{- end}}
+{{end}}
+
+
+{{- if .Opcode "invgain"}}
+;-------------------------------------------------------------------------------
+;   INVGAIN opcode: apply inverse gain on the signal
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  x/g
+;   Stereo: l r ->  l/g r/g
+;-------------------------------------------------------------------------------
+{{.Func "su_op_invgain" "Opcode"}}
+{{- if .Stereo "invgain"}}
+    fld     dword [{{.Input "invgain" "invgain"}}] ; g l (r)
+{{- if .Mono "invgain"}}
+    jnc     su_op_invgain_mono
+{{- end}}
+    fdiv    st2, st0                             ; g l r/g
+su_op_invgain_mono:
+    fdivp   st1, st0                             ; l/g (r/)
+    ret
+{{- else}}
+    fdiv    dword [{{.Input "invgain" "invgain"}}]
+    ret
+{{- end}}
+{{end}}
+
+
+{{- if .Opcode "filter"}}
+;-------------------------------------------------------------------------------
+;   FILTER opcode: perform low/high/band-pass/notch etc. filtering on the signal
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  filtered(x)
+;   Stereo: l r ->  filtered(l) filtered(r)
+;-------------------------------------------------------------------------------
+{{.Func "su_op_filter" "Opcode"}}
+    lodsb ; load the flags to al
+{{- if .Stereo "filter"}}
+    {{.Call "su_effects_stereohelper"}}
+{{- end}}
+    fld     dword [{{.Input "filter" "resonance"}}] ; r x
+    fld     dword [{{.Input "filter" "frequency"}}]; f r x
+    fmul    st0, st0                        ; f2 x (square the input so we never get negative and also have a smoother behaviour in the lower frequencies)
+    fst     dword [{{.SP}}-4]                   ; f2 r x
+    fmul    dword [{{.WRK}}+8]  ; f2*b r x
+    fadd    dword [{{.WRK}}]   ; f2*b+l r x
+    fst     dword [{{.WRK}}]   ; l'=f2*b+l r x
+    fsubp   st2, st0                        ; r x-l'
+    fmul    dword [{{.WRK}}+8]  ; r*b x-l'
+    fsubp   st1, st0                        ; x-l'-r*b
+    fst     dword [{{.WRK}}+4]  ; h'=x-l'-r*b
+    fmul    dword [{{.SP}}-4]                   ; f2*h'
+    fadd    dword [{{.WRK}}+8]  ; f2*h'+b
+    fstp    dword [{{.WRK}}+8]  ; b'=f2*h'+b
+    fldz                                    ; 0
+{{- if .HasParamValue "filter" "lowpass" 1}}
+    test    al, byte 0x40
+    jz      short su_op_filter_skiplowpass
+    fadd    dword [{{.WRK}}]
+su_op_filter_skiplowpass:
+{{- end}}
+{{- if .HasParamValue "filter" "bandpass" 1}}
+    test    al, byte 0x20
+    jz      short su_op_filter_skipbandpass
+    fadd    dword [{{.WRK}}+8]
+su_op_filter_skipbandpass:
+{{- end}}
+{{- if .HasParamValue "filter" "highpass" 1}}
+    test    al, byte 0x10
+    jz      short su_op_filter_skiphighpass
+    fadd    dword [{{.WRK}}+4]
+su_op_filter_skiphighpass:
+{{- end}}
+{{- if .HasParamValue "filter" "negbandpass" 1}}
+    test    al, byte 0x08
+    jz      short su_op_filter_skipnegbandpass
+    fsub    dword [{{.WRK}}+8]
+su_op_filter_skipnegbandpass:
+{{- end}}
+{{- if .HasParamValue "filter" "neghighpass" 1}}
+    test    al, byte 0x04
+    jz      short su_op_filter_skipneghighpass
+    fsub    dword [{{.WRK}}+4]
+su_op_filter_skipneghighpass:
+{{- end}}
+    ret
+{{end}}
+
+
+{{- if .Opcode "clip"}}
+;-------------------------------------------------------------------------------
+;   CLIP opcode: clips the signal into [-1,1] range
+;-------------------------------------------------------------------------------
+;   Mono:   x   ->  min(max(x,-1),1)
+;   Stereo: l r ->  min(max(l,-1),1) min(max(r,-1),1)
+;-------------------------------------------------------------------------------
+{{.Func "su_op_clip" "Opcode"}}
+{{- if .Stereo "clip"}}
+    {{.Call "su_effects_stereohelper"}}
+{{- end}}
+    {{.TailCall "su_clip"}}
+{{end}}
+
+
+{{- if .Opcode "pan" -}}
+;-------------------------------------------------------------------------------
+;   PAN opcode: pan the signal
+;-------------------------------------------------------------------------------
+;   Mono:   s   ->  s*(1-p) s*p
+;   Stereo: l r ->  l*(1-p) r*p
+;
+;   where p is the panning in [0,1] range
+;-------------------------------------------------------------------------------
+{{.Func "su_op_pan" "Opcode"}}
+{{- if .Stereo "pan"}}
+    jc      su_op_pan_do    ; this time, if this is mono op...
+    fld     st0             ;   ...we duplicate the mono into stereo first
+su_op_pan_do:
+    fld     dword [{{.Input "pan" "panning"}}]    ; p l r
+    fld1                                        ; 1 p l r
+    fsub    st1                                 ; 1-p p l r
+    fmulp   st2                                 ; p (1-p)*l r
+    fmulp   st2                                 ; (1-p)*l p*r
+    ret
+{{- else}}
+    fld     dword [{{.Input "pan" "panning"}}]    ; p s
+    fmul    st1                                 ; p*s s
+    fsub    st1, st0                            ; p*s s-p*s
+                                                ; Equal to
+                                                ; s*p s*(1-p)
+    fxch                                        ; s*(1-p) s*p SHOULD PROBABLY DELETE, WHY BOTHER
+    ret
+{{- end}}
+{{end}}
+
+
+{{- if .Opcode "delay"}}
+;-------------------------------------------------------------------------------
+;   DELAY opcode: adds delay effect to the signal
+;-------------------------------------------------------------------------------
+;   Mono:   perform delay on ST0, using delaycount delaylines starting
+;           at delayindex from the delaytable
+;   Stereo: perform delay on ST1, using delaycount delaylines starting
+;           at delayindex + delaycount from the delaytable (so the right delays
+;           can be different)
+;-------------------------------------------------------------------------------
+{{.Func "su_op_delay" "Opcode"}}
+    lodsw                           ; al = delay index, ah = delay count
+    {{- .PushRegs .VAL "DelayVal" .COM "DelayCom" | indent 4}}
+    movzx   ebx, al
+    ; %ifdef RUNTIME_TABLES               ; when using runtime tables, delaytimes is pulled from the stack so can be a pointer to heap
+    ; mov     _SI, [{{.SP}} + su_stack.delaytimes + PUSH_REG_SIZE(2)]
+    ; lea     _BX, [_SI + _BX*2]
+    ; %else
+{{.Prepare "su_delay_times" | indent 4}}
+    lea     {{.BX}},[{{.Use "su_delay_times"}} + {{.BX}}*2]                  ; BX now points to the right position within delay time table
+    movzx   esi, word [{{.Stack "GlobalTick"}}]          ; notice that we load word, so we wrap at 65536
+    mov     {{.CX}}, {{.PTRWORD}} [{{.Stack "DelayWorkSpace"}}]   ; {{.WRK}} is now the separate delay workspace, as they require a lot more space
+{{- if .StereoAndMono "delay"}}
+    jnc     su_op_delay_mono
+{{- end}}
+{{- if .Stereo "delay"}}
+    push    {{.AX}}                 ; save _ah (delay count)
+    fxch                        ; r l
+    call    su_op_delay_do      ; D(r) l        process delay for the right channel
+    pop     {{.AX}}                 ; restore the count for second run
+    fxch                        ; l D(r)
+su_op_delay_mono:               ; flow into mono delay
+{{- end}}
+    call    su_op_delay_do      ; when stereo delay is not enabled, we could inline this to save 5 bytes, but I expect stereo delay to be farely popular so maybe not worth the hassle
+    mov     {{.PTRWORD}} [{{.Stack "DelayWorkSpace"}}],{{.CX}}   ; move delay workspace pointer back to stack.
+    {{- .PopRegs .VAL .COM | indent 4}}
+{{- if .UsesDelayModulation}}
+    xor     eax, eax
+    mov     dword [{{.Modulation "delay" "delaytime"}}], eax
+{{- end}}
+    ret
+
+;-------------------------------------------------------------------------------
+;   su_op_delay_do: executes the actual delay
+;-------------------------------------------------------------------------------
+;   Pseudocode:
+;   q = dr*x
+;   for (i = 0;i < count;i++)
+;     s = b[(t-delaytime[i+offset])&65535]
+;     q += s
+;     o[i] = o[i]*da+s*(1-da)
+;     b[t] = f*o[i] +p^2*x
+;  Perform dc-filtering q and output q
+;-------------------------------------------------------------------------------
+{{.Func "su_op_delay_do"}}                         ; x y
+    fld     st0
+    fmul    dword [{{.Input "delay" "pregain"}}]  ; p*x y
+    fmul    dword [{{.Input "delay" "pregain"}}]  ; p*p*x y
+    fxch                                        ; y p*p*x
+    fmul    dword [{{.Input "delay" "dry"}}]      ; dr*y p*p*x
+su_op_delay_loop:
+        {{- if or .UsesDelayModulation (.HasParamValue "delay" "notetracking" 1)}} ; delaytime modulation or note syncing require computing the delay time in floats
+        fild    word [{{.BX}}]         ; k dr*y p*p*x, where k = delay time
+        {{- if .HasParamValue "delay" "notetracking" 1}}
+        test    ah, 1 ; note syncing is the least significant bit of ah, 0 = ON, 1 = OFF
+        jne     su_op_delay_skipnotesync
+        fild    dword [{{.INP}}-su_voice.inputs+su_voice.note]
+        {{.Int 0x3DAAAAAA | .Prepare | indent 8}}
+        fmul    dword [{{.Int 0x3DAAAAAA | .Use}}]
+        {{.Call "su_power"}}
+        fdivp   st1, st0                 ; use 10787 for delaytime to have neutral transpose
+        su_op_delay_skipnotesync:
+        {{- end}}
+        {{- if .UsesDelayModulation}}
+        fld     dword [{{.Modulation "delay" "delaytime"}}]
+        {{- .Float 32767.0 | .Prepare | indent 8}}
+        fmul    dword [{{.Float 32767.0 | .Use}}] ; scale it up, as the modulations would be too small otherwise
+        faddp   st1, st0
+        {{- end}}
+        fistp   dword [{{.SP}}-4]                       ; dr*y p*p*x, dword [{{.SP}}-4] = integer amount of delay (samples)
+        mov     edi, esi                            ; edi = esi = current time
+        sub     di, word [{{.SP}}-4]                    ; we perform the math in 16-bit to wrap around
+        {{- else}}
+        mov     edi, esi
+        sub     di, word [{{.BX}}]                      ; we perform the math in 16-bit to wrap around
+        {{- end}}
+        fld     dword [{{.CX}}+su_delayline_wrk.buffer+{{.DI}}*4]; s dr*y p*p*x, where s is the sample from delay buffer
+        fadd    st1, st0                                ; s dr*y+s p*p*x (add comb output to current output)
+        fld1                                            ; 1 s dr*y+s p*p*x
+        fsub    dword [{{.Input "delay" "damp"}}]         ; 1-da s dr*y+s p*p*x
+        fmulp   st1, st0                                ; s*(1-da) dr*y+s p*p*x
+        fld     dword [{{.Input "delay" "damp"}}]         ; da s*(1-da) dr*y+s p*p*x
+        fmul    dword [{{.CX}}+su_delayline_wrk.filtstate]  ; o*da s*(1-da) dr*y+s p*p*x, where o is stored
+        faddp   st1, st0                                ; o*da+s*(1-da) dr*y+s p*p*x
+        fst     dword [{{.CX}}+su_delayline_wrk.filtstate]  ; o'=o*da+s*(1-da), o' dr*y+s p*p*x
+        fmul    dword [{{.Input "delay" "feedback"}}]     ; f*o' dr*y+s p*p*x
+        fadd    st0, st2                                ; f*o'+p*p*x dr*y+s p*p*x
+        fstp    dword [{{.CX}}+su_delayline_wrk.buffer+{{.SI}}*4]; save f*o'+p*p*x to delay buffer
+        add     {{.BX}},2                                   ; move to next index
+        add     {{.CX}}, su_delayline_wrk.size              ; go to next delay delay workspace
+        sub     ah, 2
+        jg      su_op_delay_loop                        ; if ah > 0, goto loop
+    fstp    st1                                 ; dr*y+s1+s2+s3+...
+    ; DC-filtering
+    fld     dword [{{.CX}}+su_delayline_wrk.dcout]  ; o s
+{{- .Float 0.99609375 | .Prepare | indent 4}}
+    fmul    dword [{{.Float 0.99609375 | .Use}}]                ; c*o s
+    fsub    dword [{{.CX}}+su_delayline_wrk.dcin]   ; c*o-i s
+    fxch                                        ; s c*o-i
+    fst     dword [{{.CX}}+su_delayline_wrk.dcin]   ; i'=s, s c*o-i
+    faddp   st1                                 ; s+c*o-i
+{{- .Float 0.5 | .Prepare | indent 4}}
+    fadd    dword [{{.Float 0.5 | .Use}}]                     ; add and sub small offset to prevent denormalization
+    fsub    dword [{{.Float 0.5 | .Use}}]
+    fst     dword [{{.CX}}+su_delayline_wrk.dcout]  ; o'=s+c*o-i
+    ret
+{{end}}
+
+
+{{- if .Opcode "compressor"}}
+;-------------------------------------------------------------------------------
+;   COMPRES opcode: push compressor gain to stack
+;-------------------------------------------------------------------------------
+;   Mono:   push g on stack, where g is a suitable gain for the signal
+;           you can either MULP to compress the signal or SEND it to a GAIN
+;           somewhere else for compressor side-chaining.
+;   Stereo: push g g on stack, where g is calculated using l^2 + r^2
+;-------------------------------------------------------------------------------
+{{.Func "su_op_compressor" "Opcode"}}
+    fdiv    dword [{{.Input "compressor" "invgain"}}]; l/g, we'll call this pre inverse gained signal x from now on
+    fld     st0                                 ; x x
+    fmul    st0, st0                            ; x^2 x
+{{- if .StereoAndMono "compressor"}}
+    jnc     su_op_compressor_mono
+{{- end}}
+{{- if .Stereo "compressor"}}
+    fld     st2                                 ; r x^2 l/g r
+    fdiv    dword [{{.Input "compressor" "invgain"}}]; r/g, we'll call this pre inverse gained signal y from now on
+    fst     st3                                 ; y x^2 l/g r/g
+    fmul    st0, st0                            ; y^2 x^2 l/g r/g
+    faddp   st1, st0                            ; y^2+x^2 l/g r/g
+    call    su_op_compressor_mono               ; So, for stereo, we square both left & right and add them up
+    fld     st0                                 ; and return the computed gain two times, ready for MULP STEREO
+    ret
+su_op_compressor_mono:
+{{- end}}
+    fld     dword [{{.WRK}}]    ; l x^2 x
+    fucomi  st0, st1
+    setnb   al                                  ; if (st0 >= st1) al = 1; else al = 0;
+    fsubp   st1, st0                            ; x^2-l x
+    {{.Call "su_nonlinear_map"}}                ; c x^2-l x, c is either attack or release parameter mapped in a nonlinear way
+    fmulp   st1, st0                            ; c*(x^2-l) x
+    fadd    dword [{{.WRK}}]    ; l+c*(x^2-l) x   // we could've kept level in the stack and save a few bytes, but su_env_map uses 3 stack (c + 2 temp), so the stack was getting quite big.
+    fst     dword [{{.WRK}}]    ; l'=l+c*(x^2-l), l' x
+    fld     dword [{{.Input "compressor" "threshold"}}] ; t l' x
+    fmul    st0, st0                            ; t*t l' x
+    fxch                                        ; l' t*t x
+    fucomi  st0, st1                            ; if l' < t*t
+    fcmovb  st0, st1                            ;   l'=t*t
+    fdivp   st1, st0                            ; t*t/l' x
+    fld     dword [{{.Input "compressor" "ratio"}}]  ; r t*t/l' x
+{{.Float 0.5 | .Prepare | indent 4}}
+    fmul    dword [{{.Float 0.5 | .Use}}]       ; p=r/2 t*t/l' x
+    fxch                                        ; t*t/l' p x
+    fyl2x                                       ; p*log2(t*t/l') x
+    {{.TailCall "su_power"}}                     ; 2^(p*log2(t*t/l')) x
+    ; tail call                                 ; Equal to:
+                                                ; (t*t/l')^p x
+                                                ; if ratio is at minimum => p=0 => 1 x
+                                                ; if ratio is at maximum => p=0.5 => t/x => t/x*x=t
+{{- end}}