mirror of
https://github.com/vsariola/sointu.git
synced 2025-06-04 01:28:45 -04:00
Optimize delay for better register use.
This commit is contained in:
parent
da52c10f7f
commit
5c25eacd08
@ -315,15 +315,11 @@ SECT_TEXT(sudelay)
|
|||||||
|
|
||||||
EXPORT MANGLE_FUNC(su_op_delay,0)
|
EXPORT MANGLE_FUNC(su_op_delay,0)
|
||||||
lodsw ; al = delay index, ah = delay count
|
lodsw ; al = delay index, ah = delay count
|
||||||
push_registers VAL, COM, WRK ; these are non-volatile according to our convention
|
push_registers VAL, COM ; these are non-volatile according to our convention
|
||||||
%ifdef INCLUDE_DELAY_MODULATION ; TODO: at the moment, this must be manually enabled, as the PORT macro is a singleline macro and cannot toggle this. Will have to think of a good solution.
|
|
||||||
mov esi, dword [WRK+su_unit.ports+su_delay_ports.delaymod]
|
|
||||||
mov [INP+su_delay_ports.delaymod],esi ; move the modulation to the transformed values, as we will not have access to WRK soon
|
|
||||||
%endif
|
|
||||||
movzx ebx, al
|
movzx ebx, al
|
||||||
apply {lea _BX,},MANGLE_DATA(su_delay_times),_BX*2,{} ; _BX now points to the right position within delay time table
|
apply {lea _BX,},MANGLE_DATA(su_delay_times),_BX*2,{} ; _BX now points to the right position within delay time table
|
||||||
movzx esi, word [_SP + su_stack.tick + PUSH_REG_SIZE(3)] ; notice that we load word, so we wrap at 65536
|
movzx esi, word [_SP + su_stack.tick + PUSH_REG_SIZE(2)] ; notice that we load word, so we wrap at 65536
|
||||||
mov WRK, PTRWORD [_SP + su_stack.delaywrk + PUSH_REG_SIZE(3)] ; WRK is now the separate delay workspace, as they require a lot more space
|
mov _CX, PTRWORD [_SP + su_stack.delaywrk + PUSH_REG_SIZE(2)] ; _CX now points to the separate delay workspace, as delay lines require a lot more space
|
||||||
%ifdef INCLUDE_STEREO_DELAY
|
%ifdef INCLUDE_STEREO_DELAY
|
||||||
jnc su_op_delay_mono
|
jnc su_op_delay_mono
|
||||||
push _AX ; save _ah (delay count)
|
push _AX ; save _ah (delay count)
|
||||||
@ -334,8 +330,8 @@ EXPORT MANGLE_FUNC(su_op_delay,0)
|
|||||||
su_op_delay_mono: ; flow into mono delay
|
su_op_delay_mono: ; flow into mono delay
|
||||||
%endif
|
%endif
|
||||||
call su_op_delay_do ; when stereo delay is not enabled, we could inline this to save 5 bytes, but I expect stereo delay to be fairly popular so maybe not worth the hassle
|
call su_op_delay_do ; when stereo delay is not enabled, we could inline this to save 5 bytes, but I expect stereo delay to be fairly popular so maybe not worth the hassle
|
||||||
mov PTRWORD [_SP + su_stack.delaywrk + PUSH_REG_SIZE(3)],WRK ; move delay workspace pointer back to stack.
|
mov PTRWORD [_SP + su_stack.delaywrk + PUSH_REG_SIZE(2)],_CX ; move delay workspace pointer back to stack.
|
||||||
pop_registers VAL, COM, WRK
|
pop_registers VAL, COM
|
||||||
%ifdef INCLUDE_DELAY_MODULATION
|
%ifdef INCLUDE_DELAY_MODULATION
|
||||||
xor eax, eax
|
xor eax, eax
|
||||||
mov dword [WRK+su_unit.ports+su_delay_ports.delaymod], eax ; zero it
|
mov dword [WRK+su_unit.ports+su_delay_ports.delaymod], eax ; zero it
|
||||||
@ -377,7 +373,7 @@ su_op_delay_loop:
|
|||||||
su_op_delay_skipnotesync:
|
su_op_delay_skipnotesync:
|
||||||
%endif
|
%endif
|
||||||
%ifdef INCLUDE_DELAY_MODULATION
|
%ifdef INCLUDE_DELAY_MODULATION
|
||||||
fld dword [INP+su_delay_ports.delaymod]
|
fld dword [WRK+su_unit.ports+su_delay_ports.delaymod]
|
||||||
apply fmul dword, c_32767 ; scale it up, as the modulations would be too small otherwise
|
apply fmul dword, c_32767 ; scale it up, as the modulations would be too small otherwise
|
||||||
faddp st1, st0
|
faddp st1, st0
|
||||||
%endif
|
%endif
|
||||||
@ -388,33 +384,33 @@ su_op_delay_loop:
|
|||||||
mov edi, esi
|
mov edi, esi
|
||||||
sub di, word [_BX] ; we perform the math in 16-bit to wrap around
|
sub di, word [_BX] ; we perform the math in 16-bit to wrap around
|
||||||
%endif
|
%endif
|
||||||
fld dword [WRK+su_delayline_wrk.buffer+_DI*4]; s dr*y p*p*x, where s is the sample from delay buffer
|
fld dword [_CX+su_delayline_wrk.buffer+_DI*4]; s dr*y p*p*x, where s is the sample from delay buffer
|
||||||
fadd st1, st0 ; s dr*y+s p*p*x (add comb output to current output)
|
fadd st1, st0 ; s dr*y+s p*p*x (add comb output to current output)
|
||||||
fld1 ; 1 s dr*y+s p*p*x
|
fld1 ; 1 s dr*y+s p*p*x
|
||||||
fsub dword [INP+su_delay_ports.damp] ; 1-da s dr*y+s p*p*x
|
fsub dword [INP+su_delay_ports.damp] ; 1-da s dr*y+s p*p*x
|
||||||
fmulp st1, st0 ; s*(1-da) dr*y+s p*p*x
|
fmulp st1, st0 ; s*(1-da) dr*y+s p*p*x
|
||||||
fld dword [INP+su_delay_ports.damp] ; da s*(1-da) dr*y+s p*p*x
|
fld dword [INP+su_delay_ports.damp] ; da s*(1-da) dr*y+s p*p*x
|
||||||
fmul dword [WRK+su_delayline_wrk.filtstate] ; o*da s*(1-da) dr*y+s p*p*x, where o is stored
|
fmul dword [_CX+su_delayline_wrk.filtstate] ; o*da s*(1-da) dr*y+s p*p*x, where o is stored
|
||||||
faddp st1, st0 ; o*da+s*(1-da) dr*y+s p*p*x
|
faddp st1, st0 ; o*da+s*(1-da) dr*y+s p*p*x
|
||||||
fst dword [WRK+su_delayline_wrk.filtstate] ; o'=o*da+s*(1-da), o' dr*y+s p*p*x
|
fst dword [_CX+su_delayline_wrk.filtstate] ; o'=o*da+s*(1-da), o' dr*y+s p*p*x
|
||||||
fmul dword [INP+su_delay_ports.feedback] ; f*o' dr*y+s p*p*x
|
fmul dword [INP+su_delay_ports.feedback] ; f*o' dr*y+s p*p*x
|
||||||
fadd st0, st2 ; f*o'+p*p*x dr*y+s p*p*x
|
fadd st0, st2 ; f*o'+p*p*x dr*y+s p*p*x
|
||||||
fstp dword [WRK+su_delayline_wrk.buffer+_SI*4]; save f*o'+p*p*x to delay buffer
|
fstp dword [_CX+su_delayline_wrk.buffer+_SI*4]; save f*o'+p*p*x to delay buffer
|
||||||
add _BX,2 ; move to next index
|
add _BX,2 ; move to next index
|
||||||
add WRK, su_delayline_wrk.size ; go to next delay workspace
|
add _CX, su_delayline_wrk.size ; go to next delay workspace
|
||||||
sub ah, 2
|
sub ah, 2
|
||||||
jg su_op_delay_loop ; if ah > 0, goto loop
|
jg su_op_delay_loop ; if ah > 0, goto loop
|
||||||
fstp st1 ; dr*y+s1+s2+s3+...
|
fstp st1 ; dr*y+s1+s2+s3+...
|
||||||
; DC-filtering
|
; DC-filtering
|
||||||
fld dword [WRK+su_delayline_wrk.dcout] ; o s
|
fld dword [_CX+su_delayline_wrk.dcout] ; o s
|
||||||
apply fmul dword, c_dc_const ; c*o s
|
apply fmul dword, c_dc_const ; c*o s
|
||||||
fsub dword [WRK+su_delayline_wrk.dcin] ; c*o-i s
|
fsub dword [_CX+su_delayline_wrk.dcin] ; c*o-i s
|
||||||
fxch ; s c*o-i
|
fxch ; s c*o-i
|
||||||
fst dword [WRK+su_delayline_wrk.dcin] ; i'=s, s c*o-i
|
fst dword [_CX+su_delayline_wrk.dcin] ; i'=s, s c*o-i
|
||||||
faddp st1 ; s+c*o-i
|
faddp st1 ; s+c*o-i
|
||||||
apply fadd dword, c_0_5 ; add and sub small offset to prevent denormalization
|
apply fadd dword, c_0_5 ; add and sub small offset to prevent denormalization
|
||||||
apply fsub dword, c_0_5
|
apply fsub dword, c_0_5
|
||||||
fst dword [WRK+su_delayline_wrk.dcout] ; o'=s+c*o-i
|
fst dword [_CX+su_delayline_wrk.dcout] ; o'=s+c*o-i
|
||||||
ret
|
ret
|
||||||
|
|
||||||
;-------------------------------------------------------------------------------
|
;-------------------------------------------------------------------------------
|
||||||
|
Loading…
x
Reference in New Issue
Block a user