Move things to stack, anticipating support for multicore rendering.

This commit is contained in:
Veikko Sariola 2020-05-24 00:32:49 +03:00
parent 5c25eacd08
commit 457c1fdc32
6 changed files with 93 additions and 75 deletions

View File

@ -416,11 +416,6 @@ su_op_delay_loop:
;-------------------------------------------------------------------------------
; Delay data
;-------------------------------------------------------------------------------
SECT_BSS(sudelbuf)
EXPORT MANGLE_DATA(su_delay_buffer)
resb NUM_DELAY_LINES*su_delayline_wrk.size
SECT_DATA(suconst)
%ifndef C_DC_CONST

View File

@ -17,7 +17,7 @@ EXPORT MANGLE_FUNC(su_op_advance,0) ; Stack: addr voice wrkptr valptr comptr
add WRK, su_voice.size ; move to next voice
mov [_SP+su_stack.wrk], WRK ; update stack
mov ecx, [_SP+su_stack.voiceno] ; ecx = voice
apply bt dword,su_polyphony_bitmask,{,ecx} ; if voice bit of su_polyphonism not set
bt dword [_SP+su_stack.polyphony], ecx ; if voice bit of the polyphony bitmask is not set
jnc su_op_advance_next_instrument ; goto next_instrument
mov VAL, PTRWORD [_SP+su_stack.val] ; rollback to where we were earlier
mov COM, PTRWORD [_SP+su_stack.com]

View File

@ -9,16 +9,16 @@
SECT_TEXT(suopout)
EXPORT MANGLE_FUNC(su_op_out,0) ; l r
mov _AX, PTRWORD su_synth_obj + su_synth.left
mov _AX, [_SP + su_stack.synth]
%ifdef INCLUDE_STEREO_OUT
jnc su_op_out_mono
call su_op_out_mono
add _AX, 4
su_op_out_mono:
%endif
fmul dword [INP+su_out_ports.gain] ; g*l
fadd dword [_AX] ; g*l+o
fstp dword [_AX] ; o'=g*l+o
fmul dword [INP + su_out_ports.gain] ; g*l
fadd dword [_AX + su_synth.left] ; g*l+o
fstp dword [_AX + su_synth.left] ; o'=g*l+o
ret
%endif ; SU_OUT_ID > -1
@ -51,7 +51,7 @@ su_op_send_mono:
%ifdef INCLUDE_GLOBAL_SEND
test _AX, SEND_GLOBAL
jz su_op_send_skipglobal
mov _CX, PTRWORD su_synth_obj
mov _CX, [_SP + su_stack.synth]
su_op_send_skipglobal:
%endif
test _AX, SEND_POP ; if the SEND_POP bit is not set

View File

@ -75,12 +75,16 @@ kmENV_func_leave2:
SECT_TEXT(sunoise)
EXPORT MANGLE_FUNC(su_op_noise,0)
mov _CX,_SP
%ifdef INCLUDE_STEREO_NOISE
jnc su_op_noise_mono
call su_op_noise_mono
su_op_noise_mono:
%endif
call MANGLE_FUNC(FloatRandomNumber,0)
imul eax, [_CX + su_stack.randseed],16007
mov [_CX + su_stack.randseed],eax
fild dword [_CX + su_stack.randseed]
apply fidiv dword,c_RandDiv
fld dword [INP+su_noise_ports.shape]
call su_waveshaper
fld dword [INP+su_noise_ports.gain]

View File

@ -1,19 +1,16 @@
%if BITS == 32
%define BUFFER_STACK_LOC 48
%define render_prologue pushad ; stdcall & everything nonvolatile except eax, ecx, edx
%macro render_epilogue 0
popad
ret 4 ; clean the passed parameter from stack.
%endmacro
%elifidn __OUTPUT_FORMAT__,win64
%define BUFFER_STACK_LOC 56
%define render_prologue push_registers rcx,rdi,rsi,rbx,rbp ; rcx = ptr to buf. rdi,rsi,rbx,rbp nonvolatile
%macro render_epilogue 0
pop_registers rcx,rdi,rsi,rbx,rbp
ret
%endmacro
%else ; 64 bit mac & linux
%define BUFFER_STACK_LOC 40
%define render_prologue push_registers rdi,rbx,rbp ; rdi = ptr to buf. rbx & rbp nonvolatile
%macro render_epilogue 0
pop_registers rdi,rbx,rbp
@ -21,19 +18,47 @@
%endmacro
%endif
;-------------------------------------------------------------------------------
; Uninitialized data
;-------------------------------------------------------------------------------
%ifdef INCLUDE_MULTIVOICE_TRACKS
; Per-track voice bookkeeping. su_playerstack embeds one of these as
; .trackarray, and su_render reserves and zeroes trackarray.size bytes of
; stack for it.
struc trackarray
.curvoices resd MAX_TRACKS ; one dword per track; su_update_voices walks this as the current_voiceno array
.size ; total size in bytes of the struc
endstruc
%endif
SECT_BSS(subss)
; Layout of the stack while su_render is inside its sample loop. Offset 0 is
; the most recently pushed item; higher offsets were pushed earlier.
struc su_playerstack ; the structure of stack _as the output sound sees it_
.rowtick RESPTR 1 ; which tick within this row are we at
.row RESPTR 1 ; which total row of the song are we at
.tick RESPTR 1 ; which total tick of the song are we at
.randseed RESPTR 1 ; random seed updated by su_op_noise; su_render pushes 1 as the initial value
%ifdef INCLUDE_MULTIVOICE_TRACKS
.trackbits RESPTR 1 ; VOICETRACK_BITMASK: does the following voice belong to the same track
.trackarray resb trackarray.size ; per-track current voice numbers, zeroed by su_render
%endif
.cleanup ; marker: everything below this offset was pushed by su_render itself and is removed before render_epilogue
%if BITS == 32
.regs RESPTR 8 ; the eight registers saved by pushad in render_prologue
.retaddr RESPTR 1 ; return address of su_render; the buffer pointer is a stack parameter above it
%elifidn __OUTPUT_FORMAT__,win64
.regs RESPTR 4 ; four of the five registers saved by render_prologue; presumably rcx is pushed first and lands in .bufferptr - confirm against push_registers
%else
.regs RESPTR 2 ; two of the three registers saved by render_prologue; presumably rdi is pushed first and lands in .bufferptr - confirm against push_registers
%endif
.bufferptr RESPTR 1 ; pointer into the output buffer; output_sound reads it and stores the advanced value back
.size ; total size in bytes of the struc
endstruc
su_current_voiceno resd MAX_TRACKS ; number of the last voice used for each track
;===============================================================================
; Uninitialized data: The one and only synth object
;===============================================================================
SECT_BSS(susynth)
SECT_DATA(suconst)
su_voicetrack_bitmask dd VOICETRACK_BITMASK; does the following voice belong to the same track
su_synth_obj resb su_synth.size
%if DELAY_ID > -1 ; if we use delay, then the synth obj should be immediately followed by the delay workspaces
resb NUM_DELAY_LINES*su_delayline_wrk.size
%endif
%ifdef INCLUDE_MULTIVOICE_TRACKS
su_curvoices resd MAX_TRACKS
%endif
;-------------------------------------------------------------------------------
@ -58,17 +83,17 @@ SECT_DATA(suconst)
%macro output_sound 0
%ifndef SU_USE_16BIT_OUTPUT
%ifndef SU_CLIP_OUTPUT ; The modern way. No need to clip; OS can do it.
mov _DI, [_SP+BUFFER_STACK_LOC] ; edi contains ptr
mov _DI, [_SP+su_playerstack.bufferptr] ; edi contains ptr
mov _SI, PTRWORD su_synth_obj + su_synth.left
movsd ; copy left channel to output buffer
movsd ; copy right channel to output buffer
mov [_SP+BUFFER_STACK_LOC], _DI ; save back the updated ptr
mov [_SP+su_playerstack.bufferptr], _DI ; save back the updated ptr
lea _DI, [_SI-8]
xor eax, eax
stosd ; clear left channel so the VM is ready to write them again
stosd ; clear right channel so the VM is ready to write them again
%else
mov _SI, qword [_SP+BUFFER_STACK_LOC] ; esi points to the output buffer
mov _SI, qword [_SP+su_playerstack.bufferptr] ; esi points to the output buffer
xor _CX,_CX
xor eax,eax
%%loop: ; loop over two channels, left & right
@ -79,10 +104,10 @@ SECT_DATA(suconst)
add _SI,4
cmp ecx,2
jl %%loop
mov dword [_SP+BUFFER_STACK_LOC], _SI ; save esi back to stack
mov dword [_SP+su_playerstack.bufferptr], _SI ; save esi back to stack
%endif
%else ; 16-bit output, always clipped. This is a bit legacy method.
mov _SI, [_SP+BUFFER_STACK_LOC] ; esi points to the output buffer
mov _SI, [_SP+su_playerstack.bufferptr] ; esi points to the output buffer
mov _DI, PTRWORD su_synth_obj+su_synth.left
mov ecx, 2
%%loop: ; loop over two channels, left & right
@ -97,7 +122,7 @@ SECT_DATA(suconst)
stosd
add _SI,2
loop %%loop
mov [_SP+BUFFER_STACK_LOC], _SI ; save esi back to stack
mov [_SP+su_playerstack.bufferptr], _SI ; save esi back to stack
%endif
%endmacro
@ -115,7 +140,15 @@ EXPORT MANGLE_FUNC(su_render,PTRSIZE) ; Stack: ptr
%ifdef INCLUDE_GMDLS
call su_gmdls_load
%endif
xor eax, eax ; ecx is the current row
xor eax, eax
%ifdef INCLUDE_MULTIVOICE_TRACKS ; TODO: it's nice to keep the curvoices in stack but it's very unnice having to clear them ourselves
sub _SP, trackarray.size; reserve space from stack for the current voiceno
mov _DI, _SP
mov ecx, trackarray.size/4
rep stosd
push VOICETRACK_BITMASK
%endif
push 1 ; randseed
push _AX ; global tick time
su_render_rowloop: ; loop through every row in the song
push _AX ; Stack: row pushad ptr
@ -123,7 +156,16 @@ su_render_rowloop: ; loop through every row in the song
xor eax, eax ; eax is the current sample within row
su_render_sampleloop: ; loop through every sample in the row
push _AX ; Stack: sample row pushad ptr
%ifdef INCLUDE_POLYPHONY
push POLYPHONY_BITMASK ; does the next voice reuse the current opcodes?
%endif
mov WRK, PTRWORD su_synth_obj ; WRK points to the synth object
mov COM, PTRWORD MANGLE_DATA(su_commands) ; COM points to vm code
mov VAL, PTRWORD MANGLE_DATA(su_params) ; VAL points to unit params
call MANGLE_FUNC(su_run_vm,0) ; run through the VM code
%ifdef INCLUDE_POLYPHONY
pop _AX
%endif
output_sound ; *ptr++ = left, *ptr++ = right
pop _AX ; Stack: row pushad ptr
inc dword [_SP + PTRSIZE] ; increment global time, used by delays
@ -134,7 +176,12 @@ su_render_sampleloop: ; loop through every sample in the row
inc eax
cmp eax, TOTAL_ROWS
jl su_render_rowloop
%ifdef INCLUDE_MULTIVOICE_TRACKS
add _SP, su_playerstack.cleanup - su_playerstack.tick ; rewind the remaining stack
%else
pop _AX
pop _AX
%endif
render_epilogue
;-------------------------------------------------------------------------------
@ -154,7 +201,7 @@ su_update_voices: ; Stack: retaddr row
apply {lea _SI,},MANGLE_DATA(su_tracks),_AX,{} ; esi points to the pattern data for current track
xor eax, eax ; eax is the first voice of next track
xor ebx, ebx ; ebx is the first voice of current track
mov _BP, PTRWORD su_current_voiceno ; ebp points to the current_voiceno array
lea _BP, [_SP + su_playerstack.trackarray] ; ebp points to the current_voiceno array
su_update_voices_trackloop:
movzx eax, byte [_SI] ; eax = current pattern
imul eax, PATTERN_SIZE ; eax = offset to current pattern data
@ -163,7 +210,7 @@ su_update_voices_trackloop:
xor edx, edx ; edx=0
mov ecx, ebx ; ecx=first voice of the track to be done
su_calculate_voices_loop: ; do {
apply bt dword, su_voicetrack_bitmask,{,ecx}; // notice that the incs don't set carry
bt dword [_SP + su_playerstack.trackbits + PTRSIZE],ecx ; test voicetrack_bitmask; the incs below leave CF untouched, so the jc still sees this result
inc edx ; edx++ // edx=numvoices
inc ecx ; ecx++ // ecx=the first voice of next track
jc su_calculate_voices_loop ; } while bit ecx-1 of bitmask is on
@ -195,7 +242,8 @@ su_update_voices_nexttrack:
pop _DX ; edx=patrnrow
add _SI, MAX_PATTERNS
add _BP, 4
apply {cmp _BP,},su_current_voiceno,MAX_TRACKS*4,{}
lea _AX, [_SP + su_playerstack.trackarray + MAX_TRACKS*4]
cmp _BP,_AX
jl su_update_voices_trackloop
ret

View File

@ -103,22 +103,20 @@ struc su_stack ; the structure of stack _as the units see it_
.wrk RESPTR 1
.val RESPTR 1
.com RESPTR 1
.synth RESPTR 1
%if DELAY_ID > -1
.delaywrk RESPTR 1
%endif
.retaddrvm RESPTR 1
%ifdef INCLUDE_POLYPHONY
.polyphony RESPTR 1
%endif
.rowtick RESPTR 1 ; which tick within this row are we at
.row RESPTR 1 ; which total row of the song are we at
.tick RESPTR 1 ; which total tick of the song are we at
.randseed RESPTR 1
endstruc
;===============================================================================
; Uninitialized data: The one and only synth object
;===============================================================================
SECT_BSS(susynth)
su_synth_obj resb su_synth.size
;===============================================================================
; The opcode table jump table. This is constructed to only include the opcodes
; that are used so that the jump table is as small as possible.
@ -142,17 +140,11 @@ SECT_DATA(suconst)
c_i128 dd 0.0078125
c_RandDiv dd 65536*32768
c_0_5 dd 0.5
EXPORT MANGLE_DATA(RandSeed)
dd 1
c_24 dd 24
c_i12 dd 0x3DAAAAAA
EXPORT MANGLE_DATA(LFO_NORMALIZE)
dd DEF_LFO_NORMALIZE
%ifdef INCLUDE_POLYPHONY
su_polyphony_bitmask dd POLYPHONY_BITMASK ; does the next voice reuse the current opcodes?
%endif
;-------------------------------------------------------------------------------
; su_run_vm function: runs the entire virtual machine once, creating 1 sample
;-------------------------------------------------------------------------------
@ -165,22 +157,17 @@ su_polyphony_bitmask dd POLYPHONY_BITMASK ; does the next voice reuse th
SECT_TEXT(surunvm)
EXPORT MANGLE_FUNC(su_run_vm,0)
%if DELAY_ID > -1
%if BITS == 64 ; TODO: find a way to do this with a macro
mov _AX,PTRWORD MANGLE_DATA(su_delay_buffer-su_delayline_wrk.filtstate)
%if DELAY_ID > -1
lea _AX, [WRK + su_synth.size - su_delayline_wrk.filtstate]
push _AX ; reset delaywrk to first delayline
%else
push PTRWORD MANGLE_DATA(su_delay_buffer-su_delayline_wrk.filtstate)
%endif
%endif
mov COM, PTRWORD MANGLE_DATA(su_commands) ; COM points to vm code
mov VAL, PTRWORD MANGLE_DATA(su_params) ; VAL points to unit params
mov WRK, PTRWORD su_synth_obj + su_synth.voices ; WRK points to the first voice
push COM ; Stack: COM
push VAL ; Stack: VAL COM
push WRK ; Stack: WRK VAL COM
push WRK ; Stack: synth (delayWRK)
lea WRK, [WRK + su_synth.voices] ; WRK points to the first voice
push COM ; Stack: COM synth
push VAL ; Stack: VAL COM synth
push WRK ; Stack: WRK VAL COM synth
xor ecx, ecx ; voice = 0
push _CX ; Stack: voice WRK VAL COM
push _CX ; Stack: voice WRK VAL COM synth
su_run_vm_loop: ; loop until all voices done
movzx eax, byte [COM] ; eax = command byte
inc COM ; move to next instruction
@ -198,22 +185,6 @@ su_run_vm_loop: ; loop until all voices done
add _SP, su_stack.retaddrvm-PTRSIZE ; Stack cleared
ret
;-------------------------------------------------------------------------------
; FloatRandomNumber function
;-------------------------------------------------------------------------------
; Advances the global RandSeed by multiplying it with 16007, then returns the
; new seed divided by c_RandDiv on the FPU stack.
; Output: st0 : result
; Preserves: _AX (pushed on entry, popped before returning)
; Side effect: RandSeed is overwritten with the new seed
; NOTE(review): the apply macro is defined elsewhere; presumably it wraps the
; symbol in a memory operand valid for the current BITS - confirm.
;-------------------------------------------------------------------------------
SECT_TEXT(surandom)
EXPORT MANGLE_FUNC(FloatRandomNumber,0)
push _AX
apply {imul eax,},MANGLE_DATA(RandSeed),{,16007} ; eax = RandSeed * 16007
apply mov,MANGLE_DATA(RandSeed),{, eax} ; store the new seed
apply fild dword,MANGLE_DATA(RandSeed) ; st0 = new seed, loaded as a signed integer
apply fidiv dword,c_RandDiv ; st0 /= c_RandDiv
pop _AX
ret
;-------------------------------------------------------------------------------
; su_transform_values function: transforms values and adds modulations
;-------------------------------------------------------------------------------