diff --git a/CMakeSettings.json b/CMakeSettings.json index 552c223..801a729 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -23,6 +23,30 @@ "ctestCommandArgs": "", "inheritEnvironments": [ "msvc_x86" ], "variables": [] + }, + { + "name": "x64-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [] + }, + { + "name": "x64-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [] } ] } \ No newline at end of file diff --git a/README.md b/README.md index 3c99947..644186c 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,9 @@ New features since fork detuned and added up to together. Great for trance leads (supersaw). Unison of up to 4, or 8 if you make stereo unison oscillator and add up both left and right channels. See [this example](tests/test_oscillat_unison.asm). + - **Supports 32 and 64 bit builds**. The 64-bit version is done with minimal + changes to get it to work, mainly for the future prospect of running the MIDI + instrument in 64-bit mode. All the tests are passing so it seems to work. Future goals ------------ @@ -90,7 +93,6 @@ Future goals case the signal entering skip and the signal leaving out are both close to zero. - **Even more opcodes**. Maybe an equalizer? DC-offset removal? - - **Support for 64-bit targets**. - **Browser-based GUI and MIDI instrument**. Modern browsers support WebMIDI, WebAudio and, most importantly, they are cross-platform and come installed on pretty much any computer. 
The only thing needed is to be able to diff --git a/src/opcodes/arithmetic.asm b/src/opcodes/arithmetic.asm index b266367..3816caa 100644 --- a/src/opcodes/arithmetic.asm +++ b/src/opcodes/arithmetic.asm @@ -70,8 +70,8 @@ EXPORT MANGLE_FUNC(su_op_loadnote,0) call su_op_loadnote_mono su_op_loadnote_mono: %endif - fild dword [ecx+su_unit.size-su_voice.workspace+su_voice.note] - fmul dword [c_i128] + fild dword [_CX+su_unit.size-su_voice.workspace+su_voice.note] + apply fmul dword, c_i128 ret %endif diff --git a/src/opcodes/effects.asm b/src/opcodes/effects.asm index ae0d0ac..db56238 100644 --- a/src/opcodes/effects.asm +++ b/src/opcodes/effects.asm @@ -14,7 +14,7 @@ EXPORT MANGLE_FUNC(su_op_distort,0) call su_effects_stereohelper %define INCLUDE_EFFECTS_STEREOHELPER %endif - fld dword [edx+su_distort_ports.drive] + fld dword [INP+su_distort_ports.drive] %define SU_INCLUDE_WAVESHAPER ; flow into waveshaper %endif @@ -25,7 +25,7 @@ su_waveshaper: call su_clip fxch ; a x' (from now on just called x) fld st0 ; a a x - fsub dword [c_0_5] ; a-.5 a x + apply fsub dword,c_0_5 ; a-.5 a x fadd st0 ; 2*a-1 a x fld st2 ; x 2*a-1 a x fabs ; abs(x) 2*a-1 a x @@ -53,17 +53,17 @@ EXPORT MANGLE_FUNC(su_op_hold,0) call su_effects_stereohelper %define INCLUDE_EFFECTS_STEREOHELPER %endif - fld dword [edx+su_hold_ports.freq] ; f x + fld dword [INP+su_hold_ports.freq] ; f x fmul st0, st0 ; f^2 x fchs ; -f^2 x fadd dword [WRK+su_hold_wrk.phase] ; p-f^2 x fst dword [WRK+su_hold_wrk.phase] ; p <- p-f^2 fldz ; 0 p x fucomip st1 ; p x - fstp dword [esp-4] ; t=p, x + fstp dword [_SP-4] ; t=p, x jc short su_op_hold_holding ; if (0 < p) goto holding fld1 ; 1 x - fadd dword [esp-4] ; 1+t x + fadd dword [_SP-4] ; 1+t x fstp dword [WRK+su_hold_wrk.phase] ; x fst dword [WRK+su_hold_wrk.holdval] ; save holded value ret ; x @@ -85,10 +85,10 @@ EXPORT MANGLE_FUNC(su_op_crush,0) %ifdef INCLUDE_STEREO_CRUSH call su_effects_stereohelper %define INCLUDE_EFFECTS_STEREOHELPER - %endif - fdiv dword 
[edx+su_crush_ports.resolution] + %endif + fdiv dword [INP+su_crush_ports.resolution] frndint - fmul dword [edx+su_crush_ports.resolution] + fmul dword [INP+su_crush_ports.resolution] ret %endif ; CRUSH_ID > -1 @@ -101,15 +101,15 @@ EXPORT MANGLE_FUNC(su_op_crush,0) SECT_TEXT(sugain) %ifdef INCLUDE_STEREO_GAIN EXPORT MANGLE_FUNC(su_op_gain,0) - fld dword [edx+su_gain_ports.gain] ; g l (r) + fld dword [INP+su_gain_ports.gain] ; g l (r) jnc su_op_gain_mono - fmul st2, st0 ; g l r/g + fmul st2, st0 ; g l r/g su_op_gain_mono: fmulp st1, st0 ; l/g (r/) ret %else EXPORT MANGLE_FUNC(su_op_gain,0) - fmul dword [edx+su_gain_ports.gain] + fmul dword [INP+su_gain_ports.gain] ret %endif %endif ; GAIN_ID > -1 @@ -122,7 +122,7 @@ SECT_TEXT(sugain) SECT_TEXT(suingain) %ifdef INCLUDE_STEREO_INVGAIN EXPORT MANGLE_FUNC(su_op_invgain,0) - fld dword [edx+su_invgain_ports.invgain] ; g l (r) + fld dword [INP+su_invgain_ports.invgain] ; g l (r) jnc su_op_invgain_mono fdiv st2, st0 ; g l r/g su_op_invgain_mono: @@ -130,7 +130,7 @@ SECT_TEXT(suingain) ret %else EXPORT MANGLE_FUNC(su_op_invgain,0) - fdiv dword [edx+su_invgain_ports.invgain] + fdiv dword [INP+su_invgain_ports.invgain] ret %endif %endif ; INVGAIN_ID > -1 @@ -150,14 +150,14 @@ SECT_TEXT(sufilter) EXPORT MANGLE_FUNC(su_op_filter,0) lodsb ; load the flags to al - %ifdef INCLUDE_STEREO_FILTER + %ifdef INCLUDE_STEREO_FILTER call su_effects_stereohelper %define INCLUDE_EFFECTS_STEREOHELPER %endif - fld dword [edx+su_filter_ports.res] ; r x - fld dword [edx+su_filter_ports.freq]; f r x + fld dword [INP+su_filter_ports.res] ; r x + fld dword [INP+su_filter_ports.freq]; f r x fmul st0, st0 ; f2 x (square the input so we never get negative and also have a smoother behaviour in the lower frequencies) - fst dword [esp-4] ; f2 r x + fst dword [_SP-4] ; f2 r x fmul dword [WRK+su_filter_wrk.band] ; f2*b r x fadd dword [WRK+su_filter_wrk.low] ; f2*b+l r x fst dword [WRK+su_filter_wrk.low] ; l'=f2*b+l r x @@ -165,7 +165,7 @@ EXPORT 
MANGLE_FUNC(su_op_filter,0) fmul dword [WRK+su_filter_wrk.band] ; r*b x-l' fsubp st1, st0 ; x-l'-r*b fst dword [WRK+su_filter_wrk.high] ; h'=x-l'-r*b - fmul dword [esp-4] ; f2*h' + fmul dword [_SP-4] ; f2*h' fadd dword [WRK+su_filter_wrk.band] ; f2*h'+b fstp dword [WRK+su_filter_wrk.band] ; b'=f2*h'+b fldz ; 0 @@ -212,9 +212,9 @@ SECT_TEXT(suclip) %if CLIP_ID > -1 EXPORT MANGLE_FUNC(su_op_clip,0) - %ifdef INCLUDE_STEREO_CLIP + %ifdef INCLUDE_STEREO_CLIP call su_effects_stereohelper - %define INCLUDE_EFFECTS_STEREOHELPER + %define INCLUDE_EFFECTS_STEREOHELPER %endif %define SU_INCLUDE_CLIP ; flow into su_doclip @@ -255,7 +255,7 @@ EXPORT MANGLE_FUNC(su_op_pan,0) jc su_op_pan_do ; this time, if this is mono op... fld st0 ; ...we duplicate the mono into stereo first su_op_pan_do: - fld dword [edx+su_pan_ports.panning] ; p l r + fld dword [INP+su_pan_ports.panning] ; p l r fld1 ; 1 p l r fsub st1 ; 1-p p l r fmulp st2 ; p (1-p)*l r @@ -265,7 +265,7 @@ su_op_pan_do: %else ; ifndef INCLUDE_STEREO_PAN EXPORT MANGLE_FUNC(su_op_pan,0) - fld dword [edx+su_pan_ports.panning] ; p s + fld dword [INP+su_pan_ports.panning] ; p s fmul st1 ; p*s s fsub st1, st0 ; p*s s-p*s ; Equal to @@ -288,7 +288,7 @@ su_effects_stereohelper: jnc su_effects_stereohelper_mono ; carry is still the stereo bit add WRK, 16 fxch ; r l - call dword [esp] ; call whoever called me... + call [_SP] ; call whoever called me... 
fxch ; l r sub WRK, 16 ; move WRK back to where it was su_effects_stereohelper_mono: @@ -324,68 +324,68 @@ EXPORT MANGLE_FUNC(su_op_delay,0) add edi, eax ; the second delay is done with the delay time index added by count su_op_delay_mono: %endif - pushad + push_registers _AX, _CX, _BX, WRK, _SI, _DI mov ebx, edi; ugly register juggling, refactor %ifdef DELAY_NOTE_SYNC test ebx, ebx ; note s jne su_op_delay_skipnotesync fld1 - fild dword [ecx+su_unit.size-su_voice.workspace+su_voice.note] - fmul dword [c_i12] + fild dword [_CX+su_unit.size-su_voice.workspace+su_voice.note] + apply fmul dword, c_i12 call MANGLE_FUNC(su_power,0) - fmul dword [c_freq_normalize] ; // normalize + apply fmul dword, c_freq_normalize ; // normalize fdivp st1, st0 ; // invert to get numer of samples - fistp word [MANGLE_DATA(su_delay_times)] ; store current comb size + apply fistp word, MANGLE_DATA(su_delay_times) ; store current comb size su_op_delay_skipnotesync: %endif kmDLL_func_process: mov ecx, eax ;// ecx is the number of parallel delays - mov WRK, dword [MANGLE_DATA(su_delay_buffer_ofs)] ;// ebp is current delay + apply {mov WRK, PTRWORD},MANGLE_DATA(su_delay_buffer_ofs) ;// ebp is current delay fld st0 ; x x - fmul dword [edx+su_delay_ports.dry] ; dr*x x + fmul dword [INP+su_delay_ports.dry] ; dr*x x fxch ; x dr*x - fmul dword [edx+su_delay_ports.pregain] ; p*x dr*x - fmul dword [edx+su_delay_ports.pregain] ; p^2*x dr*x + fmul dword [INP+su_delay_ports.pregain] ; p*x dr*x + fmul dword [INP+su_delay_ports.pregain] ; p^2*x dr*x kmDLL_func_loop: mov edi, dword [WRK + su_delayline_wrk.time] inc edi and edi, MAX_DELAY-1 mov dword [WRK + su_delayline_wrk.time],edi - movzx esi, word [MANGLE_DATA(su_delay_times)+ebx*2] ; esi = comb size from the delay times table + apply {movzx esi, word},MANGLE_DATA(su_delay_times),_BX*2,{} ; esi = comb size from the delay times table mov eax, edi sub eax, esi and eax, MAX_DELAY-1 - fld dword [WRK+eax*4+su_delayline_wrk.buffer] ; s p^2*x dr*x, where s is 
the sample from delay buffer + apply fld dword, su_delayline_wrk.buffer, WRK, _AX*4,{} ; s p^2*x dr*x, where s is the sample from delay buffer ;// add comb output to current output fadd st2, st0 ; s p^2*x dr*x+s fld1 ; 1 s p^2*x dr*x+s - fsub dword [edx+su_delay_ports.damp] ; 1-da s p^2*x dr*x+s + fsub dword [INP+su_delay_ports.damp] ; 1-da s p^2*x dr*x+s fmulp st1, st0 ; s*(1-da) p^2*x dr*x+s - fld dword [edx+su_delay_ports.damp] ; da s*(1-da) p^2*x dr*x+s + fld dword [INP+su_delay_ports.damp] ; da s*(1-da) p^2*x dr*x+s fmul dword [WRK+su_delayline_wrk.filtstate] ; o*da s*(1-da) p^2*x dr*x+s, where o is stored faddp st1, st0 ; o*da+s*(1-da) p^2*x dr*x+s fst dword [WRK+su_delayline_wrk.filtstate] ; o'=o*da+s*(1-da), o' p^2*x dr*x+s - fmul dword [edx+su_delay_ports.feedback] ; f*o' p^2*x dr*x+s + fmul dword [INP+su_delay_ports.feedback] ; f*o' p^2*x dr*x+s fadd st0, st1 ; f*o'+p^2*x p^2*x dr*x+s - fstp dword [WRK+edi*4+su_delayline_wrk.buffer]; save f*o'+p^2*x to delay buffer + fstp dword [WRK+_DI*4+su_delayline_wrk.buffer]; save f*o'+p^2*x to delay buffer inc ebx ;// go to next delay lenkmh index add WRK, su_delayline_wrk.size ;// go to next delay - mov dword [MANGLE_DATA(su_delay_buffer_ofs)], WRK ;// store next delay offset + apply mov PTRWORD, MANGLE_DATA(su_delay_buffer_ofs),{, WRK} ;// store next delay offset loopne kmDLL_func_loop fstp st0 ; dr*x+s1+s2+s3+... 
; DC-filtering sub WRK, su_delayline_wrk.size ; the reason to use the last su_delayline_wrk instead of su_delay_wrk is that su_delay_wrk is wiped by retriggering fld dword [WRK+su_delayline_wrk.dcout] ; o s - fmul dword [c_dc_const] ; c*o s + apply fmul dword, c_dc_const ; c*o s fsub dword [WRK+su_delayline_wrk.dcin] ; c*o-i s fxch ; s c*o-i fst dword [WRK+su_delayline_wrk.dcin] ; i'=s, s c*o-i faddp st1 ; s+c*o-i - fadd dword [c_0_5] ;// add and sub small offset to prevent denormalization - fsub dword [c_0_5] + apply fadd dword, c_0_5 ;// add and sub small offset to prevent denormalization + apply fsub dword, c_0_5 fst dword [WRK+su_delayline_wrk.dcout] ; o'=s+c*o-i - popad + pop_registers _AX, _CX, _BX, WRK, _SI, _DI ret ;------------------------------------------------------------------------------- @@ -394,7 +394,8 @@ kmDLL_func_loop: SECT_BSS(sudelbuf) EXPORT MANGLE_DATA(su_delay_buffer_ofs) - resd 1 + RESPTR 1 + EXPORT MANGLE_DATA(su_delay_buffer) resb NUM_DELAY_LINES*su_delayline_wrk.size @@ -443,7 +444,7 @@ su_op_compressor_releasing: fmulp st2, st0 ; l c*(x^2-l) x faddp st1, st0 ; l+c*(x^2-l) x fst dword [WRK+su_compres_wrk.level] ; l'=l+c*(x^2-l), l' x - fld dword [edx+su_compres_ports.threshold] ; t l' x + fld dword [INP+su_compres_ports.threshold] ; t l' x fmul st0, st0 ; t*t fucomi st0, st1 ; if threshold < l' jb su_op_compressor_compress ; then we actually do compression @@ -453,8 +454,8 @@ su_op_compressor_releasing: ret ; return unity gain when we are below threshold su_op_compressor_compress: ; l' x fdivrp st1, st0 ; t*t/l' x - fld dword [edx+su_compres_ports.ratio] ; r t*t/l' x - fmul dword [c_0_5] ; p=r/2 t*t/l' x + fld dword [INP+su_compres_ports.ratio] ; r t*t/l' x + apply fmul dword, c_0_5 ; p=r/2 t*t/l' x fxch ; t*t/l' p x fyl2x ; p*log2(t*t/l') x jmp MANGLE_FUNC(su_power,0) ; 2^(p*log2(t*t/l')) x diff --git a/src/opcodes/flowcontrol.asm b/src/opcodes/flowcontrol.asm index c3c301e..d0ec213 100644 --- a/src/opcodes/flowcontrol.asm +++ 
b/src/opcodes/flowcontrol.asm @@ -21,31 +21,29 @@ SECT_TEXT(suopadvn) %ifdef INCLUDE_POLYPHONY EXPORT MANGLE_FUNC(su_op_advance,0) ; Stack: addr voice wrkptr valptr comptr - mov WRK, dword [esp+8] ; WRK = wrkptr + mov WRK, [_SP+PTRSIZE*2] ; WRK = wrkptr add WRK, su_voice.size ; move to next voice - mov dword [esp+8], WRK ; update stack - mov ecx, dword [esp+4] ; ecx = voice - bt dword [su_polyphony_bitmask],ecx ; if voice bit of su_polyphonism not set + mov [_SP+PTRSIZE*2], WRK ; update stack + mov ecx, [_SP+PTRSIZE] ; ecx = voice + apply bt dword,su_polyphony_bitmask,{,ecx} ; if voice bit of su_polyphonism not set jnc su_op_advance_next_instrument ; goto next_instrument - mov VAL, dword [esp+12] ; rollback to where we were earlier - mov COM, dword [esp+16] + mov VAL, PTRWORD [_SP+PTRSIZE*3] ; rollback to where we were earlier + mov COM, PTRWORD [_SP+PTRSIZE*4] jmp short su_op_advance_finish su_op_advance_next_instrument: - mov dword [esp+12], VAL ; save current VAL as a checkpoint - mov dword [esp+16], COM ; save current COM as a checkpoint + mov PTRWORD [_SP+PTRSIZE*3], VAL ; save current VAL as a checkpoint + mov PTRWORD [_SP+PTRSIZE*4], COM ; save current COM as a checkpoint su_op_advance_finish: - inc dword [esp+4] + inc PTRWORD [_SP+PTRSIZE] ret %else - -EXPORT MANGLE_FUNC(su_op_advance,0) ; Stack: addr voice wrkptr valptr comptr - mov WRK, dword [esp+8] ; WRK = wrkptr - add WRK, su_voice.size ; move to next voice - mov dword [esp+8], WRK ; update stack - inc dword [esp+4] ; voice++ - ret - + EXPORT MANGLE_FUNC(su_op_advance,0) ; Stack: addr voice wrkptr valptr comptr + mov WRK, PTRWORD [_SP+PTRSIZE*2] ; WRK = wrkptr + add WRK, su_voice.size ; move to next voice + mov PTRWORD [_SP+PTRSIZE*2], WRK ; update stack + inc PTRWORD [_SP+PTRSIZE] ; voice++ + ret %endif ;------------------------------------------------------------------------------- @@ -56,18 +54,18 @@ EXPORT MANGLE_FUNC(su_op_advance,0) ; Stack: addr voice wrkptr valptr comptr SECT_TEXT(suspeed) 
EXPORT MANGLE_FUNC(su_op_speed,0) - fsub dword [c_0_5] ; s-.5 + apply fsub dword, c_0_5 ; s-.5 fadd st0, st0 ; 2*s-1 - fmul dword [c_bpmscale] ; (2*s-1)*64/24, let's call this p from now on + apply fmul dword, c_bpmscale ; (2*s-1)*64/24, let's call this p from now on call MANGLE_FUNC(su_power,0) ; 2^p, this is how many ticks we should be taking fld1 ; 1 2^p fsubp st1, st0 ; 2^p-1, the player is advancing 1 tick by its own fadd dword [WRK+su_speed_wrk.remainder] ; t+2^p-1, t is the remainder from previous rounds as ticks have to be rounded to 1 - push eax - fist dword [esp] ; Main stack: k=int(t+2^p-1) - fisub dword [esp] ; t+2^p-1-k, the remainder - pop eax - add dword [esp+24], eax ; add the whole ticks to song tick count, [esp+24] is the current tick in the row + push _AX + fist dword [_SP] ; Main stack: k=int(t+2^p-1) + fisub dword [_SP] ; t+2^p-1-k, the remainder + pop _AX + add dword [_SP+6*PTRSIZE], eax ; add the whole ticks to song tick count, [esp+24] is the current tick in the row fstp dword [WRK+su_speed_wrk.remainder] ; save the remainder for future ret diff --git a/src/opcodes/sinks.asm b/src/opcodes/sinks.asm index 1585d91..cc40606 100644 --- a/src/opcodes/sinks.asm +++ b/src/opcodes/sinks.asm @@ -6,16 +6,16 @@ SECT_TEXT(suopout) EXPORT MANGLE_FUNC(su_op_out,0) ; l r - mov eax, su_synth_obj + su_synth.left -%ifdef INCLUDE_STEREO_OUT - jnc su_op_out_mono - call su_op_out_mono - add eax, 4 -su_op_out_mono: -%endif - fmul dword [edx+su_out_ports.gain] ; g*l - fadd dword [eax] ; g*l+o - fstp dword [eax] ; o'=g*l+o + mov _AX, PTRWORD su_synth_obj + su_synth.left + %ifdef INCLUDE_STEREO_OUT + jnc su_op_out_mono + call su_op_out_mono + add _AX, 4 + su_op_out_mono: + %endif + fmul dword [INP+su_out_ports.gain] ; g*l + fadd dword [_AX] ; g*l+o + fstp dword [_AX] ; o'=g*l+o ret %endif ; SU_OUT_ID > -1 @@ -38,33 +38,33 @@ EXPORT MANGLE_FUNC(su_op_send,0) lodsw %ifdef INCLUDE_STEREO_SEND jnc su_op_send_mono - mov edi, eax - inc eax ; send the right channel first + 
mov _DI, _AX + inc _AX ; send the right channel first fxch ; r l call su_op_send_mono ; (r) l - mov eax, edi ; move back to original address - test eax, SEND_POP ; if r was not popped and is still in the stack + mov _AX, _DI ; move back to original address + test _AX, SEND_POP ; if r was not popped and is still in the stack jnz su_op_send_mono fxch ; swap them back: l r su_op_send_mono: %endif %ifdef INCLUDE_GLOBAL_SEND - test eax, SEND_GLOBAL + test _AX, SEND_GLOBAL jz su_op_send_skipglobal - mov ecx, su_synth_obj - su_unit.size + mov _CX, PTRWORD su_synth_obj - su_unit.size su_op_send_skipglobal: %endif - test eax, SEND_POP ; if the SEND_POP bit is not set + test _AX, SEND_POP ; if the SEND_POP bit is not set jnz su_op_send_skippush fld st0 ; duplicate the signal on stack: s s su_op_send_skippush: ; there is signal s, but maybe also another: s (s) - fld dword [edx+su_send_ports.amount] ; a l (l) - fsub dword [c_0_5] ; a-.5 l (l) + fld dword [INP+su_send_ports.amount] ; a l (l) + apply fsub dword, c_0_5 ; a-.5 l (l) fadd st0 ; g=2*a-1 l (l) - and eax, 0x0000ffff - SEND_POP - SEND_GLOBAL ; eax = send address + and _AX, 0x0000ffff - SEND_POP - SEND_GLOBAL ; eax = send address fmulp st1, st0 ; g*l (l) - fadd dword [ecx+su_unit.size+eax*4] ; g*l+L (l),where L is the current value - fstp dword [ecx+su_unit.size+eax*4] ; (l) + fadd dword [_CX+su_unit.size+_AX*4] ; g*l+L (l),where L is the current value + fstp dword [_CX+su_unit.size+_AX*4] ; (l) ret %endif ; SU_USE_SEND > -1 diff --git a/src/opcodes/sources.asm b/src/opcodes/sources.asm index ffca345..4294a89 100644 --- a/src/opcodes/sources.asm +++ b/src/opcodes/sources.asm @@ -22,7 +22,7 @@ EXPORT MANGLE_FUNC(su_op_envelope,0) su_op_envelope_mono: %endif kmENV_func_do: - mov eax, dword [ecx+su_unit.size-su_voice.workspace+su_voice.release] ; eax = su_instrument.release + mov eax, dword [_CX+su_unit.size-su_voice.workspace+su_voice.release] ; eax = su_instrument.release test eax, eax ; if (eax == 0) je 
kmENV_func_process ; goto process mov dword [WRK+su_env_work.state], ENV_STATE_RELEASE ; [state]=RELEASE @@ -45,7 +45,7 @@ kmENV_func_decay: jne short kmENV_func_release ; goto release call su_env_map ; d x, where d=decay fsubp st1, st0 ; x-d - fld dword [edx+su_env_ports.sustain] ; s x-d, where s=sustain + fld dword [INP+su_env_ports.sustain] ; s x-d, where s=sustain fucomi st1 ; if (x-d>s) // is decay complete? fcmovb st0, st1 ; x-d x-d jnc short kmENV_func_statechange ; else goto statechange @@ -64,7 +64,7 @@ kmENV_func_leave: fstp st1 ; x', where x' is the new value fst dword [WRK+su_env_work.level] ; [level]=x' kmENV_func_leave2: - fmul dword [edx+su_env_ports.gain] ; [gain]*x' + fmul dword [INP+su_env_ports.gain] ; [gain]*x' ret %endif ; SU_USE_ENVELOPE @@ -83,9 +83,9 @@ EXPORT MANGLE_FUNC(su_op_noise,0) su_op_noise_mono: %endif call MANGLE_FUNC(FloatRandomNumber,0) - fld dword [edx+su_noise_ports.shape] + fld dword [INP+su_noise_ports.shape] call su_waveshaper - fld dword [edx+su_noise_ports.gain] + fld dword [INP+su_noise_ports.gain] fmulp st1, st0 ret @@ -102,8 +102,8 @@ SECT_TEXT(suoscill) EXPORT MANGLE_FUNC(su_op_oscillat,0) lodsb ; load the flags - fld dword [edx+su_osc_ports.detune] ; e, where e is the detune [0,1] - fsub dword [c_0_5] ; e-.5 + fld dword [INP+su_osc_ports.detune] ; e, where e is the detune [0,1] + apply fsub dword,c_0_5 ; e-.5 fadd st0, st0 ; d=2*e-.5, where d is the detune [-1,1] %ifdef INCLUDE_STEREO_OSCILLAT jnc su_op_oscillat_mono @@ -115,49 +115,49 @@ EXPORT MANGLE_FUNC(su_op_oscillat,0) su_op_oscillat_mono: %endif %ifdef INCLUDE_UNISONS - pushad ; push eax, WRK, WRK would suffice but this is shorter + push_registers _AX, WRK, _AX fldz ; 0 d fxch ; d a=0, "accumulated signal" su_op_oscillat_unison_loop: - fst dword [esp] ; save the current detune, d. We could keep it in fpu stack but it was getting big. + fst dword [_SP] ; save the current detune, d. We could keep it in fpu stack but it was getting big. 
call su_op_oscillat_single ; s a faddp st1, st0 ; a+=s test al, UNISON4 - je su_op_oscillat_unison_out + je su_op_oscillat_unison_out add WRK, 8 - fld dword [edx+su_osc_ports.phaseofs] ; p s - fadd dword [c_i12] ; p s, add some little phase offset to unison oscillators so they don't start in sync - fstp dword [edx+su_osc_ports.phaseofs] ; s note that this changes the phase for second, possible stereo run. That's probably ok - fld dword [esp] ; d s - fmul dword [c_0_5] ; .5*d s // negate and halve the detune of each oscillator + fld dword [INP+su_osc_ports.phaseofs] ; p s + apply fadd dword, c_i12 ; p s, add some little phase offset to unison oscillators so they don't start in sync + fstp dword [INP+su_osc_ports.phaseofs] ; s note that this changes the phase for second, possible stereo run. That's probably ok + fld dword [_SP] ; d s + apply fmul dword, c_0_5 ; .5*d s // negate and halve the detune of each oscillator fchs ; -.5*d s // negate and halve the detune of each oscillator dec eax jmp short su_op_oscillat_unison_loop su_op_oscillat_unison_out: - popad ; similarly, pop WRK, WRK, eax would suffice + pop_registers _AX, WRK, _AX ret su_op_oscillat_single: %endif - fld dword [edx+su_osc_ports.transpose] - fsub dword [c_0_5] - fdiv dword [c_i128] + fld dword [INP+su_osc_ports.transpose] + apply fsub dword,c_0_5 + apply fdiv dword,c_i128 faddp st1 test al, byte LFO jnz su_op_oscillat_skipnote - fiadd dword [ecx+su_unit.size-su_voice.workspace+su_voice.note] ; // st0 is note, st1 is t+d offset + fiadd dword [_CX+su_unit.size-su_voice.workspace+su_voice.note] ; // st0 is note, st1 is t+d offset su_op_oscillat_skipnote: - fmul dword [c_i12] + apply fmul dword,c_i12 call MANGLE_FUNC(su_power,0) test al, byte LFO jz short su_op_oscillat_normalize_note - fmul dword [c_lfo_normalize] ; // st0 is now frequency for lfo + apply fmul dword,c_lfo_normalize ; // st0 is now frequency for lfo jmp short su_op_oscillat_normalized su_op_oscillat_normalize_note: - fmul dword 
[c_freq_normalize] ; // st0 is now frequency + apply fmul dword,c_freq_normalize ; // st0 is now frequency su_op_oscillat_normalized: fadd dword [WRK+su_osc_wrk.phase] fst dword [WRK+su_osc_wrk.phase] - fadd dword [edx+su_osc_ports.phaseofs] + fadd dword [INP+su_osc_ports.phaseofs] %ifdef INCLUDE_SAMPLES test al, byte SAMPLE jz short su_op_oscillat_not_sample @@ -170,7 +170,7 @@ su_op_oscillat_not_sample: fxch fprem fstp st1 - fld dword [edx+su_osc_ports.color] ; // c p + fld dword [INP+su_osc_ports.color] ; // c p ; every oscillator test included if needed %ifdef INCLUDE_SINE test al, byte SINE @@ -199,10 +199,10 @@ su_op_oscillat_not_gate: %endif su_op_oscillat_shaping: ; finally, shape the oscillator and apply gain - fld dword [edx+su_osc_ports.shape] + fld dword [INP+su_osc_ports.shape] call su_waveshaper su_op_oscillat_gain: - fld dword [edx+su_osc_ports.gain] + fld dword [INP+su_osc_ports.gain] fmulp st1, st0 ret %define SU_INCLUDE_WAVESHAPER @@ -281,12 +281,12 @@ SECT_TEXT(sugate) su_oscillat_gate: fxch ; p c fstp st1 ; p - fmul dword [c_16] ; 16*p - push eax - push eax - fistp dword [esp] ; s=int(16*p), stack empty + apply fmul dword, c_16 ; 16*p + push _AX + push _AX + fistp dword [_SP] ; s=int(16*p), stack empty fld1 ; 1 - pop eax + pop _AX and al, 0xf ; ax=int(16*p) & 15, stack: 1 bt word [VAL-4],ax ; if bit ax of the gate word is set jc go4kVCO_gate_bit ; goto gate_bit @@ -294,10 +294,10 @@ su_oscillat_gate: go4kVCO_gate_bit: ; stack: 0/1, let's call it x fld dword [WRK+su_osc_wrk.gatestate] ; g x, g is gatestate, x is the input to this filter 0/1 fsub st1 ; g-x x - fmul dword [c_dc_const] ; c(g-x) x + apply fmul dword,c_dc_const ; c(g-x) x faddp st1, st0 ; x+c(g-x) fst dword [WRK+su_osc_wrk.gatestate] ; g'=x+c(g-x) - pop eax ; Another way to see this (c~0.996) + pop _AX ; Another way to see this (c~0.996) ret ; g'=cg+(1-c)x ; This is a low-pass to smooth the gate transitions @@ -321,26 +321,26 @@ SECT_DATA(suconst) SECT_TEXT(suoscsam) 
su_oscillat_sample: ; p - pushad ; edx must be saved, eax & ecx if this is stereo osc - push edx + push_registers _AX,_DX,_CX,_BX ; edx must be saved, eax & ecx if this is stereo osc + push _AX mov al, byte [VAL-4] ; reuse "color" as the sample number - lea edi, [MANGLE_DATA(su_sample_offsets) + eax*8] ; edi points now to the sample table entry - fmul dword [c_samplefreq_scaling] ; p*r - fistp dword [esp] - pop edx ; edx is now the sample number - movzx ebx, word [edi + su_sample_offset.loopstart] ; ecx = loopstart + apply {lea _DI,}, MANGLE_DATA(su_sample_offsets), _AX*8,{} ; edi points now to the sample table entry + apply fmul dword, c_samplefreq_scaling ; p*r + fistp dword [_SP] + pop _DX ; edx is now the sample number + movzx ebx, word [_DI + su_sample_offset.loopstart] ; ecx = loopstart sub edx, ebx ; if sample number < loop start jl su_oscillat_sample_not_looping ; then we're not looping yet mov eax, edx ; eax = sample number - movzx ecx, word [edi + su_sample_offset.looplength] ; edi is now the loop length + movzx ecx, word [_DI + su_sample_offset.looplength] ; edi is now the loop length xor edx, edx ; div wants edx to be empty div ecx ; edx is now the remainder su_oscillat_sample_not_looping: add edx, ebx ; sampleno += loopstart - add edx, dword [edi + su_sample_offset.start] - fild word [MANGLE_DATA(su_sample_table) + edx*2] - fdiv dword [c_32767] - popad + add edx, dword [_DI + su_sample_offset.start] + apply fild word, MANGLE_DATA(su_sample_table), _DX*2,{} + apply fdiv dword, c_32767 + pop_registers _AX,_DX,_CX,_BX ret SECT_DATA(suconst) @@ -369,8 +369,8 @@ EXPORT MANGLE_FUNC(su_op_loadval,0) call su_op_loadval_mono su_op_loadval_mono: %endif - fld dword [edx+su_load_val_ports.value] ; v - fsub dword [c_0_5] ; v-.5 + fld dword [INP+su_load_val_ports.value] ; v + apply fsub dword, c_0_5 fadd st0 ; 2*v-1 ret @@ -388,18 +388,18 @@ su_op_loadval_mono: SECT_TEXT(sureceiv) EXPORT MANGLE_FUNC(su_op_receive,0) - lea ecx, dword [WRK+su_unit.ports] + lea _CX, 
[WRK+su_unit.ports] %ifdef INCLUDE_STEREO_RECEIVE jnc su_op_receive_mono xor eax,eax - fld dword [ecx+su_receive_ports.right] - mov dword [ecx+su_receive_ports.right],eax + fld dword [_CX+su_receive_ports.right] + mov dword [_CX+su_receive_ports.right],eax su_op_receive_mono: %else xor eax,eax %endif - fld dword [ecx+su_receive_ports.left] - mov dword [ecx+su_receive_ports.left],eax + fld dword [_CX+su_receive_ports.left] + mov dword [_CX+su_receive_ports.left],eax ret %endif ; RECEIVE_ID > -1 diff --git a/src/player.asm b/src/player.asm index 2cf3f2c..ece24c6 100644 --- a/src/player.asm +++ b/src/player.asm @@ -1,3 +1,26 @@ +%if BITS == 32 + %define BUFFER_STACK_LOC 44 + %define render_prologue pushad ; stdcall & everything nonvolatile except eax, ecx, edx + %macro render_epilogue 0 + popad + ret 4 ; clean the passed parameter from stack. + %endmacro +%elifidn __OUTPUT_FORMAT__,win64 + %define BUFFER_STACK_LOC 48 + %define render_prologue push_registers rcx,rdi,rsi,rbx,rbp ; rcx = ptr to buf. rdi,rsi,rbx,rbp nonvolatile + %macro render_epilogue 0 + pop_registers rcx,rdi,rsi,rbx,rbp + ret + %endmacro +%else ; 64 bit mac & linux + %define BUFFER_STACK_LOC 32 ; saved rdi (buf ptr) lies above sample,row,rbp,rbx = 4*8 bytes when output_sound runs + %define render_prologue push_registers rdi,rbx,rbp ; rdi = ptr to buf. rbx & rbp nonvolatile + %macro render_epilogue 0 + pop_registers rdi,rbx,rbp + ret + %endmacro +%endif + ;------------------------------------------------------------------------------- ; Uninitialized data ;------------------------------------------------------------------------------- @@ -35,46 +58,46 @@ SECT_DATA(suconst) %macro output_sound 0 %ifndef SU_USE_16BIT_OUTPUT %ifndef SU_CLIP_OUTPUT ; The modern way. No need to clip; OS can do it. 
- mov edi, dword [esp+44] ; edi containts ptr - mov esi, su_synth_obj+su_synth.left + mov _DI, [_SP+BUFFER_STACK_LOC] ; edi contains ptr + mov _SI, PTRWORD su_synth_obj + su_synth.left movsd ; copy left channel to output buffer movsd ; copy right channel to output buffer - mov dword [esp+44], edi ; save back the updated ptr - lea edi, [esi-8] - xor eax,eax + mov [_SP+BUFFER_STACK_LOC], _DI ; save back the updated ptr + lea _DI, [_SI-8] + xor eax, eax stosd ; clear left channel so the VM is ready to write them again stosd ; clear right channel so the VM is ready to write them again %else - mov esi, dword [esp+44] ; esi points to the output buffer - xor ecx,ecx + mov _SI, [_SP+BUFFER_STACK_LOC] ; esi points to the output buffer; operand size follows _SI so this assembles for 32 and 64 bit + xor _CX,_CX xor eax,eax %%loop: ; loop over two channels, left & right - fld dword [su_synth_obj+su_synth.left+ecx*4] + apply fld dword,su_synth_obj+su_synth.left,_CX*4,{} call su_clip - fstp dword [esi] - mov dword [su_synth_obj+su_synth.left+ecx*4],eax ; clear the sample so the VM is ready to write it - add esi,4 + fstp dword [_SI] + apply mov dword,su_synth_obj+su_synth.left,_CX*4,{,eax} ; clear the sample so the VM is ready to write it + add _SI,4 cmp ecx,2 jl %%loop - mov dword [esp+44], esi ; save esi back to stack + mov [_SP+BUFFER_STACK_LOC], _SI ; save esi back to stack; pointer-sized store, not dword %endif %else ; 16-bit output, always clipped. This is a bit legacy method. 
- mov esi, dword [esp+44] ; esi points to the output buffer - mov edi, su_synth_obj+su_synth.left + mov _SI, [_SP+BUFFER_STACK_LOC] ; esi points to the output buffer + mov _DI, PTRWORD su_synth_obj+su_synth.left mov ecx, 2 %%loop: ; loop over two channels, left & right - fld dword [edi] + fld dword [_DI] call su_clip - fmul dword [c_32767] - push eax - fistp dword [esp] - pop eax - mov word [esi],ax ; // store integer converted right sample + apply fmul dword, c_32767 + push _AX + fistp dword [_SP] + pop _AX + mov word [_SI],ax ; // store integer converted right sample xor eax,eax stosd - add esi,2 + add _SI,2 loop %%loop - mov dword [esp+44], esi ; save esi back to stack + mov [_SP+BUFFER_STACK_LOC], _SI ; save esi back to stack %endif %endmacro @@ -87,30 +110,29 @@ SECT_DATA(suconst) ;------------------------------------------------------------------------------- SECT_TEXT(surender) -EXPORT MANGLE_FUNC(su_render,4) ; Stack: ptr - pushad ; Stack: pushad ptr +EXPORT MANGLE_FUNC(su_render,PTRSIZE) ; Stack: ptr + render_prologue %ifdef INCLUDE_GMDLS call su_gmdls_load %endif xor eax, eax ; ecx is the current row su_render_rowloop: ; loop through every row in the song - push eax ; Stack: row pushad ptr + push _AX ; Stack: row pushad ptr call su_update_voices ; update instruments for the new row xor eax, eax ; ecx is the current sample within row su_render_sampleloop: ; loop through every sample in the row - push eax ; Stack: sample row pushad ptr + push _AX ; Stack: sample row pushad ptr call MANGLE_FUNC(su_run_vm,0) ; run through the VM code output_sound ; *ptr++ = left, *ptr++ = right - pop eax ; Stack: row pushad ptr + pop _AX ; Stack: row pushad ptr inc eax cmp eax, SAMPLES_PER_ROW jl su_render_sampleloop - pop eax ; Stack: pushad ptr + pop _AX ; Stack: pushad ptr inc eax cmp eax, TOTAL_ROWS jl su_render_rowloop - popad ; Stack: ptr - ret 4 ; Stack emptied by ret + render_epilogue ;------------------------------------------------------------------------------- ; 
su_update_voices function: polyphonic & chord implementation @@ -126,30 +148,30 @@ su_update_voices: ; Stack: retaddr row xor edx, edx mov ebx, PATTERN_SIZE ; we could do xor ebx,ebx; mov bl,PATTERN_SIZE, but that would limit patternsize to 256... div ebx ; eax = current pattern, edx = current row in pattern - lea esi, [MANGLE_DATA(su_tracks)+eax] ; esi points to the pattern data for current track + apply {lea _SI,},MANGLE_DATA(su_tracks),_AX,{} ; esi points to the pattern data for current track xor eax, eax ; eax is the first voice of next track xor ebx, ebx ; ebx is the first voice of current track - mov ebp, su_current_voiceno ; ebp points to the current_voiceno array + mov _BP, PTRWORD su_current_voiceno ; ebp points to the current_voiceno array su_update_voices_trackloop: - movzx eax, byte [esi] ; eax = current pattern + movzx eax, byte [_SI] ; eax = current pattern imul eax, PATTERN_SIZE ; eax = offset to current pattern data - movzx eax, byte [MANGLE_DATA(su_patterns)+eax+edx] ; eax = note - push edx ; Stack: ptrnrow + apply {movzx eax,byte},MANGLE_DATA(su_patterns),_AX,_DX,{} ; eax = note + push _DX ; Stack: ptrnrow xor edx, edx ; edx=0 mov ecx, ebx ; ecx=first voice of the track to be done su_calculate_voices_loop: ; do { - bt dword [su_voicetrack_bitmask],ecx ; // notice that the incs don't set carry + apply bt dword, su_voicetrack_bitmask,{,ecx}; // notice that the incs don't set carry inc edx ; edx++ // edx=numvoices inc ecx ; ecx++ // ecx=the first voice of next track jc su_calculate_voices_loop ; } while bit ecx-1 of bitmask is on - push ecx ; Stack: next_instr ptrnrow + push _CX ; Stack: next_instr ptrnrow cmp al, HLD ; anything but hold causes action je short su_update_voices_nexttrack - mov ecx, dword [ebp] + mov ecx, dword [_BP] mov edi, ecx add edi, ebx shl edi, MAX_UNITS_SHIFT + 6 ; each unit = 64 bytes and there are 1< one voice per track version + +%else ; INCLUDE_MULTIVOICE_TRACKS not defined -> one voice per track version su_update_voices: ; 
Stack: retaddr row xor edx, edx xor ebx, ebx mov bl, PATTERN_SIZE div ebx ; eax = current pattern, edx = current row in pattern - lea esi, [MANGLE_DATA(su_tracks)+eax] ; esi points to the pattern data for current track - lea edi, [su_synth_obj+su_synth.voices] + apply {lea _SI,},MANGLE_DATA(su_tracks),_AX,{}; esi points to the pattern data for current track + mov _DI, PTRWORD su_synth_obj+su_synth.voices mov bl, MAX_TRACKS ; MAX_TRACKS is always <= 32 so this is ok su_update_voices_trackloop: - movzx eax, byte [esi] ; eax = current pattern + movzx eax, byte [_SI] ; eax = current pattern imul eax, PATTERN_SIZE ; eax = offset to current pattern data - movzx eax, byte [MANGLE_DATA(su_patterns)+eax+edx] ; ecx = note + apply {movzx eax, byte}, MANGLE_DATA(su_patterns), _AX, _DX, {} ; eax = note cmp al, HLD ; anything but hold causes action je short su_update_voices_nexttrack - inc dword [edi+su_voice.release] ; set the voice currently active to release; notice that it could increment any number of times + inc dword [_DI+su_voice.release] ; set the voice currently active to release; notice that it could increment any number of times cmp al, HLD jl su_update_voices_nexttrack ; if cl < HLD (no new note triggered) goto nexttrack su_update_voices_retrigger: - stosd ; save note + stosd ; save note mov ecx, (su_voice.size - su_voice.release)/4 ; could be xor ecx, ecx; mov ch,...>>8, but will it actually be smaller after compression?
xor eax, eax rep stosd ; clear the workspace of the new voice, retriggering oscillators jmp short su_update_voices_skipadd su_update_voices_nexttrack: - add edi, su_voice.size + add _DI, su_voice.size su_update_voices_skipadd: - add esi, MAX_PATTERNS + add _SI, MAX_PATTERNS dec ebx jnz short su_update_voices_trackloop ret diff --git a/src/sointu.asm b/src/sointu.asm index b57ba57..3e23bfd 100644 --- a/src/sointu.asm +++ b/src/sointu.asm @@ -1,6 +1,97 @@ -%define WRK ebp ; // alias for unit workspace -%define VAL esi ; // alias for unit values (transformed/untransformed) -%define COM ebx ; // alias for instrument opcodes +%if BITS == 64 + %define WRK rbp ; alias for unit workspace + %define VAL rsi ; alias for unit values (transformed/untransformed) + %define COM rbx ; alias for instrument opcodes + %define INP rdx ; alias for transformed inputs + %define _AX rax ; push and offsets have to be r* on 64-bit and e* on 32-bit + %define _BX rbx + %define _CX rcx + %define _DX rdx + %define _SP rsp + %define _SI rsi + %define _DI rdi + %define _BP rbp + %define PTRSIZE 8 + %define PTRWORD qword + %define RESPTR resq + %define DPTR dq + + %macro apply 2 + mov r9, qword %2 + %1 [r9] + %endmacro + + %macro apply 3 + mov r9, qword %2 + %1 [r9] %3 + %endmacro + + %macro apply 4 + mov r9, qword %2 + %1 [r9+%3] %4 + %endmacro + + %macro apply 5 + mov r9, qword %2 + lea r9, [r9+%3] + %1 [r9+%4] %5 + %endmacro + + %macro push_registers 1-* + %rep %0 + push %1 + %rotate 1 + %endrep + %endmacro + + %macro pop_registers 1-* + %rep %0 + %rotate -1 + pop %1 + %endrep + %endmacro +%else + %define WRK ebp ; alias for unit workspace + %define VAL esi ; alias for unit values (transformed/untransformed) + %define COM ebx ; alias for instrument opcodes + %define INP edx ; alias for transformed inputs + %define _AX eax + %define _BX ebx + %define _CX ecx + %define _DX edx + %define _SP esp + %define _SI esi + %define _DI edi + %define _BP ebp + %define PTRSIZE 4 + %define PTRWORD dword + 
%define RESPTR resd + %define DPTR dd + + %macro apply 2 + %1 [%2] + %endmacro + + %macro apply 3 + %1 [%2] %3 + %endmacro + + %macro apply 4 + %1 [%2+%3] %4 + %endmacro + + %macro apply 5 + %1 [%2+%3+%4] %5 + %endmacro + + %macro push_registers 1-* + pushad ; in 32-bit mode, this is the easiest way to store all the registers + %endmacro + + %macro pop_registers 1-* + popad + %endmacro +%endif ;=============================================================================== ; Uninitialized data: The one and only synth object @@ -16,16 +107,14 @@ su_transformed_values resd 16 ;=============================================================================== SECT_DATA(suoptabl) -su_synth_commands - dd OPCODES +su_synth_commands DPTR OPCODES ;=============================================================================== ; The number of transformed parameters each opcode takes ;=============================================================================== SECT_DATA(suparcnt) -su_opcode_numparams - db NUMPARAMS +su_opcode_numparams db NUMPARAMS ;------------------------------------------------------------------------------- ; Constants used by the common functions @@ -58,34 +147,40 @@ su_polyphony_bitmask dd POLYPHONY_BITMASK ; does the next voice reuse th SECT_TEXT(surunvm) EXPORT MANGLE_FUNC(su_run_vm,0) - mov COM, MANGLE_DATA(su_commands) ; COM points to vm code - mov VAL, MANGLE_DATA(su_params) ; VAL points to unit params + mov COM, PTRWORD MANGLE_DATA(su_commands) ; COM points to vm code + mov VAL, PTRWORD MANGLE_DATA(su_params) ; VAL points to unit params ; su_unit.size will be added back before WRK is used - mov WRK, su_synth_obj + su_synth.voices + su_voice.workspace - su_unit.size + mov WRK, PTRWORD su_synth_obj + su_synth.voices + su_voice.workspace - su_unit.size push COM ; Stack: COM push VAL ; Stack: VAL COM push WRK ; Stack: WRK VAL COM -%if DELAY_ID > -1 - mov dword [MANGLE_DATA(su_delay_buffer_ofs)], MANGLE_DATA(su_delay_buffer) ; reset delaywrk to first 
delayline +%if DELAY_ID > -1 + %if BITS == 64 ; TODO: find a way to do this with a macro + mov r9,PTRWORD MANGLE_DATA(su_delay_buffer_ofs) + mov _AX,PTRWORD MANGLE_DATA(su_delay_buffer) + mov qword [r9],_AX ; reset delaywrk to first delayline + %else + mov dword [MANGLE_DATA(su_delay_buffer_ofs)],MANGLE_DATA(su_delay_buffer) ; reset delaywrk to first + %endif %endif xor ecx, ecx ; voice = 0 - push ecx ; Stack: voice WRK VAL COM + push _CX ; Stack: voice WRK VAL COM su_run_vm_loop: ; loop until all voices done movzx eax, byte [COM] ; eax = command byte inc COM ; move to next instruction add WRK, su_unit.size ; move WRK to next unit - push eax + push _AX shr eax,1 - mov al,byte [eax+su_opcode_numparams] - push eax + apply {mov al,byte},su_opcode_numparams,_AX,{} + push _AX call su_transform_values - mov ecx, dword [esp+8] - pop eax + mov _CX, PTRWORD [_SP+2*PTRSIZE] + pop _AX shr eax,1 - call dword [eax*4+su_synth_commands] ; call the function corresponding to the instruction - cmp dword [esp],MAX_VOICES ; if (voice < MAX_VOICES) + apply call,su_synth_commands,_AX*PTRSIZE,{} ; call the function corresponding to the instruction + cmp dword [_SP],MAX_VOICES ; if (voice < MAX_VOICES) jl su_run_vm_loop ; goto vm_loop - add esp, 16 ; Stack cleared + add _SP, 4*PTRSIZE ; Stack cleared ret ;------------------------------------------------------------------------------- @@ -96,12 +191,12 @@ su_run_vm_loop: ; loop until all voices done SECT_TEXT(surandom) EXPORT MANGLE_FUNC(FloatRandomNumber,0) - push eax - imul eax,dword [MANGLE_DATA(RandSeed)],16007 - mov dword [MANGLE_DATA(RandSeed)], eax - fild dword [MANGLE_DATA(RandSeed)] - fidiv dword [c_RandDiv] - pop eax + push _AX + apply {imul eax,},MANGLE_DATA(RandSeed),{,16007} + apply mov,MANGLE_DATA(RandSeed),{, eax} + apply fild dword,MANGLE_DATA(RandSeed) + apply fidiv dword,c_RandDiv + pop _AX ret ;------------------------------------------------------------------------------- @@ -117,31 +212,26 @@ EXPORT 
MANGLE_FUNC(FloatRandomNumber,0) SECT_TEXT(sutransf) su_transform_values: - push ecx + push _CX xor ecx, ecx xor eax, eax - mov edx, su_transformed_values + mov INP, PTRWORD su_transformed_values su_transform_values_loop: - cmp ecx, dword [esp+8] + cmp ecx, dword [_SP+2*PTRSIZE] jge su_transform_values_out lodsb - push eax - fild dword [esp] - fmul dword [c_i128] - fadd dword [WRK+su_unit.ports+ecx*4] - fstp dword [edx+ecx*4] - mov dword [WRK+su_unit.ports+ecx*4], 0 - pop eax + push _AX + fild dword [_SP] + apply fmul dword, c_i128 + fadd dword [WRK+su_unit.ports+_CX*4] + fstp dword [INP+_CX*4] + mov dword [WRK+su_unit.ports+_CX*4], 0 + pop _AX inc ecx jmp su_transform_values_loop su_transform_values_out: - pop ecx - ret 4 - -%macro TRANSFORM_VALUES 1 - push %1 %+ .params/4 - call su_transform_values -%endmacro + pop _CX + ret PTRSIZE ;------------------------------------------------------------------------------- ; su_env_map function: computes 2^(-24*x) of the envelope parameter @@ -154,8 +244,8 @@ SECT_TEXT(supower) %if ENVELOPE_ID > -1 ; TODO: compressor also uses this, so should be compiled if either su_env_map: - fld dword [edx+eax*4] ; x, where x is the parameter in the range 0-1 - fimul dword [c_24] ; 24*x + fld dword [INP+_AX*4] ; x, where x is the parameter in the range 0-1 + apply fimul dword,c_24 ; 24*x fchs ; -24*x ; flow into Power function, which outputs 2^(-24*x) %endif @@ -189,6 +279,13 @@ EXPORT MANGLE_FUNC(su_power,0) ; sources, as sources.asm defines SU_USE_WAVESHAPER ; if needed. 
%include "opcodes/effects.asm" -%include "player.asm" %include "introspection.asm" -%include "gmdls.asm" +%include "player.asm" + +%ifidn __OUTPUT_FORMAT__,win64 + %include "win64/gmdls_win64.asm" +%endif + +%ifidn __OUTPUT_FORMAT__,win32 + %include "win32/gmdls_win32.asm" +%endif \ No newline at end of file diff --git a/src/sointu.inc b/src/sointu.inc index a457f2e..b9f2705 100644 --- a/src/sointu.inc +++ b/src/sointu.inc @@ -19,17 +19,33 @@ ; on win32, function f with n parameters is mangled as "_f@n" %define MANGLE_FUNC(f,n) _ %+ f %+ @ %+ n %define WIN_OR_MAC + %assign BITS 32 + ; On windows and mac, data label d is mangled as "_d" + %define MANGLE_DATA(d) _ %+ d +%endif + +%ifidn __OUTPUT_FORMAT__,win64 + ; on win64, function f with n parameters is not mangled: "f" + %define MANGLE_FUNC(f,n) f + %define WIN_OR_MAC + %assign BITS 64 + ; On win64, data label d is not mangled: "d" + %define MANGLE_DATA(d) d %endif %ifidn __OUTPUT_FORMAT__,elf32 ; on linux, function f with n parameters is mangled as "f" %define MANGLE_FUNC(f,n) f + ; On linux, data label d is mangled as "d" + %define MANGLE_DATA(d) d %endif %ifidn __OUTPUT_FORMAT__,macho32 ; on mac, function f with x parameters is mangled as "_f" %define MANGLE_FUNC(f,n) _f %define WIN_OR_MAC + ; On windows and mac, data label d is mangled as "_d" + %define MANGLE_DATA(d) _ %+ d %endif %ifdef WIN_OR_MAC @@ -44,8 +60,6 @@ %define SECT_DATA(n) section .data align=1 %define SECT_TEXT(n) section .code align=1 %endif - ; On windows and mac, data label d is mangled as "_d" - %define MANGLE_DATA(d) _ %+ d %else ; Linux %ifdef USE_SECTIONS @@ -57,8 +71,6 @@ %define SECT_DATA(n) section .data. progbits alloc noexec write align=1 %define SECT_TEXT(n) section .text.
progbits alloc exec nowrite align=1 %endif - ; On linux, data label d is mangled as "d" - %define MANGLE_DATA(d) d %endif %ifdef SU_USE_ALL diff --git a/src/gmdls.asm b/src/win32/gmdls_win32.asm similarity index 87% rename from src/gmdls.asm rename to src/win32/gmdls_win32.asm index 7670f78..50b7c99 100644 --- a/src/gmdls.asm +++ b/src/win32/gmdls_win32.asm @@ -8,15 +8,15 @@ extern _ReadFile@20 ; requires windows SECT_TEXT(sugmdls) su_gmdls_load: - mov edi, MANGLE_DATA(su_sample_table) - mov esi, su_gmdls_path1 - su_gmdls_pathloop: + mov edi, MANGLE_DATA(su_sample_table) + mov esi, su_gmdls_path1 + su_gmdls_pathloop: push 0 ; OF_READ push edi ; &ofstruct, blatantly reuse the sample table push esi ; path call _OpenFile@12 ; eax = OpenFile(path,&ofstruct,OF_READ) add esi, su_gmdls_path2 - su_gmdls_path1 ; if we ever get to third, then crash - cmp eax, -1 ; eax == INVALID? + cmp eax, -1 ; eax == INVALID? je su_gmdls_pathloop push 0 ; NULL push edi ; &bytes_read, reusing sample table again; it does not matter that the first four bytes are trashed @@ -31,7 +31,7 @@ SECT_DATA(sugmpath) su_gmdls_path1: db 'drivers/gm.dls',0 su_gmdls_path2: - db 'drivers/etc/gm.dls',0 + db 'drivers/etc/gm.dls',0 SECT_DATA(suconst) c_samplefreq_scaling dd 84.28074964676522 ; o = 0.000092696138, n = 72, f = 44100*o*2**(n/12), scaling = 22050/f <- so note 72 plays at the "normal rate" diff --git a/src/win64/gmdls_win64.asm b/src/win64/gmdls_win64.asm new file mode 100644 index 0000000..724287d --- /dev/null +++ b/src/win64/gmdls_win64.asm @@ -0,0 +1,44 @@ +%ifdef INCLUDE_GMDLS + +%define SAMPLE_TABLE_SIZE 3440660 ; size of gmdls + +extern OpenFile ; requires windows +extern ReadFile ; requires windows + +SECT_TEXT(sugmdls) +; Win64 ABI: RCX, RDX, R8, and R9 +su_gmdls_load: + sub rsp, 40 ; Win64 ABI requires "shadow space" + space for one parameter. 
+ mov rdi, PTRWORD MANGLE_DATA(su_sample_table) + mov rsi, PTRWORD su_gmdls_path1 + su_gmdls_pathloop: + xor r8,r8 ; OF_READ + mov rdx, rdi ; &ofstruct, blatantly reuse the sample table + mov rcx, rsi ; path + call OpenFile ; eax = OpenFile(path,&ofstruct,OF_READ) + add rsi, su_gmdls_path2 - su_gmdls_path1 ; if we ever get to third, then crash + movsxd rcx,eax + cmp rcx, -1 ; ecx == INVALID? + je su_gmdls_pathloop + mov qword [rsp+32],0 + mov r9, rdi + mov r8d, SAMPLE_TABLE_SIZE ; number of bytes to read + mov rdx, rdi + call ReadFile ; Readfile(handle,&su_sample_table,SAMPLE_TABLE_SIZE,&bytes_read,NULL) + add rsp, 40 ; shadow space, as required by Win64 ABI + ret + +SECT_DATA(sugmpath) + +su_gmdls_path1: + db 'drivers/gm.dls',0 +su_gmdls_path2: + db 'drivers/etc/gm.dls',0 + +SECT_DATA(suconst) + c_samplefreq_scaling dd 84.28074964676522 ; o = 0.000092696138, n = 72, f = 44100*o*2**(n/12), scaling = 22050/f <- so note 72 plays at the "normal rate" + +SECT_BSS(susamtbl) + EXPORT MANGLE_DATA(su_sample_table) resb SAMPLE_TABLE_SIZE ; size of gmdls. 
+ +%endif \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 509ab3e..e1c521b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,7 +8,14 @@ function(regression_test testname) add_executable(${testname} ${source}.asm test_renderer.c) # the tests include the entire ASM but we still want to rebuild when they change - file(GLOB SOINTU ${PROJECT_SOURCE_DIR}/src/*.inc ${PROJECT_SOURCE_DIR}/src/*.asm ${PROJECT_SOURCE_DIR}/src/opcodes/*.asm ${PROJECT_SOURCE_DIR}/src/opcodes/*.inc) + file(GLOB SOINTU ${PROJECT_SOURCE_DIR}/src/*.inc + ${PROJECT_SOURCE_DIR}/src/*.asm + ${PROJECT_SOURCE_DIR}/src/opcodes/*.asm + ${PROJECT_SOURCE_DIR}/src/opcodes/*.inc + ${PROJECT_SOURCE_DIR}/src/win32/*.asm + ${PROJECT_SOURCE_DIR}/src/win32/*.inc + ${PROJECT_SOURCE_DIR}/src/win64/*.asm + ${PROJECT_SOURCE_DIR}/src/win64/*.inc) set_source_files_properties(${source}.asm PROPERTIES OBJECT_DEPENDS "${SOINTU}") set_source_files_properties(${FOURKLANG} PROPERTIES HEADER_FILE_ONLY TRUE)