diff --git a/README.md b/README.md
index eed2ead..fdd9d76 100644
--- a/README.md
+++ b/README.md
@@ -167,9 +167,15 @@ wat2wasm --enable-bulk-memory test_chords.wat
 
 #### Examples
 
-The folder `examples/code` contains usage examples in C. If you want to target smaller executable sizes, using a compressing linker
+The folder `examples/code` contains usage examples for Sointu with winmm and dsound playback under Windows and asound playback under Unix. Source code is available in C and x86 assembly (win32, elf32 and elf64 versions).
+
+To build the examples, use `ninja examples`.
+
+If you want to target smaller executable sizes, using a compressing linker
 like [Crinkler](https://github.com/runestubbe/Crinkler) on Windows is recommended.
 
+The Linux examples use ALSA and need libasound2-dev (or libasound2-dev:i386 for the 32-bit build) installed. The 32-bit build also needs pipewire-alsa:i386, which is not installed by default.
+
 ### Native virtual machine
 
 The native bridge allows Go to call the sointu compiled x86 native
diff --git a/examples/code/C/CMakeLists.txt b/examples/code/C/CMakeLists.txt
index 812815b..a9bfa8e 100644
--- a/examples/code/C/CMakeLists.txt
+++ b/examples/code/C/CMakeLists.txt
@@ -1,34 +1,76 @@
+# this fixes a bug in creating a static library from asm, similar to
+# https://discourse.cmake.org/t/building-lib-file-from-asm-cmake-bug/1959
+# but for NASM
+if(MSVC)
+    set(CMAKE_ASM_NASM_CREATE_STATIC_LIBRARY "<CMAKE_AR> /OUT:<TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+
+# Compile the example song into NASM source together with the matching C
+# header and NASM include file; one compiler run produces all three outputs.
 add_custom_command(
-    COMMAND
-        ${compilecmd} -arch=${arch} -o physics_girl_st.asm "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml"
-    WORKING_DIRECTORY
-        ${CMAKE_CURRENT_BINARY_DIR}
-    DEPENDS
-        "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml"
-    OUTPUT
-        physics_girl_st.asm
-        physics_girl_st.h
-        physics_girl_st.inc
-    COMMENT
-        "Compiling ${PROJECT_SOURCE_DIR}/examples/patches/physics-girl-st.yml..."
+	COMMAND
+		${compilecmd} -arch=${arch} -o physics_girl_st.asm "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml"
+	WORKING_DIRECTORY
+		${CMAKE_CURRENT_BINARY_DIR}
+	DEPENDS
+		"${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml"
+	OUTPUT
+		physics_girl_st.asm
+		physics_girl_st.h
+		physics_girl_st.inc
+	COMMENT
+		"Compiling ${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml..."
+	VERBATIM
 )
 
+# Library that contains the assembled song; every C example below links against it.
 add_library(physics_girl_st physics_girl_st.asm)
 add_dependencies(physics_girl_st sointu-compiler)
 
 if(WIN32)
-    add_executable(cplay
-        cplay.windows.c
-        physics_girl_st.h
-    )
-    target_link_libraries(cplay PRIVATE winmm)
+	# Playback example using the waveOut API (winmm).
+	add_executable(cplay-winmm
+		cplay.windows.winmm.c
+		physics_girl_st.h
+	)
+	target_link_libraries(cplay-winmm PRIVATE winmm)
+	target_link_libraries(cplay-winmm PRIVATE physics_girl_st)
+	target_include_directories(cplay-winmm PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+	add_dependencies(examples cplay-winmm)
+
+	# Playback example using DirectSound.
+	add_executable(cplay-directsound
+		cplay.windows.directsound.c
+		physics_girl_st.h
+	)
+	target_link_libraries(cplay-directsound PRIVATE dsound ws2_32 ucrt)
+	target_link_libraries(cplay-directsound PRIVATE physics_girl_st)
+	target_include_directories(cplay-directsound PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+	add_dependencies(examples cplay-directsound)
 elseif(UNIX)
-    add_executable(cplay
-        cplay.unix.c
-        physics_girl_st.h
-    )
-    target_link_libraries(cplay PRIVATE asound pthread)
-    target_link_options(cplay PRIVATE -z noexecstack -no-pie)
+	# Playback example using ALSA (libasound).
+	add_executable(cplay
+		cplay.unix.c
+		physics_girl_st.h
+	)
+	target_link_libraries(cplay PRIVATE asound pthread)
+	target_link_options(cplay PRIVATE -z noexecstack -no-pie)
+	target_link_libraries(cplay PRIVATE physics_girl_st)
+	target_include_directories(cplay PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+	add_dependencies(examples cplay)
 endif()
-target_link_libraries(cplay PRIVATE physics_girl_st)
-target_include_directories(cplay PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+
+# Renders the song and writes it to a WAV file; built on every platform.
+add_executable(cwav
+	cwav.c
+	physics_girl_st.h
+)
+if(WIN32)
+	target_compile_definitions(cwav PRIVATE _CRT_SECURE_NO_WARNINGS)
+elseif(UNIX)
+	target_link_options(cwav PRIVATE -z noexecstack -no-pie)
+endif()
+target_link_libraries(cwav PRIVATE physics_girl_st)
+target_include_directories(cwav PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+
+add_dependencies(examples cwav)
diff --git a/examples/code/C/cplay.unix.c b/examples/code/C/cplay.unix.c
index a181fe3..13d8b83 100644
--- a/examples/code/C/cplay.unix.c
+++ b/examples/code/C/cplay.unix.c
@@ -20,7 +20,19 @@ int main(int argc, char **args) {
 
 	// Play the track.
 	snd_pcm_open(&pcm_handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
-	snd_pcm_set_params(pcm_handle, SND_PCM_FORMAT_FLOAT, SND_PCM_ACCESS_RW_INTERLEAVED, SU_CHANNEL_COUNT, SU_SAMPLE_RATE, 0, SU_LENGTH_IN_SAMPLES);
+	snd_pcm_set_params(
+		pcm_handle,
+#ifdef SU_SAMPLE_FLOAT
+		SND_PCM_FORMAT_FLOAT,
+#else // SU_SAMPLE_FLOAT
+		SND_PCM_FORMAT_S16_LE,
+#endif // SU_SAMPLE_FLOAT
+		SND_PCM_ACCESS_RW_INTERLEAVED,
+		SU_CHANNEL_COUNT,
+		SU_SAMPLE_RATE,
+		0,
+		SU_LENGTH_IN_SAMPLES
+	);
 	snd_pcm_writei(pcm_handle, sound_buffer, SU_LENGTH_IN_SAMPLES);
 
 	return 0;
diff --git a/examples/code/C/cplay.windows.directsound.c b/examples/code/C/cplay.windows.directsound.c
new file mode 100644
index 0000000..6eed12e
--- /dev/null
+++ b/examples/code/C/cplay.windows.directsound.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdint.h>
+#include "physics_girl_st.h"
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <Windows.h>
+#include "mmsystem.h"
+#include "mmreg.h"
+#define CINTERFACE
+#include <dsound.h>
+
+#ifndef DSBCAPS_TRUEPLAYPOSITION // Not defined in MinGW dsound headers, so let's add it
+#define DSBCAPS_TRUEPLAYPOSITION 0x00080000
+#endif
+
+// Size in bytes of the complete rendered song.
+#define SONG_SIZE_IN_BYTES (SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT)
+
+// Sample format of the song, built from the SU_* constants.
+WAVEFORMATEX wave_format = {
+#ifdef SU_SAMPLE_FLOAT
+	WAVE_FORMAT_IEEE_FLOAT,
+#else
+	WAVE_FORMAT_PCM,
+#endif
+	SU_CHANNEL_COUNT,
+	SU_SAMPLE_RATE,
+	SU_SAMPLE_RATE * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT,
+	SU_SAMPLE_SIZE * SU_CHANNEL_COUNT,
+	SU_SAMPLE_SIZE*8,
+	0
+};
+
+// A single sound buffer that is large enough to hold the whole song.
+DSBUFFERDESC buffer_description = {
+	sizeof(DSBUFFERDESC),
+	DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS | DSBCAPS_TRUEPLAYPOSITION,
+	SONG_SIZE_IN_BYTES,
+	0,
+	&wave_format,
+	0
+};
+
+// Reports which call failed and yields the exit code that main returns.
+static int fail(const char *what) {
+	fprintf(stderr, "%s failed\n", what);
+	return 1;
+}
+
+int main(int argc, char **args) {
+	// Load gm.dls if necessary.
+#ifdef SU_LOAD_GMDLS
+	su_load_gmdls();
+#endif // SU_LOAD_GMDLS
+
+	// SetCooperativeLevel wants a window handle; use the foreground window
+	// and fall back to the desktop window when there is none.
+	HWND hWnd = GetForegroundWindow();
+	if(hWnd == NULL) {
+		hWnd = GetDesktopWindow();
+	}
+
+	LPDIRECTSOUND direct_sound = NULL;
+	LPDIRECTSOUNDBUFFER direct_sound_buffer = NULL;
+	if(FAILED(DirectSoundCreate(0, &direct_sound, 0))) {
+		return fail("DirectSoundCreate");
+	}
+	if(FAILED(IDirectSound_SetCooperativeLevel(direct_sound, hWnd, DSSCL_PRIORITY))) {
+		return fail("IDirectSound_SetCooperativeLevel");
+	}
+	if(FAILED(IDirectSound_CreateSoundBuffer(direct_sound, &buffer_description, &direct_sound_buffer, NULL))) {
+		return fail("IDirectSound_CreateSoundBuffer");
+	}
+
+	// Lock the whole buffer and let the render thread write straight into it.
+	// The buffer is never unlocked here: rendering continues in the
+	// background while playback has already been started.
+	LPVOID p1 = NULL;
+	DWORD l1 = 0;
+	if(FAILED(IDirectSoundBuffer_Lock(direct_sound_buffer, 0, SONG_SIZE_IN_BYTES, &p1, &l1, NULL, NULL, 0))) {
+		return fail("IDirectSoundBuffer_Lock");
+	}
+	if(CreateThread(0, 0, (LPTHREAD_START_ROUTINE)su_render_song, p1, 0, 0) == NULL) {
+		return fail("CreateThread");
+	}
+	if(FAILED(IDirectSoundBuffer_Play(direct_sound_buffer, 0, 0, 0))) {
+		return fail("IDirectSoundBuffer_Play");
+	}
+
+	// Keep pumping window messages while the song plays. The loop ends once
+	// the reported play cursor is lower than the previous reading, which is
+	// taken as the sign that the non-looping buffer has finished.
+	MSG msg = {0};
+	DWORD last_play_cursor = 0;
+	for(DWORD play_cursor = 0; play_cursor >= last_play_cursor; IDirectSoundBuffer_GetCurrentPosition(direct_sound_buffer, &play_cursor, NULL)) {
+		while (PeekMessageA(&msg, NULL, 0, 0, PM_REMOVE)) {
+			TranslateMessage(&msg);
+			DispatchMessageA(&msg);
+		}
+
+		last_play_cursor = play_cursor;
+	}
+
+	return 0;
+}
diff --git a/examples/code/C/cplay.windows.c b/examples/code/C/cplay.windows.winmm.c
similarity index 78%
rename from examples/code/C/cplay.windows.c
rename to examples/code/C/cplay.windows.winmm.c
index 1a59fa8..834e756 100644
--- a/examples/code/C/cplay.windows.c
+++ b/examples/code/C/cplay.windows.winmm.c
@@ -9,7 +9,7 @@
 
 SUsample sound_buffer[SU_LENGTH_IN_SAMPLES * SU_CHANNEL_COUNT];
 HWAVEOUT	wave_out_handle;
-WAVEFORMATEX WaveFMT = {
+WAVEFORMATEX wave_format = {
 #ifdef SU_SAMPLE_FLOAT
 	WAVE_FORMAT_IEEE_FLOAT,
 #else
@@ -22,7 +22,7 @@ WAVEFORMATEX WaveFMT = {
 	SU_SAMPLE_SIZE*8,
 	0
 };
-WAVEHDR WaveHDR = {
+WAVEHDR wave_header = {
 	(LPSTR)sound_buffer, 
 	SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT,
 	0,
@@ -32,7 +32,7 @@ WAVEHDR WaveHDR = {
 	0,
 	0
 };
-MMTIME MMTime = {
+MMTIME mmtime = {
 	TIME_SAMPLES,
 	0
 };
@@ -48,11 +48,11 @@ int main(int argc, char **args) {
 	// We render in the background while playing already. Fortunately,
 	// Windows is slow with the calls below, so we're not worried that
 	// we don't have enough samples ready before the track starts.
-	waveOutOpen(&wave_out_handle, WAVE_MAPPER, &WaveFMT, 0, 0, CALLBACK_NULL);
-	waveOutWrite(wave_out_handle, &WaveHDR, sizeof(WaveHDR));
+	waveOutOpen(&wave_out_handle, WAVE_MAPPER, &wave_format, 0, 0, CALLBACK_NULL);
+	waveOutWrite(wave_out_handle, &wave_header, sizeof(wave_header));
 
 	// We need to handle windows messages properly while playing, as waveOutWrite is async.
-	for(MSG msg = {0}; MMTime.u.sample != SU_LENGTH_IN_SAMPLES; waveOutGetPosition(wave_out_handle, &MMTime, sizeof(MMTIME))) {
+	for(MSG msg = {0}; mmtime.u.sample != SU_LENGTH_IN_SAMPLES; waveOutGetPosition(wave_out_handle, &mmtime, sizeof(MMTIME))) {
 		while (PeekMessageA(&msg, NULL, 0, 0, PM_REMOVE)) {
 			TranslateMessage(&msg);
 			DispatchMessageA(&msg);
diff --git a/examples/code/C/cwav.c b/examples/code/C/cwav.c
new file mode 100644
index 0000000..7675517
--- /dev/null
+++ b/examples/code/C/cwav.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <stdint.h>
+#include "physics_girl_st.h"
+
+#define WAVE_FORMAT_PCM 0x1
+#define WAVE_FORMAT_IEEE_FLOAT 0x3
+
+// Size in bytes of the complete rendered song.
+#define SONG_SIZE_IN_BYTES (SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT)
+
+static SUsample sound_buffer[SU_LENGTH_IN_SAMPLES * SU_CHANNEL_COUNT];
+
+// The structs below are written to disk as they are, so they are packed.
+#pragma pack(push, 1)
+typedef struct {
+	char riff[4];
+	uint32_t file_size;
+	char wavefmt[8];
+} riff_header_t;
+
+typedef struct {
+	char data[4];
+	uint32_t data_size;
+} data_header_t;
+
+typedef struct {
+	riff_header_t riff_header;
+	uint32_t riff_header_size;
+	uint16_t sample_type;
+	uint16_t channel_count;
+	uint32_t sample_rate;
+	uint32_t bytes_per_second;
+	uint16_t bytes_per_channel;
+	uint16_t bits_per_sample;
+	data_header_t data_header;
+} wave_header_t;
+#pragma pack(pop)
+
+int main(int argc, char **args) {
+	wave_header_t wave_header = {
+		.riff_header = {
+			.riff = "RIFF",
+			// RIFF chunk size: everything after the "RIFF" tag and this field (8 bytes).
+			.file_size = sizeof(wave_header_t) - 8 + SONG_SIZE_IN_BYTES,
+			.wavefmt = "WAVEfmt ",
+		},
+		// Size of the "fmt " chunk body: sample_type up to and including bits_per_sample.
+		.riff_header_size = 16,
+#ifdef SU_SAMPLE_FLOAT
+		.sample_type = WAVE_FORMAT_IEEE_FLOAT,
+#else // SU_SAMPLE_FLOAT
+		.sample_type = WAVE_FORMAT_PCM,
+#endif // SU_SAMPLE_FLOAT
+		.channel_count = SU_CHANNEL_COUNT,
+		.sample_rate = SU_SAMPLE_RATE,
+		.bytes_per_second = SU_SAMPLE_SIZE * SU_SAMPLE_RATE * SU_CHANNEL_COUNT,
+		.bytes_per_channel = SU_SAMPLE_SIZE * SU_CHANNEL_COUNT,
+		.bits_per_sample = SU_SAMPLE_SIZE * 8,
+		.data_header = {
+			.data = "data",
+			// Size of the sample data only; the 8-byte chunk header is not counted.
+			.data_size = SONG_SIZE_IN_BYTES
+		}
+	};
+
+	// Load gm.dls if necessary.
+#ifdef SU_LOAD_GMDLS
+	su_load_gmdls();
+#endif // SU_LOAD_GMDLS
+
+	su_render_song(sound_buffer);
+
+	FILE *file = fopen("physics_girl_st.wav", "wb");
+	if(file == NULL) {
+		perror("physics_girl_st.wav");
+		return 1;
+	}
+
+	// Header first, then the raw samples; a short write of either is an error.
+	int ok = fwrite(&wave_header, sizeof(wave_header_t), 1, file) == 1;
+	ok = ok && fwrite(sound_buffer, 1, SONG_SIZE_IN_BYTES, file) == (size_t)SONG_SIZE_IN_BYTES;
+	// fclose writes out buffered data, so its result is checked as well.
+	ok = (fclose(file) == 0) && ok;
+	if(!ok) {
+		fprintf(stderr, "Could not write physics_girl_st.wav\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/examples/code/CMakeLists.txt b/examples/code/CMakeLists.txt
index 5446ca3..3176713 100644
--- a/examples/code/CMakeLists.txt
+++ b/examples/code/CMakeLists.txt
@@ -1 +1,5 @@
+set_directory_properties(PROPERTIES EXCLUDE_FROM_ALL ON) # keep the example targets out of the default build
+add_custom_target(examples) # umbrella target; the subdirectories attach each example to it with add_dependencies()
+
+add_subdirectory(asm)
 add_subdirectory(C)
diff --git a/examples/code/asm/386/CMakeLists.txt b/examples/code/asm/386/CMakeLists.txt
new file mode 100644
index 0000000..c7a8e02
--- /dev/null
+++ b/examples/code/asm/386/CMakeLists.txt
@@ -0,0 +1,10 @@
+if(WIN32)
+	set(CMAKE_ASM_NASM_OBJECT_FORMAT win32)
+elseif(UNIX)
+	set(CMAKE_ASM_NASM_OBJECT_FORMAT elf32)
+endif()
+set(CMAKE_ASM_NASM_COMPILE_OBJECT "<CMAKE_ASM_NASM_COMPILER> <INCLUDES> <DEFINES> <FLAGS> -f ${CMAKE_ASM_NASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>") # NASM command line with the object format chosen above passed via -f
+
+add_asm_example(asmplay "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml" 386 32 "winmm" "asound;pthread") # playback example: target asmplay-386
+add_asm_example(asmwav "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml" 386 32 "" "") # WAV writer: target asmwav-386, no extra libraries
+target_compile_definitions(asmwav-386 PRIVATE FILENAME="physics_girl_st.wav") # FILENAME is the output file name used by the asmwav sources
diff --git a/examples/code/asm/386/asmplay.elf32.asm b/examples/code/asm/386/asmplay.elf32.asm
new file mode 100644
index 0000000..718bda4
--- /dev/null
+++ b/examples/code/asm/386/asmplay.elf32.asm
@@ -0,0 +1,81 @@
+%include TRACK_INCLUDE
+
+%define SND_PCM_FORMAT_S16_LE 0x2
+%define SND_PCM_FORMAT_FLOAT 0xE
+%define SND_PCM_ACCESS_RW_INTERLEAVED 0x3
+%define SND_PCM_STREAM_PLAYBACK 0x0
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+render_thread:
+	resd 1
+
+pcm_handle:
+	resd 1
+
+section .data
+default_device:
+	db "default", 0
+
+section .text
+symbols:
+	extern pthread_create
+	extern sleep
+	extern snd_pcm_open
+	extern snd_pcm_set_params
+	extern snd_pcm_writei
+
+	global main
+main:
+	; elf32 uses the cdecl calling convention. This is more readable imo ;)
+
+	; Prologue
+	push	ebp
+	mov	 ebp, esp
+	sub	 esp, 0x10
+
+	; Unix does not have gm.dls, no need to ifdef and setup here.
+
+	; We render in the background while playing already.
+	push sound_buffer
+	lea eax, su_render_song
+	push eax
+	push 0
+	push render_thread
+	call pthread_create
+
+	; We can't start playing too early or the missing samples will be audible.
+	push 0x2
+	call sleep
+
+	; Play the track.
+	push 0x0
+	push SND_PCM_STREAM_PLAYBACK
+	push default_device
+	push pcm_handle
+	call snd_pcm_open
+
+	push SU_LENGTH_IN_SAMPLES
+	push 0
+	push SU_SAMPLE_RATE
+	push SU_CHANNEL_COUNT
+	push SND_PCM_ACCESS_RW_INTERLEAVED
+%ifdef SU_SAMPLE_FLOAT
+	push SND_PCM_FORMAT_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	push SND_PCM_FORMAT_S16_LE
+%endif ; SU_SAMPLE_FLOAT
+	push dword [pcm_handle]
+	call snd_pcm_set_params
+
+	push SU_LENGTH_IN_SAMPLES
+	push sound_buffer
+	push dword [pcm_handle]
+	call snd_pcm_writei
+
+exit:
+	; Epilogue: `leave` resets esp from ebp, which also drops the arguments that were pushed above and never popped.
+	leave
+	ret
diff --git a/examples/code/asm/386/asmplay.win32.asm b/examples/code/asm/386/asmplay.win32.asm
new file mode 100644
index 0000000..9840438
--- /dev/null
+++ b/examples/code/asm/386/asmplay.win32.asm
@@ -0,0 +1,130 @@
+%define MANGLED
+%include TRACK_INCLUDE
+
+%define WAVE_FORMAT_PCM 0x1
+%define WAVE_FORMAT_IEEE_FLOAT 0x3
+%define WHDR_PREPARED 0x2
+%define WAVE_MAPPER 0xFFFFFFFF
+%define TIME_SAMPLES 0x2
+%define PM_REMOVE 0x1
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+wave_out_handle:
+	resd 1
+
+; Receives the MSG structure filled in by PeekMessageA (8 dwords reserved in total).
+msg:
+	resd 1
+message:
+	resd 7
+
+section .data
+; WAVEFORMATEX describing the rendered samples.
+wave_format:
+%ifdef SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_IEEE_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_PCM
+%endif ; SU_SAMPLE_FLOAT
+	dw SU_CHANNEL_COUNT
+	dd SU_SAMPLE_RATE
+	dd SU_SAMPLE_SIZE * SU_SAMPLE_RATE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * 8
+	dw 0
+
+; WAVEHDR for the whole song: eight dword-sized fields on win32 (32 bytes).
+wave_header:
+	dd sound_buffer
+	dd SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	times 2 dd 0
+	dd WHDR_PREPARED
+	times 3 dd 0
+wave_header_end:
+
+; MMTIME: the type tag followed by the union that receives the position.
+mmtime:
+	dd TIME_SAMPLES
+sample:
+	times 2 dd 0
+mmtime_end:
+
+section .text
+symbols:
+	extern _CreateThread@24
+	extern _waveOutOpen@24
+	extern _waveOutWrite@12
+	extern _waveOutGetPosition@12
+	extern _PeekMessageA@20
+	extern _TranslateMessage@4
+	extern _DispatchMessageA@4
+
+	global _mainCRTStartup
+_mainCRTStartup:
+	; The Windows API functions called here use stdcall: arguments are pushed
+	; right to left and removed by the callee, so esp needs no cleanup after a call.
+
+	; Prologue, so that the `leave` at the end restores a valid frame.
+	push ebp
+	mov ebp, esp
+
+%ifdef SU_LOAD_GMDLS
+	call _su_load_gmdls
+%endif ; SU_LOAD_GMDLS
+
+	; Start rendering into sound_buffer on a background thread.
+	times 2 push 0
+	push sound_buffer
+	push _su_render_song@4
+	times 2 push 0
+	call _CreateThread@24
+
+	; We render in the background while playing already. Fortunately,
+	; Windows is slow with the calls below, so we're not worried that
+	; we don't have enough samples ready before the track starts.
+	times 3 push 0
+	push wave_format
+	push WAVE_MAPPER
+	push wave_out_handle
+	call _waveOutOpen@24
+
+	push wave_header_end - wave_header
+	push wave_header
+	push dword [wave_out_handle]
+	call _waveOutWrite@12
+
+	; We need to handle windows messages properly while playing, as waveOutWrite is async.
+mainloop:
+	dispatchloop:
+		push PM_REMOVE
+		times 3 push 0
+		push msg
+		call _PeekMessageA@20
+		; The result comes back in eax; the call itself leaves no meaningful flags.
+		test eax, eax
+		jz dispatchloop_end
+
+		push msg
+		call _TranslateMessage@4
+
+		push msg
+		call _DispatchMessageA@4
+
+		jmp dispatchloop
+	dispatchloop_end:
+
+	push mmtime_end - mmtime
+	push mmtime
+	push dword [wave_out_handle]
+	call _waveOutGetPosition@12
+
+	cmp dword [sample], SU_LENGTH_IN_SAMPLES
+	jne mainloop
+
+exit:
+	; Epilogue
+	leave
+	ret
diff --git a/examples/code/asm/386/asmwav.elf32.asm b/examples/code/asm/386/asmwav.elf32.asm
new file mode 100644
index 0000000..607d007
--- /dev/null
+++ b/examples/code/asm/386/asmwav.elf32.asm
@@ -0,0 +1,91 @@
+%include TRACK_INCLUDE
+
+%define WAVE_FORMAT_PCM 0x1
+%define WAVE_FORMAT_IEEE_FLOAT 0x3
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+file:
+	resd 1
+
+section .data
+; Change the filename over -DFILENAME="yourfilename.wav"
+filename:
+	db FILENAME, 0
+
+format:
+	db "wb", 0
+
+; This is the wave file header. Every chunk size counts only the bytes that follow the size field.
+wave_file:
+	db "RIFF"
+	dd (wave_file_end - wave_file - 8) + SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; rest of the header + samples
+	db "WAVE"
+	db "fmt "
+wave_format_end:
+	dd wave_header_end - wave_format_end - 4 ; size of the format fields below, without this dword
+%ifdef SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_IEEE_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_PCM
+%endif ; SU_SAMPLE_FLOAT
+	dw SU_CHANNEL_COUNT
+	dd SU_SAMPLE_RATE
+	dd SU_SAMPLE_SIZE * SU_SAMPLE_RATE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * 8
+wave_header_end:
+	db "data"
+	dd SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; sample data only
+wave_file_end:
+
+section .text
+symbols:
+	extern fopen
+	extern fwrite
+	extern fclose
+
+	global main
+main:
+	; elf32 uses the cdecl calling convention. This is more readable imo ;)
+
+	; Prologue
+	push	ebp
+	mov	 ebp, esp
+	sub	 esp, 0x10
+
+	; Unix does not have gm.dls, no need to ifdef and setup here.
+
+	; We render the complete track here.
+	push sound_buffer
+	call su_render_song
+
+	; Now we open the file and save the track.
+	push format
+	push filename
+	call fopen
+	mov dword [file], eax
+
+	; Write header
+	push dword [file]
+	push 0x1
+	push wave_file_end - wave_file
+	push wave_file
+	call fwrite
+
+	; write data
+	push dword [file]
+	push 0x1
+	push SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	push sound_buffer
+	call fwrite
+
+	push dword [file]
+	call fclose
+
+exit:
+	; At least we can skip the epilogue :)
+	leave
+	ret
diff --git a/examples/code/asm/386/asmwav.win32.asm b/examples/code/asm/386/asmwav.win32.asm
new file mode 100644
index 0000000..dc91629
--- /dev/null
+++ b/examples/code/asm/386/asmwav.win32.asm
@@ -0,0 +1,102 @@
+%define MANGLED
+%include TRACK_INCLUDE
+
+%define WAVE_FORMAT_PCM 0x1
+%define WAVE_FORMAT_IEEE_FLOAT 0x3
+%define FILE_ATTRIBUTE_NORMAL 0x00000080
+%define CREATE_ALWAYS 2
+%define GENERIC_WRITE 0x40000000
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+file:
+	resd 1
+
+bytes_written:
+	resd 1
+
+section .data
+; Change the filename over -DFILENAME="yourfilename.wav"
+filename:
+	db FILENAME, 0
+
+; This is the wave file header. Every chunk size counts only the bytes that follow the size field.
+wave_file:
+	db "RIFF"
+	dd (wave_file_end - wave_file - 8) + SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; rest of the header + samples
+	db "WAVE"
+	db "fmt "
+wave_format_end:
+	dd wave_header_end - wave_format_end - 4 ; size of the format fields below, without this dword
+%ifdef SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_IEEE_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_PCM
+%endif ; SU_SAMPLE_FLOAT
+	dw SU_CHANNEL_COUNT
+	dd SU_SAMPLE_RATE
+	dd SU_SAMPLE_SIZE * SU_SAMPLE_RATE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * 8
+wave_header_end:
+	db "data"
+	dd SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; sample data only
+wave_file_end:
+
+section .text
+symbols:
+	extern _CreateFileA@28
+	extern _WriteFile@20
+	extern _CloseHandle@4
+
+	global _mainCRTStartup
+_mainCRTStartup:
+	; Prologue
+	push	ebp
+	mov	 ebp, esp
+	sub	 esp, 0x10
+
+%ifdef SU_LOAD_GMDLS
+	call _su_load_gmdls
+%endif ; SU_LOAD_GMDLS
+
+	; We render the complete track here.
+	push sound_buffer
+	call _su_render_song@4
+
+	; Now we open the file and save the track.
+	push 0x0
+	push FILE_ATTRIBUTE_NORMAL
+	push CREATE_ALWAYS
+	push 0x0
+	push 0x0
+	push GENERIC_WRITE
+	push filename
+	call _CreateFileA@28
+	mov dword [file], eax
+
+	; This is the WAV header
+	push 0x0
+	push bytes_written
+	push wave_file_end - wave_file
+	push wave_file
+	push dword [file]
+	call _WriteFile@20
+	
+	; There we write the actual samples
+	push 0x0
+	push bytes_written
+	push SU_LENGTH_IN_SAMPLES * SU_CHANNEL_COUNT * SU_SAMPLE_SIZE
+	push sound_buffer
+	push dword [file]
+	call _WriteFile@20
+	
+	push dword [file]
+	call _CloseHandle@4
+
+exit:
+	; At least we can skip the epilogue :)
+	leave
+	ret
diff --git a/examples/code/asm/CMakeLists.txt b/examples/code/asm/CMakeLists.txt
new file mode 100644
index 0000000..7dee6c9
--- /dev/null
+++ b/examples/code/asm/CMakeLists.txt
@@ -0,0 +1,70 @@
+# Adds the executable <identifier>-<architecture>, built from the hand-written
+# source <identifier>.<win|elf><sizeof_void_ptr>.asm and the compiled song.
+#
+# identifier: Name of the example
+# songfile: File path of the song YAML file.
+# architecture: 386 or amd64
+# sizeof_void_ptr: 32 or 64
+# windows_libraries: All libraries that you need to link on Windows
+# unix_libraries: All libraries that you need to link on unix
+function(add_asm_example identifier songfile architecture sizeof_void_ptr windows_libraries unix_libraries)
+	get_filename_component(songprefix "${songfile}" NAME_WE)
+	set(target ${identifier}-${architecture})
+	set(song ${songprefix}_${architecture})
+
+	# Generate the song assembly file
+	add_custom_command(
+		COMMAND
+			${compilecmd} -arch=${architecture} -o ${song}.asm "${songfile}"
+		WORKING_DIRECTORY
+			${CMAKE_CURRENT_BINARY_DIR}
+		DEPENDS
+			"${songfile}"
+		OUTPUT
+			${song}.asm
+			${song}.h
+			${song}.inc
+		COMMENT
+			"Compiling ${songfile}..."
+		VERBATIM
+	)
+
+	# Platform dependent options. link_options starts out empty so that a
+	# variable of the same name in the calling scope cannot leak in.
+	set(link_options "")
+	if(WIN32)
+		set(abi win)
+		set(libraries ${windows_libraries})
+		# The target is linked as C (LINKER_LANGUAGE below), so the C compiler
+		# ID is checked too; the CXX check is kept for backward compatibility.
+		if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+			set(link_options -nostartfiles)
+		endif()
+	elseif(UNIX)
+		set(abi elf)
+		set(link_options -z noexecstack -no-pie)
+		set(libraries ${unix_libraries})
+	endif()
+
+	# Add target
+	add_executable(${target}
+		${identifier}.${abi}${sizeof_void_ptr}.asm
+		${song}.asm
+		${song}.inc
+	)
+	set_target_properties(${target} PROPERTIES
+		ASM_NASM_COMPILE_OPTIONS -f${abi}${sizeof_void_ptr}
+		LINKER_LANGUAGE C
+	)
+	target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+	target_link_options(${target} PRIVATE -m${sizeof_void_ptr} ${link_options})
+	target_link_libraries(${target} PRIVATE ${libraries})
+	target_compile_definitions(${target} PRIVATE TRACK_INCLUDE="${song}.inc")
+
+	# Set up dependencies
+	add_dependencies(${target} sointu-compiler)
+	add_dependencies(examples ${target})
+endfunction()
+
+add_subdirectory(386)
+add_subdirectory(amd64)
diff --git a/examples/code/asm/amd64/CMakeLists.txt b/examples/code/asm/amd64/CMakeLists.txt
new file mode 100644
index 0000000..9eca050
--- /dev/null
+++ b/examples/code/asm/amd64/CMakeLists.txt
@@ -0,0 +1,12 @@
+if(WIN32)
+	set(CMAKE_ASM_NASM_OBJECT_FORMAT win64)
+elseif(UNIX)
+	set(CMAKE_ASM_NASM_OBJECT_FORMAT elf64)
+endif()
+set(CMAKE_ASM_NASM_COMPILE_OBJECT "<CMAKE_ASM_NASM_COMPILER> <INCLUDES> <DEFINES> <FLAGS> -f ${CMAKE_ASM_NASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>") # NASM command line with the object format chosen above passed via -f
+
+if(UNIX) # NOTE(review): only elf64 sources are visible for this directory, presumably why Windows is left out - confirm
+	add_asm_example(asmplay "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml" amd64 64 "winmm" "asound;pthread") # playback example: target asmplay-amd64; the "winmm" list is only read by add_asm_example when WIN32 is set
+	add_asm_example(asmwav "${PROJECT_SOURCE_DIR}/examples/patches/physics_girl_st.yml" amd64 64 "" "") # WAV writer: target asmwav-amd64, no extra libraries
+	target_compile_definitions(asmwav-amd64 PRIVATE FILENAME="physics_girl_st.wav") # FILENAME is the output file name used by the asmwav sources
+endif()
diff --git a/examples/code/asm/amd64/asmplay.elf64.asm b/examples/code/asm/amd64/asmplay.elf64.asm
new file mode 100644
index 0000000..120f262
--- /dev/null
+++ b/examples/code/asm/amd64/asmplay.elf64.asm
@@ -0,0 +1,81 @@
+%include TRACK_INCLUDE
+
+%define SND_PCM_FORMAT_S16_LE 0x2
+%define SND_PCM_FORMAT_FLOAT 0xE
+%define SND_PCM_ACCESS_RW_INTERLEAVED 0x3
+%define SND_PCM_STREAM_PLAYBACK 0x0
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+render_thread:
+	resq 1
+
+pcm_handle:
+	resq 1
+
+section .data
+default_device:
+	db "default", 0
+
+section .text
+symbols:
+	extern pthread_create
+	extern sleep
+	extern snd_pcm_open
+	extern snd_pcm_set_params
+	extern snd_pcm_writei
+
+	global main
+main:
+	; Prologue
+	push	rbp
+	mov	 rbp, rsp
+	sub	 rsp, 0x10
+
+	; Unix does not have gm.dls, no need to ifdef and setup here.
+
+	; We render in the background while playing already.
+	mov rcx, sound_buffer
+	lea rdx, su_render_song
+	mov rsi, 0x0
+	mov rdi, render_thread
+	call pthread_create
+
+	; We can't start playing too early or the missing samples will be audible.
+	mov edi, 0x2
+	call sleep
+
+	; Play the track.
+	mov rdi, pcm_handle
+	mov rsi, default_device
+	mov rdx, SND_PCM_STREAM_PLAYBACK
+	mov rcx, 0x0
+	call snd_pcm_open
+
+	; This is unfortunate. amd64 ABI calling convention kicks in.
+	; now we have to maintain the stack pointer :/
+	mov rdi, qword [pcm_handle]
+	sub rsp, 0x8
+	push SU_LENGTH_IN_SAMPLES
+%ifdef SU_SAMPLE_FLOAT
+	mov rsi, SND_PCM_FORMAT_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	mov rsi, SND_PCM_FORMAT_S16_LE
+%endif ; SU_SAMPLE_FLOAT
+	mov rdx, SND_PCM_ACCESS_RW_INTERLEAVED
+	mov rcx, SU_CHANNEL_COUNT
+	mov r8d, SU_SAMPLE_RATE
+	mov r9d, 0x0
+	call snd_pcm_set_params
+
+	mov rdi, qword [pcm_handle]
+	mov rsi, sound_buffer
+	mov rdx, SU_LENGTH_IN_SAMPLES
+	call snd_pcm_writei
+
+exit:
+	; Epilogue: `leave` resets rsp from rbp, undoing the stack adjustments made above.
+	leave
+	ret
diff --git a/examples/code/asm/amd64/asmwav.elf64.asm b/examples/code/asm/amd64/asmwav.elf64.asm
new file mode 100644
index 0000000..905b2b3
--- /dev/null
+++ b/examples/code/asm/amd64/asmwav.elf64.asm
@@ -0,0 +1,91 @@
+%include TRACK_INCLUDE
+
+%define WAVE_FORMAT_PCM 0x1
+%define WAVE_FORMAT_IEEE_FLOAT 0x3
+
+section .bss
+sound_buffer:
+	resb SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+
+file:
+	resq 1
+
+section .data
+; Change the filename over -DFILENAME="yourfilename.wav"
+filename:
+	db FILENAME, 0
+
+format:
+	db "wb", 0
+
+; This is the wave file header. Every chunk size counts only the bytes that follow the size field.
+wave_file:
+	db "RIFF"
+	dd (wave_file_end - wave_file - 8) + SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; rest of the header + samples
+	db "WAVE"
+	db "fmt "
+wave_format_end:
+	dd wave_header_end - wave_format_end - 4 ; size of the format fields below, without this dword
+%ifdef SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_IEEE_FLOAT
+%else ; SU_SAMPLE_FLOAT
+	dw WAVE_FORMAT_PCM
+%endif ; SU_SAMPLE_FLOAT
+	dw SU_CHANNEL_COUNT
+	dd SU_SAMPLE_RATE
+	dd SU_SAMPLE_SIZE * SU_SAMPLE_RATE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	dw SU_SAMPLE_SIZE * 8
+wave_header_end:
+	db "data"
+	dd SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT ; sample data only
+wave_file_end:
+
+section .text
+symbols:
+	extern fopen
+	extern fwrite
+	extern fclose
+
+	global main
+main:
+	; In this 64-bit version the arguments are passed in registers (rdi, rsi, rdx, rcx below) instead of on the stack.
+
+	; Prologue
+	push	rbp
+	mov	 rbp, rsp
+	sub	 rsp, 0x10
+
+	; Unix does not have gm.dls, no need to ifdef and setup here.
+
+	; We render the complete track into sound_buffer here.
+	mov rdi, sound_buffer
+	call su_render_song
+
+	; Now we open the file and save the track; the handle returned in rax is kept in [file].
+	mov rsi, format
+	mov rdi, filename
+	call fopen
+	mov qword [file], rax
+
+	; Write header: rdi = data, rsi = size in bytes, rdx = 1, rcx = file
+	mov rcx, qword [file]
+	mov rdx, 0x1
+	mov rsi, wave_file_end - wave_file
+	mov rdi, wave_file
+	call fwrite
+
+	; Write the samples with the same register layout
+	mov rcx, qword [file]
+	mov rdx, 0x1
+	mov rsi, SU_LENGTH_IN_SAMPLES * SU_SAMPLE_SIZE * SU_CHANNEL_COUNT
+	mov rdi, sound_buffer
+	call fwrite
+
+	mov rdi, qword [file]
+	call fclose
+
+exit:
+	; At least we can skip the epilogue :)
+	leave
+	ret
diff --git a/vm/compiler/templates/amd64-386/player.inc b/vm/compiler/templates/amd64-386/player.inc
index 7d52bdd..d2e3f99 100644
--- a/vm/compiler/templates/amd64-386/player.inc
+++ b/vm/compiler/templates/amd64-386/player.inc
@@ -37,9 +37,12 @@
 %define SU_SYNC
 {{- end}}
 
-section _su_symbols text
 _su_symbols:
+%ifdef MANGLED
 	extern _su_render_song@4
+%else ; MANGLED
+    extern su_render_song
+%endif ; MANGLED
 
 {{- if gt (.SampleOffsets | len) 0}}
 	extern _su_load_gmdls