diff --git a/docs/creating-agents.md b/docs/creating-agents.md
index 2feed56..207a491 100644
--- a/docs/creating-agents.md
+++ b/docs/creating-agents.md
@@ -143,18 +143,18 @@ needs:
 |---|---|
 | `Claude Base Chat` | Claude, Anthropic Messages (`/v1/messages`) |
 | `OpenAI Base Chat` | OpenAI, Chat Completions (`/chat/completions`) |
-| `OpenAI Responses Base` | OpenAI, Responses API (`/responses`) |
+| `OpenAI Base Responses` | OpenAI, Responses API (`/responses`) |
 | `Google Base Chat` | Google AI, Gemini `generateContent` |
 | `Ollama Base Chat` | Ollama, native `/api/chat` |
-| `Ollama FIM Base` | Ollama, native `/api/generate` fill-in-the-middle |
+| `Ollama Base FIM` | Ollama, native `/api/generate` fill-in-the-middle |
 
 For any OpenAI-compatible provider (Mistral, OpenRouter, LM Studio, llama.cpp,
 DeepSeek, …) extend `OpenAI Base Chat` and override `provider_instance`.
 
-Each bundled concrete agent (`Claude Sonnet Chat`, `Claude Code Completion`,
-`OpenAI Chat Completions`, `OpenAI Responses Chat`, `Google Chat`,
-`Ollama Chat`, `Ollama FIM`) is itself a thin delta over one of these bases and
-works as a parent too — `extends = "Claude Sonnet Chat"` inherits everything including
+Each bundled concrete agent (`Claude Chat — Sonnet`, `Claude Completion`,
+`OpenAI Chat`, `OpenAI Chat — Responses`, `Google Chat`,
+`Ollama Chat — Simple`, `Ollama Completion — FIM`) is itself a thin delta over one of these bases and
+works as a parent too — `extends = "Claude Chat — Sonnet"` inherits everything including
 the model.
 
 ## `[body]` — the request, literally
diff --git a/docs/ollama-configuration.md b/docs/ollama-configuration.md
index ad8a82b..0ab9b51 100644
--- a/docs/ollama-configuration.md
+++ b/docs/ollama-configuration.md
@@ -28,6 +28,31 @@ ollama run qwen2.5-coder:32b
 
 You're all set! QodeAssist is now ready to use in Qt Creator.
 
+## Which models do I actually need?
+
+You do **not** need a separate model for every agent. Each bundled Ollama agent
+names a *default* model only as an example — you can point any agent at a model
+you already have via its settings → **Change…** (a per-agent override; it does not
+edit the bundled agent). **Seeing a model name on an agent is not a reason to
+download it.**
+
+The defaults cluster into a tiny set, so one or two pulls cover everyday use:
+
+| Pull this | Unlocks |
+|---|---|
+| `qwen2.5-coder:7b` | Ollama Chat — Simple · Ollama Completion — FIM · Ollama Completion — Chat-style · Ollama Quick Refactor — Simple |
+| `qwen3.5:9b` (or `:4b` on ~8 GB) | Ollama Chat — Thinking · Ollama Quick Refactor — Qwen3.5 · Ollama Compression — 16/32 GB (`:4b` → Compression — 8 GB) |
+
+Optional specialists — pull only if you want that capability:
+
+| Pull this | For |
+|---|---|
+| `gemma4:12b` | Ollama Chat — Gemma 4 — agentic chat with vision + native reasoning · Ollama Quick Refactor — Gemma 4 — inline refactor with native reasoning |
+| `theqtcompany/codellama-7b-qml` | Ollama Completion — QML (Qt) — Qt's QML-specific completion model |
+
+Rule of thumb: pick the agent for the job, then either pull its named model **or**
+swap it (Change…) for one you already have.
+
 ## Extended Thinking Mode
 
 Ollama supports extended thinking mode for models that are capable of deep reasoning (such as DeepSeek-R1, QwQ, and similar reasoning models). This mode allows the model to show its step-by-step reasoning process before providing the final answer.
diff --git a/sources/agents/agents.qrc b/sources/agents/agents.qrc
index aae36ad..7c6ab2f 100644
--- a/sources/agents/agents.qrc
+++ b/sources/agents/agents.qrc
@@ -1,30 +1,37 @@
 <RCC>
     <qresource prefix="/agents">
         <file>claude_base_chat.toml</file>
-        <file>claude_chat.toml</file>
-        <file>claude_opus_chat.toml</file>
-        <file>claude_opus_max.toml</file>
+        <file>claude_chat_sonnet.toml</file>
+        <file>claude_chat_opus_xhigh.toml</file>
+        <file>claude_chat_opus_max.toml</file>
         <file>claude_completion.toml</file>
         <file>claude_compression.toml</file>
         <file>claude_quick_refactor.toml</file>
         <file>claude_quick_refactor_fast.toml</file>
         <file>openai_base_chat.toml</file>
-        <file>openai_chat_completions.toml</file>
-        <file>openai_responses_base.toml</file>
-        <file>openai_responses_chat.toml</file>
+        <file>openai_chat.toml</file>
+        <file>openai_base_responses.toml</file>
+        <file>openai_chat_responses.toml</file>
         <file>google_base_chat.toml</file>
         <file>google_chat.toml</file>
         <file>ollama_base_chat.toml</file>
-        <file>ollama_chat.toml</file>
-        <file>ollama_chat_completion.toml</file>
+        <file>ollama_chat_simple.toml</file>
+        <file>ollama_chat_thinking.toml</file>
+        <file>ollama_completion_chat.toml</file>
         <file>ollama_base_fim.toml</file>
-        <file>ollama_fim.toml</file>
-        <file>ollama_codellama_qml_fim.toml</file>
-        <file>ollama_quick_refactor.toml</file>
-        <file>ollama_compression.toml</file>
+        <file>ollama_completion_fim.toml</file>
+        <file>ollama_completion_qml.toml</file>
+        <file>ollama_quick_refactor_simple.toml</file>
+        <file>ollama_quick_refactor_qwen35.toml</file>
+        <file>ollama_quick_refactor_gemma4.toml</file>
+        <file>ollama_compression_8gb.toml</file>
+        <file>ollama_compression_16gb.toml</file>
+        <file>ollama_compression_32gb.toml</file>
+        <file>ollama_chat_gemma4.toml</file>
     </qresource>
     <qresource prefix="/roles">
         <file alias="qt-cpp-developer.md">roles/qt-cpp-developer.md</file>
+        <file alias="agentic-coder.md">roles/agentic-coder.md</file>
         <file alias="code-completion.md">roles/code-completion.md</file>
         <file alias="code-completion-c-like.md">roles/code-completion-c-like.md</file>
         <file alias="code-completion-qml.md">roles/code-completion-qml.md</file>
diff --git a/sources/agents/claude_opus_max.toml b/sources/agents/claude_chat_opus_max.toml
similarity index 94%
rename from sources/agents/claude_opus_max.toml
rename to sources/agents/claude_chat_opus_max.toml
index a9c711c..3eb988d 100644
--- a/sources/agents/claude_opus_max.toml
+++ b/sources/agents/claude_chat_opus_max.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Opus Max Chat"
+name        = "Claude Chat — Opus Max"
 description = "Anthropic Claude Opus 4.8 — maximum-capability coding chat; adaptive thinking at max effort with 128k output. For frontier problems; higher cost and latency."
 
 model             = "claude-opus-4-8"
diff --git a/sources/agents/claude_opus_chat.toml b/sources/agents/claude_chat_opus_xhigh.toml
similarity index 93%
rename from sources/agents/claude_opus_chat.toml
rename to sources/agents/claude_chat_opus_xhigh.toml
index 655708c..8712f2b 100644
--- a/sources/agents/claude_opus_chat.toml
+++ b/sources/agents/claude_chat_opus_xhigh.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Opus xHigh Chat"
+name        = "Claude Chat — Opus xHigh"
 description = "Anthropic Claude Opus 4.8 — coding chat with adaptive thinking at xhigh effort, tuned for agentic and coding work."
 
 model             = "claude-opus-4-8"
diff --git a/sources/agents/claude_chat.toml b/sources/agents/claude_chat_sonnet.toml
similarity index 93%
rename from sources/agents/claude_chat.toml
rename to sources/agents/claude_chat_sonnet.toml
index cbb2c68..1f882ae 100644
--- a/sources/agents/claude_chat.toml
+++ b/sources/agents/claude_chat_sonnet.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Sonnet Chat"
+name        = "Claude Chat — Sonnet"
 description = "Anthropic Claude — coding chat with adaptive thinking."
 
 model             = "claude-sonnet-4-6"
diff --git a/sources/agents/claude_completion.toml b/sources/agents/claude_completion.toml
index 761e142..5bc219b 100644
--- a/sources/agents/claude_completion.toml
+++ b/sources/agents/claude_completion.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Code Completion"
+name        = "Claude Completion"
 description = "Anthropic Claude — code completion using the master <code_context> message format over the Messages API."
 
 model = "claude-haiku-4-5"
diff --git a/sources/agents/claude_compression.toml b/sources/agents/claude_compression.toml
index 764806e..42f9991 100644
--- a/sources/agents/claude_compression.toml
+++ b/sources/agents/claude_compression.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Chat Compression"
+name        = "Claude Compression"
 description = "Anthropic Claude Haiku — fast, low-cost conversation summarization for shorter chats. Carries the summary system prompt; no tools, no thinking."
 
 model        = "claude-haiku-4-5"
diff --git a/sources/agents/claude_quick_refactor_fast.toml b/sources/agents/claude_quick_refactor_fast.toml
index 5424b6f..e02dfa0 100644
--- a/sources/agents/claude_quick_refactor_fast.toml
+++ b/sources/agents/claude_quick_refactor_fast.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "Claude Base Chat"
-name        = "Claude Quick Refactor Fast"
+name        = "Claude Quick Refactor — Fast"
 description = "Anthropic Claude Haiku — fast single-shot inline refactor. Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context (file, code, cursor/selection). No thinking; tools off."
 
 model           = "claude-haiku-4-5"
diff --git a/sources/agents/ollama_base_fim.toml b/sources/agents/ollama_base_fim.toml
index 454ed44..75419fe 100644
--- a/sources/agents/ollama_base_fim.toml
+++ b/sources/agents/ollama_base_fim.toml
@@ -1,6 +1,6 @@
 schema_version = 1
 
-name        = "Ollama FIM Base"
+name        = "Ollama Base FIM"
 description = "Ollama native /api/generate FIM request body. Abstract — extend it and set model."
 abstract    = true
 
diff --git a/sources/agents/ollama_chat.toml b/sources/agents/ollama_chat.toml
deleted file mode 100644
index bd18886..0000000
--- a/sources/agents/ollama_chat.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Chat"
-description = "Local Ollama coding chat (qwen2.5-coder)."
-
-model = "qwen2.5-coder:7b"
-tags  = ["chat", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.7
-keep_alive  = "5m"
diff --git a/sources/agents/ollama_chat_gemma4.toml b/sources/agents/ollama_chat_gemma4.toml
new file mode 100644
index 0000000..9f038e8
--- /dev/null
+++ b/sources/agents/ollama_chat_gemma4.toml
@@ -0,0 +1,22 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Gemma 4"
+description = "Local Ollama agentic coding chat (Gemma 4 12B by default) — native reasoning plus IDE tool use (read/edit/build), driven by the act-first agentic-coder role. Vision-capable."
+
+model           = "gemma4:12b"
+enable_tools    = true
+enable_thinking = true
+tags            = ["chat", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/roles/agentic-coder.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+temperature = 1.0
+top_p       = 0.95
+top_k       = 64
+num_predict = 8192  # max tokens to generate (8 * 1024)
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_chat_simple.toml b/sources/agents/ollama_chat_simple.toml
new file mode 100644
index 0000000..284ee6d
--- /dev/null
+++ b/sources/agents/ollama_chat_simple.toml
@@ -0,0 +1,15 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Simple"
+description = "Local Ollama coding chat for any model — plain conversational assistant, no tools, no thinking (Qwen2.5-Coder 7B by default)."
+
+model = "qwen2.5-coder:7b"  # default named in the description; any chat model works
+tags  = ["chat", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.7
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_chat_thinking.toml b/sources/agents/ollama_chat_thinking.toml
new file mode 100644
index 0000000..16f0d40
--- /dev/null
+++ b/sources/agents/ollama_chat_thinking.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Thinking"
+description = "Local Ollama agentic coding chat — tools + native reasoning, for any tools/thinking-capable model (Qwen3.5 9B by default). Acts through IDE tools (read/edit/build) via the act-first agentic-coder role."
+
+model           = "qwen3.5:9b"
+enable_tools    = true
+enable_thinking = true
+tags            = ["chat", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/roles/agentic-coder.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 8192
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_codellama_qml_fim.toml b/sources/agents/ollama_codellama_qml_fim.toml
deleted file mode 100644
index d389c0e..0000000
--- a/sources/agents/ollama_codellama_qml_fim.toml
+++ /dev/null
@@ -1,33 +0,0 @@
-schema_version = 1
-
-name        = "Ollama CodeLlama QML FIM"
-description = "Local Ollama FIM for the CodeLlama-13B QML fine-tune (raw passthrough Modelfile, suffix-first markers)."
-
-provider_instance = "Ollama (Native)"
-endpoint          = "/api/generate"
-
-model = "theqtcompany/codellama-7b-qml"
-tags  = ["completion", "ollama", "local", "fim", "qml"]
-
-[match]
-file_patterns = ["*.qml"]
-
-[body]
-prompt = """{{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}"""
-
-[body.options]
-temperature = 0
-top_p = 1
-repeat_penalty = 1.05
-num_predict = 500
-stop = [
-  "<SUF>",
-  "<PRE>",
-  "</PRE>",
-  "</SUF>",
-  "< EOT >",
-  "\\end",
-  "<MID>",
-  "</MID>",
-  "##",
-]
diff --git a/sources/agents/ollama_chat_completion.toml b/sources/agents/ollama_completion_chat.toml
similarity index 58%
rename from sources/agents/ollama_chat_completion.toml
rename to sources/agents/ollama_completion_chat.toml
index 43bceb1..e32f160 100644
--- a/sources/agents/ollama_chat_completion.toml
+++ b/sources/agents/ollama_completion_chat.toml
@@ -1,11 +1,11 @@
 schema_version = 1
 
 extends     = "Ollama Base Chat"
-name        = "Ollama FIM-on-chat"
-description = "Local Ollama code completion over /api/chat using the <code_context> message format — FIM-on-chat."
+name        = "Ollama Completion — Chat-style"
+description = "Code completion via a chat request — the cursor sits in a <code_context> message on /api/chat, so the model just continues the code. Works with ANY normal chat/instruct model (qwen2.5-coder, qwen3, llama3, gemma, mistral, codellama-instruct, …), so use this when your model is NOT a dedicated FIM model. Tradeoff vs native 'Ollama Completion — FIM': a bit slower and may leak code fences or explanations, so slightly less precise."
 
 model = "qwen2.5-coder:7b"
-tags  = ["completion", "ollama", "local", "fim"]
+tags  = ["completion", "ollama", "local", "fim", "8gb"]
 
 system_prompt = """
 {%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
@@ -25,7 +25,7 @@ messages = """
 """
 
 [body.options]
-num_predict = 512
+num_predict = 256
 temperature = 0.2
 keep_alive  = "5m"
 stop        = ["</code_context>"]
diff --git a/sources/agents/ollama_completion_fim.toml b/sources/agents/ollama_completion_fim.toml
new file mode 100644
index 0000000..647548f
--- /dev/null
+++ b/sources/agents/ollama_completion_fim.toml
@@ -0,0 +1,13 @@
+schema_version = 1
+
+extends     = "Ollama Base FIM"
+name        = "Ollama Completion — FIM"
+description = "Native fill-in-the-middle completion — uses the model's OWN FIM template (prompt+suffix on /api/generate). Fast and clean (no markdown or prose), but works ONLY with models that ship a FIM template, and those are few: the base / '-code' variants, NOT instruct/chat models. Verified to work: qwen2.5-coder (incl. -base), codellama:7b-code, deepseek-coder-v2 lite-base. A plain chat model outputs garbage here — use 'Ollama Completion — Chat-style' instead. Check a model: `ollama show <model> --modelfile` must mention 'Suffix'."
+
+model = "qwen2.5-coder:7b"
+tags  = ["completion", "ollama", "local", "fim", "8gb"]
+
+[body.options]
+num_predict = 256
+temperature = 0.2
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_completion_qml.toml b/sources/agents/ollama_completion_qml.toml
new file mode 100644
index 0000000..cb9c78d
--- /dev/null
+++ b/sources/agents/ollama_completion_qml.toml
@@ -0,0 +1,33 @@
+schema_version = 1
+
+name        = "Ollama Completion — QML (Qt)"
+description = "Specialized QML completion built for The Qt Company's CodeLlama-QML fine-tune (`theqtcompany/codellama-7b-qml` / `13b-qml`) — a model trained specifically for QML. Native fill-in-the-middle with the fine-tune's suffix-first markers; route `.qml` files here via [match]. Not a general agent — use only with a Qt QML model (theqtcompany/* or palm1r/codellama-13b-code-qml)."
+
+provider_instance = "Ollama (Native)"
+endpoint          = "/api/generate"
+
+model = "theqtcompany/codellama-7b-qml"
+tags  = ["completion", "ollama", "local", "fim", "qml", "8gb"]
+
+[match]
+file_patterns = ["*.qml"]
+
+[body]
+prompt = """{{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}"""
+
+[body.options]
+temperature = 0
+top_p = 1
+repeat_penalty = 1.05
+num_predict = 500
+stop = [
+  "<SUF>",
+  "<PRE>",
+  "</PRE>",
+  "</SUF>",
+  "< EOT >",
+  "\\end",
+  "<MID>",
+  "</MID>",
+  "##",
+]
diff --git a/sources/agents/ollama_compression.toml b/sources/agents/ollama_compression.toml
deleted file mode 100644
index be92648..0000000
--- a/sources/agents/ollama_compression.toml
+++ /dev/null
@@ -1,17 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Chat Compression"
-description = "Local Ollama conversation summarization (qwen2.5-coder). Carries the summary system prompt; no tools."
-
-model        = "qwen2.5-coder:7b"
-enable_tools = false
-tags         = ["compression", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.3
-num_ctx     = 8192
-keep_alive  = "5m"
diff --git a/sources/agents/ollama_compression_16gb.toml b/sources/agents/ollama_compression_16gb.toml
new file mode 100644
index 0000000..b3e3cab
--- /dev/null
+++ b/sources/agents/ollama_compression_16gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 16 GB"
+description = "Conversation summarization tuned for ~16 GB RAM — qwen3.5:9b at num_ctx 8192 (~10 GB resident). General summarizer with moderate context. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:9b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 8192
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_compression_32gb.toml b/sources/agents/ollama_compression_32gb.toml
new file mode 100644
index 0000000..1794a26
--- /dev/null
+++ b/sources/agents/ollama_compression_32gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 32 GB"
+description = "Conversation summarization tuned for ~32 GB RAM — qwen3.5:9b at num_ctx 24576 (~20 GB resident). Long context for big chats. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:9b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "32gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 24576
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_compression_8gb.toml b/sources/agents/ollama_compression_8gb.toml
new file mode 100644
index 0000000..4bbb2d4
--- /dev/null
+++ b/sources/agents/ollama_compression_8gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 8 GB"
+description = "Conversation summarization tuned for ~8 GB RAM — qwen3.5:4b at num_ctx 8192 (~5.5 GB resident). Smallest qwen3.5 (same family as the bigger tiers), decent context for the footprint. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:4b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 8192
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_fim.toml b/sources/agents/ollama_fim.toml
deleted file mode 100644
index 5d4c979..0000000
--- a/sources/agents/ollama_fim.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama FIM Base"
-name        = "Ollama FIM"
-description = "Local Ollama FIM code completion (qwen2.5-coder)."
-
-model = "qwen2.5-coder:7b"
-tags  = ["completion", "ollama", "local", "fim"]
-
-[body.options]
-num_predict = 256
-temperature = 0.2
-keep_alive  = "5m"
diff --git a/sources/agents/ollama_quick_refactor.toml b/sources/agents/ollama_quick_refactor.toml
deleted file mode 100644
index dc03b5a..0000000
--- a/sources/agents/ollama_quick_refactor.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Quick Refactor"
-description = "Local Ollama deterministic inline refactor (qwen2.5-coder). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
-
-model        = "qwen2.5-coder:7b"
-enable_tools = false
-tags         = ["refactor", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.2
-keep_alive  = "5m"
diff --git a/sources/agents/ollama_quick_refactor_gemma4.toml b/sources/agents/ollama_quick_refactor_gemma4.toml
new file mode 100644
index 0000000..571bdc1
--- /dev/null
+++ b/sources/agents/ollama_quick_refactor_gemma4.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Gemma 4"
+description = "Local Ollama smart inline refactor — gemma4:12b with native reasoning (thinks before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
+
+model           = "gemma4:12b"
+enable_tools    = false
+enable_thinking = true
+tags            = ["refactor", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 4096
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_quick_refactor_qwen35.toml b/sources/agents/ollama_quick_refactor_qwen35.toml
new file mode 100644
index 0000000..6ec9613
--- /dev/null
+++ b/sources/agents/ollama_quick_refactor_qwen35.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Qwen3.5"
+description = "Local Ollama smart inline refactor — qwen3.5:9b with native reasoning (thinks before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
+
+model           = "qwen3.5:9b"
+enable_tools    = false
+enable_thinking = true
+tags            = ["refactor", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 4096
+keep_alive  = "5m"
diff --git a/sources/agents/ollama_quick_refactor_simple.toml b/sources/agents/ollama_quick_refactor_simple.toml
new file mode 100644
index 0000000..1fa14eb
--- /dev/null
+++ b/sources/agents/ollama_quick_refactor_simple.toml
@@ -0,0 +1,16 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Simple"
+description = "Local Ollama deterministic inline refactor — fast single-shot, any model (Qwen2.5-Coder 7B by default). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No thinking, no tools."
+
+model        = "qwen2.5-coder:7b"
+enable_tools = false
+tags         = ["refactor", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.2
+keep_alive  = "5m"
diff --git a/sources/agents/openai_responses_base.toml b/sources/agents/openai_base_responses.toml
similarity index 97%
rename from sources/agents/openai_responses_base.toml
rename to sources/agents/openai_base_responses.toml
index b7548f2..fedaf41 100644
--- a/sources/agents/openai_responses_base.toml
+++ b/sources/agents/openai_base_responses.toml
@@ -1,6 +1,6 @@
 schema_version = 1
 
-name        = "OpenAI Responses Base"
+name        = "OpenAI Base Responses"
 description = "OpenAI Responses API request body (/responses). Abstract — extend it and set model."
 abstract    = true
 
diff --git a/sources/agents/openai_chat_completions.toml b/sources/agents/openai_chat.toml
similarity index 89%
rename from sources/agents/openai_chat_completions.toml
rename to sources/agents/openai_chat.toml
index 14787cf..2b9b2e4 100644
--- a/sources/agents/openai_chat_completions.toml
+++ b/sources/agents/openai_chat.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
 extends     = "OpenAI Base Chat"
-name        = "OpenAI Chat Completions"
+name        = "OpenAI Chat"
 description = "OpenAI GPT-4o — coding chat via Chat Completions."
 
 model           = "gpt-4o"
diff --git a/sources/agents/openai_responses_chat.toml b/sources/agents/openai_chat_responses.toml
similarity index 83%
rename from sources/agents/openai_responses_chat.toml
rename to sources/agents/openai_chat_responses.toml
index 019a207..33c11b2 100644
--- a/sources/agents/openai_responses_chat.toml
+++ b/sources/agents/openai_chat_responses.toml
@@ -1,7 +1,7 @@
 schema_version = 1
 
-extends     = "OpenAI Responses Base"
-name        = "OpenAI Responses Chat"
+extends     = "OpenAI Base Responses"
+name        = "OpenAI Chat — Responses"
 description = "OpenAI o4-mini — reasoning coding chat via the Responses API."
 
 model           = "o4-mini"
diff --git a/sources/agents/roles/agentic-coder.md b/sources/agents/roles/agentic-coder.md
new file mode 100644
index 0000000..0d3cebb
--- /dev/null
+++ b/sources/agents/roles/agentic-coder.md
@@ -0,0 +1,15 @@
+You are an autonomous Qt/C++ coding agent working inside the Qt Creator IDE. You have tools to read, search, edit, and build the project. Use them to complete the user's request directly — act, do not wait for approval.
+
+## Workflow
+- **Do the task with tools.** When the request is clear, carry it out. Ask a question only when it is genuinely ambiguous or destructive — never to confirm an obvious change.
+- **Read before editing.** Call read_file for the exact current content before changing a file, so your edit matches.
+- **Edit through tools.** Apply changes with edit_file (create_new_file for new files). Do not paste whole files into chat when an edit will do.
+- **Stay in the project source root.** Never create or edit files in the build directory.
+- **Verify when it matters.** After non-trivial changes, build_project and check get_issues_list, then fix anything you broke.
+
+## Code
+- C++20, Qt6; match the surrounding style, naming, and patterns.
+- Write the minimum that solves the task: no over-engineering, no TODOs, no debug code, no unrelated refactoring. Make sure it compiles.
+
+## Reporting
+- Be brief: a one-line summary of what changed and which files. Let the diff speak. Call out only non-obvious consequences.