feat: Add more agents configs

2026-06-30 01:59:11 -04:00 · 2026-06-29 10:05:16 +02:00
parent 747dfb540e
commit 70c6d30a72
31 changed files with 287 additions and 127 deletions
--- a/sources/agents/agents.qrc
+++ b/sources/agents/agents.qrc
@@ -1,30 +1,37 @@
 <RCC>
    <qresource prefix="/agents">
        <file>claude_base_chat.toml</file>
-        <file>claude_chat.toml</file>
-        <file>claude_opus_chat.toml</file>
-        <file>claude_opus_max.toml</file>
+        <file>claude_chat_sonnet.toml</file>
+        <file>claude_chat_opus_xhigh.toml</file>
+        <file>claude_chat_opus_max.toml</file>
        <file>claude_completion.toml</file>
        <file>claude_compression.toml</file>
        <file>claude_quick_refactor.toml</file>
        <file>claude_quick_refactor_fast.toml</file>
        <file>openai_base_chat.toml</file>
-        <file>openai_chat_completions.toml</file>
-        <file>openai_responses_base.toml</file>
-        <file>openai_responses_chat.toml</file>
+        <file>openai_chat.toml</file>
+        <file>openai_base_responses.toml</file>
+        <file>openai_chat_responses.toml</file>
        <file>google_base_chat.toml</file>
        <file>google_chat.toml</file>
        <file>ollama_base_chat.toml</file>
-        <file>ollama_chat.toml</file>
-        <file>ollama_chat_completion.toml</file>
+        <file>ollama_chat_simple.toml</file>
+        <file>ollama_chat_thinking.toml</file>
+        <file>ollama_completion_chat.toml</file>
        <file>ollama_base_fim.toml</file>
-        <file>ollama_fim.toml</file>
-        <file>ollama_codellama_qml_fim.toml</file>
-        <file>ollama_quick_refactor.toml</file>
-        <file>ollama_compression.toml</file>
+        <file>ollama_completion_fim.toml</file>
+        <file>ollama_completion_qml.toml</file>
+        <file>ollama_quick_refactor_simple.toml</file>
+        <file>ollama_quick_refactor_qwen35.toml</file>
+        <file>ollama_quick_refactor_gemma4.toml</file>
+        <file>ollama_compression_8gb.toml</file>
+        <file>ollama_compression_16gb.toml</file>
+        <file>ollama_compression_32gb.toml</file>
+        <file>ollama_chat_gemma4.toml</file>
    </qresource>
    <qresource prefix="/roles">
        <file alias="qt-cpp-developer.md">roles/qt-cpp-developer.md</file>
+        <file alias="agentic-coder.md">roles/agentic-coder.md</file>
        <file alias="code-completion.md">roles/code-completion.md</file>
        <file alias="code-completion-c-like.md">roles/code-completion-c-like.md</file>
        <file alias="code-completion-qml.md">roles/code-completion-qml.md</file>
--- a/sources/agents/claude_chat_opus_max.toml
+++ b/sources/agents/claude_chat_opus_max.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Opus Max Chat"
+name        = "Claude Chat — Opus Max"
 description = "Anthropic Claude Opus 4.8 — maximum-capability coding chat; adaptive thinking at max effort with 128k output. For frontier problems; higher cost and latency."

 model             = "claude-opus-4-8"
--- a/sources/agents/claude_chat_opus_xhigh.toml
+++ b/sources/agents/claude_chat_opus_xhigh.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Opus xHigh Chat"
+name        = "Claude Chat — Opus xHigh"
 description = "Anthropic Claude Opus 4.8 — coding chat with adaptive thinking at xhigh effort, tuned for agentic and coding work."

 model             = "claude-opus-4-8"
--- a/sources/agents/claude_chat_sonnet.toml
+++ b/sources/agents/claude_chat_sonnet.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Sonnet Chat"
+name        = "Claude Chat — Sonnet"
 description = "Anthropic Claude — coding chat with adaptive thinking."

 model             = "claude-sonnet-4-6"
--- a/sources/agents/claude_completion.toml
+++ b/sources/agents/claude_completion.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Code Completion"
+name        = "Claude Completion"
 description = "Anthropic Claude — code completion using the master <code_context> message format over the Messages API."

 model = "claude-haiku-4-5"
--- a/sources/agents/claude_compression.toml
+++ b/sources/agents/claude_compression.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Chat Compression"
+name        = "Claude Compression"
 description = "Anthropic Claude Haiku — fast, low-cost conversation summarization for shorter chats. Carries the summary system prompt; no tools, no thinking."

 model        = "claude-haiku-4-5"
--- a/sources/agents/claude_quick_refactor_fast.toml
+++ b/sources/agents/claude_quick_refactor_fast.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "Claude Base Chat"
-name        = "Claude Quick Refactor Fast"
+name        = "Claude Quick Refactor — Fast"
 description = "Anthropic Claude Haiku — fast single-shot inline refactor. Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context (file, code, cursor/selection). No thinking; tools off."

 model           = "claude-haiku-4-5"
--- a/sources/agents/ollama_base_fim.toml
+++ b/sources/agents/ollama_base_fim.toml
@@ -1,6 +1,6 @@
 schema_version = 1

-name        = "Ollama FIM Base"
+name        = "Ollama Base FIM"
 description = "Ollama native /api/generate FIM request body. Abstract — extend it and set model."
 abstract    = true

--- a/sources/agents/ollama_chat.toml
+++ b/sources/agents/ollama_chat.toml
@@ -1,15 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Chat"
-description = "Local Ollama coding chat (qwen2.5-coder)."
-
-model = "qwen2.5-coder:7b"
-tags  = ["chat", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.7
-keep_alive  = "5m"
--- a/sources/agents/ollama_chat_gemma4.toml
+++ b/sources/agents/ollama_chat_gemma4.toml
@@ -0,0 +1,22 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Gemma 4"
+description = "Local Ollama agentic coding chat (Gemma 4 12B by default) — native reasoning plus IDE tool use (read/edit/build), driven by the act-first agentic-coder role. Vision-capable."
+
+model           = "gemma4:12b"
+enable_tools    = true
+enable_thinking = true
+tags            = ["chat", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/roles/agentic-coder.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+temperature = 1.0
+top_p       = 0.95
+top_k       = 64
+num_predict = 8192
+keep_alive  = "5m"
--- a/sources/agents/ollama_chat_simple.toml
+++ b/sources/agents/ollama_chat_simple.toml
@@ -0,0 +1,15 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Simple"
+description = "Local Ollama coding chat for any model — plain conversational assistant, no tools, no thinking (Qwen3.5 4B by default)."
+
+model = "qwen3.5:4b"
+tags  = ["chat", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.7
+keep_alive  = "5m"
--- a/sources/agents/ollama_chat_thinking.toml
+++ b/sources/agents/ollama_chat_thinking.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Chat — Thinking"
+description = "Local Ollama agentic coding chat — tools + native reasoning, for any tools/thinking-capable model (Qwen3.5 9B by default). Acts through IDE tools (read/edit/build) via the act-first agentic-coder role."
+
+model           = "qwen3.5:9b"
+enable_tools    = true
+enable_thinking = true
+tags            = ["chat", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/roles/agentic-coder.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 8192
+keep_alive  = "5m"
--- a/sources/agents/ollama_codellama_qml_fim.toml
+++ b/sources/agents/ollama_codellama_qml_fim.toml
@@ -1,33 +0,0 @@
-schema_version = 1
-
-name        = "Ollama CodeLlama QML FIM"
-description = "Local Ollama FIM for the CodeLlama-13B QML fine-tune (raw passthrough Modelfile, suffix-first markers)."
-
-provider_instance = "Ollama (Native)"
-endpoint          = "/api/generate"
-
-model = "theqtcompany/codellama-7b-qml"
-tags  = ["completion", "ollama", "local", "fim", "qml"]
-
-[match]
-file_patterns = ["*.qml"]
-
-[body]
-prompt = """{{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}"""
-
-[body.options]
-temperature = 0
-top_p = 1
-repeat_penalty = 1.05
-num_predict = 500
-stop = [
-  "<SUF>",
-  "<PRE>",
-  "</PRE>",
-  "</SUF>",
-  "< EOT >",
-  "\\end",
-  "<MID>",
-  "</MID>",
-  "##",
-]
--- a/sources/agents/ollama_completion_chat.toml
+++ b/sources/agents/ollama_completion_chat.toml
@@ -1,11 +1,11 @@
 schema_version = 1

 extends     = "Ollama Base Chat"
-name        = "Ollama FIM-on-chat"
-description = "Local Ollama code completion over /api/chat using the <code_context> message format — FIM-on-chat."
+name        = "Ollama Completion — Chat-style"
+description = "Code completion via a chat request — the cursor sits in a <code_context> message on /api/chat, so the model just continues the code. Works with ANY normal chat/instruct model (qwen2.5-coder, qwen3, llama3, gemma, mistral, codellama-instruct, …), so use this when your model is NOT a dedicated FIM model. Tradeoff vs native 'Ollama Completion — FIM': a bit slower and may leak code fences or explanations, so slightly less precise."

 model = "qwen2.5-coder:7b"
-tags  = ["completion", "ollama", "local", "fim"]
+tags  = ["completion", "ollama", "local", "fim", "8gb"]

 system_prompt = """
 {%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
@@ -25,7 +25,7 @@ messages = """
 """

 [body.options]
-num_predict = 512
+num_predict = 256
 temperature = 0.2
 keep_alive  = "5m"
 stop        = ["</code_context>"]
--- a/sources/agents/ollama_completion_fim.toml
+++ b/sources/agents/ollama_completion_fim.toml
@@ -0,0 +1,13 @@
+schema_version = 1
+
+extends     = "Ollama Base FIM"
+name        = "Ollama Completion — FIM"
+description = "Native fill-in-the-middle completion — uses the model's OWN FIM template (prompt+suffix on /api/generate). Fast and clean (no markdown or prose), but works ONLY with models that ship a FIM template, and those are few: the base / '-code' variants, NOT instruct/chat models. Verified to work: qwen2.5-coder (incl. -base), codellama:7b-code, deepseek-coder-v2 lite-base. A plain chat model outputs garbage here — use 'Ollama Completion — Chat-style' instead. Check a model: `ollama show <model> --modelfile` must mention 'Suffix'."
+
+model = "qwen2.5-coder:7b"
+tags  = ["completion", "ollama", "local", "fim", "8gb"]
+
+[body.options]
+num_predict = 256
+temperature = 0.2
+keep_alive  = "5m"
--- a/sources/agents/ollama_completion_qml.toml
+++ b/sources/agents/ollama_completion_qml.toml
@@ -0,0 +1,33 @@
+schema_version = 1
+
+name        = "Ollama Completion — QML (Qt)"
+description = "Specialized QML completion built for The Qt Company's CodeLlama-QML fine-tune (`theqtcompany/codellama-7b-qml` / `13b-qml`) — a model trained specifically for QML. Native fill-in-the-middle with the fine-tune's suffix-first markers; route `.qml` files here via [match]. Not a general agent — use only with a Qt QML model (theqtcompany/* or palm1r/codellama-13b-code-qml)."
+
+provider_instance = "Ollama (Native)"
+endpoint          = "/api/generate"
+
+model = "theqtcompany/codellama-7b-qml"
+tags  = ["completion", "ollama", "local", "fim", "qml", "8gb"]
+
+[match]
+file_patterns = ["*.qml"]
+
+[body]
+prompt = """{{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}"""
+
+[body.options]
+temperature = 0
+top_p = 1
+repeat_penalty = 1.05
+num_predict = 500
+stop = [
+  "<SUF>",
+  "<PRE>",
+  "</PRE>",
+  "</SUF>",
+  "< EOT >",
+  "\\end",
+  "<MID>",
+  "</MID>",
+  "##",
+]
--- a/sources/agents/ollama_compression.toml
+++ b/sources/agents/ollama_compression.toml
@@ -1,17 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Chat Compression"
-description = "Local Ollama conversation summarization (qwen2.5-coder). Carries the summary system prompt; no tools."
-
-model        = "qwen2.5-coder:7b"
-enable_tools = false
-tags         = ["compression", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.3
-num_ctx     = 8192
-keep_alive  = "5m"
--- a/sources/agents/ollama_compression_16gb.toml
+++ b/sources/agents/ollama_compression_16gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 16 GB"
+description = "Conversation summarization tuned for ~16 GB RAM — qwen3.5:9b at num_ctx 8192 (~10 GB resident). General summarizer with moderate context. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:9b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 8192
+keep_alive  = "5m"
--- a/sources/agents/ollama_compression_32gb.toml
+++ b/sources/agents/ollama_compression_32gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 32 GB"
+description = "Conversation summarization tuned for ~32 GB RAM — qwen3.5:9b at num_ctx 24576 (~20 GB resident). Long context for big chats. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:9b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "32gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 24576
+keep_alive  = "5m"
--- a/sources/agents/ollama_compression_8gb.toml
+++ b/sources/agents/ollama_compression_8gb.toml
@@ -0,0 +1,17 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Compression — 8 GB"
+description = "Conversation summarization tuned for ~8 GB RAM — qwen3.5:4b at num_ctx 8192 (~5.5 GB resident). Smallest qwen3.5 (same family as the bigger tiers), decent context for the footprint. KV cache is reserved at load per num_ctx. No tools."
+
+model        = "qwen3.5:4b"
+enable_tools = false
+tags         = ["compression", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.3
+num_ctx     = 8192
+keep_alive  = "5m"
--- a/sources/agents/ollama_fim.toml
+++ b/sources/agents/ollama_fim.toml
@@ -1,13 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama FIM Base"
-name        = "Ollama FIM"
-description = "Local Ollama FIM code completion (qwen2.5-coder)."
-
-model = "qwen2.5-coder:7b"
-tags  = ["completion", "ollama", "local", "fim"]
-
-[body.options]
-num_predict = 256
-temperature = 0.2
-keep_alive  = "5m"
--- a/sources/agents/ollama_quick_refactor.toml
+++ b/sources/agents/ollama_quick_refactor.toml
@@ -1,16 +0,0 @@
-schema_version = 1
-
-extends     = "Ollama Base Chat"
-name        = "Ollama Quick Refactor"
-description = "Local Ollama deterministic inline refactor (qwen2.5-coder). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
-
-model        = "qwen2.5-coder:7b"
-enable_tools = false
-tags         = ["refactor", "ollama", "local"]
-
-system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
-
-[body.options]
-num_predict = 2048
-temperature = 0.2
-keep_alive  = "5m"
--- a/sources/agents/ollama_quick_refactor_gemma4.toml
+++ b/sources/agents/ollama_quick_refactor_gemma4.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Gemma 4"
+description = "Local Ollama smart inline refactor — gemma4:12b with native reasoning (thinks before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
+
+model           = "gemma4:12b"
+enable_tools    = false
+enable_thinking = true
+tags            = ["refactor", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 4096
+keep_alive  = "5m"
--- a/sources/agents/ollama_quick_refactor_qwen35.toml
+++ b/sources/agents/ollama_quick_refactor_qwen35.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Qwen3.5"
+description = "Local Ollama smart inline refactor — qwen3.5:9b with native reasoning (thinks before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No tools."
+
+model           = "qwen3.5:9b"
+enable_tools    = false
+enable_thinking = true
+tags            = ["refactor", "ollama", "local", "16gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body]
+think = true
+
+[body.options]
+num_predict = 4096
+keep_alive  = "5m"
--- a/sources/agents/ollama_quick_refactor_simple.toml
+++ b/sources/agents/ollama_quick_refactor_simple.toml
@@ -0,0 +1,16 @@
+schema_version = 1
+
+extends     = "Ollama Base Chat"
+name        = "Ollama Quick Refactor — Simple"
+description = "Local Ollama deterministic inline refactor — fast single-shot, any model (Qwen2.5-Coder 7B by default). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. No thinking, no tools."
+
+model        = "qwen2.5-coder:7b"
+enable_tools = false
+tags         = ["refactor", "ollama", "local", "8gb"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body.options]
+num_predict = 2048
+temperature = 0.2
+keep_alive  = "5m"
--- a/sources/agents/openai_base_responses.toml
+++ b/sources/agents/openai_base_responses.toml
@@ -1,6 +1,6 @@
 schema_version = 1

-name        = "OpenAI Responses Base"
+name        = "OpenAI Base Responses"
 description = "OpenAI Responses API request body (/responses). Abstract — extend it and set model."
 abstract    = true

--- a/sources/agents/openai_chat.toml
+++ b/sources/agents/openai_chat.toml
@@ -1,7 +1,7 @@
 schema_version = 1

 extends     = "OpenAI Base Chat"
-name        = "OpenAI Chat Completions"
+name        = "OpenAI Chat"
 description = "OpenAI GPT-4o — coding chat via Chat Completions."

 model           = "gpt-4o"
--- a/sources/agents/openai_chat_responses.toml
+++ b/sources/agents/openai_chat_responses.toml
@@ -1,7 +1,7 @@
 schema_version = 1

-extends     = "OpenAI Responses Base"
-name        = "OpenAI Responses Chat"
+extends     = "OpenAI Base Responses"
+name        = "OpenAI Chat — Responses"
 description = "OpenAI o4-mini — reasoning coding chat via the Responses API."

 model           = "o4-mini"
--- a/sources/agents/roles/agentic-coder.md
+++ b/sources/agents/roles/agentic-coder.md
@@ -0,0 +1,15 @@
+You are an autonomous Qt/C++ coding agent working inside the Qt Creator IDE. You have tools to read, search, edit, and build the project. Use them to complete the user's request directly — act, do not wait for approval.
+
+## Workflow
+- **Do the task with tools.** When the request is clear, carry it out. Ask a question only when it is genuinely ambiguous or destructive — never to confirm an obvious change.
+- **Read before editing.** Call read_file for the exact current content before changing a file, so your edit matches.
+- **Edit through tools.** Apply changes with edit_file (create_new_file for new files). Do not paste whole files into chat when an edit will do.
+- **Stay in the project source root.** Never create or edit files in the build directory.
+- **Verify when it matters.** After non-trivial changes, build_project and check get_issues_list, then fix anything you broke.
+
+## Code
+- C++20, Qt6; match the surrounding style, naming, and patterns.
+- Write the minimum that solves the task: no over-engineering, no TODOs, no debug code, no unrelated refactoring. Make sure it compiles.
+
+## Reporting
+- Be brief: a one-line summary of what changed and which files. Let the diff speak. Call out only non-obvious consequences.