refactor: Finalize agent template

2026-06-15 02:39:19 -04:00 · 2026-06-03 17:28:50 +02:00
parent 98a618cf87
commit c151c5030b
57 changed files with 1737 additions and 393 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,6 +47,11 @@ if(GTest_FOUND)
  add_subdirectory(test)
 endif()

+# Opt-out switch (ON by default) for the standalone bench executable defined
+# in the bench/ subdirectory.
+option(QODEASSIST_BUILD_BENCH "Build the standalone agent bench CLI" ON)
+if(QODEASSIST_BUILD_BENCH)
+  add_subdirectory(bench)
+endif()
+
 add_qtc_plugin(QodeAssist
  PLUGIN_DEPENDS
    QtCreator::Core
--- a/bench/CMakeLists.txt
+++ b/bench/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Standalone command-line executable built from main.cpp.
+add_executable(QodeAssistBench
+    main.cpp
+)
+
+# Links Qt Core plus the Session, Agents, Providers, ProvidersConfig and
+# LLMQore libraries that main.cpp includes headers from.
+target_link_libraries(QodeAssistBench PRIVATE
+    Qt::Core
+    Session
+    Agents
+    Providers
+    ProvidersConfig
+    LLMQore
+)
+
+# The produced binary is named "bench" (run-bench.sh.in relies on that name).
+set_target_properties(QodeAssistBench PROPERTIES
+    OUTPUT_NAME bench
+    FOLDER "qtc_runnable"
+)
+
+# macOS only: derive the Frameworks directory from the location of the
+# QtCreator::Core library (two levels up, then Frameworks). When it exists,
+# generate a run-bench.sh launcher in the build directory.
+if(APPLE)
+    get_target_property(_qtcCoreLoc QtCreator::Core LOCATION)
+    get_filename_component(_qtcCoreDir "${_qtcCoreLoc}" DIRECTORY)
+    get_filename_component(QTC_FRAMEWORKS_DIR "${_qtcCoreDir}/../../Frameworks" ABSOLUTE)
+    if(EXISTS "${QTC_FRAMEWORKS_DIR}")
+        # @ONLY substitutes only @VAR@ references, so the shell's own ${...}
+        # expansions in the template are left untouched.
+        configure_file(run-bench.sh.in "${CMAKE_CURRENT_BINARY_DIR}/run-bench.sh" @ONLY)
+        # Runs at configure time to mark the generated script executable.
+        execute_process(COMMAND chmod +x "${CMAKE_CURRENT_BINARY_DIR}/run-bench.sh")
+    endif()
+endif()
--- a/bench/main.cpp
+++ b/bench/main.cpp
@@ -0,0 +1,449 @@
+// Copyright (C) 2024-2026 Petr Mironychev
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <QCommandLineParser>
+#include <QCoreApplication>
+#include <QDir>
+#include <QFile>
+#include <QFileInfo>
+#include <QHash>
+#include <QJsonDocument>
+#include <QJsonObject>
+#include <QRegularExpression>
+#include <QTextStream>
+#include <QTimer>
+
+#include <memory>
+#include <vector>
+
+#include <LLMQore/BaseClient.hpp>
+#include <LLMQore/ContentBlocks.hpp>
+#include <LLMQore/ToolRegistry.hpp>
+#include <LLMQore/ToolsManager.hpp>
+
+#include <Agent.hpp>
+#include <AgentConfig.hpp>
+#include <AgentFactory.hpp>
+#include <ContextData.hpp>
+#include <ContextRenderer.hpp>
+#include <GenericProvider.hpp>
+#include <Provider.hpp>
+#include <ProviderInstance.hpp>
+#include <ProviderInstanceFactory.hpp>
+#include <ProviderSecretsStore.hpp>
+#include <ResponseEvent.hpp>
+#include <Session.hpp>
+#include <SessionManager.hpp>
+
+using namespace QodeAssist;
+
+namespace {
+
+// Returns the shared text stream that writes to stdout. The stream is a
+// function-local static, so it is constructed on the first call.
+QTextStream &out()
+{
+    static QTextStream stdoutStream(stdout);
+    return stdoutStream;
+}
+
+// Returns the shared text stream that writes to stderr. The stream is a
+// function-local static, so it is constructed on the first call.
+QTextStream &err()
+{
+    static QTextStream stderrStream(stderr);
+    return stderrStream;
+}
+
+// Reads stdin to the end and returns the whole content as one string.
+QString readStdin()
+{
+    QTextStream stdinStream(stdin);
+    const QString everything = stdinStream.readAll();
+    return everything;
+}
+
+// Parses a dotenv-style file (one KEY=VALUE per line) into a map.
+//
+// Blank lines and lines starting with '#' are skipped, a leading "export " is
+// dropped, and a value wrapped in matching single or double quotes has the
+// quotes removed. Lines without '=' or with an empty key are ignored.
+//
+// path      File to read.
+// errorOut  Optional; receives a message when the file cannot be opened.
+// Returns the parsed entries (empty when the file cannot be opened).
+QHash<QString, QString> parseEnvFile(const QString &path, QString *errorOut)
+{
+    QHash<QString, QString> entries;
+
+    QFile envFile(path);
+    const bool opened = envFile.open(QIODevice::ReadOnly | QIODevice::Text);
+    if (!opened) {
+        if (errorOut)
+            *errorOut = QStringLiteral("cannot open env file: %1").arg(path);
+        return entries;
+    }
+
+    const QLatin1String exportPrefix("export ");
+    const auto isWrappedIn = [](const QString &text, QLatin1Char quote) {
+        return text.startsWith(quote) && text.endsWith(quote);
+    };
+
+    QTextStream reader(&envFile);
+    while (!reader.atEnd()) {
+        QString entry = reader.readLine().trimmed();
+        const bool isComment = entry.startsWith(QLatin1Char('#'));
+        if (entry.isEmpty() || isComment)
+            continue;
+        if (entry.startsWith(exportPrefix))
+            entry = entry.mid(exportPrefix.size()).trimmed();
+
+        // A separator at index 0 would mean an empty key; treat it as invalid.
+        const int separator = entry.indexOf(QLatin1Char('='));
+        if (separator <= 0)
+            continue;
+
+        QString value = entry.mid(separator + 1).trimmed();
+        const bool quoted = value.size() >= 2
+                            && (isWrappedIn(value, QLatin1Char('"'))
+                                || isWrappedIn(value, QLatin1Char('\'')));
+        if (quoted)
+            value = value.mid(1, value.size() - 2);
+
+        entries.insert(entry.left(separator).trimmed(), value);
+    }
+    return entries;
+}
+
+// Builds the ordered list of variable names to try when looking up an API key.
+//
+// Order: the explicit apiKeyRef (if any), the conventional variable name(s)
+// for a recognised clientApi, then a name derived from clientApi itself
+// (upper-cased, runs of non-alphanumerics collapsed to '_', surrounding '_'
+// stripped, "_API_KEY" appended).
+QStringList apiKeyCandidates(const QString &clientApi, const QString &apiKeyRef)
+{
+    QStringList candidates;
+    if (!apiKeyRef.isEmpty())
+        candidates.append(apiKeyRef);
+
+    // Exact-name matches; "OpenAI" is handled separately as a prefix match.
+    static const QHash<QString, QStringList> conventional{
+        {QStringLiteral("Claude"), {QStringLiteral("ANTHROPIC_API_KEY")}},
+        {QStringLiteral("Mistral AI"), {QStringLiteral("MISTRAL_API_KEY")}},
+        {QStringLiteral("Codestral"), {QStringLiteral("CODESTRAL_API_KEY")}},
+        {QStringLiteral("Google AI"),
+         {QStringLiteral("GEMINI_API_KEY"), QStringLiteral("GOOGLE_API_KEY")}},
+        {QStringLiteral("OpenRouter"), {QStringLiteral("OPENROUTER_API_KEY")}},
+    };
+    if (clientApi.startsWith(QLatin1String("OpenAI")))
+        candidates.append(QStringLiteral("OPENAI_API_KEY"));
+    else
+        candidates += conventional.value(clientApi);
+
+    const QRegularExpression nonAlnumRun(QStringLiteral("[^A-Z0-9]+"));
+    QString envName = clientApi.toUpper();
+    envName.replace(nonAlnumRun, QStringLiteral("_"));
+    envName = envName.trimmed();
+    while (envName.startsWith(QLatin1Char('_')))
+        envName.remove(0, 1);
+    while (envName.endsWith(QLatin1Char('_')))
+        envName.chop(1);
+
+    if (!envName.isEmpty())
+        candidates.append(envName + QStringLiteral("_API_KEY"));
+    return candidates;
+}
+
+// Returns the first non-empty API key found for the given provider.
+//
+// For each candidate name from apiKeyCandidates(), the parsed env-file map is
+// consulted first and the process environment second. Returns an empty string
+// when no candidate yields a value.
+QString resolveApiKey(
+    const QHash<QString, QString> &envFile, const QString &clientApi, const QString &apiKeyRef)
+{
+    const QStringList names = apiKeyCandidates(clientApi, apiKeyRef);
+    for (const QString &varName : names) {
+        // value() yields an empty string for a missing key, same as "not set".
+        const QString fromFile = envFile.value(varName);
+        if (!fromFile.isEmpty())
+            return fromFile;
+
+        const QByteArray fromEnvironment = qgetenv(varName.toUtf8().constData());
+        if (!fromEnvironment.isEmpty())
+            return QString::fromUtf8(fromEnvironment);
+    }
+    return QString();
+}
+
+// Maps a file path's extension (case-insensitive) to an image media type.
+// Returns an empty string for any extension other than png/jpg/jpeg/gif/webp.
+QString imageMediaType(const QString &path)
+{
+    struct Mapping
+    {
+        const char *extension;
+        const char *mediaType;
+    };
+    static const Mapping known[] = {
+        {"png", "image/png"},
+        {"jpg", "image/jpeg"},
+        {"jpeg", "image/jpeg"},
+        {"gif", "image/gif"},
+        {"webp", "image/webp"},
+    };
+
+    const QString suffix = QFileInfo(path).suffix().toLower();
+    for (const Mapping &entry : known) {
+        if (suffix == QLatin1String(entry.extension))
+            return QString::fromLatin1(entry.mediaType);
+    }
+    return QString();
+}
+
+// Prints one response event to the console.
+//
+// Text deltas go to stdout; everything else (thinking deltas, tool calls and
+// results, usage, errors) goes to stderr. Thinking deltas are printed only
+// when showThinking is true. MessageStart, ToolCallArgsDelta and MessageStop
+// produce no output.
+void printEvent(const ResponseEvent &ev, bool showThinking)
+{
+    using Kind = ResponseEvent::Kind;
+
+    switch (ev.kind()) {
+    case Kind::TextDelta: {
+        const auto *delta = ev.as<ResponseEvents::TextDelta>();
+        if (delta) {
+            out() << delta->text;
+            out().flush();
+        }
+        return;
+    }
+    case Kind::ThinkingDelta: {
+        if (!showThinking)
+            return;
+        const auto *delta = ev.as<ResponseEvents::ThinkingDelta>();
+        if (delta) {
+            err() << delta->thinking;
+            err().flush();
+        }
+        return;
+    }
+    case Kind::ToolCallStart: {
+        const auto *start = ev.as<ResponseEvents::ToolCallStart>();
+        if (start)
+            err() << "\n[tool-call] " << start->name << " (" << start->id << ")\n";
+        return;
+    }
+    case Kind::ToolCallEnd: {
+        const auto *end = ev.as<ResponseEvents::ToolCallEnd>();
+        if (end) {
+            const QByteArray compact = QJsonDocument(end->finalArgs).toJson(QJsonDocument::Compact);
+            const QString args = QString::fromUtf8(compact);
+            err() << "[tool-args] " << args << "\n";
+        }
+        return;
+    }
+    case Kind::ToolResult: {
+        const auto *result = ev.as<ResponseEvents::ToolResult>();
+        if (result) {
+            err() << "[tool-result" << (result->isError ? " ERROR" : "") << "] ";
+            err() << result->text << "\n";
+        }
+        return;
+    }
+    case Kind::Usage: {
+        const auto *usage = ev.as<ResponseEvents::Usage>();
+        if (usage) {
+            err() << "\n[usage] in=" << usage->inputTokens;
+            err() << " out=" << usage->outputTokens << "\n";
+        }
+        return;
+    }
+    case Kind::Error: {
+        const auto *failure = ev.as<ResponseEvents::Error>();
+        if (failure)
+            err() << "\n[error] " << failure->message << "\n";
+        return;
+    }
+    case Kind::MessageStart:
+    case Kind::ToolCallArgsDelta:
+    case Kind::MessageStop:
+        return;
+    }
+}
+
+} // namespace
+
+// Entry point of the bench CLI.
+//
+// Flow: parse the command line, build the provider/agent/session objects,
+// resolve an API key (--api-key, then env file, then process environment),
+// set the context bindings, assemble the prompt, and dispatch the request
+// from inside the event loop. With --mcp the dispatch is delayed until the
+// MCP tools are reported or a timeout elapses.
+//
+// Exit codes: 0 on success (and for --list), 1 on runtime failure,
+// 2 on usage/configuration errors.
+int main(int argc, char *argv[])
+{
+    QCoreApplication app(argc, argv);
+    QCoreApplication::setOrganizationName(QStringLiteral("QtProject"));
+    QCoreApplication::setApplicationName(QStringLiteral("QtCreator"));
+
+    QCommandLineParser parser;
+    parser.setApplicationDescription(
+        "QodeAssist bench — drive an agent through the live session pipeline.");
+    parser.addHelpOption();
+
+    QCommandLineOption listOpt(QStringList{"l", "list"}, "List available agent profiles and exit.");
+    QCommandLineOption agentOpt(
+        QStringList{"a", "agent"}, "Agent profile name to run.", "name");
+    QCommandLineOption fileOpt(
+        QStringList{"f", "file"}, "Load an agent from a TOML file instead of by name.", "path");
+    QCommandLineOption promptOpt(
+        QStringList{"p", "prompt"},
+        "Prompt text. If omitted, positional args or stdin are used.",
+        "text");
+    QCommandLineOption noThinkingOpt("no-thinking", "Hide thinking deltas from output.");
+    QCommandLineOption envOpt(
+        QStringList{"e", "env"},
+        "Read API keys from a dotenv file (KEY=VALUE per line). Defaults to ./.env if present.",
+        "path");
+    QCommandLineOption apiKeyOpt(
+        "api-key", "API key to use for the agent's provider (overrides env/settings).", "value");
+    QCommandLineOption projectDirOpt(
+        QStringList{"C", "project-dir"},
+        "Project root for the agent's context (${PROJECT_DIR}). Defaults to the current directory.",
+        "path");
+    QCommandLineOption imageOpt(
+        QStringList{"i", "image"},
+        "Attach an image file (png/jpeg/gif/webp). Repeatable. Requires a vision-capable agent.",
+        "path");
+    QCommandLineOption mcpOpt(
+        "mcp",
+        "Load MCP servers from a JSON config (mcpServers map) to give the agent executable tools.",
+        "path");
+    QCommandLineOption fimOpt(
+        "fim",
+        "Fill-in-the-middle completion mode: send prompt as the prefix and --suffix as the suffix.");
+    QCommandLineOption suffixOpt(
+        "suffix", "Suffix code after the cursor (FIM mode only).", "text");
+    parser.addOption(listOpt);
+    parser.addOption(agentOpt);
+    parser.addOption(fileOpt);
+    parser.addOption(promptOpt);
+    parser.addOption(noThinkingOpt);
+    parser.addOption(envOpt);
+    parser.addOption(apiKeyOpt);
+    parser.addOption(projectDirOpt);
+    parser.addOption(imageOpt);
+    parser.addOption(mcpOpt);
+    parser.addOption(fimOpt);
+    parser.addOption(suffixOpt);
+    parser.addPositionalArgument("prompt", "Prompt text (alternative to --prompt).", "[prompt...]");
+    parser.process(app);
+
+    Providers::registerBuiltinProviders();
+
+    // All long-lived objects are parented to the application object.
+    auto *instances = new Providers::ProviderInstanceFactory(&app);
+    auto *secrets = new Providers::ProviderSecretsStore(&app);
+    auto *agentFactory = new AgentFactory(instances, secrets, &app);
+    auto *sessions = new SessionManager(agentFactory, &app);
+
+    if (parser.isSet(listOpt)) {
+        const QStringList names = agentFactory->configNames();
+        if (names.isEmpty())
+            err() << "No agent profiles found.\n";
+        for (const QString &n : names)
+            out() << n << "\n";
+        return 0;
+    }
+
+    // --file takes precedence over --agent when both are given.
+    QString error;
+    Session *session = nullptr;
+    if (parser.isSet(fileOpt)) {
+        Agent *agent = agentFactory->createFromFile(parser.value(fileOpt), &app, &error);
+        if (agent)
+            session = new Session(agent, &app);
+    } else if (parser.isSet(agentOpt)) {
+        session = sessions->createSession(parser.value(agentOpt), &error);
+    } else {
+        err() << "Specify an agent with --agent <name> or --file <path>, or use --list.\n";
+        return 2;
+    }
+
+    if (!session || !session->isValid()) {
+        err() << "Failed to create session: "
+              << (session ? session->invalidReason() : error) << "\n";
+        return 1;
+    }
+
+    // API key resolution: --api-key wins; otherwise look the key up by the
+    // provider instance's candidate names in the env file / environment.
+    {
+        QHash<QString, QString> envFile;
+        QString envPath = parser.value(envOpt);
+        if (envPath.isEmpty() && QFile::exists(QStringLiteral(".env")))
+            envPath = QStringLiteral(".env");
+        if (!envPath.isEmpty()) {
+            QString envErr;
+            envFile = parseEnvFile(envPath, &envErr);
+            if (!envErr.isEmpty())
+                err() << "[env] " << envErr << "\n";
+        }
+
+        QString key = parser.value(apiKeyOpt);
+        if (key.isEmpty()) {
+            const AgentConfig &cfg = session->agent()->config();
+            const Providers::ProviderInstance *inst
+                = instances->instanceByName(cfg.providerInstance);
+            if (inst)
+                key = resolveApiKey(envFile, inst->clientApi, inst->apiKeyRef);
+        }
+        if (!key.isEmpty() && session->agent()->provider())
+            session->agent()->provider()->setApiKey(key);
+    }
+
+    {
+        Templates::ContextRenderer::Bindings bindings;
+        bindings.projectDir = parser.isSet(projectDirOpt)
+                                  ? QDir(parser.value(projectDirOpt)).absolutePath()
+                                  : QDir::currentPath();
+        bindings.homeDir = QDir::homePath();
+        session->setContextBindings(bindings);
+    }
+
+    const QStringList imagePaths = parser.values(imageOpt);
+
+    // Prompt source priority: --prompt, positional arguments, then stdin.
+    // Stdin is only read when no image was attached either.
+    QString prompt = parser.value(promptOpt);
+    if (prompt.isEmpty())
+        prompt = parser.positionalArguments().join(QLatin1Char(' '));
+    if (prompt.isEmpty() && imagePaths.isEmpty())
+        prompt = readStdin().trimmed();
+    if (prompt.isEmpty() && imagePaths.isEmpty()) {
+        err() << "Empty prompt.\n";
+        return 2;
+    }
+
+    if (!imagePaths.isEmpty() && !session->supportsImages())
+        err() << "[warning] agent's provider does not advertise image support.\n";
+
+    const bool showThinking = !parser.isSet(noThinkingOpt);
+    int exitCode = 0;
+
+    QObject::connect(session, &Session::event, &app, [showThinking](const ResponseEvent &ev) {
+        printEvent(ev, showThinking);
+    });
+    QObject::connect(
+        session, &Session::finished, &app, [&](const LLMQore::RequestID &, const QString &reason) {
+            err() << "\n[done] stopReason=" << (reason.isEmpty() ? "<none>" : reason) << "\n";
+            QCoreApplication::quit();
+        });
+    QObject::connect(
+        session, &Session::failed, &app, [&](const LLMQore::RequestID &, const QString &msg) {
+            err() << "\n[failed] " << msg << "\n";
+            exitCode = 1;
+            QCoreApplication::quit();
+        });
+
+    // Sends the request. Always invoked from inside the event loop, so the
+    // quit() calls on the failure paths take effect. Everything it captures by
+    // reference lives in main()'s outermost scope and outlives app.exec().
+    auto dispatch = [&] {
+        if (parser.isSet(fimOpt)) {
+            Templates::ContextData ctx;
+            ctx.prefix = prompt;
+            if (parser.isSet(suffixOpt))
+                ctx.suffix = parser.value(suffixOpt);
+            if (session->sendCompletion(std::move(ctx)).isEmpty()) {
+                err() << "Failed to dispatch FIM request (check provider URL / model).\n";
+                exitCode = 1;
+                QCoreApplication::quit();
+            }
+            return;
+        }
+
+        std::vector<std::unique_ptr<LLMQore::ContentBlock>> blocks;
+        for (const QString &imgPath : imagePaths) {
+            QFile img(imgPath);
+            if (!img.open(QIODevice::ReadOnly)) {
+                err() << "[image] cannot open: " << imgPath << "\n";
+                exitCode = 1;
+                QCoreApplication::quit();
+                return;
+            }
+            const QString media = imageMediaType(imgPath);
+            if (media.isEmpty()) {
+                err() << "[image] unsupported type: " << imgPath << "\n";
+                exitCode = 1;
+                QCoreApplication::quit();
+                return;
+            }
+            const QString b64 = QString::fromLatin1(img.readAll().toBase64());
+            blocks.push_back(std::make_unique<LLMQore::ImageContent>(
+                b64, media, LLMQore::ImageContent::ImageSourceType::Base64));
+        }
+        if (!prompt.isEmpty())
+            blocks.push_back(std::make_unique<LLMQore::TextContent>(prompt));
+        if (blocks.empty()) {
+            err() << "Nothing to send.\n";
+            exitCode = 1;
+            QCoreApplication::quit();
+            return;
+        }
+        if (session->send(std::move(blocks)).isEmpty()) {
+            err() << "Failed to dispatch request (check provider URL / model).\n";
+            exitCode = 1;
+            QCoreApplication::quit();
+        }
+    };
+
+    if (parser.isSet(mcpOpt)) {
+        const QString mcpPath = parser.value(mcpOpt);
+        QFile mcpFile(mcpPath);
+        if (!mcpFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
+            err() << "[mcp] cannot open config: " << mcpPath << "\n";
+            return 2;
+        }
+        QJsonParseError jerr;
+        const QJsonDocument mcpDoc = QJsonDocument::fromJson(mcpFile.readAll(), &jerr);
+        if (jerr.error != QJsonParseError::NoError || !mcpDoc.isObject()) {
+            err() << "[mcp] invalid JSON config: " << jerr.errorString() << "\n";
+            return 2;
+        }
+        auto *client = session->client();
+        if (!client) {
+            err() << "[mcp] session has no client.\n";
+            return 1;
+        }
+        auto *tools = client->tools();
+        if (!tools) {
+            err() << "[mcp] client has no tool registry.\n";
+            return 1;
+        }
+        tools->loadMcpServers(mcpDoc.object());
+        err() << "[mcp] loading servers, waiting for tools...\n";
+
+        // Upper bound on how long to wait for MCP tools before sending anyway.
+        constexpr int mcpToolWaitMs = 15000;
+
+        // `fire` and the toolsChanged slot run later from app.exec(), after
+        // this block has been left. `tools` is a local of this block, so it is
+        // captured BY VALUE (a by-reference capture would dangle). `dispatch`
+        // lives in main()'s outer scope and is safe to capture by reference.
+        // The shared flag makes the dispatch happen once, whichever of the
+        // signal or the timer comes first.
+        auto dispatched = std::make_shared<bool>(false);
+        auto fire = [tools, dispatched, &dispatch] {
+            if (*dispatched)
+                return;
+            *dispatched = true;
+            const int n = tools->getToolsDefinitions().size();
+            err() << "[mcp] " << n << " tool(s) available.\n";
+            dispatch();
+        };
+        QObject::connect(tools, &LLMQore::ToolRegistry::toolsChanged, &app, [tools, fire] {
+            if (!tools->getToolsDefinitions().isEmpty())
+                fire();
+        });
+        QTimer::singleShot(mcpToolWaitMs, &app, fire);
+    } else {
+        QTimer::singleShot(0, &app, dispatch);
+    }
+
+    app.exec();
+    return exitCode;
+}
--- a/bench/run-bench.sh.in
+++ b/bench/run-bench.sh.in
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Generated by CMake. Runs bench with a single Qt copy (Qt Creator's bundled
+# frameworks) to avoid duplicate-Qt objc warnings.
+# Absolute path of the directory containing this script.
+DIR="$(cd "$(dirname "$0")" && pwd)"
+# Put the Frameworks directory first, keeping any DYLD_FRAMEWORK_PATH that is
+# already set (the ":..." suffix is added only when the variable is non-empty).
+export DYLD_FRAMEWORK_PATH="@QTC_FRAMEWORKS_DIR@${DYLD_FRAMEWORK_PATH:+:$DYLD_FRAMEWORK_PATH}"
+# Replace this shell with the bench binary next to the script, forwarding all arguments.
+exec "$DIR/bench" "$@"
--- a/docs/agent-templates-design.md
+++ b/docs/agent-templates-design.md
@@ -0,0 +1,358 @@
+# Agent Templates — Design Note (body model, include, extends)
+
+Status: agreed design / ready to implement. Dev-facing (not end-user docs).
+Scope: how agent TOML profiles describe the request and share structure.
+
+## Problem this replaces
+
+The shipped model has each agent embed a `[template].message_format` jinja string
+that hand-builds the **whole** request body as text, plus `[template.sampling]` and
+`[template.thinking.*]` blocks merged in by `applySampling`. Pains:
+
+- Massive copy-paste: 9 OpenAI-compatible agents share a byte-identical ~50-line
+  `message_format`; 4 Claude agents share another; `role` + README `context` are
+  identical across 18 files.
+- `[template.sampling]` / `[template.thinking.overrides]` /
+  `[template.thinking.request_block.*]` describe **merge machinery**, not the request
+  body — they don't look like the actual API call. The `overrides` vs `request_block`
+  split is meaningless (both are deep-merged into the request identically).
+- Manual JSON-by-string-concatenation: trailing-comma bookkeeping
+  (`{% if not loop.is_last %},{% endif %}`) everywhere; a missing comma fails
+  silently at runtime (`renderBody` returns nullopt, only a `qWarning`).
+- `include` is hard-disabled, so there is no way to share a sub-fragment.
+
+## Agreed model
+
+### 1. `[body]` is a deep-mergeable table = the request body, 1:1 with the API
+
+Replace the `message_format` string and the `sampling`/`thinking` blocks with a
+single `[body]` TOML table whose keys are the **literal request-body fields**.
+Because it is a table (not a string), `extends` / `deepMerge` can override it
+field-by-field — variants become a 2-line delta instead of a copied body.
+
+Field-value rules at build time (per key in `[body]`, applied recursively):
+- **string containing jinja** (`{{` or `{%`) → render through inja, splice the
+  output as **raw JSON** (array / object / string). Empty render → key omitted.
+- **string without jinja** (e.g. `"high"`) → literal JSON string, as-is.
+- **number / bool / inline-table** → as-is.
+
+So `messages` / `contents` and `system` / `system_instruction` are just **string
+fields holding jinja**; everything else (`max_tokens`, `temperature`, `stream`,
+`thinking`, `output_config`, `generationConfig`, …) is a literal value that reads
+exactly like the curl body.
+
+No runtime toggles: thinking / tools / streaming are **fixed per agent**. A thinking
+agent literally carries the `thinking` fields; a non-thinking variant is a separate
+file. There is no `{% if thinking %}` and no `thinkingEnabled` flag threaded into
+rendering. `system` uses `{% if existsIn(ctx, "system_prompt") %}` only because that
+is about *presence of data*, not a mode toggle.
+
+Outside the body:
+- `model` — supplied by the **client** from its own settings; never in the profile.
+  Google embeds the model in the URL, so its `endpoint` uses a `${MODEL}` placeholder
+  the client resolves (same substitution style as `${PROJECT_DIR}` / `${HOME}`).
+- `tools` — injected by the **provider** when `enable_tools` is set (tool
+  definitions are dynamic, from `ToolsManager`; they can't be authored in TOML).
+- `stream` — always on. Literal `"stream": true` in the body for OpenAI / Claude /
+  Mistral; encoded in the `endpoint` URL for Google.
+
+### 2. `include` re-enabled as whitelisted partials
+
+The message-array rendering (the complex, comma-heavy part) lives in
+`sources/agents/partials/*.jinja`, shared via `{% include %}`. The throwing include
+callback is replaced by a sandboxed resolver that:
+- rejects names containing `..`, a leading `/`, or a scheme/drive;
+- resolves only against known roots: bundled `:/agents/partials/` then the user
+  `partials/` dir;
+- parses/caches the partial in the same `inja::Environment`.
+
+A missing/typo'd partial is a **load-time** error.
+
+### 3. `extends` shares config down a hierarchy
+
+`extends` already exists (`resolveExtends` + `deepMerge` + `abstract`/`hidden`); it
+keeps doing what it does, now over the structured `[body]` too. Typical 2–3 levels:
+
+```
+chat_base (abstract)            → system_prompt (shared by all)
+  ├─ openai_base (abstract)     → provider/endpoint/enable_tools + [body]
+  │    ├─ openai_chat           → name
+  │    ├─ mistral_chat          → name, provider, endpoint
+  │    └─ mistral_reasoning     → + [body].reasoning_effort
+  ├─ anthropic_base (abstract)  → provider/endpoint/thinking + [body]
+  │    ├─ claude_chat           → name
+  │    └─ claude_sonnet         → + [body.output_config].effort
+  └─ google_base (abstract)     → provider/endpoint + [body]
+       └─ gemini_chat           → name
+```
+
+Notes:
+- `[body]` is shared whole when identical (the 8 OpenAI-compatible providers); a
+  variant overrides only the differing field — no duplicated body.
+- Arrays (`tags`) are **replaced** on override, not appended (`deepMerge` recurses
+  objects only). A child that wants base tags + extras restates the full list.
+- Division of labour: **include** shares the message-rendering fragment across
+  unrelated families; **extends** shares config (system_prompt / endpoint / body)
+  down one inheritance chain.
+- With `model` gone, per-model files collapse: agents that previously differed only
+  by `model` become one agent (the client picks the model). A separate file is only
+  needed when the body genuinely differs (effort, no-thinking, …).
+
+### `role` + `context` merged into `system_prompt`
+
+The old `role` (static) and `context` (jinja, reads files) are two layers of the
+same system prompt (`SystemPromptBuilder` layers `agent.role` / `agent.context`).
+Merge into one `system_prompt` field, always rendered through `ContextRenderer`
+(static text passes through; dynamic parts use `{% %}`), e.g. README via
+`file_exists` instead of the `set readme / if length` dance. `Session` collapses the
+two layers into one rendered layer.
+
+## Worked examples
+
+OpenAI base:
+```toml
+extends = "Chat Base"
+abstract = true
+provider_instance = "OpenAI (Chat Completions)"
+endpoint = "/chat/completions"
+enable_tools = true
+
+[body]
+max_tokens  = 8192
+temperature = 0.7
+stream      = true
+messages    = """
+[ {% include "partials/openai_messages.jinja" %} ]
+"""
+```
+
+Mistral reasoning child (delta only):
+```toml
+extends = "OpenAI Base Chat"
+name    = "Mistral Reasoning Chat"
+provider_instance = "Mistral AI"
+endpoint = "/v1/chat/completions"
+enable_thinking = true
+
+[body]
+reasoning_effort = "medium"
+```
+
+Claude base (literally the curl body):
+```toml
+extends = "Chat Base"
+abstract = true
+provider_instance = "Claude"
+endpoint = "/v1/messages"
+enable_thinking = true
+enable_tools = true
+
+[body]
+max_tokens  = 16000
+temperature = 1
+stream      = true
+thinking      = { type = "adaptive", display = "summarized" }
+output_config = { effort = "high" }
+system   = """{% if existsIn(ctx, "system_prompt") %}{{ tojson(ctx.system_prompt) }}{% endif %}"""
+messages = """
+[ {% include "partials/anthropic_messages.jinja" %} ]
+"""
+```
+
+Sonnet child (delta only):
+```toml
+extends = "Anthropic Base Chat"
+name    = "Claude Sonnet"
+
+[body.output_config]
+effort = "medium"
+```
+
+Google base (`${MODEL}` in endpoint; streaming in the URL):
+```toml
+extends = "Chat Base"
+abstract = true
+provider_instance = "Google AI"
+endpoint = "/models/${MODEL}:streamGenerateContent?alt=sse"
+enable_thinking = true
+enable_tools = true
+
+[body]
+system_instruction = """{% if existsIn(ctx, "system_prompt") %}{ "parts": [ { "text": {{ tojson(ctx.system_prompt) }} } ] }{% endif %}"""
+contents = """
+[ {% include "partials/google_contents.jinja" %} ]
+"""
+
+[body.generationConfig]
+maxOutputTokens = 16000
+temperature     = 1
+thinkingConfig  = { includeThoughts = true, thinkingBudget = 8192 }
+```
+
+### Partials
+
+`partials/openai_messages.jinja` dispatches per message:
+```jinja
+{% if existsIn(ctx, "system_prompt") %}
+{ "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
+{% endif %}
+{% for msg in ctx.history %}
+  {% if msg.role == "assistant" %}{% include "partials/openai_assistant.jinja" %}
+  {% else if length(filter_by_type(msg.content_blocks, "tool_result")) > 0 %}{% include "partials/openai_tool_results.jinja" %}
+  {% else %}{% include "partials/openai_user.jinja" %}
+  {% endif %}
+{% endfor %}
+```
+
+`partials/openai_assistant.jinja`:
+```jinja
+{% set tcalls = filter_by_type(msg.content_blocks, "tool_use") %}
+{
+  "role": "assistant",
+  "content": {{ tojson(msg.content) }}
+  {% if length(tcalls) > 0 %}
+  , "tool_calls": [
+    {% for b in tcalls %}
+    { "id": {{ tojson(b.id) }}, "type": "function",
+      "function": { "name": {{ tojson(b.name) }}, "arguments": {{ tojson(tojson(b.input)) }} } },
+    {% endfor %}
+  ]
+  {% endif %}
+},
+```
+
+`partials/openai_tool_results.jinja`:
+```jinja
+{% for b in filter_by_type(msg.content_blocks, "tool_result") %}
+{ "role": "tool", "tool_call_id": {{ tojson(b.tool_use_id) }}, "content": {{ tojson(b.content) }} },
+{% endfor %}
+```
+
+`partials/openai_user.jinja`:
+```jinja
+{% if existsIn(msg, "images") %}
+{ "role": "user", "content": {% include "partials/openai_image_content.jinja" %} },
+{% else %}
+{ "role": "user", "content": {{ tojson(msg.content) }} },
+{% endif %}
+```
+
+`partials/openai_image_content.jinja`:
+```jinja
+[
+  { "type": "text", "text": {{ tojson(msg.content) }} }
+  {% for img in msg.images %}
+  ,
+  {% if img.is_url %}
+  { "type": "image_url", "image_url": { "url": {{ tojson(img.data) }} } }
+  {% else %}
+  { "type": "image_url", "image_url": { "url": "data:{{ img.media_type }};base64,{{ img.data }}" } }
+  {% endif %}
+  {% endfor %}
+]
+```
+
+`partials/anthropic_messages.jinja`:
+```jinja
+{% for msg in ctx.history %}
+{
+  "role": {{ tojson(msg.role) }},
+  "content": [
+    {% for b in msg.content_blocks %}
+      {% if b.type == "image" %}{% include "partials/anthropic_image.jinja" %}
+      {% else %}{{ tojson(b) }},
+      {% endif %}
+    {% endfor %}
+  ]
+},
+{% endfor %}
+```
+
+`partials/anthropic_image.jinja`:
+```jinja
+{
+  "type": "image",
+  "source":
+  {% if b.is_url %}
+  { "type": "url", "url": {{ tojson(b.data) }} }
+  {% else %}
+  { "type": "base64", "media_type": {{ tojson(b.media_type) }}, "data": {{ tojson(b.data) }} }
+  {% endif %}
+},
+```
+
+`partials/google_contents.jinja`:
+```jinja
+{% for msg in ctx.history %}
+{
+  "role": {% if msg.role == "assistant" %}"model"{% else %}"user"{% endif %},
+  "parts": [ {% for b in msg.content_blocks %}{% include "partials/google_part.jinja" %}{% endfor %} ]
+},
+{% endfor %}
+```
+
+`partials/google_part.jinja`:
+```jinja
+{% if b.type == "text" %}
+{ "text": {{ tojson(b.text) }} },
+{% else if b.type == "thinking" %}
+{ "text": {{ tojson(b.thinking) }}, "thought": true, "thoughtSignature": {{ tojson(b.signature) }} },
+{% else if b.type == "tool_use" %}
+{ "functionCall": { "name": {{ tojson(b.name) }}, "args": {{ tojson(b.input) }} } },
+{% else if b.type == "tool_result" %}
+{ "functionResponse": { "name": {{ tojson(b.name) }}, "response": { "result": {{ tojson(b.content) }} } } },
+{% else if b.type == "image" %}
+  {% if b.is_url %}
+  { "file_data": { "mime_type": {{ tojson(b.media_type) }}, "file_uri": {{ tojson(b.data) }} } },
+  {% else %}
+  { "inline_data": { "mime_type": {{ tojson(b.media_type) }}, "data": {{ tojson(b.data) }} } },
+  {% endif %}
+{% else %}
+{ "text": "" },
+{% endif %}
+```
+
+## C++ work
+
+In `JsonPromptTemplate`:
+- Parse `[body]` as a `QJsonObject` (not a string). Walk it recursively and build the
+  request: render jinja-bearing string values via inja and splice the parsed JSON;
+  pass literal strings / scalars / inline-tables through; drop keys whose render is
+  empty.
+- **Delete** `m_sampling`, `m_thinking`, and `applySampling` entirely — the body is
+  the request; there is no separate sampling/thinking merge.
+- Drop the `thinkingEnabled` parameter from `buildFullRequest` /
+  `Provider::prepareRequest` / `Session` — it no longer affects rendering.
+- Add a **JSON-aware** trailing-comma stripper before `QJsonDocument::fromJson`
+  (tracks string/escape state so `,}` / `,]` inside string values are not touched).
+  This is what lets partials emit an unconditional `,` after every element and drop
+  all `loop.is_last` bookkeeping.
+
+In `AgentConfig` / `AgentLoader`:
+- Replace `messageFormat` (string) with `body` (`QJsonObject`); merge `role` +
+  `context` into `system_prompt`. `[template].sampling` / `[template].thinking` are
+  removed.
+- `extends` / `deepMerge` are unchanged; they now also merge `[body]`.
+- Validate at load: a referenced partial must resolve; the assembled body must parse
+  as JSON (render once against a synthetic context with tool_use / tool_result /
+  image). Catches breakage at startup, not mid-conversation.
+
+In the client / provider layer:
+- The client sets `model` from its settings (and resolves `${MODEL}` in the
+  endpoint); `Session` no longer seeds the payload with `cfg.model`.
+- The provider keeps injecting `tools` when `enable_tools` is set.
+
+In `Session`:
+- Collapse the `agent.role` + `agent.context` system-prompt layers into one rendered
+  `system_prompt` layer.
+
+## Implementation order
+
+1. JSON-aware trailing-comma stripper + whitelisted `include` resolver (enables
+   readable partials).
+2. `[body]`-table model in `JsonPromptTemplate` + loader; delete
+   sampling/thinking/`applySampling`; drop `thinkingEnabled`.
+3. `system_prompt` merge in loader + `Session`.
+4. `model` from client (+ `${MODEL}` endpoint substitution); convert bundled agents
+   to the base/partials/`extends` layout.
+5. Load-time validation (partial resolves, body parses).
--- a/settings/AgentDetailPane.cpp
+++ b/settings/AgentDetailPane.cpp
@@ -8,6 +8,7 @@
 #include "SettingsTheme.hpp"
 #include "SettingsUiBuilders.hpp"

+#include <AgentFactory.hpp>
 #include <ProviderInstance.hpp>
 #include <ProviderInstanceFactory.hpp>

@@ -18,6 +19,7 @@
 #include <QFont>
 #include <QFrame>
 #include <QGridLayout>
+#include <QJsonDocument>
 #include <QHBoxLayout>
 #include <QLabel>
 #include <QLineEdit>
@@ -157,32 +159,30 @@ AgentDetailPane::AgentDetailPane(QWidget *parent)
                  "group the agent list."));
    identity->bodyLayout()->addLayout(idGrid);

-    auto *roleSection = new SectionBox(tr("System role"), this);
+    auto *roleSection = new SectionBox(tr("System prompt"), this);
    auto *roleHint = makeHintLabel(
-        tr("Prepended to every request as the system message."));
+        tr("Jinja2 template (via ContextRenderer) rendered into the system "
+           "prompt. Supports read_file/file_exists with ${PROJECT_DIR}/${HOME}."));
    m_roleText = new QPlainTextEdit(this);
    m_roleText->setReadOnly(true);
+    m_roleText->setFont(monospaceFont(11));
    m_roleText->setMinimumHeight(120);
    roleSection->bodyLayout()->addWidget(roleHint);
    roleSection->bodyLayout()->addWidget(m_roleText);

-    auto *contextSection = new SectionBox(tr("Context"), this);
-    auto *contextHint = makeHintLabel(
-        tr("Jinja2 template rendered with ContextManager bindings into the "
-           "agent.context system-prompt layer. Empty = no context block."));
-    m_contextText = new QPlainTextEdit(this);
-    m_contextText->setReadOnly(true);
-    m_contextText->setFont(monospaceFont(11));
-    m_contextText->setMinimumHeight(120);
-    contextSection->bodyLayout()->addWidget(contextHint);
-    contextSection->bodyLayout()->addWidget(m_contextText);
-
    auto *connection = new SectionBox(tr("Connection"), this);
    m_providerCombo = new QComboBox(this);
    m_providerCombo->setSizeAdjustPolicy(QComboBox::AdjustToContents);
    m_providerCombo->setEnabled(false);
    m_endpointValue = makeReadOnlyLine(true);
-    m_modelValue = makeReadOnlyLine(true);
+    m_modelValue = new QLineEdit(this);
+    m_modelValue->setFont(monospaceFont(11));
+    m_modelValue->setClearButtonEnabled(true);
+    connect(m_modelValue, &QLineEdit::editingFinished, this, [this]() {
+        if (!m_agentFactory || !m_current)
+            return;
+        m_agentFactory->setModelOverride(m_current->name, m_modelValue->text().trimmed());
+    });

    auto *connGrid = new QGridLayout;
    connGrid->setContentsMargins(0, 0, 0, 0);
@@ -195,7 +195,9 @@ AgentDetailPane::AgentDetailPane(QWidget *parent)
        .row(tr("Endpoint:"), m_endpointValue,
             tr("Appended to the provider's URL. Blank uses the "
                "provider default."))
-        .row(tr("Model:"), m_modelValue);
+        .row(tr("Model:"), m_modelValue,
+             tr("Model sent to the provider. Overrides the agent's built-in "
+                "default for this QodeAssist install; empty = use the default."));
    connection->bodyLayout()->addLayout(connGrid);

    m_effectiveUrl = new QLabel(this);
@@ -231,7 +233,7 @@ AgentDetailPane::AgentDetailPane(QWidget *parent)
    m_messageFormat->setMinimumHeight(140);

    templ->bodyLayout()->addWidget(templHint);
-    auto *mfLabel = new QLabel(tr("message_format:"), this);
+    auto *mfLabel = new QLabel(tr("body:"), this);
    templ->bodyLayout()->addWidget(mfLabel);
    templ->bodyLayout()->addWidget(m_messageFormat);

@@ -269,7 +271,6 @@ AgentDetailPane::AgentDetailPane(QWidget *parent)
    root->addWidget(match);
    root->addWidget(templ);
    root->addWidget(roleSection);
-    root->addWidget(contextSection);
    root->addWidget(m_diagnostics);
    root->addWidget(m_rawToggle, 0, Qt::AlignLeft);
    root->addWidget(m_rawToml);
@@ -286,6 +287,11 @@ void AgentDetailPane::setInstanceFactory(Providers::ProviderInstanceFactory *fac
    populateProviderCombo();
 }

+void AgentDetailPane::setAgentFactory(AgentFactory *factory)
+{
+    m_agentFactory = factory; // read by setAgent() and the Model field's editingFinished handler
+}
+
 void AgentDetailPane::populateProviderCombo()
 {
    if (m_providerComboPopulated)
@@ -351,7 +357,13 @@ void AgentDetailPane::setAgent(const AgentConfig &cfg)

    m_endpointValue->setText(cfg.endpoint);
    m_endpointValue->setPlaceholderText(tr("(provider default)"));
-    m_modelValue->setText(cfg.model);
+    {
+        const QString override
+            = m_agentFactory ? m_agentFactory->modelOverride(cfg.name) : QString();
+        m_modelValue->setText(override);
+        m_modelValue->setPlaceholderText(
+            cfg.model.isEmpty() ? tr("(set a model)") : cfg.model);
+    }

    const QString eff = resolvedUrl + cfg.endpoint;
    m_effectiveUrl->setText(
@@ -361,16 +373,15 @@ void AgentDetailPane::setAgent(const AgentConfig &cfg)
                  .arg(tr("effective request line"), eff));

    m_roleText->setPlainText(
-        cfg.role.isEmpty() ? tr("(no system role set)") : cfg.role);
-    m_contextText->setPlainText(
-        cfg.context.isEmpty() ? tr("(no context block)") : cfg.context);
+        cfg.systemPrompt.isEmpty() ? tr("(no system prompt set)") : cfg.systemPrompt);

    m_filePatternsValue->setText(cfg.match.filePatterns.join(QStringLiteral(", ")));
    m_filePatternsValue->setPlaceholderText(tr("(matches every file)"));

    m_messageFormat->setPlainText(
-        cfg.messageFormat.isEmpty() ? tr("(inherited from parent / none)")
-                                    : cfg.messageFormat);
+        cfg.body.isEmpty()
+            ? tr("(inherited from parent / none)")
+            : QString::fromUtf8(QJsonDocument(cfg.body).toJson(QJsonDocument::Indented)));

    const FileReadResult raw = readFileTextCapped(cfg.sourcePath, kRawTomlMaxBytes);
    switch (raw.status) {
@@ -422,7 +433,6 @@ void AgentDetailPane::clear()
    m_modelValue->clear();
    m_effectiveUrl->clear();
    m_roleText->clear();
-    m_contextText->clear();
    m_filePatternsValue->clear();
    m_messageFormat->clear();
    m_rawToml->clear();
--- a/settings/AgentDetailPane.hpp
+++ b/settings/AgentDetailPane.hpp
@@ -17,6 +17,10 @@ class QPlainTextEdit;
 class QPushButton;
 class QToolButton;

+namespace QodeAssist {
+class AgentFactory;
+}
+
 namespace QodeAssist::Providers {
 class ProviderInstanceFactory;
 }
@@ -32,6 +36,7 @@ public:
    explicit AgentDetailPane(QWidget *parent = nullptr);

    void setInstanceFactory(Providers::ProviderInstanceFactory *factory);
+    void setAgentFactory(AgentFactory *factory);
    void setAgent(const AgentConfig &cfg);
    void clear();
    void setLoadDiagnostics(const QStringList &errors, const QStringList &warnings);
@@ -72,6 +77,7 @@ private:

    QComboBox *m_providerCombo = nullptr;
    QPointer<Providers::ProviderInstanceFactory> m_instanceFactory;
+    QPointer<AgentFactory> m_agentFactory;
    QLineEdit *m_endpointValue = nullptr;
    QLineEdit *m_modelValue = nullptr;
    QLabel *m_effectiveUrl = nullptr;
@@ -79,7 +85,6 @@ private:
    QLineEdit *m_filePatternsValue = nullptr;

    QPlainTextEdit *m_roleText = nullptr;
-    QPlainTextEdit *m_contextText = nullptr;
    QPlainTextEdit *m_messageFormat = nullptr;

    SectionBox *m_diagnostics = nullptr;
--- a/settings/AgentsSettingsPage.cpp
+++ b/settings/AgentsSettingsPage.cpp
@@ -100,6 +100,7 @@ public:

        m_detail = new AgentDetailPane(this);
        m_detail->setInstanceFactory(m_agentFactory->instanceFactory());
+        m_detail->setAgentFactory(m_agentFactory);
        m_detailScroll = new QScrollArea(this);
        m_detailScroll->setWidgetResizable(true);
        m_detailScroll->setFrameShape(QFrame::StyledPanel);
--- a/sources/Session/Session.cpp
+++ b/sources/Session/Session.cpp
@@ -81,8 +81,6 @@ Session::Session(Agent *agent, ConversationHistory *externalHistory, QObject *pa

    m_router = new ResponseRouter(client, m_history, this);
    connect(m_router, &ResponseRouter::event, this, &Session::onRouterEvent);
-
-    m_systemPrompt->setLayer(QStringLiteral("agent.role"), m_agent->config().role);
 }

 Session::~Session()
@@ -134,12 +132,13 @@ QString Session::renderAgentContext() const
    if (!m_agent)
        return {};
    const auto &cfg = m_agent->config();
-    if (cfg.context.isEmpty())
+    if (cfg.systemPrompt.isEmpty())
        return {};
    QString err;
-    QString rendered = Templates::ContextRenderer::render(cfg.context, m_contextBindings, &err);
+    QString rendered
+        = Templates::ContextRenderer::render(cfg.systemPrompt, m_contextBindings, &err);
    if (!err.isEmpty())
-        qWarning("[QodeAssist] agent.context render failed: %s", qUtf8Printable(err));
+        qWarning("[QodeAssist] agent.system render failed: %s", qUtf8Printable(err));
    return rendered;
 }

@@ -231,9 +230,9 @@ LLMQore::RequestID Session::dispatch(

    const QString renderedContext = renderAgentContext();
    if (renderedContext.isEmpty())
-        m_systemPrompt->clearLayer(QStringLiteral("agent.context"));
+        m_systemPrompt->clearLayer(QStringLiteral("agent.system"));
    else
-        m_systemPrompt->setLayer(QStringLiteral("agent.context"), renderedContext);
+        m_systemPrompt->setLayer(QStringLiteral("agent.system"), renderedContext);

    Templates::ContextData ctx = toLegacyContext();
    QJsonObject payload{{QStringLiteral("model"), cfg.model}};
--- a/sources/agents/Agent.cpp
+++ b/sources/agents/Agent.cpp
@@ -34,9 +34,8 @@ QString AgentConfig::validate(const AgentConfig &config)
        return QStringLiteral("Agent config '%1' has no model").arg(config.name);
    if (config.endpoint.isEmpty())
        return QStringLiteral("Agent config '%1' has no endpoint").arg(config.name);
-    if (config.messageFormat.isEmpty()) {
-        return QStringLiteral("Agent config '%1' has no [template].message_format")
-            .arg(config.name);
+    if (config.body.isEmpty()) {
+        return QStringLiteral("Agent config '%1' has no [body]").arg(config.name);
    }
    return {};
 }
--- a/sources/agents/AgentConfig.hpp
+++ b/sources/agents/AgentConfig.hpp
@@ -19,7 +19,7 @@ struct AgentConfig
    QString providerInstance;
    QString model;
    QString endpoint;
-    QString role;
+    QString systemPrompt;
    QStringList tags;

    struct Match
@@ -40,10 +40,7 @@ struct AgentConfig
    bool enableThinking = false;
    bool enableTools = false;

-    QString messageFormat;
-    QJsonObject sampling;
-    QJsonObject thinking;
-    QString context;
+    QJsonObject body;
    QString extendsName;
    bool abstract = false;
    bool hidden = false;
--- a/sources/agents/AgentFactory.cpp
+++ b/sources/agents/AgentFactory.cpp
@@ -4,6 +4,11 @@

 #include "AgentFactory.hpp"

+#include <QDir>
+#include <QFile>
+#include <QFileInfo>
+#include <QJsonDocument>
+#include <QJsonObject>
 #include <QLoggingCategory>
 #include <QThread>

@@ -37,6 +42,7 @@ AgentFactory::AgentFactory(
    , m_secrets(secrets)
 {
    ::initAgentsResource();
+    loadModelOverrides();
    reload();
 }

@@ -169,7 +175,11 @@ Agent *AgentFactory::create(const QString &name, QObject *parent, QString *error
        *cfg, m_instanceFactory.data(), m_secrets.data(), errorOut);
    if (!provider)
        return nullptr;
-    auto agent = std::make_unique<Agent>(*cfg, provider, /*parent=*/nullptr);
+    AgentConfig resolved = *cfg;
+    const QString modelOv = m_modelOverrides.value(resolved.name);
+    if (!modelOv.isEmpty())
+        resolved.model = modelOv;
+    auto agent = std::make_unique<Agent>(resolved, provider, /*parent=*/nullptr);
    if (!agent->isValid()) {
        if (errorOut)
            *errorOut = agent->invalidReason();
@@ -193,6 +203,9 @@ Agent *AgentFactory::createFromFile(
        *cfgOpt, m_instanceFactory.data(), m_secrets.data(), errorOut);
    if (!provider)
        return nullptr;
+    const QString modelOv = m_modelOverrides.value(cfgOpt->name);
+    if (!modelOv.isEmpty())
+        cfgOpt->model = modelOv;
    auto agent = std::make_unique<Agent>(std::move(*cfgOpt), provider, /*parent=*/nullptr);
    if (!agent->isValid()) {
        if (errorOut) *errorOut = agent->invalidReason();
@@ -221,4 +234,55 @@ Providers::ProviderSecretsStore *AgentFactory::secretsStore() const noexcept
    return m_secrets.data();
 }

+QString AgentFactory::modelOverride(const QString &agentName) const
+{
+    return m_modelOverrides.value(agentName); // empty QString when no override is stored
+}
+
+void AgentFactory::setModelOverride(const QString &agentName, const QString &model)
+{
+    if (!model.isEmpty())
+        m_modelOverrides.insert(agentName, model);
+    else
+        m_modelOverrides.remove(agentName); // empty model == no override for this agent
+    saveModelOverrides(); // written to disk on every call
+}
+
+namespace {
+QString modelOverridesPath()
+{
+    const QString relativePath = QStringLiteral("qodeassist/config/agent_models.json");
+    return Core::ICore::userResourcePath(relativePath).toFSPathString();
+}
+} // namespace
+
+// Rebuilds m_modelOverrides from the overrides file; if it cannot be opened the map stays empty.
+void AgentFactory::loadModelOverrides()
+{
+    m_modelOverrides.clear();
+    QFile file(modelOverridesPath());
+    if (file.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        const QJsonObject stored = QJsonDocument::fromJson(file.readAll()).object();
+        for (auto entry = stored.constBegin(); entry != stored.constEnd(); ++entry) {
+            if (const QString name = entry.value().toString(); !name.isEmpty())
+                m_modelOverrides.insert(entry.key(), name);
+        }
+    }
+}
+
+// Best-effort persist of m_modelOverrides: mkpath/open/write/flush failures are only logged.
+void AgentFactory::saveModelOverrides() const
+{
+    const QString path = modelOverridesPath();
+    QJsonObject obj;
+    for (auto it = m_modelOverrides.constBegin(); it != m_modelOverrides.constEnd(); ++it)
+        obj.insert(it.key(), it.value());
+    QFile f(path);
+    if (!QDir().mkpath(QFileInfo(path).absolutePath())
+        || !f.open(QIODevice::WriteOnly | QIODevice::Text)
+        || f.write(QJsonDocument(obj).toJson(QJsonDocument::Indented)) < 0 || !f.flush()) {
+        LOG_MESSAGE(QStringLiteral("[Agents] cannot write model overrides: %1").arg(path));
+    }
+}
+
 } // namespace QodeAssist
--- a/sources/agents/AgentFactory.hpp
+++ b/sources/agents/AgentFactory.hpp
@@ -53,12 +53,22 @@ public:
    void registerConfig(AgentConfig config);
    void clear();

+    // Per-agent model chosen in QodeAssist settings. The agent TOML's `model`
+    // is only the default; an override here (keyed by agent name) wins and is
+    // applied when the agent is built. Empty model clears the override.
+    [[nodiscard]] QString modelOverride(const QString &agentName) const;
+    void setModelOverride(const QString &agentName, const QString &model);
+
    [[nodiscard]] Providers::ProviderInstanceFactory *instanceFactory() const noexcept;
    [[nodiscard]] Providers::ProviderSecretsStore *secretsStore() const noexcept;

 private:
+    void loadModelOverrides();
+    void saveModelOverrides() const;
+
    std::vector<AgentConfig> m_configs;
    QHash<QString, qsizetype> m_indexByName;
+    QHash<QString, QString> m_modelOverrides;
    QStringList m_errors;
    QStringList m_warnings;
    QPointer<Providers::ProviderInstanceFactory> m_instanceFactory;
--- a/sources/agents/AgentLoader.cpp
+++ b/sources/agents/AgentLoader.cpp
@@ -120,8 +120,7 @@ AgentConfig configFromMerged(const QJsonObject &obj)
    cfg.providerInstance = obj.value("provider_instance").toString();
    cfg.model       = obj.value("model").toString();
    cfg.endpoint    = obj.value("endpoint").toString();
-    cfg.role        = obj.value("role").toString();
-    cfg.context     = obj.value("context").toString();
+    cfg.systemPrompt = obj.value("system_prompt").toString();
    cfg.enableThinking = obj.value("enable_thinking").toBool(false);
    cfg.enableTools    = obj.value("enable_tools").toBool(false);
    cfg.tags        = stringArray(obj.value("tags"));
@@ -135,10 +134,7 @@ AgentConfig configFromMerged(const QJsonObject &obj)
    cfg.abstract    = obj.value("abstract").toBool(false);
    cfg.hidden      = obj.value("hidden").toBool(false);

-    const QJsonObject tpl = obj.value("template").toObject();
-    cfg.messageFormat = tpl.value("message_format").toString();
-    cfg.sampling      = tpl.value("sampling").toObject();
-    cfg.thinking      = tpl.value("thinking").toObject();
+    cfg.body = obj.value("body").toObject();
    return cfg;
 }

--- a/sources/agents/agents.qrc
+++ b/sources/agents/agents.qrc
@@ -1,11 +1,46 @@
 <RCC>
    <qresource prefix="/agents">
+        <file>partials/openai_messages.jinja</file>
+        <file>partials/openai_assistant.jinja</file>
+        <file>partials/openai_tool_results.jinja</file>
+        <file>partials/openai_user.jinja</file>
+        <file>partials/openai_image_content.jinja</file>
+        <file>partials/openai_responses_input.jinja</file>
+        <file>partials/anthropic_messages.jinja</file>
+        <file>partials/anthropic_image.jinja</file>
+        <file>partials/google_contents.jinja</file>
+        <file>partials/google_part.jinja</file>
+        <file>partials/ollama_messages.jinja</file>
+
+        <file>chat_base.toml</file>
+        <file>openai_base_chat.toml</file>
+        <file>openai_responses_base.toml</file>
+        <file>anthropic_base_chat.toml</file>
+        <file>google_base_chat.toml</file>
        <file>ollama_base_chat.toml</file>
        <file>ollama_base_fim.toml</file>
+
+        <file>openai_chat.toml</file>
+        <file>openai_compatible_chat.toml</file>
+        <file>openai_responses.toml</file>
+        <file>mistral_chat.toml</file>
+        <file>mistral_medium_chat.toml</file>
+        <file>mistral_reasoning_chat.toml</file>
+        <file>codestral_chat.toml</file>
+        <file>codestral_fim.toml</file>
+        <file>llamacpp_chat.toml</file>
+        <file>lmstudio_chat.toml</file>
+        <file>lmstudio_responses.toml</file>
+        <file>openrouter_chat.toml</file>
+        <file>ollama_openai_chat.toml</file>
        <file>ollama_gemma4_e4b_chat.toml</file>
        <file>ollama_codellama_7b_code_fim.toml</file>
        <file>ollama_codellama_13b_qml_fim.toml</file>
+
        <file>claude_sonnet_chat.toml</file>
+        <file>claude_sonnet46_chat.toml</file>
+        <file>claude_haiku45_chat.toml</file>
+        <file>claude_opus_max.toml</file>
        <file>google_gemini_chat.toml</file>
    </qresource>
 </RCC>
--- a/sources/agents/anthropic_base_chat.toml
+++ b/sources/agents/anthropic_base_chat.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+name        = "Anthropic Base Chat"
+description = "Anthropic Messages API request body (/v1/messages). Abstract — extend it and set model."
+abstract    = true
+extends     = "Chat Base"
+
+provider_instance = "Claude"
+endpoint          = "/v1/messages"
+enable_tools      = true
+tags = ["chat", "claude", "anthropic", "cloud"]
+
+[body]
+max_tokens  = 8192
+temperature = 1
+system   = """{% if existsIn(ctx, "system_prompt") %}{{ tojson(ctx.system_prompt) }}{% endif %}"""
+messages = """
+[ {% include "partials/anthropic_messages.jinja" %} ]
+"""
--- a/sources/agents/chat_base.toml
+++ b/sources/agents/chat_base.toml
@@ -0,0 +1,16 @@
+schema_version = 1
+
+name        = "Chat Base"
+description = "Shared system prompt for coding-chat agents. Abstract — not selectable."
+abstract    = true
+
+system_prompt = """
+You are a helpful coding assistant integrated into Qt Creator.
+Answer concisely. Prefer concrete diffs or minimal patches over rewriting
+whole files. Use markdown code blocks with language tags.
+
+{% if file_exists("${PROJECT_DIR}/README.md") %}
+## Project README.md
+{{ read_file("${PROJECT_DIR}/README.md") }}
+{% endif %}
+"""
--- a/sources/agents/claude_haiku45_chat.toml
+++ b/sources/agents/claude_haiku45_chat.toml
@@ -0,0 +1,14 @@
+schema_version = 1
+
+extends     = "Anthropic Base Chat"
+name        = "Claude Haiku 4.5 Chat"
+description = "Anthropic Claude Haiku 4.5 — fastest model with near-frontier intelligence; extended thinking (manual budget)."
+
+model = "claude-haiku-4-5-20251001"
+
+enable_thinking = true
+tags = ["chat", "claude", "anthropic", "cloud", "haiku", "fast"]
+
+[body]
+max_tokens = 16000
+thinking   = { type = "enabled", budget_tokens = 4096 }
--- a/sources/agents/claude_opus_max.toml
+++ b/sources/agents/claude_opus_max.toml
@@ -0,0 +1,15 @@
+schema_version = 1
+
+extends     = "Anthropic Base Chat"
+name        = "Claude Opus 4.8 Max"
+description = "Anthropic Claude Opus 4.8 at maximum capability — adaptive thinking at max effort, 128k max output."
+
+model = "claude-opus-4-8"
+
+enable_thinking = true
+tags = ["chat", "claude", "anthropic", "cloud", "opus", "max"]
+
+[body]
+max_tokens    = 128000
+thinking      = { type = "adaptive", display = "summarized" }
+output_config = { effort = "max" }
--- a/sources/agents/claude_sonnet46_chat.toml
+++ b/sources/agents/claude_sonnet46_chat.toml
@@ -0,0 +1,15 @@
+schema_version = 1
+
+extends     = "Anthropic Base Chat"
+name        = "Claude Sonnet 4.6 Chat"
+description = "Anthropic Claude Sonnet 4.6 — fast, capable coding chat with adaptive thinking."
+
+model = "claude-sonnet-4-6"
+
+enable_thinking = true
+tags = ["chat", "claude", "anthropic", "cloud", "sonnet"]
+
+[body]
+max_tokens    = 16000
+thinking      = { type = "adaptive", display = "summarized" }
+output_config = { effort = "high" }
--- a/sources/agents/claude_sonnet_chat.toml
+++ b/sources/agents/claude_sonnet_chat.toml
@@ -1,77 +1,14 @@
 schema_version = 1

+extends     = "Anthropic Base Chat"
 name        = "Claude Sonnet Chat"
-description = "Anthropic Claude (Messages API) — coding chat assistant via the hosted Claude provider."
-
-provider_instance = "Claude"
-endpoint          = "/v1/messages"
+description = "Anthropic Claude Sonnet 4.6 (Messages API) — coding chat assistant with thinking."

 model = "claude-sonnet-4-6"

-role = """
-You are a helpful coding assistant integrated into Qt Creator.
-Answer concisely. When the user shares code, prefer concrete diffs or
-minimal patches over rewriting whole files. Use markdown code blocks
-with language tags so the IDE can render them.
-"""
-
 enable_thinking = true
-enable_tools    = true
-
 tags = ["chat", "claude", "anthropic", "cloud"]

-context = """
-{%- set readme = read_file("${PROJECT_DIR}/README.md") -%}
-{%- if length(readme) > 0 %}
-## Project README.md
-{{ readme }}
-{%- endif %}
-"""
-
-[template]
-message_format = """
-{
-  {%- if existsIn(ctx, "system_prompt") %}
-  "system": {{ tojson(ctx.system_prompt) }},
-  {%- endif %}
-  "messages": [
-    {%- for msg in ctx.history %}
-    {
-      "role": {{ tojson(msg.role) }},
-      "content": [
-        {%- for b in msg.content_blocks %}
-        {%- if b.type == "image" %}
-        {
-          "type": "image",
-          "source": {
-            {%- if b.is_url %}
-            "type": "url",
-            "url": {{ tojson(b.data) }}
-            {%- else %}
-            "type": "base64",
-            "media_type": {{ tojson(b.media_type) }},
-            "data": {{ tojson(b.data) }}
-            {%- endif %}
-          }
-        }{% if not loop.is_last %},{% endif %}
-        {%- else %}
-        {{ tojson(b) }}{% if not loop.is_last %},{% endif %}
-        {%- endif %}
-        {%- endfor %}
-      ]
-    }{% if not loop.is_last %},{% endif %}
-    {%- endfor %}
-  ]
-}
-"""
-
-[template.sampling]
+[body]
 max_tokens = 8192
-temperature = 1
-
-[template.thinking.overrides]
-temperature = 1
-
-[template.thinking.request_block.thinking]
-type          = "enabled"
-budget_tokens = 4096
+thinking   = { type = "enabled", budget_tokens = 4096 }
--- a/sources/agents/codestral_chat.toml
+++ b/sources/agents/codestral_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "Codestral Chat"
+description = "Mistral Codestral (Chat Completions API) — coding chat assistant."
+
+provider_instance = "Codestral"
+endpoint          = "/v1/chat/completions"
+model             = "codestral-latest"
+
+tags = ["chat", "codestral", "mistral", "cloud"]
--- a/sources/agents/codestral_fim.toml
+++ b/sources/agents/codestral_fim.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+name        = "Codestral FIM"
+description = "Mistral Codestral fill-in-the-middle code completion (/v1/fim/completions)."
+
+provider_instance = "Mistral AI"
+endpoint          = "/v1/fim/completions"
+model             = "codestral-latest"
+
+enable_thinking = false
+enable_tools    = false
+tags = ["fim", "codestral", "mistral", "cloud", "completion"]
+
+[body]
+max_tokens  = 256
+temperature = 0.2
+stream      = true
+prompt      = """{{ tojson(ctx.prefix) }}"""
+suffix      = """{% if existsIn(ctx, "suffix") %}{{ tojson(ctx.suffix) }}{% endif %}"""
--- a/sources/agents/google_base_chat.toml
+++ b/sources/agents/google_base_chat.toml
@@ -0,0 +1,20 @@
+schema_version = 1
+
+name        = "Google Base Chat"
+description = "Google Gemini generateContent request body. Abstract — extend it and set model/endpoint."
+abstract    = true
+extends     = "Chat Base"
+
+provider_instance = "Google AI"
+enable_tools      = true
+tags = ["chat", "gemini", "google", "cloud"]
+
+[body]
+system_instruction = """{% if existsIn(ctx, "system_prompt") %}{ "parts": [ { "text": {{ tojson(ctx.system_prompt) }} } ] }{% endif %}"""
+contents = """
+[ {% include "partials/google_contents.jinja" %} ]
+"""
+
+[body.generationConfig]
+maxOutputTokens = 8192
+temperature     = 1
--- a/sources/agents/google_gemini_chat.toml
+++ b/sources/agents/google_gemini_chat.toml
@@ -1,79 +1,15 @@
 schema_version = 1

+extends     = "Google Base Chat"
 name        = "Gemini Chat"
-description = "Google Gemini (generateContent API) — coding chat assistant via the hosted Google AI provider."
+description = "Google Gemini 2.5 Flash (generateContent API) — coding chat with thinking."

-provider_instance = "Google AI"
 endpoint = "/models/gemini-2.5-flash:streamGenerateContent?alt=sse"
-
 model    = "gemini-2.5-flash"

-role = """
-You are a helpful coding assistant integrated into Qt Creator.
-Answer concisely. When the user shares code, prefer concrete diffs or
-minimal patches over rewriting whole files. Use markdown code blocks
-with language tags so the IDE can render them.
-"""
-
 enable_thinking = true
-enable_tools    = true
-
 tags = ["chat", "gemini", "google", "cloud"]

-context = """
-{%- set readme = read_file("${PROJECT_DIR}/README.md") -%}
-{%- if length(readme) > 0 %}
-## Project README.md
-{{ readme }}
-{%- endif %}
-"""
-
-[template]
-message_format = """
-{
-  {%- if existsIn(ctx, "system_prompt") %}
-  "system_instruction": { "parts": [{ "text": {{ tojson(ctx.system_prompt) }} }] },
-  {%- endif %}
-  "contents": [
-    {%- for msg in ctx.history %}
-    {
-      "role": {% if msg.role == "assistant" %}"model"{% else %}"user"{% endif %},
-      "parts": [
-        {%- for b in msg.content_blocks %}
-        {%- if b.type == "text" %}
-        { "text": {{ tojson(b.text) }} }
-        {%- else if b.type == "thinking" %}
-        { "text": {{ tojson(b.thinking) }}, "thought": true, "thoughtSignature": {{ tojson(b.signature) }} }
-        {%- else if b.type == "tool_use" %}
-        { "functionCall": { "name": {{ tojson(b.name) }}, "args": {{ tojson(b.input) }} } }
-        {%- else if b.type == "tool_result" %}
-        { "functionResponse": { "name": {{ tojson(b.name) }}, "response": { "result": {{ tojson(b.content) }} } } }
-        {%- else if b.type == "image" %}
-        {%- if b.is_url %}
-        { "file_data": { "mime_type": {{ tojson(b.media_type) }}, "file_uri": {{ tojson(b.data) }} } }
-        {%- else %}
-        { "inline_data": { "mime_type": {{ tojson(b.media_type) }}, "data": {{ tojson(b.data) }} } }
-        {%- endif %}
-        {%- else %}
-        { "text": "" }
-        {%- endif %}
-        {% if not loop.is_last %},{% endif %}
-        {%- endfor %}
-      ]
-    }{% if not loop.is_last %},{% endif %}
-    {%- endfor %}
-  ]
-}
-"""
-
-[template.sampling.generationConfig]
-maxOutputTokens = 8192
-temperature     = 1
-
-[template.thinking.request_block.generationConfig]
-temperature     = 1
+[body.generationConfig]
 maxOutputTokens = 16000
-
-[template.thinking.request_block.generationConfig.thinkingConfig]
-includeThoughts = true
-thinkingBudget  = 8192
+thinkingConfig  = { includeThoughts = true, thinkingBudget = 8192 }
--- a/sources/agents/llamacpp_chat.toml
+++ b/sources/agents/llamacpp_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "llama.cpp Chat"
+description = "llama.cpp server (OpenAI-compatible Chat Completions) — local coding chat assistant."
+
+provider_instance = "llama.cpp"
+endpoint          = "/v1/chat/completions"
+model             = "llama"
+
+tags = ["chat", "llamacpp", "local"]
--- a/sources/agents/lmstudio_chat.toml
+++ b/sources/agents/lmstudio_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "LM Studio Chat"
+description = "LM Studio (Chat Completions API) — local coding chat assistant."
+
+provider_instance = "LM Studio (Chat Completions)"
+endpoint          = "/v1/chat/completions"
+model             = "local-model"
+
+tags = ["chat", "lmstudio", "local"]
--- a/sources/agents/lmstudio_responses.toml
+++ b/sources/agents/lmstudio_responses.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Responses Base"
+name        = "LM Studio Responses"
+description = "LM Studio (Responses API) — local coding chat assistant."
+
+provider_instance = "LM Studio (Responses API)"
+endpoint          = "/v1/responses"
+model             = "local-model"
+
+tags = ["chat", "lmstudio", "responses", "local"]
--- a/sources/agents/mistral_chat.toml
+++ b/sources/agents/mistral_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "Mistral Chat"
+description = "Mistral Large (Chat Completions API) — coding chat assistant."
+
+provider_instance = "Mistral AI"
+endpoint          = "/v1/chat/completions"
+model             = "mistral-large-latest"
+
+tags = ["chat", "mistral", "cloud"]
--- a/sources/agents/mistral_medium_chat.toml
+++ b/sources/agents/mistral_medium_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "Mistral Medium Chat"
+description = "Mistral Medium 3.5 (Chat Completions API) — frontier coding/agentic chat."
+
+provider_instance = "Mistral AI"
+endpoint          = "/v1/chat/completions"
+model             = "mistral-medium-latest"
+
+tags = ["chat", "mistral", "medium", "cloud"]
--- a/sources/agents/mistral_reasoning_chat.toml
+++ b/sources/agents/mistral_reasoning_chat.toml
@@ -0,0 +1,12 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "Mistral Reasoning Chat"
+description = "Mistral Magistral Medium — native chain-of-thought reasoning model."
+
+provider_instance = "Mistral AI"
+endpoint          = "/v1/chat/completions"
+model             = "magistral-medium-latest"
+
+enable_thinking = true
+tags = ["chat", "mistral", "reasoning", "cloud"]
--- a/sources/agents/ollama_base_chat.toml
+++ b/sources/agents/ollama_base_chat.toml
@@ -1,44 +1,21 @@
 schema_version = 1

 name        = "Ollama Base Chat"
-description = "Shared base for Ollama /api/chat profiles."
-
+description = "Ollama native /api/chat request body. Abstract — extend it and set model/options."
 abstract    = true
+extends     = "Chat Base"

 provider_instance = "Ollama (Native)"
 endpoint          = "/api/chat"
-
 tags = ["ollama", "local"]

-[template]
-message_format = """
-{
-  "messages": [
-    {%- if existsIn(ctx, "system_prompt") %}
-    {
-      "role": "system",
-      "content": {{ tojson(ctx.system_prompt) }}
-    }{% if length(ctx.history) > 0 %},{% endif %}
-    {%- endif %}
-    {%- for msg in ctx.history %}
-    {
-      "role": {{ tojson(msg.role) }},
-      "content": {{ tojson(msg.content) }}{% if existsIn(msg, "images") %},
-      "images": [
-        {%- for img in msg.images %}
-        {{ tojson(img.data) }}{% if not loop.is_last %},{% endif %}
-        {%- endfor %}
-      ]{% endif %}
-    }{% if not loop.is_last %},{% endif %}
-    {%- endfor %}
-  ]
-}
+[body]
+stream   = true
+messages = """
+[ {% include "partials/ollama_messages.jinja" %} ]
 """

-[template.sampling]
-stream = true
-
-[template.sampling.options]
+[body.options]
 num_predict = 2048
 temperature = 0.7
 keep_alive  = "5m"
--- a/sources/agents/ollama_base_fim.toml
+++ b/sources/agents/ollama_base_fim.toml
@@ -1,30 +1,18 @@
 schema_version = 1

 name        = "Ollama FIM Base"
-description = "Shared base for Ollama native FIM (/api/generate) profiles."
-
+description = "Ollama native /api/generate FIM request body. Abstract — extend it and set model/prompt."
 abstract    = true

 provider_instance = "Ollama (Native)"
 endpoint          = "/api/generate"
-
 tags = ["ollama", "local", "fim"]

-[template]
-message_format = """
-{
-  "prompt": {{ tojson(ctx.prefix) }},
-  "suffix": {{ tojson(ctx.suffix) }}
-  {%- if existsIn(ctx, "system_prompt") %},
-  "system": {{ tojson(ctx.system_prompt) }}
-  {%- endif %}
-}
-"""
-
-[template.sampling]
+[body]
 stream = true
+system = """{% if existsIn(ctx, "system_prompt") %}{{ tojson(ctx.system_prompt) }}{% endif %}"""

-[template.sampling.options]
+[body.options]
 num_predict = 512
 temperature = 0.2
 top_p       = 0.9
--- a/sources/agents/ollama_codellama_13b_qml_fim.toml
+++ b/sources/agents/ollama_codellama_13b_qml_fim.toml
@@ -1,11 +1,9 @@
 schema_version = 1

+extends     = "Ollama FIM Base"
 name        = "Qt CodeLlama 13B QML FIM"
 description = "Local Qt-Company-tuned CodeLlama 13B for QML FIM completion."

-provider_instance = "Ollama (Native)"
-endpoint          = "/api/generate"
-
 model = "theqtcompany/codellama-13b-qml:latest"

 tags = ["fim", "ollama", "local", "codellama", "qml", "qt"]
@@ -13,28 +11,12 @@ tags = ["fim", "ollama", "local", "codellama", "qml", "qt"]
 [match]
 file_patterns = ["*.qml"]

-[template]
-message_format = """
-{
-  "prompt": {%- if existsIn(ctx, "suffix") and length(ctx.suffix) > 0 -%}
-    {{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}
-  {%- else -%}
-    {{ tojson("<PRE>" + ctx.prefix + "<MID>") }}
-  {%- endif %}
-  {%- if existsIn(ctx, "system_prompt") %},
-  "system": {{ tojson(ctx.system_prompt) }}
-  {%- endif %}
-}
-"""
+[body]
+prompt = """{% if existsIn(ctx, "suffix") and length(ctx.suffix) > 0 %}{{ tojson("<SUF>" + ctx.suffix + "<PRE>" + ctx.prefix + "<MID>") }}{% else %}{{ tojson("<PRE>" + ctx.prefix + "<MID>") }}{% endif %}"""

-[template.sampling]
-stream = true
-
-[template.sampling.options]
+[body.options]
 num_predict    = 500
 temperature    = 0
 top_p          = 1
 repeat_penalty = 1.05
-keep_alive  = "5m"
-
 stop = ["<SUF>", "<PRE>", "</PRE>", "</SUF>", "< EOT >", "\\end", "<MID>", "</MID>", "##"]
--- a/sources/agents/ollama_codellama_7b_code_fim.toml
+++ b/sources/agents/ollama_codellama_7b_code_fim.toml
@@ -1,11 +1,9 @@
 schema_version = 1

+extends     = "Ollama FIM Base"
 name        = "CodeLlama 7B Code FIM"
 description = "Local CodeLlama 7B (code variant) on Ollama, FIM completion via PRE/SUF/MID markers."

-provider_instance = "Ollama (Native)"
-endpoint          = "/api/generate"
-
 model = "codellama:7b-code"

 tags = ["fim", "ollama", "local", "codellama"]
@@ -13,22 +11,8 @@ tags = ["fim", "ollama", "local", "codellama"]
 [match]
 file_patterns = ["*.cpp", "*.cc", "*.cxx", "*.c", "*.h", "*.hpp", "*.hxx", "*.inl"]

-[template]
-message_format = """
-{
-  "prompt": {{ tojson("<PRE> " + ctx.prefix + " <SUF>" + ctx.suffix + " <MID>") }}
-  {%- if existsIn(ctx, "system_prompt") %},
-  "system": {{ tojson(ctx.system_prompt) }}
-  {%- endif %}
-}
-"""
+[body]
+prompt = """{{ tojson("<PRE> " + ctx.prefix + " <SUF>" + ctx.suffix + " <MID>") }}"""

-[template.sampling]
-stream = true
-
-[template.sampling.options]
-num_predict = 512
-temperature = 0.2
-top_p       = 0.9
-keep_alive  = "5m"
+[body.options]
 stop = ["<EOT>", "<PRE>", "<SUF>", "<MID>"]
--- a/sources/agents/ollama_gemma4_e4b_chat.toml
+++ b/sources/agents/ollama_gemma4_e4b_chat.toml
@@ -1,34 +1,16 @@
 schema_version = 1

-name    = "Ollama gemma4:e4b Chat"
 extends     = "Ollama Base Chat"
-
+name        = "Ollama gemma4:e4b Chat"
 description = "Local Gemma 4 E4B on Ollama /api/chat — coding chat assistant."

 model = "gemma4:e4b"

-role = """
-You are a helpful coding assistant integrated into Qt Creator.
-Answer concisely. When the user shares code, prefer concrete diffs or
-minimal patches over rewriting whole files. Use markdown code blocks
-with language tags so the IDE can render them.
-"""
-
 enable_thinking = true
 enable_tools    = true
-
 tags = ["chat", "ollama", "local", "gemma"]

-context = """
-{%- set readme      = read_file("${PROJECT_DIR}/README.md")        -%}
-
-{%- if length(readme) > 0 %}
-## Project README.md
-{{ readme }}
-{%- endif %}
-"""
-
-[template.sampling.options]
+[body.options]
 num_predict = 4096
 temperature = 1
 top_k       = 64
--- a/sources/agents/ollama_openai_chat.toml
+++ b/sources/agents/ollama_openai_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "Ollama (OpenAI-compatible) Chat"
+description = "Ollama via its OpenAI-compatible Chat Completions endpoint — local coding chat assistant."
+
+provider_instance = "Ollama (OpenAI-compatible)"
+endpoint          = "/v1/chat/completions"
+model             = "qwen2.5-coder"
+
+tags = ["chat", "ollama", "local"]
--- a/sources/agents/openai_base_chat.toml
+++ b/sources/agents/openai_base_chat.toml
@@ -0,0 +1,18 @@
+schema_version = 1
+
+name        = "OpenAI Base Chat"
+description = "OpenAI Chat Completions request body. Abstract — extend it and set provider/endpoint/model."
+abstract    = true
+extends     = "Chat Base"
+
+provider_instance = "OpenAI (Chat Completions)"
+endpoint          = "/chat/completions"
+enable_tools      = true
+tags = ["chat", "openai", "cloud"]
+
+[body]
+max_tokens  = 8192
+temperature = 0.7
+messages    = """
+[ {% include "partials/openai_messages.jinja" %} ]
+"""
--- a/sources/agents/openai_chat.toml
+++ b/sources/agents/openai_chat.toml
@@ -0,0 +1,7 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "OpenAI Chat"
+description = "OpenAI Chat Completions — coding chat assistant (GPT-4o)."
+
+model = "gpt-4o"
--- a/sources/agents/openai_compatible_chat.toml
+++ b/sources/agents/openai_compatible_chat.toml
@@ -0,0 +1,10 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "OpenAI Compatible Chat"
+description = "Any OpenAI-compatible Chat Completions endpoint — set the model to match your server."
+
+provider_instance = "OpenAI Compatible"
+model             = "default"
+
+tags = ["chat", "openai", "compatible"]
--- a/sources/agents/openai_responses.toml
+++ b/sources/agents/openai_responses.toml
@@ -0,0 +1,7 @@
+schema_version = 1
+
+extends     = "OpenAI Responses Base"
+name        = "OpenAI Responses"
+description = "OpenAI Responses API (/responses) — coding chat assistant."
+
+model = "gpt-4o"
--- a/sources/agents/openai_responses_base.toml
+++ b/sources/agents/openai_responses_base.toml
@@ -0,0 +1,19 @@
+schema_version = 1
+
+name        = "OpenAI Responses Base"
+description = "OpenAI Responses API request body (/responses). Abstract — extend it and set provider/endpoint/model."
+abstract    = true
+extends     = "Chat Base"
+
+provider_instance = "OpenAI (Responses API)"
+endpoint          = "/responses"
+enable_tools      = true
+tags = ["chat", "openai", "responses", "cloud"]
+
+[body]
+max_output_tokens = 8192
+temperature       = 0.7
+instructions = """{% if existsIn(ctx, "system_prompt") %}{{ tojson(ctx.system_prompt) }}{% endif %}"""
+input = """
+[ {% include "partials/openai_responses_input.jinja" %} ]
+"""
--- a/sources/agents/openrouter_chat.toml
+++ b/sources/agents/openrouter_chat.toml
@@ -0,0 +1,11 @@
+schema_version = 1
+
+extends     = "OpenAI Base Chat"
+name        = "OpenRouter Chat"
+description = "OpenRouter (OpenAI-compatible Chat Completions) — coding chat assistant."
+
+provider_instance = "OpenRouter"
+endpoint          = "/chat/completions"
+model             = "openai/gpt-4o"
+
+tags = ["chat", "openrouter", "cloud"]
--- a/sources/agents/partials/anthropic_image.jinja
+++ b/sources/agents/partials/anthropic_image.jinja
@@ -0,0 +1,9 @@
+{
+  "type": "image",
+  "source":
+  {% if b.is_url %}
+  { "type": "url", "url": {{ tojson(b.data) }} }
+  {% else %}
+  { "type": "base64", "media_type": {{ tojson(b.media_type) }}, "data": {{ tojson(b.data) }} }
+  {% endif %}
+},
--- a/sources/agents/partials/anthropic_messages.jinja
+++ b/sources/agents/partials/anthropic_messages.jinja
@@ -0,0 +1,12 @@
+{% for msg in ctx.history %}
+{
+  "role": {{ tojson(msg.role) }},
+  "content": [
+    {% for b in msg.content_blocks %}
+      {% if b.type == "image" %}{% include "partials/anthropic_image.jinja" %}
+      {% else %}{{ tojson(b) }},
+      {% endif %}
+    {% endfor %}
+  ]
+},
+{% endfor %}
--- a/sources/agents/partials/google_contents.jinja
+++ b/sources/agents/partials/google_contents.jinja
@@ -0,0 +1,6 @@
+{% for msg in ctx.history %}
+{
+  "role": {% if msg.role == "assistant" %}"model"{% else %}"user"{% endif %},
+  "parts": [ {% for b in msg.content_blocks %}{% include "partials/google_part.jinja" %}{% endfor %} ]
+},
+{% endfor %}
--- a/sources/agents/partials/google_part.jinja
+++ b/sources/agents/partials/google_part.jinja
@@ -0,0 +1,17 @@
+{% if b.type == "text" %}
+{ "text": {{ tojson(b.text) }} },
+{% else if b.type == "thinking" %}
+{ "text": {{ tojson(b.thinking) }}, "thought": true, "thoughtSignature": {{ tojson(b.signature) }} },
+{% else if b.type == "tool_use" %}
+{ "functionCall": { "name": {{ tojson(b.name) }}, "args": {{ tojson(b.input) }} } },
+{% else if b.type == "tool_result" %}
+{ "functionResponse": { "name": {{ tojson(b.name) }}, "response": { "result": {{ tojson(b.content) }} } } },
+{% else if b.type == "image" %}
+  {% if b.is_url %}
+  { "file_data": { "mime_type": {{ tojson(b.media_type) }}, "file_uri": {{ tojson(b.data) }} } },
+  {% else %}
+  { "inline_data": { "mime_type": {{ tojson(b.media_type) }}, "data": {{ tojson(b.data) }} } },
+  {% endif %}
+{% else %}
+{ "text": "" },
+{% endif %}
--- a/sources/agents/partials/ollama_messages.jinja
+++ b/sources/agents/partials/ollama_messages.jinja
@@ -0,0 +1,16 @@
+{% if existsIn(ctx, "system_prompt") %}
+{ "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
+{% endif %}
+{% for msg in ctx.history %}
+{
+  "role": {{ tojson(msg.role) }},
+  "content": {{ tojson(msg.content) }}
+  {% if existsIn(msg, "images") %}
+  , "images": [
+    {% for img in msg.images %}
+    {{ tojson(img.data) }},
+    {% endfor %}
+  ]
+  {% endif %}
+},
+{% endfor %}
--- a/sources/agents/partials/openai_assistant.jinja
+++ b/sources/agents/partials/openai_assistant.jinja
@@ -0,0 +1,19 @@
+{% set tcalls = filter_by_type(msg.content_blocks, "tool_use") %}
+{
+  "role": "assistant",
+  "content": {{ tojson(msg.content) }}
+  {% if length(tcalls) > 0 %}
+  , "tool_calls": [
+    {% for b in tcalls %}
+    {
+      "id": {{ tojson(b.id) }},
+      "type": "function",
+      "function": {
+        "name": {{ tojson(b.name) }},
+        "arguments": {{ tojson(tojson(b.input)) }}
+      }
+    },
+    {% endfor %}
+  ]
+  {% endif %}
+},
--- a/sources/agents/partials/openai_image_content.jinja
+++ b/sources/agents/partials/openai_image_content.jinja
@@ -0,0 +1,11 @@
+[
+  { "type": "text", "text": {{ tojson(msg.content) }} }
+  {% for img in msg.images %}
+  ,
+  {% if img.is_url %}
+  { "type": "image_url", "image_url": { "url": {{ tojson(img.data) }} } }
+  {% else %}{# inline image: build the data URL as one string, then JSON-escape it #}
+  { "type": "image_url", "image_url": { "url": {{ tojson("data:" + img.media_type + ";base64," + img.data) }} } }
+  {% endif %}
+  {% endfor %}
+]
--- a/sources/agents/partials/openai_messages.jinja
+++ b/sources/agents/partials/openai_messages.jinja
@@ -0,0 +1,9 @@
+{% if existsIn(ctx, "system_prompt") %}
+{ "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
+{% endif %}
+{% for msg in ctx.history %}
+  {% if msg.role == "assistant" %}{% include "partials/openai_assistant.jinja" %}
+  {% else if length(filter_by_type(msg.content_blocks, "tool_result")) > 0 %}{% include "partials/openai_tool_results.jinja" %}
+  {% else %}{% include "partials/openai_user.jinja" %}
+  {% endif %}
+{% endfor %}
--- a/sources/agents/partials/openai_responses_input.jinja
+++ b/sources/agents/partials/openai_responses_input.jinja
@@ -0,0 +1,30 @@
+{% for msg in ctx.history %}
+  {% if msg.role == "assistant" %}
+    {% if msg.content != "" %}
+    { "role": "assistant", "content": {{ tojson(msg.content) }} },
+    {% endif %}
+    {% for b in filter_by_type(msg.content_blocks, "tool_use") %}
+    { "type": "function_call", "call_id": {{ tojson(b.id) }}, "name": {{ tojson(b.name) }}, "arguments": {{ tojson(tojson(b.input)) }} },
+    {% endfor %}
+  {% else if length(filter_by_type(msg.content_blocks, "tool_result")) > 0 %}
+    {% for b in filter_by_type(msg.content_blocks, "tool_result") %}
+    { "type": "function_call_output", "call_id": {{ tojson(b.tool_use_id) }}, "output": {{ tojson(b.content) }} },
+    {% endfor %}
+  {% else %}
+    {% if existsIn(msg, "images") %}
+    { "role": "user", "content": [
+      { "type": "input_text", "text": {{ tojson(msg.content) }} }
+      {% for img in msg.images %}
+      ,
+      {% if img.is_url %}
+      { "type": "input_image", "detail": "auto", "image_url": {{ tojson(img.data) }} }
+      {% else %}{# inline image: build the data URL as one string, then JSON-escape it #}
+      { "type": "input_image", "detail": "auto", "image_url": {{ tojson("data:" + img.media_type + ";base64," + img.data) }} }
+      {% endif %}
+      {% endfor %}
+    ] },
+    {% else %}
+    { "role": "user", "content": {{ tojson(msg.content) }} },
+    {% endif %}
+  {% endif %}
+{% endfor %}
--- a/sources/agents/partials/openai_tool_results.jinja
+++ b/sources/agents/partials/openai_tool_results.jinja
@@ -0,0 +1,3 @@
+{% for b in filter_by_type(msg.content_blocks, "tool_result") %}
+{ "role": "tool", "tool_call_id": {{ tojson(b.tool_use_id) }}, "content": {{ tojson(b.content) }} },
+{% endfor %}
--- a/sources/agents/partials/openai_user.jinja
+++ b/sources/agents/partials/openai_user.jinja
@@ -0,0 +1,5 @@
+{% if existsIn(msg, "images") %}
+{ "role": "user", "content": {% include "partials/openai_image_content.jinja" %} },
+{% else %}
+{ "role": "user", "content": {{ tojson(msg.content) }} },
+{% endif %}
--- a/sources/settings/AgentSelectionDialog.cpp
+++ b/sources/settings/AgentSelectionDialog.cpp
@@ -125,7 +125,7 @@ AgentRowCard::AgentRowCard(const AgentConfig &cfg, QWidget *parent)
 {
    setItemName(cfg.name);
    QStringList haystack{cfg.name, cfg.providerInstance, cfg.model,
-                         cfg.description, cfg.role,
+                         cfg.description, cfg.systemPrompt,
                         cfg.endpoint};
    haystack += cfg.tags;
    buildSearchHaystack(haystack);
@@ -234,8 +234,8 @@ AgentRowCard::AgentRowCard(const AgentConfig &cfg, QWidget *parent)
        tooltip += cfg.description + QStringLiteral("\n\n");
    if (!cfg.providerInstance.isEmpty())
        tooltip += Tr::tr("Provider instance: %1\n").arg(cfg.providerInstance);
-    if (!cfg.role.isEmpty())
-        tooltip += Tr::tr("Role: %1\n").arg(cfg.role);
+    if (!cfg.systemPrompt.isEmpty())
+        tooltip += Tr::tr("System prompt: %1\n").arg(cfg.systemPrompt);
    if (!cfg.endpoint.isEmpty())
        tooltip += Tr::tr("Endpoint: %1\n").arg(cfg.endpoint);
    setToolTip(tooltip.trimmed());
--- a/sources/templates/JsonPromptTemplate.cpp
+++ b/sources/templates/JsonPromptTemplate.cpp
@@ -5,6 +5,9 @@
 #include "JsonPromptTemplate.hpp"

 #include <QDebug>
+#include <QDir>
+#include <QFile>
+#include <QFileInfo>
 #include <QHash>
 #include <QJsonArray>
 #include <QJsonDocument>
@@ -138,19 +141,77 @@ nlohmann::json buildContextJson(const ContextData &context)
    return data;
 }

+// Removes JSON trailing commas: a `,` followed, after optional space/tab/CR/LF,
+// by `}` or `]`. Body partials emit an unconditional comma after every array
+// element / object member; dropping the dangling one before the closing bracket
+// lets the result parse as strict JSON. Text inside string literals is copied
+// verbatim, so a value such as "]," is never touched. A comma followed only by
+// whitespace up to the end of the input is kept.
+std::string stripTrailingCommas(const std::string &in)
+{
+    std::string out;
+    out.reserve(in.size()); // the result is never longer than the input
+    bool inString = false; // true while scanning inside a "..." literal
+    bool escaped = false; // true when the previous in-string char was a backslash
+    for (std::size_t i = 0; i < in.size(); ++i) {
+        const char c = in[i];
+        if (inString) {
+            out.push_back(c); // everything inside a literal is copied as-is
+            if (escaped)
+                escaped = false; // this char was escaped, so it cannot end the literal
+            else if (c == '\\')
+                escaped = true;
+            else if (c == '"')
+                inString = false; // unescaped quote closes the literal
+            continue;
+        }
+        if (c == '"') {
+            inString = true;
+            out.push_back(c);
+            continue;
+        }
+        if (c == ',') {
+            std::size_t j = i + 1; // look ahead past JSON whitespace
+            while (j < in.size()
+                   && (in[j] == ' ' || in[j] == '\t' || in[j] == '\n' || in[j] == '\r'))
+                ++j;
+            if (j < in.size() && (in[j] == '}' || in[j] == ']'))
+                continue; // drop this comma; the skipped whitespace is still copied later
+        }
+        out.push_back(c);
+    }
+    return out;
+}
+
+// Installs the sandboxed `{% include %}` resolver on `env`. A name containing
+// ".." or starting with "/" is rejected with inja::FileError; otherwise `roots`
+// are tried in order and the first file that opens is parsed by the same
+// environment. The callback keeps a pointer to `env`, so `env` must outlive it.
+void setIncludeResolver(inja::Environment &env, std::vector<QString> roots)
+{
+    inja::Environment *envPtr = &env; // captured by the lambda below to parse partials
+    env.set_include_callback(
+        [envPtr, roots = std::move(roots)](
+            const std::filesystem::path &, const std::string &name) -> inja::Template {
+            const QString rel = QString::fromStdString(name);
+            if (rel.contains(QStringLiteral("..")) || rel.startsWith(QLatin1Char('/'))) {
+                throw inja::FileError("include rejected (path traversal): '" + name + "'");
+            }
+            for (const QString &root : roots) { // earlier roots win over later ones
+                QFile f(root + QLatin1Char('/') + rel);
+                if (f.open(QIODevice::ReadOnly | QIODevice::Text))
+                    return envPtr->parse(QString::fromUtf8(f.readAll()).toStdString());
+            }
+            throw inja::FileError("include not found in partials roots: '" + name + "'");
+        });
+}
+
 void registerStandardCallbacks(inja::Environment &env)
 {
-    // Sandbox: disable filesystem reads from `{% include %}` and reject
-    // any include callback. User-authored templates run with full
-    // process privileges, so they must not slurp arbitrary files via
-    // include directives. File reads happen only through
-    // ContextManager-provided callbacks (e.g. read_file()).
+    // `{% include %}` resolution is wired per-instance in fromConfig() via a
+    // whitelisted callback; disable inja's own filesystem search so the only
+    // path is our sandboxed resolver.
    env.set_search_included_templates_in_files(false);
-    env.set_include_callback(
-        [](const std::filesystem::path &, const std::string &name) -> inja::Template {
-            throw inja::FileError(
-                "include is disabled in QodeAssist templates: '" + name + "'");
-        });

    // Disable inja's `##` line-statement shorthand — collides with
    // Markdown headings inside template bodies. Same rationale as in
@@ -161,6 +222,23 @@ void registerStandardCallbacks(inja::Environment &env)
        return args.at(0)->dump();
    });

+    // filter_by_type(blocks, type): the elements of `blocks` that are objects
+    // whose "type" equals `type`, in their original order. A non-array `blocks`
+    // yields an empty array. Lets templates build provider-specific structures
+    // (e.g. OpenAI message-level tool_calls / tool result messages).
+    env.add_callback("filter_by_type", 2, [](inja::Arguments &args) -> nlohmann::json {
+        const nlohmann::json &blocks = *args.at(0);
+        const std::string type = args.at(1)->get<std::string>();
+        nlohmann::json result = nlohmann::json::array();
+        if (blocks.is_array()) {
+            for (const auto &b : blocks) {
+                if (b.is_object() && b.value("type", std::string{}) == type) // no "type" key compares as ""
+                    result.push_back(b);
+            }
+        }
+        return result;
+    });
+
    env.add_callback("strip_signature_suffix", 1, [](inja::Arguments &args) -> nlohmann::json {
        std::string content = args.at(0)->get<std::string>();
        const std::string marker = "\n[Signature: ";
@@ -215,6 +293,66 @@ void registerStandardCallbacks(inja::Environment &env)
        });
 }

+// Builds the context used for the load-time dry run in fromConfig(): system
+// prompt, prefix, suffix, and a four-message history (plain user text, an
+// assistant turn with text + tool_use, a user tool_result, a user text + image).
+// NOTE(review): no thinking block is included, so that branch is not exercised.
+ContextData makeValidationContext()
+{
+    ContextData ctx;
+    ctx.systemPrompt = QStringLiteral("validation");
+    ctx.prefix = QStringLiteral("prefix");
+    ctx.suffix = QStringLiteral("suffix");
+
+    QVector<Message> history;
+    history.append(Message::text(QStringLiteral("user"), QStringLiteral("hello")));
+
+    Message asst; // assistant turn: one text block followed by one tool call
+    asst.role = QStringLiteral("assistant");
+    {
+        ContentBlockEntry t;
+        t.kind = ContentBlockEntry::Kind::Text;
+        t.text = QStringLiteral("hi");
+        asst.blocks.append(t);
+        ContentBlockEntry tu;
+        tu.kind = ContentBlockEntry::Kind::ToolUse;
+        tu.toolUseId = QStringLiteral("call_1");
+        tu.toolName = QStringLiteral("read_file");
+        tu.toolInput = QJsonObject{{QStringLiteral("path"), QStringLiteral("x")}};
+        asst.blocks.append(tu);
+    }
+    history.append(asst);
+
+    Message toolMsg; // user-role turn carrying the result of the tool call above
+    toolMsg.role = QStringLiteral("user");
+    {
+        ContentBlockEntry tr;
+        tr.kind = ContentBlockEntry::Kind::ToolResult;
+        tr.toolUseId = QStringLiteral("call_1"); // same id as the tool_use block
+        tr.result = QStringLiteral("ok");
+        toolMsg.blocks.append(tr);
+    }
+    history.append(toolMsg);
+
+    Message imgMsg; // user turn with a text block and an image block
+    imgMsg.role = QStringLiteral("user");
+    {
+        ContentBlockEntry te;
+        te.kind = ContentBlockEntry::Kind::Text;
+        te.text = QStringLiteral("look");
+        imgMsg.blocks.append(te);
+        ContentBlockEntry im;
+        im.kind = ContentBlockEntry::Kind::Image;
+        im.imageData = QStringLiteral("AAAA"); // placeholder payload, not a real image
+        im.mediaType = QStringLiteral("image/png");
+        imgMsg.blocks.append(im);
+    }
+    history.append(imgMsg);
+
+    ctx.history = history;
+    return ctx;
+}
+
 } // namespace

 std::unique_ptr<JsonPromptTemplate> JsonPromptTemplate::fromConfig(
@@ -224,97 +362,154 @@ std::unique_ptr<JsonPromptTemplate> JsonPromptTemplate::fromConfig(
        if (error) *error = msg;
    };

-    if (cfg.messageFormat.isEmpty()) {
-        setError(QStringLiteral("Agent '%1' has empty message_format").arg(cfg.name));
+    if (cfg.body.isEmpty()) {
+        setError(QStringLiteral("Agent '%1' has empty [body]").arg(cfg.name));
        return nullptr;
    }

    auto tpl = std::unique_ptr<JsonPromptTemplate>(new JsonPromptTemplate);
    tpl->m_name = cfg.name;
    tpl->m_description = cfg.description;
-    tpl->m_sampling = cfg.sampling;
-    tpl->m_thinking = cfg.thinking;
+    tpl->m_body = cfg.body;
+
+    tpl->m_partialRoots.push_back(QStringLiteral(":/agents"));
+    if (cfg.isUserSource()) {
+        const QString dir = QFileInfo(cfg.sourcePath).absolutePath();
+        if (!dir.isEmpty())
+            tpl->m_partialRoots.push_back(dir);
+    }

    registerStandardCallbacks(tpl->m_env);
-    try {
-        tpl->m_template = tpl->m_env.parse(cfg.messageFormat.toStdString());
-    } catch (const std::exception &e) {
-        setError(QStringLiteral("Failed to parse jinja for '%1': %2")
-                     .arg(cfg.name, QString::fromUtf8(e.what())));
+    setIncludeResolver(tpl->m_env, tpl->m_partialRoots);
+
+    // Dry-run against a representative context: catches jinja syntax errors,
+    // unknown callbacks and missing partials at load time instead of on first send.
+    if (!tpl->renderBody(makeValidationContext())) {
+        setError(QStringLiteral("Agent '%1' [body] failed to render to valid JSON "
+                                "(see log)").arg(cfg.name));
        return nullptr;
    }
    return tpl;
 }

-std::optional<QJsonObject> JsonPromptTemplate::renderBody(const ContextData &context) const
+namespace {
+
+// Render one body value. A string containing jinja is rendered and its output
+// spliced in as raw JSON (it must be exactly one JSON value); plain strings and
+// scalars pass through unchanged; objects/arrays recurse. A jinja string that
+// renders to nothing sets `omit` so the caller drops the key. Returns false on
+// render / JSON failure. Caller must hold the render lock (env is not re-entrant).
+bool renderValue(
+    inja::Environment &env,
+    const QString &tplName,
+    const QJsonValue &in,
+    const nlohmann::json &data,
+    QJsonValue &out,
+    bool &omit)
 {
-    const nlohmann::json data = buildContextJson(context);
+    omit = false;
+
+    if (in.isObject()) {
+        QJsonObject obj;
+        const QJsonObject src = in.toObject();
+        for (auto it = src.constBegin(); it != src.constEnd(); ++it) {
+            QJsonValue v;
+            bool om = false;
+            if (!renderValue(env, tplName, it.value(), data, v, om))
+                return false;
+            if (!om) // a member that rendered to nothing is left out
+                obj.insert(it.key(), v);
+        }
+        out = obj;
+        return true;
+    }
+
+    if (in.isArray()) {
+        QJsonArray arr;
+        const QJsonArray src = in.toArray();
+        for (const QJsonValue &elem : src) {
+            QJsonValue v;
+            bool om = false;
+            if (!renderValue(env, tplName, elem, data, v, om))
+                return false;
+            if (!om) // an element that rendered to nothing is left out
+                arr.append(v);
+        }
+        out = arr;
+        return true;
+    }
+
+    if (!in.isString()) {
+        out = in; // numbers, booleans and null pass through untouched
+        return true;
+    }
+
+    const QString s = in.toString();
+    if (!s.contains(QStringLiteral("{{")) && !s.contains(QStringLiteral("{%"))) {
+        out = in; // literal string without jinja markers: keep as a JSON string
+        return true;
+    }

    std::string rendered;
    try {
-        std::lock_guard<std::mutex> lock(m_renderMutex);
-        rendered = m_env.render(m_template, data);
+        rendered = env.render(s.toStdString(), data);
    } catch (const std::exception &e) {
-        qWarning("[QodeAssist] Template '%s' render failed: %s",
-                 qUtf8Printable(m_name),
-                 e.what());
-        return std::nullopt;
+        qWarning("[QodeAssist] Template '%s' field render failed: %s",
+                 qUtf8Printable(tplName), e.what());
+        return false;
    }

-    QJsonParseError err;
-    const QJsonDocument doc
-        = QJsonDocument::fromJson(QByteArray::fromStdString(rendered), &err);
-    constexpr std::size_t kMaxRenderedLogChars = 500;
-    const std::string truncated = rendered.size() > kMaxRenderedLogChars
-        ? rendered.substr(0, kMaxRenderedLogChars) + "... [truncated]"
-        : rendered;
-    if (err.error != QJsonParseError::NoError) {
-        qWarning("[QodeAssist] Template '%s' produced invalid JSON at offset %d: %s\n"
-                 "--- raw output (truncated) ---\n%s",
-                 qUtf8Printable(m_name),
-                 err.offset,
-                 qUtf8Printable(err.errorString()),
-                 truncated.c_str());
-        return std::nullopt;
+    rendered = stripTrailingCommas(rendered);
+    if (QString::fromStdString(rendered).trimmed().isEmpty()) {
+        omit = true;
+        return true;
    }
-    if (!doc.isObject()) {
-        qWarning("[QodeAssist] Template '%s' rendered a non-object JSON value (truncated):\n%s",
-                 qUtf8Printable(m_name),
-                 truncated.c_str());
-        return std::nullopt;
+
+    // Wrap in [] so any JSON value parses; exactly one element is required (rejects `1, "x": 2`).
+    const std::string wrapped = "[" + rendered + "]";
+    QJsonParseError perr;
+    const QJsonDocument doc = QJsonDocument::fromJson(QByteArray::fromStdString(wrapped), &perr);
+    if (perr.error != QJsonParseError::NoError || doc.array().size() != 1) {
+        const QString snippet = QString::fromStdString(rendered).left(500);
+        qWarning("[QodeAssist] Template '%s' field produced invalid JSON: %s\n"
+                 "--- rendered (truncated) ---\n%s",
+                 qUtf8Printable(tplName),
+                 perr.error != QJsonParseError::NoError ? qUtf8Printable(perr.errorString()) : "expected exactly one JSON value",
+                 qUtf8Printable(snippet));
+        return false;
    }
-    return doc.object();
+    out = doc.array().at(0);
+    return true;
 }

-namespace {
-
 bool mergeRenderedBody(QJsonObject &request, const std::optional<QJsonObject> &body)
 {
    if (!body)
        return false;
-    for (auto it = body->constBegin(); it != body->constEnd(); ++it) {
+    for (auto it = body->constBegin(); it != body->constEnd(); ++it)
        request.insert(it.key(), it.value());
-    }
    return true;
 }

-void deepMergeInto(QJsonObject &base, const QJsonObject &overlay)
-{
-    for (auto it = overlay.constBegin(); it != overlay.constEnd(); ++it) {
-        const QJsonValue baseVal = base.value(it.key());
-        const QJsonValue overlayVal = it.value();
-        if (baseVal.isObject() && overlayVal.isObject()) {
-            QJsonObject merged = baseVal.toObject();
-            deepMergeInto(merged, overlayVal.toObject());
-            base[it.key()] = merged;
-        } else {
-            base[it.key()] = overlayVal;
-        }
-    }
-}
-
 } // namespace

+std::optional<QJsonObject> JsonPromptTemplate::renderBody(const ContextData &context) const
+{
+    const nlohmann::json data = buildContextJson(context); // built before taking the lock
+
+    std::lock_guard<std::mutex> lock(m_renderMutex); // held for every field: renderValue requires it
+    QJsonObject request;
+    for (auto it = m_body.constBegin(); it != m_body.constEnd(); ++it) {
+        QJsonValue v;
+        bool omit = false;
+        if (!renderValue(m_env, m_name, it.value(), data, v, omit))
+            return std::nullopt; // one failing field aborts the whole body
+        if (!omit) // a top-level field that rendered to nothing is left out
+            request.insert(it.key(), v);
+    }
+    return request;
+}
+
 void JsonPromptTemplate::prepareRequest(QJsonObject &request, const ContextData &context) const
 {
    mergeRenderedBody(request, renderBody(context));
@@ -323,27 +518,9 @@ void JsonPromptTemplate::prepareRequest(QJsonObject &request, const ContextData
 bool JsonPromptTemplate::buildFullRequest(
    QJsonObject &request,
    const ContextData &context,
-    bool thinkingEnabled) const
+    bool /*thinkingEnabled*/) const
 {
-    if (!mergeRenderedBody(request, renderBody(context)))
-        return false;
-    applySampling(request, thinkingEnabled);
-    return true;
-}
-
-void JsonPromptTemplate::applySampling(QJsonObject &request, bool thinkingEnabled) const
-{
-    // Merge order: sampling provides defaults → body wins for its own
-    // keys → thinking overrides win on top.
-    QJsonObject merged = m_sampling;
-    deepMergeInto(merged, request);
-
-    if (thinkingEnabled && !m_thinking.isEmpty()) {
-        deepMergeInto(merged, m_thinking.value("overrides").toObject());
-        deepMergeInto(merged, m_thinking.value("request_block").toObject());
-    }
-
-    request = std::move(merged);
+    return mergeRenderedBody(request, renderBody(context));
 }

 } // namespace QodeAssist::Templates
--- a/sources/templates/JsonPromptTemplate.hpp
+++ b/sources/templates/JsonPromptTemplate.hpp
@@ -7,6 +7,7 @@
 #include <memory>
 #include <mutex>
 #include <optional>
+#include <vector>

 #include <QJsonObject>
 #include <QString>
@@ -50,27 +51,30 @@ public:
        const ContextData &context,
        bool thinkingEnabled = false) const override;

-    const QJsonObject &sampling() const { return m_sampling; }
-
 private:
    JsonPromptTemplate() = default;

    std::optional<QJsonObject> renderBody(const ContextData &context) const;
-    void applySampling(QJsonObject &request, bool thinkingEnabled) const;

    QString m_name;
    QString m_description;

+    // The literal request body, as a deep-mergeable object. String values
+    // that contain jinja are rendered and spliced as JSON at request time;
+    // literal strings and scalars pass through unchanged.
+    QJsonObject m_body;
+
+    // Roots searched (in order) by the `{% include %}` resolver. The first
+    // is the bundled qrc partials prefix; an optional second is the user
+    // agent's own directory, so user profiles can ship their own partials.
+    std::vector<QString> m_partialRoots;
+
    // m_env is populated once in fromConfig() and never mutated again.
    // It is `mutable` only because inja::Environment::render() is not a
    // const member; m_renderMutex serialises those render() calls since
    // inja's render path is not internally re-entrant on one Environment.
    mutable inja::Environment m_env;
-    inja::Template m_template;
    mutable std::mutex m_renderMutex;
-
-    QJsonObject m_sampling;
-    QJsonObject m_thinking;
 };

 } // namespace QodeAssist::Templates