refactor: Remove project rules

2026-06-13 17:59:15 -04:00 · 2026-06-11 13:36:23 +02:00
parent 2c9475cddf
commit 05fe38e289
45 changed files with 1333 additions and 299 deletions
--- a/ChatView/ChatCompressor.cpp
+++ b/ChatView/ChatCompressor.cpp
@@ -125,10 +125,10 @@ void ChatCompressor::startCompression(const QString &chatFilePath, ChatModel *ch
    std::vector<std::unique_ptr<LLMQore::ContentBlock>> blocks;
    blocks.push_back(std::make_unique<LLMQore::TextContent>(buildCompressionPrompt()));

-    m_currentRequestId = session->send(
-        std::move(blocks), /*toolsOverride=*/false, /*thinkingOverride=*/false);
+    m_currentRequestId = session->send(std::move(blocks), /*toolsOverride=*/false);
    if (m_currentRequestId.isEmpty()) {
-        handleCompressionError(tr("Failed to start compression request"));
+        handleCompressionError(tr("Failed to start compression request: %1")
+                                   .arg(session->lastError().message));
        return;
    }
    LOG_MESSAGE(QString("Starting compression request: %1").arg(m_currentRequestId));
--- a/ChatView/ChatModel.cpp
+++ b/ChatView/ChatModel.cpp
@@ -390,22 +390,6 @@ void ChatModel::addToolExecutionStatus(
    }
 }

-void ChatModel::dropTrailingAssistantMessage(const QString &requestId)
-{
-    if (m_messages.isEmpty())
-        return;
-
-    const Message &last = m_messages.last();
-    if (last.role != ChatRole::Assistant || last.id != requestId)
-        return;
-
-    const int idx = m_messages.size() - 1;
-    beginRemoveRows(QModelIndex(), idx, idx);
-    m_messages.removeLast();
-    endRemoveRows();
-    LOG_MESSAGE(QString("Dropped leaked pre-tool assistant message at index %1").arg(idx));
-}
-
 void ChatModel::setToolMessageData(
    const QString &toolId,
    const QString &toolName,
--- a/ChatView/ChatModel.hpp
+++ b/ChatView/ChatModel.hpp
@@ -102,7 +102,6 @@ public:
        const QString &toolId,
        const QString &toolName,
        const QJsonObject &toolArguments);
-    void dropTrailingAssistantMessage(const QString &requestId);
    void setToolMessageData(
        const QString &toolId,
        const QString &toolName,
--- a/ChatView/ClientInterface.cpp
+++ b/ChatView/ClientInterface.cpp
@@ -268,8 +268,8 @@ void ClientInterface::sendMessage(

    const LLMQore::RequestID requestId = session->send(std::move(blocks));
    if (requestId.isEmpty()) {
-        const QString error = QStringLiteral("Failed to start chat request for agent: %1")
-                                  .arg(m_activeAgent);
+        const QString error = QStringLiteral("Failed to start chat request for agent '%1': %2")
+                                  .arg(m_activeAgent, session->lastError().message);
        LOG_MESSAGE(error);
        m_sessionManager->removeSession(session);
        emit errorOccurred(error);
@@ -277,7 +277,7 @@ void ClientInterface::sendMessage(
    }

    QJsonObject request{{"id", requestId}};
-    m_activeRequests[requestId] = {request, session, /*dropPreToolText=*/false};
+    m_activeRequests[requestId] = {request, session};

    emit requestStarted(requestId);
 }
@@ -613,15 +613,11 @@ void ClientInterface::handleToolExecutionStarted(
    const QString &toolName,
    const QJsonObject &arguments)
 {
-    const auto requestIt = m_activeRequests.constFind(requestId);
-    if (requestIt == m_activeRequests.constEnd()) {
+    if (!m_activeRequests.contains(requestId)) {
        LOG_MESSAGE(QString("Ignoring tool execution start for non-chat request: %1").arg(requestId));
        return;
    }

-    if (requestIt->dropPreToolText) {
-        m_chatModel->dropTrailingAssistantMessage(requestId);
-    }
    m_chatModel->addToolExecutionStatus(requestId, toolId, toolName, arguments);
    m_awaitingContinuation.insert(requestId);
 }
--- a/ChatView/ClientInterface.hpp
+++ b/ChatView/ClientInterface.hpp
@@ -91,7 +91,6 @@ private:
    {
        QJsonObject originalRequest;
        QPointer<Session> session;
-        bool dropPreToolText = false;
    };

    ChatModel *m_chatModel;
--- a/LLMClientInterface.cpp
+++ b/LLMClientInterface.cpp
@@ -276,8 +276,8 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request)
    connect(session, &Session::finished, this, [this, session](const LLMQore::RequestID &, const QString &) {
        onCompletionFinished(requestIdForSession(session));
    });
-    connect(session, &Session::failed, this, [this, session](const LLMQore::RequestID &, const QString &error) {
-        onCompletionFailed(requestIdForSession(session), error);
+    connect(session, &Session::failed, this, [this, session](const LLMQore::RequestID &, const QodeAssist::ErrorInfo &error) {
+        onCompletionFailed(requestIdForSession(session), error.message);
    });

    if (auto *client = session->client())
@@ -286,8 +286,9 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request)

    const LLMQore::RequestID requestId = session->sendCompletion(std::move(context));
    if (requestId.isEmpty()) {
+        QString error = QString("Failed to start completion request for agent '%1': %2")
+                            .arg(agentName, session->lastError().message);
        session->deleteLater();
-        QString error = QString("Failed to start completion request for agent: %1").arg(agentName);
        LOG_MESSAGE(error);
        sendErrorResponse(request, error);
        return;
--- a/QuickRefactorHandler.cpp
+++ b/QuickRefactorHandler.cpp
@@ -202,9 +202,10 @@ void QuickRefactorHandler::prepareAndSendRequest(
    const LLMQore::RequestID requestId = session->send(std::move(blocks), enableTools);
    if (requestId.isEmpty()) {
        m_isRefactoringInProgress = false;
+        const QString reason = session->lastError().message;
        m_sessionManager->removeSession(session);
-        emitError(QStringLiteral("Failed to start quick refactor request for agent: %1")
-                      .arg(agentName));
+        emitError(QStringLiteral("Failed to start quick refactor request for agent '%1': %2")
+                      .arg(agentName, reason));
        return;
    }

--- a/README.md
+++ b/README.md
@@ -218,7 +218,6 @@ For optimal coding assistance, we recommend using these top-tier models:

 - **[Agent Roles](docs/agent-roles.md)** - Create AI personas with specialized system prompts
 - **[Chat Summarization](docs/chat-summarization.md)** - Compress conversations to save context tokens
- **[Project Rules](docs/project-rules.md)** - Customize AI behavior for your project
 - **[Ignoring Files](docs/ignoring-files.md)** - Exclude files from context using `.qodeassistignore`

 ## Features
@@ -473,7 +472,7 @@ QodeAssist uses a flexible prompt composition system that adapts to different co
 - **Custom Instructions** provide reusable templates that can be augmented with specific details
 - **Tool Calling** is available for Chat and Quick Refactor when enabled

-See [Project Rules Documentation](docs/project-rules.md), [Agent Roles Guide](docs/agent-roles.md), and [Quick Refactoring Guide](docs/quick-refactoring.md) for more details.
+See [Agent Roles Guide](docs/agent-roles.md) and [Quick Refactoring Guide](docs/quick-refactoring.md) for more details.

 ## QtCreator Version Compatibility

--- a/bench/main.cpp
+++ b/bench/main.cpp
@@ -7,18 +7,23 @@
 #include <QFile>
 #include <QFileInfo>
 #include <QHash>
+#include <QJsonArray>
 #include <QJsonDocument>
 #include <QJsonObject>
 #include <QRegularExpression>
 #include <QTextStream>
 #include <QTimer>

+#include <functional>
 #include <memory>
+#include <optional>
 #include <vector>

 #include <LLMQore/BaseClient.hpp>
+#include <LLMQore/BaseTool.hpp>
 #include <LLMQore/ContentBlocks.hpp>
 #include <LLMQore/ToolRegistry.hpp>
+#include <LLMQore/ToolResult.hpp>
 #include <LLMQore/ToolsManager.hpp>

 #include <Agent.hpp>
@@ -145,6 +150,69 @@ QString imageMediaType(const QString &path)
    return {};
 }

+class BenchEchoTool : public LLMQore::BaseTool // Local bench test tool: answers with its "text" argument prefixed by "echo: ".
+{
+public:
+    using BaseTool::BaseTool; // Inherit the base-class constructors unchanged.
+    QString id() const override { return QStringLiteral("bench_echo"); } // Tool id string.
+    QString displayName() const override { return QStringLiteral("Bench echo"); } // Human-readable name.
+    QString description() const override // Description text returned for this tool.
+    {
+        return QStringLiteral("Echoes the given text back verbatim. "
+                              "Use whenever the user asks to echo something.");
+    }
+    QJsonObject parametersSchema() const override // Object schema with a single required string property, "text".
+    {
+        return QJsonObject{
+            {QStringLiteral("type"), QStringLiteral("object")},
+            {QStringLiteral("properties"),
+             QJsonObject{
+                 {QStringLiteral("text"),
+                  QJsonObject{
+                      {QStringLiteral("type"), QStringLiteral("string")},
+                      {QStringLiteral("description"), QStringLiteral("Text to echo back")}}}}},
+            {QStringLiteral("required"), QJsonArray{QStringLiteral("text")}}};
+    }
+    QFuture<LLMQore::ToolResult> executeAsync(const QJsonObject &input) override // Builds the reply synchronously and returns it as an already-finished future.
+    {
+        return QtFuture::makeReadyValueFuture(LLMQore::ToolResult::text( // No schema validation happens here.
+            QStringLiteral("echo: %1").arg(input.value(QStringLiteral("text")).toString()))); // toString() is empty when "text" is absent or not a string, so the reply is then just "echo: ".
+    }
+};
+
+class BenchAddTool : public LLMQore::BaseTool // Local bench test tool: returns the sum of its numeric "a" and "b" arguments as text.
+{
+public:
+    using BaseTool::BaseTool; // Inherit the base-class constructors unchanged.
+    QString id() const override { return QStringLiteral("bench_add"); } // Tool id string.
+    QString displayName() const override { return QStringLiteral("Bench add"); } // Human-readable name.
+    QString description() const override // Description text returned for this tool.
+    {
+        return QStringLiteral("Adds two numbers and returns the sum. "
+                              "Use whenever the user asks to add numbers.");
+    }
+    QJsonObject parametersSchema() const override // Object schema with two required number properties, "a" and "b".
+    {
+        return QJsonObject{
+            {QStringLiteral("type"), QStringLiteral("object")},
+            {QStringLiteral("properties"),
+             QJsonObject{
+                 {QStringLiteral("a"),
+                  QJsonObject{{QStringLiteral("type"), QStringLiteral("number")}}},
+                 {QStringLiteral("b"),
+                  QJsonObject{{QStringLiteral("type"), QStringLiteral("number")}}}}},
+            {QStringLiteral("required"),
+             QJsonArray{QStringLiteral("a"), QStringLiteral("b")}}};
+    }
+    QFuture<LLMQore::ToolResult> executeAsync(const QJsonObject &input) override // Computes the sum synchronously and returns it as an already-finished future.
+    {
+        const double sum = input.value(QStringLiteral("a")).toDouble() // toDouble() yields 0.0 for a missing or non-numeric value,
+                           + input.value(QStringLiteral("b")).toDouble(); // so an absent operand counts as zero instead of failing.
+        return QtFuture::makeReadyValueFuture( // 15 significant digits: the default precision (6) would render 1000000 + 1 as "1e+06".
+            LLMQore::ToolResult::text(QString::number(sum, 'g', 15)));
+    }
+};
+
 void printEvent(const ResponseEvent &ev, bool showThinking)
 {
    switch (ev.kind()) {
@@ -178,8 +246,10 @@ void printEvent(const ResponseEvent &ev, bool showThinking)
            err() << "[tool-result" << (d->isError ? " ERROR" : "") << "] " << d->text << "\n";
        break;
    case ResponseEvent::Kind::Usage:
-        if (const auto *d = ev.as<ResponseEvents::Usage>())
-            err() << "\n[usage] in=" << d->inputTokens << " out=" << d->outputTokens << "\n";
+        if (const auto *d = ev.as<ResponseEvents::Usage>()) {
+            err() << "\n[usage] in=" << d->inputTokens << " out=" << d->outputTokens
+                  << " cached=" << d->cachedTokens << " reasoning=" << d->reasoningTokens << "\n";
+        }
        break;
    case ResponseEvent::Kind::Error:
        if (const auto *d = ev.as<ResponseEvents::Error>())
@@ -212,7 +282,9 @@ int main(int argc, char *argv[])
        QStringList{"f", "file"}, "Load an agent from a TOML file instead of by name.", "path");
    QCommandLineOption promptOpt(
        QStringList{"p", "prompt"},
-        "Prompt text. If omitted, positional args or stdin are used.",
+        "Prompt text. Repeatable: each occurrence is one chat turn, sent after the "
+        "previous turn finishes (history is replayed through the agent template). "
+        "If omitted, positional args or stdin are used as a single turn.",
        "text");
    QCommandLineOption noThinkingOpt("no-thinking", "Hide thinking deltas from output.");
    QCommandLineOption envOpt(
@@ -221,6 +293,11 @@ int main(int argc, char *argv[])
        "path");
    QCommandLineOption apiKeyOpt(
        "api-key", "API key to use for the agent's provider (overrides env/settings).", "value");
+    QCommandLineOption timeoutOpt(
+        "timeout",
+        "Network transfer timeout in seconds (a stalled stream fails instead of hanging). "
+        "Default 60, 0 disables.",
+        "seconds");
    QCommandLineOption projectDirOpt(
        QStringList{"C", "project-dir"},
        "Project root for the agent's context (${PROJECT_DIR}). Defaults to the current directory.",
@@ -233,6 +310,11 @@ int main(int argc, char *argv[])
        "mcp",
        "Load MCP servers from a JSON config (mcpServers map) to give the agent executable tools.",
        "path");
+    QCommandLineOption builtinToolsOpt(
+        "builtin-tools",
+        "Register local test tools (bench_echo, bench_add) and force tools on. "
+        "Lets the model exercise tool calls without an MCP server, e.g. "
+        "-p \"echo hello via the tool\" -p \"now add 2 and 3\".");
    QCommandLineOption fimOpt(
        "fim",
        "Fill-in-the-middle completion mode: send prompt as the prefix and --suffix as the suffix.");
@@ -245,9 +327,11 @@ int main(int argc, char *argv[])
    parser.addOption(noThinkingOpt);
    parser.addOption(envOpt);
    parser.addOption(apiKeyOpt);
+    parser.addOption(timeoutOpt);
    parser.addOption(projectDirOpt);
    parser.addOption(imageOpt);
    parser.addOption(mcpOpt);
+    parser.addOption(builtinToolsOpt);
    parser.addOption(fimOpt);
    parser.addOption(suffixOpt);
    parser.addPositionalArgument("prompt", "Prompt text (alternative to --prompt).", "[prompt...]");
@@ -292,6 +376,20 @@ int main(int argc, char *argv[])
        return 1;
    }

+    {
+        bool ok = false;
+        const int timeoutSecs = parser.isSet(timeoutOpt)
+                                    ? parser.value(timeoutOpt).toInt(&ok)
+                                    : 60;
+        if (parser.isSet(timeoutOpt) && !ok) {
+            err() << "Invalid --timeout value.\n";
+            return 2;
+        }
+        if (timeoutSecs > 0)
+            if (auto *client = session->client())
+                client->setTransferTimeout(timeoutSecs * 1000);
+    }
+
    {
        QHash<QString, QString> envFile;
        QString envPath = parser.value(envOpt);
@@ -327,21 +425,41 @@ int main(int argc, char *argv[])

    const QStringList imagePaths = parser.values(imageOpt);

-    QString prompt = parser.value(promptOpt);
-    if (prompt.isEmpty())
-        prompt = parser.positionalArguments().join(QLatin1Char(' '));
-    if (prompt.isEmpty() && imagePaths.isEmpty())
-        prompt = readStdin().trimmed();
-    if (prompt.isEmpty() && imagePaths.isEmpty()) {
+    QStringList turns = parser.values(promptOpt);
+    if (turns.isEmpty()) {
+        QString prompt = parser.positionalArguments().join(QLatin1Char(' '));
+        if (prompt.isEmpty() && imagePaths.isEmpty())
+            prompt = readStdin().trimmed();
+        if (!prompt.isEmpty())
+            turns << prompt;
+    }
+    if (turns.isEmpty() && imagePaths.isEmpty()) {
        err() << "Empty prompt.\n";
        return 2;
    }
+    if (fimMode && turns.size() > 1) {
+        err() << "FIM mode takes a single prompt; extra turns ignored.\n";
+        turns = {turns.first()};
+    }

    if (!imagePaths.isEmpty() && !session->supportsImages())
        err() << "[warning] agent's provider does not advertise image support.\n";

+    std::optional<bool> toolsOverride;
+    if (parser.isSet(builtinToolsOpt) || parser.isSet(mcpOpt))
+        toolsOverride = true;
+
+    if (parser.isSet(builtinToolsOpt)) {
+        auto *tools = session->client()->tools();
+        tools->addTool(new BenchEchoTool(tools));
+        tools->addTool(new BenchAddTool(tools));
+        err() << "[tools] registered bench_echo, bench_add\n";
+    }
+
    const bool showThinking = !parser.isSet(noThinkingOpt);
    int exitCode = 0;
+    int nextTurn = 0;
+    std::function<void()> sendNextTurn;

    QObject::connect(
        session, &Session::event, &app, [showThinking](const ResponseEvent &ev) {
@@ -351,65 +469,83 @@ int main(int argc, char *argv[])
        session, &Session::finished, &app,
        [&](const LLMQore::RequestID &, const QString &reason) {
            err() << "\n[done] stopReason=" << (reason.isEmpty() ? "<none>" : reason) << "\n";
+            if (!fimMode && nextTurn < turns.size()) {
+                sendNextTurn();
+                return;
+            }
            QCoreApplication::quit();
        });
    QObject::connect(
        session, &Session::failed, &app,
-        [&](const LLMQore::RequestID &, const QString &msg) {
-            err() << "\n[failed] " << msg << "\n";
+        [&](const LLMQore::RequestID &, const QodeAssist::ErrorInfo &info) {
+            err() << "\n[failed] " << info.message << "\n";
            exitCode = 1;
            QCoreApplication::quit();
        });
+    QObject::connect(
+        session, &Session::cancelled, &app, [&](const LLMQore::RequestID &) {
+            err() << "\n[cancelled]\n";
+            QCoreApplication::quit();
+        });

-    auto dispatch = [&] {
-        if (fimMode) {
-            Templates::ContextData ctx;
-            ctx.prefix = prompt;
-            if (parser.isSet(suffixOpt))
-                ctx.suffix = parser.value(suffixOpt);
-            if (session->sendCompletion(std::move(ctx)).isEmpty()) {
-                err() << "Failed to dispatch FIM request (check provider URL / model).\n";
-                exitCode = 1;
-                QCoreApplication::quit();
-            }
-            return;
-        }
-
+    sendNextTurn = [&] {
        std::vector<std::unique_ptr<LLMQore::ContentBlock>> blocks;
-        for (const QString &imgPath : imagePaths) {
-            QFile img(imgPath);
-            if (!img.open(QIODevice::ReadOnly)) {
-                err() << "[image] cannot open: " << imgPath << "\n";
-                exitCode = 1;
-                QCoreApplication::quit();
-                return;
+        if (nextTurn == 0) {
+            for (const QString &imgPath : imagePaths) {
+                QFile img(imgPath);
+                if (!img.open(QIODevice::ReadOnly)) {
+                    err() << "[image] cannot open: " << imgPath << "\n";
+                    exitCode = 1;
+                    QCoreApplication::quit();
+                    return;
+                }
+                const QString media = imageMediaType(imgPath);
+                if (media.isEmpty()) {
+                    err() << "[image] unsupported type: " << imgPath << "\n";
+                    exitCode = 1;
+                    QCoreApplication::quit();
+                    return;
+                }
+                const QString b64 = QString::fromLatin1(img.readAll().toBase64());
+                blocks.push_back(std::make_unique<LLMQore::ImageContent>(
+                    b64, media, LLMQore::ImageContent::ImageSourceType::Base64));
            }
-            const QString media = imageMediaType(imgPath);
-            if (media.isEmpty()) {
-                err() << "[image] unsupported type: " << imgPath << "\n";
-                exitCode = 1;
-                QCoreApplication::quit();
-                return;
-            }
-            const QString b64 = QString::fromLatin1(img.readAll().toBase64());
-            blocks.push_back(std::make_unique<LLMQore::ImageContent>(
-                b64, media, LLMQore::ImageContent::ImageSourceType::Base64));
        }
-        if (!prompt.isEmpty())
-            blocks.push_back(std::make_unique<LLMQore::TextContent>(prompt));
+        const QString text = turns.value(nextTurn);
+        if (!text.isEmpty())
+            blocks.push_back(std::make_unique<LLMQore::TextContent>(text));
        if (blocks.empty()) {
            err() << "Nothing to send.\n";
            exitCode = 1;
            QCoreApplication::quit();
            return;
        }
-        if (session->send(std::move(blocks)).isEmpty()) {
-            err() << "Failed to dispatch request (check provider URL / model).\n";
+        if (turns.size() > 1)
+            err() << "\n[turn " << (nextTurn + 1) << "/" << turns.size() << "] " << text << "\n";
+        ++nextTurn;
+        if (session->send(std::move(blocks), toolsOverride).isEmpty()) {
+            err() << "Failed to dispatch request: " << session->lastError().message << "\n";
            exitCode = 1;
            QCoreApplication::quit();
        }
    };

+    auto dispatch = [&] {
+        if (fimMode) {
+            Templates::ContextData ctx;
+            ctx.prefix = turns.value(0);
+            if (parser.isSet(suffixOpt))
+                ctx.suffix = parser.value(suffixOpt);
+            if (session->sendCompletion(std::move(ctx)).isEmpty()) {
+                err() << "Failed to dispatch FIM request: " << session->lastError().message << "\n";
+                exitCode = 1;
+                QCoreApplication::quit();
+            }
+            return;
+        }
+        sendNextTurn();
+    };
+
    if (parser.isSet(mcpOpt)) {
        const QString mcpPath = parser.value(mcpOpt);
        QFile mcpFile(mcpPath);
--- a/docs/agent-roles.md
+++ b/docs/agent-roles.md
@@ -168,7 +168,6 @@ This allows roles to augment rather than replace your base configuration.

 ## Related Documentation

- [Project Rules](project-rules.md) - Project-specific AI behavior customization
 - [Chat Assistant Features](../README.md#chat-assistant) - Overview of chat functionality
 - [File Context](file-context.md) - Attaching files to chat context

--- a/docs/chat-summarization.md
+++ b/docs/chat-summarization.md
@@ -112,4 +112,3 @@ No additional configuration is required.

 - [Agent Roles](agent-roles.md) - Switch between AI personas
 - [File Context](file-context.md) - Attach files to chat
- [Project Rules](project-rules.md) - Customize AI behavior
--- a/docs/core-class-diagram.svg
+++ b/docs/core-class-diagram.svg
--- a/docs/project-rules.md
+++ b/docs/project-rules.md
@@ -1,35 +0,0 @@
-# Project Rules Configuration
-
-QodeAssist supports project-specific rules to customize AI behavior for your codebase. Create a `.qodeassist/rules/` directory in your project root.
-
-## Quick Start
-
-```bash
-mkdir -p .qodeassist/rules/{common,completion,chat,quickrefactor}
-```
-
-## Directory Structure
-
-```
-.qodeassist/
-└── rules/
-    ├── common/           # Applied to all contexts
-    ├── completion/       # Code completion only
-    ├── chat/            # Chat assistant only
-    └── quickrefactor/   # Quick refactor only
-```
-
-All `.md` files in each directory are automatically loaded and added to the system prompt.
-
-## Example
-
-Create `.qodeassist/rules/common/general.md`:
-
-```markdown
-# Project Guidelines
- Use snake_case for private members
- Prefix interfaces with 'I'
- Always document public APIs
- Prefer Qt containers over STL
-```
-
--- a/docs/quick-refactoring.md
+++ b/docs/quick-refactoring.md
@@ -206,7 +206,6 @@ The LLM receives:
 - **Cursor Position**: Marked with `<cursor>` tag
 - **Selection Markers**: `<selection_start>` and `<selection_end>` tags
 - **Your Instructions**: Built-in, custom, or typed
- **Project Rules**: If configured (see [Project Rules](project-rules.md))

 ### Context Configuration

@@ -270,7 +269,6 @@ Fully local setup for offline or secure environments.

 ## Related Documentation

- [Project Rules](project-rules.md) - Project-specific AI behavior customization
 - [File Context](file-context.md) - Attaching files to chat context
 - [Ignoring Files](ignoring-files.md) - Exclude files from AI context
 - [Provider Configuration](../README.md#configuration) - Setting up LLM providers
--- a/docs/target-architecture.md
+++ b/docs/target-architecture.md
@@ -0,0 +1,652 @@
+# QodeAssist — Target Architecture (v1.0)
+
+Status: design baseline, derived from the fixed use-case inventory below.
+Scope: the complete plugin, designed "from scratch" — what the architecture
+should be if nothing legacy constrained it. The current code (see
+`architecture.md`) already converges on this; §10 lists the remaining deltas.
+
+---
+
+## 1. Use-case inventory (requirements baseline)
+
+Every architectural decision below is justified by one of these. Features not
+on this list (Rules system, legacy provider/model/template pickers, Stack A)
+are intentionally out of scope.
+
+| # | Use case | What the user gets |
+|---|----------|--------------------|
+| U1 | **Code completion** | Inline FIM/instruct suggestions via LSP; auto + manual trigger, multiline, smart-context suppression, accept full / word-by-word |
+| U2 | **Chat assistant** | 4 placements (sidebar, bottom pane, editor tab, floating window); streaming text + thinking blocks + tool blocks + file-edit blocks (apply/undo); attachments, linked files, @-mentions, open-files sync; token counter; persisted history; one-click summarization; runtime agent + role pickers |
+| U3 | **Quick refactor** | Selection + instruction by hotkey; custom-instructions library; separate agent; optional tools; streamed result inserted into the editor |
+| U4 | **Tools** | read/create/edit file, search, find, list, build, diagnostics, terminal, todo, load_skill; per-tool enable |
+| U5 | **Skills** | discovery from `.qodeassist/skills`, `.claude/skills`, `~/.claude/skills`; auto-injection, explicit `/` picker, always-on |
+| U6 | **MCP** | server mode (expose plugin tools, HTTP/SSE + stdio bridge) and client hub (consume external tools in chat/refactor) |
+| U7 | **Providers** | 13 `client_api` types over one GenericProvider; secrets store; local-server autostart; model listing |
+| U8 | **Agents** | TOML profiles: `extends`, `[body]` table 1:1 with the wire request, Jinja partials, `match` rules, per-agent model override, per-pipeline rosters |
+| U9 | **Roles** | JSON roles composed into `system_prompt` via `{{ agent_role(id) }}` |
+| U10 | **Bench CLI** | headless agent testing on the same core stack, `.env` secrets |
+| U11 | **Configuration UI** | settings pages for everything above; per-project settings; updater + status widget |
+
+---
+
+## 2. Design principles
+
+1. **One stack.** Every LLM byte — completion, chat, compression, refactor,
+   bench — flows through the same `Session` pipeline. No parallel legacy path.
+2. **Hexagonal core.** The runtime (agents, sessions, providers, templates,
+   prompt rendering) has zero Qt Creator dependencies. The IDE and the bench
+   CLI are two hosts composing the same core; IDE-specific facts enter only
+   through ports (document reading, project scanning, secrets, tool hosting).
+3. **Configuration is declarative, code is mechanism.** What is sent (request
+   `[body]`, system prompt, endpoint, model) lives in TOML/JSON/Jinja and is
+   user-overridable; *how* it is sent (streaming, retries, tool loop, event
+   routing) lives in C++ and is identical for all providers.
+4. **Capability-driven behavior.** Providers and agents declare capabilities
+   (tools, thinking, images, model listing); features and UI adapt to the
+   declared set instead of switching on provider names.
+5. **Single source of truth for conversation state.** `ConversationHistory`
+   owns the messages; `ChatModel` and persistence are projections of it, never
+   independent copies.
+6. **Per-feature composition roots, no singletons.** Each feature constructs
+   and owns its dependencies (`new` + parent); shared services are passed
+   explicitly (constructor/setter, QML context properties for the chat).
+7. **Streaming-first event model.** One typed `ResponseEvent` stream is the
+   only contract between the core and every consumer. Deltas exist for live
+   UI (chat); one-shot pipelines (completion, refactor, bench) ignore them,
+   wait for `finished`, and read the final assistant message from history.
+8. **Fail at load, not mid-conversation.** Agent profiles are validated when
+   loaded (partials resolve, assembled body parses as JSON against a synthetic
+   context), so a config error never surfaces as a silent runtime drop.
+
+---
+
+## 3. Layered model
+
+```mermaid
+flowchart TB
+    subgraph HOSTS["Hosts — composition roots"]
+        PLUGIN["Qt Creator plugin<br/>qodeassist.cpp"]
+        BENCH["bench CLI"]
+    end
+
+    subgraph L5["L5 · Presentation"]
+        LSP["LSP bridge<br/>inline suggestions"]
+        QMLUI["ChatView QML<br/>4 placements"]
+        RW["Refactor widgets"]
+        SUI["Settings pages"]
+    end
+
+    subgraph L4["L4 · Features"]
+        FCOMP["CompletionFeature"]
+        FCHAT["ChatFeature"]
+        FREF["RefactorFeature"]
+    end
+
+    subgraph L3["L3 · Capabilities"]
+        CTX["ContextEngine<br/>ports + QtC adapters"]
+        TOOLS["ToolKit"]
+        SKILLS["SkillsEngine"]
+        MCPH["McpHub<br/>client + server"]
+    end
+
+    subgraph L2["L2 · Core runtime — IDE-independent"]
+        SM["SessionManager"]
+        SESS["Session"]
+        AGF["AgentFactory + AgentRouter"]
+        AG["Agent"]
+        PROV["GenericProvider"]
+        TPL["JsonPromptTemplate"]
+    end
+
+    subgraph L1["L1 · Declarative config"]
+        PCONF["providers/*.toml"]
+        ACONF["agents/*.toml + partials/*.jinja"]
+        ROST["rosters / pipelines"]
+        ROLES["agent_roles/*.json"]
+        SKCONF["skills/*.md"]
+        SEC["SecretsStore"]
+    end
+
+    subgraph L0["L0 · Wire — LLMQore"]
+        CLIENTS["*Client — SSE streaming"]
+        TOOLFW["Tool framework"]
+        MCPT["MCP transports"]
+    end
+
+    PLUGIN --> L4
+    PLUGIN --> SUI
+    BENCH --> SM
+    LSP --> FCOMP
+    QMLUI --> FCHAT
+    RW --> FREF
+    FCOMP --> SM
+    FCHAT --> SM
+    FREF --> SM
+    FCOMP --> CTX
+    FCHAT --> CTX
+    FREF --> CTX
+    FCHAT --> SKILLS
+    FCHAT --> TOOLS
+    FREF --> TOOLS
+    TOOLS --> TOOLFW
+    MCPH --> MCPT
+    SM --> SESS
+    SESS --> AG
+    AGF --> AG
+    AG --> PROV
+    AG --> TPL
+    AGF --> ACONF
+    AGF --> PCONF
+    AGF --> SEC
+    AGF --> ROST
+    TPL --> ROLES
+    PROV --> CLIENTS
+    SKILLS --> SKCONF
+```
+
+### Layer contracts
+
+| Layer | Contains | May depend on | Must NOT depend on |
+|-------|----------|---------------|--------------------|
+| **L0 Wire** | LLMQore clients (one per wire protocol: Claude, OpenAI Chat, OpenAI Responses, Google, Ollama, Mistral, llama.cpp), tool framework, MCP transports | Qt Network | anything above |
+| **L1 Config** | `ProviderInstance`, `AgentProfile` (+ loader/validator), rosters, roles, skills, secrets port | toml++, inja | Qt Creator, L2+ |
+| **L2 Core** | `Agent`, `AgentFactory`, `AgentRouter`, `Provider`/`GenericProvider`, `JsonPromptTemplate`, `Session`, `SessionManager`, `ConversationHistory`, `SystemPromptBuilder`, `ResponseRouter`, `ToolContributorRegistry` | L0, L1 | Qt Creator, QML, features |
+| **L3 Capabilities** | `ContextEngine` (ports + QtC adapters), `ToolKit` (built-in tools), `SkillsEngine`, `McpHub` | L0–L2, QtC APIs *only in adapters* | features, UI |
+| **L4 Features** | `CompletionFeature`, `ChatFeature` (send/stream, compression, token counting, file edits), `RefactorFeature` | L2, L3 | each other |
+| **L5 Presentation** | LSP bridge, ChatView QML, refactor widgets, settings pages | its feature | core internals |
+| **Hosts** | plugin shell, bench CLI | everything (composition only) | — |
+
+The hard rule that makes U10 (bench) and testability free: **L0–L2 build into
+targets with no Qt Creator linkage.** Bench links L0–L2 plus a thin CLI host;
+the plugin adds L3 adapters, L4, L5.
+
+---
+
+## 4. Core domain model
+
+Rendered copy: [core-class-diagram.svg](core-class-diagram.svg) (regenerate
+when the diagram below changes).
+
+```mermaid
+classDiagram
+    direction TB
+    class SessionManager {
+        +acquire(agentName) Session
+        +release(session)
+        +toolContributors() ToolContributorRegistry
+    }
+    class Session {
+        +send(blocks, toolPolicy)
+        +cancel()
+        +history() ConversationHistory
+        +systemPrompt() SystemPromptBuilder
+        +event(ResponseEvent)
+        +finished(id, stopReason)
+        +failed(id, ErrorInfo)
+        +cancelled(id)
+    }
+    class ConversationHistory {
+        +messages() vector~Message~
+        +lastAssistantText() string
+        +append(Message)
+        +reset(vector~Message~)
+    }
+    class Message {
+        +role Role
+        +blocks vector~ContentBlock~
+    }
+    class SystemPromptBuilder {
+        +setLayer(id, text, priority)
+        +removeLayer(id)
+        +compose() string
+    }
+    class ResponseRouter {
+        +attach(BaseClient)
+        +event(ResponseEvent)
+    }
+    class Agent {
+        +config() AgentConfig
+        +provider() Provider
+        +promptTemplate() PromptTemplate
+    }
+    class AgentFactory {
+        +create(name) Agent
+        +configByName(name) AgentConfig
+        +effectiveModel(name) string
+    }
+    class AgentRouter {
+        +pickAgent(roster, fileCtx) string
+    }
+    class Provider {
+        <<interface>>
+        +capabilities() Capabilities
+        +prepareRequest(request, ctx)
+        +sendRequest(json) RequestID
+        +cancelRequest(RequestID)
+    }
+    class GenericProvider {
+        -client BaseClient
+    }
+    class PromptTemplate {
+        <<interface>>
+        +buildFullRequest(request, ctx)
+    }
+    class JsonPromptTemplate {
+        -bodySpec QJsonObject
+        -env InjaEnvironment
+    }
+    class ToolContributorRegistry {
+        +registerContributor(fn)
+        +applyTo(ToolsManager)
+    }
+
+    SessionManager o-- Session : pools
+    SessionManager --> AgentFactory : builds via
+    SessionManager --> ToolContributorRegistry
+    Session *-- ConversationHistory
+    Session *-- SystemPromptBuilder
+    Session *-- ResponseRouter
+    Session --> Agent
+    ConversationHistory o-- Message
+    Agent *-- Provider
+    Agent *-- PromptTemplate
+    AgentFactory ..> Agent : creates
+    AgentFactory --> AgentRouter
+    GenericProvider --|> Provider
+    JsonPromptTemplate --|> PromptTemplate
+```
+
+Responsibilities, one line each:
+
+- **Agent** — immutable bundle of *what to call*: resolved config + provider +
+  compiled prompt template. No request state.
+- **Session** — one conversation's runtime: owns history, system-prompt
+  layers, response routing, the in-flight request, and the tool-execution
+  loop (tool_use → execute → tool_result → continue). `send(blocks)` is the
+  *only* entry point: every pipeline appends a user message and dispatches;
+  there are no per-pipeline send variants. What differs between completion,
+  chat, and refactor is the agent's template and the consumption mode (deltas
+  vs final message), never the Session API.
+- **SessionManager** — creates/pools sessions per agent; the single place
+  features go to get one. Pooling (not per-message construction) covers the
+  "fresh agent + provider + secrets read per request" latency cost. It reuses
+  only the expensive parts (agent, provider, compiled template, secrets read):
+  `acquire` hands out a session with cleared history and system-prompt
+  layers, so one-shot pipelines never see a previous exchange.
+- **AgentRouter** — the *only* agent picker. Every pipeline (completion, chat,
+  compression, refactor) resolves its agent through
+  `pickAgent(roster, {file, project})`; no feature-local picker logic.
+- **GenericProvider** — one class for all 13 client APIs; varies only by
+  LLMQore client factory + metadata. Request *shape* belongs to the template,
+  never to the provider.
+- **JsonPromptTemplate** — compiles the agent's `[body]` table; renders
+  Jinja-bearing string values, splices raw JSON, drops empty keys; validated
+  at load time.
+- **SystemPromptBuilder** — ordered named layers (`agent.system`,
+  `chat.context`, `refactor`, `compression`); features mutate only their own
+  layer.
+- **ResponseRouter / ResponseEvent** — adapts LLMQore client signals into one
+  typed stream: `TextDelta`, `ThinkingDelta`, `ToolCallStart/End`,
+  `ToolResult`, `Usage`, `Error`, `MessageStop`.
+- **ToolContributorRegistry** — contributors (built-in ToolKit, SkillTool,
+  McpHub) register once; `SessionManager` applies them to every new session's
+  `ToolsManager`. This is how MCP tools reach chat *and* refactor (U6) without
+  feature code knowing about MCP.
+
+---
+
+## 5. Runtime flows
+
+### 5.1 Chat (U2) — the richest path
+
+```mermaid
+sequenceDiagram
+    autonumber
+    actor U as User
+    participant V as ChatView QML
+    participant F as ChatFeature
+    participant SM as SessionManager
+    participant S as Session
+    participant T as JsonPromptTemplate
+    participant P as GenericProvider
+    participant C as LLMQore Client
+    participant R as ResponseRouter
+
+    U->>V: message + attachments
+    V->>F: sendMessage(text, files, images)
+    F->>SM: acquire(activeAgent)
+    SM-->>F: Session (pooled)
+    F->>S: systemPrompt().setLayer("chat.context", project + skills + linked files)
+    F->>S: send(userBlocks, toolPolicy)
+    S->>T: buildFullRequest(history, system, ctx)
+    T-->>S: request JSON (body is 1:1 with the API)
+    S->>P: sendRequest(json)
+    P->>C: HTTP POST, SSE stream
+    loop streaming
+        C-->>R: chunk / thinking / tool_use / usage
+        R-->>S: ResponseEvent
+        S-->>F: event(ResponseEvent)
+        F-->>V: ChatModel projection update
+    end
+    opt tool call requested
+        S->>S: execute tool via ToolsManager
+        S->>P: continue with tool_result
+    end
+    C-->>R: finalized
+    R-->>S: MessageStop + Usage
+    S-->>F: finished()
+    F->>SM: release(session)
+```
+
+State ownership in chat: `Session.history()` is the truth. `ChatModel` is a
+QML projection built from history events (`messageAdded`, `messageUpdated`);
+`ChatSerializer`/`ChatHistoryStore` persist *history*, and restoring a chat
+seeds a new session's history — never the other way around. File-edit blocks,
+apply/undo, and the token counter are ChatFeature concerns layered on the
+event stream.
+
+### 5.2 Completion (U1)
+
+```
+LSP getCompletionsCycling
+  → CompletionFeature
+      agent   = AgentRouter.pickAgent(roster.codeCompletion, {file, project})
+      session = SessionManager.acquire(agent)
+      ctx     = ContextEngine: prefix/suffix + open-files context (policy from
+                CodeCompletionSettings — editor policy, not agent config)
+      session.send(blocks{completion context}, tools=off)
+  on finished → history().lastAssistantText()
+      → CodeHandler (output-mode post-processing) → LSP items
+```
+
+No special Session method: the completion context travels as the content of
+an ordinary user message (a structured block carrying prefix/suffix + file
+context), and the template context exposes it as `ctx.prefix` / `ctx.suffix`.
+FIM vs instruct is *agent config* (template + body), not feature code: a FIM
+agent's body renders `prefix`/`suffix` into FIM fields; an instruct agent's
+body renders the same exchange as a chat-shaped request. The feature is
+identical for both — and since completion has no incremental UI, it never
+touches the delta stream: it waits for `finished` and reads the last message.
+
+### 5.3 Quick refactor (U3)
+
+```
+Hotkey → RefactorFeature
+  agent   = AgentRouter.pickAgent(roster.quickRefactor, {file, project})
+  session = SessionManager.acquire(agent)
+  session.systemPrompt().setLayer("refactor", tagged selection + output rules)
+  session.send(blocks{instruction}, toolPolicy)
+  on finished → history().lastAssistantText()
+      → ResponseCleaner → RefactorResult → editor insert (accept/reject)
+```
+
+Same consumption mode as completion: the feature listens to
+`Session::finished`/`failed` only (events at most drive a progress spinner
+and cancel) and reads the result from history — it never connects to raw
+client signals. Tool calls during refactor run inside the session's tool
+loop; history's last assistant message is whatever the model produced after
+the final tool round.
+
+### 5.4 Compression (U2) and bench (U10)
+
+Compression is ChatFeature reusing the same path with
+`roster.chatCompression` and a `"compression"` system layer; the summary
+starts a new history. Bench is a host: CLI args + `.env` secrets → L1 + L2
+composition → `Session.send` → events printed to stdout. Anything bench can't
+do without the IDE is, by construction, an L3+ concern.
+
+---
+
+## 6. Configuration model
+
+```mermaid
+erDiagram
+    AGENT_PROFILE ||--o| AGENT_PROFILE : extends
+    AGENT_PROFILE }o--|| PROVIDER_INSTANCE : provider_instance
+    AGENT_PROFILE }o--o{ PARTIAL : includes
+    AGENT_PROFILE }o--o{ ROLE : agent_role
+    ROSTER }o--o{ AGENT_PROFILE : ranks
+    MODEL_OVERRIDE |o--|| AGENT_PROFILE : overrides_model
+    PROVIDER_INSTANCE }o--|| CLIENT_API : client_api
+    PROVIDER_INSTANCE }o--o| SECRET : api_key_ref
+    PROVIDER_INSTANCE ||--o| LAUNCH_CONFIG : autostarts
+
+    AGENT_PROFILE {
+        string name
+        bool abstract
+        string system_prompt "jinja, composes agent_role()"
+        json body "request body, 1:1 with API"
+        string endpoint "may contain MODEL placeholder"
+        string model "default; override wins"
+        bool enable_tools "capability hint"
+        bool enable_thinking "capability hint"
+        json match "file, path, project patterns"
+    }
+    PROVIDER_INSTANCE {
+        string name
+        string client_api
+        string url
+        string api_key_ref
+    }
+    ROLE {
+        string id
+        string systemPrompt
+    }
+    ROSTER {
+        string pipeline "completion, chat, compression, refactor"
+        list agents "ordered candidates"
+    }
+```
+
+Rules of the config layer (full spec: `agent-templates-design.md`):
+
+- `[body]` **is** the request body — field-by-field, deep-mergeable through
+  `extends`; Jinja-bearing strings render and splice as raw JSON, literals
+  pass through. No separate sampling/thinking merge machinery.
+- `include` resolves only sandboxed partial roots (bundled `:/agents/partials/`,
+  then user `partials/`); a missing partial is a load-time error.
+- Two-level hierarchy: one abstract base per provider family, thin children.
+- Per-agent model override lives in `agent_models.json` and is applied by
+  `AgentFactory`; `${MODEL}` in `endpoint` covers URL-model providers.
+- Roles are JSON managed by the Roles settings UI; profiles pull them in with
+  `{{ agent_role("<id>") }}` — the only system-prompt edit point is the
+  profile.
+- Secrets never appear in TOML; `api_key_ref` resolves through the
+  `SecretsStore` port (QtC keychain in the plugin, `.env` in bench).
+
+---
+
+## 7. Capabilities layer
+
+**ContextEngine** replaces the monolithic ContextManager with three focused
+services behind IDE-agnostic ports:
+
+| Service | Port (L2-visible) | QtC adapter |
+|---------|-------------------|-------------|
+| `EditorContext` — current doc, selection, prefix/suffix | `IDocumentReader` | TextEditor API |
+| `ProjectContext` — root, file listing, ignore filtering (`.qodeassistignore`), open files, changes | `IProjectScanner` | ProjectExplorer API |
+| `TokenEstimator` — input estimates, calibrated by server usage | — (pure) | — |
+
+**ToolKit** registers the built-in tools (U4) with the
+`ToolContributorRegistry`; each tool declares a permission class (read /
+write / execute) so per-tool enablement (settings) and confirmation policy
+(terminal commands) live in one place.
+
+**SkillsEngine** (U5): discovery + watching of the three skill roots; exposes
+`catalogText()` (names + descriptions for the system prompt),
+`alwaysOnBodies()`, and the `load_skill` tool; the `/` picker injects a
+skill's body into a single message.
+
+**McpHub** (U6): client side connects configured servers and contributes
+their tools through the same registry (tools reach every session uniformly);
+server side exposes ToolKit over HTTP/SSE + stdio bridge.
+
+---
+
+## 8. Cross-cutting policies
+
+Architecture is the rules as much as the boxes. These policies bind every
+layer and are part of the contract:
+
+### 8.1 Threading
+
+The core runs on the GUI thread; concurrency is the Qt event loop plus async
+network I/O — no shared-state threading anywhere in L1–L4. Work that can
+block (project scans, token estimation over large trees) hides behind L3
+ports; an adapter may use worker threads internally but delivers results as
+queued signals. Core types are therefore deliberately not thread-safe.
+
+### 8.2 Request lifecycle
+
+A session has at most one in-flight request; `send()` while in flight cancels
+the previous request first. Every request terminates in exactly one of three
+states — `finished(stopReason)`, `failed(error)`, `cancelled()` — and
+cancellation is *not* an error: no consumer may string-match a message to
+tell a cancellation from a failure.
+
+### 8.3 Errors
+
+Runtime errors are typed, not strings: `ErrorInfo { category, message,
+providerDetail }` with categories `Config | Auth | Network | Provider |
+Validation | Tool`. The category drives UI affordances (Auth → open provider
+settings, Network → offer retry); free text is for logs only. Load-time
+errors (principle 8) surface in the agents settings page, never as a failed
+send.
+
+### 8.4 Timeouts and retries
+
+Transfer timeouts are per-pipeline policy (completion short, chat/refactor
+from settings), applied by the feature — never baked into agent profiles. A
+streaming request is never silently retried after the first byte; automatic
+retry with capped backoff is allowed only for connection-phase failures.
+Anything beyond that is an explicit user action.
+
+### 8.5 Observability
+
+One `RequestID` correlates feature → session → provider → client → events →
+logs. Each layer logs under its own category (`qodeassist.session`,
+`qodeassist.provider`, `qodeassist.tools`, …); request bodies are logged only
+at debug level, and secrets are redacted unconditionally. `Usage` events are
+the single source feeding the token counter, `TokenEstimator` calibration,
+and the performance log.
+
+### 8.6 Config compatibility
+
+Agent profiles carry a `schema_version`; the loader migrates old user
+configs forward or rejects them with an actionable message — silent
+reinterpretation is forbidden. Bundled profiles are read-only resources that
+user profiles shadow by name. Persisted chat history is versioned the same
+way.
+
+### 8.7 Security
+
+Secrets exist only behind the `SecretsStore` port; they never reach TOML,
+logs, or persisted chats. Tool permission classes (read / write / execute)
+centralize the confirmation policy. The MCP server is opt-in and binds
+loopback by default; skill and partial roots are sandboxed — nothing resolves
+outside its declared directory.
+
+### 8.8 Testing
+
+The test pyramid follows the layers:
+
+| Layer | Strategy |
+|-------|----------|
+| L1 | loader/validator unit tests; golden-file snapshots of every bundled profile's rendered body against a synthetic context — the same check as load-time validation, run in CI |
+| L2 | `Session` / `ResponseRouter` replay tests over recorded SSE fixtures per provider; fake `BaseClient`, no network |
+| L3 | contract tests against the ports; QtC adapters covered only by plugin integration |
+| E2E | bench (U10) against live providers — the same composition the plugin uses |
+
+Layering is enforced mechanically, not by review: each layer is its own
+CMake target, and the core targets do not link Qt Creator — a violating
+include fails the build.
+
+---
+
+## 9. Module / target layout
+
+```
+core/                       # no Qt Creator linkage — bench + tests link this
+  config/                   # L1: ProviderInstance, AgentProfile, loaders,
+                            #     validators, rosters, roles, secrets port
+  providers/                # L2: Provider, GenericProvider, ProviderFactory,
+                            #     ClaudeCacheControl
+  prompt/                   # L2: JsonPromptTemplate, ContextRenderer, partials
+  agents/                   # L2: Agent, AgentFactory, AgentRouter
+  session/                  # L2: Session, SessionManager, ConversationHistory,
+                            #     SystemPromptBuilder, ResponseRouter, events
+  skills/                   # L3 (IDE-free part): SkillsEngine, loaders
+ide/                        # Qt Creator adapters only
+  context/                  # EditorContext, ProjectContext adapters, ignore
+  tools/                    # built-in ToolKit (build, issues, editor edits…)
+  mcp/                      # McpHub managers
+features/
+  completion/               # LSP bridge + CompletionFeature + CodeHandler
+  chat/                     # ChatFeature: ClientInterface, ChatModel(projection),
+                            #   Compressor, TokenCounter, FileEditController,
+                            #   serializer/store
+  refactor/                 # RefactorFeature + custom instructions
+ui/
+  ChatView qml/, widgets/, settings pages
+hosts/
+  plugin/                   # qodeassist.cpp — composition root, actions, panes
+  bench/                    # CLI composition root
+tests/
+  config/                   # loader cases + golden rendered-body snapshots
+  session/                  # SSE replay fixtures per provider, fake client
+external/
+  llmqore/ inja/ tomlplusplus/
+```
+
+Dependencies point only toward lower-numbered layers of the §3 table;
+`features/*` never include each other; `ui/*` talks only to its feature;
+`hosts/*` are the only places allowed to know about everything.
+
+---
+
+## 10. Deltas from the current working tree
+
+What "from scratch" changes relative to today's code — the migration
+checklist to call the architecture done:
+
+1. **Stack A physical teardown** — delete root `providers/*`,
+   `pluginllmcore/*`, `ConfigurationManager`, legacy provider/model/template
+   settings pages, and the Stack A registration + MCP loop in
+   `qodeassist.cpp`. Runtime already has no consumers.
+2. **Single history owner** — make `ChatModel` a projection of
+   `Session::history()` (subscribe to history signals) instead of a parallel
+   message store with seed-on-send; `ChatCompressor` reads history, not the
+   model.
+3. **Single send path** — delete `Session::sendCompletion(ContextData)`;
+   the completion context becomes user-message content sent through the one
+   `send()` (the completion handler already reads its result from history's
+   last message). Move `QuickRefactorHandler` off raw `BaseClient` signals
+   (`requestCompleted`/`requestFinalized`/`requestFailed`) onto
+   `Session::finished`/`failed` + `history().lastAssistantText()`.
+4. **Three-state request lifecycle** — add `cancelled` to `Session`; today
+   `cancel()` emits `failed(id, "Cancelled by user")` and consumers must
+   string-match to tell cancellation from failure (§8.2).
+5. **Typed errors** — replace `lastError` strings and the `failed(QString)`
+   payload with `ErrorInfo` categories (§8.3).
+6. **One agent picker** — fold `pickCompletionAgent` / `pickRefactorAgent`
+   remnants into `AgentRouter.pickAgent(roster, …)` exclusively; chat picker
+   filters to the `chatAssistant` roster.
+7. **MCP tools on session clients** — register MCP-contributed tools through
+   `ToolContributorRegistry` so chat/refactor sessions get them (today they
+   are registered only on dead Stack A providers).
+8. **Session pooling** — `SessionManager.acquire/release` with a small pool
+   per agent, replacing per-message agent + provider + secrets construction.
+9. **ContextManager split** — extract `EditorContext` / `ProjectContext` /
+   `TokenEstimator` behind ports; move QtC API use into `ide/context`.
+10. **`[body]` model completion** — finish `agent-templates-design.md`
+    (body-table rendering, sandboxed `include`, load-time validation, model
+    override + `${MODEL}`, `schema_version` gate), delete sampling/thinking
+    merge machinery.
+11. **Message type unification** — one `Message`/`ContentBlock` shape from
+    history to QML (roles, text, thinking, tool use/result, images); delete
+    the parallel `ChatModel::Message` struct.
+12. **Test scaffolding** — golden rendered-body snapshots + SSE replay
+    fixtures (§8.8); CI builds the core targets without Qt Creator so a
+    layering violation fails the build.
+13. **Stale docs cleanup** — `project-rules.md` describes the removed Rules
+    system; mark it obsolete or delete it.
--- a/settings/AgentDetailPane.cpp
+++ b/settings/AgentDetailPane.cpp
@@ -227,15 +227,15 @@ AgentDetailPane::AgentDetailPane(QWidget *parent)
        tr("Jinja2 template (via inja) rendered to the request body. "
           "Built-in context: ctx.prefix, ctx.suffix, ctx.history, "
           "ctx.system_prompt, agent.model."));
-    m_messageFormat = new QPlainTextEdit(this);
-    m_messageFormat->setReadOnly(true);
-    m_messageFormat->setFont(monospaceFont(11));
-    m_messageFormat->setMinimumHeight(140);
+    m_bodyView = new QPlainTextEdit(this);
+    m_bodyView->setReadOnly(true);
+    m_bodyView->setFont(monospaceFont(11));
+    m_bodyView->setMinimumHeight(140);

    templ->bodyLayout()->addWidget(templHint);
    auto *mfLabel = new QLabel(tr("body:"), this);
    templ->bodyLayout()->addWidget(mfLabel);
-    templ->bodyLayout()->addWidget(m_messageFormat);
+    templ->bodyLayout()->addWidget(m_bodyView);

    m_diagnostics = new SectionBox(tr("Load errors"), this);
    m_diagnosticsView = new QPlainTextEdit(this);
@@ -378,7 +378,7 @@ void AgentDetailPane::setAgent(const AgentConfig &cfg)
    m_filePatternsValue->setText(cfg.match.filePatterns.join(QStringLiteral(", ")));
    m_filePatternsValue->setPlaceholderText(tr("(matches every file)"));

-    m_messageFormat->setPlainText(
+    m_bodyView->setPlainText(
        cfg.body.isEmpty()
            ? tr("(inherited from parent / none)")
            : QString::fromUtf8(QJsonDocument(cfg.body).toJson(QJsonDocument::Indented)));
@@ -434,7 +434,7 @@ void AgentDetailPane::clear()
    m_effectiveUrl->clear();
    m_roleText->clear();
    m_filePatternsValue->clear();
-    m_messageFormat->clear();
+    m_bodyView->clear();
    m_rawToml->clear();
    m_openBtn->setEnabled(false);
    m_dupBtn->setEnabled(false);
--- a/settings/AgentDetailPane.hpp
+++ b/settings/AgentDetailPane.hpp
@@ -85,7 +85,7 @@ private:
    QLineEdit *m_filePatternsValue = nullptr;

    QPlainTextEdit *m_roleText = nullptr;
-    QPlainTextEdit *m_messageFormat = nullptr;
+    QPlainTextEdit *m_bodyView = nullptr;

    SectionBox *m_diagnostics = nullptr;
    QPlainTextEdit *m_diagnosticsView = nullptr;
--- a/sources/Session/CMakeLists.txt
+++ b/sources/Session/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(Session STATIC
    MessageSerializer.hpp MessageSerializer.cpp
    PluginBlocks.hpp
    LLMRequest.hpp
+    ErrorInfo.hpp
    ResponseEvent.hpp
    ConversationHistory.hpp ConversationHistory.cpp
    ResponseRouter.hpp ResponseRouter.cpp
--- a/sources/Session/ErrorInfo.hpp
+++ b/sources/Session/ErrorInfo.hpp
@@ -0,0 +1,61 @@
+// Copyright (C) 2024-2026 Petr Mironychev
+// SPDX-License-Identifier: GPL-3.0-or-later
+// Additional attribution terms under GPLv3 §7(b) apply — see LICENSE
+
+#pragma once
+
+#include <QMetaType>
+#include <QString>
+
+#include <utility>
+
+namespace QodeAssist {
+
+enum class ErrorCategory { // Coarse error classification carried by ErrorInfo.
+    Config, // e.g. sending on a session that is not valid
+    Auth, // credential / permission problems (401, 403, bad API key, ...)
+    Network, // transport problems (timeouts, refused or unreachable connections, SSL)
+    Provider, // default bucket when nothing more specific applies
+    Validation, // e.g. a send request with nothing to send
+    Tool, // NOTE(review): presumably tool-execution failures — not used in the visible code; confirm
+};
+
+struct ErrorInfo // Typed error value: a category plus descriptive text.
+{
+    ErrorCategory category = ErrorCategory::Provider; // default when no category is given
+    QString message; // primary error text; also decides isEmpty()
+    QString providerDetail; // optional extra detail; empty unless explicitly supplied
+
+    bool isEmpty() const noexcept { return message.isEmpty(); } // true when message is empty; category and providerDetail are not consulted
+};
+
+[[nodiscard]] inline ErrorInfo makeError( // Convenience factory for an ErrorInfo.
+    ErrorCategory category, QString message, QString providerDetail = QString()) // providerDetail defaults to an empty string
+{
+    return ErrorInfo{category, std::move(message), std::move(providerDetail)}; // strings are taken by value and moved into the result
+}
+
+[[nodiscard]] inline ErrorCategory categorizeProviderError(const QString &raw) // Heuristic: maps a raw error string to an ErrorCategory by substring matching.
+{
+    const QString text = raw.toLower(); // lower-case once so all needles below can be written in lower case
+
+    const auto contains = [&text](const char *needle) { // helper: does the lowered text contain this needle?
+        return text.contains(QLatin1String(needle));
+    };
+
+    if (contains("401") || contains("403") || contains("unauthorized") // auth is tested first, so it wins if both groups match
+        || contains("forbidden") || contains("api key") || contains("apikey") // NOTE: "401"/"403" match anywhere in the text, even inside a longer number
+        || contains("authentication") || contains("invalid token"))
+        return ErrorCategory::Auth;
+
+    if (contains("timeout") || contains("timed out") || contains("connection") // transport-level symptoms
+        || contains("could not resolve") || contains("unreachable")
+        || contains("network") || contains("ssl") || contains("refused"))
+        return ErrorCategory::Network;
+
+    return ErrorCategory::Provider; // anything unrecognised is attributed to the provider
+}
+
+} // namespace QodeAssist
+
+Q_DECLARE_METATYPE(QodeAssist::ErrorInfo)
--- a/sources/Session/ResponseEvent.hpp
+++ b/sources/Session/ResponseEvent.hpp
@@ -9,6 +9,8 @@

 #include <variant>

+#include "ErrorInfo.hpp"
+
 namespace QodeAssist {

 namespace ResponseEvents {
@@ -45,6 +47,7 @@ struct ToolCallEnd
 struct ToolResult
 {
    QString toolUseId;
+    QString name;
    QString text;
    bool isError = false;
 };
@@ -53,11 +56,14 @@ struct Usage
 {
    int inputTokens = 0;
    int outputTokens = 0;
+    int cachedTokens = 0;
+    int reasoningTokens = 0;
 };

 struct Error
 {
    QString message;
+    ErrorCategory category = ErrorCategory::Provider;
 };

 struct MessageStop
@@ -128,21 +134,27 @@ public:
            Kind::ToolCallEnd, ResponseEvents::ToolCallEnd{std::move(id), std::move(finalArgs)}};
    }

-    static ResponseEvent toolResult(QString toolUseId, QString text, bool isError = false)
+    static ResponseEvent toolResult(
+        QString toolUseId, QString name, QString text, bool isError = false)
    {
        return {
            Kind::ToolResult,
-            ResponseEvents::ToolResult{std::move(toolUseId), std::move(text), isError}};
+            ResponseEvents::ToolResult{
+                std::move(toolUseId), std::move(name), std::move(text), isError}};
    }

-    static ResponseEvent usage(int inputTokens, int outputTokens)
+    static ResponseEvent usage(
+        int inputTokens, int outputTokens, int cachedTokens = 0, int reasoningTokens = 0)
    {
-        return {Kind::Usage, ResponseEvents::Usage{inputTokens, outputTokens}};
+        return {
+            Kind::Usage,
+            ResponseEvents::Usage{inputTokens, outputTokens, cachedTokens, reasoningTokens}};
    }

-    static ResponseEvent error(QString message)
+    static ResponseEvent error(
+        QString message, ErrorCategory category = ErrorCategory::Provider)
    {
-        return {Kind::Error, ResponseEvents::Error{std::move(message)}};
+        return {Kind::Error, ResponseEvents::Error{std::move(message), category}};
    }

 private:
--- a/sources/Session/ResponseRouter.cpp
+++ b/sources/Session/ResponseRouter.cpp
@@ -79,7 +79,7 @@ void ResponseRouter::ensureAssistantOpen()
    if (m_assistantOpen && !m_inToolResults)
        return;
    if (m_history)
-        m_history->append(Message(Message::Role::Assistant));
+        m_history->append(Message(Message::Role::Assistant, m_activeId));
    emit event(ResponseEvent::messageStart());
    m_assistantOpen = true;
    m_inToolResults = false;
@@ -107,15 +107,19 @@ void ResponseRouter::onThinking(
 }

 void ResponseRouter::onToolStarted(
-    const LLMQore::RequestID &id, const QString &toolId, const QString &toolName)
+    const LLMQore::RequestID &id,
+    const QString &toolId,
+    const QString &toolName,
+    const QJsonObject &arguments)
 {
    if (id != m_activeId)
        return;
    ensureAssistantOpen();
    if (m_history)
        m_history->appendBlockToLast(
-            std::make_unique<LLMQore::ToolUseContent>(toolId, toolName));
+            std::make_unique<LLMQore::ToolUseContent>(toolId, toolName, arguments));
    emit event(ResponseEvent::toolCallStart(toolId, toolName));
+    emit event(ResponseEvent::toolCallEnd(toolId, arguments));
 }

 void ResponseRouter::onToolResultReady(
@@ -124,7 +128,6 @@ void ResponseRouter::onToolResultReady(
    const QString &toolName,
    const QString &result)
 {
-    Q_UNUSED(toolName);
    if (id != m_activeId)
        return;

@@ -141,7 +144,7 @@ void ResponseRouter::onToolResultReady(

    m_assistantOpen = false;
    m_inToolResults = true;
-    emit event(ResponseEvent::toolResult(toolId, result, /*isError=*/false));
+    emit event(ResponseEvent::toolResult(toolId, toolName, result, /*isError=*/false));
 }

 void ResponseRouter::onFinalized(
@@ -149,6 +152,13 @@ void ResponseRouter::onFinalized(
 {
    if (id != m_activeId)
        return;
+    if (info.usage) {
+        emit event(ResponseEvent::usage(
+            info.usage->promptTokens,
+            info.usage->completionTokens,
+            info.usage->cachedPromptTokens,
+            info.usage->reasoningTokens));
+    }
    emit event(ResponseEvent::messageStop(info.stopReason));
    endRequest();
 }
@@ -157,7 +167,7 @@ void ResponseRouter::onFailed(const LLMQore::RequestID &id, const QString &err)
 {
    if (id != m_activeId)
        return;
-    emit event(ResponseEvent::error(err));
+    emit event(ResponseEvent::error(err, categorizeProviderError(err)));
    endRequest();
 }

--- a/sources/Session/ResponseRouter.hpp
+++ b/sources/Session/ResponseRouter.hpp
@@ -6,6 +6,7 @@

 #include <LLMQore/BaseClient.hpp>

+#include <QJsonObject>
 #include <QObject>
 #include <QPointer>
 #include <QString>
@@ -41,7 +42,10 @@ private slots:
    void onThinking(
        const LLMQore::RequestID &id, const QString &thinking, const QString &signature);
    void onToolStarted(
-        const LLMQore::RequestID &id, const QString &toolId, const QString &toolName);
+        const LLMQore::RequestID &id,
+        const QString &toolId,
+        const QString &toolName,
+        const QJsonObject &arguments);
    void onToolResultReady(
        const LLMQore::RequestID &id,
        const QString &toolId,
--- a/sources/Session/Session.cpp
+++ b/sources/Session/Session.cpp
@@ -36,15 +36,9 @@ QString roleToLegacyString(Message::Role role)
    return QStringLiteral("user");
 }

-} // namespace
+[[maybe_unused]] const int kErrorInfoMetaTypeId = qRegisterMetaType<QodeAssist::ErrorInfo>();

-Session::Session(QObject *parent)
-    : QObject(parent)
-    , m_history(new ConversationHistory(this))
-    , m_systemPrompt(new SystemPromptBuilder(this))
-{
-    m_invalidReason = QStringLiteral("Session: no agent attached");
-}
+} // namespace

 Session::Session(Agent *agent, QObject *parent)
    : Session(agent, /*externalHistory=*/nullptr, parent)
@@ -86,7 +80,7 @@ Session::Session(Agent *agent, ConversationHistory *externalHistory, QObject *pa
 Session::~Session()
 {
    if (isInFlight())
-        cancel();
+        teardownInFlight();
 }

 bool Session::isValid() const noexcept
@@ -104,6 +98,11 @@ bool Session::isInFlight() const noexcept
    return !m_inFlight.isEmpty();
 }

+const ErrorInfo &Session::lastError() const noexcept
+{
+    return m_lastError;
+}
+
 LLMQore::BaseClient *Session::client() const noexcept
 {
    auto *provider = m_agent ? m_agent->provider() : nullptr;
@@ -127,21 +126,6 @@ void Session::setContextBindings(Templates::ContextRenderer::Bindings bindings)
    m_contextBindings = std::move(bindings);
 }

-QString Session::renderAgentContext() const
-{
-    if (!m_agent)
-        return {};
-    const auto &cfg = m_agent->config();
-    if (cfg.systemPrompt.isEmpty())
-        return {};
-    QString err;
-    QString rendered
-        = Templates::ContextRenderer::render(cfg.systemPrompt, m_contextBindings, &err);
-    if (!err.isEmpty())
-        qWarning("[QodeAssist] agent.system render failed: %s", qUtf8Printable(err));
-    return rendered;
-}
-
 LLMQore::RequestID Session::sendText(const QString &text)
 {
    std::vector<std::unique_ptr<LLMQore::ContentBlock>> blocks;
@@ -152,22 +136,27 @@ LLMQore::RequestID Session::sendText(const QString &text)

 LLMQore::RequestID Session::sendCompletion(Templates::ContextData ctx)
 {
-    if (!isValid())
+    if (!isValid()) {
+        m_lastError = makeError(ErrorCategory::Config, invalidReason());
        return {};
+    }
    if (isInFlight())
        cancel();
-    return dispatchContext(std::move(ctx), /*tools=*/false, /*thinking=*/false);
+    return dispatchContext(std::move(ctx), /*tools=*/false);
 }

 LLMQore::RequestID Session::send(
    std::vector<std::unique_ptr<LLMQore::ContentBlock>> userBlocks,
-    std::optional<bool> toolsOverride,
-    std::optional<bool> thinkingOverride)
+    std::optional<bool> toolsOverride)
 {
-    if (!isValid() || userBlocks.empty())
+    if (!isValid()) {
+        m_lastError = makeError(ErrorCategory::Config, invalidReason());
        return {};
-    if (!m_history)
+    }
+    if (userBlocks.empty() || !m_history) {
+        m_lastError = makeError(ErrorCategory::Validation, QStringLiteral("Session: nothing to send"));
        return {};
+    }

    if (isInFlight())
        cancel();
@@ -177,10 +166,20 @@ LLMQore::RequestID Session::send(
        msg.appendBlock(std::move(b));
    m_history->append(std::move(msg));

-    return dispatch(toolsOverride, thinkingOverride);
+    return dispatch(toolsOverride);
 }

 void Session::cancel()
+{
+    if (m_inFlight.isEmpty())
+        return;
+
+    const auto id = m_inFlight;
+    teardownInFlight();
+    emit cancelled(id);
+}
+
+void Session::teardownInFlight()
 {
    if (m_inFlight.isEmpty())
        return;
@@ -191,41 +190,61 @@ void Session::cancel()
        m_router->endRequest();
    if (m_agent && m_agent->provider())
        m_agent->provider()->cancelRequest(id);
-    emit failed(id, QStringLiteral("Cancelled by user"));
 }

-LLMQore::RequestID Session::dispatch(
-    std::optional<bool> toolsOverride, std::optional<bool> thinkingOverride)
+LLMQore::RequestID Session::dispatch(std::optional<bool> toolsOverride)
 {
    const auto &cfg = m_agent->config();

-    const QString renderedContext = renderAgentContext();
-    if (renderedContext.isEmpty())
+    if (cfg.systemPrompt.isEmpty()) {
        m_systemPrompt->clearLayer(QStringLiteral("agent.system"));
-    else
-        m_systemPrompt->setLayer(QStringLiteral("agent.system"), renderedContext);
+    } else {
+        QString renderErr;
+        const QString renderedContext = Templates::ContextRenderer::render(
+            cfg.systemPrompt, m_contextBindings, &renderErr);
+        if (!renderErr.isEmpty()) {
+            m_lastError = makeError(
+                ErrorCategory::Validation,
+                QStringLiteral("Agent '%1' system_prompt render failed: %2")
+                    .arg(cfg.name, renderErr));
+            qWarning("[QodeAssist] %s", qUtf8Printable(m_lastError.message));
+            return {};
+        }
+        if (renderedContext.isEmpty())
+            m_systemPrompt->clearLayer(QStringLiteral("agent.system"));
+        else
+            m_systemPrompt->setLayer(
+                QStringLiteral("agent.system"), renderedContext, SystemPromptBuilder::kAgentPriority);
+    }

    const bool tools = toolsOverride.value_or(cfg.enableTools);
-    const bool thinking = thinkingOverride.value_or(cfg.enableThinking);
-    return dispatchContext(toLegacyContext(), tools, thinking);
+    return dispatchContext(toLegacyContext(), tools);
 }

-LLMQore::RequestID Session::dispatchContext(
-    Templates::ContextData ctx, bool tools, bool thinking)
+LLMQore::RequestID Session::dispatchContext(Templates::ContextData ctx, bool tools)
 {
+    m_lastError = {};
+
    auto *provider = m_agent->provider();
    auto *tmpl = m_agent->promptTemplate();
    const auto &cfg = m_agent->config();

    QJsonObject payload{{QStringLiteral("model"), cfg.model}};
-    if (!provider->prepareRequest(payload, tmpl, ctx, tools, thinking))
+    QString prepareErr;
+    if (!provider->prepareRequest(payload, tmpl, ctx, tools, &prepareErr)) {
+        m_lastError = makeError(ErrorCategory::Validation, prepareErr, prepareErr);
        return {};
+    }

    QString endpoint = cfg.endpoint;
    endpoint.replace(QStringLiteral("${MODEL}"), cfg.model);
    const auto id = provider->sendRequest(QUrl(provider->url()), payload, endpoint);
-    if (id.isEmpty())
+    if (id.isEmpty()) {
+        m_lastError = makeError(
+            ErrorCategory::Provider,
+            QStringLiteral("Provider '%1' failed to start the request").arg(provider->name()));
        return {};
+    }

    m_inFlight = id;
    if (m_router)
@@ -389,9 +408,11 @@ void Session::onRouterEvent(const ResponseEvent &ev)
    } else if (ev.kind() == ResponseEvent::Kind::Error) {
        const auto *err = ev.as<ResponseEvents::Error>();
        const QString msg = err ? err->message : QStringLiteral("unknown error");
+        const ErrorCategory category = err ? err->category : ErrorCategory::Provider;
+        m_lastError = makeError(category, msg, msg);
        const auto id = m_inFlight;
        m_inFlight.clear();
-        emit failed(id, msg);
+        emit failed(id, m_lastError);
    }
 }

--- a/sources/Session/Session.hpp
+++ b/sources/Session/Session.hpp
@@ -20,6 +20,7 @@
 #include <vector>

 #include "ConversationHistory.hpp"
+#include "ErrorInfo.hpp"
 #include "ResponseEvent.hpp"

 namespace QodeAssist {
@@ -33,8 +34,6 @@ class Session : public QObject
    Q_OBJECT
    Q_DISABLE_COPY_MOVE(Session)
 public:
-    explicit Session(QObject *parent = nullptr);
-
    Session(
        Agent *agent,
        ConversationHistory *externalHistory = nullptr,
@@ -47,6 +46,7 @@ public:
    bool isValid() const noexcept;
    QString invalidReason() const;
    bool isInFlight() const noexcept;
+    const ErrorInfo &lastError() const noexcept;

    using ContentLoader = std::function<QString(const QString &storedPath)>;
    void setContentLoader(ContentLoader loader);
@@ -60,12 +60,9 @@ public:

    void setContextBindings(Templates::ContextRenderer::Bindings bindings);

-    QString renderAgentContext() const;
-
    LLMQore::RequestID send(
        std::vector<std::unique_ptr<LLMQore::ContentBlock>> userBlocks,
-        std::optional<bool> toolsOverride = std::nullopt,
-        std::optional<bool> thinkingOverride = std::nullopt);
+        std::optional<bool> toolsOverride = std::nullopt);

    LLMQore::RequestID sendText(const QString &text);

@@ -78,16 +75,16 @@ signals:

    void started(const LLMQore::RequestID &id);
    void finished(const LLMQore::RequestID &id, const QString &stopReason);
-    void failed(const LLMQore::RequestID &id, const QString &error);
+    void failed(const LLMQore::RequestID &id, const QodeAssist::ErrorInfo &error);
+    void cancelled(const LLMQore::RequestID &id);

 private slots:
    void onRouterEvent(const QodeAssist::ResponseEvent &ev);

 private:
-    LLMQore::RequestID dispatch(
-        std::optional<bool> toolsOverride = std::nullopt,
-        std::optional<bool> thinkingOverride = std::nullopt);
-    LLMQore::RequestID dispatchContext(Templates::ContextData ctx, bool tools, bool thinking);
+    LLMQore::RequestID dispatch(std::optional<bool> toolsOverride = std::nullopt);
+    LLMQore::RequestID dispatchContext(Templates::ContextData ctx, bool tools);
+    void teardownInFlight();
    Templates::ContextData toLegacyContext() const;

    Agent *m_agent = nullptr;                              // child if non-null
@@ -97,17 +94,16 @@ private:

    LLMQore::RequestID m_inFlight;
    QString m_invalidReason;
+    ErrorInfo m_lastError;

    Templates::ContextRenderer::Bindings m_contextBindings;
+    ContentLoader m_contentLoader;

 public:
    static Templates::ContextData buildLegacyContext(
        const std::vector<Message> &history,
        const QString &systemPrompt,
        const ContentLoader &loader = ContentLoader{});
-
-private:
-    ContentLoader m_contentLoader;
 };

 } // namespace QodeAssist
--- a/sources/Session/SessionManager.cpp
+++ b/sources/Session/SessionManager.cpp
@@ -10,10 +10,6 @@

 namespace QodeAssist {

-SessionManager::SessionManager(QObject *parent)
-    : QObject(parent)
-{}
-
 SessionManager::SessionManager(AgentFactory *agentFactory, QObject *parent)
    : QObject(parent)
    , m_agentFactory(agentFactory)
@@ -21,14 +17,6 @@ SessionManager::SessionManager(AgentFactory *agentFactory, QObject *parent)

 SessionManager::~SessionManager() = default;

-Session *SessionManager::createSession()
-{
-    auto *session = new Session(this);
-    m_sessions.append(session);
-    emit sessionCreated(session);
-    return session;
-}
-
 Session *SessionManager::createSession(const QString &agentName, QString *errorOut)
 {
    return createSession(agentName, /*externalHistory=*/nullptr, errorOut);
--- a/sources/Session/SessionManager.hpp
+++ b/sources/Session/SessionManager.hpp
@@ -22,14 +22,10 @@ class SessionManager : public QObject
    Q_OBJECT
    Q_DISABLE_COPY_MOVE(SessionManager)
 public:
-    explicit SessionManager(QObject *parent = nullptr);
-
-    SessionManager(AgentFactory *agentFactory, QObject *parent = nullptr);
+    explicit SessionManager(AgentFactory *agentFactory, QObject *parent = nullptr);

    ~SessionManager() override;

-    Session *createSession();
-
    Session *createSession(const QString &agentName, QString *errorOut = nullptr);

    Session *createSession(
--- a/sources/Session/SystemPromptBuilder.cpp
+++ b/sources/Session/SystemPromptBuilder.cpp
@@ -4,30 +4,34 @@

 #include "SystemPromptBuilder.hpp"

+#include <algorithm>
+
 namespace QodeAssist {

 SystemPromptBuilder::SystemPromptBuilder(QObject *parent)
    : QObject(parent)
 {}

-void SystemPromptBuilder::setLayer(const QString &name, const QString &text)
+void SystemPromptBuilder::setLayer(const QString &name, const QString &text, int priority)
 {
-    for (auto &pair : m_layers) {
-        if (pair.first == name) {
-            if (pair.second == text) return;
-            pair.second = text;
+    for (auto &layer : m_layers) {
+        if (layer.name == name) {
+            if (layer.text == text && layer.priority == priority)
+                return;
+            layer.text = text;
+            layer.priority = priority;
            emit layersChanged();
            return;
        }
    }
-    m_layers.append({name, text});
+    m_layers.append({name, text, priority});
    emit layersChanged();
 }

 void SystemPromptBuilder::clearLayer(const QString &name)
 {
    for (auto it = m_layers.begin(); it != m_layers.end(); ++it) {
-        if (it->first == name) {
+        if (it->name == name) {
            m_layers.erase(it);
            emit layersChanged();
            return;
@@ -44,8 +48,8 @@ void SystemPromptBuilder::clear()

 QString SystemPromptBuilder::layer(const QString &name) const
 {
-    for (const auto &pair : m_layers) {
-        if (pair.first == name) return pair.second;
+    for (const auto &l : m_layers) {
+        if (l.name == name) return l.text;
    }
    return {};
 }
@@ -54,17 +58,22 @@ QStringList SystemPromptBuilder::layerNames() const
 {
    QStringList out;
    out.reserve(m_layers.size());
-    for (const auto &pair : m_layers) out.append(pair.first);
+    for (const auto &l : m_layers) out.append(l.name);
    return out;
 }

 QString SystemPromptBuilder::compose(const QString &separator) const
 {
+    QVector<Layer> ordered = m_layers;
+    std::stable_sort(
+        ordered.begin(), ordered.end(),
+        [](const Layer &a, const Layer &b) { return a.priority < b.priority; });
+
    QStringList parts;
-    parts.reserve(m_layers.size());
-    for (const auto &pair : m_layers) {
-        if (!pair.second.isEmpty())
-            parts.append(pair.second);
+    parts.reserve(ordered.size());
+    for (const auto &l : ordered) {
+        if (!l.text.isEmpty())
+            parts.append(l.text);
    }
    return parts.join(separator);
 }
--- a/sources/Session/SystemPromptBuilder.hpp
+++ b/sources/Session/SystemPromptBuilder.hpp
@@ -15,9 +15,12 @@ class SystemPromptBuilder : public QObject
 {
    Q_OBJECT
 public:
+    static constexpr int kAgentPriority = 0;
+    static constexpr int kDefaultPriority = 100;
+
    explicit SystemPromptBuilder(QObject *parent = nullptr);

-    void setLayer(const QString &name, const QString &text);
+    void setLayer(const QString &name, const QString &text, int priority = kDefaultPriority);
    void clearLayer(const QString &name);
    void clear();

@@ -31,7 +34,14 @@ signals:
    void layersChanged();

 private:
-    QVector<QPair<QString, QString>> m_layers;
+    struct Layer
+    {
+        QString name;
+        QString text;
+        int priority = kDefaultPriority;
+    };
+
+    QVector<Layer> m_layers;
 };

 } // namespace QodeAssist
--- a/sources/agents/Agent.cpp
+++ b/sources/agents/Agent.cpp
@@ -55,6 +55,8 @@ Agent::Agent(AgentConfig config, Providers::Provider *providerOwned, QObject *pa
        return;
    }
    m_provider->setParent(this);
+    m_provider->setPromptCaching(
+        m_config.cachePrompt, m_config.cacheTtl == QLatin1StringView{"1h"});

    QString tmplErr;
    m_promptTemplate = JsonPromptTemplate::fromConfig(m_config, &tmplErr);
--- a/sources/agents/AgentConfig.hpp
+++ b/sources/agents/AgentConfig.hpp
@@ -39,6 +39,8 @@ struct AgentConfig

    bool enableThinking = false;
    bool enableTools = false;
+    bool cachePrompt = false;
+    QString cacheTtl;

    QJsonObject body;
    QString extendsName;
--- a/sources/agents/AgentFactory.cpp
+++ b/sources/agents/AgentFactory.cpp
@@ -194,7 +194,7 @@ Agent *AgentFactory::createFromFile(
 {
    QString parseErr;
    QStringList warnings;
-    auto cfgOpt = Agents::AgentLoader::parseFile(tomlPath, &parseErr, &warnings);
+    auto cfgOpt = Agents::AgentLoader::parseFile(tomlPath, agentQrcPrefix(), &parseErr, &warnings);
    if (!cfgOpt) {
        if (errorOut) *errorOut = parseErr;
        return nullptr;
--- a/sources/agents/AgentLoader.cpp
+++ b/sources/agents/AgentLoader.cpp
@@ -6,6 +6,7 @@

 #include <QDir>
 #include <QFile>
+#include <QFileInfo>
 #include <QHash>
 #include <QJsonArray>
 #include <QJsonDocument>
@@ -123,6 +124,8 @@ AgentConfig configFromMerged(const QJsonObject &obj)
    cfg.systemPrompt = obj.value("system_prompt").toString();
    cfg.enableThinking = obj.value("enable_thinking").toBool(false);
    cfg.enableTools    = obj.value("enable_tools").toBool(false);
+    cfg.cachePrompt    = obj.value("cache_prompt").toBool(false);
+    cfg.cacheTtl       = obj.value("cache_ttl").toString();
    cfg.tags        = stringArray(obj.value("tags"));

    const QJsonObject matchObj = obj.value("match").toObject();
@@ -147,6 +150,34 @@ struct RawEntry

 constexpr int kMaxExtendsDepth = 32;

+void scanDir(
+    const QString &dir,
+    bool isUserLayer,
+    QHash<QString, RawEntry> &raw,
+    QStringList &errors)
+{
+    if (dir.isEmpty()) return;
+    QDir d(dir);
+    if (!d.exists()) return;
+    const QStringList files = d.entryList({"*.toml"}, QDir::Files);
+    for (const QString &fname : files) {
+        const QString fullPath = d.filePath(fname);
+        QString err;
+        auto objOpt = parseTomlFile(fullPath, &err);
+        if (!objOpt) {
+            errors.append(err);
+            continue;
+        }
+        const QString name = objOpt->value("name").toString();
+        if (name.isEmpty()) {
+            errors.append(QStringLiteral("Agent at %1 has no 'name'").arg(fullPath));
+            continue;
+        }
+        const bool overrides = isUserLayer && raw.contains(name);
+        raw.insert(name, {*objOpt, fullPath, overrides});
+    }
+}
+
 QJsonObject resolveExtends(
    const QString &name,
    const QHash<QString, RawEntry> &raw,
@@ -190,12 +221,47 @@ QJsonObject resolveExtends(
 } // namespace

 std::optional<AgentConfig> AgentLoader::parseFile(
-    const QString &path, QString *error, QStringList * /*warnings*/)
+    const QString &path,
+    const QString &qrcPrefix,
+    QString *error,
+    QStringList * /*warnings*/)
 {
    auto objOpt = parseTomlFile(path, error);
    if (!objOpt) return std::nullopt;
-    AgentConfig cfg = configFromMerged(*objOpt);
+
+    const QString name = objOpt->value("name").toString();
+    if (name.isEmpty()) {
+        if (error) *error = QStringLiteral("Agent at %1 has no 'name'").arg(path);
+        return std::nullopt;
+    }
+
+    QHash<QString, RawEntry> raw;
+    QStringList scanErrors;
+    scanDir(qrcPrefix, /*isUserLayer=*/false, raw, scanErrors);
+    scanDir(QFileInfo(path).absolutePath(), /*isUserLayer=*/true, raw, scanErrors);
+    raw.insert(name, {*objOpt, path, raw.contains(name)});
+
+    QSet<QString> visiting;
+    QStringList resolveErrors;
+    const QJsonObject merged = resolveExtends(name, raw, visiting, resolveErrors);
+    if (!resolveErrors.isEmpty() || merged.isEmpty()) {
+        if (error) {
+            *error = resolveErrors.isEmpty()
+                         ? QStringLiteral("Agent '%1' resolved to an empty config").arg(name)
+                         : resolveErrors.join(QStringLiteral("; "));
+        }
+        return std::nullopt;
+    }
+
+    AgentConfig cfg = configFromMerged(merged);
    cfg.sourcePath = path;
+    if (cfg.abstract) {
+        if (error) {
+            *error = QStringLiteral("Agent '%1' is abstract — extend it instead of "
+                                    "loading it directly").arg(name);
+        }
+        return std::nullopt;
+    }
    return cfg;
 }

@@ -204,31 +270,8 @@ AgentLoader::LoadResult AgentLoader::load(const QString &qrcPrefix, const QStrin
    LoadResult result;
    QHash<QString, RawEntry> raw;

-    auto scan = [&](const QString &dir, bool isUserLayer) {
-        if (dir.isEmpty()) return;
-        QDir d(dir);
-        if (!d.exists()) return;
-        const QStringList files = d.entryList({"*.toml"}, QDir::Files);
-        for (const QString &fname : files) {
-            const QString fullPath = d.filePath(fname);
-            QString err;
-            auto objOpt = parseTomlFile(fullPath, &err);
-            if (!objOpt) {
-                result.errors.append(err);
-                continue;
-            }
-            const QString name = objOpt->value("name").toString();
-            if (name.isEmpty()) {
-                result.errors.append(QStringLiteral("Agent at %1 has no 'name'").arg(fullPath));
-                continue;
-            }
-            const bool overrides = isUserLayer && raw.contains(name);
-            raw.insert(name, {*objOpt, fullPath, overrides});
-        }
-    };
-
-    scan(qrcPrefix, /*isUserLayer=*/false);
-    scan(userDir,   /*isUserLayer=*/true);
+    scanDir(qrcPrefix, /*isUserLayer=*/false, raw, result.errors);
+    scanDir(userDir,   /*isUserLayer=*/true,  raw, result.errors);

    for (auto it = raw.constBegin(); it != raw.constEnd(); ++it) {
        const QString &name = it.key();
--- a/sources/agents/AgentLoader.hpp
+++ b/sources/agents/AgentLoader.hpp
@@ -25,7 +25,10 @@ public:
    static LoadResult load(const QString &qrcPrefix, const QString &userDir);

    static std::optional<AgentConfig> parseFile(
-        const QString &path, QString *error, QStringList *warnings = nullptr);
+        const QString &path,
+        const QString &qrcPrefix,
+        QString *error,
+        QStringList *warnings = nullptr);
 };

 } // namespace QodeAssist::Agents
--- a/sources/agents/anthropic_base_chat.toml
+++ b/sources/agents/anthropic_base_chat.toml
@@ -7,6 +7,7 @@ abstract    = true
 provider_instance = "Claude"
 endpoint          = "/v1/messages"
 enable_tools      = true
+cache_prompt      = true
 tags = ["chat", "claude", "anthropic", "cloud"]

 system_prompt = """{{ agent_role() }}"""
--- a/sources/agents/partials/anthropic_messages.jinja
+++ b/sources/agents/partials/anthropic_messages.jinja
@@ -3,8 +3,17 @@
  "role": {{ tojson(msg.role) }},
  "content": [
    {% for b in msg.content_blocks %}
-      {% if b.type == "image" %}{% include "partials/anthropic_image.jinja" %}
-      {% else %}{{ tojson(b) }},
+      {% if b.type == "text" %}
+      { "type": "text", "text": {{ tojson(b.text) }} },
+      {% else if b.type == "thinking" %}
+      { "type": "thinking", "thinking": {{ tojson(b.thinking) }}, "signature": {{ tojson(b.signature) }} },
+      {% else if b.type == "redacted_thinking" %}
+      { "type": "redacted_thinking", "data": {{ tojson(b.data) }} },
+      {% else if b.type == "tool_use" %}
+      { "type": "tool_use", "id": {{ tojson(b.id) }}, "name": {{ tojson(b.name) }}, "input": {{ tojson(b.input) }} },
+      {% else if b.type == "tool_result" %}
+      { "type": "tool_result", "tool_use_id": {{ tojson(b.tool_use_id) }}, "content": {{ tojson(b.content) }} },
+      {% else if b.type == "image" %}{% include "partials/anthropic_image.jinja" %}
      {% endif %}
    {% endfor %}
  ]
--- a/sources/agents/partials/ollama_messages.jinja
+++ b/sources/agents/partials/ollama_messages.jinja
@@ -2,15 +2,36 @@
 { "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
 {% endif %}
 {% for msg in ctx.history %}
-{
-  "role": {{ tojson(msg.role) }},
-  "content": {{ tojson(msg.content) }}
-  {% if existsIn(msg, "images") %}
-  , "images": [
-    {% for img in msg.images %}
-    {{ tojson(img.data) }},
+  {% set tcalls = filter_by_type(msg.content_blocks, "tool_use") %}
+  {% set tresults = filter_by_type(msg.content_blocks, "tool_result") %}
+  {% if length(tresults) > 0 %}
+    {% for b in tresults %}
+    {
+      "role": "tool",
+      "content": {{ tojson(b.content) }}
+      {% if b.name != "" %}
+      , "tool_name": {{ tojson(b.name) }}
+      {% endif %}
+    },
    {% endfor %}
-  ]
+  {% else %}
+  {
+    "role": {{ tojson(msg.role) }},
+    "content": {{ tojson(msg.content) }}
+    {% if length(tcalls) > 0 %}
+    , "tool_calls": [
+      {% for b in tcalls %}
+      { "type": "function", "function": { "name": {{ tojson(b.name) }}, "arguments": {{ tojson(b.input) }} } },
+      {% endfor %}
+    ]
+    {% endif %}
+    {% if existsIn(msg, "images") %}
+    , "images": [
+      {% for img in msg.images %}
+      {{ tojson(img.data) }},
+      {% endfor %}
+    ]
+    {% endif %}
+  },
  {% endif %}
-},
 {% endfor %}
--- a/sources/agents/partials/openai_assistant.jinja
+++ b/sources/agents/partials/openai_assistant.jinja
@@ -1,7 +1,7 @@
 {% set tcalls = filter_by_type(msg.content_blocks, "tool_use") %}
 {
  "role": "assistant",
-  "content": {{ tojson(msg.content) }}
+  "content": {% if msg.content != "" %}{{ tojson(msg.content) }}{% else %}null{% endif %}
  {% if length(tcalls) > 0 %}
  , "tool_calls": [
    {% for b in tcalls %}
--- a/sources/agents/partials/openai_image_content.jinja
+++ b/sources/agents/partials/openai_image_content.jinja
@@ -1,11 +1,12 @@
 [
-  { "type": "text", "text": {{ tojson(msg.content) }} }
+  {% if msg.content != "" %}
+  { "type": "text", "text": {{ tojson(msg.content) }} },
+  {% endif %}
  {% for img in msg.images %}
-  ,
  {% if img.is_url %}
-  { "type": "image_url", "image_url": { "url": {{ tojson(img.data) }} } }
+  { "type": "image_url", "image_url": { "url": {{ tojson(img.data) }} } },
  {% else %}
-  { "type": "image_url", "image_url": { "url": "data:{{ img.media_type }};base64,{{ img.data }}" } }
+  { "type": "image_url", "image_url": { "url": "data:{{ img.media_type }};base64,{{ img.data }}" } },
  {% endif %}
  {% endfor %}
 ]
--- a/sources/providers/CMakeLists.txt
+++ b/sources/providers/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(Providers STATIC
    Provider.hpp Provider.cpp
    ProviderFactory.hpp ProviderFactory.cpp
    GenericProvider.hpp GenericProvider.cpp
+    ClaudeCacheControl.hpp
 )

 target_link_libraries(Providers
--- a/sources/providers/ClaudeCacheControl.hpp
+++ b/sources/providers/ClaudeCacheControl.hpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2024-2026 Petr Mironychev
+// SPDX-License-Identifier: GPL-3.0-or-later
+// Additional attribution terms under GPLv3 §7(b) apply — see LICENSE
+
+#pragma once
+
+#include <QJsonArray>
+#include <QJsonObject>
+#include <QJsonValue>
+#include <QString>
+
+namespace QodeAssist::Providers::ClaudeCacheControl {
+
+inline QJsonObject buildBreakpoint(bool extendedTtl)
+{
+    QJsonObject cacheControl{{"type", "ephemeral"}};
+    if (extendedTtl)
+        cacheControl["ttl"] = "1h";
+    return cacheControl;
+}
+
+inline void markLastBlock(QJsonArray &blocks, const QJsonObject &cacheControl)
+{
+    if (blocks.isEmpty())
+        return;
+    QJsonObject last = blocks.last().toObject();
+    last["cache_control"] = cacheControl;
+    blocks.replace(blocks.size() - 1, last);
+}
+
+inline void applyToSystem(QJsonObject &request, const QJsonObject &cacheControl)
+{
+    if (!request.contains("system"))
+        return;
+
+    const QJsonValue sys = request.value("system");
+    if (sys.isString()) {
+        const QString text = sys.toString();
+        if (!text.isEmpty()) {
+            request["system"] = QJsonArray{QJsonObject{
+                {"type", "text"}, {"text", text}, {"cache_control", cacheControl}}};
+        }
+    } else if (sys.isArray()) {
+        QJsonArray blocks = sys.toArray();
+        markLastBlock(blocks, cacheControl);
+        request["system"] = blocks;
+    }
+}
+
+inline void applyToTools(QJsonObject &request, const QJsonObject &cacheControl)
+{
+    if (!request.contains("tools"))
+        return;
+    QJsonArray tools = request.value("tools").toArray();
+    markLastBlock(tools, cacheControl);
+    request["tools"] = tools;
+}
+
+inline void applyToHistory(QJsonObject &request, const QJsonObject &cacheControl)
+{
+    if (!request.contains("messages"))
+        return;
+    QJsonArray messages = request.value("messages").toArray();
+    if (messages.size() < 2)
+        return;
+
+    const int idx = messages.size() - 2;
+    QJsonObject msg = messages[idx].toObject();
+    const QJsonValue content = msg.value("content");
+    if (content.isString()) {
+        msg["content"] = QJsonArray{QJsonObject{
+            {"type", "text"}, {"text", content.toString()}, {"cache_control", cacheControl}}};
+    } else if (content.isArray()) {
+        QJsonArray blocks = content.toArray();
+        markLastBlock(blocks, cacheControl);
+        msg["content"] = blocks;
+    }
+    messages.replace(idx, msg);
+    request["messages"] = messages;
+}
+
+inline void apply(QJsonObject &request, bool extendedTtl)
+{
+    const QJsonObject cacheControl = buildBreakpoint(extendedTtl);
+    applyToSystem(request, cacheControl);
+    applyToTools(request, cacheControl);
+    applyToHistory(request, cacheControl);
+}
+
+} // namespace QodeAssist::Providers::ClaudeCacheControl
--- a/sources/providers/Provider.cpp
+++ b/sources/providers/Provider.cpp
@@ -4,9 +4,11 @@

 #include "Provider.hpp"

+#include "ClaudeCacheControl.hpp"
 #include "PromptTemplate.hpp"

 #include <LLMQore/BaseClient.hpp>
+#include <LLMQore/ClaudeClient.hpp>
 #include <LLMQore/ToolsManager.hpp>

 #include <QJsonArray>
@@ -25,24 +27,27 @@ bool Provider::prepareRequest(
    PromptTemplate *prompt,
    const ContextData &context,
    bool isToolsEnabled,
-    bool isThinkingEnabled)
+    QString *errorOut)
 {
-    if (!prompt) {
-        LOG_MESSAGE(QString("Provider '%1': null template").arg(name()));
+    const auto fail = [errorOut](const QString &message) {
+        LOG_MESSAGE(message);
+        if (errorOut)
+            *errorOut = message;
        return false;
-    }
+    };
+
+    if (!prompt)
+        return fail(QString("Provider '%1': null template").arg(name()));

    if (!prompt->isSupportProvider(providerID())) {
-        LOG_MESSAGE(QString("Template '%1' doesn't support provider '%2'")
+        return fail(QString("Template '%1' doesn't support provider '%2'")
                        .arg(prompt->name(), name()));
-        return false;
    }

-    if (!prompt->buildFullRequest(request, context, isThinkingEnabled)) {
-        LOG_MESSAGE(
-            QString("Provider '%1': template '%2' failed to build request")
+    if (!prompt->buildFullRequest(request, context)) {
+        return fail(
+            QString("Provider '%1': template '%2' failed to build request (see log)")
                .arg(name(), prompt->name()));
-        return false;
    }

    if (isToolsEnabled) {
@@ -51,9 +56,21 @@ bool Provider::prepareRequest(
            request["tools"] = toolsDefinitions;
        }
    }
+
+    if (m_promptCachingEnabled)
+        ClaudeCacheControl::apply(request, m_promptCachingExtendedTtl);
+
    return true;
 }

+void Provider::setPromptCaching(bool enabled, bool extendedTtl)
+{
+    m_promptCachingEnabled = enabled;
+    m_promptCachingExtendedTtl = enabled && extendedTtl;
+    if (auto *claude = qobject_cast<::LLMQore::ClaudeClient *>(client()))
+        claude->setUseExtendedCacheTTL(m_promptCachingExtendedTtl);
+}
+
 RequestID Provider::sendRequest(
    const QUrl &url, const QJsonObject &payload, const QString &endpoint)
 {
--- a/sources/providers/Provider.hpp
+++ b/sources/providers/Provider.hpp
@@ -61,7 +61,7 @@ public:
        PromptTemplate *prompt,
        const ContextData &context,
        bool isToolsEnabled,
-        bool isThinkingEnabled);
+        QString *errorOut = nullptr);
    virtual QFuture<QList<QString>> getInstalledModels(const QString &url) = 0;
    virtual ProviderID providerID() const = 0;
    virtual ProviderCapabilities capabilities() const { return {}; }
@@ -73,9 +73,13 @@ public:
    void cancelRequest(const RequestID &requestId);
    ::LLMQore::ToolsManager *toolsManager() const;

+    void setPromptCaching(bool enabled, bool extendedTtl);
+
 private:
    QString m_url;
    QString m_apiKey;
+    bool m_promptCachingEnabled = false;
+    bool m_promptCachingExtendedTtl = false;
 };

 } // namespace QodeAssist::Providers
--- a/sources/templates/JsonPromptTemplate.cpp
+++ b/sources/templates/JsonPromptTemplate.cpp
@@ -310,6 +310,15 @@ ContextData makeValidationContext()
    Message asst;
    asst.role = QStringLiteral("assistant");
    {
+        ContentBlockEntry th;
+        th.kind = ContentBlockEntry::Kind::Thinking;
+        th.thinking = QStringLiteral("reasoning");
+        th.signature = QStringLiteral("sig");
+        asst.blocks.append(th);
+        ContentBlockEntry rth;
+        rth.kind = ContentBlockEntry::Kind::RedactedThinking;
+        rth.signature = QStringLiteral("sig");
+        asst.blocks.append(rth);
        ContentBlockEntry t;
        t.kind = ContentBlockEntry::Kind::Text;
        t.text = QStringLiteral("hi");
@@ -516,9 +525,7 @@ void JsonPromptTemplate::prepareRequest(QJsonObject &request, const ContextData
 }

 bool JsonPromptTemplate::buildFullRequest(
-    QJsonObject &request,
-    const ContextData &context,
-    bool /*thinkingEnabled*/) const
+    QJsonObject &request, const ContextData &context) const
 {
    return mergeRenderedBody(request, renderBody(context));
 }
--- a/sources/templates/JsonPromptTemplate.hpp
+++ b/sources/templates/JsonPromptTemplate.hpp
@@ -47,9 +47,7 @@ public:
    void prepareRequest(QJsonObject &request, const ContextData &context) const override;

    [[nodiscard]] bool buildFullRequest(
-        QJsonObject &request,
-        const ContextData &context,
-        bool thinkingEnabled = false) const override;
+        QJsonObject &request, const ContextData &context) const override;

 private:
    JsonPromptTemplate() = default;
--- a/sources/templates/PromptTemplate.hpp
+++ b/sources/templates/PromptTemplate.hpp
@@ -40,9 +40,7 @@ public:
    virtual bool isSupportModel(const QString & /*modelName*/) const { return true; }

    [[nodiscard]] virtual bool buildFullRequest(
-        QJsonObject &request,
-        const ContextData &context,
-        bool /*thinkingEnabled*/ = false) const
+        QJsonObject &request, const ContextData &context) const
    {
        prepareRequest(request, context);
        return true;