From b4f31dee23b33096f15316bbef987b8666cb1ec3 Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Wed, 26 Nov 2025 10:32:04 +0100 Subject: [PATCH] feat: Add ollama thinking feature (#275) --- ChatView/ChatModel.cpp | 11 +++- docs/ollama-configuration.md | 38 +++++++++++ providers/OllamaMessage.cpp | 53 +++++++++++++++ providers/OllamaMessage.hpp | 5 ++ providers/OllamaProvider.cpp | 101 ++++++++++++++++++++++++++++- providers/OllamaProvider.hpp | 4 ++ settings/ChatAssistantSettings.cpp | 8 +-- 7 files changed, 212 insertions(+), 8 deletions(-) diff --git a/ChatView/ChatModel.cpp b/ChatView/ChatModel.cpp index 12c8827..a0cc1ec 100644 --- a/ChatView/ChatModel.cpp +++ b/ChatView/ChatModel.cpp @@ -95,7 +95,6 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const imageMap["storedPath"] = image.storedPath; imageMap["mediaType"] = image.mediaType; - // Generate proper file URL for cross-platform compatibility if (!m_chatFilePath.isEmpty()) { QFileInfo fileInfo(m_chatFilePath); QString baseName = fileInfo.completeBaseName(); @@ -450,6 +449,16 @@ void ChatModel::addThinkingBlock( displayContent += "\n[Signature: " + signature.left(40) + "...]"; } + for (int i = 0; i < m_messages.size(); ++i) { + if (m_messages[i].role == ChatRole::Thinking && m_messages[i].id == requestId) { + m_messages[i].content = displayContent; + m_messages[i].signature = signature; + emit dataChanged(index(i), index(i)); + LOG_MESSAGE(QString("Updated existing thinking message at index %1").arg(i)); + return; + } + } + beginInsertRows(QModelIndex(), m_messages.size(), m_messages.size()); Message thinkingMessage; thinkingMessage.role = ChatRole::Thinking; diff --git a/docs/ollama-configuration.md b/docs/ollama-configuration.md index 27d1390..ad8a82b 100644 --- a/docs/ollama-configuration.md +++ b/docs/ollama-configuration.md @@ -28,6 +28,44 @@ ollama run qwen2.5-coder:32b You're all set! QodeAssist is now ready to use in Qt Creator. +## Extended Thinking Mode + +Ollama supports extended thinking mode for models that are capable of deep reasoning (such as DeepSeek-R1, QwQ, and similar reasoning models). This mode allows the model to show its step-by-step reasoning process before providing the final answer. + +### How to Enable + +**For Chat Assistant:** +1. Navigate to Qt Creator > Preferences > QodeAssist > Chat Assistant +2. In the "Extended Thinking (Claude, Ollama)" section, check "Enable extended thinking mode" +3. Select a reasoning-capable model (e.g., deepseek-r1:8b, qwq:32b) +4. Click Apply + +**For Quick Refactoring:** +1. Navigate to Qt Creator > Preferences > QodeAssist > Quick Refactor +2. Check "Enable Thinking Mode" +3. Configure thinking budget and max tokens as needed +4. Click Apply + +### Supported Models + +Thinking mode works best with models specifically designed for reasoning: +- **DeepSeek-R1** series (deepseek-r1:8b, deepseek-r1:14b, deepseek-r1:32b) +- **QwQ** series (qwq:32b) +- Other models trained for chain-of-thought reasoning + +### How It Works + +When thinking mode is enabled: +1. The model generates internal reasoning (visible in the chat as "Thinking" blocks) +2. After reasoning, it provides the final answer +3. You can collapse/expand thinking blocks to focus on the final answer +4. 
Temperature is automatically set to 1.0 for optimal reasoning performance + +**Technical Details:** +- Thinking mode adds the `enable_thinking: true` parameter to requests sent to Ollama +- This is natively supported by the Ollama API for compatible models +- Works in both Chat Assistant and Quick Refactoring contexts +
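+### Example Request
+
+For illustration, the snippet below shows how a minimal request body with
+thinking enabled can be built with Qt's JSON classes. This is a sketch, not
+QodeAssist source code: `enable_thinking` and `options.temperature` are the
+fields this feature sets, and the model name is only an example.
+
+```cpp
+#include <QDebug>
+#include <QJsonDocument>
+#include <QJsonObject>
+
+int main()
+{
+    QJsonObject request;
+    request["model"] = "deepseek-r1:8b"; // example; any reasoning-capable model
+    request["enable_thinking"] = true;   // parameter added by thinking mode
+
+    QJsonObject options;
+    options["temperature"] = 1.0;        // forced to 1.0 while thinking is enabled
+    request["options"] = options;
+
+    // Prints: {"enable_thinking":true,"model":"deepseek-r1:8b","options":{"temperature":1}}
+    qDebug().noquote() << QJsonDocument(request).toJson(QJsonDocument::Compact);
+    return 0;
+}
+```
+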
 Example of Ollama settings: (click to expand)
diff --git a/providers/OllamaMessage.cpp b/providers/OllamaMessage.cpp
index cb2125d..c97576b 100644
--- a/providers/OllamaMessage.cpp
+++ b/providers/OllamaMessage.cpp
@@ -70,6 +70,22 @@ void OllamaMessage::handleToolCall(const QJsonObject &toolCall)
     LOG_MESSAGE(
         QString("OllamaMessage: Structured tool call detected - name=%1, id=%2").arg(name, toolId));
 }
+
+void OllamaMessage::handleThinkingDelta(const QString &thinking)
+{
+    LLMCore::ThinkingContent *thinkingContent = getOrCreateThinkingContent();
+    thinkingContent->appendThinking(thinking);
+}
+
+void OllamaMessage::handleThinkingComplete(const QString &signature)
+{
+    if (m_currentThinkingContent) {
+        m_currentThinkingContent->setSignature(signature);
+        LOG_MESSAGE(QString("OllamaMessage: Set thinking signature, length=%1")
+                        .arg(signature.length()));
+    }
+}
+
 void OllamaMessage::handleDone(bool done)
 {
     m_done = done;
@@ -216,6 +232,7 @@ QJsonObject OllamaMessage::toProviderFormat() const
 
     QString textContent;
     QJsonArray toolCalls;
+    QString thinkingContent;
 
     for (auto block : m_currentBlocks) {
         if (!block)
@@ -228,9 +245,15 @@ QJsonObject OllamaMessage::toProviderFormat() const
             toolCall["type"] = "function";
             toolCall["function"] = QJsonObject{{"name", tool->name()}, {"arguments", tool->input()}};
             toolCalls.append(toolCall);
+        } else if (auto thinking = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            thinkingContent += thinking->thinking();
         }
     }
 
+    if (!thinkingContent.isEmpty()) {
+        message["thinking"] = thinkingContent;
+    }
+
     if (!textContent.isEmpty()) {
         message["content"] = textContent;
     }
@@ -275,6 +298,17 @@ QList<LLMCore::ToolUseContent *> OllamaMessage::getCurrentToolUseContent() const
     return toolBlocks;
 }
 
+QList<LLMCore::ThinkingContent *> OllamaMessage::getCurrentThinkingContent() const
+{
+    QList<LLMCore::ThinkingContent *> thinkingBlocks;
+    for (auto block : m_currentBlocks) {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            thinkingBlocks.append(thinkingContent);
+        }
+    }
+    return thinkingBlocks;
+}
+
 void OllamaMessage::startNewContinuation()
 {
     LOG_MESSAGE(QString("OllamaMessage: Starting new continuation"));
@@ -284,6 +318,7 @@ void OllamaMessage::startNewContinuation()
     m_done = false;
     m_state = LLMCore::MessageState::Building;
     m_contentAddedToTextBlock = false;
+    m_currentThinkingContent = nullptr;
 }
 
 void OllamaMessage::updateStateFromDone()
@@ -309,4 +344,22 @@ LLMCore::TextContent *OllamaMessage::getOrCreateTextContent()
     return addCurrentContent<LLMCore::TextContent>();
 }
 
+LLMCore::ThinkingContent *OllamaMessage::getOrCreateThinkingContent()
+{
+    if (m_currentThinkingContent) {
+        return m_currentThinkingContent;
+    }
+
+    for (auto block : m_currentBlocks) {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            m_currentThinkingContent = thinkingContent;
+            return m_currentThinkingContent;
+        }
+    }
+
+    m_currentThinkingContent = addCurrentContent<LLMCore::ThinkingContent>();
+    LOG_MESSAGE(QString("OllamaMessage: Created new ThinkingContent block"));
+    return m_currentThinkingContent;
+}
+
 } // namespace QodeAssist::Providers
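The get-or-create pattern above is what keeps a token-by-token stream from creating one block per delta: every call to handleThinkingDelta() lands in the same ThinkingContent until a new continuation starts. A standalone sketch of that accumulate-then-sign flow, using an illustrative stand-in class rather than the LLMCore API:

#include <QString>

// Stand-in for LLMCore::ThinkingContent: gathers streamed reasoning text,
// then stores the signature that arrives when the stream finishes.
class ThinkingBlock
{
public:
    void appendThinking(const QString &delta) { m_thinking += delta; }
    void setSignature(const QString &signature) { m_signature = signature; }
    QString thinking() const { return m_thinking; }
    QString signature() const { return m_signature; }

private:
    QString m_thinking;
    QString m_signature;
};

int main()
{
    ThinkingBlock block;
    // Deltas arrive in stream order and append to the same block,
    // mirroring repeated handleThinkingDelta() calls above.
    block.appendThinking("First, check the base case. ");
    block.appendThinking("Then reduce n by one.");
    block.setSignature("sig-abc123"); // as in handleThinkingComplete()
    return block.thinking().isEmpty() ? 1 : 0;
}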
diff --git a/providers/OllamaMessage.hpp b/providers/OllamaMessage.hpp
index 442d170..123cfcc 100644
--- a/providers/OllamaMessage.hpp
+++ b/providers/OllamaMessage.hpp
@@ -31,6 +31,8 @@ public:
 
     void handleContentDelta(const QString &content);
     void handleToolCall(const QJsonObject &toolCall);
+    void handleThinkingDelta(const QString &thinking);
+    void handleThinkingComplete(const QString &signature);
     void handleDone(bool done);
 
     QJsonObject toProviderFormat() const;
@@ -38,6 +40,7 @@ public:
     LLMCore::MessageState state() const { return m_state; }
 
     QList<LLMCore::ToolUseContent *> getCurrentToolUseContent() const;
+    QList<LLMCore::ThinkingContent *> getCurrentThinkingContent() const;
     QList currentBlocks() const { return m_currentBlocks; }
 
     void startNewContinuation();
@@ -48,11 +51,13 @@ private:
     QList m_currentBlocks;
     QString m_accumulatedContent;
    bool m_contentAddedToTextBlock = false;
+    LLMCore::ThinkingContent *m_currentThinkingContent = nullptr;
 
     void updateStateFromDone();
     bool tryParseToolCall();
     bool isLikelyToolCallJson(const QString &content) const;
     LLMCore::TextContent *getOrCreateTextContent();
+    LLMCore::ThinkingContent *getOrCreateThinkingContent();
 
     template<typename T, typename... Args>
     T *addCurrentContent(Args &&...args)
diff --git a/providers/OllamaProvider.cpp b/providers/OllamaProvider.cpp
index 2c49939..f098634 100644
--- a/providers/OllamaProvider.cpp
+++ b/providers/OllamaProvider.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2024-2025 Petr Mironychev
  *
  * This file is part of QodeAssist.
@@ -104,12 +104,31 @@ void OllamaProvider::prepareRequest(
         request["keep_alive"] = settings.ollamaLivetime();
     };
 
+    auto applyThinkingMode = [&request]() {
+        request["enable_thinking"] = true;
+        QJsonObject options = request["options"].toObject();
+        options["temperature"] = 1.0;
+        request["options"] = options;
+    };
+
     if (type == LLMCore::RequestType::CodeCompletion) {
         applySettings(Settings::codeCompletionSettings());
     } else if (type == LLMCore::RequestType::QuickRefactoring) {
-        applySettings(Settings::quickRefactorSettings());
+        const auto &qrSettings = Settings::quickRefactorSettings();
+        applySettings(qrSettings);
+
+        if (isThinkingEnabled) {
+            applyThinkingMode();
+            LOG_MESSAGE(QString("OllamaProvider: Thinking mode enabled for QuickRefactoring"));
+        }
     } else {
-        applySettings(Settings::chatAssistantSettings());
+        const auto &chatSettings = Settings::chatAssistantSettings();
+        applySettings(chatSettings);
+
+        if (isThinkingEnabled) {
+            applyThinkingMode();
+            LOG_MESSAGE(QString("OllamaProvider: Thinking mode enabled for Chat"));
+        }
     }
 
     if (isToolsEnabled) {
@@ -247,6 +266,11 @@ bool OllamaProvider::supportImage() const
     return true;
 }
 
+bool OllamaProvider::supportThinking() const
+{
+    return true;
+}
+
 void OllamaProvider::cancelRequest(const LLMCore::RequestID &requestId)
 {
     LOG_MESSAGE(QString("OllamaProvider: Cancelling request %1").arg(requestId));
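The processStreamData() hunk below accepts thinking deltas in two places, a top-level "thinking" field and the "message.thinking" field, next to the usual "message.content". A minimal sketch of that routing, assuming only the chunk shapes visible in this patch (this is not the provider's code):

#include <QDebug>
#include <QJsonDocument>
#include <QJsonObject>

// Routes one chunk of a streamed Ollama response the way the patched
// processStreamData() does: thinking and content deltas are kept separate,
// and "done" marks the end of the stream.
void routeChunk(const QByteArray &line)
{
    const QJsonObject data = QJsonDocument::fromJson(line).object();
    const QJsonObject msg = data["message"].toObject();

    if (data.contains("thinking")) // top-level shape
        qDebug() << "thinking delta:" << data["thinking"].toString();
    if (msg.contains("thinking")) // message.thinking shape
        qDebug() << "thinking delta:" << msg["thinking"].toString();
    if (msg.contains("content"))
        qDebug() << "content delta:" << msg["content"].toString();
    if (data["done"].toBool())
        qDebug() << "stream finished";
}

int main()
{
    routeChunk(R"({"message":{"thinking":"Consider the edge case..."}})");
    routeChunk(R"({"message":{"content":"Here is the answer."},"done":true})");
    return 0;
}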
@@ -405,12 +429,48 @@ void OllamaProvider::processStreamData(const QString &requestId, const QJsonObje
         LOG_MESSAGE(QString("Cleared message state for continuation request %1").arg(requestId));
     }
 
+    if (data.contains("thinking")) {
+        QString thinkingDelta = data["thinking"].toString();
+        if (!thinkingDelta.isEmpty()) {
+            message->handleThinkingDelta(thinkingDelta);
+            LOG_MESSAGE(QString("OllamaProvider: Received thinking delta, length=%1")
+                            .arg(thinkingDelta.length()));
+        }
+    }
+
     if (data.contains("message")) {
         QJsonObject messageObj = data["message"].toObject();
 
+        if (messageObj.contains("thinking")) {
+            QString thinkingDelta = messageObj["thinking"].toString();
+            if (!thinkingDelta.isEmpty()) {
+                message->handleThinkingDelta(thinkingDelta);
+                LOG_MESSAGE(QString("OllamaProvider: Received thinking delta from message.thinking, length=%1")
+                                .arg(thinkingDelta.length()));
+
+                if (!m_thinkingStarted.contains(requestId)) {
+                    auto thinkingBlocks = message->getCurrentThinkingContent();
+                    if (!thinkingBlocks.isEmpty() && thinkingBlocks.first()) {
+                        QString currentThinking = thinkingBlocks.first()->thinking();
+                        QString displayThinking = currentThinking.length() > 50
+                            ? QString("%1...").arg(currentThinking.left(50))
+                            : currentThinking;
+
+                        emit thinkingBlockReceived(requestId, displayThinking, "");
+                        LOG_MESSAGE(QString("Emitted initial thinking indicator for request %1, length=%2")
+                                        .arg(requestId)
+                                        .arg(currentThinking.length()));
+                        m_thinkingStarted.insert(requestId);
+                    }
+                }
+            }
+        }
+
         if (messageObj.contains("content")) {
             QString content = messageObj["content"].toString();
             if (!content.isEmpty()) {
+                emitThinkingBlocks(requestId, message);
+
                 message->handleContentDelta(content);
 
                 bool hasTextContent = false;
@@ -460,6 +520,13 @@ void OllamaProvider::processStreamData(const QString &requestId, const QJsonObje
     }
 
     if (data["done"].toBool()) {
+        if (data.contains("signature")) {
+            QString signature = data["signature"].toString();
+            message->handleThinkingComplete(signature);
+            LOG_MESSAGE(QString("OllamaProvider: Set thinking signature, length=%1")
+                            .arg(signature.length()));
+        }
+
         message->handleDone(true);
         handleMessageComplete(requestId);
     }
@@ -472,6 +539,8 @@ void OllamaProvider::handleMessageComplete(const QString &requestId)
 
     OllamaMessage *message = m_messages[requestId];
 
+    emitThinkingBlocks(requestId, message);
+
     if (message->state() == LLMCore::MessageState::RequiresToolExecution) {
         LOG_MESSAGE(QString("Ollama message requires tool execution for %1").arg(requestId));
 
@@ -517,6 +586,32 @@ void OllamaProvider::cleanupRequest(const LLMCore::RequestID &requestId)
     m_dataBuffers.remove(requestId);
     m_requestUrls.remove(requestId);
     m_originalRequests.remove(requestId);
+    m_thinkingEmitted.remove(requestId);
+    m_thinkingStarted.remove(requestId);
 
     m_toolsManager->cleanupRequest(requestId);
 }
+
+void OllamaProvider::emitThinkingBlocks(const QString &requestId, OllamaMessage *message)
+{
+    if (!message || m_thinkingEmitted.contains(requestId)) {
+        return;
+    }
+
+    auto thinkingBlocks = message->getCurrentThinkingContent();
+    if (thinkingBlocks.isEmpty()) {
+        return;
+    }
+
+    for (auto thinkingContent : thinkingBlocks) {
+        emit thinkingBlockReceived(
+            requestId, thinkingContent->thinking(), thinkingContent->signature());
+        LOG_MESSAGE(QString("Emitted thinking block for request %1, thinking length=%2, signature "
+                            "length=%3")
+                        .arg(requestId)
+                        .arg(thinkingContent->thinking().length())
+                        .arg(thinkingContent->signature().length()));
+    }
+
+    m_thinkingEmitted.insert(requestId);
+}
+
 } // namespace QodeAssist::Providers
diff --git a/providers/OllamaProvider.hpp b/providers/OllamaProvider.hpp
index 685b1d6..ef4fd2c 100644
--- a/providers/OllamaProvider.hpp
+++ b/providers/OllamaProvider.hpp
@@ -55,6 +55,7 @@ public:
 
     bool supportsTools() const override;
     bool supportImage() const override;
+    bool supportThinking() const override;
     void cancelRequest(const LLMCore::RequestID &requestId) override;
 
 public slots:
@@ -73,10 +74,13 @@ private:
     void processStreamData(const QString &requestId, const QJsonObject &data);
     void handleMessageComplete(const QString &requestId);
     void cleanupRequest(const LLMCore::RequestID &requestId);
+    void emitThinkingBlocks(const QString &requestId, OllamaMessage *message);
 
     QHash<QString, OllamaMessage *> m_messages;
     QHash m_requestUrls;
     QHash m_originalRequests;
+    QSet<QString> m_thinkingEmitted;
+    QSet<QString> m_thinkingStarted;
 
     Tools::ToolsManager *m_toolsManager;
 };
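m_thinkingStarted and m_thinkingEmitted above act as per-request one-shot guards: the first qualifying chunk emits, later chunks are no-ops, and cleanupRequest() resets the flag. The same pattern in isolation, with illustrative names:

#include <QSet>
#include <QString>

// One-shot guard keyed by request ID, as used for m_thinkingStarted and
// m_thinkingEmitted: tryFire() succeeds once per ID until cleanup().
class OncePerRequest
{
public:
    bool tryFire(const QString &requestId)
    {
        if (m_fired.contains(requestId))
            return false;
        m_fired.insert(requestId);
        return true;
    }
    void cleanup(const QString &requestId) { m_fired.remove(requestId); }

private:
    QSet<QString> m_fired;
};

int main()
{
    OncePerRequest guard;
    bool first = guard.tryFire("req-1");  // true: emit the initial indicator
    bool second = guard.tryFire("req-1"); // false: already emitted
    guard.cleanup("req-1");               // request finished, forget the ID
    return (first && !second) ? 0 : 1;
}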
diff --git a/settings/ChatAssistantSettings.cpp b/settings/ChatAssistantSettings.cpp
index 856e90d..9537997 100644
--- a/settings/ChatAssistantSettings.cpp
+++ b/settings/ChatAssistantSettings.cpp
@@ -146,10 +146,10 @@ ChatAssistantSettings::ChatAssistantSettings()
     // Extended Thinking Settings
     enableThinkingMode.setSettingsKey(Constants::CA_ENABLE_THINKING_MODE);
-    enableThinkingMode.setLabelText(Tr::tr("Enable extended thinking mode (Claude only).\n Temperature is 1.0 accordingly API requerement"));
+    enableThinkingMode.setLabelText(Tr::tr("Enable extended thinking mode (Claude, Ollama).\n Temperature is set to 1.0 as required by the Claude API"));
     enableThinkingMode.setToolTip(
-        Tr::tr("Enable Claude's extended thinking mode for complex reasoning tasks. "
-               "This provides step-by-step reasoning before the final answer."));
+        Tr::tr("Enable extended thinking mode for complex reasoning tasks. "
+               "This provides step-by-step reasoning before the final answer."));
     enableThinkingMode.setDefaultValue(false);
 
     thinkingBudgetTokens.setSettingsKey(Constants::CA_THINKING_BUDGET_TOKENS);
@@ -298,7 +298,7 @@ ChatAssistantSettings::ChatAssistantSettings()
         }},
         Group{title(Tr::tr("Ollama Settings")), Column{Row{ollamaGrid, Stretch{1}}}},
         Group{
-            title(Tr::tr("Extended Thinking (Claude Only)")),
+            title(Tr::tr("Extended Thinking (Claude, Ollama)")),
             Column{enableThinkingMode, Row{thinkingGrid, Stretch{1}}}},
         Group{title(Tr::tr("Chat Settings")), Row{chatViewSettingsGrid, Stretch{1}}},
         Stretch{1}};
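On the UI side, the ChatModel::addThinkingBlock() change at the top of this patch makes repeated thinkingBlockReceived emissions for one request update the existing "Thinking" row instead of appending duplicates. A simplified sketch of that update-or-append behaviour, with an illustrative structure rather than the ChatModel API:

#include <QHash>
#include <QString>
#include <QStringList>

struct ThinkingLog
{
    QStringList rows;            // one display row per thinking block
    QHash<QString, int> rowById; // request ID -> row index

    void addOrUpdate(const QString &requestId, const QString &text)
    {
        if (rowById.contains(requestId)) {
            rows[rowById.value(requestId)] = text; // update the existing row
        } else {
            rowById.insert(requestId, rows.size()); // append a new row
            rows.append(text);
        }
    }
};

int main()
{
    ThinkingLog log;
    log.addOrUpdate("req-1", "Thinking...");
    log.addOrUpdate("req-1", "Thinking: step 1 done"); // same row, new text
    return log.rows.size() == 1 ? 0 : 1;
}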