From b4f31dee23b33096f15316bbef987b8666cb1ec3 Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Wed, 26 Nov 2025 10:32:04 +0100 Subject: [PATCH] feat: Add ollama thinking feature (#275) --- ChatView/ChatModel.cpp | 11 +++- docs/ollama-configuration.md | 38 +++++++++++ providers/OllamaMessage.cpp | 53 +++++++++++++++ providers/OllamaMessage.hpp | 5 ++ providers/OllamaProvider.cpp | 101 ++++++++++++++++++++++++++++- providers/OllamaProvider.hpp | 4 ++ settings/ChatAssistantSettings.cpp | 8 +-- 7 files changed, 212 insertions(+), 8 deletions(-) diff --git a/ChatView/ChatModel.cpp b/ChatView/ChatModel.cpp index 12c8827..a0cc1ec 100644 --- a/ChatView/ChatModel.cpp +++ b/ChatView/ChatModel.cpp @@ -95,7 +95,6 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const imageMap["storedPath"] = image.storedPath; imageMap["mediaType"] = image.mediaType; - // Generate proper file URL for cross-platform compatibility if (!m_chatFilePath.isEmpty()) { QFileInfo fileInfo(m_chatFilePath); QString baseName = fileInfo.completeBaseName(); @@ -450,6 +449,16 @@ void ChatModel::addThinkingBlock( displayContent += "\n[Signature: " + signature.left(40) + "...]"; } + for (int i = 0; i < m_messages.size(); ++i) { + if (m_messages[i].role == ChatRole::Thinking && m_messages[i].id == requestId) { + m_messages[i].content = displayContent; + m_messages[i].signature = signature; + emit dataChanged(index(i), index(i)); + LOG_MESSAGE(QString("Updated existing thinking message at index %1").arg(i)); + return; + } + } + beginInsertRows(QModelIndex(), m_messages.size(), m_messages.size()); Message thinkingMessage; thinkingMessage.role = ChatRole::Thinking; diff --git a/docs/ollama-configuration.md b/docs/ollama-configuration.md index 27d1390..ad8a82b 100644 --- a/docs/ollama-configuration.md +++ b/docs/ollama-configuration.md @@ -28,6 +28,44 @@ ollama run qwen2.5-coder:32b You're all set! QodeAssist is now ready to use in Qt Creator. +## Extended Thinking Mode + +Ollama supports extended thinking mode for models that are capable of deep reasoning (such as DeepSeek-R1, QwQ, and similar reasoning models). This mode allows the model to show its step-by-step reasoning process before providing the final answer. + +### How to Enable + +**For Chat Assistant:** +1. Navigate to Qt Creator > Preferences > QodeAssist > Chat Assistant +2. In the "Extended Thinking (Claude, Ollama)" section, check "Enable extended thinking mode" +3. Select a reasoning-capable model (e.g., deepseek-r1:8b, qwq:32b) +4. Click Apply + +**For Quick Refactoring:** +1. Navigate to Qt Creator > Preferences > QodeAssist > Quick Refactor +2. Check "Enable Thinking Mode" +3. Configure thinking budget and max tokens as needed +4. Click Apply + +### Supported Models + +Thinking mode works best with models specifically designed for reasoning: +- **DeepSeek-R1** series (deepseek-r1:8b, deepseek-r1:14b, deepseek-r1:32b) +- **QwQ** series (qwq:32b) +- Other models trained for chain-of-thought reasoning + +### How It Works + +When thinking mode is enabled: +1. The model generates internal reasoning (visible in the chat as "Thinking" blocks) +2. After reasoning, it provides the final answer +3. You can collapse/expand thinking blocks to focus on the final answer +4. 
Temperature is automatically set to 1.0 for optimal reasoning performance + +**Technical Details:** +- Thinking mode adds the `enable_thinking: true` parameter to requests sent to Ollama +- This is natively supported by the Ollama API for compatible models +- Works in both Chat Assistant and Quick Refactoring contexts +
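+### Example Request
+
+For illustration, the snippet below shows how a minimal request body with
+thinking enabled can be built with Qt's JSON classes. This is a sketch, not
+QodeAssist source code: `enable_thinking` and `options.temperature` are the
+fields this feature sets, and the model name is only an example.
+
+```cpp
+#include <QDebug>
+#include <QJsonDocument>
+#include <QJsonObject>
+
+int main()
+{
+    QJsonObject request;
+    request["model"] = "deepseek-r1:8b"; // example; any reasoning-capable model
+    request["enable_thinking"] = true;   // parameter added by thinking mode
+
+    QJsonObject options;
+    options["temperature"] = 1.0;        // forced to 1.0 while thinking is enabled
+    request["options"] = options;
+
+    // Prints: {"enable_thinking":true,"model":"deepseek-r1:8b","options":{"temperature":1}}
+    qDebug().noquote() << QJsonDocument(request).toJson(QJsonDocument::Compact);
+    return 0;
+}
+```
+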
 Example of Ollama settings: (click to expand)
diff --git a/providers/OllamaMessage.cpp b/providers/OllamaMessage.cpp
index cb2125d..c97576b 100644
--- a/providers/OllamaMessage.cpp
+++ b/providers/OllamaMessage.cpp
@@ -70,6 +70,22 @@ void OllamaMessage::handleToolCall(const QJsonObject &toolCall)
     LOG_MESSAGE(
         QString("OllamaMessage: Structured tool call detected - name=%1, id=%2").arg(name, toolId));
 }
+
+void OllamaMessage::handleThinkingDelta(const QString &thinking)
+{
+    LLMCore::ThinkingContent *thinkingContent = getOrCreateThinkingContent();
+    thinkingContent->appendThinking(thinking);
+}
+
+void OllamaMessage::handleThinkingComplete(const QString &signature)
+{
+    if (m_currentThinkingContent) {
+        m_currentThinkingContent->setSignature(signature);
+        LOG_MESSAGE(QString("OllamaMessage: Set thinking signature, length=%1")
+                        .arg(signature.length()));
+    }
+}
+
 void OllamaMessage::handleDone(bool done)
 {
     m_done = done;
@@ -216,6 +232,7 @@ QJsonObject OllamaMessage::toProviderFormat() const
 
     QString textContent;
     QJsonArray toolCalls;
+    QString thinkingContent;
 
     for (auto block : m_currentBlocks) {
         if (!block)
@@ -228,9 +245,15 @@ QJsonObject OllamaMessage::toProviderFormat() const
             toolCall["type"] = "function";
             toolCall["function"] = QJsonObject{{"name", tool->name()}, {"arguments", tool->input()}};
             toolCalls.append(toolCall);
+        } else if (auto thinking = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            thinkingContent += thinking->thinking();
         }
     }
 
+    if (!thinkingContent.isEmpty()) {
+        message["thinking"] = thinkingContent;
+    }
+
     if (!textContent.isEmpty()) {
         message["content"] = textContent;
     }
@@ -275,6 +298,17 @@ QList<LLMCore::ToolUseContent *> OllamaMessage::getCurrentToolUseContent() const
     return toolBlocks;
 }
 
+QList<LLMCore::ThinkingContent *> OllamaMessage::getCurrentThinkingContent() const
+{
+    QList<LLMCore::ThinkingContent *> thinkingBlocks;
+    for (auto block : m_currentBlocks) {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            thinkingBlocks.append(thinkingContent);
+        }
+    }
+    return thinkingBlocks;
+}
+
 void OllamaMessage::startNewContinuation()
 {
     LOG_MESSAGE(QString("OllamaMessage: Starting new continuation"));
@@ -284,6 +318,7 @@ void OllamaMessage::startNewContinuation()
     m_done = false;
     m_state = LLMCore::MessageState::Building;
     m_contentAddedToTextBlock = false;
+    m_currentThinkingContent = nullptr;
 }
 
 void OllamaMessage::updateStateFromDone()
@@ -309,4 +344,22 @@ LLMCore::TextContent *OllamaMessage::getOrCreateTextContent()
     return addCurrentContent<LLMCore::TextContent>();
 }
 
+LLMCore::ThinkingContent *OllamaMessage::getOrCreateThinkingContent()
+{
+    if (m_currentThinkingContent) {
+        return m_currentThinkingContent;
+    }
+
+    for (auto block : m_currentBlocks) {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(block)) {
+            m_currentThinkingContent = thinkingContent;
+            return m_currentThinkingContent;
+        }
+    }
+
+    m_currentThinkingContent = addCurrentContent<LLMCore::ThinkingContent>();
+    LOG_MESSAGE(QString("OllamaMessage: Created new ThinkingContent block"));
+    return m_currentThinkingContent;
+}
+
 } // namespace QodeAssist::Providers
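The get-or-create pattern above is what keeps a token-by-token stream from creating one block per delta: every call to handleThinkingDelta() lands in the same ThinkingContent until a new continuation starts. A standalone sketch of that accumulate-then-sign flow, using an illustrative stand-in class rather than the LLMCore API:

#include <QString>

// Stand-in for LLMCore::ThinkingContent: gathers streamed reasoning text,
// then stores the signature that arrives when the stream finishes.
class ThinkingBlock
{
public:
    void appendThinking(const QString &delta) { m_thinking += delta; }
    void setSignature(const QString &signature) { m_signature = signature; }
    QString thinking() const { return m_thinking; }
    QString signature() const { return m_signature; }

private:
    QString m_thinking;
    QString m_signature;
};

int main()
{
    ThinkingBlock block;
    // Deltas arrive in stream order and append to the same block,
    // mirroring repeated handleThinkingDelta() calls above.
    block.appendThinking("First, check the base case. ");
    block.appendThinking("Then reduce n by one.");
    block.setSignature("sig-abc123"); // as in handleThinkingComplete()
    return block.thinking().isEmpty() ? 1 : 0;
}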
diff --git a/providers/OllamaMessage.hpp b/providers/OllamaMessage.hpp
index 442d170..123cfcc 100644
--- a/providers/OllamaMessage.hpp
+++ b/providers/OllamaMessage.hpp
@@ -31,6 +31,8 @@ public:
 
     void handleContentDelta(const QString &content);
     void handleToolCall(const QJsonObject &toolCall);
+    void handleThinkingDelta(const QString &thinking);
+    void handleThinkingComplete(const QString &signature);
     void handleDone(bool done);
 
     QJsonObject toProviderFormat() const;
@@ -38,6 +40,7 @@ public:
     LLMCore::MessageState state() const { return m_state; }
 
     QList<LLMCore::ToolUseContent *> getCurrentToolUseContent() const;
+    QList<LLMCore::ThinkingContent *> getCurrentThinkingContent() const;
     QList currentBlocks() const { return m_currentBlocks; }
 
     void startNewContinuation();
@@ -48,11 +51,13 @@ private:
     QList m_currentBlocks;
     QString m_accumulatedContent;
    bool m_contentAddedToTextBlock = false;
+    LLMCore::ThinkingContent *m_currentThinkingContent = nullptr;
 
     void updateStateFromDone();
     bool tryParseToolCall();
     bool isLikelyToolCallJson(const QString &content) const;
     LLMCore::TextContent *getOrCreateTextContent();
+    LLMCore::ThinkingContent *getOrCreateThinkingContent();
 
     template<typename T, typename... Args>
     T *addCurrentContent(Args &&...args)
diff --git a/providers/OllamaProvider.cpp b/providers/OllamaProvider.cpp
index 2c49939..f098634 100644
--- a/providers/OllamaProvider.cpp
+++ b/providers/OllamaProvider.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2024-2025 Petr Mironychev
  *
  * This file is part of QodeAssist.
@@ -104,12 +104,31 @@ void OllamaProvider::prepareRequest(
         request["keep_alive"] = settings.ollamaLivetime();
     };
 
+    auto applyThinkingMode = [&request]() {
+        request["enable_thinking"] = true;
+        QJsonObject options = request["options"].toObject();
+        options["temperature"] = 1.0;
+        request["options"] = options;
+    };
+
     if (type == LLMCore::RequestType::CodeCompletion) {
         applySettings(Settings::codeCompletionSettings());
     } else if (type == LLMCore::RequestType::QuickRefactoring) {
-        applySettings(Settings::quickRefactorSettings());
+        const auto &qrSettings = Settings::quickRefactorSettings();
+        applySettings(qrSettings);
+
+        if (isThinkingEnabled) {
+            applyThinkingMode();
+            LOG_MESSAGE(QString("OllamaProvider: Thinking mode enabled for QuickRefactoring"));
+        }
     } else {
-        applySettings(Settings::chatAssistantSettings());
+        const auto &chatSettings = Settings::chatAssistantSettings();
+        applySettings(chatSettings);
+
+        if (isThinkingEnabled) {
+            applyThinkingMode();
+            LOG_MESSAGE(QString("OllamaProvider: Thinking mode enabled for Chat"));
+        }
     }
 
     if (isToolsEnabled) {
@@ -247,6 +266,11 @@ bool OllamaProvider::supportImage() const
     return true;
 }
 
+bool OllamaProvider::supportThinking() const
+{
+    return true;
+}
+
 void OllamaProvider::cancelRequest(const LLMCore::RequestID &requestId)
 {
     LOG_MESSAGE(QString("OllamaProvider: Cancelling request %1").arg(requestId));
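The processStreamData() hunk below accepts thinking deltas in two places, a top-level "thinking" field and the "message.thinking" field, next to the usual "message.content". A minimal sketch of that routing, assuming only the chunk shapes visible in this patch (this is not the provider's code):

#include <QDebug>
#include <QJsonDocument>
#include <QJsonObject>

// Routes one chunk of a streamed Ollama response the way the patched
// processStreamData() does: thinking and content deltas are kept separate,
// and "done" marks the end of the stream.
void routeChunk(const QByteArray &line)
{
    const QJsonObject data = QJsonDocument::fromJson(line).object();
    const QJsonObject msg = data["message"].toObject();

    if (data.contains("thinking")) // top-level shape
        qDebug() << "thinking delta:" << data["thinking"].toString();
    if (msg.contains("thinking")) // message.thinking shape
        qDebug() << "thinking delta:" << msg["thinking"].toString();
    if (msg.contains("content"))
        qDebug() << "content delta:" << msg["content"].toString();
    if (data["done"].toBool())
        qDebug() << "stream finished";
}

int main()
{
    routeChunk(R"({"message":{"thinking":"Consider the edge case..."}})");
    routeChunk(R"({"message":{"content":"Here is the answer."},"done":true})");
    return 0;
}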
@@ -405,12 +429,48 @@ void OllamaProvider::processStreamData(const QString &requestId, const QJsonObje
         LOG_MESSAGE(QString("Cleared message state for continuation request %1").arg(requestId));
     }
 
+    if (data.contains("thinking")) {
+        QString thinkingDelta = data["thinking"].toString();
+        if (!thinkingDelta.isEmpty()) {
+            message->handleThinkingDelta(thinkingDelta);
+            LOG_MESSAGE(QString("OllamaProvider: Received thinking delta, length=%1")
+                            .arg(thinkingDelta.length()));
+        }
+    }
+
     if (data.contains("message")) {
         QJsonObject messageObj = data["message"].toObject();
 
+        if (messageObj.contains("thinking")) {
+            QString thinkingDelta = messageObj["thinking"].toString();
+            if (!thinkingDelta.isEmpty()) {
+                message->handleThinkingDelta(thinkingDelta);
+                LOG_MESSAGE(QString("OllamaProvider: Received thinking delta from message.thinking, length=%1")
+                                .arg(thinkingDelta.length()));
+
+                if (!m_thinkingStarted.contains(requestId)) {
+                    auto thinkingBlocks = message->getCurrentThinkingContent();
+                    if (!thinkingBlocks.isEmpty() && thinkingBlocks.first()) {
+                        QString currentThinking = thinkingBlocks.first()->thinking();
+                        QString displayThinking = currentThinking.length() > 50
+                            ? QString("%1...").arg(currentThinking.left(50))
+                            : currentThinking;
+
+                        emit thinkingBlockReceived(requestId, displayThinking, "");
+                        LOG_MESSAGE(QString("Emitted initial thinking indicator for request %1, length=%2")
+                                        .arg(requestId)
+                                        .arg(currentThinking.length()));
+                        m_thinkingStarted.insert(requestId);
+                    }
+                }
+            }
+        }
+
         if (messageObj.contains("content")) {
             QString content = messageObj["content"].toString();
             if (!content.isEmpty()) {
+                emitThinkingBlocks(requestId, message);
+
                 message->handleContentDelta(content);
 
                 bool hasTextContent = false;
@@ -460,6 +520,13 @@ void OllamaProvider::processStreamData(const QString &requestId, const QJsonObje
     }
 
     if (data["done"].toBool()) {
+        if (data.contains("signature")) {
+            QString signature = data["signature"].toString();
+            message->handleThinkingComplete(signature);
+            LOG_MESSAGE(QString("OllamaProvider: Set thinking signature, length=%1")
+                            .arg(signature.length()));
+        }
+
         message->handleDone(true);
         handleMessageComplete(requestId);
     }
@@ -472,6 +539,8 @@ void OllamaProvider::handleMessageComplete(const QString &requestId)
 
     OllamaMessage *message = m_messages[requestId];
 
+    emitThinkingBlocks(requestId, message);
+
     if (message->state() == LLMCore::MessageState::RequiresToolExecution) {
         LOG_MESSAGE(QString("Ollama message requires tool execution for %1").arg(requestId));
 
@@ -517,6 +586,32 @@ void OllamaProvider::cleanupRequest(const LLMCore::RequestID &requestId)
     m_dataBuffers.remove(requestId);
     m_requestUrls.remove(requestId);
     m_originalRequests.remove(requestId);
+    m_thinkingEmitted.remove(requestId);
+    m_thinkingStarted.remove(requestId);
 
     m_toolsManager->cleanupRequest(requestId);
 }
+
+void OllamaProvider::emitThinkingBlocks(const QString &requestId, OllamaMessage *message)
+{
+    if (!message || m_thinkingEmitted.contains(requestId)) {
+        return;
+    }
+
+    auto thinkingBlocks = message->getCurrentThinkingContent();
+    if (thinkingBlocks.isEmpty()) {
+        return;
+    }
+
+    for (auto thinkingContent : thinkingBlocks) {
+        emit thinkingBlockReceived(
+            requestId, thinkingContent->thinking(), thinkingContent->signature());
+        LOG_MESSAGE(QString("Emitted thinking block for request %1, thinking length=%2, signature "
+                            "length=%3")
+                        .arg(requestId)
+                        .arg(thinkingContent->thinking().length())
+                        .arg(thinkingContent->signature().length()));
+    }
+
+    m_thinkingEmitted.insert(requestId);
+}
+
 } // namespace QodeAssist::Providers
diff --git a/providers/OllamaProvider.hpp b/providers/OllamaProvider.hpp
index 685b1d6..ef4fd2c 100644
--- a/providers/OllamaProvider.hpp
+++ b/providers/OllamaProvider.hpp
@@ -55,6 +55,7 @@ public:
 
     bool supportsTools() const override;
     bool supportImage() const override;
+    bool supportThinking() const override;
     void cancelRequest(const LLMCore::RequestID &requestId) override;
 
 public slots:
@@ -73,10 +74,13 @@ private:
     void processStreamData(const QString &requestId, const QJsonObject &data);
     void handleMessageComplete(const QString &requestId);
     void cleanupRequest(const LLMCore::RequestID &requestId);
+    void emitThinkingBlocks(const QString &requestId, OllamaMessage *message);
 
     QHash<QString, OllamaMessage *> m_messages;
     QHash m_requestUrls;
     QHash m_originalRequests;
+    QSet<QString> m_thinkingEmitted;
+    QSet<QString> m_thinkingStarted;
 
     Tools::ToolsManager *m_toolsManager;
 };
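m_thinkingStarted and m_thinkingEmitted above act as per-request one-shot guards: the first qualifying chunk emits, later chunks are no-ops, and cleanupRequest() resets the flag. The same pattern in isolation, with illustrative names:

#include <QSet>
#include <QString>

// One-shot guard keyed by request ID, as used for m_thinkingStarted and
// m_thinkingEmitted: tryFire() succeeds once per ID until cleanup().
class OncePerRequest
{
public:
    bool tryFire(const QString &requestId)
    {
        if (m_fired.contains(requestId))
            return false;
        m_fired.insert(requestId);
        return true;
    }
    void cleanup(const QString &requestId) { m_fired.remove(requestId); }

private:
    QSet<QString> m_fired;
};

int main()
{
    OncePerRequest guard;
    bool first = guard.tryFire("req-1");  // true: emit the initial indicator
    bool second = guard.tryFire("req-1"); // false: already emitted
    guard.cleanup("req-1");               // request finished, forget the ID
    return (first && !second) ? 0 : 1;
}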
diff --git a/settings/ChatAssistantSettings.cpp b/settings/ChatAssistantSettings.cpp
index 856e90d..9537997 100644
--- a/settings/ChatAssistantSettings.cpp
+++ b/settings/ChatAssistantSettings.cpp
@@ -146,10 +146,10 @@ ChatAssistantSettings::ChatAssistantSettings()
     // Extended Thinking Settings
     enableThinkingMode.setSettingsKey(Constants::CA_ENABLE_THINKING_MODE);
-    enableThinkingMode.setLabelText(Tr::tr("Enable extended thinking mode (Claude only).\n Temperature is 1.0 accordingly API requerement"));
+    enableThinkingMode.setLabelText(Tr::tr("Enable extended thinking mode (Claude, Ollama).\n Temperature is set to 1.0 as required by the Claude API"));
     enableThinkingMode.setToolTip(
-        Tr::tr("Enable Claude's extended thinking mode for complex reasoning tasks. "
-               "This provides step-by-step reasoning before the final answer."));
+        Tr::tr("Enable extended thinking mode for complex reasoning tasks. "
+               "This provides step-by-step reasoning before the final answer."));
     enableThinkingMode.setDefaultValue(false);
 
     thinkingBudgetTokens.setSettingsKey(Constants::CA_THINKING_BUDGET_TOKENS);
@@ -298,7 +298,7 @@ ChatAssistantSettings::ChatAssistantSettings()
         }},
         Group{title(Tr::tr("Ollama Settings")), Column{Row{ollamaGrid, Stretch{1}}}},
         Group{
-            title(Tr::tr("Extended Thinking (Claude Only)")),
+            title(Tr::tr("Extended Thinking (Claude, Ollama)")),
             Column{enableThinkingMode, Row{thinkingGrid, Stretch{1}}}},
         Group{title(Tr::tr("Chat Settings")), Row{chatViewSettingsGrid, Stretch{1}}},
         Stretch{1}};
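On the UI side, the ChatModel::addThinkingBlock() change at the top of this patch makes repeated thinkingBlockReceived emissions for one request update the existing "Thinking" row instead of appending duplicates. A simplified sketch of that update-or-append behaviour, with an illustrative structure rather than the ChatModel API:

#include <QHash>
#include <QString>
#include <QStringList>

struct ThinkingLog
{
    QStringList rows;            // one display row per thinking block
    QHash<QString, int> rowById; // request ID -> row index

    void addOrUpdate(const QString &requestId, const QString &text)
    {
        if (rowById.contains(requestId)) {
            rows[rowById.value(requestId)] = text; // update the existing row
        } else {
            rowById.insert(requestId, rows.size()); // append a new row
            rows.append(text);
        }
    }
};

int main()
{
    ThinkingLog log;
    log.addOrUpdate("req-1", "Thinking...");
    log.addOrUpdate("req-1", "Thinking: step 1 done"); // same row, new text
    return log.rows.size() == 1 ? 0 : 1;
}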