feat: Add Claude extended thinking (#254)

* feat: Add Claude extended thinking
* fix: Set 1.0 temperature for thinking mode
2026-02-17 12:33:05 -05:00 · 2025-11-12 18:33:15 +01:00
parent 89797639cf
commit 161d77ac04
23 changed files with 745 additions and 40 deletions
--- a/providers/ClaudeMessage.cpp
+++ b/providers/ClaudeMessage.cpp
@ -46,6 +46,19 @@ void ClaudeMessage::handleContentBlockStart(

        addCurrentContent<LLMCore::ToolUseContent>(toolId, toolName, toolInput);
        m_pendingToolInputs[index] = "";
+
+    } else if (blockType == "thinking") {
+        QString thinking = data["thinking"].toString();
+        QString signature = data["signature"].toString();
+        LOG_MESSAGE(QString("ClaudeMessage: Creating thinking block with signature length=%1")
+                        .arg(signature.length()));
+        addCurrentContent<LLMCore::ThinkingContent>(thinking, signature);
+
+    } else if (blockType == "redacted_thinking") {
+        QString signature = data["signature"].toString();
+        LOG_MESSAGE(QString("ClaudeMessage: Creating redacted_thinking block with signature length=%1")
+                        .arg(signature.length()));
+        addCurrentContent<LLMCore::RedactedThinkingContent>(signature);
    }
 }

@ -66,6 +79,24 @@ void ClaudeMessage::handleContentBlockDelta(
        if (m_pendingToolInputs.contains(index)) {
            m_pendingToolInputs[index] += partialJson;
        }
+
+    } else if (deltaType == "thinking_delta") {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(m_currentBlocks[index])) {
+            thinkingContent->appendThinking(delta["thinking"].toString());
+        }
+        
+    } else if (deltaType == "signature_delta") {
+        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(m_currentBlocks[index])) {
+            QString signature = delta["signature"].toString();
+            thinkingContent->setSignature(signature);
+            LOG_MESSAGE(QString("Set signature for thinking block %1: length=%2")
+                            .arg(index).arg(signature.length()));
+        } else if (auto redactedContent = qobject_cast<LLMCore::RedactedThinkingContent *>(m_currentBlocks[index])) {
+            QString signature = delta["signature"].toString();
+            redactedContent->setSignature(signature);
+            LOG_MESSAGE(QString("Set signature for redacted_thinking block %1: length=%2")
+                            .arg(index).arg(signature.length()));
+        }
    }
 }

@ -104,11 +135,17 @@ QJsonObject ClaudeMessage::toProviderFormat() const
    message["role"] = "assistant";

    QJsonArray content;
+    
    for (auto block : m_currentBlocks) {
-        content.append(block->toJson(LLMCore::ProviderFormat::Claude));
+        QJsonValue blockJson = block->toJson(LLMCore::ProviderFormat::Claude);
+        content.append(blockJson);
    }

    message["content"] = content;
+    
+    LOG_MESSAGE(QString("ClaudeMessage::toProviderFormat - message with %1 content block(s)")
+                    .arg(m_currentBlocks.size()));
+    
    return message;
 }

@ -138,6 +175,28 @@ QList<LLMCore::ToolUseContent *> ClaudeMessage::getCurrentToolUseContent() const
    return toolBlocks;
 }

+// Returns the entries of m_currentBlocks that qobject_cast to ThinkingContent,
+// in the same order in which they are stored.
+QList<LLMCore::ThinkingContent *> ClaudeMessage::getCurrentThinkingContent() const
+{
+    QList<LLMCore::ThinkingContent *> result;
+    for (auto *candidate : m_currentBlocks) {
+        auto *asThinking = qobject_cast<LLMCore::ThinkingContent *>(candidate);
+        if (!asThinking)
+            continue; // some other block type; leave it out
+        result.append(asThinking);
+    }
+    return result;
+}
+
+// Returns the entries of m_currentBlocks that qobject_cast to
+// RedactedThinkingContent, preserving their stored order.
+QList<LLMCore::RedactedThinkingContent *> ClaudeMessage::getCurrentRedactedThinkingContent() const
+{
+    QList<LLMCore::RedactedThinkingContent *> matches;
+    for (auto *entry : m_currentBlocks) {
+        auto *asRedacted = qobject_cast<LLMCore::RedactedThinkingContent *>(entry);
+        if (asRedacted != nullptr)
+            matches.append(asRedacted);
+    }
+    return matches;
+}
+
 void ClaudeMessage::startNewContinuation()
 {
    LOG_MESSAGE(QString("ClaudeMessage: Starting new continuation"));
--- a/providers/ClaudeMessage.hpp
+++ b/providers/ClaudeMessage.hpp
@ -39,6 +39,9 @@ public:

    LLMCore::MessageState state() const { return m_state; }
    QList<LLMCore::ToolUseContent *> getCurrentToolUseContent() const;
+    QList<LLMCore::ThinkingContent *> getCurrentThinkingContent() const;
+    QList<LLMCore::RedactedThinkingContent *> getCurrentRedactedThinkingContent() const;
+    const QList<LLMCore::ContentBlock *> &getCurrentBlocks() const { return m_currentBlocks; } // const reference to the block list, not a copy

    void startNewContinuation();

--- a/providers/ClaudeProvider.cpp
+++ b/providers/ClaudeProvider.cpp
@ -86,7 +86,6 @@ void ClaudeProvider::prepareRequest(

    auto applyModelParams = [&request](const auto &settings) {
        request["max_tokens"] = settings.maxTokens();
-        request["temperature"] = settings.temperature();
        if (settings.useTopP())
            request["top_p"] = settings.topP();
        if (settings.useTopK())
@ -96,8 +95,21 @@ void ClaudeProvider::prepareRequest(

    if (type == LLMCore::RequestType::CodeCompletion) {
        applyModelParams(Settings::codeCompletionSettings());
+        request["temperature"] = Settings::codeCompletionSettings().temperature();
    } else {
-        applyModelParams(Settings::chatAssistantSettings());
+        const auto &chatSettings = Settings::chatAssistantSettings();
+        applyModelParams(chatSettings);
+
+        if (chatSettings.enableThinkingMode()) {
+            QJsonObject thinkingObj;
+            thinkingObj["type"] = "enabled";
+            thinkingObj["budget_tokens"] = chatSettings.thinkingBudgetTokens();
+            request["thinking"] = thinkingObj;
+            request["max_tokens"] = chatSettings.thinkingMaxTokens();
+            request["temperature"] = 1.0;
+        } else {
+            request["temperature"] = chatSettings.temperature();
+        }
    }

    if (isToolsEnabled) {
@ -169,7 +181,8 @@ QList<QString> ClaudeProvider::validateRequest(const QJsonObject &request, LLMCo
        {"top_k", {}},
        {"stop", QJsonArray{}},
        {"stream", {}},
-        {"tools", {}}};
+        {"tools", {}},
+        {"thinking", QJsonObject{{"type", {}}, {"budget_tokens", {}}}}};

    return LLMCore::ValidationUtils::validateRequestFields(request, templateReq);
 }
@ -220,6 +233,10 @@ bool ClaudeProvider::supportsTools() const
    return true;
 }

+// Capability flag for Claude extended thinking: this provider always reports
+// true, and prepareRequest adds the "thinking" object when the chat settings
+// enable thinking mode.
+bool ClaudeProvider::supportThinking() const
+{
+    return true;
+}
+
 void ClaudeProvider::cancelRequest(const LLMCore::RequestID &requestId)
 {
    LOG_MESSAGE(QString("ClaudeProvider: Cancelling request %1").arg(requestId));
@ -308,7 +325,14 @@ void ClaudeProvider::onToolExecutionComplete(
    messages.append(userMessage);

    continuationRequest["messages"] = messages;
-
+    
+    if (continuationRequest.contains("thinking")) {
+        QJsonObject thinkingObj = continuationRequest["thinking"].toObject();
+        LOG_MESSAGE(QString("Thinking mode preserved for continuation: type=%1, budget=%2 tokens")
+                        .arg(thinkingObj["type"].toString())
+                        .arg(thinkingObj["budget_tokens"].toInt()));
+    }
+    
    LOG_MESSAGE(QString("Sending continuation request for %1 with %2 tool results")
                    .arg(requestId)
                    .arg(toolResults.size()));
@ -347,6 +371,13 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj

        LOG_MESSAGE(
            QString("Adding new content block: type=%1, index=%2").arg(blockType).arg(index));
+        
+        if (blockType == "thinking" || blockType == "redacted_thinking") {
+            QJsonDocument eventDoc(event);
+            LOG_MESSAGE(QString("content_block_start event for %1: %2")
+                            .arg(blockType)
+                            .arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
+        }

        message->handleContentBlockStart(index, blockType, contentBlock);

@ -362,12 +393,90 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj
            LLMCore::DataBuffers &buffers = m_dataBuffers[requestId];
            buffers.responseContent += text;
            emit partialResponseReceived(requestId, text);
+        } else if (deltaType == "signature_delta") {
+            QString signature = delta["signature"].toString();
        }

    } else if (eventType == "content_block_stop") {
        int index = event["index"].toInt();
+        
+        auto allBlocks = message->getCurrentBlocks();
+        if (index < allBlocks.size()) {
+            QString blockType = allBlocks[index]->type();
+            if (blockType == "thinking" || blockType == "redacted_thinking") {
+                QJsonDocument eventDoc(event);
+                LOG_MESSAGE(QString("content_block_stop event for %1 at index %2: %3")
+                                .arg(blockType)
+                                .arg(index)
+                                .arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
+            }
+        }
+        
+        if (event.contains("content_block")) {
+            QJsonObject contentBlock = event["content_block"].toObject();
+            QString blockType = contentBlock["type"].toString();
+            
+            if (blockType == "thinking") {
+                QString signature = contentBlock["signature"].toString();
+                if (!signature.isEmpty()) {
+                    auto allBlocks = message->getCurrentBlocks();
+                    if (index < allBlocks.size()) {
+                        if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(allBlocks[index])) {
+                            thinkingContent->setSignature(signature);
+                            LOG_MESSAGE(
+                                QString("Updated thinking block signature from content_block_stop, "
+                                        "signature length=%1")
+                                    .arg(signature.length()));
+                        }
+                    }
+                }
+            } else if (blockType == "redacted_thinking") {
+                QString signature = contentBlock["signature"].toString();
+                if (!signature.isEmpty()) {
+                    auto allBlocks = message->getCurrentBlocks();
+                    if (index < allBlocks.size()) {
+                        if (auto redactedContent = qobject_cast<LLMCore::RedactedThinkingContent *>(allBlocks[index])) {
+                            redactedContent->setSignature(signature);
+                            LOG_MESSAGE(
+                                QString("Updated redacted_thinking block signature from content_block_stop, "
+                                        "signature length=%1")
+                                    .arg(signature.length()));
+                        }
+                    }
+                }
+            }
+        }
+        
        message->handleContentBlockStop(index);

+        auto thinkingBlocks = message->getCurrentThinkingContent();
+        for (auto thinkingContent : thinkingBlocks) {
+            auto allBlocks = message->getCurrentBlocks();
+            if (index < allBlocks.size() && allBlocks[index] == thinkingContent) {
+                emit thinkingBlockReceived(
+                    requestId, thinkingContent->thinking(), thinkingContent->signature());
+                LOG_MESSAGE(
+                    QString("Emitted thinking block for request %1, thinking length=%2, signature length=%3")
+                        .arg(requestId)
+                        .arg(thinkingContent->thinking().length())
+                        .arg(thinkingContent->signature().length()));
+                break;
+            }
+        }
+
+        auto redactedBlocks = message->getCurrentRedactedThinkingContent();
+        for (auto redactedContent : redactedBlocks) {
+            auto allBlocks = message->getCurrentBlocks();
+            if (index < allBlocks.size() && allBlocks[index] == redactedContent) {
+                emit redactedThinkingBlockReceived(requestId, redactedContent->signature());
+                LOG_MESSAGE(
+                    QString("Emitted redacted thinking block for request %1, signature length=%2")
+                        .arg(requestId)
+                        .arg(redactedContent->signature().length()));
+                break;
+            }
+        }
+
    } else if (eventType == "message_delta") {
        QJsonObject delta = event["delta"].toObject();
        if (delta.contains("stop_reason")) {
--- a/providers/ClaudeProvider.hpp
+++ b/providers/ClaudeProvider.hpp
@ -53,6 +53,7 @@ public:
        const LLMCore::RequestID &requestId, const QUrl &url, const QJsonObject &payload) override;

    bool supportsTools() const override;
+    bool supportThinking() const override;
    void cancelRequest(const LLMCore::RequestID &requestId) override;

 public slots: