feat: Add Claude extended thinking (#254)

* feat: Add Claude extended thinking
* fix: Set 1.0 temperature for thinking mode
This commit is contained in:
Petr Mironychev
2025-11-12 18:33:15 +01:00
committed by GitHub
parent 89797639cf
commit 161d77ac04
23 changed files with 745 additions and 40 deletions

View File

@ -86,7 +86,6 @@ void ClaudeProvider::prepareRequest(
auto applyModelParams = [&request](const auto &settings) {
request["max_tokens"] = settings.maxTokens();
request["temperature"] = settings.temperature();
if (settings.useTopP())
request["top_p"] = settings.topP();
if (settings.useTopK())
@ -96,8 +95,21 @@ void ClaudeProvider::prepareRequest(
if (type == LLMCore::RequestType::CodeCompletion) {
applyModelParams(Settings::codeCompletionSettings());
request["temperature"] = Settings::codeCompletionSettings().temperature();
} else {
applyModelParams(Settings::chatAssistantSettings());
const auto &chatSettings = Settings::chatAssistantSettings();
applyModelParams(chatSettings);
if (chatSettings.enableThinkingMode()) {
QJsonObject thinkingObj;
thinkingObj["type"] = "enabled";
thinkingObj["budget_tokens"] = chatSettings.thinkingBudgetTokens();
request["thinking"] = thinkingObj;
request["max_tokens"] = chatSettings.thinkingMaxTokens();
request["temperature"] = 1.0;
} else {
request["temperature"] = chatSettings.temperature();
}
}
if (isToolsEnabled) {
@ -169,7 +181,8 @@ QList<QString> ClaudeProvider::validateRequest(const QJsonObject &request, LLMCo
{"top_k", {}},
{"stop", QJsonArray{}},
{"stream", {}},
{"tools", {}}};
{"tools", {}},
{"thinking", QJsonObject{{"type", {}}, {"budget_tokens", {}}}}};
return LLMCore::ValidationUtils::validateRequestFields(request, templateReq);
}
@ -220,6 +233,10 @@ bool ClaudeProvider::supportsTools() const
return true;
}
bool ClaudeProvider::supportThinking() const {
return true;
};
void ClaudeProvider::cancelRequest(const LLMCore::RequestID &requestId)
{
LOG_MESSAGE(QString("ClaudeProvider: Cancelling request %1").arg(requestId));
@ -308,7 +325,14 @@ void ClaudeProvider::onToolExecutionComplete(
messages.append(userMessage);
continuationRequest["messages"] = messages;
if (continuationRequest.contains("thinking")) {
QJsonObject thinkingObj = continuationRequest["thinking"].toObject();
LOG_MESSAGE(QString("Thinking mode preserved for continuation: type=%1, budget=%2 tokens")
.arg(thinkingObj["type"].toString())
.arg(thinkingObj["budget_tokens"].toInt()));
}
LOG_MESSAGE(QString("Sending continuation request for %1 with %2 tool results")
.arg(requestId)
.arg(toolResults.size()));
@ -347,6 +371,13 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj
LOG_MESSAGE(
QString("Adding new content block: type=%1, index=%2").arg(blockType).arg(index));
if (blockType == "thinking" || blockType == "redacted_thinking") {
QJsonDocument eventDoc(event);
LOG_MESSAGE(QString("content_block_start event for %1: %2")
.arg(blockType)
.arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
}
message->handleContentBlockStart(index, blockType, contentBlock);
@ -362,12 +393,90 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj
LLMCore::DataBuffers &buffers = m_dataBuffers[requestId];
buffers.responseContent += text;
emit partialResponseReceived(requestId, text);
} else if (deltaType == "signature_delta") {
QString signature = delta["signature"].toString();
}
} else if (eventType == "content_block_stop") {
int index = event["index"].toInt();
auto allBlocks = message->getCurrentBlocks();
if (index < allBlocks.size()) {
QString blockType = allBlocks[index]->type();
if (blockType == "thinking" || blockType == "redacted_thinking") {
QJsonDocument eventDoc(event);
LOG_MESSAGE(QString("content_block_stop event for %1 at index %2: %3")
.arg(blockType)
.arg(index)
.arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
}
}
if (event.contains("content_block")) {
QJsonObject contentBlock = event["content_block"].toObject();
QString blockType = contentBlock["type"].toString();
if (blockType == "thinking") {
QString signature = contentBlock["signature"].toString();
if (!signature.isEmpty()) {
auto allBlocks = message->getCurrentBlocks();
if (index < allBlocks.size()) {
if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(allBlocks[index])) {
thinkingContent->setSignature(signature);
LOG_MESSAGE(
QString("Updated thinking block signature from content_block_stop, "
"signature length=%1")
.arg(signature.length()));
}
}
}
} else if (blockType == "redacted_thinking") {
QString signature = contentBlock["signature"].toString();
if (!signature.isEmpty()) {
auto allBlocks = message->getCurrentBlocks();
if (index < allBlocks.size()) {
if (auto redactedContent = qobject_cast<LLMCore::RedactedThinkingContent *>(allBlocks[index])) {
redactedContent->setSignature(signature);
LOG_MESSAGE(
QString("Updated redacted_thinking block signature from content_block_stop, "
"signature length=%1")
.arg(signature.length()));
}
}
}
}
}
message->handleContentBlockStop(index);
auto thinkingBlocks = message->getCurrentThinkingContent();
for (auto thinkingContent : thinkingBlocks) {
auto allBlocks = message->getCurrentBlocks();
if (index < allBlocks.size() && allBlocks[index] == thinkingContent) {
emit thinkingBlockReceived(
requestId, thinkingContent->thinking(), thinkingContent->signature());
LOG_MESSAGE(
QString("Emitted thinking block for request %1, thinking length=%2, signature length=%3")
.arg(requestId)
.arg(thinkingContent->thinking().length())
.arg(thinkingContent->signature().length()));
break;
}
}
auto redactedBlocks = message->getCurrentRedactedThinkingContent();
for (auto redactedContent : redactedBlocks) {
auto allBlocks = message->getCurrentBlocks();
if (index < allBlocks.size() && allBlocks[index] == redactedContent) {
emit redactedThinkingBlockReceived(requestId, redactedContent->signature());
LOG_MESSAGE(
QString("Emitted redacted thinking block for request %1, signature length=%2")
.arg(requestId)
.arg(redactedContent->signature().length()));
break;
}
}
} else if (eventType == "message_delta") {
QJsonObject delta = event["delta"].toObject();
if (delta.contains("stop_reason")) {