mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2025-11-13 21:42:52 -05:00
feat: Add Claude extended thinking (#254)
* feat: Add Claude extended thinking * fix: Set 1.0 temperature for thinking mode
This commit is contained in:
@ -46,6 +46,19 @@ void ClaudeMessage::handleContentBlockStart(
|
||||
|
||||
addCurrentContent<LLMCore::ToolUseContent>(toolId, toolName, toolInput);
|
||||
m_pendingToolInputs[index] = "";
|
||||
|
||||
} else if (blockType == "thinking") {
|
||||
QString thinking = data["thinking"].toString();
|
||||
QString signature = data["signature"].toString();
|
||||
LOG_MESSAGE(QString("ClaudeMessage: Creating thinking block with signature length=%1")
|
||||
.arg(signature.length()));
|
||||
addCurrentContent<LLMCore::ThinkingContent>(thinking, signature);
|
||||
|
||||
} else if (blockType == "redacted_thinking") {
|
||||
QString signature = data["signature"].toString();
|
||||
LOG_MESSAGE(QString("ClaudeMessage: Creating redacted_thinking block with signature length=%1")
|
||||
.arg(signature.length()));
|
||||
addCurrentContent<LLMCore::RedactedThinkingContent>(signature);
|
||||
}
|
||||
}
|
||||
|
||||
@ -66,6 +79,24 @@ void ClaudeMessage::handleContentBlockDelta(
|
||||
if (m_pendingToolInputs.contains(index)) {
|
||||
m_pendingToolInputs[index] += partialJson;
|
||||
}
|
||||
|
||||
} else if (deltaType == "thinking_delta") {
|
||||
if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(m_currentBlocks[index])) {
|
||||
thinkingContent->appendThinking(delta["thinking"].toString());
|
||||
}
|
||||
|
||||
} else if (deltaType == "signature_delta") {
|
||||
if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(m_currentBlocks[index])) {
|
||||
QString signature = delta["signature"].toString();
|
||||
thinkingContent->setSignature(signature);
|
||||
LOG_MESSAGE(QString("Set signature for thinking block %1: length=%2")
|
||||
.arg(index).arg(signature.length()));
|
||||
} else if (auto redactedContent = qobject_cast<LLMCore::RedactedThinkingContent *>(m_currentBlocks[index])) {
|
||||
QString signature = delta["signature"].toString();
|
||||
redactedContent->setSignature(signature);
|
||||
LOG_MESSAGE(QString("Set signature for redacted_thinking block %1: length=%2")
|
||||
.arg(index).arg(signature.length()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -104,11 +135,17 @@ QJsonObject ClaudeMessage::toProviderFormat() const
|
||||
message["role"] = "assistant";
|
||||
|
||||
QJsonArray content;
|
||||
|
||||
for (auto block : m_currentBlocks) {
|
||||
content.append(block->toJson(LLMCore::ProviderFormat::Claude));
|
||||
QJsonValue blockJson = block->toJson(LLMCore::ProviderFormat::Claude);
|
||||
content.append(blockJson);
|
||||
}
|
||||
|
||||
message["content"] = content;
|
||||
|
||||
LOG_MESSAGE(QString("ClaudeMessage::toProviderFormat - message with %1 content block(s)")
|
||||
.arg(m_currentBlocks.size()));
|
||||
|
||||
return message;
|
||||
}
|
||||
|
||||
@ -138,6 +175,28 @@ QList<LLMCore::ToolUseContent *> ClaudeMessage::getCurrentToolUseContent() const
|
||||
return toolBlocks;
|
||||
}
|
||||
|
||||
/**
 * Collects the blocks of m_currentBlocks that are LLMCore::ThinkingContent.
 *
 * The blocks are visited in list order, so the result keeps that order.
 * Blocks for which qobject_cast returns a null pointer are skipped.
 *
 * @return The matching block pointers; empty when no block matches.
 */
QList<LLMCore::ThinkingContent *> ClaudeMessage::getCurrentThinkingContent() const
{
    QList<LLMCore::ThinkingContent *> result;

    for (auto *candidate : m_currentBlocks) {
        auto *asThinking = qobject_cast<LLMCore::ThinkingContent *>(candidate);
        if (!asThinking)
            continue; // not a thinking block

        result.append(asThinking);
    }

    return result;
}
|
||||
|
||||
/**
 * Collects the blocks of m_currentBlocks that are
 * LLMCore::RedactedThinkingContent.
 *
 * The blocks are visited in list order, so the result keeps that order.
 * Blocks for which qobject_cast returns a null pointer are skipped.
 *
 * @return The matching block pointers; empty when no block matches.
 */
QList<LLMCore::RedactedThinkingContent *> ClaudeMessage::getCurrentRedactedThinkingContent() const
{
    QList<LLMCore::RedactedThinkingContent *> result;

    for (auto *candidate : m_currentBlocks) {
        auto *asRedacted = qobject_cast<LLMCore::RedactedThinkingContent *>(candidate);
        if (!asRedacted)
            continue; // not a redacted-thinking block

        result.append(asRedacted);
    }

    return result;
}
|
||||
|
||||
void ClaudeMessage::startNewContinuation()
|
||||
{
|
||||
LOG_MESSAGE(QString("ClaudeMessage: Starting new continuation"));
|
||||
|
||||
@ -39,6 +39,9 @@ public:
|
||||
|
||||
/** Returns the value currently held in m_state. */
LLMCore::MessageState state() const
{
    return m_state;
}
|
||||
QList<LLMCore::ToolUseContent *> getCurrentToolUseContent() const;
|
||||
QList<LLMCore::ThinkingContent *> getCurrentThinkingContent() const;
|
||||
QList<LLMCore::RedactedThinkingContent *> getCurrentRedactedThinkingContent() const;
|
||||
/**
 * Gives read-only access to m_currentBlocks.
 *
 * The list is returned by const reference, so no copy is made by this call.
 */
const QList<LLMCore::ContentBlock *> &getCurrentBlocks() const
{
    return m_currentBlocks;
}
|
||||
|
||||
void startNewContinuation();
|
||||
|
||||
|
||||
@ -86,7 +86,6 @@ void ClaudeProvider::prepareRequest(
|
||||
|
||||
auto applyModelParams = [&request](const auto &settings) {
|
||||
request["max_tokens"] = settings.maxTokens();
|
||||
request["temperature"] = settings.temperature();
|
||||
if (settings.useTopP())
|
||||
request["top_p"] = settings.topP();
|
||||
if (settings.useTopK())
|
||||
@ -96,8 +95,21 @@ void ClaudeProvider::prepareRequest(
|
||||
|
||||
if (type == LLMCore::RequestType::CodeCompletion) {
|
||||
applyModelParams(Settings::codeCompletionSettings());
|
||||
request["temperature"] = Settings::codeCompletionSettings().temperature();
|
||||
} else {
|
||||
applyModelParams(Settings::chatAssistantSettings());
|
||||
const auto &chatSettings = Settings::chatAssistantSettings();
|
||||
applyModelParams(chatSettings);
|
||||
|
||||
if (chatSettings.enableThinkingMode()) {
|
||||
QJsonObject thinkingObj;
|
||||
thinkingObj["type"] = "enabled";
|
||||
thinkingObj["budget_tokens"] = chatSettings.thinkingBudgetTokens();
|
||||
request["thinking"] = thinkingObj;
|
||||
request["max_tokens"] = chatSettings.thinkingMaxTokens();
|
||||
request["temperature"] = 1.0;
|
||||
} else {
|
||||
request["temperature"] = chatSettings.temperature();
|
||||
}
|
||||
}
|
||||
|
||||
if (isToolsEnabled) {
|
||||
@ -169,7 +181,8 @@ QList<QString> ClaudeProvider::validateRequest(const QJsonObject &request, LLMCo
|
||||
{"top_k", {}},
|
||||
{"stop", QJsonArray{}},
|
||||
{"stream", {}},
|
||||
{"tools", {}}};
|
||||
{"tools", {}},
|
||||
{"thinking", QJsonObject{{"type", {}}, {"budget_tokens", {}}}}};
|
||||
|
||||
return LLMCore::ValidationUtils::validateRequestFields(request, templateReq);
|
||||
}
|
||||
@ -220,6 +233,10 @@ bool ClaudeProvider::supportsTools() const
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Reports whether this provider supports thinking mode.
 *
 * @return Always true.
 */
bool ClaudeProvider::supportThinking() const
{
    // No trailing ';' after the body: it would be a stray empty declaration
    // that pedantic / -Wextra-semi builds warn about.
    return true;
}
|
||||
|
||||
void ClaudeProvider::cancelRequest(const LLMCore::RequestID &requestId)
|
||||
{
|
||||
LOG_MESSAGE(QString("ClaudeProvider: Cancelling request %1").arg(requestId));
|
||||
@ -308,7 +325,14 @@ void ClaudeProvider::onToolExecutionComplete(
|
||||
messages.append(userMessage);
|
||||
|
||||
continuationRequest["messages"] = messages;
|
||||
|
||||
|
||||
if (continuationRequest.contains("thinking")) {
|
||||
QJsonObject thinkingObj = continuationRequest["thinking"].toObject();
|
||||
LOG_MESSAGE(QString("Thinking mode preserved for continuation: type=%1, budget=%2 tokens")
|
||||
.arg(thinkingObj["type"].toString())
|
||||
.arg(thinkingObj["budget_tokens"].toInt()));
|
||||
}
|
||||
|
||||
LOG_MESSAGE(QString("Sending continuation request for %1 with %2 tool results")
|
||||
.arg(requestId)
|
||||
.arg(toolResults.size()));
|
||||
@ -347,6 +371,13 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj
|
||||
|
||||
LOG_MESSAGE(
|
||||
QString("Adding new content block: type=%1, index=%2").arg(blockType).arg(index));
|
||||
|
||||
if (blockType == "thinking" || blockType == "redacted_thinking") {
|
||||
QJsonDocument eventDoc(event);
|
||||
LOG_MESSAGE(QString("content_block_start event for %1: %2")
|
||||
.arg(blockType)
|
||||
.arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
|
||||
}
|
||||
|
||||
message->handleContentBlockStart(index, blockType, contentBlock);
|
||||
|
||||
@ -362,12 +393,90 @@ void ClaudeProvider::processStreamEvent(const QString &requestId, const QJsonObj
|
||||
LLMCore::DataBuffers &buffers = m_dataBuffers[requestId];
|
||||
buffers.responseContent += text;
|
||||
emit partialResponseReceived(requestId, text);
|
||||
} else if (deltaType == "signature_delta") {
|
||||
QString signature = delta["signature"].toString();
|
||||
}
|
||||
|
||||
} else if (eventType == "content_block_stop") {
|
||||
int index = event["index"].toInt();
|
||||
|
||||
auto allBlocks = message->getCurrentBlocks();
|
||||
if (index < allBlocks.size()) {
|
||||
QString blockType = allBlocks[index]->type();
|
||||
if (blockType == "thinking" || blockType == "redacted_thinking") {
|
||||
QJsonDocument eventDoc(event);
|
||||
LOG_MESSAGE(QString("content_block_stop event for %1 at index %2: %3")
|
||||
.arg(blockType)
|
||||
.arg(index)
|
||||
.arg(QString::fromUtf8(eventDoc.toJson(QJsonDocument::Compact))));
|
||||
}
|
||||
}
|
||||
|
||||
if (event.contains("content_block")) {
|
||||
QJsonObject contentBlock = event["content_block"].toObject();
|
||||
QString blockType = contentBlock["type"].toString();
|
||||
|
||||
if (blockType == "thinking") {
|
||||
QString signature = contentBlock["signature"].toString();
|
||||
if (!signature.isEmpty()) {
|
||||
auto allBlocks = message->getCurrentBlocks();
|
||||
if (index < allBlocks.size()) {
|
||||
if (auto thinkingContent = qobject_cast<LLMCore::ThinkingContent *>(allBlocks[index])) {
|
||||
thinkingContent->setSignature(signature);
|
||||
LOG_MESSAGE(
|
||||
QString("Updated thinking block signature from content_block_stop, "
|
||||
"signature length=%1")
|
||||
.arg(signature.length()));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (blockType == "redacted_thinking") {
|
||||
QString signature = contentBlock["signature"].toString();
|
||||
if (!signature.isEmpty()) {
|
||||
auto allBlocks = message->getCurrentBlocks();
|
||||
if (index < allBlocks.size()) {
|
||||
if (auto redactedContent = qobject_cast<LLMCore::RedactedThinkingContent *>(allBlocks[index])) {
|
||||
redactedContent->setSignature(signature);
|
||||
LOG_MESSAGE(
|
||||
QString("Updated redacted_thinking block signature from content_block_stop, "
|
||||
"signature length=%1")
|
||||
.arg(signature.length()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
message->handleContentBlockStop(index);
|
||||
|
||||
auto thinkingBlocks = message->getCurrentThinkingContent();
|
||||
for (auto thinkingContent : thinkingBlocks) {
|
||||
auto allBlocks = message->getCurrentBlocks();
|
||||
if (index < allBlocks.size() && allBlocks[index] == thinkingContent) {
|
||||
emit thinkingBlockReceived(
|
||||
requestId, thinkingContent->thinking(), thinkingContent->signature());
|
||||
LOG_MESSAGE(
|
||||
QString("Emitted thinking block for request %1, thinking length=%2, signature length=%3")
|
||||
.arg(requestId)
|
||||
.arg(thinkingContent->thinking().length())
|
||||
.arg(thinkingContent->signature().length()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto redactedBlocks = message->getCurrentRedactedThinkingContent();
|
||||
for (auto redactedContent : redactedBlocks) {
|
||||
auto allBlocks = message->getCurrentBlocks();
|
||||
if (index < allBlocks.size() && allBlocks[index] == redactedContent) {
|
||||
emit redactedThinkingBlockReceived(requestId, redactedContent->signature());
|
||||
LOG_MESSAGE(
|
||||
QString("Emitted redacted thinking block for request %1, signature length=%2")
|
||||
.arg(requestId)
|
||||
.arg(redactedContent->signature().length()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (eventType == "message_delta") {
|
||||
QJsonObject delta = event["delta"].toObject();
|
||||
if (delta.contains("stop_reason")) {
|
||||
|
||||
@ -53,6 +53,7 @@ public:
|
||||
const LLMCore::RequestID &requestId, const QUrl &url, const QJsonObject &payload) override;
|
||||
|
||||
bool supportsTools() const override;
|
||||
bool supportThinking() const override;
|
||||
void cancelRequest(const LLMCore::RequestID &requestId) override;
|
||||
|
||||
public slots:
|
||||
|
||||
Reference in New Issue
Block a user