mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2026-05-30 02:49:12 -04:00
feat: Add caching Claude provider
This commit is contained in:
@@ -552,6 +552,14 @@ int ChatModel::sessionCompletionTokens() const
|
||||
return total;
|
||||
}
|
||||
|
||||
int ChatModel::sessionCachedPromptTokens() const
|
||||
{
|
||||
int total = 0;
|
||||
for (const auto &m : m_messages)
|
||||
total += m.cachedPromptTokens;
|
||||
return total;
|
||||
}
|
||||
|
||||
int ChatModel::sessionTotalTokens() const
|
||||
{
|
||||
return sessionPromptTokens() + sessionCompletionTokens();
|
||||
|
||||
@@ -19,6 +19,7 @@ class ChatModel : public QAbstractListModel
|
||||
Q_OBJECT
|
||||
Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
|
||||
Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
|
||||
Q_PROPERTY(int sessionCachedPromptTokens READ sessionCachedPromptTokens NOTIFY sessionUsageChanged FINAL)
|
||||
Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
|
||||
QML_ELEMENT
|
||||
|
||||
@@ -110,6 +111,7 @@ public:
|
||||
|
||||
int sessionPromptTokens() const;
|
||||
int sessionCompletionTokens() const;
|
||||
int sessionCachedPromptTokens() const;
|
||||
int sessionTotalTokens() const;
|
||||
|
||||
void setLoadingFromHistory(bool loading);
|
||||
|
||||
@@ -91,10 +91,20 @@ ChatRootView {
|
||||
loadButton.onClicked: root.showLoadDialog()
|
||||
clearButton.onClicked: root.clearChat()
|
||||
tokensBadge {
    // Session-wide cached prompt token count, read once so that both
    // bindings below switch on the same value.
    readonly property int sessionCached: root.chatModel.sessionCachedPromptTokens

    // Single text binding: the stale three-argument binding that preceded it
    // was superseded by this conditional and would set `text` twice.
    // The cached segment is only shown when the count is non-zero.
    text: sessionCached > 0
        ? qsTr("next ~%1 · session ↑%2 ↓%3 ↻%4")
              .arg(root.inputTokensCount)
              .arg(root.chatModel.sessionPromptTokens)
              .arg(root.chatModel.sessionCompletionTokens)
              .arg(sessionCached)
        : qsTr("next ~%1 · session ↑%2 ↓%3")
              .arg(root.inputTokensCount)
              .arg(root.chatModel.sessionPromptTokens)
              .arg(root.chatModel.sessionCompletionTokens)

    // The tooltip legend mirrors the segments shown in `text`.
    ToolTip.text: sessionCached > 0
        ? qsTr("next request (estimate) · session prompt ↑ / completion ↓ / cached ↻ (provider cache hits)")
        : qsTr("next request (estimate) · session prompt ↑ / completion ↓")
}
|
||||
recentPath {
|
||||
text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")
|
||||
|
||||
89
providers/ClaudeCacheControl.hpp
Normal file
89
providers/ClaudeCacheControl.hpp
Normal file
@@ -0,0 +1,89 @@
|
||||
// Copyright (C) 2024-2026 Petr Mironychev
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <QJsonArray>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QString>
|
||||
|
||||
namespace QodeAssist::Providers::ClaudeCacheControl {
|
||||
|
||||
// Builds the cache_control JSON object attached to a request block.
// The object always has "type": "ephemeral"; when extendedTtl is true it
// additionally carries "ttl": "1h".
inline QJsonObject buildBreakpoint(bool extendedTtl)
{
    QJsonObject breakpoint;
    breakpoint["type"] = "ephemeral";
    if (!extendedTtl) {
        return breakpoint;
    }
    breakpoint["ttl"] = "1h";
    return breakpoint;
}
|
||||
|
||||
// Sets "cache_control" on the last element of `blocks`.
//
// blocks       - array of JSON objects (content blocks or tool definitions);
//                modified in place.
// cacheControl - object stored under the "cache_control" key.
//
// The array is left untouched when it is empty or when its last element is
// not a JSON object.
inline void markLastBlock(QJsonArray &blocks, const QJsonObject &cacheControl)
{
    if (blocks.isEmpty())
        return;

    const auto lastIndex = blocks.size() - 1;
    const QJsonValue tail = blocks.at(lastIndex);
    // toObject() on a non-object yields an empty object; writing that back
    // would replace the original element with one holding only cache_control.
    if (!tail.isObject())
        return;

    QJsonObject last = tail.toObject();
    last["cache_control"] = cacheControl;
    blocks.replace(lastIndex, last);
}
|
||||
|
||||
// Attaches `cacheControl` to the "system" entry of `request`.
//
// - Non-empty string: replaced by a one-element array holding a
//   {"type": "text", "text": <string>, "cache_control": ...} block.
// - Array: the last block is marked via markLastBlock().
// - Missing key, empty string or any other value type: left unchanged.
inline void applyToSystem(QJsonObject &request, const QJsonObject &cacheControl)
{
    if (!request.contains("system")) {
        return;
    }

    const QJsonValue systemValue = request.value("system");

    if (systemValue.isArray()) {
        QJsonArray systemBlocks = systemValue.toArray();
        markLastBlock(systemBlocks, cacheControl);
        request["system"] = systemBlocks;
        return;
    }

    if (!systemValue.isString()) {
        return;
    }

    const QString prompt = systemValue.toString();
    if (prompt.isEmpty()) {
        return;
    }

    QJsonObject textBlock;
    textBlock["type"] = "text";
    textBlock["text"] = prompt;
    textBlock["cache_control"] = cacheControl;
    request["system"] = QJsonArray{textBlock};
}
|
||||
|
||||
// Attaches `cacheControl` to the last entry of the "tools" array of `request`.
//
// The request is left untouched when "tools" is missing, is not an array, or
// is an empty array.
inline void applyToTools(QJsonObject &request, const QJsonObject &cacheControl)
{
    const QJsonValue toolsValue = request.value("tools");
    // toArray() on a non-array yields an empty array; writing that back would
    // overwrite whatever the caller stored under "tools".
    if (!toolsValue.isArray())
        return;

    QJsonArray tools = toolsValue.toArray();
    if (tools.isEmpty())
        return;

    markLastBlock(tools, cacheControl);
    request["tools"] = tools;
}
|
||||
|
||||
// Attaches `cacheControl` to the second-to-last entry of the "messages" array
// of `request`, so the breakpoint sits on the message preceding the final one.
//
// - String content: replaced by a one-element array holding a
//   {"type": "text", "text": <string>, "cache_control": ...} block.
// - Array content: the last block is marked via markLastBlock().
//
// The request is left untouched when "messages" is missing or not an array,
// has fewer than two entries, when the target entry is not a JSON object, or
// when its content is an empty string, an empty array, or another value type.
inline void applyToHistory(QJsonObject &request, const QJsonObject &cacheControl)
{
    const QJsonValue messagesValue = request.value("messages");
    if (!messagesValue.isArray())
        return;

    QJsonArray messages = messagesValue.toArray();
    if (messages.size() < 2)
        return;

    const auto idx = messages.size() - 2;
    const QJsonValue target = messages.at(idx);
    // toObject() on a non-object yields an empty object; writing that back
    // would discard the original entry.
    if (!target.isObject())
        return;

    QJsonObject msg = target.toObject();
    const QJsonValue content = msg.value("content");
    if (content.isString()) {
        const QString text = content.toString();
        // Same rule as applyToSystem(): never wrap an empty string into an
        // empty text block.
        // NOTE(review): the Anthropic API is understood to reject empty text
        // blocks - confirm against the Messages API documentation.
        if (text.isEmpty())
            return;
        msg["content"] = QJsonArray{QJsonObject{
            {"type", "text"}, {"text", text}, {"cache_control", cacheControl}}};
    } else if (content.isArray()) {
        QJsonArray blocks = content.toArray();
        if (blocks.isEmpty())
            return;
        markLastBlock(blocks, cacheControl);
        msg["content"] = blocks;
    } else {
        // Nothing to mark; avoid rewriting the request needlessly.
        return;
    }

    messages.replace(idx, msg);
    request["messages"] = messages;
}
|
||||
|
||||
// Entry point: builds one cache_control object (with the 1h ttl when
// extendedTtl is true) and applies it, in this order, to the "system",
// "tools" and "messages" entries of `request`.
inline void apply(QJsonObject &request, bool extendedTtl)
{
    const QJsonObject breakpoint = buildBreakpoint(extendedTtl);

    applyToSystem(request, breakpoint);
    applyToTools(request, breakpoint);
    applyToHistory(request, breakpoint);
}
|
||||
|
||||
} // namespace QodeAssist::Providers::ClaudeCacheControl
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include <LLMQore/ToolsManager.hpp>
|
||||
|
||||
#include "ClaudeCacheControl.hpp"
|
||||
#include "logger/Logger.hpp"
|
||||
#include "settings/ChatAssistantSettings.hpp"
|
||||
#include "settings/CodeCompletionSettings.hpp"
|
||||
@@ -104,6 +105,14 @@ void ClaudeProvider::prepareRequest(
|
||||
LOG_MESSAGE(QString("Added %1 tools to Claude request").arg(toolsDefinitions.size()));
|
||||
}
|
||||
}
|
||||
|
||||
const auto &ps = Settings::providerSettings();
|
||||
const bool cachingOn = ps.claudeEnablePromptCaching()
|
||||
&& type != PluginLLMCore::RequestType::CodeCompletion;
|
||||
m_client->setUseExtendedCacheTTL(cachingOn && ps.claudeUseExtendedCacheTTL());
|
||||
if (cachingOn) {
|
||||
ClaudeCacheControl::apply(request, ps.claudeUseExtendedCacheTTL());
|
||||
}
|
||||
}
|
||||
|
||||
QFuture<QList<QString>> ClaudeProvider::getInstalledModels(const QString &baseUrl)
|
||||
|
||||
@@ -53,6 +53,23 @@ ProviderSettings::ProviderSettings()
|
||||
claudeApiKey.setDefaultValue("");
|
||||
claudeApiKey.setAutoApply(true);
|
||||
|
||||
claudeEnablePromptCaching.setSettingsKey(Constants::CLAUDE_ENABLE_PROMPT_CACHING);
|
||||
claudeEnablePromptCaching.setLabelText(Tr::tr("Enable prompt caching"));
|
||||
claudeEnablePromptCaching.setToolTip(
|
||||
Tr::tr("Marks the system prompt, tool definitions, and stable chat history with "
|
||||
"cache_control so Anthropic caches the request prefix (5-minute TTL). "
|
||||
"Reduces cost and latency on repeated turns."));
|
||||
claudeEnablePromptCaching.setDefaultValue(false);
|
||||
claudeEnablePromptCaching.setAutoApply(true);
|
||||
|
||||
claudeUseExtendedCacheTTL.setSettingsKey(Constants::CLAUDE_USE_EXTENDED_CACHE_TTL);
|
||||
claudeUseExtendedCacheTTL.setLabelText(Tr::tr("Use 1h cache TTL (beta)"));
|
||||
claudeUseExtendedCacheTTL.setToolTip(
|
||||
Tr::tr("Requests Anthropic's 1-hour cache TTL instead of the default 5 minutes. "
|
||||
"Sends the extended-cache-ttl-2025-04-11 beta header."));
|
||||
claudeUseExtendedCacheTTL.setDefaultValue(false);
|
||||
claudeUseExtendedCacheTTL.setAutoApply(true);
|
||||
|
||||
// OpenAI Settings
|
||||
openAiApiKey.setSettingsKey(Constants::OPEN_AI_API_KEY);
|
||||
openAiApiKey.setLabelText(Tr::tr("OpenAI API Key:"));
|
||||
@@ -124,7 +141,9 @@ ProviderSettings::ProviderSettings()
|
||||
Space{8},
|
||||
Group{title(Tr::tr("OpenAI Compatible Settings")), Column{openAiCompatApiKey}},
|
||||
Space{8},
|
||||
Group{title(Tr::tr("Claude Settings")), Column{claudeApiKey}},
|
||||
Group{
|
||||
title(Tr::tr("Claude Settings")),
|
||||
Column{claudeApiKey, claudeEnablePromptCaching, claudeUseExtendedCacheTTL}},
|
||||
Space{8},
|
||||
Group{title(Tr::tr("Mistral AI Settings")), Column{mistralAiApiKey, codestralApiKey}},
|
||||
Space{8},
|
||||
@@ -148,6 +167,12 @@ void ProviderSettings::setupConnections()
|
||||
openAiCompatApiKey.writeSettings();
|
||||
});
|
||||
connect(&claudeApiKey, &ButtonAspect::changed, this, [this]() { claudeApiKey.writeSettings(); });
|
||||
connect(&claudeEnablePromptCaching, &Utils::BoolAspect::changed, this, [this]() {
|
||||
claudeEnablePromptCaching.writeSettings();
|
||||
});
|
||||
connect(&claudeUseExtendedCacheTTL, &Utils::BoolAspect::changed, this, [this]() {
|
||||
claudeUseExtendedCacheTTL.writeSettings();
|
||||
});
|
||||
connect(&openAiApiKey, &ButtonAspect::changed, this, [this]() { openAiApiKey.writeSettings(); });
|
||||
connect(&mistralAiApiKey, &ButtonAspect::changed, this, [this]() {
|
||||
mistralAiApiKey.writeSettings();
|
||||
@@ -179,6 +204,8 @@ void ProviderSettings::resetSettingsToDefaults()
|
||||
resetAspect(openRouterApiKey);
|
||||
resetAspect(openAiCompatApiKey);
|
||||
resetAspect(claudeApiKey);
|
||||
resetAspect(claudeEnablePromptCaching);
|
||||
resetAspect(claudeUseExtendedCacheTTL);
|
||||
resetAspect(openAiApiKey);
|
||||
resetAspect(mistralAiApiKey);
|
||||
resetAspect(googleAiApiKey);
|
||||
|
||||
@@ -20,6 +20,8 @@ public:
|
||||
Utils::StringAspect openRouterApiKey{this};
|
||||
Utils::StringAspect openAiCompatApiKey{this};
|
||||
Utils::StringAspect claudeApiKey{this};
|
||||
Utils::BoolAspect claudeEnablePromptCaching{this};
|
||||
Utils::BoolAspect claudeUseExtendedCacheTTL{this};
|
||||
Utils::StringAspect openAiApiKey{this};
|
||||
Utils::StringAspect mistralAiApiKey{this};
|
||||
Utils::StringAspect codestralApiKey{this};
|
||||
|
||||
@@ -149,6 +149,9 @@ const char OLLAMA_BASIC_AUTH_API_KEY_HISTORY[] = "QodeAssist.ollamaBasicAuthApiK
|
||||
const char LLAMA_CPP_API_KEY[] = "QodeAssist.llamaCppApiKey";
|
||||
const char LLAMA_CPP_API_KEY_HISTORY[] = "QodeAssist.llamaCppApiKeyHistory";
|
||||
|
||||
const char CLAUDE_ENABLE_PROMPT_CACHING[] = "QodeAssist.claudeEnablePromptCaching";
|
||||
const char CLAUDE_USE_EXTENDED_CACHE_TTL[] = "QodeAssist.claudeUseExtendedCacheTTL";
|
||||
|
||||
// context settings
|
||||
const char CC_READ_FULL_FILE[] = "QodeAssist.ccReadFullFile";
|
||||
const char CC_READ_STRINGS_BEFORE_CURSOR[] = "QodeAssist.ccReadStringsBeforeCursor";
|
||||
|
||||
2
sources/external/llmqore
vendored
2
sources/external/llmqore
vendored
Submodule sources/external/llmqore updated: 0c9fc8bd7c...6b62a2d57c
@@ -3,6 +3,7 @@ add_executable(QodeAssistTest
|
||||
../LLMClientInterface.cpp
|
||||
../LLMSuggestion.cpp
|
||||
CodeHandlerTest.cpp
|
||||
ClaudeCacheControlTest.cpp
|
||||
DocumentContextReaderTest.cpp
|
||||
LLMSuggestionTest.cpp
|
||||
# LLMClientInterfaceTests.cpp
|
||||
@@ -21,6 +22,8 @@ target_link_libraries(QodeAssistTest PRIVATE
|
||||
LLMQore
|
||||
)
|
||||
|
||||
target_include_directories(QodeAssistTest PRIVATE ${CMAKE_SOURCE_DIR})
|
||||
|
||||
target_compile_definitions(QodeAssistTest PRIVATE CMAKE_CURRENT_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
add_test(NAME QodeAssistTest COMMAND QodeAssistTest)
|
||||
|
||||
181
test/ClaudeCacheControlTest.cpp
Normal file
181
test/ClaudeCacheControlTest.cpp
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright (C) 2024-2026 Petr Mironychev
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <QJsonArray>
|
||||
#include <QJsonObject>
|
||||
|
||||
#include "providers/ClaudeCacheControl.hpp"
|
||||
|
||||
using namespace QodeAssist::Providers::ClaudeCacheControl;
|
||||
|
||||
namespace {
|
||||
|
||||
QJsonObject expectedEphemeral(bool extendedTtl)
|
||||
{
|
||||
QJsonObject obj{{"type", "ephemeral"}};
|
||||
if (extendedTtl)
|
||||
obj["ttl"] = "1h";
|
||||
return obj;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Without the extended-TTL flag the breakpoint has the "ephemeral" type and
// no "ttl" key.
TEST(ClaudeCacheControlTest, BreakpointWithoutExtendedTTL)
{
    const QJsonObject breakpoint = buildBreakpoint(false);

    const auto typeName = breakpoint.value("type").toString();
    EXPECT_EQ(typeName, "ephemeral");
    EXPECT_FALSE(breakpoint.contains("ttl"));
}
|
||||
|
||||
// With the extended-TTL flag the breakpoint keeps the "ephemeral" type and
// gains "ttl": "1h".
TEST(ClaudeCacheControlTest, BreakpointWithExtendedTTL)
{
    const QJsonObject breakpoint = buildBreakpoint(true);

    const auto typeName = breakpoint.value("type").toString();
    EXPECT_EQ(typeName, "ephemeral");

    const auto ttl = breakpoint.value("ttl").toString();
    EXPECT_EQ(ttl, "1h");
}
|
||||
|
||||
// A plain-string system prompt becomes a single text block that carries the
// cache_control marker.
TEST(ClaudeCacheControlTest, SystemAsStringWrappedIntoArray)
{
    QJsonObject payload;
    payload["system"] = "you are a helpful agent";

    apply(payload, false);

    ASSERT_TRUE(payload.value("system").isArray());
    const QJsonArray systemBlocks = payload.value("system").toArray();
    ASSERT_EQ(systemBlocks.size(), 1);

    const QJsonObject onlyBlock = systemBlocks.first().toObject();
    EXPECT_EQ(onlyBlock.value("type").toString(), "text");
    EXPECT_EQ(onlyBlock.value("text").toString(), "you are a helpful agent");
    EXPECT_EQ(onlyBlock.value("cache_control").toObject(), expectedEphemeral(false));
}
|
||||
|
||||
// An empty system prompt must stay a string; no text block is created for it.
TEST(ClaudeCacheControlTest, EmptySystemStringIsNotWrapped)
{
    QJsonObject payload;
    payload["system"] = "";

    apply(payload, false);

    const auto systemValue = payload.value("system");
    EXPECT_TRUE(systemValue.isString());
}
|
||||
|
||||
// When the system prompt is already an array, only its final block is marked.
TEST(ClaudeCacheControlTest, SystemAsArrayMarksLastBlock)
{
    const QJsonObject firstBlock{{"type", "text"}, {"text", "a"}};
    const QJsonObject secondBlock{{"type", "text"}, {"text", "b"}};

    QJsonObject payload;
    payload["system"] = QJsonArray{firstBlock, secondBlock};

    apply(payload, false);

    const QJsonArray systemBlocks = payload.value("system").toArray();
    ASSERT_EQ(systemBlocks.size(), 2);

    const QJsonObject head = systemBlocks[0].toObject();
    const QJsonObject tail = systemBlocks[1].toObject();
    EXPECT_FALSE(head.contains("cache_control"));
    EXPECT_EQ(tail.value("cache_control").toObject(), expectedEphemeral(false));
}
|
||||
|
||||
// Only the last tool definition receives the (extended-TTL) cache_control.
TEST(ClaudeCacheControlTest, ToolsLastEntryGetsCacheControl)
{
    QJsonArray toolDefinitions;
    toolDefinitions.append(QJsonObject{{"name", "read_file"}});
    toolDefinitions.append(QJsonObject{{"name", "edit_file"}});
    toolDefinitions.append(QJsonObject{{"name", "search"}});

    QJsonObject payload;
    payload["tools"] = toolDefinitions;

    apply(payload, true);

    const QJsonArray markedTools = payload.value("tools").toArray();
    ASSERT_EQ(markedTools.size(), 3);

    EXPECT_FALSE(markedTools[0].toObject().contains("cache_control"));
    EXPECT_FALSE(markedTools[1].toObject().contains("cache_control"));

    const QJsonObject lastTool = markedTools[2].toObject();
    EXPECT_EQ(lastTool.value("cache_control").toObject(), expectedEphemeral(true));
}
|
||||
|
||||
// A history with a single message has no earlier message to mark, so its
// content stays a plain string.
TEST(ClaudeCacheControlTest, SingleMessageHistorySkipped)
{
    const QJsonObject onlyMessage{{"role", "user"}, {"content", "first message"}};

    QJsonObject payload;
    payload["messages"] = QJsonArray{onlyMessage};

    apply(payload, false);

    const QJsonArray history = payload.value("messages").toArray();
    ASSERT_EQ(history.size(), 1);

    const auto content = history[0].toObject().value("content");
    EXPECT_TRUE(content.isString());
}
|
||||
|
||||
// With three messages, the breakpoint lands on the middle (second-to-last)
// one; the first and the last message keep their string content.
TEST(ClaudeCacheControlTest, HistoryBreakpointOnSecondToLastMessage)
{
    QJsonArray conversation;
    conversation.append(QJsonObject{{"role", "user"}, {"content", "u1"}});
    conversation.append(QJsonObject{{"role", "assistant"}, {"content", "a1"}});
    conversation.append(QJsonObject{{"role", "user"}, {"content", "u2-current"}});

    QJsonObject payload;
    payload["messages"] = conversation;

    apply(payload, false);

    const QJsonArray history = payload.value("messages").toArray();
    ASSERT_EQ(history.size(), 3);

    // First message: untouched.
    EXPECT_TRUE(history[0].toObject().value("content").isString());

    // Middle message: wrapped into one text block carrying the marker.
    const QJsonArray assistantBlocks = history[1].toObject().value("content").toArray();
    ASSERT_EQ(assistantBlocks.size(), 1);
    EXPECT_EQ(assistantBlocks.first().toObject().value("text").toString(), "a1");
    EXPECT_EQ(
        assistantBlocks.first().toObject().value("cache_control").toObject(),
        expectedEphemeral(false));

    // Last (current) message: untouched.
    EXPECT_TRUE(history[2].toObject().value("content").isString());
}
|
||||
|
||||
// When the marked message already has array content, only the last block of
// that array receives cache_control.
TEST(ClaudeCacheControlTest, HistoryArrayContentMarksLastBlock)
{
    const QJsonArray userBlocks{
        QJsonObject{{"type", "text"}, {"text", "describe this"}},
        QJsonObject{{"type", "image"}}};
    const QJsonObject userMessage{{"role", "user"}, {"content", userBlocks}};
    const QJsonObject assistantMessage{{"role", "assistant"}, {"content", "ok"}};

    QJsonObject payload;
    payload["messages"] = QJsonArray{userMessage, assistantMessage};

    apply(payload, false);

    const QJsonArray history = payload.value("messages").toArray();
    const QJsonArray markedBlocks = history[0].toObject().value("content").toArray();
    ASSERT_EQ(markedBlocks.size(), 2);

    EXPECT_FALSE(markedBlocks[0].toObject().contains("cache_control"));
    EXPECT_EQ(
        markedBlocks[1].toObject().value("cache_control").toObject(),
        expectedEphemeral(false));
}
|
||||
|
||||
// A request without "system", "tools" or "messages" keeps its existing
// fields and gains none of those keys.
TEST(ClaudeCacheControlTest, NoSystemNoToolsNoMessagesIsNoop)
{
    QJsonObject payload;
    payload["model"] = "claude-sonnet-4-5";
    payload["max_tokens"] = 1024;

    apply(payload, false);

    // Existing fields survive unchanged.
    EXPECT_EQ(payload.value("model").toString(), "claude-sonnet-4-5");
    EXPECT_EQ(payload.value("max_tokens").toInt(), 1024);

    // None of the cacheable sections were introduced.
    EXPECT_FALSE(payload.contains("system"));
    EXPECT_FALSE(payload.contains("tools"));
    EXPECT_FALSE(payload.contains("messages"));
}
|
||||
|
||||
// An empty "tools" array stays an empty array after apply().
TEST(ClaudeCacheControlTest, EmptyToolsArrayIsNoop)
{
    QJsonObject payload;
    payload["tools"] = QJsonArray{};

    apply(payload, false);

    const auto toolsValue = payload.value("tools");
    EXPECT_TRUE(toolsValue.isArray());
    EXPECT_TRUE(payload.value("tools").toArray().isEmpty());
}
|
||||
Reference in New Issue
Block a user