From 7704bffd88394021409687fedd295003f9d78a93 Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Fri, 15 May 2026 10:28:21 +0200 Subject: [PATCH] feat: Add caching Claude provider --- ChatView/ChatModel.cpp | 8 ++ ChatView/ChatModel.hpp | 2 + ChatView/qml/RootItem.qml | 12 +- providers/ClaudeCacheControl.hpp | 89 +++++++++++++++ providers/ClaudeProvider.cpp | 9 ++ settings/ProviderSettings.cpp | 29 ++++- settings/ProviderSettings.hpp | 2 + settings/SettingsConstants.hpp | 3 + sources/external/llmqore | 2 +- test/CMakeLists.txt | 3 + test/ClaudeCacheControlTest.cpp | 181 +++++++++++++++++++++++++++++++ 11 files changed, 337 insertions(+), 3 deletions(-) create mode 100644 providers/ClaudeCacheControl.hpp create mode 100644 test/ClaudeCacheControlTest.cpp diff --git a/ChatView/ChatModel.cpp b/ChatView/ChatModel.cpp index 90619e0..ff2111d 100644 --- a/ChatView/ChatModel.cpp +++ b/ChatView/ChatModel.cpp @@ -552,6 +552,14 @@ int ChatModel::sessionCompletionTokens() const return total; } +int ChatModel::sessionCachedPromptTokens() const +{ + int total = 0; + for (const auto &m : m_messages) + total += m.cachedPromptTokens; + return total; +} + int ChatModel::sessionTotalTokens() const { return sessionPromptTokens() + sessionCompletionTokens(); diff --git a/ChatView/ChatModel.hpp b/ChatView/ChatModel.hpp index 3f5a56b..b3f7c73 100644 --- a/ChatView/ChatModel.hpp +++ b/ChatView/ChatModel.hpp @@ -19,6 +19,7 @@ class ChatModel : public QAbstractListModel Q_OBJECT Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL) Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL) + Q_PROPERTY(int sessionCachedPromptTokens READ sessionCachedPromptTokens NOTIFY sessionUsageChanged FINAL) Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL) QML_ELEMENT @@ -110,6 +111,7 @@ public: int sessionPromptTokens() const; int 
sessionCompletionTokens() const; + int sessionCachedPromptTokens() const; int sessionTotalTokens() const; void setLoadingFromHistory(bool loading); diff --git a/ChatView/qml/RootItem.qml b/ChatView/qml/RootItem.qml index 8f9f51d..490d5e4 100644 --- a/ChatView/qml/RootItem.qml +++ b/ChatView/qml/RootItem.qml @@ -91,10 +91,20 @@ ChatRootView { loadButton.onClicked: root.showLoadDialog() clearButton.onClicked: root.clearChat() tokensBadge { - text: qsTr("next ~%1 · session ↑%2 ↓%3") + readonly property int sessionCached: root.chatModel.sessionCachedPromptTokens + text: sessionCached > 0 + ? qsTr("next ~%1 · session ↑%2 ↓%3 ↻%4") .arg(root.inputTokensCount) .arg(root.chatModel.sessionPromptTokens) .arg(root.chatModel.sessionCompletionTokens) + .arg(sessionCached) + : qsTr("next ~%1 · session ↑%2 ↓%3") + .arg(root.inputTokensCount) + .arg(root.chatModel.sessionPromptTokens) + .arg(root.chatModel.sessionCompletionTokens) + ToolTip.text: sessionCached > 0 + ? qsTr("next request (estimate) · session prompt ↑ / completion ↓ / cached ↻ (provider cache hits)") + : qsTr("next request (estimate) · session prompt ↑ / completion ↓") } recentPath { text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? 
root.chatFileName : "Unsaved") diff --git a/providers/ClaudeCacheControl.hpp b/providers/ClaudeCacheControl.hpp new file mode 100644 index 0000000..35563bc --- /dev/null +++ b/providers/ClaudeCacheControl.hpp @@ -0,0 +1,100 @@ +// Copyright (C) 2024-2026 Petr Mironychev +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include + +namespace QodeAssist::Providers::ClaudeCacheControl { + +// Builds the cache_control value: {"type": "ephemeral"}, plus "ttl": "1h" when extendedTtl is set. +inline QJsonObject buildBreakpoint(bool extendedTtl) +{ + QJsonObject cacheControl{{"type", "ephemeral"}}; + if (extendedTtl) + cacheControl["ttl"] = "1h"; + return cacheControl; +} + +// Attaches cacheControl to the last element of blocks; an empty array is left unchanged. +inline void markLastBlock(QJsonArray &blocks, const QJsonObject &cacheControl) +{ + if (blocks.isEmpty()) + return; + QJsonObject last = blocks.last().toObject(); + last["cache_control"] = cacheControl; + blocks.replace(blocks.size() - 1, last); +} + +// Marks "system": wraps a non-empty string into one marked text block, or marks an array's last block. +inline void applyToSystem(QJsonObject &request, const QJsonObject &cacheControl) +{ + if (!request.contains("system")) + return; + + const QJsonValue sys = request.value("system"); + if (sys.isString()) { + const QString text = sys.toString(); + if (!text.isEmpty()) { + request["system"] = QJsonArray{QJsonObject{ + {"type", "text"}, {"text", text}, {"cache_control", cacheControl}}}; + } + } else if (sys.isArray()) { + QJsonArray blocks = sys.toArray(); + markLastBlock(blocks, cacheControl); + request["system"] = blocks; + } +} + +// Marks the last entry of "tools" when the request has that key. +inline void applyToTools(QJsonObject &request, const QJsonObject &cacheControl) +{ + if (!request.contains("tools")) + return; + QJsonArray tools = request.value("tools").toArray(); + markLastBlock(tools, cacheControl); + request["tools"] = tools; +} + +// Marks the second-to-last entry of "messages"; requests with fewer than two messages are left unchanged. +inline void applyToHistory(QJsonObject &request, const QJsonObject &cacheControl) +{ + if (!request.contains("messages")) + return; + QJsonArray messages = request.value("messages").toArray(); + if (messages.size() < 2) + return; + + const int idx = messages.size() - 2; + QJsonObject msg = messages[idx].toObject(); + const QJsonValue 
content = msg.value("content"); + if (content.isString()) { + // An empty string would become an empty text block, which the Anthropic API + // rejects, so skip it just as applyToSystem does. + const QString text = content.toString(); + if (!text.isEmpty()) { + msg["content"] = QJsonArray{QJsonObject{ + {"type", "text"}, {"text", text}, {"cache_control", cacheControl}}}; + } + } else if (content.isArray()) { + QJsonArray blocks = content.toArray(); + markLastBlock(blocks, cacheControl); + msg["content"] = blocks; + } + messages.replace(idx, msg); + request["messages"] = messages; +} + +// Applies one shared cache_control value to the system prompt, the tools and the message history. +inline void apply(QJsonObject &request, bool extendedTtl) +{ + const QJsonObject cacheControl = buildBreakpoint(extendedTtl); + applyToSystem(request, cacheControl); + applyToTools(request, cacheControl); + applyToHistory(request, cacheControl); +} + +} // namespace QodeAssist::Providers::ClaudeCacheControl diff --git a/providers/ClaudeProvider.cpp b/providers/ClaudeProvider.cpp index 8befc10..cb3262e 100644 --- a/providers/ClaudeProvider.cpp +++ b/providers/ClaudeProvider.cpp @@ -9,6 +9,7 @@ #include +#include "ClaudeCacheControl.hpp" #include "logger/Logger.hpp" #include "settings/ChatAssistantSettings.hpp" #include "settings/CodeCompletionSettings.hpp" @@ -104,6 +105,14 @@ void ClaudeProvider::prepareRequest( LOG_MESSAGE(QString("Added %1 tools to Claude request").arg(toolsDefinitions.size())); } } + + const auto &ps = Settings::providerSettings(); + const bool cachingOn = ps.claudeEnablePromptCaching() + && type != PluginLLMCore::RequestType::CodeCompletion; + m_client->setUseExtendedCacheTTL(cachingOn && ps.claudeUseExtendedCacheTTL()); + if (cachingOn) { + ClaudeCacheControl::apply(request, ps.claudeUseExtendedCacheTTL()); + } } QFuture> ClaudeProvider::getInstalledModels(const QString &baseUrl) diff --git a/settings/ProviderSettings.cpp b/settings/ProviderSettings.cpp index 7229f5c..9ecc224 100644 --- a/settings/ProviderSettings.cpp +++ b/settings/ProviderSettings.cpp @@ -53,6 +53,23 @@ ProviderSettings::ProviderSettings() claudeApiKey.setDefaultValue(""); claudeApiKey.setAutoApply(true); + 
claudeEnablePromptCaching.setSettingsKey(Constants::CLAUDE_ENABLE_PROMPT_CACHING); + claudeEnablePromptCaching.setLabelText(Tr::tr("Enable prompt caching")); + claudeEnablePromptCaching.setToolTip( + Tr::tr("Marks the system prompt, tool definitions, and stable chat history with " + "cache_control so Anthropic caches the request prefix (5-minute TTL). " + "Reduces cost and latency on repeated turns.")); + claudeEnablePromptCaching.setDefaultValue(false); + claudeEnablePromptCaching.setAutoApply(true); + + claudeUseExtendedCacheTTL.setSettingsKey(Constants::CLAUDE_USE_EXTENDED_CACHE_TTL); + claudeUseExtendedCacheTTL.setLabelText(Tr::tr("Use 1h cache TTL (beta)")); + claudeUseExtendedCacheTTL.setToolTip( + Tr::tr("Requests Anthropic's 1-hour cache TTL instead of the default 5 minutes. " + "Sends the extended-cache-ttl-2025-04-11 beta header.")); + claudeUseExtendedCacheTTL.setDefaultValue(false); + claudeUseExtendedCacheTTL.setAutoApply(true); + // OpenAI Settings openAiApiKey.setSettingsKey(Constants::OPEN_AI_API_KEY); openAiApiKey.setLabelText(Tr::tr("OpenAI API Key:")); @@ -124,7 +141,9 @@ ProviderSettings::ProviderSettings() Space{8}, Group{title(Tr::tr("OpenAI Compatible Settings")), Column{openAiCompatApiKey}}, Space{8}, - Group{title(Tr::tr("Claude Settings")), Column{claudeApiKey}}, + Group{ + title(Tr::tr("Claude Settings")), + Column{claudeApiKey, claudeEnablePromptCaching, claudeUseExtendedCacheTTL}}, Space{8}, Group{title(Tr::tr("Mistral AI Settings")), Column{mistralAiApiKey, codestralApiKey}}, Space{8}, @@ -148,6 +167,12 @@ void ProviderSettings::setupConnections() openAiCompatApiKey.writeSettings(); }); connect(&claudeApiKey, &ButtonAspect::changed, this, [this]() { claudeApiKey.writeSettings(); }); + connect(&claudeEnablePromptCaching, &Utils::BoolAspect::changed, this, [this]() { + claudeEnablePromptCaching.writeSettings(); + }); + connect(&claudeUseExtendedCacheTTL, &Utils::BoolAspect::changed, this, [this]() { + 
claudeUseExtendedCacheTTL.writeSettings(); + }); connect(&openAiApiKey, &ButtonAspect::changed, this, [this]() { openAiApiKey.writeSettings(); }); connect(&mistralAiApiKey, &ButtonAspect::changed, this, [this]() { mistralAiApiKey.writeSettings(); @@ -179,6 +204,8 @@ void ProviderSettings::resetSettingsToDefaults() resetAspect(openRouterApiKey); resetAspect(openAiCompatApiKey); resetAspect(claudeApiKey); + resetAspect(claudeEnablePromptCaching); + resetAspect(claudeUseExtendedCacheTTL); resetAspect(openAiApiKey); resetAspect(mistralAiApiKey); resetAspect(googleAiApiKey); diff --git a/settings/ProviderSettings.hpp b/settings/ProviderSettings.hpp index 3eff7e5..d2ed259 100644 --- a/settings/ProviderSettings.hpp +++ b/settings/ProviderSettings.hpp @@ -20,6 +20,8 @@ public: Utils::StringAspect openRouterApiKey{this}; Utils::StringAspect openAiCompatApiKey{this}; Utils::StringAspect claudeApiKey{this}; + Utils::BoolAspect claudeEnablePromptCaching{this}; + Utils::BoolAspect claudeUseExtendedCacheTTL{this}; Utils::StringAspect openAiApiKey{this}; Utils::StringAspect mistralAiApiKey{this}; Utils::StringAspect codestralApiKey{this}; diff --git a/settings/SettingsConstants.hpp b/settings/SettingsConstants.hpp index baf8798..40a4e8e 100644 --- a/settings/SettingsConstants.hpp +++ b/settings/SettingsConstants.hpp @@ -149,6 +149,9 @@ const char OLLAMA_BASIC_AUTH_API_KEY_HISTORY[] = "QodeAssist.ollamaBasicAuthApiK const char LLAMA_CPP_API_KEY[] = "QodeAssist.llamaCppApiKey"; const char LLAMA_CPP_API_KEY_HISTORY[] = "QodeAssist.llamaCppApiKeyHistory"; +const char CLAUDE_ENABLE_PROMPT_CACHING[] = "QodeAssist.claudeEnablePromptCaching"; +const char CLAUDE_USE_EXTENDED_CACHE_TTL[] = "QodeAssist.claudeUseExtendedCacheTTL"; + // context settings const char CC_READ_FULL_FILE[] = "QodeAssist.ccReadFullFile"; const char CC_READ_STRINGS_BEFORE_CURSOR[] = "QodeAssist.ccReadStringsBeforeCursor"; diff --git a/sources/external/llmqore b/sources/external/llmqore index 0c9fc8b..6b62a2d 160000 
--- a/sources/external/llmqore +++ b/sources/external/llmqore @@ -1 +1 @@ -Subproject commit 0c9fc8bd7cb52946328d29f02c804c7872ba06b1 +Subproject commit 6b62a2d57c0e22952d15e783f341a5d895258dd6 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2d772a9..3659261 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,6 +3,7 @@ add_executable(QodeAssistTest ../LLMClientInterface.cpp ../LLMSuggestion.cpp CodeHandlerTest.cpp + ClaudeCacheControlTest.cpp DocumentContextReaderTest.cpp LLMSuggestionTest.cpp # LLMClientInterfaceTests.cpp @@ -21,6 +22,8 @@ target_link_libraries(QodeAssistTest PRIVATE LLMQore ) +target_include_directories(QodeAssistTest PRIVATE ${CMAKE_SOURCE_DIR}) + target_compile_definitions(QodeAssistTest PRIVATE CMAKE_CURRENT_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}") add_test(NAME QodeAssistTest COMMAND QodeAssistTest) diff --git a/test/ClaudeCacheControlTest.cpp b/test/ClaudeCacheControlTest.cpp new file mode 100644 index 0000000..00698e8 --- /dev/null +++ b/test/ClaudeCacheControlTest.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2024-2026 Petr Mironychev +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include +#include + +#include "providers/ClaudeCacheControl.hpp" + +using namespace QodeAssist::Providers::ClaudeCacheControl; + +namespace { + +QJsonObject expectedEphemeral(bool extendedTtl) +{ + QJsonObject obj{{"type", "ephemeral"}}; + if (extendedTtl) + obj["ttl"] = "1h"; + return obj; +} + +} // namespace + +TEST(ClaudeCacheControlTest, BreakpointWithoutExtendedTTL) +{ + const QJsonObject cc = buildBreakpoint(false); + EXPECT_EQ(cc.value("type").toString(), "ephemeral"); + EXPECT_FALSE(cc.contains("ttl")); +} + +TEST(ClaudeCacheControlTest, BreakpointWithExtendedTTL) +{ + const QJsonObject cc = buildBreakpoint(true); + EXPECT_EQ(cc.value("type").toString(), "ephemeral"); + EXPECT_EQ(cc.value("ttl").toString(), "1h"); +} + +TEST(ClaudeCacheControlTest, SystemAsStringWrappedIntoArray) +{ + QJsonObject request; + 
request["system"] = "you are a helpful agent"; + + apply(request, false); + + ASSERT_TRUE(request.value("system").isArray()); + const QJsonArray sys = request.value("system").toArray(); + ASSERT_EQ(sys.size(), 1); + + const QJsonObject block = sys.first().toObject(); + EXPECT_EQ(block.value("type").toString(), "text"); + EXPECT_EQ(block.value("text").toString(), "you are a helpful agent"); + EXPECT_EQ(block.value("cache_control").toObject(), expectedEphemeral(false)); +} + +TEST(ClaudeCacheControlTest, EmptySystemStringIsNotWrapped) +{ + QJsonObject request; + request["system"] = ""; + + apply(request, false); + + EXPECT_TRUE(request.value("system").isString()); +} + +TEST(ClaudeCacheControlTest, SystemAsArrayMarksLastBlock) +{ + QJsonObject request; + request["system"] = QJsonArray{ + QJsonObject{{"type", "text"}, {"text", "a"}}, + QJsonObject{{"type", "text"}, {"text", "b"}}}; + + apply(request, false); + + const QJsonArray sys = request.value("system").toArray(); + ASSERT_EQ(sys.size(), 2); + EXPECT_FALSE(sys[0].toObject().contains("cache_control")); + EXPECT_EQ(sys[1].toObject().value("cache_control").toObject(), expectedEphemeral(false)); +} + +TEST(ClaudeCacheControlTest, ToolsLastEntryGetsCacheControl) +{ + QJsonObject request; + request["tools"] = QJsonArray{ + QJsonObject{{"name", "read_file"}}, + QJsonObject{{"name", "edit_file"}}, + QJsonObject{{"name", "search"}}}; + + apply(request, true); + + const QJsonArray tools = request.value("tools").toArray(); + ASSERT_EQ(tools.size(), 3); + EXPECT_FALSE(tools[0].toObject().contains("cache_control")); + EXPECT_FALSE(tools[1].toObject().contains("cache_control")); + EXPECT_EQ(tools[2].toObject().value("cache_control").toObject(), expectedEphemeral(true)); +} + +TEST(ClaudeCacheControlTest, SingleMessageHistorySkipped) +{ + QJsonObject request; + request["messages"] + = QJsonArray{QJsonObject{{"role", "user"}, {"content", "first message"}}}; + + apply(request, false); + + const QJsonArray msgs = 
request.value("messages").toArray(); + ASSERT_EQ(msgs.size(), 1); + EXPECT_TRUE(msgs[0].toObject().value("content").isString()); +} + +TEST(ClaudeCacheControlTest, HistoryBreakpointOnSecondToLastMessage) +{ + QJsonObject request; + request["messages"] = QJsonArray{ + QJsonObject{{"role", "user"}, {"content", "u1"}}, + QJsonObject{{"role", "assistant"}, {"content", "a1"}}, + QJsonObject{{"role", "user"}, {"content", "u2-current"}}}; + + apply(request, false); + + const QJsonArray msgs = request.value("messages").toArray(); + ASSERT_EQ(msgs.size(), 3); + + EXPECT_TRUE(msgs[0].toObject().value("content").isString()); + + const QJsonArray a1Content = msgs[1].toObject().value("content").toArray(); + ASSERT_EQ(a1Content.size(), 1); + EXPECT_EQ(a1Content.first().toObject().value("text").toString(), "a1"); + EXPECT_EQ( + a1Content.first().toObject().value("cache_control").toObject(), + expectedEphemeral(false)); + + EXPECT_TRUE(msgs[2].toObject().value("content").isString()); +} + +TEST(ClaudeCacheControlTest, HistoryArrayContentMarksLastBlock) +{ + QJsonObject request; + request["messages"] = QJsonArray{ + QJsonObject{ + {"role", "user"}, + {"content", + QJsonArray{ + QJsonObject{{"type", "text"}, {"text", "describe this"}}, + QJsonObject{{"type", "image"}}}}}, + QJsonObject{{"role", "assistant"}, {"content", "ok"}}}; + + apply(request, false); + + const QJsonArray msgs = request.value("messages").toArray(); + const QJsonArray content = msgs[0].toObject().value("content").toArray(); + ASSERT_EQ(content.size(), 2); + EXPECT_FALSE(content[0].toObject().contains("cache_control")); + EXPECT_EQ(content[1].toObject().value("cache_control").toObject(), expectedEphemeral(false)); +} + +TEST(ClaudeCacheControlTest, NoSystemNoToolsNoMessagesIsNoop) +{ + QJsonObject request; + request["model"] = "claude-sonnet-4-5"; + request["max_tokens"] = 1024; + + apply(request, false); + + EXPECT_EQ(request.value("model").toString(), "claude-sonnet-4-5"); + 
EXPECT_EQ(request.value("max_tokens").toInt(), 1024); + EXPECT_FALSE(request.contains("system")); + EXPECT_FALSE(request.contains("tools")); + EXPECT_FALSE(request.contains("messages")); +} + +TEST(ClaudeCacheControlTest, EmptyToolsArrayIsNoop) +{ + QJsonObject request; + request["tools"] = QJsonArray{}; + + apply(request, false); + + EXPECT_TRUE(request.value("tools").isArray()); + EXPECT_TRUE(request.value("tools").toArray().isEmpty()); +}