feat: Add prompt caching to the Claude provider

This commit is contained in:
Petr Mironychev
2026-05-15 10:28:21 +02:00
parent 3b421f60af
commit 7704bffd88
11 changed files with 337 additions and 3 deletions

View File

@@ -552,6 +552,14 @@ int ChatModel::sessionCompletionTokens() const
return total;
}
int ChatModel::sessionCachedPromptTokens() const
{
int total = 0;
for (const auto &m : m_messages)
total += m.cachedPromptTokens;
return total;
}
int ChatModel::sessionTotalTokens() const
{
return sessionPromptTokens() + sessionCompletionTokens();

View File

@@ -19,6 +19,7 @@ class ChatModel : public QAbstractListModel
Q_OBJECT
Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
Q_PROPERTY(int sessionCachedPromptTokens READ sessionCachedPromptTokens NOTIFY sessionUsageChanged FINAL)
Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
QML_ELEMENT
@@ -110,6 +111,7 @@ public:
int sessionPromptTokens() const;
int sessionCompletionTokens() const;
int sessionCachedPromptTokens() const;
int sessionTotalTokens() const;
void setLoadingFromHistory(bool loading);

View File

@@ -91,10 +91,20 @@ ChatRootView {
loadButton.onClicked: root.showLoadDialog()
clearButton.onClicked: root.clearChat()
tokensBadge {
text: qsTr("next ~%1 · session ↑%2 ↓%3")
readonly property int sessionCached: root.chatModel.sessionCachedPromptTokens
text: sessionCached > 0
? qsTr("next ~%1 · session ↑%2 ↓%3 ↻%4")
.arg(root.inputTokensCount)
.arg(root.chatModel.sessionPromptTokens)
.arg(root.chatModel.sessionCompletionTokens)
.arg(sessionCached)
: qsTr("next ~%1 · session ↑%2 ↓%3")
.arg(root.inputTokensCount)
.arg(root.chatModel.sessionPromptTokens)
.arg(root.chatModel.sessionCompletionTokens)
ToolTip.text: sessionCached > 0
? qsTr("next request (estimate) · session prompt ↑ / completion ↓ / cached ↻ (provider cache hits)")
: qsTr("next request (estimate) · session prompt ↑ / completion ↓")
}
recentPath {
text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")

View File

@@ -0,0 +1,89 @@
// Copyright (C) 2024-2026 Petr Mironychev
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <QJsonArray>
#include <QJsonObject>
#include <QJsonValue>
#include <QString>
namespace QodeAssist::Providers::ClaudeCacheControl {
// Builds the JSON object used as a "cache_control" marker.
//
// The result always carries {"type": "ephemeral"}. When extendedTtl is true it
// additionally carries {"ttl": "1h"}; otherwise no "ttl" key is present.
inline QJsonObject buildBreakpoint(bool extendedTtl)
{
    if (!extendedTtl)
        return QJsonObject{{"type", "ephemeral"}};

    return QJsonObject{{"type", "ephemeral"}, {"ttl", "1h"}};
}
// Sets cacheControl as the "cache_control" member of the final element of blocks.
//
// The array is modified in place. Nothing happens when the array is empty or when
// its final element is not a JSON object: QJsonValue::toObject() returns an empty
// object for non-object values, so marking such an element would throw away its
// original value and leave only the cache_control member behind.
inline void markLastBlock(QJsonArray &blocks, const QJsonObject &cacheControl)
{
    if (blocks.isEmpty())
        return;

    const auto lastIndex = blocks.size() - 1;
    const QJsonValue lastValue = blocks.at(lastIndex);
    if (!lastValue.isObject())
        return;

    QJsonObject lastBlock = lastValue.toObject();
    lastBlock["cache_control"] = cacheControl;
    blocks.replace(lastIndex, lastBlock);
}
// Places a cache breakpoint on the "system" entry of request.
//
// - Array value: the last element is marked via markLastBlock() and the array is
//   written back.
// - Non-empty string value: replaced by a one-element array holding a
//   {"type": "text", "text": <string>, "cache_control": cacheControl} object.
// - Missing key, empty string, or any other value type: request is left untouched.
inline void applyToSystem(QJsonObject &request, const QJsonObject &cacheControl)
{
    // value() yields an undefined QJsonValue for a missing key, which is neither
    // an array nor a string, so no separate contains() check is needed.
    const QJsonValue systemValue = request.value("system");

    if (systemValue.isArray()) {
        QJsonArray systemBlocks = systemValue.toArray();
        markLastBlock(systemBlocks, cacheControl);
        request["system"] = systemBlocks;
        return;
    }

    if (!systemValue.isString())
        return;

    const QString prompt = systemValue.toString();
    if (prompt.isEmpty())
        return;

    const QJsonObject wrapped{
        {"type", "text"}, {"text", prompt}, {"cache_control", cacheControl}};
    request["system"] = QJsonArray{wrapped};
}
// Places a cache breakpoint on the last entry of the "tools" array of request.
//
// The request is left untouched when "tools" is missing, is not an array, or is
// an empty array. The explicit isArray() check matters: QJsonValue::toArray()
// returns an empty array for non-array values, so writing the result back would
// replace e.g. a null "tools" value with [].
inline void applyToTools(QJsonObject &request, const QJsonObject &cacheControl)
{
    const QJsonValue toolsValue = request.value("tools");
    if (!toolsValue.isArray())
        return;

    QJsonArray tools = toolsValue.toArray();
    if (tools.isEmpty())
        return;

    markLastBlock(tools, cacheControl);
    request["tools"] = tools;
}
// Places a cache breakpoint on the second-to-last entry of the "messages" array.
//
// - String content: replaced by a one-element array holding a
//   {"type": "text", "text": <string>, "cache_control": cacheControl} object.
// - Array content: the last element is marked via markLastBlock().
//
// The request is left untouched when there are fewer than two messages, when the
// target entry is not a JSON object, or when its content is missing, of another
// type, an empty string, or an empty array. Empty strings are skipped for the same
// reason applyToSystem() skips them: wrapping one would produce an empty text block
// carrying cache_control, which the Anthropic API is documented to reject.
inline void applyToHistory(QJsonObject &request, const QJsonObject &cacheControl)
{
    const QJsonValue messagesValue = request.value("messages");
    if (!messagesValue.isArray())
        return;

    QJsonArray messages = messagesValue.toArray();
    if (messages.size() < 2)
        return;

    const auto idx = messages.size() - 2;
    const QJsonValue target = messages.at(idx);
    // toObject() on a non-object yields {}, which would overwrite the entry below.
    if (!target.isObject())
        return;

    QJsonObject msg = target.toObject();
    const QJsonValue content = msg.value("content");
    if (content.isString()) {
        const QString text = content.toString();
        if (text.isEmpty())
            return;
        msg["content"] = QJsonArray{QJsonObject{
            {"type", "text"}, {"text", text}, {"cache_control", cacheControl}}};
    } else if (content.isArray()) {
        QJsonArray blocks = content.toArray();
        if (blocks.isEmpty())
            return;
        markLastBlock(blocks, cacheControl);
        msg["content"] = blocks;
    } else {
        return;
    }

    messages.replace(idx, msg);
    request["messages"] = messages;
}
// Entry point: builds one cache_control marker via buildBreakpoint(extendedTtl)
// and hands it to applyToSystem(), applyToTools() and applyToHistory(), in that
// order. request is modified in place.
inline void apply(QJsonObject &request, bool extendedTtl)
{
    const QJsonObject breakpoint = buildBreakpoint(extendedTtl);

    applyToSystem(request, breakpoint);
    applyToTools(request, breakpoint);
    applyToHistory(request, breakpoint);
}
} // namespace QodeAssist::Providers::ClaudeCacheControl

View File

@@ -9,6 +9,7 @@
#include <LLMQore/ToolsManager.hpp>
#include "ClaudeCacheControl.hpp"
#include "logger/Logger.hpp"
#include "settings/ChatAssistantSettings.hpp"
#include "settings/CodeCompletionSettings.hpp"
@@ -104,6 +105,14 @@ void ClaudeProvider::prepareRequest(
LOG_MESSAGE(QString("Added %1 tools to Claude request").arg(toolsDefinitions.size()));
}
}
const auto &ps = Settings::providerSettings();
const bool cachingOn = ps.claudeEnablePromptCaching()
&& type != PluginLLMCore::RequestType::CodeCompletion;
m_client->setUseExtendedCacheTTL(cachingOn && ps.claudeUseExtendedCacheTTL());
if (cachingOn) {
ClaudeCacheControl::apply(request, ps.claudeUseExtendedCacheTTL());
}
}
QFuture<QList<QString>> ClaudeProvider::getInstalledModels(const QString &baseUrl)

View File

@@ -53,6 +53,23 @@ ProviderSettings::ProviderSettings()
claudeApiKey.setDefaultValue("");
claudeApiKey.setAutoApply(true);
claudeEnablePromptCaching.setSettingsKey(Constants::CLAUDE_ENABLE_PROMPT_CACHING);
claudeEnablePromptCaching.setLabelText(Tr::tr("Enable prompt caching"));
claudeEnablePromptCaching.setToolTip(
Tr::tr("Marks the system prompt, tool definitions, and stable chat history with "
"cache_control so Anthropic caches the request prefix (5-minute TTL). "
"Reduces cost and latency on repeated turns."));
claudeEnablePromptCaching.setDefaultValue(false);
claudeEnablePromptCaching.setAutoApply(true);
claudeUseExtendedCacheTTL.setSettingsKey(Constants::CLAUDE_USE_EXTENDED_CACHE_TTL);
claudeUseExtendedCacheTTL.setLabelText(Tr::tr("Use 1h cache TTL (beta)"));
claudeUseExtendedCacheTTL.setToolTip(
Tr::tr("Requests Anthropic's 1-hour cache TTL instead of the default 5 minutes. "
"Sends the extended-cache-ttl-2025-04-11 beta header."));
claudeUseExtendedCacheTTL.setDefaultValue(false);
claudeUseExtendedCacheTTL.setAutoApply(true);
// OpenAI Settings
openAiApiKey.setSettingsKey(Constants::OPEN_AI_API_KEY);
openAiApiKey.setLabelText(Tr::tr("OpenAI API Key:"));
@@ -124,7 +141,9 @@ ProviderSettings::ProviderSettings()
Space{8},
Group{title(Tr::tr("OpenAI Compatible Settings")), Column{openAiCompatApiKey}},
Space{8},
Group{title(Tr::tr("Claude Settings")), Column{claudeApiKey}},
Group{
title(Tr::tr("Claude Settings")),
Column{claudeApiKey, claudeEnablePromptCaching, claudeUseExtendedCacheTTL}},
Space{8},
Group{title(Tr::tr("Mistral AI Settings")), Column{mistralAiApiKey, codestralApiKey}},
Space{8},
@@ -148,6 +167,12 @@ void ProviderSettings::setupConnections()
openAiCompatApiKey.writeSettings();
});
connect(&claudeApiKey, &ButtonAspect::changed, this, [this]() { claudeApiKey.writeSettings(); });
connect(&claudeEnablePromptCaching, &Utils::BoolAspect::changed, this, [this]() {
claudeEnablePromptCaching.writeSettings();
});
connect(&claudeUseExtendedCacheTTL, &Utils::BoolAspect::changed, this, [this]() {
claudeUseExtendedCacheTTL.writeSettings();
});
connect(&openAiApiKey, &ButtonAspect::changed, this, [this]() { openAiApiKey.writeSettings(); });
connect(&mistralAiApiKey, &ButtonAspect::changed, this, [this]() {
mistralAiApiKey.writeSettings();
@@ -179,6 +204,8 @@ void ProviderSettings::resetSettingsToDefaults()
resetAspect(openRouterApiKey);
resetAspect(openAiCompatApiKey);
resetAspect(claudeApiKey);
resetAspect(claudeEnablePromptCaching);
resetAspect(claudeUseExtendedCacheTTL);
resetAspect(openAiApiKey);
resetAspect(mistralAiApiKey);
resetAspect(googleAiApiKey);

View File

@@ -20,6 +20,8 @@ public:
Utils::StringAspect openRouterApiKey{this};
Utils::StringAspect openAiCompatApiKey{this};
Utils::StringAspect claudeApiKey{this};
Utils::BoolAspect claudeEnablePromptCaching{this};
Utils::BoolAspect claudeUseExtendedCacheTTL{this};
Utils::StringAspect openAiApiKey{this};
Utils::StringAspect mistralAiApiKey{this};
Utils::StringAspect codestralApiKey{this};

View File

@@ -149,6 +149,9 @@ const char OLLAMA_BASIC_AUTH_API_KEY_HISTORY[] = "QodeAssist.ollamaBasicAuthApiK
const char LLAMA_CPP_API_KEY[] = "QodeAssist.llamaCppApiKey";
const char LLAMA_CPP_API_KEY_HISTORY[] = "QodeAssist.llamaCppApiKeyHistory";
const char CLAUDE_ENABLE_PROMPT_CACHING[] = "QodeAssist.claudeEnablePromptCaching";
const char CLAUDE_USE_EXTENDED_CACHE_TTL[] = "QodeAssist.claudeUseExtendedCacheTTL";
// context settings
const char CC_READ_FULL_FILE[] = "QodeAssist.ccReadFullFile";
const char CC_READ_STRINGS_BEFORE_CURSOR[] = "QodeAssist.ccReadStringsBeforeCursor";

View File

@@ -3,6 +3,7 @@ add_executable(QodeAssistTest
../LLMClientInterface.cpp
../LLMSuggestion.cpp
CodeHandlerTest.cpp
ClaudeCacheControlTest.cpp
DocumentContextReaderTest.cpp
LLMSuggestionTest.cpp
# LLMClientInterfaceTests.cpp
@@ -21,6 +22,8 @@ target_link_libraries(QodeAssistTest PRIVATE
LLMQore
)
target_include_directories(QodeAssistTest PRIVATE ${CMAKE_SOURCE_DIR})
target_compile_definitions(QodeAssistTest PRIVATE CMAKE_CURRENT_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
add_test(NAME QodeAssistTest COMMAND QodeAssistTest)

View File

@@ -0,0 +1,181 @@
// Copyright (C) 2024-2026 Petr Mironychev
// SPDX-License-Identifier: GPL-3.0-or-later
#include <gtest/gtest.h>
#include <QJsonArray>
#include <QJsonObject>
#include "providers/ClaudeCacheControl.hpp"
using namespace QodeAssist::Providers::ClaudeCacheControl;
namespace {
// Reference cache_control object the tests compare against:
// {"type": "ephemeral"}, plus {"ttl": "1h"} when extendedTtl is true.
QJsonObject expectedEphemeral(bool extendedTtl)
{
    if (!extendedTtl)
        return QJsonObject{{"type", "ephemeral"}};

    return QJsonObject{{"type", "ephemeral"}, {"ttl", "1h"}};
}
} // namespace
// buildBreakpoint(false) yields type "ephemeral" and no "ttl" key.
TEST(ClaudeCacheControlTest, BreakpointWithoutExtendedTTL)
{
    const QJsonObject breakpoint = buildBreakpoint(false);

    EXPECT_EQ(breakpoint.value("type").toString(), "ephemeral");
    EXPECT_FALSE(breakpoint.contains("ttl"));
}
// buildBreakpoint(true) yields type "ephemeral" together with ttl "1h".
TEST(ClaudeCacheControlTest, BreakpointWithExtendedTTL)
{
    const QJsonObject breakpoint = buildBreakpoint(true);

    EXPECT_EQ(breakpoint.value("type").toString(), "ephemeral");
    EXPECT_EQ(breakpoint.value("ttl").toString(), "1h");
}
// A non-empty string "system" value becomes a single text block carrying the
// cache_control marker.
TEST(ClaudeCacheControlTest, SystemAsStringWrappedIntoArray)
{
    QJsonObject request;
    request["system"] = "you are a helpful agent";

    apply(request, false);

    const QJsonValue systemValue = request.value("system");
    ASSERT_TRUE(systemValue.isArray());

    const QJsonArray systemBlocks = systemValue.toArray();
    ASSERT_EQ(systemBlocks.size(), 1);

    const QJsonObject onlyBlock = systemBlocks.first().toObject();
    EXPECT_EQ(onlyBlock.value("type").toString(), "text");
    EXPECT_EQ(onlyBlock.value("text").toString(), "you are a helpful agent");
    EXPECT_EQ(onlyBlock.value("cache_control").toObject(), expectedEphemeral(false));
}
// An empty string "system" value stays a string after apply().
TEST(ClaudeCacheControlTest, EmptySystemStringIsNotWrapped)
{
    QJsonObject request;
    request["system"] = "";

    apply(request, false);

    const QJsonValue systemValue = request.value("system");
    EXPECT_TRUE(systemValue.isString());
}
// With an array "system" value only the final block receives cache_control.
TEST(ClaudeCacheControlTest, SystemAsArrayMarksLastBlock)
{
    const QJsonObject firstBlock{{"type", "text"}, {"text", "a"}};
    const QJsonObject secondBlock{{"type", "text"}, {"text", "b"}};
    QJsonObject request;
    request["system"] = QJsonArray{firstBlock, secondBlock};

    apply(request, false);

    const QJsonArray systemBlocks = request.value("system").toArray();
    ASSERT_EQ(systemBlocks.size(), 2);
    EXPECT_FALSE(systemBlocks[0].toObject().contains("cache_control"));
    EXPECT_EQ(
        systemBlocks[1].toObject().value("cache_control").toObject(),
        expectedEphemeral(false));
}
// Only the last tool definition receives cache_control (extended TTL variant).
TEST(ClaudeCacheControlTest, ToolsLastEntryGetsCacheControl)
{
    QJsonArray toolDefinitions;
    toolDefinitions.append(QJsonObject{{"name", "read_file"}});
    toolDefinitions.append(QJsonObject{{"name", "edit_file"}});
    toolDefinitions.append(QJsonObject{{"name", "search"}});

    QJsonObject request;
    request["tools"] = toolDefinitions;

    apply(request, true);

    const QJsonArray markedTools = request.value("tools").toArray();
    ASSERT_EQ(markedTools.size(), 3);
    EXPECT_FALSE(markedTools[0].toObject().contains("cache_control"));
    EXPECT_FALSE(markedTools[1].toObject().contains("cache_control"));
    EXPECT_EQ(
        markedTools[2].toObject().value("cache_control").toObject(),
        expectedEphemeral(true));
}
// A history with a single message gets no breakpoint: its content stays a string.
TEST(ClaudeCacheControlTest, SingleMessageHistorySkipped)
{
    const QJsonObject onlyMessage{{"role", "user"}, {"content", "first message"}};
    QJsonObject request;
    request["messages"] = QJsonArray{onlyMessage};

    apply(request, false);

    const QJsonArray history = request.value("messages").toArray();
    ASSERT_EQ(history.size(), 1);
    EXPECT_TRUE(history[0].toObject().value("content").isString());
}
// In a three-message history only the second-to-last message is rewritten into a
// text block with cache_control; the first and last messages keep string content.
TEST(ClaudeCacheControlTest, HistoryBreakpointOnSecondToLastMessage)
{
    QJsonArray conversation;
    conversation.append(QJsonObject{{"role", "user"}, {"content", "u1"}});
    conversation.append(QJsonObject{{"role", "assistant"}, {"content", "a1"}});
    conversation.append(QJsonObject{{"role", "user"}, {"content", "u2-current"}});

    QJsonObject request;
    request["messages"] = conversation;

    apply(request, false);

    const QJsonArray history = request.value("messages").toArray();
    ASSERT_EQ(history.size(), 3);

    EXPECT_TRUE(history[0].toObject().value("content").isString());

    const QJsonArray assistantContent = history[1].toObject().value("content").toArray();
    ASSERT_EQ(assistantContent.size(), 1);
    const QJsonObject assistantBlock = assistantContent.first().toObject();
    EXPECT_EQ(assistantBlock.value("text").toString(), "a1");
    EXPECT_EQ(assistantBlock.value("cache_control").toObject(), expectedEphemeral(false));

    EXPECT_TRUE(history[2].toObject().value("content").isString());
}
// When the second-to-last message already has array content, only the last
// content block of that message receives cache_control.
TEST(ClaudeCacheControlTest, HistoryArrayContentMarksLastBlock)
{
    const QJsonArray userBlocks{
        QJsonObject{{"type", "text"}, {"text", "describe this"}},
        QJsonObject{{"type", "image"}}};
    const QJsonObject userMessage{{"role", "user"}, {"content", userBlocks}};
    const QJsonObject assistantMessage{{"role", "assistant"}, {"content", "ok"}};

    QJsonObject request;
    request["messages"] = QJsonArray{userMessage, assistantMessage};

    apply(request, false);

    const QJsonArray history = request.value("messages").toArray();
    const QJsonArray markedBlocks = history[0].toObject().value("content").toArray();
    ASSERT_EQ(markedBlocks.size(), 2);
    EXPECT_FALSE(markedBlocks[0].toObject().contains("cache_control"));
    EXPECT_EQ(
        markedBlocks[1].toObject().value("cache_control").toObject(),
        expectedEphemeral(false));
}
// A request without system/tools/messages keeps its other fields and gains none
// of those three keys.
TEST(ClaudeCacheControlTest, NoSystemNoToolsNoMessagesIsNoop)
{
    QJsonObject request;
    request["max_tokens"] = 1024;
    request["model"] = "claude-sonnet-4-5";

    apply(request, false);

    EXPECT_EQ(request.value("model").toString(), "claude-sonnet-4-5");
    EXPECT_EQ(request.value("max_tokens").toInt(), 1024);

    EXPECT_FALSE(request.contains("system"));
    EXPECT_FALSE(request.contains("tools"));
    EXPECT_FALSE(request.contains("messages"));
}
// An empty "tools" array is still an empty array after apply().
TEST(ClaudeCacheControlTest, EmptyToolsArrayIsNoop)
{
    QJsonObject request;
    request["tools"] = QJsonArray{};

    apply(request, false);

    const QJsonValue toolsValue = request.value("tools");
    EXPECT_TRUE(toolsValue.isArray());
    EXPECT_TRUE(toolsValue.toArray().isEmpty());
}