refactor: Update token usage api (#347)

* refactor: Improve token usage api
* refactor: Image recognition to tokens
This commit is contained in:
Petr Mironychev
2026-05-14 21:19:12 +02:00
committed by GitHub
parent 86f4635080
commit 3b421f60af
20 changed files with 518 additions and 60 deletions

View File

@@ -11,7 +11,6 @@
#include <QUrl>
#include <QtQml>
#include "ChatAssistantSettings.hpp"
#include "Logger.hpp"
#include "context/ChangesManager.h"
@@ -20,14 +19,6 @@ namespace QodeAssist::Chat {
ChatModel::ChatModel(QObject *parent)
: QAbstractListModel(parent)
{
auto &settings = Settings::chatAssistantSettings();
connect(
&settings.chatTokensThreshold,
&Utils::BaseAspect::changed,
this,
&ChatModel::tokensThresholdChanged);
connect(&Context::ChangesManager::instance(),
&Context::ChangesManager::fileEditApplied,
this,
@@ -86,6 +77,16 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const
case Roles::IsRedacted: {
return message.isRedacted;
}
case Roles::PromptTokens:
return message.promptTokens;
case Roles::CompletionTokens:
return message.completionTokens;
case Roles::CachedPromptTokens:
return message.cachedPromptTokens;
case Roles::ReasoningTokens:
return message.reasoningTokens;
case Roles::TotalTokens:
return message.promptTokens + message.completionTokens;
case Roles::Images: {
QVariantList imagesList;
for (const auto &image : message.images) {
@@ -124,6 +125,11 @@ QHash<int, QByteArray> ChatModel::roleNames() const
roles[Roles::Attachments] = "attachments";
roles[Roles::IsRedacted] = "isRedacted";
roles[Roles::Images] = "images";
roles[Roles::PromptTokens] = "promptTokens";
roles[Roles::CompletionTokens] = "completionTokens";
roles[Roles::CachedPromptTokens] = "cachedPromptTokens";
roles[Roles::ReasoningTokens] = "reasoningTokens";
roles[Roles::TotalTokens] = "totalTokens";
return roles;
}
@@ -207,6 +213,7 @@ void ChatModel::clear()
m_messages.clear();
endResetModel();
emit modelReseted();
emit sessionUsageChanged();
}
QList<MessagePart> ChatModel::processMessageContent(const QString &content) const
@@ -310,12 +317,6 @@ QJsonArray ChatModel::prepareMessagesForRequest(const QString &systemPrompt) con
return messages;
}
int ChatModel::tokensThreshold() const
{
auto &settings = Settings::chatAssistantSettings();
return settings.chatTokensThreshold();
}
QString ChatModel::lastMessageId() const
{
return !m_messages.isEmpty() ? m_messages.last().id : "";
@@ -330,6 +331,7 @@ void ChatModel::resetModelTo(int index)
beginRemoveRows(QModelIndex(), index, m_messages.size() - 1);
m_messages.remove(index, m_messages.size() - index);
endRemoveRows();
emit sessionUsageChanged();
}
}
@@ -507,6 +509,54 @@ void ChatModel::updateMessageContent(const QString &messageId, const QString &ne
}
}
void ChatModel::setMessageUsage(
const QString &messageId,
int promptTokens,
int completionTokens,
int cachedPromptTokens,
int reasoningTokens)
{
for (int i = 0; i < m_messages.size(); ++i) {
if (m_messages[i].id != messageId)
continue;
m_messages[i].promptTokens = promptTokens;
m_messages[i].completionTokens = completionTokens;
m_messages[i].cachedPromptTokens = cachedPromptTokens;
m_messages[i].reasoningTokens = reasoningTokens;
emit dataChanged(
index(i),
index(i),
{Roles::PromptTokens,
Roles::CompletionTokens,
Roles::CachedPromptTokens,
Roles::ReasoningTokens,
Roles::TotalTokens});
emit sessionUsageChanged();
return;
}
}
// Returns the sum of promptTokens over every message currently in the model.
int ChatModel::sessionPromptTokens() const
{
    int sum = 0;
    for (auto it = m_messages.cbegin(); it != m_messages.cend(); ++it)
        sum += it->promptTokens;
    return sum;
}
// Returns the sum of completionTokens over every message currently in the model.
int ChatModel::sessionCompletionTokens() const
{
    int sum = 0;
    for (auto it = m_messages.cbegin(); it != m_messages.cend(); ++it)
        sum += it->completionTokens;
    return sum;
}
// Returns the session-wide prompt total plus the session-wide completion total.
int ChatModel::sessionTotalTokens() const
{
    const int promptSum = sessionPromptTokens();
    const int completionSum = sessionCompletionTokens();
    return promptSum + completionSum;
}
void ChatModel::setLoadingFromHistory(bool loading)
{
m_loadingFromHistory = loading;

View File

@@ -17,14 +17,27 @@ namespace QodeAssist::Chat {
class ChatModel : public QAbstractListModel
{
Q_OBJECT
Q_PROPERTY(int tokensThreshold READ tokensThreshold NOTIFY tokensThresholdChanged FINAL)
Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
QML_ELEMENT
public:
enum ChatRole { System, User, Assistant, Tool, FileEdit, Thinking };
Q_ENUM(ChatRole)
enum Roles { RoleType = Qt::UserRole, Content, Attachments, IsRedacted, Images };
// Item-data roles served by ChatModel::data(). Values start at Qt::UserRole and
// follow declaration order, so new roles are appended at the end.
enum Roles {
RoleType = Qt::UserRole,
Content,
Attachments,
// data() returns message.isRedacted for this role.
IsRedacted,
// data() builds a QVariantList from message.images for this role.
Images,
// Per-message usage counters; data() returns the Message field of the same name.
PromptTokens,
CompletionTokens,
CachedPromptTokens,
ReasoningTokens,
// Derived role: data() returns promptTokens + completionTokens.
TotalTokens
};
Q_ENUM(Roles)
struct ImageAttachment
@@ -44,6 +57,11 @@ public:
QList<Context::ContentFile> attachments;
QList<ImageAttachment> images;
int promptTokens = 0;
int completionTokens = 0;
int cachedPromptTokens = 0;
int reasoningTokens = 0;
};
explicit ChatModel(QObject *parent = nullptr);
@@ -66,8 +84,6 @@ public:
QVector<Message> getChatHistory() const;
QJsonArray prepareMessagesForRequest(const QString &systemPrompt) const;
int tokensThreshold() const;
QString currentModel() const;
QString lastMessageId() const;
@@ -84,6 +100,17 @@ public:
const QString &requestId, const QString &thinking, const QString &signature);
void addRedactedThinkingBlock(const QString &requestId, const QString &signature);
void updateMessageContent(const QString &messageId, const QString &newContent);
void setMessageUsage(
const QString &messageId,
int promptTokens,
int completionTokens,
int cachedPromptTokens,
int reasoningTokens);
int sessionPromptTokens() const;
int sessionCompletionTokens() const;
int sessionTotalTokens() const;
void setLoadingFromHistory(bool loading);
bool isLoadingFromHistory() const;
@@ -92,8 +119,8 @@ public:
QString chatFilePath() const;
signals:
void tokensThresholdChanged();
void modelReseted();
void sessionUsageChanged();
private slots:
void onFileEditApplied(const QString &editId);

View File

@@ -3,7 +3,12 @@
#include "ChatRootView.hpp"
#include <algorithm>
#include <LLMQore/ToolsManager.hpp>
#include <QClipboard>
#include <QJsonArray>
#include <QJsonDocument>
#include <QDesktopServices>
#include <QDir>
#include <QFile>
@@ -31,7 +36,6 @@
#include "Logger.hpp"
#include "ProjectSettings.hpp"
#include "ProvidersManager.hpp"
#include "ToolsSettings.hpp"
#include "context/ChangesManager.h"
#include "context/ContextManager.hpp"
#include "context/TokenUtils.hpp"
@@ -107,6 +111,22 @@ ChatRootView::ChatRootView(QQuickItem *parent)
&Utils::BaseAspect::changed,
this,
&ChatRootView::updateInputTokensCount);
connect(this, &ChatRootView::useToolsChanged, this, &ChatRootView::updateInputTokensCount);
connect(
&Settings::chatAssistantSettings().enableChatTools,
&Utils::BaseAspect::changed,
this,
&ChatRootView::updateInputTokensCount);
rewireToolsChangedConnection();
connect(
&Settings::generalSettings().caProvider,
&Utils::BaseAspect::changed,
this,
[this]() {
rewireToolsChangedConnection();
updateInputTokensCount();
});
connect(
&Settings::chatAssistantSettings().systemPrompt,
&Utils::BaseAspect::changed,
@@ -171,6 +191,28 @@ ChatRootView::ChatRootView(QQuickItem *parent)
updateCurrentMessageEditsStats();
});
connect(
m_clientInterface,
&ClientInterface::messageUsageReceived,
this,
[this](int promptTokens, int /*completionTokens*/, int /*cached*/, int /*reasoning*/) {
if (promptTokens <= 0 || m_lastSentEstimate <= 0)
return;
const double rawFactor
= static_cast<double>(promptTokens) / static_cast<double>(m_lastSentEstimate);
const double clamped = std::clamp(rawFactor, 0.5, 3.0);
m_calibrationFactor = 0.5 * m_calibrationFactor + 0.5 * clamped;
LOG_MESSAGE(QString("Token calibration: server=%1 estimated=%2 ratio=%3 ema=%4")
.arg(promptTokens)
.arg(m_lastSentEstimate)
.arg(rawFactor, 0, 'f', 3)
.arg(m_calibrationFactor, 0, 'f', 3));
updateInputTokensCount();
});
connect(
&Context::ChangesManager::instance(),
&Context::ChangesManager::fileEditAdded,
@@ -247,7 +289,6 @@ ChatRootView::ChatRootView(QQuickItem *parent)
emit lastErrorMessageChanged();
});
// ChatCompressor signals
connect(m_chatCompressor, &ChatCompressor::compressionStarted, this, [this]() {
emit isCompressingChanged();
});
@@ -259,6 +300,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
emit compressionCompleted(compressedChatPath);
loadHistory(compressedChatPath);
if (m_pendingSend.active) {
PendingSend p = m_pendingSend;
m_pendingSend = {};
dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
}
});
connect(m_chatCompressor, &ChatCompressor::compressionFailed, this, [this](const QString &error) {
@@ -266,6 +313,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
m_lastErrorMessage = error;
emit lastErrorMessageChanged();
emit compressionFailed(error);
if (m_pendingSend.active) {
PendingSend p = m_pendingSend;
m_pendingSend = {};
dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
}
});
}
@@ -276,32 +329,72 @@ ChatModel *ChatRootView::chatModel() const
void ChatRootView::sendMessage(const QString &message)
{
if (m_inputTokensCount > m_chatModel->tokensThreshold()) {
QMessageBox::StandardButton reply = QMessageBox::question(
Core::ICore::dialogParent(),
tr("Token Limit Exceeded"),
tr("The chat history has exceeded the token limit.\n"
"Would you like to create new chat?"),
QMessageBox::Yes | QMessageBox::No);
const QStringList attachments = m_attachmentFiles;
const QStringList linkedFiles = m_linkedFiles;
const bool tools = useTools();
const bool thinking = useThinking();
if (reply == QMessageBox::Yes) {
autosave();
m_chatModel->clear();
setRecentFilePath(QString{});
return;
}
}
if (deferSendForAutoCompress(message, attachments, linkedFiles, tools, thinking))
return;
dispatchSend(message, attachments, linkedFiles, tools, thinking);
}
// Decides whether an outgoing message has to wait for an automatic chat compression.
//
// Returns false (nothing is deferred) when the autoCompress setting is off, when
// m_inputTokensCount is below autoCompressThreshold, when a not-yet-saved chat cannot
// be given an autosave path, or when a compression is already running or another send
// is already pending. Otherwise the arguments are stored in m_pendingSend,
// compressCurrentChat() is called and true is returned; the compressionCompleted and
// compressionFailed handlers connected in the constructor later pass the stored
// arguments to dispatchSend().
bool ChatRootView::deferSendForAutoCompress(
const QString &message,
const QStringList &attachments,
const QStringList &linkedFiles,
bool useToolsArg,
bool useThinkingArg)
{
auto &settings = Settings::chatAssistantSettings();
if (!settings.autoCompress())
return false;
const int threshold = settings.autoCompressThreshold();
// m_inputTokensCount is the estimate maintained by updateInputTokensCount().
if (m_inputTokensCount < threshold)
return false;
// A chat without a file path gets one assigned before compression is started.
if (m_recentFilePath.isEmpty()) {
// NOTE(review): this line and the next both declare filePath in the same scope;
// looks like the old (m_attachmentFiles) and new (attachments) variants of one
// statement — confirm only the second is meant to remain.
QString filePath = getAutosaveFilePath(message, m_attachmentFiles);
QString filePath = getAutosaveFilePath(message, attachments);
if (filePath.isEmpty())
return false;
setRecentFilePath(filePath);
LOG_MESSAGE(QString("Set chat file path for new chat (auto-compress): %1").arg(filePath));
}
// Only one deferred send is kept at a time; in that case the caller sends right away.
if (m_chatCompressor->isCompressing() || m_pendingSend.active)
return false;
LOG_MESSAGE(QString("Auto-compress preempt: estimated next=%1 ≥ threshold=%2; deferring send")
.arg(m_inputTokensCount)
.arg(threshold));
// The trailing `true` fills PendingSend::active.
m_pendingSend = {message, attachments, linkedFiles, useToolsArg, useThinkingArg, true};
compressCurrentChat();
return true;
}
void ChatRootView::dispatchSend(
const QString &message,
const QStringList &attachments,
const QStringList &linkedFiles,
bool useToolsArg,
bool useThinkingArg)
{
if (m_recentFilePath.isEmpty()) {
QString filePath = getAutosaveFilePath(message, attachments);
if (!filePath.isEmpty()) {
setRecentFilePath(filePath);
LOG_MESSAGE(QString("Set chat file path for new chat: %1").arg(filePath));
}
}
m_clientInterface
->sendMessage(message, m_attachmentFiles, m_linkedFiles, useTools(), useThinking());
m_lastSentEstimate = m_calibrationFactor > 0.0
? static_cast<int>(m_inputTokensCount / m_calibrationFactor)
: m_inputTokensCount;
m_clientInterface->sendMessage(message, attachments, linkedFiles, useToolsArg, useThinkingArg);
m_fileManager->clearIntermediateStorage();
clearAttachmentFiles();
@@ -392,7 +485,8 @@ void ChatRootView::loadHistory(const QString &filePath)
setRecentFilePath(filePath);
}
m_fileManager->clearIntermediateStorage();
if (!m_pendingSend.active)
m_fileManager->clearIntermediateStorage();
m_attachmentFiles.clear();
m_linkedFiles.clear();
emit attachmentFilesChanged();
@@ -747,6 +841,27 @@ void ChatRootView::openFileInEditor(const QString &filePath)
Core::EditorManager::openEditor(Utils::FilePath::fromString(filePath));
}
// Re-attaches updateInputTokensCount() to the toolsChanged signal of the tools
// manager owned by the provider currently selected in the caProvider setting.
//
// Any connection stored from a previous call is disconnected first. If the provider
// or its tools manager cannot be resolved, m_toolsChangedConn stays empty.
void ChatRootView::rewireToolsChangedConnection()
{
    if (m_toolsChangedConn)
        QObject::disconnect(m_toolsChangedConn);
    m_toolsChangedConn = {};

    const auto selectedProvider = Settings::generalSettings().caProvider();
    auto *llmProvider
        = PluginLLMCore::ProvidersManager::instance().getProviderByName(selectedProvider);
    if (llmProvider) {
        if (auto *registry = llmProvider->toolsManager()) {
            m_toolsChangedConn = connect(
                registry,
                &::LLMQore::ToolRegistry::toolsChanged,
                this,
                &ChatRootView::updateInputTokensCount);
        }
    }
}
void ChatRootView::updateInputTokensCount()
{
int inputTokens = m_messageTokensCount;
@@ -756,14 +871,33 @@ void ChatRootView::updateInputTokensCount()
inputTokens += Context::TokenUtils::estimateTokens(settings.systemPrompt());
}
const auto splitImageEstimate = [](const QStringList &paths, QStringList &textPaths) {
int imageTokens = 0;
for (const QString &p : paths) {
if (Context::TokenUtils::isImageFilePath(p))
imageTokens += Context::TokenUtils::estimateImageAttachmentTokens(p);
else
textPaths.append(p);
}
return imageTokens;
};
if (!m_attachmentFiles.isEmpty()) {
auto attachFiles = m_clientInterface->contextManager()->getContentFiles(m_attachmentFiles);
inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
QStringList textPaths;
inputTokens += splitImageEstimate(m_attachmentFiles, textPaths);
if (!textPaths.isEmpty()) {
auto attachFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
}
}
if (!m_linkedFiles.isEmpty()) {
auto linkFiles = m_clientInterface->contextManager()->getContentFiles(m_linkedFiles);
inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
QStringList textPaths;
inputTokens += splitImageEstimate(m_linkedFiles, textPaths);
if (!textPaths.isEmpty()) {
auto linkFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
}
}
const auto &history = m_chatModel->getChatHistory();
@@ -772,7 +906,22 @@ void ChatRootView::updateInputTokensCount()
inputTokens += 4; // + role
}
m_inputTokensCount = inputTokens;
if (useTools()) {
const auto providerName = Settings::generalSettings().caProvider();
if (auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(
providerName)) {
if (auto *tm = provider->toolsManager()) {
const QJsonArray toolDefs = tm->getToolsDefinitions();
if (!toolDefs.isEmpty()) {
const QByteArray serialized
= QJsonDocument(toolDefs).toJson(QJsonDocument::Compact);
inputTokens += static_cast<int>(serialized.size() / 4);
}
}
}
}
m_inputTokensCount = static_cast<int>(inputTokens * m_calibrationFactor);
emit inputTokensCountChanged();
}

View File

@@ -212,6 +212,21 @@ signals:
void openFilesChanged();
private:
void rewireToolsChangedConnection();
QMetaObject::Connection m_toolsChangedConn;
bool deferSendForAutoCompress(
const QString &message,
const QStringList &attachments,
const QStringList &linkedFiles,
bool useTools,
bool useThinking);
void dispatchSend(
const QString &message,
const QStringList &attachments,
const QStringList &linkedFiles,
bool useTools,
bool useThinking);
void updateFileEditStatus(const QString &editId, const QString &status);
QString getChatsHistoryDir() const;
QString getSuggestedFileName() const;
@@ -228,6 +243,18 @@ private:
QStringList m_linkedFiles;
int m_messageTokensCount{0};
int m_inputTokensCount{0};
int m_lastSentEstimate{0};
double m_calibrationFactor{1.0};
// Arguments of a send request that is parked while the chat is being compressed.
// Field order matters: the struct is filled by positional aggregate initialisation.
struct PendingSend
{
    QString message;
    QStringList attachments;
    QStringList linkedFiles;
    bool useTools{false};
    bool useThinking{false};
    bool active{false}; // true while a parked request is waiting to be dispatched
};
PendingSend m_pendingSend;
bool m_isSyncOpenFiles;
QList<Core::IEditor *> m_currentEditors;
bool m_isRequestInProgress;

View File

@@ -103,6 +103,17 @@ QJsonObject ChatSerializer::serializeMessage(
messageObj["images"] = imagesArray;
}
if (message.promptTokens > 0 || message.completionTokens > 0) {
QJsonObject usageObj;
usageObj["promptTokens"] = message.promptTokens;
usageObj["completionTokens"] = message.completionTokens;
if (message.cachedPromptTokens > 0)
usageObj["cachedPromptTokens"] = message.cachedPromptTokens;
if (message.reasoningTokens > 0)
usageObj["reasoningTokens"] = message.reasoningTokens;
messageObj["usage"] = usageObj;
}
return messageObj;
}
@@ -139,6 +150,14 @@ ChatModel::Message ChatSerializer::deserializeMessage(
}
}
if (json.contains("usage")) {
const QJsonObject usageObj = json["usage"].toObject();
message.promptTokens = usageObj["promptTokens"].toInt();
message.completionTokens = usageObj["completionTokens"].toInt();
message.cachedPromptTokens = usageObj["cachedPromptTokens"].toInt();
message.reasoningTokens = usageObj["reasoningTokens"].toInt();
}
return message;
}

View File

@@ -257,6 +257,12 @@ void ClientInterface::sendMessage(
this,
&ClientInterface::handleFullResponse,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFinalized,
this,
&ClientInterface::handleRequestFinalized,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFailed,
@@ -449,6 +455,29 @@ void ClientInterface::handleFullResponse(const QString &requestId, const QString
m_awaitingContinuation.remove(requestId);
}
// Handles the client's requestFinalized signal for chat requests.
//
// Requests that are not in m_activeRequests, and completions that carry no usage
// data, are ignored. Otherwise the usage counters are stored on the chat message
// whose id equals requestId, re-emitted through messageUsageReceived, and logged.
void ClientInterface::handleRequestFinalized(
    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
{
    const bool tracked = m_activeRequests.contains(requestId);
    if (!tracked || !info.usage)
        return;

    const auto &usage = *info.usage;

    m_chatModel->setMessageUsage(
        requestId,
        usage.promptTokens,
        usage.completionTokens,
        usage.cachedPromptTokens,
        usage.reasoningTokens);

    emit messageUsageReceived(
        usage.promptTokens,
        usage.completionTokens,
        usage.cachedPromptTokens,
        usage.reasoningTokens);

    LOG_MESSAGE(QString("Chat usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
                    .arg(requestId)
                    .arg(usage.promptTokens)
                    .arg(usage.completionTokens)
                    .arg(usage.cachedPromptTokens)
                    .arg(usage.reasoningTokens));
}
void ClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
{
auto it = m_activeRequests.find(requestId);

View File

@@ -11,6 +11,7 @@
#include "ChatModel.hpp"
#include "Provider.hpp"
#include "pluginllmcore/IPromptProvider.hpp"
#include <LLMQore/BaseClient.hpp>
#include <context/ContextManager.hpp>
namespace QodeAssist::Chat {
@@ -42,10 +43,13 @@ signals:
void errorOccurred(const QString &error);
void messageReceivedCompletely();
void requestStarted(const QString &requestId);
void messageUsageReceived(
int promptTokens, int completionTokens, int cachedPromptTokens, int reasoningTokens);
private slots:
void handlePartialResponse(const QString &requestId, const QString &partialText);
void handleFullResponse(const QString &requestId, const QString &fullText);
void handleRequestFinalized(const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
void handleRequestFailed(const QString &requestId, const QString &error);
void handleThinkingBlockReceived(
const QString &requestId, const QString &thinking, const QString &signature);

View File

@@ -91,7 +91,10 @@ ChatRootView {
loadButton.onClicked: root.showLoadDialog()
clearButton.onClicked: root.clearChat()
tokensBadge {
text: qsTr("%1/%2").arg(root.inputTokensCount).arg(root.chatModel.tokensThreshold)
text: qsTr("next ~%1 · session ↑%2 ↓%3")
.arg(root.inputTokensCount)
.arg(root.chatModel.sessionPromptTokens)
.arg(root.chatModel.sessionCompletionTokens)
}
recentPath {
text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")
@@ -270,6 +273,10 @@ ChatRootView {
codeFontSize: root.codeFontSize
textFontSize: root.textFontSize
textFormat: root.textFormat
promptTokens: model.promptTokens || 0
completionTokens: model.completionTokens || 0
cachedPromptTokens: model.cachedPromptTokens || 0
reasoningTokens: model.reasoningTokens || 0
onResetChatToMessage: function(idx) {
messageInput.text = model.content

View File

@@ -34,6 +34,11 @@ Rectangle {
property bool isUserMessage: false
property int messageIndex: -1
property int promptTokens: 0
property int completionTokens: 0
property int cachedPromptTokens: 0
property int reasoningTokens: 0
signal resetChatToMessage(int index)
signal openFileRequested(string filePath)
@@ -135,6 +140,39 @@ Rectangle {
}
}
}
// Right-aligned row with the token usage of one message: prompt (↑), completion (↓)
// and their sum (Σ). Hidden for user messages and when both counters are zero.
RowLayout {
id: usageBadge
Layout.fillWidth: true
Layout.leftMargin: 10
Layout.rightMargin: 10
spacing: 8
visible: !root.isUserMessage
&& (root.promptTokens > 0 || root.completionTokens > 0)
// Stretching spacer that pushes the labels to the right edge.
Item { Layout.fillWidth: true }
// Prompt tokens; the cached share is appended only when it is non-zero.
Text {
text: root.cachedPromptTokens > 0
? qsTr("↑ %1 (cached %2)").arg(root.promptTokens).arg(root.cachedPromptTokens)
: qsTr("↑ %1").arg(root.promptTokens)
color: palette.placeholderText
font.pointSize: Math.max(root.textFontSize - 2, 7)
}
// Completion tokens; the reasoning share is appended only when it is non-zero.
Text {
text: root.reasoningTokens > 0
? qsTr("↓ %1 (reasoning %2)").arg(root.completionTokens).arg(root.reasoningTokens)
: qsTr("↓ %1").arg(root.completionTokens)
color: palette.placeholderText
font.pointSize: Math.max(root.textFontSize - 2, 7)
}
// Prompt + completion total for this message.
Text {
text: qsTr("Σ %1").arg(root.promptTokens + root.completionTokens)
color: palette.placeholderText
font.pointSize: Math.max(root.textFontSize - 2, 7)
}
}
}
Rectangle {

View File

@@ -63,6 +63,21 @@ void LLMClientInterface::handleFullResponse(const QString &requestId, const QStr
m_performanceLogger.endTimeMeasurement(requestId);
}
// Logs the token usage reported for a finished completion request.
// Nothing is logged for requests missing from m_activeRequests or for completions
// without usage data.
void LLMClientInterface::handleRequestFinalized(
    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
{
    if (!m_activeRequests.contains(requestId))
        return;
    if (!info.usage)
        return;

    const auto &usage = *info.usage;
    LOG_MESSAGE(QString("Completion usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
                    .arg(requestId)
                    .arg(usage.promptTokens)
                    .arg(usage.completionTokens)
                    .arg(usage.cachedPromptTokens)
                    .arg(usage.reasoningTokens));
}
void LLMClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
{
auto it = m_activeRequests.find(requestId);
@@ -325,6 +340,12 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request)
this,
&LLMClientInterface::handleFullResponse,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFinalized,
this,
&LLMClientInterface::handleRequestFinalized,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFailed,

View File

@@ -3,6 +3,7 @@
#pragma once
#include <LLMQore/BaseClient.hpp>
#include <languageclient/languageclientinterface.h>
#include <texteditor/texteditor.h>
@@ -52,6 +53,8 @@ protected:
private slots:
void handleFullResponse(const QString &requestId, const QString &fullText);
void handleRequestFinalized(
const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
void handleRequestFailed(const QString &requestId, const QString &error);
private:

View File

@@ -152,6 +152,13 @@ void QuickRefactorHandler::prepareAndSendRequest(
&QuickRefactorHandler::handleFullResponse,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFinalized,
this,
&QuickRefactorHandler::handleRequestFinalized,
Qt::UniqueConnection);
connect(
provider->client(),
&::LLMQore::BaseClient::requestFailed,
@@ -408,6 +415,22 @@ void QuickRefactorHandler::handleFullResponse(const QString &requestId, const QS
}
}
// Logs the token usage reported for the most recent quick-refactor request.
// Finalizations for any other request id, or without usage data, are ignored.
void QuickRefactorHandler::handleRequestFinalized(
    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
{
    if (requestId != m_lastRequestId)
        return;
    if (!info.usage)
        return;

    const auto &usage = *info.usage;
    LOG_MESSAGE(
        QString("Quick refactor usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
            .arg(requestId)
            .arg(usage.promptTokens)
            .arg(usage.completionTokens)
            .arg(usage.cachedPromptTokens)
            .arg(usage.reasoningTokens));
}
void QuickRefactorHandler::handleRequestFailed(const QString &requestId, const QString &error)
{
if (requestId == m_lastRequestId) {

View File

@@ -6,6 +6,7 @@
#include <QJsonObject>
#include <QObject>
#include <LLMQore/BaseClient.hpp>
#include <texteditor/texteditor.h>
#include <utils/textutils.h>
@@ -43,6 +44,8 @@ signals:
private slots:
void handleFullResponse(const QString &requestId, const QString &fullText);
void handleRequestFinalized(
const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
void handleRequestFailed(const QString &requestId, const QString &error);
private:

View File

@@ -15,6 +15,7 @@ add_library(Context STATIC
target_link_libraries(Context
PUBLIC
Qt::Core
Qt::Gui
QtCreator::Core
QtCreator::TextEditor
QtCreator::Utils

View File

@@ -3,6 +3,14 @@
#include "TokenUtils.hpp"
#include <QFileInfo>
#include <QImageReader>
#include <QSet>
#include <QSize>
#include <algorithm>
#include <cmath>
namespace QodeAssist::Context {
int TokenUtils::estimateTokens(const QString &text)
@@ -15,8 +23,48 @@ int TokenUtils::estimateTokens(const QString &text)
return text.length() / 4;
}
bool TokenUtils::isImageFilePath(const QString &filePath)
{
static const QSet<QString> imageExtensions = {"png", "jpg", "jpeg", "gif", "webp", "bmp"};
return imageExtensions.contains(QFileInfo(filePath).suffix().toLower());
}
int TokenUtils::estimateImageAttachmentTokens(const QString &filePath)
{
QImageReader reader(filePath);
QSize size = reader.size();
if (!size.isValid() || size.isEmpty())
return 1500;
double w = size.width();
double h = size.height();
const double longSide = std::max(w, h);
if (longSide > 2048.0) {
const double s = 2048.0 / longSide;
w *= s;
h *= s;
}
const double shortSide = std::min(w, h);
if (shortSide > 768.0) {
const double s = 768.0 / shortSide;
w *= s;
h *= s;
}
const int tilesW = static_cast<int>(std::ceil(w / 512.0));
const int tilesH = static_cast<int>(std::ceil(h / 512.0));
const int tiles = std::max(1, tilesW * tilesH);
return 85 + tiles * 170;
}
int TokenUtils::estimateFileTokens(const Context::ContentFile &file)
{
if (isImageFilePath(file.filename))
return estimateImageAttachmentTokens(QString());
int total = 0;
total += estimateTokens(file.filename);

View File

@@ -15,6 +15,8 @@ public:
static int estimateTokens(const QString &text);
static int estimateFileTokens(const Context::ContentFile &file);
static int estimateFilesTokens(const QList<Context::ContentFile> &files);
static bool isImageFilePath(const QString &filePath);
static int estimateImageAttachmentTokens(const QString &filePath);
};
} // namespace QodeAssist::Context

View File

@@ -29,14 +29,6 @@ ChatAssistantSettings::ChatAssistantSettings()
setDisplayName(Tr::tr("Chat Assistant"));
// Chat Settings
chatTokensThreshold.setSettingsKey(Constants::CA_TOKENS_THRESHOLD);
chatTokensThreshold.setLabelText(Tr::tr("Chat history token limit:"));
chatTokensThreshold.setToolTip(Tr::tr("Maximum number of tokens in chat history. When "
"exceeded, oldest messages will be removed."));
chatTokensThreshold.setRange(1, 99999999);
chatTokensThreshold.setDefaultValue(20000);
linkOpenFiles.setSettingsKey(Constants::CA_LINK_OPEN_FILES);
linkOpenFiles.setLabelText(Tr::tr("Sync open files with assistant by default"));
linkOpenFiles.setDefaultValue(false);
@@ -58,6 +50,18 @@ ChatAssistantSettings::ChatAssistantSettings()
enableChatTools.setToolTip(Tr::tr("When enabled, AI can use tools to read files, search project, and build code"));
enableChatTools.setDefaultValue(false);
autoCompress.setSettingsKey(Constants::CA_AUTO_COMPRESS);
autoCompress.setLabelText(Tr::tr("Auto-compress chat when session tokens exceed:"));
autoCompress.setToolTip(Tr::tr(
"After each assistant response, if the running session token total exceeds the "
"threshold, the chat is summarized and a new compressed chat is started "
"automatically. The original chat is preserved on disk."));
autoCompress.setDefaultValue(false);
autoCompressThreshold.setSettingsKey(Constants::CA_AUTO_COMPRESS_THRESHOLD);
autoCompressThreshold.setRange(1000, 99999999);
autoCompressThreshold.setDefaultValue(40000);
// General Parameters Settings
temperature.setSettingsKey(Constants::CA_TEMPERATURE);
temperature.setLabelText(Tr::tr("Temperature:"));
@@ -292,11 +296,11 @@ ChatAssistantSettings::ChatAssistantSettings()
Group{
title(Tr::tr("Chat Settings")),
Column{
Row{chatTokensThreshold, Stretch{1}},
linkOpenFiles,
autosave,
enableChatInBottomToolBar,
enableChatInNavigationPanel}},
enableChatInNavigationPanel,
Row{autoCompress, autoCompressThreshold, Stretch{1}}}},
Space{8},
Group{
title(Tr::tr("Tools")),
@@ -348,7 +352,8 @@ void ChatAssistantSettings::resetSettingsToDefaults()
QMessageBox::Yes | QMessageBox::No);
if (reply == QMessageBox::Yes) {
resetAspect(chatTokensThreshold);
resetAspect(autoCompress);
resetAspect(autoCompressThreshold);
resetAspect(temperature);
resetAspect(maxTokens);
resetAspect(useTopP);

View File

@@ -18,12 +18,13 @@ public:
ButtonAspect resetToDefaults{this};
// Chat settings
Utils::IntegerAspect chatTokensThreshold{this};
Utils::BoolAspect linkOpenFiles{this};
Utils::BoolAspect autosave{this};
Utils::BoolAspect enableChatInBottomToolBar{this};
Utils::BoolAspect enableChatInNavigationPanel{this};
Utils::BoolAspect enableChatTools{this};
Utils::BoolAspect autoCompress{this};
Utils::IntegerAspect autoCompressThreshold{this};
// General Parameters Settings
Utils::DoubleAspect temperature{this};

View File

@@ -78,7 +78,8 @@ const char MAX_FILE_THRESHOLD[] = "QodeAssist.maxFileThreshold";
const char CC_MULTILINE_COMPLETION[] = "QodeAssist.ccMultilineCompletion";
const char CC_MODEL_OUTPUT_HANDLER[] = "QodeAssist.ccModelOutputHandler";
const char CA_AUTO_APPLY_FILE_EDITS[] = "QodeAssist.caAutoApplyFileEdits";
const char CA_TOKENS_THRESHOLD[] = "QodeAssist.caTokensThreshold";
const char CA_AUTO_COMPRESS[] = "QodeAssist.caAutoCompress";
const char CA_AUTO_COMPRESS_THRESHOLD[] = "QodeAssist.caAutoCompressThreshold";
const char CA_LINK_OPEN_FILES[] = "QodeAssist.caLinkOpenFiles";
const char CA_AUTOSAVE[] = "QodeAssist.caAutosave";
const char CC_CUSTOM_LANGUAGES[] = "QodeAssist.ccCustomLanguages";