diff --git a/ChatView/ChatModel.cpp b/ChatView/ChatModel.cpp index cc8aa4b..90619e0 100644 --- a/ChatView/ChatModel.cpp +++ b/ChatView/ChatModel.cpp @@ -11,7 +11,6 @@ #include #include -#include "ChatAssistantSettings.hpp" #include "Logger.hpp" #include "context/ChangesManager.h" @@ -20,14 +19,6 @@ namespace QodeAssist::Chat { ChatModel::ChatModel(QObject *parent) : QAbstractListModel(parent) { - auto &settings = Settings::chatAssistantSettings(); - - connect( - &settings.chatTokensThreshold, - &Utils::BaseAspect::changed, - this, - &ChatModel::tokensThresholdChanged); - connect(&Context::ChangesManager::instance(), &Context::ChangesManager::fileEditApplied, this, @@ -86,6 +77,16 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const case Roles::IsRedacted: { return message.isRedacted; } + case Roles::PromptTokens: + return message.promptTokens; + case Roles::CompletionTokens: + return message.completionTokens; + case Roles::CachedPromptTokens: + return message.cachedPromptTokens; + case Roles::ReasoningTokens: + return message.reasoningTokens; + case Roles::TotalTokens: + return message.promptTokens + message.completionTokens; case Roles::Images: { QVariantList imagesList; for (const auto &image : message.images) { @@ -124,6 +125,11 @@ QHash ChatModel::roleNames() const roles[Roles::Attachments] = "attachments"; roles[Roles::IsRedacted] = "isRedacted"; roles[Roles::Images] = "images"; + roles[Roles::PromptTokens] = "promptTokens"; + roles[Roles::CompletionTokens] = "completionTokens"; + roles[Roles::CachedPromptTokens] = "cachedPromptTokens"; + roles[Roles::ReasoningTokens] = "reasoningTokens"; + roles[Roles::TotalTokens] = "totalTokens"; return roles; } @@ -207,6 +213,7 @@ void ChatModel::clear() m_messages.clear(); endResetModel(); emit modelReseted(); + emit sessionUsageChanged(); } QList ChatModel::processMessageContent(const QString &content) const @@ -310,12 +317,6 @@ QJsonArray ChatModel::prepareMessagesForRequest(const QString &systemPrompt) con return messages; } -int ChatModel::tokensThreshold() const -{ - auto &settings = Settings::chatAssistantSettings(); - return settings.chatTokensThreshold(); -} - QString ChatModel::lastMessageId() const { return !m_messages.isEmpty() ? m_messages.last().id : ""; @@ -330,6 +331,7 @@ void ChatModel::resetModelTo(int index) beginRemoveRows(QModelIndex(), index, m_messages.size() - 1); m_messages.remove(index, m_messages.size() - index); endRemoveRows(); + emit sessionUsageChanged(); } } @@ -507,6 +509,54 @@ void ChatModel::updateMessageContent(const QString &messageId, const QString &ne } } +void ChatModel::setMessageUsage( + const QString &messageId, + int promptTokens, + int completionTokens, + int cachedPromptTokens, + int reasoningTokens) +{ + for (int i = 0; i < m_messages.size(); ++i) { + if (m_messages[i].id != messageId) + continue; + m_messages[i].promptTokens = promptTokens; + m_messages[i].completionTokens = completionTokens; + m_messages[i].cachedPromptTokens = cachedPromptTokens; + m_messages[i].reasoningTokens = reasoningTokens; + emit dataChanged( + index(i), + index(i), + {Roles::PromptTokens, + Roles::CompletionTokens, + Roles::CachedPromptTokens, + Roles::ReasoningTokens, + Roles::TotalTokens}); + emit sessionUsageChanged(); + return; + } +} + +int ChatModel::sessionPromptTokens() const +{ + int total = 0; + for (const auto &m : m_messages) + total += m.promptTokens; + return total; +} + +int ChatModel::sessionCompletionTokens() const +{ + int total = 0; + for (const auto &m : m_messages) + total += m.completionTokens; + return total; +} + +int ChatModel::sessionTotalTokens() const +{ + return sessionPromptTokens() + sessionCompletionTokens(); +} + void ChatModel::setLoadingFromHistory(bool loading) { m_loadingFromHistory = loading; diff --git a/ChatView/ChatModel.hpp b/ChatView/ChatModel.hpp index dd7c5ad..3f5a56b 100644 --- a/ChatView/ChatModel.hpp +++ b/ChatView/ChatModel.hpp @@ -17,14 +17,27 @@ namespace QodeAssist::Chat { class ChatModel : public QAbstractListModel { Q_OBJECT - Q_PROPERTY(int tokensThreshold READ tokensThreshold NOTIFY tokensThresholdChanged FINAL) + Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL) + Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL) + Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL) QML_ELEMENT public: enum ChatRole { System, User, Assistant, Tool, FileEdit, Thinking }; Q_ENUM(ChatRole) - enum Roles { RoleType = Qt::UserRole, Content, Attachments, IsRedacted, Images }; + enum Roles { + RoleType = Qt::UserRole, + Content, + Attachments, + IsRedacted, + Images, + PromptTokens, + CompletionTokens, + CachedPromptTokens, + ReasoningTokens, + TotalTokens + }; Q_ENUM(Roles) struct ImageAttachment @@ -44,6 +57,11 @@ public: QList attachments; QList images; + + int promptTokens = 0; + int completionTokens = 0; + int cachedPromptTokens = 0; + int reasoningTokens = 0; }; explicit ChatModel(QObject *parent = nullptr); @@ -66,8 +84,6 @@ public: QVector getChatHistory() const; QJsonArray prepareMessagesForRequest(const QString &systemPrompt) const; - int tokensThreshold() const; - QString currentModel() const; QString lastMessageId() const; @@ -84,6 +100,17 @@ public: const QString &requestId, const QString &thinking, const QString &signature); void addRedactedThinkingBlock(const QString &requestId, const QString &signature); void updateMessageContent(const QString &messageId, const QString &newContent); + + void setMessageUsage( + const QString &messageId, + int promptTokens, + int completionTokens, + int cachedPromptTokens, + int reasoningTokens); + + int sessionPromptTokens() const; + int sessionCompletionTokens() const; + int sessionTotalTokens() const; void setLoadingFromHistory(bool loading); bool isLoadingFromHistory() const; @@ -92,8 +119,8 @@ public: QString chatFilePath() const; signals: - void tokensThresholdChanged(); void modelReseted(); + void sessionUsageChanged(); private slots: void onFileEditApplied(const QString &editId); diff --git a/ChatView/ChatRootView.cpp b/ChatView/ChatRootView.cpp index d07e1bc..3a2ea0f 100644 --- a/ChatView/ChatRootView.cpp +++ b/ChatView/ChatRootView.cpp @@ -3,7 +3,12 @@ #include "ChatRootView.hpp" +#include + +#include #include +#include +#include #include #include #include @@ -31,7 +36,6 @@ #include "Logger.hpp" #include "ProjectSettings.hpp" #include "ProvidersManager.hpp" -#include "ToolsSettings.hpp" #include "context/ChangesManager.h" #include "context/ContextManager.hpp" #include "context/TokenUtils.hpp" @@ -107,6 +111,22 @@ ChatRootView::ChatRootView(QQuickItem *parent) &Utils::BaseAspect::changed, this, &ChatRootView::updateInputTokensCount); + connect(this, &ChatRootView::useToolsChanged, this, &ChatRootView::updateInputTokensCount); + connect( + &Settings::chatAssistantSettings().enableChatTools, + &Utils::BaseAspect::changed, + this, + &ChatRootView::updateInputTokensCount); + + rewireToolsChangedConnection(); + connect( + &Settings::generalSettings().caProvider, + &Utils::BaseAspect::changed, + this, + [this]() { + rewireToolsChangedConnection(); + updateInputTokensCount(); + }); connect( &Settings::chatAssistantSettings().systemPrompt, &Utils::BaseAspect::changed, @@ -171,6 +191,28 @@ ChatRootView::ChatRootView(QQuickItem *parent) updateCurrentMessageEditsStats(); }); + connect( + m_clientInterface, + &ClientInterface::messageUsageReceived, + this, + [this](int promptTokens, int /*completionTokens*/, int /*cached*/, int /*reasoning*/) { + if (promptTokens <= 0 || m_lastSentEstimate <= 0) + return; + + const double rawFactor + = static_cast(promptTokens) / static_cast(m_lastSentEstimate); + const double clamped = std::clamp(rawFactor, 0.5, 3.0); + m_calibrationFactor = 0.5 * m_calibrationFactor + 0.5 * clamped; + + LOG_MESSAGE(QString("Token calibration: server=%1 estimated=%2 ratio=%3 ema=%4") + .arg(promptTokens) + .arg(m_lastSentEstimate) + .arg(rawFactor, 0, 'f', 3) + .arg(m_calibrationFactor, 0, 'f', 3)); + + updateInputTokensCount(); + }); + connect( &Context::ChangesManager::instance(), &Context::ChangesManager::fileEditAdded, @@ -247,7 +289,6 @@ ChatRootView::ChatRootView(QQuickItem *parent) emit lastErrorMessageChanged(); }); - // ChatCompressor signals connect(m_chatCompressor, &ChatCompressor::compressionStarted, this, [this]() { emit isCompressingChanged(); }); @@ -259,6 +300,12 @@ ChatRootView::ChatRootView(QQuickItem *parent) emit compressionCompleted(compressedChatPath); loadHistory(compressedChatPath); + + if (m_pendingSend.active) { + PendingSend p = m_pendingSend; + m_pendingSend = {}; + dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking); + } }); connect(m_chatCompressor, &ChatCompressor::compressionFailed, this, [this](const QString &error) { @@ -266,6 +313,12 @@ ChatRootView::ChatRootView(QQuickItem *parent) m_lastErrorMessage = error; emit lastErrorMessageChanged(); emit compressionFailed(error); + + if (m_pendingSend.active) { + PendingSend p = m_pendingSend; + m_pendingSend = {}; + dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking); + } }); } @@ -276,32 +329,72 @@ ChatModel *ChatRootView::chatModel() const void ChatRootView::sendMessage(const QString &message) { - if (m_inputTokensCount > m_chatModel->tokensThreshold()) { - QMessageBox::StandardButton reply = QMessageBox::question( - Core::ICore::dialogParent(), - tr("Token Limit Exceeded"), - tr("The chat history has exceeded the token limit.\n" - "Would you like to create new chat?"), - QMessageBox::Yes | QMessageBox::No); + const QStringList attachments = m_attachmentFiles; + const QStringList linkedFiles = m_linkedFiles; + const bool tools = useTools(); + const bool thinking = useThinking(); - if (reply == QMessageBox::Yes) { - autosave(); - m_chatModel->clear(); - setRecentFilePath(QString{}); - return; - } - } + if (deferSendForAutoCompress(message, attachments, linkedFiles, tools, thinking)) + return; + + dispatchSend(message, attachments, linkedFiles, tools, thinking); +} + +bool ChatRootView::deferSendForAutoCompress( + const QString &message, + const QStringList &attachments, + const QStringList &linkedFiles, + bool useToolsArg, + bool useThinkingArg) +{ + auto &settings = Settings::chatAssistantSettings(); + if (!settings.autoCompress()) + return false; + + const int threshold = settings.autoCompressThreshold(); + if (m_inputTokensCount < threshold) + return false; if (m_recentFilePath.isEmpty()) { - QString filePath = getAutosaveFilePath(message, m_attachmentFiles); + QString filePath = getAutosaveFilePath(message, attachments); + if (filePath.isEmpty()) + return false; + setRecentFilePath(filePath); + LOG_MESSAGE(QString("Set chat file path for new chat (auto-compress): %1").arg(filePath)); + } + + if (m_chatCompressor->isCompressing() || m_pendingSend.active) + return false; + + LOG_MESSAGE(QString("Auto-compress preempt: estimated next=%1 ≥ threshold=%2; deferring send") + .arg(m_inputTokensCount) + .arg(threshold)); + + m_pendingSend = {message, attachments, linkedFiles, useToolsArg, useThinkingArg, true}; + compressCurrentChat(); + return true; +} + +void ChatRootView::dispatchSend( + const QString &message, + const QStringList &attachments, + const QStringList &linkedFiles, + bool useToolsArg, + bool useThinkingArg) +{ + if (m_recentFilePath.isEmpty()) { + QString filePath = getAutosaveFilePath(message, attachments); if (!filePath.isEmpty()) { setRecentFilePath(filePath); LOG_MESSAGE(QString("Set chat file path for new chat: %1").arg(filePath)); } } - m_clientInterface - ->sendMessage(message, m_attachmentFiles, m_linkedFiles, useTools(), useThinking()); + m_lastSentEstimate = m_calibrationFactor > 0.0 + ? static_cast(m_inputTokensCount / m_calibrationFactor) + : m_inputTokensCount; + + m_clientInterface->sendMessage(message, attachments, linkedFiles, useToolsArg, useThinkingArg); m_fileManager->clearIntermediateStorage(); clearAttachmentFiles(); @@ -392,7 +485,8 @@ void ChatRootView::loadHistory(const QString &filePath) setRecentFilePath(filePath); } - m_fileManager->clearIntermediateStorage(); + if (!m_pendingSend.active) + m_fileManager->clearIntermediateStorage(); m_attachmentFiles.clear(); m_linkedFiles.clear(); emit attachmentFilesChanged(); @@ -747,6 +841,27 @@ void ChatRootView::openFileInEditor(const QString &filePath) Core::EditorManager::openEditor(Utils::FilePath::fromString(filePath)); } +void ChatRootView::rewireToolsChangedConnection() +{ + if (m_toolsChangedConn) + QObject::disconnect(m_toolsChangedConn); + m_toolsChangedConn = {}; + + const auto providerName = Settings::generalSettings().caProvider(); + auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(providerName); + if (!provider) + return; + auto *tm = provider->toolsManager(); + if (!tm) + return; + + m_toolsChangedConn = connect( + tm, + &::LLMQore::ToolRegistry::toolsChanged, + this, + &ChatRootView::updateInputTokensCount); +} + void ChatRootView::updateInputTokensCount() { int inputTokens = m_messageTokensCount; @@ -756,14 +871,33 @@ void ChatRootView::updateInputTokensCount() inputTokens += Context::TokenUtils::estimateTokens(settings.systemPrompt()); } + const auto splitImageEstimate = [](const QStringList &paths, QStringList &textPaths) { + int imageTokens = 0; + for (const QString &p : paths) { + if (Context::TokenUtils::isImageFilePath(p)) + imageTokens += Context::TokenUtils::estimateImageAttachmentTokens(p); + else + textPaths.append(p); + } + return imageTokens; + }; + if (!m_attachmentFiles.isEmpty()) { - auto attachFiles = m_clientInterface->contextManager()->getContentFiles(m_attachmentFiles); - inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles); + QStringList textPaths; + inputTokens += splitImageEstimate(m_attachmentFiles, textPaths); + if (!textPaths.isEmpty()) { + auto attachFiles = m_clientInterface->contextManager()->getContentFiles(textPaths); + inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles); + } } if (!m_linkedFiles.isEmpty()) { - auto linkFiles = m_clientInterface->contextManager()->getContentFiles(m_linkedFiles); - inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles); + QStringList textPaths; + inputTokens += splitImageEstimate(m_linkedFiles, textPaths); + if (!textPaths.isEmpty()) { + auto linkFiles = m_clientInterface->contextManager()->getContentFiles(textPaths); + inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles); + } } const auto &history = m_chatModel->getChatHistory(); @@ -772,7 +906,22 @@ void ChatRootView::updateInputTokensCount() inputTokens += 4; // + role } - m_inputTokensCount = inputTokens; + if (useTools()) { + const auto providerName = Settings::generalSettings().caProvider(); + if (auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName( + providerName)) { + if (auto *tm = provider->toolsManager()) { + const QJsonArray toolDefs = tm->getToolsDefinitions(); + if (!toolDefs.isEmpty()) { + const QByteArray serialized + = QJsonDocument(toolDefs).toJson(QJsonDocument::Compact); + inputTokens += static_cast(serialized.size() / 4); + } + } + } + } + + m_inputTokensCount = static_cast(inputTokens * m_calibrationFactor); emit inputTokensCountChanged(); } diff --git a/ChatView/ChatRootView.hpp b/ChatView/ChatRootView.hpp index d9f5ef4..11126d5 100644 --- a/ChatView/ChatRootView.hpp +++ b/ChatView/ChatRootView.hpp @@ -212,6 +212,21 @@ signals: void openFilesChanged(); private: + void rewireToolsChangedConnection(); + QMetaObject::Connection m_toolsChangedConn; + + bool deferSendForAutoCompress( + const QString &message, + const QStringList &attachments, + const QStringList &linkedFiles, + bool useTools, + bool useThinking); + void dispatchSend( + const QString &message, + const QStringList &attachments, + const QStringList &linkedFiles, + bool useTools, + bool useThinking); void updateFileEditStatus(const QString &editId, const QString &status); QString getChatsHistoryDir() const; QString getSuggestedFileName() const; @@ -228,6 +243,18 @@ private: QStringList m_linkedFiles; int m_messageTokensCount{0}; int m_inputTokensCount{0}; + int m_lastSentEstimate{0}; + double m_calibrationFactor{1.0}; + + struct PendingSend { + QString message; + QStringList attachments; + QStringList linkedFiles; + bool useTools = false; + bool useThinking = false; + bool active = false; + }; + PendingSend m_pendingSend; bool m_isSyncOpenFiles; QList m_currentEditors; bool m_isRequestInProgress; diff --git a/ChatView/ChatSerializer.cpp b/ChatView/ChatSerializer.cpp index 23d1a4d..26ce8ae 100644 --- a/ChatView/ChatSerializer.cpp +++ b/ChatView/ChatSerializer.cpp @@ -103,6 +103,17 @@ QJsonObject ChatSerializer::serializeMessage( messageObj["images"] = imagesArray; } + if (message.promptTokens > 0 || message.completionTokens > 0) { + QJsonObject usageObj; + usageObj["promptTokens"] = message.promptTokens; + usageObj["completionTokens"] = message.completionTokens; + if (message.cachedPromptTokens > 0) + usageObj["cachedPromptTokens"] = message.cachedPromptTokens; + if (message.reasoningTokens > 0) + usageObj["reasoningTokens"] = message.reasoningTokens; + messageObj["usage"] = usageObj; + } + return messageObj; } @@ -139,6 +150,14 @@ ChatModel::Message ChatSerializer::deserializeMessage( } } + if (json.contains("usage")) { + const QJsonObject usageObj = json["usage"].toObject(); + message.promptTokens = usageObj["promptTokens"].toInt(); + message.completionTokens = usageObj["completionTokens"].toInt(); + message.cachedPromptTokens = usageObj["cachedPromptTokens"].toInt(); + message.reasoningTokens = usageObj["reasoningTokens"].toInt(); + } + return message; } diff --git a/ChatView/ClientInterface.cpp b/ChatView/ClientInterface.cpp index 4bddc16..eebbc4e 100644 --- a/ChatView/ClientInterface.cpp +++ b/ChatView/ClientInterface.cpp @@ -257,6 +257,12 @@ void ClientInterface::sendMessage( this, &ClientInterface::handleFullResponse, Qt::UniqueConnection); + connect( + provider->client(), + &::LLMQore::BaseClient::requestFinalized, + this, + &ClientInterface::handleRequestFinalized, + Qt::UniqueConnection); connect( provider->client(), &::LLMQore::BaseClient::requestFailed, @@ -449,6 +455,29 @@ void ClientInterface::handleFullResponse(const QString &requestId, const QString m_awaitingContinuation.remove(requestId); } +void ClientInterface::handleRequestFinalized( + const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info) +{ + if (!m_activeRequests.contains(requestId)) + return; + if (!info.usage) + return; + + const auto &u = *info.usage; + m_chatModel->setMessageUsage( + requestId, u.promptTokens, u.completionTokens, u.cachedPromptTokens, u.reasoningTokens); + + emit messageUsageReceived( + u.promptTokens, u.completionTokens, u.cachedPromptTokens, u.reasoningTokens); + + LOG_MESSAGE(QString("Chat usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5") + .arg(requestId) + .arg(u.promptTokens) + .arg(u.completionTokens) + .arg(u.cachedPromptTokens) + .arg(u.reasoningTokens)); +} + void ClientInterface::handleRequestFailed(const QString &requestId, const QString &error) { auto it = m_activeRequests.find(requestId); diff --git a/ChatView/ClientInterface.hpp b/ChatView/ClientInterface.hpp index c000159..433961e 100644 --- a/ChatView/ClientInterface.hpp +++ b/ChatView/ClientInterface.hpp @@ -11,6 +11,7 @@ #include "ChatModel.hpp" #include "Provider.hpp" #include "pluginllmcore/IPromptProvider.hpp" +#include #include namespace QodeAssist::Chat { @@ -42,10 +43,13 @@ signals: void errorOccurred(const QString &error); void messageReceivedCompletely(); void requestStarted(const QString &requestId); + void messageUsageReceived( + int promptTokens, int completionTokens, int cachedPromptTokens, int reasoningTokens); private slots: void handlePartialResponse(const QString &requestId, const QString &partialText); void handleFullResponse(const QString &requestId, const QString &fullText); + void handleRequestFinalized(const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info); void handleRequestFailed(const QString &requestId, const QString &error); void handleThinkingBlockReceived( const QString &requestId, const QString &thinking, const QString &signature); diff --git a/ChatView/qml/RootItem.qml b/ChatView/qml/RootItem.qml index 3fbd0f9..8f9f51d 100644 --- a/ChatView/qml/RootItem.qml +++ b/ChatView/qml/RootItem.qml @@ -91,7 +91,10 @@ ChatRootView { loadButton.onClicked: root.showLoadDialog() clearButton.onClicked: root.clearChat() tokensBadge { - text: qsTr("%1/%2").arg(root.inputTokensCount).arg(root.chatModel.tokensThreshold) + text: qsTr("next ~%1 · session ↑%2 ↓%3") + .arg(root.inputTokensCount) + .arg(root.chatModel.sessionPromptTokens) + .arg(root.chatModel.sessionCompletionTokens) } recentPath { text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved") @@ -270,6 +273,10 @@ ChatRootView { codeFontSize: root.codeFontSize textFontSize: root.textFontSize textFormat: root.textFormat + promptTokens: model.promptTokens || 0 + completionTokens: model.completionTokens || 0 + cachedPromptTokens: model.cachedPromptTokens || 0 + reasoningTokens: model.reasoningTokens || 0 onResetChatToMessage: function(idx) { messageInput.text = model.content diff --git a/ChatView/qml/chatparts/ChatItem.qml b/ChatView/qml/chatparts/ChatItem.qml index ef99d0f..9aa0ada 100644 --- a/ChatView/qml/chatparts/ChatItem.qml +++ b/ChatView/qml/chatparts/ChatItem.qml @@ -34,6 +34,11 @@ Rectangle { property bool isUserMessage: false property int messageIndex: -1 + property int promptTokens: 0 + property int completionTokens: 0 + property int cachedPromptTokens: 0 + property int reasoningTokens: 0 + signal resetChatToMessage(int index) signal openFileRequested(string filePath) @@ -135,6 +140,39 @@ Rectangle { } } } + + RowLayout { + id: usageBadge + + Layout.fillWidth: true + Layout.leftMargin: 10 + Layout.rightMargin: 10 + spacing: 8 + visible: !root.isUserMessage + && (root.promptTokens > 0 || root.completionTokens > 0) + + Item { Layout.fillWidth: true } + + Text { + text: root.cachedPromptTokens > 0 + ? qsTr("↑ %1 (cached %2)").arg(root.promptTokens).arg(root.cachedPromptTokens) + : qsTr("↑ %1").arg(root.promptTokens) + color: palette.placeholderText + font.pointSize: Math.max(root.textFontSize - 2, 7) + } + Text { + text: root.reasoningTokens > 0 + ? qsTr("↓ %1 (reasoning %2)").arg(root.completionTokens).arg(root.reasoningTokens) + : qsTr("↓ %1").arg(root.completionTokens) + color: palette.placeholderText + font.pointSize: Math.max(root.textFontSize - 2, 7) + } + Text { + text: qsTr("Σ %1").arg(root.promptTokens + root.completionTokens) + color: palette.placeholderText + font.pointSize: Math.max(root.textFontSize - 2, 7) + } + } } Rectangle { diff --git a/LLMClientInterface.cpp b/LLMClientInterface.cpp index 72f1813..83ab131 100644 --- a/LLMClientInterface.cpp +++ b/LLMClientInterface.cpp @@ -63,6 +63,21 @@ void LLMClientInterface::handleFullResponse(const QString &requestId, const QStr m_performanceLogger.endTimeMeasurement(requestId); } +void LLMClientInterface::handleRequestFinalized( + const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info) +{ + if (!m_activeRequests.contains(requestId) || !info.usage) + return; + + const auto &u = *info.usage; + LOG_MESSAGE(QString("Completion usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5") + .arg(requestId) + .arg(u.promptTokens) + .arg(u.completionTokens) + .arg(u.cachedPromptTokens) + .arg(u.reasoningTokens)); +} + void LLMClientInterface::handleRequestFailed(const QString &requestId, const QString &error) { auto it = m_activeRequests.find(requestId); @@ -325,6 +340,12 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request) this, &LLMClientInterface::handleFullResponse, Qt::UniqueConnection); + connect( + provider->client(), + &::LLMQore::BaseClient::requestFinalized, + this, + &LLMClientInterface::handleRequestFinalized, + Qt::UniqueConnection); connect( provider->client(), &::LLMQore::BaseClient::requestFailed, diff --git a/LLMClientInterface.hpp b/LLMClientInterface.hpp index 0be1124..24e48e3 100644 --- a/LLMClientInterface.hpp +++ b/LLMClientInterface.hpp @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -52,6 +53,8 @@ protected: private slots: void handleFullResponse(const QString &requestId, const QString &fullText); + void handleRequestFinalized( + const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info); void handleRequestFailed(const QString &requestId, const QString &error); private: diff --git a/QuickRefactorHandler.cpp b/QuickRefactorHandler.cpp index e831392..aacd78c 100644 --- a/QuickRefactorHandler.cpp +++ b/QuickRefactorHandler.cpp @@ -152,6 +152,13 @@ void QuickRefactorHandler::prepareAndSendRequest( &QuickRefactorHandler::handleFullResponse, Qt::UniqueConnection); + connect( + provider->client(), + &::LLMQore::BaseClient::requestFinalized, + this, + &QuickRefactorHandler::handleRequestFinalized, + Qt::UniqueConnection); + connect( provider->client(), &::LLMQore::BaseClient::requestFailed, @@ -408,6 +415,22 @@ void QuickRefactorHandler::handleFullResponse(const QString &requestId, const QS } } +void QuickRefactorHandler::handleRequestFinalized( + const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info) +{ + if (requestId != m_lastRequestId || !info.usage) + return; + + const auto &u = *info.usage; + LOG_MESSAGE( + QString("Quick refactor usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5") + .arg(requestId) + .arg(u.promptTokens) + .arg(u.completionTokens) + .arg(u.cachedPromptTokens) + .arg(u.reasoningTokens)); +} + void QuickRefactorHandler::handleRequestFailed(const QString &requestId, const QString &error) { if (requestId == m_lastRequestId) { diff --git a/QuickRefactorHandler.hpp b/QuickRefactorHandler.hpp index 195e428..140c2b7 100644 --- a/QuickRefactorHandler.hpp +++ b/QuickRefactorHandler.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -43,6 +44,8 @@ signals: private slots: void handleFullResponse(const QString &requestId, const QString &fullText); + void handleRequestFinalized( + const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info); void handleRequestFailed(const QString &requestId, const QString &error); private: diff --git a/context/CMakeLists.txt b/context/CMakeLists.txt index c5754c1..0ef3d9f 100644 --- a/context/CMakeLists.txt +++ b/context/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(Context STATIC target_link_libraries(Context PUBLIC Qt::Core + Qt::Gui QtCreator::Core QtCreator::TextEditor QtCreator::Utils diff --git a/context/TokenUtils.cpp b/context/TokenUtils.cpp index 7d415ce..6868760 100644 --- a/context/TokenUtils.cpp +++ b/context/TokenUtils.cpp @@ -3,6 +3,14 @@ #include "TokenUtils.hpp" +#include +#include +#include +#include + +#include +#include + namespace QodeAssist::Context { int TokenUtils::estimateTokens(const QString &text) @@ -15,8 +23,48 @@ int TokenUtils::estimateTokens(const QString &text) return text.length() / 4; } +bool TokenUtils::isImageFilePath(const QString &filePath) +{ + static const QSet imageExtensions = {"png", "jpg", "jpeg", "gif", "webp", "bmp"}; + return imageExtensions.contains(QFileInfo(filePath).suffix().toLower()); +} + +int TokenUtils::estimateImageAttachmentTokens(const QString &filePath) +{ + QImageReader reader(filePath); + QSize size = reader.size(); + if (!size.isValid() || size.isEmpty()) + return 1500; + + double w = size.width(); + double h = size.height(); + + const double longSide = std::max(w, h); + if (longSide > 2048.0) { + const double s = 2048.0 / longSide; + w *= s; + h *= s; + } + + const double shortSide = std::min(w, h); + if (shortSide > 768.0) { + const double s = 768.0 / shortSide; + w *= s; + h *= s; + } + + const int tilesW = static_cast(std::ceil(w / 512.0)); + const int tilesH = static_cast(std::ceil(h / 512.0)); + const int tiles = std::max(1, tilesW * tilesH); + + return 85 + tiles * 170; +} + int TokenUtils::estimateFileTokens(const Context::ContentFile &file) { + if (isImageFilePath(file.filename)) + return estimateImageAttachmentTokens(QString()); + int total = 0; total += estimateTokens(file.filename); diff --git a/context/TokenUtils.hpp b/context/TokenUtils.hpp index 8ad2164..63f6508 100644 --- a/context/TokenUtils.hpp +++ b/context/TokenUtils.hpp @@ -15,6 +15,8 @@ public: static int estimateTokens(const QString &text); static int estimateFileTokens(const Context::ContentFile &file); static int estimateFilesTokens(const QList &files); + static bool isImageFilePath(const QString &filePath); + static int estimateImageAttachmentTokens(const QString &filePath); }; } // namespace QodeAssist::Context diff --git a/settings/ChatAssistantSettings.cpp b/settings/ChatAssistantSettings.cpp index 5a94ad3..ad32c88 100644 --- a/settings/ChatAssistantSettings.cpp +++ b/settings/ChatAssistantSettings.cpp @@ -29,14 +29,6 @@ ChatAssistantSettings::ChatAssistantSettings() setDisplayName(Tr::tr("Chat Assistant")); - // Chat Settings - chatTokensThreshold.setSettingsKey(Constants::CA_TOKENS_THRESHOLD); - chatTokensThreshold.setLabelText(Tr::tr("Chat history token limit:")); - chatTokensThreshold.setToolTip(Tr::tr("Maximum number of tokens in chat history. When " - "exceeded, oldest messages will be removed.")); - chatTokensThreshold.setRange(1, 99999999); - chatTokensThreshold.setDefaultValue(20000); - linkOpenFiles.setSettingsKey(Constants::CA_LINK_OPEN_FILES); linkOpenFiles.setLabelText(Tr::tr("Sync open files with assistant by default")); linkOpenFiles.setDefaultValue(false); @@ -58,6 +50,18 @@ ChatAssistantSettings::ChatAssistantSettings() enableChatTools.setToolTip(Tr::tr("When enabled, AI can use tools to read files, search project, and build code")); enableChatTools.setDefaultValue(false); + autoCompress.setSettingsKey(Constants::CA_AUTO_COMPRESS); + autoCompress.setLabelText(Tr::tr("Auto-compress chat when session tokens exceed:")); + autoCompress.setToolTip(Tr::tr( + "After each assistant response, if the running session token total exceeds the " + "threshold, the chat is summarized and a new compressed chat is started " + "automatically. The original chat is preserved on disk.")); + autoCompress.setDefaultValue(false); + + autoCompressThreshold.setSettingsKey(Constants::CA_AUTO_COMPRESS_THRESHOLD); + autoCompressThreshold.setRange(1000, 99999999); + autoCompressThreshold.setDefaultValue(40000); + // General Parameters Settings temperature.setSettingsKey(Constants::CA_TEMPERATURE); temperature.setLabelText(Tr::tr("Temperature:")); @@ -292,11 +296,11 @@ ChatAssistantSettings::ChatAssistantSettings() Group{ title(Tr::tr("Chat Settings")), Column{ - Row{chatTokensThreshold, Stretch{1}}, linkOpenFiles, autosave, enableChatInBottomToolBar, - enableChatInNavigationPanel}}, + enableChatInNavigationPanel, + Row{autoCompress, autoCompressThreshold, Stretch{1}}}}, Space{8}, Group{ title(Tr::tr("Tools")), @@ -348,7 +352,8 @@ void ChatAssistantSettings::resetSettingsToDefaults() QMessageBox::Yes | QMessageBox::No); if (reply == QMessageBox::Yes) { - resetAspect(chatTokensThreshold); + resetAspect(autoCompress); + resetAspect(autoCompressThreshold); resetAspect(temperature); resetAspect(maxTokens); resetAspect(useTopP); diff --git a/settings/ChatAssistantSettings.hpp b/settings/ChatAssistantSettings.hpp index 1218a25..716d0bd 100644 --- a/settings/ChatAssistantSettings.hpp +++ b/settings/ChatAssistantSettings.hpp @@ -18,12 +18,13 @@ public: ButtonAspect resetToDefaults{this}; // Chat settings - Utils::IntegerAspect chatTokensThreshold{this}; Utils::BoolAspect linkOpenFiles{this}; Utils::BoolAspect autosave{this}; Utils::BoolAspect enableChatInBottomToolBar{this}; Utils::BoolAspect enableChatInNavigationPanel{this}; Utils::BoolAspect enableChatTools{this}; + Utils::BoolAspect autoCompress{this}; + Utils::IntegerAspect autoCompressThreshold{this}; // General Parameters Settings Utils::DoubleAspect temperature{this}; diff --git a/settings/SettingsConstants.hpp b/settings/SettingsConstants.hpp index 2859176..baf8798 100644 --- a/settings/SettingsConstants.hpp +++ b/settings/SettingsConstants.hpp @@ -78,7 +78,8 @@ const char MAX_FILE_THRESHOLD[] = "QodeAssist.maxFileThreshold"; const char CC_MULTILINE_COMPLETION[] = "QodeAssist.ccMultilineCompletion"; const char CC_MODEL_OUTPUT_HANDLER[] = "QodeAssist.ccModelOutputHandler"; const char CA_AUTO_APPLY_FILE_EDITS[] = "QodeAssist.caAutoApplyFileEdits"; -const char CA_TOKENS_THRESHOLD[] = "QodeAssist.caTokensThreshold"; +const char CA_AUTO_COMPRESS[] = "QodeAssist.caAutoCompress"; +const char CA_AUTO_COMPRESS_THRESHOLD[] = "QodeAssist.caAutoCompressThreshold"; const char CA_LINK_OPEN_FILES[] = "QodeAssist.caLinkOpenFiles"; const char CA_AUTOSAVE[] = "QodeAssist.caAutosave"; const char CC_CUSTOM_LANGUAGES[] = "QodeAssist.ccCustomLanguages"; diff --git a/sources/external/llmqore b/sources/external/llmqore index c042931..0c9fc8b 160000 --- a/sources/external/llmqore +++ b/sources/external/llmqore @@ -1 +1 @@ -Subproject commit c0429314bb5f8d5d65bd82363ef108e462cb9c37 +Subproject commit 0c9fc8bd7cb52946328d29f02c804c7872ba06b1