refactor: Update token usage api (#347)

* refactor: Improve token usage api
* refactor: Image recognition to tokens
2026-06-12 17:29:13 -04:00 · 2026-05-14 21:19:12 +02:00
parent 86f4635080
commit 3b421f60af
20 changed files with 518 additions and 60 deletions
--- a/ChatView/ChatModel.cpp
+++ b/ChatView/ChatModel.cpp
@@ -11,7 +11,6 @@
 #include <QUrl>
 #include <QtQml>

-#include "ChatAssistantSettings.hpp"
 #include "Logger.hpp"
 #include "context/ChangesManager.h"

@@ -20,14 +19,6 @@ namespace QodeAssist::Chat {
 ChatModel::ChatModel(QObject *parent)
    : QAbstractListModel(parent)
 {
-    auto &settings = Settings::chatAssistantSettings();
-
-    connect(
-        &settings.chatTokensThreshold,
-        &Utils::BaseAspect::changed,
-        this,
-        &ChatModel::tokensThresholdChanged);
-    
    connect(&Context::ChangesManager::instance(),
            &Context::ChangesManager::fileEditApplied,
            this,
@@ -86,6 +77,16 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const
    case Roles::IsRedacted: {
        return message.isRedacted;
    }
+    case Roles::PromptTokens:
+        return message.promptTokens;
+    case Roles::CompletionTokens:
+        return message.completionTokens;
+    case Roles::CachedPromptTokens:
+        return message.cachedPromptTokens;
+    case Roles::ReasoningTokens:
+        return message.reasoningTokens;
+    case Roles::TotalTokens:
+        return message.promptTokens + message.completionTokens;
    case Roles::Images: {
        QVariantList imagesList;
        for (const auto &image : message.images) {
@@ -124,6 +125,11 @@ QHash<int, QByteArray> ChatModel::roleNames() const
    roles[Roles::Attachments] = "attachments";
    roles[Roles::IsRedacted] = "isRedacted";
    roles[Roles::Images] = "images";
+    roles[Roles::PromptTokens] = "promptTokens";
+    roles[Roles::CompletionTokens] = "completionTokens";
+    roles[Roles::CachedPromptTokens] = "cachedPromptTokens";
+    roles[Roles::ReasoningTokens] = "reasoningTokens";
+    roles[Roles::TotalTokens] = "totalTokens";
    return roles;
 }

@@ -207,6 +213,7 @@ void ChatModel::clear()
    m_messages.clear();
    endResetModel();
    emit modelReseted();
+    emit sessionUsageChanged();
 }

 QList<MessagePart> ChatModel::processMessageContent(const QString &content) const
@@ -310,12 +317,6 @@ QJsonArray ChatModel::prepareMessagesForRequest(const QString &systemPrompt) con
    return messages;
 }

-int ChatModel::tokensThreshold() const
-{
-    auto &settings = Settings::chatAssistantSettings();
-    return settings.chatTokensThreshold();
-}
-
 QString ChatModel::lastMessageId() const
 {
    return !m_messages.isEmpty() ? m_messages.last().id : "";
@@ -330,6 +331,7 @@ void ChatModel::resetModelTo(int index)
        beginRemoveRows(QModelIndex(), index, m_messages.size() - 1);
        m_messages.remove(index, m_messages.size() - index);
        endRemoveRows();
+        emit sessionUsageChanged();
    }
 }

@@ -507,6 +509,54 @@ void ChatModel::updateMessageContent(const QString &messageId, const QString &ne
    }
 }

+// Records the token usage reported for the message whose id equals messageId.
+// Only the first message with that id is updated; when none matches, the model is
+// left untouched and no signal is emitted.
+void ChatModel::setMessageUsage(
+    const QString &messageId, int promptTokens, int completionTokens,
+    int cachedPromptTokens, int reasoningTokens)
+{
+    for (int row = 0; row < m_messages.size(); ++row) {
+        auto &entry = m_messages[row];
+        if (entry.id == messageId) {
+            entry.promptTokens = promptTokens;
+            entry.completionTokens = completionTokens;
+            entry.cachedPromptTokens = cachedPromptTokens;
+            entry.reasoningTokens = reasoningTokens;
+
+            // TotalTokens is served as prompt + completion, so it is refreshed too.
+            const QModelIndex changed = index(row);
+            emit dataChanged(
+                changed, changed,
+                {Roles::PromptTokens, Roles::CompletionTokens, Roles::CachedPromptTokens,
+                 Roles::ReasoningTokens, Roles::TotalTokens});
+            emit sessionUsageChanged();
+            return;
+        }
+    }
+}
+
+int ChatModel::sessionPromptTokens() const
+{
+    int sum = 0; // prompt tokens accumulated over every message held by the model
+    for (int i = 0; i < m_messages.size(); ++i)
+        sum += m_messages.at(i).promptTokens;
+    return sum;
+}
+
+int ChatModel::sessionCompletionTokens() const
+{
+    int sum = 0; // completion tokens accumulated over every message held by the model
+    for (int i = 0; i < m_messages.size(); ++i)
+        sum += m_messages.at(i).completionTokens;
+    return sum;
+}
+
+int ChatModel::sessionTotalTokens() const
+{
+    return sessionCompletionTokens() + sessionPromptTokens(); // prompt + completion only
+}
+
 void ChatModel::setLoadingFromHistory(bool loading)
 {
    m_loadingFromHistory = loading;
--- a/ChatView/ChatModel.hpp
+++ b/ChatView/ChatModel.hpp
@@ -17,14 +17,27 @@ namespace QodeAssist::Chat {
 class ChatModel : public QAbstractListModel
 {
    Q_OBJECT
-    Q_PROPERTY(int tokensThreshold READ tokensThreshold NOTIFY tokensThresholdChanged FINAL)
+    Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
+    Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
+    Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
    QML_ELEMENT

 public:
    enum ChatRole { System, User, Assistant, Tool, FileEdit, Thinking };
    Q_ENUM(ChatRole)

-    enum Roles { RoleType = Qt::UserRole, Content, Attachments, IsRedacted, Images };
+    enum Roles {
+        RoleType = Qt::UserRole,
+        Content,
+        Attachments,
+        IsRedacted,
+        Images,
+        PromptTokens,
+        CompletionTokens,
+        CachedPromptTokens,
+        ReasoningTokens,
+        TotalTokens
+    };
    Q_ENUM(Roles)

    struct ImageAttachment
@@ -44,6 +57,11 @@ public:

        QList<Context::ContentFile> attachments;
        QList<ImageAttachment> images;
+
+        int promptTokens = 0;
+        int completionTokens = 0;
+        int cachedPromptTokens = 0;
+        int reasoningTokens = 0;
    };

    explicit ChatModel(QObject *parent = nullptr);
@@ -66,8 +84,6 @@ public:
    QVector<Message> getChatHistory() const;
    QJsonArray prepareMessagesForRequest(const QString &systemPrompt) const;

-    int tokensThreshold() const;
-
    QString currentModel() const;
    QString lastMessageId() const;

@@ -84,6 +100,17 @@ public:
        const QString &requestId, const QString &thinking, const QString &signature);
    void addRedactedThinkingBlock(const QString &requestId, const QString &signature);
    void updateMessageContent(const QString &messageId, const QString &newContent);
+
+    void setMessageUsage(
+        const QString &messageId,
+        int promptTokens,
+        int completionTokens,
+        int cachedPromptTokens,
+        int reasoningTokens);
+
+    int sessionPromptTokens() const;
+    int sessionCompletionTokens() const;
+    int sessionTotalTokens() const;
    
    void setLoadingFromHistory(bool loading);
    bool isLoadingFromHistory() const;
@@ -92,8 +119,8 @@ public:
    QString chatFilePath() const;

 signals:
-    void tokensThresholdChanged();
    void modelReseted();
+    void sessionUsageChanged();

 private slots:
    void onFileEditApplied(const QString &editId);
--- a/ChatView/ChatRootView.cpp
+++ b/ChatView/ChatRootView.cpp
@@ -3,7 +3,12 @@

 #include "ChatRootView.hpp"

+#include <algorithm>
+
+#include <LLMQore/ToolsManager.hpp>
 #include <QClipboard>
+#include <QJsonArray>
+#include <QJsonDocument>
 #include <QDesktopServices>
 #include <QDir>
 #include <QFile>
@@ -31,7 +36,6 @@
 #include "Logger.hpp"
 #include "ProjectSettings.hpp"
 #include "ProvidersManager.hpp"
-#include "ToolsSettings.hpp"
 #include "context/ChangesManager.h"
 #include "context/ContextManager.hpp"
 #include "context/TokenUtils.hpp"
@@ -107,6 +111,22 @@ ChatRootView::ChatRootView(QQuickItem *parent)
        &Utils::BaseAspect::changed,
        this,
        &ChatRootView::updateInputTokensCount);
+    connect(this, &ChatRootView::useToolsChanged, this, &ChatRootView::updateInputTokensCount);
+    connect(
+        &Settings::chatAssistantSettings().enableChatTools,
+        &Utils::BaseAspect::changed,
+        this,
+        &ChatRootView::updateInputTokensCount);
+
+    rewireToolsChangedConnection();
+    connect(
+        &Settings::generalSettings().caProvider,
+        &Utils::BaseAspect::changed,
+        this,
+        [this]() {
+            rewireToolsChangedConnection();
+            updateInputTokensCount();
+        });
    connect(
        &Settings::chatAssistantSettings().systemPrompt,
        &Utils::BaseAspect::changed,
@@ -171,6 +191,28 @@ ChatRootView::ChatRootView(QQuickItem *parent)
        updateCurrentMessageEditsStats();
    });

+    connect(
+        m_clientInterface,
+        &ClientInterface::messageUsageReceived,
+        this,
+        [this](int promptTokens, int /*completionTokens*/, int /*cached*/, int /*reasoning*/) {
+            if (promptTokens <= 0 || m_lastSentEstimate <= 0)
+                return;
+
+            const double rawFactor
+                = static_cast<double>(promptTokens) / static_cast<double>(m_lastSentEstimate);
+            const double clamped = std::clamp(rawFactor, 0.5, 3.0);
+            m_calibrationFactor = 0.5 * m_calibrationFactor + 0.5 * clamped;
+
+            LOG_MESSAGE(QString("Token calibration: server=%1 estimated=%2 ratio=%3 ema=%4")
+                            .arg(promptTokens)
+                            .arg(m_lastSentEstimate)
+                            .arg(rawFactor, 0, 'f', 3)
+                            .arg(m_calibrationFactor, 0, 'f', 3));
+
+            updateInputTokensCount();
+        });
+
    connect(
        &Context::ChangesManager::instance(),
        &Context::ChangesManager::fileEditAdded,
@@ -247,7 +289,6 @@ ChatRootView::ChatRootView(QQuickItem *parent)
        emit lastErrorMessageChanged();
    });

-    // ChatCompressor signals
    connect(m_chatCompressor, &ChatCompressor::compressionStarted, this, [this]() {
        emit isCompressingChanged();
    });
@@ -259,6 +300,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
        emit compressionCompleted(compressedChatPath);

        loadHistory(compressedChatPath);
+
+        if (m_pendingSend.active) {
+            PendingSend p = m_pendingSend;
+            m_pendingSend = {};
+            dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
+        }
    });

    connect(m_chatCompressor, &ChatCompressor::compressionFailed, this, [this](const QString &error) {
@@ -266,6 +313,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
        m_lastErrorMessage = error;
        emit lastErrorMessageChanged();
        emit compressionFailed(error);
+
+        if (m_pendingSend.active) {
+            PendingSend p = m_pendingSend;
+            m_pendingSend = {};
+            dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
+        }
    });
 }

@@ -276,32 +329,72 @@ ChatModel *ChatRootView::chatModel() const

 void ChatRootView::sendMessage(const QString &message)
 {
-    if (m_inputTokensCount > m_chatModel->tokensThreshold()) {
-        QMessageBox::StandardButton reply = QMessageBox::question(
-            Core::ICore::dialogParent(),
-            tr("Token Limit Exceeded"),
-            tr("The chat history has exceeded the token limit.\n"
-               "Would you like to create new chat?"),
-            QMessageBox::Yes | QMessageBox::No);
+    const QStringList attachments = m_attachmentFiles;
+    const QStringList linkedFiles = m_linkedFiles;
+    const bool tools = useTools();
+    const bool thinking = useThinking();

-        if (reply == QMessageBox::Yes) {
-            autosave();
-            m_chatModel->clear();
-            setRecentFilePath(QString{});
-            return;
-        }
-    }
+    if (deferSendForAutoCompress(message, attachments, linkedFiles, tools, thinking))
+        return;
+
+    dispatchSend(message, attachments, linkedFiles, tools, thinking);
+}
+
+bool ChatRootView::deferSendForAutoCompress(
+    const QString &message,
+    const QStringList &attachments,
+    const QStringList &linkedFiles,
+    bool useToolsArg,
+    bool useThinkingArg)
+{
+    auto &settings = Settings::chatAssistantSettings();
+    if (!settings.autoCompress())
+        return false;
+
+    const int threshold = settings.autoCompressThreshold();
+    if (m_inputTokensCount < threshold)
+        return false;

    if (m_recentFilePath.isEmpty()) {
-        QString filePath = getAutosaveFilePath(message, m_attachmentFiles);
+        QString filePath = getAutosaveFilePath(message, attachments);
+        if (filePath.isEmpty())
+            return false;
+        setRecentFilePath(filePath);
+        LOG_MESSAGE(QString("Set chat file path for new chat (auto-compress): %1").arg(filePath));
+    }
+
+    if (m_chatCompressor->isCompressing() || m_pendingSend.active)
+        return false;
+
+    LOG_MESSAGE(QString("Auto-compress preempt: estimated next=%1 ≥ threshold=%2; deferring send")
+                    .arg(m_inputTokensCount)
+                    .arg(threshold));
+
+    m_pendingSend = {message, attachments, linkedFiles, useToolsArg, useThinkingArg, true};
+    compressCurrentChat();
+    return true;
+}
+
+void ChatRootView::dispatchSend(
+    const QString &message,
+    const QStringList &attachments,
+    const QStringList &linkedFiles,
+    bool useToolsArg,
+    bool useThinkingArg)
+{
+    if (m_recentFilePath.isEmpty()) {
+        QString filePath = getAutosaveFilePath(message, attachments);
        if (!filePath.isEmpty()) {
            setRecentFilePath(filePath);
            LOG_MESSAGE(QString("Set chat file path for new chat: %1").arg(filePath));
        }
    }

-    m_clientInterface
-        ->sendMessage(message, m_attachmentFiles, m_linkedFiles, useTools(), useThinking());
+    m_lastSentEstimate = m_calibrationFactor > 0.0
+                             ? static_cast<int>(m_inputTokensCount / m_calibrationFactor)
+                             : m_inputTokensCount;
+
+    m_clientInterface->sendMessage(message, attachments, linkedFiles, useToolsArg, useThinkingArg);

    m_fileManager->clearIntermediateStorage();
    clearAttachmentFiles();
@@ -392,7 +485,8 @@ void ChatRootView::loadHistory(const QString &filePath)
        setRecentFilePath(filePath);
    }

-    m_fileManager->clearIntermediateStorage();
+    if (!m_pendingSend.active)
+        m_fileManager->clearIntermediateStorage();
    m_attachmentFiles.clear();
    m_linkedFiles.clear();
    emit attachmentFilesChanged();
@@ -747,6 +841,27 @@ void ChatRootView::openFileInEditor(const QString &filePath)
    Core::EditorManager::openEditor(Utils::FilePath::fromString(filePath));
 }

+// Drops the previous toolsChanged subscription and, when the provider named by the
+// caProvider setting exists and exposes a tools manager, subscribes to that manager
+// so tool changes re-run updateInputTokensCount(). Otherwise the handle stays empty.
+void ChatRootView::rewireToolsChangedConnection()
+{
+    if (m_toolsChangedConn)
+        QObject::disconnect(m_toolsChangedConn);
+    m_toolsChangedConn = QMetaObject::Connection{};
+
+    const auto activeName = Settings::generalSettings().caProvider();
+    auto *activeProvider
+        = PluginLLMCore::ProvidersManager::instance().getProviderByName(activeName);
+    auto *manager = activeProvider ? activeProvider->toolsManager() : nullptr;
+    if (!manager)
+        return; // no such provider, or the provider has no tools manager
+
+    m_toolsChangedConn = connect(
+        manager, &::LLMQore::ToolRegistry::toolsChanged,
+        this, &ChatRootView::updateInputTokensCount);
+}
+
 void ChatRootView::updateInputTokensCount()
 {
    int inputTokens = m_messageTokensCount;
@@ -756,14 +871,33 @@ void ChatRootView::updateInputTokensCount()
        inputTokens += Context::TokenUtils::estimateTokens(settings.systemPrompt());
    }

+    const auto splitImageEstimate = [](const QStringList &paths, QStringList &textPaths) {
+        int imageTokens = 0;
+        for (const QString &p : paths) {
+            if (Context::TokenUtils::isImageFilePath(p))
+                imageTokens += Context::TokenUtils::estimateImageAttachmentTokens(p);
+            else
+                textPaths.append(p);
+        }
+        return imageTokens;
+    };
+
    if (!m_attachmentFiles.isEmpty()) {
-        auto attachFiles = m_clientInterface->contextManager()->getContentFiles(m_attachmentFiles);
-        inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
+        QStringList textPaths;
+        inputTokens += splitImageEstimate(m_attachmentFiles, textPaths);
+        if (!textPaths.isEmpty()) {
+            auto attachFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
+            inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
+        }
    }

    if (!m_linkedFiles.isEmpty()) {
-        auto linkFiles = m_clientInterface->contextManager()->getContentFiles(m_linkedFiles);
-        inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
+        QStringList textPaths;
+        inputTokens += splitImageEstimate(m_linkedFiles, textPaths);
+        if (!textPaths.isEmpty()) {
+            auto linkFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
+            inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
+        }
    }

    const auto &history = m_chatModel->getChatHistory();
@@ -772,7 +906,22 @@ void ChatRootView::updateInputTokensCount()
        inputTokens += 4; // + role
    }

-    m_inputTokensCount = inputTokens;
+    if (useTools()) {
+        const auto providerName = Settings::generalSettings().caProvider();
+        if (auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(
+                providerName)) {
+            if (auto *tm = provider->toolsManager()) {
+                const QJsonArray toolDefs = tm->getToolsDefinitions();
+                if (!toolDefs.isEmpty()) {
+                    const QByteArray serialized
+                        = QJsonDocument(toolDefs).toJson(QJsonDocument::Compact);
+                    inputTokens += static_cast<int>(serialized.size() / 4);
+                }
+            }
+        }
+    }
+
+    m_inputTokensCount = static_cast<int>(inputTokens * m_calibrationFactor);
    emit inputTokensCountChanged();
 }

--- a/ChatView/ChatRootView.hpp
+++ b/ChatView/ChatRootView.hpp
@@ -212,6 +212,21 @@ signals:
    void openFilesChanged();

 private:
+    void rewireToolsChangedConnection();
+    QMetaObject::Connection m_toolsChangedConn;
+
+    bool deferSendForAutoCompress(
+        const QString &message,
+        const QStringList &attachments,
+        const QStringList &linkedFiles,
+        bool useTools,
+        bool useThinking);
+    void dispatchSend(
+        const QString &message,
+        const QStringList &attachments,
+        const QStringList &linkedFiles,
+        bool useTools,
+        bool useThinking);
    void updateFileEditStatus(const QString &editId, const QString &status);
    QString getChatsHistoryDir() const;
    QString getSuggestedFileName() const;
@@ -228,6 +243,18 @@ private:
    QStringList m_linkedFiles;
    int m_messageTokensCount{0};
    int m_inputTokensCount{0};
+    int m_lastSentEstimate{0};
+    double m_calibrationFactor{1.0};
+
+    struct PendingSend { // a send request parked until the chat compressor reports back
+        QString message; // NOTE: member order is relied on by the brace-init in deferSendForAutoCompress()
+        QStringList attachments;
+        QStringList linkedFiles;
+        bool useTools = false;
+        bool useThinking = false;
+        bool active = false; // true only while a request is parked
+    };
+    PendingSend m_pendingSend;
    bool m_isSyncOpenFiles;
    QList<Core::IEditor *> m_currentEditors;
    bool m_isRequestInProgress;
--- a/ChatView/ChatSerializer.cpp
+++ b/ChatView/ChatSerializer.cpp
@@ -103,6 +103,17 @@ QJsonObject ChatSerializer::serializeMessage(
        messageObj["images"] = imagesArray;
    }

+    if (message.promptTokens > 0 || message.completionTokens > 0) {
+        QJsonObject usageObj;
+        usageObj["promptTokens"] = message.promptTokens;
+        usageObj["completionTokens"] = message.completionTokens;
+        if (message.cachedPromptTokens > 0)
+            usageObj["cachedPromptTokens"] = message.cachedPromptTokens;
+        if (message.reasoningTokens > 0)
+            usageObj["reasoningTokens"] = message.reasoningTokens;
+        messageObj["usage"] = usageObj;
+    }
+
    return messageObj;
 }

@@ -139,6 +150,14 @@ ChatModel::Message ChatSerializer::deserializeMessage(
        }
    }

+    if (json.contains("usage")) {
+        const QJsonObject usageObj = json["usage"].toObject();
+        message.promptTokens = usageObj["promptTokens"].toInt();
+        message.completionTokens = usageObj["completionTokens"].toInt();
+        message.cachedPromptTokens = usageObj["cachedPromptTokens"].toInt();
+        message.reasoningTokens = usageObj["reasoningTokens"].toInt();
+    }
+
    return message;
 }

--- a/ChatView/ClientInterface.cpp
+++ b/ChatView/ClientInterface.cpp
@@ -257,6 +257,12 @@ void ClientInterface::sendMessage(
        this,
        &ClientInterface::handleFullResponse,
        Qt::UniqueConnection);
+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &ClientInterface::handleRequestFinalized,
+        Qt::UniqueConnection);
    connect(
        provider->client(),
        &::LLMQore::BaseClient::requestFailed,
@@ -449,6 +455,29 @@ void ClientInterface::handleFullResponse(const QString &requestId, const QString
    m_awaitingContinuation.remove(requestId);
 }

+// Slot for the client's requestFinalized signal. When the request is tracked in
+// m_activeRequests and carries usage data, the counts are stored on the chat model,
+// forwarded through messageUsageReceived and logged; otherwise nothing happens.
+void ClientInterface::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    if (!m_activeRequests.contains(requestId) || !info.usage)
+        return;
+
+    const auto &usage = *info.usage;
+    const auto prompt = usage.promptTokens;
+    const auto completion = usage.completionTokens;
+    const auto cached = usage.cachedPromptTokens;
+    const auto reasoning = usage.reasoningTokens;
+
+    m_chatModel->setMessageUsage(requestId, prompt, completion, cached, reasoning);
+    emit messageUsageReceived(prompt, completion, cached, reasoning);
+
+    LOG_MESSAGE(QString("Chat usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+                    .arg(requestId).arg(prompt).arg(completion)
+                    .arg(cached).arg(reasoning));
+}
+
 void ClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
 {
    auto it = m_activeRequests.find(requestId);
--- a/ChatView/ClientInterface.hpp
+++ b/ChatView/ClientInterface.hpp
@@ -11,6 +11,7 @@
 #include "ChatModel.hpp"
 #include "Provider.hpp"
 #include "pluginllmcore/IPromptProvider.hpp"
+#include <LLMQore/BaseClient.hpp>
 #include <context/ContextManager.hpp>

 namespace QodeAssist::Chat {
@@ -42,10 +43,13 @@ signals:
    void errorOccurred(const QString &error);
    void messageReceivedCompletely();
    void requestStarted(const QString &requestId);
+    void messageUsageReceived(
+        int promptTokens, int completionTokens, int cachedPromptTokens, int reasoningTokens);

 private slots:
    void handlePartialResponse(const QString &requestId, const QString &partialText);
    void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
    void handleRequestFailed(const QString &requestId, const QString &error);
    void handleThinkingBlockReceived(
        const QString &requestId, const QString &thinking, const QString &signature);
--- a/ChatView/qml/RootItem.qml
+++ b/ChatView/qml/RootItem.qml
@@ -91,7 +91,10 @@ ChatRootView {
            loadButton.onClicked: root.showLoadDialog()
            clearButton.onClicked: root.clearChat()
            tokensBadge {
-                text: qsTr("%1/%2").arg(root.inputTokensCount).arg(root.chatModel.tokensThreshold)
+                text: qsTr("next ~%1  ·  session ↑%2 ↓%3")
+                          .arg(root.inputTokensCount)
+                          .arg(root.chatModel.sessionPromptTokens)
+                          .arg(root.chatModel.sessionCompletionTokens)
            }
            recentPath {
                text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")
@@ -270,6 +273,10 @@ ChatRootView {
                    codeFontSize: root.codeFontSize
                    textFontSize: root.textFontSize
                    textFormat: root.textFormat
+                    promptTokens: model.promptTokens || 0
+                    completionTokens: model.completionTokens || 0
+                    cachedPromptTokens: model.cachedPromptTokens || 0
+                    reasoningTokens: model.reasoningTokens || 0

                    onResetChatToMessage: function(idx) {
                        messageInput.text = model.content
--- a/ChatView/qml/chatparts/ChatItem.qml
+++ b/ChatView/qml/chatparts/ChatItem.qml
@@ -34,6 +34,11 @@ Rectangle {
    property bool isUserMessage: false
    property int messageIndex: -1

+    property int promptTokens: 0
+    property int completionTokens: 0
+    property int cachedPromptTokens: 0
+    property int reasoningTokens: 0
+
    signal resetChatToMessage(int index)
    signal openFileRequested(string filePath)

@@ -135,6 +140,39 @@ Rectangle {
                }
            }
        }
+
+        RowLayout {
+            id: usageBadge
+
+            Layout.fillWidth: true
+            Layout.leftMargin: 10
+            Layout.rightMargin: 10
+            spacing: 8
+            visible: !root.isUserMessage
+                     && (root.promptTokens > 0 || root.completionTokens > 0)
+
+            Item { Layout.fillWidth: true }
+
+            Text {
+                text: root.cachedPromptTokens > 0
+                          ? qsTr("↑ %1 (cached %2)").arg(root.promptTokens).arg(root.cachedPromptTokens)
+                          : qsTr("↑ %1").arg(root.promptTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+            Text {
+                text: root.reasoningTokens > 0
+                          ? qsTr("↓ %1 (reasoning %2)").arg(root.completionTokens).arg(root.reasoningTokens)
+                          : qsTr("↓ %1").arg(root.completionTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+            Text {
+                text: qsTr("Σ %1").arg(root.promptTokens + root.completionTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+        }
    }

    Rectangle {
--- a/LLMClientInterface.cpp
+++ b/LLMClientInterface.cpp
@@ -63,6 +63,21 @@ void LLMClientInterface::handleFullResponse(const QString &requestId, const QStr
    m_performanceLogger.endTimeMeasurement(requestId);
 }

+// Logs the token usage of a finalized completion request. Requests that are not in
+// m_activeRequests, or that report no usage, are skipped without logging.
+void LLMClientInterface::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    const bool tracked = m_activeRequests.contains(requestId);
+    if (!tracked || !info.usage)
+        return;
+
+    const auto &usage = *info.usage;
+    LOG_MESSAGE(QString("Completion usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+                    .arg(requestId).arg(usage.promptTokens).arg(usage.completionTokens)
+                    .arg(usage.cachedPromptTokens).arg(usage.reasoningTokens));
+}
+
 void LLMClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
 {
    auto it = m_activeRequests.find(requestId);
@@ -325,6 +340,12 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request)
        this,
        &LLMClientInterface::handleFullResponse,
        Qt::UniqueConnection);
+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &LLMClientInterface::handleRequestFinalized,
+        Qt::UniqueConnection);
    connect(
        provider->client(),
        &::LLMQore::BaseClient::requestFailed,
--- a/LLMClientInterface.hpp
+++ b/LLMClientInterface.hpp
@@ -3,6 +3,7 @@

 #pragma once

+#include <LLMQore/BaseClient.hpp>
 #include <languageclient/languageclientinterface.h>
 #include <texteditor/texteditor.h>

@@ -52,6 +53,8 @@ protected:

 private slots:
    void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(
+        const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
    void handleRequestFailed(const QString &requestId, const QString &error);

 private:
--- a/QuickRefactorHandler.cpp
+++ b/QuickRefactorHandler.cpp
@@ -152,6 +152,13 @@ void QuickRefactorHandler::prepareAndSendRequest(
        &QuickRefactorHandler::handleFullResponse,
        Qt::UniqueConnection);

+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &QuickRefactorHandler::handleRequestFinalized,
+        Qt::UniqueConnection);
+
    connect(
        provider->client(),
        &::LLMQore::BaseClient::requestFailed,
@@ -408,6 +415,22 @@ void QuickRefactorHandler::handleFullResponse(const QString &requestId, const QS
    }
 }

+// Logs the token usage of a finalized quick-refactor request.
+// Only the request whose id equals m_lastRequestId is reported; any other id, or a
+// completion without usage data, is ignored.
+void QuickRefactorHandler::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    if (!(requestId == m_lastRequestId && info.usage))
+        return;
+
+    const auto &usage = *info.usage;
+    LOG_MESSAGE(
+        QString("Quick refactor usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+            .arg(requestId).arg(usage.promptTokens).arg(usage.completionTokens)
+            .arg(usage.cachedPromptTokens).arg(usage.reasoningTokens));
+}
+
 void QuickRefactorHandler::handleRequestFailed(const QString &requestId, const QString &error)
 {
    if (requestId == m_lastRequestId) {
--- a/QuickRefactorHandler.hpp
+++ b/QuickRefactorHandler.hpp
@@ -6,6 +6,7 @@
 #include <QJsonObject>
 #include <QObject>

+#include <LLMQore/BaseClient.hpp>
 #include <texteditor/texteditor.h>
 #include <utils/textutils.h>

@@ -43,6 +44,8 @@ signals:

 private slots:
    void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(
+        const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
    void handleRequestFailed(const QString &requestId, const QString &error);

 private:
--- a/context/CMakeLists.txt
+++ b/context/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(Context STATIC
 target_link_libraries(Context
    PUBLIC
    Qt::Core
+    Qt::Gui
    QtCreator::Core
    QtCreator::TextEditor
    QtCreator::Utils
--- a/context/TokenUtils.cpp
+++ b/context/TokenUtils.cpp
@@ -3,6 +3,14 @@

 #include "TokenUtils.hpp"

+#include <QFileInfo>
+#include <QImageReader>
+#include <QSet>
+#include <QSize>
+
+#include <algorithm>
+#include <cmath>
+
 namespace QodeAssist::Context {

 int TokenUtils::estimateTokens(const QString &text)
@@ -15,8 +23,48 @@ int TokenUtils::estimateTokens(const QString &text)
    return text.length() / 4;
 }

+bool TokenUtils::isImageFilePath(const QString &filePath)
+{
+    static const QSet<QString> knownSuffixes{"png", "jpg", "jpeg", "gif", "webp", "bmp"};
+    return knownSuffixes.contains(QFileInfo(filePath).suffix().toLower()); // suffix compared in lower case
+}
+
+// Estimates the token cost of an image attachment from its pixel dimensions.
+// The size is fitted to 2048 px on the long side, then to 768 px on the short side,
+// split into 512 px tiles and charged 85 plus 170 per tile. When the dimensions
+// cannot be read (invalid or empty size) a flat 1500 is returned.
+int TokenUtils::estimateImageAttachmentTokens(const QString &filePath)
+{
+    const QSize pixels = QImageReader(filePath).size();
+    if (pixels.isEmpty() || !pixels.isValid())
+        return 1500;
+
+    double width = pixels.width();
+    double height = pixels.height();
+
+    // Shrinks both sides by one common factor when `side` exceeds `limit`.
+    const auto shrinkTo = [&width, &height](double side, double limit) {
+        if (side > limit) {
+            const double factor = limit / side;
+            width *= factor;
+            height *= factor;
+        }
+    };
+    shrinkTo(std::max(width, height), 2048.0);
+    shrinkTo(std::min(width, height), 768.0);
+
+    const int columns = static_cast<int>(std::ceil(width / 512.0));
+    const int rows = static_cast<int>(std::ceil(height / 512.0));
+
+    // At least one tile is always charged.
+    return 85 + std::max(1, columns * rows) * 170;
+}
+
 int TokenUtils::estimateFileTokens(const Context::ContentFile &file)
 {
+    if (isImageFilePath(file.filename))
+        return estimateImageAttachmentTokens(QString());
+
    int total = 0;

    total += estimateTokens(file.filename);
--- a/context/TokenUtils.hpp
+++ b/context/TokenUtils.hpp
@@ -15,6 +15,8 @@ public:
    static int estimateTokens(const QString &text);
    static int estimateFileTokens(const Context::ContentFile &file);
    static int estimateFilesTokens(const QList<Context::ContentFile> &files);
+    static bool isImageFilePath(const QString &filePath);
+    static int estimateImageAttachmentTokens(const QString &filePath);
 };

 } // namespace QodeAssist::Context
--- a/settings/ChatAssistantSettings.cpp
+++ b/settings/ChatAssistantSettings.cpp
@@ -29,14 +29,6 @@ ChatAssistantSettings::ChatAssistantSettings()

    setDisplayName(Tr::tr("Chat Assistant"));

-    // Chat Settings
-    chatTokensThreshold.setSettingsKey(Constants::CA_TOKENS_THRESHOLD);
-    chatTokensThreshold.setLabelText(Tr::tr("Chat history token limit:"));
-    chatTokensThreshold.setToolTip(Tr::tr("Maximum number of tokens in chat history. When "
-                                          "exceeded, oldest messages will be removed."));
-    chatTokensThreshold.setRange(1, 99999999);
-    chatTokensThreshold.setDefaultValue(20000);
-
    linkOpenFiles.setSettingsKey(Constants::CA_LINK_OPEN_FILES);
    linkOpenFiles.setLabelText(Tr::tr("Sync open files with assistant by default"));
    linkOpenFiles.setDefaultValue(false);
@@ -58,6 +50,18 @@ ChatAssistantSettings::ChatAssistantSettings()
    enableChatTools.setToolTip(Tr::tr("When enabled, AI can use tools to read files, search project, and build code"));
    enableChatTools.setDefaultValue(false);

+    autoCompress.setSettingsKey(Constants::CA_AUTO_COMPRESS);
+    autoCompress.setLabelText(Tr::tr("Auto-compress chat when session tokens exceed:"));
+    autoCompress.setToolTip(Tr::tr(
+        "After each assistant response, if the running session token total exceeds the "
+        "threshold, the chat is summarized and a new compressed chat is started "
+        "automatically. The original chat is preserved on disk."));
+    autoCompress.setDefaultValue(false);
+
+    autoCompressThreshold.setSettingsKey(Constants::CA_AUTO_COMPRESS_THRESHOLD);
+    autoCompressThreshold.setRange(1000, 99999999);
+    autoCompressThreshold.setDefaultValue(40000);
+
    // General Parameters Settings
    temperature.setSettingsKey(Constants::CA_TEMPERATURE);
    temperature.setLabelText(Tr::tr("Temperature:"));
@@ -292,11 +296,11 @@ ChatAssistantSettings::ChatAssistantSettings()
            Group{
                title(Tr::tr("Chat Settings")),
                Column{
-                    Row{chatTokensThreshold, Stretch{1}},
                    linkOpenFiles,
                    autosave,
                    enableChatInBottomToolBar,
-                    enableChatInNavigationPanel}},
+                    enableChatInNavigationPanel,
+                    Row{autoCompress, autoCompressThreshold, Stretch{1}}}},
            Space{8},
            Group{
                title(Tr::tr("Tools")),
@@ -348,7 +352,8 @@ void ChatAssistantSettings::resetSettingsToDefaults()
        QMessageBox::Yes | QMessageBox::No);

    if (reply == QMessageBox::Yes) {
-        resetAspect(chatTokensThreshold);
+        resetAspect(autoCompress);
+        resetAspect(autoCompressThreshold);
        resetAspect(temperature);
        resetAspect(maxTokens);
        resetAspect(useTopP);
--- a/settings/ChatAssistantSettings.hpp
+++ b/settings/ChatAssistantSettings.hpp
@@ -18,12 +18,13 @@ public:
    ButtonAspect resetToDefaults{this};

    // Chat settings
-    Utils::IntegerAspect chatTokensThreshold{this};
    Utils::BoolAspect linkOpenFiles{this};
    Utils::BoolAspect autosave{this};
    Utils::BoolAspect enableChatInBottomToolBar{this};
    Utils::BoolAspect enableChatInNavigationPanel{this};
    Utils::BoolAspect enableChatTools{this};
+    Utils::BoolAspect autoCompress{this};
+    Utils::IntegerAspect autoCompressThreshold{this};

    // General Parameters Settings
    Utils::DoubleAspect temperature{this};
--- a/settings/SettingsConstants.hpp
+++ b/settings/SettingsConstants.hpp
@@ -78,7 +78,8 @@ const char MAX_FILE_THRESHOLD[] = "QodeAssist.maxFileThreshold";
 const char CC_MULTILINE_COMPLETION[] = "QodeAssist.ccMultilineCompletion";
 const char CC_MODEL_OUTPUT_HANDLER[] = "QodeAssist.ccModelOutputHandler";
 const char CA_AUTO_APPLY_FILE_EDITS[] = "QodeAssist.caAutoApplyFileEdits";
-const char CA_TOKENS_THRESHOLD[] = "QodeAssist.caTokensThreshold";
+const char CA_AUTO_COMPRESS[] = "QodeAssist.caAutoCompress";
+const char CA_AUTO_COMPRESS_THRESHOLD[] = "QodeAssist.caAutoCompressThreshold";
 const char CA_LINK_OPEN_FILES[] = "QodeAssist.caLinkOpenFiles";
 const char CA_AUTOSAVE[] = "QodeAssist.caAutosave";
 const char CC_CUSTOM_LANGUAGES[] = "QodeAssist.ccCustomLanguages";
--- a/sources/external/llmqore
+++ b/sources/external/llmqore