diff --git a/ChatView/ChatModel.cpp b/ChatView/ChatModel.cpp
index cc8aa4b..90619e0 100644
--- a/ChatView/ChatModel.cpp
+++ b/ChatView/ChatModel.cpp
@@ -11,7 +11,6 @@
 #include <QUrl>
 #include <QtQml>
 
-#include "ChatAssistantSettings.hpp"
 #include "Logger.hpp"
 #include "context/ChangesManager.h"
 
@@ -20,14 +19,6 @@ namespace QodeAssist::Chat {
 ChatModel::ChatModel(QObject *parent)
     : QAbstractListModel(parent)
 {
-    auto &settings = Settings::chatAssistantSettings();
-
-    connect(
-        &settings.chatTokensThreshold,
-        &Utils::BaseAspect::changed,
-        this,
-        &ChatModel::tokensThresholdChanged);
-    
     connect(&Context::ChangesManager::instance(),
             &Context::ChangesManager::fileEditApplied,
             this,
@@ -86,6 +77,16 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const
     case Roles::IsRedacted: {
         return message.isRedacted;
     }
+    case Roles::PromptTokens:
+        return message.promptTokens;
+    case Roles::CompletionTokens:
+        return message.completionTokens;
+    case Roles::CachedPromptTokens:
+        return message.cachedPromptTokens;
+    case Roles::ReasoningTokens:
+        return message.reasoningTokens;
+    case Roles::TotalTokens:
+        return message.promptTokens + message.completionTokens;
     case Roles::Images: {
         QVariantList imagesList;
         for (const auto &image : message.images) {
@@ -124,6 +125,11 @@ QHash<int, QByteArray> ChatModel::roleNames() const
     roles[Roles::Attachments] = "attachments";
     roles[Roles::IsRedacted] = "isRedacted";
     roles[Roles::Images] = "images";
+    roles[Roles::PromptTokens] = "promptTokens";
+    roles[Roles::CompletionTokens] = "completionTokens";
+    roles[Roles::CachedPromptTokens] = "cachedPromptTokens";
+    roles[Roles::ReasoningTokens] = "reasoningTokens";
+    roles[Roles::TotalTokens] = "totalTokens";
     return roles;
 }
 
@@ -207,6 +213,7 @@ void ChatModel::clear()
     m_messages.clear();
     endResetModel();
     emit modelReseted();
+    emit sessionUsageChanged();
 }
 
 QList<MessagePart> ChatModel::processMessageContent(const QString &content) const
@@ -310,12 +317,6 @@ QJsonArray ChatModel::prepareMessagesForRequest(const QString &systemPrompt) con
     return messages;
 }
 
-int ChatModel::tokensThreshold() const
-{
-    auto &settings = Settings::chatAssistantSettings();
-    return settings.chatTokensThreshold();
-}
-
 QString ChatModel::lastMessageId() const
 {
     return !m_messages.isEmpty() ? m_messages.last().id : "";
@@ -330,6 +331,7 @@ void ChatModel::resetModelTo(int index)
         beginRemoveRows(QModelIndex(), index, m_messages.size() - 1);
         m_messages.remove(index, m_messages.size() - index);
         endRemoveRows();
+        emit sessionUsageChanged();
     }
 }
 
@@ -507,6 +509,54 @@ void ChatModel::updateMessageContent(const QString &messageId, const QString &ne
     }
 }
 
+// Stores the usage counters on the first message whose id equals messageId, then
+// emits dataChanged for that row and sessionUsageChanged. No-op when no id matches.
+void ChatModel::setMessageUsage(
+    const QString &messageId,
+    int promptTokens,
+    int completionTokens,
+    int cachedPromptTokens,
+    int reasoningTokens)
+{
+    for (int row = 0; row < m_messages.size(); ++row) {
+        auto &entry = m_messages[row];
+        if (entry.id == messageId) {
+            entry.promptTokens = promptTokens;
+            entry.completionTokens = completionTokens;
+            entry.cachedPromptTokens = cachedPromptTokens;
+            entry.reasoningTokens = reasoningTokens;
+            const QModelIndex changedRow = index(row);
+            emit dataChanged(
+                changedRow, changedRow,
+                {Roles::PromptTokens, Roles::CompletionTokens, Roles::CachedPromptTokens,
+                 Roles::ReasoningTokens, Roles::TotalTokens});
+            emit sessionUsageChanged();
+            return;
+        }
+    }
+}
+
+int ChatModel::sessionPromptTokens() const
+{
+    int promptSum = 0; // running sum of promptTokens over every stored message
+    for (auto it = m_messages.cbegin(); it != m_messages.cend(); ++it)
+        promptSum += it->promptTokens;
+    return promptSum;
+}
+
+int ChatModel::sessionCompletionTokens() const
+{
+    int completionSum = 0; // running sum of completionTokens over every stored message
+    for (auto it = m_messages.cbegin(); it != m_messages.cend(); ++it)
+        completionSum += it->completionTokens;
+    return completionSum;
+}
+
+int ChatModel::sessionTotalTokens() const
+{
+    return sessionCompletionTokens() + sessionPromptTokens(); // prompt + completion, all messages
+}
+
 void ChatModel::setLoadingFromHistory(bool loading)
 {
     m_loadingFromHistory = loading;
diff --git a/ChatView/ChatModel.hpp b/ChatView/ChatModel.hpp
index dd7c5ad..3f5a56b 100644
--- a/ChatView/ChatModel.hpp
+++ b/ChatView/ChatModel.hpp
@@ -17,14 +17,27 @@ namespace QodeAssist::Chat {
 class ChatModel : public QAbstractListModel
 {
     Q_OBJECT
-    Q_PROPERTY(int tokensThreshold READ tokensThreshold NOTIFY tokensThresholdChanged FINAL)
+    Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
+    Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
+    Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
     QML_ELEMENT
 
 public:
     enum ChatRole { System, User, Assistant, Tool, FileEdit, Thinking };
     Q_ENUM(ChatRole)
 
-    enum Roles { RoleType = Qt::UserRole, Content, Attachments, IsRedacted, Images };
+    enum Roles { // item-data roles served by data() and named for QML in roleNames()
+        RoleType = Qt::UserRole,
+        Content,
+        Attachments,
+        IsRedacted,
+        Images,
+        PromptTokens, // per-message usage counters read from Message
+        CompletionTokens,
+        CachedPromptTokens,
+        ReasoningTokens,
+        TotalTokens // not stored: data() returns promptTokens + completionTokens
+    };
     Q_ENUM(Roles)
 
     struct ImageAttachment
@@ -44,6 +57,11 @@ public:
 
         QList<Context::ContentFile> attachments;
         QList<ImageAttachment> images;
+
+        int promptTokens = 0;
+        int completionTokens = 0;
+        int cachedPromptTokens = 0;
+        int reasoningTokens = 0;
     };
 
     explicit ChatModel(QObject *parent = nullptr);
@@ -66,8 +84,6 @@ public:
     QVector<Message> getChatHistory() const;
     QJsonArray prepareMessagesForRequest(const QString &systemPrompt) const;
 
-    int tokensThreshold() const;
-
     QString currentModel() const;
     QString lastMessageId() const;
 
@@ -84,6 +100,17 @@ public:
         const QString &requestId, const QString &thinking, const QString &signature);
     void addRedactedThinkingBlock(const QString &requestId, const QString &signature);
     void updateMessageContent(const QString &messageId, const QString &newContent);
+
+    void setMessageUsage(
+        const QString &messageId,
+        int promptTokens,
+        int completionTokens,
+        int cachedPromptTokens,
+        int reasoningTokens);
+
+    int sessionPromptTokens() const;
+    int sessionCompletionTokens() const;
+    int sessionTotalTokens() const;
     
     void setLoadingFromHistory(bool loading);
     bool isLoadingFromHistory() const;
@@ -92,8 +119,8 @@ public:
     QString chatFilePath() const;
 
 signals:
-    void tokensThresholdChanged();
     void modelReseted();
+    void sessionUsageChanged();
 
 private slots:
     void onFileEditApplied(const QString &editId);
diff --git a/ChatView/ChatRootView.cpp b/ChatView/ChatRootView.cpp
index d07e1bc..3a2ea0f 100644
--- a/ChatView/ChatRootView.cpp
+++ b/ChatView/ChatRootView.cpp
@@ -3,7 +3,12 @@
 
 #include "ChatRootView.hpp"
 
+#include <algorithm>
+
+#include <LLMQore/ToolsManager.hpp>
 #include <QClipboard>
+#include <QJsonArray>
+#include <QJsonDocument>
 #include <QDesktopServices>
 #include <QDir>
 #include <QFile>
@@ -31,7 +36,6 @@
 #include "Logger.hpp"
 #include "ProjectSettings.hpp"
 #include "ProvidersManager.hpp"
-#include "ToolsSettings.hpp"
 #include "context/ChangesManager.h"
 #include "context/ContextManager.hpp"
 #include "context/TokenUtils.hpp"
@@ -107,6 +111,22 @@ ChatRootView::ChatRootView(QQuickItem *parent)
         &Utils::BaseAspect::changed,
         this,
         &ChatRootView::updateInputTokensCount);
+    connect(this, &ChatRootView::useToolsChanged, this, &ChatRootView::updateInputTokensCount);
+    connect(
+        &Settings::chatAssistantSettings().enableChatTools,
+        &Utils::BaseAspect::changed,
+        this,
+        &ChatRootView::updateInputTokensCount);
+
+    rewireToolsChangedConnection();
+    connect(
+        &Settings::generalSettings().caProvider,
+        &Utils::BaseAspect::changed,
+        this,
+        [this]() {
+            rewireToolsChangedConnection();
+            updateInputTokensCount();
+        });
     connect(
         &Settings::chatAssistantSettings().systemPrompt,
         &Utils::BaseAspect::changed,
@@ -171,6 +191,28 @@ ChatRootView::ChatRootView(QQuickItem *parent)
         updateCurrentMessageEditsStats();
     });
 
+    connect(
+        m_clientInterface,
+        &ClientInterface::messageUsageReceived,
+        this,
+        [this](int promptTokens, int /*completionTokens*/, int /*cached*/, int /*reasoning*/) {
+            if (promptTokens <= 0 || m_lastSentEstimate <= 0)
+                return;
+
+            const double rawFactor
+                = static_cast<double>(promptTokens) / static_cast<double>(m_lastSentEstimate);
+            const double clamped = std::clamp(rawFactor, 0.5, 3.0);
+            m_calibrationFactor = 0.5 * m_calibrationFactor + 0.5 * clamped;
+
+            LOG_MESSAGE(QString("Token calibration: server=%1 estimated=%2 ratio=%3 ema=%4")
+                            .arg(promptTokens)
+                            .arg(m_lastSentEstimate)
+                            .arg(rawFactor, 0, 'f', 3)
+                            .arg(m_calibrationFactor, 0, 'f', 3));
+
+            updateInputTokensCount();
+        });
+
     connect(
         &Context::ChangesManager::instance(),
         &Context::ChangesManager::fileEditAdded,
@@ -247,7 +289,6 @@ ChatRootView::ChatRootView(QQuickItem *parent)
         emit lastErrorMessageChanged();
     });
 
-    // ChatCompressor signals
     connect(m_chatCompressor, &ChatCompressor::compressionStarted, this, [this]() {
         emit isCompressingChanged();
     });
@@ -259,6 +300,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
         emit compressionCompleted(compressedChatPath);
 
         loadHistory(compressedChatPath);
+
+        if (m_pendingSend.active) {
+            PendingSend p = m_pendingSend;
+            m_pendingSend = {};
+            dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
+        }
     });
 
     connect(m_chatCompressor, &ChatCompressor::compressionFailed, this, [this](const QString &error) {
@@ -266,6 +313,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
         m_lastErrorMessage = error;
         emit lastErrorMessageChanged();
         emit compressionFailed(error);
+
+        if (m_pendingSend.active) {
+            PendingSend p = m_pendingSend;
+            m_pendingSend = {};
+            dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
+        }
     });
 }
 
@@ -276,32 +329,72 @@ ChatModel *ChatRootView::chatModel() const
 
 void ChatRootView::sendMessage(const QString &message)
 {
-    if (m_inputTokensCount > m_chatModel->tokensThreshold()) {
-        QMessageBox::StandardButton reply = QMessageBox::question(
-            Core::ICore::dialogParent(),
-            tr("Token Limit Exceeded"),
-            tr("The chat history has exceeded the token limit.\n"
-               "Would you like to create new chat?"),
-            QMessageBox::Yes | QMessageBox::No);
+    const QStringList attachments = m_attachmentFiles; // snapshot: a deferred send runs after loadHistory() cleared the members
+    const QStringList linkedFiles = m_linkedFiles;
+    const bool tools = useTools(); // captured now so a deferred send uses the values from the time of the request
+    const bool thinking = useThinking();
 
-        if (reply == QMessageBox::Yes) {
-            autosave();
-            m_chatModel->clear();
-            setRecentFilePath(QString{});
-            return;
-        }
-    }
+    if (deferSendForAutoCompress(message, attachments, linkedFiles, tools, thinking))
+        return; // parked in m_pendingSend; dispatched once compression completes or fails
+
+    dispatchSend(message, attachments, linkedFiles, tools, thinking);
+}
+
+bool ChatRootView::deferSendForAutoCompress( // true: send was parked and compression started
+    const QString &message,
+    const QStringList &attachments,
+    const QStringList &linkedFiles,
+    bool useToolsArg,
+    bool useThinkingArg)
+{
+    auto &settings = Settings::chatAssistantSettings();
+    if (!settings.autoCompress()) // auto-compress switched off: caller sends right away
+        return false;
+
+    const int threshold = settings.autoCompressThreshold();
+    if (m_inputTokensCount < threshold) // current estimate is still under the threshold
+        return false;
 
     if (m_recentFilePath.isEmpty()) {
-        QString filePath = getAutosaveFilePath(message, m_attachmentFiles);
+        QString filePath = getAutosaveFilePath(message, attachments);
+        if (filePath.isEmpty()) // no autosave path available: fall back to a normal send
+            return false;
+        setRecentFilePath(filePath);
+        LOG_MESSAGE(QString("Set chat file path for new chat (auto-compress): %1").arg(filePath));
+    }
+
+    if (m_chatCompressor->isCompressing() || m_pendingSend.active) // do not overwrite a parked send or start compression twice
+        return false;
+
+    LOG_MESSAGE(QString("Auto-compress preempt: estimated next=%1 ≥ threshold=%2; deferring send")
+                    .arg(m_inputTokensCount)
+                    .arg(threshold));
+
+    m_pendingSend = {message, attachments, linkedFiles, useToolsArg, useThinkingArg, true}; // last field is `active`
+    compressCurrentChat();
+    return true; // caller must not dispatch now
+}
+
+void ChatRootView::dispatchSend(
+    const QString &message,
+    const QStringList &attachments,
+    const QStringList &linkedFiles,
+    bool useToolsArg,
+    bool useThinkingArg)
+{
+    if (m_recentFilePath.isEmpty()) {
+        QString filePath = getAutosaveFilePath(message, attachments);
         if (!filePath.isEmpty()) {
             setRecentFilePath(filePath);
             LOG_MESSAGE(QString("Set chat file path for new chat: %1").arg(filePath));
         }
     }
 
-    m_clientInterface
-        ->sendMessage(message, m_attachmentFiles, m_linkedFiles, useTools(), useThinking());
+    m_lastSentEstimate = m_calibrationFactor > 0.0
+                             ? static_cast<int>(m_inputTokensCount / m_calibrationFactor)
+                             : m_inputTokensCount;
+
+    m_clientInterface->sendMessage(message, attachments, linkedFiles, useToolsArg, useThinkingArg);
 
     m_fileManager->clearIntermediateStorage();
     clearAttachmentFiles();
@@ -392,7 +485,8 @@ void ChatRootView::loadHistory(const QString &filePath)
         setRecentFilePath(filePath);
     }
 
-    m_fileManager->clearIntermediateStorage();
+    if (!m_pendingSend.active)
+        m_fileManager->clearIntermediateStorage();
     m_attachmentFiles.clear();
     m_linkedFiles.clear();
     emit attachmentFilesChanged();
@@ -747,6 +841,27 @@ void ChatRootView::openFileInEditor(const QString &filePath)
     Core::EditorManager::openEditor(Utils::FilePath::fromString(filePath));
 }
 
+void ChatRootView::rewireToolsChangedConnection()
+{
+    // Drop any connection made by an earlier call before looking up the current provider.
+    if (m_toolsChangedConn)
+        QObject::disconnect(m_toolsChangedConn);
+    m_toolsChangedConn = QMetaObject::Connection();
+
+    const auto selectedName = Settings::generalSettings().caProvider();
+    auto *activeProvider
+        = PluginLLMCore::ProvidersManager::instance().getProviderByName(selectedName);
+    auto *toolsManager = activeProvider ? activeProvider->toolsManager() : nullptr;
+    if (!toolsManager)
+        return; // no provider or no tools manager: stay disconnected
+
+    m_toolsChangedConn = connect(
+        toolsManager,
+        &::LLMQore::ToolRegistry::toolsChanged,
+        this,
+        &ChatRootView::updateInputTokensCount);
+}
+
 void ChatRootView::updateInputTokensCount()
 {
     int inputTokens = m_messageTokensCount;
@@ -756,14 +871,33 @@ void ChatRootView::updateInputTokensCount()
         inputTokens += Context::TokenUtils::estimateTokens(settings.systemPrompt());
     }
 
+    const auto splitImageEstimate = [](const QStringList &paths, QStringList &textPaths) {
+        int imageTokens = 0;
+        for (const QString &p : paths) {
+            if (Context::TokenUtils::isImageFilePath(p))
+                imageTokens += Context::TokenUtils::estimateImageAttachmentTokens(p);
+            else
+                textPaths.append(p);
+        }
+        return imageTokens;
+    };
+
     if (!m_attachmentFiles.isEmpty()) {
-        auto attachFiles = m_clientInterface->contextManager()->getContentFiles(m_attachmentFiles);
-        inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
+        QStringList textPaths;
+        inputTokens += splitImageEstimate(m_attachmentFiles, textPaths);
+        if (!textPaths.isEmpty()) {
+            auto attachFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
+            inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
+        }
     }
 
     if (!m_linkedFiles.isEmpty()) {
-        auto linkFiles = m_clientInterface->contextManager()->getContentFiles(m_linkedFiles);
-        inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
+        QStringList textPaths;
+        inputTokens += splitImageEstimate(m_linkedFiles, textPaths);
+        if (!textPaths.isEmpty()) {
+            auto linkFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
+            inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
+        }
     }
 
     const auto &history = m_chatModel->getChatHistory();
@@ -772,7 +906,22 @@ void ChatRootView::updateInputTokensCount()
         inputTokens += 4; // + role
     }
 
-    m_inputTokensCount = inputTokens;
+    if (useTools()) {
+        const auto providerName = Settings::generalSettings().caProvider();
+        if (auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(
+                providerName)) {
+            if (auto *tm = provider->toolsManager()) {
+                const QJsonArray toolDefs = tm->getToolsDefinitions();
+                if (!toolDefs.isEmpty()) {
+                    const QByteArray serialized
+                        = QJsonDocument(toolDefs).toJson(QJsonDocument::Compact);
+                    inputTokens += static_cast<int>(serialized.size() / 4);
+                }
+            }
+        }
+    }
+
+    m_inputTokensCount = static_cast<int>(inputTokens * m_calibrationFactor);
     emit inputTokensCountChanged();
 }
 
diff --git a/ChatView/ChatRootView.hpp b/ChatView/ChatRootView.hpp
index d9f5ef4..11126d5 100644
--- a/ChatView/ChatRootView.hpp
+++ b/ChatView/ChatRootView.hpp
@@ -212,6 +212,21 @@ signals:
     void openFilesChanged();
 
 private:
+    void rewireToolsChangedConnection();
+    QMetaObject::Connection m_toolsChangedConn;
+
+    bool deferSendForAutoCompress(
+        const QString &message,
+        const QStringList &attachments,
+        const QStringList &linkedFiles,
+        bool useTools,
+        bool useThinking);
+    void dispatchSend(
+        const QString &message,
+        const QStringList &attachments,
+        const QStringList &linkedFiles,
+        bool useTools,
+        bool useThinking);
     void updateFileEditStatus(const QString &editId, const QString &status);
     QString getChatsHistoryDir() const;
     QString getSuggestedFileName() const;
@@ -228,6 +243,18 @@ private:
     QStringList m_linkedFiles;
     int m_messageTokensCount{0};
     int m_inputTokensCount{0};
+    int m_lastSentEstimate{0};
+    double m_calibrationFactor{1.0};
+
+    struct PendingSend { // arguments of a send that was parked while the chat is being compressed
+        QString message;
+        QStringList attachments;
+        QStringList linkedFiles;
+        bool useTools = false;
+        bool useThinking = false;
+        bool active = false; // true while a parked send is waiting to be dispatched
+    };
+    PendingSend m_pendingSend;
     bool m_isSyncOpenFiles;
     QList<Core::IEditor *> m_currentEditors;
     bool m_isRequestInProgress;
diff --git a/ChatView/ChatSerializer.cpp b/ChatView/ChatSerializer.cpp
index 23d1a4d..26ce8ae 100644
--- a/ChatView/ChatSerializer.cpp
+++ b/ChatView/ChatSerializer.cpp
@@ -103,6 +103,17 @@ QJsonObject ChatSerializer::serializeMessage(
         messageObj["images"] = imagesArray;
     }
 
+    if (message.promptTokens > 0 || message.completionTokens > 0) {
+        QJsonObject usageObj;
+        usageObj["promptTokens"] = message.promptTokens;
+        usageObj["completionTokens"] = message.completionTokens;
+        if (message.cachedPromptTokens > 0)
+            usageObj["cachedPromptTokens"] = message.cachedPromptTokens;
+        if (message.reasoningTokens > 0)
+            usageObj["reasoningTokens"] = message.reasoningTokens;
+        messageObj["usage"] = usageObj;
+    }
+
     return messageObj;
 }
 
@@ -139,6 +150,14 @@ ChatModel::Message ChatSerializer::deserializeMessage(
         }
     }
 
+    if (json.contains("usage")) {
+        const QJsonObject usageObj = json["usage"].toObject();
+        message.promptTokens = usageObj["promptTokens"].toInt();
+        message.completionTokens = usageObj["completionTokens"].toInt();
+        message.cachedPromptTokens = usageObj["cachedPromptTokens"].toInt();
+        message.reasoningTokens = usageObj["reasoningTokens"].toInt();
+    }
+
     return message;
 }
 
diff --git a/ChatView/ClientInterface.cpp b/ChatView/ClientInterface.cpp
index 4bddc16..eebbc4e 100644
--- a/ChatView/ClientInterface.cpp
+++ b/ChatView/ClientInterface.cpp
@@ -257,6 +257,12 @@ void ClientInterface::sendMessage(
         this,
         &ClientInterface::handleFullResponse,
         Qt::UniqueConnection);
+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &ClientInterface::handleRequestFinalized,
+        Qt::UniqueConnection);
     connect(
         provider->client(),
         &::LLMQore::BaseClient::requestFailed,
@@ -449,6 +455,29 @@ void ClientInterface::handleFullResponse(const QString &requestId, const QString
     m_awaitingContinuation.remove(requestId);
 }
 
+void ClientInterface::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    // Ignore requests this interface is not tracking and completions without usage data.
+    if (!m_activeRequests.contains(requestId) || !info.usage)
+        return;
+
+    const auto &usage = *info.usage;
+    m_chatModel->setMessageUsage(
+        requestId,
+        usage.promptTokens,
+        usage.completionTokens,
+        usage.cachedPromptTokens,
+        usage.reasoningTokens);
+    emit messageUsageReceived(
+        usage.promptTokens, usage.completionTokens, usage.cachedPromptTokens, usage.reasoningTokens);
+
+    LOG_MESSAGE(QString("Chat usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+                    .arg(requestId)
+                    .arg(usage.promptTokens).arg(usage.completionTokens)
+                    .arg(usage.cachedPromptTokens).arg(usage.reasoningTokens));
+}
+
 void ClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
 {
     auto it = m_activeRequests.find(requestId);
diff --git a/ChatView/ClientInterface.hpp b/ChatView/ClientInterface.hpp
index c000159..433961e 100644
--- a/ChatView/ClientInterface.hpp
+++ b/ChatView/ClientInterface.hpp
@@ -11,6 +11,7 @@
 #include "ChatModel.hpp"
 #include "Provider.hpp"
 #include "pluginllmcore/IPromptProvider.hpp"
+#include <LLMQore/BaseClient.hpp>
 #include <context/ContextManager.hpp>
 
 namespace QodeAssist::Chat {
@@ -42,10 +43,13 @@ signals:
     void errorOccurred(const QString &error);
     void messageReceivedCompletely();
     void requestStarted(const QString &requestId);
+    void messageUsageReceived(
+        int promptTokens, int completionTokens, int cachedPromptTokens, int reasoningTokens);
 
 private slots:
     void handlePartialResponse(const QString &requestId, const QString &partialText);
     void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
     void handleRequestFailed(const QString &requestId, const QString &error);
     void handleThinkingBlockReceived(
         const QString &requestId, const QString &thinking, const QString &signature);
diff --git a/ChatView/qml/RootItem.qml b/ChatView/qml/RootItem.qml
index 3fbd0f9..8f9f51d 100644
--- a/ChatView/qml/RootItem.qml
+++ b/ChatView/qml/RootItem.qml
@@ -91,7 +91,10 @@ ChatRootView {
             loadButton.onClicked: root.showLoadDialog()
             clearButton.onClicked: root.clearChat()
             tokensBadge {
-                text: qsTr("%1/%2").arg(root.inputTokensCount).arg(root.chatModel.tokensThreshold)
+                text: qsTr("next ~%1  ·  session ↑%2 ↓%3")
+                          .arg(root.inputTokensCount)
+                          .arg(root.chatModel.sessionPromptTokens)
+                          .arg(root.chatModel.sessionCompletionTokens)
             }
             recentPath {
                 text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")
@@ -270,6 +273,10 @@ ChatRootView {
                     codeFontSize: root.codeFontSize
                     textFontSize: root.textFontSize
                     textFormat: root.textFormat
+                    promptTokens: model.promptTokens || 0
+                    completionTokens: model.completionTokens || 0
+                    cachedPromptTokens: model.cachedPromptTokens || 0
+                    reasoningTokens: model.reasoningTokens || 0
 
                     onResetChatToMessage: function(idx) {
                         messageInput.text = model.content
diff --git a/ChatView/qml/chatparts/ChatItem.qml b/ChatView/qml/chatparts/ChatItem.qml
index ef99d0f..9aa0ada 100644
--- a/ChatView/qml/chatparts/ChatItem.qml
+++ b/ChatView/qml/chatparts/ChatItem.qml
@@ -34,6 +34,11 @@ Rectangle {
     property bool isUserMessage: false
     property int messageIndex: -1
 
+    property int promptTokens: 0
+    property int completionTokens: 0
+    property int cachedPromptTokens: 0
+    property int reasoningTokens: 0
+
     signal resetChatToMessage(int index)
     signal openFileRequested(string filePath)
 
@@ -135,6 +140,39 @@ Rectangle {
                 }
             }
         }
+
+        RowLayout {
+            id: usageBadge // per-message token usage row: prompt, completion and their sum
+
+            Layout.fillWidth: true
+            Layout.leftMargin: 10
+            Layout.rightMargin: 10
+            spacing: 8
+            visible: !root.isUserMessage
+                     && (root.promptTokens > 0 || root.completionTokens > 0) // hidden for user messages and when both counts are zero
+
+            Item { Layout.fillWidth: true } // filler item placed before the labels; it takes the spare width
+
+            Text { // prompt tokens; the cached share is appended when it is above zero
+                text: root.cachedPromptTokens > 0
+                          ? qsTr("↑ %1 (cached %2)").arg(root.promptTokens).arg(root.cachedPromptTokens)
+                          : qsTr("↑ %1").arg(root.promptTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+            Text { // completion tokens; the reasoning share is appended when it is above zero
+                text: root.reasoningTokens > 0
+                          ? qsTr("↓ %1 (reasoning %2)").arg(root.completionTokens).arg(root.reasoningTokens)
+                          : qsTr("↓ %1").arg(root.completionTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+            Text { // prompt + completion for this message
+                text: qsTr("Σ %1").arg(root.promptTokens + root.completionTokens)
+                color: palette.placeholderText
+                font.pointSize: Math.max(root.textFontSize - 2, 7)
+            }
+        }
     }
 
     Rectangle {
diff --git a/LLMClientInterface.cpp b/LLMClientInterface.cpp
index 72f1813..83ab131 100644
--- a/LLMClientInterface.cpp
+++ b/LLMClientInterface.cpp
@@ -63,6 +63,21 @@ void LLMClientInterface::handleFullResponse(const QString &requestId, const QStr
     m_performanceLogger.endTimeMeasurement(requestId);
 }
 
+void LLMClientInterface::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    // Log-only handler: untracked requests and completions without usage are skipped.
+    if (!m_activeRequests.contains(requestId))
+        return;
+    if (!info.usage)
+        return;
+    const auto &usage = *info.usage;
+    LOG_MESSAGE(QString("Completion usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+                    .arg(requestId)
+                    .arg(usage.promptTokens).arg(usage.completionTokens)
+                    .arg(usage.cachedPromptTokens).arg(usage.reasoningTokens));
+}
+
 void LLMClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
 {
     auto it = m_activeRequests.find(requestId);
@@ -325,6 +340,12 @@ void LLMClientInterface::handleCompletion(const QJsonObject &request)
         this,
         &LLMClientInterface::handleFullResponse,
         Qt::UniqueConnection);
+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &LLMClientInterface::handleRequestFinalized,
+        Qt::UniqueConnection);
     connect(
         provider->client(),
         &::LLMQore::BaseClient::requestFailed,
diff --git a/LLMClientInterface.hpp b/LLMClientInterface.hpp
index 0be1124..24e48e3 100644
--- a/LLMClientInterface.hpp
+++ b/LLMClientInterface.hpp
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <LLMQore/BaseClient.hpp>
 #include <languageclient/languageclientinterface.h>
 #include <texteditor/texteditor.h>
 
@@ -52,6 +53,8 @@ protected:
 
 private slots:
     void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(
+        const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
     void handleRequestFailed(const QString &requestId, const QString &error);
 
 private:
diff --git a/QuickRefactorHandler.cpp b/QuickRefactorHandler.cpp
index e831392..aacd78c 100644
--- a/QuickRefactorHandler.cpp
+++ b/QuickRefactorHandler.cpp
@@ -152,6 +152,13 @@ void QuickRefactorHandler::prepareAndSendRequest(
         &QuickRefactorHandler::handleFullResponse,
         Qt::UniqueConnection);
 
+    connect(
+        provider->client(),
+        &::LLMQore::BaseClient::requestFinalized,
+        this,
+        &QuickRefactorHandler::handleRequestFinalized,
+        Qt::UniqueConnection);
+
     connect(
         provider->client(),
         &::LLMQore::BaseClient::requestFailed,
@@ -408,6 +415,22 @@ void QuickRefactorHandler::handleFullResponse(const QString &requestId, const QS
     }
 }
 
+void QuickRefactorHandler::handleRequestFinalized(
+    const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
+{
+    // Usage is logged only for the request matching m_lastRequestId, and only when reported.
+    const bool isCurrent = (requestId == m_lastRequestId);
+    if (!isCurrent || !info.usage)
+        return;
+
+    const auto &usage = *info.usage;
+    LOG_MESSAGE(
+        QString("Quick refactor usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
+            .arg(requestId)
+            .arg(usage.promptTokens).arg(usage.completionTokens)
+            .arg(usage.cachedPromptTokens).arg(usage.reasoningTokens));
+}
+
 void QuickRefactorHandler::handleRequestFailed(const QString &requestId, const QString &error)
 {
     if (requestId == m_lastRequestId) {
diff --git a/QuickRefactorHandler.hpp b/QuickRefactorHandler.hpp
index 195e428..140c2b7 100644
--- a/QuickRefactorHandler.hpp
+++ b/QuickRefactorHandler.hpp
@@ -6,6 +6,7 @@
 #include <QJsonObject>
 #include <QObject>
 
+#include <LLMQore/BaseClient.hpp>
 #include <texteditor/texteditor.h>
 #include <utils/textutils.h>
 
@@ -43,6 +44,8 @@ signals:
 
 private slots:
     void handleFullResponse(const QString &requestId, const QString &fullText);
+    void handleRequestFinalized(
+        const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
     void handleRequestFailed(const QString &requestId, const QString &error);
 
 private:
diff --git a/context/CMakeLists.txt b/context/CMakeLists.txt
index c5754c1..0ef3d9f 100644
--- a/context/CMakeLists.txt
+++ b/context/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(Context STATIC
 target_link_libraries(Context
     PUBLIC
     Qt::Core
+    Qt::Gui
     QtCreator::Core
     QtCreator::TextEditor
     QtCreator::Utils
diff --git a/context/TokenUtils.cpp b/context/TokenUtils.cpp
index 7d415ce..6868760 100644
--- a/context/TokenUtils.cpp
+++ b/context/TokenUtils.cpp
@@ -3,6 +3,14 @@
 
 #include "TokenUtils.hpp"
 
+#include <QFileInfo>
+#include <QImageReader>
+#include <QSet>
+#include <QSize>
+
+#include <algorithm>
+#include <cmath>
+
 namespace QodeAssist::Context {
 
 int TokenUtils::estimateTokens(const QString &text)
@@ -15,8 +23,48 @@ int TokenUtils::estimateTokens(const QString &text)
     return text.length() / 4;
 }
 
+bool TokenUtils::isImageFilePath(const QString &filePath) // decided by file extension only
+{
+    static const QSet<QString> knownSuffixes{"png", "jpg", "jpeg", "gif", "webp", "bmp"};
+    return knownSuffixes.contains(QFileInfo(filePath).suffix().toLower()); // case-insensitive
+}
+
+// Estimates the token cost of an image from the pixel size QImageReader reports for filePath.
+// Returns a flat 1500 when no usable (valid, non-empty) size is available.
+int TokenUtils::estimateImageAttachmentTokens(const QString &filePath)
+{
+    QImageReader imageReader(filePath);
+    const QSize dims = imageReader.size();
+    if (dims.isEmpty() || !dims.isValid())
+        return 1500;
+
+    double width = dims.width();
+    double height = dims.height();
+
+    // Scales both dimensions by limit / side, but only when side exceeds limit.
+    const auto shrinkTo = [&width, &height](double limit, double side) {
+        if (side > limit) {
+            const double factor = limit / side;
+            width *= factor;
+            height *= factor;
+        }
+    };
+    shrinkTo(2048.0, std::max(width, height)); // cap the longer side first
+    shrinkTo(768.0, std::min(width, height));  // then cap the (rescaled) shorter side
+
+    // Count 512-pixel tiles per axis, rounding partial tiles up; never fewer than one tile.
+    const int columns = static_cast<int>(std::ceil(width / 512.0));
+    const int rows = static_cast<int>(std::ceil(height / 512.0));
+    const int tileCount = std::max(1, columns * rows);
+
+    return 85 + 170 * tileCount;
+}
+
 int TokenUtils::estimateFileTokens(const Context::ContentFile &file)
 {
+    if (isImageFilePath(file.filename))
+        return estimateImageAttachmentTokens(QString());
+
     int total = 0;
 
     total += estimateTokens(file.filename);
diff --git a/context/TokenUtils.hpp b/context/TokenUtils.hpp
index 8ad2164..63f6508 100644
--- a/context/TokenUtils.hpp
+++ b/context/TokenUtils.hpp
@@ -15,6 +15,8 @@ public:
     static int estimateTokens(const QString &text);
     static int estimateFileTokens(const Context::ContentFile &file);
     static int estimateFilesTokens(const QList<Context::ContentFile> &files);
+    static bool isImageFilePath(const QString &filePath);
+    static int estimateImageAttachmentTokens(const QString &filePath);
 };
 
 } // namespace QodeAssist::Context
diff --git a/settings/ChatAssistantSettings.cpp b/settings/ChatAssistantSettings.cpp
index 5a94ad3..ad32c88 100644
--- a/settings/ChatAssistantSettings.cpp
+++ b/settings/ChatAssistantSettings.cpp
@@ -29,14 +29,6 @@ ChatAssistantSettings::ChatAssistantSettings()
 
     setDisplayName(Tr::tr("Chat Assistant"));
 
-    // Chat Settings
-    chatTokensThreshold.setSettingsKey(Constants::CA_TOKENS_THRESHOLD);
-    chatTokensThreshold.setLabelText(Tr::tr("Chat history token limit:"));
-    chatTokensThreshold.setToolTip(Tr::tr("Maximum number of tokens in chat history. When "
-                                          "exceeded, oldest messages will be removed."));
-    chatTokensThreshold.setRange(1, 99999999);
-    chatTokensThreshold.setDefaultValue(20000);
-
     linkOpenFiles.setSettingsKey(Constants::CA_LINK_OPEN_FILES);
     linkOpenFiles.setLabelText(Tr::tr("Sync open files with assistant by default"));
     linkOpenFiles.setDefaultValue(false);
@@ -58,6 +50,18 @@ ChatAssistantSettings::ChatAssistantSettings()
     enableChatTools.setToolTip(Tr::tr("When enabled, AI can use tools to read files, search project, and build code"));
     enableChatTools.setDefaultValue(false);
 
+    autoCompress.setSettingsKey(Constants::CA_AUTO_COMPRESS);
+    autoCompress.setLabelText(Tr::tr("Auto-compress chat when session tokens exceed:"));
+    autoCompress.setToolTip(Tr::tr(
+        "After each assistant response, if the running session token total exceeds the "
+        "threshold, the chat is summarized and a new compressed chat is started "
+        "automatically. The original chat is preserved on disk."));
+    autoCompress.setDefaultValue(false);
+
+    autoCompressThreshold.setSettingsKey(Constants::CA_AUTO_COMPRESS_THRESHOLD);
+    autoCompressThreshold.setRange(1000, 99999999);
+    autoCompressThreshold.setDefaultValue(40000);
+
     // General Parameters Settings
     temperature.setSettingsKey(Constants::CA_TEMPERATURE);
     temperature.setLabelText(Tr::tr("Temperature:"));
@@ -292,11 +296,11 @@ ChatAssistantSettings::ChatAssistantSettings()
             Group{
                 title(Tr::tr("Chat Settings")),
                 Column{
-                    Row{chatTokensThreshold, Stretch{1}},
                     linkOpenFiles,
                     autosave,
                     enableChatInBottomToolBar,
-                    enableChatInNavigationPanel}},
+                    enableChatInNavigationPanel,
+                    Row{autoCompress, autoCompressThreshold, Stretch{1}}}},
             Space{8},
             Group{
                 title(Tr::tr("Tools")),
@@ -348,7 +352,8 @@ void ChatAssistantSettings::resetSettingsToDefaults()
         QMessageBox::Yes | QMessageBox::No);
 
     if (reply == QMessageBox::Yes) {
-        resetAspect(chatTokensThreshold);
+        resetAspect(autoCompress);
+        resetAspect(autoCompressThreshold);
         resetAspect(temperature);
         resetAspect(maxTokens);
         resetAspect(useTopP);
diff --git a/settings/ChatAssistantSettings.hpp b/settings/ChatAssistantSettings.hpp
index 1218a25..716d0bd 100644
--- a/settings/ChatAssistantSettings.hpp
+++ b/settings/ChatAssistantSettings.hpp
@@ -18,12 +18,13 @@ public:
     ButtonAspect resetToDefaults{this};
 
     // Chat settings
-    Utils::IntegerAspect chatTokensThreshold{this};
     Utils::BoolAspect linkOpenFiles{this};
     Utils::BoolAspect autosave{this};
     Utils::BoolAspect enableChatInBottomToolBar{this};
     Utils::BoolAspect enableChatInNavigationPanel{this};
     Utils::BoolAspect enableChatTools{this};
+    Utils::BoolAspect autoCompress{this};
+    Utils::IntegerAspect autoCompressThreshold{this};
 
     // General Parameters Settings
     Utils::DoubleAspect temperature{this};
diff --git a/settings/SettingsConstants.hpp b/settings/SettingsConstants.hpp
index 2859176..baf8798 100644
--- a/settings/SettingsConstants.hpp
+++ b/settings/SettingsConstants.hpp
@@ -78,7 +78,8 @@ const char MAX_FILE_THRESHOLD[] = "QodeAssist.maxFileThreshold";
 const char CC_MULTILINE_COMPLETION[] = "QodeAssist.ccMultilineCompletion";
 const char CC_MODEL_OUTPUT_HANDLER[] = "QodeAssist.ccModelOutputHandler";
 const char CA_AUTO_APPLY_FILE_EDITS[] = "QodeAssist.caAutoApplyFileEdits";
-const char CA_TOKENS_THRESHOLD[] = "QodeAssist.caTokensThreshold";
+const char CA_AUTO_COMPRESS[] = "QodeAssist.caAutoCompress";
+const char CA_AUTO_COMPRESS_THRESHOLD[] = "QodeAssist.caAutoCompressThreshold";
 const char CA_LINK_OPEN_FILES[] = "QodeAssist.caLinkOpenFiles";
 const char CA_AUTOSAVE[] = "QodeAssist.caAutosave";
 const char CC_CUSTOM_LANGUAGES[] = "QodeAssist.ccCustomLanguages";
diff --git a/sources/external/llmqore b/sources/external/llmqore
index c042931..0c9fc8b 160000
--- a/sources/external/llmqore
+++ b/sources/external/llmqore
@@ -1 +1 @@
-Subproject commit c0429314bb5f8d5d65bd82363ef108e462cb9c37
+Subproject commit 0c9fc8bd7cb52946328d29f02c804c7872ba06b1