mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2026-05-30 02:49:12 -04:00
refactor: Update token usage api (#347)
* refactor: Improve token usage api * refactor: Image recognition to tokens
This commit is contained in:
@@ -11,7 +11,6 @@
|
||||
#include <QUrl>
|
||||
#include <QtQml>
|
||||
|
||||
#include "ChatAssistantSettings.hpp"
|
||||
#include "Logger.hpp"
|
||||
#include "context/ChangesManager.h"
|
||||
|
||||
@@ -20,14 +19,6 @@ namespace QodeAssist::Chat {
|
||||
ChatModel::ChatModel(QObject *parent)
|
||||
: QAbstractListModel(parent)
|
||||
{
|
||||
auto &settings = Settings::chatAssistantSettings();
|
||||
|
||||
connect(
|
||||
&settings.chatTokensThreshold,
|
||||
&Utils::BaseAspect::changed,
|
||||
this,
|
||||
&ChatModel::tokensThresholdChanged);
|
||||
|
||||
connect(&Context::ChangesManager::instance(),
|
||||
&Context::ChangesManager::fileEditApplied,
|
||||
this,
|
||||
@@ -86,6 +77,16 @@ QVariant ChatModel::data(const QModelIndex &index, int role) const
|
||||
case Roles::IsRedacted: {
|
||||
return message.isRedacted;
|
||||
}
|
||||
case Roles::PromptTokens:
|
||||
return message.promptTokens;
|
||||
case Roles::CompletionTokens:
|
||||
return message.completionTokens;
|
||||
case Roles::CachedPromptTokens:
|
||||
return message.cachedPromptTokens;
|
||||
case Roles::ReasoningTokens:
|
||||
return message.reasoningTokens;
|
||||
case Roles::TotalTokens:
|
||||
return message.promptTokens + message.completionTokens;
|
||||
case Roles::Images: {
|
||||
QVariantList imagesList;
|
||||
for (const auto &image : message.images) {
|
||||
@@ -124,6 +125,11 @@ QHash<int, QByteArray> ChatModel::roleNames() const
|
||||
roles[Roles::Attachments] = "attachments";
|
||||
roles[Roles::IsRedacted] = "isRedacted";
|
||||
roles[Roles::Images] = "images";
|
||||
roles[Roles::PromptTokens] = "promptTokens";
|
||||
roles[Roles::CompletionTokens] = "completionTokens";
|
||||
roles[Roles::CachedPromptTokens] = "cachedPromptTokens";
|
||||
roles[Roles::ReasoningTokens] = "reasoningTokens";
|
||||
roles[Roles::TotalTokens] = "totalTokens";
|
||||
return roles;
|
||||
}
|
||||
|
||||
@@ -207,6 +213,7 @@ void ChatModel::clear()
|
||||
m_messages.clear();
|
||||
endResetModel();
|
||||
emit modelReseted();
|
||||
emit sessionUsageChanged();
|
||||
}
|
||||
|
||||
QList<MessagePart> ChatModel::processMessageContent(const QString &content) const
|
||||
@@ -310,12 +317,6 @@ QJsonArray ChatModel::prepareMessagesForRequest(const QString &systemPrompt) con
|
||||
return messages;
|
||||
}
|
||||
|
||||
int ChatModel::tokensThreshold() const
|
||||
{
|
||||
auto &settings = Settings::chatAssistantSettings();
|
||||
return settings.chatTokensThreshold();
|
||||
}
|
||||
|
||||
QString ChatModel::lastMessageId() const
|
||||
{
|
||||
return !m_messages.isEmpty() ? m_messages.last().id : "";
|
||||
@@ -330,6 +331,7 @@ void ChatModel::resetModelTo(int index)
|
||||
beginRemoveRows(QModelIndex(), index, m_messages.size() - 1);
|
||||
m_messages.remove(index, m_messages.size() - index);
|
||||
endRemoveRows();
|
||||
emit sessionUsageChanged();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,6 +509,54 @@ void ChatModel::updateMessageContent(const QString &messageId, const QString &ne
|
||||
}
|
||||
}
|
||||
|
||||
void ChatModel::setMessageUsage(
|
||||
const QString &messageId,
|
||||
int promptTokens,
|
||||
int completionTokens,
|
||||
int cachedPromptTokens,
|
||||
int reasoningTokens)
|
||||
{
|
||||
for (int i = 0; i < m_messages.size(); ++i) {
|
||||
if (m_messages[i].id != messageId)
|
||||
continue;
|
||||
m_messages[i].promptTokens = promptTokens;
|
||||
m_messages[i].completionTokens = completionTokens;
|
||||
m_messages[i].cachedPromptTokens = cachedPromptTokens;
|
||||
m_messages[i].reasoningTokens = reasoningTokens;
|
||||
emit dataChanged(
|
||||
index(i),
|
||||
index(i),
|
||||
{Roles::PromptTokens,
|
||||
Roles::CompletionTokens,
|
||||
Roles::CachedPromptTokens,
|
||||
Roles::ReasoningTokens,
|
||||
Roles::TotalTokens});
|
||||
emit sessionUsageChanged();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int ChatModel::sessionPromptTokens() const
|
||||
{
|
||||
int total = 0;
|
||||
for (const auto &m : m_messages)
|
||||
total += m.promptTokens;
|
||||
return total;
|
||||
}
|
||||
|
||||
int ChatModel::sessionCompletionTokens() const
|
||||
{
|
||||
int total = 0;
|
||||
for (const auto &m : m_messages)
|
||||
total += m.completionTokens;
|
||||
return total;
|
||||
}
|
||||
|
||||
int ChatModel::sessionTotalTokens() const
|
||||
{
|
||||
return sessionPromptTokens() + sessionCompletionTokens();
|
||||
}
|
||||
|
||||
void ChatModel::setLoadingFromHistory(bool loading)
|
||||
{
|
||||
m_loadingFromHistory = loading;
|
||||
|
||||
@@ -17,14 +17,27 @@ namespace QodeAssist::Chat {
|
||||
class ChatModel : public QAbstractListModel
|
||||
{
|
||||
Q_OBJECT
|
||||
Q_PROPERTY(int tokensThreshold READ tokensThreshold NOTIFY tokensThresholdChanged FINAL)
|
||||
Q_PROPERTY(int sessionPromptTokens READ sessionPromptTokens NOTIFY sessionUsageChanged FINAL)
|
||||
Q_PROPERTY(int sessionCompletionTokens READ sessionCompletionTokens NOTIFY sessionUsageChanged FINAL)
|
||||
Q_PROPERTY(int sessionTotalTokens READ sessionTotalTokens NOTIFY sessionUsageChanged FINAL)
|
||||
QML_ELEMENT
|
||||
|
||||
public:
|
||||
enum ChatRole { System, User, Assistant, Tool, FileEdit, Thinking };
|
||||
Q_ENUM(ChatRole)
|
||||
|
||||
enum Roles { RoleType = Qt::UserRole, Content, Attachments, IsRedacted, Images };
|
||||
// Item-data roles handled by ChatModel::data() and given QML names in roleNames().
enum Roles {
|
||||
RoleType = Qt::UserRole, // first custom role; the following roles count up from here
|
||||
Content,
|
||||
Attachments, // exposed to QML as "attachments"
|
||||
IsRedacted, // exposed to QML as "isRedacted"
|
||||
Images, // exposed to QML as "images"
|
||||
PromptTokens, // message.promptTokens, exposed as "promptTokens"
|
||||
CompletionTokens, // message.completionTokens, exposed as "completionTokens"
|
||||
CachedPromptTokens, // message.cachedPromptTokens, exposed as "cachedPromptTokens"
|
||||
ReasoningTokens, // message.reasoningTokens, exposed as "reasoningTokens"
|
||||
TotalTokens // computed in data() as promptTokens + completionTokens, exposed as "totalTokens"
|
||||
};
|
||||
Q_ENUM(Roles)
|
||||
|
||||
struct ImageAttachment
|
||||
@@ -44,6 +57,11 @@ public:
|
||||
|
||||
QList<Context::ContentFile> attachments;
|
||||
QList<ImageAttachment> images;
|
||||
|
||||
int promptTokens = 0;
|
||||
int completionTokens = 0;
|
||||
int cachedPromptTokens = 0;
|
||||
int reasoningTokens = 0;
|
||||
};
|
||||
|
||||
explicit ChatModel(QObject *parent = nullptr);
|
||||
@@ -66,8 +84,6 @@ public:
|
||||
QVector<Message> getChatHistory() const;
|
||||
QJsonArray prepareMessagesForRequest(const QString &systemPrompt) const;
|
||||
|
||||
int tokensThreshold() const;
|
||||
|
||||
QString currentModel() const;
|
||||
QString lastMessageId() const;
|
||||
|
||||
@@ -84,6 +100,17 @@ public:
|
||||
const QString &requestId, const QString &thinking, const QString &signature);
|
||||
void addRedactedThinkingBlock(const QString &requestId, const QString &signature);
|
||||
void updateMessageContent(const QString &messageId, const QString &newContent);
|
||||
|
||||
void setMessageUsage(
|
||||
const QString &messageId,
|
||||
int promptTokens,
|
||||
int completionTokens,
|
||||
int cachedPromptTokens,
|
||||
int reasoningTokens);
|
||||
|
||||
int sessionPromptTokens() const;
|
||||
int sessionCompletionTokens() const;
|
||||
int sessionTotalTokens() const;
|
||||
|
||||
void setLoadingFromHistory(bool loading);
|
||||
bool isLoadingFromHistory() const;
|
||||
@@ -92,8 +119,8 @@ public:
|
||||
QString chatFilePath() const;
|
||||
|
||||
signals:
|
||||
void tokensThresholdChanged();
|
||||
void modelReseted();
|
||||
void sessionUsageChanged();
|
||||
|
||||
private slots:
|
||||
void onFileEditApplied(const QString &editId);
|
||||
|
||||
@@ -3,7 +3,12 @@
|
||||
|
||||
#include "ChatRootView.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <LLMQore/ToolsManager.hpp>
|
||||
#include <QClipboard>
|
||||
#include <QJsonArray>
|
||||
#include <QJsonDocument>
|
||||
#include <QDesktopServices>
|
||||
#include <QDir>
|
||||
#include <QFile>
|
||||
@@ -31,7 +36,6 @@
|
||||
#include "Logger.hpp"
|
||||
#include "ProjectSettings.hpp"
|
||||
#include "ProvidersManager.hpp"
|
||||
#include "ToolsSettings.hpp"
|
||||
#include "context/ChangesManager.h"
|
||||
#include "context/ContextManager.hpp"
|
||||
#include "context/TokenUtils.hpp"
|
||||
@@ -107,6 +111,22 @@ ChatRootView::ChatRootView(QQuickItem *parent)
|
||||
&Utils::BaseAspect::changed,
|
||||
this,
|
||||
&ChatRootView::updateInputTokensCount);
|
||||
connect(this, &ChatRootView::useToolsChanged, this, &ChatRootView::updateInputTokensCount);
|
||||
connect(
|
||||
&Settings::chatAssistantSettings().enableChatTools,
|
||||
&Utils::BaseAspect::changed,
|
||||
this,
|
||||
&ChatRootView::updateInputTokensCount);
|
||||
|
||||
rewireToolsChangedConnection();
|
||||
connect(
|
||||
&Settings::generalSettings().caProvider,
|
||||
&Utils::BaseAspect::changed,
|
||||
this,
|
||||
[this]() {
|
||||
rewireToolsChangedConnection();
|
||||
updateInputTokensCount();
|
||||
});
|
||||
connect(
|
||||
&Settings::chatAssistantSettings().systemPrompt,
|
||||
&Utils::BaseAspect::changed,
|
||||
@@ -171,6 +191,28 @@ ChatRootView::ChatRootView(QQuickItem *parent)
|
||||
updateCurrentMessageEditsStats();
|
||||
});
|
||||
|
||||
connect(
|
||||
m_clientInterface,
|
||||
&ClientInterface::messageUsageReceived,
|
||||
this,
|
||||
[this](int promptTokens, int /*completionTokens*/, int /*cached*/, int /*reasoning*/) {
|
||||
if (promptTokens <= 0 || m_lastSentEstimate <= 0)
|
||||
return;
|
||||
|
||||
const double rawFactor
|
||||
= static_cast<double>(promptTokens) / static_cast<double>(m_lastSentEstimate);
|
||||
const double clamped = std::clamp(rawFactor, 0.5, 3.0);
|
||||
m_calibrationFactor = 0.5 * m_calibrationFactor + 0.5 * clamped;
|
||||
|
||||
LOG_MESSAGE(QString("Token calibration: server=%1 estimated=%2 ratio=%3 ema=%4")
|
||||
.arg(promptTokens)
|
||||
.arg(m_lastSentEstimate)
|
||||
.arg(rawFactor, 0, 'f', 3)
|
||||
.arg(m_calibrationFactor, 0, 'f', 3));
|
||||
|
||||
updateInputTokensCount();
|
||||
});
|
||||
|
||||
connect(
|
||||
&Context::ChangesManager::instance(),
|
||||
&Context::ChangesManager::fileEditAdded,
|
||||
@@ -247,7 +289,6 @@ ChatRootView::ChatRootView(QQuickItem *parent)
|
||||
emit lastErrorMessageChanged();
|
||||
});
|
||||
|
||||
// ChatCompressor signals
|
||||
connect(m_chatCompressor, &ChatCompressor::compressionStarted, this, [this]() {
|
||||
emit isCompressingChanged();
|
||||
});
|
||||
@@ -259,6 +300,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
|
||||
emit compressionCompleted(compressedChatPath);
|
||||
|
||||
loadHistory(compressedChatPath);
|
||||
|
||||
if (m_pendingSend.active) {
|
||||
PendingSend p = m_pendingSend;
|
||||
m_pendingSend = {};
|
||||
dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
|
||||
}
|
||||
});
|
||||
|
||||
connect(m_chatCompressor, &ChatCompressor::compressionFailed, this, [this](const QString &error) {
|
||||
@@ -266,6 +313,12 @@ ChatRootView::ChatRootView(QQuickItem *parent)
|
||||
m_lastErrorMessage = error;
|
||||
emit lastErrorMessageChanged();
|
||||
emit compressionFailed(error);
|
||||
|
||||
if (m_pendingSend.active) {
|
||||
PendingSend p = m_pendingSend;
|
||||
m_pendingSend = {};
|
||||
dispatchSend(p.message, p.attachments, p.linkedFiles, p.useTools, p.useThinking);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -276,32 +329,72 @@ ChatModel *ChatRootView::chatModel() const
|
||||
|
||||
void ChatRootView::sendMessage(const QString &message)
|
||||
{
|
||||
if (m_inputTokensCount > m_chatModel->tokensThreshold()) {
|
||||
QMessageBox::StandardButton reply = QMessageBox::question(
|
||||
Core::ICore::dialogParent(),
|
||||
tr("Token Limit Exceeded"),
|
||||
tr("The chat history has exceeded the token limit.\n"
|
||||
"Would you like to create new chat?"),
|
||||
QMessageBox::Yes | QMessageBox::No);
|
||||
const QStringList attachments = m_attachmentFiles;
|
||||
const QStringList linkedFiles = m_linkedFiles;
|
||||
const bool tools = useTools();
|
||||
const bool thinking = useThinking();
|
||||
|
||||
if (reply == QMessageBox::Yes) {
|
||||
autosave();
|
||||
m_chatModel->clear();
|
||||
setRecentFilePath(QString{});
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (deferSendForAutoCompress(message, attachments, linkedFiles, tools, thinking))
|
||||
return;
|
||||
|
||||
dispatchSend(message, attachments, linkedFiles, tools, thinking);
|
||||
}
|
||||
|
||||
bool ChatRootView::deferSendForAutoCompress(
|
||||
const QString &message,
|
||||
const QStringList &attachments,
|
||||
const QStringList &linkedFiles,
|
||||
bool useToolsArg,
|
||||
bool useThinkingArg)
|
||||
{
|
||||
auto &settings = Settings::chatAssistantSettings();
|
||||
if (!settings.autoCompress())
|
||||
return false;
|
||||
|
||||
const int threshold = settings.autoCompressThreshold();
|
||||
if (m_inputTokensCount < threshold)
|
||||
return false;
|
||||
|
||||
if (m_recentFilePath.isEmpty()) {
|
||||
QString filePath = getAutosaveFilePath(message, m_attachmentFiles);
|
||||
QString filePath = getAutosaveFilePath(message, attachments);
|
||||
if (filePath.isEmpty())
|
||||
return false;
|
||||
setRecentFilePath(filePath);
|
||||
LOG_MESSAGE(QString("Set chat file path for new chat (auto-compress): %1").arg(filePath));
|
||||
}
|
||||
|
||||
if (m_chatCompressor->isCompressing() || m_pendingSend.active)
|
||||
return false;
|
||||
|
||||
LOG_MESSAGE(QString("Auto-compress preempt: estimated next=%1 ≥ threshold=%2; deferring send")
|
||||
.arg(m_inputTokensCount)
|
||||
.arg(threshold));
|
||||
|
||||
m_pendingSend = {message, attachments, linkedFiles, useToolsArg, useThinkingArg, true};
|
||||
compressCurrentChat();
|
||||
return true;
|
||||
}
|
||||
|
||||
void ChatRootView::dispatchSend(
|
||||
const QString &message,
|
||||
const QStringList &attachments,
|
||||
const QStringList &linkedFiles,
|
||||
bool useToolsArg,
|
||||
bool useThinkingArg)
|
||||
{
|
||||
if (m_recentFilePath.isEmpty()) {
|
||||
QString filePath = getAutosaveFilePath(message, attachments);
|
||||
if (!filePath.isEmpty()) {
|
||||
setRecentFilePath(filePath);
|
||||
LOG_MESSAGE(QString("Set chat file path for new chat: %1").arg(filePath));
|
||||
}
|
||||
}
|
||||
|
||||
m_clientInterface
|
||||
->sendMessage(message, m_attachmentFiles, m_linkedFiles, useTools(), useThinking());
|
||||
m_lastSentEstimate = m_calibrationFactor > 0.0
|
||||
? static_cast<int>(m_inputTokensCount / m_calibrationFactor)
|
||||
: m_inputTokensCount;
|
||||
|
||||
m_clientInterface->sendMessage(message, attachments, linkedFiles, useToolsArg, useThinkingArg);
|
||||
|
||||
m_fileManager->clearIntermediateStorage();
|
||||
clearAttachmentFiles();
|
||||
@@ -392,7 +485,8 @@ void ChatRootView::loadHistory(const QString &filePath)
|
||||
setRecentFilePath(filePath);
|
||||
}
|
||||
|
||||
m_fileManager->clearIntermediateStorage();
|
||||
if (!m_pendingSend.active)
|
||||
m_fileManager->clearIntermediateStorage();
|
||||
m_attachmentFiles.clear();
|
||||
m_linkedFiles.clear();
|
||||
emit attachmentFilesChanged();
|
||||
@@ -747,6 +841,27 @@ void ChatRootView::openFileInEditor(const QString &filePath)
|
||||
Core::EditorManager::openEditor(Utils::FilePath::fromString(filePath));
|
||||
}
|
||||
|
||||
void ChatRootView::rewireToolsChangedConnection()
|
||||
{
|
||||
if (m_toolsChangedConn)
|
||||
QObject::disconnect(m_toolsChangedConn);
|
||||
m_toolsChangedConn = {};
|
||||
|
||||
const auto providerName = Settings::generalSettings().caProvider();
|
||||
auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(providerName);
|
||||
if (!provider)
|
||||
return;
|
||||
auto *tm = provider->toolsManager();
|
||||
if (!tm)
|
||||
return;
|
||||
|
||||
m_toolsChangedConn = connect(
|
||||
tm,
|
||||
&::LLMQore::ToolRegistry::toolsChanged,
|
||||
this,
|
||||
&ChatRootView::updateInputTokensCount);
|
||||
}
|
||||
|
||||
void ChatRootView::updateInputTokensCount()
|
||||
{
|
||||
int inputTokens = m_messageTokensCount;
|
||||
@@ -756,14 +871,33 @@ void ChatRootView::updateInputTokensCount()
|
||||
inputTokens += Context::TokenUtils::estimateTokens(settings.systemPrompt());
|
||||
}
|
||||
|
||||
const auto splitImageEstimate = [](const QStringList &paths, QStringList &textPaths) {
|
||||
int imageTokens = 0;
|
||||
for (const QString &p : paths) {
|
||||
if (Context::TokenUtils::isImageFilePath(p))
|
||||
imageTokens += Context::TokenUtils::estimateImageAttachmentTokens(p);
|
||||
else
|
||||
textPaths.append(p);
|
||||
}
|
||||
return imageTokens;
|
||||
};
|
||||
|
||||
if (!m_attachmentFiles.isEmpty()) {
|
||||
auto attachFiles = m_clientInterface->contextManager()->getContentFiles(m_attachmentFiles);
|
||||
inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
|
||||
QStringList textPaths;
|
||||
inputTokens += splitImageEstimate(m_attachmentFiles, textPaths);
|
||||
if (!textPaths.isEmpty()) {
|
||||
auto attachFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
|
||||
inputTokens += Context::TokenUtils::estimateFilesTokens(attachFiles);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_linkedFiles.isEmpty()) {
|
||||
auto linkFiles = m_clientInterface->contextManager()->getContentFiles(m_linkedFiles);
|
||||
inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
|
||||
QStringList textPaths;
|
||||
inputTokens += splitImageEstimate(m_linkedFiles, textPaths);
|
||||
if (!textPaths.isEmpty()) {
|
||||
auto linkFiles = m_clientInterface->contextManager()->getContentFiles(textPaths);
|
||||
inputTokens += Context::TokenUtils::estimateFilesTokens(linkFiles);
|
||||
}
|
||||
}
|
||||
|
||||
const auto &history = m_chatModel->getChatHistory();
|
||||
@@ -772,7 +906,22 @@ void ChatRootView::updateInputTokensCount()
|
||||
inputTokens += 4; // + role
|
||||
}
|
||||
|
||||
m_inputTokensCount = inputTokens;
|
||||
if (useTools()) {
|
||||
const auto providerName = Settings::generalSettings().caProvider();
|
||||
if (auto *provider = PluginLLMCore::ProvidersManager::instance().getProviderByName(
|
||||
providerName)) {
|
||||
if (auto *tm = provider->toolsManager()) {
|
||||
const QJsonArray toolDefs = tm->getToolsDefinitions();
|
||||
if (!toolDefs.isEmpty()) {
|
||||
const QByteArray serialized
|
||||
= QJsonDocument(toolDefs).toJson(QJsonDocument::Compact);
|
||||
inputTokens += static_cast<int>(serialized.size() / 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_inputTokensCount = static_cast<int>(inputTokens * m_calibrationFactor);
|
||||
emit inputTokensCountChanged();
|
||||
}
|
||||
|
||||
|
||||
@@ -212,6 +212,21 @@ signals:
|
||||
void openFilesChanged();
|
||||
|
||||
private:
|
||||
void rewireToolsChangedConnection();
|
||||
QMetaObject::Connection m_toolsChangedConn;
|
||||
|
||||
bool deferSendForAutoCompress(
|
||||
const QString &message,
|
||||
const QStringList &attachments,
|
||||
const QStringList &linkedFiles,
|
||||
bool useTools,
|
||||
bool useThinking);
|
||||
void dispatchSend(
|
||||
const QString &message,
|
||||
const QStringList &attachments,
|
||||
const QStringList &linkedFiles,
|
||||
bool useTools,
|
||||
bool useThinking);
|
||||
void updateFileEditStatus(const QString &editId, const QString &status);
|
||||
QString getChatsHistoryDir() const;
|
||||
QString getSuggestedFileName() const;
|
||||
@@ -228,6 +243,18 @@ private:
|
||||
QStringList m_linkedFiles;
|
||||
int m_messageTokensCount{0};
|
||||
int m_inputTokensCount{0};
|
||||
int m_lastSentEstimate{0};
|
||||
double m_calibrationFactor{1.0};
|
||||
|
||||
struct PendingSend {
|
||||
QString message;
|
||||
QStringList attachments;
|
||||
QStringList linkedFiles;
|
||||
bool useTools = false;
|
||||
bool useThinking = false;
|
||||
bool active = false;
|
||||
};
|
||||
PendingSend m_pendingSend;
|
||||
bool m_isSyncOpenFiles;
|
||||
QList<Core::IEditor *> m_currentEditors;
|
||||
bool m_isRequestInProgress;
|
||||
|
||||
@@ -103,6 +103,17 @@ QJsonObject ChatSerializer::serializeMessage(
|
||||
messageObj["images"] = imagesArray;
|
||||
}
|
||||
|
||||
if (message.promptTokens > 0 || message.completionTokens > 0) {
|
||||
QJsonObject usageObj;
|
||||
usageObj["promptTokens"] = message.promptTokens;
|
||||
usageObj["completionTokens"] = message.completionTokens;
|
||||
if (message.cachedPromptTokens > 0)
|
||||
usageObj["cachedPromptTokens"] = message.cachedPromptTokens;
|
||||
if (message.reasoningTokens > 0)
|
||||
usageObj["reasoningTokens"] = message.reasoningTokens;
|
||||
messageObj["usage"] = usageObj;
|
||||
}
|
||||
|
||||
return messageObj;
|
||||
}
|
||||
|
||||
@@ -139,6 +150,14 @@ ChatModel::Message ChatSerializer::deserializeMessage(
|
||||
}
|
||||
}
|
||||
|
||||
if (json.contains("usage")) {
|
||||
const QJsonObject usageObj = json["usage"].toObject();
|
||||
message.promptTokens = usageObj["promptTokens"].toInt();
|
||||
message.completionTokens = usageObj["completionTokens"].toInt();
|
||||
message.cachedPromptTokens = usageObj["cachedPromptTokens"].toInt();
|
||||
message.reasoningTokens = usageObj["reasoningTokens"].toInt();
|
||||
}
|
||||
|
||||
return message;
|
||||
}
|
||||
|
||||
|
||||
@@ -257,6 +257,12 @@ void ClientInterface::sendMessage(
|
||||
this,
|
||||
&ClientInterface::handleFullResponse,
|
||||
Qt::UniqueConnection);
|
||||
connect(
|
||||
provider->client(),
|
||||
&::LLMQore::BaseClient::requestFinalized,
|
||||
this,
|
||||
&ClientInterface::handleRequestFinalized,
|
||||
Qt::UniqueConnection);
|
||||
connect(
|
||||
provider->client(),
|
||||
&::LLMQore::BaseClient::requestFailed,
|
||||
@@ -449,6 +455,29 @@ void ClientInterface::handleFullResponse(const QString &requestId, const QString
|
||||
m_awaitingContinuation.remove(requestId);
|
||||
}
|
||||
|
||||
void ClientInterface::handleRequestFinalized(
|
||||
const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info)
|
||||
{
|
||||
if (!m_activeRequests.contains(requestId))
|
||||
return;
|
||||
if (!info.usage)
|
||||
return;
|
||||
|
||||
const auto &u = *info.usage;
|
||||
m_chatModel->setMessageUsage(
|
||||
requestId, u.promptTokens, u.completionTokens, u.cachedPromptTokens, u.reasoningTokens);
|
||||
|
||||
emit messageUsageReceived(
|
||||
u.promptTokens, u.completionTokens, u.cachedPromptTokens, u.reasoningTokens);
|
||||
|
||||
LOG_MESSAGE(QString("Chat usage [%1]: prompt=%2 completion=%3 cached=%4 reasoning=%5")
|
||||
.arg(requestId)
|
||||
.arg(u.promptTokens)
|
||||
.arg(u.completionTokens)
|
||||
.arg(u.cachedPromptTokens)
|
||||
.arg(u.reasoningTokens));
|
||||
}
|
||||
|
||||
void ClientInterface::handleRequestFailed(const QString &requestId, const QString &error)
|
||||
{
|
||||
auto it = m_activeRequests.find(requestId);
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "ChatModel.hpp"
|
||||
#include "Provider.hpp"
|
||||
#include "pluginllmcore/IPromptProvider.hpp"
|
||||
#include <LLMQore/BaseClient.hpp>
|
||||
#include <context/ContextManager.hpp>
|
||||
|
||||
namespace QodeAssist::Chat {
|
||||
@@ -42,10 +43,13 @@ signals:
|
||||
void errorOccurred(const QString &error);
|
||||
void messageReceivedCompletely();
|
||||
void requestStarted(const QString &requestId);
|
||||
void messageUsageReceived(
|
||||
int promptTokens, int completionTokens, int cachedPromptTokens, int reasoningTokens);
|
||||
|
||||
private slots:
|
||||
void handlePartialResponse(const QString &requestId, const QString &partialText);
|
||||
void handleFullResponse(const QString &requestId, const QString &fullText);
|
||||
void handleRequestFinalized(const ::LLMQore::RequestID &requestId, const ::LLMQore::CompletionInfo &info);
|
||||
void handleRequestFailed(const QString &requestId, const QString &error);
|
||||
void handleThinkingBlockReceived(
|
||||
const QString &requestId, const QString &thinking, const QString &signature);
|
||||
|
||||
@@ -91,7 +91,10 @@ ChatRootView {
|
||||
loadButton.onClicked: root.showLoadDialog()
|
||||
clearButton.onClicked: root.clearChat()
|
||||
tokensBadge {
|
||||
text: qsTr("%1/%2").arg(root.inputTokensCount).arg(root.chatModel.tokensThreshold)
|
||||
text: qsTr("next ~%1 · session ↑%2 ↓%3")
|
||||
.arg(root.inputTokensCount)
|
||||
.arg(root.chatModel.sessionPromptTokens)
|
||||
.arg(root.chatModel.sessionCompletionTokens)
|
||||
}
|
||||
recentPath {
|
||||
text: qsTr("Сhat name: %1").arg(root.chatFileName.length > 0 ? root.chatFileName : "Unsaved")
|
||||
@@ -270,6 +273,10 @@ ChatRootView {
|
||||
codeFontSize: root.codeFontSize
|
||||
textFontSize: root.textFontSize
|
||||
textFormat: root.textFormat
|
||||
promptTokens: model.promptTokens || 0
|
||||
completionTokens: model.completionTokens || 0
|
||||
cachedPromptTokens: model.cachedPromptTokens || 0
|
||||
reasoningTokens: model.reasoningTokens || 0
|
||||
|
||||
onResetChatToMessage: function(idx) {
|
||||
messageInput.text = model.content
|
||||
|
||||
@@ -34,6 +34,11 @@ Rectangle {
|
||||
property bool isUserMessage: false
|
||||
property int messageIndex: -1
|
||||
|
||||
property int promptTokens: 0
|
||||
property int completionTokens: 0
|
||||
property int cachedPromptTokens: 0
|
||||
property int reasoningTokens: 0
|
||||
|
||||
signal resetChatToMessage(int index)
|
||||
signal openFileRequested(string filePath)
|
||||
|
||||
@@ -135,6 +140,39 @@ Rectangle {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Token usage row for a message: prompt, completion and combined counts.
// Hidden for user messages and for messages without any usage numbers.
RowLayout {
    id: usageBadge

    // Point size shared by the three counters: two below the message text size,
    // but never smaller than 7.
    readonly property real counterPointSize: Math.max(root.textFontSize - 2, 7)

    Layout.fillWidth: true
    Layout.leftMargin: 10
    Layout.rightMargin: 10
    spacing: 8
    visible: !root.isUserMessage
             && (root.promptTokens > 0 || root.completionTokens > 0)

    // Stretching spacer placed ahead of the counters.
    Item { Layout.fillWidth: true }

    // Prompt tokens, with the cached share appended when there is one.
    Text {
        text: root.cachedPromptTokens > 0
              ? qsTr("↑ %1 (cached %2)").arg(root.promptTokens).arg(root.cachedPromptTokens)
              : qsTr("↑ %1").arg(root.promptTokens)
        color: palette.placeholderText
        font.pointSize: usageBadge.counterPointSize
    }

    // Completion tokens, with the reasoning share appended when there is one.
    Text {
        text: root.reasoningTokens > 0
              ? qsTr("↓ %1 (reasoning %2)").arg(root.completionTokens).arg(root.reasoningTokens)
              : qsTr("↓ %1").arg(root.completionTokens)
        color: palette.placeholderText
        font.pointSize: usageBadge.counterPointSize
    }

    // Prompt and completion tokens combined.
    Text {
        text: qsTr("Σ %1").arg(root.promptTokens + root.completionTokens)
        color: palette.placeholderText
        font.pointSize: usageBadge.counterPointSize
    }
}
|
||||
}
|
||||
|
||||
Rectangle {
|
||||
|
||||
Reference in New Issue
Block a user