Mirror of https://github.com/Palm1r/QodeAssist.git, synced 2025-07-18 21:14:34 -04:00
♻️ refactor: Improve response handler for LMStudio
LMStudioProvider.cpp
@@ -25,6 +25,7 @@
 #include <QJsonObject>
 #include <QNetworkReply>
 
+#include "llmcore/OpenAIMessage.hpp"
 #include "logger/Logger.hpp"
 #include "settings/ChatAssistantSettings.hpp"
 #include "settings/CodeCompletionSettings.hpp"
@@ -101,43 +102,19 @@ void LMStudioProvider::prepareRequest(QJsonObject &request, LLMCore::RequestType
 
 bool LMStudioProvider::handleResponse(QNetworkReply *reply, QString &accumulatedResponse)
 {
-    bool isComplete = false;
-    while (reply->canReadLine()) {
-        QByteArray line = reply->readLine().trimmed();
-        if (line.isEmpty()) {
-            continue;
-        }
-        if (line == "data: [DONE]") {
-            isComplete = true;
-            break;
-        }
-        if (line.startsWith("data: ")) {
-            line = line.mid(6); // Remove "data: " prefix
-        }
-        QJsonDocument jsonResponse = QJsonDocument::fromJson(line);
-        if (jsonResponse.isNull()) {
-            qWarning() << "Invalid JSON response from LM Studio:" << line;
-            continue;
-        }
-        QJsonObject responseObj = jsonResponse.object();
-        if (responseObj.contains("choices")) {
-            QJsonArray choices = responseObj["choices"].toArray();
-            if (!choices.isEmpty()) {
-                QJsonObject choice = choices.first().toObject();
-                QJsonObject delta = choice["delta"].toObject();
-                if (delta.contains("content")) {
-                    QString completion = delta["content"].toString();
-
-                    accumulatedResponse += completion;
-                }
-                if (choice["finish_reason"].toString() == "stop") {
-                    isComplete = true;
-                    break;
-                }
-            }
-        }
-    }
-    return isComplete;
+    QByteArray data = reply->readAll();
+    if (data.isEmpty()) {
+        return false;
+    }
+
+    auto message = LLMCore::OpenAIMessage::fromJson(data);
+    if (message.hasError()) {
+        LOG_MESSAGE("Error in OpenAI response: " + message.error);
+        return false;
+    }
+
+    accumulatedResponse += message.getContent();
+    return message.isDone();
 }
 
 QList<QString> LMStudioProvider::getInstalledModels(const QString &url)
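The loop removed in the hunk above was unwrapping LM Studio's OpenAI-compatible streaming output by hand: server-sent-event lines prefixed with "data: " that carry a choices/delta/content payload. For reference, a self-contained sketch of that format and the minimal Qt parsing it takes; the sample payload follows the OpenAI chat-completions chunk shape and the helper name is illustrative, not code from this repository.

#include <QByteArray>
#include <QDebug>
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonObject>
#include <QString>

// Extracts the delta text from one OpenAI-style streaming line, e.g.
//   data: {"choices":[{"delta":{"content":"Hel"},"finish_reason":null}]}
// Returns an empty string for keep-alives, "data: [DONE]", or malformed JSON.
QString deltaFromStreamLine(QByteArray line)
{
    line = line.trimmed();
    if (line.isEmpty() || line == "data: [DONE]")
        return {};
    if (line.startsWith("data: "))
        line = line.mid(6); // strip the SSE prefix

    const QJsonDocument doc = QJsonDocument::fromJson(line);
    if (!doc.isObject())
        return {};

    const QJsonArray choices = doc.object().value("choices").toArray();
    if (choices.isEmpty())
        return {};

    return choices.at(0).toObject().value("delta").toObject().value("content").toString();
}

int main()
{
    const QByteArray sample =
        R"(data: {"choices":[{"delta":{"content":"Hello"},"finish_reason":null}]})";
    qDebug() << deltaFromStreamLine(sample); // "Hello"
    return 0;
}

The rewritten handler drops this per-line work because, as the new body shows, LLMCore::OpenAIMessage::fromJson is handed the whole buffered payload from readAll() at once.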
OllamaMessage.cpp (deleted)
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2024 Petr Mironychev
- *
- * This file is part of QodeAssist.
- *
- * QodeAssist is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * QodeAssist is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
- */
-
-#include "OllamaMessage.hpp"
-
-namespace QodeAssist::Providers {
-
-OllamaMessage OllamaMessage::fromJson(const QJsonObject &obj, Type type)
-{
-    OllamaMessage msg;
-    msg.model = obj["model"].toString();
-    msg.createdAt = QDateTime::fromString(obj["created_at"].toString(), Qt::ISODate);
-    msg.done = obj["done"].toBool();
-    msg.doneReason = obj["done_reason"].toString();
-    msg.error = obj["error"].toString();
-
-    if (type == Type::Generate) {
-        auto &genResponse = msg.response.emplace<GenerateResponse>();
-        genResponse.response = obj["response"].toString();
-        if (msg.done && obj.contains("context")) {
-            const auto array = obj["context"].toArray();
-            genResponse.context.reserve(array.size());
-            for (const auto &val : array) {
-                genResponse.context.append(val.toInt());
-            }
-        }
-    } else {
-        auto &chatResponse = msg.response.emplace<ChatResponse>();
-        const auto msgObj = obj["message"].toObject();
-        chatResponse.role = msgObj["role"].toString();
-        chatResponse.content = msgObj["content"].toString();
-    }
-
-    if (msg.done) {
-        msg.metrics
-            = {obj["total_duration"].toVariant().toLongLong(),
-               obj["load_duration"].toVariant().toLongLong(),
-               obj["prompt_eval_count"].toVariant().toLongLong(),
-               obj["prompt_eval_duration"].toVariant().toLongLong(),
-               obj["eval_count"].toVariant().toLongLong(),
-               obj["eval_duration"].toVariant().toLongLong()};
-    }
-
-    return msg;
-}
-
-QString OllamaMessage::getContent() const
-{
-    if (std::holds_alternative<GenerateResponse>(response)) {
-        return std::get<GenerateResponse>(response).response;
-    }
-    return std::get<ChatResponse>(response).content;
-}
-
-bool OllamaMessage::hasError() const
-{
-    return !error.isEmpty();
-}
-
-} // namespace QodeAssist::Providers
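A short usage sketch of the class removed above (per the include change in OllamaProvider.cpp further down, it lives on under llmcore): feeding a sample generate-style response through fromJson and reading the result. The members and methods used are the ones visible in the deleted code; the JSON literal and model name are illustrative only.

#include <QByteArray>
#include <QDebug>
#include <QJsonDocument>
#include <QJsonObject>

#include "OllamaMessage.hpp" // the header shown (deleted) in the next hunk

using QodeAssist::Providers::OllamaMessage;

int main()
{
    // Trimmed-down example shaped like a chunk from Ollama's /api/generate.
    const QByteArray sample = R"({
        "model": "codellama",
        "created_at": "2024-10-01T12:00:00Z",
        "response": "int main() {",
        "done": false
    })";

    const QJsonObject obj = QJsonDocument::fromJson(sample).object();
    const OllamaMessage msg = OllamaMessage::fromJson(obj, OllamaMessage::Type::Generate);

    if (!msg.hasError()) {
        qDebug() << msg.model;        // "codellama"
        qDebug() << msg.getContent(); // "int main() {"
        qDebug() << msg.done;         // false
    }
    return 0;
}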
OllamaMessage.hpp (deleted)
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2024 Petr Mironychev
- *
- * This file is part of QodeAssist.
- *
- * QodeAssist is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * QodeAssist is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <QDateTime>
-#include <QJsonArray>
-#include <QJsonObject>
-#include <QObject>
-
-namespace QodeAssist::Providers {
-
-class OllamaMessage
-{
-public:
-    enum class Type { Generate, Chat };
-
-    struct Metrics
-    {
-        qint64 totalDuration{0};
-        qint64 loadDuration{0};
-        qint64 promptEvalCount{0};
-        qint64 promptEvalDuration{0};
-        qint64 evalCount{0};
-        qint64 evalDuration{0};
-    };
-
-    struct GenerateResponse
-    {
-        QString response;
-        QVector<int> context;
-    };
-
-    struct ChatResponse
-    {
-        QString role;
-        QString content;
-    };
-
-    QString model;
-    QDateTime createdAt;
-    std::variant<GenerateResponse, ChatResponse> response;
-    bool done{false};
-    QString doneReason;
-    Metrics metrics;
-    QString error;
-
-    static OllamaMessage fromJson(const QJsonObject &obj, Type type);
-
-    QString getContent() const;
-
-    bool hasError() const;
-};
-
-} // namespace QodeAssist::Providers
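The Metrics counters declared above are copied from Ollama's response fields (total_duration, eval_count, and so on), which Ollama's API reports in nanoseconds. A minimal sketch of turning them into a tokens-per-second figure, using a local stand-in struct with the same field names; the nanosecond unit is Ollama's documented convention rather than anything stated in this header.

#include <cstdint>
#include <cstdio>

// Local stand-in mirroring OllamaMessage::Metrics; all durations are
// nanosecond counts as reported by Ollama.
struct Metrics
{
    int64_t totalDuration{0};
    int64_t loadDuration{0};
    int64_t promptEvalCount{0};
    int64_t promptEvalDuration{0};
    int64_t evalCount{0};
    int64_t evalDuration{0};
};

// Generation speed: tokens produced divided by evaluation time in seconds.
double tokensPerSecond(const Metrics &m)
{
    if (m.evalDuration <= 0)
        return 0.0;
    return static_cast<double>(m.evalCount) / (static_cast<double>(m.evalDuration) / 1e9);
}

int main()
{
    Metrics m;
    m.evalCount = 128;
    m.evalDuration = 2000000000; // 2 s spent generating, in nanoseconds
    std::printf("%.1f tok/s\n", tokensPerSecond(m)); // prints 64.0 tok/s
    return 0;
}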
OllamaProvider.cpp
@@ -25,7 +25,7 @@
 #include <QNetworkReply>
 #include <QtCore/qeventloop.h>
 
-#include "OllamaMessage.hpp"
+#include "llmcore/OllamaMessage.hpp"
 #include "logger/Logger.hpp"
 #include "settings/ChatAssistantSettings.hpp"
 #include "settings/CodeCompletionSettings.hpp"
@@ -88,41 +88,23 @@ void OllamaProvider::prepareRequest(QJsonObject &request, LLMCore::RequestType t
 
 bool OllamaProvider::handleResponse(QNetworkReply *reply, QString &accumulatedResponse)
 {
-    const QString endpoint = reply->url().path();
-    auto messageType = endpoint == completionEndpoint() ? OllamaMessage::Type::Generate
-                                                        : OllamaMessage::Type::Chat;
-
-    auto processMessage =
-        [&accumulatedResponse](const QJsonDocument &doc, OllamaMessage::Type messageType) {
-            if (doc.isNull()) {
-                LOG_MESSAGE("Invalid JSON response from Ollama");
-                return false;
-            }
-
-            auto message = OllamaMessage::fromJson(doc.object(), messageType);
-            if (message.hasError()) {
-                LOG_MESSAGE("Error in Ollama response: " + message.error);
-                return false;
-            }
-
-            accumulatedResponse += message.getContent();
-            return message.done;
-        };
-
-    if (reply->canReadLine()) {
-        while (reply->canReadLine()) {
-            QByteArray line = reply->readLine().trimmed();
-            if (line.isEmpty())
-                continue;
-
-            if (processMessage(QJsonDocument::fromJson(line), messageType)) {
-                return true;
-            }
-        }
-    } else {
-        return processMessage(QJsonDocument::fromJson(reply->readAll()), messageType);
-    }
-    return false;
+    QByteArray data = reply->readAll();
+    if (data.isEmpty()) {
+        return false;
+    }
+
+    const QString endpoint = reply->url().path();
+    auto messageType = endpoint == completionEndpoint() ? LLMCore::OllamaMessage::Type::Generate
+                                                        : LLMCore::OllamaMessage::Type::Chat;
+
+    auto message = LLMCore::OllamaMessage::fromJson(data, messageType);
+    if (message.hasError()) {
+        LOG_MESSAGE("Error in Ollama response: " + message.error);
+        return false;
+    }
+
+    accumulatedResponse += message.getContent();
+    return message.done;
 }
 
 QList<QString> OllamaProvider::getInstalledModels(const QString &url)
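Both rewritten handlers follow the same streaming contract: read whatever has arrived on the reply, append any new text to accumulatedResponse, and return true once the provider reports completion. A plausible way to drive such a handler from a QNetworkReply is sketched below; the request setup, the body, and how QodeAssist actually wires providers to replies are placeholders outside this diff.

#include <QByteArray>
#include <QDebug>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QObject>
#include <QString>
#include <functional>
#include <memory>

// `handle` stands in for a bound call to a provider's handleResponse, e.g.
// [p](QNetworkReply *r, QString &acc) { return p->handleResponse(r, acc); }.
void streamCompletion(QNetworkAccessManager *manager,
                      const QNetworkRequest &request,
                      const QByteArray &body,
                      std::function<bool(QNetworkReply *, QString &)> handle)
{
    QNetworkReply *reply = manager->post(request, body);
    auto accumulated = std::make_shared<QString>();

    // Invoked each time a new chunk arrives; the handler consumes it and
    // signals completion by returning true.
    QObject::connect(reply, &QNetworkReply::readyRead, reply, [=]() {
        if (handle(reply, *accumulated)) {
            qDebug().noquote() << *accumulated;
            reply->close();
        }
    });
    QObject::connect(reply, &QNetworkReply::finished, reply, &QNetworkReply::deleteLater);
}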