From e66f4672148754eec216857545c6288bf8e3e860 Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Sun, 9 Mar 2025 22:57:33 +0100 Subject: [PATCH] feat: Add llama.cpp provider and fim template (#118) --- CMakeLists.txt | 2 + llmcore/ContextData.hpp | 2 +- llmcore/ProviderID.hpp | 3 +- providers/LlamaCppProvider.cpp | 213 +++++++++++++++++++++++++++++++++ providers/LlamaCppProvider.hpp | 47 ++++++++ providers/Providers.hpp | 2 + templates/Alpaca.hpp | 9 +- templates/ChatML.hpp | 9 +- templates/Llama2.hpp | 9 +- templates/Llama3.hpp | 9 +- templates/LlamaCppFim.hpp | 56 +++++++++ templates/OpenAICompatible.hpp | 7 +- templates/Templates.hpp | 2 + 13 files changed, 349 insertions(+), 21 deletions(-) create mode 100644 providers/LlamaCppProvider.cpp create mode 100644 providers/LlamaCppProvider.hpp create mode 100644 templates/LlamaCppFim.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 83ea3e4..a63c5c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,7 @@ add_qtc_plugin(QodeAssist templates/Llama2.hpp templates/CodeLlamaQMLFim.hpp templates/GoogleAI.hpp + templates/LlamaCppFim.hpp providers/Providers.hpp providers/OllamaProvider.hpp providers/OllamaProvider.cpp providers/ClaudeProvider.hpp providers/ClaudeProvider.cpp @@ -78,6 +79,7 @@ add_qtc_plugin(QodeAssist providers/OpenAICompatProvider.hpp providers/OpenAICompatProvider.cpp providers/OpenRouterAIProvider.hpp providers/OpenRouterAIProvider.cpp providers/GoogleAIProvider.hpp providers/GoogleAIProvider.cpp + providers/LlamaCppProvider.hpp providers/LlamaCppProvider.cpp QodeAssist.qrc LSPCompletion.hpp LLMSuggestion.hpp LLMSuggestion.cpp diff --git a/llmcore/ContextData.hpp b/llmcore/ContextData.hpp index 1d862a5..25295f1 100644 --- a/llmcore/ContextData.hpp +++ b/llmcore/ContextData.hpp @@ -30,7 +30,7 @@ struct Message QString content; // clang-format off - auto operator<=>(const Message&) const = default; + bool operator==(const Message&) const = 
default; // clang-format on }; diff --git a/llmcore/ProviderID.hpp b/llmcore/ProviderID.hpp index b15a484..64ea687 100644 --- a/llmcore/ProviderID.hpp +++ b/llmcore/ProviderID.hpp @@ -28,6 +28,7 @@ enum class ProviderID { OpenAICompatible, MistralAI, OpenRouter, - GoogleAI + GoogleAI, + LlamaCpp }; } diff --git a/providers/LlamaCppProvider.cpp b/providers/LlamaCppProvider.cpp new file mode 100644 index 0000000..70712ec --- /dev/null +++ b/providers/LlamaCppProvider.cpp @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2024 Petr Mironychev + * + * This file is part of QodeAssist. + * + * QodeAssist is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * QodeAssist is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "LlamaCppProvider.hpp"
+
+#include <QJsonArray> // NOTE(review): the five <...> include targets were stripped from this copy of the patch; reconstructed from the Qt types used below - confirm against upstream
+#include <QJsonDocument>
+#include <QJsonObject>
+#include <QNetworkReply>
+#include <QNetworkRequest>
+
+#include "llmcore/OpenAIMessage.hpp"
+#include "llmcore/ValidationUtils.hpp"
+#include "logger/Logger.hpp"
+#include "settings/ChatAssistantSettings.hpp"
+#include "settings/CodeCompletionSettings.hpp"
+
+namespace QodeAssist::Providers {
+
+QString LlamaCppProvider::name() const
+{
+    return "llama.cpp"; // display name; also used in the log message of prepareRequest()
+}
+
+QString LlamaCppProvider::url() const
+{
+    return "http://localhost:8080"; // default base URL offered for this provider
+}
+
+QString LlamaCppProvider::completionEndpoint() const
+{
+    return "/infill"; // fill-in-the-middle endpoint used for code completion
+}
+
+QString LlamaCppProvider::chatEndpoint() const
+{
+    return "/v1/chat/completions"; // OpenAI-style chat endpoint
+}
+
+bool LlamaCppProvider::supportsModelListing() const
+{
+    return false; // getInstalledModels() below always returns an empty list
+}
+
+void LlamaCppProvider::prepareRequest(
+    QJsonObject &request,
+    LLMCore::PromptTemplate *prompt,
+    LLMCore::ContextData context,
+    LLMCore::RequestType type)
+{
+    if (!prompt->isSupportProvider(providerID())) { // mismatch is only logged; the request is still built
+        LOG_MESSAGE(QString("Template %1 doesn't support %2 provider").arg(prompt->name(), name())); // %1 = template, %2 = provider
+    }
+
+    prompt->prepareRequest(request, context); // the template writes the prompt fields first
+
+    auto applyModelParams = [&request](const auto &settings) { // shared by the completion and chat settings objects
+        request["max_tokens"] = settings.maxTokens();
+        request["temperature"] = settings.temperature();
+
+        if (settings.useTopP()) // optional samplers are sent only when enabled in settings
+            request["top_p"] = settings.topP();
+        if (settings.useTopK())
+            request["top_k"] = settings.topK();
+        if (settings.useFrequencyPenalty())
+            request["frequency_penalty"] = settings.frequencyPenalty();
+        if (settings.usePresencePenalty())
+            request["presence_penalty"] = settings.presencePenalty();
+    };
+
+    if (type == LLMCore::RequestType::CodeCompletion) {
+        applyModelParams(Settings::codeCompletionSettings());
+    } else {
+        applyModelParams(Settings::chatAssistantSettings());
+    }
+}
+
+bool LlamaCppProvider::handleResponse(QNetworkReply *reply, QString &accumulatedResponse)
+{
+    QByteArray data = reply->readAll(); // whatever has arrived so far; may hold several lines
+    if (data.isEmpty()) {
+        return false;
+    }
+
+    bool isDone = data.contains("\"stop\":true") || data.contains("data: [DONE]"); // whole-chunk scan for either end marker
+
+    QByteArrayList lines = data.split('\n');
+    for (const QByteArray &line : lines) {
+        if (line.trimmed().isEmpty()) {
+            continue;
+        }
+
+        if (line == "data: [DONE]") { // end-of-stream sentinel line, carries no JSON
+            isDone = true;
+            continue;
+        }
+
+        QByteArray jsonData = line;
+        if (line.startsWith("data: ")) {
+            jsonData = line.mid(6); // drop the 6-byte "data: " prefix
+        }
+
+        QJsonParseError error;
+        QJsonDocument doc = QJsonDocument::fromJson(jsonData, &error);
+        if (doc.isNull()) { // not parseable as JSON: skip this line silently
+            continue;
+        }
+
+        QJsonObject obj = doc.object();
+
+        if (obj.contains("content")) { // object with a top-level "content" string
+            QString content = obj["content"].toString();
+            if (!content.isEmpty()) {
+                accumulatedResponse += content;
+            }
+        } else if (obj.contains("choices")) { // OpenAI-style object, decoded by OpenAIMessage
+            auto message = LLMCore::OpenAIMessage::fromJson(obj);
+            if (message.hasError()) {
+                LOG_MESSAGE("Error in llama.cpp response: " + message.error);
+                continue;
+            }
+
+            QString content = message.getContent();
+            if (!content.isEmpty()) {
+                accumulatedResponse += content;
+            }
+
+            if (message.isDone()) {
+                isDone = true;
+            }
+        }
+
+        if (obj["stop"].toBool()) { // a true "stop" field on any parsed object also ends the stream
+            isDone = true;
+        }
+    }
+
+    return isDone; // true once any end-of-stream marker has been seen in this chunk
+}
+
+QList<QString> LlamaCppProvider::getInstalledModels(const QString &url) // NOTE(review): <QString> was stripped in this copy; confirm it matches the base-class signature
+{
+    return {}; // no model listing is performed; url is unused
+}
+
+QList<QString> LlamaCppProvider::validateRequest(
+    const QJsonObject &request, LLMCore::TemplateType type)
+{
+    if (type == LLMCore::TemplateType::FIM) { // FIM requests are checked against the infill field set
+        const auto infillReq = QJsonObject{
+            {"model", {}},
+            {"input_prefix", {}},
+            {"input_suffix", {}},
+            {"prompt", {}},
+            {"temperature", {}},
+            {"top_p", {}},
+            {"top_k", {}},
+            {"max_tokens", {}},
+            {"frequency_penalty", {}},
+            {"presence_penalty", {}},
+            {"stop", QJsonArray{}},
+            {"stream", {}}};
+
+        return LLMCore::ValidationUtils::validateRequestFields(request, infillReq);
+    } else { // every other template type is checked against the chat field set
+        const auto chatReq = QJsonObject{
+            {"model", {}},
+            {"messages", QJsonArray{{QJsonObject{{"role", {}}, {"content", {}}}}}},
+            {"temperature", {}},
+            {"max_tokens", {}},
+            {"top_p", {}},
+            {"top_k", {}},
+            {"frequency_penalty", {}},
+            {"presence_penalty", {}},
+            {"stop", QJsonArray{}},
+            {"stream", {}}};
+
+        return LLMCore::ValidationUtils::validateRequestFields(request, chatReq);
+    }
+}
+
+QString LlamaCppProvider::apiKey() const
+{
+    return {}; // no API key is supplied for this provider
+}
+
+void LlamaCppProvider::prepareNetworkRequest(QNetworkRequest &networkRequest) const
+{
+    networkRequest.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); // only header set; no authorization
+}
+
+LLMCore::ProviderID LlamaCppProvider::providerID() const
+{
+    return LLMCore::ProviderID::LlamaCpp;
+}
+
+} // namespace QodeAssist::Providers
diff --git a/providers/LlamaCppProvider.hpp b/providers/LlamaCppProvider.hpp
new file mode 100644
index 0000000..5b398b9
--- /dev/null
+++ b/providers/LlamaCppProvider.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2024 Petr Mironychev
+ *
+ * This file is part of QodeAssist.
+ *
+ * QodeAssist is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * QodeAssist is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "llmcore/Provider.hpp"
+
+namespace QodeAssist::Providers {
+
+class LlamaCppProvider : public LLMCore::Provider // provider that sends requests to a llama.cpp server
+{
+public:
+    QString name() const override; // display name of the provider
+    QString url() const override; // default base URL
+    QString completionEndpoint() const override; // path used for code-completion (FIM) requests
+    QString chatEndpoint() const override; // path used for chat requests
+    bool supportsModelListing() const override;
+    void prepareRequest(
+        QJsonObject &request,
+        LLMCore::PromptTemplate *prompt,
+        LLMCore::ContextData context,
+        LLMCore::RequestType type) override;
+    bool handleResponse(QNetworkReply *reply, QString &accumulatedResponse) override; // returns true when the stream is finished
+    QList<QString> getInstalledModels(const QString &url) override; // NOTE(review): <QString> was stripped in this copy; confirm against LLMCore::Provider
+    QList<QString> validateRequest(const QJsonObject &request, LLMCore::TemplateType type) override;
+    QString apiKey() const override;
+    void prepareNetworkRequest(QNetworkRequest &networkRequest) const override;
+    LLMCore::ProviderID providerID() const override;
+};
+
+} // namespace QodeAssist::Providers
diff --git a/providers/Providers.hpp b/providers/Providers.hpp
index 0a6999d..3671f60 100644
--- a/providers/Providers.hpp
+++ b/providers/Providers.hpp
@@ -23,6 +23,7 @@
 #include "providers/ClaudeProvider.hpp"
 #include "providers/GoogleAIProvider.hpp"
 #include "providers/LMStudioProvider.hpp"
+#include "providers/LlamaCppProvider.hpp"
 #include "providers/MistralAIProvider.hpp"
 #include "providers/OllamaProvider.hpp"
 #include "providers/OpenAICompatProvider.hpp"
@@ -42,6 +43,7 @@ inline void registerProviders()
     providerManager.registerProvider();
     providerManager.registerProvider();
     providerManager.registerProvider();
+    providerManager.registerProvider<LlamaCppProvider>();
 }
 
 } // namespace QodeAssist::Providers
diff --git a/templates/Alpaca.hpp b/templates/Alpaca.hpp
index 89bb097..e95e6ac 100644
--- a/templates/Alpaca.hpp
+++ b/templates/Alpaca.hpp
@@ -75,10 +75,11 @@ public:
     bool isSupportProvider(LLMCore::ProviderID id) const override
     {
         switch (id) {
-        case QodeAssist::LLMCore::ProviderID::Ollama:
-        case QodeAssist::LLMCore::ProviderID::LMStudio:
-        case QodeAssist::LLMCore::ProviderID::OpenRouter:
-        case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::Ollama:
+        case LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::OpenRouter:
+        case LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::LlamaCpp:
             return true;
         default:
             return false;
diff --git a/templates/ChatML.hpp b/templates/ChatML.hpp
index c76399b..4241d67 100644
--- a/templates/ChatML.hpp
+++ b/templates/ChatML.hpp
@@ -76,10 +76,11 @@ public:
     bool isSupportProvider(LLMCore::ProviderID id) const override
     {
         switch (id) {
-        case QodeAssist::LLMCore::ProviderID::Ollama:
-        case QodeAssist::LLMCore::ProviderID::LMStudio:
-        case QodeAssist::LLMCore::ProviderID::OpenRouter:
-        case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::Ollama:
+        case LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::OpenRouter:
+        case LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::LlamaCpp:
             return true;
         default:
             return false;
diff --git a/templates/Llama2.hpp b/templates/Llama2.hpp
index c80f86b..5d9dd42 100644
--- a/templates/Llama2.hpp
+++ b/templates/Llama2.hpp
@@ -73,10 +73,11 @@ public:
     bool isSupportProvider(LLMCore::ProviderID id) const override
     {
         switch (id) {
-        case QodeAssist::LLMCore::ProviderID::Ollama:
-        case QodeAssist::LLMCore::ProviderID::LMStudio:
-        case QodeAssist::LLMCore::ProviderID::OpenRouter:
-        case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::Ollama:
+        case LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::OpenRouter:
+        case LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::LlamaCpp:
             return true;
         default:
             return false;
diff --git a/templates/Llama3.hpp b/templates/Llama3.hpp
index 28814b9..11812e2 100644
--- a/templates/Llama3.hpp
+++ b/templates/Llama3.hpp
@@ -80,10 +80,11 @@ public:
     bool isSupportProvider(LLMCore::ProviderID id) const override
     {
         switch (id) {
-        case QodeAssist::LLMCore::ProviderID::Ollama:
-        case QodeAssist::LLMCore::ProviderID::LMStudio:
-        case QodeAssist::LLMCore::ProviderID::OpenRouter:
-        case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::Ollama:
+        case LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::OpenRouter:
+        case LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::LlamaCpp:
             return true;
         default:
             return false;
diff --git a/templates/LlamaCppFim.hpp b/templates/LlamaCppFim.hpp
new file mode 100644
index 0000000..1bce7a8
--- /dev/null
+++ b/templates/LlamaCppFim.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2024 Petr Mironychev
+ *
+ * This file is part of QodeAssist.
+ *
+ * QodeAssist is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * QodeAssist is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "llmcore/PromptTemplate.hpp"
+
+namespace QodeAssist::Templates {
+
+class LlamaCppFim : public LLMCore::PromptTemplate // FIM template that fills the prefix/suffix fields of an infill request
+{
+public:
+    LLMCore::TemplateType type() const override { return LLMCore::TemplateType::FIM; }
+    QString name() const override { return "llama.cpp FIM"; }
+    QStringList stopWords() const override { return {}; } // this template defines no stop words
+
+    void prepareRequest(QJsonObject &request, const LLMCore::ContextData &context) const override
+    {
+        request["input_prefix"] = context.prefix.value_or(""); // a missing prefix is sent as an empty string
+        request["input_suffix"] = context.suffix.value_or(""); // a missing suffix is sent as an empty string
+    }
+
+    QString description() const override
+    {
+        return "Default llama.cpp FIM (Fill-in-Middle) /infill template with native format:\n\n"
+               "{\n"
+               "  \"input_prefix\": \"\",\n"
+               "  \"input_suffix\": \"\",\n"
+               "  \"input_extra\": \"\"\n" // NOTE(review): prepareRequest() above never sets input_extra - confirm this is intended
+               "}\n\n"
+               "Recommended for models with FIM capability.";
+    }
+
+    bool isSupportProvider(LLMCore::ProviderID id) const override
+    {
+        return id == LLMCore::ProviderID::LlamaCpp; // spelled like the other templates touched by this patch
+    }
+};
+
+} // namespace QodeAssist::Templates
diff --git a/templates/OpenAICompatible.hpp b/templates/OpenAICompatible.hpp
index fcfdb82..1625f38 100644
--- a/templates/OpenAICompatible.hpp
+++ b/templates/OpenAICompatible.hpp
@@ -63,9 +63,10 @@ public:
     bool isSupportProvider(LLMCore::ProviderID id) const override
     {
         switch (id) {
-        case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
-        case QodeAssist::LLMCore::ProviderID::OpenRouter:
-        case QodeAssist::LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::OpenAICompatible:
+        case LLMCore::ProviderID::OpenRouter:
+        case LLMCore::ProviderID::LMStudio:
+        case LLMCore::ProviderID::LlamaCpp:
             return true;
         default:
             return false;
diff --git a/templates/Templates.hpp b/templates/Templates.hpp
index 04775fd..4712cd3 100644
--- a/templates/Templates.hpp
+++ b/templates/Templates.hpp
@@ -34,6 +34,7 @@
 #include "templates/GoogleAI.hpp"
 #include "templates/Llama2.hpp"
 #include "templates/Llama3.hpp"
+#include "templates/LlamaCppFim.hpp"
 #include "templates/Qwen.hpp"
 #include "templates/StarCoder2Fim.hpp"
 
@@ -60,6 +61,7 @@ inline void registerTemplates()
     templateManager.registerTemplate();
     templateManager.registerTemplate();
     templateManager.registerTemplate();
+    templateManager.registerTemplate<LlamaCppFim>();
 }
 
 } // namespace QodeAssist::Templates