mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2026-02-12 10:10:44 -05:00
Compare commits
8 Commits
v0.5.0
...
dev-rag-ba
| Author | SHA1 | Date | |
|---|---|---|---|
| 142afa725f | |||
| f36db033e6 | |||
| 5dfcf74128 | |||
| 02101665ca | |||
| 77a03d42ed | |||
| 09c38c8b0e | |||
| 7b73d7af7b | |||
| 5a426b4d9f |
@ -24,21 +24,24 @@
|
|||||||
#include <QFileDialog>
|
#include <QFileDialog>
|
||||||
#include <QMessageBox>
|
#include <QMessageBox>
|
||||||
|
|
||||||
|
#include <coreplugin/editormanager/editormanager.h>
|
||||||
#include <coreplugin/icore.h>
|
#include <coreplugin/icore.h>
|
||||||
#include <projectexplorer/project.h>
|
#include <projectexplorer/project.h>
|
||||||
#include <projectexplorer/projectexplorer.h>
|
#include <projectexplorer/projectexplorer.h>
|
||||||
#include <projectexplorer/projectmanager.h>
|
#include <projectexplorer/projectmanager.h>
|
||||||
|
#include <projectexplorer/projecttree.h>
|
||||||
#include <utils/theme/theme.h>
|
#include <utils/theme/theme.h>
|
||||||
#include <utils/utilsicons.h>
|
#include <utils/utilsicons.h>
|
||||||
#include <coreplugin/editormanager/editormanager.h>
|
|
||||||
|
|
||||||
#include "ChatAssistantSettings.hpp"
|
#include "ChatAssistantSettings.hpp"
|
||||||
#include "ChatSerializer.hpp"
|
#include "ChatSerializer.hpp"
|
||||||
#include "GeneralSettings.hpp"
|
#include "GeneralSettings.hpp"
|
||||||
#include "Logger.hpp"
|
#include "Logger.hpp"
|
||||||
#include "ProjectSettings.hpp"
|
#include "ProjectSettings.hpp"
|
||||||
#include "context/TokenUtils.hpp"
|
|
||||||
#include "context/ContextManager.hpp"
|
#include "context/ContextManager.hpp"
|
||||||
|
#include "context/FileChunker.hpp"
|
||||||
|
#include "context/RAGManager.hpp"
|
||||||
|
#include "context/TokenUtils.hpp"
|
||||||
|
|
||||||
namespace QodeAssist::Chat {
|
namespace QodeAssist::Chat {
|
||||||
|
|
||||||
@ -447,6 +450,69 @@ void ChatRootView::openChatHistoryFolder()
|
|||||||
QDesktopServices::openUrl(url);
|
QDesktopServices::openUrl(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ChatRootView.cpp
|
||||||
|
|
||||||
|
void ChatRootView::testRAG(const QString &message)
|
||||||
|
{
|
||||||
|
auto project = ProjectExplorer::ProjectTree::currentProject();
|
||||||
|
if (!project) {
|
||||||
|
qDebug() << "No active project found";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const QString TEST_QUERY = message;
|
||||||
|
|
||||||
|
qDebug() << "Starting RAG test with query:";
|
||||||
|
qDebug() << TEST_QUERY;
|
||||||
|
qDebug() << "\nFirst, processing project files...";
|
||||||
|
|
||||||
|
auto files = Context::ContextManager::instance().getProjectSourceFiles(project);
|
||||||
|
// Было: auto future = Context::RAGManager::instance().processFiles(project, files);
|
||||||
|
// Стало:
|
||||||
|
auto future = Context::RAGManager::instance().processProjectFiles(project, files);
|
||||||
|
|
||||||
|
connect(
|
||||||
|
&Context::RAGManager::instance(),
|
||||||
|
&Context::RAGManager::vectorizationProgress,
|
||||||
|
this,
|
||||||
|
[](int processed, int total) {
|
||||||
|
qDebug() << QString("Vectorization progress: %1 of %2 files").arg(processed).arg(total);
|
||||||
|
});
|
||||||
|
|
||||||
|
connect(
|
||||||
|
&Context::RAGManager::instance(),
|
||||||
|
&Context::RAGManager::vectorizationFinished,
|
||||||
|
this,
|
||||||
|
[this, project, TEST_QUERY]() {
|
||||||
|
qDebug() << "\nVectorization completed. Starting similarity search...\n";
|
||||||
|
// Было: Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5);
|
||||||
|
// Стало:
|
||||||
|
auto future = Context::RAGManager::instance().findRelevantChunks(TEST_QUERY, project, 5);
|
||||||
|
future.then([](const QList<Context::RAGManager::ChunkSearchResult> &results) {
|
||||||
|
qDebug() << "Found" << results.size() << "relevant chunks:";
|
||||||
|
for (const auto &result : results) {
|
||||||
|
qDebug() << "File:" << result.filePath;
|
||||||
|
qDebug() << "Lines:" << result.startLine << "-" << result.endLine;
|
||||||
|
qDebug() << "Score:" << result.combinedScore;
|
||||||
|
qDebug() << "Content:" << result.content;
|
||||||
|
qDebug() << "---";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void ChatRootView::testChunking()
|
||||||
|
{
|
||||||
|
auto project = ProjectExplorer::ProjectTree::currentProject();
|
||||||
|
if (!project) {
|
||||||
|
qDebug() << "No active project found";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Context::FileChunker::ChunkingConfig config;
|
||||||
|
Context::ContextManager::instance().testProjectChunks(project, config);
|
||||||
|
}
|
||||||
|
|
||||||
void ChatRootView::updateInputTokensCount()
|
void ChatRootView::updateInputTokensCount()
|
||||||
{
|
{
|
||||||
int inputTokens = m_messageTokensCount;
|
int inputTokens = m_messageTokensCount;
|
||||||
|
|||||||
@ -65,6 +65,8 @@ public:
|
|||||||
Q_INVOKABLE void calculateMessageTokensCount(const QString &message);
|
Q_INVOKABLE void calculateMessageTokensCount(const QString &message);
|
||||||
Q_INVOKABLE void setIsSyncOpenFiles(bool state);
|
Q_INVOKABLE void setIsSyncOpenFiles(bool state);
|
||||||
Q_INVOKABLE void openChatHistoryFolder();
|
Q_INVOKABLE void openChatHistoryFolder();
|
||||||
|
Q_INVOKABLE void testRAG(const QString &message);
|
||||||
|
Q_INVOKABLE void testChunking();
|
||||||
|
|
||||||
Q_INVOKABLE void updateInputTokensCount();
|
Q_INVOKABLE void updateInputTokensCount();
|
||||||
int inputTokensCount() const;
|
int inputTokensCount() const;
|
||||||
|
|||||||
@ -198,6 +198,8 @@ ChatRootView {
|
|||||||
}
|
}
|
||||||
attachFiles.onClicked: root.showAttachFilesDialog()
|
attachFiles.onClicked: root.showAttachFilesDialog()
|
||||||
linkFiles.onClicked: root.showLinkFilesDialog()
|
linkFiles.onClicked: root.showLinkFilesDialog()
|
||||||
|
testRag.onClicked: root.testRAG(messageInput.text)
|
||||||
|
testChunks.onClicked: root.testChunking()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -30,6 +30,8 @@ Rectangle {
|
|||||||
property alias syncOpenFiles: syncOpenFilesId
|
property alias syncOpenFiles: syncOpenFilesId
|
||||||
property alias attachFiles: attachFilesId
|
property alias attachFiles: attachFilesId
|
||||||
property alias linkFiles: linkFilesId
|
property alias linkFiles: linkFilesId
|
||||||
|
property alias testRag: testRagId
|
||||||
|
property alias testChunks: testChunksId
|
||||||
|
|
||||||
color: palette.window.hslLightness > 0.5 ?
|
color: palette.window.hslLightness > 0.5 ?
|
||||||
Qt.darker(palette.window, 1.1) :
|
Qt.darker(palette.window, 1.1) :
|
||||||
@ -91,6 +93,18 @@ Rectangle {
|
|||||||
ToolTip.text: qsTr("Automatically synchronize currently opened files with the model context")
|
ToolTip.text: qsTr("Automatically synchronize currently opened files with the model context")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QoAButton {
|
||||||
|
id: testRagId
|
||||||
|
|
||||||
|
text: qsTr("Test RAG")
|
||||||
|
}
|
||||||
|
|
||||||
|
QoAButton {
|
||||||
|
id: testChunksId
|
||||||
|
|
||||||
|
text: qsTr("Test Chunks")
|
||||||
|
}
|
||||||
|
|
||||||
Item {
|
Item {
|
||||||
Layout.fillWidth: true
|
Layout.fillWidth: true
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,11 +5,20 @@ add_library(Context STATIC
|
|||||||
ContentFile.hpp
|
ContentFile.hpp
|
||||||
TokenUtils.hpp TokenUtils.cpp
|
TokenUtils.hpp TokenUtils.cpp
|
||||||
ProgrammingLanguage.hpp ProgrammingLanguage.cpp
|
ProgrammingLanguage.hpp ProgrammingLanguage.cpp
|
||||||
|
RAGManager.hpp RAGManager.cpp
|
||||||
|
RAGStorage.hpp RAGStorage.cpp
|
||||||
|
RAGData.hpp
|
||||||
|
RAGVectorizer.hpp RAGVectorizer.cpp
|
||||||
|
RAGSimilaritySearch.hpp RAGSimilaritySearch.cpp
|
||||||
|
RAGPreprocessor.hpp RAGPreprocessor.cpp
|
||||||
|
EnhancedRAGSimilaritySearch.hpp EnhancedRAGSimilaritySearch.cpp
|
||||||
|
FileChunker.hpp FileChunker.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(Context
|
target_link_libraries(Context
|
||||||
PUBLIC
|
PUBLIC
|
||||||
Qt::Core
|
Qt::Core
|
||||||
|
Qt::Sql
|
||||||
QtCreator::Core
|
QtCreator::Core
|
||||||
QtCreator::TextEditor
|
QtCreator::TextEditor
|
||||||
QtCreator::Utils
|
QtCreator::Utils
|
||||||
|
|||||||
@ -23,6 +23,11 @@
|
|||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QTextStream>
|
#include <QTextStream>
|
||||||
|
|
||||||
|
#include <projectexplorer/project.h>
|
||||||
|
#include <projectexplorer/projectnodes.h>
|
||||||
|
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
namespace QodeAssist::Context {
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
ContextManager &ContextManager::instance()
|
ContextManager &ContextManager::instance()
|
||||||
@ -64,4 +69,108 @@ ContentFile ContextManager::createContentFile(const QString &filePath) const
|
|||||||
return contentFile;
|
return contentFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ContextManager::isInBuildDirectory(const QString &filePath) const
|
||||||
|
{
|
||||||
|
static const QStringList buildDirPatterns
|
||||||
|
= {QString(QDir::separator()) + QLatin1String("build") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("Build") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("BUILD") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("debug") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("Debug") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("DEBUG") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("release") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("Release") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("RELEASE") + QDir::separator(),
|
||||||
|
QString(QDir::separator()) + QLatin1String("builds") + QDir::separator()};
|
||||||
|
|
||||||
|
// Нормализуем путь
|
||||||
|
QString normalizedPath = QDir::fromNativeSeparators(filePath);
|
||||||
|
|
||||||
|
// Проверяем, содержит ли путь паттерны build-директории
|
||||||
|
for (const QString &pattern : buildDirPatterns) {
|
||||||
|
// Сравниваем с нормализованным паттерном
|
||||||
|
QString normalizedPattern = QDir::fromNativeSeparators(pattern);
|
||||||
|
if (normalizedPath.contains(normalizedPattern)) {
|
||||||
|
qDebug() << "Skipping build file:" << filePath;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList ContextManager::getProjectSourceFiles(ProjectExplorer::Project *project) const
|
||||||
|
{
|
||||||
|
QStringList sourceFiles;
|
||||||
|
if (!project)
|
||||||
|
return sourceFiles;
|
||||||
|
|
||||||
|
auto projectNode = project->rootProjectNode();
|
||||||
|
if (!projectNode)
|
||||||
|
return sourceFiles;
|
||||||
|
|
||||||
|
projectNode->forEachNode(
|
||||||
|
[&sourceFiles, this](ProjectExplorer::FileNode *fileNode) {
|
||||||
|
if (fileNode) {
|
||||||
|
QString filePath = fileNode->filePath().toString();
|
||||||
|
if (shouldProcessFile(filePath) && !isInBuildDirectory(filePath)) {
|
||||||
|
sourceFiles.append(filePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
nullptr);
|
||||||
|
|
||||||
|
return sourceFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ContextManager::shouldProcessFile(const QString &filePath) const
|
||||||
|
{
|
||||||
|
static const QStringList supportedExtensions
|
||||||
|
= {"cpp", "hpp", "c", "h", "cc", "hh", "cxx", "hxx", "qml", "js", "py"};
|
||||||
|
|
||||||
|
QFileInfo fileInfo(filePath);
|
||||||
|
return supportedExtensions.contains(fileInfo.suffix().toLower());
|
||||||
|
}
|
||||||
|
|
||||||
|
void ContextManager::testProjectChunks(
|
||||||
|
ProjectExplorer::Project *project, const FileChunker::ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
if (!project) {
|
||||||
|
qDebug() << "No project provided";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "\nStarting test chunking for project:" << project->displayName();
|
||||||
|
|
||||||
|
// Get source files
|
||||||
|
QStringList sourceFiles = getProjectSourceFiles(project);
|
||||||
|
qDebug() << "Found" << sourceFiles.size() << "source files";
|
||||||
|
|
||||||
|
// Create chunker
|
||||||
|
auto chunker = new FileChunker(config, this);
|
||||||
|
|
||||||
|
// Connect progress and error signals
|
||||||
|
connect(chunker, &FileChunker::progressUpdated, this, [](int processed, int total) {
|
||||||
|
qDebug() << "Progress:" << processed << "/" << total << "files";
|
||||||
|
});
|
||||||
|
|
||||||
|
connect(chunker, &FileChunker::error, this, [](const QString &error) {
|
||||||
|
qDebug() << "Error:" << error;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Start chunking and handle results
|
||||||
|
auto future = chunker->chunkFiles(sourceFiles);
|
||||||
|
|
||||||
|
// Используем QFutureWatcher для обработки результатов
|
||||||
|
auto watcher = new QFutureWatcher<QList<FileChunk>>(this);
|
||||||
|
|
||||||
|
connect(watcher, &QFutureWatcher<QList<FileChunk>>::finished, this, [watcher, chunker]() {
|
||||||
|
// Очистка
|
||||||
|
watcher->deleteLater();
|
||||||
|
chunker->deleteLater();
|
||||||
|
});
|
||||||
|
|
||||||
|
watcher->setFuture(future);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace QodeAssist::Context
|
} // namespace QodeAssist::Context
|
||||||
|
|||||||
@ -19,10 +19,16 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "ContentFile.hpp"
|
||||||
|
|
||||||
#include <QObject>
|
#include <QObject>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
|
||||||
#include "ContentFile.hpp"
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
|
namespace ProjectExplorer {
|
||||||
|
class Project;
|
||||||
|
}
|
||||||
|
|
||||||
namespace QodeAssist::Context {
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
@ -32,15 +38,23 @@ class ContextManager : public QObject
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
static ContextManager &instance();
|
static ContextManager &instance();
|
||||||
|
|
||||||
QString readFile(const QString &filePath) const;
|
QString readFile(const QString &filePath) const;
|
||||||
QList<ContentFile> getContentFiles(const QStringList &filePaths) const;
|
QList<ContentFile> getContentFiles(const QStringList &filePaths) const;
|
||||||
|
QStringList getProjectSourceFiles(ProjectExplorer::Project *project) const;
|
||||||
|
|
||||||
|
void testProjectChunks(
|
||||||
|
ProjectExplorer::Project *project, const FileChunker::ChunkingConfig &config);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit ContextManager(QObject *parent = nullptr);
|
explicit ContextManager(QObject *parent = nullptr);
|
||||||
~ContextManager() = default;
|
~ContextManager() = default;
|
||||||
ContextManager(const ContextManager &) = delete;
|
ContextManager(const ContextManager &) = delete;
|
||||||
ContextManager &operator=(const ContextManager &) = delete;
|
ContextManager &operator=(const ContextManager &) = delete;
|
||||||
|
|
||||||
ContentFile createContentFile(const QString &filePath) const;
|
ContentFile createContentFile(const QString &filePath) const;
|
||||||
|
bool shouldProcessFile(const QString &filePath) const;
|
||||||
|
bool isInBuildDirectory(const QString &filePath) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace QodeAssist::Context
|
} // namespace QodeAssist::Context
|
||||||
|
|||||||
265
context/EnhancedRAGSimilaritySearch.cpp
Normal file
265
context/EnhancedRAGSimilaritySearch.cpp
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
#include "EnhancedRAGSimilaritySearch.hpp"
|
||||||
|
|
||||||
|
#include <QSet>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
// Static regex getters
|
||||||
|
const QRegularExpression &EnhancedRAGSimilaritySearch::getNamespaceRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(R"(namespace\s+(?:\w+\s*::\s*)*\w+\s*\{)");
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
const QRegularExpression &EnhancedRAGSimilaritySearch::getClassRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(
|
||||||
|
R"((?:template\s*<[^>]*>\s*)?(?:class|struct)\s+(\w+)\s*(?:final\s*)?(?::\s*(?:public|protected|private)\s+\w+(?:\s*,\s*(?:public|protected|private)\s+\w+)*\s*)?{)");
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
const QRegularExpression &EnhancedRAGSimilaritySearch::getFunctionRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(
|
||||||
|
R"((?:virtual\s+)?(?:static\s+)?(?:inline\s+)?(?:explicit\s+)?(?:constexpr\s+)?(?:[\w:]+\s+)?(?:\w+\s*::\s*)*\w+\s*\([^)]*\)\s*(?:const\s*)?(?:noexcept\s*)?(?:override\s*)?(?:final\s*)?(?:=\s*0\s*)?(?:=\s*default\s*)?(?:=\s*delete\s*)?(?:\s*->.*?)?\s*{)");
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
const QRegularExpression &EnhancedRAGSimilaritySearch::getTemplateRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(R"(template\s*<[^>]*>\s*(?:class|struct|typename)\s+\w+)");
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache getters
|
||||||
|
QCache<QString, EnhancedRAGSimilaritySearch::SimilarityScore> &
|
||||||
|
EnhancedRAGSimilaritySearch::getScoreCache()
|
||||||
|
{
|
||||||
|
static QCache<QString, SimilarityScore> cache(1000); // Cache size of 1000 entries
|
||||||
|
return cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
QCache<QString, QStringList> &EnhancedRAGSimilaritySearch::getStructureCache()
|
||||||
|
{
|
||||||
|
static QCache<QString, QStringList> cache(500); // Cache size of 500 entries
|
||||||
|
return cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main public interface
|
||||||
|
EnhancedRAGSimilaritySearch::SimilarityScore EnhancedRAGSimilaritySearch::calculateSimilarity(
|
||||||
|
const RAGVector &v1, const RAGVector &v2, const QString &code1, const QString &code2)
|
||||||
|
{
|
||||||
|
// Generate cache key based on content hashes
|
||||||
|
QString cacheKey = QString("%1_%2").arg(qHash(code1)).arg(qHash(code2));
|
||||||
|
|
||||||
|
// Check cache first
|
||||||
|
auto &scoreCache = getScoreCache();
|
||||||
|
if (auto *cached = scoreCache.object(cacheKey)) {
|
||||||
|
return *cached;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate new similarity score
|
||||||
|
SimilarityScore score = calculateSimilarityInternal(v1, v2, code1, code2);
|
||||||
|
|
||||||
|
// Cache the result
|
||||||
|
scoreCache.insert(cacheKey, new SimilarityScore(score));
|
||||||
|
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Internal implementation
|
||||||
|
EnhancedRAGSimilaritySearch::SimilarityScore EnhancedRAGSimilaritySearch::calculateSimilarityInternal(
|
||||||
|
const RAGVector &v1, const RAGVector &v2, const QString &code1, const QString &code2)
|
||||||
|
{
|
||||||
|
if (v1.empty() || v2.empty()) {
|
||||||
|
LOG_MESSAGE("Warning: Empty vectors in similarity calculation");
|
||||||
|
return SimilarityScore(0.0f, 0.0f, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v1.size() != v2.size()) {
|
||||||
|
LOG_MESSAGE(QString("Vector size mismatch: %1 vs %2").arg(v1.size()).arg(v2.size()));
|
||||||
|
return SimilarityScore(0.0f, 0.0f, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate semantic similarity using vector embeddings
|
||||||
|
float semantic_similarity = 0.0f;
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(_M_X64) || defined(_M_AMD64)
|
||||||
|
if (v1.size() >= 4) { // Use SSE for vectors of 4 or more elements
|
||||||
|
semantic_similarity = calculateCosineSimilaritySSE(v1, v2);
|
||||||
|
} else {
|
||||||
|
semantic_similarity = calculateCosineSimilarity(v1, v2);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
semantic_similarity = calculateCosineSimilarity(v1, v2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// If semantic similarity is very low, skip structural analysis
|
||||||
|
if (semantic_similarity < 0.0001f) {
|
||||||
|
return SimilarityScore(0.0f, 0.0f, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate structural similarity
|
||||||
|
float structural_similarity = calculateStructuralSimilarity(code1, code2);
|
||||||
|
|
||||||
|
// Calculate combined score with dynamic weights
|
||||||
|
float semantic_weight = 0.7f;
|
||||||
|
const int large_file_threshold = 10000;
|
||||||
|
|
||||||
|
if (code1.size() > large_file_threshold || code2.size() > large_file_threshold) {
|
||||||
|
semantic_weight = 0.8f; // Increase semantic weight for large files
|
||||||
|
}
|
||||||
|
|
||||||
|
float combined_score = semantic_weight * semantic_similarity
|
||||||
|
+ (1.0f - semantic_weight) * structural_similarity;
|
||||||
|
|
||||||
|
return SimilarityScore(semantic_similarity, structural_similarity, combined_score);
|
||||||
|
}
|
||||||
|
|
||||||
|
float EnhancedRAGSimilaritySearch::calculateCosineSimilarity(const RAGVector &v1, const RAGVector &v2)
|
||||||
|
{
|
||||||
|
float dotProduct = 0.0f;
|
||||||
|
float norm1 = 0.0f;
|
||||||
|
float norm2 = 0.0f;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
|
dotProduct += v1[i] * v2[i];
|
||||||
|
norm1 += v1[i] * v1[i];
|
||||||
|
norm2 += v2[i] * v2[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
norm1 = std::sqrt(norm1);
|
||||||
|
norm2 = std::sqrt(norm2);
|
||||||
|
|
||||||
|
if (norm1 == 0.0f || norm2 == 0.0f) {
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dotProduct / (norm1 * norm2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(_M_X64) || defined(_M_AMD64)
|
||||||
|
float EnhancedRAGSimilaritySearch::calculateCosineSimilaritySSE(
|
||||||
|
const RAGVector &v1, const RAGVector &v2)
|
||||||
|
{
|
||||||
|
const float *p1 = v1.data();
|
||||||
|
const float *p2 = v2.data();
|
||||||
|
const size_t size = v1.size();
|
||||||
|
const size_t alignedSize = size & ~3ULL; // Round down to multiple of 4
|
||||||
|
|
||||||
|
__m128 sum = _mm_setzero_ps();
|
||||||
|
__m128 norm1 = _mm_setzero_ps();
|
||||||
|
__m128 norm2 = _mm_setzero_ps();
|
||||||
|
|
||||||
|
// Process 4 elements at a time using SSE
|
||||||
|
for (size_t i = 0; i < alignedSize; i += 4) {
|
||||||
|
__m128 v1_vec = _mm_loadu_ps(p1 + i); // Use unaligned load for safety
|
||||||
|
__m128 v2_vec = _mm_loadu_ps(p2 + i);
|
||||||
|
|
||||||
|
sum = _mm_add_ps(sum, _mm_mul_ps(v1_vec, v2_vec));
|
||||||
|
norm1 = _mm_add_ps(norm1, _mm_mul_ps(v1_vec, v1_vec));
|
||||||
|
norm2 = _mm_add_ps(norm2, _mm_mul_ps(v2_vec, v2_vec));
|
||||||
|
}
|
||||||
|
|
||||||
|
float dotProduct = horizontalSum(sum);
|
||||||
|
float n1 = std::sqrt(horizontalSum(norm1));
|
||||||
|
float n2 = std::sqrt(horizontalSum(norm2));
|
||||||
|
|
||||||
|
// Process remaining elements
|
||||||
|
for (size_t i = alignedSize; i < size; ++i) {
|
||||||
|
dotProduct += v1[i] * v2[i];
|
||||||
|
n1 += v1[i] * v1[i];
|
||||||
|
n2 += v2[i] * v2[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n1 == 0.0f || n2 == 0.0f) {
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dotProduct / (std::sqrt(n1) * std::sqrt(n2));
|
||||||
|
}
|
||||||
|
|
||||||
|
float EnhancedRAGSimilaritySearch::horizontalSum(__m128 x)
|
||||||
|
{
|
||||||
|
__m128 shuf = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));
|
||||||
|
__m128 sums = _mm_add_ps(x, shuf);
|
||||||
|
shuf = _mm_movehl_ps(shuf, sums);
|
||||||
|
sums = _mm_add_ss(sums, shuf);
|
||||||
|
return _mm_cvtss_f32(sums);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
float EnhancedRAGSimilaritySearch::calculateStructuralSimilarity(
|
||||||
|
const QString &code1, const QString &code2)
|
||||||
|
{
|
||||||
|
QStringList structures1 = extractStructures(code1);
|
||||||
|
QStringList structures2 = extractStructures(code2);
|
||||||
|
|
||||||
|
return calculateJaccardSimilarity(structures1, structures2);
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList EnhancedRAGSimilaritySearch::extractStructures(const QString &code)
|
||||||
|
{
|
||||||
|
// Check cache first
|
||||||
|
auto &structureCache = getStructureCache();
|
||||||
|
QString cacheKey = QString::number(qHash(code));
|
||||||
|
|
||||||
|
if (auto *cached = structureCache.object(cacheKey)) {
|
||||||
|
return *cached;
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList structures;
|
||||||
|
structures.reserve(100); // Reserve space for typical file
|
||||||
|
|
||||||
|
// Extract namespaces
|
||||||
|
auto namespaceMatches = getNamespaceRegex().globalMatch(code);
|
||||||
|
while (namespaceMatches.hasNext()) {
|
||||||
|
structures.append(namespaceMatches.next().captured().trimmed());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract classes
|
||||||
|
auto classMatches = getClassRegex().globalMatch(code);
|
||||||
|
while (classMatches.hasNext()) {
|
||||||
|
structures.append(classMatches.next().captured().trimmed());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract functions
|
||||||
|
auto functionMatches = getFunctionRegex().globalMatch(code);
|
||||||
|
while (functionMatches.hasNext()) {
|
||||||
|
structures.append(functionMatches.next().captured().trimmed());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract templates
|
||||||
|
auto templateMatches = getTemplateRegex().globalMatch(code);
|
||||||
|
while (templateMatches.hasNext()) {
|
||||||
|
structures.append(templateMatches.next().captured().trimmed());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the result
|
||||||
|
structureCache.insert(cacheKey, new QStringList(structures));
|
||||||
|
|
||||||
|
return structures;
|
||||||
|
}
|
||||||
|
|
||||||
|
float EnhancedRAGSimilaritySearch::calculateJaccardSimilarity(
|
||||||
|
const QStringList &set1, const QStringList &set2)
|
||||||
|
{
|
||||||
|
if (set1.isEmpty() && set2.isEmpty()) {
|
||||||
|
return 1.0f; // Пустые множества считаем идентичными
|
||||||
|
}
|
||||||
|
if (set1.isEmpty() || set2.isEmpty()) {
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
QSet<QString> set1Unique = QSet<QString>(set1.begin(), set1.end());
|
||||||
|
QSet<QString> set2Unique = QSet<QString>(set2.begin(), set2.end());
|
||||||
|
|
||||||
|
QSet<QString> intersection = set1Unique;
|
||||||
|
intersection.intersect(set2Unique);
|
||||||
|
|
||||||
|
QSet<QString> union_set = set1Unique;
|
||||||
|
union_set.unite(set2Unique);
|
||||||
|
|
||||||
|
return static_cast<float>(intersection.size()) / union_set.size();
|
||||||
|
}
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
74
context/EnhancedRAGSimilaritySearch.hpp
Normal file
74
context/EnhancedRAGSimilaritySearch.hpp
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QCache>
|
||||||
|
#include <QHash>
|
||||||
|
#include <QRegularExpression>
|
||||||
|
#include <QString>
|
||||||
|
#include <QStringList>
|
||||||
|
#include <QtGlobal>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <optional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(_M_X64) || defined(_M_AMD64)
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "RAGData.hpp"
|
||||||
|
#include "logger/Logger.hpp"
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
class EnhancedRAGSimilaritySearch
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
struct SimilarityScore
|
||||||
|
{
|
||||||
|
float semantic_similarity{0.0f};
|
||||||
|
float structural_similarity{0.0f};
|
||||||
|
float combined_score{0.0f};
|
||||||
|
|
||||||
|
SimilarityScore() = default;
|
||||||
|
SimilarityScore(float sem, float str, float comb)
|
||||||
|
: semantic_similarity(sem)
|
||||||
|
, structural_similarity(str)
|
||||||
|
, combined_score(comb)
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
static SimilarityScore calculateSimilarity(
|
||||||
|
const RAGVector &v1, const RAGVector &v2, const QString &code1, const QString &code2);
|
||||||
|
|
||||||
|
private:
|
||||||
|
static SimilarityScore calculateSimilarityInternal(
|
||||||
|
const RAGVector &v1, const RAGVector &v2, const QString &code1, const QString &code2);
|
||||||
|
|
||||||
|
static float calculateCosineSimilarity(const RAGVector &v1, const RAGVector &v2);
|
||||||
|
|
||||||
|
#if defined(__SSE__) || defined(_M_X64) || defined(_M_AMD64)
|
||||||
|
static float calculateCosineSimilaritySSE(const RAGVector &v1, const RAGVector &v2);
|
||||||
|
static float horizontalSum(__m128 x);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static float calculateStructuralSimilarity(const QString &code1, const QString &code2);
|
||||||
|
static QStringList extractStructures(const QString &code);
|
||||||
|
static float calculateJaccardSimilarity(const QStringList &set1, const QStringList &set2);
|
||||||
|
|
||||||
|
static const QRegularExpression &getNamespaceRegex();
|
||||||
|
static const QRegularExpression &getClassRegex();
|
||||||
|
static const QRegularExpression &getFunctionRegex();
|
||||||
|
static const QRegularExpression &getTemplateRegex();
|
||||||
|
|
||||||
|
// Cache for similarity scores
|
||||||
|
static QCache<QString, SimilarityScore> &getScoreCache();
|
||||||
|
|
||||||
|
// Cache for extracted structures
|
||||||
|
static QCache<QString, QStringList> &getStructureCache();
|
||||||
|
|
||||||
|
EnhancedRAGSimilaritySearch() = delete; // Prevent instantiation
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
198
context/FileChunker.cpp
Normal file
198
context/FileChunker.cpp
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
// FileChunker.cpp
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
|
#include <coreplugin/idocument.h>
|
||||||
|
#include <texteditor/syntaxhighlighter.h>
|
||||||
|
#include <texteditor/textdocument.h>
|
||||||
|
#include <texteditor/texteditorconstants.h>
|
||||||
|
|
||||||
|
#include <QFutureWatcher>
|
||||||
|
#include <QTimer>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
FileChunker::FileChunker(QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
{}
|
||||||
|
|
||||||
|
FileChunker::FileChunker(const ChunkingConfig &config, QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
, m_config(config)
|
||||||
|
{}
|
||||||
|
|
||||||
|
QFuture<QList<FileChunk>> FileChunker::chunkFiles(const QStringList &filePaths)
|
||||||
|
{
|
||||||
|
qDebug() << "\nStarting chunking process for" << filePaths.size() << "files";
|
||||||
|
qDebug() << "Configuration:"
|
||||||
|
<< "\n Max lines per chunk:" << m_config.maxLinesPerChunk
|
||||||
|
<< "\n Overlap lines:" << m_config.overlapLines
|
||||||
|
<< "\n Skip empty lines:" << m_config.skipEmptyLines
|
||||||
|
<< "\n Preserve functions:" << m_config.preserveFunctions
|
||||||
|
<< "\n Preserve classes:" << m_config.preserveClasses
|
||||||
|
<< "\n Batch size:" << m_config.batchSize;
|
||||||
|
|
||||||
|
auto promise = std::make_shared<QPromise<QList<FileChunk>>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
if (filePaths.isEmpty()) {
|
||||||
|
qDebug() << "No files to process";
|
||||||
|
promise->addResult({});
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
processNextBatch(promise, filePaths, 0);
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileChunker::processNextBatch(
|
||||||
|
std::shared_ptr<QPromise<QList<FileChunk>>> promise, const QStringList &files, int startIndex)
|
||||||
|
{
|
||||||
|
if (startIndex >= files.size()) {
|
||||||
|
emit chunkingComplete();
|
||||||
|
promise->finish();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int endIndex = qMin(startIndex + m_config.batchSize, files.size());
|
||||||
|
QList<FileChunk> batchChunks;
|
||||||
|
|
||||||
|
for (int i = startIndex; i < endIndex; ++i) {
|
||||||
|
try {
|
||||||
|
auto chunks = processFile(files[i]);
|
||||||
|
batchChunks.append(chunks);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
emit error(QString("Error processing file %1: %2").arg(files[i], e.what()));
|
||||||
|
}
|
||||||
|
emit progressUpdated(i + 1, files.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
promise->addResult(batchChunks);
|
||||||
|
|
||||||
|
// Планируем обработку следующего батча
|
||||||
|
QTimer::singleShot(0, this, [this, promise, files, endIndex]() {
|
||||||
|
processNextBatch(promise, files, endIndex);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<FileChunk> FileChunker::processFile(const QString &filePath)
|
||||||
|
{
|
||||||
|
qDebug() << "\nProcessing file:" << filePath;
|
||||||
|
|
||||||
|
auto document = new TextEditor::TextDocument;
|
||||||
|
auto filePathObj = Utils::FilePath::fromString(filePath);
|
||||||
|
auto result = document->open(&m_error, filePathObj, filePathObj);
|
||||||
|
if (result != Core::IDocument::OpenResult::Success) {
|
||||||
|
qDebug() << "Failed to open document:" << filePath << "-" << m_error;
|
||||||
|
emit error(QString("Failed to open document: %1 - %2").arg(filePath, m_error));
|
||||||
|
delete document;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Document opened successfully. Line count:" << document->document()->blockCount();
|
||||||
|
|
||||||
|
auto chunks = createChunksForDocument(document);
|
||||||
|
qDebug() << "Created" << chunks.size() << "chunks for file";
|
||||||
|
|
||||||
|
delete document;
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<FileChunk> FileChunker::createChunksForDocument(TextEditor::TextDocument *document)
|
||||||
|
{
|
||||||
|
QList<FileChunk> chunks;
|
||||||
|
QString filePath = document->filePath().toString();
|
||||||
|
qDebug() << "\nCreating chunks for document:" << filePath << "\nConfiguration:"
|
||||||
|
<< "\n Max lines per chunk:" << m_config.maxLinesPerChunk
|
||||||
|
<< "\n Min lines per chunk:" << m_config.minLinesPerChunk
|
||||||
|
<< "\n Overlap lines:" << m_config.overlapLines;
|
||||||
|
// Если файл меньше минимального размера чанка, создаем один чанк
|
||||||
|
if (document->document()->blockCount() <= m_config.minLinesPerChunk) {
|
||||||
|
FileChunk chunk;
|
||||||
|
chunk.filePath = filePath;
|
||||||
|
chunk.startLine = 0;
|
||||||
|
chunk.endLine = document->document()->blockCount() - 1;
|
||||||
|
chunk.createdAt = QDateTime::currentDateTime();
|
||||||
|
chunk.updatedAt = chunk.createdAt;
|
||||||
|
|
||||||
|
QString content;
|
||||||
|
QTextBlock block = document->document()->firstBlock();
|
||||||
|
while (block.isValid()) {
|
||||||
|
content += block.text() + "\n";
|
||||||
|
block = block.next();
|
||||||
|
}
|
||||||
|
chunk.content = content;
|
||||||
|
|
||||||
|
qDebug() << "File is smaller than minimum chunk size. Creating single chunk:"
|
||||||
|
<< "\n Lines:" << chunk.lineCount() << "\n Content size:" << chunk.content.size()
|
||||||
|
<< "bytes";
|
||||||
|
|
||||||
|
chunks.append(chunk);
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Для больших файлов создаем чанки фиксированного размера с перекрытием
|
||||||
|
int currentStartLine = 0;
|
||||||
|
int lineCount = 0;
|
||||||
|
QString content;
|
||||||
|
QTextBlock block = document->document()->firstBlock();
|
||||||
|
|
||||||
|
while (block.isValid()) {
|
||||||
|
content += block.text() + "\n";
|
||||||
|
lineCount++;
|
||||||
|
|
||||||
|
// Если достигли размера чанка или это последний блок
|
||||||
|
if (lineCount >= m_config.maxLinesPerChunk || !block.next().isValid()) {
|
||||||
|
FileChunk chunk;
|
||||||
|
chunk.filePath = filePath;
|
||||||
|
chunk.startLine = currentStartLine;
|
||||||
|
chunk.endLine = currentStartLine + lineCount - 1;
|
||||||
|
chunk.content = content;
|
||||||
|
chunk.createdAt = QDateTime::currentDateTime();
|
||||||
|
chunk.updatedAt = chunk.createdAt;
|
||||||
|
|
||||||
|
qDebug() << "Creating chunk:"
|
||||||
|
<< "\n Start line:" << chunk.startLine << "\n End line:" << chunk.endLine
|
||||||
|
<< "\n Lines:" << chunk.lineCount()
|
||||||
|
<< "\n Content size:" << chunk.content.size() << "bytes";
|
||||||
|
|
||||||
|
chunks.append(chunk);
|
||||||
|
|
||||||
|
// Начинаем новый чанк с учетом перекрытия
|
||||||
|
if (block.next().isValid()) {
|
||||||
|
// Отступаем назад на размер перекрытия
|
||||||
|
int overlapLines = qMin(m_config.overlapLines, lineCount);
|
||||||
|
currentStartLine = chunk.endLine - overlapLines + 1;
|
||||||
|
|
||||||
|
// Сбрасываем контент, но добавляем перекрывающиеся строки
|
||||||
|
content.clear();
|
||||||
|
QTextBlock overlapBlock = document->document()->findBlockByLineNumber(
|
||||||
|
currentStartLine);
|
||||||
|
while (overlapBlock.isValid() && overlapBlock.blockNumber() <= chunk.endLine) {
|
||||||
|
content += overlapBlock.text() + "\n";
|
||||||
|
overlapBlock = overlapBlock.next();
|
||||||
|
}
|
||||||
|
lineCount = overlapLines;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
block = block.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Finished creating chunks for file:" << filePath
|
||||||
|
<< "\nTotal chunks:" << chunks.size();
|
||||||
|
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileChunker::setConfig(const ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
m_config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileChunker::ChunkingConfig FileChunker::config() const
|
||||||
|
{
|
||||||
|
return m_config;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
68
context/FileChunker.hpp
Normal file
68
context/FileChunker.hpp
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
// FileChunker.hpp
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <texteditor/textdocument.h>
|
||||||
|
#include <QDateTime>
|
||||||
|
#include <QFuture>
|
||||||
|
#include <QString>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
struct FileChunk
|
||||||
|
{
|
||||||
|
QString filePath; // Path to the source file
|
||||||
|
int startLine; // Starting line of the chunk
|
||||||
|
int endLine; // Ending line of the chunk
|
||||||
|
QDateTime createdAt; // When the chunk was created
|
||||||
|
QDateTime updatedAt; // When the chunk was last updated
|
||||||
|
QString content; // Content of the chunk
|
||||||
|
|
||||||
|
// Helper methods
|
||||||
|
int lineCount() const { return endLine - startLine + 1; }
|
||||||
|
bool isValid() const { return !filePath.isEmpty() && startLine >= 0 && endLine >= startLine; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class FileChunker : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct ChunkingConfig
|
||||||
|
{
|
||||||
|
int maxLinesPerChunk = 80;
|
||||||
|
int minLinesPerChunk = 40;
|
||||||
|
int overlapLines = 20;
|
||||||
|
bool skipEmptyLines = true;
|
||||||
|
bool preserveFunctions = true;
|
||||||
|
bool preserveClasses = true;
|
||||||
|
int batchSize = 10;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit FileChunker(QObject *parent = nullptr);
|
||||||
|
explicit FileChunker(const ChunkingConfig &config, QObject *parent = nullptr);
|
||||||
|
|
||||||
|
// Main chunking method
|
||||||
|
QFuture<QList<FileChunk>> chunkFiles(const QStringList &filePaths);
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
void setConfig(const ChunkingConfig &config);
|
||||||
|
ChunkingConfig config() const;
|
||||||
|
|
||||||
|
signals:
|
||||||
|
void progressUpdated(int processedFiles, int totalFiles);
|
||||||
|
void chunkingComplete();
|
||||||
|
void error(const QString &errorMessage);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QList<FileChunk> processFile(const QString &filePath);
|
||||||
|
QList<FileChunk> createChunksForDocument(TextEditor::TextDocument *document);
|
||||||
|
void processNextBatch(
|
||||||
|
std::shared_ptr<QPromise<QList<FileChunk>>> promise,
|
||||||
|
const QStringList &files,
|
||||||
|
int startIndex);
|
||||||
|
|
||||||
|
ChunkingConfig m_config;
|
||||||
|
QString m_error;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
@ -24,8 +24,8 @@
|
|||||||
namespace QodeAssist::Context {
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
enum class ProgrammingLanguage {
|
enum class ProgrammingLanguage {
|
||||||
QML, // QML/JavaScript
|
QML,
|
||||||
Cpp, // C/C++
|
Cpp,
|
||||||
Python,
|
Python,
|
||||||
Unknown,
|
Unknown,
|
||||||
};
|
};
|
||||||
|
|||||||
7
context/RAGData.hpp
Normal file
7
context/RAGData.hpp
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
using RAGVector = std::vector<float>;
|
||||||
|
}
|
||||||
443
context/RAGManager.cpp
Normal file
443
context/RAGManager.cpp
Normal file
@ -0,0 +1,443 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "RAGManager.hpp"
|
||||||
|
#include "EnhancedRAGSimilaritySearch.hpp"
|
||||||
|
#include "RAGPreprocessor.hpp"
|
||||||
|
#include "RAGSimilaritySearch.hpp"
|
||||||
|
#include "logger/Logger.hpp"
|
||||||
|
|
||||||
|
#include <coreplugin/icore.h>
|
||||||
|
#include <projectexplorer/project.h>
|
||||||
|
#include <QFile>
|
||||||
|
#include <QtConcurrent>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
RAGManager &RAGManager::instance()
|
||||||
|
{
|
||||||
|
static RAGManager manager;
|
||||||
|
return manager;
|
||||||
|
}
|
||||||
|
|
||||||
|
RAGManager::RAGManager(QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
, m_vectorizer(std::make_unique<RAGVectorizer>())
|
||||||
|
{}
|
||||||
|
|
||||||
|
RAGManager::~RAGManager() {}
|
||||||
|
|
||||||
|
QString RAGManager::getStoragePath(ProjectExplorer::Project *project) const
|
||||||
|
{
|
||||||
|
return QString("%1/qodeassist/%2/rag/vectors.db")
|
||||||
|
.arg(Core::ICore::userResourcePath().toString(), project->displayName());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<QString> RAGManager::loadFileContent(const QString &filePath)
|
||||||
|
{
|
||||||
|
QFile file(filePath);
|
||||||
|
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
|
||||||
|
qDebug() << "ERROR: Failed to open file for reading:" << filePath
|
||||||
|
<< "Error:" << file.errorString();
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
QFileInfo fileInfo(filePath);
|
||||||
|
qDebug() << "Loading content from file:" << fileInfo.fileName() << "Size:" << fileInfo.size()
|
||||||
|
<< "bytes";
|
||||||
|
|
||||||
|
QString content = QString::fromUtf8(file.readAll());
|
||||||
|
if (content.isEmpty()) {
|
||||||
|
qDebug() << "WARNING: Empty content read from file:" << filePath;
|
||||||
|
}
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project) const
|
||||||
|
{
|
||||||
|
qDebug() << "Ensuring storage for project:" << project->displayName();
|
||||||
|
|
||||||
|
if (m_currentProject == project && m_currentStorage) {
|
||||||
|
qDebug() << "Using existing storage";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Creating new storage";
|
||||||
|
m_currentStorage.reset();
|
||||||
|
m_currentProject = project;
|
||||||
|
|
||||||
|
if (project) {
|
||||||
|
QString storagePath = getStoragePath(project);
|
||||||
|
qDebug() << "Storage path:" << storagePath;
|
||||||
|
|
||||||
|
StorageOptions options;
|
||||||
|
m_currentStorage = std::make_unique<RAGStorage>(storagePath, options);
|
||||||
|
|
||||||
|
qDebug() << "Initializing storage...";
|
||||||
|
if (!m_currentStorage->init()) {
|
||||||
|
qDebug() << "Failed to initialize storage";
|
||||||
|
m_currentStorage.reset();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
qDebug() << "Storage initialized successfully";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
QFuture<void> RAGManager::processProjectFiles(
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QStringList &filePaths,
|
||||||
|
const FileChunker::ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
qDebug() << "\nStarting batch processing of" << filePaths.size()
|
||||||
|
<< "files for project:" << project->displayName();
|
||||||
|
|
||||||
|
auto promise = std::make_shared<QPromise<void>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
qDebug() << "Initializing storage...";
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
qDebug() << "Failed to initialize storage for project:" << project->displayName();
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
qDebug() << "Storage initialized successfully";
|
||||||
|
|
||||||
|
qDebug() << "Checking files for processing...";
|
||||||
|
QSet<QString> uniqueFiles;
|
||||||
|
for (const QString &filePath : filePaths) {
|
||||||
|
qDebug() << "Checking file:" << filePath;
|
||||||
|
if (isFileStorageOutdated(project, filePath)) {
|
||||||
|
qDebug() << "File needs processing:" << filePath;
|
||||||
|
uniqueFiles.insert(filePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList filesToProcess = uniqueFiles.values();
|
||||||
|
|
||||||
|
if (filesToProcess.isEmpty()) {
|
||||||
|
qDebug() << "No files need processing";
|
||||||
|
emit vectorizationFinished();
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Starting to process" << filesToProcess.size() << "files";
|
||||||
|
const int batchSize = 10;
|
||||||
|
processNextFileBatch(promise, project, filesToProcess, config, 0, batchSize);
|
||||||
|
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
void RAGManager::processNextFileBatch(
|
||||||
|
std::shared_ptr<QPromise<void>> promise,
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QStringList &files,
|
||||||
|
const FileChunker::ChunkingConfig &config,
|
||||||
|
int startIndex,
|
||||||
|
int batchSize)
|
||||||
|
{
|
||||||
|
if (startIndex >= files.size()) {
|
||||||
|
qDebug() << "All batches processed successfully";
|
||||||
|
emit vectorizationFinished();
|
||||||
|
promise->finish();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int endIndex = qMin(startIndex + batchSize, files.size());
|
||||||
|
auto currentBatch = files.mid(startIndex, endIndex - startIndex);
|
||||||
|
|
||||||
|
qDebug() << "\nProcessing batch" << (startIndex / batchSize + 1) << "(" << currentBatch.size()
|
||||||
|
<< "files)"
|
||||||
|
<< "\nProgress:" << startIndex << "to" << endIndex << "of" << files.size();
|
||||||
|
|
||||||
|
for (const QString &filePath : currentBatch) {
|
||||||
|
qDebug() << "Starting processing file:" << filePath;
|
||||||
|
auto future = processFileWithChunks(project, filePath, config);
|
||||||
|
auto watcher = new QFutureWatcher<bool>;
|
||||||
|
watcher->setFuture(future);
|
||||||
|
|
||||||
|
connect(
|
||||||
|
watcher,
|
||||||
|
&QFutureWatcher<bool>::finished,
|
||||||
|
this,
|
||||||
|
[this,
|
||||||
|
watcher,
|
||||||
|
promise,
|
||||||
|
project,
|
||||||
|
files,
|
||||||
|
startIndex,
|
||||||
|
endIndex,
|
||||||
|
batchSize,
|
||||||
|
config,
|
||||||
|
filePath]() {
|
||||||
|
bool success = watcher->result();
|
||||||
|
qDebug() << "File processed:" << filePath << "success:" << success;
|
||||||
|
|
||||||
|
bool isLastFileInBatch = (filePath == files[endIndex - 1]);
|
||||||
|
if (isLastFileInBatch) {
|
||||||
|
qDebug() << "Batch completed, moving to next batch";
|
||||||
|
emit vectorizationProgress(endIndex, files.size());
|
||||||
|
processNextFileBatch(promise, project, files, config, endIndex, batchSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
watcher->deleteLater();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
QFuture<bool> RAGManager::processFileWithChunks(
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QString &filePath,
|
||||||
|
const FileChunker::ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
auto promise = std::make_shared<QPromise<bool>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
qDebug() << "Storage not initialized for file:" << filePath;
|
||||||
|
promise->addResult(false);
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto fileContent = loadFileContent(filePath);
|
||||||
|
if (!fileContent) {
|
||||||
|
qDebug() << "Failed to load content for file:" << filePath;
|
||||||
|
promise->addResult(false);
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Creating chunks for file:" << filePath;
|
||||||
|
auto chunksFuture = m_chunker.chunkFiles({filePath});
|
||||||
|
auto chunks = chunksFuture.result();
|
||||||
|
|
||||||
|
if (chunks.isEmpty()) {
|
||||||
|
qDebug() << "No chunks created for file:" << filePath;
|
||||||
|
promise->addResult(false);
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Created" << chunks.size() << "chunks for file:" << filePath;
|
||||||
|
|
||||||
|
// Преобразуем FileChunk в FileChunkData
|
||||||
|
QList<FileChunkData> chunkData;
|
||||||
|
for (const auto &chunk : chunks) {
|
||||||
|
FileChunkData data;
|
||||||
|
data.filePath = chunk.filePath;
|
||||||
|
data.startLine = chunk.startLine;
|
||||||
|
data.endLine = chunk.endLine;
|
||||||
|
data.content = chunk.content;
|
||||||
|
chunkData.append(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Deleting old chunks for file:" << filePath;
|
||||||
|
m_currentStorage->deleteChunksForFile(filePath);
|
||||||
|
|
||||||
|
auto vectorizeFuture = vectorizeAndStoreChunks(filePath, chunkData);
|
||||||
|
auto watcher = new QFutureWatcher<void>;
|
||||||
|
watcher->setFuture(vectorizeFuture);
|
||||||
|
|
||||||
|
connect(watcher, &QFutureWatcher<void>::finished, this, [promise, watcher, filePath]() {
|
||||||
|
qDebug() << "Completed processing file:" << filePath;
|
||||||
|
promise->addResult(true);
|
||||||
|
promise->finish();
|
||||||
|
watcher->deleteLater();
|
||||||
|
});
|
||||||
|
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
QFuture<void> RAGManager::vectorizeAndStoreChunks(
|
||||||
|
const QString &filePath, const QList<FileChunkData> &chunks)
|
||||||
|
{
|
||||||
|
qDebug() << "Vectorizing and storing" << chunks.size() << "chunks for file:" << filePath;
|
||||||
|
|
||||||
|
auto promise = std::make_shared<QPromise<void>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
// Обрабатываем чанки последовательно
|
||||||
|
processNextChunk(promise, chunks, 0);
|
||||||
|
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
void RAGManager::processNextChunk(
|
||||||
|
std::shared_ptr<QPromise<void>> promise, const QList<FileChunkData> &chunks, int currentIndex)
|
||||||
|
{
|
||||||
|
if (currentIndex >= chunks.size()) {
|
||||||
|
promise->finish();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto &chunk = chunks[currentIndex];
|
||||||
|
QString processedContent = RAGPreprocessor::preprocessCode(chunk.content);
|
||||||
|
qDebug() << "Processing chunk" << currentIndex + 1 << "of" << chunks.size();
|
||||||
|
|
||||||
|
auto vectorFuture = m_vectorizer->vectorizeText(processedContent);
|
||||||
|
auto watcher = new QFutureWatcher<RAGVector>;
|
||||||
|
watcher->setFuture(vectorFuture);
|
||||||
|
|
||||||
|
connect(
|
||||||
|
watcher,
|
||||||
|
&QFutureWatcher<RAGVector>::finished,
|
||||||
|
this,
|
||||||
|
[this, watcher, promise, chunks, currentIndex, chunk]() {
|
||||||
|
auto vector = watcher->result();
|
||||||
|
|
||||||
|
if (!vector.empty()) {
|
||||||
|
qDebug() << "Storing vector and chunk for file:" << chunk.filePath;
|
||||||
|
bool vectorStored = m_currentStorage->storeVector(chunk.filePath, vector);
|
||||||
|
bool chunkStored = m_currentStorage->storeChunk(chunk);
|
||||||
|
qDebug() << "Storage results - Vector:" << vectorStored << "Chunk:" << chunkStored;
|
||||||
|
} else {
|
||||||
|
qDebug() << "Failed to vectorize chunk content";
|
||||||
|
}
|
||||||
|
|
||||||
|
processNextChunk(promise, chunks, currentIndex + 1);
|
||||||
|
|
||||||
|
watcher->deleteLater();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
QFuture<QList<RAGManager::ChunkSearchResult>> RAGManager::findRelevantChunks(
|
||||||
|
const QString &query, ProjectExplorer::Project *project, int topK)
|
||||||
|
{
|
||||||
|
auto promise = std::make_shared<QPromise<QList<ChunkSearchResult>>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
qDebug() << "Storage not initialized for project:" << project->displayName();
|
||||||
|
promise->addResult({});
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
QString processedQuery = RAGPreprocessor::preprocessCode(query);
|
||||||
|
|
||||||
|
auto vectorFuture = m_vectorizer->vectorizeText(processedQuery);
|
||||||
|
vectorFuture.then([this, promise, project, processedQuery, topK](const RAGVector &queryVector) {
|
||||||
|
if (queryVector.empty()) {
|
||||||
|
qDebug() << "Failed to vectorize query";
|
||||||
|
promise->addResult({});
|
||||||
|
promise->finish();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto files = m_currentStorage->getFilesWithChunks();
|
||||||
|
QList<FileChunkData> allChunks;
|
||||||
|
|
||||||
|
for (const auto &filePath : files) {
|
||||||
|
auto fileChunks = m_currentStorage->getChunksForFile(filePath);
|
||||||
|
allChunks.append(fileChunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto results = rankChunks(queryVector, processedQuery, allChunks);
|
||||||
|
|
||||||
|
if (results.size() > topK) {
|
||||||
|
results = results.mid(0, topK);
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Found" << results.size() << "relevant chunks";
|
||||||
|
promise->addResult(results);
|
||||||
|
promise->finish();
|
||||||
|
|
||||||
|
closeStorage();
|
||||||
|
});
|
||||||
|
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<RAGManager::ChunkSearchResult> RAGManager::rankChunks(
|
||||||
|
const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks)
|
||||||
|
{
|
||||||
|
QList<ChunkSearchResult> results;
|
||||||
|
results.reserve(chunks.size());
|
||||||
|
|
||||||
|
for (const auto &chunk : chunks) {
|
||||||
|
auto chunkVector = m_currentStorage->getVector(chunk.filePath);
|
||||||
|
if (!chunkVector.has_value()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
QString processedChunk = RAGPreprocessor::preprocessCode(chunk.content);
|
||||||
|
|
||||||
|
auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity(
|
||||||
|
queryVector, chunkVector.value(), queryText, processedChunk);
|
||||||
|
|
||||||
|
results.append(ChunkSearchResult{
|
||||||
|
chunk.filePath,
|
||||||
|
chunk.startLine,
|
||||||
|
chunk.endLine,
|
||||||
|
chunk.content,
|
||||||
|
similarity.semantic_similarity,
|
||||||
|
similarity.structural_similarity,
|
||||||
|
similarity.combined_score});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(results.begin(), results.end());
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList RAGManager::getStoredFiles(ProjectExplorer::Project *project) const
|
||||||
|
{
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
return m_currentStorage->getAllFiles();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RAGManager::isFileStorageOutdated(
|
||||||
|
ProjectExplorer::Project *project, const QString &filePath) const
|
||||||
|
{
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return m_currentStorage->needsUpdate(filePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<RAGVector> RAGManager::loadVectorFromStorage(
|
||||||
|
ProjectExplorer::Project *project, const QString &filePath)
|
||||||
|
{
|
||||||
|
ensureStorageForProject(project);
|
||||||
|
if (!m_currentStorage) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return m_currentStorage->getVector(filePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RAGManager::closeStorage()
|
||||||
|
{
|
||||||
|
qDebug() << "Closing storage...";
|
||||||
|
if (m_currentStorage) {
|
||||||
|
m_currentStorage.reset();
|
||||||
|
m_currentProject = nullptr;
|
||||||
|
qDebug() << "Storage closed";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
119
context/RAGManager.hpp
Normal file
119
context/RAGManager.hpp
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <QFuture>
|
||||||
|
#include <QObject>
|
||||||
|
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
#include "RAGData.hpp"
|
||||||
|
#include "RAGStorage.hpp"
|
||||||
|
#include "RAGVectorizer.hpp"
|
||||||
|
|
||||||
|
namespace ProjectExplorer {
|
||||||
|
class Project;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
class RAGManager : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct ChunkSearchResult
|
||||||
|
{
|
||||||
|
QString filePath;
|
||||||
|
int startLine;
|
||||||
|
int endLine;
|
||||||
|
QString content;
|
||||||
|
float semanticScore;
|
||||||
|
float structuralScore;
|
||||||
|
float combinedScore;
|
||||||
|
|
||||||
|
bool operator<(const ChunkSearchResult &other) const
|
||||||
|
{
|
||||||
|
return combinedScore > other.combinedScore;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static RAGManager &instance();
|
||||||
|
|
||||||
|
QFuture<void> processProjectFiles(
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QStringList &filePaths,
|
||||||
|
const FileChunker::ChunkingConfig &config = FileChunker::ChunkingConfig());
|
||||||
|
|
||||||
|
QFuture<QList<ChunkSearchResult>> findRelevantChunks(
|
||||||
|
const QString &query, ProjectExplorer::Project *project, int topK = 5);
|
||||||
|
|
||||||
|
QStringList getStoredFiles(ProjectExplorer::Project *project) const;
|
||||||
|
bool isFileStorageOutdated(ProjectExplorer::Project *project, const QString &filePath) const;
|
||||||
|
|
||||||
|
void processNextChunk(
|
||||||
|
std::shared_ptr<QPromise<void>> promise,
|
||||||
|
const QList<FileChunkData> &chunks,
|
||||||
|
int currentIndex);
|
||||||
|
void closeStorage();
|
||||||
|
signals:
|
||||||
|
void vectorizationProgress(int processed, int total);
|
||||||
|
void vectorizationFinished();
|
||||||
|
|
||||||
|
private:
|
||||||
|
explicit RAGManager(QObject *parent = nullptr);
|
||||||
|
~RAGManager();
|
||||||
|
RAGManager(const RAGManager &) = delete;
|
||||||
|
RAGManager &operator=(const RAGManager &) = delete;
|
||||||
|
|
||||||
|
QString getStoragePath(ProjectExplorer::Project *project) const;
|
||||||
|
void ensureStorageForProject(ProjectExplorer::Project *project) const;
|
||||||
|
std::optional<QString> loadFileContent(const QString &filePath);
|
||||||
|
std::optional<RAGVector> loadVectorFromStorage(
|
||||||
|
ProjectExplorer::Project *project, const QString &filePath);
|
||||||
|
|
||||||
|
void processNextFileBatch(
|
||||||
|
std::shared_ptr<QPromise<void>> promise,
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QStringList &files,
|
||||||
|
const FileChunker::ChunkingConfig &config,
|
||||||
|
int startIndex,
|
||||||
|
int batchSize);
|
||||||
|
|
||||||
|
QFuture<bool> processFileWithChunks(
|
||||||
|
ProjectExplorer::Project *project,
|
||||||
|
const QString &filePath,
|
||||||
|
const FileChunker::ChunkingConfig &config);
|
||||||
|
|
||||||
|
QFuture<void> vectorizeAndStoreChunks(
|
||||||
|
const QString &filePath, const QList<FileChunkData> &chunks);
|
||||||
|
|
||||||
|
QList<ChunkSearchResult> rankChunks(
|
||||||
|
const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks);
|
||||||
|
|
||||||
|
private:
|
||||||
|
mutable std::unique_ptr<RAGVectorizer> m_vectorizer;
|
||||||
|
mutable std::unique_ptr<RAGStorage> m_currentStorage;
|
||||||
|
mutable ProjectExplorer::Project *m_currentProject{nullptr};
|
||||||
|
FileChunker m_chunker;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
2
context/RAGPreprocessor.cpp
Normal file
2
context/RAGPreprocessor.cpp
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
#include "RAGPreprocessor.hpp"
|
||||||
|
|
||||||
64
context/RAGPreprocessor.hpp
Normal file
64
context/RAGPreprocessor.hpp
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
#include <QRegularExpression>
|
||||||
|
#include <QString>
|
||||||
|
|
||||||
|
#include "Logger.hpp"
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
class RAGPreprocessor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static const QRegularExpression &getLicenseRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(
|
||||||
|
R"((/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)|//[^\n]*(?:\n|$))",
|
||||||
|
QRegularExpression::MultilineOption);
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const QRegularExpression &getClassRegex()
|
||||||
|
{
|
||||||
|
static const QRegularExpression regex(
|
||||||
|
R"((?:template\s*<[^>]*>\s*)?(?:class|struct)\s+(\w+)\s*(?:final\s*)?(?::\s*(?:public|protected|private)\s+\w+(?:\s*,\s*(?:public|protected|private)\s+\w+)*\s*)?{)");
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
static QString preprocessCode(const QString &code)
|
||||||
|
{
|
||||||
|
if (code.isEmpty()) {
|
||||||
|
return QString();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
QStringList lines = code.split('\n', Qt::SkipEmptyParts);
|
||||||
|
return processLines(lines);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
LOG_MESSAGE(QString("Error preprocessing code: %1").arg(e.what()));
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static QString processLines(const QStringList &lines)
|
||||||
|
{
|
||||||
|
const int estimatedAvgLength = 80;
|
||||||
|
QString result;
|
||||||
|
result.reserve(lines.size() * estimatedAvgLength);
|
||||||
|
|
||||||
|
for (const QString &line : lines) {
|
||||||
|
const QString trimmed = line.trimmed();
|
||||||
|
if (!trimmed.isEmpty()) {
|
||||||
|
result += trimmed;
|
||||||
|
result += QLatin1Char('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.endsWith('\n')) {
|
||||||
|
result.chop(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
67
context/RAGSimilaritySearch.cpp
Normal file
67
context/RAGSimilaritySearch.cpp
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "RAGSimilaritySearch.hpp"
|
||||||
|
#include "logger/Logger.hpp"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
float RAGSimilaritySearch::l2Distance(const RAGVector &v1, const RAGVector &v2)
|
||||||
|
{
|
||||||
|
if (v1.size() != v2.size()) {
|
||||||
|
LOG_MESSAGE(QString("Vector size mismatch: %1 vs %2").arg(v1.size()).arg(v2.size()));
|
||||||
|
return std::numeric_limits<float>::max();
|
||||||
|
}
|
||||||
|
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
|
float diff = v1[i] - v2[i];
|
||||||
|
sum += diff * diff;
|
||||||
|
}
|
||||||
|
return std::sqrt(sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
float RAGSimilaritySearch::cosineSimilarity(const RAGVector &v1, const RAGVector &v2)
|
||||||
|
{
|
||||||
|
if (v1.size() != v2.size()) {
|
||||||
|
LOG_MESSAGE(QString("Vector size mismatch: %1 vs %2").arg(v1.size()).arg(v2.size()));
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
float dotProduct = 0.0f;
|
||||||
|
float norm1 = 0.0f;
|
||||||
|
float norm2 = 0.0f;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
|
dotProduct += v1[i] * v2[i];
|
||||||
|
norm1 += v1[i] * v1[i];
|
||||||
|
norm2 += v2[i] * v2[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
norm1 = std::sqrt(norm1);
|
||||||
|
norm2 = std::sqrt(norm2);
|
||||||
|
|
||||||
|
if (norm1 == 0.0f || norm2 == 0.0f)
|
||||||
|
return 0.0f;
|
||||||
|
return dotProduct / (norm1 * norm2);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
37
context/RAGSimilaritySearch.hpp
Normal file
37
context/RAGSimilaritySearch.hpp
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "RAGData.hpp"
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
class RAGSimilaritySearch
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static float l2Distance(const RAGVector &v1, const RAGVector &v2);
|
||||||
|
|
||||||
|
static float cosineSimilarity(const RAGVector &v1, const RAGVector &v2);
|
||||||
|
|
||||||
|
private:
|
||||||
|
RAGSimilaritySearch() = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
1047
context/RAGStorage.cpp
Normal file
1047
context/RAGStorage.cpp
Normal file
File diff suppressed because it is too large
Load Diff
174
context/RAGStorage.hpp
Normal file
174
context/RAGStorage.hpp
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// RAGStorage.hpp
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
#include <QDateTime>
|
||||||
|
#include <QMutex>
|
||||||
|
#include <QObject>
|
||||||
|
#include <QSqlDatabase>
|
||||||
|
#include <QString>
|
||||||
|
#include <qsqlquery.h>
|
||||||
|
|
||||||
|
#include <RAGData.hpp>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
struct FileChunkData
|
||||||
|
{
|
||||||
|
QString filePath;
|
||||||
|
int startLine;
|
||||||
|
int endLine;
|
||||||
|
QString content;
|
||||||
|
QDateTime createdAt;
|
||||||
|
QDateTime updatedAt;
|
||||||
|
|
||||||
|
bool isValid() const
|
||||||
|
{
|
||||||
|
return !filePath.isEmpty() && startLine >= 0 && endLine >= startLine && !content.isEmpty();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct StorageOptions
|
||||||
|
{
|
||||||
|
int maxChunkSize = 1024 * 1024;
|
||||||
|
int maxVectorSize = 1024;
|
||||||
|
bool useCompression = false;
|
||||||
|
bool enableLogging = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct StorageStatistics
|
||||||
|
{
|
||||||
|
int totalChunks;
|
||||||
|
int totalVectors;
|
||||||
|
int totalFiles;
|
||||||
|
qint64 totalSize;
|
||||||
|
QDateTime lastUpdate;
|
||||||
|
};
|
||||||
|
|
||||||
|
class RAGStorage : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
static constexpr int CURRENT_VERSION = 1;
|
||||||
|
|
||||||
|
enum class Status { Ok, DatabaseError, ValidationError, VersionError, ConnectionError };
|
||||||
|
|
||||||
|
struct ValidationResult
|
||||||
|
{
|
||||||
|
bool isValid;
|
||||||
|
QString errorMessage;
|
||||||
|
Status errorStatus;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Error
|
||||||
|
{
|
||||||
|
QString message;
|
||||||
|
QString sqlError;
|
||||||
|
QString query;
|
||||||
|
Status status;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit RAGStorage(
|
||||||
|
const QString &dbPath,
|
||||||
|
const StorageOptions &options = StorageOptions(),
|
||||||
|
QObject *parent = nullptr);
|
||||||
|
~RAGStorage();
|
||||||
|
|
||||||
|
bool init();
|
||||||
|
Status status() const;
|
||||||
|
Error lastError() const;
|
||||||
|
bool isReady() const;
|
||||||
|
QString dbPath() const;
|
||||||
|
|
||||||
|
bool beginTransaction();
|
||||||
|
bool commitTransaction();
|
||||||
|
bool rollbackTransaction();
|
||||||
|
|
||||||
|
bool storeVector(const QString &filePath, const RAGVector &vector);
|
||||||
|
bool updateVector(const QString &filePath, const RAGVector &vector);
|
||||||
|
std::optional<RAGVector> getVector(const QString &filePath);
|
||||||
|
bool needsUpdate(const QString &filePath);
|
||||||
|
QStringList getAllFiles();
|
||||||
|
|
||||||
|
bool storeChunk(const FileChunkData &chunk);
|
||||||
|
bool storeChunks(const QList<FileChunkData> &chunks);
|
||||||
|
bool updateChunk(const FileChunkData &chunk);
|
||||||
|
bool updateChunks(const QList<FileChunkData> &chunks);
|
||||||
|
bool deleteChunksForFile(const QString &filePath);
|
||||||
|
std::optional<FileChunkData> getChunk(const QString &filePath, int startLine, int endLine);
|
||||||
|
QList<FileChunkData> getChunksForFile(const QString &filePath);
|
||||||
|
bool chunkExists(const QString &filePath, int startLine, int endLine);
|
||||||
|
|
||||||
|
int getChunkCount(const QString &filePath);
|
||||||
|
bool deleteOldChunks(const QString &filePath, const QDateTime &olderThan);
|
||||||
|
bool deleteAllChunks();
|
||||||
|
QStringList getFilesWithChunks();
|
||||||
|
bool vacuum();
|
||||||
|
bool backup(const QString &backupPath);
|
||||||
|
bool restore(const QString &backupPath);
|
||||||
|
StorageStatistics getStatistics() const;
|
||||||
|
|
||||||
|
int getStorageVersion() const;
|
||||||
|
bool isVersionCompatible() const;
|
||||||
|
|
||||||
|
bool applyMigration(int version);
|
||||||
|
signals:
|
||||||
|
void errorOccurred(const Error &error);
|
||||||
|
void operationCompleted(const QString &operation);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool createTables();
|
||||||
|
bool createIndices();
|
||||||
|
bool createVersionTable();
|
||||||
|
bool createChunksTable();
|
||||||
|
bool createVectorsTable();
|
||||||
|
bool openDatabase();
|
||||||
|
bool initializeNewStorage();
|
||||||
|
bool upgradeStorage(int fromVersion);
|
||||||
|
bool validateSchema() const;
|
||||||
|
|
||||||
|
QDateTime getFileLastModified(const QString &filePath);
|
||||||
|
RAGVector blobToVector(const QByteArray &blob);
|
||||||
|
QByteArray vectorToBlob(const RAGVector &vector);
|
||||||
|
|
||||||
|
void setError(const QString &message, Status status = Status::DatabaseError);
|
||||||
|
void clearError();
|
||||||
|
bool prepareStatements();
|
||||||
|
ValidationResult validateChunk(const FileChunkData &chunk) const;
|
||||||
|
ValidationResult validateVector(const RAGVector &vector) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
QSqlDatabase m_db;
|
||||||
|
QString m_dbPath;
|
||||||
|
StorageOptions m_options;
|
||||||
|
mutable QMutex m_mutex;
|
||||||
|
Error m_lastError;
|
||||||
|
Status m_status;
|
||||||
|
|
||||||
|
QSqlQuery m_insertChunkQuery;
|
||||||
|
QSqlQuery m_updateChunkQuery;
|
||||||
|
QSqlQuery m_insertVectorQuery;
|
||||||
|
QSqlQuery m_updateVectorQuery;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
116
context/RAGVectorizer.cpp
Normal file
116
context/RAGVectorizer.cpp
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "RAGVectorizer.hpp"
|
||||||
|
|
||||||
|
#include <QJsonArray>
|
||||||
|
#include <QJsonDocument>
|
||||||
|
#include <QJsonObject>
|
||||||
|
#include <QNetworkReply>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
RAGVectorizer::RAGVectorizer(const QString &providerUrl,
|
||||||
|
const QString &modelName,
|
||||||
|
QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
, m_network(new QNetworkAccessManager(this))
|
||||||
|
, m_embedProviderUrl(providerUrl)
|
||||||
|
, m_model(modelName)
|
||||||
|
{}
|
||||||
|
|
||||||
|
RAGVectorizer::~RAGVectorizer() {}
|
||||||
|
|
||||||
|
QJsonObject RAGVectorizer::prepareEmbeddingRequest(const QString &text) const
|
||||||
|
{
|
||||||
|
return QJsonObject{{"model", m_model}, {"prompt", text}};
|
||||||
|
}
|
||||||
|
|
||||||
|
RAGVector RAGVectorizer::parseEmbeddingResponse(const QByteArray &response) const
|
||||||
|
{
|
||||||
|
QJsonDocument doc = QJsonDocument::fromJson(response);
|
||||||
|
if (doc.isNull()) {
|
||||||
|
qDebug() << "Failed to parse JSON response";
|
||||||
|
return RAGVector();
|
||||||
|
}
|
||||||
|
|
||||||
|
QJsonObject obj = doc.object();
|
||||||
|
if (!obj.contains("embedding")) {
|
||||||
|
qDebug() << "Response does not contain 'embedding' field";
|
||||||
|
// qDebug() << "Response content:" << response;
|
||||||
|
return RAGVector();
|
||||||
|
}
|
||||||
|
|
||||||
|
QJsonArray array = obj["embedding"].toArray();
|
||||||
|
if (array.isEmpty()) {
|
||||||
|
qDebug() << "Embedding array is empty";
|
||||||
|
return RAGVector();
|
||||||
|
}
|
||||||
|
|
||||||
|
RAGVector result;
|
||||||
|
result.reserve(array.size());
|
||||||
|
for (const auto &value : array) {
|
||||||
|
result.push_back(value.toDouble());
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Successfully parsed vector with size:" << result.size();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
QFuture<RAGVector> RAGVectorizer::vectorizeText(const QString &text)
|
||||||
|
{
|
||||||
|
qDebug() << "Vectorizing text, length:" << text.length();
|
||||||
|
qDebug() << "Using embedding provider:" << m_embedProviderUrl;
|
||||||
|
|
||||||
|
auto promise = std::make_shared<QPromise<RAGVector>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
QNetworkRequest request(QUrl(m_embedProviderUrl + "/api/embeddings"));
|
||||||
|
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
|
||||||
|
|
||||||
|
QJsonObject requestData = prepareEmbeddingRequest(text);
|
||||||
|
QByteArray jsonData = QJsonDocument(requestData).toJson();
|
||||||
|
qDebug() << "Sending request to embeddings API:" << jsonData;
|
||||||
|
|
||||||
|
auto reply = m_network->post(request, jsonData);
|
||||||
|
|
||||||
|
connect(reply, &QNetworkReply::finished, this, [promise, reply, this]() {
|
||||||
|
if (reply->error() == QNetworkReply::NoError) {
|
||||||
|
QByteArray response = reply->readAll();
|
||||||
|
// qDebug() << "Received response from embeddings API:" << response;
|
||||||
|
|
||||||
|
auto vector = parseEmbeddingResponse(response);
|
||||||
|
qDebug() << "Parsed vector size:" << vector.size();
|
||||||
|
promise->addResult(vector);
|
||||||
|
} else {
|
||||||
|
qDebug() << "Network error:" << reply->errorString();
|
||||||
|
qDebug() << "HTTP status code:"
|
||||||
|
<< reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
|
||||||
|
qDebug() << "Response:" << reply->readAll();
|
||||||
|
|
||||||
|
promise->addResult(RAGVector());
|
||||||
|
}
|
||||||
|
promise->finish();
|
||||||
|
reply->deleteLater();
|
||||||
|
});
|
||||||
|
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
51
context/RAGVectorizer.hpp
Normal file
51
context/RAGVectorizer.hpp
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Petr Mironychev
|
||||||
|
*
|
||||||
|
* This file is part of QodeAssist.
|
||||||
|
*
|
||||||
|
* QodeAssist is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* QodeAssist is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QFuture>
|
||||||
|
#include <QNetworkAccessManager>
|
||||||
|
#include <QObject>
|
||||||
|
|
||||||
|
#include <RAGData.hpp>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
class RAGVectorizer : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
explicit RAGVectorizer(
|
||||||
|
const QString &providerUrl = "http://localhost:11434",
|
||||||
|
const QString &modelName = "all-minilm:33m-l12-v2-fp16",
|
||||||
|
QObject *parent = nullptr);
|
||||||
|
~RAGVectorizer();
|
||||||
|
|
||||||
|
QFuture<RAGVector> vectorizeText(const QString &text);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QJsonObject prepareEmbeddingRequest(const QString &text) const;
|
||||||
|
RAGVector parseEmbeddingResponse(const QByteArray &response) const;
|
||||||
|
|
||||||
|
QNetworkAccessManager *m_network;
|
||||||
|
QString m_embedProviderUrl;
|
||||||
|
QString m_model;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
||||||
Reference in New Issue
Block a user