mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2025-05-28 03:10:28 -04:00
feat: Add similarity search
This commit is contained in:
parent
5a426b4d9f
commit
7b73d7af7b
@ -452,17 +452,36 @@ void ChatRootView::openChatHistoryFolder()
|
||||
void ChatRootView::testRAG()
|
||||
{
|
||||
auto project = ProjectExplorer::ProjectTree::currentProject();
|
||||
if (project) {
|
||||
auto files = Context::ContextManager::instance().getProjectSourceFiles(project);
|
||||
auto future = Context::RAGManager::instance().processFiles(project, files);
|
||||
connect(
|
||||
&Context::RAGManager::instance(),
|
||||
&Context::RAGManager::vectorizationProgress,
|
||||
this,
|
||||
[](int processed, int total) {
|
||||
qDebug() << "Processed" << processed << "of" << total << "files";
|
||||
});
|
||||
if (!project) {
|
||||
qDebug() << "No active project found";
|
||||
return;
|
||||
}
|
||||
|
||||
const QString TEST_QUERY = "";
|
||||
|
||||
qDebug() << "Starting RAG test with query:";
|
||||
qDebug() << TEST_QUERY;
|
||||
qDebug() << "\nFirst, processing project files...";
|
||||
|
||||
auto files = Context::ContextManager::instance().getProjectSourceFiles(project);
|
||||
auto future = Context::RAGManager::instance().processFiles(project, files);
|
||||
|
||||
connect(
|
||||
&Context::RAGManager::instance(),
|
||||
&Context::RAGManager::vectorizationProgress,
|
||||
this,
|
||||
[](int processed, int total) {
|
||||
qDebug() << QString("Vectorization progress: %1 of %2 files").arg(processed).arg(total);
|
||||
});
|
||||
|
||||
connect(
|
||||
&Context::RAGManager::instance(),
|
||||
&Context::RAGManager::vectorizationFinished,
|
||||
this,
|
||||
[this, project, TEST_QUERY]() {
|
||||
qDebug() << "\nVectorization completed. Starting similarity search...\n";
|
||||
Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5);
|
||||
});
|
||||
}
|
||||
|
||||
void ChatRootView::updateInputTokensCount()
|
||||
|
@ -9,6 +9,7 @@ add_library(Context STATIC
|
||||
RAGStorage.hpp RAGStorage.cpp
|
||||
RAGData.hpp
|
||||
RAGVectorizer.hpp RAGVectorizer.cpp
|
||||
RAGSimilaritySearch.hpp RAGSimilaritySearch.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(Context
|
||||
|
@ -67,6 +67,28 @@ ContentFile ContextManager::createContentFile(const QString &filePath) const
|
||||
return contentFile;
|
||||
}
|
||||
|
||||
bool ContextManager::isInBuildDirectory(const QString &filePath) const
|
||||
{
|
||||
static const QStringList buildDirPatterns
|
||||
= {"/build/",
|
||||
"/Build/",
|
||||
"/BUILD/",
|
||||
"/debug/",
|
||||
"/Debug/",
|
||||
"/DEBUG/",
|
||||
"/release/",
|
||||
"/Release/",
|
||||
"/RELEASE/",
|
||||
"/builds/"};
|
||||
|
||||
for (const QString &pattern : buildDirPatterns) {
|
||||
if (filePath.contains(pattern)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
QStringList ContextManager::getProjectSourceFiles(ProjectExplorer::Project *project) const
|
||||
{
|
||||
QStringList sourceFiles;
|
||||
@ -79,8 +101,11 @@ QStringList ContextManager::getProjectSourceFiles(ProjectExplorer::Project *proj
|
||||
|
||||
projectNode->forEachNode(
|
||||
[&sourceFiles, this](ProjectExplorer::FileNode *fileNode) {
|
||||
if (fileNode && shouldProcessFile(fileNode->filePath().toString())) {
|
||||
sourceFiles.append(fileNode->filePath().toString());
|
||||
if (fileNode) {
|
||||
QString filePath = fileNode->filePath().toString();
|
||||
if (shouldProcessFile(filePath) && !isInBuildDirectory(filePath)) {
|
||||
sourceFiles.append(filePath);
|
||||
}
|
||||
}
|
||||
},
|
||||
nullptr);
|
||||
|
@ -48,6 +48,7 @@ private:
|
||||
|
||||
ContentFile createContentFile(const QString &filePath) const;
|
||||
bool shouldProcessFile(const QString &filePath) const;
|
||||
bool isInBuildDirectory(const QString &filePath) const;
|
||||
};
|
||||
|
||||
} // namespace QodeAssist::Context
|
||||
|
@ -18,11 +18,14 @@
|
||||
*/
|
||||
|
||||
#include "RAGManager.hpp"
|
||||
#include "RAGSimilaritySearch.hpp"
|
||||
#include "logger/Logger.hpp"
|
||||
|
||||
#include <coreplugin/icore.h>
|
||||
#include <projectexplorer/project.h>
|
||||
#include <QFile>
|
||||
#include <QtConcurrent>
|
||||
#include <queue>
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
@ -39,6 +42,13 @@ RAGManager::RAGManager(QObject *parent)
|
||||
|
||||
RAGManager::~RAGManager() {}
|
||||
|
||||
bool RAGManager::SearchResult::operator<(const SearchResult &other) const
|
||||
{
|
||||
if (cosineScore != other.cosineScore)
|
||||
return cosineScore > other.cosineScore;
|
||||
return l2Score < other.l2Score;
|
||||
}
|
||||
|
||||
QString RAGManager::getStoragePath(ProjectExplorer::Project *project) const
|
||||
{
|
||||
return QString("%1/qodeassist/%2/rag/vectors.db")
|
||||
@ -165,7 +175,11 @@ QFuture<bool> RAGManager::processFile(ProjectExplorer::Project *project, const Q
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
auto vectorFuture = m_vectorizer->vectorizeText(QString::fromUtf8(file.readAll()));
|
||||
QFileInfo fileInfo(filePath);
|
||||
QString fileName = fileInfo.fileName();
|
||||
QString content = QString("// %1\n%2").arg(fileName, QString::fromUtf8(file.readAll()));
|
||||
|
||||
auto vectorFuture = m_vectorizer->vectorizeText(content);
|
||||
vectorFuture.then([promise, filePath, this](const RAGVector &vector) {
|
||||
if (vector.empty()) {
|
||||
promise->addResult(false);
|
||||
@ -214,4 +228,67 @@ bool RAGManager::isFileStorageOutdated(
|
||||
return m_currentStorage->needsUpdate(filePath);
|
||||
}
|
||||
|
||||
QFuture<QList<RAGManager::SearchResult>> RAGManager::search(
|
||||
const QString &text, ProjectExplorer::Project *project, int topK)
|
||||
{
|
||||
auto promise = std::make_shared<QPromise<QList<SearchResult>>>();
|
||||
promise->start();
|
||||
|
||||
auto queryVectorFuture = m_vectorizer->vectorizeText(text);
|
||||
queryVectorFuture.then([this, promise, project, topK](const RAGVector &queryVector) {
|
||||
if (queryVector.empty()) {
|
||||
LOG_MESSAGE("Failed to vectorize query text");
|
||||
promise->addResult(QList<SearchResult>());
|
||||
promise->finish();
|
||||
return;
|
||||
}
|
||||
|
||||
auto storedFiles = getStoredFiles(project);
|
||||
std::priority_queue<SearchResult> results;
|
||||
|
||||
for (const auto &filePath : storedFiles) {
|
||||
auto storedVector = loadVectorFromStorage(project, filePath);
|
||||
if (!storedVector.has_value())
|
||||
continue;
|
||||
|
||||
float l2Score = RAGSimilaritySearch::l2Distance(queryVector, storedVector.value());
|
||||
float cosineScore
|
||||
= RAGSimilaritySearch::cosineSimilarity(queryVector, storedVector.value());
|
||||
|
||||
results.push(SearchResult{filePath, l2Score, cosineScore});
|
||||
}
|
||||
|
||||
QList<SearchResult> resultsList;
|
||||
int count = 0;
|
||||
while (!results.empty() && count < topK) {
|
||||
resultsList.append(results.top());
|
||||
results.pop();
|
||||
count++;
|
||||
}
|
||||
|
||||
promise->addResult(resultsList);
|
||||
promise->finish();
|
||||
});
|
||||
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
void RAGManager::searchSimilarDocuments(
|
||||
const QString &text, ProjectExplorer::Project *project, int topK)
|
||||
{
|
||||
auto future = search(text, project, topK);
|
||||
future.then([this](const QList<SearchResult> &results) { logSearchResults(results); });
|
||||
}
|
||||
|
||||
void RAGManager::logSearchResults(const QList<SearchResult> &results) const
|
||||
{
|
||||
qDebug() << QString("\nTop %1 similar documents:").arg(results.size());
|
||||
|
||||
for (const auto &result : results) {
|
||||
qDebug() << QString("File: %1").arg(result.filePath);
|
||||
qDebug() << QString(" Cosine Similarity: %1").arg(result.cosineScore);
|
||||
qDebug() << QString(" L2 Distance: %1\n").arg(result.l2Score);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace QodeAssist::Context
|
||||
|
@ -40,11 +40,28 @@ class RAGManager : public QObject
|
||||
public:
|
||||
static RAGManager &instance();
|
||||
|
||||
struct SearchResult
|
||||
{
|
||||
QString filePath;
|
||||
float l2Score;
|
||||
float cosineScore;
|
||||
|
||||
bool operator<(const SearchResult &other) const;
|
||||
};
|
||||
|
||||
// Process and vectorize files
|
||||
QFuture<void> processFiles(ProjectExplorer::Project *project, const QStringList &filePaths);
|
||||
std::optional<RAGVector> loadVectorFromStorage(
|
||||
ProjectExplorer::Project *project, const QString &filePath);
|
||||
QStringList getStoredFiles(ProjectExplorer::Project *project) const;
|
||||
bool isFileStorageOutdated(ProjectExplorer::Project *project, const QString &filePath) const;
|
||||
RAGVectorizer *getVectorizer() const { return m_vectorizer.get(); }
|
||||
|
||||
// Search functionality
|
||||
QFuture<QList<SearchResult>> search(
|
||||
const QString &text, ProjectExplorer::Project *project, int topK = 5);
|
||||
void searchSimilarDocuments(const QString &text, ProjectExplorer::Project *project, int topK = 5);
|
||||
void logSearchResults(const QList<SearchResult> &results) const;
|
||||
|
||||
signals:
|
||||
void vectorizationProgress(int processed, int total);
|
||||
@ -53,6 +70,8 @@ signals:
|
||||
private:
|
||||
RAGManager(QObject *parent = nullptr);
|
||||
~RAGManager();
|
||||
RAGManager(const RAGManager &) = delete;
|
||||
RAGManager &operator=(const RAGManager &) = delete;
|
||||
|
||||
QFuture<bool> processFile(ProjectExplorer::Project *project, const QString &filePath);
|
||||
void processNextBatch(
|
||||
|
67
context/RAGSimilaritySearch.cpp
Normal file
67
context/RAGSimilaritySearch.cpp
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Petr Mironychev
|
||||
*
|
||||
* This file is part of QodeAssist.
|
||||
*
|
||||
* QodeAssist is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* QodeAssist is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "RAGSimilaritySearch.hpp"
|
||||
#include "logger/Logger.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
float RAGSimilaritySearch::l2Distance(const RAGVector &v1, const RAGVector &v2)
|
||||
{
|
||||
if (v1.size() != v2.size()) {
|
||||
LOG_MESSAGE(QString("Vector size mismatch: %1 vs %2").arg(v1.size()).arg(v2.size()));
|
||||
return std::numeric_limits<float>::max();
|
||||
}
|
||||
|
||||
float sum = 0.0f;
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
float diff = v1[i] - v2[i];
|
||||
sum += diff * diff;
|
||||
}
|
||||
return std::sqrt(sum);
|
||||
}
|
||||
|
||||
float RAGSimilaritySearch::cosineSimilarity(const RAGVector &v1, const RAGVector &v2)
|
||||
{
|
||||
if (v1.size() != v2.size()) {
|
||||
LOG_MESSAGE(QString("Vector size mismatch: %1 vs %2").arg(v1.size()).arg(v2.size()));
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
float dotProduct = 0.0f;
|
||||
float norm1 = 0.0f;
|
||||
float norm2 = 0.0f;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
dotProduct += v1[i] * v2[i];
|
||||
norm1 += v1[i] * v1[i];
|
||||
norm2 += v2[i] * v2[i];
|
||||
}
|
||||
|
||||
norm1 = std::sqrt(norm1);
|
||||
norm2 = std::sqrt(norm2);
|
||||
|
||||
if (norm1 == 0.0f || norm2 == 0.0f)
|
||||
return 0.0f;
|
||||
return dotProduct / (norm1 * norm2);
|
||||
}
|
||||
|
||||
} // namespace QodeAssist::Context
|
37
context/RAGSimilaritySearch.hpp
Normal file
37
context/RAGSimilaritySearch.hpp
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Petr Mironychev
|
||||
*
|
||||
* This file is part of QodeAssist.
|
||||
*
|
||||
* QodeAssist is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* QodeAssist is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "RAGData.hpp"
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
class RAGSimilaritySearch
|
||||
{
|
||||
public:
|
||||
static float l2Distance(const RAGVector &v1, const RAGVector &v2);
|
||||
|
||||
static float cosineSimilarity(const RAGVector &v1, const RAGVector &v2);
|
||||
|
||||
private:
|
||||
RAGSimilaritySearch() = delete;
|
||||
};
|
||||
|
||||
} // namespace QodeAssist::Context
|
Loading…
Reference in New Issue
Block a user