mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2025-05-27 19:00:30 -04:00
feat: Add vector and chunk usage to Context Manager
This commit is contained in:
parent
f36db033e6
commit
142afa725f
@ -450,6 +450,8 @@ void ChatRootView::openChatHistoryFolder()
|
||||
QDesktopServices::openUrl(url);
|
||||
}
|
||||
|
||||
// ChatRootView.cpp
|
||||
|
||||
void ChatRootView::testRAG(const QString &message)
|
||||
{
|
||||
auto project = ProjectExplorer::ProjectTree::currentProject();
|
||||
@ -465,7 +467,9 @@ void ChatRootView::testRAG(const QString &message)
|
||||
qDebug() << "\nFirst, processing project files...";
|
||||
|
||||
auto files = Context::ContextManager::instance().getProjectSourceFiles(project);
|
||||
auto future = Context::RAGManager::instance().processFiles(project, files);
|
||||
// Was: auto future = Context::RAGManager::instance().processFiles(project, files);
|
||||
// Now:
|
||||
auto future = Context::RAGManager::instance().processProjectFiles(project, files);
|
||||
|
||||
connect(
|
||||
&Context::RAGManager::instance(),
|
||||
@ -481,7 +485,19 @@ void ChatRootView::testRAG(const QString &message)
|
||||
this,
|
||||
[this, project, TEST_QUERY]() {
|
||||
qDebug() << "\nVectorization completed. Starting similarity search...\n";
|
||||
Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5);
|
||||
// Was: Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5);
|
||||
// Now:
|
||||
auto future = Context::RAGManager::instance().findRelevantChunks(TEST_QUERY, project, 5);
|
||||
future.then([](const QList<Context::RAGManager::ChunkSearchResult> &results) {
|
||||
qDebug() << "Found" << results.size() << "relevant chunks:";
|
||||
for (const auto &result : results) {
|
||||
qDebug() << "File:" << result.filePath;
|
||||
qDebug() << "Lines:" << result.startLine << "-" << result.endLine;
|
||||
qDebug() << "Score:" << result.combinedScore;
|
||||
qDebug() << "Content:" << result.content;
|
||||
qDebug() << "---";
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -29,13 +29,13 @@ class FileChunker : public QObject
|
||||
public:
|
||||
// Settings that control how files are split into chunks. This span of the diff shows
// each member twice: first with its original comment, then without it.
struct ChunkingConfig
|
||||
{
|
||||
int maxLinesPerChunk = 80; // Chunk size (was 200)
|
||||
int minLinesPerChunk = 40; // Minimum size at which chunking starts
|
||||
int overlapLines = 20; // Overlap between chunks
|
||||
bool skipEmptyLines = true; // Skip empty lines
|
||||
bool preserveFunctions = true; // Keep functions whole
|
||||
bool preserveClasses = true; // Keep classes whole
|
||||
int batchSize = 10; // Number of files to process in parallel
|
||||
int maxLinesPerChunk = 80;
|
||||
int minLinesPerChunk = 40;
|
||||
int overlapLines = 20;
|
||||
bool skipEmptyLines = true;
|
||||
bool preserveFunctions = true;
|
||||
bool preserveClasses = true;
|
||||
int batchSize = 10;
|
||||
};
|
||||
|
||||
explicit FileChunker(QObject *parent = nullptr);
|
||||
|
@ -24,8 +24,8 @@
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
// Programming languages distinguished in the QodeAssist::Context namespace.
// NOTE(review): Unknown is presumably the fallback for unrecognized files - confirm against callers.
enum class ProgrammingLanguage {
|
||||
QML, // QML/JavaScript
|
||||
Cpp, // C/C++
|
||||
QML,
|
||||
Cpp,
|
||||
Python,
|
||||
Unknown,
|
||||
};
|
||||
|
@ -18,6 +18,8 @@
|
||||
*/
|
||||
|
||||
#include "RAGManager.hpp"
|
||||
#include "EnhancedRAGSimilaritySearch.hpp"
|
||||
#include "RAGPreprocessor.hpp"
|
||||
#include "RAGSimilaritySearch.hpp"
|
||||
#include "logger/Logger.hpp"
|
||||
|
||||
@ -25,10 +27,6 @@
|
||||
#include <projectexplorer/project.h>
|
||||
#include <QFile>
|
||||
#include <QtConcurrent>
|
||||
#include <queue>
|
||||
|
||||
#include <EnhancedRAGSimilaritySearch.hpp>
|
||||
#include <RAGPreprocessor.hpp>
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
@ -45,13 +43,6 @@ RAGManager::RAGManager(QObject *parent)
|
||||
|
||||
// Destructor with an empty body; nothing is released by hand here.
RAGManager::~RAGManager() {}
|
||||
|
||||
// bool RAGManager::SearchResult::operator<(const SearchResult &other) const
|
||||
// {
|
||||
// if (cosineScore != other.cosineScore)
|
||||
// return cosineScore > other.cosineScore;
|
||||
// return l2Score < other.l2Score;
|
||||
// }
|
||||
|
||||
QString RAGManager::getStoragePath(ProjectExplorer::Project *project) const
|
||||
{
|
||||
return QString("%1/qodeassist/%2/rag/vectors.db")
|
||||
@ -78,50 +69,61 @@ std::optional<QString> RAGManager::loadFileContent(const QString &filePath)
|
||||
return content;
|
||||
}
|
||||
|
||||
// Makes m_currentStorage point at an initialized RAGStorage for `project`.
//
// If the storage of `project` is already open it is reused. Otherwise the current
// storage is dropped and a new one is created at getStoragePath(project) and
// initialized. When initialization fails the storage is reset, so callers detect the
// failure by checking m_currentStorage afterwards.
//
// project  project whose storage should become current; a null pointer only
//          releases the current storage.
void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project) const
{
    // Handle the null case before anything dereferences the pointer: the log line
    // below used to run ahead of the null check.
    if (!project) {
        qDebug() << "No project given, releasing current storage";
        m_currentStorage.reset();
        m_currentProject = nullptr;
        return;
    }

    qDebug() << "Ensuring storage for project:" << project->displayName();

    if (m_currentProject == project && m_currentStorage) {
        qDebug() << "Using existing storage";
        return;
    }

    qDebug() << "Creating new storage";
    m_currentStorage.reset();
    m_currentProject = project;

    QString storagePath = getStoragePath(project);
    qDebug() << "Storage path:" << storagePath;

    StorageOptions options;
    m_currentStorage = std::make_unique<RAGStorage>(storagePath, options);

    qDebug() << "Initializing storage...";
    if (!m_currentStorage->init()) {
        qDebug() << "Failed to initialize storage";
        m_currentStorage.reset();
        return;
    }

    // An incompatible version is only reported for now; the storage stays in use.
    if (!m_currentStorage->isVersionCompatible()) {
        qDebug() << "Storage version is incompatible, needs rebuild";
        // todo recreate db or show error
    }
    qDebug() << "Storage initialized successfully";
}
|
||||
|
||||
QFuture<void> RAGManager::processFiles(
|
||||
ProjectExplorer::Project *project, const QStringList &filePaths)
|
||||
QFuture<void> RAGManager::processProjectFiles(
|
||||
ProjectExplorer::Project *project,
|
||||
const QStringList &filePaths,
|
||||
const FileChunker::ChunkingConfig &config)
|
||||
{
|
||||
qDebug() << "Starting batch processing of" << filePaths.size()
|
||||
qDebug() << "\nStarting batch processing of" << filePaths.size()
|
||||
<< "files for project:" << project->displayName();
|
||||
|
||||
auto promise = std::make_shared<QPromise<void>>();
|
||||
promise->start();
|
||||
|
||||
qDebug() << "Initializing storage...";
|
||||
ensureStorageForProject(project);
|
||||
|
||||
if (!m_currentStorage) {
|
||||
qDebug() << "Failed to initialize storage for project:" << project->displayName();
|
||||
promise->finish();
|
||||
return promise->future();
|
||||
}
|
||||
qDebug() << "Storage initialized successfully";
|
||||
|
||||
const int batchSize = 10;
|
||||
|
||||
qDebug() << "Checking files for processing...";
|
||||
QSet<QString> uniqueFiles;
|
||||
for (const QString &filePath : filePaths) {
|
||||
qDebug() << "Checking file:" << filePath;
|
||||
if (isFileStorageOutdated(project, filePath)) {
|
||||
qDebug() << "File needs processing:" << filePath;
|
||||
uniqueFiles.insert(filePath);
|
||||
@ -137,103 +139,23 @@ QFuture<void> RAGManager::processFiles(
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
qDebug() << "Processing" << filesToProcess.size() << "files in batches of" << batchSize;
|
||||
|
||||
processNextBatch(promise, project, filesToProcess, 0, batchSize);
|
||||
qDebug() << "Starting to process" << filesToProcess.size() << "files";
|
||||
const int batchSize = 10;
|
||||
processNextFileBatch(promise, project, filesToProcess, config, 0, batchSize);
|
||||
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
void RAGManager::searchSimilarDocuments(
|
||||
const QString &text, ProjectExplorer::Project *project, int topK)
|
||||
{
|
||||
qDebug() << "\nStarting similarity search with parameters:";
|
||||
qDebug() << "Query length:" << text.length();
|
||||
qDebug() << "Project:" << project->displayName();
|
||||
qDebug() << "Top K:" << topK;
|
||||
|
||||
QString processedText = RAGPreprocessor::preprocessCode(text);
|
||||
qDebug() << "Preprocessed query length:" << processedText.length();
|
||||
|
||||
auto future = m_vectorizer->vectorizeText(processedText);
|
||||
qDebug() << "Started query vectorization";
|
||||
|
||||
future.then([this, project, processedText, topK, text](const RAGVector &queryVector) {
|
||||
if (queryVector.empty()) {
|
||||
qDebug() << "ERROR: Query vectorization failed - empty vector";
|
||||
return;
|
||||
}
|
||||
qDebug() << "Query vector generated, size:" << queryVector.size();
|
||||
|
||||
auto storedFiles = getStoredFiles(project);
|
||||
qDebug() << "Found" << storedFiles.size() << "stored files to compare";
|
||||
|
||||
QList<SearchResult> results;
|
||||
results.reserve(storedFiles.size());
|
||||
|
||||
int processedFiles = 0;
|
||||
int skippedFiles = 0;
|
||||
|
||||
for (const auto &filePath : storedFiles) {
|
||||
auto storedCode = loadFileContent(filePath);
|
||||
if (!storedCode.has_value()) {
|
||||
qDebug() << "ERROR: Failed to load content for file:" << filePath;
|
||||
skippedFiles++;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto storedVector = loadVectorFromStorage(project, filePath);
|
||||
if (!storedVector.has_value()) {
|
||||
qDebug() << "ERROR: Failed to load vector for file:" << filePath;
|
||||
skippedFiles++;
|
||||
continue;
|
||||
}
|
||||
|
||||
QString processedStoredCode = RAGPreprocessor::preprocessCode(storedCode.value());
|
||||
|
||||
auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity(
|
||||
queryVector, storedVector.value(), processedText, processedStoredCode);
|
||||
|
||||
results.append(
|
||||
{filePath,
|
||||
similarity.semantic_similarity,
|
||||
similarity.structural_similarity,
|
||||
similarity.combined_score});
|
||||
|
||||
processedFiles++;
|
||||
if (processedFiles % 100 == 0) {
|
||||
qDebug() << "Processed" << processedFiles << "files...";
|
||||
}
|
||||
}
|
||||
|
||||
qDebug() << "\nSearch statistics:";
|
||||
qDebug() << "Total files processed:" << processedFiles;
|
||||
qDebug() << "Files skipped:" << skippedFiles;
|
||||
qDebug() << "Total results before filtering:" << results.size();
|
||||
|
||||
if (results.size() > topK) {
|
||||
qDebug() << "Performing partial sort for top" << topK << "results";
|
||||
std::partial_sort(results.begin(), results.begin() + topK, results.end());
|
||||
results = results.mid(0, topK);
|
||||
} else {
|
||||
qDebug() << "Performing full sort for" << results.size() << "results";
|
||||
std::sort(results.begin(), results.end());
|
||||
}
|
||||
|
||||
qDebug() << "Sorting completed, logging final results...";
|
||||
logSearchResults(results);
|
||||
});
|
||||
}
|
||||
|
||||
void RAGManager::processNextBatch(
|
||||
void RAGManager::processNextFileBatch(
|
||||
std::shared_ptr<QPromise<void>> promise,
|
||||
ProjectExplorer::Project *project,
|
||||
const QStringList &files,
|
||||
const FileChunker::ChunkingConfig &config,
|
||||
int startIndex,
|
||||
int batchSize)
|
||||
{
|
||||
if (startIndex >= files.size()) {
|
||||
qDebug() << "All batches processed";
|
||||
qDebug() << "All batches processed successfully";
|
||||
emit vectorizationFinished();
|
||||
promise->finish();
|
||||
return;
|
||||
@ -242,12 +164,13 @@ void RAGManager::processNextBatch(
|
||||
int endIndex = qMin(startIndex + batchSize, files.size());
|
||||
auto currentBatch = files.mid(startIndex, endIndex - startIndex);
|
||||
|
||||
qDebug() << "Processing batch" << startIndex / batchSize + 1 << "files" << startIndex << "to"
|
||||
<< endIndex;
|
||||
qDebug() << "\nProcessing batch" << (startIndex / batchSize + 1) << "(" << currentBatch.size()
|
||||
<< "files)"
|
||||
<< "\nProgress:" << startIndex << "to" << endIndex << "of" << files.size();
|
||||
|
||||
for (const QString &filePath : currentBatch) {
|
||||
qDebug() << "Starting processing of file:" << filePath;
|
||||
auto future = processFile(project, filePath);
|
||||
qDebug() << "Starting processing file:" << filePath;
|
||||
auto future = processFileWithChunks(project, filePath, config);
|
||||
auto watcher = new QFutureWatcher<bool>;
|
||||
watcher->setFuture(future);
|
||||
|
||||
@ -255,7 +178,16 @@ void RAGManager::processNextBatch(
|
||||
watcher,
|
||||
&QFutureWatcher<bool>::finished,
|
||||
this,
|
||||
[this, watcher, promise, project, files, startIndex, endIndex, batchSize, filePath]() {
|
||||
[this,
|
||||
watcher,
|
||||
promise,
|
||||
project,
|
||||
files,
|
||||
startIndex,
|
||||
endIndex,
|
||||
batchSize,
|
||||
config,
|
||||
filePath]() {
|
||||
bool success = watcher->result();
|
||||
qDebug() << "File processed:" << filePath << "success:" << success;
|
||||
|
||||
@ -263,7 +195,7 @@ void RAGManager::processNextBatch(
|
||||
if (isLastFileInBatch) {
|
||||
qDebug() << "Batch completed, moving to next batch";
|
||||
emit vectorizationProgress(endIndex, files.size());
|
||||
processNextBatch(promise, project, files, endIndex, batchSize);
|
||||
processNextFileBatch(promise, project, files, config, endIndex, batchSize);
|
||||
}
|
||||
|
||||
watcher->deleteLater();
|
||||
@ -271,61 +203,223 @@ void RAGManager::processNextBatch(
|
||||
}
|
||||
}
|
||||
|
||||
QFuture<bool> RAGManager::processFile(ProjectExplorer::Project *project, const QString &filePath)
|
||||
QFuture<bool> RAGManager::processFileWithChunks(
|
||||
ProjectExplorer::Project *project,
|
||||
const QString &filePath,
|
||||
const FileChunker::ChunkingConfig &config)
|
||||
{
|
||||
qDebug() << "Starting to process file:" << filePath;
|
||||
|
||||
auto promise = std::make_shared<QPromise<bool>>();
|
||||
promise->start();
|
||||
|
||||
ensureStorageForProject(project);
|
||||
if (!m_currentStorage) {
|
||||
qDebug() << "ERROR: Storage not initialized for project" << project->displayName();
|
||||
qDebug() << "Storage not initialized for file:" << filePath;
|
||||
promise->addResult(false);
|
||||
promise->finish();
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
QFile file(filePath);
|
||||
if (!file.open(QIODevice::ReadOnly)) {
|
||||
qDebug() << "ERROR: Failed to open file for reading:" << filePath;
|
||||
auto fileContent = loadFileContent(filePath);
|
||||
if (!fileContent) {
|
||||
qDebug() << "Failed to load content for file:" << filePath;
|
||||
promise->addResult(false);
|
||||
promise->finish();
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
QFileInfo fileInfo(filePath);
|
||||
QString fileName = fileInfo.fileName();
|
||||
QString content = QString::fromUtf8(file.readAll());
|
||||
qDebug() << "Creating chunks for file:" << filePath;
|
||||
auto chunksFuture = m_chunker.chunkFiles({filePath});
|
||||
auto chunks = chunksFuture.result();
|
||||
|
||||
qDebug() << "File" << fileName << "read, content size:" << content.size() << "bytes";
|
||||
|
||||
QString processedContent = RAGPreprocessor::preprocessCode(content);
|
||||
qDebug() << "Preprocessed content size:" << processedContent.size() << "bytes";
|
||||
|
||||
auto vectorFuture = m_vectorizer->vectorizeText(processedContent);
|
||||
qDebug() << "Started vectorization for file:" << fileName;
|
||||
|
||||
vectorFuture.then([promise, filePath, fileName, this](const RAGVector &vector) {
|
||||
if (vector.empty()) {
|
||||
qDebug() << "ERROR: Vectorization failed for file:" << fileName << "- empty vector";
|
||||
promise->addResult(false);
|
||||
} else {
|
||||
qDebug() << "Vector generated for file:" << fileName << "size:" << vector.size();
|
||||
bool success = m_currentStorage->storeVector(filePath, vector);
|
||||
if (!success) {
|
||||
qDebug() << "ERROR: Failed to store vector for file:" << fileName;
|
||||
} else {
|
||||
qDebug() << "Successfully stored vector for file:" << fileName;
|
||||
}
|
||||
promise->addResult(success);
|
||||
}
|
||||
if (chunks.isEmpty()) {
|
||||
qDebug() << "No chunks created for file:" << filePath;
|
||||
promise->addResult(false);
|
||||
promise->finish();
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
qDebug() << "Created" << chunks.size() << "chunks for file:" << filePath;
|
||||
|
||||
// Convert FileChunk to FileChunkData
|
||||
QList<FileChunkData> chunkData;
|
||||
for (const auto &chunk : chunks) {
|
||||
FileChunkData data;
|
||||
data.filePath = chunk.filePath;
|
||||
data.startLine = chunk.startLine;
|
||||
data.endLine = chunk.endLine;
|
||||
data.content = chunk.content;
|
||||
chunkData.append(data);
|
||||
}
|
||||
|
||||
qDebug() << "Deleting old chunks for file:" << filePath;
|
||||
m_currentStorage->deleteChunksForFile(filePath);
|
||||
|
||||
auto vectorizeFuture = vectorizeAndStoreChunks(filePath, chunkData);
|
||||
auto watcher = new QFutureWatcher<void>;
|
||||
watcher->setFuture(vectorizeFuture);
|
||||
|
||||
connect(watcher, &QFutureWatcher<void>::finished, this, [promise, watcher, filePath]() {
|
||||
qDebug() << "Completed processing file:" << filePath;
|
||||
promise->addResult(true);
|
||||
promise->finish();
|
||||
watcher->deleteLater();
|
||||
});
|
||||
|
||||
return promise->future();
|
||||
}
|
||||
|
||||
// Starts vectorizing and storing `chunks` and returns a future that finishes once
// processNextChunk() has walked the whole list.
//
// filePath  file the chunks come from (used for logging only)
// chunks    chunks to vectorize and store, in order
QFuture<void> RAGManager::vectorizeAndStoreChunks(
    const QString &filePath, const QList<FileChunkData> &chunks)
{
    qDebug() << "Vectorizing and storing" << chunks.size() << "chunks for file:" << filePath;

    auto completion = std::make_shared<QPromise<void>>();
    completion->start();

    // Process the chunks one after another
    constexpr int firstChunk = 0;
    processNextChunk(completion, chunks, firstChunk);

    QFuture<void> pending = completion->future();
    return pending;
}
|
||||
|
||||
void RAGManager::processNextChunk(
|
||||
std::shared_ptr<QPromise<void>> promise, const QList<FileChunkData> &chunks, int currentIndex)
|
||||
{
|
||||
if (currentIndex >= chunks.size()) {
|
||||
promise->finish();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto &chunk = chunks[currentIndex];
|
||||
QString processedContent = RAGPreprocessor::preprocessCode(chunk.content);
|
||||
qDebug() << "Processing chunk" << currentIndex + 1 << "of" << chunks.size();
|
||||
|
||||
auto vectorFuture = m_vectorizer->vectorizeText(processedContent);
|
||||
auto watcher = new QFutureWatcher<RAGVector>;
|
||||
watcher->setFuture(vectorFuture);
|
||||
|
||||
connect(
|
||||
watcher,
|
||||
&QFutureWatcher<RAGVector>::finished,
|
||||
this,
|
||||
[this, watcher, promise, chunks, currentIndex, chunk]() {
|
||||
auto vector = watcher->result();
|
||||
|
||||
if (!vector.empty()) {
|
||||
qDebug() << "Storing vector and chunk for file:" << chunk.filePath;
|
||||
bool vectorStored = m_currentStorage->storeVector(chunk.filePath, vector);
|
||||
bool chunkStored = m_currentStorage->storeChunk(chunk);
|
||||
qDebug() << "Storage results - Vector:" << vectorStored << "Chunk:" << chunkStored;
|
||||
} else {
|
||||
qDebug() << "Failed to vectorize chunk content";
|
||||
}
|
||||
|
||||
processNextChunk(promise, chunks, currentIndex + 1);
|
||||
|
||||
watcher->deleteLater();
|
||||
});
|
||||
}
|
||||
|
||||
// Finds the stored chunks most similar to `query`.
//
// The query is preprocessed and vectorized; once the vector is available every chunk
// in the project's storage is ranked with rankChunks() and the first `topK` entries
// are delivered through the returned future. The storage is closed after each
// completed search.
//
// query    raw query text
// project  project whose storage is searched
// topK     maximum number of results to deliver
//
// Returns a future holding the ranked results. It holds an empty list when no project
// is given, the storage cannot be opened, the query cannot be vectorized, or the
// storage was closed before the query vector arrived.
QFuture<QList<RAGManager::ChunkSearchResult>> RAGManager::findRelevantChunks(
    const QString &query, ProjectExplorer::Project *project, int topK)
{
    auto promise = std::make_shared<QPromise<QList<ChunkSearchResult>>>();
    promise->start();

    // Shared exit path for every "nothing to report" case.
    const auto finishEmpty = [promise]() {
        promise->addResult({});
        promise->finish();
    };

    // The project pointer is dereferenced below, so reject null up front.
    if (!project) {
        qDebug() << "No project given for chunk search";
        finishEmpty();
        return promise->future();
    }

    ensureStorageForProject(project);
    if (!m_currentStorage) {
        qDebug() << "Storage not initialized for project:" << project->displayName();
        finishEmpty();
        return promise->future();
    }

    QString processedQuery = RAGPreprocessor::preprocessCode(query);

    auto vectorFuture = m_vectorizer->vectorizeText(processedQuery);
    vectorFuture.then([this, promise, finishEmpty, processedQuery, topK](
                          const RAGVector &queryVector) {
        if (queryVector.empty()) {
            qDebug() << "Failed to vectorize query";
            finishEmpty();
            return;
        }

        // Every completed search calls closeStorage(), so an overlapping search can
        // reach this point with the storage already released.
        if (!m_currentStorage) {
            qDebug() << "Storage was closed before the query could be evaluated";
            finishEmpty();
            return;
        }

        auto files = m_currentStorage->getFilesWithChunks();
        QList<FileChunkData> allChunks;

        for (const auto &filePath : files) {
            allChunks.append(m_currentStorage->getChunksForFile(filePath));
        }

        auto results = rankChunks(queryVector, processedQuery, allChunks);

        if (results.size() > topK) {
            results = results.mid(0, topK);
        }

        qDebug() << "Found" << results.size() << "relevant chunks";
        promise->addResult(results);
        promise->finish();

        closeStorage();
    });

    return promise->future();
}
|
||||
|
||||
// Scores every chunk against the query and returns the results sorted with
// ChunkSearchResult::operator<. Chunks whose vector cannot be loaded are left out.
//
// queryVector  vector of the (preprocessed) query
// queryText    preprocessed query text
// chunks       candidate chunks
//
// NOTE(review): the vector is looked up by chunk.filePath, so all chunks of one file
// appear to be scored against the same vector - confirm against RAGStorage::getVector.
QList<RAGManager::ChunkSearchResult> RAGManager::rankChunks(
    const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks)
{
    QList<ChunkSearchResult> ranked;
    ranked.reserve(chunks.size());

    for (const FileChunkData &candidate : chunks) {
        auto storedVector = m_currentStorage->getVector(candidate.filePath);
        if (storedVector.has_value()) {
            QString normalizedChunk = RAGPreprocessor::preprocessCode(candidate.content);

            auto scores = EnhancedRAGSimilaritySearch::calculateSimilarity(
                queryVector, storedVector.value(), queryText, normalizedChunk);

            ranked.append(ChunkSearchResult{
                candidate.filePath,
                candidate.startLine,
                candidate.endLine,
                candidate.content,
                scores.semantic_similarity,
                scores.structural_similarity,
                scores.combined_score});
        }
    }

    std::sort(ranked.begin(), ranked.end());
    return ranked;
}
|
||||
|
||||
// Returns all files recorded in the storage of `project`, or an empty list when the
// storage could not be made available.
QStringList RAGManager::getStoredFiles(ProjectExplorer::Project *project) const
{
    ensureStorageForProject(project);

    QStringList storedFiles;
    if (m_currentStorage) {
        storedFiles = m_currentStorage->getAllFiles();
    }
    return storedFiles;
}
|
||||
|
||||
// Tells whether `filePath` has to be (re)processed for `project`. A file counts as
// outdated when the storage is unavailable or the storage reports that it needs an
// update.
bool RAGManager::isFileStorageOutdated(
    ProjectExplorer::Project *project, const QString &filePath) const
{
    ensureStorageForProject(project);

    const bool storageAvailable = static_cast<bool>(m_currentStorage);
    return !storageAvailable || m_currentStorage->needsUpdate(filePath);
}
|
||||
|
||||
std::optional<RAGVector> RAGManager::loadVectorFromStorage(
|
||||
ProjectExplorer::Project *project, const QString &filePath)
|
||||
{
|
||||
@ -336,105 +430,14 @@ std::optional<RAGVector> RAGManager::loadVectorFromStorage(
|
||||
return m_currentStorage->getVector(filePath);
|
||||
}
|
||||
|
||||
QStringList RAGManager::getStoredFiles(ProjectExplorer::Project *project) const
|
||||
void RAGManager::closeStorage()
|
||||
{
|
||||
if (m_currentProject != project || !m_currentStorage) {
|
||||
auto tempStorage = RAGStorage(getStoragePath(project), nullptr);
|
||||
if (!tempStorage.init()) {
|
||||
return {};
|
||||
}
|
||||
return tempStorage.getAllFiles();
|
||||
qDebug() << "Closing storage...";
|
||||
if (m_currentStorage) {
|
||||
m_currentStorage.reset();
|
||||
m_currentProject = nullptr;
|
||||
qDebug() << "Storage closed";
|
||||
}
|
||||
return m_currentStorage->getAllFiles();
|
||||
}
|
||||
|
||||
// Variant removed by this commit: tells whether `filePath` needs (re)processing.
// Uses the open storage when it belongs to `project`; otherwise opens a temporary
// storage for the lookup and treats a failed init as "outdated".
bool RAGManager::isFileStorageOutdated(
    ProjectExplorer::Project *project, const QString &filePath) const
{
    const bool currentMatches = (m_currentProject == project) && m_currentStorage;
    if (currentMatches) {
        return m_currentStorage->needsUpdate(filePath);
    }

    auto scratchStorage = RAGStorage(getStoragePath(project), nullptr);
    return !scratchStorage.init() || scratchStorage.needsUpdate(filePath);
}
|
||||
|
||||
// Variant removed by this commit: vectorizes `text`, compares it with the stored
// vector of every file of `project` and delivers up to `topK` results, taken from a
// priority queue, through the returned future. An empty list is delivered when the
// query cannot be vectorized.
QFuture<QList<RAGManager::SearchResult>> RAGManager::search(
    const QString &text, ProjectExplorer::Project *project, int topK)
{
    auto promise = std::make_shared<QPromise<QList<SearchResult>>>();
    promise->start();

    auto queryVectorFuture = m_vectorizer->vectorizeText(text);
    queryVectorFuture.then([this, promise, project, topK](const RAGVector &queryVector) {
        if (queryVector.empty()) {
            LOG_MESSAGE("Failed to vectorize query text");
            promise->addResult(QList<SearchResult>());
            promise->finish();
            return;
        }

        std::priority_queue<SearchResult> ranked;
        auto candidates = getStoredFiles(project);

        for (const auto &path : candidates) {
            auto storedVector = loadVectorFromStorage(project, path);
            if (storedVector.has_value()) {
                float l2Score = RAGSimilaritySearch::l2Distance(queryVector, storedVector.value());
                float cosineScore
                    = RAGSimilaritySearch::cosineSimilarity(queryVector, storedVector.value());

                ranked.push(SearchResult{path, l2Score, cosineScore});
            }
        }

        // Drain the queue from the top until topK entries are collected or it is empty.
        QList<SearchResult> best;
        for (int taken = 0; taken < topK && !ranked.empty(); ++taken) {
            best.append(ranked.top());
            ranked.pop();
        }

        promise->addResult(best);
        promise->finish();
    });

    return promise->future();
}
|
||||
|
||||
// void RAGManager::searchSimilarDocuments(
|
||||
// const QString &text, ProjectExplorer::Project *project, int topK)
|
||||
// {
|
||||
// auto future = search(text, project, topK);
|
||||
// future.then([this](const QList<SearchResult> &results) { logSearchResults(results); });
|
||||
// }
|
||||
|
||||
void RAGManager::logSearchResults(const QList<SearchResult> &results) const
|
||||
{
|
||||
qDebug() << "\n=== Search Results ===";
|
||||
qDebug() << "Number of results:" << results.size();
|
||||
|
||||
if (results.empty()) {
|
||||
qDebug() << "No similar documents found.";
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
const auto &result = results[i];
|
||||
QFileInfo fileInfo(result.filePath);
|
||||
qDebug() << "\nResult #" << (i + 1);
|
||||
qDebug() << "File:" << fileInfo.fileName();
|
||||
qDebug() << "Full path:" << result.filePath;
|
||||
qDebug() << "Semantic similarity:" << QString::number(result.semantic_similarity, 'f', 4);
|
||||
qDebug() << "Structural similarity:"
|
||||
<< QString::number(result.structural_similarity, 'f', 4);
|
||||
qDebug() << "Combined score:" << QString::number(result.combined_score, 'f', 4);
|
||||
}
|
||||
qDebug() << "\n=== End of Results ===\n";
|
||||
}
|
||||
|
||||
} // namespace QodeAssist::Context
|
||||
|
@ -20,13 +20,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <QFuture>
|
||||
#include <QObject>
|
||||
#include <QString>
|
||||
|
||||
#include "FileChunker.hpp"
|
||||
#include "RAGData.hpp"
|
||||
#include "RAGStorage.hpp"
|
||||
#include "RAGVectorizer.hpp"
|
||||
#include <RAGData.hpp>
|
||||
|
||||
namespace ProjectExplorer {
|
||||
class Project;
|
||||
@ -37,60 +38,82 @@ namespace QodeAssist::Context {
|
||||
class RAGManager : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
static RAGManager &instance();
|
||||
|
||||
struct SearchResult
|
||||
public:
|
||||
struct ChunkSearchResult
|
||||
{
|
||||
QString filePath;
|
||||
float semantic_similarity;
|
||||
float structural_similarity;
|
||||
float combined_score;
|
||||
int startLine;
|
||||
int endLine;
|
||||
QString content;
|
||||
float semanticScore;
|
||||
float structuralScore;
|
||||
float combinedScore;
|
||||
|
||||
bool operator<(const SearchResult &other) const
|
||||
bool operator<(const ChunkSearchResult &other) const
|
||||
{
|
||||
return combined_score > other.combined_score;
|
||||
return combinedScore > other.combinedScore;
|
||||
}
|
||||
};
|
||||
|
||||
// Process and vectorize files
|
||||
QFuture<void> processFiles(ProjectExplorer::Project *project, const QStringList &filePaths);
|
||||
std::optional<RAGVector> loadVectorFromStorage(
|
||||
ProjectExplorer::Project *project, const QString &filePath);
|
||||
static RAGManager &instance();
|
||||
|
||||
QFuture<void> processProjectFiles(
|
||||
ProjectExplorer::Project *project,
|
||||
const QStringList &filePaths,
|
||||
const FileChunker::ChunkingConfig &config = FileChunker::ChunkingConfig());
|
||||
|
||||
QFuture<QList<ChunkSearchResult>> findRelevantChunks(
|
||||
const QString &query, ProjectExplorer::Project *project, int topK = 5);
|
||||
|
||||
QStringList getStoredFiles(ProjectExplorer::Project *project) const;
|
||||
bool isFileStorageOutdated(ProjectExplorer::Project *project, const QString &filePath) const;
|
||||
RAGVectorizer *getVectorizer() const { return m_vectorizer.get(); }
|
||||
|
||||
// Search functionality
|
||||
QFuture<QList<SearchResult>> search(
|
||||
const QString &text, ProjectExplorer::Project *project, int topK = 5);
|
||||
void searchSimilarDocuments(const QString &text, ProjectExplorer::Project *project, int topK = 5);
|
||||
void logSearchResults(const QList<SearchResult> &results) const;
|
||||
|
||||
void processNextChunk(
|
||||
std::shared_ptr<QPromise<void>> promise,
|
||||
const QList<FileChunkData> &chunks,
|
||||
int currentIndex);
|
||||
void closeStorage();
|
||||
signals:
|
||||
void vectorizationProgress(int processed, int total);
|
||||
void vectorizationFinished();
|
||||
|
||||
private:
|
||||
RAGManager(QObject *parent = nullptr);
|
||||
explicit RAGManager(QObject *parent = nullptr);
|
||||
~RAGManager();
|
||||
RAGManager(const RAGManager &) = delete;
|
||||
RAGManager &operator=(const RAGManager &) = delete;
|
||||
|
||||
QFuture<bool> processFile(ProjectExplorer::Project *project, const QString &filePath);
|
||||
void processNextBatch(
|
||||
QString getStoragePath(ProjectExplorer::Project *project) const;
|
||||
void ensureStorageForProject(ProjectExplorer::Project *project) const;
|
||||
std::optional<QString> loadFileContent(const QString &filePath);
|
||||
std::optional<RAGVector> loadVectorFromStorage(
|
||||
ProjectExplorer::Project *project, const QString &filePath);
|
||||
|
||||
void processNextFileBatch(
|
||||
std::shared_ptr<QPromise<void>> promise,
|
||||
ProjectExplorer::Project *project,
|
||||
const QStringList &files,
|
||||
const FileChunker::ChunkingConfig &config,
|
||||
int startIndex,
|
||||
int batchSize);
|
||||
void ensureStorageForProject(ProjectExplorer::Project *project);
|
||||
QString getStoragePath(ProjectExplorer::Project *project) const;
|
||||
|
||||
std::unique_ptr<RAGVectorizer> m_vectorizer;
|
||||
std::unique_ptr<RAGStorage> m_currentStorage;
|
||||
ProjectExplorer::Project *m_currentProject{nullptr};
|
||||
std::optional<QString> loadFileContent(const QString &filePath);
|
||||
QFuture<bool> processFileWithChunks(
|
||||
ProjectExplorer::Project *project,
|
||||
const QString &filePath,
|
||||
const FileChunker::ChunkingConfig &config);
|
||||
|
||||
QFuture<void> vectorizeAndStoreChunks(
|
||||
const QString &filePath, const QList<FileChunkData> &chunks);
|
||||
|
||||
QList<ChunkSearchResult> rankChunks(
|
||||
const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks);
|
||||
|
||||
private:
|
||||
mutable std::unique_ptr<RAGVectorizer> m_vectorizer;
|
||||
mutable std::unique_ptr<RAGStorage> m_currentStorage;
|
||||
mutable ProjectExplorer::Project *m_currentProject{nullptr};
|
||||
FileChunker m_chunker;
|
||||
};
|
||||
|
||||
} // namespace QodeAssist::Context
|
||||
|
@ -30,19 +30,18 @@ public:
|
||||
}
|
||||
|
||||
try {
|
||||
// Split directly, without an intermediate copy
|
||||
QStringList lines = code.split('\n', Qt::SkipEmptyParts);
|
||||
return processLines(lines);
|
||||
} catch (const std::exception &e) {
|
||||
LOG_MESSAGE(QString("Error preprocessing code: %1").arg(e.what()));
|
||||
return code; // Return the original code on error
|
||||
return code;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static QString processLines(const QStringList &lines)
|
||||
{
|
||||
const int estimatedAvgLength = 80; // Approximate average line length
|
||||
const int estimatedAvgLength = 80;
|
||||
QString result;
|
||||
result.reserve(lines.size() * estimatedAvgLength);
|
||||
|
||||
@ -54,7 +53,6 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Убираем последний перенос строки, если он есть
|
||||
if (result.endsWith('\n')) {
|
||||
result.chop(1);
|
||||
}
|
||||
|
@ -20,10 +20,12 @@
|
||||
// RAGStorage.cpp
|
||||
#include "RAGStorage.hpp"
|
||||
#include <QDebug>
|
||||
#include <QDir>
|
||||
#include <QFile>
|
||||
#include <QFileInfo>
|
||||
#include <QSqlError>
|
||||
#include <QSqlQuery>
|
||||
#include <QUuid>
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
@ -45,43 +47,61 @@ RAGStorage::~RAGStorage()
|
||||
bool RAGStorage::init()
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
qDebug() << "Initializing RAGStorage at path:" << m_dbPath;
|
||||
|
||||
if (!openDatabase()) {
|
||||
qDebug() << "Failed to open database";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Database opened successfully";
|
||||
|
||||
if (!createTables()) {
|
||||
qDebug() << "Failed to create tables";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Tables created successfully";
|
||||
|
||||
if (!createIndices()) {
|
||||
qDebug() << "Failed to create indices";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Indices created successfully";
|
||||
|
||||
int version = getStorageVersion();
|
||||
qDebug() << "Current storage version:" << version;
|
||||
|
||||
if (version < CURRENT_VERSION) {
|
||||
qDebug() << "Upgrading storage from version" << version << "to" << CURRENT_VERSION;
|
||||
if (!upgradeStorage(version)) {
|
||||
qDebug() << "Failed to upgrade storage";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Storage upgraded successfully";
|
||||
}
|
||||
|
||||
if (!prepareStatements()) {
|
||||
qDebug() << "Failed to prepare statements";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Statements prepared successfully";
|
||||
|
||||
m_status = Status::Ok;
|
||||
qDebug() << "RAGStorage initialized successfully";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RAGStorage::openDatabase()
|
||||
{
|
||||
qDebug() << "Opening database at:" << m_dbPath;
|
||||
|
||||
QDir dir(QFileInfo(m_dbPath).absolutePath());
|
||||
if (!dir.exists() && !dir.mkpath(".")) {
|
||||
setError("Failed to create database directory", Status::DatabaseError);
|
||||
return false;
|
||||
}
|
||||
|
||||
m_db = QSqlDatabase::addDatabase("QSQLITE", "rag_storage");
|
||||
QString connectionName = QString("rag_storage_%1").arg(QUuid::createUuid().toString());
|
||||
m_db = QSqlDatabase::addDatabase("QSQLITE", connectionName);
|
||||
m_db.setDatabaseName(m_dbPath);
|
||||
|
||||
if (!m_db.open()) {
|
||||
@ -89,14 +109,41 @@ bool RAGStorage::openDatabase()
|
||||
return false;
|
||||
}
|
||||
|
||||
QSqlQuery query(m_db);
|
||||
if (!query.exec("PRAGMA journal_mode=WAL")) {
|
||||
qDebug() << "Failed to set journal mode:" << query.lastError().text();
|
||||
}
|
||||
|
||||
if (!query.exec("PRAGMA synchronous=NORMAL")) {
|
||||
qDebug() << "Failed to set synchronous mode:" << query.lastError().text();
|
||||
}
|
||||
|
||||
qDebug() << "Database opened successfully";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RAGStorage::createTables()
|
||||
{
|
||||
if (!createVersionTable() || !createVectorsTable() || !createChunksTable()) {
|
||||
qDebug() << "Creating tables...";
|
||||
|
||||
if (!createVersionTable()) {
|
||||
qDebug() << "Failed to create version table";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Version table created";
|
||||
|
||||
if (!createVectorsTable()) {
|
||||
qDebug() << "Failed to create vectors table";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Vectors table created";
|
||||
|
||||
if (!createChunksTable()) {
|
||||
qDebug() << "Failed to create chunks table";
|
||||
return false;
|
||||
}
|
||||
qDebug() << "Chunks table created";
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -120,12 +167,34 @@ bool RAGStorage::createIndices()
|
||||
|
||||
bool RAGStorage::createVersionTable()
|
||||
{
|
||||
qDebug() << "Creating version table...";
|
||||
|
||||
QSqlQuery query(m_db);
|
||||
return query.exec("CREATE TABLE IF NOT EXISTS storage_version ("
|
||||
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
"version INTEGER NOT NULL,"
|
||||
"created_at DATETIME DEFAULT CURRENT_TIMESTAMP"
|
||||
")");
|
||||
bool success = query.exec("CREATE TABLE IF NOT EXISTS storage_version ("
|
||||
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
"version INTEGER NOT NULL,"
|
||||
"created_at DATETIME DEFAULT CURRENT_TIMESTAMP"
|
||||
")");
|
||||
|
||||
if (!success) {
|
||||
qDebug() << "Failed to create version table:" << query.lastError().text();
|
||||
return false;
|
||||
}
|
||||
|
||||
query.exec("SELECT COUNT(*) FROM storage_version");
|
||||
if (query.next() && query.value(0).toInt() == 0) {
|
||||
qDebug() << "Inserting initial version record";
|
||||
QSqlQuery insertQuery(m_db);
|
||||
success = insertQuery.exec(
|
||||
QString("INSERT INTO storage_version (version) VALUES (%1)").arg(CURRENT_VERSION));
|
||||
if (!success) {
|
||||
qDebug() << "Failed to insert initial version:" << insertQuery.lastError().text();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
qDebug() << "Version table ready";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RAGStorage::createVectorsTable()
|
||||
@ -158,6 +227,8 @@ bool RAGStorage::createChunksTable()
|
||||
|
||||
bool RAGStorage::prepareStatements()
|
||||
{
|
||||
qDebug() << "Preparing SQL statements...";
|
||||
|
||||
m_insertChunkQuery = QSqlQuery(m_db);
|
||||
if (!m_insertChunkQuery.prepare(
|
||||
"INSERT INTO file_chunks (file_path, start_line, end_line, content) "
|
||||
@ -178,7 +249,7 @@ bool RAGStorage::prepareStatements()
|
||||
if (!m_insertVectorQuery.prepare(
|
||||
"INSERT INTO file_vectors (file_path, vector_data, last_modified) "
|
||||
"VALUES (:path, :vector, :modified)")) {
|
||||
setError("Failed to prepare insert vector query");
|
||||
setError("Failed to prepare insert vector query: " + m_insertVectorQuery.lastError().text());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -186,7 +257,7 @@ bool RAGStorage::prepareStatements()
|
||||
if (!m_updateVectorQuery.prepare(
|
||||
"UPDATE file_vectors SET vector_data = :vector, last_modified = :modified, "
|
||||
"updated_at = CURRENT_TIMESTAMP WHERE file_path = :path")) {
|
||||
setError("Failed to prepare update vector query");
|
||||
setError("Failed to prepare update vector query: " + m_updateVectorQuery.lastError().text());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -197,7 +268,9 @@ bool RAGStorage::storeChunk(const FileChunkData &chunk)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
if (!validateChunk(chunk)) {
|
||||
auto validation = validateChunk(chunk);
|
||||
if (!validation.isValid) {
|
||||
setError(validation.errorMessage, validation.errorStatus);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -228,7 +301,9 @@ bool RAGStorage::storeChunks(const QList<FileChunkData> &chunks)
|
||||
}
|
||||
|
||||
for (const auto &chunk : chunks) {
|
||||
if (!validateChunk(chunk)) {
|
||||
auto validation = validateChunk(chunk);
|
||||
if (!validation.isValid) {
|
||||
setError(validation.errorMessage, validation.errorStatus);
|
||||
rollbackTransaction();
|
||||
return false;
|
||||
}
|
||||
@ -248,34 +323,30 @@ bool RAGStorage::storeChunks(const QList<FileChunkData> &chunks)
|
||||
return commitTransaction();
|
||||
}
|
||||
|
||||
bool RAGStorage::validateChunk(const FileChunkData &chunk) const
|
||||
RAGStorage::ValidationResult RAGStorage::validateChunk(const FileChunkData &chunk) const
|
||||
{
|
||||
if (!chunk.isValid()) {
|
||||
setError("Invalid chunk data", Status::ValidationError);
|
||||
return false;
|
||||
return {false, "Invalid chunk data", Status::ValidationError};
|
||||
}
|
||||
|
||||
if (chunk.content.size() > m_options.maxChunkSize) {
|
||||
setError("Chunk content exceeds maximum size", Status::ValidationError);
|
||||
return false;
|
||||
return {false, "Chunk content exceeds maximum size", Status::ValidationError};
|
||||
}
|
||||
|
||||
return true;
|
||||
return {true, QString(), Status::Ok};
|
||||
}
|
||||
|
||||
bool RAGStorage::validateVector(const RAGVector &vector) const
|
||||
RAGStorage::ValidationResult RAGStorage::validateVector(const RAGVector &vector) const
|
||||
{
|
||||
if (vector.empty()) {
|
||||
setError("Empty vector data", Status::ValidationError);
|
||||
return false;
|
||||
return {false, "Empty vector data", Status::ValidationError};
|
||||
}
|
||||
|
||||
if (vector.size() > m_options.maxVectorSize) {
|
||||
setError("Vector size exceeds maximum limit", Status::ValidationError);
|
||||
return false;
|
||||
return {false, "Vector size exceeds maximum limit", Status::ValidationError};
|
||||
}
|
||||
|
||||
return true;
|
||||
return {true, QString(), Status::Ok};
|
||||
}
|
||||
|
||||
bool RAGStorage::beginTransaction()
|
||||
@ -296,8 +367,11 @@ bool RAGStorage::rollbackTransaction()
|
||||
bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
qDebug() << "Storing vector for file:" << filePath;
|
||||
|
||||
if (!validateVector(vector)) {
|
||||
auto validation = validateVector(vector);
|
||||
if (!validation.isValid) {
|
||||
setError(validation.errorMessage, validation.errorStatus);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -307,17 +381,31 @@ bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector)
|
||||
|
||||
QDateTime lastModified = getFileLastModified(filePath);
|
||||
QByteArray blob = vectorToBlob(vector);
|
||||
qDebug() << "Vector converted to blob, size:" << blob.size() << "bytes";
|
||||
|
||||
m_updateVectorQuery.bindValue(":path", filePath);
|
||||
m_updateVectorQuery.bindValue(":vector", blob);
|
||||
m_updateVectorQuery.bindValue(":modified", lastModified);
|
||||
|
||||
if (m_updateVectorQuery.exec()) {
|
||||
if (m_updateVectorQuery.numRowsAffected() > 0) {
|
||||
qDebug() << "Vector updated successfully";
|
||||
return commitTransaction();
|
||||
}
|
||||
}
|
||||
|
||||
m_insertVectorQuery.bindValue(":path", filePath);
|
||||
m_insertVectorQuery.bindValue(":vector", blob);
|
||||
m_insertVectorQuery.bindValue(":modified", lastModified);
|
||||
|
||||
if (!m_insertVectorQuery.exec()) {
|
||||
qDebug() << "Failed to store vector:" << m_insertVectorQuery.lastError().text();
|
||||
rollbackTransaction();
|
||||
setError("Failed to store vector: " + m_insertVectorQuery.lastError().text());
|
||||
return false;
|
||||
}
|
||||
|
||||
qDebug() << "Vector stored successfully";
|
||||
return commitTransaction();
|
||||
}
|
||||
|
||||
@ -325,7 +413,9 @@ bool RAGStorage::updateVector(const QString &filePath, const RAGVector &vector)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
if (!validateVector(vector)) {
|
||||
auto validation = validateVector(vector);
|
||||
if (!validation.isValid) {
|
||||
setError(validation.errorMessage, validation.errorStatus);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -391,19 +481,42 @@ QDateTime RAGStorage::getFileLastModified(const QString &filePath)
|
||||
|
||||
/**
 * @brief Rebuilds a vector from the blob layout produced by vectorToBlob().
 *
 * Expected layout: a qint32 element count followed by that many
 * double-precision values, in QDataStream Qt_6_0 format.
 *
 * @param blob Serialized vector data.
 * @return The decoded vector, or an empty vector when the blob is truncated
 *         or its element count is inconsistent with its length.
 */
RAGVector RAGStorage::blobToVector(const QByteArray &blob)
{
    RAGVector vector;
    QDataStream stream(blob);
    stream.setVersion(QDataStream::Qt_6_0);
    stream.setFloatingPointPrecision(QDataStream::DoublePrecision);

    qint32 size = 0;
    stream >> size;

    // The count comes from stored data, so check it against the bytes that are
    // actually present before allocating: a negative or oversized count would
    // otherwise be handed straight to resize().
    const qsizetype payloadBytes = blob.size() - static_cast<qsizetype>(sizeof(qint32));
    const qsizetype maxElements = payloadBytes / static_cast<qsizetype>(sizeof(double));
    if (stream.status() != QDataStream::Ok || size < 0 || size > maxElements) {
        qDebug() << "Invalid vector blob, declared size:" << size
                 << "blob size:" << blob.size();
        return RAGVector();
    }

    vector.resize(size);
    for (int i = 0; i < size; ++i) {
        double value = 0.0;
        stream >> value;
        vector[i] = value;
    }

    // A read past the end leaves the stream in a non-Ok state.
    if (stream.status() != QDataStream::Ok) {
        qDebug() << "Failed to read vector data from blob";
        return RAGVector();
    }

    qDebug() << "Vector restored from blob, size:" << vector.size();

    return vector;
}
|
||||
|
||||
/**
 * @brief Serializes a vector into a byte array.
 *
 * Layout: the element count as a qint32, followed by every element written as
 * a double-precision value, in QDataStream Qt_6_0 format.
 *
 * @param vector Vector to serialize.
 * @return The serialized bytes.
 */
QByteArray RAGStorage::vectorToBlob(const RAGVector &vector)
{
    QByteArray serialized;
    QDataStream writer(&serialized, QIODevice::WriteOnly);
    writer.setVersion(QDataStream::Qt_6_0);
    writer.setFloatingPointPrecision(QDataStream::DoublePrecision);

    // Header first, so the reader knows how many values follow.
    const auto elementCount = static_cast<qint32>(vector.size());
    writer << elementCount;

    for (const double element : vector) {
        writer << element;
    }

    qDebug() << "Vector converted to blob, vector size:" << vector.size()
             << "blob size:" << serialized.size();

    return serialized;
}
|
||||
|
||||
@ -420,12 +533,12 @@ void RAGStorage::clearError()
|
||||
m_status = Status::Ok;
|
||||
}
|
||||
|
||||
Status RAGStorage::status() const
|
||||
RAGStorage::Status RAGStorage::status() const
|
||||
{
|
||||
return m_status;
|
||||
}
|
||||
|
||||
Error RAGStorage::lastError() const
|
||||
RAGStorage::Error RAGStorage::lastError() const
|
||||
{
|
||||
return m_lastError;
|
||||
}
|
||||
@ -482,7 +595,6 @@ bool RAGStorage::backup(const QString &backupPath)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Создаем резервную копию через SQLite backup API
|
||||
QFile::copy(m_dbPath, backupPath);
|
||||
|
||||
return true;
|
||||
@ -495,7 +607,6 @@ StorageStatistics RAGStorage::getStatistics() const
|
||||
StorageStatistics stats;
|
||||
QSqlQuery query(m_db);
|
||||
|
||||
// Получаем статистику по чанкам
|
||||
if (query.exec("SELECT COUNT(*), SUM(LENGTH(content)) FROM file_chunks")) {
|
||||
if (query.next()) {
|
||||
stats.totalChunks = query.value(0).toInt();
|
||||
@ -503,7 +614,6 @@ StorageStatistics RAGStorage::getStatistics() const
|
||||
}
|
||||
}
|
||||
|
||||
// Получаем статистику по векторам
|
||||
if (query.exec("SELECT COUNT(*) FROM file_vectors")) {
|
||||
if (query.next()) {
|
||||
stats.totalVectors = query.value(0).toInt();
|
||||
@ -518,7 +628,6 @@ StorageStatistics RAGStorage::getStatistics() const
|
||||
}
|
||||
}
|
||||
|
||||
// Получаем время последнего обновления
|
||||
if (query.exec("SELECT MAX(updated_at) FROM ("
|
||||
"SELECT updated_at FROM file_chunks "
|
||||
"UNION "
|
||||
@ -610,7 +719,8 @@ bool RAGStorage::updateChunk(const FileChunkData &chunk)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
if (!validateChunk(chunk)) {
|
||||
auto validation = validateChunk(chunk);
|
||||
if (!validation.isValid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -641,7 +751,8 @@ bool RAGStorage::updateChunks(const QList<FileChunkData> &chunks)
|
||||
}
|
||||
|
||||
for (const auto &chunk : chunks) {
|
||||
if (!validateChunk(chunk)) {
|
||||
auto validation = validateChunk(chunk);
|
||||
if (!validation.isValid) {
|
||||
rollbackTransaction();
|
||||
return false;
|
||||
}
|
||||
@ -739,14 +850,24 @@ bool RAGStorage::chunkExists(const QString &filePath, int startLine, int endLine
|
||||
|
||||
int RAGStorage::getStorageVersion() const
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
qDebug() << "Getting storage version...";
|
||||
|
||||
QSqlQuery query(m_db);
|
||||
if (query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1")) {
|
||||
if (query.next()) {
|
||||
return query.value(0).toInt();
|
||||
}
|
||||
qDebug() << "Created query object";
|
||||
|
||||
if (!query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1")) {
|
||||
qDebug() << "Failed to execute version query:" << query.lastError().text();
|
||||
return 0;
|
||||
}
|
||||
qDebug() << "Version query executed";
|
||||
|
||||
if (query.next()) {
|
||||
int version = query.value(0).toInt();
|
||||
qDebug() << "Current version:" << version;
|
||||
return version;
|
||||
}
|
||||
|
||||
qDebug() << "No version found, assuming version 0";
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -764,7 +885,6 @@ bool RAGStorage::upgradeStorage(int fromVersion)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Выполняем последовательные миграции от текущей версии до последней
|
||||
for (int version = fromVersion + 1; version <= CURRENT_VERSION; ++version) {
|
||||
if (!applyMigration(version)) {
|
||||
rollbackTransaction();
|
||||
@ -773,7 +893,6 @@ bool RAGStorage::upgradeStorage(int fromVersion)
|
||||
}
|
||||
}
|
||||
|
||||
// Обновляем версию в БД
|
||||
QSqlQuery query(m_db);
|
||||
query.prepare("INSERT INTO storage_version (version) VALUES (:version)");
|
||||
query.bindValue(":version", CURRENT_VERSION);
|
||||
@ -793,15 +912,13 @@ bool RAGStorage::applyMigration(int version)
|
||||
|
||||
switch (version) {
|
||||
case 1:
|
||||
// Миграция на версию 1
|
||||
if (!query.exec("ALTER TABLE file_chunks ADD COLUMN metadata TEXT")) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
// Добавляем новые кейсы для будущих версий
|
||||
// case 2:
|
||||
// // Миграция на версию 2
|
||||
// //
|
||||
// break;
|
||||
|
||||
default:
|
||||
@ -816,7 +933,6 @@ bool RAGStorage::validateSchema() const
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
// Проверяем наличие всех необходимых таблиц
|
||||
QStringList requiredTables = {"storage_version", "file_vectors", "file_chunks"};
|
||||
|
||||
QSqlQuery query(m_db);
|
||||
@ -833,7 +949,6 @@ bool RAGStorage::validateSchema() const
|
||||
}
|
||||
}
|
||||
|
||||
// Проверяем структуру таблиц
|
||||
struct ColumnInfo
|
||||
{
|
||||
QString name;
|
||||
@ -875,7 +990,6 @@ bool RAGStorage::validateSchema() const
|
||||
return false;
|
||||
}
|
||||
|
||||
// Проверяем каждую колонку
|
||||
for (int i = 0; i < actualColumns.size(); ++i) {
|
||||
const auto &expected = it.value()[i];
|
||||
const auto &actual = actualColumns[i];
|
||||
@ -894,23 +1008,19 @@ bool RAGStorage::restore(const QString &backupPath)
|
||||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
// Закрываем текущее соединение
|
||||
if (m_db.isOpen()) {
|
||||
m_db.close();
|
||||
}
|
||||
|
||||
// Копируем файл бэкапа
|
||||
if (!QFile::remove(m_dbPath) || !QFile::copy(backupPath, m_dbPath)) {
|
||||
setError("Failed to restore from backup");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Переоткрываем БД
|
||||
if (!openDatabase()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Проверяем валидность схемы
|
||||
if (!validateSchema()) {
|
||||
setError("Invalid schema in backup file");
|
||||
return false;
|
||||
|
@ -32,17 +32,14 @@
|
||||
|
||||
namespace QodeAssist::Context {
|
||||
|
||||
/**
|
||||
* @brief Структура для хранения информации о чанке файла
|
||||
*/
|
||||
struct FileChunkData
|
||||
{
|
||||
QString filePath; ///< Путь к файлу
|
||||
int startLine; ///< Начальная строка чанка
|
||||
int endLine; ///< Конечная строка чанка
|
||||
QString content; ///< Содержимое чанка
|
||||
QDateTime createdAt; ///< Время создания
|
||||
QDateTime updatedAt; ///< Время последнего обновления
|
||||
QString filePath;
|
||||
int startLine;
|
||||
int endLine;
|
||||
QString content;
|
||||
QDateTime createdAt;
|
||||
QDateTime updatedAt;
|
||||
|
||||
bool isValid() const
|
||||
{
|
||||
@ -50,32 +47,23 @@ struct FileChunkData
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * @brief Tunable settings for the storage.
 */
struct StorageOptions
{
    int maxChunkSize = 1024 * 1024; ///< Upper bound checked against a chunk's content size
    int maxVectorSize = 1024;       ///< Upper bound checked against a vector's element count
    bool useCompression = false;    ///< Whether stored data should be compressed
    bool enableLogging = false;     ///< Whether logging is enabled
};
|
||||
|
||||
/**
|
||||
* @brief Структура для хранения статистики
|
||||
*/
|
||||
struct StorageStatistics
|
||||
{
|
||||
int totalChunks; ///< Общее количество чанков
|
||||
int totalVectors; ///< Общее количество векторов
|
||||
int totalFiles; ///< Общее количество файлов
|
||||
qint64 totalSize; ///< Общий размер данных
|
||||
QDateTime lastUpdate; ///< Время последнего обновления
|
||||
int totalChunks;
|
||||
int totalVectors;
|
||||
int totalFiles;
|
||||
qint64 totalSize;
|
||||
QDateTime lastUpdate;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Класс для работы с хранилищем RAG данных
|
||||
*/
|
||||
class RAGStorage : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
@ -85,6 +73,13 @@ public:
|
||||
|
||||
enum class Status { Ok, DatabaseError, ValidationError, VersionError, ConnectionError };
|
||||
|
||||
// Outcome of validating a chunk or vector, as returned by validateChunk()
// and validateVector().
struct ValidationResult
|
||||
{
|
||||
// True when the checked data passed validation.
bool isValid;
|
||||
// Reason for the failure; an empty QString when validation passed.
QString errorMessage;
|
||||
// Status to report for the failure; Status::Ok when validation passed.
Status errorStatus;
|
||||
};
|
||||
|
||||
struct Error
|
||||
{
|
||||
QString message;
|
||||
@ -99,26 +94,22 @@ public:
|
||||
QObject *parent = nullptr);
|
||||
~RAGStorage();
|
||||
|
||||
// Инициализация и проверка состояния
|
||||
bool init();
|
||||
Status status() const;
|
||||
Error lastError() const;
|
||||
bool isReady() const;
|
||||
QString dbPath() const;
|
||||
|
||||
// Управление транзакциями
|
||||
bool beginTransaction();
|
||||
bool commitTransaction();
|
||||
bool rollbackTransaction();
|
||||
|
||||
// Операции с векторами
|
||||
bool storeVector(const QString &filePath, const RAGVector &vector);
|
||||
bool updateVector(const QString &filePath, const RAGVector &vector);
|
||||
std::optional<RAGVector> getVector(const QString &filePath);
|
||||
bool needsUpdate(const QString &filePath);
|
||||
QStringList getAllFiles();
|
||||
|
||||
// Операции с чанками
|
||||
bool storeChunk(const FileChunkData &chunk);
|
||||
bool storeChunks(const QList<FileChunkData> &chunks);
|
||||
bool updateChunk(const FileChunkData &chunk);
|
||||
@ -128,7 +119,6 @@ public:
|
||||
QList<FileChunkData> getChunksForFile(const QString &filePath);
|
||||
bool chunkExists(const QString &filePath, int startLine, int endLine);
|
||||
|
||||
// Обслуживание
|
||||
int getChunkCount(const QString &filePath);
|
||||
bool deleteOldChunks(const QString &filePath, const QDateTime &olderThan);
|
||||
bool deleteAllChunks();
|
||||
@ -138,10 +128,10 @@ public:
|
||||
bool restore(const QString &backupPath);
|
||||
StorageStatistics getStatistics() const;
|
||||
|
||||
// Версионирование
|
||||
int getStorageVersion() const;
|
||||
bool isVersionCompatible() const;
|
||||
|
||||
bool applyMigration(int version);
|
||||
signals:
|
||||
void errorOccurred(const Error &error);
|
||||
void operationCompleted(const QString &operation);
|
||||
@ -164,8 +154,8 @@ private:
|
||||
void setError(const QString &message, Status status = Status::DatabaseError);
|
||||
void clearError();
|
||||
bool prepareStatements();
|
||||
bool validateChunk(const FileChunkData &chunk) const;
|
||||
bool validateVector(const RAGVector &vector) const;
|
||||
ValidationResult validateChunk(const FileChunkData &chunk) const;
|
||||
ValidationResult validateVector(const RAGVector &vector) const;
|
||||
|
||||
private:
|
||||
QSqlDatabase m_db;
|
||||
@ -175,7 +165,6 @@ private:
|
||||
Error m_lastError;
|
||||
Status m_status;
|
||||
|
||||
// Подготовленные запросы
|
||||
QSqlQuery m_insertChunkQuery;
|
||||
QSqlQuery m_updateChunkQuery;
|
||||
QSqlQuery m_insertVectorQuery;
|
||||
|
@ -45,31 +45,65 @@ QJsonObject RAGVectorizer::prepareEmbeddingRequest(const QString &text) const
|
||||
/**
 * @brief Extracts the embedding vector from an embeddings API response.
 *
 * The response is expected to be a JSON object with an "embedding" array of
 * numbers.
 *
 * @param response Raw response body.
 * @return The parsed vector, or an empty vector when the body is not valid
 *         JSON, has no "embedding" field, or the array is empty.
 */
RAGVector RAGVectorizer::parseEmbeddingResponse(const QByteArray &response) const
{
    QJsonDocument doc = QJsonDocument::fromJson(response);
    if (doc.isNull()) {
        qDebug() << "Failed to parse JSON response";
        return RAGVector();
    }

    QJsonObject obj = doc.object();
    if (!obj.contains("embedding")) {
        qDebug() << "Response does not contain 'embedding' field";
        return RAGVector();
    }

    QJsonArray array = obj["embedding"].toArray();
    if (array.isEmpty()) {
        qDebug() << "Embedding array is empty";
        return RAGVector();
    }

    RAGVector result;
    result.reserve(array.size());
    for (const auto &value : array) {
        result.push_back(value.toDouble());
    }

    qDebug() << "Successfully parsed vector with size:" << result.size();
    return result;
}
|
||||
|
||||
QFuture<RAGVector> RAGVectorizer::vectorizeText(const QString &text)
|
||||
{
|
||||
qDebug() << "Vectorizing text, length:" << text.length();
|
||||
qDebug() << "Using embedding provider:" << m_embedProviderUrl;
|
||||
|
||||
auto promise = std::make_shared<QPromise<RAGVector>>();
|
||||
promise->start();
|
||||
|
||||
QNetworkRequest request(QUrl(m_embedProviderUrl + "/api/embeddings"));
|
||||
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
|
||||
|
||||
auto reply = m_network->post(request, QJsonDocument(prepareEmbeddingRequest(text)).toJson());
|
||||
QJsonObject requestData = prepareEmbeddingRequest(text);
|
||||
QByteArray jsonData = QJsonDocument(requestData).toJson();
|
||||
qDebug() << "Sending request to embeddings API:" << jsonData;
|
||||
|
||||
auto reply = m_network->post(request, jsonData);
|
||||
|
||||
connect(reply, &QNetworkReply::finished, this, [promise, reply, this]() {
|
||||
if (reply->error() == QNetworkReply::NoError) {
|
||||
promise->addResult(parseEmbeddingResponse(reply->readAll()));
|
||||
QByteArray response = reply->readAll();
|
||||
// qDebug() << "Received response from embeddings API:" << response;
|
||||
|
||||
auto vector = parseEmbeddingResponse(response);
|
||||
qDebug() << "Parsed vector size:" << vector.size();
|
||||
promise->addResult(vector);
|
||||
} else {
|
||||
// TODO check error setException
|
||||
qDebug() << "Network error:" << reply->errorString();
|
||||
qDebug() << "HTTP status code:"
|
||||
<< reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
|
||||
qDebug() << "Response:" << reply->readAll();
|
||||
|
||||
promise->addResult(RAGVector());
|
||||
}
|
||||
promise->finish();
|
||||
|
Loading…
Reference in New Issue
Block a user