feat: Add using vector and chunks in Context Manager

This commit is contained in:
Petr Mironychev 2025-02-09 12:11:23 +01:00
parent f36db033e6
commit 142afa725f
9 changed files with 558 additions and 385 deletions

View File

@ -450,6 +450,8 @@ void ChatRootView::openChatHistoryFolder()
QDesktopServices::openUrl(url); QDesktopServices::openUrl(url);
} }
// ChatRootView.cpp
void ChatRootView::testRAG(const QString &message) void ChatRootView::testRAG(const QString &message)
{ {
auto project = ProjectExplorer::ProjectTree::currentProject(); auto project = ProjectExplorer::ProjectTree::currentProject();
@ -465,7 +467,9 @@ void ChatRootView::testRAG(const QString &message)
qDebug() << "\nFirst, processing project files..."; qDebug() << "\nFirst, processing project files...";
auto files = Context::ContextManager::instance().getProjectSourceFiles(project); auto files = Context::ContextManager::instance().getProjectSourceFiles(project);
auto future = Context::RAGManager::instance().processFiles(project, files); // Было: auto future = Context::RAGManager::instance().processFiles(project, files);
// Стало:
auto future = Context::RAGManager::instance().processProjectFiles(project, files);
connect( connect(
&Context::RAGManager::instance(), &Context::RAGManager::instance(),
@ -481,7 +485,19 @@ void ChatRootView::testRAG(const QString &message)
this, this,
[this, project, TEST_QUERY]() { [this, project, TEST_QUERY]() {
qDebug() << "\nVectorization completed. Starting similarity search...\n"; qDebug() << "\nVectorization completed. Starting similarity search...\n";
Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5); // Было: Context::RAGManager::instance().searchSimilarDocuments(TEST_QUERY, project, 5);
// Стало:
auto future = Context::RAGManager::instance().findRelevantChunks(TEST_QUERY, project, 5);
future.then([](const QList<Context::RAGManager::ChunkSearchResult> &results) {
qDebug() << "Found" << results.size() << "relevant chunks:";
for (const auto &result : results) {
qDebug() << "File:" << result.filePath;
qDebug() << "Lines:" << result.startLine << "-" << result.endLine;
qDebug() << "Score:" << result.combinedScore;
qDebug() << "Content:" << result.content;
qDebug() << "---";
}
});
}); });
} }

View File

@ -29,13 +29,13 @@ class FileChunker : public QObject
public: public:
struct ChunkingConfig struct ChunkingConfig
{ {
int maxLinesPerChunk = 80; // Размер чанка (было 200) int maxLinesPerChunk = 80;
int minLinesPerChunk = 40; // Минимальный размер для начала чанкинга int minLinesPerChunk = 40;
int overlapLines = 20; // Перекрытие между чанками int overlapLines = 20;
bool skipEmptyLines = true; // Пропускать пустые строки bool skipEmptyLines = true;
bool preserveFunctions = true; // Сохранять функции целиком bool preserveFunctions = true;
bool preserveClasses = true; // Сохранять классы целиком bool preserveClasses = true;
int batchSize = 10; // Количество файлов для параллельной обработки int batchSize = 10;
}; };
explicit FileChunker(QObject *parent = nullptr); explicit FileChunker(QObject *parent = nullptr);

View File

@ -24,8 +24,8 @@
namespace QodeAssist::Context { namespace QodeAssist::Context {
enum class ProgrammingLanguage { enum class ProgrammingLanguage {
QML, // QML/JavaScript QML,
Cpp, // C/C++ Cpp,
Python, Python,
Unknown, Unknown,
}; };

View File

@ -18,6 +18,8 @@
*/ */
#include "RAGManager.hpp" #include "RAGManager.hpp"
#include "EnhancedRAGSimilaritySearch.hpp"
#include "RAGPreprocessor.hpp"
#include "RAGSimilaritySearch.hpp" #include "RAGSimilaritySearch.hpp"
#include "logger/Logger.hpp" #include "logger/Logger.hpp"
@ -25,10 +27,6 @@
#include <projectexplorer/project.h> #include <projectexplorer/project.h>
#include <QFile> #include <QFile>
#include <QtConcurrent> #include <QtConcurrent>
#include <queue>
#include <EnhancedRAGSimilaritySearch.hpp>
#include <RAGPreprocessor.hpp>
namespace QodeAssist::Context { namespace QodeAssist::Context {
@ -45,13 +43,6 @@ RAGManager::RAGManager(QObject *parent)
RAGManager::~RAGManager() {} RAGManager::~RAGManager() {}
// bool RAGManager::SearchResult::operator<(const SearchResult &other) const
// {
// if (cosineScore != other.cosineScore)
// return cosineScore > other.cosineScore;
// return l2Score < other.l2Score;
// }
QString RAGManager::getStoragePath(ProjectExplorer::Project *project) const QString RAGManager::getStoragePath(ProjectExplorer::Project *project) const
{ {
return QString("%1/qodeassist/%2/rag/vectors.db") return QString("%1/qodeassist/%2/rag/vectors.db")
@ -78,50 +69,61 @@ std::optional<QString> RAGManager::loadFileContent(const QString &filePath)
return content; return content;
} }
void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project) void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project) const
{ {
qDebug() << "Ensuring storage for project:" << project->displayName();
if (m_currentProject == project && m_currentStorage) { if (m_currentProject == project && m_currentStorage) {
qDebug() << "Using existing storage";
return; return;
} }
qDebug() << "Creating new storage";
m_currentStorage.reset(); m_currentStorage.reset();
m_currentProject = project; m_currentProject = project;
if (project) { if (project) {
m_currentStorage = std::make_unique<RAGStorage>(getStoragePath(project), this); QString storagePath = getStoragePath(project);
qDebug() << "Storage path:" << storagePath;
StorageOptions options;
m_currentStorage = std::make_unique<RAGStorage>(storagePath, options);
qDebug() << "Initializing storage...";
if (!m_currentStorage->init()) { if (!m_currentStorage->init()) {
qDebug() << "Failed to initialize storage"; qDebug() << "Failed to initialize storage";
m_currentStorage.reset(); m_currentStorage.reset();
return; return;
} }
qDebug() << "Storage initialized successfully";
if (!m_currentStorage->isVersionCompatible()) {
qDebug() << "Storage version is incompatible, needs rebuild";
// todo recreate db or show error
}
} }
} }
QFuture<void> RAGManager::processFiles( QFuture<void> RAGManager::processProjectFiles(
ProjectExplorer::Project *project, const QStringList &filePaths) ProjectExplorer::Project *project,
const QStringList &filePaths,
const FileChunker::ChunkingConfig &config)
{ {
qDebug() << "Starting batch processing of" << filePaths.size() qDebug() << "\nStarting batch processing of" << filePaths.size()
<< "files for project:" << project->displayName(); << "files for project:" << project->displayName();
auto promise = std::make_shared<QPromise<void>>(); auto promise = std::make_shared<QPromise<void>>();
promise->start(); promise->start();
qDebug() << "Initializing storage...";
ensureStorageForProject(project); ensureStorageForProject(project);
if (!m_currentStorage) { if (!m_currentStorage) {
qDebug() << "Failed to initialize storage for project:" << project->displayName(); qDebug() << "Failed to initialize storage for project:" << project->displayName();
promise->finish(); promise->finish();
return promise->future(); return promise->future();
} }
qDebug() << "Storage initialized successfully";
const int batchSize = 10; qDebug() << "Checking files for processing...";
QSet<QString> uniqueFiles; QSet<QString> uniqueFiles;
for (const QString &filePath : filePaths) { for (const QString &filePath : filePaths) {
qDebug() << "Checking file:" << filePath;
if (isFileStorageOutdated(project, filePath)) { if (isFileStorageOutdated(project, filePath)) {
qDebug() << "File needs processing:" << filePath; qDebug() << "File needs processing:" << filePath;
uniqueFiles.insert(filePath); uniqueFiles.insert(filePath);
@ -137,103 +139,23 @@ QFuture<void> RAGManager::processFiles(
return promise->future(); return promise->future();
} }
qDebug() << "Processing" << filesToProcess.size() << "files in batches of" << batchSize; qDebug() << "Starting to process" << filesToProcess.size() << "files";
const int batchSize = 10;
processNextBatch(promise, project, filesToProcess, 0, batchSize); processNextFileBatch(promise, project, filesToProcess, config, 0, batchSize);
return promise->future(); return promise->future();
} }
void RAGManager::searchSimilarDocuments( void RAGManager::processNextFileBatch(
const QString &text, ProjectExplorer::Project *project, int topK)
{
qDebug() << "\nStarting similarity search with parameters:";
qDebug() << "Query length:" << text.length();
qDebug() << "Project:" << project->displayName();
qDebug() << "Top K:" << topK;
QString processedText = RAGPreprocessor::preprocessCode(text);
qDebug() << "Preprocessed query length:" << processedText.length();
auto future = m_vectorizer->vectorizeText(processedText);
qDebug() << "Started query vectorization";
future.then([this, project, processedText, topK, text](const RAGVector &queryVector) {
if (queryVector.empty()) {
qDebug() << "ERROR: Query vectorization failed - empty vector";
return;
}
qDebug() << "Query vector generated, size:" << queryVector.size();
auto storedFiles = getStoredFiles(project);
qDebug() << "Found" << storedFiles.size() << "stored files to compare";
QList<SearchResult> results;
results.reserve(storedFiles.size());
int processedFiles = 0;
int skippedFiles = 0;
for (const auto &filePath : storedFiles) {
auto storedCode = loadFileContent(filePath);
if (!storedCode.has_value()) {
qDebug() << "ERROR: Failed to load content for file:" << filePath;
skippedFiles++;
continue;
}
auto storedVector = loadVectorFromStorage(project, filePath);
if (!storedVector.has_value()) {
qDebug() << "ERROR: Failed to load vector for file:" << filePath;
skippedFiles++;
continue;
}
QString processedStoredCode = RAGPreprocessor::preprocessCode(storedCode.value());
auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity(
queryVector, storedVector.value(), processedText, processedStoredCode);
results.append(
{filePath,
similarity.semantic_similarity,
similarity.structural_similarity,
similarity.combined_score});
processedFiles++;
if (processedFiles % 100 == 0) {
qDebug() << "Processed" << processedFiles << "files...";
}
}
qDebug() << "\nSearch statistics:";
qDebug() << "Total files processed:" << processedFiles;
qDebug() << "Files skipped:" << skippedFiles;
qDebug() << "Total results before filtering:" << results.size();
if (results.size() > topK) {
qDebug() << "Performing partial sort for top" << topK << "results";
std::partial_sort(results.begin(), results.begin() + topK, results.end());
results = results.mid(0, topK);
} else {
qDebug() << "Performing full sort for" << results.size() << "results";
std::sort(results.begin(), results.end());
}
qDebug() << "Sorting completed, logging final results...";
logSearchResults(results);
});
}
void RAGManager::processNextBatch(
std::shared_ptr<QPromise<void>> promise, std::shared_ptr<QPromise<void>> promise,
ProjectExplorer::Project *project, ProjectExplorer::Project *project,
const QStringList &files, const QStringList &files,
const FileChunker::ChunkingConfig &config,
int startIndex, int startIndex,
int batchSize) int batchSize)
{ {
if (startIndex >= files.size()) { if (startIndex >= files.size()) {
qDebug() << "All batches processed"; qDebug() << "All batches processed successfully";
emit vectorizationFinished(); emit vectorizationFinished();
promise->finish(); promise->finish();
return; return;
@ -242,12 +164,13 @@ void RAGManager::processNextBatch(
int endIndex = qMin(startIndex + batchSize, files.size()); int endIndex = qMin(startIndex + batchSize, files.size());
auto currentBatch = files.mid(startIndex, endIndex - startIndex); auto currentBatch = files.mid(startIndex, endIndex - startIndex);
qDebug() << "Processing batch" << startIndex / batchSize + 1 << "files" << startIndex << "to" qDebug() << "\nProcessing batch" << (startIndex / batchSize + 1) << "(" << currentBatch.size()
<< endIndex; << "files)"
<< "\nProgress:" << startIndex << "to" << endIndex << "of" << files.size();
for (const QString &filePath : currentBatch) { for (const QString &filePath : currentBatch) {
qDebug() << "Starting processing of file:" << filePath; qDebug() << "Starting processing file:" << filePath;
auto future = processFile(project, filePath); auto future = processFileWithChunks(project, filePath, config);
auto watcher = new QFutureWatcher<bool>; auto watcher = new QFutureWatcher<bool>;
watcher->setFuture(future); watcher->setFuture(future);
@ -255,7 +178,16 @@ void RAGManager::processNextBatch(
watcher, watcher,
&QFutureWatcher<bool>::finished, &QFutureWatcher<bool>::finished,
this, this,
[this, watcher, promise, project, files, startIndex, endIndex, batchSize, filePath]() { [this,
watcher,
promise,
project,
files,
startIndex,
endIndex,
batchSize,
config,
filePath]() {
bool success = watcher->result(); bool success = watcher->result();
qDebug() << "File processed:" << filePath << "success:" << success; qDebug() << "File processed:" << filePath << "success:" << success;
@ -263,7 +195,7 @@ void RAGManager::processNextBatch(
if (isLastFileInBatch) { if (isLastFileInBatch) {
qDebug() << "Batch completed, moving to next batch"; qDebug() << "Batch completed, moving to next batch";
emit vectorizationProgress(endIndex, files.size()); emit vectorizationProgress(endIndex, files.size());
processNextBatch(promise, project, files, endIndex, batchSize); processNextFileBatch(promise, project, files, config, endIndex, batchSize);
} }
watcher->deleteLater(); watcher->deleteLater();
@ -271,61 +203,223 @@ void RAGManager::processNextBatch(
} }
} }
QFuture<bool> RAGManager::processFile(ProjectExplorer::Project *project, const QString &filePath) QFuture<bool> RAGManager::processFileWithChunks(
ProjectExplorer::Project *project,
const QString &filePath,
const FileChunker::ChunkingConfig &config)
{ {
qDebug() << "Starting to process file:" << filePath;
auto promise = std::make_shared<QPromise<bool>>(); auto promise = std::make_shared<QPromise<bool>>();
promise->start(); promise->start();
ensureStorageForProject(project); ensureStorageForProject(project);
if (!m_currentStorage) { if (!m_currentStorage) {
qDebug() << "ERROR: Storage not initialized for project" << project->displayName(); qDebug() << "Storage not initialized for file:" << filePath;
promise->addResult(false); promise->addResult(false);
promise->finish(); promise->finish();
return promise->future(); return promise->future();
} }
QFile file(filePath); auto fileContent = loadFileContent(filePath);
if (!file.open(QIODevice::ReadOnly)) { if (!fileContent) {
qDebug() << "ERROR: Failed to open file for reading:" << filePath; qDebug() << "Failed to load content for file:" << filePath;
promise->addResult(false); promise->addResult(false);
promise->finish(); promise->finish();
return promise->future(); return promise->future();
} }
QFileInfo fileInfo(filePath); qDebug() << "Creating chunks for file:" << filePath;
QString fileName = fileInfo.fileName(); auto chunksFuture = m_chunker.chunkFiles({filePath});
QString content = QString::fromUtf8(file.readAll()); auto chunks = chunksFuture.result();
qDebug() << "File" << fileName << "read, content size:" << content.size() << "bytes"; if (chunks.isEmpty()) {
qDebug() << "No chunks created for file:" << filePath;
QString processedContent = RAGPreprocessor::preprocessCode(content);
qDebug() << "Preprocessed content size:" << processedContent.size() << "bytes";
auto vectorFuture = m_vectorizer->vectorizeText(processedContent);
qDebug() << "Started vectorization for file:" << fileName;
vectorFuture.then([promise, filePath, fileName, this](const RAGVector &vector) {
if (vector.empty()) {
qDebug() << "ERROR: Vectorization failed for file:" << fileName << "- empty vector";
promise->addResult(false); promise->addResult(false);
} else {
qDebug() << "Vector generated for file:" << fileName << "size:" << vector.size();
bool success = m_currentStorage->storeVector(filePath, vector);
if (!success) {
qDebug() << "ERROR: Failed to store vector for file:" << fileName;
} else {
qDebug() << "Successfully stored vector for file:" << fileName;
}
promise->addResult(success);
}
promise->finish(); promise->finish();
return promise->future();
}
qDebug() << "Created" << chunks.size() << "chunks for file:" << filePath;
// Convert each FileChunk into a FileChunkData record
QList<FileChunkData> chunkData;
for (const auto &chunk : chunks) {
FileChunkData data;
data.filePath = chunk.filePath;
data.startLine = chunk.startLine;
data.endLine = chunk.endLine;
data.content = chunk.content;
chunkData.append(data);
}
qDebug() << "Deleting old chunks for file:" << filePath;
m_currentStorage->deleteChunksForFile(filePath);
auto vectorizeFuture = vectorizeAndStoreChunks(filePath, chunkData);
auto watcher = new QFutureWatcher<void>;
watcher->setFuture(vectorizeFuture);
connect(watcher, &QFutureWatcher<void>::finished, this, [promise, watcher, filePath]() {
qDebug() << "Completed processing file:" << filePath;
promise->addResult(true);
promise->finish();
watcher->deleteLater();
}); });
return promise->future(); return promise->future();
} }
// Starts vectorizing and storing the given chunks, one after another.
//
// filePath is only used for logging here; the chunks are handed to
// processNextChunk() starting at index 0, which finishes the promise once
// the index runs past the end of the list.
//
// Returns the future of the promise that processNextChunk() completes.
QFuture<void> RAGManager::vectorizeAndStoreChunks(
    const QString &filePath, const QList<FileChunkData> &chunks)
{
    qDebug() << "Vectorizing and storing" << chunks.size() << "chunks for file:" << filePath;

    std::shared_ptr<QPromise<void>> completion = std::make_shared<QPromise<void>>();
    completion->start();

    // Chunks are processed sequentially, beginning with the first one.
    const int firstChunkIndex = 0;
    processNextChunk(completion, chunks, firstChunkIndex);

    return completion->future();
}
// Vectorizes chunks[currentIndex], stores the resulting vector and the chunk
// in the current storage, then continues with currentIndex + 1.
//
// promise      - finished once every chunk has been visited, or as soon as
//                the storage turns out to be gone (see below).
// chunks       - chunks to process; copied into the completion callback.
// currentIndex - index of the chunk to process in this step.
//
// A chunk whose vectorization yields no result or an empty vector is logged
// and skipped; processing continues with the next chunk.
void RAGManager::processNextChunk(
    std::shared_ptr<QPromise<void>> promise, const QList<FileChunkData> &chunks, int currentIndex)
{
    // Out-of-range index (including a negative one) means there is nothing left to do.
    if (currentIndex < 0 || currentIndex >= chunks.size()) {
        promise->finish();
        return;
    }

    QString processedContent = RAGPreprocessor::preprocessCode(chunks[currentIndex].content);
    qDebug() << "Processing chunk" << currentIndex + 1 << "of" << chunks.size();

    auto vectorFuture = m_vectorizer->vectorizeText(processedContent);

    auto watcher = new QFutureWatcher<RAGVector>;
    // Connect BEFORE setFuture(): Qt documents this order to avoid a race with
    // a future that is already finished when it is handed to the watcher.
    connect(
        watcher,
        &QFutureWatcher<RAGVector>::finished,
        this,
        [this, watcher, promise, chunks, currentIndex]() {
            // Deletion is deferred to the event loop, so the watcher stays
            // valid for the rest of this callback.
            watcher->deleteLater();

            // The storage may have been closed (closeStorage()) while the
            // vectorization was in flight; there is nowhere to store results,
            // so stop instead of vectorizing the remaining chunks for nothing.
            if (!m_currentStorage) {
                qDebug() << "Storage is no longer available, aborting chunk processing";
                promise->finish();
                return;
            }

            const auto &chunk = chunks.at(currentIndex);
            const auto finishedFuture = watcher->future();
            // Reading result() from a future that produced none is not valid.
            const bool hasResult = !finishedFuture.isCanceled()
                                   && finishedFuture.resultCount() > 0;

            if (hasResult) {
                const auto vector = finishedFuture.result();
                if (!vector.empty()) {
                    qDebug() << "Storing vector and chunk for file:" << chunk.filePath;
                    bool vectorStored = m_currentStorage->storeVector(chunk.filePath, vector);
                    bool chunkStored = m_currentStorage->storeChunk(chunk);
                    qDebug() << "Storage results - Vector:" << vectorStored
                             << "Chunk:" << chunkStored;
                } else {
                    qDebug() << "Failed to vectorize chunk content";
                }
            } else {
                qDebug() << "Failed to vectorize chunk content";
            }

            processNextChunk(promise, chunks, currentIndex + 1);
        });
    watcher->setFuture(vectorFuture);
}
// Searches the project's stored chunks for those most relevant to `query`.
//
// query   - free-form text; it is run through RAGPreprocessor::preprocessCode()
//           before vectorization and ranking.
// project - project whose storage is searched.
// topK    - when more than topK results are ranked, only the first topK are kept.
//
// Returns a future that always receives exactly one result list. The list is
// empty when the project is null, the storage cannot be opened, the query
// cannot be vectorized, or the storage disappears before ranking starts.
// The storage is closed (closeStorage()) after a completed search.
QFuture<QList<RAGManager::ChunkSearchResult>> RAGManager::findRelevantChunks(
    const QString &query, ProjectExplorer::Project *project, int topK)
{
    auto promise = std::make_shared<QPromise<QList<ChunkSearchResult>>>();
    promise->start();

    // A null project cannot have storage; bail out before anything dereferences it.
    if (!project) {
        qDebug() << "No project given for chunk search";
        promise->addResult(QList<ChunkSearchResult>{});
        promise->finish();
        return promise->future();
    }

    ensureStorageForProject(project);
    if (!m_currentStorage) {
        qDebug() << "Storage not initialized for project:" << project->displayName();
        promise->addResult(QList<ChunkSearchResult>{});
        promise->finish();
        return promise->future();
    }

    QString processedQuery = RAGPreprocessor::preprocessCode(query);

    auto vectorFuture = m_vectorizer->vectorizeText(processedQuery);
    vectorFuture.then([this, promise, project, processedQuery, topK](const RAGVector &queryVector) {
        if (queryVector.empty()) {
            qDebug() << "Failed to vectorize query";
            promise->addResult(QList<ChunkSearchResult>{});
            promise->finish();
            return;
        }

        // The storage can be reset (e.g. by closeStorage()) while the query
        // was being vectorized; without it there is nothing to search.
        if (!m_currentStorage) {
            qDebug() << "Storage not initialized for project:" << project->displayName();
            promise->addResult(QList<ChunkSearchResult>{});
            promise->finish();
            return;
        }

        // Gather every stored chunk, file by file.
        auto files = m_currentStorage->getFilesWithChunks();
        QList<FileChunkData> allChunks;
        for (const auto &filePath : files) {
            allChunks.append(m_currentStorage->getChunksForFile(filePath));
        }

        auto results = rankChunks(queryVector, processedQuery, allChunks);

        if (results.size() > topK) {
            results = results.mid(0, topK);
        }

        qDebug() << "Found" << results.size() << "relevant chunks";
        promise->addResult(results);
        promise->finish();
        closeStorage();
    });

    return promise->future();
}
// Scores every chunk against the query and returns the results sorted with
// ChunkSearchResult::operator<.
//
// queryVector - vector of the (preprocessed) query.
// queryText   - preprocessed query text, passed to the similarity calculation.
// chunks      - candidate chunks; each chunk's content is preprocessed before
//               comparison.
//
// Chunks whose file has no stored vector are skipped. Returns an empty list
// when no storage is open.
QList<RAGManager::ChunkSearchResult> RAGManager::rankChunks(
    const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks)
{
    QList<ChunkSearchResult> results;
    if (!m_currentStorage) {
        qDebug() << "Storage not initialized, cannot rank chunks";
        return results;
    }
    results.reserve(chunks.size());

    // Vectors are looked up by file path, so consecutive chunks of the same
    // file share one vector: remember the last lookup instead of hitting the
    // storage again for every chunk.
    QString cachedPath;
    std::optional<RAGVector> cachedVector;
    bool cacheValid = false;

    for (const auto &chunk : chunks) {
        if (!cacheValid || cachedPath != chunk.filePath) {
            cachedVector = m_currentStorage->getVector(chunk.filePath);
            cachedPath = chunk.filePath;
            cacheValid = true;
        }
        if (!cachedVector.has_value()) {
            continue;
        }

        QString processedChunk = RAGPreprocessor::preprocessCode(chunk.content);

        auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity(
            queryVector, cachedVector.value(), queryText, processedChunk);

        results.append(ChunkSearchResult{
            chunk.filePath,
            chunk.startLine,
            chunk.endLine,
            chunk.content,
            similarity.semantic_similarity,
            similarity.structural_similarity,
            similarity.combined_score});
    }

    std::sort(results.begin(), results.end());
    return results;
}
// Returns the files recorded in the project's storage, as reported by
// RAGStorage::getAllFiles(). Makes the given project's storage current first
// via ensureStorageForProject(); returns an empty list when no storage is
// available afterwards.
QStringList RAGManager::getStoredFiles(ProjectExplorer::Project *project) const
{
    ensureStorageForProject(project);

    if (m_currentStorage) {
        return m_currentStorage->getAllFiles();
    }
    return {};
}
// Tells whether filePath has to be (re)processed for the given project.
// Makes the project's storage current first via ensureStorageForProject().
// With no storage available the file is reported as outdated; otherwise the
// answer comes from RAGStorage::needsUpdate().
bool RAGManager::isFileStorageOutdated(
    ProjectExplorer::Project *project, const QString &filePath) const
{
    ensureStorageForProject(project);

    const bool storageMissing = !m_currentStorage;
    return storageMissing || m_currentStorage->needsUpdate(filePath);
}
std::optional<RAGVector> RAGManager::loadVectorFromStorage( std::optional<RAGVector> RAGManager::loadVectorFromStorage(
ProjectExplorer::Project *project, const QString &filePath) ProjectExplorer::Project *project, const QString &filePath)
{ {
@ -336,105 +430,14 @@ std::optional<RAGVector> RAGManager::loadVectorFromStorage(
return m_currentStorage->getVector(filePath); return m_currentStorage->getVector(filePath);
} }
QStringList RAGManager::getStoredFiles(ProjectExplorer::Project *project) const void RAGManager::closeStorage()
{ {
if (m_currentProject != project || !m_currentStorage) { qDebug() << "Closing storage...";
auto tempStorage = RAGStorage(getStoragePath(project), nullptr); if (m_currentStorage) {
if (!tempStorage.init()) { m_currentStorage.reset();
return {}; m_currentProject = nullptr;
qDebug() << "Storage closed";
} }
return tempStorage.getAllFiles();
}
return m_currentStorage->getAllFiles();
}
bool RAGManager::isFileStorageOutdated(
ProjectExplorer::Project *project, const QString &filePath) const
{
if (m_currentProject != project || !m_currentStorage) {
auto tempStorage = RAGStorage(getStoragePath(project), nullptr);
if (!tempStorage.init()) {
return true;
}
return tempStorage.needsUpdate(filePath);
}
return m_currentStorage->needsUpdate(filePath);
}
QFuture<QList<RAGManager::SearchResult>> RAGManager::search(
const QString &text, ProjectExplorer::Project *project, int topK)
{
auto promise = std::make_shared<QPromise<QList<SearchResult>>>();
promise->start();
auto queryVectorFuture = m_vectorizer->vectorizeText(text);
queryVectorFuture.then([this, promise, project, topK](const RAGVector &queryVector) {
if (queryVector.empty()) {
LOG_MESSAGE("Failed to vectorize query text");
promise->addResult(QList<SearchResult>());
promise->finish();
return;
}
auto storedFiles = getStoredFiles(project);
std::priority_queue<SearchResult> results;
for (const auto &filePath : storedFiles) {
auto storedVector = loadVectorFromStorage(project, filePath);
if (!storedVector.has_value())
continue;
float l2Score = RAGSimilaritySearch::l2Distance(queryVector, storedVector.value());
float cosineScore
= RAGSimilaritySearch::cosineSimilarity(queryVector, storedVector.value());
results.push(SearchResult{filePath, l2Score, cosineScore});
}
QList<SearchResult> resultsList;
int count = 0;
while (!results.empty() && count < topK) {
resultsList.append(results.top());
results.pop();
count++;
}
promise->addResult(resultsList);
promise->finish();
});
return promise->future();
}
// void RAGManager::searchSimilarDocuments(
// const QString &text, ProjectExplorer::Project *project, int topK)
// {
// auto future = search(text, project, topK);
// future.then([this](const QList<SearchResult> &results) { logSearchResults(results); });
// }
void RAGManager::logSearchResults(const QList<SearchResult> &results) const
{
qDebug() << "\n=== Search Results ===";
qDebug() << "Number of results:" << results.size();
if (results.empty()) {
qDebug() << "No similar documents found.";
return;
}
for (int i = 0; i < results.size(); ++i) {
const auto &result = results[i];
QFileInfo fileInfo(result.filePath);
qDebug() << "\nResult #" << (i + 1);
qDebug() << "File:" << fileInfo.fileName();
qDebug() << "Full path:" << result.filePath;
qDebug() << "Semantic similarity:" << QString::number(result.semantic_similarity, 'f', 4);
qDebug() << "Structural similarity:"
<< QString::number(result.structural_similarity, 'f', 4);
qDebug() << "Combined score:" << QString::number(result.combined_score, 'f', 4);
}
qDebug() << "\n=== End of Results ===\n";
} }
} // namespace QodeAssist::Context } // namespace QodeAssist::Context

View File

@ -20,13 +20,14 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <optional>
#include <QFuture> #include <QFuture>
#include <QObject> #include <QObject>
#include <QString>
#include "FileChunker.hpp"
#include "RAGData.hpp"
#include "RAGStorage.hpp" #include "RAGStorage.hpp"
#include "RAGVectorizer.hpp" #include "RAGVectorizer.hpp"
#include <RAGData.hpp>
namespace ProjectExplorer { namespace ProjectExplorer {
class Project; class Project;
@ -37,60 +38,82 @@ namespace QodeAssist::Context {
class RAGManager : public QObject class RAGManager : public QObject
{ {
Q_OBJECT Q_OBJECT
public:
static RAGManager &instance();
struct SearchResult public:
struct ChunkSearchResult
{ {
QString filePath; QString filePath;
float semantic_similarity; int startLine;
float structural_similarity; int endLine;
float combined_score; QString content;
float semanticScore;
float structuralScore;
float combinedScore;
bool operator<(const SearchResult &other) const bool operator<(const ChunkSearchResult &other) const
{ {
return combined_score > other.combined_score; return combinedScore > other.combinedScore;
} }
}; };
// Process and vectorize files static RAGManager &instance();
QFuture<void> processFiles(ProjectExplorer::Project *project, const QStringList &filePaths);
std::optional<RAGVector> loadVectorFromStorage( QFuture<void> processProjectFiles(
ProjectExplorer::Project *project, const QString &filePath); ProjectExplorer::Project *project,
const QStringList &filePaths,
const FileChunker::ChunkingConfig &config = FileChunker::ChunkingConfig());
QFuture<QList<ChunkSearchResult>> findRelevantChunks(
const QString &query, ProjectExplorer::Project *project, int topK = 5);
QStringList getStoredFiles(ProjectExplorer::Project *project) const; QStringList getStoredFiles(ProjectExplorer::Project *project) const;
bool isFileStorageOutdated(ProjectExplorer::Project *project, const QString &filePath) const; bool isFileStorageOutdated(ProjectExplorer::Project *project, const QString &filePath) const;
RAGVectorizer *getVectorizer() const { return m_vectorizer.get(); }
// Search functionality
QFuture<QList<SearchResult>> search(
const QString &text, ProjectExplorer::Project *project, int topK = 5);
void searchSimilarDocuments(const QString &text, ProjectExplorer::Project *project, int topK = 5);
void logSearchResults(const QList<SearchResult> &results) const;
void processNextChunk(
std::shared_ptr<QPromise<void>> promise,
const QList<FileChunkData> &chunks,
int currentIndex);
void closeStorage();
signals: signals:
void vectorizationProgress(int processed, int total); void vectorizationProgress(int processed, int total);
void vectorizationFinished(); void vectorizationFinished();
private: private:
RAGManager(QObject *parent = nullptr); explicit RAGManager(QObject *parent = nullptr);
~RAGManager(); ~RAGManager();
RAGManager(const RAGManager &) = delete; RAGManager(const RAGManager &) = delete;
RAGManager &operator=(const RAGManager &) = delete; RAGManager &operator=(const RAGManager &) = delete;
QFuture<bool> processFile(ProjectExplorer::Project *project, const QString &filePath); QString getStoragePath(ProjectExplorer::Project *project) const;
void processNextBatch( void ensureStorageForProject(ProjectExplorer::Project *project) const;
std::optional<QString> loadFileContent(const QString &filePath);
std::optional<RAGVector> loadVectorFromStorage(
ProjectExplorer::Project *project, const QString &filePath);
void processNextFileBatch(
std::shared_ptr<QPromise<void>> promise, std::shared_ptr<QPromise<void>> promise,
ProjectExplorer::Project *project, ProjectExplorer::Project *project,
const QStringList &files, const QStringList &files,
const FileChunker::ChunkingConfig &config,
int startIndex, int startIndex,
int batchSize); int batchSize);
void ensureStorageForProject(ProjectExplorer::Project *project);
QString getStoragePath(ProjectExplorer::Project *project) const;
std::unique_ptr<RAGVectorizer> m_vectorizer; QFuture<bool> processFileWithChunks(
std::unique_ptr<RAGStorage> m_currentStorage; ProjectExplorer::Project *project,
ProjectExplorer::Project *m_currentProject{nullptr}; const QString &filePath,
std::optional<QString> loadFileContent(const QString &filePath); const FileChunker::ChunkingConfig &config);
QFuture<void> vectorizeAndStoreChunks(
const QString &filePath, const QList<FileChunkData> &chunks);
QList<ChunkSearchResult> rankChunks(
const RAGVector &queryVector, const QString &queryText, const QList<FileChunkData> &chunks);
private:
mutable std::unique_ptr<RAGVectorizer> m_vectorizer;
mutable std::unique_ptr<RAGStorage> m_currentStorage;
mutable ProjectExplorer::Project *m_currentProject{nullptr};
FileChunker m_chunker;
}; };
} // namespace QodeAssist::Context } // namespace QodeAssist::Context

View File

@ -30,19 +30,18 @@ public:
} }
try { try {
// Прямое разделение без промежуточной копии
QStringList lines = code.split('\n', Qt::SkipEmptyParts); QStringList lines = code.split('\n', Qt::SkipEmptyParts);
return processLines(lines); return processLines(lines);
} catch (const std::exception &e) { } catch (const std::exception &e) {
LOG_MESSAGE(QString("Error preprocessing code: %1").arg(e.what())); LOG_MESSAGE(QString("Error preprocessing code: %1").arg(e.what()));
return code; // Возвращаем оригинальный код в случае ошибки return code;
} }
} }
private: private:
static QString processLines(const QStringList &lines) static QString processLines(const QStringList &lines)
{ {
const int estimatedAvgLength = 80; // Примерная средняя длина строки const int estimatedAvgLength = 80;
QString result; QString result;
result.reserve(lines.size() * estimatedAvgLength); result.reserve(lines.size() * estimatedAvgLength);
@ -54,7 +53,6 @@ private:
} }
} }
// Убираем последний перенос строки, если он есть
if (result.endsWith('\n')) { if (result.endsWith('\n')) {
result.chop(1); result.chop(1);
} }

View File

@ -20,10 +20,12 @@
// RAGStorage.cpp // RAGStorage.cpp
#include "RAGStorage.hpp" #include "RAGStorage.hpp"
#include <QDebug> #include <QDebug>
#include <QDir>
#include <QFile> #include <QFile>
#include <QFileInfo> #include <QFileInfo>
#include <QSqlError> #include <QSqlError>
#include <QSqlQuery> #include <QSqlQuery>
#include <QUuid>
namespace QodeAssist::Context { namespace QodeAssist::Context {
@ -45,43 +47,61 @@ RAGStorage::~RAGStorage()
bool RAGStorage::init() bool RAGStorage::init()
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
qDebug() << "Initializing RAGStorage at path:" << m_dbPath;
if (!openDatabase()) { if (!openDatabase()) {
qDebug() << "Failed to open database";
return false; return false;
} }
qDebug() << "Database opened successfully";
if (!createTables()) { if (!createTables()) {
qDebug() << "Failed to create tables";
return false; return false;
} }
qDebug() << "Tables created successfully";
if (!createIndices()) { if (!createIndices()) {
qDebug() << "Failed to create indices";
return false; return false;
} }
qDebug() << "Indices created successfully";
int version = getStorageVersion(); int version = getStorageVersion();
qDebug() << "Current storage version:" << version;
if (version < CURRENT_VERSION) { if (version < CURRENT_VERSION) {
qDebug() << "Upgrading storage from version" << version << "to" << CURRENT_VERSION;
if (!upgradeStorage(version)) { if (!upgradeStorage(version)) {
qDebug() << "Failed to upgrade storage";
return false; return false;
} }
qDebug() << "Storage upgraded successfully";
} }
if (!prepareStatements()) { if (!prepareStatements()) {
qDebug() << "Failed to prepare statements";
return false; return false;
} }
qDebug() << "Statements prepared successfully";
m_status = Status::Ok; m_status = Status::Ok;
qDebug() << "RAGStorage initialized successfully";
return true; return true;
} }
bool RAGStorage::openDatabase() bool RAGStorage::openDatabase()
{ {
qDebug() << "Opening database at:" << m_dbPath;
QDir dir(QFileInfo(m_dbPath).absolutePath()); QDir dir(QFileInfo(m_dbPath).absolutePath());
if (!dir.exists() && !dir.mkpath(".")) { if (!dir.exists() && !dir.mkpath(".")) {
setError("Failed to create database directory", Status::DatabaseError); setError("Failed to create database directory", Status::DatabaseError);
return false; return false;
} }
m_db = QSqlDatabase::addDatabase("QSQLITE", "rag_storage"); QString connectionName = QString("rag_storage_%1").arg(QUuid::createUuid().toString());
m_db = QSqlDatabase::addDatabase("QSQLITE", connectionName);
m_db.setDatabaseName(m_dbPath); m_db.setDatabaseName(m_dbPath);
if (!m_db.open()) { if (!m_db.open()) {
@ -89,14 +109,41 @@ bool RAGStorage::openDatabase()
return false; return false;
} }
QSqlQuery query(m_db);
if (!query.exec("PRAGMA journal_mode=WAL")) {
qDebug() << "Failed to set journal mode:" << query.lastError().text();
}
if (!query.exec("PRAGMA synchronous=NORMAL")) {
qDebug() << "Failed to set synchronous mode:" << query.lastError().text();
}
qDebug() << "Database opened successfully";
return true; return true;
} }
bool RAGStorage::createTables() bool RAGStorage::createTables()
{ {
if (!createVersionTable() || !createVectorsTable() || !createChunksTable()) { qDebug() << "Creating tables...";
if (!createVersionTable()) {
qDebug() << "Failed to create version table";
return false; return false;
} }
qDebug() << "Version table created";
if (!createVectorsTable()) {
qDebug() << "Failed to create vectors table";
return false;
}
qDebug() << "Vectors table created";
if (!createChunksTable()) {
qDebug() << "Failed to create chunks table";
return false;
}
qDebug() << "Chunks table created";
return true; return true;
} }
@ -120,12 +167,34 @@ bool RAGStorage::createIndices()
bool RAGStorage::createVersionTable() bool RAGStorage::createVersionTable()
{ {
qDebug() << "Creating version table...";
QSqlQuery query(m_db); QSqlQuery query(m_db);
return query.exec("CREATE TABLE IF NOT EXISTS storage_version (" bool success = query.exec("CREATE TABLE IF NOT EXISTS storage_version ("
"id INTEGER PRIMARY KEY AUTOINCREMENT," "id INTEGER PRIMARY KEY AUTOINCREMENT,"
"version INTEGER NOT NULL," "version INTEGER NOT NULL,"
"created_at DATETIME DEFAULT CURRENT_TIMESTAMP" "created_at DATETIME DEFAULT CURRENT_TIMESTAMP"
")"); ")");
if (!success) {
qDebug() << "Failed to create version table:" << query.lastError().text();
return false;
}
query.exec("SELECT COUNT(*) FROM storage_version");
if (query.next() && query.value(0).toInt() == 0) {
qDebug() << "Inserting initial version record";
QSqlQuery insertQuery(m_db);
success = insertQuery.exec(
QString("INSERT INTO storage_version (version) VALUES (%1)").arg(CURRENT_VERSION));
if (!success) {
qDebug() << "Failed to insert initial version:" << insertQuery.lastError().text();
return false;
}
}
qDebug() << "Version table ready";
return true;
} }
bool RAGStorage::createVectorsTable() bool RAGStorage::createVectorsTable()
@ -158,6 +227,8 @@ bool RAGStorage::createChunksTable()
bool RAGStorage::prepareStatements() bool RAGStorage::prepareStatements()
{ {
qDebug() << "Preparing SQL statements...";
m_insertChunkQuery = QSqlQuery(m_db); m_insertChunkQuery = QSqlQuery(m_db);
if (!m_insertChunkQuery.prepare( if (!m_insertChunkQuery.prepare(
"INSERT INTO file_chunks (file_path, start_line, end_line, content) " "INSERT INTO file_chunks (file_path, start_line, end_line, content) "
@ -178,7 +249,7 @@ bool RAGStorage::prepareStatements()
if (!m_insertVectorQuery.prepare( if (!m_insertVectorQuery.prepare(
"INSERT INTO file_vectors (file_path, vector_data, last_modified) " "INSERT INTO file_vectors (file_path, vector_data, last_modified) "
"VALUES (:path, :vector, :modified)")) { "VALUES (:path, :vector, :modified)")) {
setError("Failed to prepare insert vector query"); setError("Failed to prepare insert vector query: " + m_insertVectorQuery.lastError().text());
return false; return false;
} }
@ -186,7 +257,7 @@ bool RAGStorage::prepareStatements()
if (!m_updateVectorQuery.prepare( if (!m_updateVectorQuery.prepare(
"UPDATE file_vectors SET vector_data = :vector, last_modified = :modified, " "UPDATE file_vectors SET vector_data = :vector, last_modified = :modified, "
"updated_at = CURRENT_TIMESTAMP WHERE file_path = :path")) { "updated_at = CURRENT_TIMESTAMP WHERE file_path = :path")) {
setError("Failed to prepare update vector query"); setError("Failed to prepare update vector query: " + m_updateVectorQuery.lastError().text());
return false; return false;
} }
@ -197,7 +268,9 @@ bool RAGStorage::storeChunk(const FileChunkData &chunk)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
if (!validateChunk(chunk)) { auto validation = validateChunk(chunk);
if (!validation.isValid) {
setError(validation.errorMessage, validation.errorStatus);
return false; return false;
} }
@ -228,7 +301,9 @@ bool RAGStorage::storeChunks(const QList<FileChunkData> &chunks)
} }
for (const auto &chunk : chunks) { for (const auto &chunk : chunks) {
if (!validateChunk(chunk)) { auto validation = validateChunk(chunk);
if (!validation.isValid) {
setError(validation.errorMessage, validation.errorStatus);
rollbackTransaction(); rollbackTransaction();
return false; return false;
} }
@ -248,34 +323,30 @@ bool RAGStorage::storeChunks(const QList<FileChunkData> &chunks)
return commitTransaction(); return commitTransaction();
} }
bool RAGStorage::validateChunk(const FileChunkData &chunk) const RAGStorage::ValidationResult RAGStorage::validateChunk(const FileChunkData &chunk) const
{ {
if (!chunk.isValid()) { if (!chunk.isValid()) {
setError("Invalid chunk data", Status::ValidationError); return {false, "Invalid chunk data", Status::ValidationError};
return false;
} }
if (chunk.content.size() > m_options.maxChunkSize) { if (chunk.content.size() > m_options.maxChunkSize) {
setError("Chunk content exceeds maximum size", Status::ValidationError); return {false, "Chunk content exceeds maximum size", Status::ValidationError};
return false;
} }
return true; return {true, QString(), Status::Ok};
} }
bool RAGStorage::validateVector(const RAGVector &vector) const RAGStorage::ValidationResult RAGStorage::validateVector(const RAGVector &vector) const
{ {
if (vector.empty()) { if (vector.empty()) {
setError("Empty vector data", Status::ValidationError); return {false, "Empty vector data", Status::ValidationError};
return false;
} }
if (vector.size() > m_options.maxVectorSize) { if (vector.size() > m_options.maxVectorSize) {
setError("Vector size exceeds maximum limit", Status::ValidationError); return {false, "Vector size exceeds maximum limit", Status::ValidationError};
return false;
} }
return true; return {true, QString(), Status::Ok};
} }
bool RAGStorage::beginTransaction() bool RAGStorage::beginTransaction()
@ -296,8 +367,11 @@ bool RAGStorage::rollbackTransaction()
bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector) bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
qDebug() << "Storing vector for file:" << filePath;
if (!validateVector(vector)) { auto validation = validateVector(vector);
if (!validation.isValid) {
setError(validation.errorMessage, validation.errorStatus);
return false; return false;
} }
@ -307,17 +381,31 @@ bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector)
QDateTime lastModified = getFileLastModified(filePath); QDateTime lastModified = getFileLastModified(filePath);
QByteArray blob = vectorToBlob(vector); QByteArray blob = vectorToBlob(vector);
qDebug() << "Vector converted to blob, size:" << blob.size() << "bytes";
m_updateVectorQuery.bindValue(":path", filePath);
m_updateVectorQuery.bindValue(":vector", blob);
m_updateVectorQuery.bindValue(":modified", lastModified);
if (m_updateVectorQuery.exec()) {
if (m_updateVectorQuery.numRowsAffected() > 0) {
qDebug() << "Vector updated successfully";
return commitTransaction();
}
}
m_insertVectorQuery.bindValue(":path", filePath); m_insertVectorQuery.bindValue(":path", filePath);
m_insertVectorQuery.bindValue(":vector", blob); m_insertVectorQuery.bindValue(":vector", blob);
m_insertVectorQuery.bindValue(":modified", lastModified); m_insertVectorQuery.bindValue(":modified", lastModified);
if (!m_insertVectorQuery.exec()) { if (!m_insertVectorQuery.exec()) {
qDebug() << "Failed to store vector:" << m_insertVectorQuery.lastError().text();
rollbackTransaction(); rollbackTransaction();
setError("Failed to store vector: " + m_insertVectorQuery.lastError().text()); setError("Failed to store vector: " + m_insertVectorQuery.lastError().text());
return false; return false;
} }
qDebug() << "Vector stored successfully";
return commitTransaction(); return commitTransaction();
} }
@ -325,7 +413,9 @@ bool RAGStorage::updateVector(const QString &filePath, const RAGVector &vector)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
if (!validateVector(vector)) { auto validation = validateVector(vector);
if (!validation.isValid) {
setError(validation.errorMessage, validation.errorStatus);
return false; return false;
} }
@ -391,19 +481,42 @@ QDateTime RAGStorage::getFileLastModified(const QString &filePath)
/// Rebuilds a vector from the binary form written by vectorToBlob().
///
/// Layout: a qint32 element count followed by that many doubles, read with
/// QDataStream version Qt_6_0 and double precision.
///
/// The element count comes from the blob and is therefore validated before
/// any allocation: a negative count, a count larger than the remaining
/// payload can hold, or a stream error all produce an empty vector instead
/// of a huge allocation or half-filled data.
///
/// @param blob Serialized vector data.
/// @return The decoded vector, or an empty vector when the blob is empty,
///         truncated or otherwise malformed.
RAGVector RAGStorage::blobToVector(const QByteArray &blob)
{
    // Size of one element on the wire when DoublePrecision is selected.
    constexpr qint64 serializedDoubleBytes = 8;

    QDataStream stream(blob);
    stream.setVersion(QDataStream::Qt_6_0);
    stream.setFloatingPointPrecision(QDataStream::DoublePrecision);

    qint32 size = 0;
    stream >> size;

    const qint64 payloadBytes = static_cast<qint64>(blob.size())
                                - static_cast<qint64>(sizeof(qint32));
    const bool headerOk = stream.status() == QDataStream::Ok && size >= 0
                          && static_cast<qint64>(size) * serializedDoubleBytes <= payloadBytes;
    if (!headerOk) {
        qDebug() << "Invalid vector blob, declared size:" << size
                 << "blob size:" << blob.size();
        return RAGVector();
    }

    RAGVector vector;
    vector.resize(size);
    for (int i = 0; i < size; ++i) {
        double value = 0.0;
        stream >> value;
        vector[i] = value;
    }

    // A read error part-way through means the tail of the vector is not real data.
    if (stream.status() != QDataStream::Ok) {
        qDebug() << "Failed to read vector data from blob";
        return RAGVector();
    }

    qDebug() << "Vector restored from blob, size:" << vector.size();
    return vector;
}
/// Serializes a vector into a byte array for storage.
///
/// Layout: the element count as qint32, then every element written as a
/// double, using QDataStream version Qt_6_0 with double precision.
///
/// @param vector Vector to serialize.
/// @return The serialized bytes.
QByteArray RAGStorage::vectorToBlob(const RAGVector &vector)
{
    QByteArray serialized;
    QDataStream out(&serialized, QIODevice::WriteOnly);
    out.setVersion(QDataStream::Qt_6_0);
    out.setFloatingPointPrecision(QDataStream::DoublePrecision);

    const qint32 elementCount = static_cast<qint32>(vector.size());
    out << elementCount;

    for (auto it = vector.begin(); it != vector.end(); ++it) {
        const double element = *it;
        out << element;
    }

    qDebug() << "Vector converted to blob, vector size:" << vector.size()
             << "blob size:" << serialized.size();
    return serialized;
}
@ -420,12 +533,12 @@ void RAGStorage::clearError()
m_status = Status::Ok; m_status = Status::Ok;
} }
Status RAGStorage::status() const RAGStorage::Status RAGStorage::status() const
{ {
return m_status; return m_status;
} }
Error RAGStorage::lastError() const RAGStorage::Error RAGStorage::lastError() const
{ {
return m_lastError; return m_lastError;
} }
@ -482,7 +595,6 @@ bool RAGStorage::backup(const QString &backupPath)
return false; return false;
} }
// Создаем резервную копию через SQLite backup API
QFile::copy(m_dbPath, backupPath); QFile::copy(m_dbPath, backupPath);
return true; return true;
@ -495,7 +607,6 @@ StorageStatistics RAGStorage::getStatistics() const
StorageStatistics stats; StorageStatistics stats;
QSqlQuery query(m_db); QSqlQuery query(m_db);
// Получаем статистику по чанкам
if (query.exec("SELECT COUNT(*), SUM(LENGTH(content)) FROM file_chunks")) { if (query.exec("SELECT COUNT(*), SUM(LENGTH(content)) FROM file_chunks")) {
if (query.next()) { if (query.next()) {
stats.totalChunks = query.value(0).toInt(); stats.totalChunks = query.value(0).toInt();
@ -503,7 +614,6 @@ StorageStatistics RAGStorage::getStatistics() const
} }
} }
// Получаем статистику по векторам
if (query.exec("SELECT COUNT(*) FROM file_vectors")) { if (query.exec("SELECT COUNT(*) FROM file_vectors")) {
if (query.next()) { if (query.next()) {
stats.totalVectors = query.value(0).toInt(); stats.totalVectors = query.value(0).toInt();
@ -518,7 +628,6 @@ StorageStatistics RAGStorage::getStatistics() const
} }
} }
// Получаем время последнего обновления
if (query.exec("SELECT MAX(updated_at) FROM (" if (query.exec("SELECT MAX(updated_at) FROM ("
"SELECT updated_at FROM file_chunks " "SELECT updated_at FROM file_chunks "
"UNION " "UNION "
@ -610,7 +719,8 @@ bool RAGStorage::updateChunk(const FileChunkData &chunk)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
if (!validateChunk(chunk)) { auto validation = validateChunk(chunk);
if (!validation.isValid) {
return false; return false;
} }
@ -641,7 +751,8 @@ bool RAGStorage::updateChunks(const QList<FileChunkData> &chunks)
} }
for (const auto &chunk : chunks) { for (const auto &chunk : chunks) {
if (!validateChunk(chunk)) { auto validation = validateChunk(chunk);
if (!validation.isValid) {
rollbackTransaction(); rollbackTransaction();
return false; return false;
} }
@ -739,14 +850,24 @@ bool RAGStorage::chunkExists(const QString &filePath, int startLine, int endLine
int RAGStorage::getStorageVersion() const int RAGStorage::getStorageVersion() const
{ {
QMutexLocker locker(&m_mutex); qDebug() << "Getting storage version...";
QSqlQuery query(m_db); QSqlQuery query(m_db);
if (query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1")) { qDebug() << "Created query object";
if (!query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1")) {
qDebug() << "Failed to execute version query:" << query.lastError().text();
return 0;
}
qDebug() << "Version query executed";
if (query.next()) { if (query.next()) {
return query.value(0).toInt(); int version = query.value(0).toInt();
} qDebug() << "Current version:" << version;
return version;
} }
qDebug() << "No version found, assuming version 0";
return 0; return 0;
} }
@ -764,7 +885,6 @@ bool RAGStorage::upgradeStorage(int fromVersion)
return false; return false;
} }
// Выполняем последовательные миграции от текущей версии до последней
for (int version = fromVersion + 1; version <= CURRENT_VERSION; ++version) { for (int version = fromVersion + 1; version <= CURRENT_VERSION; ++version) {
if (!applyMigration(version)) { if (!applyMigration(version)) {
rollbackTransaction(); rollbackTransaction();
@ -773,7 +893,6 @@ bool RAGStorage::upgradeStorage(int fromVersion)
} }
} }
// Обновляем версию в БД
QSqlQuery query(m_db); QSqlQuery query(m_db);
query.prepare("INSERT INTO storage_version (version) VALUES (:version)"); query.prepare("INSERT INTO storage_version (version) VALUES (:version)");
query.bindValue(":version", CURRENT_VERSION); query.bindValue(":version", CURRENT_VERSION);
@ -793,15 +912,13 @@ bool RAGStorage::applyMigration(int version)
switch (version) { switch (version) {
case 1: case 1:
// Миграция на версию 1
if (!query.exec("ALTER TABLE file_chunks ADD COLUMN metadata TEXT")) { if (!query.exec("ALTER TABLE file_chunks ADD COLUMN metadata TEXT")) {
return false; return false;
} }
break; break;
// Добавляем новые кейсы для будущих версий
// case 2: // case 2:
// // Миграция на версию 2 // //
// break; // break;
default: default:
@ -816,7 +933,6 @@ bool RAGStorage::validateSchema() const
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
// Проверяем наличие всех необходимых таблиц
QStringList requiredTables = {"storage_version", "file_vectors", "file_chunks"}; QStringList requiredTables = {"storage_version", "file_vectors", "file_chunks"};
QSqlQuery query(m_db); QSqlQuery query(m_db);
@ -833,7 +949,6 @@ bool RAGStorage::validateSchema() const
} }
} }
// Проверяем структуру таблиц
struct ColumnInfo struct ColumnInfo
{ {
QString name; QString name;
@ -875,7 +990,6 @@ bool RAGStorage::validateSchema() const
return false; return false;
} }
// Проверяем каждую колонку
for (int i = 0; i < actualColumns.size(); ++i) { for (int i = 0; i < actualColumns.size(); ++i) {
const auto &expected = it.value()[i]; const auto &expected = it.value()[i];
const auto &actual = actualColumns[i]; const auto &actual = actualColumns[i];
@ -894,23 +1008,19 @@ bool RAGStorage::restore(const QString &backupPath)
{ {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
// Закрываем текущее соединение
if (m_db.isOpen()) { if (m_db.isOpen()) {
m_db.close(); m_db.close();
} }
// Копируем файл бэкапа
if (!QFile::remove(m_dbPath) || !QFile::copy(backupPath, m_dbPath)) { if (!QFile::remove(m_dbPath) || !QFile::copy(backupPath, m_dbPath)) {
setError("Failed to restore from backup"); setError("Failed to restore from backup");
return false; return false;
} }
// Переоткрываем БД
if (!openDatabase()) { if (!openDatabase()) {
return false; return false;
} }
// Проверяем валидность схемы
if (!validateSchema()) { if (!validateSchema()) {
setError("Invalid schema in backup file"); setError("Invalid schema in backup file");
return false; return false;

View File

@ -32,17 +32,14 @@
namespace QodeAssist::Context { namespace QodeAssist::Context {
/**
* @brief Структура для хранения информации о чанке файла
*/
struct FileChunkData struct FileChunkData
{ {
QString filePath; ///< Путь к файлу QString filePath;
int startLine; ///< Начальная строка чанка int startLine;
int endLine; ///< Конечная строка чанка int endLine;
QString content; ///< Содержимое чанка QString content;
QDateTime createdAt; ///< Время создания QDateTime createdAt;
QDateTime updatedAt; ///< Время последнего обновления QDateTime updatedAt;
bool isValid() const bool isValid() const
{ {
@ -50,32 +47,23 @@ struct FileChunkData
} }
}; };
/**
* @brief Структура для настройки хранилища
*/
/// Tunable limits and switches for the storage.
struct StorageOptions
{
    /// Upper bound for chunk content; validateChunk compares it with content.size().
    int maxChunkSize{1024 * 1024};
    /// Upper bound for the number of elements in a vector; checked by validateVector.
    int maxVectorSize{1024};
    /// Presumably enables compression of stored data — not used in the visible code, TODO confirm.
    bool useCompression{false};
    /// Presumably enables logging — not used in the visible code, TODO confirm.
    bool enableLogging{false};
};
/**
* @brief Структура для хранения статистики
*/
struct StorageStatistics struct StorageStatistics
{ {
int totalChunks; ///< Общее количество чанков int totalChunks;
int totalVectors; ///< Общее количество векторов int totalVectors;
int totalFiles; ///< Общее количество файлов int totalFiles;
qint64 totalSize; ///< Общий размер данных qint64 totalSize;
QDateTime lastUpdate; ///< Время последнего обновления QDateTime lastUpdate;
}; };
/**
* @brief Класс для работы с хранилищем RAG данных
*/
class RAGStorage : public QObject class RAGStorage : public QObject
{ {
Q_OBJECT Q_OBJECT
@ -85,6 +73,13 @@ public:
enum class Status { Ok, DatabaseError, ValidationError, VersionError, ConnectionError }; enum class Status { Ok, DatabaseError, ValidationError, VersionError, ConnectionError };
/// Outcome of a validation check, returned by value instead of recording the
/// error as a side effect.
///
/// The defaults fail closed: a default-constructed result counts as invalid,
/// so a forgotten assignment cannot be mistaken for success. Aggregate
/// initialization such as {true, QString(), Status::Ok} keeps working.
struct ValidationResult
{
    bool isValid = false;                          ///< true when the checked data is acceptable.
    QString errorMessage;                          ///< Reason for rejection; empty on success.
    Status errorStatus = Status::ValidationError;  ///< Status to report on rejection.
};
struct Error struct Error
{ {
QString message; QString message;
@ -99,26 +94,22 @@ public:
QObject *parent = nullptr); QObject *parent = nullptr);
~RAGStorage(); ~RAGStorage();
// Инициализация и проверка состояния
bool init(); bool init();
Status status() const; Status status() const;
Error lastError() const; Error lastError() const;
bool isReady() const; bool isReady() const;
QString dbPath() const; QString dbPath() const;
// Управление транзакциями
bool beginTransaction(); bool beginTransaction();
bool commitTransaction(); bool commitTransaction();
bool rollbackTransaction(); bool rollbackTransaction();
// Операции с векторами
bool storeVector(const QString &filePath, const RAGVector &vector); bool storeVector(const QString &filePath, const RAGVector &vector);
bool updateVector(const QString &filePath, const RAGVector &vector); bool updateVector(const QString &filePath, const RAGVector &vector);
std::optional<RAGVector> getVector(const QString &filePath); std::optional<RAGVector> getVector(const QString &filePath);
bool needsUpdate(const QString &filePath); bool needsUpdate(const QString &filePath);
QStringList getAllFiles(); QStringList getAllFiles();
// Операции с чанками
bool storeChunk(const FileChunkData &chunk); bool storeChunk(const FileChunkData &chunk);
bool storeChunks(const QList<FileChunkData> &chunks); bool storeChunks(const QList<FileChunkData> &chunks);
bool updateChunk(const FileChunkData &chunk); bool updateChunk(const FileChunkData &chunk);
@ -128,7 +119,6 @@ public:
QList<FileChunkData> getChunksForFile(const QString &filePath); QList<FileChunkData> getChunksForFile(const QString &filePath);
bool chunkExists(const QString &filePath, int startLine, int endLine); bool chunkExists(const QString &filePath, int startLine, int endLine);
// Обслуживание
int getChunkCount(const QString &filePath); int getChunkCount(const QString &filePath);
bool deleteOldChunks(const QString &filePath, const QDateTime &olderThan); bool deleteOldChunks(const QString &filePath, const QDateTime &olderThan);
bool deleteAllChunks(); bool deleteAllChunks();
@ -138,10 +128,10 @@ public:
bool restore(const QString &backupPath); bool restore(const QString &backupPath);
StorageStatistics getStatistics() const; StorageStatistics getStatistics() const;
// Версионирование
int getStorageVersion() const; int getStorageVersion() const;
bool isVersionCompatible() const; bool isVersionCompatible() const;
bool applyMigration(int version);
signals: signals:
void errorOccurred(const Error &error); void errorOccurred(const Error &error);
void operationCompleted(const QString &operation); void operationCompleted(const QString &operation);
@ -164,8 +154,8 @@ private:
void setError(const QString &message, Status status = Status::DatabaseError); void setError(const QString &message, Status status = Status::DatabaseError);
void clearError(); void clearError();
bool prepareStatements(); bool prepareStatements();
bool validateChunk(const FileChunkData &chunk) const; ValidationResult validateChunk(const FileChunkData &chunk) const;
bool validateVector(const RAGVector &vector) const; ValidationResult validateVector(const RAGVector &vector) const;
private: private:
QSqlDatabase m_db; QSqlDatabase m_db;
@ -175,7 +165,6 @@ private:
Error m_lastError; Error m_lastError;
Status m_status; Status m_status;
// Подготовленные запросы
QSqlQuery m_insertChunkQuery; QSqlQuery m_insertChunkQuery;
QSqlQuery m_updateChunkQuery; QSqlQuery m_updateChunkQuery;
QSqlQuery m_insertVectorQuery; QSqlQuery m_insertVectorQuery;

View File

@ -45,31 +45,65 @@ QJsonObject RAGVectorizer::prepareEmbeddingRequest(const QString &text) const
/// Extracts the embedding vector from a JSON response body.
///
/// The body must be a JSON document whose top-level object has a non-empty
/// "embedding" array. Every array entry is converted with toDouble().
///
/// @param response Raw response bytes.
/// @return The parsed vector, or an empty vector when the body is not valid
///         JSON, has no "embedding" field, or the array is empty.
RAGVector RAGVectorizer::parseEmbeddingResponse(const QByteArray &response) const
{
    const QJsonDocument document = QJsonDocument::fromJson(response);
    if (document.isNull()) {
        qDebug() << "Failed to parse JSON response";
        return RAGVector();
    }

    const QJsonObject root = document.object();
    if (!root.contains("embedding")) {
        qDebug() << "Response does not contain 'embedding' field";
        return RAGVector();
    }

    const QJsonArray embedding = root["embedding"].toArray();
    if (embedding.isEmpty()) {
        qDebug() << "Embedding array is empty";
        return RAGVector();
    }

    RAGVector parsed;
    parsed.reserve(embedding.size());
    for (qsizetype i = 0; i < embedding.size(); ++i) {
        parsed.push_back(embedding.at(i).toDouble());
    }

    qDebug() << "Successfully parsed vector with size:" << parsed.size();
    return parsed;
}
QFuture<RAGVector> RAGVectorizer::vectorizeText(const QString &text) QFuture<RAGVector> RAGVectorizer::vectorizeText(const QString &text)
{ {
qDebug() << "Vectorizing text, length:" << text.length();
qDebug() << "Using embedding provider:" << m_embedProviderUrl;
auto promise = std::make_shared<QPromise<RAGVector>>(); auto promise = std::make_shared<QPromise<RAGVector>>();
promise->start(); promise->start();
QNetworkRequest request(QUrl(m_embedProviderUrl + "/api/embeddings")); QNetworkRequest request(QUrl(m_embedProviderUrl + "/api/embeddings"));
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
auto reply = m_network->post(request, QJsonDocument(prepareEmbeddingRequest(text)).toJson()); QJsonObject requestData = prepareEmbeddingRequest(text);
QByteArray jsonData = QJsonDocument(requestData).toJson();
qDebug() << "Sending request to embeddings API:" << jsonData;
auto reply = m_network->post(request, jsonData);
connect(reply, &QNetworkReply::finished, this, [promise, reply, this]() { connect(reply, &QNetworkReply::finished, this, [promise, reply, this]() {
if (reply->error() == QNetworkReply::NoError) { if (reply->error() == QNetworkReply::NoError) {
promise->addResult(parseEmbeddingResponse(reply->readAll())); QByteArray response = reply->readAll();
// qDebug() << "Received response from embeddings API:" << response;
auto vector = parseEmbeddingResponse(response);
qDebug() << "Parsed vector size:" << vector.size();
promise->addResult(vector);
} else { } else {
// TODO check error setException qDebug() << "Network error:" << reply->errorString();
qDebug() << "HTTP status code:"
<< reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
qDebug() << "Response:" << reply->readAll();
promise->addResult(RAGVector()); promise->addResult(RAGVector());
} }
promise->finish(); promise->finish();