From 02101665ca67b9cda1e8c1e990df9a67ae3f313c Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Sun, 9 Feb 2025 01:00:30 +0100 Subject: [PATCH] feat: Add version to vector db ---
// NOTE(review): reviewer notes on this patch; apart from translated comments the hunks are unchanged.
// - This copy has its newlines/indentation collapsed, and template arguments look stripped
//   (e.g. std::make_unique(...), QSet uniqueFiles, std::optional getVector) -- presumably an
//   extraction artifact; confirm against the repository before applying.
// - Russian comments on added (+) lines are translated to English; those on removed (-) lines
//   are left as-is because they have to match the file being patched.
// - RAGStorage::upgradeStorage() calls m_db.transaction() and m_db.commit() without checking
//   their results and returns true after commit() regardless.
// - RAGStorage::getStorageVersion() does not check query.exec(); when no row is returned it
//   yields 0, and init() then calls upgradeStorage(0), which inserts CURRENT_VERSION.
// - RAGManager::ensureStorageForProject() only logs when isVersionCompatible() is false;
//   m_currentStorage is kept (see the todo in that hunk).
// - ContextManager::isInBuildDirectory() builds its patterns with QDir::separator() and then
//   passes each one through QDir::fromNativeSeparators() inside the loop.
context/ContextManager.cpp | 30 +++++++++----- context/RAGManager.cpp | 24 ++++++----- context/RAGManager.hpp | 8 ---- context/RAGStorage.cpp | 82 +++++++++++++++++++++++++++++++++++++- context/RAGStorage.hpp | 10 ++++- 5 files changed, 123 insertions(+), 31 deletions(-) diff --git a/context/ContextManager.cpp b/context/ContextManager.cpp index 9ca7029..e2280e8 100644 --- a/context/ContextManager.cpp +++ b/context/ContextManager.cpp @@ -70,22 +70,30 @@ ContentFile ContextManager::createContentFile(const QString &filePath) const bool ContextManager::isInBuildDirectory(const QString &filePath) const { static const QStringList buildDirPatterns - = {"/build/", - "/Build/", - "/BUILD/", - "/debug/", - "/Debug/", - "/DEBUG/", - "/release/", - "/Release/", - "/RELEASE/", - "/builds/"}; + = {QString(QDir::separator()) + QLatin1String("build") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("Build") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("BUILD") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("debug") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("Debug") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("DEBUG") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("release") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("Release") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("RELEASE") + QDir::separator(), + QString(QDir::separator()) + QLatin1String("builds") + QDir::separator()}; + // Normalize the path + QString normalizedPath = QDir::fromNativeSeparators(filePath); + + // Check whether the path contains any build-directory pattern for (const QString &pattern : buildDirPatterns) { - if 
(filePath.contains(pattern)) { + // Compare against the normalized pattern + QString normalizedPattern = QDir::fromNativeSeparators(pattern); + if (normalizedPath.contains(normalizedPattern)) { + qDebug() << "Skipping build file:" << filePath; return true; } } + return false; } diff --git a/context/RAGManager.cpp b/context/RAGManager.cpp index bebe772..a00a890 100644 --- a/context/RAGManager.cpp +++ b/context/RAGManager.cpp @@ -89,7 +89,16 @@ void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project) if (project) { m_currentStorage = std::make_unique(getStoragePath(project), this); - m_currentStorage->init(); + if (!m_currentStorage->init()) { + qDebug() << "Failed to initialize storage"; + m_currentStorage.reset(); + return; + } + + if (!m_currentStorage->isVersionCompatible()) { + qDebug() << "Storage version is incompatible, needs rebuild"; + // todo recreate db or show error + } } } @@ -111,14 +120,16 @@ QFuture RAGManager::processFiles( const int batchSize = 10; - QStringList filesToProcess; + QSet uniqueFiles; for (const QString &filePath : filePaths) { if (isFileStorageOutdated(project, filePath)) { qDebug() << "File needs processing:" << filePath; - filesToProcess.append(filePath); + uniqueFiles.insert(filePath); } } + QStringList filesToProcess = uniqueFiles.values(); + if (filesToProcess.isEmpty()) { qDebug() << "No files need processing"; emit vectorizationFinished(); @@ -141,7 +152,6 @@ void RAGManager::searchSimilarDocuments( qDebug() << "Project:" << project->displayName(); qDebug() << "Top K:" << topK; - // Предобработка текста запроса QString processedText = RAGPreprocessor::preprocessCode(text); qDebug() << "Preprocessed query length:" << processedText.length(); @@ -165,7 +175,6 @@ void RAGManager::searchSimilarDocuments( int skippedFiles = 0; for (const auto &filePath : storedFiles) { - // Загружаем и обрабатываем содержимое файла auto storedCode = loadFileContent(filePath); if (!storedCode.has_value()) { qDebug() << "ERROR: 
Failed to load content for file:" << filePath; @@ -173,7 +182,6 @@ void RAGManager::searchSimilarDocuments( continue; } - // Получаем вектор из хранилища auto storedVector = loadVectorFromStorage(project, filePath); if (!storedVector.has_value()) { qDebug() << "ERROR: Failed to load vector for file:" << filePath; @@ -181,10 +189,8 @@ void RAGManager::searchSimilarDocuments( continue; } - // Предобработка содержимого файла QString processedStoredCode = RAGPreprocessor::preprocessCode(storedCode.value()); - // Используем улучшенное сравнение auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity( queryVector, storedVector.value(), processedText, processedStoredCode); @@ -205,7 +211,6 @@ void RAGManager::searchSimilarDocuments( qDebug() << "Files skipped:" << skippedFiles; qDebug() << "Total results before filtering:" << results.size(); - // Оптимизированная сортировка топ K результатов if (results.size() > topK) { qDebug() << "Performing partial sort for top" << topK << "results"; std::partial_sort(results.begin(), results.begin() + topK, results.end()); @@ -295,7 +300,6 @@ QFuture RAGManager::processFile(ProjectExplorer::Project *project, const Q qDebug() << "File" << fileName << "read, content size:" << content.size() << "bytes"; - // Предобработка контента QString processedContent = RAGPreprocessor::preprocessCode(content); qDebug() << "Preprocessed content size:" << processedContent.size() << "bytes"; diff --git a/context/RAGManager.hpp b/context/RAGManager.hpp index 2687c3a..f942604 100644 --- a/context/RAGManager.hpp +++ b/context/RAGManager.hpp @@ -40,14 +40,6 @@ class RAGManager : public QObject public: static RAGManager &instance(); - // struct SearchResult - // { - // QString filePath; - // float l2Score; - // float cosineScore; - - // bool operator<(const SearchResult &other) const; - // }; struct SearchResult { QString filePath; diff --git a/context/RAGStorage.cpp b/context/RAGStorage.cpp index cb926d9..624273d 100644 --- 
a/context/RAGStorage.cpp +++ b/context/RAGStorage.cpp @@ -41,9 +41,23 @@ RAGStorage::~RAGStorage() bool RAGStorage::init() { if (!openDatabase()) { + qDebug() << "Failed to open database"; return false; } - return createTables(); + + if (!createTables()) { + qDebug() << "Failed to create tables"; + return false; + } + + // check version + int version = getStorageVersion(); + if (version < CURRENT_VERSION) { + qDebug() << "Storage version" << version << "needs upgrade to" << CURRENT_VERSION; + return upgradeStorage(version); + } + + return true; } bool RAGStorage::openDatabase() @@ -59,9 +73,25 @@ bool RAGStorage::openDatabase() return m_db.open(); } +bool RAGStorage::createVersionTable() +{ + QSqlQuery query(m_db); + return query.exec("CREATE TABLE IF NOT EXISTS storage_version (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "version INTEGER NOT NULL," + "created_at DATETIME DEFAULT CURRENT_TIMESTAMP" + ")"); +} + bool RAGStorage::createTables() { QSqlQuery query(m_db); + + if (!createVersionTable()) { + qDebug() << "Failed to create version table"; + return false; + } + return query.exec("CREATE TABLE IF NOT EXISTS file_vectors (" "id INTEGER PRIMARY KEY AUTOINCREMENT," "file_path TEXT UNIQUE NOT NULL," @@ -72,6 +102,56 @@ bool RAGStorage::createTables() ")"); } +int RAGStorage::getStorageVersion() const +{ + QSqlQuery query(m_db); + query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1"); + + if (query.next()) { + return query.value(0).toInt(); + } + return 0; +} + +bool RAGStorage::initializeNewStorage() +{ + QSqlQuery query(m_db); + query.prepare("INSERT INTO storage_version (version) VALUES (:version)"); + query.bindValue(":version", CURRENT_VERSION); + return query.exec(); +} + +bool RAGStorage::upgradeStorage(int fromVersion) +{ + if (fromVersion >= CURRENT_VERSION) { + return true; + } + + m_db.transaction(); + + try { + // migration + switch (fromVersion) { + case 0: + // new db initialize + if (!initializeNewStorage()) { + throw 
std::runtime_error("Failed to initialize version"); + } + break; + // new versions will be here + // case 1: // upgrade from 1 to 2 + // break; + } + + m_db.commit(); + return true; + } catch (const std::exception &e) { + m_db.rollback(); + qDebug() << "Failed to upgrade storage:" << e.what(); + return false; + } +} + bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector) { QSqlQuery query(m_db); diff --git a/context/RAGStorage.hpp b/context/RAGStorage.hpp index cf754bd..09eaed1 100644 --- a/context/RAGStorage.hpp +++ b/context/RAGStorage.hpp @@ -32,6 +32,8 @@ class RAGStorage : public QObject { Q_OBJECT public: + static constexpr int CURRENT_VERSION = 1; + explicit RAGStorage(const QString &dbPath, QObject *parent = nullptr); ~RAGStorage(); @@ -41,12 +43,18 @@ public: std::optional getVector(const QString &filePath); bool needsUpdate(const QString &filePath); QStringList getAllFiles(); - QString dbPath() const; + // New methods for working with the storage version + int getStorageVersion() const; + bool isVersionCompatible() const { return getStorageVersion() == CURRENT_VERSION; } + private: bool createTables(); + bool createVersionTable(); bool openDatabase(); + bool initializeNewStorage(); + bool upgradeStorage(int fromVersion); QDateTime getFileLastModified(const QString &filePath); RAGVector blobToVector(const QByteArray &blob); QByteArray vectorToBlob(const RAGVector &vector);