feat: Add version to vector db

This commit is contained in:
Petr Mironychev 2025-02-09 01:00:30 +01:00
parent 77a03d42ed
commit 02101665ca
5 changed files with 123 additions and 31 deletions

View File

@ -70,22 +70,30 @@ ContentFile ContextManager::createContentFile(const QString &filePath) const
bool ContextManager::isInBuildDirectory(const QString &filePath) const
{
static const QStringList buildDirPatterns
= {"/build/",
"/Build/",
"/BUILD/",
"/debug/",
"/Debug/",
"/DEBUG/",
"/release/",
"/Release/",
"/RELEASE/",
"/builds/"};
= {QString(QDir::separator()) + QLatin1String("build") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("Build") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("BUILD") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("debug") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("Debug") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("DEBUG") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("release") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("Release") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("RELEASE") + QDir::separator(),
QString(QDir::separator()) + QLatin1String("builds") + QDir::separator()};
// Нормализуем путь
QString normalizedPath = QDir::fromNativeSeparators(filePath);
// Проверяем, содержит ли путь паттерны build-директории
for (const QString &pattern : buildDirPatterns) {
if (filePath.contains(pattern)) {
// Сравниваем с нормализованным паттерном
QString normalizedPattern = QDir::fromNativeSeparators(pattern);
if (normalizedPath.contains(normalizedPattern)) {
qDebug() << "Skipping build file:" << filePath;
return true;
}
}
return false;
}

View File

@ -89,7 +89,16 @@ void RAGManager::ensureStorageForProject(ProjectExplorer::Project *project)
if (project) {
m_currentStorage = std::make_unique<RAGStorage>(getStoragePath(project), this);
m_currentStorage->init();
if (!m_currentStorage->init()) {
qDebug() << "Failed to initialize storage";
m_currentStorage.reset();
return;
}
if (!m_currentStorage->isVersionCompatible()) {
qDebug() << "Storage version is incompatible, needs rebuild";
// todo recreate db or show error
}
}
}
@ -111,14 +120,16 @@ QFuture<void> RAGManager::processFiles(
const int batchSize = 10;
QStringList filesToProcess;
QSet<QString> uniqueFiles;
for (const QString &filePath : filePaths) {
if (isFileStorageOutdated(project, filePath)) {
qDebug() << "File needs processing:" << filePath;
filesToProcess.append(filePath);
uniqueFiles.insert(filePath);
}
}
QStringList filesToProcess = uniqueFiles.values();
if (filesToProcess.isEmpty()) {
qDebug() << "No files need processing";
emit vectorizationFinished();
@ -141,7 +152,6 @@ void RAGManager::searchSimilarDocuments(
qDebug() << "Project:" << project->displayName();
qDebug() << "Top K:" << topK;
// Preprocess the query text
QString processedText = RAGPreprocessor::preprocessCode(text);
qDebug() << "Preprocessed query length:" << processedText.length();
@ -165,7 +175,6 @@ void RAGManager::searchSimilarDocuments(
int skippedFiles = 0;
for (const auto &filePath : storedFiles) {
// Load and process the file content
auto storedCode = loadFileContent(filePath);
if (!storedCode.has_value()) {
qDebug() << "ERROR: Failed to load content for file:" << filePath;
@ -173,7 +182,6 @@ void RAGManager::searchSimilarDocuments(
continue;
}
// Fetch the vector from storage
auto storedVector = loadVectorFromStorage(project, filePath);
if (!storedVector.has_value()) {
qDebug() << "ERROR: Failed to load vector for file:" << filePath;
@ -181,10 +189,8 @@ void RAGManager::searchSimilarDocuments(
continue;
}
// Preprocess the file content
QString processedStoredCode = RAGPreprocessor::preprocessCode(storedCode.value());
// Use the enhanced similarity comparison
auto similarity = EnhancedRAGSimilaritySearch::calculateSimilarity(
queryVector, storedVector.value(), processedText, processedStoredCode);
@ -205,7 +211,6 @@ void RAGManager::searchSimilarDocuments(
qDebug() << "Files skipped:" << skippedFiles;
qDebug() << "Total results before filtering:" << results.size();
// Optimized sorting of the top K results
if (results.size() > topK) {
qDebug() << "Performing partial sort for top" << topK << "results";
std::partial_sort(results.begin(), results.begin() + topK, results.end());
@ -295,7 +300,6 @@ QFuture<bool> RAGManager::processFile(ProjectExplorer::Project *project, const Q
qDebug() << "File" << fileName << "read, content size:" << content.size() << "bytes";
// Preprocess the content
QString processedContent = RAGPreprocessor::preprocessCode(content);
qDebug() << "Preprocessed content size:" << processedContent.size() << "bytes";

View File

@ -40,14 +40,6 @@ class RAGManager : public QObject
public:
static RAGManager &instance();
// struct SearchResult
// {
// QString filePath;
// float l2Score;
// float cosineScore;
// bool operator<(const SearchResult &other) const;
// };
struct SearchResult
{
QString filePath;

View File

@ -41,9 +41,23 @@ RAGStorage::~RAGStorage()
/**
 * Prepares the storage for use.
 *
 * Steps, each of which aborts with false on failure:
 *  1. open the database,
 *  2. create the tables if they do not exist yet,
 *  3. read the stored version and, when it is lower than CURRENT_VERSION,
 *     run upgradeStorage() from that version.
 *
 * @return true when the database is open, the tables exist and any required
 *         upgrade succeeded; false otherwise.
 */
bool RAGStorage::init()
{
    if (!openDatabase()) {
        qDebug() << "Failed to open database";
        return false;
    }

    // No early "return createTables();" here: the version check below must
    // still run after the tables have been created.
    if (!createTables()) {
        qDebug() << "Failed to create tables";
        return false;
    }

    // check version
    const int version = getStorageVersion();
    if (version < CURRENT_VERSION) {
        qDebug() << "Storage version" << version << "needs upgrade to" << CURRENT_VERSION;
        return upgradeStorage(version);
    }

    return true;
}
bool RAGStorage::openDatabase()
@ -59,9 +73,25 @@ bool RAGStorage::openDatabase()
return m_db.open();
}
bool RAGStorage::createVersionTable()
{
QSqlQuery query(m_db);
return query.exec("CREATE TABLE IF NOT EXISTS storage_version ("
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
"version INTEGER NOT NULL,"
"created_at DATETIME DEFAULT CURRENT_TIMESTAMP"
")");
}
bool RAGStorage::createTables()
{
QSqlQuery query(m_db);
if (!createVersionTable()) {
qDebug() << "Failed to create version table";
return false;
}
return query.exec("CREATE TABLE IF NOT EXISTS file_vectors ("
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
"file_path TEXT UNIQUE NOT NULL,"
@ -72,6 +102,56 @@ bool RAGStorage::createTables()
")");
}
int RAGStorage::getStorageVersion() const
{
QSqlQuery query(m_db);
query.exec("SELECT version FROM storage_version ORDER BY id DESC LIMIT 1");
if (query.next()) {
return query.value(0).toInt();
}
return 0;
}
bool RAGStorage::initializeNewStorage()
{
QSqlQuery query(m_db);
query.prepare("INSERT INTO storage_version (version) VALUES (:version)");
query.bindValue(":version", CURRENT_VERSION);
return query.exec();
}
/**
 * Migrates the storage from @p fromVersion up to CURRENT_VERSION.
 *
 * The migration runs inside a database transaction. Any failed step,
 * including a failed commit, rolls the transaction back.
 *
 * @param fromVersion Version currently recorded in the storage (0 means no
 *                    version row has been written yet).
 * @return true when no upgrade is needed or the migration was committed;
 *         false when the transaction could not be started, a step failed,
 *         or the commit failed.
 */
bool RAGStorage::upgradeStorage(int fromVersion)
{
    if (fromVersion >= CURRENT_VERSION) {
        return true;
    }

    // Without a transaction a failed migration could not be rolled back.
    if (!m_db.transaction()) {
        qDebug() << "Failed to upgrade storage:" << "Failed to start transaction";
        return false;
    }

    try {
        // migration
        switch (fromVersion) {
        case 0:
            // new db initialize
            if (!initializeNewStorage()) {
                throw std::runtime_error("Failed to initialize version");
            }
            break;
            // new versions will be here
            // case 1: // upgrade from 1 to 2
            //     break;
        }

        // Report success only once the changes are actually committed.
        if (!m_db.commit()) {
            throw std::runtime_error("Failed to commit transaction");
        }
        return true;
    } catch (const std::exception &e) {
        m_db.rollback();
        qDebug() << "Failed to upgrade storage:" << e.what();
        return false;
    }
}
bool RAGStorage::storeVector(const QString &filePath, const RAGVector &vector)
{
QSqlQuery query(m_db);

View File

@ -32,6 +32,8 @@ class RAGStorage : public QObject
{
Q_OBJECT
public:
static constexpr int CURRENT_VERSION = 1;
explicit RAGStorage(const QString &dbPath, QObject *parent = nullptr);
~RAGStorage();
@ -41,12 +43,18 @@ public:
std::optional<RAGVector> getVector(const QString &filePath);
bool needsUpdate(const QString &filePath);
QStringList getAllFiles();
QString dbPath() const;
// New methods for working with the storage version
int getStorageVersion() const;
bool isVersionCompatible() const
{
    // Compatible only when the stored version equals CURRENT_VERSION exactly.
    const int storedVersion = getStorageVersion();
    return storedVersion == CURRENT_VERSION;
}
private:
bool createTables();
bool createVersionTable();
bool openDatabase();
bool initializeNewStorage();
bool upgradeStorage(int fromVersion);
QDateTime getFileLastModified(const QString &filePath);
RAGVector blobToVector(const QByteArray &blob);
QByteArray vectorToBlob(const RAGVector &vector);