mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2025-06-04 01:28:58 -04:00
feat: add chunking
This commit is contained in:
parent
02101665ca
commit
5dfcf74128
@ -39,6 +39,7 @@
|
|||||||
#include "Logger.hpp"
|
#include "Logger.hpp"
|
||||||
#include "ProjectSettings.hpp"
|
#include "ProjectSettings.hpp"
|
||||||
#include "context/ContextManager.hpp"
|
#include "context/ContextManager.hpp"
|
||||||
|
#include "context/FileChunker.hpp"
|
||||||
#include "context/RAGManager.hpp"
|
#include "context/RAGManager.hpp"
|
||||||
#include "context/TokenUtils.hpp"
|
#include "context/TokenUtils.hpp"
|
||||||
|
|
||||||
@ -484,6 +485,18 @@ void ChatRootView::testRAG(const QString &message)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ChatRootView::testChunking()
|
||||||
|
{
|
||||||
|
auto project = ProjectExplorer::ProjectTree::currentProject();
|
||||||
|
if (!project) {
|
||||||
|
qDebug() << "No active project found";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Context::FileChunker::ChunkingConfig config;
|
||||||
|
Context::ContextManager::instance().testProjectChunks(project, config);
|
||||||
|
}
|
||||||
|
|
||||||
void ChatRootView::updateInputTokensCount()
|
void ChatRootView::updateInputTokensCount()
|
||||||
{
|
{
|
||||||
int inputTokens = m_messageTokensCount;
|
int inputTokens = m_messageTokensCount;
|
||||||
|
@ -66,6 +66,7 @@ public:
|
|||||||
Q_INVOKABLE void setIsSyncOpenFiles(bool state);
|
Q_INVOKABLE void setIsSyncOpenFiles(bool state);
|
||||||
Q_INVOKABLE void openChatHistoryFolder();
|
Q_INVOKABLE void openChatHistoryFolder();
|
||||||
Q_INVOKABLE void testRAG(const QString &message);
|
Q_INVOKABLE void testRAG(const QString &message);
|
||||||
|
Q_INVOKABLE void testChunking();
|
||||||
|
|
||||||
Q_INVOKABLE void updateInputTokensCount();
|
Q_INVOKABLE void updateInputTokensCount();
|
||||||
int inputTokensCount() const;
|
int inputTokensCount() const;
|
||||||
|
@ -199,6 +199,7 @@ ChatRootView {
|
|||||||
attachFiles.onClicked: root.showAttachFilesDialog()
|
attachFiles.onClicked: root.showAttachFilesDialog()
|
||||||
linkFiles.onClicked: root.showLinkFilesDialog()
|
linkFiles.onClicked: root.showLinkFilesDialog()
|
||||||
testRag.onClicked: root.testRAG(messageInput.text)
|
testRag.onClicked: root.testRAG(messageInput.text)
|
||||||
|
testChunks.onClicked: root.testChunking()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ Rectangle {
|
|||||||
property alias attachFiles: attachFilesId
|
property alias attachFiles: attachFilesId
|
||||||
property alias linkFiles: linkFilesId
|
property alias linkFiles: linkFilesId
|
||||||
property alias testRag: testRagId
|
property alias testRag: testRagId
|
||||||
|
property alias testChunks: testChunksId
|
||||||
|
|
||||||
color: palette.window.hslLightness > 0.5 ?
|
color: palette.window.hslLightness > 0.5 ?
|
||||||
Qt.darker(palette.window, 1.1) :
|
Qt.darker(palette.window, 1.1) :
|
||||||
@ -98,6 +99,12 @@ Rectangle {
|
|||||||
text: qsTr("Test RAG")
|
text: qsTr("Test RAG")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QoAButton {
|
||||||
|
id: testChunksId
|
||||||
|
|
||||||
|
text: qsTr("Test Chunks")
|
||||||
|
}
|
||||||
|
|
||||||
Item {
|
Item {
|
||||||
Layout.fillWidth: true
|
Layout.fillWidth: true
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@ add_library(Context STATIC
|
|||||||
RAGSimilaritySearch.hpp RAGSimilaritySearch.cpp
|
RAGSimilaritySearch.hpp RAGSimilaritySearch.cpp
|
||||||
RAGPreprocessor.hpp RAGPreprocessor.cpp
|
RAGPreprocessor.hpp RAGPreprocessor.cpp
|
||||||
EnhancedRAGSimilaritySearch.hpp EnhancedRAGSimilaritySearch.cpp
|
EnhancedRAGSimilaritySearch.hpp EnhancedRAGSimilaritySearch.cpp
|
||||||
|
FileChunker.hpp FileChunker.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(Context
|
target_link_libraries(Context
|
||||||
|
@ -26,6 +26,8 @@
|
|||||||
#include <projectexplorer/project.h>
|
#include <projectexplorer/project.h>
|
||||||
#include <projectexplorer/projectnodes.h>
|
#include <projectexplorer/projectnodes.h>
|
||||||
|
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
namespace QodeAssist::Context {
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
ContextManager &ContextManager::instance()
|
ContextManager &ContextManager::instance()
|
||||||
@ -130,4 +132,45 @@ bool ContextManager::shouldProcessFile(const QString &filePath) const
|
|||||||
return supportedExtensions.contains(fileInfo.suffix().toLower());
|
return supportedExtensions.contains(fileInfo.suffix().toLower());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ContextManager::testProjectChunks(
|
||||||
|
ProjectExplorer::Project *project, const FileChunker::ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
if (!project) {
|
||||||
|
qDebug() << "No project provided";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "\nStarting test chunking for project:" << project->displayName();
|
||||||
|
|
||||||
|
// Get source files
|
||||||
|
QStringList sourceFiles = getProjectSourceFiles(project);
|
||||||
|
qDebug() << "Found" << sourceFiles.size() << "source files";
|
||||||
|
|
||||||
|
// Create chunker
|
||||||
|
auto chunker = new FileChunker(config, this);
|
||||||
|
|
||||||
|
// Connect progress and error signals
|
||||||
|
connect(chunker, &FileChunker::progressUpdated, this, [](int processed, int total) {
|
||||||
|
qDebug() << "Progress:" << processed << "/" << total << "files";
|
||||||
|
});
|
||||||
|
|
||||||
|
connect(chunker, &FileChunker::error, this, [](const QString &error) {
|
||||||
|
qDebug() << "Error:" << error;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Start chunking and handle results
|
||||||
|
auto future = chunker->chunkFiles(sourceFiles);
|
||||||
|
|
||||||
|
// Используем QFutureWatcher для обработки результатов
|
||||||
|
auto watcher = new QFutureWatcher<QList<FileChunk>>(this);
|
||||||
|
|
||||||
|
connect(watcher, &QFutureWatcher<QList<FileChunk>>::finished, this, [watcher, chunker]() {
|
||||||
|
// Очистка
|
||||||
|
watcher->deleteLater();
|
||||||
|
chunker->deleteLater();
|
||||||
|
});
|
||||||
|
|
||||||
|
watcher->setFuture(future);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace QodeAssist::Context
|
} // namespace QodeAssist::Context
|
||||||
|
@ -20,9 +20,12 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ContentFile.hpp"
|
#include "ContentFile.hpp"
|
||||||
|
|
||||||
#include <QObject>
|
#include <QObject>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
namespace ProjectExplorer {
|
namespace ProjectExplorer {
|
||||||
class Project;
|
class Project;
|
||||||
}
|
}
|
||||||
@ -40,6 +43,9 @@ public:
|
|||||||
QList<ContentFile> getContentFiles(const QStringList &filePaths) const;
|
QList<ContentFile> getContentFiles(const QStringList &filePaths) const;
|
||||||
QStringList getProjectSourceFiles(ProjectExplorer::Project *project) const;
|
QStringList getProjectSourceFiles(ProjectExplorer::Project *project) const;
|
||||||
|
|
||||||
|
void testProjectChunks(
|
||||||
|
ProjectExplorer::Project *project, const FileChunker::ChunkingConfig &config);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit ContextManager(QObject *parent = nullptr);
|
explicit ContextManager(QObject *parent = nullptr);
|
||||||
~ContextManager() = default;
|
~ContextManager() = default;
|
||||||
|
198
context/FileChunker.cpp
Normal file
198
context/FileChunker.cpp
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
// FileChunker.cpp
|
||||||
|
#include "FileChunker.hpp"
|
||||||
|
|
||||||
|
#include <coreplugin/idocument.h>
|
||||||
|
#include <texteditor/syntaxhighlighter.h>
|
||||||
|
#include <texteditor/textdocument.h>
|
||||||
|
#include <texteditor/texteditorconstants.h>
|
||||||
|
|
||||||
|
#include <QFutureWatcher>
|
||||||
|
#include <QTimer>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
FileChunker::FileChunker(QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
{}
|
||||||
|
|
||||||
|
FileChunker::FileChunker(const ChunkingConfig &config, QObject *parent)
|
||||||
|
: QObject(parent)
|
||||||
|
, m_config(config)
|
||||||
|
{}
|
||||||
|
|
||||||
|
QFuture<QList<FileChunk>> FileChunker::chunkFiles(const QStringList &filePaths)
|
||||||
|
{
|
||||||
|
qDebug() << "\nStarting chunking process for" << filePaths.size() << "files";
|
||||||
|
qDebug() << "Configuration:"
|
||||||
|
<< "\n Max lines per chunk:" << m_config.maxLinesPerChunk
|
||||||
|
<< "\n Overlap lines:" << m_config.overlapLines
|
||||||
|
<< "\n Skip empty lines:" << m_config.skipEmptyLines
|
||||||
|
<< "\n Preserve functions:" << m_config.preserveFunctions
|
||||||
|
<< "\n Preserve classes:" << m_config.preserveClasses
|
||||||
|
<< "\n Batch size:" << m_config.batchSize;
|
||||||
|
|
||||||
|
auto promise = std::make_shared<QPromise<QList<FileChunk>>>();
|
||||||
|
promise->start();
|
||||||
|
|
||||||
|
if (filePaths.isEmpty()) {
|
||||||
|
qDebug() << "No files to process";
|
||||||
|
promise->addResult({});
|
||||||
|
promise->finish();
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
processNextBatch(promise, filePaths, 0);
|
||||||
|
return promise->future();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileChunker::processNextBatch(
|
||||||
|
std::shared_ptr<QPromise<QList<FileChunk>>> promise, const QStringList &files, int startIndex)
|
||||||
|
{
|
||||||
|
if (startIndex >= files.size()) {
|
||||||
|
emit chunkingComplete();
|
||||||
|
promise->finish();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int endIndex = qMin(startIndex + m_config.batchSize, files.size());
|
||||||
|
QList<FileChunk> batchChunks;
|
||||||
|
|
||||||
|
for (int i = startIndex; i < endIndex; ++i) {
|
||||||
|
try {
|
||||||
|
auto chunks = processFile(files[i]);
|
||||||
|
batchChunks.append(chunks);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
emit error(QString("Error processing file %1: %2").arg(files[i], e.what()));
|
||||||
|
}
|
||||||
|
emit progressUpdated(i + 1, files.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
promise->addResult(batchChunks);
|
||||||
|
|
||||||
|
// Планируем обработку следующего батча
|
||||||
|
QTimer::singleShot(0, this, [this, promise, files, endIndex]() {
|
||||||
|
processNextBatch(promise, files, endIndex);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<FileChunk> FileChunker::processFile(const QString &filePath)
|
||||||
|
{
|
||||||
|
qDebug() << "\nProcessing file:" << filePath;
|
||||||
|
|
||||||
|
auto document = new TextEditor::TextDocument;
|
||||||
|
auto filePathObj = Utils::FilePath::fromString(filePath);
|
||||||
|
auto result = document->open(&m_error, filePathObj, filePathObj);
|
||||||
|
if (result != Core::IDocument::OpenResult::Success) {
|
||||||
|
qDebug() << "Failed to open document:" << filePath << "-" << m_error;
|
||||||
|
emit error(QString("Failed to open document: %1 - %2").arg(filePath, m_error));
|
||||||
|
delete document;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Document opened successfully. Line count:" << document->document()->blockCount();
|
||||||
|
|
||||||
|
auto chunks = createChunksForDocument(document);
|
||||||
|
qDebug() << "Created" << chunks.size() << "chunks for file";
|
||||||
|
|
||||||
|
delete document;
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<FileChunk> FileChunker::createChunksForDocument(TextEditor::TextDocument *document)
|
||||||
|
{
|
||||||
|
QList<FileChunk> chunks;
|
||||||
|
QString filePath = document->filePath().toString();
|
||||||
|
qDebug() << "\nCreating chunks for document:" << filePath << "\nConfiguration:"
|
||||||
|
<< "\n Max lines per chunk:" << m_config.maxLinesPerChunk
|
||||||
|
<< "\n Min lines per chunk:" << m_config.minLinesPerChunk
|
||||||
|
<< "\n Overlap lines:" << m_config.overlapLines;
|
||||||
|
// Если файл меньше минимального размера чанка, создаем один чанк
|
||||||
|
if (document->document()->blockCount() <= m_config.minLinesPerChunk) {
|
||||||
|
FileChunk chunk;
|
||||||
|
chunk.filePath = filePath;
|
||||||
|
chunk.startLine = 0;
|
||||||
|
chunk.endLine = document->document()->blockCount() - 1;
|
||||||
|
chunk.createdAt = QDateTime::currentDateTime();
|
||||||
|
chunk.updatedAt = chunk.createdAt;
|
||||||
|
|
||||||
|
QString content;
|
||||||
|
QTextBlock block = document->document()->firstBlock();
|
||||||
|
while (block.isValid()) {
|
||||||
|
content += block.text() + "\n";
|
||||||
|
block = block.next();
|
||||||
|
}
|
||||||
|
chunk.content = content;
|
||||||
|
|
||||||
|
qDebug() << "File is smaller than minimum chunk size. Creating single chunk:"
|
||||||
|
<< "\n Lines:" << chunk.lineCount() << "\n Content size:" << chunk.content.size()
|
||||||
|
<< "bytes";
|
||||||
|
|
||||||
|
chunks.append(chunk);
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Для больших файлов создаем чанки фиксированного размера с перекрытием
|
||||||
|
int currentStartLine = 0;
|
||||||
|
int lineCount = 0;
|
||||||
|
QString content;
|
||||||
|
QTextBlock block = document->document()->firstBlock();
|
||||||
|
|
||||||
|
while (block.isValid()) {
|
||||||
|
content += block.text() + "\n";
|
||||||
|
lineCount++;
|
||||||
|
|
||||||
|
// Если достигли размера чанка или это последний блок
|
||||||
|
if (lineCount >= m_config.maxLinesPerChunk || !block.next().isValid()) {
|
||||||
|
FileChunk chunk;
|
||||||
|
chunk.filePath = filePath;
|
||||||
|
chunk.startLine = currentStartLine;
|
||||||
|
chunk.endLine = currentStartLine + lineCount - 1;
|
||||||
|
chunk.content = content;
|
||||||
|
chunk.createdAt = QDateTime::currentDateTime();
|
||||||
|
chunk.updatedAt = chunk.createdAt;
|
||||||
|
|
||||||
|
qDebug() << "Creating chunk:"
|
||||||
|
<< "\n Start line:" << chunk.startLine << "\n End line:" << chunk.endLine
|
||||||
|
<< "\n Lines:" << chunk.lineCount()
|
||||||
|
<< "\n Content size:" << chunk.content.size() << "bytes";
|
||||||
|
|
||||||
|
chunks.append(chunk);
|
||||||
|
|
||||||
|
// Начинаем новый чанк с учетом перекрытия
|
||||||
|
if (block.next().isValid()) {
|
||||||
|
// Отступаем назад на размер перекрытия
|
||||||
|
int overlapLines = qMin(m_config.overlapLines, lineCount);
|
||||||
|
currentStartLine = chunk.endLine - overlapLines + 1;
|
||||||
|
|
||||||
|
// Сбрасываем контент, но добавляем перекрывающиеся строки
|
||||||
|
content.clear();
|
||||||
|
QTextBlock overlapBlock = document->document()->findBlockByLineNumber(
|
||||||
|
currentStartLine);
|
||||||
|
while (overlapBlock.isValid() && overlapBlock.blockNumber() <= chunk.endLine) {
|
||||||
|
content += overlapBlock.text() + "\n";
|
||||||
|
overlapBlock = overlapBlock.next();
|
||||||
|
}
|
||||||
|
lineCount = overlapLines;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
block = block.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
qDebug() << "Finished creating chunks for file:" << filePath
|
||||||
|
<< "\nTotal chunks:" << chunks.size();
|
||||||
|
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileChunker::setConfig(const ChunkingConfig &config)
|
||||||
|
{
|
||||||
|
m_config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileChunker::ChunkingConfig FileChunker::config() const
|
||||||
|
{
|
||||||
|
return m_config;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
68
context/FileChunker.hpp
Normal file
68
context/FileChunker.hpp
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
// FileChunker.hpp
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <texteditor/textdocument.h>
|
||||||
|
#include <QDateTime>
|
||||||
|
#include <QFuture>
|
||||||
|
#include <QString>
|
||||||
|
|
||||||
|
namespace QodeAssist::Context {
|
||||||
|
|
||||||
|
struct FileChunk
|
||||||
|
{
|
||||||
|
QString filePath; // Path to the source file
|
||||||
|
int startLine; // Starting line of the chunk
|
||||||
|
int endLine; // Ending line of the chunk
|
||||||
|
QDateTime createdAt; // When the chunk was created
|
||||||
|
QDateTime updatedAt; // When the chunk was last updated
|
||||||
|
QString content; // Content of the chunk
|
||||||
|
|
||||||
|
// Helper methods
|
||||||
|
int lineCount() const { return endLine - startLine + 1; }
|
||||||
|
bool isValid() const { return !filePath.isEmpty() && startLine >= 0 && endLine >= startLine; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class FileChunker : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct ChunkingConfig
|
||||||
|
{
|
||||||
|
int maxLinesPerChunk = 80; // Размер чанка (было 200)
|
||||||
|
int minLinesPerChunk = 40; // Минимальный размер для начала чанкинга
|
||||||
|
int overlapLines = 20; // Перекрытие между чанками
|
||||||
|
bool skipEmptyLines = true; // Пропускать пустые строки
|
||||||
|
bool preserveFunctions = true; // Сохранять функции целиком
|
||||||
|
bool preserveClasses = true; // Сохранять классы целиком
|
||||||
|
int batchSize = 10; // Количество файлов для параллельной обработки
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit FileChunker(QObject *parent = nullptr);
|
||||||
|
explicit FileChunker(const ChunkingConfig &config, QObject *parent = nullptr);
|
||||||
|
|
||||||
|
// Main chunking method
|
||||||
|
QFuture<QList<FileChunk>> chunkFiles(const QStringList &filePaths);
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
void setConfig(const ChunkingConfig &config);
|
||||||
|
ChunkingConfig config() const;
|
||||||
|
|
||||||
|
signals:
|
||||||
|
void progressUpdated(int processedFiles, int totalFiles);
|
||||||
|
void chunkingComplete();
|
||||||
|
void error(const QString &errorMessage);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QList<FileChunk> processFile(const QString &filePath);
|
||||||
|
QList<FileChunk> createChunksForDocument(TextEditor::TextDocument *document);
|
||||||
|
void processNextBatch(
|
||||||
|
std::shared_ptr<QPromise<QList<FileChunk>>> promise,
|
||||||
|
const QStringList &files,
|
||||||
|
int startIndex);
|
||||||
|
|
||||||
|
ChunkingConfig m_config;
|
||||||
|
QString m_error;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace QodeAssist::Context
|
Loading…
x
Reference in New Issue
Block a user