From 61aae8e3bae16cbd8eb26e71024f132395cad8dd Mon Sep 17 00:00:00 2001 From: Petr Mironychev <9195189+Palm1r@users.noreply.github.com> Date: Thu, 29 Aug 2024 00:28:54 +0200 Subject: [PATCH] Remove copyright from request --- DocumentContextReader.cpp | 148 +++++++++++++++++++++++--------------- DocumentContextReader.hpp | 17 +++-- LLMClientInterface.cpp | 5 ++ QodeAssistSettings.cpp | 2 +- 4 files changed, 110 insertions(+), 62 deletions(-) diff --git a/DocumentContextReader.cpp b/DocumentContextReader.cpp index dd0e394..056857c 100644 --- a/DocumentContextReader.cpp +++ b/DocumentContextReader.cpp @@ -27,6 +27,13 @@ namespace QodeAssist { +DocumentContextReader::DocumentContextReader(TextEditor::TextDocument *textDocument) + : m_textDocument(textDocument) + , m_document(textDocument->document()) +{ + m_copyrightInfo = findCopyright(); +} + QString DocumentContextReader::getLineText(int lineNumber, int cursorPosition) const { if (!m_document || lineNumber < 0) @@ -55,76 +62,41 @@ QString DocumentContextReader::getContextBefore(int lineNumber, int cursorPosition, int linesCount) const { - QString context; - for (int i = qMax(0, lineNumber - linesCount); i <= lineNumber; ++i) { - QString line = getLineText(i, i == lineNumber ? 
cursorPosition : -1); - context += line; - if (i < lineNumber) - context += "\n"; + int effectiveStartLine; + if (m_copyrightInfo.found) { + effectiveStartLine = qMax(m_copyrightInfo.endLine + 1, lineNumber - linesCount); + } else { + effectiveStartLine = qMax(0, lineNumber - linesCount); } - return context; + + return getContextBetween(effectiveStartLine, lineNumber, cursorPosition); } QString DocumentContextReader::getContextAfter(int lineNumber, int cursorPosition, int linesCount) const { - QString context; - int maxLine = lineNumber + linesCount; - for (int i = lineNumber; i <= maxLine; ++i) { - QString line = getLineText(i); - if (i == lineNumber && cursorPosition >= 0) { - line = line.mid(cursorPosition); - } - context += line; - if (i < maxLine && !line.isEmpty()) - context += "\n"; - } - return context; + int endLine = qMin(m_document->blockCount() - 1, lineNumber + linesCount); + return getContextBetween(lineNumber + 1, endLine, cursorPosition); } QString DocumentContextReader::readWholeFileBefore(int lineNumber, int cursorPosition) const { - QString content; - QTextBlock block = m_document->begin(); - int currentLine = 0; - - while (block.isValid() && currentLine <= lineNumber) { - if (currentLine == lineNumber) { - content += block.text().left(cursorPosition); - break; - } else { - content += block.text() + "\n"; - } - block = block.next(); - currentLine++; + int startLine = 0; + if (m_copyrightInfo.found) { + startLine = m_copyrightInfo.endLine + 1; } - return content; + startLine = qMin(startLine, lineNumber); + + QString result = getContextBetween(startLine, lineNumber, cursorPosition); + + return result; } QString DocumentContextReader::readWholeFileAfter(int lineNumber, int cursorPosition) const { - QString content; - QTextBlock block = m_document->begin(); - int currentLine = 0; - - while (block.isValid() && currentLine < lineNumber) { - block = block.next(); - currentLine++; - } - - while (block.isValid()) { - if (currentLine == lineNumber) { - 
content += block.text().mid(cursorPosition) + "\n"; - } else { - content += block.text() + "\n"; - } - block = block.next(); - currentLine++; - } - - return content.trimmed(); + return getContextBetween(lineNumber, m_document->blockCount() - 1, cursorPosition); } QString DocumentContextReader::getLanguageAndFileInfo() const @@ -139,10 +111,7 @@ QString DocumentContextReader::getLanguageAndFileInfo() const QString fileExtension = QFileInfo(filePath).suffix(); return QString("//Language: %1 (MIME: %2) filepath: %3(%4)\n\n") - .arg(language) - .arg(mimeType) - .arg(filePath) - .arg(fileExtension); + .arg(language, mimeType, filePath, fileExtension); } QString DocumentContextReader::getSpecificInstructions() const @@ -152,4 +121,69 @@ QString DocumentContextReader::getSpecificInstructions() const return QString("//Instructions: %1").arg(specificInstruction); } +CopyrightInfo DocumentContextReader::findCopyright() +{ + CopyrightInfo result = {-1, -1, false}; + + QString text = m_document->toPlainText(); + QRegularExpressionMatchIterator matchIterator = getCopyrightRegex().globalMatch(text); + + QList copyrightBlocks; + + while (matchIterator.hasNext()) { + QRegularExpressionMatch match = matchIterator.next(); + int startPos = match.capturedStart(); + int endPos = match.capturedEnd(); + + CopyrightInfo info; + info.startLine = m_document->findBlock(startPos).blockNumber(); + info.endLine = m_document->findBlock(endPos).blockNumber(); + info.found = true; + + copyrightBlocks.append(info); + } + + for (int i = 0; i < copyrightBlocks.size() - 1; ++i) { + if (copyrightBlocks[i].endLine + 1 >= copyrightBlocks[i + 1].startLine) { + copyrightBlocks[i].endLine = copyrightBlocks[i + 1].endLine; + copyrightBlocks.removeAt(i + 1); + --i; + } + } + + if (!copyrightBlocks.isEmpty()) { // temporary solution, needs caching + return copyrightBlocks.first(); + } + + return result; +} + +QString DocumentContextReader::getContextBetween(int startLine, + int endLine, + int cursorPosition) 
const +{ + QString context; + for (int i = startLine; i <= endLine; ++i) { + QTextBlock block = m_document->findBlockByNumber(i); + if (!block.isValid()) { + break; + } + if (i == endLine) { + context += block.text().left(cursorPosition); + } else { + context += block.text() + "\n"; + } + } + + return context; +} + +const QRegularExpression &DocumentContextReader::getCopyrightRegex() +{ + static const QRegularExpression copyrightRegex( + R"((?:/\*[\s\S]*?Copyright[\s\S]*?\*/| // Copyright[\s\S]*?(?:\n\s*//.*)*|///.*Copyright[\s\S]*?(?:\n\s*///.*)*)|(?://))", + QRegularExpression::MultilineOption | QRegularExpression::CaseInsensitiveOption); + return copyrightRegex; +} + } // namespace QodeAssist diff --git a/DocumentContextReader.hpp b/DocumentContextReader.hpp index 7a2c229..b43a9f4 100644 --- a/DocumentContextReader.hpp +++ b/DocumentContextReader.hpp @@ -24,13 +24,17 @@ namespace QodeAssist { +struct CopyrightInfo +{ + int startLine; + int endLine; + bool found; +}; + class DocumentContextReader { public: - DocumentContextReader(TextEditor::TextDocument *textDocument) - : m_textDocument(textDocument) - , m_document(textDocument->document()) - {} + DocumentContextReader(TextEditor::TextDocument *textDocument); QString getLineText(int lineNumber, int cursorPosition = -1) const; QString getContextBefore(int lineNumber, int cursorPosition, int linesCount) const; @@ -39,10 +43,15 @@ public: QString readWholeFileAfter(int lineNumber, int cursorPosition) const; QString getLanguageAndFileInfo() const; QString getSpecificInstructions() const; + CopyrightInfo findCopyright(); + QString getContextBetween(int startLine, int endLine, int cursorPosition) const; private: TextEditor::TextDocument *m_textDocument; QTextDocument *m_document; + CopyrightInfo m_copyrightInfo; + + static const QRegularExpression &getCopyrightRegex(); }; } // namespace QodeAssist diff --git a/LLMClientInterface.cpp b/LLMClientInterface.cpp index 064c9ef..dd2d3a7 100644 --- a/LLMClientInterface.cpp 
+++ b/LLMClientInterface.cpp @@ -101,6 +101,9 @@ QString LLMClientInterface::сontextBefore(TextEditor::TextEditorWidget *widget, DocumentContextReader reader(widget->textDocument()); QString languageAndFileInfo = reader.getLanguageAndFileInfo(); + if (lineNumber < reader.findCopyright().endLine) + return QString(); + QString contextBefore; if (settings().readFullFile()) { contextBefore = reader.readWholeFileBefore(lineNumber, cursorPosition); @@ -124,6 +127,8 @@ QString LLMClientInterface::сontextAfter(TextEditor::TextEditorWidget *widget, return QString(); DocumentContextReader reader(widget->textDocument()); + if (lineNumber < reader.findCopyright().endLine) + return QString(); QString contextAfter; if (settings().readFullFile()) { diff --git a/QodeAssistSettings.cpp b/QodeAssistSettings.cpp index becb576..9fd5437 100644 --- a/QodeAssistSettings.cpp +++ b/QodeAssistSettings.cpp @@ -211,12 +211,12 @@ QodeAssistSettings::QodeAssistSettings() Form{Column{Row{selectModels, modelName}}}}, Group{title(Tr::tr("FIM Prompt Settings")), Form{Column{fimPrompts, - readFullFile, maxFileThreshold, ollamaLivetime, specificInstractions, temperature, maxTokens, + readFullFile, readStringsBeforeCursor, readStringsAfterCursor, startSuggestionTimer,