fix: Improve support for code blocks without language (#108)

This makes it possible to handle code blocks produced by models that emit
their suggestion immediately after the ``` characters, without a language tag.
This commit is contained in:
Povilas Kanapickas 2025-03-07 16:30:22 +02:00 committed by GitHub
parent 9a5047618d
commit f94c79a5ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 265 additions and 88 deletions

View File

@ -1,5 +1,6 @@
/* /*
* Copyright (C) 2024 Petr Mironychev * Copyright (C) 2024 Petr Mironychev
* Copyright (C) 2025 Povilas Kanapickas <povilas@radix.lt>
* *
* This file is part of QodeAssist. * This file is part of QodeAssist.
* *
@ -18,41 +19,122 @@
*/ */
#include "CodeHandler.hpp" #include "CodeHandler.hpp"
#include <QFileInfo>
#include <QHash> #include <QHash>
namespace QodeAssist { namespace QodeAssist {
QString CodeHandler::processText(QString text) struct LanguageProperties
{
QString name;
QString commentStyle;
QVector<QString> namesFromModel;
QVector<QString> fileExtensions;
};
/**
 * Returns the table of languages the code handler knows about.
 *
 * Each entry lists, in order: the canonical language name, the line-comment
 * prefix, the language tags a model may write after an opening ``` fence, and
 * the file extensions that map to the language. Extensions are stored in
 * lowercase because lookups lowercase the extension before searching.
 */
const QVector<LanguageProperties> &getKnownLanguages()
{
    // The table is read-only after construction, so it is declared const.
    static const QVector<LanguageProperties> knownLanguages = {
        {"python", "#", {"python", "py"}, {"py"}},
        {"lua", "--", {"lua"}, {"lua"}},
        {"js", "//", {"js", "javascript"}, {"js", "jsx"}},
        {"ts", "//", {"ts", "typescript"}, {"ts", "tsx"}},
        {"c-like",
         "//",
         {"c", "c++", "cpp"},
         {"c", "h", "cpp", "hpp", "cc", "cxx", "hh", "hxx"}},
        {"java", "//", {"java"}, {"java"}},
        {"c#", "//", {"cs", "csharp"}, {"cs"}},
        {"php", "//", {"php"}, {"php"}},
        {"ruby", "#", {"rb", "ruby"}, {"rb"}},
        // Without this entry a ```rust fence is not recognized as a language tag
        // and the tag text itself would be treated as model output.
        {"rust", "//", {"rust", "rs"}, {"rs"}},
        {"go", "//", {"go"}, {"go"}},
        {"swift", "//", {"swift"}, {"swift"}},
        // Kotlin sources use the .kt and .kts extensions.
        {"kotlin", "//", {"kt", "kotlin"}, {"kt", "kts"}},
        {"scala", "//", {"scala"}, {"scala"}},
        {"r", "#", {"r"}, {"r"}},
        {"shell", "#", {"shell", "bash", "sh"}, {"sh", "bash"}},
        {"perl", "#", {"pl", "perl"}, {"pl"}},
        {"hs", "--", {"hs", "haskell"}, {"hs"}},
    };
    return knownLanguages;
}
/**
 * Builds a lookup table from canonical language name to its comment prefix,
 * using the entries returned by getKnownLanguages().
 */
static QHash<QString, QString> buildLanguageToCommentPrefixMap()
{
    QHash<QString, QString> prefixByLanguage;
    const auto &languages = getKnownLanguages();
    for (auto entry = languages.cbegin(); entry != languages.cend(); ++entry) {
        prefixByLanguage.insert(entry->name, entry->commentStyle);
    }
    return prefixByLanguage;
}
/**
 * Builds a lookup table from file extension to canonical language name,
 * using the entries returned by getKnownLanguages().
 */
static QHash<QString, QString> buildExtensionToLanguageMap()
{
    QHash<QString, QString> languageByExtension;
    const auto &languages = getKnownLanguages();
    for (auto entry = languages.cbegin(); entry != languages.cend(); ++entry) {
        const auto &extensions = entry->fileExtensions;
        for (auto ext = extensions.cbegin(); ext != extensions.cend(); ++ext) {
            // A later entry with the same extension overrides an earlier one.
            languageByExtension.insert(*ext, entry->name);
        }
    }
    return languageByExtension;
}
/**
 * Builds a lookup table from the language tag a model may emit to the
 * canonical language name, using the entries returned by getKnownLanguages().
 */
static QHash<QString, QString> buildModelLanguageNameToLanguageMap()
{
    QHash<QString, QString> languageByModelName;
    const auto &languages = getKnownLanguages();
    for (auto entry = languages.cbegin(); entry != languages.cend(); ++entry) {
        const auto &aliases = entry->namesFromModel;
        for (auto alias = aliases.cbegin(); alias != aliases.cend(); ++alias) {
            // A later entry with the same tag overrides an earlier one.
            languageByModelName.insert(*alias, entry->name);
        }
    }
    return languageByModelName;
}
QString CodeHandler::processText(QString text, QString currentFilePath)
{ {
QString result; QString result;
QStringList lines = text.split('\n'); QStringList lines = text.split('\n');
bool inCodeBlock = false; bool inCodeBlock = false;
QString pendingComments; QString pendingComments;
QString currentLanguage;
auto currentFileExtension = QFileInfo(currentFilePath).suffix();
auto currentLanguage = detectLanguageFromExtension(currentFileExtension);
auto addPendingCommentsIfAny = [&]() {
if (pendingComments.isEmpty()) {
return;
}
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
};
for (const QString &line : lines) { for (const QString &line : lines) {
if (line.trimmed().startsWith("```")) { if (line.trimmed().startsWith("```")) {
if (!inCodeBlock) { if (!inCodeBlock) {
currentLanguage = detectLanguage(line); auto lineLanguage = detectLanguageFromLine(line);
if (!lineLanguage.isEmpty()) {
currentLanguage = lineLanguage;
}
addPendingCommentsIfAny();
if (lineLanguage.isEmpty()) {
// language not detected, so add direct output from model, if any
result += line.trimmed().mid(3) + "\n"; // add the remainder of line after ```
}
} }
inCodeBlock = !inCodeBlock; inCodeBlock = !inCodeBlock;
continue; continue;
} }
if (inCodeBlock) { if (inCodeBlock) {
if (!pendingComments.isEmpty()) {
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
}
result += line + "\n"; result += line + "\n";
} else { } else {
QString trimmed = line.trimmed(); QString trimmed = line.trimmed();
@ -64,45 +146,27 @@ QString CodeHandler::processText(QString text)
} }
} }
if (!pendingComments.isEmpty()) { addPendingCommentsIfAny();
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
}
return result; return result;
} }
QString CodeHandler::getCommentPrefix(const QString &language) QString CodeHandler::getCommentPrefix(const QString &language)
{ {
static const QHash<QString, QString> commentPrefixes static const auto commentPrefixes = buildLanguageToCommentPrefixMap();
= {{"python", "#"}, {"py", "#"}, {"lua", "--"}, {"javascript", "//"}, return commentPrefixes.value(language, "//");
{"js", "//"}, {"typescript", "//"}, {"ts", "//"}, {"cpp", "//"},
{"c++", "//"}, {"c", "//"}, {"java", "//"}, {"csharp", "//"},
{"cs", "//"}, {"php", "//"}, {"ruby", "#"}, {"rb", "#"},
{"rust", "//"}, {"rs", "//"}, {"go", "//"}, {"swift", "//"},
{"kotlin", "//"}, {"kt", "//"}, {"scala", "//"}, {"r", "#"},
{"shell", "#"}, {"bash", "#"}, {"sh", "#"}, {"perl", "#"},
{"pl", "#"}, {"haskell", "--"}, {"hs", "--"}};
return commentPrefixes.value(language.toLower(), "//");
} }
QString CodeHandler::detectLanguage(const QString &line) QString CodeHandler::detectLanguageFromLine(const QString &line)
{ {
QString trimmed = line.trimmed(); static const auto modelNameToLanguage = buildModelLanguageNameToLanguageMap();
if (trimmed.length() <= 3) { // Если только ``` return modelNameToLanguage.value(line.trimmed().mid(3).trimmed(), "");
return QString(); }
}
return trimmed.mid(3).trimmed(); QString CodeHandler::detectLanguageFromExtension(const QString &extension)
{
static const auto extensionToLanguage = buildExtensionToLanguageMap();
return extensionToLanguage.value(extension.toLower(), "");
} }
const QRegularExpression &CodeHandler::getFullCodeBlockRegex() const QRegularExpression &CodeHandler::getFullCodeBlockRegex()

View File

@ -28,9 +28,17 @@ namespace QodeAssist {
class CodeHandler class CodeHandler
{ {
public: public:
static QString processText(QString text); static QString processText(QString text, QString currentFileName);
static QString detectLanguage(const QString &line); /**
* Detects language from line, or returns empty string if this was not possible
*/
static QString detectLanguageFromLine(const QString &line);
/**
* Detects language from a file extension, or returns empty string if this was not possible
*/
static QString detectLanguageFromExtension(const QString &extension);
private: private:
static QString getCommentPrefix(const QString &language); static QString getCommentPrefix(const QString &language);

View File

@ -37,6 +37,14 @@
namespace QodeAssist { namespace QodeAssist {
/**
 * Reads the "uri" string found under params.doc of the request and returns
 * it converted to a local file path via QUrl::toLocalFile().
 */
QString extractFilePathFromRequest(const QJsonObject &request)
{
    QJsonObject requestParams = request["params"].toObject();
    QJsonObject document = requestParams["doc"].toObject();
    const QUrl documentUrl(document["uri"].toString());
    return documentUrl.toLocalFile();
}
LLMClientInterface::LLMClientInterface() LLMClientInterface::LLMClientInterface()
: m_requestHandler(this) : m_requestHandler(this)
{ {
@ -251,9 +259,8 @@ LLMCore::ContextData LLMClientInterface::prepareContext(
QJsonObject params = request["params"].toObject(); QJsonObject params = request["params"].toObject();
QJsonObject doc = params["doc"].toObject(); QJsonObject doc = params["doc"].toObject();
QJsonObject position = doc["position"].toObject(); QJsonObject position = doc["position"].toObject();
QString uri = doc["uri"].toString();
Utils::FilePath filePath = Utils::FilePath::fromString(QUrl(uri).toLocalFile()); Utils::FilePath filePath = Utils::FilePath::fromString(extractFilePathFromRequest(request));
TextEditor::TextDocument *textDocument = TextEditor::TextDocument::textDocumentForFilePath( TextEditor::TextDocument *textDocument = TextEditor::TextDocument::textDocumentForFilePath(
filePath); filePath);
@ -296,7 +303,7 @@ void LLMClientInterface::sendCompletionToClient(
QString processedCompletion QString processedCompletion
= promptTemplate->type() == LLMCore::TemplateType::Chat = promptTemplate->type() == LLMCore::TemplateType::Chat
&& Settings::codeCompletionSettings().smartProcessInstuctText() && Settings::codeCompletionSettings().smartProcessInstuctText()
? CodeHandler::processText(completion) ? CodeHandler::processText(completion, extractFilePathFromRequest(request))
: completion; : completion;
completionItem[LanguageServerProtocol::textKey] = processedCompletion; completionItem[LanguageServerProtocol::textKey] = processedCompletion;

View File

@ -33,18 +33,66 @@ class CodeHandlerTest : public QObject, public testing::Test
Q_OBJECT Q_OBJECT
}; };
TEST_F(CodeHandlerTest, testProcessTextWithCodeBlock) TEST_F(CodeHandlerTest, testProcessTextEmpty)
{
EXPECT_EQ(CodeHandler::processText("", "/file.py"), "\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithLanguageCodeBlock)
{ {
QString input = "This is a comment\n" QString input = "This is a comment\n"
"```python\nprint('Hello, world!')\n```\n" "```python\nprint('Hello, world!')\n```\n"
"Another comment"; "Another comment";
EXPECT_EQ( EXPECT_EQ(
CodeHandler::processText(input), CodeHandler::processText(input, "/file.py"),
"# This is a comment\n\nprint('Hello, world!')\n# Another comment\n\n"); "# This is a comment\n\nprint('Hello, world!')\n# Another comment\n\n");
} }
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks) TEST_F(CodeHandlerTest, testProcessTextWithPlainCodeBlockNoNewline)
{
QString input = "This is a comment\n"
"```print('Hello, world!')\n```\n"
"Another comment";
EXPECT_EQ(
CodeHandler::processText(input, "/file.py"),
"# This is a comment\n\nprint('Hello, world!')\n# Another comment\n\n");
}
// Fence without a language tag, code starting on the next line, surrounded by comments.
TEST_F(CodeHandlerTest, testProcessTextWithPlainCodeBlockWithNewline)
{
    const QString modelOutput = "This is a comment\n"
                                "```\nprint('Hello, world!')\n```\n"
                                "Another comment";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "# This is a comment\n\n\nprint('Hello, world!')\n# Another comment\n\n");
}
// Fence with a language tag and no surrounding comment text.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithLanguageCodeBlock)
{
    const QString modelOutput = "```python\nprint('Hello, world!')\n```";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "print('Hello, world!')\n");
}
// Code placed directly after the opening fence, with no surrounding comment text.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithPlainCodeBlockNoNewline)
{
    const QString modelOutput = "```print('Hello, world!')\n```";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "print('Hello, world!')\n");
}
// Fence without a language tag, code on the next line, no surrounding comment text.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithPlainCodeBlockWithNewline)
{
    const QString modelOutput = "```\nprint('Hello, world!')\n```";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "\nprint('Hello, world!')\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocksDifferentLanguages)
{ {
QString input = "First comment\n```python\nprint('Block 1')\n" QString input = "First comment\n```python\nprint('Block 1')\n"
"```\nMiddle comment\n" "```\nMiddle comment\n"
@ -52,7 +100,7 @@ TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks)
"Last comment"; "Last comment";
EXPECT_EQ( EXPECT_EQ(
CodeHandler::processText(input), CodeHandler::processText(input, "/file.py"),
"# First comment\n\n" "# First comment\n\n"
"print('Block 1')\n" "print('Block 1')\n"
"// Middle comment\n\n" "// Middle comment\n\n"
@ -60,13 +108,74 @@ TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks)
"// Last comment\n\n"); "// Last comment\n\n");
} }
// Two python-tagged blocks separated and surrounded by comment text.
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocksSameLanguage)
{
    const QString modelOutput = "First comment\n```python\nprint('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```python\nprint('Block 2')\n```\n"
                                "Last comment";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n"
        "print('Block 2')\n"
        "# Last comment\n\n");
}
// Two untagged blocks whose code starts on the line after the fence.
TEST_F(CodeHandlerTest, testProcessTextWithMultiplePlainCodeBlocksWithNewline)
{
    const QString modelOutput = "First comment\n```\nprint('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```\ncout << \"Block 2\";\n```\n"
                                "Last comment";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n\n"
        "cout << \"Block 2\";\n"
        "# Last comment\n\n");
}
// Two untagged blocks whose code starts directly after the opening fence.
TEST_F(CodeHandlerTest, testProcessTextWithMultiplePlainCodeBlocksWithoutNewline)
{
    const QString modelOutput = "First comment\n```print('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```cout << \"Block 2\";\n```\n"
                                "Last comment";
    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n"
        "cout << \"Block 2\";\n"
        "# Last comment\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithEmptyLines) TEST_F(CodeHandlerTest, testProcessTextWithEmptyLines)
{ {
QString input = "Comment with empty line\n\n```python\nprint('Hello')\n```\n\nAnother comment"; QString input = "Comment with empty line\n\n```python\nprint('Hello')\n```\n\nAnother comment";
EXPECT_EQ( EXPECT_EQ(
CodeHandler::processText(input), CodeHandler::processText(input, "/file.py"),
"# Comment with empty line\n\n\nprint('Hello')\n\n# Another comment\n\n"); "# Comment with empty line\n\n\n"
"print('Hello')\n\n"
"# Another comment\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextPlainCodeBlockWithNewlineWithEmptyLines)
{
QString input = "Comment with empty line\n\n```\nprint('Hello')\n```\n\nAnother comment";
EXPECT_EQ(
CodeHandler::processText(input, "/file.py"),
"# Comment with empty line\n\n\n\n"
"print('Hello')\n\n"
"# Another comment\n\n");
} }
TEST_F(CodeHandlerTest, testProcessTextWithoutCodeBlock) TEST_F(CodeHandlerTest, testProcessTextWithoutCodeBlock)
@ -74,31 +183,32 @@ TEST_F(CodeHandlerTest, testProcessTextWithoutCodeBlock)
QString input = "This is just a comment\nwith multiple lines"; QString input = "This is just a comment\nwith multiple lines";
EXPECT_EQ( EXPECT_EQ(
CodeHandler::processText(input), "// This is just a comment\n// with multiple lines\n\n"); CodeHandler::processText(input, "/file.py"),
"# This is just a comment\n# with multiple lines\n\n");
} }
TEST_F(CodeHandlerTest, testProcessTextWithDifferentLanguages) TEST_F(CodeHandlerTest, testDetectLanguageFromLine)
{ {
QString input = "Python code:\n" EXPECT_EQ(CodeHandler::detectLanguageFromLine("```python"), "python");
"```python\nprint('Hello')\n```\n" EXPECT_EQ(CodeHandler::detectLanguageFromLine("```javascript"), "js");
"JavaScript code:\n" EXPECT_EQ(CodeHandler::detectLanguageFromLine("```cpp"), "c-like");
"```javascript\nconsole.log('Hello');\n```"; EXPECT_EQ(CodeHandler::detectLanguageFromLine("``` ruby "), "ruby");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("```"), "");
EXPECT_EQ( EXPECT_EQ(CodeHandler::detectLanguageFromLine("``` "), "");
CodeHandler::processText(input),
"# Python code:\n\nprint('Hello')\n"
"// JavaScript code:\n\nconsole.log('Hello');\n");
} }
TEST_F(CodeHandlerTest, testDetectLanguage) TEST_F(CodeHandlerTest, testDetectLanguageFromExtension)
{ {
EXPECT_EQ(CodeHandler::detectLanguage("```python"), "python"); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("py"), "python");
EXPECT_EQ(CodeHandler::detectLanguage("```javascript"), "javascript"); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("js"), "js");
EXPECT_EQ(CodeHandler::detectLanguage("```cpp"), "cpp"); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("cpp"), "c-like");
EXPECT_EQ(CodeHandler::detectLanguage("``` ruby "), "ruby"); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("hpp"), "c-like");
EXPECT_EQ(CodeHandler::detectLanguage("```"), ""); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("rb"), "ruby");
EXPECT_EQ(CodeHandler::detectLanguage("``` "), ""); EXPECT_EQ(CodeHandler::detectLanguageFromExtension("sh"), "shell");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("unknown"), "");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension(""), "");
} }
TEST_F(CodeHandlerTest, testCommentPrefixForDifferentLanguages) TEST_F(CodeHandlerTest, testCommentPrefixForDifferentLanguages)
{ {
struct TestCase struct TestCase
@ -115,21 +225,9 @@ TEST_F(CodeHandlerTest, testCommentPrefixForDifferentLanguages)
{"lua", "Comment\n```lua\ncode\n```", "-- Comment\n\ncode\n"}}; {"lua", "Comment\n```lua\ncode\n```", "-- Comment\n\ncode\n"}};
for (const auto &testCase : testCases) { for (const auto &testCase : testCases) {
EXPECT_EQ(CodeHandler::processText(testCase.input), testCase.expected) EXPECT_EQ(CodeHandler::processText(testCase.input, ""), testCase.expected)
<< "Failed for language: " << testCase.language; << "Failed for language: " << testCase.language;
} }
} }
TEST_F(CodeHandlerTest, testEmptyInput)
{
EXPECT_EQ(CodeHandler::processText(""), "\n\n");
}
TEST_F(CodeHandlerTest, testCodeBlockWithoutLanguage)
{
QString input = "Comment\n```\ncode\n```";
EXPECT_EQ(CodeHandler::processText(input), "// Comment\n\ncode\n");
}
#include "CodeHandlerTest.moc" #include "CodeHandlerTest.moc"