fix: Improve support for code blocks without language (#108)

This makes it possible to handle code blocks produced by models that emit
their suggestion immediately after the ``` characters, without a language tag.
This commit is contained in:
Povilas Kanapickas 2025-03-07 16:30:22 +02:00 committed by GitHub
parent 9a5047618d
commit f94c79a5ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 265 additions and 88 deletions

View File

@ -1,5 +1,6 @@
/*
/*
* Copyright (C) 2024 Petr Mironychev
* Copyright (C) 2025 Povilas Kanapickas <povilas@radix.lt>
*
* This file is part of QodeAssist.
*
@ -18,41 +19,122 @@
*/
#include "CodeHandler.hpp"
#include <QFileInfo>
#include <QHash>
namespace QodeAssist {
QString CodeHandler::processText(QString text)
// Describes one supported language: its canonical name, its line-comment
// prefix, and the aliases/extensions by which it can be recognised.
// Field order matters: the table in getKnownLanguages() uses positional
// aggregate initialization.
struct LanguageProperties
{
// Canonical language name; used as the value of the extension/model-name
// maps and as the key of the comment-prefix map.
QString name;
// Line-comment prefix placed in front of non-code text (e.g. "#", "//", "--").
QString commentStyle;
// Names a model may write after the opening ``` of a code block.
QVector<QString> namesFromModel;
// File suffixes (without the dot) that select this language.
QVector<QString> fileExtensions;
};
/**
 * Returns the table of languages the code handler knows about.
 *
 * Each entry lists the canonical name, the line-comment prefix, the names a
 * model may put after the opening ``` of a code block, and the file
 * extensions (lower-case, without the dot) that map to the language.
 *
 * The table is built once on first use and is immutable afterwards.
 *
 * Notes on entries:
 *  - "rust" is listed explicitly. A fence tag that is not found in this table
 *    is not recognised as a language, so every language models commonly name
 *    must be present here even when its comment prefix equals the "//" default.
 *  - Kotlin sources use the ".kt" and ".kts" extensions; "kotlin" is only a
 *    model-side name, not a file extension.
 */
const QVector<LanguageProperties> &getKnownLanguages()
{
    static const QVector<LanguageProperties> knownLanguages = {
        {"python", "#", {"python", "py"}, {"py"}},
        {"lua", "--", {"lua"}, {"lua"}},
        {"js", "//", {"js", "javascript"}, {"js", "jsx"}},
        {"ts", "//", {"ts", "typescript"}, {"ts", "tsx"}},
        {"c-like",
         "//",
         {"c", "c++", "cpp"},
         {"c", "h", "cpp", "hpp", "cc", "cxx", "hh", "hxx"}},
        {"java", "//", {"java"}, {"java"}},
        {"c#", "//", {"cs", "csharp"}, {"cs"}},
        {"php", "//", {"php"}, {"php"}},
        {"ruby", "#", {"rb", "ruby"}, {"rb"}},
        {"rust", "//", {"rs", "rust"}, {"rs"}},
        {"go", "//", {"go"}, {"go"}},
        {"swift", "//", {"swift"}, {"swift"}},
        {"kotlin", "//", {"kt", "kotlin"}, {"kt", "kts"}},
        {"scala", "//", {"scala"}, {"scala"}},
        {"r", "#", {"r"}, {"r"}},
        {"shell", "#", {"shell", "bash", "sh"}, {"sh", "bash"}},
        {"perl", "#", {"pl", "perl"}, {"pl"}},
        {"hs", "--", {"hs", "haskell"}, {"hs"}},
    };
    return knownLanguages;
}
// Builds the lookup table: canonical language name -> line-comment prefix.
static QHash<QString, QString> buildLanguageToCommentPrefixMap()
{
    const auto &languages = getKnownLanguages();

    QHash<QString, QString> prefixByLanguage;
    for (auto it = languages.cbegin(); it != languages.cend(); ++it) {
        // insert() replaces an existing value, same as assignment through operator[]
        prefixByLanguage.insert(it->name, it->commentStyle);
    }
    return prefixByLanguage;
}
// Builds the lookup table: file extension -> canonical language name.
static QHash<QString, QString> buildExtensionToLanguageMap()
{
    QHash<QString, QString> languageByExtension;
    for (const LanguageProperties &language : getKnownLanguages()) {
        const QString &languageName = language.name;
        for (const QString &ext : language.fileExtensions) {
            // a later entry with the same extension overrides an earlier one
            languageByExtension.insert(ext, languageName);
        }
    }
    return languageByExtension;
}
// Builds the lookup table: language name as written by a model -> canonical language name.
static QHash<QString, QString> buildModelLanguageNameToLanguageMap()
{
    const auto &languages = getKnownLanguages();

    QHash<QString, QString> languageByModelName;
    for (auto lang = languages.cbegin(); lang != languages.cend(); ++lang) {
        const auto &aliases = lang->namesFromModel;
        for (auto alias = aliases.cbegin(); alias != aliases.cend(); ++alias) {
            languageByModelName.insert(*alias, lang->name);
        }
    }
    return languageByModelName;
}
QString CodeHandler::processText(QString text, QString currentFilePath)
{
QString result;
QStringList lines = text.split('\n');
bool inCodeBlock = false;
QString pendingComments;
QString currentLanguage;
auto currentFileExtension = QFileInfo(currentFilePath).suffix();
auto currentLanguage = detectLanguageFromExtension(currentFileExtension);
auto addPendingCommentsIfAny = [&]() {
if (pendingComments.isEmpty()) {
return;
}
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
};
for (const QString &line : lines) {
if (line.trimmed().startsWith("```")) {
if (!inCodeBlock) {
currentLanguage = detectLanguage(line);
auto lineLanguage = detectLanguageFromLine(line);
if (!lineLanguage.isEmpty()) {
currentLanguage = lineLanguage;
}
addPendingCommentsIfAny();
if (lineLanguage.isEmpty()) {
// language not detected, so add direct output from model, if any
result += line.trimmed().mid(3) + "\n"; // add the remainder of line after ```
}
}
inCodeBlock = !inCodeBlock;
continue;
}
if (inCodeBlock) {
if (!pendingComments.isEmpty()) {
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
}
result += line + "\n";
} else {
QString trimmed = line.trimmed();
@ -64,45 +146,27 @@ QString CodeHandler::processText(QString text)
}
}
if (!pendingComments.isEmpty()) {
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
}
addPendingCommentsIfAny();
return result;
}
QString CodeHandler::getCommentPrefix(const QString &language)
{
static const QHash<QString, QString> commentPrefixes
= {{"python", "#"}, {"py", "#"}, {"lua", "--"}, {"javascript", "//"},
{"js", "//"}, {"typescript", "//"}, {"ts", "//"}, {"cpp", "//"},
{"c++", "//"}, {"c", "//"}, {"java", "//"}, {"csharp", "//"},
{"cs", "//"}, {"php", "//"}, {"ruby", "#"}, {"rb", "#"},
{"rust", "//"}, {"rs", "//"}, {"go", "//"}, {"swift", "//"},
{"kotlin", "//"}, {"kt", "//"}, {"scala", "//"}, {"r", "#"},
{"shell", "#"}, {"bash", "#"}, {"sh", "#"}, {"perl", "#"},
{"pl", "#"}, {"haskell", "--"}, {"hs", "--"}};
return commentPrefixes.value(language.toLower(), "//");
static const auto commentPrefixes = buildLanguageToCommentPrefixMap();
return commentPrefixes.value(language, "//");
}
QString CodeHandler::detectLanguage(const QString &line)
QString CodeHandler::detectLanguageFromLine(const QString &line)
{
QString trimmed = line.trimmed();
if (trimmed.length() <= 3) { // Only the ``` fence, nothing follows it
return QString();
}
static const auto modelNameToLanguage = buildModelLanguageNameToLanguageMap();
return modelNameToLanguage.value(line.trimmed().mid(3).trimmed(), "");
}
return trimmed.mid(3).trimmed();
// Maps a file extension (without the dot, any letter case) to the canonical
// language name; yields an empty string when the extension is not known.
QString CodeHandler::detectLanguageFromExtension(const QString &extension)
{
    // built once on first call
    static const QHash<QString, QString> languageByExtension = buildExtensionToLanguageMap();

    const QString normalizedExtension = extension.toLower();
    return languageByExtension.value(normalizedExtension, "");
}
const QRegularExpression &CodeHandler::getFullCodeBlockRegex()

View File

@ -28,9 +28,17 @@ namespace QodeAssist {
class CodeHandler
{
public:
static QString processText(QString text);
static QString processText(QString text, QString currentFileName);
static QString detectLanguage(const QString &line);
/**
* Detects language from line, or returns empty string if this was not possible
*/
static QString detectLanguageFromLine(const QString &line);
/**
* Detects language from file extension, or returns empty string if this was not possible
*/
static QString detectLanguageFromExtension(const QString &extension);
private:
static QString getCommentPrefix(const QString &language);

View File

@ -37,6 +37,14 @@
namespace QodeAssist {
// Reads params.doc.uri from the request and converts that URI to a local file path.
QString extractFilePathFromRequest(const QJsonObject &request)
{
    const QJsonObject requestParams = request["params"].toObject();
    const QJsonObject document = requestParams["doc"].toObject();

    const QUrl documentUrl(document["uri"].toString());
    return documentUrl.toLocalFile();
}
LLMClientInterface::LLMClientInterface()
: m_requestHandler(this)
{
@ -251,9 +259,8 @@ LLMCore::ContextData LLMClientInterface::prepareContext(
QJsonObject params = request["params"].toObject();
QJsonObject doc = params["doc"].toObject();
QJsonObject position = doc["position"].toObject();
QString uri = doc["uri"].toString();
Utils::FilePath filePath = Utils::FilePath::fromString(QUrl(uri).toLocalFile());
Utils::FilePath filePath = Utils::FilePath::fromString(extractFilePathFromRequest(request));
TextEditor::TextDocument *textDocument = TextEditor::TextDocument::textDocumentForFilePath(
filePath);
@ -296,7 +303,7 @@ void LLMClientInterface::sendCompletionToClient(
QString processedCompletion
= promptTemplate->type() == LLMCore::TemplateType::Chat
&& Settings::codeCompletionSettings().smartProcessInstuctText()
? CodeHandler::processText(completion)
? CodeHandler::processText(completion, extractFilePathFromRequest(request))
: completion;
completionItem[LanguageServerProtocol::textKey] = processedCompletion;

View File

@ -33,18 +33,66 @@ class CodeHandlerTest : public QObject, public testing::Test
Q_OBJECT
};
TEST_F(CodeHandlerTest, testProcessTextWithCodeBlock)
// Empty model output is expected to produce exactly two newlines.
TEST_F(CodeHandlerTest, testProcessTextEmpty)
{
    const QString processed = CodeHandler::processText("", "/file.py");

    EXPECT_EQ(processed, "\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithLanguageCodeBlock)
{
QString input = "This is a comment\n"
"```python\nprint('Hello, world!')\n```\n"
"Another comment";
EXPECT_EQ(
CodeHandler::processText(input),
CodeHandler::processText(input, "/file.py"),
"# This is a comment\n\nprint('Hello, world!')\n# Another comment\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks)
// Code written directly after ``` (no language tag, no newline) is kept as code;
// the surrounding text is commented using the prefix for the .py file.
TEST_F(CodeHandlerTest, testProcessTextWithPlainCodeBlockNoNewline)
{
    const QString modelOutput = "This is a comment\n"
                                "```print('Hello, world!')\n```\n"
                                "Another comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "# This is a comment\n\nprint('Hello, world!')\n# Another comment\n\n");
}
// A bare ``` line followed by code: the empty remainder of the fence line
// shows up as one extra newline before the code.
TEST_F(CodeHandlerTest, testProcessTextWithPlainCodeBlockWithNewline)
{
    const QString modelOutput = "This is a comment\n"
                                "```\nprint('Hello, world!')\n```\n"
                                "Another comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "# This is a comment\n\n\nprint('Hello, world!')\n# Another comment\n\n");
}
// Only a tagged code block, no surrounding text: just the code is returned.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithLanguageCodeBlock)
{
    const QString modelOutput = "```python\nprint('Hello, world!')\n```";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "print('Hello, world!')\n");
}
// Only an untagged block whose code starts right after ```: just the code is returned.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithPlainCodeBlockNoNewline)
{
    const QString modelOutput = "```print('Hello, world!')\n```";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "print('Hello, world!')\n");
}
// Only an untagged block with the code on the next line: a leading newline
// precedes the code in the result.
TEST_F(CodeHandlerTest, testProcessTextNoCommentsWithPlainCodeBlockWithNewline)
{
    const QString modelOutput = "```\nprint('Hello, world!')\n```";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(processed, "\nprint('Hello, world!')\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocksDifferentLanguages)
{
QString input = "First comment\n```python\nprint('Block 1')\n"
"```\nMiddle comment\n"
@ -52,7 +100,7 @@ TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks)
"Last comment";
EXPECT_EQ(
CodeHandler::processText(input),
CodeHandler::processText(input, "/file.py"),
"# First comment\n\n"
"print('Block 1')\n"
"// Middle comment\n\n"
@ -60,13 +108,74 @@ TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocks)
"// Last comment\n\n");
}
// Two python-tagged blocks: every piece of surrounding text gets the "#" prefix.
TEST_F(CodeHandlerTest, testProcessTextWithMultipleCodeBlocksSameLanguage)
{
    const QString modelOutput = "First comment\n```python\nprint('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```python\nprint('Block 2')\n```\n"
                                "Last comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n"
        "print('Block 2')\n"
        "# Last comment\n\n");
}
// Two untagged blocks with code on the following line: the language comes from
// the .py file path, and each bare fence adds one extra newline.
TEST_F(CodeHandlerTest, testProcessTextWithMultiplePlainCodeBlocksWithNewline)
{
    const QString modelOutput = "First comment\n```\nprint('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```\ncout << \"Block 2\";\n```\n"
                                "Last comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n\n"
        "cout << \"Block 2\";\n"
        "# Last comment\n\n");
}
// Two untagged blocks whose code starts right after ```: the code is preserved
// and comments use the prefix for the .py file.
TEST_F(CodeHandlerTest, testProcessTextWithMultiplePlainCodeBlocksWithoutNewline)
{
    const QString modelOutput = "First comment\n```print('Block 1')\n"
                                "```\nMiddle comment\n"
                                "```cout << \"Block 2\";\n```\n"
                                "Last comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# First comment\n\n"
        "print('Block 1')\n"
        "# Middle comment\n\n"
        "cout << \"Block 2\";\n"
        "# Last comment\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithEmptyLines)
{
QString input = "Comment with empty line\n\n```python\nprint('Hello')\n```\n\nAnother comment";
EXPECT_EQ(
CodeHandler::processText(input),
"# Comment with empty line\n\n\nprint('Hello')\n\n# Another comment\n\n");
CodeHandler::processText(input, "/file.py"),
"# Comment with empty line\n\n\n"
"print('Hello')\n\n"
"# Another comment\n\n");
}
// Untagged block surrounded by blank lines: the blank lines are carried over
// into the result alongside the commented text.
TEST_F(CodeHandlerTest, testProcessTextPlainCodeBlockWithNewlineWithEmptyLines)
{
    const QString modelOutput
        = "Comment with empty line\n\n```\nprint('Hello')\n```\n\nAnother comment";

    const QString processed = CodeHandler::processText(modelOutput, "/file.py");

    EXPECT_EQ(
        processed,
        "# Comment with empty line\n\n\n\n"
        "print('Hello')\n\n"
        "# Another comment\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithoutCodeBlock)
@ -74,31 +183,32 @@ TEST_F(CodeHandlerTest, testProcessTextWithoutCodeBlock)
QString input = "This is just a comment\nwith multiple lines";
EXPECT_EQ(
CodeHandler::processText(input), "// This is just a comment\n// with multiple lines\n\n");
CodeHandler::processText(input, "/file.py"),
"# This is just a comment\n# with multiple lines\n\n");
}
TEST_F(CodeHandlerTest, testProcessTextWithDifferentLanguages)
TEST_F(CodeHandlerTest, testDetectLanguageFromLine)
{
QString input = "Python code:\n"
"```python\nprint('Hello')\n```\n"
"JavaScript code:\n"
"```javascript\nconsole.log('Hello');\n```";
EXPECT_EQ(
CodeHandler::processText(input),
"# Python code:\n\nprint('Hello')\n"
"// JavaScript code:\n\nconsole.log('Hello');\n");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("```python"), "python");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("```javascript"), "js");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("```cpp"), "c-like");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("``` ruby "), "ruby");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("```"), "");
EXPECT_EQ(CodeHandler::detectLanguageFromLine("``` "), "");
}
TEST_F(CodeHandlerTest, testDetectLanguage)
TEST_F(CodeHandlerTest, testDetectLanguageFromExtension)
{
EXPECT_EQ(CodeHandler::detectLanguage("```python"), "python");
EXPECT_EQ(CodeHandler::detectLanguage("```javascript"), "javascript");
EXPECT_EQ(CodeHandler::detectLanguage("```cpp"), "cpp");
EXPECT_EQ(CodeHandler::detectLanguage("``` ruby "), "ruby");
EXPECT_EQ(CodeHandler::detectLanguage("```"), "");
EXPECT_EQ(CodeHandler::detectLanguage("``` "), "");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("py"), "python");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("js"), "js");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("cpp"), "c-like");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("hpp"), "c-like");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("rb"), "ruby");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("sh"), "shell");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension("unknown"), "");
EXPECT_EQ(CodeHandler::detectLanguageFromExtension(""), "");
}
TEST_F(CodeHandlerTest, testCommentPrefixForDifferentLanguages)
{
struct TestCase
@ -115,21 +225,9 @@ TEST_F(CodeHandlerTest, testCommentPrefixForDifferentLanguages)
{"lua", "Comment\n```lua\ncode\n```", "-- Comment\n\ncode\n"}};
for (const auto &testCase : testCases) {
EXPECT_EQ(CodeHandler::processText(testCase.input), testCase.expected)
EXPECT_EQ(CodeHandler::processText(testCase.input, ""), testCase.expected)
<< "Failed for language: " << testCase.language;
}
}
TEST_F(CodeHandlerTest, testEmptyInput)
{
EXPECT_EQ(CodeHandler::processText(""), "\n\n");
}
TEST_F(CodeHandlerTest, testCodeBlockWithoutLanguage)
{
QString input = "Comment\n```\ncode\n```";
EXPECT_EQ(CodeHandler::processText(input), "// Comment\n\ncode\n");
}
#include "CodeHandlerTest.moc"