fix: Improve support for code blocks without language (#108)

This makes it possible to represent code blocks in models that emit
their suggestion immediately after the ``` characters.
This commit is contained in:
Povilas Kanapickas
2025-03-07 16:30:22 +02:00
committed by GitHub
parent 9a5047618d
commit f94c79a5ff
4 changed files with 265 additions and 88 deletions

View File

@ -1,5 +1,6 @@
/*
/*
* Copyright (C) 2024 Petr Mironychev
* Copyright (C) 2025 Povilas Kanapickas <povilas@radix.lt>
*
* This file is part of QodeAssist.
*
@ -18,41 +19,122 @@
*/
#include "CodeHandler.hpp"
#include <QFileInfo>
#include <QHash>
namespace QodeAssist {
QString CodeHandler::processText(QString text)
// One row of the known-languages table: a language together with the strings
// used to recognise it and the prefix used to write line comments in it.
// Instances are created by aggregate initialisation, so the member order below
// must match the order of the values in the table.
struct LanguageProperties
{
// Canonical language name; the lookup maps built from the table resolve to this value.
QString name;
// Line-comment prefix for the language (for example "#", "//" or "--").
QString commentStyle;
// Names that are matched against the text following an opening ``` fence.
QVector<QString> namesFromModel;
// File suffixes (without the dot) that are mapped to this language.
QVector<QString> fileExtensions;
};
// Returns the table of languages this file knows how to handle.
//
// Each row is {canonical name, line-comment prefix, fence tags, file extensions}.
// The table is a function-local static, so it is built on the first call and the
// same instance is returned afterwards. The lookup maps (comment prefix by name,
// language by extension, language by fence tag) are all derived from it, so a
// language has to be listed here to be recognised anywhere.
const QVector<LanguageProperties> &getKnownLanguages()
{
    static const QVector<LanguageProperties> knownLanguages = {
        {"python", "#", {"python", "py"}, {"py"}},
        {"lua", "--", {"lua"}, {"lua"}},
        {"js", "//", {"js", "javascript"}, {"js", "jsx"}},
        {"ts", "//", {"ts", "typescript"}, {"ts", "tsx"}},
        {"c-like", "//", {"c", "c++", "cpp"}, {"c", "h", "cpp", "hpp"}},
        {"java", "//", {"java"}, {"java"}},
        {"c#", "//", {"cs", "csharp"}, {"cs"}},
        {"php", "//", {"php"}, {"php"}},
        {"ruby", "#", {"rb", "ruby"}, {"rb"}},
        // Without this row a "```rust" fence would not be recognised as a language tag.
        {"rust", "//", {"rust", "rs"}, {"rs"}},
        {"go", "//", {"go"}, {"go"}},
        {"swift", "//", {"swift"}, {"swift"}},
        // "kts" covers Kotlin script files; "kotlin" is kept so existing lookups still resolve.
        {"kotlin", "//", {"kt", "kotlin"}, {"kt", "kts", "kotlin"}},
        {"scala", "//", {"scala"}, {"scala"}},
        {"r", "#", {"r"}, {"r"}},
        {"shell", "#", {"shell", "bash", "sh"}, {"sh", "bash"}},
        {"perl", "#", {"pl", "perl"}, {"pl"}},
        {"hs", "--", {"hs", "haskell"}, {"hs"}},
    };
    return knownLanguages;
}
// Builds the lookup "canonical language name -> line-comment prefix" from the
// known-languages table. If two rows share a name, the later row wins.
static QHash<QString, QString> buildLanguageToCommentPrefixMap()
{
    const auto &languages = getKnownLanguages();

    QHash<QString, QString> prefixByLanguage;
    for (auto row = languages.cbegin(); row != languages.cend(); ++row) {
        // insert() replaces the value of an existing key, like assignment through operator[].
        prefixByLanguage.insert(row->name, row->commentStyle);
    }
    return prefixByLanguage;
}
// Builds the lookup "file extension -> canonical language name" from the
// known-languages table. An extension listed by several rows resolves to the
// last row that lists it.
static QHash<QString, QString> buildExtensionToLanguageMap()
{
    QHash<QString, QString> languageByExtension;
    const auto &languages = getKnownLanguages();
    for (const LanguageProperties &entry : languages) {
        const QVector<QString> &suffixes = entry.fileExtensions;
        for (auto suffix = suffixes.cbegin(); suffix != suffixes.cend(); ++suffix) {
            languageByExtension.insert(*suffix, entry.name);
        }
    }
    return languageByExtension;
}
// Builds the lookup "language name as written by the model -> canonical
// language name" from the known-languages table. A name listed by several rows
// resolves to the last row that lists it.
static QHash<QString, QString> buildModelLanguageNameToLanguageMap()
{
    QHash<QString, QString> languageByModelName;
    const auto &languages = getKnownLanguages();
    for (const LanguageProperties &entry : languages) {
        const QVector<QString> &aliases = entry.namesFromModel;
        for (auto alias = aliases.cbegin(); alias != aliases.cend(); ++alias) {
            languageByModelName.insert(*alias, entry.name);
        }
    }
    return languageByModelName;
}
QString CodeHandler::processText(QString text, QString currentFilePath)
{
QString result;
QStringList lines = text.split('\n');
bool inCodeBlock = false;
QString pendingComments;
QString currentLanguage;
auto currentFileExtension = QFileInfo(currentFilePath).suffix();
auto currentLanguage = detectLanguageFromExtension(currentFileExtension);
auto addPendingCommentsIfAny = [&]() {
if (pendingComments.isEmpty()) {
return;
}
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
};
for (const QString &line : lines) {
if (line.trimmed().startsWith("```")) {
if (!inCodeBlock) {
currentLanguage = detectLanguage(line);
auto lineLanguage = detectLanguageFromLine(line);
if (!lineLanguage.isEmpty()) {
currentLanguage = lineLanguage;
}
addPendingCommentsIfAny();
if (lineLanguage.isEmpty()) {
// language not detected, so add direct output from model, if any
result += line.trimmed().mid(3) + "\n"; // add the remainder of line after ```
}
}
inCodeBlock = !inCodeBlock;
continue;
}
if (inCodeBlock) {
if (!pendingComments.isEmpty()) {
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
pendingComments.clear();
}
result += line + "\n";
} else {
QString trimmed = line.trimmed();
@ -64,45 +146,27 @@ QString CodeHandler::processText(QString text)
}
}
if (!pendingComments.isEmpty()) {
QStringList commentLines = pendingComments.split('\n');
QString commentPrefix = getCommentPrefix(currentLanguage);
for (const QString &commentLine : commentLines) {
if (!commentLine.trimmed().isEmpty()) {
result += commentPrefix + " " + commentLine.trimmed() + "\n";
} else {
result += "\n";
}
}
}
addPendingCommentsIfAny();
return result;
}
// Returns the line-comment prefix to use for `language`.
//
// `language` may be a canonical name from the known-languages table or one of
// the names a model writes after a ``` fence (for example "py" or "bash"); the
// comparison ignores case. Unknown or empty names fall back to "//".
QString CodeHandler::getCommentPrefix(const QString &language)
{
    // Both maps are derived from the known-languages table on first use, so the
    // table stays the single place where languages are defined.
    static const auto commentPrefixes = buildLanguageToCommentPrefixMap();
    static const auto aliasToLanguage = buildModelLanguageNameToLanguageMap();

    const QString requested = language.toLower();
    // Resolve an alias to its canonical name; a name that is not an alias is
    // looked up unchanged (this is how canonical-only names such as "c-like" resolve).
    const QString canonical = aliasToLanguage.value(requested, requested);
    return commentPrefixes.value(canonical, "//");
}
// Determines the language named on a code-fence line such as "```python".
//
// Returns the canonical language name when the text after the leading ``` is a
// known model-side language name (compared case-insensitively), and an empty
// string otherwise - including when the line consists of the fence alone.
QString CodeHandler::detectLanguageFromLine(const QString &line)
{
    static const auto modelNameToLanguage = buildModelLanguageNameToLanguageMap();

    // Drop surrounding whitespace and the three fence characters; what remains
    // is the candidate language tag. The table keys are lower-case, so the tag
    // is lower-cased before the lookup.
    const QString tag = line.trimmed().mid(3).trimmed().toLower();
    return modelNameToLanguage.value(tag, "");
}
// Maps a file suffix (without the dot) to a canonical language name.
// The suffix is compared in lower case; an unknown suffix yields an empty string.
QString CodeHandler::detectLanguageFromExtension(const QString &extension)
{
    static const QHash<QString, QString> languageBySuffix = buildExtensionToLanguageMap();

    const QString suffix = extension.toLower();
    return languageBySuffix.value(suffix, "");
}
const QRegularExpression &CodeHandler::getFullCodeBlockRegex()