feat: Add image support for Claude, OpenAI and Google (#268)

* feat: Add image support for Claude
* feat: Add image support for OpenAI
* feat: Add image support for Google AI
* refactor: Separate ImageComponent
* feat: Add attach image button
* feat: Add image support for the Mistral provider
* feat: Add image support for OpenAI-compatible providers
* feat: Add image support for Ollama
This commit is contained in:
Petr Mironychev
2025-11-20 15:49:39 +01:00
committed by GitHub
parent ce9e2717d6
commit 55b6080273
41 changed files with 860 additions and 93 deletions

View File

@ -41,34 +41,55 @@ public:
if (context.history) {
for (const auto &msg : context.history.value()) {
if (msg.role != "system") {
// Handle thinking blocks with structured content
if (msg.isThinking) {
// Create content array with thinking block
QJsonArray content;
QJsonObject thinkingBlock;
thinkingBlock["type"] = msg.isRedacted ? "redacted_thinking" : "thinking";
// Extract actual thinking text (remove display signature)
QString thinkingText = msg.content;
int signaturePos = thinkingText.indexOf("\n[Signature: ");
if (signaturePos != -1) {
thinkingText = thinkingText.left(signaturePos);
}
if (!msg.isRedacted) {
thinkingBlock["thinking"] = thinkingText;
}
if (!msg.signature.isEmpty()) {
thinkingBlock["signature"] = msg.signature;
}
content.append(thinkingBlock);
messages.append(QJsonObject{{"role", "assistant"}, {"content", content}});
} else {
// Normal message
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
if (msg.role == "system") continue;
if (msg.isThinking) {
QJsonArray content;
QJsonObject thinkingBlock;
thinkingBlock["type"] = msg.isRedacted ? "redacted_thinking" : "thinking";
QString thinkingText = msg.content;
int signaturePos = thinkingText.indexOf("\n[Signature: ");
if (signaturePos != -1) {
thinkingText = thinkingText.left(signaturePos);
}
if (!msg.isRedacted) {
thinkingBlock["thinking"] = thinkingText;
}
if (!msg.signature.isEmpty()) {
thinkingBlock["signature"] = msg.signature;
}
content.append(thinkingBlock);
messages.append(QJsonObject{{"role", "assistant"}, {"content", content}});
} else if (msg.images && !msg.images->isEmpty()) {
QJsonArray content;
if (!msg.content.isEmpty()) {
content.append(QJsonObject{{"type", "text"}, {"text", msg.content}});
}
for (const auto &image : msg.images.value()) {
QJsonObject imageBlock;
imageBlock["type"] = "image";
QJsonObject source;
if (image.isUrl) {
source["type"] = "url";
source["url"] = image.data;
} else {
source["type"] = "base64";
source["media_type"] = image.mediaType;
source["data"] = image.data;
}
imageBlock["source"] = source;
content.append(imageBlock);
}
messages.append(QJsonObject{{"role", msg.role}, {"content", content}});
} else {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
}
}
}

View File

@ -46,7 +46,29 @@ public:
QJsonObject content;
QJsonArray parts;
parts.append(QJsonObject{{"text", msg.content}});
if (!msg.content.isEmpty()) {
parts.append(QJsonObject{{"text", msg.content}});
}
if (msg.images && !msg.images->isEmpty()) {
for (const auto &image : msg.images.value()) {
QJsonObject imagePart;
if (image.isUrl) {
QJsonObject fileData;
fileData["mime_type"] = image.mediaType;
fileData["file_uri"] = image.data;
imagePart["file_data"] = fileData;
} else {
QJsonObject inlineData;
inlineData["mime_type"] = image.mediaType;
inlineData["data"] = image.data;
imagePart["inline_data"] = inlineData;
}
parts.append(imagePart);
}
}
QString role = msg.role;
if (role == "assistant") {

View File

@ -74,7 +74,31 @@ public:
if (context.history) {
for (const auto &msg : context.history.value()) {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
if (msg.images && !msg.images->isEmpty()) {
QJsonArray content;
if (!msg.content.isEmpty()) {
content.append(QJsonObject{{"type", "text"}, {"text", msg.content}});
}
for (const auto &image : msg.images.value()) {
QJsonObject imageBlock;
imageBlock["type"] = "image_url";
QJsonObject imageUrl;
if (image.isUrl) {
imageUrl["url"] = image.data;
} else {
imageUrl["url"] = QString("data:%1;base64,%2").arg(image.mediaType, image.data);
}
imageBlock["image_url"] = imageUrl;
content.append(imageBlock);
}
messages.append(QJsonObject{{"role", msg.role}, {"content", content}});
} else {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
}
}
}
@ -90,7 +114,7 @@ public:
" {\"role\": \"assistant\", \"content\": \"<assistant response>\"}\n"
" ]\n"
"}\n\n"
"Supports system messages and conversation history.";
"Supports system messages, conversation history, and images.";
}
bool isSupportProvider(LLMCore::ProviderID id) const override
{

View File

@ -76,7 +76,19 @@ public:
if (context.history) {
for (const auto &msg : context.history.value()) {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
QJsonObject messageObj;
messageObj["role"] = msg.role;
messageObj["content"] = msg.content;
if (msg.images && !msg.images->isEmpty()) {
QJsonArray images;
for (const auto &image : msg.images.value()) {
images.append(image.data);
}
messageObj["images"] = images;
}
messages.append(messageObj);
}
}
@ -88,11 +100,12 @@ public:
"{\n"
" \"messages\": [\n"
" {\"role\": \"system\", \"content\": \"<system prompt>\"},\n"
" {\"role\": \"user\", \"content\": \"<user message>\"},\n"
" {\"role\": \"user\", \"content\": \"<user message>\", \"images\": [\"<base64>\"]},\n"
" {\"role\": \"assistant\", \"content\": \"<assistant response>\"}\n"
" ]\n"
"}\n\n"
"Recommended for Ollama models with chat capability.";
"Recommended for Ollama models with chat capability.\n"
"Supports images for multimodal models (e.g., llava).";
}
bool isSupportProvider(LLMCore::ProviderID id) const override
{

View File

@ -42,7 +42,31 @@ public:
if (context.history) {
for (const auto &msg : context.history.value()) {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
if (msg.images && !msg.images->isEmpty()) {
QJsonArray content;
if (!msg.content.isEmpty()) {
content.append(QJsonObject{{"type", "text"}, {"text", msg.content}});
}
for (const auto &image : msg.images.value()) {
QJsonObject imageBlock;
imageBlock["type"] = "image_url";
QJsonObject imageUrl;
if (image.isUrl) {
imageUrl["url"] = image.data;
} else {
imageUrl["url"] = QString("data:%1;base64,%2").arg(image.mediaType, image.data);
}
imageBlock["image_url"] = imageUrl;
content.append(imageBlock);
}
messages.append(QJsonObject{{"role", msg.role}, {"content", content}});
} else {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
}
}
}

View File

@ -42,7 +42,31 @@ public:
if (context.history) {
for (const auto &msg : context.history.value()) {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
if (msg.images && !msg.images->isEmpty()) {
QJsonArray content;
if (!msg.content.isEmpty()) {
content.append(QJsonObject{{"type", "text"}, {"text", msg.content}});
}
for (const auto &image : msg.images.value()) {
QJsonObject imageBlock;
imageBlock["type"] = "image_url";
QJsonObject imageUrl;
if (image.isUrl) {
imageUrl["url"] = image.data;
} else {
imageUrl["url"] = QString("data:%1;base64,%2").arg(image.mediaType, image.data);
}
imageBlock["image_url"] = imageUrl;
content.append(imageBlock);
}
messages.append(QJsonObject{{"role", msg.role}, {"content", content}});
} else {
messages.append(QJsonObject{{"role", msg.role}, {"content", msg.content}});
}
}
}
@ -58,7 +82,8 @@ public:
" {\"role\": \"assistant\", \"content\": \"<assistant response>\"}\n"
" ]\n"
"}\n\n"
"Works with any service implementing the OpenAI Chat API specification.";
"Works with any service implementing the OpenAI Chat API specification.\n"
"Supports images.";
}
bool isSupportProvider(LLMCore::ProviderID id) const override
{