mirror of
https://github.com/Palm1r/QodeAssist.git
synced 2026-06-30 18:19:11 -04:00
feat: Improve agents config
This commit is contained in:
@@ -18,6 +18,9 @@
|
|||||||
<file>openai_quick_refactor.toml</file>
|
<file>openai_quick_refactor.toml</file>
|
||||||
<file>google_base_chat.toml</file>
|
<file>google_base_chat.toml</file>
|
||||||
<file>google_chat.toml</file>
|
<file>google_chat.toml</file>
|
||||||
|
<file>google_completion.toml</file>
|
||||||
|
<file>google_compression.toml</file>
|
||||||
|
<file>google_quick_refactor.toml</file>
|
||||||
<file>mistral_base_chat.toml</file>
|
<file>mistral_base_chat.toml</file>
|
||||||
<file>mistral_chat.toml</file>
|
<file>mistral_chat.toml</file>
|
||||||
<file>mistral_chat_reasoning.toml</file>
|
<file>mistral_chat_reasoning.toml</file>
|
||||||
@@ -40,6 +43,16 @@
|
|||||||
<file>ollama_compression_16gb.toml</file>
|
<file>ollama_compression_16gb.toml</file>
|
||||||
<file>ollama_compression_32gb.toml</file>
|
<file>ollama_compression_32gb.toml</file>
|
||||||
<file>ollama_chat_gemma4.toml</file>
|
<file>ollama_chat_gemma4.toml</file>
|
||||||
|
<file>llamacpp_base_chat.toml</file>
|
||||||
|
<file>llamacpp_chat.toml</file>
|
||||||
|
<file>llamacpp_completion_fim.toml</file>
|
||||||
|
<file>llamacpp_compression.toml</file>
|
||||||
|
<file>llamacpp_quick_refactor.toml</file>
|
||||||
|
<file>lmstudio_base_responses.toml</file>
|
||||||
|
<file>lmstudio_chat.toml</file>
|
||||||
|
<file>lmstudio_completion.toml</file>
|
||||||
|
<file>lmstudio_compression.toml</file>
|
||||||
|
<file>lmstudio_quick_refactor.toml</file>
|
||||||
</qresource>
|
</qresource>
|
||||||
<qresource prefix="/roles">
|
<qresource prefix="/roles">
|
||||||
<file alias="qt-cpp-developer.md">roles/qt-cpp-developer.md</file>
|
<file alias="qt-cpp-developer.md">roles/qt-cpp-developer.md</file>
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ system_prompt = """
|
|||||||
|
|
||||||
[body]
|
[body]
|
||||||
max_tokens = 512
|
max_tokens = 512
|
||||||
temperature = 0.2
|
temperature = 0
|
||||||
stop_sequences = ["</code_context>"]
|
stop_sequences = ["</code_context>"]
|
||||||
messages = """
|
messages = """
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -9,4 +9,4 @@ tags = ["completion", "codestral", "mistral", "cloud", "fim"]
|
|||||||
|
|
||||||
[body]
|
[body]
|
||||||
max_tokens = 256
|
max_tokens = 256
|
||||||
temperature = 0.2
|
temperature = 0
|
||||||
|
|||||||
31
sources/agents/google_completion.toml
Normal file
31
sources/agents/google_completion.toml
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "Google Base Chat"
|
||||||
|
name = "Google Completion"
|
||||||
|
description = "Google Gemini 3.1 Flash-Lite — code completion using the <code_context> chat format over generateContent. Thinking disabled (thinkingBudget=0) and temperature=0 for fast, deterministic insertions; stops at </code_context>."
|
||||||
|
|
||||||
|
model = "gemini-3.1-flash-lite"
|
||||||
|
tags = ["completion", "gemini", "google", "cloud"]
|
||||||
|
|
||||||
|
system_prompt = """
|
||||||
|
{%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
|
||||||
|
{%- else if language == "c-like" %}{{ read_file(":/roles/code-completion-c-like.md") }}
|
||||||
|
{%- else %}{{ read_file(":/roles/code-completion.md") }}
|
||||||
|
{%- endif %}
|
||||||
|
{{ read_file(":/tasks/code-completion.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
contents = """
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"parts": [ { "text": {{ tojson("Here is the code context with insertion points:\\n<code_context>\\n" + ctx.prefix + "<cursor>" + ctx.suffix + "\\n</code_context>") }} } ]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
|
||||||
|
[body.generationConfig]
|
||||||
|
maxOutputTokens = 1024
|
||||||
|
temperature = 0
|
||||||
|
stopSequences = ["</code_context>"]
|
||||||
|
thinkingConfig = { thinkingBudget = 0 }
|
||||||
16
sources/agents/google_compression.toml
Normal file
16
sources/agents/google_compression.toml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "Google Base Chat"
|
||||||
|
name = "Google Compression"
|
||||||
|
description = "Google Gemini 3.1 Flash-Lite — fast, low-cost conversation summarization. Carries the summary system prompt; no tools, thinking disabled (thinkingBudget=0)."
|
||||||
|
|
||||||
|
model = "gemini-3.1-flash-lite"
|
||||||
|
enable_tools = false
|
||||||
|
tags = ["compression", "gemini", "google", "cloud"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body.generationConfig]
|
||||||
|
maxOutputTokens = 16000
|
||||||
|
temperature = 0.3
|
||||||
|
thinkingConfig = { thinkingBudget = 0 }
|
||||||
17
sources/agents/google_quick_refactor.toml
Normal file
17
sources/agents/google_quick_refactor.toml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "Google Base Chat"
|
||||||
|
name = "Google Quick Refactor"
|
||||||
|
description = "Google Gemini 3.5 Flash — agentic inline refactor with tools and thinking (gathers context before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context (file, code, cursor/selection)."
|
||||||
|
|
||||||
|
model = "gemini-3.5-flash"
|
||||||
|
enable_tools = true
|
||||||
|
enable_thinking = true
|
||||||
|
tags = ["refactor", "gemini", "google", "cloud"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
|
||||||
|
|
||||||
|
[body.generationConfig]
|
||||||
|
maxOutputTokens = 16000
|
||||||
|
temperature = 1
|
||||||
|
thinkingConfig = { includeThoughts = true, thinkingBudget = 8192 }
|
||||||
9
sources/agents/llamacpp_base_chat.toml
Normal file
9
sources/agents/llamacpp_base_chat.toml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "OpenAI Base Chat"
|
||||||
|
name = "llama.cpp Base Chat"
|
||||||
|
description = "llama.cpp server Chat Completions request body (OpenAI-compatible /v1/chat/completions). Abstract — extend it and set model."
|
||||||
|
abstract = true
|
||||||
|
|
||||||
|
provider_instance = "llama.cpp"
|
||||||
|
endpoint = "/v1/chat/completions"
|
||||||
15
sources/agents/llamacpp_chat.toml
Normal file
15
sources/agents/llamacpp_chat.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "llama.cpp Base Chat"
|
||||||
|
name = "llama.cpp Chat"
|
||||||
|
description = "Local llama.cpp (llama-server) — coding chat via the OpenAI-compatible Chat Completions API. llama-server serves whichever GGUF you loaded, so 'model' is only a label. Tool calling needs a tool-capable model and llama-server started with --jinja."
|
||||||
|
|
||||||
|
model = "qwen2.5-coder-7b-instruct"
|
||||||
|
enable_tools = true
|
||||||
|
tags = ["chat", "llama.cpp", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_tokens = 8192
|
||||||
|
temperature = 0.7
|
||||||
15
sources/agents/llamacpp_completion_fim.toml
Normal file
15
sources/agents/llamacpp_completion_fim.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
name = "llama.cpp Completion — FIM"
|
||||||
|
description = "Local llama.cpp native fill-in-the-middle via the /infill endpoint (input_prefix + input_suffix). Fast and clean, but the loaded GGUF MUST be a FIM-trained model (qwen2.5-coder, codellama-code, deepseek-coder, starcoder2, codegemma). A plain chat model produces garbage here — there is no native completion path for those. llama-server serves whichever model is loaded, so 'model' is only a label."
|
||||||
|
|
||||||
|
provider_instance = "llama.cpp"
|
||||||
|
endpoint = "/infill"
|
||||||
|
model = "qwen2.5-coder-7b"
|
||||||
|
tags = ["completion", "llama.cpp", "local", "fim"]
|
||||||
|
|
||||||
|
[body]
|
||||||
|
input_prefix = """{{ tojson(ctx.prefix) }}"""
|
||||||
|
input_suffix = """{% if existsIn(ctx, "suffix") %}{{ tojson(ctx.suffix) }}{% endif %}"""
|
||||||
|
n_predict = 256
|
||||||
|
temperature = 0.2
|
||||||
15
sources/agents/llamacpp_compression.toml
Normal file
15
sources/agents/llamacpp_compression.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "llama.cpp Base Chat"
|
||||||
|
name = "llama.cpp Compression"
|
||||||
|
description = "Local llama.cpp — conversation summarization via the OpenAI-compatible Chat Completions API. Carries the summary system prompt; no tools. llama-server serves whichever GGUF is loaded, so 'model' is only a label."
|
||||||
|
|
||||||
|
model = "qwen2.5-coder-7b-instruct"
|
||||||
|
enable_tools = false
|
||||||
|
tags = ["compression", "llama.cpp", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_tokens = 16000
|
||||||
|
temperature = 0.3
|
||||||
15
sources/agents/llamacpp_quick_refactor.toml
Normal file
15
sources/agents/llamacpp_quick_refactor.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "llama.cpp Base Chat"
|
||||||
|
name = "llama.cpp Quick Refactor"
|
||||||
|
description = "Local llama.cpp deterministic inline refactor via the OpenAI-compatible Chat Completions API. Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. Tools off by default for a fast single shot; enabling them needs a tool-capable model + llama-server --jinja."
|
||||||
|
|
||||||
|
model = "qwen2.5-coder-7b-instruct"
|
||||||
|
enable_tools = false
|
||||||
|
tags = ["refactor", "llama.cpp", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_tokens = 8192
|
||||||
|
temperature = 0.2
|
||||||
9
sources/agents/lmstudio_base_responses.toml
Normal file
9
sources/agents/lmstudio_base_responses.toml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "OpenAI Base Responses"
|
||||||
|
name = "LM Studio Base Responses"
|
||||||
|
description = "LM Studio Responses API request body (OpenAI-compatible /v1/responses). Abstract — extend it and set model."
|
||||||
|
abstract = true
|
||||||
|
|
||||||
|
provider_instance = "LM Studio (Responses API)"
|
||||||
|
endpoint = "/v1/responses"
|
||||||
15
sources/agents/lmstudio_chat.toml
Normal file
15
sources/agents/lmstudio_chat.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "LM Studio Base Responses"
|
||||||
|
name = "LM Studio Chat"
|
||||||
|
description = "Local LM Studio — coding chat via the OpenAI-compatible Responses API (/v1/responses) on Gemma 4 12B (tools + vision capable). Set 'model' to the identifier of the model loaded in LM Studio."
|
||||||
|
|
||||||
|
model = "google/gemma-4-12b"
|
||||||
|
enable_tools = true
|
||||||
|
tags = ["chat", "lmstudio", "responses", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_output_tokens = 8192
|
||||||
|
temperature = 0.7
|
||||||
24
sources/agents/lmstudio_completion.toml
Normal file
24
sources/agents/lmstudio_completion.toml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "LM Studio Base Responses"
|
||||||
|
name = "LM Studio Completion"
|
||||||
|
description = "Local LM Studio — code completion via the Responses API: the cursor sits in a <code_context> message, so the model continues the code. Use a NON-thinking instruct/code model and set `model` to whatever you have loaded (qwen2.5-coder-7b-instruct is a good pick). Avoid reasoning models: Gemma 4 (incl. -qat) and similar emit reasoning tokens before any code, the detailed completion task makes them deliberate for hundreds-to-thousands of tokens (worst on the no-op cases), and reasoning cannot be disabled via the Responses API — so they exhaust max_output_tokens and return an empty completion no matter how high it is set."
|
||||||
|
|
||||||
|
model = "qwen2.5-coder-7b-instruct"
|
||||||
|
tags = ["completion", "lmstudio", "responses", "local"]
|
||||||
|
|
||||||
|
system_prompt = """
|
||||||
|
{%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
|
||||||
|
{%- else if language == "c-like" %}{{ read_file(":/roles/code-completion-c-like.md") }}
|
||||||
|
{%- else %}{{ read_file(":/roles/code-completion.md") }}
|
||||||
|
{%- endif %}
|
||||||
|
{{ read_file(":/tasks/code-completion.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_output_tokens = 256
|
||||||
|
temperature = 0
|
||||||
|
input = """
|
||||||
|
[
|
||||||
|
{ "role": "user", "content": {{ tojson("Here is the code context with insertion points:\\n<code_context>\\n" + ctx.prefix + "<cursor>" + ctx.suffix + "\\n</code_context>") }} }
|
||||||
|
]
|
||||||
|
"""
|
||||||
15
sources/agents/lmstudio_compression.toml
Normal file
15
sources/agents/lmstudio_compression.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "LM Studio Base Responses"
|
||||||
|
name = "LM Studio Compression"
|
||||||
|
description = "Local LM Studio — conversation summarization via the Responses API. Carries the summary system prompt; no tools. Gemma 4 12B by default; set 'model' to the loaded model's identifier."
|
||||||
|
|
||||||
|
model = "google/gemma-4-12b"
|
||||||
|
enable_tools = false
|
||||||
|
tags = ["compression", "lmstudio", "responses", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_output_tokens = 16000
|
||||||
|
temperature = 0.3
|
||||||
15
sources/agents/lmstudio_quick_refactor.toml
Normal file
15
sources/agents/lmstudio_quick_refactor.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
schema_version = 1
|
||||||
|
|
||||||
|
extends = "LM Studio Base Responses"
|
||||||
|
name = "LM Studio Quick Refactor"
|
||||||
|
description = "Local LM Studio deterministic inline refactor via the Responses API. Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context. Tools off by default for a fast single shot; Gemma 4 12B is tool-capable if you enable them."
|
||||||
|
|
||||||
|
model = "google/gemma-4-12b"
|
||||||
|
enable_tools = false
|
||||||
|
tags = ["refactor", "lmstudio", "responses", "local"]
|
||||||
|
|
||||||
|
system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
max_output_tokens = 8192
|
||||||
|
temperature = 0.2
|
||||||
@@ -10,4 +10,4 @@ tags = ["completion", "mistral", "codestral", "cloud", "fim"]
|
|||||||
|
|
||||||
[body]
|
[body]
|
||||||
max_tokens = 256
|
max_tokens = 256
|
||||||
temperature = 0.2
|
temperature = 0
|
||||||
|
|||||||
@@ -2,12 +2,12 @@ schema_version = 1
|
|||||||
|
|
||||||
extends = "Ollama Base FIM"
|
extends = "Ollama Base FIM"
|
||||||
name = "Ollama Completion — FIM"
|
name = "Ollama Completion — FIM"
|
||||||
description = "Native fill-in-the-middle completion — uses the model's OWN FIM template (prompt+suffix on /api/generate). Fast and clean (no markdown or prose), but works ONLY with models that ship a FIM template, and those are few: the base / '-code' variants, NOT instruct/chat models. Verified to work: qwen2.5-coder (incl. -base), codellama:7b-code, deepseek-coder-v2 lite-base. A plain chat model outputs garbage here — use 'Ollama Completion — Chat-style' instead. Check a model: `ollama show <model> --modelfile` must mention 'Suffix'."
|
description = "Native fill-in-the-middle completion — uses the model's OWN FIM template (prompt+suffix on /api/generate). Fast and clean (no markdown or prose), but works ONLY with a true BASE / '-code' model. Pick a base tag explicitly: the bare 'qwen2.5-coder:7b' tag is the INSTRUCT model (it ships an im_start chat template), and on FIM it rambles whole programs and prose — use 'qwen2.5-coder:7b-base' instead. Verified base/-code FIM models: qwen2.5-coder:7b-base, codellama:7b-code, deepseek-coder-v2 lite-base. A plain chat/instruct model outputs garbage here — use 'Ollama Completion — Chat-style' instead. Check a model: `ollama show <model> --modelfile` must mention 'Suffix' and must NOT have an im_start/chat template."
|
||||||
|
|
||||||
model = "qwen2.5-coder:7b"
|
model = "qwen2.5-coder:7b-base-q5_K_M"
|
||||||
tags = ["completion", "ollama", "local", "fim", "8gb"]
|
tags = ["completion", "ollama", "local", "fim", "8gb"]
|
||||||
|
|
||||||
[body.options]
|
[body.options]
|
||||||
num_predict = 256
|
num_predict = 256
|
||||||
temperature = 0.2
|
temperature = 0
|
||||||
keep_alive = "5m"
|
keep_alive = "5m"
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ tags = ["compression", "ollama", "local", "16gb"]
|
|||||||
|
|
||||||
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
think = false
|
||||||
|
|
||||||
[body.options]
|
[body.options]
|
||||||
num_predict = 2048
|
num_predict = 2048
|
||||||
temperature = 0.3
|
temperature = 0.3
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ tags = ["compression", "ollama", "local", "32gb"]
|
|||||||
|
|
||||||
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
think = false
|
||||||
|
|
||||||
[body.options]
|
[body.options]
|
||||||
num_predict = 2048
|
num_predict = 2048
|
||||||
temperature = 0.3
|
temperature = 0.3
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ tags = ["compression", "ollama", "local", "8gb"]
|
|||||||
|
|
||||||
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
|
||||||
|
|
||||||
|
[body]
|
||||||
|
think = false
|
||||||
|
|
||||||
[body.options]
|
[body.options]
|
||||||
num_predict = 2048
|
num_predict = 2048
|
||||||
temperature = 0.3
|
temperature = 0.3
|
||||||
|
|||||||
@@ -1,20 +1,25 @@
|
|||||||
Core Requirements:
|
Core Requirements:
|
||||||
1. Continue code exactly from the cursor position, ensuring it properly connects with any existing code after the cursor
|
1. Continue code exactly from the cursor position, ensuring it properly connects with any existing code after the cursor
|
||||||
2. Never repeat existing code before or after the cursor
|
2. Never repeat existing code before or after the cursor — the text after <cursor> already exists, so do not reproduce any of it
|
||||||
|
|
||||||
Specific Guidelines:
|
Specific Guidelines:
|
||||||
- For function calls: Complete parameters with appropriate types and names
|
- For function calls: Complete parameters with appropriate types and names
|
||||||
- For class members: Respect access modifiers and class conventions
|
- For class members: Respect access modifiers and class conventions
|
||||||
- Respect existing indentation and formatting
|
- Respect existing indentation and formatting; do not re-emit indentation that already precedes the cursor
|
||||||
- Consider scope and visibility of referenced symbols
|
- Consider scope and visibility of referenced symbols (do not use a symbol that is only declared after the cursor)
|
||||||
- Ensure seamless integration with code both before and after the cursor
|
- Ensure seamless integration with code both before and after the cursor
|
||||||
|
|
||||||
|
When nothing should be inserted, return an empty code block. This applies only when:
|
||||||
|
- Any insertion would duplicate code that already appears after the cursor, or
|
||||||
|
- The cursor sits in the middle of an existing identifier or type name, or between a complete type and its variable name
|
||||||
|
Otherwise, always provide a completion (for example, fill an empty initializer list or argument list). In the no-insertion cases above, output an empty code block and nothing else — never describe the code, report errors, ask questions, or suggest alternatives.
|
||||||
|
|
||||||
Context Format:
|
Context Format:
|
||||||
<code_context>
|
<code_context>
|
||||||
...code before the cursor...<cursor>...code after the cursor...
|
...code before the cursor...<cursor>...code after the cursor...
|
||||||
</code_context>
|
</code_context>
|
||||||
|
|
||||||
Response Format:
|
Response Format:
|
||||||
- No explanations or comments
|
- Your entire response must be exactly one code block tagged with the language, and nothing else
|
||||||
- Only include new characters needed to create valid code
|
- Never write any sentence, note, explanation, or comment before or after the code block — not even to state that the code is already complete
|
||||||
- Should be codeblock with language
|
- Inside the block, include only the new characters needed at the cursor to form valid code; leave the block empty only in the no-insertion cases listed above
|
||||||
|
|||||||
2
sources/external/llmqore
vendored
2
sources/external/llmqore
vendored
Submodule sources/external/llmqore updated: ea44041b24...4450eceda9
@@ -48,7 +48,26 @@ QFuture<QList<QString>> GenericProvider::getInstalledModels(const QString &url)
|
|||||||
{
|
{
|
||||||
m_client->setUrl(url);
|
m_client->setUrl(url);
|
||||||
m_client->setApiKey(apiKey());
|
m_client->setApiKey(apiKey());
|
||||||
return m_client->listModels();
|
return m_client->listModels(modelsEndpoint(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
QString GenericProvider::modelsEndpoint(const QString &url) const
|
||||||
|
{
|
||||||
|
switch (m_id) {
|
||||||
|
case ProviderID::OpenAI:
|
||||||
|
case ProviderID::OpenAIResponses:
|
||||||
|
case ProviderID::OpenAICompatible:
|
||||||
|
case ProviderID::LMStudio:
|
||||||
|
case ProviderID::OpenRouter:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
QString base = url;
|
||||||
|
while (base.endsWith('/'))
|
||||||
|
base.chop(1);
|
||||||
|
return base.endsWith("/v1") ? QStringLiteral("/models") : QStringLiteral("/v1/models");
|
||||||
}
|
}
|
||||||
|
|
||||||
RequestID GenericProvider::sendRequest(
|
RequestID GenericProvider::sendRequest(
|
||||||
|
|||||||
@@ -39,6 +39,8 @@ public:
|
|||||||
const QUrl &url, const QJsonObject &payload, const QString &endpoint) override;
|
const QUrl &url, const QJsonObject &payload, const QString &endpoint) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
QString modelsEndpoint(const QString &url) const;
|
||||||
|
|
||||||
QString m_name;
|
QString m_name;
|
||||||
ProviderID m_id;
|
ProviderID m_id;
|
||||||
::LLMQore::BaseClient *m_client;
|
::LLMQore::BaseClient *m_client;
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ protected:
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
LLMQore::RequestID ask(const QString &, LLMQore::RequestMode) override { return {}; }
|
LLMQore::RequestID ask(const QString &, LLMQore::RequestMode) override { return {}; }
|
||||||
QFuture<QList<QString>> listModels() override { return {}; }
|
QFuture<QList<QString>> listModels(const QString & = {}) override { return {}; }
|
||||||
LLMQore::ToolSchemaFormat toolSchemaFormat() const override
|
LLMQore::ToolSchemaFormat toolSchemaFormat() const override
|
||||||
{
|
{
|
||||||
return LLMQore::ToolSchemaFormat::Claude;
|
return LLMQore::ToolSchemaFormat::Claude;
|
||||||
|
|||||||
Reference in New Issue
Block a user