# QodeAssist/sources/agents/ollama_completion_chat.toml

# Agent definition: chat-style code completion served by a local Ollama model.
schema_version = 1

# Parent agent whose settings this one inherits; keys below override it.
extends = "Ollama Base Chat"
name = "Ollama Completion — Chat-style"
# Folded with line-ending backslashes: each "\" drops the newline that
# follows it, so the parsed value is a single line of text.
description = """\
Code completion via a chat request — the cursor sits in a <code_context> \
message on /api/chat, so the model just continues the code. \
Works with ANY normal chat/instruct model (qwen2.5-coder, qwen3, llama3, \
gemma, mistral, codellama-instruct, …), so use this when your model is NOT \
a dedicated FIM model. \
Tradeoff vs native 'Ollama Completion — FIM': a bit slower and may leak \
code fences or explanations, so slightly less precise.\
"""

# Default model; any chat/instruct model can be substituted.
model = "qwen2.5-coder:7b"
# NOTE(review): "fim" is listed although the description says this agent is
# for non-FIM models — confirm the tag is intentional (e.g. used as a
# completion-category filter) and not copied from the FIM agent.
tags = ["completion", "ollama", "local", "fim", "8gb"]

# System prompt template. Picks a role prompt by `language` ("qml", "c-like",
# otherwise the generic one) and then appends the shared code-completion task
# prompt on a new line.
# NOTE(review): `read_file` and the ":/..." paths are resolved by the host
# application (they look like Qt resource paths) — confirm against the loader.
system_prompt = """
{%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
{%- else if language == "c-like" %}{{ read_file(":/roles/code-completion-c-like.md") }}
{%- else %}{{ read_file(":/roles/code-completion.md") }}
{%- endif %}
{{ read_file(":/tasks/code-completion.md") }}"""

# Request body sent to Ollama's chat endpoint (/api/chat).
[body]
# Template for the `messages` array: an optional system message (only when
# ctx.system_prompt exists) followed by one user message that wraps the code
# around the cursor in <code_context> tags, with "<cursor>" marking the
# insertion point between ctx.prefix and ctx.suffix. `tojson` escapes the text
# as a JSON string. The TOML escape "\\n" reaches the template as "\n".
messages = """
[
{% if existsIn(ctx, "system_prompt") %}
{ "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
{% endif %}
{ "role": "user", "content": {{ tojson("Here is the code context with insertion points:\\n<code_context>\\n" + ctx.prefix + "<cursor>" + ctx.suffix + "\\n</code_context>") }} }
]
"""
# How long Ollama keeps the model loaded after the request. This is a
# top-level request field in the Ollama API, not a model option, so it lives
# here rather than under [body.options].
keep_alive = "5m"

# Model runtime options (Ollama `options` object).
[body.options]
num_predict = 256
temperature = 0.2
# Stop generating if the model tries to close the wrapper tag itself.
stop        = ["</code_context>"]