# Mirror of https://github.com/Palm1r/QodeAssist.git
# Synced 2026-06-30 01:59:11 -04:00
# Listing metadata at sync time: 18 lines, 616 B, TOML
# Profile for conversation summarization through a local Ollama model,
# sized for machines with roughly 8 GB of RAM (see `description`).
schema_version = 1

# Parent profile this one builds on.
# NOTE(review): presumably keys set here override the parent's — confirm
# the merge semantics against the profile loader.
extends = "Ollama Base Chat"
name = "Ollama Compression — 8 GB"
description = "Conversation summarization tuned for ~8 GB RAM — qwen3.5:4b at num_ctx 8192 (~5.5 GB resident). Smallest qwen3.5 (same family as the bigger tiers), decent context for the footprint. KV cache is reserved at load per num_ctx. No tools."

model = "qwen3.5:4b"
# Matches the "No tools." statement in `description`.
enable_tools = false
tags = ["compression", "ollama", "local", "8gb"]

# The `{{ ... }}` expression is not TOML syntax: TOML performs no
# interpolation or file inclusion, so the consuming application has to
# expand it. To a TOML parser this is just a literal string.
system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""

# NOTE(review): the table path suggests these keys are forwarded as the
# `options` object of the request body — confirm against the loader.
[body.options]
num_predict = 2048
temperature = 0.3
# Keep in step with the "num_ctx 8192" figure quoted in `description`.
num_ctx = 8192
# NOTE(review): Ollama's API documents `keep_alive` as a top-level request
# field rather than a member of `options` — confirm it takes effect here.
keep_alive = "5m"