# Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).
#
# Refreshes the bundled OpenAI agent profiles under sources/agents/:
#   - agents.qrc: registers four new profiles (openai_chat_mini, openai_completion,
#     openai_compression, openai_quick_refactor).
#   - openai_chat.toml: model gpt-4o -> gpt-5.5; [body] replaces max_tokens/temperature
#     with max_completion_tokens/reasoning_effort.
#   - openai_chat_responses.toml: model o4-mini -> gpt-5.1; max_output_tokens
#     25000 -> 32000 and reasoning effort medium -> high.
#   - The four new profiles all extend "OpenAI Base Chat" and set
#     max_completion_tokens + reasoning_effort in [body].
#
# Layout notes:
#   - Each description is a single-line TOML basic string; the hunk lengths
#     (+1,16 / +1,30 / +1,15 / +1,16) only add up with the description on one line.
#   - Blank context lines carry a single leading space; blank added lines are a bare "+".
#
# NOTE(review): the agents.qrc entries are written as <file> elements with an assumed
#   8-space indent -- verify against the real file before applying.
# NOTE(review): indentation inside the `messages` template is assumed (2 spaces).
# NOTE(review): in openai_completion.toml the user message concatenates
#   ctx.prefix + "" + ctx.suffix, yet the prompt text promises "insertion points".
#   Presumably a cursor/insertion marker belongs in that empty string -- confirm
#   against :/tasks/code-completion.md.
# NOTE(review): openai_chat.toml says reasoning is not surfaced on Chat Completions,
#   but enable_thinking = true is kept on the Chat Completions profiles -- confirm
#   the flag is intended there.
diff --git a/sources/agents/agents.qrc b/sources/agents/agents.qrc
index 7b8edc3..ab8f57a 100644
--- a/sources/agents/agents.qrc
+++ b/sources/agents/agents.qrc
@@ -10,8 +10,12 @@
         <file>claude_quick_refactor_fast.toml</file>
         <file>openai_base_chat.toml</file>
         <file>openai_chat.toml</file>
+        <file>openai_chat_mini.toml</file>
         <file>openai_base_responses.toml</file>
         <file>openai_chat_responses.toml</file>
+        <file>openai_completion.toml</file>
+        <file>openai_compression.toml</file>
+        <file>openai_quick_refactor.toml</file>
         <file>google_base_chat.toml</file>
         <file>google_chat.toml</file>
         <file>mistral_base_chat.toml</file>
diff --git a/sources/agents/openai_chat.toml b/sources/agents/openai_chat.toml
index 2b9b2e4..5bf0a04 100644
--- a/sources/agents/openai_chat.toml
+++ b/sources/agents/openai_chat.toml
@@ -2,9 +2,9 @@ schema_version = 1
 
 extends = "OpenAI Base Chat"
 name = "OpenAI Chat"
-description = "OpenAI GPT-4o — coding chat via Chat Completions."
+description = "OpenAI GPT-5.5 — flagship coding chat via Chat Completions at high reasoning effort. The default OpenAI choice. NOTE: GPT-5 models reject 'max_tokens' and 'temperature' on Chat Completions — use 'max_completion_tokens' + 'reasoning_effort'. Reasoning runs internally and is NOT surfaced as a thinking block on Chat Completions; use 'OpenAI Chat — Responses' for visible reasoning summaries."
 
-model = "gpt-4o"
+model = "gpt-5.5"
 enable_tools = true
 enable_thinking = true
 tags = ["chat", "openai", "cloud"]
@@ -12,5 +12,5 @@ tags = ["chat", "openai", "cloud"]
 system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
 
 [body]
-max_tokens = 8192
-temperature = 0.7
+max_completion_tokens = 32000
+reasoning_effort = "high"
diff --git a/sources/agents/openai_chat_mini.toml b/sources/agents/openai_chat_mini.toml
new file mode 100644
index 0000000..46383de
--- /dev/null
+++ b/sources/agents/openai_chat_mini.toml
@@ -0,0 +1,16 @@
+schema_version = 1
+
+extends = "OpenAI Base Chat"
+name = "OpenAI Chat — Mini"
+description = "OpenAI GPT-5.4 mini — fast, lower-cost coding chat via Chat Completions at medium reasoning effort. NOTE: GPT-5 models use 'max_completion_tokens' + 'reasoning_effort' and reject 'temperature' on Chat Completions."
+
+model = "gpt-5.4-mini"
+enable_tools = true
+enable_thinking = true
+tags = ["chat", "openai", "cloud"]
+
+system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
+
+[body]
+max_completion_tokens = 16000
+reasoning_effort = "medium"
diff --git a/sources/agents/openai_chat_responses.toml b/sources/agents/openai_chat_responses.toml
index 33c11b2..6fc80f0 100644
--- a/sources/agents/openai_chat_responses.toml
+++ b/sources/agents/openai_chat_responses.toml
@@ -2,9 +2,9 @@ schema_version = 1
 
 extends = "OpenAI Base Responses"
 name = "OpenAI Chat — Responses"
-description = "OpenAI o4-mini — reasoning coding chat via the Responses API."
+description = "OpenAI GPT-5.1 — reasoning coding chat via the Responses API. Best for agentic coding; surfaces a reasoning summary (summary=auto) as a thinking block. Uses max_output_tokens + the reasoning object (reasoning models reject temperature)."
 
-model = "o4-mini"
+model = "gpt-5.1"
 enable_tools = true
 enable_thinking = true
 tags = ["chat", "openai", "responses", "cloud"]
@@ -12,5 +12,5 @@ tags = ["chat", "openai", "responses", "cloud"]
 system_prompt = """{{ read_file(":/roles/qt-cpp-developer.md") }}"""
 
 [body]
-max_output_tokens = 25000
-reasoning = { effort = "medium", summary = "auto" }
+max_output_tokens = 32000
+reasoning = { effort = "high", summary = "auto" }
diff --git a/sources/agents/openai_completion.toml b/sources/agents/openai_completion.toml
new file mode 100644
index 0000000..851fd88
--- /dev/null
+++ b/sources/agents/openai_completion.toml
@@ -0,0 +1,30 @@
+schema_version = 1
+
+extends = "OpenAI Base Chat"
+name = "OpenAI Completion"
+description = "OpenAI GPT-5.4 mini — code completion using the chat format over Chat Completions. reasoning_effort=none for low latency. GPT-5 models reject 'temperature' AND 'stop' on Chat Completions, so completion cannot be pinned to t=0 and has no stop sequence — it relies on the prompt + max_completion_tokens to stay short."
+
+model = "gpt-5.4-mini"
+tags = ["completion", "openai", "cloud"]
+
+system_prompt = """
+{%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
+{%- else if language == "c-like" %}{{ read_file(":/roles/code-completion-c-like.md") }}
+{%- else %}{{ read_file(":/roles/code-completion.md") }}
+{%- endif %}
+{{ read_file(":/tasks/code-completion.md") }}"""
+
+[body]
+max_completion_tokens = 1024
+reasoning_effort = "none"
+messages = """
+[
+  {% if existsIn(ctx, "system_prompt") %}
+  { "role": "system", "content": {{ tojson(ctx.system_prompt) }} },
+  {% endif %}
+  {
+    "role": "user",
+    "content": {{ tojson("Here is the code context with insertion points:\\n\\n" + ctx.prefix + "" + ctx.suffix + "\\n") }}
+  }
+]
+"""
diff --git a/sources/agents/openai_compression.toml b/sources/agents/openai_compression.toml
new file mode 100644
index 0000000..61d15c9
--- /dev/null
+++ b/sources/agents/openai_compression.toml
@@ -0,0 +1,15 @@
+schema_version = 1
+
+extends = "OpenAI Base Chat"
+name = "OpenAI Compression"
+description = "OpenAI GPT-5.4 mini — fast, low-cost conversation summarization for shorter chats. Carries the summary system prompt; no tools, reasoning_effort=none. GPT-5 models reject 'temperature' on Chat Completions."
+
+model = "gpt-5.4-mini"
+enable_tools = false
+tags = ["compression", "openai", "cloud"]
+
+system_prompt = """{{ read_file(":/tasks/chat-compressor.md") }}"""
+
+[body]
+max_completion_tokens = 16000
+reasoning_effort = "none"
diff --git a/sources/agents/openai_quick_refactor.toml b/sources/agents/openai_quick_refactor.toml
new file mode 100644
index 0000000..582c808
--- /dev/null
+++ b/sources/agents/openai_quick_refactor.toml
@@ -0,0 +1,16 @@
+schema_version = 1
+
+extends = "OpenAI Base Chat"
+name = "OpenAI Quick Refactor"
+description = "OpenAI GPT-5.1 — agentic inline refactor with tools at high reasoning effort (gathers context before editing). Static output rules from :/tasks/quick-refactor.md; QuickRefactorHandler injects the live editor context (file, code, cursor/selection). GPT-5 models use 'max_completion_tokens' + 'reasoning_effort' and reject 'temperature'."
+
+model = "gpt-5.1"
+enable_tools = true
+enable_thinking = true
+tags = ["refactor", "openai", "cloud"]
+
+system_prompt = """{{ read_file(":/tasks/quick-refactor.md") }}"""
+
+[body]
+max_completion_tokens = 16000
+reasoning_effort = "high"