# LM Studio code-completion profile that talks to the Responses API.
# Root-table keys come first; request-body parameters live under [body].
schema_version = 1
extends = "LM Studio Base Responses"
name = "LM Studio Completion"
# Folded with line-ending backslashes: the parsed value is one single-line string.
description = """\
Local LM Studio — code completion via the Responses API: the cursor sits in a \
message, so the model continues the code. Use a NON-thinking instruct/code \
model and set `model` to whatever you have loaded (qwen2.5-coder-7b-instruct \
is a good pick). Avoid reasoning models: Gemma 4 (incl. -qat) and similar emit \
reasoning tokens before any code, the detailed completion task makes them \
deliberate for hundreds-to-thousands of tokens (worst on the no-op cases), and \
reasoning cannot be disabled via the Responses API — so they exhaust \
max_output_tokens and return an empty completion no matter how high it is set.\
"""
# Set this to the model actually loaded in LM Studio; it should be a
# non-reasoning instruct/code model (see `description` for the reason).
model = "qwen2.5-coder-7b-instruct"
tags = ["completion", "lmstudio", "responses", "local"]

# Template: selects a role file by `language` ("qml", "c-like", or the generic
# fallback) and then appends the shared code-completion task file.
# The line breaks in front of the `{%-` tags are cosmetic, assuming the template
# engine's usual whitespace control (a leading `-` strips preceding whitespace).
# NOTE(review): only a single space separates the role text from the task
# text — confirm whether a blank line was intended between the two includes.
system_prompt = """
{%- if language == "qml" %}{{ read_file(":/roles/code-completion-qml.md") }}
{%- else if language == "c-like" %}{{ read_file(":/roles/code-completion-c-like.md") }}
{%- else %}{{ read_file(":/roles/code-completion.md") }}
{%- endif %} {{ read_file(":/tasks/code-completion.md") }}"""

[body]
# Cap on generated tokens. Per `description`, reasoning models exhaust this cap
# whatever its value, so raising it does not make them usable.
max_output_tokens = 256
temperature = 0
# Template that renders to a JSON array holding one user message. The message
# text is built from ctx.prefix and ctx.suffix (presumably the code before and
# after the cursor — confirm) and JSON-encoded with tojson.
# NOTE(review): the "" between ctx.prefix and ctx.suffix adds nothing, yet the
# lead-in text mentions "insertion points" — a cursor marker may have been
# intended here; confirm against the task prompt before changing it.
input = """
[
  {
    "role": "user",
    "content": {{ tojson("Here is the code context with insertion points:\\n\\n" + ctx.prefix + "" + ctx.suffix + "\\n") }}
  }
]
"""