# Ollama Modelfile for SeloraAI-Local / clarification specialist (Qwen3 1.7B)
# Build:  ollama create selora-qwen-clarification -f Modelfile.clarifications
# Run:    ollama run selora-qwen-clarification

# Base model weights: a local GGUF file (path is relative to this Modelfile).
FROM ../qwen3_17b_base.IQ4_XS.gguf
# LoRA adapter applied on top of the base model above (path is relative to
# this Modelfile). The adapter should have been tuned from this same base
# model; with a mismatched base the behaviour is erratic.
ADAPTER ../qwen3_17b_clarification.lora.gguf

# Qwen3 chat template (ChatML, /no_think to suppress reasoning).
# `{{ .Response }}` marks where the assistant text goes. When generating,
# Ollama omits everything after it, so the prompt still ends at
# "<|im_start|>assistant\n". The trailing <|im_end|> only takes effect when an
# earlier assistant reply is rendered as chat history, where it closes that
# turn so the next <|im_start|> begins a well-formed ChatML message.
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
/no_think {{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""

# Trained per-specialist system prompt (matches current training data).
# This text is what the template substitutes for {{ .System }}.
# NOTE(review): presumably any wording change here must be mirrored in the
# training data (and vice versa) — confirm before editing.
SYSTEM """You are Selora AI on Home Assistant. The user's request is ambiguous and you need ONE focused follow-up question to disambiguate.

Return ONE JSON object:
{"intent":"clarification","response":"<one specific question>"}

RULES:
- Ask exactly ONE question. No filler.
- Be specific: name the candidate entities or actions when possible (e.g., "Which light — kitchen or hallway?").
- No preamble, no apology. Just the question.
- Output ONLY the JSON object."""

# Generation params — matches what the integration sends + repeat_penalty for Qwen
# Temperature 0.0: the least "creative" sampling setting.
PARAMETER temperature 0.0
# Penalize repeated tokens (values above 1.0 penalize more strongly).
PARAMETER repeat_penalty 1.15
# How many of the most recent tokens the repetition penalty looks back over.
PARAMETER repeat_last_n 256
# Stop sequences: generation ends as soon as either marker is produced.
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"