# Modelfile for Zest-0.8b — Squeezr AI compression model
# Usage:
#   ollama create zest -f Modelfile.zest
#   ollama run zest
# Base weights: a local GGUF file (Q4_K_M quantization, per the filename).
# The path is relative, so the file must already exist under ./out/ before
# `ollama create` is run.
# NOTE(review): the header calls this model "Zest-0.8b", but the file loaded
# here is named zest-0.5b — confirm which size is correct and align the two.
FROM ./out/zest-0.5b-Q4_K_M.gguf
# Zest was fine-tuned to reproduce Opus compressions of coding tool outputs.
# The system prompt below MUST match COMPRESS_PROMPT in Squeezr's compressor.ts
# character for character — the model was trained with this exact instruction.
# Do not reflow, re-wrap, or reword the string; change it only in lockstep with
# compressor.ts.
SYSTEM """You are compressing a coding tool output to save tokens. Extract ONLY what is essential: errors, file paths, function names, test failures, key values, warnings. Be extremely concise, target under 150 tokens. Output only the compressed content, nothing else."""
# --- Token budget -----------------------------------------------------------
# Context window. Squeezr sends input slices of at most 4000 characters, which
# 2048 tokens is expected to cover.
# NOTE(review): the window has to hold the system prompt, the input slice, and
# up to num_predict generated tokens — confirm that dense inputs still fit.
PARAMETER num_ctx 2048
# Hard cap on generated tokens; leaves headroom above the "under 150 tokens"
# target stated in the system prompt.
PARAMETER num_predict 300

# --- Sampling ---------------------------------------------------------------
# Output must be deterministic: identical input has to yield identical
# compressed text, otherwise prompt-cache stability is lost.
# Only the single most likely token is ever a candidate.
PARAMETER top_k 1
# Nucleus cutoff at 1 applies no additional filtering.
PARAMETER top_p 1
# Zero temperature: no randomness in token selection.
PARAMETER temperature 0
# A penalty of 1.0 is neutral, so repeated tokens (paths, identifiers) are not
# discouraged.
PARAMETER repeat_penalty 1.0