Spaces:

build-small-hackathon
/

scrubdata

Running

OpenAI Codex

deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build

16dc556 11 days ago

1.54 kB

	# Non-thinking Modelfile for the fine-tuned planner.
	#
	# The published GGUF (ricalanis/scrubdata-qwen3-4b-gguf) ships Qwen3's full
	# thinking+tools template, which makes Ollama burn the token budget "thinking" and
	# return empty/garbage for our task. We fine-tuned the NON-thinking Instruct model to
	# emit the JSON plan directly, so override the template to match training.
	#
	# ALSO: the current Unsloth (2026.6.1) Q4_K_M GGUF export is CORRUPTED for this model
	# (degenerates into <tool_call> loops). Use Q8_0 — it works. Export with q8_0.
	#
	# ollama pull hf.co/ricalanis/scrubdata-qwen3-4b-v6-q8:Q8_0
	# ollama create scrubdata-ft -f notebooks/Modelfile
	# uv run eval/run_finetuned.py --model scrubdata-ft --n 40

	# CONSTRAINED DECODING REQUIRED ON LONG PROMPTS: on full planning prompts the Q8 GGUF's
	# first token can degenerate into <tool_call> loops (Qwen3 tool-calling prior). Use
	# format=json in the Ollama API call (grammar-constrained decoding), or under
	# transformers suppress_tokens=[151657, 151658]. See eval/capture_plan_local.py and the
	# model card's Integrity section (the GGUF itself was re-exported 2026-06-12; sha256s
	# recorded there).

	# v6 = mixA (more real paired data): hospital repair 0.475/0.185 (v4/v5 was 0/0.42)
	FROM hf.co/ricalanis/scrubdata-qwen3-4b-v6-q8:Q8_0
	TEMPLATE """{{- if .System }}<\|im_start\|>system
	{{ .System }}<\|im_end\|>
	{{ end }}{{- range .Messages }}<\|im_start\|>{{ .Role }}
	{{ .Content }}<\|im_end\|>
	{{ end }}<\|im_start\|>assistant
	"""
	PARAMETER stop "<\|im_end\|>"
	PARAMETER temperature 0