#!/usr/bin/env python3
"""
Thanatos-27B — verify Modelfile and HF Ollama bridge files stay in sync.

The repo ships two parallel Ollama configurations:

- ``Modelfile`` is consumed by the local-build path
  (``ollama create -f Modelfile``). It contains ``TEMPLATE`` / ``SYSTEM`` /
  ``PARAMETER`` directives.
- ``template`` / ``system`` / ``params`` at the repo root are consumed by HF's
  Ollama bridge when users ``ollama run hf.co/FoolDev/Thanatos-27B`` directly.
  HF does NOT read the Modelfile (per https://huggingface.co/docs/hub/en/ollama).

If the two configurations drift apart, ``hf.co/...`` users and ``make build``
users get different behaviour — exactly the bug we shipped before commits
33458f7 / 70ccef1 fixed it. This script is the regression guard: it parses the
Modelfile, loads the three bridge files, and fails on any mismatch.

Usage:
    python3 scripts/check_bridge_sync.py
    # exit 0 if in sync, 1 (with diff details) if not.

Called from scripts/check.sh as part of the standard lint pass, so the
pre-commit hook catches drift before it lands.
"""

from __future__ import annotations

import json
import re
import sys
from pathlib import Path
from typing import NoReturn

ROOT = Path(__file__).resolve().parent.parent

# Ollama Modelfile reference: https://github.com/ollama/ollama/blob/main/docs/modelfile.md
TEMPLATE_RE = re.compile(r'^TEMPLATE\s+"""(.*?)"""', re.DOTALL | re.MULTILINE)
SYSTEM_RE = re.compile(r'^SYSTEM\s+"""(.*?)"""', re.DOTALL | re.MULTILINE)
PARAMETER_RE = re.compile(r'^PARAMETER\s+(\S+)\s+(.*?)\s*$', re.MULTILINE)

# PARAMETER names whose values are numbers. They are cast so that they compare
# equal to the JSON numbers stored in the bridge ``params`` file.
_NUMERIC_PARAMS = frozenset({
    "temperature", "top_p", "top_k", "min_p", "tfs_z",
    "repeat_penalty", "repeat_last_n",
    "mirostat", "mirostat_eta", "mirostat_tau",
    "num_ctx", "num_predict", "num_gpu", "num_batch", "seed",
})

# Marks "key absent" in the params report, so an absent key is never confused
# with a key whose value is the empty string.
_MISSING = object()


def _coerce_number(raw: str) -> object:
    """Return *raw* as an int, else a float, else unchanged if it is not numeric."""
    # int first keeps "40" an int; float then covers "0.7" as well as
    # exponent forms such as "1e-3" that contain no decimal point.
    for cast in (int, float):
        try:
            return cast(raw)
        except ValueError:
            continue
    return raw


def parse_modelfile(text: str) -> tuple[str, str, dict[str, object]]:
    """Extract TEMPLATE, SYSTEM, and PARAMETER blocks from a Modelfile.

    Returns ``(template, system, params)``. ``template`` and ``system`` are the
    raw contents between the triple quotes of the first matching block.
    ``params`` maps each PARAMETER name to its value: known numeric parameters
    are cast to int/float, repeated ``stop`` directives are collected into a
    list under ``"stop"``, and everything else stays a string with one pair of
    surrounding double quotes removed.

    Exits the process via ``die`` when the TEMPLATE or SYSTEM block is missing.
    """
    tpl_match = TEMPLATE_RE.search(text)
    if not tpl_match:
        die("Modelfile has no TEMPLATE block")
    template = tpl_match.group(1)

    sys_match = SYSTEM_RE.search(text)
    if not sys_match:
        die("Modelfile has no SYSTEM block")
    system = sys_match.group(1)

    # Drop the triple-quoted blocks before scanning for directives, so a line
    # that merely starts with "PARAMETER" inside the prompt or template text is
    # not mistaken for a real directive.
    directives = SYSTEM_RE.sub("", TEMPLATE_RE.sub("", text))

    params: dict[str, object] = {}
    stops: list[str] = []
    for key, raw in PARAMETER_RE.findall(directives):
        # Strip outer quotes if present.
        value = raw.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]

        # Stop tokens accumulate; everything else is scalar.
        if key == "stop":
            stops.append(value)
            continue

        params[key] = _coerce_number(value) if key in _NUMERIC_PARAMS else value

    if stops:
        params["stop"] = stops
    return template, system, params


def die(msg: str) -> NoReturn:
    """Print a ``[FAIL]`` message to stderr and exit with status 1."""
    print(f"[FAIL] {msg}", file=sys.stderr)
    sys.exit(1)


def diff_strings(label: str, expected: str, actual: str) -> bool:
    """Compare the Modelfile value with the bridge value; report drift on stderr.

    Returns True when the strings are identical. Otherwise prints both lengths
    plus either the first differing line or the differing line counts, and
    returns False.
    """
    if expected == actual:
        return True

    print(f"[FAIL] {label} drift detected", file=sys.stderr)
    print(f" Modelfile len={len(expected)} bridge file len={len(actual)}", file=sys.stderr)

    # Keep the line terminators: a missing trailing newline or a CRLF/LF
    # mismatch is then visible in the repr instead of being dropped.
    e_lines = expected.splitlines(keepends=True)
    a_lines = actual.splitlines(keepends=True)

    # Show the first diverging line for quick orientation.
    for lineno, (e, a) in enumerate(zip(e_lines, a_lines), start=1):
        if e != a:
            print(f" first diff at line {lineno}:", file=sys.stderr)
            print(f" modelfile : {e!r}", file=sys.stderr)
            print(f" bridge : {a!r}", file=sys.stderr)
            return False

    # Every shared line matched, so one side is a strict prefix of the other.
    print(f" line count differs: modelfile={len(e_lines)} bridge={len(a_lines)}", file=sys.stderr)
    return False


def _read_repo_file(name: str) -> str:
    """Read ``ROOT / name`` as UTF-8, exiting through ``die`` if that fails."""
    path = ROOT / name
    try:
        # Explicit encoding: the result must not depend on the machine locale.
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        die(f"cannot read {path}: {exc}")


def _load_bridge_params() -> dict[str, object]:
    """Load the bridge ``params`` file, which must hold a JSON object."""
    raw = _read_repo_file("params")
    try:
        loaded = json.loads(raw)
    except json.JSONDecodeError as exc:
        die(f"params is not valid JSON: {exc}")
    if not isinstance(loaded, dict):
        die(f"params must be a JSON object, got {type(loaded).__name__}")
    return loaded


def _show(value: object) -> str:
    """Format one side of a params mismatch for the drift report."""
    return "<missing>" if value is _MISSING else repr(value)


def main() -> int:
    """Run the sync check; return 0 when in sync and 1 on any drift."""
    modelfile = _read_repo_file("Modelfile")
    bridge_template = _read_repo_file("template")
    bridge_system = _read_repo_file("system")
    bridge_params = _load_bridge_params()

    mf_template, mf_system, mf_params = parse_modelfile(modelfile)

    ok = True

    # 1. TEMPLATE: byte-for-byte.
    ok &= diff_strings("TEMPLATE", mf_template, bridge_template)

    # 2. SYSTEM: trim surrounding whitespace on both sides. The bridge file
    #    typically has a trailing newline; the Modelfile block doesn't.
    ok &= diff_strings("SYSTEM", mf_system.strip(), bridge_system.strip())

    # 3. PARAMETER vs params JSON: compare normalized dicts.
    if mf_params != bridge_params:
        print("[FAIL] params drift detected", file=sys.stderr)
        for k in sorted(set(mf_params) | set(bridge_params)):
            mv = mf_params.get(k, _MISSING)
            bv = bridge_params.get(k, _MISSING)
            if mv != bv:
                print(f" {k}: modelfile={_show(mv)} bridge={_show(bv)}", file=sys.stderr)
        ok = False

    if not ok:
        print("\n[!] Modelfile and bridge files are out of sync.", file=sys.stderr)
        print(" Edit them together: any change to TEMPLATE / SYSTEM /", file=sys.stderr)
        print(" PARAMETER must be reflected in template / system / params.", file=sys.stderr)
        return 1

    print("[ ok ] Modelfile <-> bridge files in sync")
    return 0


if __name__ == "__main__":
    sys.exit(main())