Thanatos-27B / scripts /check_bridge_sync.py
FoolDev's picture
Rename back: Thanatos-27B-Heretic → Thanatos-27B (HF repo also renamed)
7197abd
#!/usr/bin/env python3
"""
Thanatos-27B — verify Modelfile and HF Ollama bridge files stay in sync.
The repo ships two parallel Ollama configurations:
- ``Modelfile`` is consumed by the local-build path (``ollama create -f Modelfile``).
It contains ``TEMPLATE`` / ``SYSTEM`` / ``PARAMETER`` directives.
- ``template`` / ``system`` / ``params`` at the repo root are consumed by HF's
Ollama bridge when users ``ollama run hf.co/FoolDev/Thanatos-27B`` directly. HF
does NOT read the Modelfile (per https://huggingface.co/docs/hub/en/ollama).
If the two configurations drift apart, ``hf.co/...`` users and ``make build``
users get different behaviour — exactly the bug we shipped before commits
33458f7 / 70ccef1 fixed it. This script is the regression guard: it parses the
Modelfile, loads the three bridge files, and fails on any mismatch.
Usage:
python3 scripts/check_bridge_sync.py
# exit 0 if in sync, 1 (with diff details) if not.
Called from scripts/check.sh as part of the standard lint pass, so the
pre-commit hook catches drift before it lands.
"""
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
# Repository root: the directory two levels above this script's resolved path.
ROOT = Path(__file__).resolve().parent.parent

# Directive patterns, written against the Ollama Modelfile reference:
# https://github.com/ollama/ollama/blob/main/docs/modelfile.md
#
# TEMPLATE / SYSTEM capture the text between the triple quotes; DOTALL lets
# that text span several lines, MULTILINE anchors the keyword at a line start.
TEMPLATE_RE = re.compile(r'^TEMPLATE\s+"""(.*?)"""', flags=re.MULTILINE | re.DOTALL)
SYSTEM_RE = re.compile(r'^SYSTEM\s+"""(.*?)"""', flags=re.MULTILINE | re.DOTALL)
# PARAMETER captures (key, raw value), with trailing whitespace left out of
# the value group.
PARAMETER_RE = re.compile(r'^PARAMETER\s+(\S+)\s+(.*?)\s*$', flags=re.MULTILINE)
# PARAMETER keys that Ollama treats as numbers. Their values are cast so they
# compare equal to the numeric values stored in the ``params`` JSON file.
_NUMERIC_PARAMS = frozenset({
    "temperature", "top_p", "top_k", "min_p", "typical_p", "tfs_z",
    "repeat_penalty", "repeat_last_n", "presence_penalty", "frequency_penalty",
    "mirostat", "mirostat_eta", "mirostat_tau",
    "num_ctx", "num_predict", "num_keep", "num_gpu", "num_batch", "num_thread",
    "seed",
})


def _coerce_number(raw: str) -> object:
    """Return *raw* as an int, else a float, else unchanged if it is neither."""
    # int first so "40" stays an int; float second so "0.7" and "1e-4" both
    # become floats (deciding by the presence of "." misses exponent notation).
    for cast in (int, float):
        try:
            return cast(raw)
        except ValueError:
            continue
    return raw


def parse_modelfile(text: str) -> tuple[str, str, dict[str, object]]:
    """Extract the TEMPLATE, SYSTEM, and PARAMETER directives from a Modelfile.

    Args:
        text: Full contents of the Modelfile.

    Returns:
        ``(template, system, params)``. ``template`` and ``system`` are the
        raw text found between the triple quotes of their blocks. ``params``
        maps each PARAMETER key to its value: one pair of surrounding double
        quotes is stripped, numeric keys are cast to ``int``/``float``, and
        all ``stop`` directives are collected in file order into a list
        stored under ``"stop"``. For any other repeated key the last
        occurrence wins.

    Calls ``die`` (which terminates the process) when the TEMPLATE or SYSTEM
    block is missing.
    """
    tpl_match = TEMPLATE_RE.search(text)
    if not tpl_match:
        die("Modelfile has no TEMPLATE block")
    template = tpl_match.group(1)

    sys_match = SYSTEM_RE.search(text)
    if not sys_match:
        die("Modelfile has no SYSTEM block")
    system = sys_match.group(1)

    params: dict[str, object] = {}
    stops: list[str] = []
    for key, raw in PARAMETER_RE.findall(text):
        # Strip outer quotes if present.
        value = raw.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]

        # Stop tokens accumulate; everything else is scalar.
        if key == "stop":
            stops.append(value)
            continue

        params[key] = _coerce_number(value) if key in _NUMERIC_PARAMS else value

    if stops:
        params["stop"] = stops
    return template, system, params
def die(msg: str) -> None:
    """Write *msg* to stderr behind a ``[FAIL]`` tag, then exit with status 1.

    This function never returns normally: it always raises ``SystemExit``.
    """
    print(f"[FAIL] {msg}", file=sys.stderr)
    raise SystemExit(1)
def diff_strings(label: str, expected: str, actual: str) -> bool:
    """Compare two texts and describe the first difference on stderr.

    Args:
        label: Name of the section being compared; used in the failure header.
        expected: Text taken from the Modelfile.
        actual: Text taken from the matching bridge file.

    Returns:
        True when the texts are identical (nothing is printed), False
        otherwise. On a mismatch the report contains both lengths plus the
        most specific location available: the first differing line, a line
        count mismatch, or the first differing character offset.
    """
    if expected == actual:
        return True

    print(f"[FAIL] {label} drift detected", file=sys.stderr)
    print(f" Modelfile len={len(expected)} bridge file len={len(actual)}", file=sys.stderr)

    # Show the first diverging line for quick orientation.
    e_lines = expected.splitlines()
    a_lines = actual.splitlines()
    for lineno, (e, a) in enumerate(zip(e_lines, a_lines), start=1):
        if e != a:
            print(f" first diff at line {lineno}:", file=sys.stderr)
            print(f" modelfile : {e!r}", file=sys.stderr)
            print(f" bridge : {a!r}", file=sys.stderr)
            return False

    if len(e_lines) != len(a_lines):
        print(f" line count differs: modelfile={len(e_lines)} bridge={len(a_lines)}",
              file=sys.stderr)
        return False

    # The splitlines() views are identical, so the texts differ only in their
    # line terminators (for example a trailing newline). Without this fallback
    # the report would give lengths only, so locate the first differing
    # character instead.
    offset = next(
        (i for i, (e_ch, a_ch) in enumerate(zip(expected, actual)) if e_ch != a_ch),
        min(len(expected), len(actual)),
    )
    print(f" lines are identical; texts differ only in line terminators, "
          f"first at offset {offset}:", file=sys.stderr)
    print(f" modelfile : {expected[offset:offset + 20]!r}", file=sys.stderr)
    print(f" bridge : {actual[offset:offset + 20]!r}", file=sys.stderr)
    return False
def main() -> int:
    """Check that the Modelfile and the HF Ollama bridge files agree.

    Reads ``Modelfile``, ``template``, ``system`` and ``params`` from ``ROOT``
    and compares TEMPLATE byte-for-byte, SYSTEM after stripping surrounding
    whitespace, and the PARAMETER directives against the ``params`` JSON
    object.

    Returns:
        0 when everything is in sync; 1 when an input cannot be loaded or any
        section has drifted. Details are printed to stderr.
    """
    # Decode explicitly as UTF-8 so the comparison does not depend on the
    # platform's locale encoding, and report an unreadable file as a normal
    # [FAIL] line instead of a traceback.
    texts: dict[str, str] = {}
    for name in ("Modelfile", "template", "system", "params"):
        try:
            texts[name] = (ROOT / name).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f"[FAIL] cannot read {name}: {exc}", file=sys.stderr)
            return 1

    try:
        bridge_params = json.loads(texts["params"])
    except json.JSONDecodeError as exc:
        print(f"[FAIL] params is not valid JSON: {exc}", file=sys.stderr)
        return 1
    if not isinstance(bridge_params, dict):
        # The per-key report below relies on dict lookups.
        print(f"[FAIL] params must be a JSON object, got {type(bridge_params).__name__}",
              file=sys.stderr)
        return 1

    mf_template, mf_system, mf_params = parse_modelfile(texts["Modelfile"])
    ok = True

    # 1. TEMPLATE: byte-for-byte.
    ok &= diff_strings("TEMPLATE", mf_template, texts["template"])

    # 2. SYSTEM: strip surrounding whitespace on both sides. The bridge file
    #    typically has a trailing newline; the Modelfile block doesn't.
    ok &= diff_strings("SYSTEM", mf_system.strip(), texts["system"].strip())

    # 3. PARAMETER vs params JSON: compare normalized dicts.
    if mf_params != bridge_params:
        print("[FAIL] params drift detected", file=sys.stderr)
        for k in sorted(set(mf_params) | set(bridge_params)):
            mv = mf_params.get(k, "<missing>")
            bv = bridge_params.get(k, "<missing>")
            if mv != bv:
                print(f" {k}: modelfile={mv!r} bridge={bv!r}", file=sys.stderr)
        ok = False

    if not ok:
        print("\n[!] Modelfile and bridge files are out of sync.", file=sys.stderr)
        print(" Edit them together: any change to TEMPLATE / SYSTEM /",
              file=sys.stderr)
        print(" PARAMETER must be reflected in template / system / params.",
              file=sys.stderr)
        return 1

    print("[ ok ] Modelfile <-> bridge files in sync")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())