File size: 5,543 Bytes
64b629a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
"""
Janus-35B — verify Modelfile and HF Ollama bridge files stay in sync.

The repo ships two parallel Ollama configurations:

  - ``Modelfile`` is consumed by the local-build path
    (``ollama create janus -f Modelfile``). It contains
    ``TEMPLATE`` / ``SYSTEM`` / ``PARAMETER`` directives.
  - ``template`` / ``system`` / ``params`` at the repo root are consumed by HF's
    Ollama bridge when users ``ollama run hf.co/FoolDev/janus`` directly. HF
    does NOT read the Modelfile (per https://huggingface.co/docs/hub/en/ollama).

If the two configurations drift apart, ``hf.co/...`` users and local-build
users get different behaviour — exactly the bug fixed in commit 70ccef1
("Add HF Ollama bridge files (template/system/params)"). This script is
the regression guard: it parses the Modelfile, loads the three bridge
files, and fails on any mismatch.

Usage:
    python3 scripts/check_bridge_sync.py
    # exit 0 if in sync, 1 (with diff details) if not.

Run this manually before pushing a Modelfile / bridge-file edit. The 27B
sibling repo wires an equivalent script into scripts/check.sh and a
pre-commit hook; this repo intentionally stays leaner and runs it
on demand.
"""
from __future__ import annotations

import json
import re
import sys
from pathlib import Path

# Repository root, taken as the directory one level above this script's folder.
ROOT = Path(__file__).resolve().parent.parent

# Ollama Modelfile reference: https://github.com/ollama/ollama/blob/main/docs/modelfile.md
# Triple-quoted blocks may span several lines (DOTALL), and every directive
# has to start at the beginning of a line (MULTILINE).
_BLOCK_FLAGS = re.MULTILINE | re.DOTALL
TEMPLATE_RE = re.compile(r'^TEMPLATE\s+"""(.*?)"""', _BLOCK_FLAGS)
SYSTEM_RE = re.compile(r'^SYSTEM\s+"""(.*?)"""', _BLOCK_FLAGS)
# One PARAMETER directive per match: group 1 is the key, group 2 is the raw
# value with trailing whitespace left out.
PARAMETER_RE = re.compile(r'^PARAMETER\s+(\S+)\s+(.*?)\s*$', flags=re.MULTILINE)


# PARAMETER keys that Ollama treats as numbers. Their values are cast so they
# compare equal to the JSON numbers stored in the ``params`` bridge file.
_NUMERIC_PARAMS = frozenset({
    "temperature", "top_p", "top_k", "min_p", "typical_p", "tfs_z",
    "repeat_penalty", "repeat_last_n", "presence_penalty", "frequency_penalty",
    "mirostat", "mirostat_eta", "mirostat_tau",
    "num_ctx", "num_predict", "num_gpu", "num_batch", "num_keep", "num_thread",
    "main_gpu", "seed",
})


def _coerce_number(text: str) -> object:
    """Return *text* as an int or finite float when it parses, else unchanged."""
    try:
        return int(text)
    except ValueError:
        pass
    try:
        number = float(text)
    except ValueError:
        return text
    # Keep "nan"/"inf" as text: NaN never compares equal to itself, so casting
    # it would report drift even when both sides agree.
    return number if abs(number) < float("inf") else text


def parse_modelfile(text: str) -> tuple[str, str, dict[str, object]]:
    """Extract the TEMPLATE, SYSTEM, and PARAMETER directives from a Modelfile.

    Args:
        text: Full contents of the Modelfile.

    Returns:
        A ``(template, system, params)`` tuple. ``template`` and ``system``
        are the raw contents of the first triple-quoted TEMPLATE / SYSTEM
        block. ``params`` maps each PARAMETER key to its value: numeric keys
        are cast to ``int`` or ``float``, every ``stop`` directive is
        collected (in file order) into a list under ``"stop"``, and any other
        value stays a string with one pair of surrounding double quotes
        removed. A key that appears more than once keeps its last value.

    Exits the process through ``die()`` when the TEMPLATE or SYSTEM block is
    missing.
    """
    tpl_match = TEMPLATE_RE.search(text)
    if not tpl_match:
        die("Modelfile has no TEMPLATE block")
    template = tpl_match.group(1)

    sys_match = SYSTEM_RE.search(text)
    if not sys_match:
        die("Modelfile has no SYSTEM block")
    system = sys_match.group(1)

    params: dict[str, object] = {}
    stops: list[str] = []
    for key, raw in PARAMETER_RE.findall(text):
        value = raw.strip()
        # Strip one pair of outer double quotes if present.
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
        # Stop tokens accumulate; everything else is scalar.
        if key == "stop":
            stops.append(value)
            continue
        # Try int first, then float, so exponent notation such as "1e-5" is
        # recognised as a number instead of silently staying a string.
        params[key] = _coerce_number(value) if key in _NUMERIC_PARAMS else value

    if stops:
        params["stop"] = stops

    return template, system, params


def die(msg: str) -> None:
    """Write ``[FAIL] <msg>`` to stderr and terminate with exit status 1."""
    sys.stderr.write(f"[FAIL] {msg}\n")
    raise SystemExit(1)


def diff_strings(label: str, expected: str, actual: str) -> bool:
    """Compare two texts and describe the first difference on stderr.

    Args:
        label: Name of the compared section; used in the failure header.
        expected: Text taken from the Modelfile.
        actual: Text taken from the bridge file.

    Returns:
        ``True`` when the texts are equal (nothing is printed), ``False``
        otherwise.
    """
    if expected == actual:
        return True
    print(f"[FAIL] {label} drift detected", file=sys.stderr)
    print(f"  Modelfile len={len(expected)}  bridge file len={len(actual)}", file=sys.stderr)
    # Show the first diverging line for quick orientation.
    e_lines = expected.splitlines()
    a_lines = actual.splitlines()
    for lineno, (e, a) in enumerate(zip(e_lines, a_lines), start=1):
        if e != a:
            print(f"  first diff at line {lineno}:", file=sys.stderr)
            print(f"    modelfile : {e!r}", file=sys.stderr)
            print(f"    bridge    : {a!r}", file=sys.stderr)
            return False
    if len(e_lines) != len(a_lines):
        print(f"  line count differs: modelfile={len(e_lines)} bridge={len(a_lines)}",
              file=sys.stderr)
        # All shared lines match, so one side simply continues; show where.
        if len(e_lines) > len(a_lines):
            side, extra = "modelfile", e_lines[len(a_lines)]
        else:
            side, extra = "bridge", a_lines[len(e_lines)]
        first_extra = min(len(e_lines), len(a_lines)) + 1
        print(f"  first extra line ({side}, line {first_extra}): {extra!r}",
              file=sys.stderr)
    else:
        # splitlines() drops terminators, so identical line lists mean the
        # mismatch is invisible in a line-by-line view.
        print("  lines match; texts differ only in line endings or a trailing newline",
              file=sys.stderr)
    return False


def main() -> int:
    """Check that the Modelfile and the three bridge files agree.

    Reads ``Modelfile``, ``template``, ``system`` and ``params`` from ``ROOT``,
    compares TEMPLATE, SYSTEM and the PARAMETER set against their bridge
    counterparts, and prints every mismatch to stderr.

    Returns:
        0 when everything is in sync; 1 on any drift, on an unreadable input
        file, or when ``params`` is not a JSON object. ``parse_modelfile`` may
        also terminate the process directly when the Modelfile lacks a
        TEMPLATE or SYSTEM block.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding, and turn I/O failures into a clean [FAIL]
    # line instead of a traceback.
    texts: dict[str, str] = {}
    for name in ("Modelfile", "template", "system", "params"):
        try:
            texts[name] = (ROOT / name).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f"[FAIL] cannot read {name}: {exc}", file=sys.stderr)
            return 1

    try:
        bridge_params = json.loads(texts["params"])
    except json.JSONDecodeError as exc:
        print(f"[FAIL] params is not valid JSON: {exc}", file=sys.stderr)
        return 1
    if not isinstance(bridge_params, dict):
        # The key-by-key comparison below needs a mapping.
        print("[FAIL] params must contain a JSON object", file=sys.stderr)
        return 1

    mf_template, mf_system, mf_params = parse_modelfile(texts["Modelfile"])

    ok = True

    # 1. TEMPLATE: exact comparison, no trimming.
    ok &= diff_strings("TEMPLATE", mf_template, texts["template"])

    # 2. SYSTEM: trim leading and trailing whitespace on both sides. The
    #    bridge file typically has a trailing newline; the Modelfile block
    #    doesn't.
    ok &= diff_strings("SYSTEM", mf_system.strip(), texts["system"].strip())

    # 3. PARAMETER vs params JSON: compare normalized dicts.
    if mf_params != bridge_params:
        print("[FAIL] params drift detected", file=sys.stderr)
        for k in sorted(set(mf_params) | set(bridge_params)):
            mv = mf_params.get(k, "<missing>")
            bv = bridge_params.get(k, "<missing>")
            if mv != bv:
                print(f"  {k}: modelfile={mv!r}  bridge={bv!r}", file=sys.stderr)
        ok = False

    if not ok:
        print("\n[!] Modelfile and bridge files are out of sync.", file=sys.stderr)
        print("    Edit them together: any change to TEMPLATE / SYSTEM /",
              file=sys.stderr)
        print("    PARAMETER must be reflected in template / system / params.",
              file=sys.stderr)
        return 1

    print("[ ok ] Modelfile <-> bridge files in sync")
    return 0


# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())