File size: 6,706 Bytes
e34beb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""
Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?"

Deterministic — no AI involved. Given any repo id (or model page URL), this:
  1. checks the local catalogue (offline) by repo id and aliases;
  2. otherwise makes ONE metadata call to the Hub, reads the model-tree tags
     (base_model:finetune/adapter/quantized/merge), and walks up to 3 hops to
     find a catalogue ancestor — "your finetune runs because its base runs";
  3. otherwise falls back to raw parameter-count math, clearly labelled.

This is the only part of FitCheck that touches the network at runtime, and the
UI labels it as a live lookup. The core advisor stays fully offline.
"""

import re
from functools import lru_cache

from .hardware import HardwareSpec
from .real_advisor import (
    USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue,
)

# Matches a typed model-tree tag of the form "base_model:<relation>:<parent>".
# Group 1 is the relation (finetune | adapter | quantized | merge); group 2 is
# everything after the second colon, which callers treat as the parent repo id.
_RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$")


@lru_cache(maxsize=1)
def _index() -> dict:
    """Build (once) a lowercase-name -> catalogue-entry lookup table.

    Every entry is reachable through its ``repo_id`` and through each of its
    optional ``aliases``; all keys are lowercased. The result is memoised, so
    the catalogue is only scanned on the first call.
    """
    table = {}
    for item in catalogue()["entries"]:
        # Canonical id first, then aliases, so the write order per entry is
        # the same as listing them one after another.
        names = [item["repo_id"], *item.get("aliases", [])]
        for name in names:
            table[name.lower()] = item
    return table


def normalize_repo_id(text: str) -> str:
    """Reduce a bare repo id or a Hugging Face URL to ``author/model``.

    Surrounding whitespace and trailing slashes are dropped. If the text
    contains a ``huggingface.co/`` or ``hf.co/`` (the short domain) prefix,
    the first two path segments after it are returned, so deeper links such
    as ``.../author/model/tree/main`` still resolve to the repo. Anything
    else is returned as cleaned text for the caller to validate.

    Args:
        text: User input; ``None`` or empty is treated as an empty string.

    Returns:
        The extracted ``author/model`` id, or the cleaned input when no
        Hugging Face host is present.
    """
    cleaned = (text or "").strip().rstrip("/")
    # `\b` keeps the short domain from matching inside a longer host name
    # (e.g. "myhf.co"); the long form is matched exactly as before.
    found = re.search(r"(?:huggingface\.co|\bhf\.co)/([\w.-]+/[\w.-]+)", cleaned)
    return found.group(1) if found else cleaned


def _relations(info) -> list[tuple[str, str]]:
    """List the ``(relation, parent)`` pairs a model-info object declares.

    Typed ``base_model:<relation>:<parent>`` tags are authoritative. Only when
    none are present is the card's ``base_model`` field consulted, and every
    parent found there is reported as a ``"finetune"`` because the card does
    not say which kind of derivative it is.
    """
    tags = getattr(info, "tags", None) or []
    matches = (_RELATION.match(tag) for tag in tags)
    typed = [(hit.group(1), hit.group(2)) for hit in matches if hit]
    if typed:
        return typed

    card = getattr(info, "card_data", None)
    if not card:
        return typed

    # The card may be dict-like or a plain attribute object.
    if hasattr(card, "get"):
        base = card.get("base_model")
    else:
        base = getattr(card, "base_model", None)

    if isinstance(base, str):
        return [("finetune", base)]
    if isinstance(base, list):
        return [("finetune", parent) for parent in base if isinstance(parent, str)]
    return typed


def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict:
    """Answer "will this Hugging Face model run here?" for one repo id or URL.

    Resolution order:
      1. offline catalogue hit by repo id or alias;
      2. one Hub metadata call, then a walk of up to three base-model hops
         looking for an ancestor that is in the catalogue;
      3. a labelled estimate computed from the safetensors parameter count.

    Args:
        repo_input: Bare ``author/model`` id or a Hugging Face URL.
        payload: Request options; only ``"usecase"`` is read, and a missing
            or unknown value falls back to the ``"chat"`` use case.
        spec: Hardware budgets the verdict is computed against.

    Returns:
        ``{"error": <message>}`` when the input is malformed, the Hub call
        fails, or no size information is available. Otherwise a dict with
        ``found``, ``match`` (``"catalogue"`` or ``"estimate"``), ``chain``
        (repo ids visited, starting with the requested one), ``explain``
        (an HTML snippet), ``option`` and ``live``.
    """
    repo_id = normalize_repo_id(repo_input)
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id):
        # NOTE(review): `repo_input` is echoed unescaped; confirm the UI does
        # not render this message as HTML.
        return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id "
                         f"(expected something like author/model-name)."}

    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    short_name = repo_id.split("/")[-1]
    chain = [repo_id]

    # 1) Offline: direct catalogue hit (also via aliases).
    entry = _index().get(repo_id.lower())
    via = None

    # 2) Online: one metadata call + base-model walk.
    info = None
    if entry is None:
        # Imported lazily so the offline path never needs the Hub client.
        from huggingface_hub import HfApi
        api = HfApi()
        try:
            info = api.model_info(repo_id, expand=["tags", "safetensors", "cardData",
                                                   "pipeline_tag", "gated"])
        except Exception as exc:  # noqa: BLE001 — surface the real failure
            return {"error": f"Couldn't find '{repo_id}' on Hugging Face "
                             f"({type(exc).__name__}). Check the spelling?"}

        max_hops = 3
        seen = {repo_id.lower()}  # guards against self-referencing / cyclic bases
        hop_info = info
        for hop in range(max_hops):
            rels = _relations(hop_info)
            if not rels:
                break
            # Prefer finetune/merge/adapter over quantized; `min` keeps the
            # first declared relation among equally ranked ones.
            rel, parent = min(
                rels, key=lambda r: 0 if r[0] in ("finetune", "merge", "adapter") else 1
            )
            parent_key = parent.lower()
            if parent_key in seen:
                break  # already visited: following it again cannot find anything new
            seen.add(parent_key)
            chain.append(parent)
            entry = _index().get(parent_key)
            if entry is not None:
                via = {"relation": rel, "base": parent}
                break
            if hop == max_hops - 1:
                # Last hop: nothing would read the parent's metadata, so skip
                # the extra network round-trip.
                break
            try:
                hop_info = api.model_info(parent, expand=["tags", "cardData"])
            except Exception:  # noqa: BLE001 — chain ends here
                break

    if entry is not None:
        r = _evaluate(entry, spec, uc)
        opt = _option_json(r, spec)
        explain = f"<b>{short_name}</b> "
        if via:
            word = {"finetune": "is fine-tuned from", "merge": "is merged from",
                    "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]]
            explain += (f"{word} <b>{entry['name']}</b> — if the base runs, this runs, "
                        f"with the same memory needs.")
            if via["relation"] == "adapter":
                explain += " Add roughly 0.1–0.5 GB for the adapter file."
        else:
            explain += f"is <b>{entry['name']}</b> in our catalogue."
        # `live` is true whenever the Hub was contacted for this answer.
        return {"found": True, "match": "catalogue", "chain": chain,
                "explain": explain, "option": opt, "live": via is not None or info is not None}

    # 3) Raw math from parameter count — clearly labelled estimate.
    # NOTE(review): if the repo is an adapter whose base is not in the
    # catalogue, `safetensors.total` presumably counts only the adapter
    # weights — confirm the estimate is meaningful in that case.
    st = getattr(info, "safetensors", None)
    total = getattr(st, "total", None) if st else None
    if not total:
        return {"error": f"'{repo_id}' exists, but doesn't share its size or a known "
                         f"base model, so an honest estimate isn't possible."}
    params_b = total / 1e9
    weights_4bit = round(params_b * 4.85 / 8, 2)   # effective 4-bit bits/weight
    need = round(weights_4bit * 1.25 + 0.58, 2)
    fast, total_b = spec.fast_budget_gb, spec.total_budget_gb
    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
        verdict = "great"
    elif need <= total_b * _SAFETY_FILL:
        verdict = "tight"
    else:
        verdict = "no"
    return {
        "found": True, "match": "estimate", "chain": chain, "live": True,
        "explain": (f"<b>{short_name}</b> isn't in our catalogue and lists no "
                    f"known base, so this is raw math from its {params_b:.1f}B parameters "
                    f"at a 4-bit setting — an estimate, not a measured figure."),
        "option": {
            "verdict": verdict, "model": short_name,
            "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)",
            "setting": "Balanced (4-bit)",
            "memory": "Too big" if verdict == "no" else f"{need:g} GB",
            "feel": "", "url": f"https://huggingface.co/{repo_id}",
            "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)),
            "run": {}, "provenance": "estimated", "stale": False,
            "params_b": round(params_b, 2), "active_params_b": None,
        },
    }