Spaces:
Running on Zero
Running on Zero
File size: 6,706 Bytes
e34beb2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | """
Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?"
Deterministic — no AI involved. Given any repo id (or model page URL), this:
1. checks the local catalogue (offline) by repo id and aliases;
2. otherwise makes one metadata call to the Hub for the model itself, reads the
model-tree tags (base_model:finetune/adapter/quantized/merge), and walks up to
3 hops (one extra metadata call per hop whose parent isn't in the catalogue) to
find a catalogue ancestor — "your finetune runs because its base runs";
3. otherwise falls back to raw parameter-count math, clearly labelled.
This is the only part of FitCheck that touches the network at runtime, and the
UI labels it as a live lookup. The core advisor stays fully offline.
"""
import re
from functools import lru_cache
from .hardware import HardwareSpec
from .real_advisor import (
USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue,
)
# Matches a typed model-tree tag such as "base_model:finetune:org/name".
# Group 1 is the relation kind (finetune, adapter, quantized or merge);
# group 2 is everything after it, i.e. the parent repo id.
_RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$")
@lru_cache(maxsize=1)
def _index() -> dict:
    """Map every lower-cased repo id and alias to its catalogue entry.

    The mapping is built once from ``catalogue()["entries"]`` and then cached.
    Entries are processed in catalogue order, so when two entries share a key
    the later one wins.
    """
    by_key = {}
    for entry in catalogue()["entries"]:
        # The repo id goes first, then its aliases — same write order as a
        # separate assignment followed by an alias loop.
        for key in (entry["repo_id"], *entry.get("aliases", [])):
            by_key[key.lower()] = entry
    return by_key
def normalize_repo_id(text: str) -> str:
    """Return the ``author/model`` repo id named by *text*.

    Accepts a bare repo id or a Hugging Face URL. Both the ``huggingface.co``
    host and its ``hf.co`` short form are recognised, in any letter case, and
    anything after the first two path segments (``/tree/main``, query
    strings, ...) is ignored.

    Args:
        text: user input; ``None`` or an empty string is treated as ``""``.

    Returns:
        The extracted ``author/model`` id when *text* contains a recognised
        URL; otherwise *text* itself with surrounding whitespace and trailing
        slashes removed. The result is not validated here.
    """
    text = (text or "").strip().rstrip("/")
    # The lookbehind anchors the host on a label boundary, so an unrelated
    # domain that merely ends in "huggingface.co" / "hf.co" is not mistaken
    # for the Hub. Subdomains (preceded by ".") still match.
    m = re.search(
        r"(?<![\w-])(?:huggingface|hf)\.co/([\w.-]+/[\w.-]+)",
        text,
        re.IGNORECASE,
    )
    if m:
        return m.group(1)
    return text
def _relations(info) -> list[tuple[str, str]]:
    """List the ``(relation, parent_repo)`` pairs declared by a model.

    Typed ``base_model:<relation>:<repo>`` tags on ``info.tags`` are
    preferred. Only when none are present is ``info.card_data``'s
    ``base_model`` field consulted, and every parent found that way is
    reported as a ``"finetune"``.

    Returns:
        A new list of pairs, possibly empty.
    """
    tags = getattr(info, "tags", None) or []
    typed = [
        (hit.group(1), hit.group(2))
        for hit in map(_RELATION.match, tags)
        if hit
    ]
    if typed:
        return typed

    # cardData fallback only when tags carry no typed relation — the tag
    # knows whether it's a finetune or a quantized copy; cardData doesn't.
    card = getattr(info, "card_data", None)
    if not card:
        return typed

    # card_data may be mapping-like or a plain object with attributes.
    if hasattr(card, "get"):
        base = card.get("base_model")
    else:
        base = getattr(card, "base_model", None)

    if isinstance(base, str):
        return [("finetune", base)]
    if isinstance(base, list):
        return [("finetune", parent) for parent in base if isinstance(parent, str)]
    return typed
def _walk_to_catalogue(api, info, chain, max_hops=3):
    """Climb base-model relations until a catalogue entry turns up.

    Args:
        api: client exposing ``model_info(repo_id, expand=[...])``.
        info: metadata object of the model the walk starts from.
        chain: list of repo ids visited so far; each parent that is matched
            or followed is appended in place.
        max_hops: maximum number of relation levels to climb.

    Returns:
        ``(entry, via)`` — the catalogue entry plus a
        ``{"relation", "base"}`` dict describing the hop that reached it —
        or ``(None, None)`` when no ancestor is in the catalogue.
    """
    index = _index()
    # Remember what was already visited so a self-referencing or cyclic
    # base_model chain doesn't burn the hop budget on repeat fetches.
    seen = {repo.lower() for repo in chain}
    hop_info = info
    for _hop in range(max_hops):
        rels = _relations(hop_info)
        if not rels:
            break
        # Prefer finetune/merge (same memory as base) over quantized.
        rels.sort(key=lambda r: 0 if r[0] in ("finetune", "merge", "adapter") else 1)
        # A model may list several parents (e.g. a merge); any of them being
        # in the catalogue is enough, so check them all before fetching more.
        for rel, parent in rels:
            known = index.get(parent.lower())
            if known is not None:
                chain.append(parent)
                return known, {"relation": rel, "base": parent}
        _rel, parent = rels[0]
        if parent.lower() in seen:
            break
        seen.add(parent.lower())
        chain.append(parent)
        try:
            hop_info = api.model_info(parent, expand=["tags", "cardData"])
        except Exception:  # noqa: BLE001 — chain ends here
            break
    return None, None


def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict:
    """Decide whether one specific Hugging Face model fits the given hardware.

    Resolution order: a direct catalogue hit (offline), then a catalogue
    ancestor found by walking the model's base-model relations on the Hub,
    then a raw estimate from the safetensors parameter count.

    Args:
        repo_input: a repo id (``author/model``) or a Hugging Face URL.
        payload: request options; only ``"usecase"`` is read (defaults to
            ``"chat"``, and unknown values also fall back to ``"chat"``).
            ``None`` is treated as empty.
        spec: hardware description used for the verdict.

    Returns:
        ``{"error": message}`` when the input is malformed, the Hub lookup
        fails, or no size information is available. Otherwise a dict with
        ``found``, ``match`` (``"catalogue"`` or ``"estimate"``), ``chain``
        (repo ids visited), ``explain`` (HTML snippet), ``option`` and
        ``live`` (whether the network was used).
    """
    repo_id = normalize_repo_id(repo_input)
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id):
        return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id "
                         f"(expected something like author/model-name)."}
    uc = USE_CASES.get((payload or {}).get("usecase", "chat"), USE_CASES["chat"])
    chain = [repo_id]

    # 1) Offline: direct catalogue hit (also via aliases).
    entry = _index().get(repo_id.lower())
    via = None

    # 2) Online: metadata call + base-model walk.
    info = None
    if entry is None:
        try:
            from huggingface_hub import HfApi
        except ImportError:
            # Keep the documented {error} contract even when the client
            # library for the live lookup is unavailable.
            return {"error": "Live lookup needs the 'huggingface_hub' package, "
                             "which isn't installed here."}
        api = HfApi()
        try:
            info = api.model_info(repo_id, expand=["tags", "safetensors", "cardData",
                                                   "pipeline_tag", "gated"])
        except Exception as exc:  # noqa: BLE001 — surface the real failure
            return {"error": f"Couldn't find '{repo_id}' on Hugging Face "
                             f"({type(exc).__name__}). Check the spelling?"}
        entry, via = _walk_to_catalogue(api, info, chain)

    if entry is not None:
        r = _evaluate(entry, spec, uc)
        opt = _option_json(r, spec)
        explain = f"<b>{repo_id.split('/')[-1]}</b> "
        if via:
            word = {"finetune": "is fine-tuned from", "merge": "is merged from",
                    "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]]
            explain += (f"{word} <b>{entry['name']}</b> — if the base runs, this runs, "
                        f"with the same memory needs.")
            if via["relation"] == "adapter":
                explain += " Add roughly 0.1–0.5 GB for the adapter file."
        else:
            explain += f"is <b>{entry['name']}</b> in our catalogue."
        # `via` is only ever set after a Hub call, so `info` alone tells
        # whether this answer needed the network.
        return {"found": True, "match": "catalogue", "chain": chain,
                "explain": explain, "option": opt, "live": info is not None}

    # 3) Raw math from parameter count — clearly labelled estimate.
    st = getattr(info, "safetensors", None)
    total = getattr(st, "total", None) if st else None
    if not total:
        return {"error": f"'{repo_id}' exists, but doesn't share its size or a known "
                         f"base model, so an honest estimate isn't possible."}
    params_b = total / 1e9
    weights_4bit = round(params_b * 4.85 / 8, 2)  # effective 4-bit bits/weight
    # NOTE(review): 1.25 and 0.58 look like a runtime-overhead multiplier and a
    # fixed GB allowance — confirm against the core advisor's formula.
    need = round(weights_4bit * 1.25 + 0.58, 2)
    fast, total_b = spec.fast_budget_gb, spec.total_budget_gb
    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
        verdict = "great"
    elif need <= total_b * _SAFETY_FILL:
        verdict = "tight"
    else:
        verdict = "no"
    return {
        "found": True, "match": "estimate", "chain": chain, "live": True,
        "explain": (f"<b>{repo_id.split('/')[-1]}</b> isn't in our catalogue and lists no "
                    f"known base, so this is raw math from its {params_b:.1f}B parameters "
                    f"at a 4-bit setting — an estimate, not a measured figure."),
        "option": {
            "verdict": verdict, "model": repo_id.split("/")[-1],
            "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)",
            "setting": "Balanced (4-bit)",
            "memory": "Too big" if verdict == "no" else f"{need:g} GB",
            "feel": "", "url": f"https://huggingface.co/{repo_id}",
            "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)),
            "run": {}, "provenance": "estimated", "stale": False,
            "params_b": round(params_b, 2), "active_params_b": None,
        },
    }
|