FitCheck / engine /hub_lookup.py
cn0303's picture
Real catalogue: 83 verified models, buy-advice mode, live model lookup, license-aware cards
e34beb2 verified
"""
Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?"
Deterministic — no AI involved. Given any repo id (or model page URL), this:
1. checks the local catalogue (offline) by repo id and aliases;
2. otherwise makes ONE metadata call to the Hub for the repo itself, reads
the model-tree tags (base_model:finetune/adapter/quantized/merge), and walks
up to 3 hops (at most one further metadata call per hop) to find a
catalogue ancestor — "your finetune runs because its base runs";
3. otherwise falls back to raw parameter-count math, clearly labelled.
This is the only part of FitCheck that touches the network at runtime, and the
UI labels it as a live lookup. The core advisor stays fully offline.
"""
import re
from functools import lru_cache
from .hardware import HardwareSpec
from .real_advisor import (
USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue,
)
# Matches a typed model-tree tag of the form ``base_model:<relation>:<repo>``.
# Group 1 is the relation (finetune, adapter, quantized or merge); group 2 is
# everything after it, i.e. the parent repo id. Untyped ``base_model:<repo>``
# tags do not match.
_RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$")
@lru_cache(maxsize=1)
def _index() -> dict:
    """Map every lower-cased repo id and alias to its catalogue entry.

    The mapping is built from ``catalogue()["entries"]`` on first use and then
    served from the single-slot cache. Keys are lower-cased, so callers must
    lower-case the repo id they look up. When two names collide, the one
    processed later wins (entries in catalogue order; within an entry the
    repo id first, then its aliases).
    """
    by_name = {}
    for record in catalogue()["entries"]:
        names = [record["repo_id"], *record.get("aliases", [])]
        by_name.update((name.lower(), record) for name in names)
    return by_name
def normalize_repo_id(text: str) -> str:
    """Reduce a pasted model reference to a bare ``author/model`` repo id.

    Accepts a bare repo id or a URL on ``huggingface.co`` or its short domain
    ``hf.co``. The scheme, a ``www.`` prefix, host casing and anything after
    the ``author/model`` pair (``/tree/main``, query strings, ...) are ignored.

    Args:
        text: Raw user input. ``None`` or empty input is treated as ``""``.

    Returns:
        The ``author/model`` part of a recognised URL; otherwise the input
        with surrounding whitespace and trailing slashes removed. The result
        is not validated here — the caller checks its shape.
    """
    text = (text or "").strip().rstrip("/")
    # The lookbehind keeps look-alike hosts such as "shelf.co" from matching:
    # the host must start the string or follow a non-word separator
    # ("/", ".", "@", whitespace).
    m = re.search(
        r"(?<![\w-])(?:huggingface\.co|hf\.co)/([\w.-]+/[\w.-]+)",
        text,
        re.IGNORECASE,
    )
    return m.group(1) if m else text
def _relations(info) -> list[tuple[str, str]]:
    """List the ``(relation, parent_repo)`` pairs a model declares.

    Typed ``base_model:<relation>:<repo>`` tags on ``info.tags`` are
    authoritative and returned in tag order. Only when no tag matches is
    ``info.card_data``'s ``base_model`` consulted (a string or a list of
    strings); those parents are reported as ``"finetune"`` because card data
    carries no relation type.

    Returns:
        A possibly empty list of ``(relation, parent_repo)`` tuples.
    """
    tag_hits = (_RELATION.match(tag) for tag in (getattr(info, "tags", None) or []))
    typed = [(hit.group(1), hit.group(2)) for hit in tag_hits if hit]
    if typed:
        return typed

    # cardData fallback only when tags carry no typed relation — the tag
    # knows whether it's a finetune or a quantized copy; cardData doesn't.
    card = getattr(info, "card_data", None)
    if not card:
        return []
    if hasattr(card, "get"):
        base = card.get("base_model")
    else:
        base = getattr(card, "base_model", None)

    if isinstance(base, str):
        candidates = [base]
    elif isinstance(base, list):
        candidates = base
    else:
        candidates = []
    return [("finetune", name) for name in candidates if isinstance(name, str)]
def _walk_to_catalogue(api, info, repo_id, max_hops=3):
    """Follow base-model relations upward until a catalogue entry is found.

    Args:
        api: Object exposing ``model_info(repo, expand=[...])``.
        info: Already-fetched metadata for ``repo_id``.
        repo_id: Repo the walk starts from; it is never revisited.
        max_hops: Maximum number of parents to examine.

    Returns:
        ``(entry, via, parents)``. ``entry`` is the matched catalogue entry and
        ``via`` is ``{"relation", "base"}`` for the hop that matched; both are
        ``None`` when nothing matched. ``parents`` lists every parent repo id
        examined, in order.
    """
    index = _index()
    visited = {repo_id.lower()}
    parents = []
    hop_info = info
    for hop in range(max_hops):
        # Skip repos already on the path: models that name themselves (or an
        # ancestor) as base would otherwise be re-fetched on every hop.
        rels = [r for r in _relations(hop_info) if r[1].lower() not in visited]
        if not rels:
            break
        # Prefer finetune/merge (same memory as base) over quantized.
        rels.sort(key=lambda r: 0 if r[0] in ("finetune", "merge", "adapter") else 1)
        # A model can list several parents (merges usually do). Take the first
        # one the catalogue knows; otherwise keep walking through the first.
        rel, parent = next((r for r in rels if r[1].lower() in index), rels[0])
        parents.append(parent)
        visited.add(parent.lower())
        entry = index.get(parent.lower())
        if entry is not None:
            return entry, {"relation": rel, "base": parent}, parents
        if hop + 1 == max_hops:
            break  # No further hop would read the metadata, so don't fetch it.
        try:
            hop_info = api.model_info(parent, expand=["tags", "cardData"])
        except Exception:  # noqa: BLE001 — chain ends here
            break
    return None, None, parents


def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict:
    """Answer "will this exact Hugging Face model run on this machine?".

    Resolution order:
      1. offline catalogue hit on the repo id or one of its aliases;
      2. one Hub metadata call for the repo, then a walk of up to 3
         base-model hops looking for a catalogue ancestor;
      3. a labelled estimate from the safetensors parameter count.

    Args:
        repo_input: Bare ``author/model`` repo id or a model-page URL.
        payload: Request data; only ``payload["usecase"]`` is read. A missing
            or unknown use case falls back to ``"chat"``.
        spec: Hardware description. ``has_fast_path``, ``fast_budget_gb`` and
            ``total_budget_gb`` are read here for the estimate path; the
            catalogue path passes it to ``_evaluate`` and ``_option_json``.

    Returns:
        On success ``{"found": True, "match": "catalogue" | "estimate",
        "chain", "explain", "option", "live"}``. ``chain`` is the repo id
        followed by every parent examined, ``explain`` is an HTML snippet, and
        ``live`` says whether the network was used. On failure
        ``{"error": message}``. Hub failures are reported in the result
        rather than raised.
    """
    repo_id = normalize_repo_id(repo_input)
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id):
        return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id "
                         f"(expected something like author/model-name)."}
    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    chain = [repo_id]
    # Safe to embed in HTML: the fullmatch above restricts it to [\w.-].
    short_name = repo_id.split("/")[-1]

    # 1) Offline: direct catalogue hit (also via aliases).
    entry = _index().get(repo_id.lower())
    via = None

    # 2) Online: one metadata call + base-model walk.
    info = None
    if entry is None:
        # Imported lazily so the offline path never needs huggingface_hub.
        from huggingface_hub import HfApi
        api = HfApi()
        try:
            info = api.model_info(repo_id, expand=["tags", "safetensors", "cardData",
                                                   "pipeline_tag", "gated"])
        except Exception as exc:  # noqa: BLE001 — surface the real failure
            return {"error": f"Couldn't find '{repo_id}' on Hugging Face "
                             f"({type(exc).__name__}). Check the spelling?"}
        entry, via, parents = _walk_to_catalogue(api, info, repo_id)
        chain.extend(parents)

    if entry is not None:
        r = _evaluate(entry, spec, uc)
        opt = _option_json(r, spec)
        explain = f"<b>{short_name}</b> "
        if via:
            word = {"finetune": "is fine-tuned from", "merge": "is merged from",
                    "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]]
            explain += (f"{word} <b>{entry['name']}</b> — if the base runs, this runs, "
                        f"with the same memory needs.")
            if via["relation"] == "adapter":
                explain += " Add roughly 0.1–0.5 GB for the adapter file."
        else:
            explain += f"is <b>{entry['name']}</b> in our catalogue."
        return {"found": True, "match": "catalogue", "chain": chain,
                "explain": explain, "option": opt, "live": via is not None or info is not None}

    # 3) Raw math from parameter count — clearly labelled estimate.
    st = getattr(info, "safetensors", None)
    total = getattr(st, "total", None) if st else None
    if not total:
        return {"error": f"'{repo_id}' exists, but doesn't share its size or a known "
                         f"base model, so an honest estimate isn't possible."}
    params_b = total / 1e9
    weights_4bit = round(params_b * 4.85 / 8, 2)  # effective 4-bit bits/weight
    # NOTE(review): 1.25 and 0.58 look like a runtime-overhead factor and a
    # fixed allowance in GB — confirm against the catalogue's memory model.
    need = round(weights_4bit * 1.25 + 0.58, 2)
    fast, total_b = spec.fast_budget_gb, spec.total_budget_gb
    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
        verdict = "great"
    elif need <= total_b * _SAFETY_FILL:
        verdict = "tight"
    else:
        verdict = "no"
    return {
        "found": True, "match": "estimate", "chain": chain, "live": True,
        "explain": (f"<b>{short_name}</b> isn't in our catalogue and lists no "
                    f"known base, so this is raw math from its {params_b:.1f}B parameters "
                    f"at a 4-bit setting — an estimate, not a measured figure."),
        "option": {
            "verdict": verdict, "model": short_name,
            "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)",
            "setting": "Balanced (4-bit)",
            "memory": "Too big" if verdict == "no" else f"{need:g} GB",
            "feel": "", "url": f"https://huggingface.co/{repo_id}",
            "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)),
            "run": {}, "provenance": "estimated", "stale": False,
            "params_b": round(params_b, 2), "active_params_b": None,
        },
    }