""" Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?" Deterministic — no AI involved. Given any repo id (or model page URL), this: 1. checks the local catalogue (offline) by repo id and aliases; 2. otherwise makes ONE metadata call to the Hub, reads the model-tree tags (base_model:finetune/adapter/quantized/merge), and walks up to 3 hops to find a catalogue ancestor — "your finetune runs because its base runs"; 3. otherwise falls back to raw parameter-count math, clearly labelled. This is the only part of FitCheck that touches the network at runtime, and the UI labels it as a live lookup. The core advisor stays fully offline. """ import re from functools import lru_cache from .hardware import HardwareSpec from .real_advisor import ( USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue, ) _RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$") @lru_cache(maxsize=1) def _index() -> dict: idx = {} for e in catalogue()["entries"]: idx[e["repo_id"].lower()] = e for a in e.get("aliases", []): idx[a.lower()] = e return idx def normalize_repo_id(text: str) -> str: """Accept a bare repo id or any huggingface.co URL.""" text = (text or "").strip().rstrip("/") m = re.search(r"huggingface\.co/([\w.-]+/[\w.-]+)", text) if m: return m.group(1) return text def _relations(info) -> list[tuple[str, str]]: out = [] for t in (getattr(info, "tags", None) or []): m = _RELATION.match(t) if m: out.append((m.group(1), m.group(2))) if not out: # cardData fallback only when tags carry no typed relation — the tag # knows whether it's a finetune or a quantized copy; cardData doesn't. card = getattr(info, "card_data", None) if card: base = card.get("base_model") if hasattr(card, "get") else getattr(card, "base_model", None) if isinstance(base, str): out.append(("finetune", base)) elif isinstance(base, list): out.extend(("finetune", b) for b in base if isinstance(b, str)) return out def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict: """Returns {found, model, chain, verdict-ish fields} or {error}.""" repo_id = normalize_repo_id(repo_input) if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id): return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id " f"(expected something like author/model-name)."} uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"]) chain = [repo_id] # 1) Offline: direct catalogue hit (also via aliases). entry = _index().get(repo_id.lower()) via = None # 2) Online: one metadata call + base-model walk. info = None if entry is None: from huggingface_hub import HfApi api = HfApi() current = repo_id try: info = api.model_info(current, expand=["tags", "safetensors", "cardData", "pipeline_tag", "gated"]) except Exception as exc: # noqa: BLE001 — surface the real failure return {"error": f"Couldn't find '{repo_id}' on Hugging Face " f"({type(exc).__name__}). Check the spelling?"} hop_info = info for _hop in range(3): rels = _relations(hop_info) if not rels: break # Prefer finetune/merge (same memory as base) over quantized. rels.sort(key=lambda r: 0 if r[0] in ("finetune", "merge", "adapter") else 1) rel, parent = rels[0] chain.append(parent) entry = _index().get(parent.lower()) if entry is not None: via = {"relation": rel, "base": parent} break try: hop_info = api.model_info(parent, expand=["tags", "cardData"]) except Exception: # noqa: BLE001 — chain ends here break if entry is not None: r = _evaluate(entry, spec, uc) opt = _option_json(r, spec) explain = f"{repo_id.split('/')[-1]} " if via: word = {"finetune": "is fine-tuned from", "merge": "is merged from", "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]] explain += (f"{word} {entry['name']} — if the base runs, this runs, " f"with the same memory needs.") if via["relation"] == "adapter": explain += " Add roughly 0.1–0.5 GB for the adapter file." else: explain += f"is {entry['name']} in our catalogue." return {"found": True, "match": "catalogue", "chain": chain, "explain": explain, "option": opt, "live": via is not None or info is not None} # 3) Raw math from parameter count — clearly labelled estimate. st = getattr(info, "safetensors", None) total = getattr(st, "total", None) if st else None if not total: return {"error": f"'{repo_id}' exists, but doesn't share its size or a known " f"base model, so an honest estimate isn't possible."} params_b = total / 1e9 weights_4bit = round(params_b * 4.85 / 8, 2) # effective 4-bit bits/weight need = round(weights_4bit * 1.25 + 0.58, 2) fast, total_b = spec.fast_budget_gb, spec.total_budget_gb if spec.has_fast_path and need <= fast * _SAFETY_FILL: verdict = "great" elif need <= total_b * _SAFETY_FILL: verdict = "tight" else: verdict = "no" return { "found": True, "match": "estimate", "chain": chain, "live": True, "explain": (f"{repo_id.split('/')[-1]} isn't in our catalogue and lists no " f"known base, so this is raw math from its {params_b:.1f}B parameters " f"at a 4-bit setting — an estimate, not a measured figure."), "option": { "verdict": verdict, "model": repo_id.split("/")[-1], "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)", "setting": "Balanced (4-bit)", "memory": "Too big" if verdict == "no" else f"{need:g} GB", "feel": "", "url": f"https://huggingface.co/{repo_id}", "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)), "run": {}, "provenance": "estimated", "stale": False, "params_b": round(params_b, 2), "active_params_b": None, }, }