Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

File size: 6,706 Bytes

e34beb2

"""
Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?"

Deterministic — no AI involved. Given any repo id (or model page URL), this:
  1. checks the local catalogue (offline) by repo id and aliases;
  2. otherwise makes ONE metadata call to the Hub for the model itself, reads
     the model-tree tags (base_model:finetune/adapter/quantized/merge), and
     walks up to 3 hops (at most one further metadata call per hop) to find a
     catalogue ancestor — "your finetune runs because its base runs";
  3. otherwise falls back to raw parameter-count math, clearly labelled.

This is the only part of FitCheck that touches the network at runtime, and the
UI labels it as a live lookup. The core advisor stays fully offline.
"""

import re
from functools import lru_cache

from .hardware import HardwareSpec
from .real_advisor import (
    USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue,
)

# Typed model-tree tag: "base_model:<relation>:<parent repo id>", where
# <relation> is one of finetune / adapter / quantized / merge.
# Group 1 captures the relation, group 2 the parent repo id.
_RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$")


@lru_cache(maxsize=1)
def _index() -> dict:
    """Map every lowercased repo id and alias to its catalogue entry.

    The table is built on the first call and then served from the single-slot
    ``lru_cache``. When two names collide, the one registered later wins,
    exactly as plain dict assignment would behave.
    """
    by_name = {}
    for entry in catalogue()["entries"]:
        # Repo id first, then aliases — same insertion order as before.
        names = [entry["repo_id"], *entry.get("aliases", [])]
        by_name.update((name.lower(), entry) for name in names)
    return by_name


def normalize_repo_id(text: str) -> str:
    """Accept a bare repo id or a Hugging Face URL and return ``author/name``.

    Surrounding whitespace and trailing slashes are dropped. When the text
    contains ``huggingface.co/<author>/<name>`` — or the same path on the
    ``hf.co`` short domain — only ``<author>/<name>`` is returned; anything
    after it (``/tree/main``, a query string, a ``:tag`` suffix) is ignored.
    Any other text is returned cleaned but otherwise unchanged, so validation
    stays with the caller. ``None`` or an empty string yields ``""``.
    """
    cleaned = (text or "").strip().rstrip("/")
    # `\b` keeps the short form from matching in the middle of a longer word.
    found = re.search(r"(?:huggingface\.co|\bhf\.co)/([\w.-]+/[\w.-]+)", cleaned)
    return found.group(1) if found else cleaned


def _relations(info) -> list[tuple[str, str]]:
    out = []
    for t in (getattr(info, "tags", None) or []):
        m = _RELATION.match(t)
        if m:
            out.append((m.group(1), m.group(2)))
    if not out:
        # cardData fallback only when tags carry no typed relation — the tag
        # knows whether it's a finetune or a quantized copy; cardData doesn't.
        card = getattr(info, "card_data", None)
        if card:
            base = card.get("base_model") if hasattr(card, "get") else getattr(card, "base_model", None)
            if isinstance(base, str):
                out.append(("finetune", base))
            elif isinstance(base, list):
                out.extend(("finetune", b) for b in base if isinstance(b, str))
    return out


def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict:
    """Answer "will this Hugging Face model run on *spec*?" for one repo.

    Resolution order:
      1. offline catalogue hit on the repo id or one of its aliases;
      2. one Hub metadata call for the repo itself, then a walk of up to
         three base-model hops looking for an ancestor in the catalogue;
      3. a raw parameter-count estimate at a 4-bit setting.

    Args:
        repo_input: Bare ``author/name`` repo id or a model page URL.
        payload: Request options; only ``"usecase"`` is read, and a missing
            or unknown value falls back to ``"chat"``.
        spec: Hardware budget the model is evaluated against.

    Returns:
        ``{"error": message}`` when the input is malformed, the Hub lookup
        fails, or no honest estimate is possible. Otherwise a dict with
        ``found``, ``match`` (``"catalogue"`` or ``"estimate"``), ``chain``
        (repo ids visited, starting with the requested one), ``explain``
        (an HTML snippet), ``option`` and ``live``.
    """
    repo_id = normalize_repo_id(repo_input)
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id):
        return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id "
                         f"(expected something like author/model-name)."}

    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    chain = [repo_id]
    short_name = repo_id.split("/")[-1]

    # 1) Offline: direct catalogue hit (also via aliases).
    entry = _index().get(repo_id.lower())
    via = None

    # 2) Online: metadata call for the repo + base-model walk.
    info = None
    is_adapter = False  # True when the requested repo itself is tagged as an adapter
    if entry is None:
        # Imported here so the offline path never needs huggingface_hub.
        from huggingface_hub import HfApi
        api = HfApi()
        try:
            info = api.model_info(repo_id, expand=["tags", "safetensors", "cardData",
                                                   "pipeline_tag", "gated"])
        except Exception as exc:  # noqa: BLE001 — surface the real failure
            return {"error": f"Couldn't find '{repo_id}' on Hugging Face "
                             f"({type(exc).__name__}). Check the spelling?"}
        max_hops = 3
        seen = {repo_id.lower()}
        hop_info = info
        for hop in range(max_hops):
            rels = _relations(hop_info)
            if hop == 0:
                is_adapter = any(kind == "adapter" for kind, _ in rels)
            # Skip parents already visited: a card naming itself (or a cycle
            # of cards naming each other) would otherwise burn every hop.
            rels = [r for r in rels if r[1].lower() not in seen]
            if not rels:
                break
            # Prefer finetune/merge/adapter (same memory as base) over quantized.
            rels.sort(key=lambda r: 0 if r[0] in ("finetune", "merge", "adapter") else 1)
            rel, parent = rels[0]
            chain.append(parent)
            seen.add(parent.lower())
            entry = _index().get(parent.lower())
            if entry is not None:
                via = {"relation": rel, "base": parent}
                break
            if hop == max_hops - 1:
                # Last hop: the parent's metadata would never be read, so
                # don't spend a network call fetching it.
                break
            try:
                hop_info = api.model_info(parent, expand=["tags", "cardData"])
            except Exception:  # noqa: BLE001 — chain ends here
                break

    if entry is not None:
        r = _evaluate(entry, spec, uc)
        opt = _option_json(r, spec)
        explain = f"<b>{short_name}</b> "
        if via:
            word = {"finetune": "is fine-tuned from", "merge": "is merged from",
                    "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]]
            explain += (f"{word} <b>{entry['name']}</b> — if the base runs, this runs, "
                        f"with the same memory needs.")
            if via["relation"] == "adapter":
                explain += " Add roughly 0.1–0.5 GB for the adapter file."
        else:
            explain += f"is <b>{entry['name']}</b> in our catalogue."
        return {"found": True, "match": "catalogue", "chain": chain,
                "explain": explain, "option": opt, "live": via is not None or info is not None}

    # An adapter repo's own parameter count covers only the adapter weights,
    # not the base model that has to be loaded with it. Sizing it by raw math
    # would report a tiny footprint, so refuse rather than mislead.
    if is_adapter:
        return {"error": f"'{repo_id}' is an adapter whose base model isn't in our "
                         f"catalogue, so an honest estimate isn't possible."}

    # 3) Raw math from parameter count — clearly labelled estimate.
    st = getattr(info, "safetensors", None)
    total = getattr(st, "total", None) if st else None
    if not total:
        return {"error": f"'{repo_id}' exists, but doesn't share its size or a known "
                         f"base model, so an honest estimate isn't possible."}
    params_b = total / 1e9
    weights_4bit = round(params_b * 4.85 / 8, 2)   # effective 4-bit bits/weight
    # NOTE(review): 1.25 and 0.58 look like runtime overhead allowances —
    # confirm they match the figures used by the catalogue advisor.
    need = round(weights_4bit * 1.25 + 0.58, 2)
    fast, total_b = spec.fast_budget_gb, spec.total_budget_gb
    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
        verdict = "great"
    elif need <= total_b * _SAFETY_FILL:
        verdict = "tight"
    else:
        verdict = "no"
    return {
        "found": True, "match": "estimate", "chain": chain, "live": True,
        "explain": (f"<b>{short_name}</b> isn't in our catalogue and lists no "
                    f"known base, so this is raw math from its {params_b:.1f}B parameters "
                    f"at a 4-bit setting — an estimate, not a measured figure."),
        "option": {
            "verdict": verdict, "model": short_name,
            "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)",
            "setting": "Balanced (4-bit)",
            "memory": "Too big" if verdict == "no" else f"{need:g} GB",
            "feel": "", "url": f"https://huggingface.co/{repo_id}",
            "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)),
            "run": {}, "provenance": "estimated", "stale": False,
            "params_b": round(params_b, 2), "active_params_b": None,
        },
    }