Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

App Files Files Community

FitCheck / engine /hub_lookup.py

cn0303

Real catalogue: 83 verified models, buy-advice mode, live model lookup, license-aware cards

e34beb2 verified 1 day ago

raw

history blame contribute delete

6.71 kB

	"""
	Optional ONLINE lookup: "will this exact Hugging Face model run on my machine?"

	Deterministic — no AI involved. Given any repo id (or model page URL), this:
	1. checks the local catalogue (offline) by repo id and aliases;
	2. otherwise makes one metadata call to the Hub for the model itself, reads
	the model-tree tags (base_model:finetune/adapter/quantized/merge), and walks
	up to 3 hops (at most one further metadata call per hop) to
	find a catalogue ancestor — "your finetune runs because its base runs";
	3. otherwise falls back to raw parameter-count math, clearly labelled.

	This is the only part of FitCheck that touches the network at runtime, and the
	UI labels it as a live lookup. The core advisor stays fully offline.
	"""

	import re
	from functools import lru_cache

	from .hardware import HardwareSpec
	from .real_advisor import (
	USE_CASES, _SAFETY_FILL, _evaluate, _option_json, catalogue,
	)

	# Typed model-tree tag, e.g. "base_model:finetune:author/base-model".
	# group(1) is the relation kind, group(2) is the parent repo id.
	# The alternation bars must NOT be escaped: "\|" matches a literal pipe
	# character, which would make the pattern reject every real tag.
	_RELATION = re.compile(r"^base_model:(finetune|adapter|quantized|merge):(.+)$")


	@lru_cache(maxsize=1)
	def _index() -> dict:
	    """Map every lower-cased repo id and alias to its catalogue entry.

	    Built once per process (the result is cached). Keys are inserted in
	    catalogue order, repo id first and then its aliases, so on a key
	    collision the later insertion wins.
	    """
	    by_key = {}
	    for entry in catalogue()["entries"]:
	        names = [entry["repo_id"], *entry.get("aliases", [])]
	        for name in names:
	            by_key[name.lower()] = entry
	    return by_key


	def normalize_repo_id(text: str) -> str:
	    """Return the ``owner/name`` repo id from a bare id or a huggingface.co URL.

	    Surrounding whitespace and trailing slashes are dropped first. When the
	    text contains a huggingface.co URL, the first two path segments are
	    returned; anything after them (``/tree/main``, ``/blob/...``) is ignored.
	    A leading ``datasets/`` or ``spaces/`` route segment is skipped so that
	    it is not mistaken for the owner. Text without a huggingface.co URL is
	    returned as cleaned, and ``None`` becomes ``""``; the caller decides
	    whether the result is a valid repo id.
	    """
	    text = (text or "").strip().rstrip("/")
	    # The optional non-capturing group consumes the route prefix of dataset
	    # and Space pages; if only one segment follows it, the regex backtracks
	    # and captures the prefix as before.
	    m = re.search(
	        r"huggingface\.co/(?:(?:datasets|spaces)/)?([\w.-]+/[\w.-]+)", text
	    )
	    return m.group(1) if m else text


	def _relations(info) -> list[tuple[str, str]]:
	    """List ``(relation, parent_repo_id)`` pairs declared by a model.

	    Typed ``base_model:<relation>:<repo>`` tags are authoritative. Only when
	    none are present is the card's ``base_model`` field consulted; it does
	    not say what kind of relation it is, so each base found there is
	    reported as a ``"finetune"``.
	    """
	    tags = getattr(info, "tags", None) or []
	    matches = (_RELATION.match(tag) for tag in tags)
	    typed = [(hit.group(1), hit.group(2)) for hit in matches if hit]
	    if typed:
	        return typed

	    # cardData fallback: the tag knows whether it's a finetune or a
	    # quantized copy; cardData doesn't.
	    card = getattr(info, "card_data", None)
	    if not card:
	        return typed

	    # The card may be mapping-like or a plain attribute object.
	    if hasattr(card, "get"):
	        base = card.get("base_model")
	    else:
	        base = getattr(card, "base_model", None)

	    if isinstance(base, str):
	        return [("finetune", base)]
	    if isinstance(base, list):
	        return [("finetune", item) for item in base if isinstance(item, str)]
	    return typed


	# Relations whose model needs the same memory as its base; these are
	# preferred over "quantized" when a model declares several parents.
	_SAME_SIZE_RELATIONS = ("finetune", "merge", "adapter")

	# Maximum number of base-model links followed from the requested model.
	_MAX_HOPS = 3


	def _find_catalogue_ancestor(api, info, chain: list[str]):
	    """Walk base-model links from *info* until a catalogue entry is found.

	    Every parent visited is appended to *chain* (mutated in place). Returns
	    ``(entry, via)`` where ``via`` is ``{"relation", "base"}`` for the link
	    that reached the catalogue, or ``(None, None)`` when no ancestor within
	    ``_MAX_HOPS`` hops is in the catalogue.
	    """
	    seen = {repo.lower() for repo in chain}
	    hop_info = info
	    for hop in range(_MAX_HOPS):
	        rels = _relations(hop_info)
	        if not rels:
	            break
	        # min() returns the first item with the smallest key, i.e. the first
	        # same-size relation if any, otherwise the first relation listed.
	        rel, parent = min(rels, key=lambda r: r[0] not in _SAME_SIZE_RELATIONS)
	        if parent.lower() in seen:
	            # A model pointing at itself or at an earlier ancestor: following
	            # it would only repeat lookups already done.
	            break
	        seen.add(parent.lower())
	        chain.append(parent)
	        entry = _index().get(parent.lower())
	        if entry is not None:
	            return entry, {"relation": rel, "base": parent}
	        if hop == _MAX_HOPS - 1:
	            # No further hop will read the parent's metadata, so skip the
	            # network call.
	            break
	        try:
	            hop_info = api.model_info(parent, expand=["tags", "cardData"])
	        except Exception:  # noqa: BLE001 — chain ends here
	            break
	    return None, None


	def _estimate_from_params(repo_id: str, info, spec, chain: list[str]) -> dict:
	    """Build the labelled raw-math answer from the safetensors parameter count.

	    Returns an ``{"error": ...}`` dict when *info* exposes no parameter total.
	    """
	    st = getattr(info, "safetensors", None)
	    total = getattr(st, "total", None) if st else None
	    if not total:
	        return {"error": f"'{repo_id}' exists, but doesn't share its size or a known "
	                         f"base model, so an honest estimate isn't possible."}

	    short_name = repo_id.split("/")[-1]
	    params_b = total / 1e9
	    weights_4bit = round(params_b * 4.85 / 8, 2)  # effective 4-bit bits/weight
	    # NOTE(review): 1.25 and 0.58 look like a headroom multiplier and a fixed
	    # GB allowance on top of the weights — confirm against real_advisor.
	    need = round(weights_4bit * 1.25 + 0.58, 2)
	    fast, total_b = spec.fast_budget_gb, spec.total_budget_gb
	    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
	        verdict = "great"
	    elif need <= total_b * _SAFETY_FILL:
	        verdict = "tight"
	    else:
	        verdict = "no"
	    return {
	        "found": True, "match": "estimate", "chain": chain, "live": True,
	        "explain": (f"<b>{short_name}</b> isn't in our catalogue and lists no "
	                    f"known base, so this is raw math from its {params_b:.1f}B parameters "
	                    f"at a 4-bit setting — an estimate, not a measured figure."),
	        "option": {
	            "verdict": verdict, "model": short_name,
	            "desc": f"{params_b:.1f}B parameters (from its files on Hugging Face)",
	            "setting": "Balanced (4-bit)",
	            "memory": "Too big" if verdict == "no" else f"{need:g} GB",
	            "feel": "", "url": f"https://huggingface.co/{repo_id}",
	            "license": "", "license_note": "", "gated": bool(getattr(info, "gated", False)),
	            "run": {}, "provenance": "estimated", "stale": False,
	            "params_b": round(params_b, 2), "active_params_b": None,
	        },
	    }


	def lookup(repo_input: str, payload: dict, spec: HardwareSpec) -> dict:
	    """Answer "will this exact Hugging Face model run on this machine?".

	    Resolution order:
	      1. offline catalogue hit on the repo id or one of its aliases;
	      2. one Hub metadata call for the model, then a walk of at most
	         ``_MAX_HOPS`` base-model links looking for a catalogue ancestor;
	      3. a clearly labelled estimate from the model's parameter count.

	    Args:
	        repo_input: bare ``author/model`` id or a huggingface.co URL.
	        payload: request options; only ``"usecase"`` is read here (unknown
	            or missing values fall back to ``"chat"``). ``None`` is treated
	            as empty.
	        spec: hardware description handed to the advisor; the estimate path
	            reads ``has_fast_path``, ``fast_budget_gb`` and
	            ``total_budget_gb`` from it.

	    Returns:
	        On success a dict with ``found``, ``match`` (``"catalogue"`` or
	        ``"estimate"``), ``chain`` (repo ids visited, requested model first),
	        ``explain`` (HTML snippet), ``option`` and ``live``. On failure a
	        dict with a single ``error`` message. Hub failures are reported this
	        way rather than raised.
	    """
	    repo_id = normalize_repo_id(repo_input)
	    if not re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id):
	        return {"error": f"'{repo_input}' doesn't look like a Hugging Face repo id "
	                         f"(expected something like author/model-name)."}

	    uc = USE_CASES.get((payload or {}).get("usecase", "chat"), USE_CASES["chat"])
	    chain = [repo_id]

	    # 1) Offline: direct catalogue hit (also via aliases).
	    entry = _index().get(repo_id.lower())
	    via = None

	    # 2) Online: one metadata call + base-model walk.
	    info = None
	    if entry is None:
	        # Imported lazily so the offline path never needs the Hub client.
	        from huggingface_hub import HfApi
	        api = HfApi()
	        try:
	            info = api.model_info(repo_id, expand=["tags", "safetensors", "cardData",
	                                                   "pipeline_tag", "gated"])
	        except Exception as exc:  # noqa: BLE001 — surface the real failure
	            return {"error": f"Couldn't find '{repo_id}' on Hugging Face "
	                             f"({type(exc).__name__}). Check the spelling?"}
	        entry, via = _find_catalogue_ancestor(api, info, chain)

	    if entry is not None:
	        r = _evaluate(entry, spec, uc)
	        opt = _option_json(r, spec)
	        explain = f"<b>{repo_id.split('/')[-1]}</b> "
	        if via:
	            word = {"finetune": "is fine-tuned from", "merge": "is merged from",
	                    "adapter": "is an adapter on", "quantized": "is a compressed copy of"}[via["relation"]]
	            explain += (f"{word} <b>{entry['name']}</b> — if the base runs, this runs, "
	                        f"with the same memory needs.")
	            if via["relation"] == "adapter":
	                explain += " Add roughly 0.1–0.5 GB for the adapter file."
	        else:
	            explain += f"is <b>{entry['name']}</b> in our catalogue."
	        return {"found": True, "match": "catalogue", "chain": chain,
	                "explain": explain, "option": opt, "live": via is not None or info is not None}

	    # 3) Raw math from parameter count — clearly labelled estimate.
	    return _estimate_from_params(repo_id, info, spec, chain)