Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

File size: 28,588 Bytes

"""
Engine v2: honest verdicts over REAL models from catalogue.json.

This replaces both the size-class advisor and the placeholder families. Every
option it returns is an actual model with a Hugging Face link, a license, and
memory figures with provenance:

  - LLM / VLM weights = the EXACT GGUF file size in bytes from the Hub
    (ground truth — better than any params-times-bits estimate).
  - Chat memory (KV cache) = GQA-aware math from the model's real config
    (layers, hidden, kv-heads) when available; a conservative parameter-count
    heuristic when the repo is gated (labelled as estimated).
  - Working space includes a +0.577 GB buffer — the 95% load-success margin
    oobabooga fitted over 19,517 real measurements (gguf-vram-formula).
  - Non-GGUF families (vision / image gen / audio / embeddings / data) carry a
    single memory figure whose provenance is vendor-published, community-
    reported, or estimated — and the UI says which.

The catalogue is baked into the repo at build time (refreshed by
scripts/refresh_catalogue.py), so the running app makes no network calls.
"""

import json
from functools import lru_cache
from pathlib import Path

from .hardware import HardwareSpec
from .runtimes import pick_runtimes
from .speed import bandwidth_for_spec, predict_decode_tps, feel_text

# The baked-in catalogue sits next to this module's package directory
# (the parent of the directory that contains this file).
_CATALOGUE_PATH = Path(__file__).resolve().parent.parent / "catalogue.json"

# We only fill a budget to this fraction — the rest is breathing room.
_SAFETY_FILL = 0.90
# oobabooga's fitted 95%-load-success buffer (GB), cited in the UI footnote.
_CONFIDENCE_BUFFER_GB = 0.577

# Verdict key -> short label returned to the UI as `verdict_word`.
_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}
# Colours attached to the two gauge breakdown segments (model / everything else).
_C_MODEL = "#818CF8"
_C_WORK = "#868E9C"

# Quant ladder quality order (matches scripts/refresh_catalogue.py).
_QUANT_ORDER = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
# IQ4_XS itself still counts as 4-bit: the fast path in `_eval_gguf` only stops
# at quants ranked strictly AFTER this index.
_FOUR_BIT_RANK = _QUANT_ORDER.index("IQ4_XS")   # > this index = sub-4-bit
# Quants tried, in this order, by the spill-into-RAM pass of `_eval_gguf`.
_COMPROMISE_QUANTS = ["Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]


# --------------------------------------------------------------------------
# Use cases
# --------------------------------------------------------------------------

class UC:
    """A single goal ("use case") the user can pick, plus its engine knobs.

    Constructor arguments map onto attributes as follows:

    - ``key``    -> ``key``: identifier used to look the goal up.
    - ``plain``  -> ``plain_name``: human-readable label.
    - ``family`` -> ``family``: model family the goal belongs to.
    - ``ctx``    -> ``context_tokens``: context length used for memory math.
    - ``min_b`` / ``good_b``: LLM-quality bars in billions of parameters.
    - ``factor`` -> ``overhead_factor``: multiplier applied to memory needs.
    - ``note``   -> ``note``: optional caveat text for this goal.
    """

    def __init__(self, key, plain, family, ctx=4096, min_b=0.0, good_b=0.0,
                 factor=1.0, note=""):
        self.key = key
        self.plain_name = plain
        self.family = family
        self.context_tokens = ctx
        # The quality bars default to 0 because they are meaningless outside
        # text models: a 0.003B YOLO is a complete, excellent model, not a
        # too-small LLM. Only the text use cases set them explicitly.
        self.min_b = min_b
        self.good_b = good_b
        self.overhead_factor = factor
        self.note = note


# Registry of every selectable goal, keyed by `UC.key`.
# Positional arguments follow `UC.__init__`: key, plain name, family,
# context tokens, min_b, good_b, overhead factor. Non-text goals leave the
# quality bars at their 0.0 defaults.
USE_CASES = {u.key: u for u in [
    UC("chat", "Just chatting / asking questions", "llm", 4096, 0.5, 3.0),
    UC("writing", "Writing & summarising", "llm", 4096, 1.5, 7.0),
    UC("coding", "Coding help", "llm", 8192, 3.0, 7.0,
       note="Bigger models are much more reliable for code."),
    UC("agents", "Agents & tool use", "llm", 8192, 7.0, 7.0, 1.15,
       note="Needs steady instruction-following — go medium or larger."),
    UC("rag", "Chat with your documents", "llm", 16384, 3.0, 7.0,
       note="Long documents use extra memory for context — that's included here."),
    UC("translate", "Translation", "llm", 4096, 1.5, 7.0),
    UC("finetune", "Fine-tune an LLM (LoRA)", "llm", 2048, 3.0, 7.0, 2.2,
       note="Training needs roughly 2-3x the memory of just chatting. That's baked into these numbers."),
    UC("custom", "Your custom goal", "llm", 4096, 0.5, 7.0),
    UC("vlm", "Chat about images & video", "vlm", 4096, 1.5, 4.0),
    UC("detect", "Object detection", "vision"),
    UC("segment", "Image segmentation", "vision"),
    UC("pose", "Pose estimation (2D & 6-DoF)", "vision"),
    UC("classify", "Image classification", "vision"),
    UC("depth", "Depth estimation", "vision"),
    UC("ocr", "Read text from images (OCR)", "vision"),
    UC("train-vision", "Train a vision model", "vision", factor=3.0,
       note="Training needs roughly 3x the memory of running the same model."),
    UC("imagegen", "Generate images", "imagegen"),
    UC("inpaint", "Edit / inpaint images", "imagegen"),
    UC("upscale", "Upscale / restore images", "imagegen"),
    UC("videogen", "Generate video", "imagegen"),
    UC("bgremove", "Remove backgrounds", "imagegen"),
    UC("stt", "Speech to text", "audio"),
    UC("tts", "Text to speech / voice", "audio"),
    UC("music", "Generate music", "audio"),
    UC("embed", "Semantic search / embeddings", "embed"),
    UC("forecast", "Time-series forecasting", "data"),
    UC("tabular", "Predict from spreadsheets", "data"),
]}

# Use cases answered by the whole LLM family (entries don't list these).
# `_by_use_case` files every "llm" catalogue entry under each of these keys.
_TEXT_UCS = {"chat", "writing", "coding", "agents", "rag", "translate",
             "finetune", "custom"}

# Static tool suggestions per model family. Each item carries the keys
# "name", "what", "install" and "tag". `advise_real` returns the matching list
# as `tools` for every family except "llm" (which normally gets its list from
# `pick_runtimes(spec)`), and also in its "not covered yet" response.
_TOOLS = {
    "llm": [
        {"name": "Ollama", "what": "Type one line; it downloads and runs the model for you.",
         "install": "Get it from ollama.com", "tag": "Easiest"},
        {"name": "LM Studio", "what": "A point-and-click app with a chat window, no commands.",
         "install": "Download from lmstudio.ai", "tag": "Easy"},
        {"name": "llama.cpp", "what": "The lightweight engine under the hood. Runs GGUF files directly.",
         "install": "Releases on GitHub", "tag": "Advanced"},
    ],
    "vision": [
        {"name": "Ultralytics", "what": "One pip install, then detect objects from a webcam or file.",
         "install": "pip install ultralytics", "tag": "Easiest"},
        {"name": "PyTorch", "what": "Full control for custom pipelines and training.",
         "install": "pytorch.org", "tag": "Advanced"},
    ],
    "imagegen": [
        {"name": "ComfyUI", "what": "Powerful visual node editor for image/video pipelines.",
         "install": "Download from GitHub", "tag": "Moderate"},
        {"name": "diffusers", "what": "Hugging Face's Python library for generation pipelines.",
         "install": "pip install diffusers", "tag": "Moderate"},
        {"name": "Fooocus", "what": "Image generation that 'just works': one folder, double-click.",
         "install": "Download from GitHub", "tag": "Easiest"},
    ],
    "audio": [
        {"name": "faster-whisper", "what": "Fast, accurate transcription with a tiny install.",
         "install": "pip install faster-whisper", "tag": "Easiest"},
        {"name": "whisper.cpp", "what": "Runs Whisper efficiently on CPU and small machines.",
         "install": "Build from GitHub", "tag": "Advanced"},
    ],
    "embed": [
        {"name": "sentence-transformers", "what": "Turn text into searchable vectors in a few lines.",
         "install": "pip install sentence-transformers", "tag": "Easiest"},
        {"name": "Chroma", "what": "A simple local database to store and search those vectors.",
         "install": "pip install chromadb", "tag": "Easy"},
    ],
    "data": [
        {"name": "Python + pip", "what": "These models ship as small Python packages.",
         "install": "pip install (see the model card)", "tag": "Easiest"},
    ],
}
# VLM goals reuse the LLM tool list (the same list object, not a copy).
_TOOLS["vlm"] = _TOOLS["llm"]


# --------------------------------------------------------------------------
# Catalogue access
# --------------------------------------------------------------------------

@lru_cache(maxsize=1)
def catalogue() -> dict:
    """Parse the catalogue JSON file and return it.

    The result is memoised, so the file is read at most once per process and
    every caller shares the same parsed object.
    """
    with _CATALOGUE_PATH.open(encoding="utf-8") as handle:
        return json.load(handle)


@lru_cache(maxsize=1)
def _by_use_case() -> dict:
    """Index catalogue entries by use-case key (memoised).

    "llm" entries are filed under every key in `_TEXT_UCS`, "vlm" entries under
    "vlm", and every other family under whatever its own "use_cases" list
    names. Each bucket is ordered largest `params_b` first.
    """
    index: dict[str, list[dict]] = {}
    for entry in catalogue()["entries"]:
        family = entry["family"]
        if family == "llm":
            goals = list(_TEXT_UCS)
        elif family == "vlm":
            goals = ["vlm"]
        else:
            goals = entry.get("use_cases", [])
        for goal in goals:
            if goal not in index:
                index[goal] = []
            index[goal].append(entry)
    for bucket in index.values():
        bucket.sort(key=lambda item: item.get("params_b", 0), reverse=True)
    return index


def catalogue_date() -> str:
    """Return the first 10 characters of the catalogue's "generated_at" field.

    Yields an empty string when the field is absent.
    """
    stamp = catalogue().get("generated_at", "")
    return stamp[:10]


# --------------------------------------------------------------------------
# Memory math
# --------------------------------------------------------------------------

# Fallback architecture shapes by parameter count (conservative typicals),
# used only when a gated repo hides its config.json.
# Each row is (max params in billions, layers, hidden size); first match wins.
_ARCH_FALLBACK = [
    (1.5, 24, 2048), (4.5, 28, 3072), (9.0, 32, 4096),
    (16.0, 40, 5120), (40.0, 48, 6656), (1e9, 80, 8192),
]


def _kv_gb(entry: dict, ctx: int) -> tuple[float, bool]:
    """KV-cache GB for `ctx` tokens. Returns (gb, exact?).

    `ctx` is first capped at the entry's "context_len" when that is set.

    When the entry carries an "arch" dict (keys "hidden", "n_heads",
    "n_layers" and optionally "n_kv_heads"), the figure is computed from it
    and flagged exact. Otherwise a shape is looked up in `_ARCH_FALLBACK` by
    "params_b" (4.0 when missing or null) and the result is flagged as an
    estimate.
    """
    ctx = min(ctx, entry.get("context_len") or ctx)
    arch = entry.get("arch")
    if arch:
        n_heads = arch["n_heads"]
        # A config with no separate KV-head count describes plain multi-head
        # attention, where every attention head keeps its own K/V — so fall
        # back to n_heads instead of failing on the missing key.
        n_kv_heads = arch.get("n_kv_heads") or n_heads
        per_layer = arch["hidden"] * n_kv_heads / n_heads
        # NOTE(review): the two factors of 2 look like "K and V" and
        # "2 bytes per value" — confirm against the catalogue refresh script.
        return 2 * arch["n_layers"] * per_layer * ctx * 2 / 1e9, True
    params = entry.get("params_b")
    if params is None:   # key missing OR explicitly null in the catalogue
        params = 4.0
    for cap, layers, hidden in _ARCH_FALLBACK:
        if params <= cap:
            # NOTE(review): 0.30 presumably stands in for a typical
            # kv-heads / heads ratio — confirm.
            return 2 * layers * hidden * ctx * 2 * 0.30 / 1e9, False
    return 1.0, False


def _overhead_gb(weights: float, factor: float) -> float:
    """Working-space GB on top of the weights, rounded to 2 decimals.

    A factor of 2.0 or more is treated as training, where the extra memory
    scales with the weights themselves; below that the confidence buffer plus
    8% of the weights is scaled by the factor.
    """
    is_training = factor >= 2.0
    if is_training:
        # training: optimizer state + activations dominate
        extra = _CONFIDENCE_BUFFER_GB + weights * (factor - 1.0)
    else:
        extra = (_CONFIDENCE_BUFFER_GB + 0.08 * weights) * factor
    return round(extra, 2)


def _estimate(entry: dict, quant: dict, ctx: int, factor: float) -> dict:
    """Memory breakdown for running `entry` at `quant` with `ctx` tokens.

    Returns a dict with "weights" (the quant's "file_gb"), "kv", "overhead",
    their rounded sum "total", and "kv_exact" as reported by `_kv_gb`.
    """
    file_gb = quant["file_gb"]
    kv_raw, kv_is_exact = _kv_gb(entry, ctx)
    kv_gb = round(kv_raw, 2)
    working_gb = _overhead_gb(file_gb, factor)

    breakdown = {"weights": file_gb, "kv": kv_gb, "overhead": working_gb}
    breakdown["total"] = round(file_gb + kv_gb + working_gb, 2)
    breakdown["kv_exact"] = kv_is_exact
    return breakdown


# --------------------------------------------------------------------------
# Per-entry evaluation
# --------------------------------------------------------------------------

def _quant_rank(key: str) -> int:
    """Position of `key` in `_QUANT_ORDER`; unknown keys rank after all of them."""
    try:
        return _QUANT_ORDER.index(key)
    except ValueError:
        return len(_QUANT_ORDER)


def _feel(entry: dict, verdict: str, spec: HardwareSpec) -> str:
    """Plain-language speed description used when no tok/s prediction exists.

    Picks the wording from the verdict and the entry's active parameter count
    ("active_params_b" when set, otherwise "params_b", defaulting to 4).
    `spec` is accepted but not consulted.
    """
    if verdict == "no":
        return "—"

    moe_active = entry.get("active_params_b")
    active = moe_active or entry.get("params_b", 4)

    if verdict == "tight":
        if moe_active:
            return f"Usable even part-offloaded (only {entry['active_params_b']:g}B active per word)"
        return "Slow — usable for short tasks, not snappy chat"

    # Thresholds in billions of active parameters, smallest first.
    tiers = (
        (4, "Fast — replies feel instant"),
        (14, "Comfortable — quick enough for live chat"),
    )
    for limit, wording in tiers:
        if active <= limit:
            return wording
    return "Steady — fine, just not instant on big answers"


def _eval_gguf(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for an LLM/VLM entry with a real quant ladder.

    Returns {"verdict", "quant", "est"} where the verdict is:

    - "great": the machine has a fast path and some quant no lower than
      `_FOUR_BIT_RANK` fits within the safety-filled fast budget;
    - "tight": one of `_COMPROMISE_QUANTS` fits the safety-filled total budget;
    - "no": nothing fits; the lowest-ranked quant (or a synthetic 4-bit guess
      when the ladder is empty) is reported for reference.
    """
    ladder = sorted(entry.get("quants", []), key=lambda item: _quant_rank(item["key"]))
    ctx = uc.context_tokens
    factor = uc.overhead_factor
    fast_cap = spec.fast_budget_gb * _SAFETY_FILL
    total_cap = spec.total_budget_gb * _SAFETY_FILL

    # Fast path: best quality quant >= 4-bit that fits the GPU budget.
    if spec.has_fast_path:
        for candidate in ladder:
            if _quant_rank(candidate["key"]) > _FOUR_BIT_RANK:
                # don't call a sub-4-bit squeeze "runs great"
                break
            est = _estimate(entry, candidate, ctx, factor)
            if est["total"] <= fast_cap:
                return {"verdict": "great", "quant": candidate, "est": est}

    # Compromise: spill into ordinary RAM, shrinking quality only if needed.
    for wanted in _COMPROMISE_QUANTS:
        match = None
        for candidate in ladder:
            if candidate["key"] == wanted:
                match = candidate
                break
        if not match:
            continue
        est = _estimate(entry, match, ctx, factor)
        if est["total"] <= total_cap:
            return {"verdict": "tight", "quant": match, "est": est}

    if ladder:
        fallback = ladder[-1]
    else:
        fallback = {"key": "Q4_K_M", "plain": "Balanced (4-bit)",
                    "file_gb": entry.get("params_b", 4) * 0.6}
    return {"verdict": "no", "quant": fallback,
            "est": _estimate(entry, fallback, ctx, factor)}


def _eval_flat(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for a non-GGUF entry with one memory figure.

    The need is the entry's "mem_gb" (4.0 when absent) times the use case's
    overhead factor. Returns {"verdict", "quant", "est"} in the same shape as
    `_eval_gguf`, with the whole need reported as "weights".
    """
    need = round(entry.get("mem_gb", 4.0) * uc.overhead_factor, 2)
    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    est = {"weights": need, "kv": 0.0, "overhead": 0.0, "total": need, "kv_exact": False}
    setting = {"key": "full", "plain": "Full model", "file_gb": need}

    def outcome(verdict: str) -> dict:
        return {"verdict": verdict, "quant": setting, "est": est}

    has_gpu = spec.has_fast_path
    if has_gpu and need <= fast * _SAFETY_FILL:
        return outcome("great")
    # Image/video generation without a GPU is minutes-per-image: say so.
    if entry["family"] == "imagegen" and not has_gpu and need > 4:
        return outcome("no")
    return outcome("tight" if need <= total * _SAFETY_FILL else "no")


def _evaluate(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Evaluate one catalogue entry and tag the result with the entry itself.

    Entries with a non-empty "quants" list go through `_eval_gguf`; all others
    through `_eval_flat`.
    """
    evaluator = _eval_gguf if entry.get("quants") else _eval_flat
    result = evaluator(entry, spec, uc)
    result["entry"] = entry
    return result


# --------------------------------------------------------------------------
# Advise: full UI-shaped result
# --------------------------------------------------------------------------

def _speed_pred(r: dict, spec: HardwareSpec, bw: float | None) -> dict | None:
    """Measured/roofline tok/s prediction for a GGUF option, if bandwidth known.

    Returns None for entries without quants, for "no" verdicts, and when no
    bandwidth figure is available; otherwise whatever `predict_decode_tps`
    returns.
    """
    entry, verdict, est = r["entry"], r["verdict"], r["est"]
    if not (entry.get("quants") and verdict != "no" and bw):
        return None

    total_params = entry.get("params_b") or 1.0
    active_fraction = (entry.get("active_params_b") or total_params) / total_params

    offload = 0.0
    if verdict == "tight":
        # share of the read bytes that live in slow system RAM
        fast_room = spec.fast_budget_gb * _SAFETY_FILL
        spilled = 1 - fast_room / max(est["total"], 0.1)
        offload = max(0.0, min(1.0, spilled))

    return predict_decode_tps(
        bandwidth_gbs=bw,
        weights_gb=est["weights"],
        kv_gb=est["kv"],
        active_fraction=active_fraction,
        offload_fraction=offload,
    )


def _option_json(r: dict, spec: HardwareSpec, bw: float | None = None) -> dict:
    """Flatten one evaluation result into the dict the UI lists as an option.

    The "feel" text comes from the speed prediction when there is one, else
    from `_feel`; a flat (non-quant) entry that is only "tight" on a machine
    without a fast path gets a fixed processor-only wording instead.
    """
    entry, verdict = r["entry"], r["verdict"]

    prediction = _speed_pred(r, spec, bw)
    if prediction:
        feel = feel_text(prediction)
    else:
        feel = _feel(entry, verdict, spec)
    if not entry.get("quants") and verdict == "tight" and not spec.has_fast_path:
        feel = "Runs on the processor — slow but workable"

    links = entry.get("links") or {}
    return {
        "verdict": verdict,
        "model": entry["name"],
        "desc": entry.get("good_for", ""),
        "setting": r["quant"].get("plain", "Full model"),
        "memory": "Too big" if verdict == "no" else f"{r['est']['total']:g} GB",
        "feel": feel,
        "params_b": entry.get("params_b"),
        "active_params_b": entry.get("active_params_b"),
        # Prefer the Hugging Face link; fall back to the project home page.
        "url": links.get("hf") or links.get("home", ""),
        "license": entry.get("license", ""),
        "license_note": entry.get("license_note", ""),
        "gated": entry.get("gated", False),
        "run": entry.get("run", {}),
        "provenance": entry.get("provenance", "estimated"),
        "stale": entry.get("stale", False),
    }


def _pick_headline(results: list[dict], uc: UC) -> tuple[dict | None, bool]:
    """Choose the result to headline and say whether it meets the goal's bar.

    Returns ``(result, meets_goal)``. Results whose `params_b` reaches
    `uc.min_b` are preferred ("great" before "tight"); if none does, the best
    of what runs at all is returned with ``meets_goal=False``. ``(None, False)``
    means nothing runs.
    """
    def size(result):
        return result["entry"].get("params_b", 0)

    great, tight = [], []
    for result in results:
        if result["verdict"] == "great":
            great.append(result)
        elif result["verdict"] == "tight":
            tight.append(result)

    great_ok = [r for r in great if size(r) >= uc.min_b]
    if great_ok:
        # Fast-and-capable is the best answer: biggest model that runs great.
        return max(great_ok, key=size), True

    tight_ok = [r for r in tight if size(r) >= uc.min_b]
    if tight_ok:
        if uc.good_b > 0:
            # LLMs: close to the ideal size, not needlessly oversized-and-slow.
            ceiling = uc.good_b * 1.5
            near_ideal = [r for r in tight_ok if size(r) <= ceiling]
            if near_ideal:
                choice = max(near_ideal, key=size)
            else:
                choice = min(tight_ok, key=size)
            return choice, True
        # Non-LLM families: the biggest model that fits is simply the best one.
        return max(tight_ok, key=size), True

    # Nothing clears the quality bar: fall back to whatever runs.
    if great:
        return max(great, key=size), False
    if tight:
        return min(tight, key=size), False
    return None, False


def _provenance_line(headline: dict | None) -> str:
    if not headline:
        return ""
    e = headline["entry"]
    prov = e.get("provenance", "estimated")
    if prov == "filesize":
        line = ("Model size is the exact file size on Hugging Face. Chat memory and "
                "working space are conservative estimates with a 0.58 GB safety buffer "
                "(the 95% load-success margin fitted from ~19,500 real measurements).")
        if not headline["est"].get("kv_exact"):
            line += " This repo hides its exact shape, so chat memory is estimated from its size."
        return line
    if prov == "vendor":
        return "The memory figure is the maker's own published number."
    if prov == "community":
        return "The memory figure is community-reported, not vendor-published — treat it as a good estimate."
    return "The memory figure is estimated from the model's size — conservative, not measured."


def advise_real(payload: dict, spec: HardwareSpec) -> dict:
    """Build the full UI-shaped answer for one goal on one machine.

    Args:
        payload: request dict; only "usecase" is read. An unknown or missing
            key falls back to the "chat" use case.
        spec: the machine being checked.

    Returns:
        A dict with the keys "catalogue_version", "verdict", "verdict_word",
        "headline", "detail", "note", "gauge", "options", "tools", "commands",
        "provenance", "speed", "meets_goal", "use_case" and "headline_model".
        The "not covered yet" response carries the same keys (with "speed" set
        to None and "headline_model" to ""), so callers can index either shape
        the same way.
    """
    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    candidates = _by_use_case().get(uc.key, [])

    # Honest gap, not a fake answer: if the catalogue doesn't cover a goal yet,
    # say so and point at the live lookup instead of inventing options.
    if not candidates:
        return {
            "catalogue_version": catalogue_date(),
            "verdict": "tight", "verdict_word": "Not covered yet",
            "headline": "Our catalogue doesn't cover this goal yet.",
            "detail": ("FitCheck only answers from verified model data, and nothing in the "
                       "current catalogue serves this goal — so rather than guess, we'd "
                       "rather say so. If you know a specific model for it, paste its "
                       "Hugging Face id in the <b>'Have a specific model in mind?'</b> box "
                       "and we'll check that exact model against your machine."),
            "note": "The catalogue grows every night; niche goals are next in line.",
            "gauge": {}, "options": [], "tools": _TOOLS.get(uc.family, []),
            "commands": {"intro": "", "items": []}, "provenance": "",
            # Same keys as the normal response below, so callers such as
            # min_specs can read them without a KeyError.
            "speed": None,
            "meets_goal": False, "use_case": uc.plain_name,
            "headline_model": "",
        }

    results = [_evaluate(e, spec, uc) for e in candidates]

    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    headline, meets_goal = _pick_headline(results, uc)

    bw, bw_src = bandwidth_for_spec(spec)
    options = [_option_json(r, spec, bw) for r in results]

    if headline:
        # Something runs: describe the chosen model and its memory breakdown.
        e, est, q = headline["entry"], headline["est"], headline["quant"]
        hv = headline["verdict"]
        need = est["total"]
        where = ("on your Mac" if spec.is_apple_silicon and hv == "great" else
                 "on your graphics card" if hv == "great" and spec.has_fast_path else
                 "using your computer's memory" if hv == "tight" else "")
        if hv == "great":
            head_text = f"Yes, you can run {e['name']} {where}, today."
        else:
            head_text = f"Sort of. {e['name']} will run {where}, with trade-offs."
        if e.get("quants"):
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b> at the "
                f"<b>{q.get('plain', q['key'])}</b> setting. {e.get('good_for','')} "
                f"It needs about <b>{need:g} GB</b> "
                f"(the model file is {est['weights']:g} GB — exact size on Hugging Face — "
                f"plus {est['kv']:g} GB chat memory and {est['overhead']:g} GB working space), "
                f"and you have roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        else:
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b>. "
                f"{e.get('good_for','')} It needs about <b>{need:g} GB</b>, and you have "
                f"roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        model_part, work_part = est["weights"], round(need - est["weights"], 2)
    else:
        # Nothing runs: report the lightest option so the gap is visible.
        hv = "no"
        smallest = min(results, key=lambda r: r["est"]["total"], default=None)
        need = smallest["est"]["total"] if smallest else 1.0
        head_text = "This goal is a stretch on this machine. Here's the honest picture."
        detail = (
            f"Even the lightest option here needs about <b>{need:g} GB</b>, but this "
            f"machine can offer only about <b>{total:g} GB</b> once the operating system "
            f"has its share. That's not a failure — small computers just have small "
            f"budgets. Adding memory, or a free cloud notebook, would open this up."
        )
        # No real breakdown exists here; the gauge gets a fixed 80/20 split.
        model_part, work_part = round(need * 0.8, 2), round(need * 0.2, 2)

    note_bits = []
    if headline and not meets_goal:
        note_bits.append(
            f"This is the best this machine can do, but it's on the small side for "
            f"{uc.plain_name.lower()} — treat results as 'okay', not great.")
    if uc.note:
        note_bits.append(uc.note)
    if headline and headline["entry"].get("mem_note"):
        note_bits.append(headline["entry"]["mem_note"])
    if headline and headline["entry"].get("license_note"):
        note_bits.append(headline["entry"]["license_note"])
    if headline and headline["entry"].get("gated"):
        note_bits.append("This model is gated: accept its terms on Hugging Face once before downloading.")

    # The gauge is scaled to the larger of budget and need, plus 5% headroom.
    scale = max(total, need, 1) * 1.05
    gauge = {
        "need_gb": f"{need:g} GB needed",
        "fast_gb": f"{fast:g} GB", "total_gb": f"{total:g} GB",
        "fill_pct": round(min(need / scale, 1.0) * 100, 1),
        "mark_pct": round(min(fast / scale, 1.0) * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Chat memory + working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    speed = None
    if headline:
        pred = _speed_pred(headline, spec, bw)
        if pred:
            speed = {**pred, "bw": bw, "bw_source": bw_src,
                     "model": headline["entry"]["name"]}

    if uc.family == "llm":
        tools = [{"name": r.name, "what": r.plain_what, "install": r.install_hint,
                  "tag": r.difficulty} for r in pick_runtimes(spec)]
    else:
        tools = _TOOLS.get(uc.family, [])

    commands = {"intro": "These get you running in minutes — real commands for the exact pick above.",
                "items": []}
    if headline:
        run = headline["entry"].get("run", {})
        if run.get("ollama"):
            commands["items"].append({"label": "Easy way (Ollama)", "code": run["ollama"]})
        if run.get("llamacpp"):
            commands["items"].append({"label": "Power way (llama.cpp)", "code": run["llamacpp"]})
        if run.get("pip"):
            commands["items"].append({"label": "Install", "code": run["pip"]})

    # Memory provenance first, then (only when a speed prediction exists) a
    # sentence saying which kind of speed estimate was used.
    provenance = _provenance_line(headline)
    if speed:
        basis = ("predicted from real community measurements"
                 if speed["method"] == "measured-model"
                 else "an analytical bandwidth estimate")
        provenance += f" Speed is {basis} — see 'Why this speed?' below."

    return {
        "catalogue_version": catalogue_date(),
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": head_text,
        "detail": detail,
        "note": "  ".join(note_bits),
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        "provenance": provenance,
        "speed": speed,
        "meets_goal": meets_goal,
        "use_case": uc.plain_name,
        "headline_model": headline["entry"]["name"] if headline else "",
    }


# --------------------------------------------------------------------------
# Reverse mode: "what machine do I need for X?"
# --------------------------------------------------------------------------

# Ladders are cheap -> expensive. Budget hints are rough 2026 street prices for
# a whole sensible build, shown as guidance, not gospel.
# Each rung is (label, hardware kwargs read by `_spec_for_tier`, price hint);
# `min_specs` walks a ladder top to bottom and keeps the first rung that works.
_PC_LADDER = [
    ("Any old laptop (8 GB RAM, no GPU)", dict(ram_gb=8, vram_gb=0, vendor="none"), "what you may already own"),
    ("16 GB RAM laptop, no GPU", dict(ram_gb=16, vram_gb=0, vendor="none"), "~$500"),
    ("16 GB RAM + RTX 4060 (8 GB)", dict(ram_gb=16, vram_gb=8, vendor="nvidia"), "~$800"),
    ("16 GB RAM + RTX 3060 (12 GB)", dict(ram_gb=16, vram_gb=12, vendor="nvidia"), "~$900"),
    ("32 GB RAM + RTX 5070 (12 GB)", dict(ram_gb=32, vram_gb=12, vendor="nvidia"), "~$1,300"),
    ("32 GB RAM + RTX 5070 Ti (16 GB)", dict(ram_gb=32, vram_gb=16, vendor="nvidia"), "~$1,600"),
    ("32 GB RAM + RTX 4090 (24 GB)", dict(ram_gb=32, vram_gb=24, vendor="nvidia"), "~$2,500"),
    ("64 GB RAM + RTX 5090 (32 GB)", dict(ram_gb=64, vram_gb=32, vendor="nvidia"), "~$3,500+"),
]
# Mac rungs only carry `ram_gb`; `_spec_for_tier` fills in the Apple fields.
_MAC_LADDER = [
    ("Mac with 16 GB unified memory", dict(ram_gb=16), "~$1,000"),
    ("Mac with 24 GB unified memory", dict(ram_gb=24), "~$1,400"),
    ("Mac with 32 GB unified memory", dict(ram_gb=32), "~$1,800"),
    ("Mac with 64 GB unified memory", dict(ram_gb=64), "~$2,800"),
    ("Mac with 128 GB unified memory", dict(ram_gb=128), "~$4,500+"),
]


def _spec_for_tier(kind: str, hw: dict) -> HardwareSpec:
    """Turn one ladder rung's hardware dict into a `HardwareSpec`.

    `kind == "mac"` builds an Apple-silicon macOS spec from "ram_gb" alone;
    any other kind builds a Windows desktop spec, reading "vendor" and
    "vram_gb" with defaults of "none" and 0.0.
    """
    ram = hw["ram_gb"]
    if kind != "mac":
        return HardwareSpec(
            os="windows",
            ram_gb=ram,
            gpu_vendor=hw.get("vendor", "none"),
            vram_gb=hw.get("vram_gb", 0.0),
            form_factor="desktop",
        )
    return HardwareSpec(
        os="macos",
        ram_gb=ram,
        gpu_vendor="apple",
        is_apple_silicon=True,
        form_factor="mac",
    )


def min_specs(usecases) -> dict:
    """For one OR several goals: the cheapest tier where EVERY goal genuinely
    works (the union of requirements, not a sum), the tier where every goal
    runs great, and what each goal would actually run on those tiers.
    Pure engine inversion — fully offline.

    Args:
        usecases: a single use-case key or an iterable of keys. Unknown keys
            fall back to "chat"; duplicates are dropped; an empty/None value
            means ["chat"].

    Returns:
        A dict with "use_case", "goals", "catalogue_version", "note",
        "disclaimer", and "pc" / "mac" sub-dicts each holding a "minimum" and
        a "comfortable" tier (None when no rung on that ladder qualifies).
    """
    if isinstance(usecases, str):
        usecases = [usecases]
    seen = set()
    ucs = []
    for u in usecases or ["chat"]:
        uc = USE_CASES.get(u, USE_CASES["chat"])
        if uc.key not in seen:
            seen.add(uc.key)
            ucs.append(uc)

    def walk(kind, ladder):
        """Return (minimum, comfortable) tiers for one ladder, cheapest first."""
        minimum = comfortable = None
        for label, hw, price in ladder:
            spec = _spec_for_tier(kind, hw)
            per_goal, all_meet, all_great = [], True, True
            for uc in ucs:
                res = advise_real({"usecase": uc.key}, spec)
                all_meet &= res["meets_goal"] and res["verdict"] in ("great", "tight")
                all_great &= res["meets_goal"] and res["verdict"] == "great"
                # The "not covered yet" response may carry no headline model
                # at all, so read it defensively instead of indexing.
                per_goal.append({"goal": uc.plain_name,
                                 "model": res.get("headline_model") or "nothing realistic",
                                 "verdict": res["verdict"]})
            tier = {"label": label, "price": price, "goals": per_goal,
                    "runs": "; ".join(f"{g['goal']}: {g['model']}" for g in per_goal)
                            if len(per_goal) > 1 else per_goal[0]["model"]}
            if minimum is None and all_meet:
                minimum = tier
            if comfortable is None and all_great:
                comfortable = tier
            if minimum and comfortable:
                # Both answers found; pricier rungs cannot change them.
                break
        return minimum, comfortable

    pc_min, pc_comfy = walk("pc", _PC_LADDER)
    mac_min, mac_comfy = walk("mac", _MAC_LADDER)
    notes = [uc.note for uc in ucs if uc.note]
    return {
        "use_case": " + ".join(uc.plain_name for uc in ucs),
        "goals": [uc.plain_name for uc in ucs],
        "catalogue_version": catalogue_date(),
        "note": "  ".join(notes),
        "pc": {"minimum": pc_min, "comfortable": pc_comfy},
        "mac": {"minimum": mac_min, "comfortable": mac_comfy},
        "disclaimer": ("Price hints are rough 2026 street prices for a sensible whole "
                       "build — they vary a lot by region and second-hand luck. The "
                       "memory math is the same conservative engine as the main check."
                       + (" Tiers are the union of every goal you picked: each one has "
                          "to genuinely work." if len(ucs) > 1 else "")),
    }