FitCheck / engine /real_advisor.py
cn0303's picture
Speed predictions with receipts: bandwidth roofline, real-runs chart, honest provenance
ee8ca43 verified
"""
Engine v2: honest verdicts over REAL models from catalogue.json.
This replaces both the size-class advisor and the placeholder families. Every
option it returns is an actual model with a Hugging Face link, a license, and
memory figures with provenance:
- LLM / VLM weights = the EXACT GGUF file size in bytes from the Hub
(ground truth — better than any params-times-bits estimate).
- Chat memory (KV cache) = GQA-aware math from the model's real config
(layers, hidden, kv-heads) when available; a conservative parameter-count
heuristic when the repo is gated (labelled as estimated).
- Working space includes a +0.577 GB buffer — the 95% load-success margin
oobabooga fitted over 19,517 real measurements (gguf-vram-formula).
- Non-GGUF families (vision / image gen / audio / embeddings / data) carry a
single memory figure whose provenance is vendor-published, community-
reported, or estimated — and the UI says which.
The catalogue is baked into the repo at build time (refreshed by
scripts/refresh_catalogue.py), so the running app makes no network calls.
"""
import json
from functools import lru_cache
from pathlib import Path
from .hardware import HardwareSpec
from .runtimes import pick_runtimes
from .speed import bandwidth_for_spec, predict_decode_tps, feel_text
# The baked-in catalogue lives in the parent of this file's directory.
_CATALOGUE_PATH = Path(__file__).resolve().parent.parent / "catalogue.json"
# We only fill a budget to this fraction — the rest is breathing room.
_SAFETY_FILL = 0.90
# oobabooga's fitted 95%-load-success buffer (GB), cited in the UI footnote.
_CONFIDENCE_BUFFER_GB = 0.577
# Verdict key -> short phrase returned to the UI as "verdict_word".
_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}
# Colours for the two gauge breakdown segments (model vs. working memory).
_C_MODEL = "#818CF8"
_C_WORK = "#868E9C"
# Quant ladder quality order (matches scripts/refresh_catalogue.py).
_QUANT_ORDER = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
# Last rank that may still earn a "great" verdict: _eval_gguf stops at ranks
# strictly greater than this index, i.e. everything after IQ4_XS is sub-4-bit.
_FOUR_BIT_RANK = _QUANT_ORDER.index("IQ4_XS")  # ranks > this index = sub-4-bit
# Quants tried, in this order, when a model has to spill into ordinary RAM.
_COMPROMISE_QUANTS = ["Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
# --------------------------------------------------------------------------
# Use cases
# --------------------------------------------------------------------------
class UC:
    """A user-facing goal plus the sizing knobs the engine applies to it.

    Constructor arguments map onto attributes as follows:
      key    -> key              (lookup key of the use case)
      plain  -> plain_name       (human-readable label)
      family -> family           (model family that serves the goal)
      ctx    -> context_tokens   (context length used for memory sizing)
      min_b  -> min_b            (minimum LLM size bar, billions of params)
      good_b -> good_b           (comfortable LLM size bar, billions of params)
      factor -> overhead_factor  (memory multiplier applied by the estimators)
      note   -> note             (optional caveat text)
    """

    def __init__(self, key, plain, family, ctx=4096, min_b=0.0, good_b=0.0,
                 factor=1.0, note=""):
        # The size bars only mean something for LLM-style goals, hence the
        # zero defaults: a 0.003B YOLO is a complete, excellent model rather
        # than a too-small LLM. Only the text use cases pass them explicitly.
        self.key = key
        self.plain_name = plain
        self.family = family
        self.context_tokens = ctx
        self.min_b = min_b
        self.good_b = good_b
        self.overhead_factor = factor
        self.note = note
# Registry of every supported goal, keyed by UC.key.
# Positional arguments after the family are, in order: ctx (context tokens),
# min_b, good_b and factor (overhead multiplier) — see UC.__init__.
USE_CASES = {u.key: u for u in [
    # --- text (LLM) goals: these set explicit size bars ---
    UC("chat", "Just chatting / asking questions", "llm", 4096, 0.5, 3.0),
    UC("writing", "Writing & summarising", "llm", 4096, 1.5, 7.0),
    UC("coding", "Coding help", "llm", 8192, 3.0, 7.0,
       note="Bigger models are much more reliable for code."),
    UC("agents", "Agents & tool use", "llm", 8192, 7.0, 7.0, 1.15,
       note="Needs steady instruction-following — go medium or larger."),
    UC("rag", "Chat with your documents", "llm", 16384, 3.0, 7.0,
       note="Long documents use extra memory for context — that's included here."),
    UC("translate", "Translation", "llm", 4096, 1.5, 7.0),
    UC("finetune", "Fine-tune an LLM (LoRA)", "llm", 2048, 3.0, 7.0, 2.2,
       note="Training needs roughly 2-3x the memory of just chatting. That's baked into these numbers."),
    UC("custom", "Your custom goal", "llm", 4096, 0.5, 7.0),
    # --- vision-language ---
    UC("vlm", "Chat about images & video", "vlm", 4096, 1.5, 4.0),
    # --- vision ---
    UC("detect", "Object detection", "vision"),
    UC("segment", "Image segmentation", "vision"),
    UC("pose", "Pose estimation (2D & 6-DoF)", "vision"),
    UC("classify", "Image classification", "vision"),
    UC("depth", "Depth estimation", "vision"),
    UC("ocr", "Read text from images (OCR)", "vision"),
    UC("train-vision", "Train a vision model", "vision", factor=3.0,
       note="Training needs roughly 3x the memory of running the same model."),
    # --- image / video generation ---
    UC("imagegen", "Generate images", "imagegen"),
    UC("inpaint", "Edit / inpaint images", "imagegen"),
    UC("upscale", "Upscale / restore images", "imagegen"),
    UC("videogen", "Generate video", "imagegen"),
    UC("bgremove", "Remove backgrounds", "imagegen"),
    # --- audio ---
    UC("stt", "Speech to text", "audio"),
    UC("tts", "Text to speech / voice", "audio"),
    UC("music", "Generate music", "audio"),
    # --- embeddings and tabular / time-series data ---
    UC("embed", "Semantic search / embeddings", "embed"),
    UC("forecast", "Time-series forecasting", "data"),
    UC("tabular", "Predict from spreadsheets", "data"),
]}
# Use cases answered by the whole LLM family (entries don't list these).
# _by_use_case files every "llm" catalogue entry under each of these keys.
_TEXT_UCS = {"chat", "writing", "coding", "agents", "rag", "translate",
             "finetune", "custom"}
# Static tool suggestions per model family. Each item carries the four keys
# the UI receives: "name", "what", "install" and "tag".
_TOOLS = {
    "llm": [
        {"name": "Ollama", "what": "Type one line; it downloads and runs the model for you.",
         "install": "Get it from ollama.com", "tag": "Easiest"},
        {"name": "LM Studio", "what": "A point-and-click app with a chat window, no commands.",
         "install": "Download from lmstudio.ai", "tag": "Easy"},
        {"name": "llama.cpp", "what": "The lightweight engine under the hood. Runs GGUF files directly.",
         "install": "Releases on GitHub", "tag": "Advanced"},
    ],
    "vision": [
        {"name": "Ultralytics", "what": "One pip install, then detect objects from a webcam or file.",
         "install": "pip install ultralytics", "tag": "Easiest"},
        {"name": "PyTorch", "what": "Full control for custom pipelines and training.",
         "install": "pytorch.org", "tag": "Advanced"},
    ],
    "imagegen": [
        {"name": "ComfyUI", "what": "Powerful visual node editor for image/video pipelines.",
         "install": "Download from GitHub", "tag": "Moderate"},
        {"name": "diffusers", "what": "Hugging Face's Python library for generation pipelines.",
         "install": "pip install diffusers", "tag": "Moderate"},
        {"name": "Fooocus", "what": "Image generation that 'just works': one folder, double-click.",
         "install": "Download from GitHub", "tag": "Easiest"},
    ],
    "audio": [
        {"name": "faster-whisper", "what": "Fast, accurate transcription with a tiny install.",
         "install": "pip install faster-whisper", "tag": "Easiest"},
        {"name": "whisper.cpp", "what": "Runs Whisper efficiently on CPU and small machines.",
         "install": "Build from GitHub", "tag": "Advanced"},
    ],
    "embed": [
        {"name": "sentence-transformers", "what": "Turn text into searchable vectors in a few lines.",
         "install": "pip install sentence-transformers", "tag": "Easiest"},
        {"name": "Chroma", "what": "A simple local database to store and search those vectors.",
         "install": "pip install chromadb", "tag": "Easy"},
    ],
    "data": [
        {"name": "Python + pip", "what": "These models ship as small Python packages.",
         "install": "pip install (see the model card)", "tag": "Easiest"},
    ],
}
# Vision-language models reuse the LLM tool list (same list object, not a copy).
_TOOLS["vlm"] = _TOOLS["llm"]
# --------------------------------------------------------------------------
# Catalogue access
# --------------------------------------------------------------------------
@lru_cache(maxsize=1)
def catalogue() -> dict:
    """Load and parse catalogue.json; the parsed result is cached after the first call."""
    raw_text = _CATALOGUE_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
@lru_cache(maxsize=1)
def _by_use_case() -> dict:
    """Index catalogue entries by use-case key, largest model first.

    "llm" entries are filed under every key in _TEXT_UCS, "vlm" entries under
    "vlm", and every other family under the keys in its own "use_cases" list
    (entries without that list are not indexed).

    Returns:
        dict mapping use-case key -> list of entry dicts sorted by "params_b"
        descending. The result is cached, so callers share one object and
        should treat it as read-only.
    """
    out: dict[str, list[dict]] = {}
    for e in catalogue()["entries"]:
        family = e["family"]
        if family == "llm":
            ucs = list(_TEXT_UCS)
        elif family == "vlm":
            ucs = ["vlm"]
        else:
            ucs = e.get("use_cases", [])
        for uc in ucs:
            out.setdefault(uc, []).append(e)
    for entries in out.values():
        # `or 0` also covers an explicit null params_b, which would otherwise
        # make the sort raise TypeError when compared against a number.
        entries.sort(key=lambda e: e.get("params_b") or 0, reverse=True)
    return out
def catalogue_date() -> str:
    """Return the first 10 characters of the catalogue's "generated_at" stamp ("" if absent)."""
    stamp = catalogue().get("generated_at", "")
    return stamp[:10]
# --------------------------------------------------------------------------
# Memory math
# --------------------------------------------------------------------------
# Fallback architecture shapes by parameter count (conservative typicals),
# used only when a gated repo hides its config.json.
# Each row is (max params in billions, layers, hidden size); first match wins.
_ARCH_FALLBACK = [
    (1.5, 24, 2048), (4.5, 28, 3072), (9.0, 32, 4096),
    (16.0, 40, 5120), (40.0, 48, 6656), (1e9, 80, 8192),
]


def _kv_gb(entry: dict, ctx: int) -> tuple[float, bool]:
    """KV-cache GB for `ctx` tokens. Returns (gb, exact?).

    Args:
        entry: catalogue entry. Reads "context_len" (caps `ctx` when set),
            "arch" (needs "hidden", "n_kv_heads", "n_heads", "n_layers") and,
            only when "arch" is missing/empty, "params_b".
        ctx: requested context length in tokens.

    Returns:
        (gb, True) when computed from the entry's own "arch" figures, or
        (gb, False) when estimated from _ARCH_FALLBACK by parameter count.
    """
    # Never size the cache for more context than the model supports.
    ctx = min(ctx, entry.get("context_len") or ctx)
    arch = entry.get("arch")
    if arch:
        # Hidden width scaled by the kv-head / head ratio (GQA-aware).
        per_layer = arch["hidden"] * arch["n_kv_heads"] / arch["n_heads"]
        # NOTE(review): the leading 2 and trailing 2 presumably stand for
        # K+V tensors and 2 bytes per value (fp16) — confirm.
        return 2 * arch["n_layers"] * per_layer * ctx * 2 / 1e9, True
    params = entry.get("params_b")
    if params is None:
        # Missing OR explicitly null size: assume 4B instead of crashing on
        # the `None <= cap` comparison below.
        params = 4.0
    for cap, layers, hidden in _ARCH_FALLBACK:
        if params <= cap:
            # NOTE(review): 0.30 presumably approximates a typical
            # kv-head / head ratio for models with hidden configs — confirm.
            return 2 * layers * hidden * ctx * 2 * 0.30 / 1e9, False
    # Only reachable when no fallback row matches (e.g. NaN size).
    return 1.0, False
def _overhead_gb(weights: float, factor: float) -> float:
    """Working-space GB on top of weights and KV cache, rounded to 2 decimals.

    For factor >= 2.0 (the training use cases) the overhead is the confidence
    buffer plus (factor - 1) times the weights; otherwise it is the buffer
    plus 8% of the weights, all scaled by `factor`.
    """
    is_training = factor >= 2.0
    if is_training:
        # training: optimizer state + activations dominate
        extra = _CONFIDENCE_BUFFER_GB + weights * (factor - 1.0)
    else:
        extra = (_CONFIDENCE_BUFFER_GB + 0.08 * weights) * factor
    return round(extra, 2)
def _estimate(entry: dict, quant: dict, ctx: int, factor: float) -> dict:
    """Memory breakdown (GB) for running `entry` at `quant` with `ctx` tokens.

    Returns a dict with "weights" (the quant's file size), "kv", "overhead",
    their rounded sum as "total", and "kv_exact" (whether the KV figure came
    from the entry's real architecture).
    """
    file_gb = quant["file_gb"]
    kv_raw, exact = _kv_gb(entry, ctx)
    kv_gb = round(kv_raw, 2)
    work_gb = _overhead_gb(file_gb, factor)
    total_gb = round(file_gb + kv_gb + work_gb, 2)
    return {
        "weights": file_gb,
        "kv": kv_gb,
        "overhead": work_gb,
        "total": total_gb,
        "kv_exact": exact,
    }
# --------------------------------------------------------------------------
# Per-entry evaluation
# --------------------------------------------------------------------------
def _quant_rank(key: str) -> int:
    """Position of `key` in _QUANT_ORDER; unknown keys rank after every known one."""
    try:
        return _QUANT_ORDER.index(key)
    except ValueError:
        return len(_QUANT_ORDER)
def _feel(entry: dict, verdict: str, spec: HardwareSpec) -> str:
    """Plain-language description of how responsive an option will feel.

    Args:
        entry: catalogue entry; reads "active_params_b" and "params_b".
        verdict: "great", "tight" or "no".
        spec: accepted for signature symmetry; not read by this function.

    Returns:
        A dash for "no"; for "tight" a slow/offloaded message; otherwise a
        speed phrase chosen by the active parameter count (<=4B, <=14B, larger).
    """
    if verdict == "no":
        return "—"
    # Prefer the active size when the entry declares one; the trailing `or 4`
    # also covers an explicit null params_b, which would otherwise make the
    # numeric comparisons below raise TypeError.
    active = entry.get("active_params_b") or entry.get("params_b") or 4
    if verdict == "tight":
        if entry.get("active_params_b"):
            return f"Usable even part-offloaded (only {entry['active_params_b']:g}B active per word)"
        return "Slow — usable for short tasks, not snappy chat"
    if active <= 4:
        return "Fast — replies feel instant"
    if active <= 14:
        return "Comfortable — quick enough for live chat"
    return "Steady — fine, just not instant on big answers"
def _eval_gguf(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for an LLM/VLM entry with a real quant ladder.

    Tries, in order: the best-quality quant (down to IQ4_XS) that fits the
    fast budget -> "great"; the first compromise quant that fits the total
    budget -> "tight"; otherwise "no", reported against the lowest-ranked
    quant. Budgets are only filled to _SAFETY_FILL.

    Returns:
        {"verdict": ..., "quant": <quant dict>, "est": <_estimate dict>}
    """
    fast_cap = spec.fast_budget_gb * _SAFETY_FILL
    total_cap = spec.total_budget_gb * _SAFETY_FILL
    ladder = sorted(entry.get("quants", []), key=lambda q: _quant_rank(q["key"]))
    ctx = uc.context_tokens
    factor = uc.overhead_factor

    # Fast path: best quality quant >= 4-bit that fits the GPU budget.
    if spec.has_fast_path:
        for candidate in ladder:
            if _quant_rank(candidate["key"]) > _FOUR_BIT_RANK:
                # don't call a sub-4-bit squeeze "runs great"
                break
            sizing = _estimate(entry, candidate, ctx, factor)
            if sizing["total"] <= fast_cap:
                return {"verdict": "great", "quant": candidate, "est": sizing}

    # Compromise: spill into ordinary RAM, shrinking quality only if needed.
    for wanted in _COMPROMISE_QUANTS:
        matches = [q for q in ladder if q["key"] == wanted]
        if not matches:
            continue
        candidate = matches[0]
        sizing = _estimate(entry, candidate, ctx, factor)
        if sizing["total"] <= total_cap:
            return {"verdict": "tight", "quant": candidate, "est": sizing}

    # Nothing fits: report against the last quant on the ladder, or a
    # synthetic 4-bit placeholder when the entry has no quants at all.
    if ladder:
        fallback = ladder[-1]
    else:
        fallback = {"key": "Q4_K_M", "plain": "Balanced (4-bit)",
                    "file_gb": entry.get("params_b", 4) * 0.6}
    return {"verdict": "no", "quant": fallback,
            "est": _estimate(entry, fallback, ctx, factor)}
def _eval_flat(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for a non-GGUF entry with one memory figure.

    The need is the entry's "mem_gb" (4.0 when absent) times the use case's
    overhead factor. The result has the same shape as _eval_gguf's, with a
    synthetic "full" setting in place of a quant.
    """
    need = round(entry.get("mem_gb", 4.0) * uc.overhead_factor, 2)
    fast_cap = spec.fast_budget_gb * _SAFETY_FILL
    total_cap = spec.total_budget_gb * _SAFETY_FILL
    est = {"weights": need, "kv": 0.0, "overhead": 0.0, "total": need, "kv_exact": False}
    setting = {"key": "full", "plain": "Full model", "file_gb": need}

    if spec.has_fast_path and need <= fast_cap:
        verdict = "great"
    elif entry["family"] == "imagegen" and not spec.has_fast_path and need > 4:
        # Image/video generation without a GPU is minutes-per-image: say so.
        verdict = "no"
    elif need <= total_cap:
        verdict = "tight"
    else:
        verdict = "no"
    return {"verdict": verdict, "quant": setting, "est": est}
def _evaluate(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Evaluate one catalogue entry and attach it to its verdict under "entry".

    Entries with a non-empty "quants" list go through _eval_gguf, all others
    through _eval_flat.
    """
    scorer = _eval_gguf if entry.get("quants") else _eval_flat
    result = scorer(entry, spec, uc)
    result["entry"] = entry
    return result
# --------------------------------------------------------------------------
# Advise: full UI-shaped result
# --------------------------------------------------------------------------
def _speed_pred(r: dict, spec: HardwareSpec, bw: float | None) -> dict | None:
    """Measured/roofline tok/s prediction for a GGUF option, if bandwidth known.

    Returns None for entries without quants, for "no" verdicts, and when `bw`
    is missing or zero; otherwise whatever predict_decode_tps returns.
    """
    entry, verdict, est = r["entry"], r["verdict"], r["est"]
    if not (entry.get("quants") and verdict != "no" and bw):
        return None

    total_params = entry.get("params_b") or 1.0
    # Fraction of the weights that is active (1.0 unless "active_params_b" is set).
    active_share = (entry.get("active_params_b") or total_params) / total_params

    offload_share = 0.0
    if verdict == "tight":
        # share of the read bytes that live in slow system RAM
        fast_room = spec.fast_budget_gb * _SAFETY_FILL
        spilled = 1 - fast_room / max(est["total"], 0.1)
        offload_share = max(0.0, min(1.0, spilled))

    return predict_decode_tps(
        bandwidth_gbs=bw,
        weights_gb=est["weights"],
        kv_gb=est["kv"],
        active_fraction=active_share,
        offload_fraction=offload_share,
    )
def _option_json(r: dict, spec: HardwareSpec, bw: float | None = None) -> dict:
    """Flatten one evaluated result into the option dict returned to the UI.

    The "feel" text comes from the speed prediction when one is available,
    else from _feel; flat (non-GGUF) entries that only fit in RAM on a machine
    without a fast path get a fixed processor-only message instead.
    """
    entry, verdict = r["entry"], r["verdict"]

    prediction = _speed_pred(r, spec, bw)
    if prediction:
        feel = feel_text(prediction)
    else:
        feel = _feel(entry, verdict, spec)
    cpu_only_flat = (not entry.get("quants") and verdict == "tight"
                     and not spec.has_fast_path)
    if cpu_only_flat:
        feel = "Runs on the processor — slow but workable"

    links = entry.get("links") or {}
    if verdict == "no":
        memory = "Too big"
    else:
        memory = f"{r['est']['total']:g} GB"

    return {
        "verdict": verdict,
        "model": entry["name"],
        "desc": entry.get("good_for", ""),
        "setting": r["quant"].get("plain", "Full model"),
        "memory": memory,
        "feel": feel,
        "params_b": entry.get("params_b"),
        "active_params_b": entry.get("active_params_b"),
        # Prefer the Hugging Face link, then the project home page.
        "url": links.get("hf") or links.get("home", ""),
        "license": entry.get("license", ""),
        "license_note": entry.get("license_note", ""),
        "gated": entry.get("gated", False),
        "run": entry.get("run", {}),
        "provenance": entry.get("provenance", "estimated"),
        "stale": entry.get("stale", False),
    }
def _pick_headline(results: list[dict], uc: UC) -> tuple[dict | None, bool]:
    """Choose the result to headline and report whether it meets the goal's bar.

    Args:
        results: evaluated results, each with "verdict" and "entry" keys.
        uc: the use case; reads `min_b` (minimum size) and `good_b` (ideal size).

    Returns:
        (result, meets_goal). Preference order: the biggest "great" model at or
        above `min_b`; then a "tight" model at or above `min_b`; then, with
        meets_goal False, the biggest "great" or the smallest "tight" model.
        (None, False) when nothing is "great" or "tight".
    """
    def size(r):
        # `or 0` also covers an explicit null params_b, which would otherwise
        # raise TypeError in the comparisons and max/min calls below.
        return r["entry"].get("params_b") or 0

    great = [r for r in results if r["verdict"] == "great"]
    tight = [r for r in results if r["verdict"] == "tight"]
    great_ok = [r for r in great if size(r) >= uc.min_b]
    tight_ok = [r for r in tight if size(r) >= uc.min_b]

    if great_ok:
        # Fast-and-capable is the best answer: biggest model that runs great.
        return max(great_ok, key=size), True
    if tight_ok:
        if uc.good_b > 0:
            # LLMs: close to the ideal size, not needlessly oversized-and-slow.
            below = [r for r in tight_ok if size(r) <= uc.good_b * 1.5]
            return (max(below, key=size) if below else min(tight_ok, key=size)), True
        # Non-LLM families: the biggest model that fits is simply the best one.
        return max(tight_ok, key=size), True
    if great:
        return max(great, key=size), False
    if tight:
        return min(tight, key=size), False
    return None, False
def _provenance_line(headline: dict | None) -> str:
if not headline:
return ""
e = headline["entry"]
prov = e.get("provenance", "estimated")
if prov == "filesize":
line = ("Model size is the exact file size on Hugging Face. Chat memory and "
"working space are conservative estimates with a 0.58 GB safety buffer "
"(the 95% load-success margin fitted from ~19,500 real measurements).")
if not headline["est"].get("kv_exact"):
line += " This repo hides its exact shape, so chat memory is estimated from its size."
return line
if prov == "vendor":
return "The memory figure is the maker's own published number."
if prov == "community":
return "The memory figure is community-reported, not vendor-published — treat it as a good estimate."
return "The memory figure is estimated from the model's size — conservative, not measured."
def advise_real(payload: dict, spec: HardwareSpec) -> dict:
    """Build the full UI-shaped answer for one goal on one machine.

    Args:
        payload: request dict; only "usecase" is read (unknown or missing
            values fall back to "chat").
        spec: the machine being checked.

    Returns:
        A dict with the keys "catalogue_version", "verdict", "verdict_word",
        "headline", "detail", "note", "gauge", "options", "tools", "commands",
        "provenance", "speed", "meets_goal", "use_case" and "headline_model".
        The same keys are present when the catalogue has no entry for the
        goal, so callers can index the result without checking which path
        produced it.
    """
    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    candidates = _by_use_case().get(uc.key, [])
    # Honest gap, not a fake answer: if the catalogue doesn't cover a goal yet,
    # say so and point at the live lookup instead of inventing options.
    if not candidates:
        return {
            "catalogue_version": catalogue_date(),
            "verdict": "tight", "verdict_word": "Not covered yet",
            "headline": "Our catalogue doesn't cover this goal yet.",
            "detail": ("FitCheck only answers from verified model data, and nothing in the "
                       "current catalogue serves this goal — so rather than guess, we'd "
                       "rather say so. If you know a specific model for it, paste its "
                       "Hugging Face id in the <b>'Have a specific model in mind?'</b> box "
                       "and we'll check that exact model against your machine."),
            "note": "The catalogue grows every night; niche goals are next in line.",
            "gauge": {}, "options": [], "tools": _TOOLS.get(uc.family, []),
            "commands": {"intro": "", "items": []}, "provenance": "",
            # Keep the shape identical to the normal return below: min_specs
            # reads "headline_model" from every result.
            "speed": None,
            "meets_goal": False, "use_case": uc.plain_name,
            "headline_model": "",
        }

    results = [_evaluate(e, spec, uc) for e in candidates]
    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    headline, meets_goal = _pick_headline(results, uc)
    bw, bw_src = bandwidth_for_spec(spec)
    options = [_option_json(r, spec, bw) for r in results]

    if headline:
        e, est, q = headline["entry"], headline["est"], headline["quant"]
        hv = headline["verdict"]
        need = est["total"]
        where = ("on your Mac" if spec.is_apple_silicon and hv == "great" else
                 "on your graphics card" if hv == "great" and spec.has_fast_path else
                 "using your computer's memory" if hv == "tight" else "")
        if hv == "great":
            head_text = f"Yes, you can run {e['name']} {where}, today."
        else:
            head_text = f"Sort of. {e['name']} will run {where}, with trade-offs."
        if e.get("quants"):
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b> at the "
                f"<b>{q.get('plain', q['key'])}</b> setting. {e.get('good_for','')} "
                f"It needs about <b>{need:g} GB</b> "
                f"(the model file is {est['weights']:g} GB — exact size on Hugging Face — "
                f"plus {est['kv']:g} GB chat memory and {est['overhead']:g} GB working space), "
                f"and you have roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        else:
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b>. "
                f"{e.get('good_for','')} It needs about <b>{need:g} GB</b>, and you have "
                f"roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        model_part, work_part = est["weights"], round(need - est["weights"], 2)
    else:
        # Nothing runs: describe the gap using the lightest option's need.
        hv = "no"
        smallest = min(results, key=lambda r: r["est"]["total"], default=None)
        need = smallest["est"]["total"] if smallest else 1.0
        head_text = "This goal is a stretch on this machine. Here's the honest picture."
        detail = (
            f"Even the lightest option here needs about <b>{need:g} GB</b>, but this "
            f"machine can offer only about <b>{total:g} GB</b> once the operating system "
            f"has its share. That's not a failure — small computers just have small "
            f"budgets. Adding memory, or a free cloud notebook, would open this up."
        )
        # No real breakdown exists here, so the gauge shows a nominal 80/20 split.
        model_part, work_part = round(need * 0.8, 2), round(need * 0.2, 2)

    note_bits = []
    if headline and not meets_goal:
        note_bits.append(
            f"This is the best this machine can do, but it's on the small side for "
            f"{uc.plain_name.lower()} — treat results as 'okay', not great.")
    if uc.note:
        note_bits.append(uc.note)
    if headline and headline["entry"].get("mem_note"):
        note_bits.append(headline["entry"]["mem_note"])
    if headline and headline["entry"].get("license_note"):
        note_bits.append(headline["entry"]["license_note"])
    if headline and headline["entry"].get("gated"):
        note_bits.append("This model is gated: accept its terms on Hugging Face once before downloading.")

    # Gauge axis: 5% beyond the larger of the budget and the need (minimum 1).
    scale = max(total, need, 1) * 1.05
    gauge = {
        "need_gb": f"{need:g} GB needed",
        "fast_gb": f"{fast:g} GB", "total_gb": f"{total:g} GB",
        "fill_pct": round(min(need / scale, 1.0) * 100, 1),
        "mark_pct": round(min(fast / scale, 1.0) * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Chat memory + working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    speed = None
    if headline:
        pred = _speed_pred(headline, spec, bw)
        if pred:
            speed = {**pred, "bw": bw, "bw_source": bw_src,
                     "model": headline["entry"]["name"]}

    # LLM goals get machine-specific runtimes; other families use the static table.
    if uc.family == "llm":
        tools = [{"name": r.name, "what": r.plain_what, "install": r.install_hint,
                  "tag": r.difficulty} for r in pick_runtimes(spec)]
    else:
        tools = _TOOLS.get(uc.family, [])

    commands = {"intro": "These get you running in minutes — real commands for the exact pick above.",
                "items": []}
    if headline:
        run = headline["entry"].get("run", {})
        if run.get("ollama"):
            commands["items"].append({"label": "Easy way (Ollama)", "code": run["ollama"]})
        if run.get("llamacpp"):
            commands["items"].append({"label": "Power way (llama.cpp)", "code": run["llamacpp"]})
        if run.get("pip"):
            commands["items"].append({"label": "Install", "code": run["pip"]})

    return {
        "catalogue_version": catalogue_date(),
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": head_text,
        "detail": detail,
        "note": " ".join(note_bits),
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        "provenance": _provenance_line(headline) + (
            f" Speed is {'predicted from real community measurements' if speed and speed['method'] == 'measured-model' else 'an analytical bandwidth estimate'}"
            f" — see 'Why this speed?' below." if speed else ""),
        "speed": speed,
        "meets_goal": meets_goal,
        "use_case": uc.plain_name,
        "headline_model": headline["entry"]["name"] if headline else "",
    }
# --------------------------------------------------------------------------
# Reverse mode: "what machine do I need for X?"
# --------------------------------------------------------------------------
# Ladders are cheap -> expensive. Budget hints are rough 2026 street prices for
# a whole sensible build, shown as guidance, not gospel.
# Each rung is (label, hardware kwargs consumed by _spec_for_tier, price hint).
_PC_LADDER = [
    ("Any old laptop (8 GB RAM, no GPU)", dict(ram_gb=8, vram_gb=0, vendor="none"), "what you may already own"),
    ("16 GB RAM laptop, no GPU", dict(ram_gb=16, vram_gb=0, vendor="none"), "~$500"),
    ("16 GB RAM + RTX 4060 (8 GB)", dict(ram_gb=16, vram_gb=8, vendor="nvidia"), "~$800"),
    ("16 GB RAM + RTX 3060 (12 GB)", dict(ram_gb=16, vram_gb=12, vendor="nvidia"), "~$900"),
    ("32 GB RAM + RTX 5070 (12 GB)", dict(ram_gb=32, vram_gb=12, vendor="nvidia"), "~$1,300"),
    ("32 GB RAM + RTX 5070 Ti (16 GB)", dict(ram_gb=32, vram_gb=16, vendor="nvidia"), "~$1,600"),
    ("32 GB RAM + RTX 4090 (24 GB)", dict(ram_gb=32, vram_gb=24, vendor="nvidia"), "~$2,500"),
    ("64 GB RAM + RTX 5090 (32 GB)", dict(ram_gb=64, vram_gb=32, vendor="nvidia"), "~$3,500+"),
]
# Mac rungs only carry ram_gb: _spec_for_tier builds them as Apple-silicon specs.
_MAC_LADDER = [
    ("Mac with 16 GB unified memory", dict(ram_gb=16), "~$1,000"),
    ("Mac with 24 GB unified memory", dict(ram_gb=24), "~$1,400"),
    ("Mac with 32 GB unified memory", dict(ram_gb=32), "~$1,800"),
    ("Mac with 64 GB unified memory", dict(ram_gb=64), "~$2,800"),
    ("Mac with 128 GB unified memory", dict(ram_gb=128), "~$4,500+"),
]
def _spec_for_tier(kind: str, hw: dict) -> HardwareSpec:
    """Turn one ladder rung's hardware dict into a HardwareSpec.

    `kind == "mac"` yields an Apple-silicon macOS spec from "ram_gb" alone;
    any other kind yields a Windows desktop spec using "ram_gb" plus the
    optional "vendor" (default "none") and "vram_gb" (default 0.0).
    """
    if kind != "mac":
        return HardwareSpec(os="windows", ram_gb=hw["ram_gb"],
                            gpu_vendor=hw.get("vendor", "none"),
                            vram_gb=hw.get("vram_gb", 0.0), form_factor="desktop")
    return HardwareSpec(os="macos", ram_gb=hw["ram_gb"], gpu_vendor="apple",
                        is_apple_silicon=True, form_factor="mac")
def min_specs(usecases) -> dict:
    """For one OR several goals: the cheapest tier where EVERY goal genuinely
    works (the union of requirements, not a sum), the tier where every goal
    runs great, and what each goal would actually run on those tiers.
    Pure engine inversion — fully offline.

    Args:
        usecases: a use-case key or an iterable of keys. Unknown keys fall
            back to "chat", duplicates are dropped, and an empty or None
            value means ["chat"].

    Returns:
        dict with "use_case", "goals", "catalogue_version", "note",
        "disclaimer", and "pc" / "mac" sub-dicts holding the "minimum" and
        "comfortable" tiers (None when no rung on that ladder qualifies).
    """
    if isinstance(usecases, str):
        usecases = [usecases]
    seen = set()
    ucs = []
    for u in usecases or ["chat"]:
        uc = USE_CASES.get(u, USE_CASES["chat"])
        if uc.key not in seen:
            seen.add(uc.key)
            ucs.append(uc)

    def walk(kind, ladder):
        # Climb the ladder cheapest-first; stop once both tiers are known.
        minimum = comfortable = None
        for label, hw, price in ladder:
            spec = _spec_for_tier(kind, hw)
            per_goal, all_meet, all_great = [], True, True
            for uc in ucs:
                res = advise_real({"usecase": uc.key}, spec)
                all_meet &= res["meets_goal"] and res["verdict"] in ("great", "tight")
                all_great &= res["meets_goal"] and res["verdict"] == "great"
                per_goal.append({"goal": uc.plain_name,
                                 # .get: the "not covered yet" result may not
                                 # carry a headline model at all.
                                 "model": res.get("headline_model") or "nothing realistic",
                                 "verdict": res["verdict"]})
            tier = {"label": label, "price": price, "goals": per_goal,
                    "runs": "; ".join(f"{g['goal']}: {g['model']}" for g in per_goal)
                    if len(per_goal) > 1 else per_goal[0]["model"]}
            if minimum is None and all_meet:
                minimum = tier
            if comfortable is None and all_great:
                comfortable = tier
            if minimum and comfortable:
                break
        return minimum, comfortable

    pc_min, pc_comfy = walk("pc", _PC_LADDER)
    mac_min, mac_comfy = walk("mac", _MAC_LADDER)
    notes = [uc.note for uc in ucs if uc.note]
    return {
        "use_case": " + ".join(uc.plain_name for uc in ucs),
        "goals": [uc.plain_name for uc in ucs],
        "catalogue_version": catalogue_date(),
        "note": " ".join(notes),
        "pc": {"minimum": pc_min, "comfortable": pc_comfy},
        "mac": {"minimum": mac_min, "comfortable": mac_comfy},
        "disclaimer": ("Price hints are rough 2026 street prices for a sensible whole "
                       "build — they vary a lot by region and second-hand luck. The "
                       "memory math is the same conservative engine as the main check."
                       + (" Tiers are the union of every goal you picked: each one has "
                          "to genuinely work." if len(ucs) > 1 else "")),
    }