Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

App Files Files Community

FitCheck / engine /ui_adapter.py

cn0303

Speed predictions with receipts: bandwidth roofline, real-runs chart, honest provenance

ee8ca43 verified about 22 hours ago

raw

history blame contribute delete

9.91 kB

	"""
	Adapter: turn a frontend payload into the exact JSON the static/ frontend
	renders, using the REAL deterministic engine (not the placeholder).

	The frontend speaks one contract (verdicts ``great|tight|no``, an options list,
	a gauge, tools, commands). The engine speaks another (``works_now|compromises|
	dont_bother`` over ``ModelVerdict`` objects). This module is the seam between
	them, so neither side has to know about the other.

	Scope: the engine currently models the LLM family only (its model classes
	are all text models). Vision / image-gen / audio / data goals still fall back to
	the input-aware placeholder in ``app.py`` — that boundary is deliberate and
	honest, not an oversight. ``is_llm_usecase`` below is the routing switch.
	"""

	import re

	from .advisor import (
	advise,
	VERDICT_WORKS,
	VERDICT_COMPROMISE,
	VERDICT_NO,
	)
	from .catalogue import MODEL_CLASSES
	from .explain import speed_hint, ollama_command, llamacpp_command
	from .hardware import HardwareSpec

	# Translation table: engine verdict constant -> colour-key the frontend renders.
	_VERDICT_UI = dict((
	    (VERDICT_WORKS, "great"),
	    (VERDICT_COMPROMISE, "tight"),
	    (VERDICT_NO, "no"),
	))
	# Short caption shown for each frontend colour-key.
	_VERDICT_WORD = dict(great="Runs great", tight="Tight, but works", no="Won't fit")

	# Colours of the two gauge segments (meant to match the placeholder palette in
	# app.py / style.css).
	_C_MODEL = "#818CF8"  # the model weights
	_C_WORK = "#868E9C"  # chat memory + working space

	# Frontend goal ids answered by the real engine. "translate" and "custom" are
	# language-model goals as well, so they are routed here with the chat-shaped ones.
	_LLM_USECASES = {
	    "chat",
	    "writing",
	    "coding",
	    "agents",
	    "rag",
	    "finetune",
	    "translate",
	    "custom",
	}
	# Frontend ids with no 1:1 engine use-case key -> the engine key to use instead.
	_USECASE_ALIAS = dict.fromkeys(("translate", "custom"), "chat")


	def is_llm_usecase(usecase: str) -> bool:
	    """Report whether *usecase* is one of the goals listed in ``_LLM_USECASES``.

	    This is the routing switch: ``True`` means the real engine should answer
	    the goal, ``False`` means the placeholder should.
	    """
	    handled_by_engine = usecase in _LLM_USECASES
	    return handled_by_engine


	# --------------------------------------------------------------------------
	# Frontend payload -> HardwareSpec
	# --------------------------------------------------------------------------

	def _num_in(text: str) -> float:
	    """Return the first ``<number> GB`` figure found in *text*, else ``0.0``.

	    The unit is matched case-insensitively and may be separated from the
	    number by whitespace (``"24GB"``, ``"12 gb"``, ``"1.5 GB"``).

	    Figures that are not memory sizes are skipped rather than returned:
	    a unit followed by another letter (``"10 Gbps"``) or by ``/s``
	    (``"936 GB/s"``, a bandwidth) does not count as a match.

	    Args:
	        text: Free-form label or pasted text. ``None``, an empty string, or
	            any non-string value yields ``0.0`` instead of raising.

	    Returns:
	        The matched number as a float, or ``0.0`` when nothing matches.
	    """
	    if not isinstance(text, str):
	        # Payload values come from the frontend; never let a stray number or
	        # null reach re.search(), which only accepts strings.
	        return 0.0
	    # The negative lookahead rejects "Gbps"-style units and "GB/s" rates.
	    m = re.search(r"(\d+(?:\.\d+)?)\s*GB(?![a-z]|/s)", text, re.I)
	    return float(m.group(1)) if m else 0.0


	def spec_from_payload(p: dict) -> HardwareSpec:
	    """Translate the frontend's gather() payload into a ``HardwareSpec``.

	    The spec is assembled here directly instead of via build_spec(): the
	    payload already names the vendor and carries a GPU label with the VRAM
	    in it, and an Advanced box can supply a VRAM figure that overrides both.

	    Payload keys read: ``computer``, ``provider``, ``ram_gb``, ``gpu``,
	    ``vram_gb`` and ``paste``. Missing or empty ``computer`` / ``provider`` /
	    ``ram_gb`` fall back to ``"Windows laptop"`` / ``"none"`` / ``16``.
	    """
	    machine = (p.get("computer") or "Windows laptop").lower()
	    provider = (p.get("provider") or "none").lower()
	    ram = float(p.get("ram_gb") or 16)

	    # Apple Silicon: memory is unified, so no separate VRAM figure is reported.
	    is_mac = provider == "apple" or "mac" in machine
	    if is_mac:
	        # The chip name keeps its tier (Pro/Max/Ultra) for the bandwidth lookup.
	        chip = p.get("gpu") or "Apple Silicon"
	        return HardwareSpec(
	            os="macos",
	            ram_gb=ram,
	            gpu_vendor="apple",
	            vram_gb=0.0,
	            is_apple_silicon=True,
	            gpu_label=f"{chip} (shares your {ram:g} GB of memory)",
	            form_factor="mac",
	        )

	    # Raspberry Pi / mini PC: no graphics card at all.
	    if any(word in machine for word in ("raspberry", "mini")):
	        return HardwareSpec(
	            os="linux",
	            ram_gb=ram,
	            gpu_vendor="none",
	            vram_gb=0.0,
	            gpu_label="No dedicated graphics card (tiny computer)",
	            form_factor="sbc",
	        )

	    # Everything else is a Windows or Linux laptop/desktop.
	    os_name = "windows"
	    if "linux" in machine:
	        os_name = "linux"
	    form = "laptop"
	    if "desktop" in machine:
	        form = "desktop"

	    # VRAM precedence: Advanced override, then the figure in the picker label,
	    # then a guess from the pasted text, then zero.
	    vram = float(
	        p.get("vram_gb")
	        or _num_in(p.get("gpu", ""))
	        or _num_in(p.get("paste", ""))
	        or 0
	    )

	    if provider in ("nvidia", "amd", "intel"):
	        vendor = provider
	    else:
	        # "none" / "unsure" (or anything unrecognised): no fast path, so any
	        # VRAM figure found above is discarded.
	        vendor, vram = "none", 0.0

	    label = p.get("gpu") or "No dedicated graphics card (built-in graphics only)"
	    return HardwareSpec(
	        os=os_name,
	        ram_gb=ram,
	        gpu_vendor=vendor,
	        vram_gb=vram,
	        is_apple_silicon=False,
	        gpu_label=label,
	        form_factor=form,
	    )


	# --------------------------------------------------------------------------
	# Advice -> frontend JSON
	# --------------------------------------------------------------------------

	def _where(spec: HardwareSpec, verdict: str) -> str:
	    """Return the "where it runs" phrase for a frontend verdict key.

	    ``"tight"`` always maps to the shared-memory phrase. ``"great"`` depends
	    on the spec: Apple Silicon first, then whether ``spec.has_fast_path`` is
	    set. Any other verdict yields an empty string.
	    """
	    if verdict == "tight":
	        return "using your computer's memory"
	    if verdict != "great":
	        return ""
	    if spec.is_apple_silicon:
	        return "on your Mac"
	    return "on your graphics card" if spec.has_fast_path else "on your computer"


	def advise_for_ui(payload: dict, catalogue_version: str) -> dict:
	    """Run the real engine and shape its output for static/app.js render().

	    Args:
	        payload: The frontend's gather() payload. Only ``usecase`` is read
	            here; the hardware keys are consumed by ``spec_from_payload``.
	            A missing, null or empty ``usecase`` is treated as ``"chat"``.
	        catalogue_version: Echoed back unchanged as ``"catalogue_version"``.

	    Returns:
	        A dict with the keys ``catalogue_version``, ``verdict``,
	        ``verdict_word``, ``headline``, ``detail`` (contains ``<b>`` markup),
	        ``note``, ``gauge``, ``options``, ``tools``, ``commands``,
	        ``meets_goal`` and ``use_case``.
	    """
	    # Resolve the goal once. `or "chat"` also covers an explicit null/empty
	    # value, matching the `or default` convention in spec_from_payload; the
	    # alias table then maps frontend-only ids onto the engine's own keys.
	    requested = payload.get("usecase") or "chat"
	    usecase = _USECASE_ALIAS.get(requested, requested)
	    spec = spec_from_payload(payload)
	    adv = advise(spec, usecase)

	    fast = spec.fast_budget_gb
	    total = spec.total_budget_gb

	    # ---- Options table (already biggest -> smallest from the engine) -----
	    options = [
	        {
	            "verdict": _VERDICT_UI[v.verdict],
	            "model": v.model.plain_name,
	            "desc": v.model.good_for,
	            "setting": v.quant.plain_name,
	            "memory": "Too big" if v.verdict == VERDICT_NO else f"{v.estimate.total_gb:g} GB",
	            "feel": speed_hint(v, spec),
	        }
	        for v in adv.verdicts
	    ]

	    # ---- Headline ---------------------------------------------------------
	    h = adv.headline
	    # No headline at all is reported to the frontend as the "no" band.
	    hv = _VERDICT_UI[h.verdict] if h else "no"
	    where = _where(spec, hv)

	    if h and hv == "great":
	        headline = f"Yes, you can run a {h.model.plain_name} model {where}, today."
	    elif h and hv == "tight":
	        headline = f"Sort of. A {h.model.plain_name} model will run {where}, with trade-offs."
	    else:
	        headline = "This goal is a stretch on this machine. Here's the honest picture."

	    if h:
	        est = h.estimate
	        need_gb = est.total_gb
	        detail = (
	            f"For this goal, the sweet spot is a <b>{h.model.plain_name}</b> model "
	            f"at the <b>{h.quant.plain_name}</b> setting. {h.model.good_for} "
	            f"It needs about <b>{need_gb:g} GB</b> "
	            f"(model {est.weights_gb:g} GB + chat memory {est.kv_cache_gb:g} GB "
	            f"+ working space {est.overhead_gb:g} GB), and you have roughly "
	            f"<b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total to work with."
	        )
	    else:
	        # Nothing fits even squeezed: be honest, show the shortfall.
	        # NOTE(review): assumes adv.verdicts is non-empty and ordered so the
	        # last entry is the smallest model -- confirm against the engine.
	        smallest = adv.verdicts[-1]
	        need_gb = smallest.estimate.total_gb
	        detail = (
	            f"Even the smallest model here needs about <b>{need_gb:g} GB</b>, "
	            f"but this machine can offer only about <b>{total:g} GB</b> once the "
	            f"operating system has its share. That's not a failure — small "
	            f"computers just have small budgets. Adding memory, or a free cloud "
	            f"option, would open this up."
	        )

	    # Notes: use-case caveat + the headline's own honest footnotes.
	    note_bits = []
	    if adv.use_case.note:
	        note_bits.append(adv.use_case.note)
	    if h and h.notes:
	        note_bits.extend(h.notes)
	    note = " ".join(note_bits)

	    # ---- Gauge ------------------------------------------------------------
	    # The axis spans the larger of budget and need (at least 1 GB) plus 5%
	    # headroom, so the fill bar never reaches the very end of the gauge.
	    scale = max(total, need_gb, 1) * 1.05
	    if h:
	        model_part = round(h.estimate.weights_gb, 1)
	        work_part = round(need_gb - model_part, 1)
	    else:
	        # No headline estimate is used here: split the need 80% / 20%.
	        model_part = round(need_gb * 0.8, 1)
	        work_part = round(need_gb * 0.2, 1)
	    gauge = {
	        "need_gb": f"{need_gb:g} GB needed",
	        "fast_gb": f"{fast:g} GB",
	        "total_gb": f"{total:g} GB",
	        "fill_pct": round(need_gb / scale * 100, 1),
	        "mark_pct": round(fast / scale * 100, 1),
	        "breakdown": [
	            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
	            {"label": f"Working space {work_part:g} GB", "color": _C_WORK},
	        ],
	    }

	    # ---- Tools (runtimes) -------------------------------------------------
	    tools = [
	        {
	            "name": r.name,
	            "what": r.plain_what,
	            "install": r.install_hint,
	            "tag": r.difficulty,
	        }
	        for r in adv.runtimes
	    ]

	    # ---- Commands ---------------------------------------------------------
	    cmd_intro = ("These get you a running model in minutes. Pick the easy one or "
	                 "the power one; they do the same job.")
	    if h:
	        commands = {"intro": cmd_intro, "items": [
	            {"label": "Easy way (Ollama)", "code": ollama_command(h)},
	            {"label": "Power way (llama.cpp)", "code": llamacpp_command(h)},
	        ]}
	    else:
	        # NOTE(review): presumably MODEL_CLASSES[0] is the smallest class in
	        # the catalogue -- confirm its ordering.
	        tiny = MODEL_CLASSES[0]
	        commands = {"intro": cmd_intro, "items": [
	            {"label": "Smallest you could try (Ollama)", "code": f"ollama run {tiny.ollama_tag}"},
	        ]}

	    return {
	        "catalogue_version": catalogue_version,
	        "verdict": hv,
	        "verdict_word": _VERDICT_WORD[hv],
	        "headline": headline,
	        "detail": detail,
	        "note": note,
	        "gauge": gauge,
	        "options": options,
	        "tools": tools,
	        "commands": commands,
	        # Echoed back so the model brick can narrate the SAME numbers the UI shows.
	        "meets_goal": adv.meets_goal,
	        "use_case": adv.use_case.plain_name,
	    }