Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

App Files Files Community

FitCheck / engine /explain.py

cn0303

Deploy FitCheck: engine + Nemotron model brick on ZeroGPU

12d2e34 verified 1 day ago

raw

history blame contribute delete

5.79 kB

	"""
	Putting it in plain words.

	The advisor produces structured facts; this module turns them into sentences a
	non-technical person actually understands, and into commands they can copy and
	paste. No jargon survives here without being explained.
	"""

	from .advisor import (
	Advice,
	ModelVerdict,
	VERDICT_WORKS,
	VERDICT_COMPROMISE,
	VERDICT_NO,
	)

	# One row per verdict: (verdict constant, traffic-light dot, short label).
	# Both lookup tables below are derived from these rows, in this order.
	_VERDICT_DISPLAY = (
	    (VERDICT_WORKS, "🟢", "Works now"),
	    (VERDICT_COMPROMISE, "🟡", "Works, with compromises"),
	    (VERDICT_NO, "🔴", "Don't bother"),
	)

	# Verdict -> coloured dot.
	VERDICT_EMOJI = {verdict: dot for verdict, dot, _label in _VERDICT_DISPLAY}

	# Verdict -> short human-readable label.
	VERDICT_WORD = {verdict: label for verdict, _dot, label in _VERDICT_DISPLAY}


	def speed_hint(v: ModelVerdict, spec) -> str:
	    """Describe, in one short phrase, how quick replies will feel.

	    A "no" verdict yields a bare dash and a "compromise" verdict yields the
	    slow-but-usable hint. Any other verdict is graded by model size
	    (``v.model.billions``): up to 4 is fast, up to 14 is comfortable, and
	    anything larger is steady. ``spec`` is accepted but not read here.
	    """
	    outcome = v.verdict
	    if outcome == VERDICT_NO:
	        return "—"
	    if outcome == VERDICT_COMPROMISE:
	        return "Slow — usable for short tasks, not snappy chat."

	    # Remaining case: the model works now. Larger models still answer more
	    # slowly, so grade by parameter count; the first band that fits wins.
	    size_bands = (
	        (4, "Fast — replies feel instant."),
	        (14, "Comfortable — quick enough for live chat."),
	    )
	    params = v.model.billions
	    for ceiling, hint in size_bands:
	        if params <= ceiling:
	            return hint
	    return "Steady — fine, just not instant on big answers."


	# --------------------------------------------------------------------------
	# Commands
	# --------------------------------------------------------------------------

	def ollama_command(v: ModelVerdict) -> str:
	    """Return the copy-paste ``ollama run`` command for this verdict's model."""
	    tag = v.model.ollama_tag
	    return f"ollama run {tag}"


	def llamacpp_command(v: ModelVerdict) -> str:
	    """Return the copy-paste ``llama-server`` command for this verdict.

	    The model is named as ``<gguf_repo>:<quant key>`` after ``-hf`` so that
	    llama.cpp fetches the GGUF from Hugging Face, and ``-c`` carries the
	    context size taken from the verdict's estimate.
	    """
	    source = f"{v.model.gguf_repo}:{v.quant.key}"
	    context = f"{v.estimate.context_tokens}"
	    argv = ["llama-server", "-hf", source, "-c", context]
	    return " ".join(argv)


	# --------------------------------------------------------------------------
	# Headline summary, in human words
	# --------------------------------------------------------------------------

	def headline_text(advice: Advice) -> str:
	    """Write the headline summary for one piece of advice.

	    Reads ``advice.spec``, ``advice.use_case`` and ``advice.headline``.
	    With no headline the result is a two-paragraph "can't comfortably run
	    this yet" message. Otherwise it is a lead sentence chosen by the
	    headline's verdict, the recommended model and setting, a memory
	    breakdown, and then the use-case note and the headline's own notes when
	    those are present. Paragraphs are separated by blank lines.
	    """
	    machine = advice.spec
	    job = advice.use_case
	    pick = advice.headline

	    # Nothing fits at all: explain why and point at the ways out.
	    if pick is None:
	        opening = (
	            "**Honest answer: this machine can't comfortably run local AI "
	            f"for {job.plain_name.lower()} yet.**"
	        )
	        reason = (
	            f"Even the smallest models need more memory than the {machine.ram_gb:g} GB "
	            "available here once everything else is running. That's not a failure — "
	            "small computers just have small budgets. A free cloud option, or adding "
	            "memory, would open this up."
	        )
	        return opening + "\n\n" + reason

	    model = pick.model
	    quant = pick.quant

	    # "Graphics card" is only claimed when the machine has a fast path AND
	    # the verdict is a clean "works"; everything else is the processor.
	    where = "on the processor"
	    if machine.has_fast_path and pick.verdict == VERDICT_WORKS:
	        where = "on the graphics card"

	    if pick.verdict == VERDICT_WORKS:
	        lead = f"Yes — you can run a {model.plain_name} model {where}, today."
	    elif pick.verdict == VERDICT_COMPROMISE:
	        lead = f"Sort of — a {model.plain_name} model will run, but with trade-offs."
	    else:
	        lead = f"Not really — even a {model.plain_name} model is a stretch here."

	    recommendation = (
	        f"For {job.plain_name.lower()}, the sweet spot on your machine is a "
	        f"{model.plain_name} model at the {quant.plain_name} setting. "
	        f"{model.good_for}"
	    )

	    est = pick.estimate
	    budget = (
	        f"That needs about {est.total_gb:g} GB of memory "
	        f"(model {est.weights_gb:g} GB + chat memory {est.kv_cache_gb:g} GB "
	        f"+ working space {est.overhead_gb:g} GB), and you have roughly "
	        f"{machine.fast_budget_gb:g} GB fast / {machine.total_budget_gb:g} GB "
	        "total to play with."
	    )

	    paragraphs = [lead, recommendation, budget]
	    if job.note:
	        paragraphs.append(f"Note for this job: {job.note}")
	    if pick.notes:
	        # The headline's own notes become a bullet list, one per line.
	        paragraphs.append("\n".join(f"- {item}" for item in pick.notes))

	    return "\n\n".join(paragraphs)


	def jargon_glossary() -> str:
	    """Return the plain-English glossary as a single block of text.

	    The text is a title line, a blank line, then one "- term — meaning"
	    bullet per line, with no newline after the last bullet.
	    """
	    entries = (
	        ("Model", "the AI's 'brain'. Bigger = smarter but heavier."),
	        (
	            "Parameters (e.g. 7B)",
	            "how big the brain is. 7B = 7 billion. "
	            "More = smarter and hungrier for memory.",
	        ),
	        (
	            "Quantisation (4-bit, 8-bit)",
	            "shrinking the model so it fits. 4-bit is the popular sweet spot: "
	            "much smaller, barely-noticeable quality loss.",
	        ),
	        (
	            "VRAM",
	            "the fast memory on a graphics card. "
	            "The single biggest factor in what you can run quickly.",
	        ),
	        (
	            "RAM",
	            "your computer's normal memory. Models can use it too, but it's slower.",
	        ),
	        (
	            "KV cache / 'chat memory'",
	            "scratch space the model uses to remember the current conversation. "
	            "Longer chats use more.",
	        ),
	        ("GGUF", "a single-file model format made for running locally."),
	        (
	            "llama.cpp / Ollama",
	            "the programs that actually run the model on your machine.",
	        ),
	    )
	    bullets = [f"- {term} — {meaning}" for term, meaning in entries]
	    return "Plain-English glossary\n\n" + "\n".join(bullets)


	def how_to_find_specs(os_hint: str = "windows") -> str:
	    """Explain, step by step, where to look up this computer's specs.

	    Args:
	        os_hint: Operating system to give steps for. "macos" and "linux"
	            (any letter case, surrounding spaces ignored) select their own
	            instructions; every other value, including the default
	            "windows", yields the Windows Task Manager steps.

	    Returns:
	        A one-line intro, a blank line, then "- " bullet steps.
	    """
	    intro = "Not sure of your specs? Here's how to check:\n\n"

	    # Normalise the hint so "Linux" or " macOS " reach their own branch
	    # instead of silently falling through to the Windows steps. Anything
	    # that is not a string keeps the Windows fallback.
	    os_key = os_hint.strip().lower() if isinstance(os_hint, str) else ""

	    if os_key == "macos":
	        return intro + (
	            "- Click the Apple menu (top-left) → About This Mac.\n"
	            "- It shows your chip (e.g. Apple M2) and Memory (e.g. 16 GB).\n"
	            "- On a Mac, that one memory number is all you need — the graphics "
	            "share it."
	        )

	    if os_key == "linux":
	        return intro + (
	            "- RAM: run `free -h` in a terminal.\n"
	            # Plain "|" on purpose: a backslash before the pipe is an invalid
	            # Python escape and would end up inside the command people copy,
	            # so the shell would no longer pipe into grep.
	            "- Graphics card: run `nvidia-smi` (NVIDIA) or `lspci | grep VGA`.\n"
	        )

	    return intro + (
	        "- RAM: press `Ctrl + Shift + Esc` → Performance tab → Memory.\n"
	        "- Graphics card: same window → GPU. The name is at the top "
	        "right (e.g. NVIDIA RTX 3060).\n"
	        "- No GPU section showing a real card? You likely have built-in "
	        "graphics — that's fine, just pick the 'built-in' option."
	    )