Spaces:
Running on Zero
Running on Zero
| """ | |
Adapter: turn a frontend payload into the exact JSON the static/ frontend
renders, using the REAL deterministic engine (not the placeholder).

The frontend speaks one contract (verdicts ``great|tight|no``, an options list,
a gauge, tools, commands). The engine speaks another (``works_now|compromises|
dont_bother`` over ``ModelVerdict`` objects). This module is the seam between
them, so neither side has to know about the other.

Scope: the engine currently models the **LLM** family only (its model classes
are all text models). Vision / image-gen / audio / data goals still fall back to
the input-aware placeholder in ``app.py`` — that boundary is deliberate and
honest, not an oversight. ``is_llm_usecase`` below is the routing switch.
| """ | |
| import re | |
| from .advisor import ( | |
| advise, | |
| VERDICT_WORKS, | |
| VERDICT_COMPROMISE, | |
| VERDICT_NO, | |
| ) | |
| from .catalogue import MODEL_CLASSES | |
| from .explain import speed_hint, ollama_command, llamacpp_command | |
| from .hardware import HardwareSpec | |
# Engine verdict constant -> the colour-key string the frontend understands.
_VERDICT_UI = {
    VERDICT_WORKS: "great",
    VERDICT_COMPROMISE: "tight",
    VERDICT_NO: "no",
}

# Short caption shown for each frontend verdict key.
_VERDICT_WORD = {
    "great": "Runs great",
    "tight": "Tight, but works",
    "no": "Won't fit",
}

# Gauge breakdown colours (match the placeholder palette in app.py / style.css).
_C_MODEL = "#818CF8"  # the weights themselves
_C_WORK = "#868E9C"  # chat memory + working space

# Goals the engine can answer for real. Everything LLM-shaped maps onto a chat
# context; "translate"/"custom" are still language models, so they route here.
_LLM_USECASES = {
    "chat",
    "writing",
    "coding",
    "agents",
    "rag",
    "finetune",
    "translate",
    "custom",
}

# The engine's own use-case keys. Frontend ids that aren't 1:1 get mapped.
_USECASE_ALIAS = {
    "translate": "chat",
    "custom": "chat",
}
def is_llm_usecase(usecase: str) -> bool:
    """Report whether *usecase* is one of the goals in ``_LLM_USECASES``.

    This is the routing switch between the real engine and the placeholder:
    goals in that set are answered by the engine, everything else is not.
    """
    handled_by_engine = usecase in _LLM_USECASES
    return handled_by_engine
| # -------------------------------------------------------------------------- | |
| # Frontend payload -> HardwareSpec | |
| # -------------------------------------------------------------------------- | |
def _num_in(text: str) -> float:
    """Return the first ``<number> GB`` figure found in *text*.

    The number may be an integer or a decimal, may be separated from the unit
    by whitespace, and the unit is matched case-insensitively. Falsy input
    (``None`` or ``""``) and text without such a figure both give ``0.0``.
    """
    haystack = text if text else ""
    found = re.search(r"(\d+(?:\.\d+)?)\s*GB", haystack, re.I)
    if found is None:
        return 0.0
    return float(found.group(1))
def spec_from_payload(p: dict) -> HardwareSpec:
    """Build a HardwareSpec straight from the frontend's gather() payload.

    The spec is constructed directly rather than via build_spec(), because the
    frontend already carries the vendor and a VRAM-bearing label, and an
    Advanced box can override VRAM outright.

    Keys read from *p* (all optional): ``computer`` (defaults to
    "Windows laptop"), ``provider`` (defaults to "none"), ``ram_gb``
    (defaults to 16), ``gpu``, ``vram_gb`` and ``paste``.
    """
    machine = (p.get("computer") or "Windows laptop").lower()
    gpu_provider = (p.get("provider") or "none").lower()
    ram_gb = float(p.get("ram_gb") or 16)

    # Apple Silicon: unified memory, so no separate VRAM figure.
    if "mac" in machine or gpu_provider == "apple":
        # Keep the tier (Pro/Max/Ultra) in the label for bandwidth lookup.
        chip_name = p.get("gpu") or "Apple Silicon"
        return HardwareSpec(
            os="macos",
            ram_gb=ram_gb,
            gpu_vendor="apple",
            vram_gb=0.0,
            is_apple_silicon=True,
            gpu_label=f"{chip_name} (shares your {ram_gb:g} GB of memory)",
            form_factor="mac",
        )

    # Raspberry Pi / mini PC: no dedicated graphics at all.
    if any(word in machine for word in ("raspberry", "mini")):
        return HardwareSpec(
            os="linux",
            ram_gb=ram_gb,
            gpu_vendor="none",
            vram_gb=0.0,
            gpu_label="No dedicated graphics card (tiny computer)",
            form_factor="sbc",
        )

    # VRAM: Advanced override wins; else the picker label; else a paste guess.
    vram_gb = p.get("vram_gb")
    if not vram_gb:
        vram_gb = _num_in(p.get("gpu", "")) or _num_in(p.get("paste", ""))
    vram_gb = float(vram_gb or 0)

    if gpu_provider in ("nvidia", "amd", "intel"):
        vendor = gpu_provider
    else:
        # "none" / "unsure": treat as no fast path, whatever VRAM was parsed.
        vendor = "none"
        vram_gb = 0.0

    return HardwareSpec(
        os="linux" if "linux" in machine else "windows",
        ram_gb=ram_gb,
        gpu_vendor=vendor,
        vram_gb=vram_gb,
        is_apple_silicon=False,
        gpu_label=p.get("gpu") or "No dedicated graphics card (built-in graphics only)",
        form_factor="desktop" if "desktop" in machine else "laptop",
    )
| # -------------------------------------------------------------------------- | |
| # Advice -> frontend JSON | |
| # -------------------------------------------------------------------------- | |
def _where(spec: HardwareSpec, verdict: str) -> str:
    """Return the "where it runs" phrase for a frontend verdict key.

    ``"tight"`` always yields the system-memory phrase. ``"great"`` depends on
    the spec: Apple Silicon first, then ``has_fast_path``, else the generic
    phrase. Any other verdict gives an empty string, and *spec* is not
    inspected unless the verdict is ``"great"``.
    """
    if verdict == "tight":
        return "using your computer's memory"
    if verdict != "great":
        return ""
    if spec.is_apple_silicon:
        return "on your Mac"
    return "on your graphics card" if spec.has_fast_path else "on your computer"
def advise_for_ui(payload: dict, catalogue_version: str) -> dict:
    """Run the real engine and shape its output for static/app.js render().

    The payload's ``usecase`` (default "chat") is mapped through
    ``_USECASE_ALIAS``, a spec is built with ``spec_from_payload`` and the
    pair is handed to ``advise``. The result is a dict with the keys
    ``catalogue_version``, ``verdict``, ``verdict_word``, ``headline``,
    ``detail``, ``note``, ``gauge``, ``options``, ``tools``, ``commands``,
    ``meets_goal`` and ``use_case``.

    When the engine returns no headline, the last entry of ``adv.verdicts``
    supplies the shortfall figure, so an empty verdict list raises
    ``IndexError`` in that case.
    """
    requested = payload.get("usecase", "chat")
    usecase = _USECASE_ALIAS.get(requested, requested)
    spec = spec_from_payload(payload)
    adv = advise(spec, usecase)
    fast_gb = spec.fast_budget_gb
    total_gb = spec.total_budget_gb

    # ---- Options table (already biggest -> smallest from the engine) -----
    options = [
        {
            "verdict": _VERDICT_UI[item.verdict],
            "model": item.model.plain_name,
            "desc": item.model.good_for,
            "setting": item.quant.plain_name,
            "memory": (
                "Too big"
                if item.verdict == VERDICT_NO
                else f"{item.estimate.total_gb:g} GB"
            ),
            "feel": speed_hint(item, spec),
        }
        for item in adv.verdicts
    ]

    # ---- Headline ---------------------------------------------------------
    pick = adv.headline
    # Without a headline the verdict is "no", so the two positive branches
    # below can only be reached when a headline exists.
    ui_verdict = _VERDICT_UI[pick.verdict] if pick else "no"
    place = _where(spec, ui_verdict)
    if ui_verdict == "great":
        headline = f"Yes, you can run a {pick.model.plain_name} model {place}, today."
    elif ui_verdict == "tight":
        headline = f"Sort of. A {pick.model.plain_name} model will run {place}, with trade-offs."
    else:
        headline = "This goal is a stretch on this machine. Here's the honest picture."

    # ---- Detail paragraph ---------------------------------------------------
    if pick:
        estimate = pick.estimate
        need_gb = estimate.total_gb
        detail = (
            f"For this goal, the sweet spot is a <b>{pick.model.plain_name}</b> model "
            f"at the <b>{pick.quant.plain_name}</b> setting. {pick.model.good_for} "
            f"It needs about <b>{need_gb:g} GB</b> "
            f"(model {estimate.weights_gb:g} GB + chat memory {estimate.kv_cache_gb:g} GB "
            f"+ working space {estimate.overhead_gb:g} GB), and you have roughly "
            f"<b>{fast_gb:g} GB</b> fast / <b>{total_gb:g} GB</b> total to work with."
        )
    else:
        # Nothing fits even squeezed: be honest, show the shortfall using the
        # last (smallest) entry of the engine's list.
        need_gb = adv.verdicts[-1].estimate.total_gb
        detail = (
            f"Even the smallest model here needs about <b>{need_gb:g} GB</b>, "
            f"but this machine can offer only about <b>{total_gb:g} GB</b> once the "
            f"operating system has its share. That's not a failure — small "
            f"computers just have small budgets. Adding memory, or a free cloud "
            f"option, would open this up."
        )

    # Notes: use-case caveat + the headline's own honest footnotes.
    caveats = [adv.use_case.note] if adv.use_case.note else []
    if pick and pick.notes:
        caveats.extend(pick.notes)
    note = " ".join(caveats)

    # ---- Gauge ------------------------------------------------------------
    # 5% headroom so neither the fill nor the marker touches the right edge.
    scale = max(total_gb, need_gb, 1) * 1.05
    if pick:
        model_part = round(pick.estimate.weights_gb, 1)
        work_part = round(need_gb - model_part, 1)
    else:
        # No per-part estimate is used here: fixed 80/20 split of the need.
        model_part = round(need_gb * 0.8, 1)
        work_part = round(need_gb * 0.2, 1)
    gauge = {
        "need_gb": f"{need_gb:g} GB needed",
        "fast_gb": f"{fast_gb:g} GB",
        "total_gb": f"{total_gb:g} GB",
        "fill_pct": round(need_gb / scale * 100, 1),
        "mark_pct": round(fast_gb / scale * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    # ---- Tools (runtimes) -------------------------------------------------
    tools = []
    for runtime in adv.runtimes:
        tools.append({
            "name": runtime.name,
            "what": runtime.plain_what,
            "install": runtime.install_hint,
            "tag": runtime.difficulty,
        })

    # ---- Commands ---------------------------------------------------------
    cmd_intro = (
        "These get you a running model in minutes. Pick the easy one or "
        "the power one; they do the same job."
    )
    if pick:
        command_items = [
            {"label": "Easy way (Ollama)", "code": ollama_command(pick)},
            {"label": "Power way (llama.cpp)", "code": llamacpp_command(pick)},
        ]
    else:
        # Presumably the first catalogue entry is the smallest class —
        # confirm against the ordering of MODEL_CLASSES.
        tiny = MODEL_CLASSES[0]
        command_items = [
            {"label": "Smallest you could try (Ollama)", "code": f"ollama run {tiny.ollama_tag}"},
        ]
    commands = {"intro": cmd_intro, "items": command_items}

    return {
        "catalogue_version": catalogue_version,
        "verdict": ui_verdict,
        "verdict_word": _VERDICT_WORD[ui_verdict],
        "headline": headline,
        "detail": detail,
        "note": note,
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        # Echoed back so the model brick can narrate the SAME numbers the UI shows.
        "meets_goal": adv.meets_goal,
        "use_case": adv.use_case.plain_name,
    }