Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

File size: 9,906 Bytes

"""
Adapter: turn a frontend payload into the exact JSON the static/ frontend
renders, using the REAL deterministic engine (not the placeholder).

The frontend speaks one contract (verdicts ``great|tight|no``, an options list,
a gauge, tools, commands). The engine speaks another (``works_now|compromises|
dont_bother`` over ``ModelVerdict`` objects). This module is the seam between
them, so neither side has to know about the other.

Scope: the engine currently models the **LLM** family only (its model classes
are all text models). Vision / image-gen / audio / data goals still fall back to
the input-aware placeholder in ``app.py`` — that boundary is deliberate and
honest, not an oversight. ``is_llm_usecase`` below is the routing switch.
"""

import re

from .advisor import (
    advise,
    VERDICT_WORKS,
    VERDICT_COMPROMISE,
    VERDICT_NO,
)
from .catalogue import MODEL_CLASSES
from .explain import speed_hint, ollama_command, llamacpp_command
from .hardware import HardwareSpec

# Bands: engine verdict constant -> the colour-key the frontend understands
# ("great" / "tight" / "no"). Lookups use plain indexing, so a verdict that
# is missing from this table raises KeyError rather than being guessed at.
_VERDICT_UI = {
    VERDICT_WORKS: "great",
    VERDICT_COMPROMISE: "tight",
    VERDICT_NO: "no",
}
# Short human-readable label for each frontend colour-key; returned to the UI
# as "verdict_word".
_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}

# Gauge breakdown colours (match the placeholder palette in app.py / style.css).
_C_MODEL = "#818CF8"   # segment for the model weights
_C_WORK = "#868E9C"    # segment for everything else (chat memory + working space)

# Goals the engine can answer for real. Everything LLM-shaped maps onto a chat
# context; "translate"/"custom" are still language models, so they route here.
# These are frontend use-case ids, i.e. the values tested by is_llm_usecase().
_LLM_USECASES = {
    "chat", "writing", "coding", "agents", "rag", "finetune", "translate", "custom",
}
# The engine's own use-case keys. Frontend ids that aren't 1:1 get mapped;
# any id not listed here is handed to the engine unchanged.
_USECASE_ALIAS = {"translate": "chat", "custom": "chat"}


def is_llm_usecase(usecase: str) -> bool:
    """Routing switch: should the real engine answer this goal?

    Returns True when ``usecase`` is one of the frontend ids listed in
    ``_LLM_USECASES``; False means the caller should use the placeholder.
    """
    handled_by_engine = usecase in _LLM_USECASES
    return handled_by_engine


# --------------------------------------------------------------------------
# Frontend payload -> HardwareSpec
# --------------------------------------------------------------------------

def _num_in(text: str) -> float:
    """First '<number> GB' figure in a string, else 0."""
    m = re.search(r"(\d+(?:\.\d+)?)\s*GB", text or "", re.I)
    return float(m.group(1)) if m else 0.0


def spec_from_payload(p: dict) -> HardwareSpec:
    """Translate the frontend's gather() payload into a HardwareSpec.

    The spec is assembled here instead of via build_spec(): the payload
    already names the GPU vendor and carries a label with the VRAM figure in
    it, and the Advanced box may supply VRAM directly.

    Keys read from ``p`` (all optional): ``computer`` (default
    "Windows laptop"), ``provider`` (default "none"), ``ram_gb`` (default 16),
    ``gpu``, ``vram_gb`` and ``paste``.
    """
    kind = (p.get("computer") or "Windows laptop").lower()
    provider = (p.get("provider") or "none").lower()
    ram = float(p.get("ram_gb") or 16)

    # Macs share one memory pool between CPU and GPU, so VRAM is reported as 0.
    is_mac = provider == "apple" or "mac" in kind
    if is_mac:
        # Keep the full chip name so its tier (Pro/Max/Ultra) stays available
        # for the bandwidth lookup.
        chip = p.get("gpu") or "Apple Silicon"
        return HardwareSpec(
            os="macos",
            ram_gb=ram,
            gpu_vendor="apple",
            vram_gb=0.0,
            is_apple_silicon=True,
            gpu_label=f"{chip} (shares your {ram:g} GB of memory)",
            form_factor="mac",
        )

    # Raspberry Pi / mini PC: modelled as a Linux board with no GPU at all.
    if any(word in kind for word in ("raspberry", "mini")):
        return HardwareSpec(
            os="linux",
            ram_gb=ram,
            gpu_vendor="none",
            vram_gb=0.0,
            gpu_label="No dedicated graphics card (tiny computer)",
            form_factor="sbc",
        )

    # Everything else is an ordinary PC; Windows laptop is the fallback shape.
    os_name = "windows"
    if "linux" in kind:
        os_name = "linux"
    form = "laptop"
    if "desktop" in kind:
        form = "desktop"

    # VRAM precedence: the Advanced override, then the figure embedded in the
    # GPU picker label, then a figure found in the pasted text.
    override = p.get("vram_gb")
    if override:
        vram = float(override)
    else:
        guessed = _num_in(p.get("gpu", "")) or _num_in(p.get("paste", ""))
        vram = float(guessed or 0)

    if provider in ("nvidia", "amd", "intel"):
        vendor = provider
    else:
        # "none" / "unsure" (or anything unrecognised): no fast path, so any
        # VRAM figure collected above is discarded.
        vendor, vram = "none", 0.0

    return HardwareSpec(
        os=os_name,
        ram_gb=ram,
        gpu_vendor=vendor,
        vram_gb=vram,
        is_apple_silicon=False,
        gpu_label=p.get("gpu") or "No dedicated graphics card (built-in graphics only)",
        form_factor=form,
    )


# --------------------------------------------------------------------------
# Advice -> frontend JSON
# --------------------------------------------------------------------------

def _where(spec: HardwareSpec, verdict: str) -> str:
    if verdict == "great":
        if spec.is_apple_silicon:
            return "on your Mac"
        if spec.has_fast_path:
            return "on your graphics card"
        return "on your computer"
    if verdict == "tight":
        return "using your computer's memory"
    return ""


def advise_for_ui(payload: dict, catalogue_version: str) -> dict:
    """Run the real engine and shape its output for static/app.js render().

    Args:
        payload: The frontend's gather() payload. ``usecase`` is read here
            (missing, null or empty means "chat"); the hardware fields are
            consumed by ``spec_from_payload``.
        catalogue_version: Echoed back unchanged as ``catalogue_version``.

    Returns:
        A dict with the keys ``catalogue_version``, ``verdict``,
        ``verdict_word``, ``headline``, ``detail``, ``note``, ``gauge``,
        ``options``, ``tools``, ``commands``, ``meets_goal`` and ``use_case``.
        ``detail`` contains ``<b>`` markup.
    """
    # Same "value or default" pattern as spec_from_payload, so an explicit
    # null / empty usecase falls back to chat instead of reaching the engine
    # as None. Frontend ids without a 1:1 engine key go through the alias map.
    raw_usecase = payload.get("usecase") or "chat"
    usecase = _USECASE_ALIAS.get(raw_usecase, raw_usecase)
    spec = spec_from_payload(payload)
    adv = advise(spec, usecase)

    fast = spec.fast_budget_gb
    total = spec.total_budget_gb

    # ---- Options table (already biggest -> smallest from the engine) -----
    options = [
        {
            "verdict": _VERDICT_UI[v.verdict],
            "model": v.model.plain_name,
            "desc": v.model.good_for,
            "setting": v.quant.plain_name,
            # A model that does not fit gets no number, only "Too big".
            "memory": "Too big" if v.verdict == VERDICT_NO else f"{v.estimate.total_gb:g} GB",
            "feel": speed_hint(v, spec),
        }
        for v in adv.verdicts
    ]

    # ---- Headline ---------------------------------------------------------
    # No headline pick from the engine is presented as the "no" band.
    h = adv.headline
    hv = _VERDICT_UI[h.verdict] if h else "no"
    where = _where(spec, hv)

    if h and hv == "great":
        headline = f"Yes, you can run a {h.model.plain_name} model {where}, today."
    elif h and hv == "tight":
        headline = f"Sort of. A {h.model.plain_name} model will run {where}, with trade-offs."
    else:
        headline = "This goal is a stretch on this machine. Here's the honest picture."

    if h:
        est = h.estimate
        need_gb = est.total_gb
        detail = (
            f"For this goal, the sweet spot is a <b>{h.model.plain_name}</b> model "
            f"at the <b>{h.quant.plain_name}</b> setting. {h.model.good_for} "
            f"It needs about <b>{need_gb:g} GB</b> "
            f"(model {est.weights_gb:g} GB + chat memory {est.kv_cache_gb:g} GB "
            f"+ working space {est.overhead_gb:g} GB), and you have roughly "
            f"<b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total to work with."
        )
    else:
        # Nothing fits even squeezed: be honest, show the shortfall.
        # NOTE(review): assumes adv.verdicts is never empty (IndexError
        # otherwise) and that its last entry is the smallest model -- confirm
        # against the engine.
        smallest = adv.verdicts[-1]
        need_gb = smallest.estimate.total_gb
        detail = (
            f"Even the smallest model here needs about <b>{need_gb:g} GB</b>, "
            f"but this machine can offer only about <b>{total:g} GB</b> once the "
            f"operating system has its share. That's not a failure — small "
            f"computers just have small budgets. Adding memory, or a free cloud "
            f"option, would open this up."
        )

    # Notes: use-case caveat + the headline's own honest footnotes.
    note_bits = []
    if adv.use_case.note:
        note_bits.append(adv.use_case.note)
    if h and h.notes:
        note_bits.extend(h.notes)
    note = "  ".join(note_bits)

    # ---- Gauge ------------------------------------------------------------
    # The bar spans the larger of budget and need (at least 1 GB) plus 5 %
    # headroom, so neither the fill nor the marker can reach 100 %.
    scale = max(total, need_gb, 1) * 1.05
    if h:
        model_part = round(h.estimate.weights_gb, 1)
        work_part = round(need_gb - model_part, 1)
    else:
        # NOTE(review): fixed 80/20 split rather than the smallest model's
        # own estimate -- presumably illustrative; confirm this is intended.
        model_part = round(need_gb * 0.8, 1)
        work_part = round(need_gb * 0.2, 1)
    gauge = {
        "need_gb": f"{need_gb:g} GB needed",
        "fast_gb": f"{fast:g} GB",
        "total_gb": f"{total:g} GB",
        "fill_pct": round(need_gb / scale * 100, 1),
        "mark_pct": round(fast / scale * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    # ---- Tools (runtimes) -------------------------------------------------
    tools = [{
        "name": r.name, "what": r.plain_what,
        "install": r.install_hint, "tag": r.difficulty,
    } for r in adv.runtimes]

    # ---- Commands ---------------------------------------------------------
    cmd_intro = ("These get you a running model in minutes. Pick the easy one or "
                 "the power one; they do the same job.")
    if h:
        commands = {"intro": cmd_intro, "items": [
            {"label": "Easy way (Ollama)", "code": ollama_command(h)},
            {"label": "Power way (llama.cpp)", "code": llamacpp_command(h)},
        ]}
    else:
        # No headline pick: offer the first catalogue entry as something to try.
        tiny = MODEL_CLASSES[0]
        commands = {"intro": cmd_intro, "items": [
            {"label": "Smallest you could try (Ollama)", "code": f"ollama run {tiny.ollama_tag}"},
        ]}

    return {
        "catalogue_version": catalogue_version,
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": headline,
        "detail": detail,
        "note": note,
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        # Echoed back so the model brick can narrate the SAME numbers the UI shows.
        "meets_goal": adv.meets_goal,
        "use_case": adv.use_case.plain_name,
    }