File size: 9,906 Bytes
12d2e34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee8ca43
12d2e34
 
 
ee8ca43
12d2e34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""
Adapter: turn a frontend payload into the exact JSON the static/ frontend
renders, using the REAL deterministic engine (not the placeholder).

The frontend speaks one contract (verdicts ``great|tight|no``, an options list,
a gauge, tools, commands). The engine speaks another (``works_now|compromises|
dont_bother`` over ``ModelVerdict`` objects). This module is the seam between
them, so neither side has to know about the other.

Scope: the engine currently models the **LLM** family only (its model classes
are all text models). Vision / image-gen / audio / data goals still fall back to
the input-aware placeholder in ``app.py`` — that boundary is deliberate and
honest, not an oversight. ``is_llm_usecase`` below is the routing switch.
"""

import re

from .advisor import (
    advise,
    VERDICT_WORKS,
    VERDICT_COMPROMISE,
    VERDICT_NO,
)
from .catalogue import MODEL_CLASSES
from .explain import speed_hint, ollama_command, llamacpp_command
from .hardware import HardwareSpec

# Translation table: each engine verdict constant -> the colour-key string the
# frontend renders.
_VERDICT_UI = {
    VERDICT_WORKS: "great",
    VERDICT_COMPROMISE: "tight",
    VERDICT_NO: "no",
}
# Short human wording shown for each frontend colour-key.
_VERDICT_WORD = {
    "great": "Runs great",
    "tight": "Tight, but works",
    "no": "Won't fit",
}

# Colours for the two gauge breakdown entries (kept in step with the
# placeholder palette in app.py / style.css).
_C_MODEL = "#818CF8"  # the weights themselves
_C_WORK = "#868E9C"  # chat memory + working space

# Frontend goal ids the real engine answers. All of these are language-model
# shaped; "translate" and "custom" are still language models, so they belong
# here too.
_LLM_USECASES = {
    "chat",
    "writing",
    "coding",
    "agents",
    "rag",
    "finetune",
    "translate",
    "custom",
}
# Frontend ids with no 1:1 engine use-case key are folded onto "chat".
_USECASE_ALIAS = dict.fromkeys(("translate", "custom"), "chat")


def is_llm_usecase(usecase: str) -> bool:
    """Routing switch: say whether the real engine should answer this goal.

    Returns True when ``usecase`` is one of the ids in ``_LLM_USECASES``;
    False means the goal is left to the placeholder instead.
    """
    handled_by_engine = usecase in _LLM_USECASES
    return handled_by_engine


# --------------------------------------------------------------------------
# Frontend payload -> HardwareSpec
# --------------------------------------------------------------------------

# One "<number> GB" figure, case-insensitive. The trailing negative lookahead
# keeps "GB" from matching as the start of a longer word, so a figure such as
# "10 Gbps" is not mistaken for 10 GB of memory.
_GB_FIGURE = re.compile(r"(\d+(?:\.\d+)?)\s*GB(?![a-z])", re.I)


def _num_in(text: str) -> float:
    """First '<number> GB' figure in a string, else 0.

    The number may be an integer or a decimal and may be separated from the
    unit by whitespace ("24GB", "24 GB", "1.5 gb"). Anything that is not a
    string (``None``, a bare number, ...) yields 0.0 rather than raising, so
    callers can pass optional payload fields straight through.
    """
    if not isinstance(text, str):
        return 0.0
    m = _GB_FIGURE.search(text)
    return float(m.group(1)) if m else 0.0


def spec_from_payload(p: dict) -> HardwareSpec:
    """Translate the frontend's gather() payload into a HardwareSpec.

    The spec is assembled here instead of via build_spec(): the frontend
    already supplies the vendor and a label that carries the VRAM figure, and
    an Advanced box can override VRAM outright.

    Payload keys read: ``computer`` (default "Windows laptop"), ``provider``
    (default "none"), ``ram_gb`` (default 16), ``gpu``, ``vram_gb`` and
    ``paste``. Three shapes of machine are distinguished, in this order:
    Apple Silicon, Raspberry Pi / mini PC, and everything else (a Windows or
    Linux laptop/desktop).
    """
    machine = (p.get("computer") or "Windows laptop").lower()
    gpu_provider = (p.get("provider") or "none").lower()
    memory_gb = float(p.get("ram_gb") or 16)

    # Apple Silicon shares one pool of memory, so there is no separate VRAM.
    if "mac" in machine or gpu_provider == "apple":
        # Keep the chip tier (Pro/Max/Ultra) in the label for bandwidth lookup.
        chip_name = p.get("gpu") or "Apple Silicon"
        return HardwareSpec(
            os="macos",
            ram_gb=memory_gb,
            gpu_vendor="apple",
            vram_gb=0.0,
            is_apple_silicon=True,
            gpu_label=f"{chip_name} (shares your {memory_gb:g} GB of memory)",
            form_factor="mac",
        )

    # Raspberry Pi / mini PC: modelled as Linux with no graphics card.
    if any(tag in machine for tag in ("raspberry", "mini")):
        return HardwareSpec(
            os="linux",
            ram_gb=memory_gb,
            gpu_vendor="none",
            vram_gb=0.0,
            gpu_label="No dedicated graphics card (tiny computer)",
            form_factor="sbc",
        )

    # VRAM precedence: the Advanced override, then the "<n> GB" figure in the
    # picker label, then the first such figure in the pasted text.
    vram_gb = p.get("vram_gb")
    if not vram_gb:
        vram_gb = _num_in(p.get("gpu", "")) or _num_in(p.get("paste", ""))
    vram_gb = float(vram_gb or 0)

    if gpu_provider in ("nvidia", "amd", "intel"):
        gpu_vendor = gpu_provider
    else:
        # "none" / "unsure": treat as no fast path, whatever VRAM was claimed.
        gpu_vendor = "none"
        vram_gb = 0.0

    return HardwareSpec(
        os="linux" if "linux" in machine else "windows",
        ram_gb=memory_gb,
        gpu_vendor=gpu_vendor,
        vram_gb=vram_gb,
        is_apple_silicon=False,
        gpu_label=p.get("gpu") or "No dedicated graphics card (built-in graphics only)",
        form_factor="desktop" if "desktop" in machine else "laptop",
    )


# --------------------------------------------------------------------------
# Advice -> frontend JSON
# --------------------------------------------------------------------------

def _where(spec: HardwareSpec, verdict: str) -> str:
    """Return the "where it runs" phrase that is slotted into the headline.

    ``verdict`` is a frontend colour-key. "tight" always reads as running from
    the computer's memory; "great" depends on the hardware (Mac, graphics
    card, or plain computer); any other value gives an empty string.
    """
    if verdict == "tight":
        return "using your computer's memory"
    if verdict != "great":
        return ""
    if spec.is_apple_silicon:
        return "on your Mac"
    return "on your graphics card" if spec.has_fast_path else "on your computer"


def advise_for_ui(payload: dict, catalogue_version: str) -> dict:
    """Run the real engine and shape its output for static/app.js render().

    ``payload`` is the frontend's gather() payload. Its ``usecase`` picks the
    goal; a missing, null or empty value is treated as "chat", and frontend
    ids listed in ``_USECASE_ALIAS`` are mapped onto the engine's own key.
    The hardware fields are read by ``spec_from_payload``.

    ``catalogue_version`` is echoed back unchanged.

    Returns a dict with the keys ``catalogue_version``, ``verdict``,
    ``verdict_word``, ``headline``, ``detail``, ``note``, ``gauge``,
    ``options``, ``tools``, ``commands``, ``meets_goal`` and ``use_case``.
    ``detail`` contains ``<b>`` markup.
    """
    # `or "chat"` (rather than a .get default) so an explicit null/empty
    # usecase also falls back, matching how spec_from_payload reads its fields.
    requested = payload.get("usecase") or "chat"
    usecase = _USECASE_ALIAS.get(requested, requested)
    spec = spec_from_payload(payload)
    adv = advise(spec, usecase)

    fast = spec.fast_budget_gb
    total = spec.total_budget_gb

    # ---- Options table (already biggest -> smallest from the engine) -----
    options = [
        {
            "verdict": _VERDICT_UI[v.verdict],
            "model": v.model.plain_name,
            "desc": v.model.good_for,
            "setting": v.quant.plain_name,
            "memory": "Too big" if v.verdict == VERDICT_NO else f"{v.estimate.total_gb:g} GB",
            "feel": speed_hint(v, spec),
        }
        for v in adv.verdicts
    ]

    # ---- Headline ---------------------------------------------------------
    h = adv.headline
    # No headline pick at all is presented as the "no" band.
    hv = _VERDICT_UI[h.verdict] if h else "no"
    where = _where(spec, hv)

    if h and hv == "great":
        headline = f"Yes, you can run a {h.model.plain_name} model {where}, today."
    elif h and hv == "tight":
        headline = f"Sort of. A {h.model.plain_name} model will run {where}, with trade-offs."
    else:
        headline = "This goal is a stretch on this machine. Here's the honest picture."

    if h:
        est = h.estimate
        need_gb = est.total_gb
        detail = (
            f"For this goal, the sweet spot is a <b>{h.model.plain_name}</b> model "
            f"at the <b>{h.quant.plain_name}</b> setting. {h.model.good_for} "
            f"It needs about <b>{need_gb:g} GB</b> "
            f"(model {est.weights_gb:g} GB + chat memory {est.kv_cache_gb:g} GB "
            f"+ working space {est.overhead_gb:g} GB), and you have roughly "
            f"<b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total to work with."
        )
    else:
        # Nothing fits even squeezed: be honest, show the shortfall.
        # The last verdict is taken as the smallest model (the engine's list
        # is expected to run biggest -> smallest).
        smallest = adv.verdicts[-1]
        need_gb = smallest.estimate.total_gb
        detail = (
            f"Even the smallest model here needs about <b>{need_gb:g} GB</b>, "
            f"but this machine can offer only about <b>{total:g} GB</b> once the "
            f"operating system has its share. That's not a failure — small "
            f"computers just have small budgets. Adding memory, or a free cloud "
            f"option, would open this up."
        )

    # Notes: use-case caveat + the headline's own honest footnotes.
    note_bits = []
    if adv.use_case.note:
        note_bits.append(adv.use_case.note)
    if h and h.notes:
        note_bits.extend(h.notes)
    note = "  ".join(note_bits)

    # ---- Gauge ------------------------------------------------------------
    # Scale the bar to the larger of budget and need (at least 1 GB), plus 5%
    # headroom, so neither the fill nor the marker can exceed 100%.
    scale = max(total, need_gb, 1) * 1.05
    if h:
        model_part = round(h.estimate.weights_gb, 1)
        work_part = round(need_gb - model_part, 1)
    else:
        # No headline estimate is used here: the total is split 80/20.
        # NOTE(review): smallest.estimate presumably carries a real
        # weights_gb too — confirm whether the fixed split is intentional.
        model_part = round(need_gb * 0.8, 1)
        work_part = round(need_gb * 0.2, 1)
    gauge = {
        "need_gb": f"{need_gb:g} GB needed",
        "fast_gb": f"{fast:g} GB",
        "total_gb": f"{total:g} GB",
        "fill_pct": round(need_gb / scale * 100, 1),
        "mark_pct": round(fast / scale * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    # ---- Tools (runtimes) -------------------------------------------------
    tools = [{
        "name": r.name, "what": r.plain_what,
        "install": r.install_hint, "tag": r.difficulty,
    } for r in adv.runtimes]

    # ---- Commands ---------------------------------------------------------
    cmd_intro = ("These get you a running model in minutes. Pick the easy one or "
                 "the power one; they do the same job.")
    if h:
        commands = {"intro": cmd_intro, "items": [
            {"label": "Easy way (Ollama)", "code": ollama_command(h)},
            {"label": "Power way (llama.cpp)", "code": llamacpp_command(h)},
        ]}
    else:
        # NOTE(review): assumes MODEL_CLASSES[0] is the smallest class in the
        # catalogue — confirm the catalogue's ordering.
        tiny = MODEL_CLASSES[0]
        commands = {"intro": cmd_intro, "items": [
            {"label": "Smallest you could try (Ollama)", "code": f"ollama run {tiny.ollama_tag}"},
        ]}

    return {
        "catalogue_version": catalogue_version,
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": headline,
        "detail": detail,
        "note": note,
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        # Echoed back so the model brick can narrate the SAME numbers the UI shows.
        "meets_goal": adv.meets_goal,
        "use_case": adv.use_case.plain_name,
    }