File size: 5,787 Bytes
12d2e34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Putting it in plain words.

The advisor produces structured facts; this module turns them into sentences a
non-technical person actually understands, and into commands they can copy and
paste. No jargon survives here without being explained.
"""

from .advisor import (
    Advice,
    ModelVerdict,
    VERDICT_WORKS,
    VERDICT_COMPROMISE,
    VERDICT_NO,
)

# Coloured-circle emoji for each verdict constant imported from the advisor
# module: green for "works", yellow for "compromise", red for "no".
VERDICT_EMOJI = {
    VERDICT_WORKS: "🟢",
    VERDICT_COMPROMISE: "🟡",
    VERDICT_NO: "🔴",
}

# Short plain-language label for each of the same three verdict constants.
VERDICT_WORD = {
    VERDICT_WORKS: "Works now",
    VERDICT_COMPROMISE: "Works, with compromises",
    VERDICT_NO: "Don't bother",
}


def speed_hint(v: ModelVerdict, spec) -> str:
    """Give a one-line, plain-language impression of reply speed.

    The answer depends only on ``v.verdict`` and, for the "works" verdict,
    on ``v.model.billions``.  ``spec`` is accepted but not consulted.

    Returns an em dash for a "no" verdict, a fixed "Slow" sentence for a
    "compromise" verdict, and otherwise one of three phrases chosen by model
    size (up to 4, up to 14, or more than 14 billion parameters).
    """
    verdict = v.verdict
    if verdict == VERDICT_NO:
        return "—"
    if verdict == VERDICT_COMPROMISE:
        return "Slow — usable for short tasks, not snappy chat."

    # Everything else is treated as the fast path; larger models still get a
    # more cautious phrase.  Tiers are checked smallest ceiling first.
    size_tiers = (
        (4, "Fast — replies feel instant."),
        (14, "Comfortable — quick enough for live chat."),
    )
    billions = v.model.billions
    for ceiling, phrase in size_tiers:
        if billions <= ceiling:
            return phrase
    return "Steady — fine, just not instant on big answers."


# --------------------------------------------------------------------------
# Commands
# --------------------------------------------------------------------------

def ollama_command(v: ModelVerdict) -> str:
    """Return the copy-paste ``ollama run`` command for the verdict's model.

    The model is identified by ``v.model.ollama_tag``.
    """
    tag = v.model.ollama_tag
    return "ollama run {}".format(tag)


def llamacpp_command(v: ModelVerdict) -> str:
    """Return the copy-paste ``llama-server`` command for the verdict.

    Uses ``v.model.gguf_repo`` and ``v.quant.key`` for the ``-hf`` argument
    and ``v.estimate.context_tokens`` for the ``-c`` argument.
    """
    # llama.cpp can download a GGUF directly from Hugging Face when given
    # the source as "repo:quant".
    hf_source = f"{v.model.gguf_repo}:{v.quant.key}"
    context = f"{v.estimate.context_tokens}"
    return " ".join(["llama-server", "-hf", hf_source, "-c", context])


# --------------------------------------------------------------------------
# Headline summary, in human words
# --------------------------------------------------------------------------

def headline_text(advice: Advice) -> str:
    """Render the headline recommendation as Markdown paragraphs.

    Reads ``advice.spec``, ``advice.use_case`` and ``advice.headline``.

    * When there is no headline pick (``advice.headline is None``) the result
      is a two-paragraph explanation that the machine cannot comfortably run
      a local model for this use case.
    * Otherwise the result is a bold one-line lead chosen by the pick's
      verdict, a paragraph naming the model and quantisation setting, a
      paragraph breaking down the memory estimate against the machine's
      budgets, and then, when present, the use-case note and a bullet list
      of the pick's own notes.

    Paragraphs are separated by a blank line.
    """
    machine = advice.spec
    job = advice.use_case
    pick = advice.headline

    if pick is None:
        return "\n\n".join([
            "**Honest answer: this machine can't comfortably run local AI "
            f"for {job.plain_name.lower()} yet.**",
            "Even the smallest models need more memory than the "
            f"{machine.ram_gb:g} GB available here once everything else is "
            "running. That's not a failure — small computers just have small "
            "budgets. A free cloud option, or adding memory, would open this up.",
        ])

    model = pick.model
    quant = pick.quant

    # The graphics card is only mentioned when the machine has a fast path
    # AND the pick got the "works" verdict; every other case says processor.
    if machine.has_fast_path and pick.verdict == VERDICT_WORKS:
        where = "on the graphics card"
    else:
        where = "on the processor"

    if pick.verdict == VERDICT_WORKS:
        lead = f"**Yes — you can run a {model.plain_name} model {where}, today.**"
    elif pick.verdict == VERDICT_COMPROMISE:
        lead = f"**Sort of — a {model.plain_name} model will run, but with trade-offs.**"
    else:
        lead = f"**Not really — even a {model.plain_name} model is a stretch here.**"

    paragraphs = [lead]

    paragraphs.append(
        f"For **{job.plain_name.lower()}**, the sweet spot on your machine is a "
        f"**{model.plain_name}** model at the **{quant.plain_name}** setting. "
        f"{model.good_for}"
    )

    estimate = pick.estimate
    paragraphs.append(
        f"That needs about **{estimate.total_gb:g} GB** of memory "
        f"(model {estimate.weights_gb:g} GB + chat memory "
        f"{estimate.kv_cache_gb:g} GB + working space {estimate.overhead_gb:g} GB), "
        f"and you have roughly **{machine.fast_budget_gb:g} GB** fast / "
        f"**{machine.total_budget_gb:g} GB** total to play with."
    )

    # Optional trailers: the use-case note first, then the pick's notes as
    # one bullet per line.
    if job.note:
        paragraphs.append(f"*Note for this job:* {job.note}")
    if pick.notes:
        paragraphs.append("\n".join(f"- {item}" for item in pick.notes))

    return "\n\n".join(paragraphs)


def jargon_glossary() -> str:
    """Return the plain-English glossary as a Markdown bullet list.

    The result is a bold heading, a blank line, then one ``- **term** —
    meaning`` bullet per entry, with no trailing newline.
    """
    entries = (
        ("Model", "the AI's 'brain'. Bigger = smarter but heavier."),
        ("Parameters (e.g. 7B)",
         "how big the brain is. 7B = 7 billion. "
         "More = smarter and hungrier for memory."),
        ("Quantisation (4-bit, 8-bit)",
         "shrinking the model so it fits. "
         "4-bit is the popular sweet spot: much smaller, barely-noticeable quality loss."),
        ("VRAM",
         "the fast memory on a graphics card. The single biggest "
         "factor in what you can run quickly."),
        ("RAM",
         "your computer's normal memory. Models can use it too, but it's slower."),
        ("KV cache / 'chat memory'",
         "scratch space the model uses to "
         "remember the current conversation. Longer chats use more."),
        ("GGUF", "a single-file model format made for running locally."),
        ("llama.cpp / Ollama",
         "the programs that actually run the model on your machine."),
    )
    bullets = "\n".join(f"- **{term}** — {meaning}" for term, meaning in entries)
    return "**Plain-English glossary**\n\n" + bullets


def how_to_find_specs(os_hint: str = "windows") -> str:
    """Return Markdown instructions for looking up RAM and graphics card.

    Args:
        os_hint: Which operating system to give instructions for.
            ``"macos"`` and ``"linux"`` select their own instructions; any
            other value (including the default ``"windows"``, an empty
            string or ``None``) yields the Windows instructions.  Matching
            ignores letter case and surrounding whitespace, so ``"macOS"``
            or ``" Linux "`` no longer fall through to the Windows text.

    Returns:
        A heading line, a blank line, and a short bullet list.
    """
    heading = "**Not sure of your specs? Here's how to check:**\n\n"

    # Normalise the hint: callers may pass display names such as "macOS".
    platform_key = str(os_hint or "").strip().lower()

    if platform_key == "macos":
        # Plain "Apple menu" wording only: no logo glyph, which would not
        # render outside Apple platforms.
        return heading + (
            "- Click the Apple menu (top-left) → **About This Mac**.\n"
            "- It shows your chip (e.g. *Apple M2*) and **Memory** (e.g. *16 GB*).\n"
            "- On a Mac, that one memory number is all you need — the graphics "
            "share it."
        )
    if platform_key == "linux":
        # The trailing newline is kept as-is so existing output is unchanged.
        return heading + (
            "- RAM: run `free -h` in a terminal.\n"
            "- Graphics card: run `nvidia-smi` (NVIDIA) or `lspci | grep VGA`.\n"
        )
    # Fallback for Windows and any unrecognised hint.
    return heading + (
        "- **RAM:** press `Ctrl + Shift + Esc` → **Performance** tab → **Memory**.\n"
        "- **Graphics card:** same window → **GPU**. The name is at the top "
        "right (e.g. *NVIDIA RTX 3060*).\n"
        "- No GPU section showing a real card? You likely have built-in "
        "graphics — that's fine, just pick the 'built-in' option."
    )