File size: 3,515 Bytes
12d2e34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Runtimes: the actual programs that run a model on your machine.

We deliberately keep this list short and well-supported. For each machine we
surface TWO paths:

  - the easiest path   : a friendly app a non-technical person can install and
                         click (Ollama / LM Studio). This is the default.
  - the power path     : llama.cpp with GGUF files — more control, and the
                         tool the hackathon's "Llama Champion" goal rewards.

Plus platform-native options where they genuinely help (MLX on Apple,
OpenVINO on Intel, vLLM on big Linux GPU boxes).
"""

from dataclasses import dataclass


@dataclass(frozen=True)
class Runtime:
    """Immutable record describing one program that can run a model locally.

    Fields are accepted positionally in declaration order:
    key, name, plain_what, difficulty, install_hint, site.
    """

    # Short identifier; the catalogue in this module indexes entries by it.
    key: str
    # Human-readable product name.
    name: str
    # What it is, in one friendly line.
    plain_what: str
    # One of "Easiest" | "Moderate" | "Advanced".
    difficulty: str
    # Brief pointer on how to get it installed.
    install_hint: str
    # Project or documentation URL.
    site: str


# Catalogue of every runtime this module knows about, indexed by Runtime.key.
# Insertion order: the two friendly apps, then llama.cpp, then the
# platform-specific options.
RUNTIMES: dict[str, Runtime] = {
    runtime.key: runtime
    for runtime in (
        Runtime(
            key="ollama",
            name="Ollama",
            plain_what="A simple app. You type one line and it downloads and runs a model.",
            difficulty="Easiest",
            install_hint="Download the installer from ollama.com",
            site="https://ollama.com",
        ),
        Runtime(
            key="lmstudio",
            name="LM Studio",
            plain_what="A point-and-click app with a chat window — no typing commands.",
            difficulty="Easiest",
            install_hint="Download from lmstudio.ai",
            site="https://lmstudio.ai",
        ),
        Runtime(
            key="llamacpp",
            name="llama.cpp",
            plain_what="The lightweight engine under the hood. Runs GGUF model files directly.",
            difficulty="Advanced",
            install_hint="Build from source or grab a release on GitHub",
            site="https://github.com/ggml-org/llama.cpp",
        ),
        Runtime(
            key="mlx",
            name="MLX",
            plain_what="Apple's own framework, built for Mac chips and their shared memory.",
            difficulty="Moderate",
            install_hint="pip install mlx-lm",
            site="https://github.com/ml-explore/mlx",
        ),
        Runtime(
            key="openvino",
            name="OpenVINO",
            plain_what="Intel's toolkit that squeezes good speed out of Intel chips and NPUs.",
            difficulty="Moderate",
            install_hint="pip install optimum[openvino]",
            site="https://docs.openvino.ai",
        ),
        Runtime(
            key="vllm",
            name="vLLM",
            plain_what="A heavy-duty server for big Linux machines with strong NVIDIA GPUs.",
            difficulty="Advanced",
            install_hint="pip install vllm",
            site="https://docs.vllm.ai",
        ),
    )
}


def pick_runtimes(spec) -> list[Runtime]:
    """Return the runtimes to recommend for a machine, in best-first order.

    `spec` is a HardwareSpec; this function reads `is_apple_silicon`,
    `gpu_vendor`, `os` and `vram_gb` from it (later ones only when the
    earlier checks do not already decide the branch).

    The result always starts with Ollama and LM Studio, the friendly
    defaults, and always contains llama.cpp (the power / badge path).
    A platform-native runtime is added when the machine matches:

      - Apple silicon                         -> MLX, ahead of llama.cpp
      - Intel GPU, or Windows with no GPU     -> OpenVINO, ahead of llama.cpp
      - Linux + NVIDIA with >= 16 GB of VRAM  -> vLLM, after llama.cpp
    """
    # The easy apps lead the list: they work almost everywhere.
    picks: list[Runtime] = [RUNTIMES["ollama"], RUNTIMES["lmstudio"]]

    if spec.is_apple_silicon:
        picks += [RUNTIMES["mlx"], RUNTIMES["llamacpp"]]
    elif spec.gpu_vendor == "intel" or (spec.gpu_vendor == "none" and spec.os == "windows"):
        # Intel-leaning / CPU-only Windows machines get OpenVINO first.
        picks += [RUNTIMES["openvino"], RUNTIMES["llamacpp"]]
    else:
        picks.append(RUNTIMES["llamacpp"])
        # Only a large Linux NVIDIA box earns the server-grade suggestion.
        big_linux_nvidia = (
            spec.os == "linux" and spec.gpu_vendor == "nvidia" and spec.vram_gb >= 16
        )
        if big_linux_nvidia:
            picks.append(RUNTIMES["vllm"])

    # Keep only the first occurrence of each key; dicts preserve insertion
    # order, so the best-first ranking survives.
    first_by_key: dict[str, Runtime] = {}
    for runtime in picks:
        first_by_key.setdefault(runtime.key, runtime)
    return list(first_by_key.values())