""" Runtimes: the actual programs that run a model on your machine. We deliberately keep this list short and well-supported. For each machine we surface TWO paths: - the easiest path : a friendly app a non-technical person can install and click (Ollama / LM Studio). This is the default. - the power path : llama.cpp with GGUF files — more control, and the tool the hackathon's "Llama Champion" goal rewards. Plus platform-native options where they genuinely help (MLX on Apple, OpenVINO on Intel, vLLM on big Linux GPU boxes). """ from dataclasses import dataclass @dataclass(frozen=True) class Runtime: key: str name: str plain_what: str # what it is, in one friendly line difficulty: str # "Easiest" | "Moderate" | "Advanced" install_hint: str site: str RUNTIMES: dict[str, Runtime] = { "ollama": Runtime( "ollama", "Ollama", "A simple app. You type one line and it downloads and runs a model.", "Easiest", "Download the installer from ollama.com", "https://ollama.com"), "lmstudio": Runtime( "lmstudio", "LM Studio", "A point-and-click app with a chat window — no typing commands.", "Easiest", "Download from lmstudio.ai", "https://lmstudio.ai"), "llamacpp": Runtime( "llamacpp", "llama.cpp", "The lightweight engine under the hood. Runs GGUF model files directly.", "Advanced", "Build from source or grab a release on GitHub", "https://github.com/ggml-org/llama.cpp"), "mlx": Runtime( "mlx", "MLX", "Apple's own framework, built for Mac chips and their shared memory.", "Moderate", "pip install mlx-lm", "https://github.com/ml-explore/mlx"), "openvino": Runtime( "openvino", "OpenVINO", "Intel's toolkit that squeezes good speed out of Intel chips and NPUs.", "Moderate", "pip install optimum[openvino]", "https://docs.openvino.ai"), "vllm": Runtime( "vllm", "vLLM", "A heavy-duty server for big Linux machines with strong NVIDIA GPUs.", "Advanced", "pip install vllm", "https://docs.vllm.ai"), } def pick_runtimes(spec) -> list[Runtime]: """Choose the runtimes worth recommending for this machine, best-first. `spec` is a HardwareSpec. The first entry is the friendly default; the list always includes llama.cpp (the power / badge path) where it makes sense, and a platform-native option when one clearly helps. """ out: list[Runtime] = [] # Easiest path first — works almost everywhere and wraps llama.cpp anyway. out.append(RUNTIMES["ollama"]) out.append(RUNTIMES["lmstudio"]) if spec.is_apple_silicon: out.append(RUNTIMES["mlx"]) out.append(RUNTIMES["llamacpp"]) elif spec.gpu_vendor == "intel" or (spec.gpu_vendor == "none" and spec.os == "windows"): # Intel-leaning / CPU machines benefit from OpenVINO. out.append(RUNTIMES["openvino"]) out.append(RUNTIMES["llamacpp"]) else: out.append(RUNTIMES["llamacpp"]) # Big Linux NVIDIA box → mention the server-grade option. if spec.os == "linux" and spec.gpu_vendor == "nvidia" and spec.vram_gb >= 16: out.append(RUNTIMES["vllm"]) # De-duplicate while preserving order. seen, deduped = set(), [] for r in out: if r.key not in seen: seen.add(r.key) deduped.append(r) return deduped