Spaces:
Running on Zero
Running on Zero
"""
Runtimes: the actual programs that run a model on your machine.

We deliberately keep this list short and well-supported. For each machine we
surface TWO paths:

  - the easiest path : a friendly app a non-technical person can install and
                       click (Ollama / LM Studio). This is the default.
  - the power path   : llama.cpp with GGUF files — more control, and the
                       tool the hackathon's "Llama Champion" goal rewards.

Plus platform-native options where they genuinely help (MLX on Apple,
OpenVINO on Intel, vLLM on big Linux GPU boxes).
"""
| from dataclasses import dataclass | |
@dataclass
class Runtime:
    """One program that can run a model locally, described in plain language.

    The ``@dataclass`` decorator generates the ``__init__`` that the
    ``RUNTIMES`` registry relies on; instances are built positionally in
    field order: key, name, plain_what, difficulty, install_hint, site.
    """

    key: str  # short identifier; the same string is used as the RUNTIMES key
    name: str  # display name
    plain_what: str  # what it is, in one friendly line
    difficulty: str  # "Easiest" | "Moderate" | "Advanced"
    install_hint: str  # one-line instruction for getting it installed
    site: str  # URL of the project's site or docs
# Registry of every runtime we know how to recommend, keyed by Runtime.key.
# Insertion order is: friendly apps first, then llama.cpp, then the
# platform-native options.
RUNTIMES: dict[str, Runtime] = {
    runtime.key: runtime
    for runtime in (
        Runtime(
            key="ollama",
            name="Ollama",
            plain_what="A simple app. You type one line and it downloads and runs a model.",
            difficulty="Easiest",
            install_hint="Download the installer from ollama.com",
            site="https://ollama.com",
        ),
        Runtime(
            key="lmstudio",
            name="LM Studio",
            plain_what="A point-and-click app with a chat window — no typing commands.",
            difficulty="Easiest",
            install_hint="Download from lmstudio.ai",
            site="https://lmstudio.ai",
        ),
        Runtime(
            key="llamacpp",
            name="llama.cpp",
            plain_what="The lightweight engine under the hood. Runs GGUF model files directly.",
            difficulty="Advanced",
            install_hint="Build from source or grab a release on GitHub",
            site="https://github.com/ggml-org/llama.cpp",
        ),
        Runtime(
            key="mlx",
            name="MLX",
            plain_what="Apple's own framework, built for Mac chips and their shared memory.",
            difficulty="Moderate",
            install_hint="pip install mlx-lm",
            site="https://github.com/ml-explore/mlx",
        ),
        Runtime(
            key="openvino",
            name="OpenVINO",
            plain_what="Intel's toolkit that squeezes good speed out of Intel chips and NPUs.",
            difficulty="Moderate",
            install_hint="pip install optimum[openvino]",
            site="https://docs.openvino.ai",
        ),
        Runtime(
            key="vllm",
            name="vLLM",
            plain_what="A heavy-duty server for big Linux machines with strong NVIDIA GPUs.",
            difficulty="Advanced",
            install_hint="pip install vllm",
            site="https://docs.vllm.ai",
        ),
    )
}
def pick_runtimes(spec) -> list[Runtime]:
    """Return the runtimes to recommend for this machine, best-first.

    `spec` is a HardwareSpec; this function reads its `is_apple_silicon`,
    `gpu_vendor`, `os` and `vram_gb` attributes. The two friendly apps always
    lead the list, llama.cpp (the power / badge path) is always included, and
    a platform-native runtime is slotted in ahead of llama.cpp when the
    hardware matches.
    """
    # The easy, click-to-install apps come first on every machine.
    keys = ["ollama", "lmstudio"]

    # At most one platform-native option, placed before llama.cpp.
    if spec.is_apple_silicon:
        keys.append("mlx")
    elif spec.gpu_vendor == "intel" or (
        spec.gpu_vendor == "none" and spec.os == "windows"
    ):
        # Intel GPU, or a Windows machine with no GPU at all.
        keys.append("openvino")

    # llama.cpp is offered regardless of which branch was taken above.
    keys.append("llamacpp")

    # Linux + NVIDIA with at least 16 GB of VRAM also gets the server option.
    if spec.os == "linux" and spec.gpu_vendor == "nvidia" and spec.vram_gb >= 16:
        keys.append("vllm")

    # Drop repeats, keeping the first occurrence and the original order.
    unique: dict[str, Runtime] = {}
    for key in keys:
        runtime = RUNTIMES[key]
        unique.setdefault(runtime.key, runtime)
    return list(unique.values())