FitCheck / engine /runtimes.py
cn0303's picture
Deploy FitCheck: engine + Nemotron model brick on ZeroGPU
12d2e34 verified
"""
Runtimes: the actual programs that run a model on your machine.
We deliberately keep this list short and well-supported. For each machine we
surface TWO paths:
- the easiest path: a friendly app a non-technical person can install and
  click (Ollama / LM Studio). This is the default.
- the power path: llama.cpp with GGUF files — more control, and the
  tool the hackathon's "Llama Champion" goal rewards.
Plus platform-native options where they genuinely help (MLX on Apple,
OpenVINO on Intel, vLLM on big Linux GPU boxes).
"""
from dataclasses import dataclass
@dataclass(frozen=True)
class Runtime:
    """Describe one program that can run a model on the user's machine.

    Instances are immutable (frozen dataclass). All fields are plain strings
    intended for display or lookup.
    """

    # Short identifier; the RUNTIMES registry uses the same value as its key.
    key: str
    # Display name of the runtime.
    name: str
    # What it is, in one friendly line.
    plain_what: str
    # One of "Easiest" | "Moderate" | "Advanced".
    difficulty: str
    # One-line hint on how to install it.
    install_hint: str
    # URL of the runtime's website or repository.
    site: str
# Registry of every runtime this module can recommend, keyed by Runtime.key.
# The dictionary key is derived from each entry's own `key` field, so the two
# can never drift apart. Insertion order matches the order written below.
RUNTIMES: dict[str, Runtime] = {
    runtime.key: runtime
    for runtime in (
        Runtime(
            key="ollama",
            name="Ollama",
            plain_what="A simple app. You type one line and it downloads and runs a model.",
            difficulty="Easiest",
            install_hint="Download the installer from ollama.com",
            site="https://ollama.com",
        ),
        Runtime(
            key="lmstudio",
            name="LM Studio",
            plain_what="A point-and-click app with a chat window — no typing commands.",
            difficulty="Easiest",
            install_hint="Download from lmstudio.ai",
            site="https://lmstudio.ai",
        ),
        Runtime(
            key="llamacpp",
            name="llama.cpp",
            plain_what="The lightweight engine under the hood. Runs GGUF model files directly.",
            difficulty="Advanced",
            install_hint="Build from source or grab a release on GitHub",
            site="https://github.com/ggml-org/llama.cpp",
        ),
        Runtime(
            key="mlx",
            name="MLX",
            plain_what="Apple's own framework, built for Mac chips and their shared memory.",
            difficulty="Moderate",
            install_hint="pip install mlx-lm",
            site="https://github.com/ml-explore/mlx",
        ),
        Runtime(
            key="openvino",
            name="OpenVINO",
            plain_what="Intel's toolkit that squeezes good speed out of Intel chips and NPUs.",
            difficulty="Moderate",
            install_hint="pip install optimum[openvino]",
            site="https://docs.openvino.ai",
        ),
        Runtime(
            key="vllm",
            name="vLLM",
            plain_what="A heavy-duty server for big Linux machines with strong NVIDIA GPUs.",
            difficulty="Advanced",
            install_hint="pip install vllm",
            site="https://docs.vllm.ai",
        ),
    )
}
def pick_runtimes(spec) -> list[Runtime]:
    """Return the runtimes to recommend for a machine, friendliest first.

    `spec` is a HardwareSpec; this function reads its `is_apple_silicon`,
    `gpu_vendor`, `os` and `vram_gb` attributes.

    The result is ordered as follows:
      1. Ollama, then LM Studio — the easy defaults, always included.
      2. At most one platform-native option: MLX when `is_apple_silicon` is
         truthy; otherwise OpenVINO when the GPU vendor is "intel", or when
         there is no GPU ("none") and the OS is "windows".
      3. llama.cpp — the power path, always included.
      4. vLLM, when the OS is "linux", the GPU vendor is "nvidia" and
         `vram_gb` is at least 16.

    Each runtime appears at most once in the returned list.
    """
    # The friendly apps lead: they work almost everywhere and wrap llama.cpp.
    picks: list[Runtime] = [RUNTIMES["ollama"], RUNTIMES["lmstudio"]]

    # Decide which platform-native runtime (if any) sits ahead of llama.cpp.
    if spec.is_apple_silicon:
        native_key = "mlx"
    elif spec.gpu_vendor == "intel" or (
        spec.gpu_vendor == "none" and spec.os == "windows"
    ):
        # Intel-leaning / CPU-only Windows machines get OpenVINO.
        native_key = "openvino"
    else:
        native_key = None

    if native_key is not None:
        picks.append(RUNTIMES[native_key])
    picks.append(RUNTIMES["llamacpp"])

    # Big Linux NVIDIA box: also mention the server-grade option.
    is_big_linux_nvidia = (
        spec.os == "linux"
        and spec.gpu_vendor == "nvidia"
        and spec.vram_gb >= 16
    )
    if is_big_linux_nvidia:
        picks.append(RUNTIMES["vllm"])

    # Drop repeats by key, keeping the first occurrence and its position.
    unique_by_key: dict[str, Runtime] = {}
    for runtime in picks:
        unique_by_key.setdefault(runtime.key, runtime)
    return list(unique_by_key.values())