Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

App Files Files Community

FitCheck / engine /runtimes.py

cn0303

Deploy FitCheck: engine + Nemotron model brick on ZeroGPU

12d2e34 verified 1 day ago

raw

history blame contribute delete

3.52 kB

	"""
	Runtimes: the actual programs that run a model on your machine.

	We deliberately keep this list short and well-supported. For each machine we
	surface TWO paths:

	- the easiest path: a friendly app a non-technical person can install and
	  click (Ollama / LM Studio). This is the default.
	- the power path: llama.cpp with GGUF files — more control, and the
	  tool the hackathon's "Llama Champion" goal rewards.

	Plus platform-native options where they genuinely help (MLX on Apple,
	OpenVINO on Intel, vLLM on big Linux GPU boxes).
	"""

	from dataclasses import dataclass


	@dataclass(frozen=True)
	class Runtime:
	    """One program that can run a model locally, as presented to the user.

	    Instances are immutable (frozen dataclass); all fields are plain strings.
	    """

	    # Short identifier; the RUNTIMES catalogue uses it as the dict key.
	    key: str
	    # Human-readable display name.
	    name: str
	    # What it is, in one friendly line.
	    plain_what: str
	    # One of "Easiest" | "Moderate" | "Advanced".
	    difficulty: str
	    # Short note on how to install it.
	    install_hint: str
	    # URL of the project's site or documentation.
	    site: str


	# Catalogue of every runtime we know how to recommend, keyed by Runtime.key.
	# Insertion order is: the two friendly apps, llama.cpp, then the
	# platform-native / server options.
	RUNTIMES: dict[str, Runtime] = {
	    "ollama": Runtime(
	        key="ollama",
	        name="Ollama",
	        plain_what="A simple app. You type one line and it downloads and runs a model.",
	        difficulty="Easiest",
	        install_hint="Download the installer from ollama.com",
	        site="https://ollama.com",
	    ),
	    "lmstudio": Runtime(
	        key="lmstudio",
	        name="LM Studio",
	        plain_what="A point-and-click app with a chat window — no typing commands.",
	        difficulty="Easiest",
	        install_hint="Download from lmstudio.ai",
	        site="https://lmstudio.ai",
	    ),
	    "llamacpp": Runtime(
	        key="llamacpp",
	        name="llama.cpp",
	        plain_what="The lightweight engine under the hood. Runs GGUF model files directly.",
	        difficulty="Advanced",
	        install_hint="Build from source or grab a release on GitHub",
	        site="https://github.com/ggml-org/llama.cpp",
	    ),
	    "mlx": Runtime(
	        key="mlx",
	        name="MLX",
	        plain_what="Apple's own framework, built for Mac chips and their shared memory.",
	        difficulty="Moderate",
	        install_hint="pip install mlx-lm",
	        site="https://github.com/ml-explore/mlx",
	    ),
	    "openvino": Runtime(
	        key="openvino",
	        name="OpenVINO",
	        plain_what="Intel's toolkit that squeezes good speed out of Intel chips and NPUs.",
	        difficulty="Moderate",
	        install_hint="pip install optimum[openvino]",
	        site="https://docs.openvino.ai",
	    ),
	    "vllm": Runtime(
	        key="vllm",
	        name="vLLM",
	        plain_what="A heavy-duty server for big Linux machines with strong NVIDIA GPUs.",
	        difficulty="Advanced",
	        install_hint="pip install vllm",
	        site="https://docs.vllm.ai",
	    ),
	}


	def pick_runtimes(spec) -> list[Runtime]:
	    """Return the runtimes to recommend for a machine, best first.

	    Args:
	        spec: The machine description (a HardwareSpec). This function reads
	            ``is_apple_silicon``, ``gpu_vendor``, ``os`` and, only for
	            Linux + NVIDIA machines, ``vram_gb``.

	    Returns:
	        A list that always starts with Ollama and LM Studio (the friendly
	        defaults), followed by a platform-native option when one applies
	        (MLX or OpenVINO), then llama.cpp, and finally vLLM on Linux
	        NVIDIA machines with at least 16 GB of VRAM. No runtime key
	        appears twice.
	    """
	    # Work out which extras follow the two friendly apps.
	    if spec.is_apple_silicon:
	        extra_keys = ["mlx", "llamacpp"]
	    elif spec.gpu_vendor == "intel" or (
	        spec.gpu_vendor == "none" and spec.os == "windows"
	    ):
	        # Intel-leaning / CPU machines benefit from OpenVINO.
	        extra_keys = ["openvino", "llamacpp"]
	    else:
	        extra_keys = ["llamacpp"]
	        # Big Linux NVIDIA box → mention the server-grade option.
	        if spec.os == "linux" and spec.gpu_vendor == "nvidia" and spec.vram_gb >= 16:
	            extra_keys.append("vllm")

	    # Easiest path first — works almost everywhere and wraps llama.cpp anyway.
	    picks = [RUNTIMES[k] for k in ("ollama", "lmstudio", *extra_keys)]

	    # Keep only the first occurrence of each key; dicts preserve insertion
	    # order, so the best-first ordering survives.
	    unique: dict[str, Runtime] = {}
	    for runtime in picks:
	        unique.setdefault(runtime.key, runtime)
	    return list(unique.values())