Spaces:

build-small-hackathon
/

lifeos

Running

App Files Files Community

lifeos / scripts /modal_bench.py

awaisaziz

Add LifeOS: local-first personal assistant on Nemotron-3-Nano-4B

40cf485 22 days ago

Raw

History Blame Contribute Delete

1.93 kB

	"""Dev-time benchmark matrix: Nemotron-3 (Mamba hybrid) vs Llama-3.1-Nemotron
	(llama arch) at different thread counts, to pick the model + Spaces hardware.

	Run: modal run scripts/modal_bench.py
	"""
	import modal

	# Modal application object; the functions decorated with @app.function and
	# @app.local_entrypoint further down are attached to it.
	app = modal.App("lifeos-bench")

	# Container image for the benchmark: Debian slim on Python 3.13 with build
	# tools (presumably needed to compile llama-cpp-python from source -- confirm)
	# and the pinned llama-cpp-python, huggingface-hub and numpy packages.
	image = (
	    modal.Image.debian_slim(python_version="3.13")
	    .apt_install(
	        "build-essential",
	        "cmake",
	        "ninja-build",
	        "git",
	    )
	    .pip_install(
	        "llama-cpp-python==0.3.28",
	        "huggingface-hub",
	        "numpy",
	    )
	)

	# Benchmark candidates: short model key -> (Hugging Face repo id, GGUF filename).
	MODELS = {
	    "nemotron3-nano-4b": (
	        "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF",
	        "NVIDIA-Nemotron3-Nano-4B-Q4_K_M.gguf",
	    ),
	    "llama31-nemotron-4b": (
	        "bartowski/nvidia_Llama-3.1-Nemotron-Nano-4B-v1.1-GGUF",
	        "nvidia_Llama-3.1-Nemotron-Nano-4B-v1.1-Q4_K_M.gguf",
	    ),
	}

	# Fixed chat prompt sent to every model, for both the warm-up and the timed run.
	MESSAGES = [
	    {
	        "role": "system",
	        "content": "/no_think\nYou are LifeOS, a concise local assistant.",
	    },
	    {
	        "role": "user",
	        "content": "In 3 bullet points, what's a cheap high-protein dinner using chicken thighs at $2.49/lb?",
	    },
	]


	@app.function(image=image, cpu=8.0, memory=16384, timeout=3600)
	def bench(model_key: str, n_threads: int) -> dict:
	    """Measure streamed chat-completion throughput for one model/thread pair.

	    Downloads the GGUF file registered under ``model_key`` in ``MODELS``,
	    loads it with ``llama_cpp.Llama`` using ``n_threads`` threads, runs one
	    short untimed warm-up completion, then times a streamed completion of
	    ``MESSAGES`` capped at ``max_tokens=128``.

	    Args:
	        model_key: Key into ``MODELS`` selecting the (repo, filename) pair.
	        n_threads: Thread count passed to ``Llama``.

	    Returns:
	        Dict with keys ``model``, ``threads``, ``tokens`` (number of streamed
	        chunks that carried non-empty content), ``sec`` (elapsed seconds,
	        rounded to one decimal) and ``tps`` (``tokens`` per second, rounded
	        to two decimals; ``0.0`` if no time elapsed).

	    Raises:
	        KeyError: If ``model_key`` is not present in ``MODELS``.
	    """
	    import time

	    from huggingface_hub import hf_hub_download
	    from llama_cpp import Llama

	    repo, fname = MODELS[model_key]
	    path = hf_hub_download(repo_id=repo, filename=fname)
	    llm = Llama(model_path=path, n_ctx=4096, n_threads=n_threads, verbose=False)

	    # warm once (memory map, graph build) so one-time setup is not timed
	    llm.create_chat_completion(MESSAGES, max_tokens=8)

	    # perf_counter is monotonic; time.time() follows the wall clock and can
	    # jump if the clock is adjusted mid-run, which would skew the interval.
	    t0 = time.perf_counter()
	    n = 0
	    for chunk in llm.create_chat_completion(MESSAGES, max_tokens=128, stream=True):
	        # Each streamed chunk with non-empty content is counted as one token.
	        if chunk["choices"][0]["delta"].get("content"):
	            n += 1
	    dt = time.perf_counter() - t0

	    # Guard the division so a zero-length interval cannot raise.
	    tps = round(n / dt, 2) if dt > 0 else 0.0
	    return {"model": model_key, "threads": n_threads, "tokens": n, "sec": round(dt, 1), "tps": tps}


	@app.local_entrypoint()
	def main():
	    """Run ``bench`` for every model in ``MODELS`` at 2 and 8 threads.

	    Builds the (model key, thread count) matrix, dispatches it through
	    ``bench.starmap`` and prints each result dict as it is yielded.
	    """
	    thread_counts = (2, 8)
	    matrix = []
	    for model_key in MODELS:
	        for n_threads in thread_counts:
	            matrix.append((model_key, n_threads))

	    for outcome in bench.starmap(matrix):
	        print("RESULT:", outcome)