Spaces:

cloudronin
/

uofa-demo

Sleeping

App Files Files Community

uofa-demo / src /uofa_cli /setup_install.py

cloudronin

push build context (uofa source + packs + space app)

a28ec65 verified 12 days ago

Raw

History Blame Contribute Delete

14 kB

	"""Implementation of `uofa setup` (REQ-DIST-003).

	Downloads the Ollama runtime binary into the UofA-managed directory,
	launches a managed daemon, pulls the qwen3.5:4b model into the
	UofA-owned model store, and pre-warms the model so the first
	``uofa extract`` invocation does not pay cold-start latency.

	Network downloads support HTTP Range resume (REQ-DIST-003 AC 3) and
	verify SHA-256 against ``ollama_manifest.toml`` (AC 4). Progress is
	reported at >= 2-second intervals (AC 6) via a callback so the CLI
	layer can render a bar without this module knowing about TTYs.
	"""

	from __future__ import annotations

	import hashlib
	import os
	import platform as _platform
	import shutil
	import subprocess
	import sys
	import tarfile
	import time
	import zipfile
	from dataclasses import dataclass
	from datetime import datetime, timezone
	from pathlib import Path
	from typing import Callable

	from uofa_cli import setup_state


	_PROGRESS_INTERVAL_SEC = 2.0
	_DEFAULT_PORT = 11434
	_HEALTH_CHECK_TIMEOUT_SEC = 30.0
	_DOWNLOAD_CHUNK_BYTES = 1 << 20 # 1 MiB

	ProgressCallback = Callable[[int, int], None] # (bytes_so_far, total_bytes)


	@dataclass(frozen=True)
	class OllamaPlatformEntry:
	"""Resolved manifest entry for a single platform."""

	version: str
	url: str
	sha256: str
	archive_format: str # "binary" \| "tgz" \| "tar.zst" \| "zip"
	binary_inside_archive: str # POSIX-style path to the executable


	# ── Manifest ────────────────────────────────────────────────────


	def detect_wheel_platform_tag() -> str:
	"""Map the current host to a wheel-platform-tag used by ollama_manifest.

	Mirrors the keys we use in jre_manifest.toml so contributors don't
	have to remember two parallel naming conventions.
	"""
	system = _platform.system().lower()
	machine = _platform.machine().lower()
	if system == "darwin":
	return "macosx_11_0_arm64" if machine in ("arm64", "aarch64") else "macosx_11_0_x86_64"
	if system == "linux":
	return "manylinux_2_28_aarch64" if machine in ("arm64", "aarch64") else "manylinux_2_28_x86_64"
	if system == "windows":
	return "win_amd64"
	raise RuntimeError(f"Unsupported platform: system={system}, machine={machine}")


	def load_ollama_manifest(repo_root: Path \| None = None) -> dict:
	"""Load ollama_manifest.toml from the source tree."""
	try:
	import tomllib
	except ModuleNotFoundError:
	import tomli as tomllib # type: ignore[no-redef]

	if repo_root is None:
	# Walk up from this module looking for the manifest.
	here = Path(__file__).resolve()
	for parent in [here.parent, *here.parents]:
	candidate = parent / "ollama_manifest.toml"
	if candidate.is_file():
	repo_root = parent
	break
	else:
	raise FileNotFoundError("ollama_manifest.toml not found")

	with (repo_root / "ollama_manifest.toml").open("rb") as f:
	return tomllib.load(f)


	def resolve_platform_entry(platform_tag: str, manifest: dict \| None = None) -> OllamaPlatformEntry:
	if manifest is None:
	manifest = load_ollama_manifest()
	platforms = manifest.get("platforms", {})
	entry = platforms.get(platform_tag)
	if entry is None:
	raise KeyError(
	f"Platform {platform_tag} not in ollama_manifest.toml; "
	f"known: {sorted(platforms)}"
	)
	return OllamaPlatformEntry(
	version=entry["version"],
	url=entry["url"],
	sha256=entry["sha256"],
	archive_format=entry["archive_format"],
	binary_inside_archive=entry.get("binary_inside_archive", "ollama"),
	)


	# ── Download ────────────────────────────────────────────────────


	def download_ollama(
	entry: OllamaPlatformEntry,
	dest: Path,
	on_progress: ProgressCallback \| None = None,
	) -> None:
	"""Download the Ollama archive (or raw binary) to dest.

	Resumes on partial files via HTTP Range; verifies SHA-256 after.
	"""
	import requests # imported here so the module imports without [extract] installed

	dest.parent.mkdir(parents=True, exist_ok=True)
	existing = dest.stat().st_size if dest.exists() else 0

	# HEAD first to learn total size and whether Range is supported.
	head = requests.head(entry.url, allow_redirects=True, timeout=30)
	head.raise_for_status()
	total = int(head.headers.get("Content-Length", "0"))
	accept_ranges = head.headers.get("Accept-Ranges", "").lower() == "bytes"

	if existing == total and total > 0:
	# Already downloaded; just verify.
	_verify_sha256(dest, entry.sha256)
	if on_progress is not None:
	on_progress(total, total)
	return

	headers = {}
	mode = "wb"
	if existing and accept_ranges:
	headers["Range"] = f"bytes={existing}-"
	mode = "ab"
	else:
	existing = 0 # restart from scratch if server doesn't support resume

	with requests.get(entry.url, headers=headers, stream=True, timeout=120) as resp:
	resp.raise_for_status()
	with dest.open(mode) as f:
	bytes_so_far = existing
	last_emit = 0.0
	for chunk in resp.iter_content(chunk_size=_DOWNLOAD_CHUNK_BYTES):
	if not chunk:
	continue
	f.write(chunk)
	bytes_so_far += len(chunk)
	now = time.monotonic()
	if on_progress is not None and (now - last_emit) >= _PROGRESS_INTERVAL_SEC:
	on_progress(bytes_so_far, total)
	last_emit = now
	if on_progress is not None:
	on_progress(bytes_so_far, total)

	_verify_sha256(dest, entry.sha256)


	def _verify_sha256(path: Path, expected: str) -> None:
	h = hashlib.sha256()
	with path.open("rb") as f:
	for chunk in iter(lambda: f.read(_DOWNLOAD_CHUNK_BYTES), b""):
	h.update(chunk)
	actual = h.hexdigest()
	if actual != expected:
	raise ValueError(
	f"SHA-256 mismatch for {path}\n"
	f" expected: {expected}\n"
	f" actual: {actual}"
	)


	# ── Extraction ──────────────────────────────────────────────────


	def install_binary(
	entry: OllamaPlatformEntry,
	archive: Path,
	install_dir: Path,
	) -> Path:
	"""Extract or move the downloaded artifact and return the binary path.

	Handles four archive formats: ``binary`` (raw executable), ``tgz``,
	``tar.zst`` (system tar's --zstd), and ``zip``.
	"""
	install_dir.mkdir(parents=True, exist_ok=True)
	if entry.archive_format == "binary":
	# Raw binary: just chmod and move into place.
	target = install_dir / entry.binary_inside_archive
	shutil.copy2(archive, target)
	target.chmod(0o755)
	return target

	if entry.archive_format == "tgz":
	with tarfile.open(archive, "r:gz") as tf:
	tf.extractall(install_dir)
	elif entry.archive_format == "tar.zst":
	# Python's stdlib tarfile gained zstd in 3.14; use system tar for
	# broader compatibility. Requires GNU tar 1.31+ or macOS tar 11+.
	subprocess.run(
	["tar", "--zstd", "-xf", str(archive), "-C", str(install_dir)],
	check=True,
	)
	elif entry.archive_format == "zip":
	with zipfile.ZipFile(archive) as zf:
	zf.extractall(install_dir)
	else:
	raise ValueError(f"Unknown archive_format: {entry.archive_format}")

	binary_path = install_dir / entry.binary_inside_archive
	if not binary_path.exists():
	raise FileNotFoundError(
	f"Expected binary {entry.binary_inside_archive} not found "
	f"after extraction; install_dir contents: "
	f"{sorted(p.name for p in install_dir.rglob('*'))[:20]}"
	)
	binary_path.chmod(binary_path.stat().st_mode \| 0o755)
	return binary_path


	# ── Daemon lifecycle ────────────────────────────────────────────


	def start_managed_daemon(
	binary: Path,
	port: int = _DEFAULT_PORT,
	models_dir: Path \| None = None,
	) -> subprocess.Popen:
	"""Start `ollama serve` on the given port; return the Popen handle."""
	env = os.environ.copy()
	env["OLLAMA_HOST"] = f"127.0.0.1:{port}"
	if models_dir is not None:
	env["OLLAMA_MODELS"] = str(models_dir)
	return subprocess.Popen(
	[str(binary), "serve"],
	env=env,
	stdout=subprocess.DEVNULL,
	stderr=subprocess.PIPE,
	)


	def wait_for_daemon(port: int = _DEFAULT_PORT, timeout: float = _HEALTH_CHECK_TIMEOUT_SEC) -> None:
	"""Poll /api/tags until the daemon answers or timeout elapses."""
	import requests

	deadline = time.monotonic() + timeout
	last_err: Exception \| None = None
	while time.monotonic() < deadline:
	try:
	r = requests.get(f"http://127.0.0.1:{port}/api/tags", timeout=2)
	if r.status_code == 200:
	return
	except requests.RequestException as e:
	last_err = e
	time.sleep(0.25)
	raise TimeoutError(
	f"Ollama daemon did not respond on port {port} within {timeout}s "
	f"(last error: {last_err})"
	)


	def pull_model(
	port: int,
	model_tag: str,
	on_progress: ProgressCallback \| None = None,
	) -> None:
	"""Stream-pull a model via the Ollama HTTP API.

	Ollama returns one JSON object per line on /api/pull; each object
	has 'completed' / 'total' fields during the layer download phase.
	"""
	import json
	import requests

	with requests.post(
	f"http://127.0.0.1:{port}/api/pull",
	json={"model": model_tag, "stream": True},
	stream=True,
	timeout=None,
	) as resp:
	resp.raise_for_status()
	last_emit = 0.0
	for raw in resp.iter_lines():
	if not raw:
	continue
	try:
	msg = json.loads(raw)
	except json.JSONDecodeError:
	continue
	if "error" in msg:
	raise RuntimeError(f"Ollama pull failed: {msg['error']}")
	completed = int(msg.get("completed", 0))
	total = int(msg.get("total", 0))
	now = time.monotonic()
	if on_progress is not None and total and (now - last_emit) >= _PROGRESS_INTERVAL_SEC:
	on_progress(completed, total)
	last_emit = now


	def prewarm_model(port: int, model_tag: str) -> None:
	"""Issue one tiny generation request so the next `uofa extract` is hot."""
	import requests

	requests.post(
	f"http://127.0.0.1:{port}/api/generate",
	json={"model": model_tag, "prompt": "ok", "options": {"num_predict": 1}, "stream": False},
	timeout=120,
	).raise_for_status()


	# ── Top-level orchestration ─────────────────────────────────────


	def install(
	*,
	prefer_byo: bool = True,
	model_tag: str = "qwen3.5:4b",
	port: int = _DEFAULT_PORT,
	on_status: Callable[[str], None] \| None = None,
	on_progress: ProgressCallback \| None = None,
	) -> setup_state.SetupConfig:
	"""End-to-end install. Returns the resulting SetupConfig.

	If ``prefer_byo`` is True (REQ-DIST-005 default) and a system Ollama
	is detected, register it instead of installing a managed copy.
	"""
	say = on_status or (lambda _: None)

	byo = setup_state.detect_byo_ollama() if prefer_byo else None
	if byo is not None:
	say(f"Detected existing Ollama at {byo}; registering it.")
	binary = byo
	models_dir: Path \| None = None # let Ollama use its default model store
	mode = "byo"
	else:
	platform_tag = detect_wheel_platform_tag()
	say(f"Installing managed Ollama for {platform_tag}.")
	entry = resolve_platform_entry(platform_tag)
	archive_dir = setup_state.uofa_data_dir() / "downloads"
	archive_dir.mkdir(parents=True, exist_ok=True)
	archive_path = archive_dir / Path(entry.url).name
	say(f"Downloading {entry.url}")
	download_ollama(entry, archive_path, on_progress=on_progress)
	say("Extracting binary.")
	binary = install_binary(entry, archive_path, setup_state.runtime_dir(platform_tag))
	models_dir = setup_state.models_cache_dir()
	models_dir.mkdir(parents=True, exist_ok=True)
	mode = "managed"

	say(f"Starting daemon on port {port}.")
	daemon = start_managed_daemon(binary, port=port, models_dir=models_dir)
	try:
	wait_for_daemon(port)
	say(f"Pulling model {model_tag} (this may take several minutes).")
	pull_model(port, model_tag, on_progress=on_progress)
	say("Pre-warming model.")
	prewarm_model(port, model_tag)
	finally:
	# Leave the daemon stopped after install — `uofa extract` will
	# start its own short-lived daemon on the same port.
	daemon.terminate()
	try:
	daemon.wait(timeout=5)
	except subprocess.TimeoutExpired:
	daemon.kill()

	cfg = setup_state.SetupConfig(
	mode=mode,
	ollama_binary=binary,
	ollama_port=port,
	ollama_models_dir=models_dir,
	model_tag=model_tag,
	installed_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
	uofa_version=_uofa_version(),
	)
	setup_state.save_config(cfg)
	say(f"Wrote {setup_state.config_path()}")
	return cfg


	def _uofa_version() -> str:
	try:
	from importlib.metadata import version
	return version("uofa")
	except Exception:
	return "unknown"