Spaces:
Sleeping
Sleeping
| """Implementation of `uofa setup` (REQ-DIST-003). | |
| Downloads the Ollama runtime binary into the UofA-managed directory, | |
| launches a managed daemon, pulls the qwen3.5:4b model into the | |
| UofA-owned model store, and pre-warms the model so the first | |
| ``uofa extract`` invocation does not pay cold-start latency. | |
| Network downloads support HTTP Range resume (REQ-DIST-003 AC 3) and | |
| verify SHA-256 against ``ollama_manifest.toml`` (AC 4). Progress is | |
| reported at >= 2-second intervals (AC 6) via a callback so the CLI | |
| layer can render a bar without this module knowing about TTYs. | |
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import os | |
| import platform as _platform | |
| import shutil | |
| import subprocess | |
| import sys | |
| import tarfile | |
| import time | |
| import zipfile | |
| from dataclasses import dataclass | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from typing import Callable | |
| from uofa_cli import setup_state | |
| _PROGRESS_INTERVAL_SEC = 2.0 | |
| _DEFAULT_PORT = 11434 | |
| _HEALTH_CHECK_TIMEOUT_SEC = 30.0 | |
| _DOWNLOAD_CHUNK_BYTES = 1 << 20 # 1 MiB | |
| ProgressCallback = Callable[[int, int], None] # (bytes_so_far, total_bytes) | |
| class OllamaPlatformEntry: | |
| """Resolved manifest entry for a single platform.""" | |
| version: str | |
| url: str | |
| sha256: str | |
| archive_format: str # "binary" | "tgz" | "tar.zst" | "zip" | |
| binary_inside_archive: str # POSIX-style path to the executable | |
| # ββ Manifest ββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def detect_wheel_platform_tag() -> str: | |
| """Map the current host to a wheel-platform-tag used by ollama_manifest. | |
| Mirrors the keys we use in jre_manifest.toml so contributors don't | |
| have to remember two parallel naming conventions. | |
| """ | |
| system = _platform.system().lower() | |
| machine = _platform.machine().lower() | |
| if system == "darwin": | |
| return "macosx_11_0_arm64" if machine in ("arm64", "aarch64") else "macosx_11_0_x86_64" | |
| if system == "linux": | |
| return "manylinux_2_28_aarch64" if machine in ("arm64", "aarch64") else "manylinux_2_28_x86_64" | |
| if system == "windows": | |
| return "win_amd64" | |
| raise RuntimeError(f"Unsupported platform: system={system}, machine={machine}") | |
| def load_ollama_manifest(repo_root: Path | None = None) -> dict: | |
| """Load ollama_manifest.toml from the source tree.""" | |
| try: | |
| import tomllib | |
| except ModuleNotFoundError: | |
| import tomli as tomllib # type: ignore[no-redef] | |
| if repo_root is None: | |
| # Walk up from this module looking for the manifest. | |
| here = Path(__file__).resolve() | |
| for parent in [here.parent, *here.parents]: | |
| candidate = parent / "ollama_manifest.toml" | |
| if candidate.is_file(): | |
| repo_root = parent | |
| break | |
| else: | |
| raise FileNotFoundError("ollama_manifest.toml not found") | |
| with (repo_root / "ollama_manifest.toml").open("rb") as f: | |
| return tomllib.load(f) | |
| def resolve_platform_entry(platform_tag: str, manifest: dict | None = None) -> OllamaPlatformEntry: | |
| if manifest is None: | |
| manifest = load_ollama_manifest() | |
| platforms = manifest.get("platforms", {}) | |
| entry = platforms.get(platform_tag) | |
| if entry is None: | |
| raise KeyError( | |
| f"Platform {platform_tag} not in ollama_manifest.toml; " | |
| f"known: {sorted(platforms)}" | |
| ) | |
| return OllamaPlatformEntry( | |
| version=entry["version"], | |
| url=entry["url"], | |
| sha256=entry["sha256"], | |
| archive_format=entry["archive_format"], | |
| binary_inside_archive=entry.get("binary_inside_archive", "ollama"), | |
| ) | |
| # ββ Download ββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def download_ollama( | |
| entry: OllamaPlatformEntry, | |
| dest: Path, | |
| on_progress: ProgressCallback | None = None, | |
| ) -> None: | |
| """Download the Ollama archive (or raw binary) to *dest*. | |
| Resumes on partial files via HTTP Range; verifies SHA-256 after. | |
| """ | |
| import requests # imported here so the module imports without [extract] installed | |
| dest.parent.mkdir(parents=True, exist_ok=True) | |
| existing = dest.stat().st_size if dest.exists() else 0 | |
| # HEAD first to learn total size and whether Range is supported. | |
| head = requests.head(entry.url, allow_redirects=True, timeout=30) | |
| head.raise_for_status() | |
| total = int(head.headers.get("Content-Length", "0")) | |
| accept_ranges = head.headers.get("Accept-Ranges", "").lower() == "bytes" | |
| if existing == total and total > 0: | |
| # Already downloaded; just verify. | |
| _verify_sha256(dest, entry.sha256) | |
| if on_progress is not None: | |
| on_progress(total, total) | |
| return | |
| headers = {} | |
| mode = "wb" | |
| if existing and accept_ranges: | |
| headers["Range"] = f"bytes={existing}-" | |
| mode = "ab" | |
| else: | |
| existing = 0 # restart from scratch if server doesn't support resume | |
| with requests.get(entry.url, headers=headers, stream=True, timeout=120) as resp: | |
| resp.raise_for_status() | |
| with dest.open(mode) as f: | |
| bytes_so_far = existing | |
| last_emit = 0.0 | |
| for chunk in resp.iter_content(chunk_size=_DOWNLOAD_CHUNK_BYTES): | |
| if not chunk: | |
| continue | |
| f.write(chunk) | |
| bytes_so_far += len(chunk) | |
| now = time.monotonic() | |
| if on_progress is not None and (now - last_emit) >= _PROGRESS_INTERVAL_SEC: | |
| on_progress(bytes_so_far, total) | |
| last_emit = now | |
| if on_progress is not None: | |
| on_progress(bytes_so_far, total) | |
| _verify_sha256(dest, entry.sha256) | |
| def _verify_sha256(path: Path, expected: str) -> None: | |
| h = hashlib.sha256() | |
| with path.open("rb") as f: | |
| for chunk in iter(lambda: f.read(_DOWNLOAD_CHUNK_BYTES), b""): | |
| h.update(chunk) | |
| actual = h.hexdigest() | |
| if actual != expected: | |
| raise ValueError( | |
| f"SHA-256 mismatch for {path}\n" | |
| f" expected: {expected}\n" | |
| f" actual: {actual}" | |
| ) | |
| # ββ Extraction ββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def install_binary( | |
| entry: OllamaPlatformEntry, | |
| archive: Path, | |
| install_dir: Path, | |
| ) -> Path: | |
| """Extract or move the downloaded artifact and return the binary path. | |
| Handles four archive formats: ``binary`` (raw executable), ``tgz``, | |
| ``tar.zst`` (system tar's --zstd), and ``zip``. | |
| """ | |
| install_dir.mkdir(parents=True, exist_ok=True) | |
| if entry.archive_format == "binary": | |
| # Raw binary: just chmod and move into place. | |
| target = install_dir / entry.binary_inside_archive | |
| shutil.copy2(archive, target) | |
| target.chmod(0o755) | |
| return target | |
| if entry.archive_format == "tgz": | |
| with tarfile.open(archive, "r:gz") as tf: | |
| tf.extractall(install_dir) | |
| elif entry.archive_format == "tar.zst": | |
| # Python's stdlib tarfile gained zstd in 3.14; use system tar for | |
| # broader compatibility. Requires GNU tar 1.31+ or macOS tar 11+. | |
| subprocess.run( | |
| ["tar", "--zstd", "-xf", str(archive), "-C", str(install_dir)], | |
| check=True, | |
| ) | |
| elif entry.archive_format == "zip": | |
| with zipfile.ZipFile(archive) as zf: | |
| zf.extractall(install_dir) | |
| else: | |
| raise ValueError(f"Unknown archive_format: {entry.archive_format}") | |
| binary_path = install_dir / entry.binary_inside_archive | |
| if not binary_path.exists(): | |
| raise FileNotFoundError( | |
| f"Expected binary {entry.binary_inside_archive} not found " | |
| f"after extraction; install_dir contents: " | |
| f"{sorted(p.name for p in install_dir.rglob('*'))[:20]}" | |
| ) | |
| binary_path.chmod(binary_path.stat().st_mode | 0o755) | |
| return binary_path | |
| # ββ Daemon lifecycle ββββββββββββββββββββββββββββββββββββββββββββ | |
| def start_managed_daemon( | |
| binary: Path, | |
| port: int = _DEFAULT_PORT, | |
| models_dir: Path | None = None, | |
| ) -> subprocess.Popen: | |
| """Start `ollama serve` on the given port; return the Popen handle.""" | |
| env = os.environ.copy() | |
| env["OLLAMA_HOST"] = f"127.0.0.1:{port}" | |
| if models_dir is not None: | |
| env["OLLAMA_MODELS"] = str(models_dir) | |
| return subprocess.Popen( | |
| [str(binary), "serve"], | |
| env=env, | |
| stdout=subprocess.DEVNULL, | |
| stderr=subprocess.PIPE, | |
| ) | |
| def wait_for_daemon(port: int = _DEFAULT_PORT, timeout: float = _HEALTH_CHECK_TIMEOUT_SEC) -> None: | |
| """Poll /api/tags until the daemon answers or *timeout* elapses.""" | |
| import requests | |
| deadline = time.monotonic() + timeout | |
| last_err: Exception | None = None | |
| while time.monotonic() < deadline: | |
| try: | |
| r = requests.get(f"http://127.0.0.1:{port}/api/tags", timeout=2) | |
| if r.status_code == 200: | |
| return | |
| except requests.RequestException as e: | |
| last_err = e | |
| time.sleep(0.25) | |
| raise TimeoutError( | |
| f"Ollama daemon did not respond on port {port} within {timeout}s " | |
| f"(last error: {last_err})" | |
| ) | |
| def pull_model( | |
| port: int, | |
| model_tag: str, | |
| on_progress: ProgressCallback | None = None, | |
| ) -> None: | |
| """Stream-pull a model via the Ollama HTTP API. | |
| Ollama returns one JSON object per line on /api/pull; each object | |
| has 'completed' / 'total' fields during the layer download phase. | |
| """ | |
| import json | |
| import requests | |
| with requests.post( | |
| f"http://127.0.0.1:{port}/api/pull", | |
| json={"model": model_tag, "stream": True}, | |
| stream=True, | |
| timeout=None, | |
| ) as resp: | |
| resp.raise_for_status() | |
| last_emit = 0.0 | |
| for raw in resp.iter_lines(): | |
| if not raw: | |
| continue | |
| try: | |
| msg = json.loads(raw) | |
| except json.JSONDecodeError: | |
| continue | |
| if "error" in msg: | |
| raise RuntimeError(f"Ollama pull failed: {msg['error']}") | |
| completed = int(msg.get("completed", 0)) | |
| total = int(msg.get("total", 0)) | |
| now = time.monotonic() | |
| if on_progress is not None and total and (now - last_emit) >= _PROGRESS_INTERVAL_SEC: | |
| on_progress(completed, total) | |
| last_emit = now | |
| def prewarm_model(port: int, model_tag: str) -> None: | |
| """Issue one tiny generation request so the next `uofa extract` is hot.""" | |
| import requests | |
| requests.post( | |
| f"http://127.0.0.1:{port}/api/generate", | |
| json={"model": model_tag, "prompt": "ok", "options": {"num_predict": 1}, "stream": False}, | |
| timeout=120, | |
| ).raise_for_status() | |
| # ββ Top-level orchestration βββββββββββββββββββββββββββββββββββββ | |
| def install( | |
| *, | |
| prefer_byo: bool = True, | |
| model_tag: str = "qwen3.5:4b", | |
| port: int = _DEFAULT_PORT, | |
| on_status: Callable[[str], None] | None = None, | |
| on_progress: ProgressCallback | None = None, | |
| ) -> setup_state.SetupConfig: | |
| """End-to-end install. Returns the resulting SetupConfig. | |
| If ``prefer_byo`` is True (REQ-DIST-005 default) and a system Ollama | |
| is detected, register it instead of installing a managed copy. | |
| """ | |
| say = on_status or (lambda _: None) | |
| byo = setup_state.detect_byo_ollama() if prefer_byo else None | |
| if byo is not None: | |
| say(f"Detected existing Ollama at {byo}; registering it.") | |
| binary = byo | |
| models_dir: Path | None = None # let Ollama use its default model store | |
| mode = "byo" | |
| else: | |
| platform_tag = detect_wheel_platform_tag() | |
| say(f"Installing managed Ollama for {platform_tag}.") | |
| entry = resolve_platform_entry(platform_tag) | |
| archive_dir = setup_state.uofa_data_dir() / "downloads" | |
| archive_dir.mkdir(parents=True, exist_ok=True) | |
| archive_path = archive_dir / Path(entry.url).name | |
| say(f"Downloading {entry.url}") | |
| download_ollama(entry, archive_path, on_progress=on_progress) | |
| say("Extracting binary.") | |
| binary = install_binary(entry, archive_path, setup_state.runtime_dir(platform_tag)) | |
| models_dir = setup_state.models_cache_dir() | |
| models_dir.mkdir(parents=True, exist_ok=True) | |
| mode = "managed" | |
| say(f"Starting daemon on port {port}.") | |
| daemon = start_managed_daemon(binary, port=port, models_dir=models_dir) | |
| try: | |
| wait_for_daemon(port) | |
| say(f"Pulling model {model_tag} (this may take several minutes).") | |
| pull_model(port, model_tag, on_progress=on_progress) | |
| say("Pre-warming model.") | |
| prewarm_model(port, model_tag) | |
| finally: | |
| # Leave the daemon stopped after install β `uofa extract` will | |
| # start its own short-lived daemon on the same port. | |
| daemon.terminate() | |
| try: | |
| daemon.wait(timeout=5) | |
| except subprocess.TimeoutExpired: | |
| daemon.kill() | |
| cfg = setup_state.SetupConfig( | |
| mode=mode, | |
| ollama_binary=binary, | |
| ollama_port=port, | |
| ollama_models_dir=models_dir, | |
| model_tag=model_tag, | |
| installed_at=datetime.now(timezone.utc).isoformat(timespec="seconds"), | |
| uofa_version=_uofa_version(), | |
| ) | |
| setup_state.save_config(cfg) | |
| say(f"Wrote {setup_state.config_path()}") | |
| return cfg | |
| def _uofa_version() -> str: | |
| try: | |
| from importlib.metadata import version | |
| return version("uofa") | |
| except Exception: | |
| return "unknown" | |