"""Startup reachability check for the active backend. The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by the preset) serves the model ladder. If that endpoint is unreachable — hal is off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail deep inside a request with no obvious cause. Worse, a silent default to the wrong preset (the historical "it's using my laptop, not hal" bug) looks fine until you notice the weak single-tier model. `preflight()` makes that visible: it prints which preset/endpoint is active and probes `{base_url}/models` once at startup. On success it prints a one-line banner with the model count; on failure it prints a loud warning naming the dead URL and which *other* presets are reachable right now, so the fix is obvious. It never raises and never blocks the app from starting — it only informs. """ from __future__ import annotations import json import sys import urllib.error import urllib.request from .config import Preset, _PRESETS, load_preset _TIMEOUT = 4.0 # ANSI: bold, green ok, red warn — degrade to plain text when not a TTY. _BOLD, _GREEN, _RED, _DIM, _RESET = "\033[1m", "\033[32m", "\033[31m", "\033[2m", "\033[0m" def _color(s: str, code: str) -> str: return f"{code}{s}{_RESET}" if sys.stderr.isatty() else s def list_models(base_url: str, timeout: float = _TIMEOUT) -> list[str]: """Fetch model IDs from {base_url}/models. Returns [] on failure.""" url = base_url.rstrip("/") + "/models" try: with urllib.request.urlopen(url, timeout=timeout) as resp: if resp.status != 200: return [] data = json.loads(resp.read().decode("utf-8", "replace")) models = data.get("data") if isinstance(data, dict) else None if not isinstance(models, list): return [] ids: list[str] = [] for m in models: if isinstance(m, dict) and m.get("id"): ids.append(str(m["id"])) return sorted(ids) except (urllib.error.URLError, TimeoutError, OSError, ValueError, json.JSONDecodeError): return [] def probe(base_url: str, timeout: float = _TIMEOUT, api_key: str | None = None) -> tuple[bool, int | None, str | None]: """Return (reachable, model_count, error). Never raises. Sends the bearer token so endpoints that require auth (e.g. a vLLM server started with --api-key) report reachable instead of a spurious 401.""" url = base_url.rstrip("/") + "/models" headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} try: req = urllib.request.Request(url, headers=headers) with urllib.request.urlopen(req, timeout=timeout) as resp: if resp.status != 200: return False, None, f"HTTP {resp.status}" data = json.loads(resp.read().decode("utf-8", "replace")) models = data.get("data") if isinstance(data, dict) else None count = len(models) if isinstance(models, list) else None return True, count, None except urllib.error.URLError as e: return False, None, getattr(e, "reason", str(e)).__str__() except (TimeoutError, OSError, ValueError, json.JSONDecodeError) as e: return False, None, str(e) def _reachable_alternatives(active_key: str) -> list[str]: """Which *other* known presets answer right now — points at the easy fix.""" out = [] for key, preset in _PRESETS.items(): if key == active_key: continue ok, _count, _err = probe(preset.base_url, timeout=2.0, api_key=preset.api_key) if ok: out.append(f"{key} ({preset.base_url})") return out def preflight(preset: Preset | None = None) -> bool: """Print a startup banner for the active backend. Returns True if reachable.""" preset = preset or load_preset() tiers = " · ".join(f"{t.name}:{t.model}" for t in preset.tiers) ok, count, err = probe(preset.base_url, api_key=preset.api_key) if ok: models = f"{count} models" if count is not None else "reachable" banner = (f"smolcode backend: preset={preset.key} · {preset.base_url} " f"· {models}\n tiers: {tiers}") print(_color(banner, _BOLD + _GREEN), file=sys.stderr) return True lines = [ _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED), f" preset={preset.key} · {preset.base_url} · {err}", f" tiers: {tiers}", ] alts = _reachable_alternatives(preset.key) if alts: lines.append(" reachable instead: " + ", ".join(alts)) lines.append(_color(" → set SMALLCODE_PRESET to one of the above, " "or fix the endpoint.", _DIM)) else: lines.append(_color(" → no known preset endpoint is answering right now.", _DIM)) print("\n".join(lines), file=sys.stderr) return False