smolcode / engine /preflight.py
seanpoyner's picture
Upload folder using huggingface_hub
daea45b verified
Raw
History Blame Contribute Delete
4.93 kB
"""Startup reachability check for the active backend.
The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by
the preset) serves the model ladder. If that endpoint is unreachable — hal is
off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail
deep inside a request with no obvious cause. Worse, a silent default to the
wrong preset (the historical "it's using my laptop, not hal" bug) looks fine
until you notice the weak single-tier model.
`preflight()` makes that visible: it prints which preset/endpoint is active and
probes `{base_url}/models` once at startup. On success it prints a one-line
banner with the model count; on failure it prints a loud warning naming the dead
URL and which *other* presets are reachable right now, so the fix is obvious.
It never raises and never blocks the app from starting — it only informs.
"""
from __future__ import annotations
import http.client
import json
import sys
import urllib.error
import urllib.request
from .config import Preset, _PRESETS, load_preset
_TIMEOUT = 4.0
# ANSI: bold, green ok, red warn, dim hint — degrade to plain text when stderr is not a TTY.
_BOLD, _GREEN, _RED, _DIM, _RESET = "\033[1m", "\033[32m", "\033[31m", "\033[2m", "\033[0m"
def _color(s: str, code: str) -> str:
    """Wrap *s* in the ANSI sequence *code*, but only when stderr is a terminal.

    When stderr is redirected (file, pipe, CI log) the text is returned
    unchanged so escape codes never leak into captured output.
    """
    if not sys.stderr.isatty():
        return s
    return f"{code}{s}{_RESET}"
def list_models(base_url: str, timeout: float = _TIMEOUT,
                api_key: str | None = None) -> list[str]:
    """Fetch the sorted model IDs served at ``{base_url}/models``.

    Args:
        base_url: Root of the OpenAI-compatible API; a trailing slash is ignored.
        timeout: Seconds to wait for the endpoint before giving up.
        api_key: Optional bearer token, sent the same way ``probe`` sends it so
            endpoints that require auth can still be listed.

    Returns:
        The ``id`` of every entry in the response's ``data`` list, sorted.
        Returns ``[]`` on any failure: unreachable host, non-200 status,
        malformed URL, or a body that is not the expected JSON shape.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        # Request() itself raises ValueError on a malformed URL, so it stays
        # inside the try.
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return []
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError/TimeoutError; ValueError covers
        # JSONDecodeError and bad URLs; HTTPException covers protocol-level
        # failures (InvalidURL, BadStatusLine, IncompleteRead) that urllib
        # does not wrap.
        return []

    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        return []
    return sorted(str(m["id"]) for m in models if isinstance(m, dict) and m.get("id"))
def probe(base_url: str, timeout: float = _TIMEOUT,
          api_key: str | None = None) -> tuple[bool, int | None, str | None]:
    """Check whether ``{base_url}/models`` answers. Never raises.

    Sends the bearer token so endpoints that require auth (e.g. a vLLM server
    started with --api-key) report reachable instead of a spurious 401.

    Args:
        base_url: Root of the OpenAI-compatible API; a trailing slash is ignored.
        timeout: Seconds to wait for the endpoint before giving up.
        api_key: Optional bearer token for the ``Authorization`` header.

    Returns:
        ``(reachable, model_count, error)``. On success ``reachable`` is True,
        ``error`` is None, and ``model_count`` is the length of the response's
        ``data`` list (None when the body has no such list). On failure
        ``reachable`` is False, ``model_count`` is None, and ``error`` is a
        short human-readable reason.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        # Request() itself raises ValueError on a malformed URL, so it stays
        # inside the try.
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # urlopen raises for 4xx/5xx, so this only catches 2xx-but-not-200.
            if resp.status != 200:
                return False, None, f"HTTP {resp.status}"
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except urllib.error.HTTPError as e:
        # Must precede URLError (its base class). Keep the status code so a
        # 401 is distinguishable from a 404 or a 500 in the banner.
        message = f"HTTP {e.code} {e.reason}".rstrip()
        e.close()  # the error object owns the open response
        return False, None, message
    except urllib.error.URLError as e:
        return False, None, str(getattr(e, "reason", e))
    except (OSError, ValueError, http.client.HTTPException) as e:
        # HTTPException (InvalidURL, BadStatusLine, IncompleteRead) is not an
        # OSError and is not wrapped by urllib; catching it keeps the
        # "never raises" promise. Fall back to the class name when the
        # exception carries no message.
        return False, None, str(e) or type(e).__name__

    models = data.get("data") if isinstance(data, dict) else None
    count = len(models) if isinstance(models, list) else None
    return True, count, None
def _reachable_alternatives(active_key: str) -> list[str]:
    """List every known preset other than *active_key* whose endpoint answers.

    Each candidate is probed in registry order with a short 2-second timeout;
    the result entries are formatted as ``"key (base_url)"`` so the caller can
    show the user an obvious preset to switch to.
    """
    return [
        f"{name} ({candidate.base_url})"
        for name, candidate in _PRESETS.items()
        if name != active_key
        and probe(candidate.base_url, timeout=2.0, api_key=candidate.api_key)[0]
    ]
def preflight(preset: Preset | None = None) -> bool:
    """Announce the active backend on stderr and report whether it answers.

    Uses *preset* when given, otherwise whatever ``load_preset()`` returns.
    The endpoint is probed once. On success a green banner with the model
    count and tier ladder is printed; on failure a red warning names the dead
    endpoint, the probe error, and any other presets that are reachable.

    Returns:
        True if the probe succeeded, False otherwise.
    """
    active = preset or load_preset()
    tier_summary = " · ".join(f"{tier.name}:{tier.model}" for tier in active.tiers)
    reachable, model_count, error = probe(active.base_url, api_key=active.api_key)

    if not reachable:
        report = [
            _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED),
            f" preset={active.key} · {active.base_url} · {error}",
            f" tiers: {tier_summary}",
        ]
        # Probe the other presets only on failure, to point at the easy fix.
        others = _reachable_alternatives(active.key)
        if others:
            report.append(" reachable instead: " + ", ".join(others))
            hint = " → set SMALLCODE_PRESET to one of the above, or fix the endpoint."
        else:
            hint = " → no known preset endpoint is answering right now."
        report.append(_color(hint, _DIM))
        print("\n".join(report), file=sys.stderr)
        return False

    # A reachable endpoint may still omit the model list; say so plainly.
    availability = "reachable" if model_count is None else f"{model_count} models"
    headline = f"smolcode backend: preset={active.key} · {active.base_url} · {availability}"
    print(_color(f"{headline}\n tiers: {tier_summary}", _BOLD + _GREEN), file=sys.stderr)
    return True