Spaces:

seanpoyner
/

smolcode

Paused

File size: 4,926 Bytes

daea45b

"""Startup reachability check for the active backend.

The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by
the preset) serves the model ladder. If that endpoint is unreachable — hal is
off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail
deep inside a request with no obvious cause. Worse, a silent default to the
wrong preset (the historical "it's using my laptop, not hal" bug) looks fine
until you notice the weak single-tier model.

`preflight()` makes that visible: it prints which preset/endpoint is active and
probes `{base_url}/models` once at startup. On success it prints a one-line
banner with the model count; on failure it prints a loud warning naming the dead
URL and which *other* presets are reachable right now, so the fix is obvious.

It never raises and never blocks the app from starting — it only informs.
"""
from __future__ import annotations

import http.client
import json
import sys
import urllib.error
import urllib.request

from .config import Preset, _PRESETS, load_preset

# Default timeout, in seconds, for list_models() and probe() requests.
_TIMEOUT = 4.0

# ANSI escape sequences used by the banner. _color() only applies them when
# stderr is a TTY, so piped/redirected output stays plain text.
_BOLD = "\033[1m"    # emphasis
_GREEN = "\033[32m"  # success banner
_RED = "\033[31m"    # unreachable warning
_DIM = "\033[2m"     # secondary hint lines
_RESET = "\033[0m"   # terminates any of the above


def _color(s: str, code: str) -> str:
    """Return *s* wrapped in ANSI *code* and a reset, or *s* unchanged off-TTY."""
    # All banner output goes to stderr, so that is the stream checked here.
    if not sys.stderr.isatty():
        return s
    return f"{code}{s}{_RESET}"


def list_models(base_url: str, timeout: float = _TIMEOUT,
                api_key: str | None = None) -> list[str]:
    """Fetch the sorted model IDs advertised at ``{base_url}/models``.

    Args:
        base_url: Root of the OpenAI-compatible API; trailing slashes are
            stripped before ``/models`` is appended.
        timeout: Timeout in seconds passed to ``urlopen`` for the one request.
        api_key: Optional bearer token. Sent the same way ``probe()`` sends
            it, so an endpoint that requires auth can be listed instead of
            silently yielding ``[]``.

    Returns:
        The ``id`` of each dict entry in the response's ``data`` list, as
        strings in sorted order. Returns ``[]`` on any failure: connection
        or timeout error, HTTP error status, malformed HTTP response,
        invalid JSON, or a payload that is not ``{"data": [...]}``.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return []
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except (urllib.error.URLError, OSError, ValueError,
            http.client.HTTPException):
        # URLError/HTTPError and timeouts are OSErrors; JSONDecodeError and
        # "unknown url type" are ValueErrors. HTTPException (BadStatusLine,
        # IncompleteRead, ...) is neither, so it must be named explicitly to
        # honour the "returns [] on failure" contract.
        return []

    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        return []
    # Skip entries that are not dicts or that have a missing/empty id.
    return sorted(str(m["id"]) for m in models
                  if isinstance(m, dict) and m.get("id"))


def probe(base_url: str, timeout: float = _TIMEOUT,
          api_key: str | None = None) -> tuple[bool, int | None, str | None]:
    """Check whether ``{base_url}/models`` answers. Never raises on I/O errors.

    Sends the bearer token so endpoints that require auth (e.g. a vLLM server
    started with --api-key) report reachable instead of a spurious 401.

    Args:
        base_url: Root of the OpenAI-compatible API; trailing slashes are
            stripped before ``/models`` is appended.
        timeout: Timeout in seconds passed to ``urlopen``.
        api_key: Optional bearer token; no ``Authorization`` header is sent
            when it is ``None`` or empty.

    Returns:
        ``(reachable, model_count, error)``:

        * ``(True, n, None)`` on a 200 response with JSON; ``n`` is the length
          of the ``data`` list, or ``None`` if the payload has no such list.
        * ``(False, None, message)`` otherwise. HTTP failures are reported as
          ``"HTTP <code> <reason>"`` (or ``"HTTP <code>"`` for a non-200
          success status); other failures carry the underlying error text.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return False, None, f"HTTP {resp.status}"
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except urllib.error.HTTPError as e:
        # urlopen raises for 4xx/5xx. Keep the status code so the banner says
        # "HTTP 401 Unauthorized" rather than a bare reason phrase.
        return False, None, f"HTTP {e.code} {e.reason}".rstrip()
    except urllib.error.URLError as e:
        return False, None, str(e.reason) or type(e).__name__
    except (OSError, ValueError, http.client.HTTPException) as e:
        # OSError covers timeouts and socket errors raised while reading;
        # ValueError covers bad URLs and invalid JSON; HTTPException covers
        # malformed HTTP (BadStatusLine, IncompleteRead), which is not an
        # OSError. Fall back to the class name when the message is empty.
        return False, None, str(e) or type(e).__name__

    models = data.get("data") if isinstance(data, dict) else None
    count = len(models) if isinstance(models, list) else None
    return True, count, None


def _reachable_alternatives(active_key: str) -> list[str]:
    """List the known presets, other than *active_key*, that answer right now.

    Every other entry of ``_PRESETS`` is probed in turn with a 2-second
    timeout and its own API key. Each reachable one is returned as
    ``"<key> (<base_url>)"``, in ``_PRESETS`` iteration order, so the caller
    can point the user at a working endpoint.
    """
    others = ((name, cfg) for name, cfg in _PRESETS.items() if name != active_key)
    return [
        f"{name} ({cfg.base_url})"
        for name, cfg in others
        # probe() returns (reachable, model_count, error); only [0] matters here.
        if probe(cfg.base_url, timeout=2.0, api_key=cfg.api_key)[0]
    ]


def preflight(preset: Preset | None = None) -> bool:
    """Probe the active backend once and print a status banner to stderr.

    Args:
        preset: Preset to check; when omitted (or falsy) ``load_preset()``
            supplies the active one.

    Returns:
        ``True`` if ``probe()`` reports the preset's endpoint reachable, in
        which case a banner with the model count and tier list is printed.
        ``False`` otherwise, after printing a warning that names the dead URL,
        the probe error, and any other presets that are reachable.
    """
    preset = preset or load_preset()
    tier_summary = " · ".join([f"{tier.name}:{tier.model}" for tier in preset.tiers])
    reachable, n_models, reason = probe(preset.base_url, api_key=preset.api_key)

    if not reachable:
        report = [
            _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED),
            f"  preset={preset.key} · {preset.base_url} · {reason}",
            f"  tiers: {tier_summary}",
        ]
        fallbacks = _reachable_alternatives(preset.key)
        if not fallbacks:
            report.append(_color("  → no known preset endpoint is answering right now.", _DIM))
        else:
            report.append("  reachable instead: " + ", ".join(fallbacks))
            # NOTE(review): the hint says SMALLCODE_PRESET while the project is
            # spelled "smolcode" — confirm the env var name against .config.
            report.append(_color("  → set SMALLCODE_PRESET to one of the above, "
                                 "or fix the endpoint.", _DIM))
        # Emit the whole warning in a single write to stderr.
        print("\n".join(report), file=sys.stderr)
        return False

    # A 200 response without a "data" list still counts as reachable.
    status = "reachable" if n_models is None else f"{n_models} models"
    banner = (f"smolcode backend: preset={preset.key} · {preset.base_url} "
              f"· {status}\n  tiers: {tier_summary}")
    print(_color(banner, _BOLD + _GREEN), file=sys.stderr)
    return True