File size: 4,926 Bytes
daea45b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Startup reachability check for the active backend.

The whole point of smolcode is that one OpenAI-compatible endpoint (chosen by
the preset) serves the model ladder. If that endpoint is unreachable — hal is
off the VPN, the laptop Ollama isn't running — the agent loop will hang or fail
deep inside a request with no obvious cause. Worse, a silent default to the
wrong preset (the historical "it's using my laptop, not hal" bug) looks fine
until you notice the weak single-tier model.

`preflight()` makes that visible: it prints which preset/endpoint is active and
probes `{base_url}/models` once at startup. On success it prints a one-line
banner with the model count; on failure it prints a loud warning naming the dead
URL and which *other* presets are reachable right now, so the fix is obvious.

It never raises and never blocks the app from starting — it only informs.
"""
from __future__ import annotations

import http.client
import json
import sys
import urllib.error
import urllib.request

from .config import Preset, _PRESETS, load_preset

# Default timeout (seconds) for a single request to {base_url}/models; used as
# the default `timeout` argument of list_models() and probe().
_TIMEOUT = 4.0

# ANSI escape sequences: bold, green (ok), red (warning), dim, and reset.
# _color() only applies them when stderr is a TTY, so piped or redirected
# output degrades to plain text.
_BOLD, _GREEN, _RED, _DIM, _RESET = "\033[1m", "\033[32m", "\033[31m", "\033[2m", "\033[0m"


def _color(s: str, code: str) -> str:
    """Return *s* wrapped in the ANSI sequence *code* followed by a reset.

    When stderr is not a TTY the text is returned unchanged, so escape
    sequences never end up in redirected or piped output.
    """
    if not sys.stderr.isatty():
        return s
    return f"{code}{s}{_RESET}"


def list_models(base_url: str, timeout: float = _TIMEOUT,
                api_key: str | None = None) -> list[str]:
    """Fetch the model IDs advertised at ``{base_url}/models``.

    Args:
        base_url: Root URL of the endpoint; trailing slashes are stripped
            before ``/models`` is appended.
        timeout: Timeout in seconds passed to ``urlopen``.
        api_key: Optional bearer token. When given it is sent as an
            ``Authorization: Bearer ...`` header, mirroring ``probe()``, so
            endpoints that require auth can be listed as well.

    Returns:
        The sorted ``id`` values (as strings) of the dict entries found in the
        response's ``data`` list. Returns ``[]`` on any failure: unreachable
        host, HTTP error, non-200 status, malformed URL, protocol error,
        invalid JSON, or a payload without a ``data`` list.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return []
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError/timeouts; ValueError covers bad
        # URL schemes and JSONDecodeError. HTTPException (InvalidURL,
        # IncompleteRead, BadStatusLine, ...) derives from neither, so it has
        # to be listed explicitly to honour the "returns [] on failure" contract.
        return []

    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        return []
    # Skip entries that are not dicts or that lack a truthy "id".
    return sorted(str(m["id"]) for m in models if isinstance(m, dict) and m.get("id"))


def probe(base_url: str, timeout: float = _TIMEOUT,
          api_key: str | None = None) -> tuple[bool, int | None, str | None]:
    """Check whether ``{base_url}/models`` answers. Never raises.

    Sends the bearer token so endpoints that require auth (e.g. a vLLM server
    started with --api-key) report reachable instead of a spurious 401.

    Args:
        base_url: Root URL of the endpoint; trailing slashes are stripped
            before ``/models`` is appended.
        timeout: Timeout in seconds passed to ``urlopen``.
        api_key: Optional bearer token for the ``Authorization`` header.

    Returns:
        ``(reachable, model_count, error)``:

        * On success: ``(True, count, None)``, where ``count`` is the length
          of the response's ``data`` list, or ``None`` when the payload has no
          such list.
        * On failure: ``(False, None, message)``. HTTP error responses are
          reported as ``"HTTP <code> <reason>"`` so the status code is visible.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            if resp.status != 200:
                return False, None, f"HTTP {resp.status}"
            data = json.loads(resp.read().decode("utf-8", "replace"))
    except urllib.error.HTTPError as e:
        # urlopen raises HTTPError for 4xx/5xx; it must be caught before its
        # base class URLError so the status code is not lost.
        return False, None, f"HTTP {e.code} {e.reason}"
    except urllib.error.URLError as e:
        return False, None, str(e.reason)
    except (OSError, ValueError, http.client.HTTPException) as e:
        # HTTPException (InvalidURL, IncompleteRead, BadStatusLine, ...) is
        # neither an OSError nor a ValueError; without it the "never raises"
        # promise is broken. Fall back to the class name when the exception
        # carries no message, so the caller never prints an empty error.
        return False, None, str(e) or type(e).__name__

    models = data.get("data") if isinstance(data, dict) else None
    count = len(models) if isinstance(models, list) else None
    return True, count, None


def _reachable_alternatives(active_key: str) -> list[str]:
    """List the other known presets whose endpoint answers right now.

    Every entry of ``_PRESETS`` except *active_key* is probed with a 2-second
    timeout; each one that responds is rendered as ``"key (base_url)"``.
    The result points the user at a working preset to switch to.
    """
    return [
        f"{name} ({candidate.base_url})"
        for name, candidate in _PRESETS.items()
        if name != active_key
        and probe(candidate.base_url, timeout=2.0, api_key=candidate.api_key)[0]
    ]


def preflight(preset: Preset | None = None) -> bool:
    """Report the active backend on stderr and say whether it is reachable.

    Uses *preset* when given, otherwise the one returned by ``load_preset()``.
    The endpoint is probed once. On success a green banner with the preset,
    URL, model count and tiers is printed. On failure a red warning names the
    dead URL and lists any other presets that currently respond.

    Returns:
        True if the active endpoint answered, False otherwise.
    """
    if not preset:
        preset = load_preset()

    tier_summary = " · ".join(f"{t.name}:{t.model}" for t in preset.tiers)
    reachable, n_models, error = probe(preset.base_url, api_key=preset.api_key)

    if not reachable:
        report = [
            _color("⚠ smolcode backend UNREACHABLE", _BOLD + _RED),
            f"  preset={preset.key} · {preset.base_url} · {error}",
            f"  tiers: {tier_summary}",
        ]
        # Probe the remaining presets so the warning can suggest a working one.
        others = _reachable_alternatives(preset.key)
        if others:
            report += [
                "  reachable instead: " + ", ".join(others),
                _color("  → set SMALLCODE_PRESET to one of the above, "
                       "or fix the endpoint.", _DIM),
            ]
        else:
            report.append(
                _color("  → no known preset endpoint is answering right now.", _DIM))
        print("\n".join(report), file=sys.stderr)
        return False

    # A payload without a "data" list still counts as reachable, just uncounted.
    status = "reachable" if n_models is None else f"{n_models} models"
    headline = (f"smolcode backend: preset={preset.key} · {preset.base_url} "
                f"· {status}\n  tiers: {tier_summary}")
    print(_color(headline, _BOLD + _GREEN), file=sys.stderr)
    return True