File size: 6,988 Bytes
d4d3fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
"""Curated Qwen model catalog for the OpenCode OpenEnv server.

Lives in the server (not the primitive) because routing decisions β€”
which HF router backend to pick for a given Qwen repo, what counts as
the "default" model, whether a model supports thinking β€” are
deployment concerns, not harness concerns. The primitive remains
provider-agnostic; this catalog is what the Gradio UI and the MCP
tools consult to turn a UI selection into a concrete
``(base_url, api_key, model_string, entry)`` quadruple, where the
returned ``entry`` exposes ``supports_thinking`` for thinking control.

Backends supported:

- ``vllm``       β€” user-supplied OpenAI-compatible endpoint (e.g. cloudflared
                   tunnel to ``vllm serve``, or a colocated vLLM server).
- ``hf_router``  β€” Hugging Face Inference Providers router at
                   ``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``.
                   Model id carries a ``:provider`` suffix to pick the HF
                   backend (``:together``, ``:scaleway``, ``:nscale``, ...).

Only HF providers verified to return ``logprobs`` are listed (see
``DOCS/HF/hf_inference_providers_logprobs.md``).
"""

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel


# Discriminator for how a catalog entry is served and authenticated:
# "vllm" = user-supplied OpenAI-compatible endpoint; "hf_router" = HF
# Inference Providers router (auth via HF token).
BackendKind = Literal["vllm", "hf_router"]

# OpenAI-compatible base URL of the HF Inference Providers router; all
# hf_router entries resolve to this endpoint.
HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"


class CatalogModel(BaseModel):
    """A single entry in the curated Qwen catalog."""

    #: Canonical HF-Hub repo id, never carrying a ``:provider`` suffix.
    repo: str
    #: Which backend serves this model; drives routing and auth shape.
    backend: BackendKind
    #: ``:<provider>`` suffix that forces a specific HF backend provider
    #: for ``hf_router`` entries (e.g. ``":together"``); empty for ``vllm``.
    hf_route: str = ""
    #: True when the model supports Qwen-style thinking mode.
    supports_thinking: bool = False
    #: Short human-readable label shown in UI dropdowns.
    label: str = ""

    @property
    def dropdown_key(self) -> str:
        """Stable unique key for UI selectors."""
        return (
            f"hf-router://{self.repo}{self.hf_route}"
            if self.backend == "hf_router"
            else f"vllm://{self.repo}"
        )

    @property
    def opencode_model_string(self) -> str:
        """Model id opencode should send to the endpoint.

        HF-router entries bake the ``:provider`` suffix into the model
        string so the HF router selects the intended backend.
        """
        return (
            f"{self.repo}{self.hf_route}"
            if self.backend == "hf_router"
            else self.repo
        )


# Ordered: self-hosted vLLM first (default), then HF router options.
CATALOG: list[CatalogModel] = [
    # --- Local vLLM (tunneled or colocated) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-4B",
        backend="vllm",
        supports_thinking=True,
        label="Qwen3.5-4B (self-hosted vLLM)",
    ),
    # --- HF Inference Router (Together / Scaleway / Nscale) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B β€” HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B β€” HF/Scaleway",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=False,
        label="Qwen3-Coder-480B β€” HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-235B-A22B-2507 β€” HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-4B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-4B-Instruct-2507 β€” HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=False,
        label="Qwen3-Coder-30B-A3B β€” HF/Scaleway",
    ),
]


def by_key(key: str) -> CatalogModel:
    """Look up a catalog entry by ``dropdown_key``.

    Falls back to synthesising an ad-hoc entry from the key's prefix so
    users can enter a custom vLLM model id or a custom HF-router model
    id without editing the catalog:

    - ``"vllm://<repo>"`` → ad-hoc vllm entry with ``repo`` as the model id.
    - ``"hf-router://<repo>[:<provider>]"`` → ad-hoc hf_router entry; the
      provider suffix (if present) is preserved verbatim in ``hf_route``.
    """
    # Exact match against the curated catalog wins over ad-hoc parsing.
    found = next((entry for entry in CATALOG if entry.dropdown_key == key), None)
    if found is not None:
        return found

    vllm_prefix = "vllm://"
    hf_prefix = "hf-router://"

    if key.startswith(vllm_prefix):
        model_id = key[len(vllm_prefix):].strip()
        if not model_id:
            raise KeyError(f"missing model id in key: {key!r}")
        return CatalogModel(
            repo=model_id,
            backend="vllm",
            supports_thinking=False,
            label=f"{model_id} (custom vLLM)",
        )

    if key.startswith(hf_prefix):
        body = key[len(hf_prefix):].strip()
        if not body:
            raise KeyError(f"missing model id in key: {key!r}")
        # Split at the first ":"; keep the provider suffix verbatim.
        repo, sep, provider = body.partition(":")
        hf_route = f":{provider}" if sep else ""
        return CatalogModel(
            repo=repo,
            backend="hf_router",
            hf_route=hf_route,
            supports_thinking=False,
            label=f"{repo}{hf_route} (custom HF Router)",
        )

    raise KeyError(f"unknown model key: {key!r}")


def default_model() -> CatalogModel:
    """Return the catalog default: its first entry (self-hosted vLLM 4B)."""
    return CATALOG[0]


def resolve_endpoint(
    model_key: str,
    *,
    vllm_url: str = "",
    hf_token: str = "",
) -> tuple[str, str, str, "CatalogModel"]:
    """Translate a UI selection into ``(base_url, api_key, model_string, entry)``.

    Raises ``ValueError`` with a clear message when a required secret is
    missing so the UI can render a precise "please fill in X" message.
    """
    entry = by_key(model_key)

    if entry.backend == "vllm":
        url = (vllm_url or "").strip()
        if not url:
            raise ValueError(
                f"model {entry.dropdown_key!r} requires a vLLM base URL "
                "(the tunneled or in-cluster /v1 endpoint)."
            )
        # Normalize to exactly one "/v1" suffix with no trailing slash.
        base = url.rstrip("/")
        if not base.endswith("/v1"):
            base += "/v1"
        # vLLM ignores the API key, but the OpenAI client requires one.
        return base, "anything", entry.opencode_model_string, entry

    if entry.backend == "hf_router":
        token = (hf_token or "").strip()
        if not token:
            raise ValueError(
                f"model {entry.dropdown_key!r} requires an HF token "
                "(hf_... from https://huggingface.co/settings/tokens)."
            )
        return HF_ROUTER_BASE_URL, token, entry.opencode_model_string, entry

    raise ValueError(f"unknown backend: {entry.backend}")