Spaces:
Sleeping
Sleeping
File size: 6,988 Bytes
"""Curated Qwen model catalog for the OpenCode OpenEnv server.
Lives in the server (not the primitive) because routing decisions —
which HF router backend to pick for a given Qwen repo, what counts as
the "default" model, whether a model supports thinking — are
deployment concerns, not harness concerns. The primitive remains
provider-agnostic; this catalog is what the Gradio UI and the MCP
tools consult to turn a UI selection into a concrete
``(base_url, api_key, model_string, disable_thinking)`` quadruple.
Backends supported:

- ``vllm`` — user-supplied OpenAI-compatible endpoint (e.g. cloudflared
  tunnel to ``vllm serve``, or a colocated vLLM server).
- ``hf_router`` — Hugging Face Inference Providers router at
  ``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``.
  Model id carries a ``:provider`` suffix to pick the HF
  backend (``:together``, ``:scaleway``, ``:nscale``, ...).
Only HF providers verified to return ``logprobs`` are listed (see
``DOCS/HF/hf_inference_providers_logprobs.md``).
"""
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel
BackendKind = Literal["vllm", "hf_router"]
HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"
class CatalogModel(BaseModel):
    """One model entry in the curated Qwen catalog."""

    #: Canonical HF-Hub repo id (no ``:provider`` suffix).
    repo: str
    #: Backend kind — drives routing + auth shape.
    backend: BackendKind
    #: For ``hf_router`` entries, the ``:<provider>`` suffix HF uses to
    #: force a specific backend inference provider. Empty for ``vllm``.
    hf_route: str = ""
    #: Whether this model supports Qwen-style thinking mode.
    supports_thinking: bool = False
    #: Short human-readable label for UI dropdowns.
    label: str = ""

    @property
    def dropdown_key(self) -> str:
        """Stable unique key for UI selectors.

        ``hf-router://<repo>:<provider>`` for router entries,
        ``vllm://<repo>`` otherwise — the provider suffix keeps two
        routes to the same repo distinct in the dropdown.
        """
        if self.backend != "hf_router":
            return f"vllm://{self.repo}"
        return f"hf-router://{self.repo}{self.hf_route}"

    @property
    def opencode_model_string(self) -> str:
        """Model id opencode should send to the endpoint.

        For HF router we bake the ``:provider`` suffix into the model
        string so the HF router picks the right backend.
        """
        suffix = self.hf_route if self.backend == "hf_router" else ""
        return self.repo + suffix
# Ordered: self-hosted vLLM first (default), then HF router options.
# NOTE(review): the "—" separators in labels below were mojibake ("β")
# from an encoding round-trip; restored to em dashes to match the
# module docstring's intent.
CATALOG: list[CatalogModel] = [
    # --- Local vLLM (tunneled or colocated) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-4B",
        backend="vllm",
        supports_thinking=True,
        label="Qwen3.5-4B (self-hosted vLLM)",
    ),
    # --- HF Inference Router (Together / Scaleway / Nscale) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Scaleway",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=False,
        label="Qwen3-Coder-480B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-235B-A22B-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-4B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-4B-Instruct-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=False,
        label="Qwen3-Coder-30B-A3B — HF/Scaleway",
    ),
]
def by_key(key: str) -> CatalogModel:
    """Look up a catalog entry by ``dropdown_key``.

    Falls back to synthesising an ad-hoc entry from the key's prefix so
    users can enter a custom vLLM model id or a custom HF-router model
    id without editing the catalog:

    - ``"vllm://<repo>"`` — ad-hoc vllm entry with ``repo`` as the model id.
    - ``"hf-router://<repo>[:<provider>]"`` — ad-hoc hf_router entry; the
      provider suffix (if present) is preserved verbatim in ``hf_route``.

    Raises ``KeyError`` for an empty model id or an unrecognised prefix.
    """
    # Exact catalog hit wins over any ad-hoc synthesis.
    curated = next((entry for entry in CATALOG if entry.dropdown_key == key), None)
    if curated is not None:
        return curated

    vllm_prefix = "vllm://"
    hf_prefix = "hf-router://"

    if key.startswith(vllm_prefix):
        repo = key[len(vllm_prefix):].strip()
        if not repo:
            raise KeyError(f"missing model id in key: {key!r}")
        return CatalogModel(
            repo=repo,
            backend="vllm",
            supports_thinking=False,
            label=f"{repo} (custom vLLM)",
        )

    if key.startswith(hf_prefix):
        rest = key[len(hf_prefix):].strip()
        if not rest:
            raise KeyError(f"missing model id in key: {key!r}")
        # partition yields ("", "") for sep/suffix when no colon is
        # present, so hf_route naturally collapses to "".
        repo, sep, provider = rest.partition(":")
        hf_route = sep + provider
        return CatalogModel(
            repo=repo,
            backend="hf_router",
            hf_route=hf_route,
            supports_thinking=False,
            label=f"{repo}{hf_route} (custom HF Router)",
        )

    raise KeyError(f"unknown model key: {key!r}")
def default_model() -> CatalogModel:
    """Return the default catalog entry (the self-hosted vLLM 4B, listed first)."""
    return next(iter(CATALOG))
def resolve_endpoint(
    model_key: str,
    *,
    vllm_url: str = "",
    hf_token: str = "",
) -> tuple[str, str, str, "CatalogModel"]:
    """Translate a UI selection into ``(base_url, api_key, model_string, entry)``.

    Raises ``ValueError`` with a clear message when a required secret is
    missing so the UI can render a precise "please fill in X" message.
    """
    entry = by_key(model_key)

    if entry.backend == "vllm":
        url = (vllm_url or "").strip()
        if not url:
            raise ValueError(
                f"model {entry.dropdown_key!r} requires a vLLM base URL "
                "(the tunneled or in-cluster /v1 endpoint)."
            )
        # Normalise to a single "/v1"-terminated base, whatever the user typed.
        base = url.rstrip("/")
        if not base.endswith("/v1"):
            base += "/v1"
        # vLLM ignores the API key; any non-empty placeholder works.
        return base, "anything", entry.opencode_model_string, entry

    if entry.backend == "hf_router":
        token = (hf_token or "").strip()
        if not token:
            raise ValueError(
                f"model {entry.dropdown_key!r} requires an HF token "
                "(hf_... from https://huggingface.co/settings/tokens)."
            )
        return HF_ROUTER_BASE_URL, token, entry.opencode_model_string, entry

    raise ValueError(f"unknown backend: {entry.backend}")
|