"""Curated Qwen model catalog for the OpenCode OpenEnv server.
Lives in the server (not the primitive) because routing decisions —
which HF router backend to pick for a given Qwen repo, what counts as
the "default" model, whether a model supports thinking — are
deployment concerns, not harness concerns. The primitive remains
provider-agnostic; this catalog is what the Gradio UI and the MCP
tools consult to turn a UI selection into a concrete
``(base_url, api_key, model_string, disable_thinking)`` quadruple.
Backends supported:
- ``vllm`` — user-supplied OpenAI-compatible endpoint (e.g. cloudflared
tunnel to ``vllm serve``, or a colocated vLLM server).
- ``hf_router`` — Hugging Face Inference Providers router at
``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``.
Model id carries a ``:provider`` suffix to pick the HF
backend (``:together``, ``:scaleway``, ``:nscale``, ...).
Only HF providers verified to return ``logprobs`` are listed (see
``DOCS/HF/hf_inference_providers_logprobs.md``).
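
Example (doctest; ``hf_xxx`` stands in for a real HF token)::

    >>> base, key, model, entry = resolve_endpoint(
    ...     "hf-router://Qwen/Qwen3.5-397B-A17B:together", hf_token="hf_xxx")
    >>> base, model
    ('https://router.huggingface.co/v1', 'Qwen/Qwen3.5-397B-A17B:together')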
"""
from __future__ import annotations

from typing import Literal

from pydantic import BaseModel

BackendKind = Literal["vllm", "hf_router"]

HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"


class CatalogModel(BaseModel):
"""One model entry in the curated Qwen catalog."""
#: Canonical HF-Hub repo id (no ``:provider`` suffix).
repo: str
#: Backend kind — drives routing + auth shape.
backend: BackendKind
#: For ``hf_router`` entries, the ``:<provider>`` suffix HF uses to
#: force a specific backend inference provider. Empty for ``vllm``.
hf_route: str = ""
#: Whether this model supports Qwen-style thinking mode.
supports_thinking: bool = False
#: Short human-readable label for UI dropdowns.
label: str = ""
@property
def dropdown_key(self) -> str:
"""Stable unique key for UI selectors."""
if self.backend == "hf_router":
return f"hf-router://{self.repo}{self.hf_route}"
return f"vllm://{self.repo}"
@property
def opencode_model_string(self) -> str:
"""Model id opencode should send to the endpoint.
For HF router we bake the ``:provider`` suffix into the model
string so the HF router picks the right backend.
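
        Doctest (the field values mirror a catalog entry below)::

            >>> CatalogModel(repo="Qwen/Qwen3.5-397B-A17B",
            ...              backend="hf_router",
            ...              hf_route=":together").opencode_model_string
            'Qwen/Qwen3.5-397B-A17B:together'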
"""
if self.backend == "hf_router":
return f"{self.repo}{self.hf_route}"
        return self.repo


# Ordered: self-hosted vLLM first (default), then HF router options.
CATALOG: list[CatalogModel] = [
# --- Local vLLM (tunneled or colocated) ---
CatalogModel(
repo="Qwen/Qwen3.5-4B",
backend="vllm",
supports_thinking=True,
label="Qwen3.5-4B (self-hosted vLLM)",
),
# --- HF Inference Router (Together / Scaleway / Nscale) ---
CatalogModel(
repo="Qwen/Qwen3.5-397B-A17B",
backend="hf_router",
hf_route=":together",
supports_thinking=True,
label="Qwen3.5-397B-A17B — HF/Together",
),
CatalogModel(
repo="Qwen/Qwen3.5-397B-A17B",
backend="hf_router",
hf_route=":scaleway",
supports_thinking=True,
label="Qwen3.5-397B-A17B — HF/Scaleway",
),
CatalogModel(
repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
backend="hf_router",
hf_route=":together",
supports_thinking=False,
label="Qwen3-Coder-480B — HF/Together",
),
CatalogModel(
repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
backend="hf_router",
hf_route=":nscale",
supports_thinking=False,
label="Qwen3-235B-A22B-2507 — HF/Nscale",
),
CatalogModel(
repo="Qwen/Qwen3-4B-Instruct-2507",
backend="hf_router",
hf_route=":nscale",
supports_thinking=False,
label="Qwen3-4B-Instruct-2507 — HF/Nscale",
),
CatalogModel(
repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
backend="hf_router",
hf_route=":scaleway",
supports_thinking=False,
label="Qwen3-Coder-30B-A3B — HF/Scaleway",
),
]
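

# A minimal sketch of how a UI layer might turn the catalog into dropdown
# choices. The ``(label, value)`` pair shape is an assumption about the
# Gradio dropdown component, not something this module prescribes:
#
#     choices = [(m.label or m.repo, m.dropdown_key) for m in CATALOG]
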
def by_key(key: str) -> CatalogModel:
"""Look up a catalog entry by ``dropdown_key``.
Falls back to synthesising an ad-hoc entry from the key's prefix so
users can enter a custom vLLM model id or a custom HF-router model
id without editing the catalog:
- ``"vllm://<repo>"`` → ad-hoc vllm entry with ``repo`` as the model id.
- ``"hf-router://<repo>[:<provider>]"`` → ad-hoc hf_router entry; the
provider suffix (if present) is preserved verbatim in ``hf_route``.
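
    Doctests (``acme/custom-model`` is an illustrative, not real, repo id)::

        >>> by_key("vllm://Qwen/Qwen3.5-4B").backend
        'vllm'
        >>> by_key("hf-router://acme/custom-model:together").hf_route
        ':together'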
"""
for m in CATALOG:
if m.dropdown_key == key:
return m
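    # No curated match: fall back to synthesising an ad-hoc entry from the
    # key's scheme prefix (see the docstring above).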
if key.startswith("vllm://"):
repo = key[len("vllm://"):].strip()
if not repo:
raise KeyError(f"missing model id in key: {key!r}")
return CatalogModel(
repo=repo, backend="vllm", supports_thinking=False,
label=f"{repo} (custom vLLM)",
)
if key.startswith("hf-router://"):
rest = key[len("hf-router://"):].strip()
if not rest:
raise KeyError(f"missing model id in key: {key!r}")
if ":" in rest:
repo, _, suffix = rest.partition(":")
hf_route = ":" + suffix
else:
repo, hf_route = rest, ""
return CatalogModel(
repo=repo, backend="hf_router", hf_route=hf_route,
supports_thinking=False,
label=f"{repo}{hf_route} (custom HF Router)",
)
    raise KeyError(f"unknown model key: {key!r}")


def default_model() -> CatalogModel:
"""First entry (self-hosted vLLM 4B)."""
    return CATALOG[0]


def resolve_endpoint(
model_key: str,
*,
vllm_url: str = "",
hf_token: str = "",
) -> tuple[str, str, str, CatalogModel]:
"""Translate a UI selection into ``(base_url, api_key, model_string, entry)``.
Raises ``ValueError`` with a clear message when a required secret is
missing so the UI can render a precise "please fill in X" message.
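
    Doctest (``http://localhost:8000`` stands in for a real vLLM endpoint)::

        >>> base, key, model, entry = resolve_endpoint(
        ...     "vllm://Qwen/Qwen3.5-4B", vllm_url="http://localhost:8000")
        >>> base, model
        ('http://localhost:8000/v1', 'Qwen/Qwen3.5-4B')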
"""
m = by_key(model_key)
if m.backend == "vllm":
vllm_url = (vllm_url or "").strip()
if not vllm_url:
raise ValueError(
f"model {m.dropdown_key!r} requires a vLLM base URL "
"(the tunneled or in-cluster /v1 endpoint)."
)
base = vllm_url.rstrip("/")
if not base.endswith("/v1"):
base = base + "/v1"
return base, "anything", m.opencode_model_string, m
if m.backend == "hf_router":
hf_token = (hf_token or "").strip()
if not hf_token:
raise ValueError(
f"model {m.dropdown_key!r} requires an HF token "
"(hf_... from https://huggingface.co/settings/tokens)."
)
        return HF_ROUTER_BASE_URL, hf_token, m.opencode_model_string, m

    raise ValueError(f"unknown backend: {m.backend}")