"""Curated Qwen model catalog for the OpenCode OpenEnv server. Lives in the server (not the primitive) because routing decisions — which HF router backend to pick for a given Qwen repo, what counts as the "default" model, whether a model supports thinking — are deployment concerns, not harness concerns. The primitive remains provider-agnostic; this catalog is what the Gradio UI and the MCP tools consult to turn a UI selection into a concrete ``(base_url, api_key, model_string, disable_thinking)`` quadruple. Backends supported: - ``vllm`` — user-supplied OpenAI-compatible endpoint (e.g. cloudflared tunnel to ``vllm serve``, or a colocated vLLM server). - ``hf_router`` — Hugging Face Inference Providers router at ``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``. Model id carries a ``:provider`` suffix to pick the HF backend (``:together``, ``:scaleway``, ``:nscale``, ...). Only HF providers verified to return ``logprobs`` are listed (see ``DOCS/HF/hf_inference_providers_logprobs.md``). """ from __future__ import annotations from typing import Literal from pydantic import BaseModel BackendKind = Literal["vllm", "hf_router"] HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1" class CatalogModel(BaseModel): """One model entry in the curated Qwen catalog.""" #: Canonical HF-Hub repo id (no ``:provider`` suffix). repo: str #: Backend kind — drives routing + auth shape. backend: BackendKind #: For ``hf_router`` entries, the ``:`` suffix HF uses to #: force a specific backend inference provider. Empty for ``vllm``. hf_route: str = "" #: Whether this model supports Qwen-style thinking mode. supports_thinking: bool = False #: Short human-readable label for UI dropdowns. label: str = "" @property def dropdown_key(self) -> str: """Stable unique key for UI selectors.""" if self.backend == "hf_router": return f"hf-router://{self.repo}{self.hf_route}" return f"vllm://{self.repo}" @property def opencode_model_string(self) -> str: """Model id opencode should send to the endpoint. For HF router we bake the ``:provider`` suffix into the model string so the HF router picks the right backend. """ if self.backend == "hf_router": return f"{self.repo}{self.hf_route}" return self.repo # Ordered: self-hosted vLLM first (default), then HF router options. 

# Ordered: self-hosted vLLM first (default), then HF router options.
CATALOG: list[CatalogModel] = [
    # --- Local vLLM (tunneled or colocated) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-4B",
        backend="vllm",
        supports_thinking=True,
        label="Qwen3.5-4B (self-hosted vLLM)",
    ),
    # --- HF Inference Router (Together / Scaleway / Nscale) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Scaleway",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=False,
        label="Qwen3-Coder-480B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-235B-A22B-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-4B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-4B-Instruct-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=False,
        label="Qwen3-Coder-30B-A3B — HF/Scaleway",
    ),
]


def by_key(key: str) -> CatalogModel:
    """Look up a catalog entry by ``dropdown_key``.

    Falls back to synthesising an ad-hoc entry from the key's prefix so users
    can enter a custom vLLM model id or a custom HF-router model id without
    editing the catalog:

    - ``"vllm://<repo>"`` → ad-hoc vllm entry with ``<repo>`` as the model id.
    - ``"hf-router://<repo>[:<provider>]"`` → ad-hoc hf_router entry; the
      provider suffix (if present) is preserved verbatim in ``hf_route``.
    """
    for m in CATALOG:
        if m.dropdown_key == key:
            return m
    if key.startswith("vllm://"):
        repo = key[len("vllm://"):].strip()
        if not repo:
            raise KeyError(f"missing model id in key: {key!r}")
        return CatalogModel(
            repo=repo,
            backend="vllm",
            supports_thinking=False,
            label=f"{repo} (custom vLLM)",
        )
    if key.startswith("hf-router://"):
        rest = key[len("hf-router://"):].strip()
        if not rest:
            raise KeyError(f"missing model id in key: {key!r}")
        if ":" in rest:
            repo, _, suffix = rest.partition(":")
            hf_route = ":" + suffix
        else:
            repo, hf_route = rest, ""
        return CatalogModel(
            repo=repo,
            backend="hf_router",
            hf_route=hf_route,
            supports_thinking=False,
            label=f"{repo}{hf_route} (custom HF Router)",
        )
    raise KeyError(f"unknown model key: {key!r}")


def default_model() -> CatalogModel:
    """First entry (self-hosted vLLM 4B)."""
    return CATALOG[0]


def resolve_endpoint(
    model_key: str,
    *,
    vllm_url: str = "",
    hf_token: str = "",
) -> tuple[str, str, str, CatalogModel]:
    """Translate a UI selection into ``(base_url, api_key, model_string, entry)``.

    Raises ``ValueError`` with a clear message when a required secret is
    missing so the UI can render a precise "please fill in X" message.
    """
    m = by_key(model_key)
    if m.backend == "vllm":
        vllm_url = (vllm_url or "").strip()
        if not vllm_url:
            raise ValueError(
                f"model {m.dropdown_key!r} requires a vLLM base URL "
                "(the tunneled or in-cluster /v1 endpoint)."
            )
        base = vllm_url.rstrip("/")
        if not base.endswith("/v1"):
            base = base + "/v1"
        return base, "anything", m.opencode_model_string, m
    if m.backend == "hf_router":
        hf_token = (hf_token or "").strip()
        if not hf_token:
            raise ValueError(
                f"model {m.dropdown_key!r} requires an HF token "
                "(hf_... from https://huggingface.co/settings/tokens)."
            )
        return HF_ROUTER_BASE_URL, hf_token, m.opencode_model_string, m
    raise ValueError(f"unknown backend: {m.backend}")
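

if __name__ == "__main__":
    # Usage sketch (illustrative, not part of the server wiring): resolve the
    # default catalog entry against a local vLLM endpoint. The URL below is an
    # assumed placeholder for this demo; in the real deployment the Gradio UI
    # supplies the tunneled or in-cluster endpoint.
    base_url, api_key, model_string, entry = resolve_endpoint(
        default_model().dropdown_key,
        vllm_url="http://localhost:8000",  # assumed placeholder endpoint
    )
    # Fourth element of the UI quadruple, derived from the catalog entry.
    disable_thinking = not entry.supports_thinking
    print(base_url, model_string, disable_thinking)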