Spaces:
Paused
Paused
| """ | |
| Canonical model catalogs and lightweight validation helpers. | |
| Add, remove, or reorder entries here — both `hermes setup` and | |
| `hermes` provider-selection will pick up the change automatically. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import urllib.request | |
| import urllib.error | |
| from difflib import get_close_matches | |
| from typing import Any, Optional | |
| COPILOT_BASE_URL = "https://api.githubcopilot.com" | |
| COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models" | |
| COPILOT_EDITOR_VERSION = "vscode/1.104.1" | |
| COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"] | |
| COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] | |
| # Fallback OpenRouter snapshot used when the live catalog is unavailable. | |
| # (model_id, display description shown in menus) | |
| OPENROUTER_MODELS: list[tuple[str, str]] = [ | |
| ("anthropic/claude-opus-4.6", "recommended"), | |
| ("anthropic/claude-sonnet-4.6", ""), | |
| ("qwen/qwen3.6-plus", ""), | |
| ("anthropic/claude-sonnet-4.5", ""), | |
| ("anthropic/claude-haiku-4.5", ""), | |
| ("openai/gpt-5.4", ""), | |
| ("openai/gpt-5.4-mini", ""), | |
| ("xiaomi/mimo-v2-pro", ""), | |
| ("openai/gpt-5.3-codex", ""), | |
| ("google/gemini-3-pro-image-preview", ""), | |
| ("google/gemini-3-flash-preview", ""), | |
| ("google/gemini-3.1-pro-preview", ""), | |
| ("google/gemini-3.1-flash-lite-preview", ""), | |
| ("qwen/qwen3.5-plus-02-15", ""), | |
| ("qwen/qwen3.5-35b-a3b", ""), | |
| ("stepfun/step-3.5-flash", ""), | |
| ("minimax/minimax-m2.7", ""), | |
| ("minimax/minimax-m2.5", ""), | |
| ("z-ai/glm-5.1", ""), | |
| ("z-ai/glm-5-turbo", ""), | |
| ("moonshotai/kimi-k2.5", ""), | |
| ("x-ai/grok-4.20", ""), | |
| ("nvidia/nemotron-3-super-120b-a12b", ""), | |
| ("nvidia/nemotron-3-super-120b-a12b:free", "free"), | |
| ("arcee-ai/trinity-large-preview:free", "free"), | |
| ("arcee-ai/trinity-large-thinking", ""), | |
| ("openai/gpt-5.4-pro", ""), | |
| ("openai/gpt-5.4-nano", ""), | |
| ] | |
| _openrouter_catalog_cache: list[tuple[str, str]] | None = None | |
| def _codex_curated_models() -> list[str]: | |
| """Derive the openai-codex curated list from codex_models.py. | |
| Single source of truth: DEFAULT_CODEX_MODELS + forward-compat synthesis. | |
| This keeps the gateway /model picker in sync with the CLI `hermes model` | |
| flow without maintaining a separate static list. | |
| """ | |
| from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, _add_forward_compat_models | |
| return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS)) | |
| _PROVIDER_MODELS: dict[str, list[str]] = { | |
| "nous": [ | |
| "xiaomi/mimo-v2-pro", | |
| "anthropic/claude-opus-4.6", | |
| "anthropic/claude-sonnet-4.6", | |
| "anthropic/claude-sonnet-4.5", | |
| "anthropic/claude-haiku-4.5", | |
| "openai/gpt-5.4", | |
| "openai/gpt-5.4-mini", | |
| "openai/gpt-5.3-codex", | |
| "google/gemini-3-pro-preview", | |
| "google/gemini-3-flash-preview", | |
| "google/gemini-3.1-pro-preview", | |
| "google/gemini-3.1-flash-lite-preview", | |
| "qwen/qwen3.5-plus-02-15", | |
| "qwen/qwen3.5-35b-a3b", | |
| "stepfun/step-3.5-flash", | |
| "minimax/minimax-m2.7", | |
| "minimax/minimax-m2.5", | |
| "z-ai/glm-5.1", | |
| "z-ai/glm-5-turbo", | |
| "moonshotai/kimi-k2.5", | |
| "x-ai/grok-4.20-beta", | |
| "nvidia/nemotron-3-super-120b-a12b", | |
| "nvidia/nemotron-3-super-120b-a12b:free", | |
| "arcee-ai/trinity-large-preview:free", | |
| "arcee-ai/trinity-large-thinking", | |
| "openai/gpt-5.4-pro", | |
| "openai/gpt-5.4-nano", | |
| ], | |
| "openai-codex": _codex_curated_models(), | |
| "copilot-acp": [ | |
| "copilot-acp", | |
| ], | |
| "copilot": [ | |
| "gpt-5.4", | |
| "gpt-5.4-mini", | |
| "gpt-5-mini", | |
| "gpt-5.3-codex", | |
| "gpt-5.2-codex", | |
| "gpt-4.1", | |
| "gpt-4o", | |
| "gpt-4o-mini", | |
| "claude-opus-4.6", | |
| "claude-sonnet-4.6", | |
| "claude-sonnet-4.5", | |
| "claude-haiku-4.5", | |
| "gemini-2.5-pro", | |
| "grok-code-fast-1", | |
| ], | |
| "gemini": [ | |
| "gemini-3.1-pro-preview", | |
| "gemini-3-flash-preview", | |
| "gemini-3.1-flash-lite-preview", | |
| "gemini-2.5-pro", | |
| "gemini-2.5-flash", | |
| "gemini-2.5-flash-lite", | |
| # Gemma open models (also served via AI Studio) | |
| "gemma-4-31b-it", | |
| "gemma-4-26b-it", | |
| ], | |
| "zai": [ | |
| "glm-5.1", | |
| "glm-5", | |
| "glm-5-turbo", | |
| "glm-4.7", | |
| "glm-4.5", | |
| "glm-4.5-flash", | |
| ], | |
| "xai": [ | |
| "grok-4.20-0309-reasoning", | |
| "grok-4.20-0309-non-reasoning", | |
| "grok-4.20-multi-agent-0309", | |
| "grok-4-1-fast-reasoning", | |
| "grok-4-1-fast-non-reasoning", | |
| "grok-4-fast-reasoning", | |
| "grok-4-fast-non-reasoning", | |
| "grok-4-0709", | |
| "grok-code-fast-1", | |
| "grok-3", | |
| "grok-3-mini", | |
| ], | |
| "kimi-coding": [ | |
| "kimi-for-coding", | |
| "kimi-k2.5", | |
| "kimi-k2-thinking", | |
| "kimi-k2-thinking-turbo", | |
| "kimi-k2-turbo-preview", | |
| "kimi-k2-0905-preview", | |
| ], | |
| "kimi-coding-cn": [ | |
| "kimi-k2.5", | |
| "kimi-k2-thinking", | |
| "kimi-k2-turbo-preview", | |
| "kimi-k2-0905-preview", | |
| ], | |
| "moonshot": [ | |
| "kimi-k2.5", | |
| "kimi-k2-thinking", | |
| "kimi-k2-turbo-preview", | |
| "kimi-k2-0905-preview", | |
| ], | |
| "minimax": [ | |
| "MiniMax-M2.7", | |
| "MiniMax-M2.5", | |
| "MiniMax-M2.1", | |
| "MiniMax-M2", | |
| ], | |
| "minimax-cn": [ | |
| "MiniMax-M2.7", | |
| "MiniMax-M2.5", | |
| "MiniMax-M2.1", | |
| "MiniMax-M2", | |
| ], | |
| "anthropic": [ | |
| "claude-opus-4-6", | |
| "claude-sonnet-4-6", | |
| "claude-opus-4-5-20251101", | |
| "claude-sonnet-4-5-20250929", | |
| "claude-opus-4-20250514", | |
| "claude-sonnet-4-20250514", | |
| "claude-haiku-4-5-20251001", | |
| ], | |
| "deepseek": [ | |
| "deepseek-chat", | |
| "deepseek-reasoner", | |
| ], | |
| "xiaomi": [ | |
| "mimo-v2-pro", | |
| "mimo-v2-omni", | |
| "mimo-v2-flash", | |
| ], | |
| "opencode-zen": [ | |
| "gpt-5.4-pro", | |
| "gpt-5.4", | |
| "gpt-5.3-codex", | |
| "gpt-5.3-codex-spark", | |
| "gpt-5.2", | |
| "gpt-5.2-codex", | |
| "gpt-5.1", | |
| "gpt-5.1-codex", | |
| "gpt-5.1-codex-max", | |
| "gpt-5.1-codex-mini", | |
| "gpt-5", | |
| "gpt-5-codex", | |
| "gpt-5-nano", | |
| "claude-opus-4-6", | |
| "claude-opus-4-5", | |
| "claude-opus-4-1", | |
| "claude-sonnet-4-6", | |
| "claude-sonnet-4-5", | |
| "claude-sonnet-4", | |
| "claude-haiku-4-5", | |
| "claude-3-5-haiku", | |
| "gemini-3.1-pro", | |
| "gemini-3-pro", | |
| "gemini-3-flash", | |
| "minimax-m2.7", | |
| "minimax-m2.5", | |
| "minimax-m2.5-free", | |
| "minimax-m2.1", | |
| "glm-5", | |
| "glm-4.7", | |
| "glm-4.6", | |
| "kimi-k2.5", | |
| "kimi-k2-thinking", | |
| "kimi-k2", | |
| "qwen3-coder", | |
| "big-pickle", | |
| ], | |
| "opencode-go": [ | |
| "glm-5", | |
| "kimi-k2.5", | |
| "mimo-v2-pro", | |
| "mimo-v2-omni", | |
| "minimax-m2.7", | |
| "minimax-m2.5", | |
| ], | |
| "ai-gateway": [ | |
| "anthropic/claude-opus-4.6", | |
| "anthropic/claude-sonnet-4.6", | |
| "anthropic/claude-sonnet-4.5", | |
| "anthropic/claude-haiku-4.5", | |
| "openai/gpt-5", | |
| "openai/gpt-4.1", | |
| "openai/gpt-4.1-mini", | |
| "google/gemini-3-pro-preview", | |
| "google/gemini-3-flash", | |
| "google/gemini-2.5-pro", | |
| "google/gemini-2.5-flash", | |
| "deepseek/deepseek-v3.2", | |
| ], | |
| "kilocode": [ | |
| "anthropic/claude-opus-4.6", | |
| "anthropic/claude-sonnet-4.6", | |
| "openai/gpt-5.4", | |
| "google/gemini-3-pro-preview", | |
| "google/gemini-3-flash-preview", | |
| ], | |
| # Alibaba DashScope Coding platform (coding-intl) — default endpoint. | |
| # Supports Qwen models + third-party providers (GLM, Kimi, MiniMax). | |
| # Users with classic DashScope keys should override DASHSCOPE_BASE_URL | |
| # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat) | |
| # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat). | |
| "alibaba": [ | |
| "qwen3.5-plus", | |
| "qwen3-coder-plus", | |
| "qwen3-coder-next", | |
| # Third-party models available on coding-intl | |
| "glm-5", | |
| "glm-4.7", | |
| "kimi-k2.5", | |
| "MiniMax-M2.5", | |
| ], | |
| # Curated HF model list — only agentic models that map to OpenRouter defaults. | |
| "huggingface": [ | |
| "Qwen/Qwen3.5-397B-A17B", | |
| "Qwen/Qwen3.5-35B-A3B", | |
| "deepseek-ai/DeepSeek-V3.2", | |
| "moonshotai/Kimi-K2.5", | |
| "MiniMaxAI/MiniMax-M2.5", | |
| "zai-org/GLM-5", | |
| "XiaomiMiMo/MiMo-V2-Flash", | |
| "moonshotai/Kimi-K2-Thinking", | |
| ], | |
| } | |
| # --------------------------------------------------------------------------- | |
| # Nous Portal free-model filtering | |
| # --------------------------------------------------------------------------- | |
| # Models that are ALLOWED to appear when priced as free on Nous Portal. | |
| # Any other free model is hidden — prevents promotional/temporary free models | |
| # from cluttering the selection when users are paying subscribers. | |
| # Models in this list are ALSO filtered out if they are NOT free (i.e. they | |
| # should only appear in the menu when they are genuinely free). | |
| _NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ | |
| "xiaomi/mimo-v2-pro", | |
| "xiaomi/mimo-v2-omni", | |
| }) | |
| def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: | |
| """Return True if *model_id* has zero-cost prompt AND completion pricing.""" | |
| p = pricing.get(model_id) | |
| if not p: | |
| return False | |
| try: | |
| return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0 | |
| except (TypeError, ValueError): | |
| return False | |
| def filter_nous_free_models( | |
| model_ids: list[str], | |
| pricing: dict[str, dict[str, str]], | |
| ) -> list[str]: | |
| """Filter the Nous Portal model list according to free-model policy. | |
| Rules: | |
| • Paid models that are NOT in the allowlist → keep (normal case). | |
| • Free models that are NOT in the allowlist → drop. | |
| • Allowlist models that ARE free → keep. | |
| • Allowlist models that are NOT free → drop. | |
| """ | |
| if not pricing: | |
| return model_ids # no pricing data — can't filter, show everything | |
| result: list[str] = [] | |
| for mid in model_ids: | |
| free = _is_model_free(mid, pricing) | |
| if mid in _NOUS_ALLOWED_FREE_MODELS: | |
| # Allowlist model: only show when it's actually free | |
| if free: | |
| result.append(mid) | |
| else: | |
| # Regular model: keep only when it's NOT free | |
| if not free: | |
| result.append(mid) | |
| return result | |
| # --------------------------------------------------------------------------- | |
| # Nous Portal account tier detection | |
| # --------------------------------------------------------------------------- | |
| def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]: | |
| """Fetch the user's Nous Portal account/subscription info. | |
| Calls ``<portal>/api/oauth/account`` with the OAuth access token. | |
| Returns the parsed JSON dict on success, e.g.:: | |
| { | |
| "subscription": { | |
| "plan": "Plus", | |
| "tier": 2, | |
| "monthly_charge": 20, | |
| "credits_remaining": 1686.60, | |
| ... | |
| }, | |
| ... | |
| } | |
| Returns an empty dict on any failure (network, auth, parse). | |
| """ | |
| base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") | |
| url = f"{base}/api/oauth/account" | |
| headers = { | |
| "Authorization": f"Bearer {access_token}", | |
| "Accept": "application/json", | |
| } | |
| try: | |
| req = urllib.request.Request(url, headers=headers) | |
| with urllib.request.urlopen(req, timeout=8) as resp: | |
| return json.loads(resp.read().decode()) | |
| except Exception: | |
| return {} | |
| def is_nous_free_tier(account_info: dict[str, Any]) -> bool: | |
| """Return True if the account info indicates a free (unpaid) tier. | |
| Checks ``subscription.monthly_charge == 0``. Returns False when | |
| the field is missing or unparseable (assumes paid — don't block users). | |
| """ | |
| sub = account_info.get("subscription") | |
| if not isinstance(sub, dict): | |
| return False | |
| charge = sub.get("monthly_charge") | |
| if charge is None: | |
| return False | |
| try: | |
| return float(charge) == 0 | |
| except (TypeError, ValueError): | |
| return False | |
| def partition_nous_models_by_tier( | |
| model_ids: list[str], | |
| pricing: dict[str, dict[str, str]], | |
| free_tier: bool, | |
| ) -> tuple[list[str], list[str]]: | |
| """Split Nous models into (selectable, unavailable) based on user tier. | |
| For paid-tier users: all models are selectable, none unavailable | |
| (free-model filtering is handled separately by ``filter_nous_free_models``). | |
| For free-tier users: only free models are selectable; paid models | |
| are returned as unavailable (shown grayed out in the menu). | |
| """ | |
| if not free_tier: | |
| return (model_ids, []) | |
| if not pricing: | |
| return (model_ids, []) # can't determine, show everything | |
| selectable: list[str] = [] | |
| unavailable: list[str] = [] | |
| for mid in model_ids: | |
| if _is_model_free(mid, pricing): | |
| selectable.append(mid) | |
| else: | |
| unavailable.append(mid) | |
| return (selectable, unavailable) | |
| # --------------------------------------------------------------------------- | |
| # TTL cache for free-tier detection — avoids repeated API calls within a | |
| # session while still picking up upgrades quickly. | |
| # --------------------------------------------------------------------------- | |
| _FREE_TIER_CACHE_TTL: int = 180 # seconds (3 minutes) | |
| _free_tier_cache: tuple[bool, float] | None = None # (result, timestamp) | |
| def check_nous_free_tier() -> bool: | |
| """Check if the current Nous Portal user is on a free (unpaid) tier. | |
| Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid | |
| hitting the Portal API on every call. The cache is short-lived so | |
| that an account upgrade is reflected within a few minutes. | |
| Returns False (assume paid) on any error — never blocks paying users. | |
| """ | |
| global _free_tier_cache | |
| import time | |
| now = time.monotonic() | |
| if _free_tier_cache is not None: | |
| cached_result, cached_at = _free_tier_cache | |
| if now - cached_at < _FREE_TIER_CACHE_TTL: | |
| return cached_result | |
| try: | |
| from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials | |
| # Ensure we have a fresh token (triggers refresh if needed) | |
| resolve_nous_runtime_credentials(min_key_ttl_seconds=60) | |
| state = get_provider_auth_state("nous") | |
| if not state: | |
| _free_tier_cache = (False, now) | |
| return False | |
| access_token = state.get("access_token", "") | |
| portal_url = state.get("portal_base_url", "") | |
| if not access_token: | |
| _free_tier_cache = (False, now) | |
| return False | |
| account_info = fetch_nous_account_tier(access_token, portal_url) | |
| result = is_nous_free_tier(account_info) | |
| _free_tier_cache = (result, now) | |
| return result | |
| except Exception: | |
| _free_tier_cache = (False, now) | |
| return False # default to paid on error — don't block users | |
| _PROVIDER_LABELS = { | |
| "openrouter": "OpenRouter", | |
| "openai-codex": "OpenAI Codex", | |
| "copilot-acp": "GitHub Copilot ACP", | |
| "nous": "Nous Portal", | |
| "copilot": "GitHub Copilot", | |
| "gemini": "Google AI Studio", | |
| "zai": "Z.AI / GLM", | |
| "kimi-coding": "Kimi / Moonshot", | |
| "kimi-coding-cn": "Kimi / Moonshot (China)", | |
| "minimax": "MiniMax", | |
| "minimax-cn": "MiniMax (China)", | |
| "anthropic": "Anthropic", | |
| "deepseek": "DeepSeek", | |
| "opencode-zen": "OpenCode Zen", | |
| "opencode-go": "OpenCode Go", | |
| "ai-gateway": "AI Gateway", | |
| "kilocode": "Kilo Code", | |
| "alibaba": "Alibaba Cloud (DashScope)", | |
| "qwen-oauth": "Qwen OAuth (Portal)", | |
| "huggingface": "Hugging Face", | |
| "xiaomi": "Xiaomi MiMo", | |
| "custom": "Custom endpoint", | |
| } | |
| _PROVIDER_ALIASES = { | |
| "glm": "zai", | |
| "z-ai": "zai", | |
| "z.ai": "zai", | |
| "zhipu": "zai", | |
| "github": "copilot", | |
| "github-copilot": "copilot", | |
| "github-models": "copilot", | |
| "github-model": "copilot", | |
| "github-copilot-acp": "copilot-acp", | |
| "copilot-acp-agent": "copilot-acp", | |
| "google": "gemini", | |
| "google-gemini": "gemini", | |
| "google-ai-studio": "gemini", | |
| "kimi": "kimi-coding", | |
| "moonshot": "kimi-coding", | |
| "kimi-cn": "kimi-coding-cn", | |
| "moonshot-cn": "kimi-coding-cn", | |
| "minimax-china": "minimax-cn", | |
| "minimax_cn": "minimax-cn", | |
| "claude": "anthropic", | |
| "claude-code": "anthropic", | |
| "deep-seek": "deepseek", | |
| "opencode": "opencode-zen", | |
| "zen": "opencode-zen", | |
| "go": "opencode-go", | |
| "opencode-go-sub": "opencode-go", | |
| "aigateway": "ai-gateway", | |
| "vercel": "ai-gateway", | |
| "vercel-ai-gateway": "ai-gateway", | |
| "kilo": "kilocode", | |
| "kilo-code": "kilocode", | |
| "kilo-gateway": "kilocode", | |
| "dashscope": "alibaba", | |
| "aliyun": "alibaba", | |
| "qwen": "alibaba", | |
| "alibaba-cloud": "alibaba", | |
| "qwen-portal": "qwen-oauth", | |
| "hf": "huggingface", | |
| "hugging-face": "huggingface", | |
| "huggingface-hub": "huggingface", | |
| "mimo": "xiaomi", | |
| "xiaomi-mimo": "xiaomi", | |
| } | |
| def get_default_model_for_provider(provider: str) -> str: | |
| """Return the default model for a provider, or empty string if unknown. | |
| Uses the first entry in _PROVIDER_MODELS as the default. This is the | |
| model a user would be offered first in the ``hermes model`` picker. | |
| Used as a fallback when the user has configured a provider but never | |
| selected a model (e.g. ``hermes auth add openai-codex`` without | |
| ``hermes model``). | |
| """ | |
| models = _PROVIDER_MODELS.get(provider, []) | |
| return models[0] if models else "" | |
| def _openrouter_model_is_free(pricing: Any) -> bool: | |
| """Return True when both prompt and completion pricing are zero.""" | |
| if not isinstance(pricing, dict): | |
| return False | |
| try: | |
| return float(pricing.get("prompt", "0")) == 0 and float(pricing.get("completion", "0")) == 0 | |
| except (TypeError, ValueError): | |
| return False | |
| def fetch_openrouter_models( | |
| timeout: float = 8.0, | |
| *, | |
| force_refresh: bool = False, | |
| ) -> list[tuple[str, str]]: | |
| """Return the curated OpenRouter picker list, refreshed from the live catalog when possible.""" | |
| global _openrouter_catalog_cache | |
| if _openrouter_catalog_cache is not None and not force_refresh: | |
| return list(_openrouter_catalog_cache) | |
| fallback = list(OPENROUTER_MODELS) | |
| preferred_ids = [mid for mid, _ in fallback] | |
| try: | |
| req = urllib.request.Request( | |
| "https://openrouter.ai/api/v1/models", | |
| headers={"Accept": "application/json"}, | |
| ) | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| payload = json.loads(resp.read().decode()) | |
| except Exception: | |
| return list(_openrouter_catalog_cache or fallback) | |
| live_items = payload.get("data", []) | |
| if not isinstance(live_items, list): | |
| return list(_openrouter_catalog_cache or fallback) | |
| live_by_id: dict[str, dict[str, Any]] = {} | |
| for item in live_items: | |
| if not isinstance(item, dict): | |
| continue | |
| mid = str(item.get("id") or "").strip() | |
| if not mid: | |
| continue | |
| live_by_id[mid] = item | |
| curated: list[tuple[str, str]] = [] | |
| for preferred_id in preferred_ids: | |
| live_item = live_by_id.get(preferred_id) | |
| if live_item is None: | |
| continue | |
| desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" | |
| curated.append((preferred_id, desc)) | |
| if not curated: | |
| return list(_openrouter_catalog_cache or fallback) | |
| first_id, _ = curated[0] | |
| curated[0] = (first_id, "recommended") | |
| _openrouter_catalog_cache = curated | |
| return list(curated) | |
| def model_ids(*, force_refresh: bool = False) -> list[str]: | |
| """Return just the OpenRouter model-id strings.""" | |
| return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)] | |
| def menu_labels(*, force_refresh: bool = False) -> list[str]: | |
| """Return display labels like 'anthropic/claude-opus-4.6 (recommended)'.""" | |
| labels = [] | |
| for mid, desc in fetch_openrouter_models(force_refresh=force_refresh): | |
| labels.append(f"{mid} ({desc})" if desc else mid) | |
| return labels | |
| # --------------------------------------------------------------------------- | |
| # Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models | |
| # --------------------------------------------------------------------------- | |
| # Cache: maps model_id → {"prompt": str, "completion": str} per endpoint | |
| _pricing_cache: dict[str, dict[str, dict[str, str]]] = {} | |
| def _format_price_per_mtok(per_token_str: str) -> str: | |
| """Convert a per-token price string to a human-friendly $/Mtok string. | |
| Always uses 2 decimal places so that prices align vertically when | |
| right-justified in a column (the decimal point stays in the same position). | |
| Examples: | |
| "0.000003" → "$3.00" (per million tokens) | |
| "0.00003" → "$30.00" | |
| "0.00000015" → "$0.15" | |
| "0.0000001" → "$0.10" | |
| "0.00018" → "$180.00" | |
| "0" → "free" | |
| """ | |
| try: | |
| val = float(per_token_str) | |
| except (TypeError, ValueError): | |
| return "?" | |
| if val == 0: | |
| return "free" | |
| per_m = val * 1_000_000 | |
| return f"${per_m:.2f}" | |
| def format_model_pricing_table( | |
| models: list[tuple[str, str]], | |
| pricing_map: dict[str, dict[str, str]], | |
| current_model: str = "", | |
| indent: str = " ", | |
| ) -> list[str]: | |
| """Build a column-aligned model+pricing table for terminal display. | |
| Returns a list of pre-formatted lines ready to print. | |
| *models* is ``[(model_id, description), ...]``. | |
| """ | |
| if not models: | |
| return [] | |
| # Build rows: (model_id, input_price, output_price, cache_price, is_current) | |
| rows: list[tuple[str, str, str, str, bool]] = [] | |
| has_cache = False | |
| for mid, _desc in models: | |
| is_cur = mid == current_model | |
| p = pricing_map.get(mid) | |
| if p: | |
| inp = _format_price_per_mtok(p.get("prompt", "")) | |
| out = _format_price_per_mtok(p.get("completion", "")) | |
| cache_read = p.get("input_cache_read", "") | |
| cache = _format_price_per_mtok(cache_read) if cache_read else "" | |
| if cache: | |
| has_cache = True | |
| else: | |
| inp, out, cache = "", "", "" | |
| rows.append((mid, inp, out, cache, is_cur)) | |
| name_col = max(len(r[0]) for r in rows) + 2 | |
| # Compute price column widths from the actual data so decimals align | |
| price_col = max( | |
| max((len(r[1]) for r in rows if r[1]), default=4), | |
| max((len(r[2]) for r in rows if r[2]), default=4), | |
| 3, # minimum: "In" / "Out" header | |
| ) | |
| cache_col = max( | |
| max((len(r[3]) for r in rows if r[3]), default=4), | |
| 5, # minimum: "Cache" header | |
| ) if has_cache else 0 | |
| lines: list[str] = [] | |
| # Header | |
| if has_cache: | |
| lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}} {'Out':>{price_col}} {'Cache':>{cache_col}} /Mtok") | |
| lines.append(f"{indent}{'-' * name_col} {'-' * price_col} {'-' * price_col} {'-' * cache_col}") | |
| else: | |
| lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}} {'Out':>{price_col}} /Mtok") | |
| lines.append(f"{indent}{'-' * name_col} {'-' * price_col} {'-' * price_col}") | |
| for mid, inp, out, cache, is_cur in rows: | |
| marker = " ← current" if is_cur else "" | |
| if has_cache: | |
| lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}} {out:>{price_col}} {cache:>{cache_col}}{marker}") | |
| else: | |
| lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}} {out:>{price_col}}{marker}") | |
| return lines | |
| def fetch_models_with_pricing( | |
| api_key: str | None = None, | |
| base_url: str = "https://openrouter.ai/api", | |
| timeout: float = 8.0, | |
| *, | |
| force_refresh: bool = False, | |
| ) -> dict[str, dict[str, str]]: | |
| """Fetch ``/v1/models`` and return ``{model_id: {prompt, completion}}`` pricing. | |
| Results are cached per *base_url* so repeated calls are free. | |
| Works with any OpenRouter-compatible endpoint (OpenRouter, Nous Portal). | |
| """ | |
| cache_key = (base_url or "").rstrip("/") | |
| if not force_refresh and cache_key in _pricing_cache: | |
| return _pricing_cache[cache_key] | |
| url = cache_key.rstrip("/") + "/v1/models" | |
| headers: dict[str, str] = {"Accept": "application/json"} | |
| if api_key: | |
| headers["Authorization"] = f"Bearer {api_key}" | |
| try: | |
| req = urllib.request.Request(url, headers=headers) | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| payload = json.loads(resp.read().decode()) | |
| except Exception: | |
| _pricing_cache[cache_key] = {} | |
| return {} | |
| result: dict[str, dict[str, str]] = {} | |
| for item in payload.get("data", []): | |
| mid = item.get("id") | |
| pricing = item.get("pricing") | |
| if mid and isinstance(pricing, dict): | |
| entry: dict[str, str] = { | |
| "prompt": str(pricing.get("prompt", "")), | |
| "completion": str(pricing.get("completion", "")), | |
| } | |
| if pricing.get("input_cache_read"): | |
| entry["input_cache_read"] = str(pricing["input_cache_read"]) | |
| if pricing.get("input_cache_write"): | |
| entry["input_cache_write"] = str(pricing["input_cache_write"]) | |
| result[mid] = entry | |
| _pricing_cache[cache_key] = result | |
| return result | |
| def _resolve_openrouter_api_key() -> str: | |
| """Best-effort OpenRouter API key for pricing fetch.""" | |
| return os.getenv("OPENROUTER_API_KEY", "").strip() | |
| def _resolve_nous_pricing_credentials() -> tuple[str, str]: | |
| """Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings.""" | |
| try: | |
| from hermes_cli.auth import resolve_nous_runtime_credentials | |
| creds = resolve_nous_runtime_credentials() | |
| if creds: | |
| return (creds.get("api_key", ""), creds.get("base_url", "")) | |
| except Exception: | |
| pass | |
| return ("", "") | |
| def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]: | |
| """Return live pricing for providers that support it (openrouter, nous).""" | |
| normalized = normalize_provider(provider) | |
| if normalized == "openrouter": | |
| return fetch_models_with_pricing( | |
| api_key=_resolve_openrouter_api_key(), | |
| base_url="https://openrouter.ai/api", | |
| force_refresh=force_refresh, | |
| ) | |
| if normalized == "nous": | |
| api_key, base_url = _resolve_nous_pricing_credentials() | |
| if base_url: | |
| # Nous base_url typically looks like https://inference-api.nousresearch.com/v1 | |
| # We need the part before /v1 for our fetch function | |
| stripped = base_url.rstrip("/") | |
| if stripped.endswith("/v1"): | |
| stripped = stripped[:-3] | |
| return fetch_models_with_pricing( | |
| api_key=api_key, | |
| base_url=stripped, | |
| force_refresh=force_refresh, | |
| ) | |
| return {} | |
| # All provider IDs and aliases that are valid for the provider:model syntax. | |
| _KNOWN_PROVIDER_NAMES: set[str] = ( | |
| set(_PROVIDER_LABELS.keys()) | |
| | set(_PROVIDER_ALIASES.keys()) | |
| | {"openrouter", "custom"} | |
| ) | |
| def list_available_providers() -> list[dict[str, str]]: | |
| """Return info about all providers the user could use with ``provider:model``. | |
| Each dict has ``id``, ``label``, and ``aliases``. | |
| Checks which providers have valid credentials configured. | |
| """ | |
| # Canonical providers in display order | |
| _PROVIDER_ORDER = [ | |
| "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", | |
| "gemini", "huggingface", | |
| "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba", | |
| "qwen-oauth", "xiaomi", | |
| "opencode-zen", "opencode-go", | |
| "ai-gateway", "deepseek", "custom", | |
| ] | |
| # Build reverse alias map | |
| aliases_for: dict[str, list[str]] = {} | |
| for alias, canonical in _PROVIDER_ALIASES.items(): | |
| aliases_for.setdefault(canonical, []).append(alias) | |
| result = [] | |
| for pid in _PROVIDER_ORDER: | |
| label = _PROVIDER_LABELS.get(pid, pid) | |
| alias_list = aliases_for.get(pid, []) | |
| # Check if this provider has credentials available | |
| has_creds = False | |
| try: | |
| from hermes_cli.auth import get_auth_status, has_usable_secret | |
| if pid == "custom": | |
| custom_base_url = _get_custom_base_url() or "" | |
| has_creds = bool(custom_base_url.strip()) | |
| elif pid == "openrouter": | |
| has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", "")) | |
| else: | |
| status = get_auth_status(pid) | |
| has_creds = bool(status.get("logged_in") or status.get("configured")) | |
| except Exception: | |
| pass | |
| result.append({ | |
| "id": pid, | |
| "label": label, | |
| "aliases": alias_list, | |
| "authenticated": has_creds, | |
| }) | |
| return result | |
| def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]: | |
| """Parse ``/model`` input into ``(provider, model)``. | |
| Supports ``provider:model`` syntax to switch providers at runtime:: | |
| openrouter:anthropic/claude-sonnet-4.5 → ("openrouter", "anthropic/claude-sonnet-4.5") | |
| nous:hermes-3 → ("nous", "hermes-3") | |
| anthropic/claude-sonnet-4.5 → (current_provider, "anthropic/claude-sonnet-4.5") | |
| gpt-5.4 → (current_provider, "gpt-5.4") | |
| The colon is only treated as a provider delimiter if the left side is a | |
| recognized provider name or alias. This avoids misinterpreting model names | |
| that happen to contain colons (e.g. ``anthropic/claude-3.5-sonnet:beta``). | |
| Returns ``(provider, model)`` where *provider* is either the explicit | |
| provider from the input or *current_provider* if none was specified. | |
| """ | |
| stripped = raw.strip() | |
| colon = stripped.find(":") | |
| if colon > 0: | |
| provider_part = stripped[:colon].strip().lower() | |
| model_part = stripped[colon + 1:].strip() | |
| if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES: | |
| # Support custom:name:model triple syntax for named custom | |
| # providers. ``custom:local:qwen`` → ("custom:local", "qwen"). | |
| # Single colon ``custom:qwen`` → ("custom", "qwen") as before. | |
| if provider_part == "custom" and ":" in model_part: | |
| second_colon = model_part.find(":") | |
| custom_name = model_part[:second_colon].strip() | |
| actual_model = model_part[second_colon + 1:].strip() | |
| if custom_name and actual_model: | |
| return (f"custom:{custom_name}", actual_model) | |
| return (normalize_provider(provider_part), model_part) | |
| return (current_provider, stripped) | |
| def _get_custom_base_url() -> str: | |
| """Get the custom endpoint base_url from config.yaml.""" | |
| try: | |
| from hermes_cli.config import load_config | |
| config = load_config() | |
| model_cfg = config.get("model", {}) | |
| if isinstance(model_cfg, dict): | |
| return str(model_cfg.get("base_url", "")).strip() | |
| except Exception: | |
| pass | |
| return "" | |
| def curated_models_for_provider( | |
| provider: Optional[str], | |
| *, | |
| force_refresh: bool = False, | |
| ) -> list[tuple[str, str]]: | |
| """Return ``(model_id, description)`` tuples for a provider's model list. | |
| Tries to fetch the live model list from the provider's API first, | |
| falling back to the static ``_PROVIDER_MODELS`` catalog if the API | |
| is unreachable. | |
| """ | |
| normalized = normalize_provider(provider) | |
| if normalized == "openrouter": | |
| return fetch_openrouter_models(force_refresh=force_refresh) | |
| # Try live API first (Codex, Nous, etc. all support /models) | |
| live = provider_model_ids(normalized) | |
| if live: | |
| return [(m, "") for m in live] | |
| # Fallback to static catalog | |
| models = _PROVIDER_MODELS.get(normalized, []) | |
| return [(m, "") for m in models] | |
| def detect_provider_for_model( | |
| model_name: str, | |
| current_provider: str, | |
| ) -> Optional[tuple[str, str]]: | |
| """Auto-detect the best provider for a model name. | |
| Returns ``(provider_id, model_name)`` — the model name may be remapped | |
| (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter). | |
| Returns ``None`` when no confident match is found. | |
| Priority: | |
| 0. Bare provider name → switch to that provider's default model | |
| 1. Direct provider with credentials (highest) | |
| 2. Direct provider without credentials → remap to OpenRouter slug | |
| 3. OpenRouter catalog match | |
| """ | |
| name = (model_name or "").strip() | |
| if not name: | |
| return None | |
| name_lower = name.lower() | |
| # --- Step 0: bare provider name typed as model --- | |
| # If someone types `/model nous` or `/model anthropic`, treat it as a | |
| # provider switch and pick the first model from that provider's catalog. | |
| # Skip "custom" and "openrouter" — custom has no model catalog, and | |
| # openrouter requires an explicit model name to be useful. | |
| resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower) | |
| if resolved_provider not in {"custom", "openrouter"}: | |
| default_models = _PROVIDER_MODELS.get(resolved_provider, []) | |
| if ( | |
| resolved_provider in _PROVIDER_LABELS | |
| and default_models | |
| and resolved_provider != normalize_provider(current_provider) | |
| ): | |
| return (resolved_provider, default_models[0]) | |
| # Aggregators list other providers' models — never auto-switch TO them | |
| _AGGREGATORS = {"nous", "openrouter"} | |
| # If the model belongs to the current provider's catalog, don't suggest switching | |
| current_models = _PROVIDER_MODELS.get(current_provider, []) | |
| if any(name_lower == m.lower() for m in current_models): | |
| return None | |
| # --- Step 1: check static provider catalogs for a direct match --- | |
| direct_match: Optional[str] = None | |
| for pid, models in _PROVIDER_MODELS.items(): | |
| if pid == current_provider or pid in _AGGREGATORS: | |
| continue | |
| if any(name_lower == m.lower() for m in models): | |
| direct_match = pid | |
| break | |
| if direct_match: | |
| # Check if we have credentials for this provider | |
| has_creds = False | |
| try: | |
| from hermes_cli.auth import PROVIDER_REGISTRY | |
| pconfig = PROVIDER_REGISTRY.get(direct_match) | |
| if pconfig: | |
| import os | |
| for env_var in pconfig.api_key_env_vars: | |
| if os.getenv(env_var, "").strip(): | |
| has_creds = True | |
| break | |
| except Exception: | |
| pass | |
| if has_creds: | |
| return (direct_match, name) | |
| # No direct creds — try to find this model on OpenRouter instead | |
| or_slug = _find_openrouter_slug(name) | |
| if or_slug: | |
| return ("openrouter", or_slug) | |
| # Still return the direct provider — credential resolution will | |
| # give a clear error rather than silently using the wrong provider | |
| return (direct_match, name) | |
| # --- Step 2: check OpenRouter catalog --- | |
| # First try exact match (handles provider/model format) | |
| or_slug = _find_openrouter_slug(name) | |
| if or_slug: | |
| if current_provider != "openrouter": | |
| return ("openrouter", or_slug) | |
| # Already on openrouter, just return the resolved slug | |
| if or_slug != name: | |
| return ("openrouter", or_slug) | |
| return None # already on openrouter with matching name | |
| return None | |
| def _find_openrouter_slug(model_name: str) -> Optional[str]: | |
| """Find the full OpenRouter model slug for a bare or partial model name. | |
| Handles: | |
| - Exact match: ``anthropic/claude-opus-4.6`` → as-is | |
| - Bare name: ``deepseek-chat`` → ``deepseek/deepseek-chat`` | |
| - Bare name: ``claude-opus-4.6`` → ``anthropic/claude-opus-4.6`` | |
| """ | |
| name_lower = model_name.strip().lower() | |
| if not name_lower: | |
| return None | |
| # Exact match (already has provider/ prefix) | |
| for mid in model_ids(): | |
| if name_lower == mid.lower(): | |
| return mid | |
| # Try matching just the model part (after the /) | |
| for mid in model_ids(): | |
| if "/" in mid: | |
| _, model_part = mid.split("/", 1) | |
| if name_lower == model_part.lower(): | |
| return mid | |
| return None | |
| def normalize_provider(provider: Optional[str]) -> str: | |
| """Normalize provider aliases to Hermes' canonical provider ids. | |
| Note: ``"auto"`` passes through unchanged — use | |
| ``hermes_cli.auth.resolve_provider()`` to resolve it to a concrete | |
| provider based on credentials and environment. | |
| """ | |
| normalized = (provider or "openrouter").strip().lower() | |
| return _PROVIDER_ALIASES.get(normalized, normalized) | |
| def provider_label(provider: Optional[str]) -> str: | |
| """Return a human-friendly label for a provider id or alias.""" | |
| original = (provider or "openrouter").strip() | |
| normalized = original.lower() | |
| if normalized == "auto": | |
| return "Auto" | |
| normalized = normalize_provider(normalized) | |
| return _PROVIDER_LABELS.get(normalized, original or "OpenRouter") | |
| # Models that support OpenAI Priority Processing (service_tier="priority"). | |
| # See https://openai.com/api-priority-processing/ for the canonical list. | |
| # Only the bare model slug is stored (no vendor prefix). | |
| _PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({ | |
| "gpt-5.4", | |
| "gpt-5.4-mini", | |
| "gpt-5.2", | |
| "gpt-5.1", | |
| "gpt-5", | |
| "gpt-5-mini", | |
| "gpt-4.1", | |
| "gpt-4.1-mini", | |
| "gpt-4.1-nano", | |
| "gpt-4o", | |
| "gpt-4o-mini", | |
| "o3", | |
| "o4-mini", | |
| }) | |
| # Models that support Anthropic Fast Mode (speed="fast"). | |
| # See https://platform.claude.com/docs/en/build-with-claude/fast-mode | |
| # Currently only Claude Opus 4.6. Both hyphen and dot variants are stored | |
| # to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6). | |
| _ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({ | |
| "claude-opus-4-6", | |
| "claude-opus-4.6", | |
| }) | |
| def _strip_vendor_prefix(model_id: str) -> str: | |
| """Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6').""" | |
| raw = str(model_id or "").strip().lower() | |
| if "/" in raw: | |
| raw = raw.split("/", 1)[1] | |
| return raw | |
| def model_supports_fast_mode(model_id: Optional[str]) -> bool: | |
| """Return whether Hermes should expose the /fast toggle for this model.""" | |
| raw = _strip_vendor_prefix(str(model_id or "")) | |
| if raw in _PRIORITY_PROCESSING_MODELS: | |
| return True | |
| # Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401) | |
| # and OpenRouter variant tags (:fast, :beta) for matching. | |
| base = raw.split(":")[0] | |
| return base in _ANTHROPIC_FAST_MODE_MODELS | |
| def _is_anthropic_fast_model(model_id: Optional[str]) -> bool: | |
| """Return True if the model supports Anthropic's fast mode (speed='fast').""" | |
| raw = _strip_vendor_prefix(str(model_id or "")) | |
| base = raw.split(":")[0] | |
| return base in _ANTHROPIC_FAST_MODE_MODELS | |
| def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None: | |
| """Return request_overrides for fast/priority mode, or None if unsupported. | |
| Returns provider-appropriate overrides: | |
| - OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing) | |
| - Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta) | |
| The overrides are injected into the API request kwargs by | |
| ``_build_api_kwargs`` in run_agent.py — each API path handles its own | |
| keys (service_tier for OpenAI/Codex, speed for Anthropic Messages). | |
| """ | |
| if not model_supports_fast_mode(model_id): | |
| return None | |
| if _is_anthropic_fast_model(model_id): | |
| return {"speed": "fast"} | |
| return {"service_tier": "priority"} | |
| def _resolve_copilot_catalog_api_key() -> str: | |
| """Best-effort GitHub token for fetching the Copilot model catalog.""" | |
| try: | |
| from hermes_cli.auth import resolve_api_key_provider_credentials | |
| creds = resolve_api_key_provider_credentials("copilot") | |
| return str(creds.get("api_key") or "").strip() | |
| except Exception: | |
| return "" | |
| def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]: | |
| """Return the best known model catalog for a provider. | |
| Tries live API endpoints for providers that support them (Codex, Nous), | |
| falling back to static lists. | |
| """ | |
| normalized = normalize_provider(provider) | |
| if normalized == "openrouter": | |
| return model_ids(force_refresh=force_refresh) | |
| if normalized == "openai-codex": | |
| from hermes_cli.codex_models import get_codex_model_ids | |
| return get_codex_model_ids() | |
| if normalized in {"copilot", "copilot-acp"}: | |
| try: | |
| live = _fetch_github_models(_resolve_copilot_catalog_api_key()) | |
| if live: | |
| return live | |
| except Exception: | |
| pass | |
| if normalized == "copilot-acp": | |
| return list(_PROVIDER_MODELS.get("copilot", [])) | |
| if normalized == "nous": | |
| # Try live Nous Portal /models endpoint | |
| try: | |
| from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials | |
| creds = resolve_nous_runtime_credentials() | |
| if creds: | |
| live = fetch_nous_models(api_key=creds.get("api_key", ""), inference_base_url=creds.get("base_url", "")) | |
| if live: | |
| return live | |
| except Exception: | |
| pass | |
| if normalized == "anthropic": | |
| live = _fetch_anthropic_models() | |
| if live: | |
| return live | |
| if normalized == "ai-gateway": | |
| live = _fetch_ai_gateway_models() | |
| if live: | |
| return live | |
| if normalized == "custom": | |
| base_url = _get_custom_base_url() | |
| if base_url: | |
| # Try common API key env vars for custom endpoints | |
| api_key = ( | |
| os.getenv("CUSTOM_API_KEY", "") | |
| or os.getenv("OPENAI_API_KEY", "") | |
| or os.getenv("OPENROUTER_API_KEY", "") | |
| ) | |
| live = fetch_api_models(api_key, base_url) | |
| if live: | |
| return live | |
| return list(_PROVIDER_MODELS.get(normalized, [])) | |
| def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]: | |
| """Fetch available models from the Anthropic /v1/models endpoint. | |
| Uses resolve_anthropic_token() to find credentials (env vars or | |
| Claude Code auto-discovery). Returns sorted model IDs or None. | |
| """ | |
| try: | |
| from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token | |
| except ImportError: | |
| return None | |
| token = resolve_anthropic_token() | |
| if not token: | |
| return None | |
| headers: dict[str, str] = {"anthropic-version": "2023-06-01"} | |
| if _is_oauth_token(token): | |
| headers["Authorization"] = f"Bearer {token}" | |
| from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS | |
| headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS) | |
| else: | |
| headers["x-api-key"] = token | |
| req = urllib.request.Request( | |
| "https://api.anthropic.com/v1/models", | |
| headers=headers, | |
| ) | |
| try: | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| data = json.loads(resp.read().decode()) | |
| models = [m["id"] for m in data.get("data", []) if m.get("id")] | |
| # Sort: latest/largest first (opus > sonnet > haiku, higher version first) | |
| return sorted(models, key=lambda m: ( | |
| "opus" not in m, # opus first | |
| "sonnet" not in m, # then sonnet | |
| "haiku" not in m, # then haiku | |
| m, # alphabetical within tier | |
| )) | |
| except Exception as e: | |
| import logging | |
| logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e) | |
| return None | |
| def _payload_items(payload: Any) -> list[dict[str, Any]]: | |
| if isinstance(payload, list): | |
| return [item for item in payload if isinstance(item, dict)] | |
| if isinstance(payload, dict): | |
| data = payload.get("data", []) | |
| if isinstance(data, list): | |
| return [item for item in data if isinstance(item, dict)] | |
| return [] | |
| def copilot_default_headers() -> dict[str, str]: | |
| """Standard headers for Copilot API requests. | |
| Includes Openai-Intent and x-initiator headers that opencode and the | |
| Copilot CLI send on every request. | |
| """ | |
| try: | |
| from hermes_cli.copilot_auth import copilot_request_headers | |
| return copilot_request_headers(is_agent_turn=True) | |
| except ImportError: | |
| return { | |
| "Editor-Version": COPILOT_EDITOR_VERSION, | |
| "User-Agent": "HermesAgent/1.0", | |
| "Openai-Intent": "conversation-edits", | |
| "x-initiator": "agent", | |
| } | |
| def _copilot_catalog_item_is_text_model(item: dict[str, Any]) -> bool: | |
| model_id = str(item.get("id") or "").strip() | |
| if not model_id: | |
| return False | |
| if item.get("model_picker_enabled") is False: | |
| return False | |
| capabilities = item.get("capabilities") | |
| if isinstance(capabilities, dict): | |
| model_type = str(capabilities.get("type") or "").strip().lower() | |
| if model_type and model_type != "chat": | |
| return False | |
| supported_endpoints = item.get("supported_endpoints") | |
| if isinstance(supported_endpoints, list): | |
| normalized_endpoints = { | |
| str(endpoint).strip() | |
| for endpoint in supported_endpoints | |
| if str(endpoint).strip() | |
| } | |
| if normalized_endpoints and not normalized_endpoints.intersection( | |
| {"/chat/completions", "/responses", "/v1/messages"} | |
| ): | |
| return False | |
| return True | |
| def fetch_github_model_catalog( | |
| api_key: Optional[str] = None, timeout: float = 5.0 | |
| ) -> Optional[list[dict[str, Any]]]: | |
| """Fetch the live GitHub Copilot model catalog for this account.""" | |
| attempts: list[dict[str, str]] = [] | |
| if api_key: | |
| attempts.append({ | |
| **copilot_default_headers(), | |
| "Authorization": f"Bearer {api_key}", | |
| }) | |
| attempts.append(copilot_default_headers()) | |
| for headers in attempts: | |
| req = urllib.request.Request(COPILOT_MODELS_URL, headers=headers) | |
| try: | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| data = json.loads(resp.read().decode()) | |
| items = _payload_items(data) | |
| models: list[dict[str, Any]] = [] | |
| seen_ids: set[str] = set() | |
| for item in items: | |
| if not _copilot_catalog_item_is_text_model(item): | |
| continue | |
| model_id = str(item.get("id") or "").strip() | |
| if not model_id or model_id in seen_ids: | |
| continue | |
| seen_ids.add(model_id) | |
| models.append(item) | |
| if models: | |
| return models | |
| except Exception: | |
| continue | |
| return None | |
| def _is_github_models_base_url(base_url: Optional[str]) -> bool: | |
| normalized = (base_url or "").strip().rstrip("/").lower() | |
| return ( | |
| normalized.startswith(COPILOT_BASE_URL) | |
| or normalized.startswith("https://models.github.ai/inference") | |
| ) | |
| def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]: | |
| catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout) | |
| if not catalog: | |
| return None | |
| return [item.get("id", "") for item in catalog if item.get("id")] | |
| _COPILOT_MODEL_ALIASES = { | |
| "openai/gpt-5": "gpt-5-mini", | |
| "openai/gpt-5-chat": "gpt-5-mini", | |
| "openai/gpt-5-mini": "gpt-5-mini", | |
| "openai/gpt-5-nano": "gpt-5-mini", | |
| "openai/gpt-4.1": "gpt-4.1", | |
| "openai/gpt-4.1-mini": "gpt-4.1", | |
| "openai/gpt-4.1-nano": "gpt-4.1", | |
| "openai/gpt-4o": "gpt-4o", | |
| "openai/gpt-4o-mini": "gpt-4o-mini", | |
| "openai/o1": "gpt-5.2", | |
| "openai/o1-mini": "gpt-5-mini", | |
| "openai/o1-preview": "gpt-5.2", | |
| "openai/o3": "gpt-5.3-codex", | |
| "openai/o3-mini": "gpt-5-mini", | |
| "openai/o4-mini": "gpt-5-mini", | |
| "anthropic/claude-opus-4.6": "claude-opus-4.6", | |
| "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6", | |
| "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5", | |
| "anthropic/claude-haiku-4.5": "claude-haiku-4.5", | |
| } | |
| def _copilot_catalog_ids( | |
| catalog: Optional[list[dict[str, Any]]] = None, | |
| api_key: Optional[str] = None, | |
| ) -> set[str]: | |
| if catalog is None and api_key: | |
| catalog = fetch_github_model_catalog(api_key=api_key) | |
| if not catalog: | |
| return set() | |
| return { | |
| str(item.get("id") or "").strip() | |
| for item in catalog | |
| if str(item.get("id") or "").strip() | |
| } | |
| def normalize_copilot_model_id( | |
| model_id: Optional[str], | |
| *, | |
| catalog: Optional[list[dict[str, Any]]] = None, | |
| api_key: Optional[str] = None, | |
| ) -> str: | |
| raw = str(model_id or "").strip() | |
| if not raw: | |
| return "" | |
| catalog_ids = _copilot_catalog_ids(catalog=catalog, api_key=api_key) | |
| alias = _COPILOT_MODEL_ALIASES.get(raw) | |
| if alias: | |
| return alias | |
| candidates = [raw] | |
| if "/" in raw: | |
| candidates.append(raw.split("/", 1)[1].strip()) | |
| if raw.endswith("-mini"): | |
| candidates.append(raw[:-5]) | |
| if raw.endswith("-nano"): | |
| candidates.append(raw[:-5]) | |
| if raw.endswith("-chat"): | |
| candidates.append(raw[:-5]) | |
| seen: set[str] = set() | |
| for candidate in candidates: | |
| if not candidate or candidate in seen: | |
| continue | |
| seen.add(candidate) | |
| if candidate in _COPILOT_MODEL_ALIASES: | |
| return _COPILOT_MODEL_ALIASES[candidate] | |
| if candidate in catalog_ids: | |
| return candidate | |
| if "/" in raw: | |
| return raw.split("/", 1)[1].strip() | |
| return raw | |
| def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]: | |
| raw = (model_id or "").strip().lower() | |
| if raw.startswith(("openai/o1", "openai/o3", "openai/o4", "o1", "o3", "o4")): | |
| return list(COPILOT_REASONING_EFFORTS_O_SERIES) | |
| normalized = normalize_copilot_model_id(model_id).lower() | |
| if normalized.startswith("gpt-5"): | |
| return list(COPILOT_REASONING_EFFORTS_GPT5) | |
| return [] | |
| def _should_use_copilot_responses_api(model_id: str) -> bool: | |
| """Decide whether a Copilot model should use the Responses API. | |
| Replicates opencode's ``shouldUseCopilotResponsesApi`` logic: | |
| GPT-5+ models use Responses API, except ``gpt-5-mini`` which uses | |
| Chat Completions. All non-GPT models (Claude, Gemini, etc.) use | |
| Chat Completions. | |
| """ | |
| import re | |
| match = re.match(r"^gpt-(\d+)", model_id) | |
| if not match: | |
| return False | |
| major = int(match.group(1)) | |
| return major >= 5 and not model_id.startswith("gpt-5-mini") | |
| def copilot_model_api_mode( | |
| model_id: Optional[str], | |
| *, | |
| catalog: Optional[list[dict[str, Any]]] = None, | |
| api_key: Optional[str] = None, | |
| ) -> str: | |
| """Determine the API mode for a Copilot model. | |
| Uses the model ID pattern (matching opencode's approach) as the | |
| primary signal. Falls back to the catalog's ``supported_endpoints`` | |
| only for models not covered by the pattern check. | |
| """ | |
| normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key) | |
| if not normalized: | |
| return "chat_completions" | |
| # Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi) | |
| if _should_use_copilot_responses_api(normalized): | |
| return "codex_responses" | |
| # Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.) | |
| if catalog is None and api_key: | |
| catalog = fetch_github_model_catalog(api_key=api_key) | |
| if catalog: | |
| catalog_entry = next((item for item in catalog if item.get("id") == normalized), None) | |
| if isinstance(catalog_entry, dict): | |
| supported_endpoints = { | |
| str(endpoint).strip() | |
| for endpoint in (catalog_entry.get("supported_endpoints") or []) | |
| if str(endpoint).strip() | |
| } | |
| # For non-GPT-5 models, check if they only support messages API | |
| if "/v1/messages" in supported_endpoints and "/chat/completions" not in supported_endpoints: | |
| return "anthropic_messages" | |
| return "chat_completions" | |
| def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str: | |
| """Normalize OpenCode config IDs to the bare model slug used in API requests.""" | |
| provider = normalize_provider(provider_id) | |
| current = str(model_id or "").strip() | |
| if not current or provider not in {"opencode-zen", "opencode-go"}: | |
| return current | |
| prefix = f"{provider}/" | |
| if current.lower().startswith(prefix): | |
| return current[len(prefix):] | |
| return current | |
| def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str: | |
| """Determine the API mode for an OpenCode Zen / Go model. | |
| OpenCode routes different models behind different API surfaces: | |
| - GPT-5 / Codex models on Zen use ``/v1/responses`` | |
| - Claude models on Zen use ``/v1/messages`` | |
| - MiniMax models on Go use ``/v1/messages`` | |
| - GLM / Kimi on Go use ``/v1/chat/completions`` | |
| - Other Zen models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.) use | |
| ``/v1/chat/completions`` | |
| This follows the published OpenCode docs for Zen and Go endpoints. | |
| """ | |
| provider = normalize_provider(provider_id) | |
| normalized = normalize_opencode_model_id(provider_id, model_id).lower() | |
| if not normalized: | |
| return "chat_completions" | |
| if provider == "opencode-go": | |
| if normalized.startswith("minimax-"): | |
| return "anthropic_messages" | |
| return "chat_completions" | |
| if provider == "opencode-zen": | |
| if normalized.startswith("claude-"): | |
| return "anthropic_messages" | |
| if normalized.startswith("gpt-"): | |
| return "codex_responses" | |
| return "chat_completions" | |
| return "chat_completions" | |
| def github_model_reasoning_efforts( | |
| model_id: Optional[str], | |
| *, | |
| catalog: Optional[list[dict[str, Any]]] = None, | |
| api_key: Optional[str] = None, | |
| ) -> list[str]: | |
| """Return supported reasoning-effort levels for a Copilot-visible model.""" | |
| normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key) | |
| if not normalized: | |
| return [] | |
| catalog_entry = None | |
| if catalog is not None: | |
| catalog_entry = next((item for item in catalog if item.get("id") == normalized), None) | |
| elif api_key: | |
| fetched_catalog = fetch_github_model_catalog(api_key=api_key) | |
| if fetched_catalog: | |
| catalog_entry = next((item for item in fetched_catalog if item.get("id") == normalized), None) | |
| if catalog_entry is not None: | |
| capabilities = catalog_entry.get("capabilities") | |
| if isinstance(capabilities, dict): | |
| supports = capabilities.get("supports") | |
| if isinstance(supports, dict): | |
| efforts = supports.get("reasoning_effort") | |
| if isinstance(efforts, list): | |
| normalized_efforts = [ | |
| str(effort).strip().lower() | |
| for effort in efforts | |
| if str(effort).strip() | |
| ] | |
| return list(dict.fromkeys(normalized_efforts)) | |
| return [] | |
| legacy_capabilities = { | |
| str(capability).strip().lower() | |
| for capability in catalog_entry.get("capabilities", []) | |
| if str(capability).strip() | |
| } | |
| if "reasoning" not in legacy_capabilities: | |
| return [] | |
| return _github_reasoning_efforts_for_model_id(str(model_id or normalized)) | |
| def probe_api_models( | |
| api_key: Optional[str], | |
| base_url: Optional[str], | |
| timeout: float = 5.0, | |
| ) -> dict[str, Any]: | |
| """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.""" | |
| normalized = (base_url or "").strip().rstrip("/") | |
| if not normalized: | |
| return { | |
| "models": None, | |
| "probed_url": None, | |
| "resolved_base_url": "", | |
| "suggested_base_url": None, | |
| "used_fallback": False, | |
| } | |
| if _is_github_models_base_url(normalized): | |
| models = _fetch_github_models(api_key=api_key, timeout=timeout) | |
| return { | |
| "models": models, | |
| "probed_url": COPILOT_MODELS_URL, | |
| "resolved_base_url": COPILOT_BASE_URL, | |
| "suggested_base_url": None, | |
| "used_fallback": False, | |
| } | |
| if normalized.endswith("/v1"): | |
| alternate_base = normalized[:-3].rstrip("/") | |
| else: | |
| alternate_base = normalized + "/v1" | |
| candidates: list[tuple[str, bool]] = [(normalized, False)] | |
| if alternate_base and alternate_base != normalized: | |
| candidates.append((alternate_base, True)) | |
| tried: list[str] = [] | |
| headers: dict[str, str] = {} | |
| if api_key: | |
| headers["Authorization"] = f"Bearer {api_key}" | |
| if normalized.startswith(COPILOT_BASE_URL): | |
| headers.update(copilot_default_headers()) | |
| for candidate_base, is_fallback in candidates: | |
| url = candidate_base.rstrip("/") + "/models" | |
| tried.append(url) | |
| req = urllib.request.Request(url, headers=headers) | |
| try: | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| data = json.loads(resp.read().decode()) | |
| return { | |
| "models": [m.get("id", "") for m in data.get("data", [])], | |
| "probed_url": url, | |
| "resolved_base_url": candidate_base.rstrip("/"), | |
| "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized, | |
| "used_fallback": is_fallback, | |
| } | |
| except Exception: | |
| continue | |
| return { | |
| "models": None, | |
| "probed_url": tried[0] if tried else normalized.rstrip("/") + "/models", | |
| "resolved_base_url": normalized, | |
| "suggested_base_url": alternate_base if alternate_base != normalized else None, | |
| "used_fallback": False, | |
| } | |
| def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]: | |
| """Fetch available language models with tool-use from AI Gateway.""" | |
| api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip() | |
| if not api_key: | |
| return None | |
| base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip() | |
| if not base_url: | |
| from hermes_constants import AI_GATEWAY_BASE_URL | |
| base_url = AI_GATEWAY_BASE_URL | |
| url = base_url.rstrip("/") + "/models" | |
| headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"} | |
| req = urllib.request.Request(url, headers=headers) | |
| try: | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| data = json.loads(resp.read().decode()) | |
| return [ | |
| m["id"] | |
| for m in data.get("data", []) | |
| if m.get("id") | |
| and m.get("type") == "language" | |
| and "tool-use" in (m.get("tags") or []) | |
| ] | |
| except Exception: | |
| return None | |
| def fetch_api_models( | |
| api_key: Optional[str], | |
| base_url: Optional[str], | |
| timeout: float = 5.0, | |
| ) -> Optional[list[str]]: | |
| """Fetch the list of available model IDs from the provider's ``/models`` endpoint. | |
| Returns a list of model ID strings, or ``None`` if the endpoint could not | |
| be reached (network error, timeout, auth failure, etc.). | |
| """ | |
| return probe_api_models(api_key, base_url, timeout=timeout).get("models") | |
| def validate_requested_model( | |
| model_name: str, | |
| provider: Optional[str], | |
| *, | |
| api_key: Optional[str] = None, | |
| base_url: Optional[str] = None, | |
| ) -> dict[str, Any]: | |
| """ | |
| Validate a ``/model`` value for the active provider. | |
| Performs format checks first, then probes the live API to confirm | |
| the model actually exists. | |
| Returns a dict with: | |
| - accepted: whether the CLI should switch to the requested model now | |
| - persist: whether it is safe to save to config | |
| - recognized: whether it matched a known provider catalog | |
| - message: optional warning / guidance for the user | |
| """ | |
| requested = (model_name or "").strip() | |
| normalized = normalize_provider(provider) | |
| if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url: | |
| normalized = "custom" | |
| requested_for_lookup = requested | |
| if normalized == "copilot": | |
| requested_for_lookup = normalize_copilot_model_id( | |
| requested, | |
| api_key=api_key, | |
| ) or requested | |
| if not requested: | |
| return { | |
| "accepted": False, | |
| "persist": False, | |
| "recognized": False, | |
| "message": "Model name cannot be empty.", | |
| } | |
| if any(ch.isspace() for ch in requested): | |
| return { | |
| "accepted": False, | |
| "persist": False, | |
| "recognized": False, | |
| "message": "Model names cannot contain spaces.", | |
| } | |
| if normalized == "custom": | |
| probe = probe_api_models(api_key, base_url) | |
| api_models = probe.get("models") | |
| if api_models is not None: | |
| if requested_for_lookup in set(api_models): | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": True, | |
| "message": None, | |
| } | |
| suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5) | |
| suggestion_text = "" | |
| if suggestions: | |
| suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) | |
| message = ( | |
| f"Note: `{requested}` was not found in this custom endpoint's model listing " | |
| f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models." | |
| f"{suggestion_text}" | |
| ) | |
| if probe.get("used_fallback"): | |
| message += ( | |
| f"\n Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. " | |
| f"Consider saving that as your base URL." | |
| ) | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": False, | |
| "message": message, | |
| } | |
| message = ( | |
| f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. " | |
| f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification." | |
| ) | |
| if probe.get("suggested_base_url"): | |
| message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`" | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": False, | |
| "message": message, | |
| } | |
| # OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path. | |
| if normalized == "openai-codex": | |
| try: | |
| codex_models = provider_model_ids("openai-codex") | |
| except Exception: | |
| codex_models = [] | |
| if codex_models: | |
| if requested_for_lookup in set(codex_models): | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": True, | |
| "message": None, | |
| } | |
| suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5) | |
| suggestion_text = "" | |
| if suggestions: | |
| suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": False, | |
| "message": ( | |
| f"Note: `{requested}` was not found in the OpenAI Codex model listing. " | |
| f"It may still work if your account has access to it." | |
| f"{suggestion_text}" | |
| ), | |
| } | |
| # Probe the live API to check if the model actually exists | |
| api_models = fetch_api_models(api_key, base_url) | |
| if api_models is not None: | |
| if requested_for_lookup in set(api_models): | |
| # API confirmed the model exists | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": True, | |
| "message": None, | |
| } | |
| else: | |
| # API responded but model is not listed. Accept anyway — | |
| # the user may have access to models not shown in the public | |
| # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding | |
| # endpoints even though it's not in /models). Warn but allow. | |
| suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5) | |
| suggestion_text = "" | |
| if suggestions: | |
| suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": False, | |
| "message": ( | |
| f"Note: `{requested}` was not found in this provider's model listing. " | |
| f"It may still work if your plan supports it." | |
| f"{suggestion_text}" | |
| ), | |
| } | |
| # api_models is None — couldn't reach API. Accept and persist, | |
| # but warn so typos don't silently break things. | |
| provider_label = _PROVIDER_LABELS.get(normalized, normalized) | |
| return { | |
| "accepted": True, | |
| "persist": True, | |
| "recognized": False, | |
| "message": ( | |
| f"Could not reach the {provider_label} API to validate `{requested}`. " | |
| f"If the service isn't down, this model may not be valid." | |
| ), | |
| } | |