Spaces:

abc1181
/

HermesAgent

Paused

App Files Files Community

HermesAgent / hermes_cli /models.py

cjovs

Upload 1680 files

b5b9c2e verified about 2 months ago

raw

history blame contribute delete

67.3 kB

	"""
	Canonical model catalogs and lightweight validation helpers.

	Add, remove, or reorder entries here — both `hermes setup` and
	`hermes` provider-selection will pick up the change automatically.
	"""

	from __future__ import annotations

	import json
	import os
	import urllib.request
	import urllib.error
	from difflib import get_close_matches
	from typing import Any, Optional

	# GitHub Copilot endpoint constants. COPILOT_MODELS_URL is derived from
	# COPILOT_BASE_URL so the two cannot drift apart.
	COPILOT_BASE_URL = "https://api.githubcopilot.com"
	COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
	# NOTE(review): presumably sent as an editor-version identifier on Copilot
	# requests — the consumer is not visible in this section; confirm.
	COPILOT_EDITOR_VERSION = "vscode/1.104.1"
	# Reasoning-effort levels per model family; the GPT-5 list additionally
	# includes "minimal".
	COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"]
	COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]


	# Fallback OpenRouter snapshot used when the live catalog is unavailable.
	# Each entry is (model_id, display description shown in menus).
	# Order matters: fetch_openrouter_models() walks this list in order, keeps
	# only ids present in the live catalog, and relabels the first surviving
	# entry as "recommended".
	OPENROUTER_MODELS: list[tuple[str, str]] = [
	("anthropic/claude-opus-4.6", "recommended"),
	("anthropic/claude-sonnet-4.6", ""),
	("qwen/qwen3.6-plus", ""),
	("anthropic/claude-sonnet-4.5", ""),
	("anthropic/claude-haiku-4.5", ""),
	("openai/gpt-5.4", ""),
	("openai/gpt-5.4-mini", ""),
	("xiaomi/mimo-v2-pro", ""),
	("openai/gpt-5.3-codex", ""),
	("google/gemini-3-pro-image-preview", ""),
	("google/gemini-3-flash-preview", ""),
	("google/gemini-3.1-pro-preview", ""),
	("google/gemini-3.1-flash-lite-preview", ""),
	("qwen/qwen3.5-plus-02-15", ""),
	("qwen/qwen3.5-35b-a3b", ""),
	("stepfun/step-3.5-flash", ""),
	("minimax/minimax-m2.7", ""),
	("minimax/minimax-m2.5", ""),
	("z-ai/glm-5.1", ""),
	("z-ai/glm-5-turbo", ""),
	("moonshotai/kimi-k2.5", ""),
	("x-ai/grok-4.20", ""),
	("nvidia/nemotron-3-super-120b-a12b", ""),
	("nvidia/nemotron-3-super-120b-a12b:free", "free"),
	("arcee-ai/trinity-large-preview:free", "free"),
	("arcee-ai/trinity-large-thinking", ""),
	("openai/gpt-5.4-pro", ""),
	("openai/gpt-5.4-nano", ""),
	]

	_openrouter_catalog_cache: list[tuple[str, str]] \| None = None


	def _codex_curated_models() -> list[str]:
	    """Build the curated ``openai-codex`` model list from ``codex_models.py``.

	    The list is a copy of ``DEFAULT_CODEX_MODELS`` passed through
	    ``_add_forward_compat_models``, so the gateway ``/model`` picker and the
	    CLI ``hermes model`` flow share one source of truth instead of a second
	    hand-maintained list.
	    """
	    # Imported inside the function, as in the original, not at module top.
	    from hermes_cli.codex_models import (
	        DEFAULT_CODEX_MODELS,
	        _add_forward_compat_models,
	    )

	    # Copy first so the helper never receives the shared default object.
	    base_models = list(DEFAULT_CODEX_MODELS)
	    return _add_forward_compat_models(base_models)


	# Static model catalog per provider id.
	# The first entry of each list is what get_default_model_for_provider()
	# returns, so ordering is significant. curated_models_for_provider() uses
	# these lists only when no live model list is available.
	_PROVIDER_MODELS: dict[str, list[str]] = {
	"nous": [
	"xiaomi/mimo-v2-pro",
	"anthropic/claude-opus-4.6",
	"anthropic/claude-sonnet-4.6",
	"anthropic/claude-sonnet-4.5",
	"anthropic/claude-haiku-4.5",
	"openai/gpt-5.4",
	"openai/gpt-5.4-mini",
	"openai/gpt-5.3-codex",
	"google/gemini-3-pro-preview",
	"google/gemini-3-flash-preview",
	"google/gemini-3.1-pro-preview",
	"google/gemini-3.1-flash-lite-preview",
	"qwen/qwen3.5-plus-02-15",
	"qwen/qwen3.5-35b-a3b",
	"stepfun/step-3.5-flash",
	"minimax/minimax-m2.7",
	"minimax/minimax-m2.5",
	"z-ai/glm-5.1",
	"z-ai/glm-5-turbo",
	"moonshotai/kimi-k2.5",
	"x-ai/grok-4.20-beta",
	"nvidia/nemotron-3-super-120b-a12b",
	"nvidia/nemotron-3-super-120b-a12b:free",
	"arcee-ai/trinity-large-preview:free",
	"arcee-ai/trinity-large-thinking",
	"openai/gpt-5.4-pro",
	"openai/gpt-5.4-nano",
	],
	# Computed once, when this module is imported (see _codex_curated_models).
	"openai-codex": _codex_curated_models(),
	"copilot-acp": [
	"copilot-acp",
	],
	"copilot": [
	"gpt-5.4",
	"gpt-5.4-mini",
	"gpt-5-mini",
	"gpt-5.3-codex",
	"gpt-5.2-codex",
	"gpt-4.1",
	"gpt-4o",
	"gpt-4o-mini",
	"claude-opus-4.6",
	"claude-sonnet-4.6",
	"claude-sonnet-4.5",
	"claude-haiku-4.5",
	"gemini-2.5-pro",
	"grok-code-fast-1",
	],
	"gemini": [
	"gemini-3.1-pro-preview",
	"gemini-3-flash-preview",
	"gemini-3.1-flash-lite-preview",
	"gemini-2.5-pro",
	"gemini-2.5-flash",
	"gemini-2.5-flash-lite",
	# Gemma open models (also served via AI Studio)
	"gemma-4-31b-it",
	"gemma-4-26b-it",
	],
	"zai": [
	"glm-5.1",
	"glm-5",
	"glm-5-turbo",
	"glm-4.7",
	"glm-4.5",
	"glm-4.5-flash",
	],
	"xai": [
	"grok-4.20-0309-reasoning",
	"grok-4.20-0309-non-reasoning",
	"grok-4.20-multi-agent-0309",
	"grok-4-1-fast-reasoning",
	"grok-4-1-fast-non-reasoning",
	"grok-4-fast-reasoning",
	"grok-4-fast-non-reasoning",
	"grok-4-0709",
	"grok-code-fast-1",
	"grok-3",
	"grok-3-mini",
	],
	"kimi-coding": [
	"kimi-for-coding",
	"kimi-k2.5",
	"kimi-k2-thinking",
	"kimi-k2-thinking-turbo",
	"kimi-k2-turbo-preview",
	"kimi-k2-0905-preview",
	],
	"kimi-coding-cn": [
	"kimi-k2.5",
	"kimi-k2-thinking",
	"kimi-k2-turbo-preview",
	"kimi-k2-0905-preview",
	],
	"moonshot": [
	"kimi-k2.5",
	"kimi-k2-thinking",
	"kimi-k2-turbo-preview",
	"kimi-k2-0905-preview",
	],
	"minimax": [
	"MiniMax-M2.7",
	"MiniMax-M2.5",
	"MiniMax-M2.1",
	"MiniMax-M2",
	],
	"minimax-cn": [
	"MiniMax-M2.7",
	"MiniMax-M2.5",
	"MiniMax-M2.1",
	"MiniMax-M2",
	],
	"anthropic": [
	"claude-opus-4-6",
	"claude-sonnet-4-6",
	"claude-opus-4-5-20251101",
	"claude-sonnet-4-5-20250929",
	"claude-opus-4-20250514",
	"claude-sonnet-4-20250514",
	"claude-haiku-4-5-20251001",
	],
	"deepseek": [
	"deepseek-chat",
	"deepseek-reasoner",
	],
	"xiaomi": [
	"mimo-v2-pro",
	"mimo-v2-omni",
	"mimo-v2-flash",
	],
	"opencode-zen": [
	"gpt-5.4-pro",
	"gpt-5.4",
	"gpt-5.3-codex",
	"gpt-5.3-codex-spark",
	"gpt-5.2",
	"gpt-5.2-codex",
	"gpt-5.1",
	"gpt-5.1-codex",
	"gpt-5.1-codex-max",
	"gpt-5.1-codex-mini",
	"gpt-5",
	"gpt-5-codex",
	"gpt-5-nano",
	"claude-opus-4-6",
	"claude-opus-4-5",
	"claude-opus-4-1",
	"claude-sonnet-4-6",
	"claude-sonnet-4-5",
	"claude-sonnet-4",
	"claude-haiku-4-5",
	"claude-3-5-haiku",
	"gemini-3.1-pro",
	"gemini-3-pro",
	"gemini-3-flash",
	"minimax-m2.7",
	"minimax-m2.5",
	"minimax-m2.5-free",
	"minimax-m2.1",
	"glm-5",
	"glm-4.7",
	"glm-4.6",
	"kimi-k2.5",
	"kimi-k2-thinking",
	"kimi-k2",
	"qwen3-coder",
	"big-pickle",
	],
	"opencode-go": [
	"glm-5",
	"kimi-k2.5",
	"mimo-v2-pro",
	"mimo-v2-omni",
	"minimax-m2.7",
	"minimax-m2.5",
	],
	"ai-gateway": [
	"anthropic/claude-opus-4.6",
	"anthropic/claude-sonnet-4.6",
	"anthropic/claude-sonnet-4.5",
	"anthropic/claude-haiku-4.5",
	"openai/gpt-5",
	"openai/gpt-4.1",
	"openai/gpt-4.1-mini",
	"google/gemini-3-pro-preview",
	"google/gemini-3-flash",
	"google/gemini-2.5-pro",
	"google/gemini-2.5-flash",
	"deepseek/deepseek-v3.2",
	],
	"kilocode": [
	"anthropic/claude-opus-4.6",
	"anthropic/claude-sonnet-4.6",
	"openai/gpt-5.4",
	"google/gemini-3-pro-preview",
	"google/gemini-3-flash-preview",
	],
	# Alibaba DashScope Coding platform (coding-intl) — default endpoint.
	# Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
	# Users with classic DashScope keys should override DASHSCOPE_BASE_URL
	# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
	# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
	"alibaba": [
	"qwen3.5-plus",
	"qwen3-coder-plus",
	"qwen3-coder-next",
	# Third-party models available on coding-intl
	"glm-5",
	"glm-4.7",
	"kimi-k2.5",
	"MiniMax-M2.5",
	],
	# Curated HF model list — only agentic models that map to OpenRouter defaults.
	"huggingface": [
	"Qwen/Qwen3.5-397B-A17B",
	"Qwen/Qwen3.5-35B-A3B",
	"deepseek-ai/DeepSeek-V3.2",
	"moonshotai/Kimi-K2.5",
	"MiniMaxAI/MiniMax-M2.5",
	"zai-org/GLM-5",
	"XiaomiMiMo/MiMo-V2-Flash",
	"moonshotai/Kimi-K2-Thinking",
	],
	}

	# ---------------------------------------------------------------------------
	# Nous Portal free-model filtering
	# ---------------------------------------------------------------------------
	# Models that are ALLOWED to appear when priced as free on Nous Portal.
	# Any other free model is hidden — prevents promotional/temporary free models
	# from cluttering the selection when users are paying subscribers.
	# Models in this list are ALSO filtered out if they are NOT free (i.e. they
	# should only appear in the menu when they are genuinely free).
	# Membership is tested by filter_nous_free_models(): a model is shown only
	# when "is in this set" and "is priced as free" agree.
	_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
	"xiaomi/mimo-v2-pro",
	"xiaomi/mimo-v2-omni",
	})


	def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
	"""Return True if model_id has zero-cost prompt AND completion pricing."""
	p = pricing.get(model_id)
	if not p:
	return False
	try:
	return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0
	except (TypeError, ValueError):
	return False


	def filter_nous_free_models(
	    model_ids: list[str],
	    pricing: dict[str, dict[str, str]],
	) -> list[str]:
	    """Apply the Nous Portal free-model policy to ``model_ids``.

	    Outcome per model:
	      * paid and not on the allowlist  -> kept (the normal case)
	      * free and not on the allowlist  -> dropped
	      * on the allowlist and free      -> kept
	      * on the allowlist but not free  -> dropped

	    When ``pricing`` is empty nothing can be classified, so the input list
	    itself is returned unchanged. Otherwise a new list is returned with the
	    input order preserved.
	    """
	    if not pricing:
	        return model_ids

	    # The four rules collapse to one test: keep a model exactly when
	    # "is on the allowlist" and "is free" have the same truth value.
	    return [
	        mid
	        for mid in model_ids
	        if (mid in _NOUS_ALLOWED_FREE_MODELS) == _is_model_free(mid, pricing)
	    ]


	# ---------------------------------------------------------------------------
	# Nous Portal account tier detection
	# ---------------------------------------------------------------------------

	def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]:
	    """Fetch the user's Nous Portal account/subscription info.

	    Calls ``<portal>/api/oauth/account`` with the OAuth access token as a
	    Bearer credential. ``portal_base_url`` defaults to
	    ``https://portal.nousresearch.com`` when empty; trailing slashes are
	    ignored.

	    Returns the parsed JSON object on success, e.g.::

	        {
	            "subscription": {
	                "plan": "Plus",
	                "tier": 2,
	                "monthly_charge": 20,
	                "credits_remaining": 1686.60,
	                ...
	            },
	            ...
	        }

	    Returns an empty dict on any failure (network, auth, parse), and also
	    when the response is valid JSON but not an object, so callers can always
	    use ``.get`` on the result.
	    """
	    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
	    url = f"{base}/api/oauth/account"
	    headers = {
	        "Authorization": f"Bearer {access_token}",
	        "Accept": "application/json",
	    }
	    try:
	        req = urllib.request.Request(url, headers=headers)
	        with urllib.request.urlopen(req, timeout=8) as resp:
	            payload = json.loads(resp.read().decode())
	    except Exception:
	        return {}
	    # A JSON array or scalar would break the declared dict contract.
	    return payload if isinstance(payload, dict) else {}


	def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
	    """Decide whether ``account_info`` describes a free (unpaid) account.

	    The account is free when ``subscription.monthly_charge`` is numerically
	    zero. If the subscription is not a dict, or the charge is absent or
	    unparseable, the answer is False: the account is assumed to be paid so
	    that users are not blocked.
	    """
	    subscription = account_info.get("subscription")
	    monthly_charge = (
	        subscription.get("monthly_charge") if isinstance(subscription, dict) else None
	    )
	    if monthly_charge is None:
	        return False
	    try:
	        return float(monthly_charge) == 0
	    except (TypeError, ValueError):
	        return False


	def partition_nous_models_by_tier(
	    model_ids: list[str],
	    pricing: dict[str, dict[str, str]],
	    free_tier: bool,
	) -> tuple[list[str], list[str]]:
	    """Split Nous models into ``(selectable, unavailable)`` for the user's tier.

	    Paid-tier users may select everything, so the input list is returned as
	    ``selectable`` with nothing unavailable (free-model filtering is a
	    separate step, see ``filter_nous_free_models``). The same happens for
	    free-tier users when there is no pricing data to classify with.

	    Free-tier users with pricing data get the free models as ``selectable``
	    and all other models as ``unavailable``, each in input order.
	    """
	    if not free_tier or not pricing:
	        return (model_ids, [])

	    selectable: list[str] = []
	    unavailable: list[str] = []
	    for mid in model_ids:
	        bucket = selectable if _is_model_free(mid, pricing) else unavailable
	        bucket.append(mid)
	    return (selectable, unavailable)


	# ---------------------------------------------------------------------------
	# TTL cache for free-tier detection — avoids repeated API calls within a
	# session while still picking up upgrades quickly.
	# ---------------------------------------------------------------------------
	_FREE_TIER_CACHE_TTL: int = 180 # seconds (3 minutes)
	_free_tier_cache: tuple[bool, float] \| None = None # (result, timestamp)


	def check_nous_free_tier() -> bool:
	    """Report whether the current Nous Portal user is on a free (unpaid) tier.

	    The verdict is memoized in ``_free_tier_cache`` for
	    ``_FREE_TIER_CACHE_TTL`` seconds, so the Portal is not queried on every
	    call while an upgrade is still noticed within a few minutes.

	    Any problem (no stored auth state, no access token, or an exception
	    anywhere in the lookup) yields False, meaning "assume paid", and that
	    False is cached like any other result.
	    """
	    global _free_tier_cache
	    import time

	    started = time.monotonic()
	    if _free_tier_cache is not None:
	        cached_value, stamp = _free_tier_cache
	        if started - stamp < _FREE_TIER_CACHE_TTL:
	            return cached_value

	    # "Paid" unless the Portal positively reports a zero monthly charge.
	    verdict = False
	    try:
	        from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials

	        # Ensure the stored token is fresh (triggers a refresh if needed).
	        resolve_nous_runtime_credentials(min_key_ttl_seconds=60)

	        auth_state = get_provider_auth_state("nous")
	        if auth_state:
	            token = auth_state.get("access_token", "")
	            portal = auth_state.get("portal_base_url", "")
	            if token:
	                verdict = is_nous_free_tier(fetch_nous_account_tier(token, portal))
	    except Exception:
	        verdict = False  # default to paid on error — don't block users

	    # Every outcome is stamped with the time taken before the lookup began.
	    _free_tier_cache = (verdict, started)
	    return verdict


	# Human-readable display name for each canonical provider id.
	# Read by provider_label() and list_available_providers(); its keys also
	# feed _KNOWN_PROVIDER_NAMES, which gates the ``provider:model`` syntax.
	_PROVIDER_LABELS = {
	"openrouter": "OpenRouter",
	"openai-codex": "OpenAI Codex",
	"copilot-acp": "GitHub Copilot ACP",
	"nous": "Nous Portal",
	"copilot": "GitHub Copilot",
	"gemini": "Google AI Studio",
	"zai": "Z.AI / GLM",
	"kimi-coding": "Kimi / Moonshot",
	"kimi-coding-cn": "Kimi / Moonshot (China)",
	"minimax": "MiniMax",
	"minimax-cn": "MiniMax (China)",
	"anthropic": "Anthropic",
	"deepseek": "DeepSeek",
	"opencode-zen": "OpenCode Zen",
	"opencode-go": "OpenCode Go",
	"ai-gateway": "AI Gateway",
	"kilocode": "Kilo Code",
	"alibaba": "Alibaba Cloud (DashScope)",
	"qwen-oauth": "Qwen OAuth (Portal)",
	"huggingface": "Hugging Face",
	"xiaomi": "Xiaomi MiMo",
	"custom": "Custom endpoint",
	}

	# Alternate spellings mapped to canonical provider ids. normalize_provider()
	# looks names up here after stripping and lower-casing, so keys must be
	# lower-case.
	# NOTE(review): "moonshot" is aliased to "kimi-coding" although
	# _PROVIDER_MODELS also defines a separate "moonshot" catalog; any lookup
	# that goes through normalize_provider() will never reach that catalog —
	# confirm this is intended.
	_PROVIDER_ALIASES = {
	"glm": "zai",
	"z-ai": "zai",
	"z.ai": "zai",
	"zhipu": "zai",
	"github": "copilot",
	"github-copilot": "copilot",
	"github-models": "copilot",
	"github-model": "copilot",
	"github-copilot-acp": "copilot-acp",
	"copilot-acp-agent": "copilot-acp",
	"google": "gemini",
	"google-gemini": "gemini",
	"google-ai-studio": "gemini",
	"kimi": "kimi-coding",
	"moonshot": "kimi-coding",
	"kimi-cn": "kimi-coding-cn",
	"moonshot-cn": "kimi-coding-cn",
	"minimax-china": "minimax-cn",
	"minimax_cn": "minimax-cn",
	"claude": "anthropic",
	"claude-code": "anthropic",
	"deep-seek": "deepseek",
	"opencode": "opencode-zen",
	"zen": "opencode-zen",
	"go": "opencode-go",
	"opencode-go-sub": "opencode-go",
	"aigateway": "ai-gateway",
	"vercel": "ai-gateway",
	"vercel-ai-gateway": "ai-gateway",
	"kilo": "kilocode",
	"kilo-code": "kilocode",
	"kilo-gateway": "kilocode",
	"dashscope": "alibaba",
	"aliyun": "alibaba",
	"qwen": "alibaba",
	"alibaba-cloud": "alibaba",
	"qwen-portal": "qwen-oauth",
	"hf": "huggingface",
	"hugging-face": "huggingface",
	"huggingface-hub": "huggingface",
	"mimo": "xiaomi",
	"xiaomi-mimo": "xiaomi",
	}


	def get_default_model_for_provider(provider: str) -> str:
	    """Pick the default model for ``provider``.

	    The default is the first entry of that provider's list in
	    ``_PROVIDER_MODELS``, the model offered first in the ``hermes model``
	    picker. It serves as a fallback when a provider has been configured but
	    no model was ever chosen (e.g. ``hermes auth add openai-codex`` without
	    ``hermes model``).

	    ``provider`` is looked up as given; aliases are not resolved here.
	    Returns an empty string for an unknown provider or an empty catalog.
	    """
	    catalog = _PROVIDER_MODELS.get(provider)
	    if not catalog:
	        return ""
	    return catalog[0]


	def _openrouter_model_is_free(pricing: Any) -> bool:
	"""Return True when both prompt and completion pricing are zero."""
	if not isinstance(pricing, dict):
	return False
	try:
	return float(pricing.get("prompt", "0")) == 0 and float(pricing.get("completion", "0")) == 0
	except (TypeError, ValueError):
	return False


	def fetch_openrouter_models(
	    timeout: float = 8.0,
	    *,
	    force_refresh: bool = False,
	) -> list[tuple[str, str]]:
	    """Return the curated OpenRouter picker list, refreshed from the live catalog when possible.

	    The result contains the ids from ``OPENROUTER_MODELS`` that also appear
	    in the live catalog, in ``OPENROUTER_MODELS`` order. Each description is
	    ``"free"`` or ``""`` according to live pricing, except the first entry,
	    which is always labelled ``"recommended"``.

	    Args:
	        timeout: Seconds to wait for the live catalog request.
	        force_refresh: Bypass the in-process cache and query the catalog again.

	    Returns:
	        A fresh list of ``(model_id, description)`` tuples. If the request
	        fails, the payload is malformed, or no curated id is live, the
	        previously cached list is returned if one exists, otherwise the
	        static ``OPENROUTER_MODELS`` snapshot. Only a successful refresh
	        updates the cache.
	    """
	    global _openrouter_catalog_cache

	    if _openrouter_catalog_cache is not None and not force_refresh:
	        return list(_openrouter_catalog_cache)

	    fallback = list(OPENROUTER_MODELS)

	    try:
	        req = urllib.request.Request(
	            "https://openrouter.ai/api/v1/models",
	            headers={"Accept": "application/json"},
	        )
	        with urllib.request.urlopen(req, timeout=timeout) as resp:
	            payload = json.loads(resp.read().decode())
	    except Exception:
	        return list(_openrouter_catalog_cache or fallback)

	    # Valid JSON is not necessarily an object; without this guard a list or
	    # scalar payload would raise AttributeError outside the try block.
	    live_items = payload.get("data") if isinstance(payload, dict) else None
	    if not isinstance(live_items, list):
	        return list(_openrouter_catalog_cache or fallback)

	    live_by_id: dict[str, dict[str, Any]] = {}
	    for item in live_items:
	        if not isinstance(item, dict):
	            continue
	        mid = str(item.get("id") or "").strip()
	        if mid:
	            live_by_id[mid] = item

	    curated: list[tuple[str, str]] = []
	    for preferred_id, _ in fallback:
	        live_item = live_by_id.get(preferred_id)
	        if live_item is None:
	            continue
	        desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
	        curated.append((preferred_id, desc))

	    if not curated:
	        return list(_openrouter_catalog_cache or fallback)

	    # The first surviving entry is the recommended one, replacing any
	    # "free" label it may have received above.
	    curated[0] = (curated[0][0], "recommended")
	    _openrouter_catalog_cache = curated
	    return list(curated)


	def model_ids(*, force_refresh: bool = False) -> list[str]:
	    """List the OpenRouter model ids alone, dropping the descriptions."""
	    catalog = fetch_openrouter_models(force_refresh=force_refresh)
	    return [entry[0] for entry in catalog]


	def menu_labels(*, force_refresh: bool = False) -> list[str]:
	    """Build menu labels such as 'anthropic/claude-opus-4.6 (recommended)'.

	    Entries with an empty description are shown as the bare model id.
	    """
	    return [
	        f"{mid} ({desc})" if desc else mid
	        for mid, desc in fetch_openrouter_models(force_refresh=force_refresh)
	    ]



	# ---------------------------------------------------------------------------
	# Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
	# ---------------------------------------------------------------------------

	# Pricing cache, two levels deep. The outer key is the endpoint base URL
	# with trailing slashes removed (the cache_key computed in
	# fetch_models_with_pricing). The inner dict maps model_id to its price
	# strings, e.g. {"prompt": str, "completion": str}.
	_pricing_cache: dict[str, dict[str, dict[str, str]]] = {}


	def _format_price_per_mtok(per_token_str: str) -> str:
	"""Convert a per-token price string to a human-friendly $/Mtok string.

	Always uses 2 decimal places so that prices align vertically when
	right-justified in a column (the decimal point stays in the same position).

	Examples:
	"0.000003" → "$3.00" (per million tokens)
	"0.00003" → "$30.00"
	"0.00000015" → "$0.15"
	"0.0000001" → "$0.10"
	"0.00018" → "$180.00"
	"0" → "free"
	"""
	try:
	val = float(per_token_str)
	except (TypeError, ValueError):
	return "?"
	if val == 0:
	return "free"
	per_m = val * 1_000_000
	return f"${per_m:.2f}"


	def format_model_pricing_table(
	    models: list[tuple[str, str]],
	    pricing_map: dict[str, dict[str, str]],
	    current_model: str = "",
	    indent: str = " ",
	) -> list[str]:
	    """Render models and their prices as column-aligned terminal lines.

	    ``models`` is ``[(model_id, description), ...]``; the description is not
	    displayed. Prices come from ``pricing_map`` and are shown per million
	    tokens. A "Cache" column appears only if at least one model has an
	    ``input_cache_read`` price. The row whose id equals ``current_model``
	    gets a trailing " ← current" marker.

	    Returns the header line, a dashed rule, then one line per model, or an
	    empty list when ``models`` is empty.
	    """
	    if not models:
	        return []

	    def _price_cells(model_id: str) -> tuple[str, str, str]:
	        # (input, output, cache) cells; all blank when there is no pricing.
	        entry = pricing_map.get(model_id)
	        if not entry:
	            return ("", "", "")
	        raw_cache = entry.get("input_cache_read", "")
	        return (
	            _format_price_per_mtok(entry.get("prompt", "")),
	            _format_price_per_mtok(entry.get("completion", "")),
	            _format_price_per_mtok(raw_cache) if raw_cache else "",
	        )

	    # Rows: (model_id, input_price, output_price, cache_price, is_current)
	    table = [(mid, *_price_cells(mid), mid == current_model) for mid, _desc in models]
	    has_cache = any(row[3] for row in table)

	    # Widths are derived from the data so the decimal points line up.
	    name_w = 2 + max(len(row[0]) for row in table)
	    in_w = max((len(row[1]) for row in table if row[1]), default=4)
	    out_w = max((len(row[2]) for row in table if row[2]), default=4)
	    price_w = max(in_w, out_w, 3)  # never narrower than the "Out" header
	    cache_w = 0
	    if has_cache:
	        widest_cache = max((len(row[3]) for row in table if row[3]), default=4)
	        cache_w = max(widest_cache, 5)  # never narrower than "Cache"

	    header = f"{indent}{'Model':<{name_w}} {'In':>{price_w}} {'Out':>{price_w}}"
	    rule = f"{indent}{'-' * name_w} {'-' * price_w} {'-' * price_w}"
	    if has_cache:
	        header += f" {'Cache':>{cache_w}}"
	        rule += f" {'-' * cache_w}"
	    lines: list[str] = [header + " /Mtok", rule]

	    for mid, inp, out, cache, is_cur in table:
	        line = f"{indent}{mid:<{name_w}} {inp:>{price_w}} {out:>{price_w}}"
	        if has_cache:
	            line += f" {cache:>{cache_w}}"
	        if is_cur:
	            line += " ← current"
	        lines.append(line)

	    return lines


	def fetch_models_with_pricing(
	    api_key: str | None = None,
	    base_url: str = "https://openrouter.ai/api",
	    timeout: float = 8.0,
	    *,
	    force_refresh: bool = False,
	) -> dict[str, dict[str, str]]:
	    """Fetch ``/v1/models`` and return ``{model_id: {prompt, completion}}`` pricing.

	    Works with any OpenRouter-compatible endpoint (OpenRouter, Nous Portal).

	    Args:
	        api_key: Optional bearer token sent in the ``Authorization`` header.
	        base_url: Endpoint root; ``/v1/models`` is appended to it.
	        timeout: Seconds to wait for the HTTP request.
	        force_refresh: Ignore any cached result and query the endpoint again.

	    Returns:
	        A mapping from model id to its price strings. Each entry has
	        ``prompt`` and ``completion`` and, when the endpoint reports them,
	        ``input_cache_read`` / ``input_cache_write``. The result is cached
	        per ``base_url`` (trailing slashes ignored). A failed request caches
	        and returns an empty mapping, so the endpoint is not retried until
	        ``force_refresh`` is set. Malformed payloads and items are skipped
	        rather than raising.
	    """
	    cache_key = (base_url or "").rstrip("/")
	    if not force_refresh and cache_key in _pricing_cache:
	        return _pricing_cache[cache_key]

	    url = cache_key + "/v1/models"
	    headers: dict[str, str] = {"Accept": "application/json"}
	    if api_key:
	        headers["Authorization"] = f"Bearer {api_key}"

	    try:
	        req = urllib.request.Request(url, headers=headers)
	        with urllib.request.urlopen(req, timeout=timeout) as resp:
	            payload = json.loads(resp.read().decode())
	    except Exception:
	        _pricing_cache[cache_key] = {}
	        return {}

	    # Defensive parsing: the payload may be valid JSON yet not the expected
	    # {"data": [ {...}, ... ]} shape.
	    items = payload.get("data") if isinstance(payload, dict) else None
	    if not isinstance(items, list):
	        items = []

	    result: dict[str, dict[str, str]] = {}
	    for item in items:
	        if not isinstance(item, dict):
	            continue
	        mid = item.get("id")
	        pricing = item.get("pricing")
	        if not (mid and isinstance(pricing, dict)):
	            continue
	        entry: dict[str, str] = {
	            "prompt": str(pricing.get("prompt", "")),
	            "completion": str(pricing.get("completion", "")),
	        }
	        # Cache prices are optional; only truthy values are copied over.
	        for optional_key in ("input_cache_read", "input_cache_write"):
	            if pricing.get(optional_key):
	                entry[optional_key] = str(pricing[optional_key])
	        result[mid] = entry

	    _pricing_cache[cache_key] = result
	    return result


	def _resolve_openrouter_api_key() -> str:
	"""Best-effort OpenRouter API key for pricing fetch."""
	return os.getenv("OPENROUTER_API_KEY", "").strip()


	def _resolve_nous_pricing_credentials() -> tuple[str, str]:
	"""Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings."""
	try:
	from hermes_cli.auth import resolve_nous_runtime_credentials
	creds = resolve_nous_runtime_credentials()
	if creds:
	return (creds.get("api_key", ""), creds.get("base_url", ""))
	except Exception:
	pass
	return ("", "")


	def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
	    """Fetch live pricing for the providers that expose it (openrouter, nous).

	    ``provider`` may be an alias; it is normalized first. Any other provider,
	    or a Nous setup without a resolvable base URL, yields an empty dict.
	    """
	    canonical = normalize_provider(provider)

	    if canonical == "openrouter":
	        return fetch_models_with_pricing(
	            api_key=_resolve_openrouter_api_key(),
	            base_url="https://openrouter.ai/api",
	            force_refresh=force_refresh,
	        )

	    if canonical != "nous":
	        return {}

	    api_key, base_url = _resolve_nous_pricing_credentials()
	    if not base_url:
	        return {}

	    # The Nous base URL typically ends in /v1 (for example
	    # https://inference-api.nousresearch.com/v1), while
	    # fetch_models_with_pricing() appends /v1/models itself, so drop the
	    # suffix here.
	    root = base_url.rstrip("/")
	    if root.endswith("/v1"):
	        root = root[: -len("/v1")]
	    return fetch_models_with_pricing(
	        api_key=api_key,
	        base_url=root,
	        force_refresh=force_refresh,
	    )


	# All provider IDs and aliases that are valid for the provider:model syntax.
	# parse_model_input() treats a colon as a provider delimiter only when the
	# text before it is a member of this set.
	_KNOWN_PROVIDER_NAMES: set[str] = (
	    set(_PROVIDER_LABELS.keys())
	    | set(_PROVIDER_ALIASES.keys())
	    | {"openrouter", "custom"}
	)


	def list_available_providers() -> list[dict[str, Any]]:
	    """Return info about all providers the user could use with ``provider:model``.

	    Providers are listed in a fixed display order. Each dict has:

	    * ``id``: canonical provider id (``str``)
	    * ``label``: display name, falling back to the id (``str``)
	    * ``aliases``: alternate names that resolve to this provider (``list[str]``)
	    * ``authenticated``: whether credentials appear to be configured

	    Credential detection is best effort: if the auth helpers cannot be
	    imported, or a check raises, the provider is reported as not
	    authenticated rather than failing the whole listing.
	    """
	    # Canonical providers in display order
	    provider_order = [
	        "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
	        "gemini", "huggingface",
	        "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
	        "qwen-oauth", "xiaomi",
	        "opencode-zen", "opencode-go",
	        "ai-gateway", "deepseek", "custom",
	    ]

	    # Build reverse alias map: canonical id -> [aliases]
	    aliases_for: dict[str, list[str]] = {}
	    for alias, canonical in _PROVIDER_ALIASES.items():
	        aliases_for.setdefault(canonical, []).append(alias)

	    # Import the auth helpers once instead of once per provider. When the
	    # import fails, every provider is reported as unauthenticated.
	    try:
	        from hermes_cli.auth import get_auth_status, has_usable_secret
	        auth_helpers_loaded = True
	    except Exception:
	        auth_helpers_loaded = False

	    def _has_credentials(pid: str) -> Any:
	        # Best-effort credential probe for one provider; never raises.
	        if not auth_helpers_loaded:
	            return False
	        try:
	            if pid == "custom":
	                return bool((_get_custom_base_url() or "").strip())
	            if pid == "openrouter":
	                return has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
	            status = get_auth_status(pid)
	            return bool(status.get("logged_in") or status.get("configured"))
	        except Exception:
	            return False

	    return [
	        {
	            "id": pid,
	            "label": _PROVIDER_LABELS.get(pid, pid),
	            "aliases": aliases_for.get(pid, []),
	            "authenticated": _has_credentials(pid),
	        }
	        for pid in provider_order
	    ]


	def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
	    """Split ``/model`` input into a ``(provider, model)`` pair.

	    ``provider:model`` switches providers at runtime::

	        openrouter:anthropic/claude-sonnet-4.5 -> ("openrouter", "anthropic/claude-sonnet-4.5")
	        nous:hermes-3                          -> ("nous", "hermes-3")
	        anthropic/claude-sonnet-4.5            -> (current_provider, "anthropic/claude-sonnet-4.5")
	        gpt-5.4                                -> (current_provider, "gpt-5.4")

	    Only the first colon is considered, and only when the text before it is
	    a recognized provider name or alias. Model names that merely contain a
	    colon (e.g. ``anthropic/claude-3.5-sonnet:beta``) are therefore left
	    intact and paired with ``current_provider``.

	    Named custom providers use a triple form: ``custom:local:qwen`` gives
	    ``("custom:local", "qwen")``, while ``custom:qwen`` gives
	    ``("custom", "qwen")``.
	    """
	    text = raw.strip()
	    head, sep, tail = text.partition(":")

	    # A colon at position 0 (empty head) is never a provider delimiter.
	    if sep and head:
	        provider_part = head.strip().lower()
	        model_part = tail.strip()
	        if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES:
	            if provider_part == "custom":
	                custom_name, inner_sep, actual_model = model_part.partition(":")
	                custom_name = custom_name.strip()
	                actual_model = actual_model.strip()
	                # Both halves must be non-empty to count as the triple form.
	                if inner_sep and custom_name and actual_model:
	                    return (f"custom:{custom_name}", actual_model)
	            return (normalize_provider(provider_part), model_part)

	    return (current_provider, text)


	def _get_custom_base_url() -> str:
	"""Get the custom endpoint base_url from config.yaml."""
	try:
	from hermes_cli.config import load_config
	config = load_config()
	model_cfg = config.get("model", {})
	if isinstance(model_cfg, dict):
	return str(model_cfg.get("base_url", "")).strip()
	except Exception:
	pass
	return ""


	def curated_models_for_provider(
	    provider: Optional[str],
	    *,
	    force_refresh: bool = False,
	) -> list[tuple[str, str]]:
	    """List ``(model_id, description)`` tuples for a provider.

	    OpenRouter is served by ``fetch_openrouter_models`` (the only path that
	    honours ``force_refresh``). For every other provider the live list from
	    ``provider_model_ids`` is preferred, and the static ``_PROVIDER_MODELS``
	    catalog is used when that comes back empty. Descriptions are empty
	    strings outside OpenRouter.
	    """
	    canonical = normalize_provider(provider)
	    if canonical == "openrouter":
	        return fetch_openrouter_models(force_refresh=force_refresh)

	    # Live API first (Codex, Nous, etc. all support /models); otherwise
	    # fall back to the static catalog.
	    chosen = provider_model_ids(canonical) or _PROVIDER_MODELS.get(canonical, [])
	    return [(model, "") for model in chosen]


	def detect_provider_for_model(
	    model_name: str,
	    current_provider: str,
	) -> Optional[tuple[str, str]]:
	    """Auto-detect the best provider for a model name.

	    Returns ``(provider_id, model_name)`` — the model name may be remapped
	    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
	    Returns ``None`` when no confident match is found or no switch is needed.

	    ``current_provider`` may be an alias; it is normalized once and the
	    canonical id is used for every comparison below.

	    Priority:
	        0. Bare provider name → switch to that provider's default model
	        1. Direct provider with credentials (highest)
	        2. Direct provider without credentials → remap to OpenRouter slug
	        3. OpenRouter catalog match
	    """
	    name = (model_name or "").strip()
	    if not name:
	        return None

	    name_lower = name.lower()
	    # Normalize once so an alias such as "claude" is recognised as the
	    # current provider in every step, not only in step 0.
	    current = normalize_provider(current_provider)

	    def _catalog_has(models: list[str]) -> bool:
	        # Case-insensitive exact match against one provider catalog.
	        return any(name_lower == m.lower() for m in models)

	    # --- Step 0: bare provider name typed as model ---
	    # If someone types `/model nous` or `/model anthropic`, treat it as a
	    # provider switch and pick the first model from that provider's catalog.
	    # Skip "custom" and "openrouter" — custom has no model catalog, and
	    # openrouter requires an explicit model name to be useful.
	    resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
	    if resolved_provider not in {"custom", "openrouter"}:
	        default_models = _PROVIDER_MODELS.get(resolved_provider, [])
	        if (
	            resolved_provider in _PROVIDER_LABELS
	            and default_models
	            and resolved_provider != current
	        ):
	            return (resolved_provider, default_models[0])

	    # Aggregators list other providers' models — never auto-switch TO them
	    aggregators = {"nous", "openrouter"}

	    # If the model belongs to the current provider's catalog, don't suggest switching
	    if _catalog_has(_PROVIDER_MODELS.get(current, [])):
	        return None

	    # --- Step 1: check static provider catalogs for a direct match ---
	    direct_match: Optional[str] = None
	    for pid, models in _PROVIDER_MODELS.items():
	        if pid == current or pid in aggregators:
	            continue
	        if _catalog_has(models):
	            direct_match = pid
	            break

	    if direct_match:
	        # Check if we have credentials for this provider (best effort: any
	        # failure here just means "no credentials found").
	        has_creds = False
	        try:
	            from hermes_cli.auth import PROVIDER_REGISTRY

	            pconfig = PROVIDER_REGISTRY.get(direct_match)
	            if pconfig:
	                has_creds = any(
	                    os.getenv(env_var, "").strip()
	                    for env_var in pconfig.api_key_env_vars
	                )
	        except Exception:
	            pass

	        if has_creds:
	            return (direct_match, name)

	        # No direct creds — try to find this model on OpenRouter instead
	        or_slug = _find_openrouter_slug(name)
	        if or_slug:
	            return ("openrouter", or_slug)
	        # Still return the direct provider — credential resolution will
	        # give a clear error rather than silently using the wrong provider
	        return (direct_match, name)

	    # --- Step 2: check OpenRouter catalog ---
	    # Handles both full ``vendor/model`` ids and bare model names.
	    or_slug = _find_openrouter_slug(name)
	    if or_slug:
	        if current != "openrouter":
	            return ("openrouter", or_slug)
	        # Already on openrouter: only report when the slug was remapped.
	        if or_slug != name:
	            return ("openrouter", or_slug)

	    return None


	def _find_openrouter_slug(model_name: str) -> Optional[str]:
	    """Resolve a bare or fully-qualified name to an OpenRouter catalog slug.

	    Matching is case-insensitive and runs in two passes over ``model_ids()``:

	    1. the whole slug (``anthropic/claude-opus-4.6`` matches itself);
	    2. only the part after the first ``/`` (``claude-opus-4.6`` matches
	       ``anthropic/claude-opus-4.6``).

	    Returns the catalog's own spelling of the slug, or ``None`` when the
	    name is blank or nothing matches.
	    """
	    wanted = model_name.strip().lower()
	    if not wanted:
	        return None

	    # Pass 1: a full-slug match always wins over a bare-name match.
	    full_match = next((slug for slug in model_ids() if slug.lower() == wanted), None)
	    if full_match is not None:
	        return full_match

	    # Pass 2: compare against the model part only; slugs without a
	    # vendor prefix were already covered by pass 1 and are skipped here.
	    for slug in model_ids():
	        _vendor, slash, bare = slug.partition("/")
	        if slash and bare.lower() == wanted:
	            return slug

	    return None


	def normalize_provider(provider: Optional[str]) -> str:
	    """Map a provider name or alias onto its canonical Hermes provider id.

	    A missing or empty value is treated as ``"openrouter"``. The input is
	    trimmed and lower-cased, then looked up in ``_PROVIDER_ALIASES``; a
	    name without an alias entry is returned in that cleaned-up form.

	    Note: ``"auto"`` is expected to pass through unchanged — use
	    ``hermes_cli.auth.resolve_provider()`` to turn it into a concrete
	    provider based on credentials and environment.
	    """
	    key = (provider or "openrouter").strip().lower()
	    if key in _PROVIDER_ALIASES:
	        return _PROVIDER_ALIASES[key]
	    return key


	def provider_label(provider: Optional[str]) -> str:
	    """Return the display label for a provider id or alias.

	    ``"auto"`` (any casing) is labelled ``"Auto"``. Otherwise the name is
	    canonicalized with ``normalize_provider`` and looked up in
	    ``_PROVIDER_LABELS``; unknown providers are shown as the caller typed
	    them (trimmed), or ``"OpenRouter"`` if that is empty.
	    """
	    shown = (provider or "openrouter").strip()
	    lowered = shown.lower()
	    if lowered == "auto":
	        return "Auto"

	    canonical = normalize_provider(lowered)
	    fallback = shown if shown else "OpenRouter"
	    return _PROVIDER_LABELS.get(canonical, fallback)


	# OpenAI models eligible for Priority Processing (service_tier="priority").
	# The canonical list is published at https://openai.com/api-priority-processing/.
	# Entries are bare model slugs, stored without any vendor prefix.
	_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset(
	    (
	        # GPT-5 family
	        "gpt-5.4",
	        "gpt-5.4-mini",
	        "gpt-5.2",
	        "gpt-5.1",
	        "gpt-5",
	        "gpt-5-mini",
	        # GPT-4.1 family
	        "gpt-4.1",
	        "gpt-4.1-mini",
	        "gpt-4.1-nano",
	        # GPT-4o family
	        "gpt-4o",
	        "gpt-4o-mini",
	        # o-series
	        "o3",
	        "o4-mini",
	    )
	)

	# Anthropic models that accept Fast Mode (speed="fast").
	# Reference: https://platform.claude.com/docs/en/build-with-claude/fast-mode
	# Only Claude Opus 4.6 is listed. It appears twice because the native
	# Anthropic id uses hyphens (claude-opus-4-6) while the OpenRouter slug
	# uses a dot (claude-opus-4.6).
	_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset(
	    ("claude-opus-4-6", "claude-opus-4.6")
	)


	def _strip_vendor_prefix(model_id: str) -> str:
	    """Return the lower-cased model slug without its leading ``vendor/`` part.

	    Only the first ``/`` is treated as the separator, so
	    ``'anthropic/claude-opus-4-6'`` becomes ``'claude-opus-4-6'`` and
	    ``'a/b/c'`` becomes ``'b/c'``. Falsy input yields ``''``.
	    """
	    cleaned = str(model_id or "").strip().lower()
	    _vendor, slash, remainder = cleaned.partition("/")
	    return remainder if slash else cleaned


	def model_supports_fast_mode(model_id: Optional[str]) -> bool:
	    """Return whether Hermes should expose the /fast toggle for this model.

	    True when the bare slug (vendor prefix removed, lower-cased) is listed
	    in ``_PRIORITY_PROCESSING_MODELS``, or when ``_is_anthropic_fast_model``
	    accepts the id.
	    """
	    if _strip_vendor_prefix(str(model_id or "")) in _PRIORITY_PROCESSING_MODELS:
	        return True
	    # Delegate instead of re-implementing the Anthropic match here, so the
	    # toggle and resolve_fast_mode_overrides() can never disagree about
	    # which models count as Anthropic fast-mode models.
	    return _is_anthropic_fast_model(model_id)


	def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
	    """Return True if the model supports Anthropic's fast mode (speed='fast').

	    Before the lookup in ``_ANTHROPIC_FAST_MODE_MODELS`` the id is reduced
	    to its base name:

	    - the ``vendor/`` prefix is removed and the id is lower-cased;
	    - anything from the first ``:`` on is dropped (variant tags such as
	      ``:fast`` or ``:beta``);
	    - a trailing ``-YYYYMMDD`` snapshot date is dropped, so
	      ``claude-opus-4-6-20260401`` matches ``claude-opus-4-6``.
	    """
	    import re

	    raw = _strip_vendor_prefix(str(model_id or ""))
	    base = raw.split(":", 1)[0]
	    # Exactly eight ASCII digits after a hyphen at the very end; version
	    # parts like "-4-6" are too short to be mistaken for a date.
	    base = re.sub(r"-[0-9]{8}$", "", base)
	    return base in _ANTHROPIC_FAST_MODE_MODELS


	def resolve_fast_mode_overrides(model_id: Optional[str]) -> Optional[dict[str, Any]]:
	    """Return request overrides for fast/priority mode, or None if unsupported.

	    The override depends on which family the model belongs to:

	    - Anthropic fast-mode models: ``{"speed": "fast"}``
	    - every other supported model (OpenAI Priority Processing):
	      ``{"service_tier": "priority"}``

	    Per the original note, these overrides are injected into the API
	    request kwargs by ``_build_api_kwargs`` in run_agent.py (not visible
	    from this module), where each API path handles its own key.
	    """
	    if not model_supports_fast_mode(model_id):
	        return None
	    # The Anthropic check must come first: the remaining supported models
	    # are the OpenAI priority-processing ones.
	    if _is_anthropic_fast_model(model_id):
	        return {"speed": "fast"}
	    return {"service_tier": "priority"}


	def _resolve_copilot_catalog_api_key() -> str:
	    """Return a GitHub token for the Copilot catalog request, best effort.

	    Asks ``hermes_cli.auth.resolve_api_key_provider_credentials("copilot")``
	    for credentials and returns the trimmed ``api_key`` entry. Any failure
	    (including the import itself) results in an empty string.
	    """
	    token = ""
	    try:
	        from hermes_cli.auth import resolve_api_key_provider_credentials

	        credentials = resolve_api_key_provider_credentials("copilot")
	        token = str(credentials.get("api_key") or "").strip()
	    except Exception:
	        # Deliberately silent: callers fall back to an unauthenticated fetch.
	        token = ""
	    return token


	def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
	    """Return the best known model catalog for a provider.

	    OpenRouter and OpenAI Codex return their dedicated helper's result
	    directly. Copilot (including ``copilot-acp``), Nous, Anthropic,
	    AI Gateway and custom endpoints try a live listing first. When that
	    yields nothing, the static ``_PROVIDER_MODELS`` entry is returned as a
	    fresh list (``copilot-acp`` borrows the ``copilot`` entry).

	    ``force_refresh`` is forwarded only to the OpenRouter lookup.
	    """
	    provider_id = normalize_provider(provider)

	    if provider_id == "openrouter":
	        return model_ids(force_refresh=force_refresh)

	    if provider_id == "openai-codex":
	        from hermes_cli.codex_models import get_codex_model_ids

	        return get_codex_model_ids()

	    if provider_id in {"copilot", "copilot-acp"}:
	        try:
	            fetched = _fetch_github_models(_resolve_copilot_catalog_api_key())
	        except Exception:
	            fetched = None
	        if fetched:
	            return fetched
	        if provider_id == "copilot-acp":
	            # copilot-acp has no static list of its own.
	            return list(_PROVIDER_MODELS.get("copilot", []))
	    elif provider_id == "nous":
	        # Live Nous Portal /models endpoint; any failure falls through.
	        try:
	            from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials

	            nous_creds = resolve_nous_runtime_credentials()
	            if nous_creds:
	                fetched = fetch_nous_models(
	                    api_key=nous_creds.get("api_key", ""),
	                    inference_base_url=nous_creds.get("base_url", ""),
	                )
	                if fetched:
	                    return fetched
	        except Exception:
	            pass
	    elif provider_id == "anthropic":
	        fetched = _fetch_anthropic_models()
	        if fetched:
	            return fetched
	    elif provider_id == "ai-gateway":
	        fetched = _fetch_ai_gateway_models()
	        if fetched:
	            return fetched
	    elif provider_id == "custom":
	        endpoint = _get_custom_base_url()
	        if endpoint:
	            # Try the common API key env vars in order; first non-empty wins.
	            key_values = (
	                os.getenv(env_name, "")
	                for env_name in ("CUSTOM_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY")
	            )
	            token = next((value for value in key_values if value), "")
	            fetched = fetch_api_models(token, endpoint)
	            if fetched:
	                return fetched

	    return list(_PROVIDER_MODELS.get(provider_id, []))


	def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
	    """Fetch model ids from ``https://api.anthropic.com/v1/models``.

	    Credentials come from ``agent.anthropic_adapter.resolve_anthropic_token()``.
	    OAuth tokens are sent as a Bearer token together with the adapter's
	    beta flags; anything else is sent as ``x-api-key``.

	    Returns the ids ordered opus, sonnet, haiku, then everything else,
	    ascending by id within each group. Returns ``None`` when the adapter
	    cannot be imported, no token is available, or the request fails.
	    """
	    try:
	        from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token
	    except ImportError:
	        return None

	    token = resolve_anthropic_token()
	    if not token:
	        return None

	    if _is_oauth_token(token):
	        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS

	        auth_headers = {
	            "Authorization": f"Bearer {token}",
	            "anthropic-beta": ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS),
	        }
	    else:
	        auth_headers = {"x-api-key": token}
	    request_headers: dict[str, str] = {"anthropic-version": "2023-06-01", **auth_headers}

	    def _tier_then_id(name: str) -> tuple[bool, bool, bool, str]:
	        # False sorts before True, so a tier keyword being present moves
	        # the id towards the front; the id itself breaks ties.
	        return ("opus" not in name, "sonnet" not in name, "haiku" not in name, name)

	    request = urllib.request.Request(
	        "https://api.anthropic.com/v1/models",
	        headers=request_headers,
	    )
	    try:
	        with urllib.request.urlopen(request, timeout=timeout) as response:
	            payload = json.loads(response.read().decode())
	        ids = [entry["id"] for entry in payload.get("data", []) if entry.get("id")]
	        return sorted(ids, key=_tier_then_id)
	    except Exception as exc:
	        import logging

	        logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", exc)
	        return None


	def _payload_items(payload: Any) -> list[dict[str, Any]]:
	    """Extract the dict entries from a ``/models``-style JSON payload.

	    Accepts either a bare list or an object whose ``"data"`` key holds the
	    list. Non-dict entries are dropped; any other payload shape yields ``[]``.
	    """
	    entries = payload.get("data", []) if isinstance(payload, dict) else payload
	    if not isinstance(entries, list):
	        return []
	    return [entry for entry in entries if isinstance(entry, dict)]


	def copilot_default_headers() -> dict[str, str]:
	    """Return the standard header set for Copilot API requests.

	    Prefers ``hermes_cli.copilot_auth.copilot_request_headers`` (called
	    with ``is_agent_turn=True``). If that import fails, a static set is
	    returned that carries the editor version, a Hermes user agent and the
	    ``Openai-Intent`` / ``x-initiator`` headers.
	    """
	    try:
	        from hermes_cli.copilot_auth import copilot_request_headers

	        return copilot_request_headers(is_agent_turn=True)
	    except ImportError:
	        pass

	    static_headers: dict[str, str] = {}
	    static_headers["Editor-Version"] = COPILOT_EDITOR_VERSION
	    static_headers["User-Agent"] = "HermesAgent/1.0"
	    static_headers["Openai-Intent"] = "conversation-edits"
	    static_headers["x-initiator"] = "agent"
	    return static_headers


	def _copilot_catalog_item_is_text_model(item: dict[str, Any]) -> bool:
	    """Decide whether a Copilot catalog entry is a selectable chat model.

	    An entry is rejected when any of these hold:

	    - it has no non-blank ``id``;
	    - ``model_picker_enabled`` is exactly ``False``;
	    - ``capabilities`` is a dict whose ``type`` is set to something other
	      than ``chat``;
	    - ``supported_endpoints`` is a list with at least one non-blank entry,
	      none of which is ``/chat/completions``, ``/responses`` or
	      ``/v1/messages``.

	    Missing fields never cause a rejection on their own.
	    """
	    if not str(item.get("id") or "").strip():
	        return False
	    if item.get("model_picker_enabled") is False:
	        return False

	    caps = item.get("capabilities")
	    if isinstance(caps, dict):
	        kind = str(caps.get("type") or "").strip().lower()
	        if kind not in ("", "chat"):
	            return False

	    endpoints = item.get("supported_endpoints")
	    if isinstance(endpoints, list):
	        advertised = {text for text in (str(raw).strip() for raw in endpoints) if text}
	        usable = {"/chat/completions", "/responses", "/v1/messages"}
	        if advertised and advertised.isdisjoint(usable):
	            return False

	    return True


	def fetch_github_model_catalog(
	    api_key: Optional[str] = None, timeout: float = 5.0
	) -> Optional[list[dict[str, Any]]]:
	    """Fetch the live GitHub Copilot model catalog for this account.

	    The request goes to ``COPILOT_MODELS_URL``. With an ``api_key`` it is
	    tried authenticated first, then once more without the Authorization
	    header; without a key only the unauthenticated attempt is made.

	    Returns the first non-empty list of text-model entries (first
	    occurrence per id, original order), or ``None`` if every attempt fails
	    or yields nothing usable.
	    """
	    header_sets: list[dict[str, str]] = []
	    if api_key:
	        header_sets.append(
	            {**copilot_default_headers(), "Authorization": f"Bearer {api_key}"}
	        )
	    header_sets.append(copilot_default_headers())

	    for header_set in header_sets:
	        request = urllib.request.Request(COPILOT_MODELS_URL, headers=header_set)
	        try:
	            with urllib.request.urlopen(request, timeout=timeout) as response:
	                payload = json.loads(response.read().decode())

	            # A dict keeps insertion order, giving "first entry per id wins".
	            unique: dict[str, dict[str, Any]] = {}
	            for entry in _payload_items(payload):
	                if not _copilot_catalog_item_is_text_model(entry):
	                    continue
	                entry_id = str(entry.get("id") or "").strip()
	                if entry_id and entry_id not in unique:
	                    unique[entry_id] = entry
	            if unique:
	                return list(unique.values())
	        except Exception:
	            # Network, HTTP or parse failure: move on to the next attempt.
	            continue
	    return None


	def _is_github_models_base_url(base_url: Optional[str]) -> bool:
	    """Return True when ``base_url`` points at a GitHub-hosted model endpoint.

	    The URL is trimmed, stripped of trailing slashes and lower-cased, then
	    prefix-matched against ``COPILOT_BASE_URL`` and the
	    ``https://models.github.ai/inference`` endpoint.
	    """
	    github_prefixes = (COPILOT_BASE_URL, "https://models.github.ai/inference")
	    cleaned = (base_url or "").strip().rstrip("/").lower()
	    return cleaned.startswith(github_prefixes)


	def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
	    """Return the ids from the live Copilot catalog, or None if unavailable.

	    Thin wrapper around ``fetch_github_model_catalog`` that keeps only the
	    truthy ``id`` values, in catalog order.
	    """
	    entries = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
	    if not entries:
	        return None
	    return [entry_id for entry in entries if (entry_id := entry.get("id"))]


	# Vendor-prefixed model ids mapped to the Copilot model id used in their
	# place. Insertion order matches the original literal: OpenAI entries
	# first, then Anthropic.
	_COPILOT_MODEL_ALIASES = {
	    **{
	        f"openai/{slug}": target
	        for slug, target in (
	            ("gpt-5", "gpt-5-mini"),
	            ("gpt-5-chat", "gpt-5-mini"),
	            ("gpt-5-mini", "gpt-5-mini"),
	            ("gpt-5-nano", "gpt-5-mini"),
	            ("gpt-4.1", "gpt-4.1"),
	            ("gpt-4.1-mini", "gpt-4.1"),
	            ("gpt-4.1-nano", "gpt-4.1"),
	            ("gpt-4o", "gpt-4o"),
	            ("gpt-4o-mini", "gpt-4o-mini"),
	            ("o1", "gpt-5.2"),
	            ("o1-mini", "gpt-5-mini"),
	            ("o1-preview", "gpt-5.2"),
	            ("o3", "gpt-5.3-codex"),
	            ("o3-mini", "gpt-5-mini"),
	            ("o4-mini", "gpt-5-mini"),
	        )
	    },
	    # Anthropic ids map to themselves minus the vendor prefix.
	    **{
	        f"anthropic/{slug}": slug
	        for slug in (
	            "claude-opus-4.6",
	            "claude-sonnet-4.6",
	            "claude-sonnet-4.5",
	            "claude-haiku-4.5",
	        )
	    },
	}


	def _copilot_catalog_ids(
	    catalog: Optional[list[dict[str, Any]]] = None,
	    api_key: Optional[str] = None,
	) -> set[str]:
	    """Return the set of non-blank, trimmed ids found in a Copilot catalog.

	    When no ``catalog`` is supplied but an ``api_key`` is, the catalog is
	    fetched with ``fetch_github_model_catalog``. A missing or empty
	    catalog yields an empty set.
	    """
	    if catalog is None and api_key:
	        catalog = fetch_github_model_catalog(api_key=api_key)

	    found: set[str] = set()
	    for entry in catalog or ():
	        entry_id = str(entry.get("id") or "").strip()
	        if entry_id:
	            found.add(entry_id)
	    return found


	def normalize_copilot_model_id(
	    model_id: Optional[str],
	    *,
	    catalog: Optional[list[dict[str, Any]]] = None,
	    api_key: Optional[str] = None,
	) -> str:
	    """Translate a requested model id into the id Copilot expects.

	    Resolution order:

	    1. a direct hit in ``_COPILOT_MODEL_ALIASES``;
	    2. for each candidate spelling (the id itself, the id without its
	       ``vendor/`` prefix, the id without a trailing ``-mini`` /
	       ``-nano`` / ``-chat``): an alias hit, else a catalog hit;
	    3. otherwise the id without its vendor prefix.

	    ``catalog`` is a pre-fetched Copilot catalog; when it is ``None`` and
	    ``api_key`` is given the catalog is fetched. Blank input returns ``""``.
	    """
	    raw = str(model_id or "").strip()
	    if not raw:
	        return ""

	    # Aliases resolve without the catalog, so check them before
	    # _copilot_catalog_ids(), which may make a network request when only
	    # api_key is supplied.
	    alias = _COPILOT_MODEL_ALIASES.get(raw)
	    if alias:
	        return alias

	    catalog_ids = _copilot_catalog_ids(catalog=catalog, api_key=api_key)

	    has_vendor = "/" in raw
	    bare = raw.split("/", 1)[1].strip() if has_vendor else raw

	    candidates = [raw]
	    if has_vendor:
	        candidates.append(bare)
	    # All three suffixes are five characters long; at most one can match.
	    if raw.endswith(("-mini", "-nano", "-chat")):
	        candidates.append(raw[:-5])

	    seen: set[str] = set()
	    for candidate in candidates:
	        if not candidate or candidate in seen:
	            continue
	        seen.add(candidate)
	        if candidate in _COPILOT_MODEL_ALIASES:
	            return _COPILOT_MODEL_ALIASES[candidate]
	        if candidate in catalog_ids:
	            return candidate

	    return bare


	def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]:
	    """Guess the reasoning-effort levels for a model from its id alone.

	    Ids starting with ``o1`` / ``o3`` / ``o4`` (optionally prefixed with
	    ``openai/``) get a copy of ``COPILOT_REASONING_EFFORTS_O_SERIES``.
	    Ids whose Copilot-normalized form starts with ``gpt-5`` get a copy of
	    ``COPILOT_REASONING_EFFORTS_GPT5``. Everything else gets ``[]``.
	    """
	    o_series_prefixes = ("openai/o1", "openai/o3", "openai/o4", "o1", "o3", "o4")
	    lowered = (model_id or "").strip().lower()
	    if lowered.startswith(o_series_prefixes):
	        return list(COPILOT_REASONING_EFFORTS_O_SERIES)

	    copilot_id = normalize_copilot_model_id(model_id).lower()
	    if copilot_id.startswith("gpt-5"):
	        return list(COPILOT_REASONING_EFFORTS_GPT5)

	    return []


	def _should_use_copilot_responses_api(model_id: str) -> bool:
	    """Decide whether a Copilot model should use the Responses API.

	    Mirrors opencode's ``shouldUseCopilotResponsesApi``: ids of the form
	    ``gpt-<major>...`` with a major version of 5 or higher use the
	    Responses API, except ids starting with ``gpt-5-mini``. Every id that
	    does not start with ``gpt-<digits>`` (Claude, Gemini, ...) uses Chat
	    Completions.
	    """
	    import re

	    gpt_major = re.match(r"^gpt-(\d+)", model_id)
	    if gpt_major is None:
	        return False
	    if model_id.startswith("gpt-5-mini"):
	        return False
	    return int(gpt_major.group(1)) >= 5


	def copilot_model_api_mode(
	    model_id: Optional[str],
	    *,
	    catalog: Optional[list[dict[str, Any]]] = None,
	    api_key: Optional[str] = None,
	) -> str:
	    """Determine the API mode for a Copilot model.

	    The model id pattern is the primary signal (matching opencode's
	    approach): ids accepted by ``_should_use_copilot_responses_api``
	    return ``"codex_responses"``. For the remaining models the catalog
	    entry's ``supported_endpoints`` is consulted: a model that lists
	    ``/v1/messages`` but not ``/chat/completions`` returns
	    ``"anthropic_messages"``. Everything else, including blank ids,
	    returns ``"chat_completions"``.

	    ``catalog`` is a pre-fetched Copilot catalog. When it is ``None`` and
	    ``api_key`` is given, the catalog is fetched once and reused for both
	    id normalization and the endpoint lookup.
	    """
	    if not str(model_id or "").strip():
	        return "chat_completions"

	    # Fetch at most once. Passing the result on (and api_key=None) stops
	    # normalize_copilot_model_id from issuing a second catalog request.
	    if catalog is None and api_key:
	        catalog = fetch_github_model_catalog(api_key=api_key)
	    known_catalog = catalog or []

	    normalized = normalize_copilot_model_id(model_id, catalog=known_catalog, api_key=None)
	    if not normalized:
	        return "chat_completions"

	    # Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi)
	    if _should_use_copilot_responses_api(normalized):
	        return "codex_responses"

	    # Secondary: catalog lookup for the other models (Claude via /v1/messages, etc.)
	    catalog_entry = next(
	        (item for item in known_catalog if item.get("id") == normalized), None
	    )
	    if isinstance(catalog_entry, dict):
	        supported_endpoints = {
	            str(endpoint).strip()
	            for endpoint in (catalog_entry.get("supported_endpoints") or [])
	            if str(endpoint).strip()
	        }
	        # Only switch when the model cannot be reached via chat completions.
	        if "/v1/messages" in supported_endpoints and "/chat/completions" not in supported_endpoints:
	            return "anthropic_messages"

	    return "chat_completions"


	def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
	    """Reduce an OpenCode config id to the bare model slug used in API requests.

	    For the ``opencode-zen`` and ``opencode-go`` providers a leading
	    ``<provider>/`` (matched case-insensitively) is removed. For any other
	    provider, or a blank id, the trimmed id is returned unchanged.
	    """
	    provider = normalize_provider(provider_id)
	    model = str(model_id or "").strip()

	    if model and provider in {"opencode-zen", "opencode-go"}:
	        provider_prefix = f"{provider}/"
	        if model.lower().startswith(provider_prefix):
	            return model[len(provider_prefix):]
	    return model


	def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
	    """Determine the API mode for an OpenCode Zen / Go model.

	    The decision is made on the prefix of the normalized, lower-cased
	    model slug:

	    - ``opencode-go``: ``minimax-*`` -> ``"anthropic_messages"``
	    - ``opencode-zen``: ``claude-*`` -> ``"anthropic_messages"``,
	      ``gpt-*`` -> ``"codex_responses"``
	    - anything else (other models, other providers, blank id) ->
	      ``"chat_completions"``

	    According to the original note this follows the published OpenCode
	    docs for the Zen and Go endpoints.
	    """
	    prefix_modes = {
	        "opencode-go": (("minimax-", "anthropic_messages"),),
	        "opencode-zen": (
	            ("claude-", "anthropic_messages"),
	            ("gpt-", "codex_responses"),
	        ),
	    }

	    provider = normalize_provider(provider_id)
	    slug = normalize_opencode_model_id(provider_id, model_id).lower()
	    if not slug:
	        return "chat_completions"

	    for slug_prefix, api_mode in prefix_modes.get(provider, ()):
	        if slug.startswith(slug_prefix):
	            return api_mode
	    return "chat_completions"


	def github_model_reasoning_efforts(
	    model_id: Optional[str],
	    *,
	    catalog: Optional[list[dict[str, Any]]] = None,
	    api_key: Optional[str] = None,
	) -> list[str]:
	    """Return supported reasoning-effort levels for a Copilot-visible model.

	    If the model has a catalog entry, the entry decides:

	    - dict-style ``capabilities``: the de-duplicated, lower-cased
	      ``capabilities.supports.reasoning_effort`` list, or ``[]`` when that
	      list is absent;
	    - legacy list-style ``capabilities``: ``[]`` unless ``"reasoning"`` is
	      listed, in which case the id-based guess below is used.

	    Without a catalog entry the result comes from
	    ``_github_reasoning_efforts_for_model_id``. Blank ids return ``[]``.

	    ``catalog`` is a pre-fetched Copilot catalog. When it is ``None`` and
	    ``api_key`` is given, the catalog is fetched once and reused for both
	    id normalization and the entry lookup.
	    """
	    if not str(model_id or "").strip():
	        return []

	    # Fetch at most once. Passing the result on (and api_key=None) stops
	    # normalize_copilot_model_id from issuing a second catalog request.
	    if catalog is None and api_key:
	        catalog = fetch_github_model_catalog(api_key=api_key)
	    known_catalog = catalog or []

	    normalized = normalize_copilot_model_id(model_id, catalog=known_catalog, api_key=None)
	    if not normalized:
	        return []

	    catalog_entry = next(
	        (item for item in known_catalog if item.get("id") == normalized), None
	    )

	    if catalog_entry is not None:
	        capabilities = catalog_entry.get("capabilities")
	        if isinstance(capabilities, dict):
	            supports = capabilities.get("supports")
	            if isinstance(supports, dict):
	                efforts = supports.get("reasoning_effort")
	                if isinstance(efforts, list):
	                    normalized_efforts = [
	                        str(effort).strip().lower()
	                        for effort in efforts
	                        if str(effort).strip()
	                    ]
	                    # dict.fromkeys de-duplicates while keeping order.
	                    return list(dict.fromkeys(normalized_efforts))
	            # Structured capabilities without an effort list: no reasoning.
	            return []
	        # Legacy shape: a flat list of capability names. ``or []`` also
	        # covers an explicit ``"capabilities": null``.
	        legacy_capabilities = {
	            str(capability).strip().lower()
	            for capability in (capabilities or [])
	            if str(capability).strip()
	        }
	        if "reasoning" not in legacy_capabilities:
	            return []

	    return _github_reasoning_efforts_for_model_id(str(model_id or normalized))


	def probe_api_models(
	    api_key: Optional[str],
	    base_url: Optional[str],
	    timeout: float = 5.0,
	) -> dict[str, Any]:
	    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.

	    GitHub-hosted base URLs are answered from the Copilot catalog. For any
	    other URL, ``<base>/models`` is tried first, then the same URL with
	    ``/v1`` toggled (removed if present, appended if absent). The first
	    attempt that returns parseable JSON wins.

	    The response may be an object with a ``data`` list or a bare list;
	    entries without a usable ``id`` are ignored.

	    Returns a dict with:
	    - models: list of model ids, or ``None`` if no attempt succeeded
	    - probed_url: the URL that answered (or the first one tried on failure)
	    - resolved_base_url: the base URL that worked (the input on failure)
	    - suggested_base_url: the alternate base URL worth trying, if any
	    - used_fallback: whether the ``/v1``-toggled URL was the one that worked
	    """
	    normalized = (base_url or "").strip().rstrip("/")
	    if not normalized:
	        return {
	            "models": None,
	            "probed_url": None,
	            "resolved_base_url": "",
	            "suggested_base_url": None,
	            "used_fallback": False,
	        }

	    if _is_github_models_base_url(normalized):
	        models = _fetch_github_models(api_key=api_key, timeout=timeout)
	        return {
	            "models": models,
	            "probed_url": COPILOT_MODELS_URL,
	            "resolved_base_url": COPILOT_BASE_URL,
	            "suggested_base_url": None,
	            "used_fallback": False,
	        }

	    if normalized.endswith("/v1"):
	        alternate_base = normalized[:-3].rstrip("/")
	    else:
	        alternate_base = normalized + "/v1"

	    candidates: list[tuple[str, bool]] = [(normalized, False)]
	    if alternate_base and alternate_base != normalized:
	        candidates.append((alternate_base, True))

	    # Copilot URLs never reach this point (handled above), so plain bearer
	    # auth is the only header needed.
	    headers: dict[str, str] = {}
	    if api_key:
	        headers["Authorization"] = f"Bearer {api_key}"

	    for candidate_base, is_fallback in candidates:
	        url = candidate_base.rstrip("/") + "/models"
	        req = urllib.request.Request(url, headers=headers)
	        try:
	            with urllib.request.urlopen(req, timeout=timeout) as resp:
	                data = json.loads(resp.read().decode())
	            return {
	                # _payload_items accepts both {"data": [...]} and a bare list
	                # and drops non-dict entries.
	                "models": [
	                    str(item["id"]) for item in _payload_items(data) if item.get("id")
	                ],
	                "probed_url": url,
	                "resolved_base_url": candidate_base.rstrip("/"),
	                "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
	                "used_fallback": is_fallback,
	            }
	        except Exception:
	            # Unreachable, non-2xx or non-JSON: try the next candidate.
	            continue

	    return {
	        "models": None,
	        "probed_url": normalized + "/models",
	        "resolved_base_url": normalized,
	        "suggested_base_url": alternate_base if alternate_base != normalized else None,
	        "used_fallback": False,
	    }


	def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
	    """Fetch the tool-capable language models offered by AI Gateway.

	    Requires ``AI_GATEWAY_API_KEY``. The base URL comes from
	    ``AI_GATEWAY_BASE_URL`` or, if unset, from
	    ``hermes_constants.AI_GATEWAY_BASE_URL``. Only entries of type
	    ``language`` tagged ``tool-use`` are returned.

	    Returns ``None`` when no key is configured or the request fails.
	    """
	    token = os.getenv("AI_GATEWAY_API_KEY", "").strip()
	    if not token:
	        return None

	    gateway_base = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
	    if not gateway_base:
	        from hermes_constants import AI_GATEWAY_BASE_URL

	        gateway_base = AI_GATEWAY_BASE_URL

	    def _is_tool_capable_language_model(entry: dict[str, Any]) -> bool:
	        if not entry.get("id"):
	            return False
	        if entry.get("type") != "language":
	            return False
	        return "tool-use" in (entry.get("tags") or [])

	    request = urllib.request.Request(
	        gateway_base.rstrip("/") + "/models",
	        headers={"Authorization": f"Bearer {token}"},
	    )
	    try:
	        with urllib.request.urlopen(request, timeout=timeout) as response:
	            payload = json.loads(response.read().decode())
	        return [
	            entry["id"]
	            for entry in payload.get("data", [])
	            if _is_tool_capable_language_model(entry)
	        ]
	    except Exception:
	        return None


	def fetch_api_models(
	    api_key: Optional[str],
	    base_url: Optional[str],
	    timeout: float = 5.0,
	) -> Optional[list[str]]:
	    """Return the model ids listed by the provider's ``/models`` endpoint.

	    Convenience wrapper around ``probe_api_models`` that keeps only its
	    ``"models"`` entry: a list of ids, or ``None`` when the endpoint
	    could not be queried.
	    """
	    probe_result = probe_api_models(api_key, base_url, timeout=timeout)
	    return probe_result.get("models")


	def _model_validation_result(
	    *, accepted: bool, recognized: bool, message: Optional[str]
	) -> dict[str, Any]:
	    """Build the result dict for validate_requested_model (persist mirrors accepted)."""
	    return {
	        "accepted": accepted,
	        "persist": accepted,
	        "recognized": recognized,
	        "message": message,
	    }


	def _similar_models_hint(name: str, known_models: list[str]) -> str:
	    """Return the "Similar models" message suffix, or "" when nothing is close."""
	    close = get_close_matches(name, known_models, n=3, cutoff=0.5)
	    if not close:
	        return ""
	    return "\n Similar models: " + ", ".join(f"`{s}`" for s in close)


	def validate_requested_model(
	    model_name: str,
	    provider: Optional[str],
	    *,
	    api_key: Optional[str] = None,
	    base_url: Optional[str] = None,
	) -> dict[str, Any]:
	    """
	    Validate a ``/model`` value for the active provider.

	    Format checks run first (no I/O). After that the provider's model
	    listing is consulted. A name that is missing from the listing, or a
	    listing that cannot be reached, only produces a warning: the model is
	    still accepted and persisted.

	    An ``openrouter`` provider with a ``base_url`` that does not contain
	    ``openrouter.ai`` is treated as a custom endpoint.

	    Returns a dict with:
	    - accepted: whether the CLI should switch to the requested model now
	    - persist: whether it is safe to save to config
	    - recognized: whether it matched a known provider catalog
	    - message: optional warning / guidance for the user
	    """
	    requested = (model_name or "").strip()

	    # Cheap format checks first, so a malformed name is rejected before
	    # any provider lookup (Copilot normalization may fetch the catalog).
	    if not requested:
	        return _model_validation_result(
	            accepted=False,
	            recognized=False,
	            message="Model name cannot be empty.",
	        )
	    if any(ch.isspace() for ch in requested):
	        return _model_validation_result(
	            accepted=False,
	            recognized=False,
	            message="Model names cannot contain spaces.",
	        )

	    normalized = normalize_provider(provider)
	    if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url:
	        normalized = "custom"

	    requested_for_lookup = requested
	    if normalized == "copilot":
	        requested_for_lookup = normalize_copilot_model_id(
	            requested,
	            api_key=api_key,
	        ) or requested

	    confirmed = _model_validation_result(accepted=True, recognized=True, message=None)

	    if normalized == "custom":
	        probe = probe_api_models(api_key, base_url)
	        api_models = probe.get("models")

	        if api_models is None:
	            message = (
	                f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
	                f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
	            )
	            if probe.get("suggested_base_url"):
	                message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
	            return _model_validation_result(accepted=True, recognized=False, message=message)

	        if requested_for_lookup in set(api_models):
	            return confirmed

	        message = (
	            f"Note: `{requested}` was not found in this custom endpoint's model listing "
	            f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
	            f"{_similar_models_hint(requested, api_models)}"
	        )
	        if probe.get("used_fallback"):
	            message += (
	                f"\n Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
	                f"Consider saving that as your base URL."
	            )
	        return _model_validation_result(accepted=True, recognized=False, message=message)

	    # OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path.
	    if normalized == "openai-codex":
	        try:
	            codex_models = provider_model_ids("openai-codex")
	        except Exception:
	            codex_models = []
	        # An empty Codex catalog falls through to the generic probe below.
	        if codex_models:
	            if requested_for_lookup in set(codex_models):
	                return confirmed
	            return _model_validation_result(
	                accepted=True,
	                recognized=False,
	                message=(
	                    f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
	                    f"It may still work if your account has access to it."
	                    f"{_similar_models_hint(requested_for_lookup, codex_models)}"
	                ),
	            )

	    # Probe the live API to check if the model actually exists
	    api_models = fetch_api_models(api_key, base_url)

	    if api_models is not None:
	        if requested_for_lookup in set(api_models):
	            return confirmed
	        # API responded but model is not listed. Accept anyway —
	        # the user may have access to models not shown in the public
	        # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding
	        # endpoints even though it's not in /models). Warn but allow.
	        return _model_validation_result(
	            accepted=True,
	            recognized=False,
	            message=(
	                f"Note: `{requested}` was not found in this provider's model listing. "
	                f"It may still work if your plan supports it."
	                f"{_similar_models_hint(requested, api_models)}"
	            ),
	        )

	    # api_models is None — couldn't reach API. Accept and persist,
	    # but warn so typos don't silently break things.
	    display_name = _PROVIDER_LABELS.get(normalized, normalized)
	    return _model_validation_result(
	        accepted=True,
	        recognized=False,
	        message=(
	            f"Could not reach the {display_name} API to validate `{requested}`. "
	            f"If the service isn't down, this model may not be valid."
	        ),
	    )