| """Models.dev registry integration for provider-aware context length detection. |
| |
| Fetches model metadata from https://models.dev/api.json — a community-maintained |
| database of 3800+ models across 100+ providers, including per-provider context |
| windows, pricing, and capabilities. |
| |
| Data is cached in memory (1hr TTL) and on disk (~/.hermes/models_dev_cache.json) |
| to avoid cold-start network latency. |
| """ |
|
|
| import json |
| import logging |
| import os |
| import time |
| from pathlib import Path |
| from typing import Any, Dict, Optional |
|
|
| import requests |
|
|
| logger = logging.getLogger(__name__) |
|
|
| MODELS_DEV_URL = "https://models.dev/api.json" |
| _MODELS_DEV_CACHE_TTL = 3600 |
|
|
| |
| _models_dev_cache: Dict[str, Any] = {} |
| _models_dev_cache_time: float = 0 |
|
|
| |
| PROVIDER_TO_MODELS_DEV: Dict[str, str] = { |
| "openrouter": "openrouter", |
| "anthropic": "anthropic", |
| "zai": "zai", |
| "kimi-coding": "kimi-for-coding", |
| "minimax": "minimax", |
| "minimax-cn": "minimax-cn", |
| "deepseek": "deepseek", |
| "alibaba": "alibaba", |
| "copilot": "github-copilot", |
| "ai-gateway": "vercel", |
| "opencode-zen": "opencode", |
| "opencode-go": "opencode-go", |
| "kilocode": "kilo", |
| } |
|
|
|
|
| def _get_cache_path() -> Path: |
| """Return path to disk cache file.""" |
| env_val = os.environ.get("HERMES_HOME", "") |
| hermes_home = Path(env_val) if env_val else Path.home() / ".hermes" |
| return hermes_home / "models_dev_cache.json" |
|
|
|
|
| def _load_disk_cache() -> Dict[str, Any]: |
| """Load models.dev data from disk cache.""" |
| try: |
| cache_path = _get_cache_path() |
| if cache_path.exists(): |
| with open(cache_path, encoding="utf-8") as f: |
| return json.load(f) |
| except Exception as e: |
| logger.debug("Failed to load models.dev disk cache: %s", e) |
| return {} |
|
|
|
|
| def _save_disk_cache(data: Dict[str, Any]) -> None: |
| """Save models.dev data to disk cache.""" |
| try: |
| cache_path = _get_cache_path() |
| cache_path.parent.mkdir(parents=True, exist_ok=True) |
| with open(cache_path, "w", encoding="utf-8") as f: |
| json.dump(data, f, separators=(",", ":")) |
| except Exception as e: |
| logger.debug("Failed to save models.dev disk cache: %s", e) |
|
|
|
|
| def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]: |
| """Fetch models.dev registry. In-memory cache (1hr) + disk fallback. |
| |
| Returns the full registry dict keyed by provider ID, or empty dict on failure. |
| """ |
| global _models_dev_cache, _models_dev_cache_time |
|
|
| |
| if ( |
| not force_refresh |
| and _models_dev_cache |
| and (time.time() - _models_dev_cache_time) < _MODELS_DEV_CACHE_TTL |
| ): |
| return _models_dev_cache |
|
|
| |
| try: |
| response = requests.get(MODELS_DEV_URL, timeout=15) |
| response.raise_for_status() |
| data = response.json() |
| if isinstance(data, dict) and len(data) > 0: |
| _models_dev_cache = data |
| _models_dev_cache_time = time.time() |
| _save_disk_cache(data) |
| logger.debug( |
| "Fetched models.dev registry: %d providers, %d total models", |
| len(data), |
| sum(len(p.get("models", {})) for p in data.values() if isinstance(p, dict)), |
| ) |
| return data |
| except Exception as e: |
| logger.debug("Failed to fetch models.dev: %s", e) |
|
|
| |
| |
| if not _models_dev_cache: |
| _models_dev_cache = _load_disk_cache() |
| if _models_dev_cache: |
| _models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300 |
| logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache)) |
|
|
| return _models_dev_cache |
|
|
|
|
| def lookup_models_dev_context(provider: str, model: str) -> Optional[int]: |
| """Look up context_length for a provider+model combo in models.dev. |
| |
| Returns the context window in tokens, or None if not found. |
| Handles case-insensitive matching and filters out context=0 entries. |
| """ |
| mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider) |
| if not mdev_provider_id: |
| return None |
|
|
| data = fetch_models_dev() |
| provider_data = data.get(mdev_provider_id) |
| if not isinstance(provider_data, dict): |
| return None |
|
|
| models = provider_data.get("models", {}) |
| if not isinstance(models, dict): |
| return None |
|
|
| |
| entry = models.get(model) |
| if entry: |
| ctx = _extract_context(entry) |
| if ctx: |
| return ctx |
|
|
| |
| model_lower = model.lower() |
| for mid, mdata in models.items(): |
| if mid.lower() == model_lower: |
| ctx = _extract_context(mdata) |
| if ctx: |
| return ctx |
|
|
| return None |
|
|
|
|
| def _extract_context(entry: Dict[str, Any]) -> Optional[int]: |
| """Extract context_length from a models.dev model entry. |
| |
| Returns None for invalid/zero values (some audio/image models have context=0). |
| """ |
| if not isinstance(entry, dict): |
| return None |
| limit = entry.get("limit") |
| if not isinstance(limit, dict): |
| return None |
| ctx = limit.get("context") |
| if isinstance(ctx, (int, float)) and ctx > 0: |
| return int(ctx) |
| return None |
|
|