Spaces:

lenson78
/

hermes

Paused

App Files Files Community

hermes / agent /models_dev.py

lenson78

initial upload: v2026.3.23 with HF Spaces deployment

9aa5185 verified 5 days ago

raw

history blame contribute delete

5.46 kB

	"""Models.dev registry integration for provider-aware context length detection.

	Fetches model metadata from https://models.dev/api.json — a community-maintained
	database of 3800+ models across 100+ providers, including per-provider context
	windows, pricing, and capabilities.

	Data is cached in memory (1hr TTL) and on disk (~/.hermes/models_dev_cache.json)
	to avoid cold-start network latency.
	"""

	import json
	import logging
	import os
	import time
	from pathlib import Path
	from typing import Any, Dict, Optional

	import requests

	logger = logging.getLogger(__name__)

	MODELS_DEV_URL = "https://models.dev/api.json"
	_MODELS_DEV_CACHE_TTL = 3600 # 1 hour in-memory

	# In-memory cache
	_models_dev_cache: Dict[str, Any] = {}
	_models_dev_cache_time: float = 0

	# Provider ID mapping: Hermes provider names → models.dev provider IDs
	PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
	"openrouter": "openrouter",
	"anthropic": "anthropic",
	"zai": "zai",
	"kimi-coding": "kimi-for-coding",
	"minimax": "minimax",
	"minimax-cn": "minimax-cn",
	"deepseek": "deepseek",
	"alibaba": "alibaba",
	"copilot": "github-copilot",
	"ai-gateway": "vercel",
	"opencode-zen": "opencode",
	"opencode-go": "opencode-go",
	"kilocode": "kilo",
	}


	def _get_cache_path() -> Path:
	"""Return path to disk cache file."""
	env_val = os.environ.get("HERMES_HOME", "")
	hermes_home = Path(env_val) if env_val else Path.home() / ".hermes"
	return hermes_home / "models_dev_cache.json"


	def _load_disk_cache() -> Dict[str, Any]:
	"""Load models.dev data from disk cache."""
	try:
	cache_path = _get_cache_path()
	if cache_path.exists():
	with open(cache_path, encoding="utf-8") as f:
	return json.load(f)
	except Exception as e:
	logger.debug("Failed to load models.dev disk cache: %s", e)
	return {}


	def _save_disk_cache(data: Dict[str, Any]) -> None:
	"""Save models.dev data to disk cache."""
	try:
	cache_path = _get_cache_path()
	cache_path.parent.mkdir(parents=True, exist_ok=True)
	with open(cache_path, "w", encoding="utf-8") as f:
	json.dump(data, f, separators=(",", ":"))
	except Exception as e:
	logger.debug("Failed to save models.dev disk cache: %s", e)


	def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
	"""Fetch models.dev registry. In-memory cache (1hr) + disk fallback.

	Returns the full registry dict keyed by provider ID, or empty dict on failure.
	"""
	global _models_dev_cache, _models_dev_cache_time

	# Check in-memory cache
	if (
	not force_refresh
	and _models_dev_cache
	and (time.time() - _models_dev_cache_time) < _MODELS_DEV_CACHE_TTL
	):
	return _models_dev_cache

	# Try network fetch
	try:
	response = requests.get(MODELS_DEV_URL, timeout=15)
	response.raise_for_status()
	data = response.json()
	if isinstance(data, dict) and len(data) > 0:
	_models_dev_cache = data
	_models_dev_cache_time = time.time()
	_save_disk_cache(data)
	logger.debug(
	"Fetched models.dev registry: %d providers, %d total models",
	len(data),
	sum(len(p.get("models", {})) for p in data.values() if isinstance(p, dict)),
	)
	return data
	except Exception as e:
	logger.debug("Failed to fetch models.dev: %s", e)

	# Fall back to disk cache — use a short TTL (5 min) so we retry
	# the network fetch soon instead of serving stale data for a full hour.
	if not _models_dev_cache:
	_models_dev_cache = _load_disk_cache()
	if _models_dev_cache:
	_models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300
	logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))

	return _models_dev_cache


	def lookup_models_dev_context(provider: str, model: str) -> Optional[int]:
	"""Look up context_length for a provider+model combo in models.dev.

	Returns the context window in tokens, or None if not found.
	Handles case-insensitive matching and filters out context=0 entries.
	"""
	mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
	if not mdev_provider_id:
	return None

	data = fetch_models_dev()
	provider_data = data.get(mdev_provider_id)
	if not isinstance(provider_data, dict):
	return None

	models = provider_data.get("models", {})
	if not isinstance(models, dict):
	return None

	# Exact match
	entry = models.get(model)
	if entry:
	ctx = _extract_context(entry)
	if ctx:
	return ctx

	# Case-insensitive match
	model_lower = model.lower()
	for mid, mdata in models.items():
	if mid.lower() == model_lower:
	ctx = _extract_context(mdata)
	if ctx:
	return ctx

	return None


	def _extract_context(entry: Dict[str, Any]) -> Optional[int]:
	"""Extract context_length from a models.dev model entry.

	Returns None for invalid/zero values (some audio/image models have context=0).
	"""
	if not isinstance(entry, dict):
	return None
	limit = entry.get("limit")
	if not isinstance(limit, dict):
	return None
	ctx = limit.get("context")
	if isinstance(ctx, (int, float)) and ctx > 0:
	return int(ctx)
	return None