Spaces:

edyxapi
/

rushagentrush

Sleeping

adityaverma977

Fix Space runtime HF router integration

cb64216 9 days ago

4.04 kB

	import os
	import time

	import httpx

	# Access token for Hugging Face requests: HF_API_TOKEN is consulted first,
	# then HUGGINGFACE_API_TOKEN. Surrounding whitespace is trimmed and an empty
	# string means that no token is configured.
	HF_API_TOKEN = (os.getenv("HF_API_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN", "")).strip()
	# URL requested by _fetch_router_model_ids() to list the router's models.
	ROUTER_MODELS_URL = "https://router.huggingface.co/v1/models"

	# Curated model catalog, in preference order (the first entry is what
	# get_default_model_id() returns). Each row below is
	# (id, name, size, description) and is expanded into a dict with those keys.
	PREFERRED_MODELS = [
	    {"id": model_id, "name": name, "size": size, "description": description}
	    for model_id, name, size, description in (
	        ("meta-llama/Llama-3.1-8B-Instruct", "Llama 3.1 8B Instruct", "medium", "Fast general-purpose instruct model"),
	        ("Qwen/Qwen2.5-7B-Instruct", "Qwen 2.5 7B Instruct", "medium", "Reliable JSON-following instruction model"),
	        ("meta-llama/Meta-Llama-3-8B-Instruct", "Meta Llama 3 8B Instruct", "medium", "Strong general chat behavior"),
	        ("google/gemma-3n-E4B-it", "Gemma 3n E4B", "small", "Lightweight instruction-tuned Gemma model"),
	        ("Sao10K/L3-8B-Stheno-v3.2", "L3 8B Stheno v3.2", "medium", "Creative 8B chat model"),
	        ("XiaomiMiMo/MiMo-V2-Flash", "MiMo V2 Flash", "medium", "Fast flash-tier chat model"),
	        ("google/gemma-4-26B-A4B-it", "Gemma 4 26B A4B", "large", "Higher-capacity Gemma instruct model"),
	        ("google/gemma-4-31B-it", "Gemma 4 31B", "large", "Large Gemma chat model"),
	        ("Qwen/Qwen3.5-35B-A3B", "Qwen 3.5 35B A3B", "large", "Large Qwen instruction model"),
	        ("google/gemma-3-27b-it", "Gemma 3 27B", "large", "Large Gemma 3 instruct model"),
	        ("moonshotai/Kimi-K2.5", "Kimi K2.5", "large", "Large reasoning-oriented chat model"),
	        ("Qwen/Qwen3-Coder-30B-A3B-Instruct", "Qwen 3 Coder 30B", "large", "Coder-tuned instruction model"),
	        ("meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B Instruct", "xl", "Large instruction-following flagship model"),
	    )
	]

	# Module-level cache used by _fetch_router_model_ids(): "ids" holds the last
	# fetched id set (None until a fetch succeeds) and "expires_at" is the
	# time.monotonic() value after which that set is no longer served.
	_CACHE = dict(expires_at=0.0, ids=None)


	def _headers() -> dict[str, str]:
	    """Build the request headers for router calls.

	    Returns:
	        A dict with a Bearer ``Authorization`` entry when ``HF_API_TOKEN``
	        is non-empty, otherwise an empty dict.
	    """
	    return {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}


	def _extract_router_models(payload) -> list[dict]:
	    """Pull the model entries out of a decoded router response.

	    Args:
	        payload: Either a list of entries, or a dict whose ``"data"`` value
	            is such a list. Any other shape is treated as "no models".

	    Returns:
	        The dict items of the located list, in order; non-dict items are
	        dropped. An empty list when no list could be located.
	    """
	    # For a dict the entries live under "data"; otherwise the payload itself
	    # is the candidate list.
	    if isinstance(payload, dict):
	        candidates = payload.get("data")
	    else:
	        candidates = payload

	    if not isinstance(candidates, list):
	        return []
	    return [entry for entry in candidates if isinstance(entry, dict)]


	async def _fetch_router_model_ids() -> set[str] | None:
	    """Fetch the ids of the models listed at ``ROUTER_MODELS_URL``, with caching.

	    A previously fetched id set stored in ``_CACHE`` is returned as long as
	    its ``expires_at`` deadline (a ``time.monotonic()`` value) has not passed.

	    Returns:
	        The set of string ``id`` values found in the router response, or
	        ``None`` when no token is configured or the request/decoding fails.
	        Failures are not cached, so the next call tries again.
	    """
	    now = time.monotonic()
	    cached_ids = _CACHE["ids"]
	    if isinstance(cached_ids, set) and _CACHE["expires_at"] > now:
	        return cached_ids

	    # No request is attempted without a token.
	    if not HF_API_TOKEN:
	        return None

	    try:
	        async with httpx.AsyncClient(timeout=15.0) as client:
	            response = await client.get(ROUTER_MODELS_URL, headers=_headers())
	            response.raise_for_status()
	            payload = response.json()
	    except Exception:
	        # Deliberately best-effort: any transport, HTTP-status or decoding
	        # failure is reported as "unknown" (None) instead of propagating.
	        return None

	    models = _extract_router_models(payload)
	    # Entries whose "id" is missing or not a string are ignored.
	    ids = {item["id"] for item in models if isinstance(item.get("id"), str)}
	    _CACHE["ids"] = ids
	    # Keep the result for 300 seconds, counted from the timestamp taken on entry.
	    _CACHE["expires_at"] = now + 300
	    return ids


	def get_supported_model_ids() -> set[str]:
	    """Return the ``"id"`` of every ``PREFERRED_MODELS`` entry as a new set."""
	    return {entry["id"] for entry in PREFERRED_MODELS}


	def is_supported_model(model_id: str) -> bool:
	    """Tell whether ``model_id`` is among the ids from ``get_supported_model_ids()``."""
	    supported_ids = get_supported_model_ids()
	    return model_id in supported_ids


	def get_default_model_id() -> str:
	    """Return the ``"id"`` of the first entry in ``PREFERRED_MODELS``."""
	    default_entry = PREFERRED_MODELS[0]
	    return default_entry["id"]


	async def get_available_models() -> dict:
	    """Return the model catalog, narrowed to live router models when known.

	    If ``_fetch_router_model_ids()`` yields a non-empty id set, only the
	    ``PREFERRED_MODELS`` entries whose id is in that set are kept, in catalog
	    order. If it yields ``None`` or an empty set, the whole catalog is
	    returned as a shallow copy.

	    Returns:
	        ``{"models": <list of entries>, "total": <number of entries>}``.
	    """
	    live_ids = await _fetch_router_model_ids()
	    if not live_ids:
	        available = list(PREFERRED_MODELS)
	    else:
	        available = [entry for entry in PREFERRED_MODELS if entry["id"] in live_ids]
	    return {"models": available, "total": len(available)}


	def get_model_display_name(model_id: str) -> str:
	    """Return a human-readable name for ``model_id``.

	    The ``"name"`` of the matching ``PREFERRED_MODELS`` entry is used when
	    there is one. Otherwise the name is derived from the id itself: the text
	    after the last ``/``, cut at the first ``-``, then capitalized
	    (for example ``"acme/foo-7b"`` gives ``"Foo"``).
	    """
	    for entry in PREFERRED_MODELS:
	        if entry["id"] == model_id:
	            return entry["name"]

	    # Fallback for ids that are not in the catalog.
	    repo_name = model_id.rpartition("/")[2]
	    family = repo_name.partition("-")[0]
	    return family.capitalize()