File size: 6,905 Bytes
4caa453 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
"""Helper functions to fetch and filter free models from OpenRouter API."""
import json
import logging
import os
import time
from pathlib import Path
from typing import Any
import requests
logger = logging.getLogger(__name__)
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
CACHE_DIR = Path(".cache")
CACHE_FILE = CACHE_DIR / "openrouter_models.json"
CACHE_DURATION_SECONDS = 24 * 60 * 60 # 24 hours
def is_free_model(model: dict[str, Any]) -> bool:
    """
    Check if a model is free based on its ID or pricing.

    A model counts as free when its ID carries the ":free" variant suffix,
    or when both its prompt and completion prices are zero (or missing/null).

    Args:
        model: Model dictionary from OpenRouter API

    Returns:
        True if the model is free, False otherwise
    """
    model_id = model.get("id", "")
    # Models explicitly tagged with the ":free" variant are free by definition.
    if ":free" in model_id:
        return True
    # Guard against an explicit null pricing value: model.get("pricing", {})
    # only uses the default when the key is absent, so a present-but-null
    # value would crash the .get() calls below.
    pricing = model.get("pricing") or {}
    prompt_price = pricing.get("prompt", "0")
    completion_price = pricing.get("completion", "0")
    # Convert to float if possible; empty/None prices are treated as zero.
    try:
        prompt_price_float = float(prompt_price) if prompt_price else 0.0
        completion_price_float = float(completion_price) if completion_price else 0.0
        return prompt_price_float == 0.0 and completion_price_float == 0.0
    except (ValueError, TypeError):
        # Non-numeric pricing strings: free only if both look like zero/null.
        return (prompt_price in ["0", None, ""] and
                completion_price in ["0", None, ""])
def _load_cache() -> tuple[list[dict[str, Any]] | None, float | None]:
    """
    Read previously cached models from disk.

    Returns:
        Tuple of (cached_models, cache_timestamp), or (None, None) when the
        cache file is absent, unreadable, or missing either field.
    """
    if not CACHE_FILE.exists():
        return None, None
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
        models = payload.get("models", None)
        stamp = payload.get("timestamp", None)
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Error loading cache: {e}")
        return None, None
    # Both fields must be present for the cache entry to be usable.
    if models is None or stamp is None:
        return None, None
    return models, stamp
def _save_cache(models: list[dict[str, Any]]) -> None:
    """
    Persist the given models to the cache file, stamped with the current time.

    Args:
        models: List of model dictionaries to cache
    """
    try:
        # Make sure the cache directory exists before writing.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        payload = {"models": models, "timestamp": time.time()}
        with open(CACHE_FILE, "w", encoding="utf-8") as fh:
            json.dump(payload, fh)
        logger.info(f"Cached {len(models)} free models to {CACHE_FILE}")
    except IOError as e:
        logger.warning(f"Error saving cache: {e}")
def fetch_free_models() -> list[dict[str, Any]]:
    """
    Fetch all free models from OpenRouter API.

    Uses a file-based cache that refreshes once per day. If the API call
    fails, an expired cache (when available) is returned as a best-effort
    fallback instead of an empty list.

    Returns:
        List of free model dictionaries with metadata
    """
    # Serve from cache when it is still fresh (less than 24 hours old).
    cached_models, cache_timestamp = _load_cache()
    if cached_models is not None and cache_timestamp is not None:
        age_seconds = time.time() - cache_timestamp
        if age_seconds < CACHE_DURATION_SECONDS:
            logger.info(f"Using cached models (age: {age_seconds / 3600:.1f} hours)")
            return cached_models
        logger.info(f"Cache expired (age: {age_seconds / 3600:.1f} hours), fetching fresh data")
    # Cache is invalid or doesn't exist, fetch from API.
    try:
        # OpenRouter API doesn't require authentication for listing models.
        response = requests.get(OPENROUTER_API_URL, timeout=10)
        response.raise_for_status()
        models = response.json().get("data", [])
        # Filter to only free models.
        free_models = [model for model in models if is_free_model(model)]
        logger.info(f"Fetched {len(free_models)} free models from OpenRouter")
        _save_cache(free_models)
        return free_models
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching models from OpenRouter: {e}")
        failure = "API call failed"
    except Exception as e:
        # Last-resort guard: JSON decoding or filtering blew up unexpectedly.
        logger.error(f"Unexpected error fetching models: {e}")
        failure = "Unexpected error"
    # Fetch failed: prefer stale cached data over returning nothing.
    if cached_models is not None:
        logger.warning(f"{failure}, using expired cache as fallback")
        return cached_models
    return []
def get_model_config(model: dict[str, Any]) -> dict[str, Any]:
    """
    Extract model configuration from OpenRouter API response.

    Args:
        model: Model dictionary from OpenRouter API

    Returns:
        Model configuration dictionary with type, model, max_context,
        tokenizer, model_id, name and description keys
    """
    model_id = model.get("id", "")
    # Prefer the Hugging Face repo ID for the tokenizer when the API provides one.
    hugging_face_id = model.get("hugging_face_id")
    if hugging_face_id:
        tokenizer = f"hf/{hugging_face_id}"
    elif model_id:
        # Derive a Hugging Face-style name from the model ID, e.g.
        # "nvidia/nemotron-3-nano-30b-a3b:free" -> "hf/nvidia/nemotron-3-nano-30b-a3b"
        parts = model_id.split("/")
        base_name = parts[-1].split(":")[0]  # strip any ":free" variant suffix
        if len(parts) > 1:
            tokenizer = f"hf/{parts[0]}/{base_name}"
        else:
            tokenizer = f"hf/{base_name}"
    else:
        # No usable model ID at all: fall back to a generic tokenizer.
        tokenizer = "gpt2"
    # Default context length when the API omits it.
    context_length = model.get("context_length")
    if context_length is None:
        context_length = 131072
    return {
        "type": "free_openrouter",
        "model": f"openrouter/{model_id}",  # litellm format
        "max_context": context_length,
        "tokenizer": tokenizer,
        "model_id": model_id,
        "name": model.get("name", model_id),
        "description": model.get("description", ""),
    }
|