"""Helper functions to fetch and filter free models from OpenRouter API."""

import json
import logging
import os
import time
from pathlib import Path
from typing import Any

import requests

logger = logging.getLogger(__name__)

OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
CACHE_DIR = Path(".cache")
CACHE_FILE = CACHE_DIR / "openrouter_models.json"
CACHE_DURATION_SECONDS = 24 * 60 * 60  # 24 hours
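
# Cache file layout, as written by _save_cache() and read by _load_cache():
#   {"models": [<model dicts from the API>], "timestamp": <unix epoch seconds>}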


def is_free_model(model: dict[str, Any]) -> bool:
    """
    Check if a model is free based on its ID or pricing.
    
    Args:
        model: Model dictionary from the OpenRouter API
        
    Returns:
        True if the model is free, False otherwise
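
    Example (illustrative model dicts, not real API responses):

        >>> is_free_model({"id": "meta-llama/llama-3-8b:free"})
        True
        >>> is_free_model({"id": "acme/paid", "pricing": {"prompt": "0.002", "completion": "0.002"}})
        False
        >>> is_free_model({"id": "acme/zeroed", "pricing": {"prompt": "0", "completion": "0"}})
        True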
    """
    model_id = model.get("id", "")
    
    # Check if model has :free suffix
    if ":free" in model_id:
        return True
    
    # Check if pricing is zero or null
    pricing = model.get("pricing", {})
    prompt_price = pricing.get("prompt", "0")
    completion_price = pricing.get("completion", "0")
    
    # Convert to float if possible, otherwise check if it's "0" or null
    try:
        prompt_price_float = float(prompt_price) if prompt_price else 0.0
        completion_price_float = float(completion_price) if completion_price else 0.0
        return prompt_price_float == 0.0 and completion_price_float == 0.0
    except (ValueError, TypeError):
        # If conversion fails, check if both are "0" or null/empty
        return (prompt_price in ["0", None, ""] and 
                completion_price in ["0", None, ""])


def _load_cache() -> tuple[list[dict[str, Any]] | None, float | None]:
    """
    Load cached models from file.
    
    Returns:
        Tuple of (cached_models, cache_timestamp), or (None, None) if the cache doesn't exist or is invalid
    """
    if not CACHE_FILE.exists():
        return None, None
    
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            cache_data = json.load(f)
        
        cached_models = cache_data.get("models", None)
        cache_timestamp = cache_data.get("timestamp", None)
        
        if cached_models is None or cache_timestamp is None:
            return None, None
        
        return cached_models, cache_timestamp
    except (json.JSONDecodeError, OSError) as e:
        logger.warning(f"Error loading cache: {e}")
        return None, None


def _save_cache(models: list[dict[str, Any]]) -> None:
    """
    Save models to cache file.
    
    Args:
        models: List of model dictionaries to cache
    """
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        
        cache_data = {
            "models": models,
            "timestamp": time.time(),
        }
        
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache_data, f)
        
        logger.info(f"Cached {len(models)} free models to {CACHE_FILE}")
    except OSError as e:
        logger.warning(f"Error saving cache: {e}")


def fetch_free_models() -> list[dict[str, Any]]:
    """
    Fetch all free models from the OpenRouter API.
    Uses a file-based cache that refreshes once per day.
    
    Returns:
        List of free model dictionaries with metadata
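
    Example (hits the live API or the on-disk cache, so output varies):

        >>> models = fetch_free_models()  # doctest: +SKIP
        >>> all(is_free_model(m) for m in models)  # doctest: +SKIP
        True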
    """
    # Check cache first
    cached_models, cache_timestamp = _load_cache()
    
    if cached_models is not None and cache_timestamp is not None:
        # Check if cache is still valid (less than 24 hours old)
        age_seconds = time.time() - cache_timestamp
        if age_seconds < CACHE_DURATION_SECONDS:
            logger.info(f"Using cached models (age: {age_seconds / 3600:.1f} hours)")
            return cached_models
        else:
            logger.info(f"Cache expired (age: {age_seconds / 3600:.1f} hours), fetching fresh data")
    
    # Cache is invalid or doesn't exist, fetch from API
    try:
        # OpenRouter API doesn't require authentication for listing models
        response = requests.get(OPENROUTER_API_URL, timeout=10)
        response.raise_for_status()
        
        data = response.json()
        models = data.get("data", [])
        
        # Filter to only free models
        free_models = [model for model in models if is_free_model(model)]
        
        logger.info(f"Fetched {len(free_models)} free models from OpenRouter")
        
        # Save to cache
        _save_cache(free_models)
        
        return free_models
        
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching models from OpenRouter: {e}")
        # If API call fails but we have cached data, return cached data even if expired
        if cached_models is not None:
            logger.warning("API call failed, using expired cache as fallback")
            return cached_models
        return []
    except Exception as e:
        logger.error(f"Unexpected error fetching models: {e}")
        # If API call fails but we have cached data, return cached data even if expired
        if cached_models is not None:
            logger.warning("Unexpected error, using expired cache as fallback")
            return cached_models
        return []


def get_model_config(model: dict[str, Any]) -> dict[str, Any]:
    """
    Extract model configuration from an OpenRouter API response.
    
    Args:
        model: Model dictionary from the OpenRouter API
        
    Returns:
        Model configuration dictionary with type, model, max_context,
        tokenizer, model_id, name, and description keys
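
    Example (hypothetical model entry, for illustration only):

        >>> cfg = get_model_config({"id": "org/model-7b:free", "context_length": 8192})
        >>> cfg["model"], cfg["max_context"], cfg["tokenizer"]
        ('openrouter/org/model-7b:free', 8192, 'hf/org/model-7b')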
    """
    model_id = model.get("id", "")
    context_length = model.get("context_length")
    
    # Infer tokenizer from model ID
    tokenizer = None
    hugging_face_id = model.get("hugging_face_id")
    
    # Use Hugging Face ID if available
    if hugging_face_id:
        tokenizer = f"hf/{hugging_face_id}"
    else:
        # Try to construct tokenizer name from model ID
        # For example: "nvidia/nemotron-3-nano-30b-a3b:free" -> "hf/nvidia/nemotron-3-nano-30b-a3b"
        parts = model_id.split("/")
        if len(parts) > 1:
            org = parts[0]
            model_name = parts[-1].split(":")[0]  # Remove :free suffix
            tokenizer = f"hf/{org}/{model_name}"
        else:
            # Single part model ID
            model_name = model_id.split(":")[0]
            tokenizer = f"hf/{model_name}"
    
    # Fallback to a generic tokenizer if we can't infer
    if not tokenizer:
        tokenizer = "gpt2"  # Generic fallback
    
    # Default context length if not provided
    if context_length is None:
        context_length = 131072
    
    return {
        "type": "free_openrouter",
        "model": f"openrouter/{model_id}",  # litellm format
        "max_context": context_length,
        "tokenizer": tokenizer,
        "model_id": model_id,
        "name": model.get("name", model_id),
        "description": model.get("description", ""),
    }
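

# Minimal usage sketch (assumes network access to openrouter.ai and a writable
# ".cache" directory; the models printed depend on what the API returns):
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    for free_model in fetch_free_models()[:5]:
        config = get_model_config(free_model)
        print(config["model"], config["max_context"], config["tokenizer"])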