"""Cost-optimized proxy layer for the OpenRouter chat-completions API."""
import requests
import json
import logging
from typing import Dict, Any, Tuple
from .token_limiter import get_token_limiter
from .prompt_cache import get_prompt_cache_manager
# Configure logging
logger = logging.getLogger(__name__)
class OpenRouterProxy:
    """Cost-optimizing proxy in front of the OpenRouter API.

    Regular requests are token-limited and served through a prompt
    cache before being forwarded upstream.
    """

    def __init__(self):
        # Per-request timeout (seconds) and upstream API root.
        self.timeout = 30
        self.base_url = "https://openrouter.ai/api/v1"
        # Shared project singletons: token budget enforcement and prompt cache.
        self.token_limiter = get_token_limiter()
        self.cache_manager = get_prompt_cache_manager()
def forward_request(self, data: Dict[Any, Any], api_key: str, is_special_token: bool = False) -> requests.Response:
"""Forward chat completion request to OpenRouter with cost optimization"""
model = data.get('model', '')
if not is_special_token:
# Apply normal limitations for regular tokens
# 1. APLICAR LÍMITE DE TOKENS OBLIGATORIO
is_valid, limited_data, limit_reason = self.token_limiter.validate_and_limit_request(data, model)
if not is_valid:
# Crear respuesta de error por límite de tokens
error_response = self._create_error_response(
400,
"token_limit_exceeded",
limit_reason
)
return error_response
# 2. APLICAR OPTIMIZACIONES DE COSTO
optimized_data = self.token_limiter.add_cost_optimization_to_request(limited_data, model)
# 3. PROCESAR CON CACHE OBLIGATORIO
cache_hit, cached_response, final_data, cache_key = self.cache_manager.process_request_with_cache(
optimized_data, model
)
if cache_hit and cached_response:
# Retornar respuesta desde cache
logger.info(f"✓ OpenRouter Cache HIT for model {model} (key: {cache_key[:16]}...)")
return self._create_cached_response(cached_response)
else:
# Special token - bypass all limitations
final_data = data.copy()
cache_key = "special_token_bypass"
# 4. CLEAN STOP SEQUENCES TO PREVENT API ERRORS
final_data = self._clean_stop_sequences(final_data)
# 5. PREPARAR HEADERS OPTIMIZADOS
headers = self._prepare_optimized_headers(api_key, cache_key)
# 6. ENVIAR REQUEST A OPENROUTER
url = f"{self.base_url}/chat/completions"
try:
if final_data.get('stream', False):
# Handle streaming requests
response = requests.post(
url,
json=final_data,
headers=headers,
timeout=self.timeout,
stream=True
)
else:
# Handle non-streaming requests
response = requests.post(
url,
json=final_data,
headers=headers,
timeout=self.timeout
)
# 6. GUARDAR EN CACHE SI LA RESPUESTA ES EXITOSA (only for regular tokens)
if not is_special_token and response.status_code == 200 and not final_data.get('stream', False):
try:
response_json = response.json()
self.cache_manager.store_in_cache(cache_key, response_json, model)
logger.info(f"✓ OpenRouter Cache STORED for model {model} (key: {cache_key[:16]}...)")
except Exception as e:
logger.warning(f"Failed to store OpenRouter response in cache: {e}")
return response
except requests.exceptions.RequestException as e:
# Create a mock response for connection errors
return self._create_error_response(
500,
"connection_error",
f"Failed to connect to OpenRouter: {str(e)}",
{"provider": "openrouter", "model": model}
)
def _clean_stop_sequences(self, data: Dict[Any, Any]) -> Dict[Any, Any]:
"""Clean stop sequences to avoid 'stop_sequences must contain non-whitespace' errors"""
cleaned_data = data.copy()
if 'stop' in cleaned_data and isinstance(cleaned_data['stop'], list):
# Filter valid stop sequences (non-empty and containing non-whitespace characters)
valid_stops = []
for stop in cleaned_data['stop']:
if stop and isinstance(stop, str) and stop.strip():
valid_stops.append(stop)
if valid_stops:
cleaned_data['stop'] = valid_stops
else:
# If no valid stop sequences remain, remove the field entirely
del cleaned_data['stop']
logger.info("Removed empty stop sequences to prevent API error")
return cleaned_data
def _prepare_optimized_headers(self, api_key: str, cache_key: str) -> Dict[str, str]:
"""Prepara headers optimizados para OpenRouter con configuración de cache"""
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "https://github.com/openrouter-proxy",
"X-Title": "Cost-Optimized OpenRouter Proxy"
}
# Agregar headers específicos de cache si están disponibles
cache_headers = {
"X-OpenRouter-Cache": "true",
"X-OpenRouter-Cache-Key": cache_key[:16],
"X-OpenRouter-Cost-Optimization": "enabled"
}
headers.update(cache_headers)
return headers
def _create_error_response(self, status_code: int, error_type: str, message: str, context: Dict = None) -> requests.Response:
"""Crea una respuesta de error mock con contexto adicional"""
mock_response = requests.Response()
mock_response.status_code = status_code
error_data = {
"error": {
"message": message,
"type": error_type,
"code": status_code
}
}
# Add context information if provided
if context:
error_data["error"].update(context)
mock_response._content = json.dumps(error_data).encode('utf-8')
mock_response.headers['Content-Type'] = 'application/json'
return mock_response
def _create_cached_response(self, cached_data: Dict) -> requests.Response:
"""Crea una respuesta mock desde datos del cache"""
mock_response = requests.Response()
mock_response.status_code = 200
# Agregar metadata de cache a la respuesta
enhanced_response = cached_data.copy()
enhanced_response['x_cache_info'] = {
"cache_hit": True,
"source": "local_cache",
"cost_savings": True
}
mock_response._content = json.dumps(enhanced_response).encode('utf-8')
mock_response.headers['Content-Type'] = 'application/json'
mock_response.headers['X-Cache-Status'] = 'HIT'
mock_response.headers['X-Cost-Optimized'] = 'true'
return mock_response
def get_models(self, api_key: str) -> requests.Response:
"""Get available models from OpenRouter"""
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
url = f"{self.base_url}/models"
try:
response = requests.get(url, headers=headers, timeout=self.timeout)
return response
except requests.exceptions.RequestException as e:
return self._create_error_response(
500,
"connection_error",
f"Failed to connect to OpenRouter: {str(e)}"
)
def get_cost_optimization_stats(self) -> Dict[str, Any]:
"""Obtiene estadísticas de optimización de costos"""
token_stats = self.token_limiter.get_statistics()
cache_stats = self.cache_manager.get_cache_statistics()
# Calcular ahorros combinados
total_requests = cache_stats.get('cache_hits', 0) + cache_stats.get('cache_misses', 0)
return {
"token_optimization": token_stats,
"cache_optimization": cache_stats,
"combined_metrics": {
"total_requests_processed": total_requests,
"cache_hit_ratio": cache_stats.get('hit_ratio', 0),
"tokens_saved": token_stats.get('total_tokens_saved', 0),
"requests_rejected": token_stats.get('total_rejected_requests', 0),
"cost_optimization_enabled": True
}
}
    def reset_optimization_stats(self):
        """Reset the cost-optimization statistics.

        Re-acquires the limiter and cache singletons, then clears the
        prompt cache. NOTE(review): if get_token_limiter() /
        get_prompt_cache_manager() return the same singleton instances,
        re-fetching them does NOT reset their accumulated counters —
        only the cache contents are cleared. Confirm against the
        token_limiter / prompt_cache implementations.
        """
        self.token_limiter = get_token_limiter()
        self.cache_manager = get_prompt_cache_manager()
        self.cache_manager.clear_cache()