File size: 9,505 Bytes
f25a1ae
 
f8d7ba5
0558c1b
 
 
f25a1ae
f8d7ba5
 
 
f25a1ae
0558c1b
f25a1ae
 
 
 
0558c1b
 
f25a1ae
2d2a7fa
0558c1b
 
 
2d2a7fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0558c1b
2d2a7fa
 
 
f8d7ba5
2d2a7fa
 
 
 
 
0558c1b
148f469
 
 
 
0558c1b
 
148f469
f25a1ae
 
 
0558c1b
f25a1ae
 
 
0558c1b
f25a1ae
 
 
 
 
 
 
 
0558c1b
f25a1ae
 
 
 
2d2a7fa
 
0558c1b
 
 
f8d7ba5
 
 
0558c1b
f25a1ae
 
 
 
0558c1b
 
 
e4aaef9
 
0558c1b
 
148f469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0558c1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4aaef9
 
0558c1b
 
e4aaef9
 
0558c1b
 
 
 
 
e4aaef9
 
 
 
 
 
 
0558c1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f25a1ae
 
 
 
 
 
 
 
 
 
 
 
 
 
0558c1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import json
import logging
from typing import Any, Dict, Optional, Tuple

import requests

from .token_limiter import get_token_limiter
from .prompt_cache import get_prompt_cache_manager

# Configure logging
logger = logging.getLogger(__name__)

class OpenRouterProxy:
    """Proxy for OpenRouter API requests with cost optimization.

    Regular requests pass through mandatory token limiting, cost
    optimization and a local response cache; requests authorized by a
    special token bypass all of these limitations and are forwarded
    as-is (aside from stop-sequence sanitation).
    """

    def __init__(self):
        # Root of the OpenRouter REST API.
        self.base_url = "https://openrouter.ai/api/v1"
        # Connect/read timeout in seconds for every outbound request.
        self.timeout = 30
        self.token_limiter = get_token_limiter()
        self.cache_manager = get_prompt_cache_manager()

    def forward_request(self, data: Dict[Any, Any], api_key: str, is_special_token: bool = False) -> requests.Response:
        """Forward a chat completion request to OpenRouter with cost optimization.

        Args:
            data: Raw chat-completion payload from the client.
            api_key: OpenRouter API key used for the upstream call.
            is_special_token: When True, bypass token limits and caching.

        Returns:
            The upstream ``requests.Response``, or a mock response for
            token-limit rejections, cache hits and connection errors.
        """
        model = data.get('model', '')

        if not is_special_token:
            # Apply normal limitations for regular tokens.

            # 1. Enforce the mandatory token limit.
            is_valid, limited_data, limit_reason = self.token_limiter.validate_and_limit_request(data, model)

            if not is_valid:
                # Reject with a token-limit error response.
                return self._create_error_response(
                    400,
                    "token_limit_exceeded",
                    limit_reason
                )

            # 2. Apply cost optimizations to the limited payload.
            optimized_data = self.token_limiter.add_cost_optimization_to_request(limited_data, model)

            # 3. Run the request through the mandatory cache.
            cache_hit, cached_response, final_data, cache_key = self.cache_manager.process_request_with_cache(
                optimized_data, model
            )

            if cache_hit and cached_response:
                # Serve the response straight from the local cache.
                logger.info(f"✓ OpenRouter Cache HIT for model {model} (key: {cache_key[:16]}...)")
                return self._create_cached_response(cached_response)
        else:
            # Special token - bypass all limitations.
            final_data = data.copy()
            cache_key = "special_token_bypass"

        # 4. Clean stop sequences to prevent API errors.
        final_data = self._clean_stop_sequences(final_data)

        # 5. Prepare optimized headers (auth + cache metadata).
        headers = self._prepare_optimized_headers(api_key, cache_key)

        # 6. Send the request to OpenRouter.
        url = f"{self.base_url}/chat/completions"
        wants_stream = bool(final_data.get('stream', False))

        try:
            # `stream=True` keeps the connection open so SSE chunks can
            # be relayed; otherwise the body is read eagerly.
            response = requests.post(
                url,
                json=final_data,
                headers=headers,
                timeout=self.timeout,
                stream=wants_stream
            )

            # 7. Store successful, non-streaming responses in the cache
            #    (regular tokens only; streamed bodies cannot be replayed).
            if not is_special_token and response.status_code == 200 and not wants_stream:
                try:
                    response_json = response.json()
                    self.cache_manager.store_in_cache(cache_key, response_json, model)
                    logger.info(f"✓ OpenRouter Cache STORED for model {model} (key: {cache_key[:16]}...)")
                except Exception as e:
                    # Best effort: a cache failure must not break the response.
                    logger.warning(f"Failed to store OpenRouter response in cache: {e}")

            return response

        except requests.exceptions.RequestException as e:
            # Create a mock response for connection errors.
            return self._create_error_response(
                500,
                "connection_error",
                f"Failed to connect to OpenRouter: {str(e)}",
                {"provider": "openrouter", "model": model}
            )

    def _clean_stop_sequences(self, data: Dict[Any, Any]) -> Dict[Any, Any]:
        """Clean stop sequences to avoid 'stop_sequences must contain non-whitespace' errors.

        Handles both a list-valued and a single-string ``stop`` field;
        removes the field entirely when no valid sequence remains.
        The input dict is not mutated — a shallow copy is returned.
        """
        cleaned_data = data.copy()
        stop = cleaned_data.get('stop')

        if isinstance(stop, str):
            # The API also accepts a single string; drop it if blank.
            if not stop.strip():
                del cleaned_data['stop']
                logger.info("Removed empty stop sequences to prevent API error")
        elif isinstance(stop, list):
            # Keep only non-empty strings containing non-whitespace characters.
            valid_stops = [s for s in stop if s and isinstance(s, str) and s.strip()]

            if valid_stops:
                cleaned_data['stop'] = valid_stops
            else:
                # If no valid stop sequences remain, remove the field entirely.
                del cleaned_data['stop']
                logger.info("Removed empty stop sequences to prevent API error")

        return cleaned_data

    def _prepare_optimized_headers(self, api_key: str, cache_key: str) -> Dict[str, str]:
        """Build the upstream request headers, including cache metadata.

        Args:
            api_key: Bearer token for OpenRouter.
            cache_key: Cache key for this request; only a 16-char prefix
                is exposed in the header.
        """
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/openrouter-proxy",
            "X-Title": "Cost-Optimized OpenRouter Proxy"
        }

        # Cache-specific headers advertising the proxy's optimizations.
        headers.update({
            "X-OpenRouter-Cache": "true",
            "X-OpenRouter-Cache-Key": cache_key[:16],
            "X-OpenRouter-Cost-Optimization": "enabled"
        })
        return headers

    def _create_error_response(self, status_code: int, error_type: str, message: str, context: Optional[Dict] = None) -> requests.Response:
        """Create a mock error response with optional extra context.

        Args:
            status_code: HTTP status to report.
            error_type: Machine-readable error type string.
            message: Human-readable error message.
            context: Extra key/value pairs merged into the error object.
        """
        mock_response = requests.Response()
        mock_response.status_code = status_code

        error_data = {
            "error": {
                "message": message,
                "type": error_type,
                "code": status_code
            }
        }

        # Add context information if provided.
        if context:
            error_data["error"].update(context)

        mock_response._content = json.dumps(error_data).encode('utf-8')
        mock_response.headers['Content-Type'] = 'application/json'
        return mock_response

    def _create_cached_response(self, cached_data: Dict) -> requests.Response:
        """Create a mock 200 response from locally cached data.

        The payload is annotated with ``x_cache_info`` so clients can
        tell the response was served from cache.
        """
        mock_response = requests.Response()
        mock_response.status_code = 200

        # Attach cache metadata without mutating the cached entry.
        enhanced_response = cached_data.copy()
        enhanced_response['x_cache_info'] = {
            "cache_hit": True,
            "source": "local_cache",
            "cost_savings": True
        }

        mock_response._content = json.dumps(enhanced_response).encode('utf-8')
        mock_response.headers['Content-Type'] = 'application/json'
        mock_response.headers['X-Cache-Status'] = 'HIT'
        mock_response.headers['X-Cost-Optimized'] = 'true'
        return mock_response

    def get_models(self, api_key: str) -> requests.Response:
        """Get available models from OpenRouter.

        Returns the upstream response, or a mock 500 response when the
        connection fails.
        """
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        url = f"{self.base_url}/models"

        try:
            return requests.get(url, headers=headers, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            return self._create_error_response(
                500,
                "connection_error",
                f"Failed to connect to OpenRouter: {str(e)}"
            )

    def get_cost_optimization_stats(self) -> Dict[str, Any]:
        """Return combined token-limit and cache optimization statistics."""
        token_stats = self.token_limiter.get_statistics()
        cache_stats = self.cache_manager.get_cache_statistics()

        # Combined savings: total requests = hits + misses.
        total_requests = cache_stats.get('cache_hits', 0) + cache_stats.get('cache_misses', 0)

        return {
            "token_optimization": token_stats,
            "cache_optimization": cache_stats,
            "combined_metrics": {
                "total_requests_processed": total_requests,
                "cache_hit_ratio": cache_stats.get('hit_ratio', 0),
                "tokens_saved": token_stats.get('total_tokens_saved', 0),
                "requests_rejected": token_stats.get('total_rejected_requests', 0),
                "cost_optimization_enabled": True
            }
        }

    def reset_optimization_stats(self):
        """Reset the optimization statistics and clear the cache.

        NOTE(review): re-calling the getters only resets state if they
        return fresh objects; if they are singletons, only the explicit
        ``clear_cache()`` below has an effect — TODO confirm.
        """
        self.token_limiter = get_token_limiter()
        self.cache_manager = get_prompt_cache_manager()
        self.cache_manager.clear_cache()