"""
CORE LLM MEJORADO - SAMBANOVA ENTERPRISE
- Timeouts configurados
- Retry automático
- Streaming optimizado
- Métricas de calidad
"""
import os
import time
import json
from typing import Dict, List, Optional, Generator, Any, Union
from dataclasses import dataclass
from enum import Enum
import asyncio
import aiohttp
from openai import OpenAI, AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam

# ===================== CONFIGURATION =====================
# Base URL handed to both OpenAI SDK clients as `base_url`.
SAMBANOVA_BASE = "https://api.sambanova.ai/v1"
# Passed as `timeout` to both SDK clients (presumably seconds, per the openai
# SDK convention - confirm against the SDK documentation).
DEFAULT_TIMEOUT = 60.0
# Used in two places: as `max_retries` of the SDK clients and as the attempt
# budget of the retry loop in chat_sambanova_optimized.
MAX_RETRIES = 3
# Base back-off for chat_sambanova_optimized: it is multiplied by the
# 1-based attempt number and passed to time.sleep(), so it is in seconds.
RETRY_DELAY = 1.0

# ===================== ENTERPRISE MODELS =====================
# Maps a short alias to the description of one model.
#
# Fields of each entry:
#   id                  - model identifier sent to the API as `model`.
#   optimal_temperature - temperature SambanovaClient applies when the model
#                         is found in this table.
#   context_window, vision, strengths
#                       - descriptive metadata; not read by any code visible
#                         in this file (context_window is presumably a token
#                         count - confirm).
ENTERPRISE_MODELS = {
    # NOTE(review): the alias "Scout" points at a *Maverick* model id while
    # the alias "Fast" points at the *Scout* model id - confirm this mapping
    # is intentional.
    "Scout": {
        "id": "Llama-4-Maverick-17B-128E-Instruct",
        "context_window": 131072,
        "vision": False,
        "strengths": ["general", "reasoning", "code"],
        "optimal_temperature": 0.1
    },
    "Heavy": {
        "id": "Meta-Llama-3.3-70B-Instruct",
        "context_window": 131072,
        "vision": False,
        "strengths": ["complex reasoning", "analysis", "strategy"],
        "optimal_temperature": 0.05
    },
    "Coder": {
        "id": "Qwen2.5-Coder-32B-Instruct",
        "context_window": 32768,
        "vision": False,
        "strengths": ["programming", "debugging", "architecture"],
        "optimal_temperature": 0.01
    },
    # The only entry flagged with vision=True.
    "Vision": {
        "id": "Meta-Llama-3.2-11B-Vision-Instruct",
        "context_window": 131072,
        "vision": True,
        "strengths": ["image analysis", "visual reasoning"],
        "optimal_temperature": 0.1
    },
    "Reasoner": {
        "id": "DeepSeek-R1",
        "context_window": 128000,
        "vision": False,
        "strengths": ["step-by-step reasoning", "mathematics", "logic"],
        "optimal_temperature": 0.3
    },
    "Fast": {
        "id": "Llama-4-Scout-17B-16E-Instruct",
        "context_window": 131072,
        "vision": False,
        "strengths": ["quick responses", "general purpose"],
        "optimal_temperature": 0.2
    }
}

# ===================== IMPROVED CLIENT =====================
class SambanovaClient:
    """Wrapper around the OpenAI SDK clients configured for SambaNova.

    Builds one synchronous and one asynchronous SDK client with the same
    credentials, resolves model aliases through ``ENTERPRISE_MODELS`` and
    keeps simple in-memory usage counters.

    Attributes:
        api_key: Key handed to both SDK clients.
        sync_client: ``OpenAI`` client used by :meth:`chat_completion`.
        async_client: ``AsyncOpenAI`` client used by
            :meth:`async_chat_completion`.
        request_count: Number of completion calls started.
        error_count: Number of completion calls that raised.
        avg_latency: Mean duration, in seconds, of the calls that returned
            successfully. Failed and still-running calls are not part of
            the mean.
    """

    def __init__(self, api_key: str):
        """Create both SDK clients and reset the counters.

        Args:
            api_key: SambaNova API key.
        """
        self.api_key = api_key
        self.sync_client = OpenAI(
            base_url=SAMBANOVA_BASE,
            api_key=api_key,
            timeout=DEFAULT_TIMEOUT,
            max_retries=MAX_RETRIES
        )
        self.async_client = AsyncOpenAI(
            base_url=SAMBANOVA_BASE,
            api_key=api_key,
            timeout=DEFAULT_TIMEOUT,
            max_retries=MAX_RETRIES
        )
        self.request_count = 0
        self.error_count = 0
        self.avg_latency = 0.0
        # Number of samples folded into avg_latency. Kept apart from
        # request_count, which also counts failed and in-flight calls.
        self._latency_samples = 0

    def get_model_info(self, model_key: str) -> Optional[Dict]:
        """Return the ``ENTERPRISE_MODELS`` entry for an alias or a model id.

        Args:
            model_key: Either a key of ``ENTERPRISE_MODELS`` or the ``"id"``
                value of one of its entries.

        Returns:
            The matching entry, or ``None`` when nothing matches. An alias
            match takes precedence over an id match.
        """
        if model_key in ENTERPRISE_MODELS:
            return ENTERPRISE_MODELS[model_key]

        # Not an alias: fall back to searching by model id.
        for info in ENTERPRISE_MODELS.values():
            if info["id"] == model_key:
                return info

        return None

    def _build_request(
        self,
        model: str,
        messages: List[ChatCompletionMessageParam],
        temperature: float,
        top_p: float,
        max_tokens: int,
        stream: bool,
        extra: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Assemble the keyword arguments for ``chat.completions.create``.

        ``extra`` holds the caller's ``**kwargs``. It can never contain one
        of the explicitly named parameters because Python binds those names
        to the parameters themselves, so no filtering is needed.
        """
        model_info = self.get_model_info(model)
        if model_info:
            # Known model: send its real id and use the temperature tuned
            # for it in the table (this replaces the caller's value).
            model_id = model_info["id"]
            temperature = model_info.get("optimal_temperature", temperature)
        else:
            # Unknown model: pass the name through untouched and let the API
            # decide, instead of failing locally while resolving it.
            model_id = model

        return {
            "model": model_id,
            "messages": messages,
            "temperature": temperature,
            "top_p": top_p,
            "max_tokens": max_tokens,
            "stream": stream,
            **extra,
        }

    def _record_latency(self, latency: float) -> None:
        """Fold one successful call duration (seconds) into ``avg_latency``."""
        self._latency_samples += 1
        # Incremental mean over successful calls only.
        self.avg_latency += (latency - self.avg_latency) / self._latency_samples

    def chat_completion(
        self,
        model: str,
        messages: List[ChatCompletionMessageParam],
        temperature: float = 0.1,
        top_p: float = 0.9,
        max_tokens: int = 4096,
        stream: bool = True,
        **kwargs
    ):
        """Run a chat completion synchronously and update the counters.

        Args:
            model: Alias or model id. Names missing from
                ``ENTERPRISE_MODELS`` are sent to the API unchanged.
            messages: Conversation to send.
            temperature: Sampling temperature. NOTE: for models listed in
                ``ENTERPRISE_MODELS`` the entry's ``optimal_temperature``
                is used instead of this value.
            top_p: Nucleus-sampling parameter forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.
            stream: Forwarded to the API as ``stream``.
            **kwargs: Extra arguments forwarded to
                ``chat.completions.create``.

        Returns:
            Whatever ``chat.completions.create`` returns. The measured
            latency ends when that call returns, so with ``stream=True`` it
            does not include the time the caller spends consuming the
            stream.

        Raises:
            Exception: Any error raised while building or sending the
                request is counted in ``error_count`` and re-raised as is.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            request = self._build_request(
                model, messages, temperature, top_p, max_tokens, stream, kwargs
            )
            response = self.sync_client.chat.completions.create(**request)
        except Exception:
            self.error_count += 1
            raise

        self._record_latency(time.perf_counter() - started)
        return response

    async def async_chat_completion(
        self,
        model: str,
        messages: List[ChatCompletionMessageParam],
        temperature: float = 0.1,
        top_p: float = 0.9,
        max_tokens: int = 4096,
        stream: bool = True,
        **kwargs
    ):
        """Asynchronous counterpart of :meth:`chat_completion`.

        Takes the same arguments, applies the same model and temperature
        resolution and updates the same counters, but awaits the
        ``AsyncOpenAI`` client.

        Returns:
            Whatever the awaited ``chat.completions.create`` returns.

        Raises:
            Exception: Any error raised while building or sending the
                request is counted in ``error_count`` and re-raised as is.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            request = self._build_request(
                model, messages, temperature, top_p, max_tokens, stream, kwargs
            )
            response = await self.async_client.chat.completions.create(**request)
        except Exception:
            self.error_count += 1
            raise

        self._record_latency(time.perf_counter() - started)
        return response

    def get_metrics(self) -> Dict[str, Any]:
        """Return a snapshot of the client's counters.

        Returns:
            A dict with ``request_count``, ``error_count``, ``success_rate``
            (1.0 when no request has been made), ``average_latency`` in
            seconds and ``models_available`` (size of
            ``ENTERPRISE_MODELS``).
        """
        return {
            "request_count": self.request_count,
            "error_count": self.error_count,
            # max(..., 1) avoids dividing by zero before the first request.
            "success_rate": 1.0 - (self.error_count / max(self.request_count, 1)),
            "average_latency": self.avg_latency,
            "models_available": len(ENTERPRISE_MODELS)
        }

# ===================== IMPROVED MAIN FUNCTIONS =====================
def create_sambanova_client(api_key: Optional[str] = None) -> SambanovaClient:
    """Build a ``SambanovaClient`` from an explicit key or the environment.

    Args:
        api_key: API key to use. When it is ``None`` or empty, the
            ``SAMBANOVA_API_KEY_2`` environment variable is read instead.

    Returns:
        A new ``SambanovaClient`` configured with the resolved key.

    Raises:
        RuntimeError: If neither the argument nor the environment variable
            provides a non-empty key.
    """
    resolved_key = api_key if api_key else os.getenv("SAMBANOVA_API_KEY_2")

    if resolved_key:
        return SambanovaClient(resolved_key)

    raise RuntimeError(
        "❌ Falta SAMBANOVA_API_KEY_2. Configura la variable de entorno o pasa la clave directamente."
    )

def chat_sambanova_optimized(
    api_key: str,
    model: str,
    messages: List[ChatCompletionMessageParam],
    temperature: float = 0.1,
    top_p: float = 0.9,
    max_tokens: int = 4096,
    stream: bool = True,
    retry_on_failure: bool = True,
    **kwargs
):
    """Run one chat completion on a freshly built client, retrying on error.

    A new client is created on every call, so its counters are not shared
    between calls. The retry loop only covers the ``chat_completion`` call
    itself; errors raised later while the caller iterates a streamed
    response are outside of it.

    Args:
        api_key: Key passed to ``create_sambanova_client``.
        model: Model alias or id, forwarded to the client.
        messages: Conversation to send.
        temperature: Forwarded to the client.
        top_p: Forwarded to the client.
        max_tokens: Forwarded to the client.
        stream: Forwarded to the client.
        retry_on_failure: When true, up to ``MAX_RETRIES`` attempts are
            made; otherwise a single attempt.
        **kwargs: Extra arguments forwarded to the client.

    Returns:
        Whatever ``client.chat_completion`` returns.

    Raises:
        Exception: The error of the last attempt, re-raised unchanged.
    """
    client = create_sambanova_client(api_key)
    attempts_allowed = MAX_RETRIES if retry_on_failure else 1

    attempt_index = 0
    while attempt_index < attempts_allowed:
        try:
            return client.chat_completion(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                stream=stream,
                **kwargs
            )
        except Exception:
            budget_exhausted = attempt_index == MAX_RETRIES - 1
            if budget_exhausted or not retry_on_failure:
                raise
            # Linear back-off: the wait grows with the 1-based attempt number.
            time.sleep(RETRY_DELAY * (attempt_index + 1))
        attempt_index += 1

# ===================== OPTIMIZED STREAMING =====================
def stream_response_generator(
    api_key: str,
    model: str,
    messages: List[ChatCompletionMessageParam],
    **kwargs
) -> Generator[Union[str, Dict[str, Any]], None, None]:
    """Stream a chat completion as text fragments followed by one report.

    The generator yields every non-empty text fragment of the streamed
    response as a ``str`` and then yields exactly one ``dict`` as its last
    item. Nothing is returned through ``StopIteration``.

    Args:
        api_key: Key forwarded to ``chat_sambanova_optimized``.
        model: Model alias or id to use.
        messages: Conversation to send.
        **kwargs: Extra arguments forwarded to ``chat_sambanova_optimized``
            (``stream`` is always forced to ``True`` here).

    Yields:
        ``str`` fragments, then a final report ``dict``.

        On success the report holds ``success`` (``True``), ``total_time``
        in seconds, ``response_length``, ``model_used`` and
        ``final_response``.

        On failure the error is not raised. The report holds ``success``
        (``False``), ``error``, ``total_time``, ``response_length``
        (characters already yielded) and ``model_attempted``.
    """
    start_time = time.perf_counter()
    # Fragments are collected in a list and joined once at the end.
    fragments: List[str] = []

    try:
        stream = chat_sambanova_optimized(
            api_key=api_key,
            model=model,
            messages=messages,
            stream=True,
            **kwargs
        )

        for chunk in stream:
            # Some chunks carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                fragments.append(content)
                yield content

    except Exception as e:
        yield {
            "success": False,
            "error": str(e),
            "total_time": time.perf_counter() - start_time,
            "response_length": sum(len(fragment) for fragment in fragments),
            "model_attempted": model
        }
    else:
        # Kept outside the try block so that a success report can never be
        # followed by a failure report.
        full_response = "".join(fragments)
        yield {
            "success": True,
            "total_time": time.perf_counter() - start_time,
            "response_length": len(full_response),
            "model_used": model,
            "final_response": full_response
        }