import os
import logging
from typing import List, Dict, Optional

import numpy as np

# Configure logging for Hugging Face Spaces
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CooperativaAdvancedRAG:
    """Singleton RAG service: FAISS retrieval + cross-encoder rerank + HF-hosted LLM.

    All heavy models (embeddings, FAISS index, cross-encoder, LLM endpoint)
    are loaded lazily on the first call to :meth:`query`, so process startup
    stays fast — important on Hugging Face Spaces.
    """

    _instance = None
    _models_loaded = False

    def __new__(cls):
        # Classic singleton: every construction returns the same instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every construction of the singleton; guard so
        # the expensive setup only happens once.
        if hasattr(self, 'initialized') and self.initialized:
            return
        self.initialized = True
        self._models_loaded = False
        logger.info("--- Inicializando RAG Service (carga perezosa) ---")
        # Get the correct paths for Hugging Face Spaces
        self._setup_paths()

    def _setup_paths(self):
        """Locate the FAISS index directory and read the HF API token.

        Sets ``self.backend_dir``, ``self.persist_directory`` (``None`` when
        no index directory is found) and ``self.hf_token``.
        """
        # In Hugging Face Spaces, the current working directory is the app root
        self.backend_dir = os.getcwd()

        # Check for FAISS index in common locations
        possible_paths = [
            os.path.join(self.backend_dir, "faiss_index"),
            os.path.join(self.backend_dir, "backend", "faiss_index"),
            os.path.join(os.path.dirname(self.backend_dir), "faiss_index"),
        ]

        self.persist_directory = None
        for path in possible_paths:
            if os.path.exists(path):
                self.persist_directory = path
                logger.info("FAISS index encontrado en: %s", path)
                break

        # Get API token from environment (Hugging Face Spaces secrets)
        self.hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
        if not self.hf_token:
            # FIX: this warning string contained a raw line break in the
            # original source (a broken literal); reconstructed on one line.
            logger.warning(
                "HUGGINGFACEHUB_API_TOKEN no encontrado. El LLM no funcionará correctamente."
            )
        else:
            logger.info("Token de Hugging Face encontrado")

    def _load_models(self):
        """Lazy loading of models - only called when needed.

        Raises:
            RuntimeError: if the FAISS index directory is missing.
            Exception: re-raises any model-loading failure after logging it.
        """
        if self._models_loaded:
            return

        logger.info("--- Cargando modelos de IA a la memoria ---")
        try:
            # Import here to avoid loading heavy dependencies at startup
            from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
            from langchain_community.vectorstores import FAISS
            from sentence_transformers import CrossEncoder

            # Check if FAISS index exists
            if not self.persist_directory or not os.path.exists(self.persist_directory):
                error_msg = f"FAISS index no encontrado en: {self.persist_directory}"
                logger.error(error_msg)
                raise RuntimeError(error_msg)

            # -------------------------
            # EMBEDDINGS
            # -------------------------
            logger.info("Cargando modelo de embeddings...")
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )

            # -------------------------
            # VECTOR DATABASE
            # -------------------------
            logger.info("Cargando FAISS index...")
            # allow_dangerous_deserialization is required to load a pickled
            # index; safe here because the index ships with the Space itself.
            self.db = FAISS.load_local(
                self.persist_directory,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )

            # -------------------------
            # CROSS ENCODER (RERANK)
            # -------------------------
            logger.info("Cargando CrossEncoder...")
            self.cross_encoder = CrossEncoder(
                "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",
                device='cpu'
            )

            # -------------------------
            # LLM (only if a token is available)
            # -------------------------
            if self.hf_token:
                logger.info("Inicializando HuggingFaceEndpoint...")
                self.llm = HuggingFaceEndpoint(
                    endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
                    huggingfacehub_api_token=self.hf_token,
                    task="text-generation",
                    max_new_tokens=512,
                    temperature=0.1,
                    do_sample=True,
                    top_p=0.95,
                    typical_p=0.95,
                    repetition_penalty=1.1,
                    timeout=120,
                )

                # Test the connection; degrade to llm=None instead of failing
                # the whole load when the endpoint is unreachable.
                try:
                    test_response = self.llm.invoke("Hola")
                    logger.info("LLM inicializado correctamente")
                except Exception as e:
                    logger.error("Error al probar LLM: %s", e)
                    self.llm = None
            else:
                logger.warning("No hay token disponible - LLM no inicializado")
                self.llm = None

            self._models_loaded = True
            logger.info("--- Sistema RAG listo para recibir consultas ---")

        except Exception as e:
            logger.error("Error crítico cargando modelos: %s", e)
            raise

    # --------------------------------------------------
    # MAIN QUERY
    # --------------------------------------------------
    def query(
        self,
        question: str,
        chat_history: Optional[List[Dict[str, str]]] = None,
        top_k_initial: int = 25,
        top_k_final: int = 3,
    ) -> str:
        """Answer *question* via retrieve → rerank → generate.

        Args:
            question: user question (Spanish expected, not enforced).
            chat_history: optional turns as ``{"role": ..., "content": ...}``;
                only the last 5 turns are used to rewrite the question.
            top_k_initial: candidates fetched from FAISS before reranking.
            top_k_final: documents kept after cross-encoder reranking.

        Returns:
            The answer string, or a human-readable error message — this
            method never raises; every failure path returns a string.
        """
        # Load models on first query
        try:
            self._load_models()
        except Exception as e:
            return f"Error inicializando el sistema: {str(e)}"

        # Check if LLM is available
        if not self.llm:
            return "Error: Token de Hugging Face no configurado. Por favor, configura HUGGINGFACEHUB_API_TOKEN en los secretos del Space."

        # -------------------------
        # CHAT HISTORY
        # -------------------------
        history_text = ""
        if chat_history:
            for turn in chat_history[-5:]:
                role = "Usuario" if turn.get("role") == "user" else "Asistente"
                content = turn.get("content", "")
                if content:
                    history_text += f"{role}: {content}\n"

        standalone_question = question

        # -------------------------
        # QUESTION REWRITE (only when there is history)
        # -------------------------
        if history_text.strip():
            rewrite_prompt = f"""[INST] Reformula la siguiente pregunta para que sea independiente del historial de la conversación.
Historial: {history_text}
Pregunta actual: {question}
Pregunta reformulada (solo la pregunta, sin explicaciones): [/INST]"""
            try:
                rewritten = self.llm.invoke(rewrite_prompt).strip()
                # Heuristic: very short rewrites are likely degenerate output.
                if rewritten and len(rewritten) > 10:
                    standalone_question = rewritten
                    logger.info("Pregunta reformulada: %s", standalone_question)
            except Exception as e:
                logger.error("Error en rewrite: %s", e)
                # Continue with original question

        # -------------------------
        # FAISS SEARCH
        # -------------------------
        try:
            initial_docs = self.db.similarity_search_with_score(
                standalone_question,
                k=top_k_initial
            )
            # Filter by score (lower is better for FAISS L2 distance)
            valid_docs = [
                doc for doc, score in initial_docs
                if score < 2.0  # Ajusta este umbral según necesidad
            ]
            if not valid_docs:
                return "No encontré información relevante en los documentos disponibles."
        except Exception as e:
            logger.error("Error en búsqueda FAISS: %s", e)
            return f"Error en la búsqueda: {str(e)}"

        # -------------------------
        # CROSS ENCODER RERANK
        # -------------------------
        try:
            cross_inputs = [
                [standalone_question, doc.page_content]
                for doc in valid_docs
            ]
            scores = self.cross_encoder.predict(cross_inputs)
            # Sort by score (higher is better for cross-encoder)
            sorted_idx = np.argsort(scores)[::-1]
            top_docs = [
                valid_docs[i]
                for i in sorted_idx[:top_k_final]
            ]
        except Exception as e:
            logger.error("Error en reranking: %s", e)
            # Fallback: use FAISS ordering without reranking
            top_docs = valid_docs[:top_k_final]

        # -------------------------
        # CONTEXT
        # -------------------------
        context = "\n\n".join(
            [
                f"Documento {i+1}:\n{doc.page_content}"
                for i, doc in enumerate(top_docs)
            ]
        )

        # -------------------------
        # FINAL PROMPT
        # -------------------------
        prompt = f"""[INST] Eres un asistente experto en análisis de documentos bancarios y contractuales.
INSTRUCCIONES:
- Responde SOLO usando el CONTEXTO proporcionado
- No inventes información
- Si la información no está en el contexto, responde EXACTAMENTE: "No tengo suficiente información en los documentos disponibles para responder a esta consulta."
- Indica el documento utilizado (ej: "Según el Documento 1...")
- Sé conciso y profesional
CONTEXTO: {context}
PREGUNTA: {question}
RESPUESTA: [/INST]"""

        # -------------------------
        # GENERATE ANSWER
        # -------------------------
        try:
            response = self.llm.invoke(prompt)
            # Clean up response
            if response:
                response = response.strip()
                # FIX: the original called response.replace("", "") twice,
                # which is a no-op; strip the Mistral instruction tags as the
                # adjacent comment ("Remove any instruction tags") intended.
                response = response.replace("[INST]", "").replace("[/INST]", "").strip()
            return response if response else "No se pudo generar una respuesta."
        except Exception as e:
            logger.error("Error generando respuesta: %s", e)
            return f"Error al generar respuesta: {str(e)}"