TomacGonz committed on
Commit
15cb869
verified
1 Parent(s): aae6fb6

Update api/services/rag_service.py

Browse files
Files changed (1) hide show
  1. api/services/rag_service.py +217 -139
api/services/rag_service.py CHANGED
@@ -1,84 +1,147 @@
1
  import os
2
  import numpy as np
3
  from typing import List, Dict, Optional
 
4
 
5
- from dotenv import load_dotenv
6
-
7
- from langchain_community.vectorstores import FAISS
8
- from langchain_huggingface import HuggingFaceEmbeddings
9
- from langchain_huggingface import HuggingFaceEndpoint
10
- from langchain_community.llms import HuggingFaceHub
11
- from sentence_transformers import CrossEncoder
12
-
13
 
14
  class CooperativaAdvancedRAG:
15
-
 
 
 
 
 
 
 
16
  def __init__(self):
17
-
18
- print("--- Cargando modelos de IA a la memoria (FAISS + Embeddings + CrossEncoder) ---")
19
-
20
- # -------------------------
21
- # PATHS
22
- # -------------------------
23
-
24
- current_dir = os.path.dirname(os.path.abspath(__file__))
25
- api_dir = os.path.dirname(current_dir)
26
- backend_dir = os.path.dirname(api_dir)
27
-
28
- env_path = os.path.join(backend_dir, ".env")
29
- load_dotenv(env_path)
30
-
31
- self.persist_directory = os.path.join(backend_dir, "faiss_index")
32
-
33
- if not os.path.exists(self.persist_directory):
34
- raise RuntimeError("FAISS index does not exist")
35
-
36
- # -------------------------
37
- # EMBEDDINGS
38
- # -------------------------
39
-
40
- self.embeddings = HuggingFaceEmbeddings(
41
- model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
42
- )
43
-
44
- # -------------------------
45
- # VECTOR DATABASE
46
- # -------------------------
47
-
48
- self.db = FAISS.load_local(
49
- self.persist_directory,
50
- self.embeddings,
51
- allow_dangerous_deserialization=True,
52
- )
53
-
54
- # -------------------------
55
- # CROSS ENCODER (RERANK)
56
- # -------------------------
57
-
58
- self.cross_encoder = CrossEncoder(
59
- "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
60
- )
61
-
62
- # -------------------------
63
- # LLM
64
- # -------------------------
65
-
66
- self.llm = HuggingFaceHub(
67
- repo_id="mistralai/Mistral-7B-Instruct-v0.3",
68
- huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
69
- model_kwargs={
70
- "temperature": 0.1,
71
- "max_new_tokens": 512,
72
- "typical_p": 1,
73
- }
74
- )
75
-
76
- print("--- Sistema RAG listo para recibir consultas ---")
77
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # --------------------------------------------------
79
  # MAIN QUERY
80
  # --------------------------------------------------
81
-
82
  def query(
83
  self,
84
  question: str,
@@ -86,29 +149,35 @@ class CooperativaAdvancedRAG:
86
  top_k_initial: int = 25,
87
  top_k_final: int = 3,
88
  ) -> str:
89
-
 
 
 
 
 
 
 
 
 
 
90
  # -------------------------
91
  # CHAT HISTORY
92
  # -------------------------
93
-
94
  history_text = ""
95
-
96
  if chat_history:
97
  for turn in chat_history[-5:]:
98
  role = "Usuario" if turn.get("role") == "user" else "Asistente"
99
  content = turn.get("content", "")
100
- history_text += f"{role}: {content}\n"
101
-
 
102
  standalone_question = question
103
-
104
  # -------------------------
105
- # QUESTION REWRITE
106
  # -------------------------
107
-
108
- if history_text:
109
-
110
- rewrite_prompt = f"""
111
- Reformula la pregunta para que sea independiente.
112
 
113
  Historial:
114
  {history_text}
@@ -116,79 +185,85 @@ Historial:
116
  Pregunta actual:
117
  {question}
118
 
119
- Pregunta reformulada:
120
- """
121
-
122
  try:
123
-
124
  rewritten = self.llm.invoke(rewrite_prompt).strip()
125
-
126
- if rewritten:
127
  standalone_question = rewritten
128
-
129
  except Exception as e:
130
-
131
- print(f"Error rewriting: {e}")
132
-
133
  # -------------------------
134
  # FAISS SEARCH
135
  # -------------------------
136
-
137
- initial_docs = self.db.similarity_search_with_score(
138
- standalone_question,
139
- k=top_k_initial
140
- )
141
-
142
- valid_docs = [
143
- doc for doc, score in initial_docs
144
- if score < 1.8
145
- ]
146
-
147
- if not valid_docs:
148
- return "No encontr茅 informaci贸n relevante en los documentos disponibles."
149
-
 
 
 
 
 
150
  # -------------------------
151
  # CROSS ENCODER RERANK
152
  # -------------------------
153
-
154
- cross_inputs = [
155
- [standalone_question, doc.page_content]
156
- for doc in valid_docs
157
- ]
158
-
159
- scores = self.cross_encoder.predict(cross_inputs)
160
-
161
- sorted_idx = np.argsort(scores)[::-1]
162
-
163
- top_docs = [
164
- valid_docs[i]
165
- for i in sorted_idx[:top_k_final]
166
- ]
167
-
 
 
 
 
 
 
168
  # -------------------------
169
  # CONTEXT
170
  # -------------------------
171
-
172
  context = "\n\n".join(
173
  [
174
  f"Documento {i+1}:\n{doc.page_content}"
175
  for i, doc in enumerate(top_docs)
176
  ]
177
  )
178
-
179
  # -------------------------
180
  # FINAL PROMPT
181
  # -------------------------
182
-
183
- prompt = f"""
184
- Eres un asistente experto en an谩lisis de documentos bancarios y contractuales.
185
 
186
  INSTRUCCIONES:
187
- - Responde SOLO usando el CONTEXTO
188
  - No inventes informaci贸n
189
- - Si la informaci贸n no est谩 responde exactamente:
190
- "No tengo suficiente informaci贸n en los documentos disponibles para responder a esta consulta."
191
  - Indica el documento utilizado (ej: "Seg煤n el Documento 1...")
 
192
 
193
  CONTEXTO:
194
  {context}
@@ -196,19 +271,22 @@ CONTEXTO:
196
  PREGUNTA:
197
  {question}
198
 
199
- RESPUESTA:
200
- """
201
-
202
  # -------------------------
203
  # GENERATE ANSWER
204
  # -------------------------
205
-
206
  try:
207
-
208
  response = self.llm.invoke(prompt)
209
-
210
- return response.strip()
211
-
 
 
 
 
 
 
212
  except Exception as e:
213
-
214
  return f"Error al generar respuesta: {str(e)}"
 
1
  import os
2
  import numpy as np
3
  from typing import List, Dict, Optional
4
+ import logging
5
 
6
+ # Configure logging for Hugging Face Spaces
7
+ logging.basicConfig(level=logging.INFO)
8
+ logger = logging.getLogger(__name__)
 
 
 
 
 
9
 
10
  class CooperativaAdvancedRAG:
11
+ _instance = None
12
+ _models_loaded = False
13
+
14
+ def __new__(cls):
15
+ if cls._instance is None:
16
+ cls._instance = super().__new__(cls)
17
+ return cls._instance
18
+
19
  def __init__(self):
20
+ if hasattr(self, 'initialized') and self.initialized:
21
+ return
22
+
23
+ self.initialized = True
24
+ self._models_loaded = False
25
+ logger.info("--- Inicializando RAG Service (carga perezosa) ---")
26
+
27
+ # Get the correct paths for Hugging Face Spaces
28
+ self._setup_paths()
29
+
30
+ def _setup_paths(self):
31
+ """Setup paths for Hugging Face Spaces"""
32
+ # In Hugging Face Spaces, the current working directory is the app root
33
+ self.backend_dir = os.getcwd()
34
+
35
+ # Check for FAISS index in common locations
36
+ possible_paths = [
37
+ os.path.join(self.backend_dir, "faiss_index"),
38
+ os.path.join(self.backend_dir, "backend", "faiss_index"),
39
+ os.path.join(os.path.dirname(self.backend_dir), "faiss_index"),
40
+ ]
41
+
42
+ self.persist_directory = None
43
+ for path in possible_paths:
44
+ if os.path.exists(path):
45
+ self.persist_directory = path
46
+ logger.info(f"FAISS index encontrado en: {path}")
47
+ break
48
+
49
+ # Get API token from environment (Hugging Face Spaces secrets)
50
+ self.hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
51
+
52
+ if not self.hf_token:
53
+ logger.warning("HUGGINGFACEHUB_API_TOKEN no encontrado. El LLM no funcionar谩 correctamente.")
54
+ else:
55
+ logger.info("Token de Hugging Face encontrado")
56
+
57
+ def _load_models(self):
58
+ """Lazy loading of models - only called when needed"""
59
+ if self._models_loaded:
60
+ return
61
+
62
+ logger.info("--- Cargando modelos de IA a la memoria ---")
63
+
64
+ try:
65
+ # Import here to avoid loading at startup
66
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
67
+ from langchain_community.vectorstores import FAISS
68
+ from sentence_transformers import CrossEncoder
69
+
70
+ # Check if FAISS index exists
71
+ if not self.persist_directory or not os.path.exists(self.persist_directory):
72
+ error_msg = f"FAISS index no encontrado en: {self.persist_directory}"
73
+ logger.error(error_msg)
74
+ raise RuntimeError(error_msg)
75
+
76
+ # -------------------------
77
+ # EMBEDDINGS
78
+ # -------------------------
79
+ logger.info("Cargando modelo de embeddings...")
80
+ self.embeddings = HuggingFaceEmbeddings(
81
+ model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
82
+ model_kwargs={'device': 'cpu'},
83
+ encode_kwargs={'normalize_embeddings': True}
84
+ )
85
+
86
+ # -------------------------
87
+ # VECTOR DATABASE
88
+ # -------------------------
89
+ logger.info("Cargando FAISS index...")
90
+ self.db = FAISS.load_local(
91
+ self.persist_directory,
92
+ self.embeddings,
93
+ allow_dangerous_deserialization=True,
94
+ )
95
+
96
+ # -------------------------
97
+ # CROSS ENCODER (RERANK)
98
+ # -------------------------
99
+ logger.info("Cargando CrossEncoder...")
100
+ self.cross_encoder = CrossEncoder(
101
+ "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",
102
+ device='cpu'
103
+ )
104
+
105
+ # -------------------------
106
+ # LLM (solo si hay token)
107
+ # -------------------------
108
+ if self.hf_token:
109
+ logger.info("Inicializando HuggingFaceEndpoint...")
110
+ self.llm = HuggingFaceEndpoint(
111
+ endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
112
+ huggingfacehub_api_token=self.hf_token,
113
+ task="text-generation",
114
+ max_new_tokens=512,
115
+ temperature=0.1,
116
+ do_sample=True,
117
+ top_p=0.95,
118
+ typical_p=0.95,
119
+ repetition_penalty=1.1,
120
+ timeout=120,
121
+ )
122
+
123
+ # Test the connection
124
+ try:
125
+ test_response = self.llm.invoke("Hola")
126
+ logger.info("LLM inicializado correctamente")
127
+ except Exception as e:
128
+ logger.error(f"Error al probar LLM: {e}")
129
+ self.llm = None
130
+ else:
131
+ logger.warning("No hay token disponible - LLM no inicializado")
132
+ self.llm = None
133
+
134
+ self._models_loaded = True
135
+ logger.info("--- Sistema RAG listo para recibir consultas ---")
136
+
137
+ except Exception as e:
138
+ logger.error(f"Error cr铆tico cargando modelos: {e}")
139
+ raise
140
+
141
  # --------------------------------------------------
142
  # MAIN QUERY
143
  # --------------------------------------------------
144
+
145
  def query(
146
  self,
147
  question: str,
 
149
  top_k_initial: int = 25,
150
  top_k_final: int = 3,
151
  ) -> str:
152
+
153
+ # Load models on first query
154
+ try:
155
+ self._load_models()
156
+ except Exception as e:
157
+ return f"Error inicializando el sistema: {str(e)}"
158
+
159
+ # Check if LLM is available
160
+ if not self.llm:
161
+ return "Error: Token de Hugging Face no configurado. Por favor, configura HUGGINGFACEHUB_API_TOKEN en los secretos del Space."
162
+
163
  # -------------------------
164
  # CHAT HISTORY
165
  # -------------------------
 
166
  history_text = ""
 
167
  if chat_history:
168
  for turn in chat_history[-5:]:
169
  role = "Usuario" if turn.get("role") == "user" else "Asistente"
170
  content = turn.get("content", "")
171
+ if content:
172
+ history_text += f"{role}: {content}\n"
173
+
174
  standalone_question = question
175
+
176
  # -------------------------
177
+ # QUESTION REWRITE (solo si hay historial)
178
  # -------------------------
179
+ if history_text.strip():
180
+ rewrite_prompt = f"""<s>[INST] Reformula la siguiente pregunta para que sea independiente del historial de la conversaci贸n.
 
 
 
181
 
182
  Historial:
183
  {history_text}
 
185
  Pregunta actual:
186
  {question}
187
 
188
+ Pregunta reformulada (solo la pregunta, sin explicaciones): [/INST]"""
189
+
 
190
  try:
 
191
  rewritten = self.llm.invoke(rewrite_prompt).strip()
192
+ if rewritten and len(rewritten) > 10:
 
193
  standalone_question = rewritten
194
+ logger.info(f"Pregunta reformulada: {standalone_question}")
195
  except Exception as e:
196
+ logger.error(f"Error en rewrite: {e}")
197
+ # Continue with original question
198
+
199
  # -------------------------
200
  # FAISS SEARCH
201
  # -------------------------
202
+ try:
203
+ initial_docs = self.db.similarity_search_with_score(
204
+ standalone_question,
205
+ k=top_k_initial
206
+ )
207
+
208
+ # Filter by score (lower is better for FAISS)
209
+ valid_docs = [
210
+ doc for doc, score in initial_docs
211
+ if score < 2.0 # Ajusta este umbral seg煤n necesidad
212
+ ]
213
+
214
+ if not valid_docs:
215
+ return "No encontr茅 informaci贸n relevante en los documentos disponibles."
216
+
217
+ except Exception as e:
218
+ logger.error(f"Error en b煤squeda FAISS: {e}")
219
+ return f"Error en la b煤squeda: {str(e)}"
220
+
221
  # -------------------------
222
  # CROSS ENCODER RERANK
223
  # -------------------------
224
+ try:
225
+ cross_inputs = [
226
+ [standalone_question, doc.page_content]
227
+ for doc in valid_docs
228
+ ]
229
+
230
+ scores = self.cross_encoder.predict(cross_inputs)
231
+
232
+ # Sort by score (higher is better for cross-encoder)
233
+ sorted_idx = np.argsort(scores)[::-1]
234
+
235
+ top_docs = [
236
+ valid_docs[i]
237
+ for i in sorted_idx[:top_k_final]
238
+ ]
239
+
240
+ except Exception as e:
241
+ logger.error(f"Error en reranking: {e}")
242
+ # Fallback to use valid_docs without reranking
243
+ top_docs = valid_docs[:top_k_final]
244
+
245
  # -------------------------
246
  # CONTEXT
247
  # -------------------------
 
248
  context = "\n\n".join(
249
  [
250
  f"Documento {i+1}:\n{doc.page_content}"
251
  for i, doc in enumerate(top_docs)
252
  ]
253
  )
254
+
255
  # -------------------------
256
  # FINAL PROMPT
257
  # -------------------------
258
+ prompt = f"""<s>[INST] Eres un asistente experto en an谩lisis de documentos bancarios y contractuales.
 
 
259
 
260
  INSTRUCCIONES:
261
+ - Responde SOLO usando el CONTEXTO proporcionado
262
  - No inventes informaci贸n
263
+ - Si la informaci贸n no est谩 en el contexto, responde EXACTAMENTE:
264
+ "No tengo suficiente informaci贸n en los documentos disponibles para responder a esta consulta."
265
  - Indica el documento utilizado (ej: "Seg煤n el Documento 1...")
266
+ - S茅 conciso y profesional
267
 
268
  CONTEXTO:
269
  {context}
 
271
  PREGUNTA:
272
  {question}
273
 
274
+ RESPUESTA: [/INST]"""
275
+
 
276
  # -------------------------
277
  # GENERATE ANSWER
278
  # -------------------------
 
279
  try:
 
280
  response = self.llm.invoke(prompt)
281
+
282
+ # Clean up response
283
+ if response:
284
+ response = response.strip()
285
+ # Remove any instruction tags if present
286
+ response = response.replace("</s>", "").replace("<s>", "").strip()
287
+
288
+ return response if response else "No se pudo generar una respuesta."
289
+
290
  except Exception as e:
291
+ logger.error(f"Error generando respuesta: {e}")
292
  return f"Error al generar respuesta: {str(e)}"