File size: 10,620 Bytes
a2fe39f
 
 
15cb869
083d7cb
15cb869
 
 
cf1e4bd
a2fe39f
15cb869
 
 
 
 
 
 
 
a2fe39f
15cb869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf1e4bd
b38893a
cf1e4bd
15cb869
18d8998
 
 
 
cf1e4bd
18d8998
 
15cb869
 
 
 
 
 
 
 
 
 
 
cf1e4bd
b38893a
cf1e4bd
a2fe39f
 
18d8998
cf1e4bd
 
15cb869
 
 
18d8998
15cb869
cf1e4bd
15cb869
cf1e4bd
15cb869
 
cf1e4bd
 
 
 
 
 
 
15cb869
 
18d8998
cf1e4bd
15cb869
cf1e4bd
15cb869
18d8998
15cb869
 
 
cf1e4bd
b38893a
cf1e4bd
15cb869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf1e4bd
 
 
15cb869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf1e4bd
b38893a
cf1e4bd
 
 
 
 
 
 
15cb869
cf1e4bd
 
 
15cb869
b38893a
 
15cb869
b38893a
15cb869
 
b38893a
15cb869
b38893a
 
 
 
 
 
 
15cb869
 
b38893a
 
cf1e4bd
bb474c8
083d7cb
15cb869
 
 
 
 
 
 
 
 
bb474c8
15cb869
cf1e4bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import os
import numpy as np
from typing import List, Dict, Optional
import logging

# Configure the root logger at INFO level (the deployment target is Hugging Face Spaces).
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every method of the service below.
logger = logging.getLogger(__name__)

class CooperativaAdvancedRAG:
    _instance = None
    _models_loaded = False
    
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance
    
    def __init__(self):
        if hasattr(self, 'initialized') and self.initialized:
            return
            
        self.initialized = True
        self._models_loaded = False
        logger.info("--- Inicializando RAG Service (carga perezosa) ---")
        
        # Get the correct paths for Hugging Face Spaces
        self._setup_paths()
        
    def _setup_paths(self):
        """Setup paths for Hugging Face Spaces"""
        # In Hugging Face Spaces, the current working directory is the app root
        self.backend_dir = os.getcwd()
        
        # Check for FAISS index in common locations
        possible_paths = [
            os.path.join(self.backend_dir, "faiss_index"),
            os.path.join(self.backend_dir, "backend", "faiss_index"),
            os.path.join(os.path.dirname(self.backend_dir), "faiss_index"),
        ]
        
        self.persist_directory = None
        for path in possible_paths:
            if os.path.exists(path):
                self.persist_directory = path
                logger.info(f"FAISS index encontrado en: {path}")
                break
        
        # Get API token from environment (Hugging Face Spaces secrets)
        self.hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
        
        if not self.hf_token:
            logger.warning("HUGGINGFACEHUB_API_TOKEN no encontrado. El LLM no funcionará correctamente.")
        else:
            logger.info("Token de Hugging Face encontrado")
    
    def _load_models(self):
        """Lazy loading of models - only called when needed"""
        if self._models_loaded:
            return
            
        logger.info("--- Cargando modelos de IA a la memoria ---")
        
        try:
            # Import here to avoid loading at startup
            from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
            from langchain_community.vectorstores import FAISS
            from sentence_transformers import CrossEncoder
            
            # Check if FAISS index exists
            if not self.persist_directory or not os.path.exists(self.persist_directory):
                error_msg = f"FAISS index no encontrado en: {self.persist_directory}"
                logger.error(error_msg)
                raise RuntimeError(error_msg)
            
            # -------------------------
            # EMBEDDINGS
            # -------------------------
            logger.info("Cargando modelo de embeddings...")
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )
            
            # -------------------------
            # VECTOR DATABASE
            # -------------------------
            logger.info("Cargando FAISS index...")
            self.db = FAISS.load_local(
                self.persist_directory,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
            
            # -------------------------
            # CROSS ENCODER (RERANK)
            # -------------------------
            logger.info("Cargando CrossEncoder...")
            self.cross_encoder = CrossEncoder(
                "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",
                device='cpu'
            )
            
            # -------------------------
            # LLM (solo si hay token)
            # -------------------------
            if self.hf_token:
                logger.info("Inicializando HuggingFaceEndpoint...")
                self.llm = HuggingFaceEndpoint(
                    endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
                    huggingfacehub_api_token=self.hf_token,
                    task="text-generation",
                    max_new_tokens=512,
                    temperature=0.1,
                    do_sample=True,
                    top_p=0.95,
                    typical_p=0.95,
                    repetition_penalty=1.1,
                    timeout=120,
                )
                
                # Test the connection
                try:
                    test_response = self.llm.invoke("Hola")
                    logger.info("LLM inicializado correctamente")
                except Exception as e:
                    logger.error(f"Error al probar LLM: {e}")
                    self.llm = None
            else:
                logger.warning("No hay token disponible - LLM no inicializado")
                self.llm = None
            
            self._models_loaded = True
            logger.info("--- Sistema RAG listo para recibir consultas ---")
            
        except Exception as e:
            logger.error(f"Error crítico cargando modelos: {e}")
            raise
    
    # --------------------------------------------------
    # MAIN QUERY
    # --------------------------------------------------
    
    def query(
        self,
        question: str,
        chat_history: Optional[List[Dict[str, str]]] = None,
        top_k_initial: int = 25,
        top_k_final: int = 3,
    ) -> str:
        
        # Load models on first query
        try:
            self._load_models()
        except Exception as e:
            return f"Error inicializando el sistema: {str(e)}"
        
        # Check if LLM is available
        if not self.llm:
            return "Error: Token de Hugging Face no configurado. Por favor, configura HUGGINGFACEHUB_API_TOKEN en los secretos del Space."
        
        # -------------------------
        # CHAT HISTORY
        # -------------------------
        history_text = ""
        if chat_history:
            for turn in chat_history[-5:]:
                role = "Usuario" if turn.get("role") == "user" else "Asistente"
                content = turn.get("content", "")
                if content:
                    history_text += f"{role}: {content}\n"
        
        standalone_question = question
        
        # -------------------------
        # QUESTION REWRITE (solo si hay historial)
        # -------------------------
        if history_text.strip():
            rewrite_prompt = f"""<s>[INST] Reformula la siguiente pregunta para que sea independiente del historial de la conversación.

Historial:
{history_text}

Pregunta actual:
{question}

Pregunta reformulada (solo la pregunta, sin explicaciones): [/INST]"""
            
            try:
                rewritten = self.llm.invoke(rewrite_prompt).strip()
                if rewritten and len(rewritten) > 10:
                    standalone_question = rewritten
                    logger.info(f"Pregunta reformulada: {standalone_question}")
            except Exception as e:
                logger.error(f"Error en rewrite: {e}")
                # Continue with original question
        
        # -------------------------
        # FAISS SEARCH
        # -------------------------
        try:
            initial_docs = self.db.similarity_search_with_score(
                standalone_question,
                k=top_k_initial
            )
            
            # Filter by score (lower is better for FAISS)
            valid_docs = [
                doc for doc, score in initial_docs
                if score < 2.0  # Ajusta este umbral según necesidad
            ]
            
            if not valid_docs:
                return "No encontré información relevante en los documentos disponibles."
            
        except Exception as e:
            logger.error(f"Error en búsqueda FAISS: {e}")
            return f"Error en la búsqueda: {str(e)}"
        
        # -------------------------
        # CROSS ENCODER RERANK
        # -------------------------
        try:
            cross_inputs = [
                [standalone_question, doc.page_content]
                for doc in valid_docs
            ]
            
            scores = self.cross_encoder.predict(cross_inputs)
            
            # Sort by score (higher is better for cross-encoder)
            sorted_idx = np.argsort(scores)[::-1]
            
            top_docs = [
                valid_docs[i]
                for i in sorted_idx[:top_k_final]
            ]
            
        except Exception as e:
            logger.error(f"Error en reranking: {e}")
            # Fallback to use valid_docs without reranking
            top_docs = valid_docs[:top_k_final]
        
        # -------------------------
        # CONTEXT
        # -------------------------
        context = "\n\n".join(
            [
                f"Documento {i+1}:\n{doc.page_content}"
                for i, doc in enumerate(top_docs)
            ]
        )
        
        # -------------------------
        # FINAL PROMPT
        # -------------------------
        prompt = f"""<s>[INST] Eres un asistente experto en análisis de documentos bancarios y contractuales.

INSTRUCCIONES:
- Responde SOLO usando el CONTEXTO proporcionado
- No inventes información
- Si la información no está en el contexto, responde EXACTAMENTE:
  "No tengo suficiente información en los documentos disponibles para responder a esta consulta."
- Indica el documento utilizado (ej: "Según el Documento 1...")
- Sé conciso y profesional

CONTEXTO:
{context}

PREGUNTA:
{question}

RESPUESTA: [/INST]"""
        
        # -------------------------
        # GENERATE ANSWER
        # -------------------------
        try:
            response = self.llm.invoke(prompt)
            
            # Clean up response
            if response:
                response = response.strip()
                # Remove any instruction tags if present
                response = response.replace("</s>", "").replace("<s>", "").strip()
            
            return response if response else "No se pudo generar una respuesta."
            
        except Exception as e:
            logger.error(f"Error generando respuesta: {e}")
            return f"Error al generar respuesta: {str(e)}"