TomacGonz committed on
Commit
15cb869
verified
1 Parent(s): aae6fb6

Update api/services/rag_service.py

Browse files
Files changed (1) hide show
  1. api/services/rag_service.py +217 -139
api/services/rag_service.py CHANGED
@@ -1,84 +1,147 @@
1
  import os
2
  import numpy as np
3
  from typing import List, Dict, Optional
 
4
 
5
- from dotenv import load_dotenv
6
-
7
- from langchain_community.vectorstores import FAISS
8
- from langchain_huggingface import HuggingFaceEmbeddings
9
- from langchain_huggingface import HuggingFaceEndpoint
10
- from langchain_community.llms import HuggingFaceHub
11
- from sentence_transformers import CrossEncoder
12
-
13
 
14
  class CooperativaAdvancedRAG:
15
-
 
 
 
 
 
 
 
16
  def __init__(self):
17
-
18
- print("--- Cargando modelos de IA a la memoria (FAISS + Embeddings + CrossEncoder) ---")
19
-
20
- # -------------------------
21
- # PATHS
22
- # -------------------------
23
-
24
- current_dir = os.path.dirname(os.path.abspath(__file__))
25
- api_dir = os.path.dirname(current_dir)
26
- backend_dir = os.path.dirname(api_dir)
27
-
28
- env_path = os.path.join(backend_dir, ".env")
29
- load_dotenv(env_path)
30
-
31
- self.persist_directory = os.path.join(backend_dir, "faiss_index")
32
-
33
- if not os.path.exists(self.persist_directory):
34
- raise RuntimeError("FAISS index does not exist")
35
-
36
- # -------------------------
37
- # EMBEDDINGS
38
- # -------------------------
39
-
40
- self.embeddings = HuggingFaceEmbeddings(
41
- model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
42
- )
43
-
44
- # -------------------------
45
- # VECTOR DATABASE
46
- # -------------------------
47
-
48
- self.db = FAISS.load_local(
49
- self.persist_directory,
50
- self.embeddings,
51
- allow_dangerous_deserialization=True,
52
- )
53
-
54
- # -------------------------
55
- # CROSS ENCODER (RERANK)
56
- # -------------------------
57
-
58
- self.cross_encoder = CrossEncoder(
59
- "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
60
- )
61
-
62
- # -------------------------
63
- # LLM
64
- # -------------------------
65
-
66
- self.llm = HuggingFaceHub(
67
- repo_id="mistralai/Mistral-7B-Instruct-v0.3",
68
- huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
69
- model_kwargs={
70
- "temperature": 0.1,
71
- "max_new_tokens": 512,
72
- "typical_p": 1,
73
- }
74
- )
75
-
76
- print("--- Sistema RAG listo para recibir consultas ---")
77
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # --------------------------------------------------
79
  # MAIN QUERY
80
  # --------------------------------------------------
81
-
82
  def query(
83
  self,
84
  question: str,
@@ -86,29 +149,35 @@ class CooperativaAdvancedRAG:
86
  top_k_initial: int = 25,
87
  top_k_final: int = 3,
88
  ) -> str:
89
-
 
 
 
 
 
 
 
 
 
 
90
  # -------------------------
91
  # CHAT HISTORY
92
  # -------------------------
93
-
94
  history_text = ""
95
-
96
  if chat_history:
97
  for turn in chat_history[-5:]:
98
  role = "Usuario" if turn.get("role") == "user" else "Asistente"
99
  content = turn.get("content", "")
100
- history_text += f"{role}: {content}\n"
101
-
 
102
  standalone_question = question
103
-
104
  # -------------------------
105
- # QUESTION REWRITE
106
  # -------------------------
107
-
108
- if history_text:
109
-
110
- rewrite_prompt = f"""
111
- Reformula la pregunta para que sea independiente.
112
 
113
  Historial:
114
  {history_text}
@@ -116,79 +185,85 @@ Historial:
116
  Pregunta actual:
117
  {question}
118
 
119
- Pregunta reformulada:
120
- """
121
-
122
  try:
123
-
124
  rewritten = self.llm.invoke(rewrite_prompt).strip()
125
-
126
- if rewritten:
127
  standalone_question = rewritten
128
-
129
  except Exception as e:
130
-
131
- print(f"Error rewriting: {e}")
132
-
133
  # -------------------------
134
  # FAISS SEARCH
135
  # -------------------------
136
-
137
- initial_docs = self.db.similarity_search_with_score(
138
- standalone_question,
139
- k=top_k_initial
140
- )
141
-
142
- valid_docs = [
143
- doc for doc, score in initial_docs
144
- if score < 1.8
145
- ]
146
-
147
- if not valid_docs:
148
- return "No encontr茅 informaci贸n relevante en los documentos disponibles."
149
-
 
 
 
 
 
150
  # -------------------------
151
  # CROSS ENCODER RERANK
152
  # -------------------------
153
-
154
- cross_inputs = [
155
- [standalone_question, doc.page_content]
156
- for doc in valid_docs
157
- ]
158
-
159
- scores = self.cross_encoder.predict(cross_inputs)
160
-
161
- sorted_idx = np.argsort(scores)[::-1]
162
-
163
- top_docs = [
164
- valid_docs[i]
165
- for i in sorted_idx[:top_k_final]
166
- ]
167
-
 
 
 
 
 
 
168
  # -------------------------
169
  # CONTEXT
170
  # -------------------------
171
-
172
  context = "\n\n".join(
173
  [
174
  f"Documento {i+1}:\n{doc.page_content}"
175
  for i, doc in enumerate(top_docs)
176
  ]
177
  )
178
-
179
  # -------------------------
180
  # FINAL PROMPT
181
  # -------------------------
182
-
183
- prompt = f"""
184
- Eres un asistente experto en an谩lisis de documentos bancarios y contractuales.
185
 
186
  INSTRUCCIONES:
187
- - Responde SOLO usando el CONTEXTO
188
  - No inventes informaci贸n
189
- - Si la informaci贸n no est谩 responde exactamente:
190
- "No tengo suficiente informaci贸n en los documentos disponibles para responder a esta consulta."
191
  - Indica el documento utilizado (ej: "Seg煤n el Documento 1...")
 
192
 
193
  CONTEXTO:
194
  {context}
@@ -196,19 +271,22 @@ CONTEXTO:
196
  PREGUNTA:
197
  {question}
198
 
199
- RESPUESTA:
200
- """
201
-
202
  # -------------------------
203
  # GENERATE ANSWER
204
  # -------------------------
205
-
206
  try:
207
-
208
  response = self.llm.invoke(prompt)
209
-
210
- return response.strip()
211
-
 
 
 
 
 
 
212
  except Exception as e:
213
-
214
  return f"Error al generar respuesta: {str(e)}"
 
1
  import os
2
  import numpy as np
3
  from typing import List, Dict, Optional
4
+ import logging
5
 
6
+ # Configure logging for Hugging Face Spaces
7
+ logging.basicConfig(level=logging.INFO)
8
+ logger = logging.getLogger(__name__)
 
 
 
 
 
9
 
10
  class CooperativaAdvancedRAG:
11
+ _instance = None
12
+ _models_loaded = False
13
+
14
+ def __new__(cls):
15
+ if cls._instance is None:
16
+ cls._instance = super().__new__(cls)
17
+ return cls._instance
18
+
19
  def __init__(self):
20
+ if hasattr(self, 'initialized') and self.initialized:
21
+ return
22
+
23
+ self.initialized = True
24
+ self._models_loaded = False
25
+ logger.info("--- Inicializando RAG Service (carga perezosa) ---")
26
+
27
+ # Get the correct paths for Hugging Face Spaces
28
+ self._setup_paths()
29
+
30
+ def _setup_paths(self):
31
+ """Setup paths for Hugging Face Spaces"""
32
+ # In Hugging Face Spaces, the current working directory is the app root
33
+ self.backend_dir = os.getcwd()
34
+
35
+ # Check for FAISS index in common locations
36
+ possible_paths = [
37
+ os.path.join(self.backend_dir, "faiss_index"),
38
+ os.path.join(self.backend_dir, "backend", "faiss_index"),
39
+ os.path.join(os.path.dirname(self.backend_dir), "faiss_index"),
40
+ ]
41
+
42
+ self.persist_directory = None
43
+ for path in possible_paths:
44
+ if os.path.exists(path):
45
+ self.persist_directory = path
46
+ logger.info(f"FAISS index encontrado en: {path}")
47
+ break
48
+
49
+ # Get API token from environment (Hugging Face Spaces secrets)
50
+ self.hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
51
+
52
+ if not self.hf_token:
53
+ logger.warning("HUGGINGFACEHUB_API_TOKEN no encontrado. El LLM no funcionar谩 correctamente.")
54
+ else:
55
+ logger.info("Token de Hugging Face encontrado")
56
+
57
+ def _load_models(self):
58
+ """Lazy loading of models - only called when needed"""
59
+ if self._models_loaded:
60
+ return
61
+
62
+ logger.info("--- Cargando modelos de IA a la memoria ---")
63
+
64
+ try:
65
+ # Import here to avoid loading at startup
66
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
67
+ from langchain_community.vectorstores import FAISS
68
+ from sentence_transformers import CrossEncoder
69
+
70
+ # Check if FAISS index exists
71
+ if not self.persist_directory or not os.path.exists(self.persist_directory):
72
+ error_msg = f"FAISS index no encontrado en: {self.persist_directory}"
73
+ logger.error(error_msg)
74
+ raise RuntimeError(error_msg)
75
+
76
+ # -------------------------
77
+ # EMBEDDINGS
78
+ # -------------------------
79
+ logger.info("Cargando modelo de embeddings...")
80
+ self.embeddings = HuggingFaceEmbeddings(
81
+ model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
82
+ model_kwargs={'device': 'cpu'},
83
+ encode_kwargs={'normalize_embeddings': True}
84
+ )
85
+
86
+ # -------------------------
87
+ # VECTOR DATABASE
88
+ # -------------------------
89
+ logger.info("Cargando FAISS index...")
90
+ self.db = FAISS.load_local(
91
+ self.persist_directory,
92
+ self.embeddings,
93
+ allow_dangerous_deserialization=True,
94
+ )
95
+
96
+ # -------------------------
97
+ # CROSS ENCODER (RERANK)
98
+ # -------------------------
99
+ logger.info("Cargando CrossEncoder...")
100
+ self.cross_encoder = CrossEncoder(
101
+ "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1",
102
+ device='cpu'
103
+ )
104
+
105
+ # -------------------------
106
+ # LLM (solo si hay token)
107
+ # -------------------------
108
+ if self.hf_token:
109
+ logger.info("Inicializando HuggingFaceEndpoint...")
110
+ self.llm = HuggingFaceEndpoint(
111
+ endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
112
+ huggingfacehub_api_token=self.hf_token,
113
+ task="text-generation",
114
+ max_new_tokens=512,
115
+ temperature=0.1,
116
+ do_sample=True,
117
+ top_p=0.95,
118
+ typical_p=0.95,
119
+ repetition_penalty=1.1,
120
+ timeout=120,
121
+ )
122
+
123
+ # Test the connection
124
+ try:
125
+ test_response = self.llm.invoke("Hola")
126
+ logger.info("LLM inicializado correctamente")
127
+ except Exception as e:
128
+ logger.error(f"Error al probar LLM: {e}")
129
+ self.llm = None
130
+ else:
131
+ logger.warning("No hay token disponible - LLM no inicializado")
132
+ self.llm = None
133
+
134
+ self._models_loaded = True
135
+ logger.info("--- Sistema RAG listo para recibir consultas ---")
136
+
137
+ except Exception as e:
138
+ logger.error(f"Error cr铆tico cargando modelos: {e}")
139
+ raise
140
+
141
  # --------------------------------------------------
142
  # MAIN QUERY
143
  # --------------------------------------------------
144
+
145
  def query(
146
  self,
147
  question: str,
 
149
  top_k_initial: int = 25,
150
  top_k_final: int = 3,
151
  ) -> str:
152
+
153
+ # Load models on first query
154
+ try:
155
+ self._load_models()
156
+ except Exception as e:
157
+ return f"Error inicializando el sistema: {str(e)}"
158
+
159
+ # Check if LLM is available
160
+ if not self.llm:
161
+ return "Error: Token de Hugging Face no configurado. Por favor, configura HUGGINGFACEHUB_API_TOKEN en los secretos del Space."
162
+
163
  # -------------------------
164
  # CHAT HISTORY
165
  # -------------------------
 
166
  history_text = ""
 
167
  if chat_history:
168
  for turn in chat_history[-5:]:
169
  role = "Usuario" if turn.get("role") == "user" else "Asistente"
170
  content = turn.get("content", "")
171
+ if content:
172
+ history_text += f"{role}: {content}\n"
173
+
174
  standalone_question = question
175
+
176
  # -------------------------
177
+ # QUESTION REWRITE (solo si hay historial)
178
  # -------------------------
179
+ if history_text.strip():
180
+ rewrite_prompt = f"""<s>[INST] Reformula la siguiente pregunta para que sea independiente del historial de la conversaci贸n.
 
 
 
181
 
182
  Historial:
183
  {history_text}
 
185
  Pregunta actual:
186
  {question}
187
 
188
+ Pregunta reformulada (solo la pregunta, sin explicaciones): [/INST]"""
189
+
 
190
  try:
 
191
  rewritten = self.llm.invoke(rewrite_prompt).strip()
192
+ if rewritten and len(rewritten) > 10:
 
193
  standalone_question = rewritten
194
+ logger.info(f"Pregunta reformulada: {standalone_question}")
195
  except Exception as e:
196
+ logger.error(f"Error en rewrite: {e}")
197
+ # Continue with original question
198
+
199
  # -------------------------
200
  # FAISS SEARCH
201
  # -------------------------
202
+ try:
203
+ initial_docs = self.db.similarity_search_with_score(
204
+ standalone_question,
205
+ k=top_k_initial
206
+ )
207
+
208
+ # Filter by score (lower is better for FAISS)
209
+ valid_docs = [
210
+ doc for doc, score in initial_docs
211
+ if score < 2.0 # Ajusta este umbral seg煤n necesidad
212
+ ]
213
+
214
+ if not valid_docs:
215
+ return "No encontr茅 informaci贸n relevante en los documentos disponibles."
216
+
217
+ except Exception as e:
218
+ logger.error(f"Error en b煤squeda FAISS: {e}")
219
+ return f"Error en la b煤squeda: {str(e)}"
220
+
221
  # -------------------------
222
  # CROSS ENCODER RERANK
223
  # -------------------------
224
+ try:
225
+ cross_inputs = [
226
+ [standalone_question, doc.page_content]
227
+ for doc in valid_docs
228
+ ]
229
+
230
+ scores = self.cross_encoder.predict(cross_inputs)
231
+
232
+ # Sort by score (higher is better for cross-encoder)
233
+ sorted_idx = np.argsort(scores)[::-1]
234
+
235
+ top_docs = [
236
+ valid_docs[i]
237
+ for i in sorted_idx[:top_k_final]
238
+ ]
239
+
240
+ except Exception as e:
241
+ logger.error(f"Error en reranking: {e}")
242
+ # Fallback to use valid_docs without reranking
243
+ top_docs = valid_docs[:top_k_final]
244
+
245
  # -------------------------
246
  # CONTEXT
247
  # -------------------------
 
248
  context = "\n\n".join(
249
  [
250
  f"Documento {i+1}:\n{doc.page_content}"
251
  for i, doc in enumerate(top_docs)
252
  ]
253
  )
254
+
255
  # -------------------------
256
  # FINAL PROMPT
257
  # -------------------------
258
+ prompt = f"""<s>[INST] Eres un asistente experto en an谩lisis de documentos bancarios y contractuales.
 
 
259
 
260
  INSTRUCCIONES:
261
+ - Responde SOLO usando el CONTEXTO proporcionado
262
  - No inventes informaci贸n
263
+ - Si la informaci贸n no est谩 en el contexto, responde EXACTAMENTE:
264
+ "No tengo suficiente informaci贸n en los documentos disponibles para responder a esta consulta."
265
  - Indica el documento utilizado (ej: "Seg煤n el Documento 1...")
266
+ - S茅 conciso y profesional
267
 
268
  CONTEXTO:
269
  {context}
 
271
  PREGUNTA:
272
  {question}
273
 
274
+ RESPUESTA: [/INST]"""
275
+
 
276
  # -------------------------
277
  # GENERATE ANSWER
278
  # -------------------------
 
279
  try:
 
280
  response = self.llm.invoke(prompt)
281
+
282
+ # Clean up response
283
+ if response:
284
+ response = response.strip()
285
+ # Remove any instruction tags if present
286
+ response = response.replace("</s>", "").replace("<s>", "").strip()
287
+
288
+ return response if response else "No se pudo generar una respuesta."
289
+
290
  except Exception as e:
291
+ logger.error(f"Error generando respuesta: {e}")
292
  return f"Error al generar respuesta: {str(e)}"