Spaces:
Running
Running
NoeMartinezSanchez committed on
Commit ·
5c4e69b
1
Parent(s): fbe249f
Corrección más a fondo
Browse files
- api/main.py +26 -4
- models/gemma_wrapper.py +13 -6
- rag/retriever.py +16 -5
api/main.py
CHANGED
|
@@ -129,10 +129,32 @@ async def chat(request: ChatRequest):
|
|
| 129 |
user_id = request.user_id or str(uuid.uuid4())
|
| 130 |
conversation_id = request.conversation_id or str(uuid.uuid4())
|
| 131 |
|
| 132 |
-
#
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# DEBUG: Verificar qué se recibe
|
| 138 |
logger.info(f"🔍 DEBUG - response_text tipo: {type(response_text)}, largo: {len(response_text) if response_text else 0}")
|
|
|
|
| 129 |
user_id = request.user_id or str(uuid.uuid4())
|
| 130 |
conversation_id = request.conversation_id or str(uuid.uuid4())
|
| 131 |
|
| 132 |
+
# Detectar saludos y responder directamente sin RAG
|
| 133 |
+
msg_lower = request.message.lower().strip()
|
| 134 |
+
saludos = ["hola", "buenos días", "buenas tardes", "buenas", "holi", "hello", "hey", "qué tal", "cómo estás", "buen día"]
|
| 135 |
+
despedidas = ["adiós", "chao", "bye", "hasta luego", "me voy", "nos vemos", "me retiro"]
|
| 136 |
+
gracias = ["gracias", "thank", "agradezco", "muchas gracias", "te agradezco"]
|
| 137 |
+
|
| 138 |
+
if any(s in msg_lower for s in saludos):
|
| 139 |
+
response_text = "¡Hola! Bienvenido a Prepa en Línea SEP. Estoy aquí para ayudarte con tus dudas sobre el programa. ¿Qué necesitas saber?"
|
| 140 |
+
is_rag = False
|
| 141 |
+
confidence = 1.0
|
| 142 |
+
sources = []
|
| 143 |
+
elif any(s in msg_lower for s in despedidas):
|
| 144 |
+
response_text = "¡Hasta luego! Éxito en tus estudios. Cuando tengas dudas sobre Prepa en Línea, vuelve a escribirme."
|
| 145 |
+
is_rag = False
|
| 146 |
+
confidence = 1.0
|
| 147 |
+
sources = []
|
| 148 |
+
elif any(s in msg_lower for s in gracias):
|
| 149 |
+
response_text = "¡De nada! Si tienes más dudas sobre Prepa en Línea, con gusto te ayudo. ¡Éxito en tus estudios!"
|
| 150 |
+
is_rag = False
|
| 151 |
+
confidence = 1.0
|
| 152 |
+
sources = []
|
| 153 |
+
else:
|
| 154 |
+
# Procesar consulta normal con RAG
|
| 155 |
+
response_text, is_rag, confidence, sources = rag_system.process_query(
|
| 156 |
+
request.message
|
| 157 |
+
)
|
| 158 |
|
| 159 |
# DEBUG: Verificar qué se recibe
|
| 160 |
logger.info(f"🔍 DEBUG - response_text tipo: {type(response_text)}, largo: {len(response_text) if response_text else 0}")
|
models/gemma_wrapper.py
CHANGED
|
@@ -263,10 +263,10 @@ question: str,
|
|
| 263 |
logger.info(f"RAG generation - Context length: {len(context)}, Question: {question[:50]}...")
|
| 264 |
raw_response = self.generate(
|
| 265 |
prompt=prompt,
|
| 266 |
-
max_new_tokens=
|
| 267 |
-
temperature=0.
|
| 268 |
-
top_p=0.
|
| 269 |
-
repetition_penalty=1.
|
| 270 |
)
|
| 271 |
|
| 272 |
response = self._clean_and_fix_response(raw_response)
|
|
@@ -353,11 +353,20 @@ Responde de forma clara y útil en español. Si no tienes información suficient
|
|
| 353 |
|
| 354 |
response = re.sub(r'\s+', ' ', response).strip()
|
| 355 |
|
|
|
|
| 356 |
if len(response) >= 2:
|
| 357 |
first_word = response.split()[0] if response.split() else ""
|
| 358 |
if len(first_word) <= 2 and first_word.islower():
|
| 359 |
response = response[len(first_word):].strip()
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
response = response.lower()
|
| 362 |
|
| 363 |
sentences = re.split(r'([.!?]+)', response)
|
|
@@ -375,8 +384,6 @@ Responde de forma clara y útil en español. Si no tienes información suficient
|
|
| 375 |
response = ''.join(fixed)
|
| 376 |
|
| 377 |
response = response.strip()
|
| 378 |
-
if not response or len(response) < 5:
|
| 379 |
-
response = "Lo siento, no pude generar una respuesta adecuada. ¿Podrías reformular tu pregunta?"
|
| 380 |
|
| 381 |
return response
|
| 382 |
|
|
|
|
| 263 |
logger.info(f"RAG generation - Context length: {len(context)}, Question: {question[:50]}...")
|
| 264 |
raw_response = self.generate(
|
| 265 |
prompt=prompt,
|
| 266 |
+
max_new_tokens=256,
|
| 267 |
+
temperature=0.5,
|
| 268 |
+
top_p=0.85,
|
| 269 |
+
repetition_penalty=1.1,
|
| 270 |
)
|
| 271 |
|
| 272 |
response = self._clean_and_fix_response(raw_response)
|
|
|
|
| 353 |
|
| 354 |
response = re.sub(r'\s+', ' ', response).strip()
|
| 355 |
|
| 356 |
+
# Fix truncated responses - remove short truncated first words
|
| 357 |
if len(response) >= 2:
|
| 358 |
first_word = response.split()[0] if response.split() else ""
|
| 359 |
if len(first_word) <= 2 and first_word.islower():
|
| 360 |
response = response[len(first_word):].strip()
|
| 361 |
|
| 362 |
+
# Fix non-alphabetic characters at start
|
| 363 |
+
while response and not response[0].isalpha():
|
| 364 |
+
response = response[1:].strip()
|
| 365 |
+
|
| 366 |
+
# Ensure minimum length
|
| 367 |
+
if not response or len(response) < 20:
|
| 368 |
+
response = "No encontré información específica sobre ese tema en los materiales disponibles. Te recomiendo consultar la guía del aspirante de Prepa en Línea SEP."
|
| 369 |
+
|
| 370 |
response = response.lower()
|
| 371 |
|
| 372 |
sentences = re.split(r'([.!?]+)', response)
|
|
|
|
| 384 |
response = ''.join(fixed)
|
| 385 |
|
| 386 |
response = response.strip()
|
|
|
|
|
|
|
| 387 |
|
| 388 |
return response
|
| 389 |
|
rag/retriever.py
CHANGED
|
@@ -318,13 +318,14 @@ class VectorStoreFAISS:
|
|
| 318 |
'distances': [distances],
|
| 319 |
'metadatas': [results]
|
| 320 |
}
|
| 321 |
-
def search_documents(self, query_embedding: np.ndarray, top_k: int =
|
| 322 |
"""
|
| 323 |
Buscar documentos similares al embedding de consulta.
|
| 324 |
|
| 325 |
Args:
|
| 326 |
query_embedding: Embedding de la consulta
|
| 327 |
top_k: Número de resultados a retornar
|
|
|
|
| 328 |
|
| 329 |
Returns:
|
| 330 |
Diccionario con formato compatible con ChromaDB
|
|
@@ -345,16 +346,26 @@ class VectorStoreFAISS:
|
|
| 345 |
min(top_k, self.index.ntotal)
|
| 346 |
)
|
| 347 |
|
| 348 |
-
# Formatear resultados
|
| 349 |
documents_result = []
|
| 350 |
metadatas_result = []
|
| 351 |
distances_result = []
|
| 352 |
|
| 353 |
for i, idx in enumerate(indices[0]):
|
| 354 |
if idx < len(self.documents):
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
return {
|
| 360 |
'documents': [documents_result],
|
|
|
|
| 318 |
'distances': [distances],
|
| 319 |
'metadatas': [results]
|
| 320 |
}
|
| 321 |
+
def search_documents(self, query_embedding: np.ndarray, top_k: int = 5, min_similarity: float = 0.6) -> Dict:
|
| 322 |
"""
|
| 323 |
Buscar documentos similares al embedding de consulta.
|
| 324 |
|
| 325 |
Args:
|
| 326 |
query_embedding: Embedding de la consulta
|
| 327 |
top_k: Número de resultados a retornar
|
| 328 |
+
min_similarity: Umbral mínimo de similitud (0-1)
|
| 329 |
|
| 330 |
Returns:
|
| 331 |
Diccionario con formato compatible con ChromaDB
|
|
|
|
| 346 |
min(top_k, self.index.ntotal)
|
| 347 |
)
|
| 348 |
|
| 349 |
+
# Formatear resultados con filtrado por similitud
|
| 350 |
documents_result = []
|
| 351 |
metadatas_result = []
|
| 352 |
distances_result = []
|
| 353 |
|
| 354 |
for i, idx in enumerate(indices[0]):
|
| 355 |
if idx < len(self.documents):
|
| 356 |
+
similarity = float(distances[0][i])
|
| 357 |
+
if similarity >= min_similarity:
|
| 358 |
+
documents_result.append(self.documents[idx])
|
| 359 |
+
metadatas_result.append(self.metadata[idx])
|
| 360 |
+
distances_result.append(similarity)
|
| 361 |
+
|
| 362 |
+
# Si no hay resultados con suficiente similitud, devolver vacío
|
| 363 |
+
if not documents_result:
|
| 364 |
+
return {
|
| 365 |
+
'documents': [[]],
|
| 366 |
+
'distances': [[]],
|
| 367 |
+
'metadatas': [[]]
|
| 368 |
+
}
|
| 369 |
|
| 370 |
return {
|
| 371 |
'documents': [documents_result],
|