Spaces:
Running
Running
NoeMartinezSanchez committed on
Commit ·
94252ff
1
Parent(s): bdd77b2
Mejora del prompt 6
Browse files
- api/main.py +3 -4
- config/settings.py +1 -1
- models/gemma_wrapper.py +44 -20
api/main.py
CHANGED
|
@@ -140,12 +140,12 @@ async def chat(request: ChatRequest):
|
|
| 140 |
logger.info(f"🔍 DEBUG - sources count: {len(sources) if sources else 0}")
|
| 141 |
logger.info(f"📤 Respuesta generada: {'RAG' if is_rag else 'Intent'} - Confianza: {confidence:.2%}")
|
| 142 |
|
| 143 |
-
# Crear respuesta
|
| 144 |
response = ChatResponse(
|
| 145 |
response=response_text,
|
| 146 |
sources=sources,
|
| 147 |
is_rag_response=is_rag,
|
| 148 |
-
confidence=
|
| 149 |
)
|
| 150 |
|
| 151 |
# Almacenar conversación
|
|
@@ -163,13 +163,12 @@ async def chat(request: ChatRequest):
|
|
| 163 |
"sources": sources
|
| 164 |
})
|
| 165 |
|
| 166 |
-
# Añadir headers con IDs
|
| 167 |
headers = {
|
| 168 |
"X-User-ID": user_id,
|
| 169 |
"X-Conversation-ID": conversation_id,
|
| 170 |
"X-Message-ID": message_id,
|
| 171 |
"X-Response-Type": "rag" if is_rag else "intent",
|
| 172 |
-
"X-Confidence": str(confidence)
|
| 173 |
}
|
| 174 |
|
| 175 |
return JSONResponse(
|
|
|
|
| 140 |
logger.info(f"🔍 DEBUG - sources count: {len(sources) if sources else 0}")
|
| 141 |
logger.info(f"📤 Respuesta generada: {'RAG' if is_rag else 'Intent'} - Confianza: {confidence:.2%}")
|
| 142 |
|
| 143 |
+
# Crear respuesta - NO mostrar confianza al usuario
|
| 144 |
response = ChatResponse(
|
| 145 |
response=response_text,
|
| 146 |
sources=sources,
|
| 147 |
is_rag_response=is_rag,
|
| 148 |
+
confidence=None # Ocultar confianza del usuario
|
| 149 |
)
|
| 150 |
|
| 151 |
# Almacenar conversación
|
|
|
|
| 163 |
"sources": sources
|
| 164 |
})
|
| 165 |
|
| 166 |
+
# Añadir headers con IDs (sin confianza)
|
| 167 |
headers = {
|
| 168 |
"X-User-ID": user_id,
|
| 169 |
"X-Conversation-ID": conversation_id,
|
| 170 |
"X-Message-ID": message_id,
|
| 171 |
"X-Response-Type": "rag" if is_rag else "intent",
|
|
|
|
| 172 |
}
|
| 173 |
|
| 174 |
return JSONResponse(
|
config/settings.py
CHANGED
|
@@ -25,7 +25,7 @@ class Settings(BaseSettings):
|
|
| 25 |
INTENTS_ENABLED: bool = True # Si quieres poder desactivar intents fácilmente
|
| 26 |
|
| 27 |
# Búsqueda y recuperación
|
| 28 |
-
TOP_K_RESULTS: int = Field(default=
|
| 29 |
SIMILARITY_THRESHOLD: float = Field(default=0.75, ge=0.1, le=1.0)
|
| 30 |
MAX_CONTEXT_LENGTH: int = 4000 # Tokens máximos para contexto
|
| 31 |
|
|
|
|
| 25 |
INTENTS_ENABLED: bool = True # Si quieres poder desactivar intents fácilmente
|
| 26 |
|
| 27 |
# Búsqueda y recuperación
|
| 28 |
+
TOP_K_RESULTS: int = Field(default=5, ge=1, le=10)
|
| 29 |
SIMILARITY_THRESHOLD: float = Field(default=0.75, ge=0.1, le=1.0)
|
| 30 |
MAX_CONTEXT_LENGTH: int = 4000 # Tokens máximos para contexto
|
| 31 |
|
models/gemma_wrapper.py
CHANGED
|
@@ -258,19 +258,22 @@ question: str,
|
|
| 258 |
Returns:
|
| 259 |
Generated response based on the context.
|
| 260 |
"""
|
| 261 |
-
prompt = self.
|
| 262 |
|
| 263 |
logger.info(f"RAG generation - Context length: {len(context)}, Question: {question[:50]}...")
|
| 264 |
-
|
| 265 |
prompt=prompt,
|
| 266 |
max_new_tokens=512,
|
| 267 |
-
temperature=0.
|
| 268 |
top_p=0.9,
|
| 269 |
-
repetition_penalty=1.
|
| 270 |
)
|
| 271 |
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
Args:
|
| 276 |
context: Retrieved context from RAG.
|
|
@@ -295,14 +298,21 @@ question: str,
|
|
| 295 |
user_message = """¡De nada! Si tienes más dudas sobre Prepa en Línea, con gusto te ayudo."""
|
| 296 |
|
| 297 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
user_message = f"""Eres un asistente de Prepa en Línea SEP. Responde preguntas de estudiantes usando la información del contexto proporcionado.
|
| 299 |
|
|
|
|
|
|
|
| 300 |
Contexto:
|
| 301 |
{context}
|
| 302 |
|
| 303 |
Pregunta: {question}
|
| 304 |
|
| 305 |
-
Responde de forma clara y útil en español."""
|
| 306 |
|
| 307 |
prompt = f"""<start_of_turn>user
|
| 308 |
{user_message}<end_of_turn>
|
|
@@ -311,14 +321,14 @@ Responde de forma clara y útil en español."""
|
|
| 311 |
|
| 312 |
return prompt
|
| 313 |
|
| 314 |
-
def
|
| 315 |
-
"""Clean and
|
| 316 |
|
| 317 |
Args:
|
| 318 |
response: Raw response from the model.
|
| 319 |
|
| 320 |
Returns:
|
| 321 |
-
Cleaned response.
|
| 322 |
"""
|
| 323 |
import re
|
| 324 |
|
|
@@ -338,21 +348,35 @@ Responde de forma clara y útil en español."""
|
|
| 338 |
response = ' '.join(clean_lines)
|
| 339 |
|
| 340 |
response = re.sub(r'#\w+', '', response)
|
| 341 |
-
|
| 342 |
response = re.sub(r'\*+', '', response)
|
| 343 |
-
|
| 344 |
response = re.sub(r'[{}\[\]()]', '', response)
|
| 345 |
|
| 346 |
response = re.sub(r'\s+', ' ', response).strip()
|
| 347 |
|
| 348 |
-
if
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
return response
|
| 358 |
|
|
|
|
| 258 |
Returns:
|
| 259 |
Generated response based on the context.
|
| 260 |
"""
|
| 261 |
+
prompt = self._build_improved_prompt(context, question)
|
| 262 |
|
| 263 |
logger.info(f"RAG generation - Context length: {len(context)}, Question: {question[:50]}...")
|
| 264 |
+
raw_response = self.generate(
|
| 265 |
prompt=prompt,
|
| 266 |
max_new_tokens=512,
|
| 267 |
+
temperature=0.6,
|
| 268 |
top_p=0.9,
|
| 269 |
+
repetition_penalty=1.15,
|
| 270 |
)
|
| 271 |
|
| 272 |
+
response = self._clean_and_fix_response(raw_response)
|
| 273 |
+
return response
|
| 274 |
+
|
| 275 |
+
def _build_improved_prompt(self, context: str, question: str) -> str:
|
| 276 |
+
"""Build improved prompt with better handling of common topics.
|
| 277 |
|
| 278 |
Args:
|
| 279 |
context: Retrieved context from RAG.
|
|
|
|
| 298 |
user_message = """¡De nada! Si tienes más dudas sobre Prepa en Línea, con gusto te ayudo."""
|
| 299 |
|
| 300 |
else:
|
| 301 |
+
if "propedéutico" in question_lower or "curso propedéutico" in question_lower:
|
| 302 |
+
context_hint = "El curso propedéutico es obligatorio para tous los estudiantes de Prepa en Línea. Tiene una duración aproximada de 3 semanas y debes completarlo antes de comenzar el primer semestre formal."
|
| 303 |
+
else:
|
| 304 |
+
context_hint = ""
|
| 305 |
+
|
| 306 |
user_message = f"""Eres un asistente de Prepa en Línea SEP. Responde preguntas de estudiantes usando la información del contexto proporcionado.
|
| 307 |
|
| 308 |
+
{context_hint}
|
| 309 |
+
|
| 310 |
Contexto:
|
| 311 |
{context}
|
| 312 |
|
| 313 |
Pregunta: {question}
|
| 314 |
|
| 315 |
+
Responde de forma clara y útil en español. Si no tienes información suficiente, dilo honestamente."""
|
| 316 |
|
| 317 |
prompt = f"""<start_of_turn>user
|
| 318 |
{user_message}<end_of_turn>
|
|
|
|
| 321 |
|
| 322 |
return prompt
|
| 323 |
|
| 324 |
+
def _clean_and_fix_response(self, response: str) -> str:
|
| 325 |
+
"""Clean and fix generated response - handles truncations and formatting.
|
| 326 |
|
| 327 |
Args:
|
| 328 |
response: Raw response from the model.
|
| 329 |
|
| 330 |
Returns:
|
| 331 |
+
Cleaned and fixed response.
|
| 332 |
"""
|
| 333 |
import re
|
| 334 |
|
|
|
|
| 348 |
response = ' '.join(clean_lines)
|
| 349 |
|
| 350 |
response = re.sub(r'#\w+', '', response)
|
|
|
|
| 351 |
response = re.sub(r'\*+', '', response)
|
|
|
|
| 352 |
response = re.sub(r'[{}\[\]()]', '', response)
|
| 353 |
|
| 354 |
response = re.sub(r'\s+', ' ', response).strip()
|
| 355 |
|
| 356 |
+
if len(response) >= 2:
|
| 357 |
+
first_word = response.split()[0] if response.split() else ""
|
| 358 |
+
if len(first_word) <= 2 and first_word.islower():
|
| 359 |
+
response = response[len(first_word):].strip()
|
| 360 |
+
|
| 361 |
+
response = response.lower()
|
| 362 |
+
|
| 363 |
+
sentences = re.split(r'([.!?]+)', response)
|
| 364 |
+
if sentences:
|
| 365 |
+
fixed = []
|
| 366 |
+
for i, part in enumerate(sentences):
|
| 367 |
+
if i % 2 == 0:
|
| 368 |
+
if part:
|
| 369 |
+
part = part.strip()
|
| 370 |
+
if part:
|
| 371 |
+
part = part[0].upper() + part[1:] if len(part) > 1 else part.upper()
|
| 372 |
+
fixed.append(part)
|
| 373 |
+
else:
|
| 374 |
+
fixed.append(part)
|
| 375 |
+
response = ''.join(fixed)
|
| 376 |
+
|
| 377 |
+
response = response.strip()
|
| 378 |
+
if not response or len(response) < 5:
|
| 379 |
+
response = "Lo siento, no pude generar una respuesta adecuada. ¿Podrías reformular tu pregunta?"
|
| 380 |
|
| 381 |
return response
|
| 382 |
|