Chatbot-RAG-v4 / rag /generator.py
NoeMartinezSanchez
Se ajustaron nuevamente los parámetros porque la respuesta era muy corta
65fb599
"""RAG Generator using TinyLlama.
This module replaces the previous BERT-based generator with TinyLlama
for improved response quality in the RAG architecture.
"""
import logging
import time
from typing import List, Optional
from loguru import logger
from models.tinyllama_wrapper import TinyLlamaWrapper
class TinyLlamaGenerator:
    """Generate chatbot answers with TinyLlama, optionally grounded in RAG context.

    All model work is delegated to ``TinyLlamaWrapper``. This class adds
    context clean-up, prompt construction, logging and the user-facing
    fallback messages on top of it.
    """

    # Upper bound (characters) on the cleaned context placed in the prompt.
    MAX_CONTEXT_CHARS = 1500
    # A cleaned context shorter than this is treated as "nothing useful found".
    MIN_CONTEXT_CHARS = 50
    # Lines whose stripped length does not exceed this are dropped as noise.
    MIN_LINE_CHARS = 10
    # Lower bound on generated tokens for RAG answers; always clamped to the
    # caller's ``max_new_tokens`` so the two limits can never contradict.
    MIN_ANSWER_TOKENS = 40
    # Line prefixes treated as retrieval metadata rather than content.
    _METADATA_PREFIXES = ("##", "📄", "Fila:", "Hoja:")

    _GENERATION_ERROR_MESSAGE = (
        "Lo siento, tuve un problema al generar la respuesta. "
        "Por favor, intenta de nuevo."
    )
    _NO_INFORMATION_MESSAGE = (
        "Lo siento, no encontré información específica sobre eso "
        "en los materiales de Prepa en Línea SEP."
    )

    def __init__(self, use_quantization: bool = False, cache_dir: str = "models/cache"):
        """Create the underlying ``TinyLlamaWrapper`` and log how long it took.

        Args:
            use_quantization: Forwarded to ``TinyLlamaWrapper``; documented
                upstream as enabling 4-bit quantization.
            cache_dir: Forwarded to ``TinyLlamaWrapper`` as the model cache
                directory.

        Raises:
            RuntimeError: If constructing the wrapper fails for any reason;
                the original exception is chained as the cause.
        """
        logger.info("Initializing TinyLlamaGenerator...")
        started = time.perf_counter()
        try:
            self.wrapper = TinyLlamaWrapper(
                use_quantization=use_quantization,
                cache_dir=cache_dir,
            )
        except Exception as e:
            logger.error(f"Failed to initialize TinyLlamaGenerator: {e}")
            raise RuntimeError(f"Generator initialization failed: {e}") from e
        load_time = time.perf_counter() - started
        logger.success(f"TinyLlamaGenerator initialized in {load_time:.1f}s")

    def generate(
        self,
        query: str,
        context: str = "",
        max_length: int = 256,
    ) -> str:
        """Answer ``query``, using ``context`` when it is non-blank.

        Args:
            query: User question.
            context: Retrieved text to ground the answer in. When empty or
                whitespace-only, the query is sent to the model as-is.
            max_length: Token budget for the answer; forwarded as
                ``max_new_tokens`` on both the direct and the RAG path.

        Returns:
            The generated answer, or a fixed apology message if anything
            raises while generating (this method never raises).
        """
        started = time.perf_counter()
        try:
            logger.info(f"Generating response for query (length: {len(query)})")
            if not context or not context.strip():
                logger.info("No context provided, using direct generation")
                response = self.wrapper.generate(
                    prompt=query,
                    max_new_tokens=max_length,
                    temperature=0.2,
                    top_p=0.9,
                )
            else:
                logger.info(f"Using RAG with context (length: {len(context)})")
                response = self.generate_with_context(
                    context=context,
                    question=query,
                    max_new_tokens=max_length,
                )
            elapsed = time.perf_counter() - started
            logger.info(f"Response generated in {elapsed:.2f}s")
            return response
        except Exception as e:
            # Deliberate best-effort boundary: the chatbot should always reply.
            logger.error(f"Error generating response: {e}")
            return self._GENERATION_ERROR_MESSAGE

    @classmethod
    def _clean_context(cls, context: str) -> str:
        """Drop metadata/noise lines from ``context`` and cap its length."""
        kept = []
        for raw_line in context.split("\n"):
            # Strip before filtering so indented or CRLF-terminated marker
            # lines are recognised as metadata too.
            text = raw_line.strip()
            if len(text) <= cls.MIN_LINE_CHARS:
                continue
            if text.startswith(cls._METADATA_PREFIXES):
                continue
            if text.startswith("[") and text.endswith("]"):
                # A whole-line bracketed tag such as "[...]".
                continue
            kept.append(text)
        cleaned = " ".join(kept)
        if len(cleaned) > cls.MAX_CONTEXT_CHARS:
            cleaned = cleaned[: cls.MAX_CONTEXT_CHARS] + "..."
        return cleaned

    def generate_with_context(
        self,
        context: str,
        question: str,
        max_new_tokens: int = 200,
    ) -> str:
        """Generate a context-grounded answer for Prepa en Línea SEP.

        The context is cleaned (metadata lines and very short lines removed,
        remaining lines joined with spaces, result truncated to
        ``MAX_CONTEXT_CHARS``) and embedded in a Spanish instruction prompt.

        Args:
            context: Retrieved text; ``None`` is treated like an empty string.
            question: User question inserted into the prompt.
            max_new_tokens: Token budget passed to the wrapper. The minimum
                answer length is clamped so it never exceeds this value.

        Returns:
            The wrapper's output, or a fixed "no information found" message
            when the cleaned context is shorter than ``MIN_CONTEXT_CHARS`` or
            the wrapper raises.
        """
        clean_context = self._clean_context(context or "")
        if len(clean_context) < self.MIN_CONTEXT_CHARS:
            return self._NO_INFORMATION_MESSAGE

        prompt = (
            "Eres un asesor académico de Prepa en Línea SEP. "
            "Responde solo usando esta información del contexto.\n"
            f"Contexto: {clean_context}\n"
            f"Pregunta: {question}\n"
            "Respuesta directa y completa:"
        )
        logger.info(
            f"RAG generation - Context: {len(clean_context)} chars, "
            f"Question: {question[:50]}..."
        )
        try:
            return self.wrapper.generate(
                prompt=prompt,
                # Honour the caller's budget instead of a hard-coded value.
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                top_p=0.7,
                min_new_tokens=min(self.MIN_ANSWER_TOKENS, max_new_tokens),
            )
        except Exception as e:
            logger.error(f"Error in generate_with_context: {e}")
            return self._NO_INFORMATION_MESSAGE

    def generate_fallback(self, query: str) -> str:
        """Return a randomly chosen canned reply for an unanswerable query.

        Args:
            query: The user's query; it is quoted in one of the candidates.

        Returns:
            One of three fixed Spanish fallback messages.
        """
        import random

        fallback_responses = (
            f"No encontré información específica sobre '{query}' en los materiales disponibles.",
            "Esa pregunta está fuera del alcance de mi conocimiento actual. ¿Hay algo más en lo que pueda ayudarte?",
            "No tengo información suficiente para responder eso. ¿Podrías reformular tu pregunta?",
        )
        return random.choice(fallback_responses)
class ResponseGenerator(TinyLlamaGenerator):
    """Legacy name kept so existing call sites keep working.

    Defines nothing of its own: every attribute and method is inherited
    unchanged from ``TinyLlamaGenerator``.
    """