Spaces:

ecotecds
/

Chatbot-RAG-v4

Running

NoeMartinezSanchez

Se ajustaron nuevamente los parámetros porque la respuesta era muy corta

65fb599 2 months ago

5.67 kB

	"""RAG Generator using TinyLlama.

	This module replaces the previous BERT-based generator with TinyLlama
	for improved response quality in the RAG architecture.
	"""
	import logging
	import time
	from typing import List, Optional

	from loguru import logger

	from models.tinyllama_wrapper import TinyLlamaWrapper


	class TinyLlamaGenerator:
	    """Generator using TinyLlama for RAG-based responses.

	    Wraps ``TinyLlamaWrapper`` behind a small interface that produces an
	    answer either directly from the query or from the query plus retrieved
	    context.
	    """

	    # Limits applied to retrieved context before it is placed in the prompt.
	    MIN_LINE_CHARS = 10  # stripped lines of this length or shorter are dropped
	    MAX_CONTEXT_CHARS = 1500  # longer cleaned context is cut and suffixed "..."
	    MIN_CONTEXT_CHARS = 50  # cleaned context shorter than this is unusable
	    MIN_ANSWER_TOKENS = 40  # floor requested for RAG answers (clamped to max)

	    # Line prefixes that mark retrieval metadata rather than answer content.
	    _METADATA_PREFIXES = ("##", "📄", "Fila:", "Hoja:")

	    # Returned when the context is unusable or RAG generation fails.
	    _NO_INFO_MESSAGE = "Lo siento, no encontré información específica sobre eso en los materiales de Prepa en Línea SEP."

	    def __init__(self, use_quantization: bool = False, cache_dir: str = "models/cache"):
	        """Create the underlying ``TinyLlamaWrapper``.

	        Args:
	            use_quantization: Forwarded to ``TinyLlamaWrapper``; requests
	                4-bit quantization according to the original documentation.
	            cache_dir: Forwarded to ``TinyLlamaWrapper`` as the directory
	                used to cache model files.

	        Raises:
	            RuntimeError: If constructing the wrapper raises any exception;
	                the original exception is chained as the cause.
	        """
	        logger.info("Initializing TinyLlamaGenerator...")
	        started = time.perf_counter()

	        try:
	            self.wrapper = TinyLlamaWrapper(
	                use_quantization=use_quantization,
	                cache_dir=cache_dir,
	            )
	        except Exception as e:
	            logger.error(f"Failed to initialize TinyLlamaGenerator: {e}")
	            raise RuntimeError(f"Generator initialization failed: {e}") from e

	        load_time = time.perf_counter() - started
	        logger.success(f"TinyLlamaGenerator initialized in {load_time:.1f}s")

	    def generate(
	        self,
	        query: str,
	        context: str = "",
	        max_length: int = 256,
	    ) -> str:
	        """Generate a response for the given query.

	        With non-blank ``context`` the call is routed through
	        ``generate_with_context``; otherwise the query itself is sent to the
	        wrapper as the prompt.

	        Args:
	            query: User question/query.
	            context: Retrieved context from the RAG system (optional).
	            max_length: Maximum number of new tokens to generate; applied on
	                both the direct and the RAG path.

	        Returns:
	            The generated response, or a fixed apology message if anything
	            raises while generating (this method never propagates errors).
	        """
	        started = time.perf_counter()

	        try:
	            logger.info(f"Generating response for query (length: {len(query)})")

	            if context and context.strip():
	                logger.info(f"Using RAG with context (length: {len(context)})")
	                response = self.generate_with_context(
	                    context=context,
	                    question=query,
	                    max_new_tokens=max_length,
	                )
	            else:
	                logger.info("No context provided, using direct generation")
	                response = self.wrapper.generate(
	                    prompt=query,
	                    max_new_tokens=max_length,
	                    temperature=0.2,
	                    top_p=0.9,
	                )

	            elapsed = time.perf_counter() - started
	            logger.info(f"Response generated in {elapsed:.2f}s")
	            return response

	        except Exception as e:
	            # Deliberate best-effort boundary: callers always get a string.
	            logger.error(f"Error generating response: {e}")
	            return "Lo siento, tuve un problema al generar la respuesta. Por favor, intenta de nuevo."

	    @classmethod
	    def _clean_context(cls, context: str) -> str:
	        """Drop metadata/short lines from ``context`` and join the rest.

	        Each line is stripped *before* filtering so that indented markers and
	        lines ending in ``\\r`` (CRLF input) are recognised. Lines that are
	        fully bracketed (``[...]``), start with one of
	        ``_METADATA_PREFIXES``, or are at most ``MIN_LINE_CHARS`` long are
	        discarded. The survivors are joined with single spaces and truncated
	        to ``MAX_CONTEXT_CHARS`` characters plus ``"..."``.
	        """
	        kept = []
	        for raw_line in context.split('\n'):
	            line = raw_line.strip()
	            if len(line) <= cls.MIN_LINE_CHARS:
	                continue
	            if line.startswith("[") and line.endswith("]"):
	                continue
	            if line.startswith(cls._METADATA_PREFIXES):
	                continue
	            kept.append(line)

	        cleaned = ' '.join(kept)
	        if len(cleaned) > cls.MAX_CONTEXT_CHARS:
	            cleaned = cleaned[:cls.MAX_CONTEXT_CHARS] + "..."
	        return cleaned

	    def generate_with_context(
	        self,
	        context: str,
	        question: str,
	        max_new_tokens: int = 200,
	    ) -> str:
	        """Generate a context-grounded answer for Prepa en Línea SEP.

	        Args:
	            context: Raw retrieved text; cleaned with ``_clean_context``.
	            question: The user's question, inserted into the prompt.
	            max_new_tokens: Upper bound on generated tokens, forwarded to the
	                wrapper. The requested minimum (``MIN_ANSWER_TOKENS``) is
	                clamped so it never exceeds this value.

	        Returns:
	            The wrapper's output, or a fixed "no information found" message
	            when the cleaned context is shorter than ``MIN_CONTEXT_CHARS`` or
	            the wrapper raises.
	        """
	        cleaned = self._clean_context(context)

	        if len(cleaned) < self.MIN_CONTEXT_CHARS:
	            return self._NO_INFO_MESSAGE

	        prompt = f"""Eres un asesor académico de Prepa en Línea SEP. Responde solo usando esta información del contexto.

	Contexto: {cleaned}

	Pregunta: {question}

	Respuesta directa y completa:"""

	        logger.info(f"RAG generation - Context: {len(cleaned)} chars, Question: {question[:50]}...")

	        try:
	            return self.wrapper.generate(
	                prompt=prompt,
	                max_new_tokens=max_new_tokens,
	                temperature=0.1,
	                top_p=0.7,
	                # A minimum above the maximum would be contradictory.
	                min_new_tokens=min(self.MIN_ANSWER_TOKENS, max_new_tokens),
	            )
	        except Exception as e:
	            logger.error(f"Error in generate_with_context: {e}")
	            return self._NO_INFO_MESSAGE

	    def generate_fallback(self, query: str) -> str:
	        """Return a canned reply for when no relevant information is found.

	        Args:
	            query: The user's query; quoted in one of the candidate replies.

	        Returns:
	            One of three fixed Spanish messages, chosen at random.
	        """
	        import random

	        candidates = (
	            f"No encontré información específica sobre '{query}' en los materiales disponibles.",
	            "Esa pregunta está fuera del alcance de mi conocimiento actual. ¿Hay algo más en lo que pueda ayudarte?",
	            "No tengo información suficiente para responder eso. ¿Podrías reformular tu pregunta?",
	        )
	        return random.choice(candidates)


	class ResponseGenerator(TinyLlamaGenerator):
	    """Alias of ``TinyLlamaGenerator`` kept for backward compatibility.

	    Code that still instantiates ``ResponseGenerator`` gets the
	    TinyLlama-backed implementation; nothing is added or overridden here.
	    """