"""RAG generator backed by TinyLlama.

This module replaces the previous BERT-based generator with TinyLlama for
improved response quality in the RAG architecture.
"""

import logging
import random
import re
import time
from typing import List, Optional

from loguru import logger

from models.tinyllama_wrapper import TinyLlamaWrapper

# Context lines dropped before prompting: a line that is entirely a
# "[...]" tag, or a line that starts with two or more "#", with the "📄"
# marker, or with a "Fila:" / "Hoja:" label (presumably spreadsheet row/sheet
# labels added by the retriever - confirm against the indexing code).
# The pattern is applied to the raw, unstripped line.
_NOISE_LINE_RE = re.compile(r"^(?:\[.*?\]$|#{2,}|📄|Fila:|Hoja:)")

# Stripped lines must be longer than this to be kept as context.
_MIN_LINE_CHARS = 10
# Cleaned context is cut to this many characters (plus a "..." marker).
_MAX_CONTEXT_CHARS = 1500
# Cleaned context shorter than this is treated as "nothing useful retrieved".
_MIN_CONTEXT_CHARS = 50
# Lower bound on generated tokens requested for context-grounded answers.
_MIN_ANSWER_TOKENS = 40

_GENERATION_ERROR_MESSAGE = (
    "Lo siento, tuve un problema al generar la respuesta. "
    "Por favor, intenta de nuevo."
)
_NO_INFO_MESSAGE = (
    "Lo siento, no encontré información específica sobre eso en los "
    "materiales de Prepa en Línea SEP."
)


def _clean_context(context: str) -> str:
    """Flatten retrieved context into one space-joined, length-capped string.

    Lines matching ``_NOISE_LINE_RE`` are discarded, the remaining lines are
    stripped, and only those longer than ``_MIN_LINE_CHARS`` are kept. The
    result is truncated to ``_MAX_CONTEXT_CHARS`` characters followed by
    ``"..."`` when it exceeds that length.
    """
    stripped_lines = (
        line.strip()
        for line in context.split("\n")
        if not _NOISE_LINE_RE.match(line)
    )
    cleaned = " ".join(
        text for text in stripped_lines if len(text) > _MIN_LINE_CHARS
    )
    if len(cleaned) > _MAX_CONTEXT_CHARS:
        cleaned = cleaned[:_MAX_CONTEXT_CHARS] + "..."
    return cleaned


class TinyLlamaGenerator:
    """Generator using TinyLlama for RAG-based responses.

    Wraps ``TinyLlamaWrapper`` behind a small interface for producing
    answers with or without retrieved context.
    """

    def __init__(self, use_quantization: bool = False, cache_dir: str = "models/cache"):
        """Create the underlying ``TinyLlamaWrapper``.

        Args:
            use_quantization: Whether to use 4-bit quantization.
            cache_dir: Directory to cache model files.

        Raises:
            RuntimeError: If constructing the wrapper fails for any reason;
                the original exception is chained as the cause.
        """
        logger.info("Initializing TinyLlamaGenerator...")
        start_time = time.perf_counter()

        try:
            self.wrapper = TinyLlamaWrapper(
                use_quantization=use_quantization,
                cache_dir=cache_dir,
            )
        except Exception as e:
            logger.error(f"Failed to initialize TinyLlamaGenerator: {e}")
            raise RuntimeError(f"Generator initialization failed: {e}") from e

        load_time = time.perf_counter() - start_time
        logger.success(f"TinyLlamaGenerator initialized in {load_time:.1f}s")

    def generate(
        self,
        query: str,
        context: str = "",
        max_length: int = 256,
    ) -> str:
        """Generate a response for the given query.

        With empty or whitespace-only context the query is sent to the model
        as-is; otherwise the call is delegated to ``generate_with_context``.

        Args:
            query: User question/query.
            context: Retrieved context from the RAG system (optional).
            max_length: Maximum number of new tokens to generate. It is
                forwarded on both the direct and the context-grounded path.

        Returns:
            The generated response, or a fixed Spanish apology message if
            anything raises during generation (this method does not
            propagate exceptions).
        """
        start_time = time.perf_counter()

        # Deliberate best-effort boundary: callers always get a string back.
        try:
            logger.info(f"Generating response for query (length: {len(query)})")

            if not context or not context.strip():
                logger.info("No context provided, using direct generation")
                response = self.wrapper.generate(
                    prompt=query,
                    max_new_tokens=max_length,
                    temperature=0.2,
                    top_p=0.9,
                )
            else:
                logger.info(f"Using RAG with context (length: {len(context)})")
                response = self.generate_with_context(
                    context=context,
                    question=query,
                    max_new_tokens=max_length,
                )

            elapsed = time.perf_counter() - start_time
            logger.info(f"Response generated in {elapsed:.2f}s")
            return response

        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return _GENERATION_ERROR_MESSAGE

    def generate_with_context(
        self,
        context: str,
        question: str,
        max_new_tokens: int = 200,
    ) -> str:
        """Generate a context-grounded answer for Prepa en Línea SEP.

        The context is cleaned with ``_clean_context`` and embedded, together
        with the question, in a Spanish instruction prompt.

        Args:
            context: Raw retrieved context; may contain metadata lines.
            question: The user's question.
            max_new_tokens: Maximum number of new tokens to generate.
                Previously this argument was accepted but ignored in favour
                of a hard-coded 150.

        Returns:
            The model output, or a fixed Spanish "no information found"
            message when the cleaned context is shorter than
            ``_MIN_CONTEXT_CHARS`` characters or the wrapper raises.
        """
        clean_context = _clean_context(context)

        if len(clean_context) < _MIN_CONTEXT_CHARS:
            return _NO_INFO_MESSAGE

        # NOTE(review): the separators between prompt sections are reproduced
        # exactly as found in this file; confirm whether the template was
        # meant to place each section on its own line.
        prompt = (
            "Eres un asesor académico de Prepa en Línea SEP. "
            "Responde solo usando esta información del contexto. \n"
            f"Contexto: {clean_context} Pregunta: {question} "
            "Respuesta directa y completa:"
        )

        logger.info(f"RAG generation - Context: {len(clean_context)} chars, Question: {question[:50]}...")

        # Keep the lower bound from exceeding the caller's upper bound.
        min_new_tokens = min(_MIN_ANSWER_TOKENS, max_new_tokens)

        try:
            return self.wrapper.generate(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                top_p=0.7,
                min_new_tokens=min_new_tokens,
            )
        except Exception as e:
            logger.error(f"Error in generate_with_context: {e}")
            return _NO_INFO_MESSAGE

    def generate_fallback(self, query: str) -> str:
        """Return a randomly chosen fallback message.

        Args:
            query: The user's query; it is quoted in one of the messages.

        Returns:
            One of three fixed Spanish fallback messages, picked with
            ``random.choice``.
        """
        fallback_responses = (
            f"No encontré información específica sobre '{query}' en los materiales disponibles.",
            "Esa pregunta está fuera del alcance de mi conocimiento actual. ¿Hay algo más en lo que pueda ayudarte?",
            "No tengo información suficiente para responder eso. ¿Podrías reformular tu pregunta?",
        )
        return random.choice(fallback_responses)


class ResponseGenerator(TinyLlamaGenerator):
    """Backward-compatibility alias for ``TinyLlamaGenerator``.

    Keeps existing code that uses ``ResponseGenerator`` working while the
    implementation is provided by TinyLlama. Adds no behaviour of its own.
    """