Spaces:
Running
Running
NoeMartinezSanchez
Se ajustaron nuevamente los parámetros porque la respuesta era muy corta
65fb599 | """RAG Generator using TinyLlama. | |
| This module replaces the previous BERT-based generator with TinyLlama | |
| for improved response quality in the RAG architecture. | |
| """ | |
| import logging | |
| import time | |
| from typing import List, Optional | |
| from loguru import logger | |
| from models.tinyllama_wrapper import TinyLlamaWrapper | |
class TinyLlamaGenerator:
    """Generator using TinyLlama for RAG-based responses.

    Wraps ``TinyLlamaWrapper`` behind a small interface: ``generate`` answers
    a query directly or, when retrieved context is supplied, delegates to
    ``generate_with_context``, which cleans the context and builds a
    Spanish-language prompt around it.
    """

    # Sampling settings for context-free generation.
    _DIRECT_TEMPERATURE = 0.2
    _DIRECT_TOP_P = 0.9

    # Sampling settings for context-grounded (RAG) generation.
    _RAG_TEMPERATURE = 0.1
    _RAG_TOP_P = 0.7
    # Lower bound on generated tokens requested from the wrapper on the RAG path.
    _RAG_MIN_NEW_TOKENS = 40

    # Context-cleaning thresholds (all measured in characters).
    _MAX_CONTEXT_CHARS = 1500  # cleaned context is cut to this length
    _MIN_CONTEXT_CHARS = 50    # below this the context is treated as unusable
    _MIN_LINE_CHARS = 10       # lines this short (or shorter) are dropped

    # Line prefixes that mark retrieval metadata rather than content.
    _METADATA_PREFIXES = ("##", "📄", "Fila:", "Hoja:")

    _NO_INFO_MESSAGE = (
        "Lo siento, no encontré información específica sobre eso "
        "en los materiales de Prepa en Línea SEP."
    )
    _ERROR_MESSAGE = (
        "Lo siento, tuve un problema al generar la respuesta. "
        "Por favor, intenta de nuevo."
    )

    def __init__(self, use_quantization: bool = False, cache_dir: str = "models/cache"):
        """Initialize the TinyLlama generator.

        Args:
            use_quantization: Forwarded to ``TinyLlamaWrapper``; whether to
                use 4-bit quantization.
            cache_dir: Forwarded to ``TinyLlamaWrapper``; directory to cache
                model files.

        Raises:
            RuntimeError: If constructing the wrapper fails for any reason.
                The original exception is attached as ``__cause__``.
        """
        logger.info("Initializing TinyLlamaGenerator...")
        start_time = time.perf_counter()

        try:
            self.wrapper = TinyLlamaWrapper(
                use_quantization=use_quantization,
                cache_dir=cache_dir,
            )
        except Exception as e:
            logger.error(f"Failed to initialize TinyLlamaGenerator: {e}")
            raise RuntimeError(f"Generator initialization failed: {e}") from e

        load_time = time.perf_counter() - start_time
        logger.success(f"TinyLlamaGenerator initialized in {load_time:.1f}s")

    def generate(
        self,
        query: str,
        context: str = "",
        max_length: int = 256,
    ) -> str:
        """Generate a response for the given query.

        Args:
            query: User question/query.
            context: Retrieved context from the RAG system. When empty or
                whitespace-only, the query itself is sent as the prompt.
            max_length: Maximum number of new tokens to generate; forwarded
                as ``max_new_tokens`` on both paths.

        Returns:
            The generated response, or a fixed Spanish apology message if
            anything raised during generation (this method never raises).
        """
        start_time = time.perf_counter()

        try:
            logger.info(f"Generating response for query (length: {len(query)})")

            if context and context.strip():
                logger.info(f"Using RAG with context (length: {len(context)})")
                response = self.generate_with_context(
                    context=context,
                    question=query,
                    max_new_tokens=max_length,
                )
            else:
                logger.info("No context provided, using direct generation")
                response = self.wrapper.generate(
                    prompt=query,
                    max_new_tokens=max_length,
                    temperature=self._DIRECT_TEMPERATURE,
                    top_p=self._DIRECT_TOP_P,
                )

            elapsed = time.perf_counter() - start_time
            logger.info(f"Response generated in {elapsed:.2f}s")
            return response

        except Exception as e:
            # Best-effort boundary: the caller gets a friendly message, but the
            # traceback is kept in the log so the failure stays diagnosable.
            logger.exception(f"Error generating response: {e}")
            return self._ERROR_MESSAGE

    def generate_with_context(
        self,
        context: str,
        question: str,
        max_new_tokens: int = 200,
    ) -> str:
        """Generate a context-grounded answer for Prepa en Línea SEP.

        The context is cleaned of metadata lines, truncated, and embedded in
        a Spanish prompt that instructs the model to answer only from it.

        Args:
            context: Raw retrieved text, possibly containing metadata lines.
            question: The user's question.
            max_new_tokens: Upper bound on generated tokens passed to the
                wrapper. The minimum-token floor is lowered to this value
                when it would otherwise exceed it.

        Returns:
            The wrapper's generated text, or a fixed "no information found"
            message when the cleaned context is too short or the wrapper
            raises.
        """
        clean_context = self._clean_context(context)

        if len(clean_context) < self._MIN_CONTEXT_CHARS:
            return self._NO_INFO_MESSAGE

        prompt = (
            "Eres un asesor académico de Prepa en Línea SEP. "
            "Responde solo usando esta información del contexto.\n"
            f"Contexto: {clean_context}\n"
            f"Pregunta: {question}\n"
            "Respuesta directa y completa:"
        )

        logger.info(f"RAG generation - Context: {len(clean_context)} chars, Question: {question[:50]}...")

        try:
            return self.wrapper.generate(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=self._RAG_TEMPERATURE,
                top_p=self._RAG_TOP_P,
                # Never ask for more mandatory tokens than the cap allows.
                min_new_tokens=min(self._RAG_MIN_NEW_TOKENS, max_new_tokens),
            )
        except Exception as e:
            logger.exception(f"Error in generate_with_context: {e}")
            return self._NO_INFO_MESSAGE

    def _clean_context(self, context: str) -> str:
        """Strip metadata lines from *context* and cap its length.

        Lines are stripped of surrounding whitespace before filtering, so
        CRLF line endings or indentation do not let metadata through.
        Dropped lines: those of ``_MIN_LINE_CHARS`` characters or fewer,
        those starting with a metadata prefix, and those fully enclosed in
        square brackets. Survivors are joined with single spaces; a result
        longer than ``_MAX_CONTEXT_CHARS`` is cut there and suffixed ``...``.
        """
        kept = []
        for raw_line in context.splitlines():
            line = raw_line.strip()
            if len(line) <= self._MIN_LINE_CHARS:
                continue
            if line.startswith(self._METADATA_PREFIXES):
                continue
            if line.startswith("[") and line.endswith("]"):
                continue
            kept.append(line)

        cleaned = " ".join(kept)
        if len(cleaned) > self._MAX_CONTEXT_CHARS:
            cleaned = cleaned[:self._MAX_CONTEXT_CHARS] + "..."
        return cleaned

    def generate_fallback(self, query: str) -> str:
        """Return a canned reply for when no relevant information is found.

        Args:
            query: The user's query; quoted verbatim in one of the replies.

        Returns:
            One of three Spanish fallback messages, chosen at random.
        """
        # Function-local import: this method is the only user of ``random``.
        import random

        fallback_responses = (
            f"No encontré información específica sobre '{query}' en los materiales disponibles.",
            "Esa pregunta está fuera del alcance de mi conocimiento actual. ¿Hay algo más en lo que pueda ayudarte?",
            "No tengo información suficiente para responder eso. ¿Podrías reformular tu pregunta?",
        )
        return random.choice(fallback_responses)
class ResponseGenerator(TinyLlamaGenerator):
    """Legacy name for ``TinyLlamaGenerator``.

    Defines no attributes or methods of its own; it exists so that code
    written against ``ResponseGenerator`` keeps working while every call
    is served by the TinyLlama-backed implementation it inherits.
    """