Spaces:

JairoDanielMT
/

edullm

Paused

File size: 9,494 Bytes

# core/integrations/telegram_bot.py
import os
import re
import tempfile
import time

import fitz  # PyMuPDF
from docx import Document
from dotenv import load_dotenv
from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
from telegram.ext import (
    ApplicationBuilder,
    CallbackQueryHandler,
    CommandHandler,
    ContextTypes,
    MessageHandler,
    filters,
)

from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
from core.logging.usage_logger import registrar_uso
from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline

# ==== GENERAL CONFIGURATION ====
# Load environment variables from the project's dotenv file before reading them.
load_dotenv(dotenv_path="config/.env")
# Bot token read from the environment; validated right below.
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
# File name given to the DOCX document sent back to the user.
DOCX_FILENAME = "material_educativo.docx"
# Image attached to the "unsupported format" warning message.
FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"

# Fail at import time when the token is missing or empty, instead of failing
# later when the bot application is built.
if not TELEGRAM_TOKEN:
    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")


# ==== FUNCIONES AUXILIARES ====
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated and stripped.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        The page texts joined in page order, with leading and trailing
        whitespace removed.
    """
    with fitz.open(file_path) as documento:
        contenido = "".join(pagina.get_text() for pagina in documento)
    return contenido.strip()


def extract_text_from_docx(file_path):
    """Return the non-blank paragraphs of a DOCX file, one per line.

    Args:
        file_path: Path of the ``.docx`` file to load with python-docx.

    Returns:
        The text of each paragraph that contains something other than
        whitespace, joined with newline characters.
    """
    parrafos = []
    for parrafo in Document(file_path).paragraphs:
        contenido = parrafo.text
        # Skip paragraphs that are empty or whitespace-only.
        if contenido.strip():
            parrafos.append(contenido)
    return "\n".join(parrafos)


def extract_text_from_txt(file_path):
    """Return the stripped contents of a plain-text file.

    User-uploaded text files are not guaranteed to be UTF-8 (files saved on
    Windows are frequently cp1252), so several encodings are tried in order
    instead of failing with ``UnicodeDecodeError`` on the first non-UTF-8 byte.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents with leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" decodes plain UTF-8 too, and additionally drops a leading BOM
    # (which str.strip() would not remove because it is not whitespace).
    for encoding in ("utf-8-sig", "cp1252"):
        try:
            with open(file_path, "r", encoding=encoding) as f:
                return f.read().strip()
        except UnicodeDecodeError:
            continue

    # latin-1 maps every byte to a code point, so this last attempt cannot
    # raise a decoding error.
    with open(file_path, "r", encoding="latin-1") as f:
        return f.read().strip()


def escape_markdown(text: str) -> str:
    """Escape the characters that are special in Telegram's MarkdownV2.

    Every reserved character is prefixed with a backslash. The backslash
    itself is escaped as well: otherwise a backslash already present in the
    text would be consumed as an escape by Telegram (dropping it from the
    output) or would cancel the escape added for the character that follows.

    Args:
        text: Raw text to embed in a MarkdownV2 message.

    Returns:
        The text with each reserved character preceded by a backslash.
    """
    # The backslash comes first in the set; the remaining characters are the
    # ones Telegram lists as reserved in MarkdownV2.
    escape_chars = "\\_*[]()~`>#+-=|{}.!"
    return re.sub(f"([{re.escape(escape_chars)}])", r"\\\1", text)


def detectar_tipo_entrada(user_input) -> str:
    """Classify the user input for usage logging.

    Args:
        user_input: The value handed to the pipeline.

    Returns:
        ``"Texto"`` for ``str`` values, ``"Imagen"`` for ``bytes`` values and
        ``"Otro"`` for anything else.
    """
    etiquetas = ((str, "Texto"), (bytes, "Imagen"))
    for tipo, etiqueta in etiquetas:
        if isinstance(user_input, tipo):
            return etiqueta
    return "Otro"


# ==== COMANDO /start ====
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to ``/start`` with the welcome and usage message.

    The message lists the accepted input formats, the kinds of material the
    bot generates, the information the user should provide and an example
    request. It is sent with Telegram's legacy ``Markdown`` parse mode.

    Args:
        update: Incoming update; the reply is sent to ``update.message``.
        context: Handler context (unused).
    """
    fragmentos = (
        "👋 *¡Bienvenido a EduLLM Bot!*\n\n",
        "📌 *Formatos aceptados:* Texto, Imagen, PDF, DOCX o TXT.\n",
        "📄 *Formato que genero:* Material educativo listo para descargar en DOCX.\n\n",
        "✅ *¿Qué puedo generar?*\n",
        "Materiales educativos alineados al *CNEB, MBDD y MINEDU – Perú*, como:\n\n",
        "1️⃣ *Ficha*\n",
        "- Incluye: Metadatos, Resumen, Desarrollo, Preguntas DECO, Conclusión, Recomendación, Instrumento (opcional, debes indicar si quieres instrumentos de evaluación).\n\n",
        "2️⃣ *Resumen temático*\n",
        "- Incluye: Metadatos, Ideas clave (mínimo 3), Desarrollo, Conclusión.\n\n",
        "3️⃣ *Banco de preguntas*\n",
        "- Incluye: Metadatos, 10+ Preguntas DECO, Claves o respuestas (opcional, debes indicar que quieres respuestas).\n\n",
        "4️⃣ *Rúbrica o Lista de cotejo*\n",
        "- Incluye: Metadatos, Criterios, Niveles, Descriptores.\n\n",
        "🎯 *¿Qué necesito de ti?*\n",
        "Indícame: *área curricular*, *grado*, *bimestre*, *competencia*, *capacidad* y *desempeño esperado*.\n\n",
        "📌 *Ejemplo:*\n",
        "`Quiero 10 preguntas sobre los animales vertebrados para 4.º primaria (Ciencia y Tecnología, bim 1) con sus respectivas respuestas.`",
    )
    bienvenida = "".join(fragmentos)
    await update.message.reply_text(bienvenida, parse_mode="Markdown")



# ==== MANEJO DE MENSAJES ====
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Turn an incoming message into pipeline input and reply with the result.

    Supported inputs are plain text, photos (passed to the pipeline as raw
    bytes) and PDF/DOCX/TXT documents (passed as the caption followed by the
    extracted text). Every other kind of message gets an explanatory reply and
    is not processed further.

    On success the generated markdown is stored in
    ``context.user_data["ultimo_markdown"]``, a preview of at most 1000
    characters is sent, and a button offering the DOCX download is shown. When
    the pipeline raises, the error text is reported to the user and nothing is
    stored, so a failure is never offered for download as if it were material.

    Args:
        update: Incoming Telegram update.
        context: Handler context; ``user_data`` keeps the last generated
            markdown for the download button.
    """
    message = update.message
    if message is None:
        # Updates that carry no new message (for instance an edited message)
        # have nothing to process and nothing to reply to through this path.
        return

    user_input = ""
    temp_paths = []  # every temp file created below, removed in ``finally``

    try:
        if message.text:
            user_input = message.text

        elif message.photo:
            # The last entry of ``photo`` is the one the original code used.
            tg_file = await message.photo[-1].get_file()
            image_path = await _descargar_temporal(tg_file, ".jpg", temp_paths)
            with open(image_path, "rb") as img_file:
                user_input = img_file.read()

        elif message.document:
            # ``file_name`` may be missing; treat that as "no extension".
            file_name = message.document.file_name or ""
            ext = file_name.split(".")[-1].lower()

            extractores = {
                "pdf": extract_text_from_pdf,
                "docx": extract_text_from_docx,
                "txt": extract_text_from_txt,
            }
            extractor = extractores.get(ext)
            if extractor is None:
                # Reject before downloading: unsupported files are never fetched.
                await enviar_mensaje_formato_no_soportado(update)
                return

            tg_file = await message.document.get_file()
            doc_path = await _descargar_temporal(tg_file, f".{ext}", temp_paths)
            extracted_text = extractor(doc_path)

            mensaje_texto = message.caption or ""
            user_input = f"{mensaje_texto}\n\n{extracted_text}".strip()

        elif message.audio or message.voice or message.video:
            await message.reply_text(
                "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
                parse_mode="Markdown",
            )
            return

        elif message.sticker:
            await message.reply_text(
                "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
            )
            return

        elif message.location:
            await message.reply_text(
                "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
            )
            return

        elif message.contact:
            await message.reply_text(
                "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
            )
            return

        elif message.animation:
            await message.reply_text(
                "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
            )
            return

        else:
            await enviar_mensaje_formato_no_soportado(update)
            return

    finally:
        # Best-effort cleanup; runs on success, early return and on exceptions.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

    if not user_input:
        await message.reply_text("⚠️ No se pudo obtener contenido válido.")
        return

    await message.reply_text("⏳ Generando tu material educativo...")
    start_time = time.time()
    try:
        resultado_md = edullm_rag_pipeline(user_input)
        exito = True
    except Exception as e:
        # Boundary catch: the failure is logged through registrar_uso and
        # reported to the user below instead of killing the handler.
        resultado_md = f"❌ Error: {str(e)}"
        exito = False
    duracion = time.time() - start_time
    registrar_uso(
        user_id=update.effective_user.id,
        username=update.effective_user.username,
        tipo_entrada=detectar_tipo_entrada(user_input),
        duracion_segundos=duracion,
        exito=exito,
    )

    if not exito:
        # Do not cache the error text nor present it as generated material.
        await message.reply_text(resultado_md)
        return

    context.user_data["ultimo_markdown"] = resultado_md

    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
    preview_safe = escape_markdown(preview)
    await message.reply_text(
        f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
    )

    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
    await message.reply_text(
        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
    )


async def _descargar_temporal(tg_file, suffix, temp_paths):
    """Download a Telegram file into a new temporary file and return its path.

    The path is appended to ``temp_paths`` before the download starts, so the
    caller's cleanup also removes the file when the download fails midway.

    Args:
        tg_file: Object returned by ``get_file()``; must provide
            ``download_to_drive``.
        suffix: File suffix (including the dot) for the temporary file.
        temp_paths: List that collects the created paths for later removal.

    Returns:
        The path of the downloaded temporary file.
    """
    fd, temp_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)  # only the path is needed; the download writes to it by name
    temp_paths.append(temp_path)
    await tg_file.download_to_drive(temp_path)
    return temp_path


# ==== MENSAJE DE FORMATO NO SOPORTADO ====
async def enviar_mensaje_formato_no_soportado(update: Update):
    """Tell the user that the received format is not supported.

    Sends the image at ``FORMAT_WARNING_IMAGE`` with a caption listing the
    accepted formats. The image is opened in binary mode and the file object
    is handed to ``reply_photo``; wrapping the path string in ``InputFile``
    would upload the path text itself as the file content rather than the
    image. When the image cannot be opened, the same text is sent as a plain
    message so the user still gets an answer.

    Args:
        update: Incoming update; the reply is sent to ``update.message``.
    """
    caption = "⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)"

    try:
        imagen = open(FORMAT_WARNING_IMAGE, "rb")
    except OSError:
        # Missing or unreadable asset: fall back to a text-only warning.
        await update.message.reply_text(caption, parse_mode="Markdown")
        return

    with imagen:
        # The caption uses *bold* markup, so it needs a parse mode to render.
        await update.message.reply_photo(
            photo=imagen,
            caption=caption,
            parse_mode="Markdown",
        )


# ==== CALLBACK BOTONES ====
async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle inline-button presses; currently only the DOCX download.

    The callback is always acknowledged. For ``"descargar_docx"`` the markdown
    stored in ``context.user_data["ultimo_markdown"]`` is converted with
    ``procesar_markdown`` and fetched with ``gestionar_descarga``; the
    resulting file is sent to the chat, or the button message is replaced by
    an error text when any step fails.

    Args:
        update: Incoming update carrying the callback query.
        context: Handler context providing ``user_data`` and the bot.
    """
    query = update.callback_query
    await query.answer()

    # Any other callback payload is acknowledged and otherwise ignored.
    if query.data != "descargar_docx":
        return

    markdown_content = context.user_data.get("ultimo_markdown")
    if not markdown_content:
        await query.edit_message_text("⚠️ No hay material disponible para convertir.")
        return

    conversion = procesar_markdown(markdown_content)
    if "error" in conversion:
        await query.edit_message_text("❌ Error al generar el archivo DOCX.")
        return

    descarga = gestionar_descarga(conversion["file_id"])

    # A dict result is how the download helper reports a failure here.
    if isinstance(descarga, dict):
        await query.edit_message_text(f"⚠️ {descarga.get('error')}")
        return

    await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
    await context.bot.send_document(
        chat_id=query.message.chat_id,
        document=descarga.path,
        filename=DOCX_FILENAME,
    )


# ==== INICIAR BOT ====
async def start_bot():
    """Build the Telegram application, register handlers and start polling.

    Handlers are added in this order: the ``/start`` command, the catch-all
    message handler and the inline-button callback handler. The application is
    then initialized and started step by step and polling is launched; the
    coroutine returns after that without stopping the application.
    """
    app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()

    # Registration order is kept: the command handler comes before the
    # catch-all message handler.
    manejadores = (
        CommandHandler("start", start),
        MessageHandler(filters.ALL, handle_message),
        CallbackQueryHandler(button_handler),
    )
    for manejador in manejadores:
        app.add_handler(manejador)

    print("🤖 EduLLM Bot en ejecución...")

    # This explicit sequence keeps the event loop from being closed.
    await app.initialize()
    await app.start()
    await app.updater.start_polling()