Spaces:

JairoDanielMT
/

edullm

Paused

App Files Files Community

edullm / core /integrations /telegram_bot.py

JairoDanielMT

Update core/integrations/telegram_bot.py

2cf4a9e verified 7 months ago

raw

history blame contribute delete

9.49 kB

	# core/integrations/telegram_bot.py
	import os
	import re
	import tempfile
	import time

	import fitz # PyMuPDF
	from docx import Document
	from dotenv import load_dotenv
	from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update
	from telegram.ext import (
	ApplicationBuilder,
	CallbackQueryHandler,
	CommandHandler,
	ContextTypes,
	MessageHandler,
	filters,
	)

	from core.integrations.doc_converter import gestionar_descarga, procesar_markdown
	from core.logging.usage_logger import registrar_uso
	from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline

	# ==== GENERAL CONFIGURATION ====
	# Static resources used by the handlers below.
	FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"
	DOCX_FILENAME = "material_educativo.docx"

	# Load the project's .env file before reading the bot token from the environment.
	load_dotenv(dotenv_path="config/.env")
	TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")

	# Fail at import time when the token is missing or empty.
	if TELEGRAM_TOKEN is None or TELEGRAM_TOKEN == "":
	    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")


	# ==== HELPER FUNCTIONS ====
	def extract_text_from_pdf(file_path):
	    """Return the text of every page of a PDF, concatenated and stripped.

	    Args:
	        file_path: Path of the PDF file, handed to ``fitz.open``.

	    Returns:
	        The text of all pages joined in page order, without leading or
	        trailing whitespace.
	    """
	    with fitz.open(file_path) as documento:
	        paginas = [pagina.get_text() for pagina in documento]
	    return "".join(paginas).strip()


	def extract_text_from_docx(file_path):
	    """Return the non-blank paragraphs of a DOCX file joined by newlines.

	    Args:
	        file_path: Path of the DOCX file, handed to ``Document``.

	    Returns:
	        One line per paragraph whose text is not empty or whitespace-only.
	    """
	    lineas = []
	    for parrafo in Document(file_path).paragraphs:
	        contenido = parrafo.text
	        # Skip paragraphs that contain only whitespace.
	        if contenido.strip():
	            lineas.append(contenido)
	    return "\n".join(lineas)


	def extract_text_from_txt(file_path):
	    """Read a plain-text file and return its content without surrounding whitespace.

	    The file is decoded as UTF-8; a leading byte-order mark is dropped so it
	    does not end up in the returned text. Files that are not valid UTF-8
	    are decoded as Latin-1 instead of raising ``UnicodeDecodeError``
	    (Latin-1 maps every byte, so the fallback cannot fail).

	    Args:
	        file_path: Path of the text file.

	    Returns:
	        The decoded content, stripped of leading and trailing whitespace.
	    """
	    try:
	        # "utf-8-sig" reads plain UTF-8 as well and removes a BOM if present.
	        with open(file_path, "r", encoding="utf-8-sig") as f:
	            text = f.read()
	    except UnicodeDecodeError:
	        with open(file_path, "r", encoding="latin-1") as f:
	            text = f.read()
	    return text.strip()


	def escape_markdown(text: str) -> str:
	    """Escape the characters that are special in Telegram's MarkdownV2.

	    Every occurrence of one of the characters listed in ``escape_chars`` is
	    prefixed with a backslash; all other characters are left untouched.
	    """
	    escape_chars = r"_*[]()~`>#+-=\|{}.!"
	    # One character class matching any single special character.
	    patron = re.compile(f"([{re.escape(escape_chars)}])")
	    return patron.sub(r"\\\1", text)


	def detectar_tipo_entrada(user_input) -> str:
	    """Label the input according to its Python type.

	    Returns:
	        ``"Texto"`` for ``str``, ``"Imagen"`` for ``bytes`` and ``"Otro"``
	        for anything else.
	    """
	    etiquetas = ((str, "Texto"), (bytes, "Imagen"))
	    for tipo, etiqueta in etiquetas:
	        if isinstance(user_input, tipo):
	            return etiqueta
	    return "Otro"


	# ==== /start COMMAND ====
	async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
	    """Reply to the /start command with the welcome and usage message.

	    The reply is sent with the legacy ``Markdown`` parse mode so that the
	    example request at the end is rendered as inline code.

	    Args:
	        update: Incoming Telegram update that triggered the command.
	        context: Handler context (unused).
	    """
	    # effective_message also covers an edited "/start" message, for which
	    # update.message is None and the original attribute access would fail.
	    message = update.effective_message
	    if message is None:
	        return

	    await message.reply_text(
	        "👋 ¡Bienvenido a EduLLM Bot!\n\n"
	        "📌 Formatos aceptados: Texto, Imagen, PDF, DOCX o TXT.\n"
	        "📄 Formato que genero: Material educativo listo para descargar en DOCX.\n\n"
	        "✅ ¿Qué puedo generar?\n"
	        "Materiales educativos alineados al CNEB, MBDD y MINEDU – Perú, como:\n\n"
	        "1️⃣ Ficha\n"
	        "- Incluye: Metadatos, Resumen, Desarrollo, Preguntas DECO, Conclusión, Recomendación, Instrumento (opcional, debes indicar si quieres instrumentos de evaluación).\n\n"
	        "2️⃣ Resumen temático\n"
	        "- Incluye: Metadatos, Ideas clave (mínimo 3), Desarrollo, Conclusión.\n\n"
	        "3️⃣ Banco de preguntas\n"
	        "- Incluye: Metadatos, 10+ Preguntas DECO, Claves o respuestas (opcional, debes indicar que quieres respuestas).\n\n"
	        "4️⃣ Rúbrica o Lista de cotejo\n"
	        "- Incluye: Metadatos, Criterios, Niveles, Descriptores.\n\n"
	        "🎯 ¿Qué necesito de ti?\n"
	        "Indícame: área curricular, grado, bimestre, competencia, capacidad y desempeño esperado.\n\n"
	        "📌 Ejemplo:\n"
	        "`Quiero 10 preguntas sobre los animales vertebrados para 4.º primaria (Ciencia y Tecnología, bim 1) con sus respectivas respuestas.`",
	        parse_mode="Markdown",
	    )



	# ==== MESSAGE HANDLING ====
	async def _descargar_imagen(message) -> bytes:
	    """Download the last entry of ``message.photo`` and return its raw bytes."""
	    file = await message.photo[-1].get_file()
	    # The temporary directory, including the downloaded file, is removed on
	    # exit even when the download or the read fails.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        ruta = os.path.join(tmp_dir, "imagen.jpg")
	        await file.download_to_drive(ruta)
	        with open(ruta, "rb") as img_file:
	            return img_file.read()


	async def _extraer_texto_documento(message):
	    """Return the caption plus the text extracted from the attached document.

	    Returns ``None`` when the file extension is not ``pdf``, ``docx`` or
	    ``txt``; in that case nothing is downloaded.
	    """
	    documento = message.document
	    # file_name is optional in the Bot API; treat a missing name as "no extension".
	    nombre = documento.file_name or ""
	    ext = nombre.split(".")[-1].lower()

	    extractores = {
	        "pdf": extract_text_from_pdf,
	        "docx": extract_text_from_docx,
	        "txt": extract_text_from_txt,
	    }
	    extractor = extractores.get(ext)
	    if extractor is None:
	        return None

	    file = await documento.get_file()
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        ruta = os.path.join(tmp_dir, f"documento.{ext}")
	        await file.download_to_drive(ruta)
	        extracted_text = extractor(ruta)

	    mensaje_texto = message.caption or ""
	    return f"{mensaje_texto}\n\n{extracted_text}".strip()


	async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
	    """Turn an incoming message into educational material and send a preview.

	    Text is used as is, photos are passed to the pipeline as raw bytes and
	    PDF/DOCX/TXT documents are converted to text (prefixed by the caption).
	    Every other kind of message is answered with an explanatory notice.
	    The input is then run through ``edullm_rag_pipeline``, the call is
	    recorded with ``registrar_uso`` and, on success, the generated Markdown
	    is stored in ``context.user_data["ultimo_markdown"]`` and a preview
	    with a DOCX download button is sent. On failure only the error text is
	    sent; the previously stored material is kept.

	    NOTE(review): ``edullm_rag_pipeline`` is called synchronously inside
	    this coroutine; if it is slow it blocks the event loop. Consider
	    ``asyncio.to_thread`` once the pipeline is confirmed to be thread-safe.

	    Args:
	        update: Incoming Telegram update.
	        context: Handler context; ``user_data`` keeps the last result.
	    """
	    message = update.message
	    if message is None:
	        # The handler is registered with filters.ALL, which can deliver
	        # updates (e.g. edited messages) that carry no ``message``.
	        return

	    if message.text:
	        user_input = message.text

	    elif message.photo:
	        user_input = await _descargar_imagen(message)

	    elif message.animation:
	        # Must be tested before ``document``: per the Bot API an animation
	        # also sets the document field, so this branch was never reached.
	        await message.reply_text(
	            "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
	        )
	        return

	    elif message.document:
	        user_input = await _extraer_texto_documento(message)
	        if user_input is None:
	            await enviar_mensaje_formato_no_soportado(update)
	            return

	    elif message.audio or message.voice or message.video:
	        await message.reply_text(
	            "🎙️🎥 Audios y videos no son compatibles. Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
	            parse_mode="Markdown",
	        )
	        return

	    elif message.sticker:
	        await message.reply_text(
	            "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
	        )
	        return

	    elif message.location:
	        await message.reply_text(
	            "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
	        )
	        return

	    elif message.contact:
	        await message.reply_text(
	            "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
	        )
	        return

	    else:
	        await enviar_mensaje_formato_no_soportado(update)
	        return

	    if not user_input:
	        await message.reply_text("⚠️ No se pudo obtener contenido válido.")
	        return

	    await message.reply_text("⏳ Generando tu material educativo...")
	    # A monotonic clock is not affected by system clock adjustments.
	    start_time = time.monotonic()
	    try:
	        resultado_md = edullm_rag_pipeline(user_input)
	        exito = True
	    except Exception as e:
	        # Handler boundary: the failure is reported to the user and logged
	        # through registrar_uso instead of crashing the handler.
	        resultado_md = f"❌ Error: {str(e)}"
	        exito = False
	    duracion = time.monotonic() - start_time
	    registrar_uso(
	        user_id=update.effective_user.id,
	        username=update.effective_user.username,
	        tipo_entrada=detectar_tipo_entrada(user_input),
	        duracion_segundos=duracion,
	        exito=exito,
	    )

	    if not exito:
	        # Do not present the error as generated material nor offer it for
	        # download; truncate to stay below Telegram's message size limit.
	        await message.reply_text(resultado_md[:4000])
	        return

	    context.user_data["ultimo_markdown"] = resultado_md

	    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
	    preview_safe = escape_markdown(preview)
	    await message.reply_text(
	        f"✅ Material generado:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
	    )

	    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
	    await message.reply_text(
	        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
	    )


	# ==== UNSUPPORTED FORMAT MESSAGE ====
	async def enviar_mensaje_formato_no_soportado(update: Update):
	    """Tell the user which input formats are accepted.

	    Sends the image at ``FORMAT_WARNING_IMAGE`` with the list of supported
	    formats as caption. If the image cannot be opened, the same text is
	    sent as a plain message so the user still gets an answer.

	    Args:
	        update: Incoming Telegram update whose message is replied to.
	    """
	    message = update.effective_message
	    if message is None:
	        return

	    caption = "⚠️ Formato no soportado.\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)"

	    try:
	        # InputFile("<path>") would upload the path string itself as file
	        # content, so the image is opened and passed as a binary file object.
	        imagen = open(FORMAT_WARNING_IMAGE, "rb")
	    except OSError:
	        await message.reply_text(caption, parse_mode=None)
	        return

	    with imagen:
	        await message.reply_photo(photo=imagen, caption=caption, parse_mode=None)


	# ==== BUTTON CALLBACKS ====
	async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
	    """Handle inline-button presses; currently only the DOCX download button.

	    For ``descargar_docx`` the Markdown stored in
	    ``context.user_data["ultimo_markdown"]`` is converted with
	    ``procesar_markdown`` and the resulting file is looked up with
	    ``gestionar_descarga`` and sent to the chat. Each failure replaces the
	    button message with a short notice. Other callback data is ignored.

	    Args:
	        update: Incoming update carrying the callback query.
	        context: Handler context; provides ``user_data`` and ``bot``.
	    """
	    consulta = update.callback_query
	    await consulta.answer()

	    if consulta.data != "descargar_docx":
	        return

	    contenido_md = context.user_data.get("ultimo_markdown")
	    if not contenido_md:
	        await consulta.edit_message_text("⚠️ No hay material disponible para convertir.")
	        return

	    conversion = procesar_markdown(contenido_md)
	    if "error" in conversion:
	        await consulta.edit_message_text("❌ Error al generar el archivo DOCX.")
	        return

	    descarga = gestionar_descarga(conversion["file_id"])

	    # A dict result signals an error; otherwise the object exposes the file path.
	    if isinstance(descarga, dict):
	        await consulta.edit_message_text(f"⚠️ {descarga.get('error')}")
	        return

	    await consulta.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
	    await context.bot.send_document(
	        chat_id=consulta.message.chat_id,
	        document=descarga.path,
	        filename=DOCX_FILENAME,
	    )


	# ==== START BOT ====
	async def start_bot():
	    """Build the Telegram application, register the handlers and start polling."""
	    app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()

	    # Registered in this order: /start command, catch-all messages, button callbacks.
	    manejadores = (
	        CommandHandler("start", start),
	        MessageHandler(filters.ALL, handle_message),
	        CallbackQueryHandler(button_handler),
	    )
	    for manejador in manejadores:
	        app.add_handler(manejador)

	    print("🤖 EduLLM Bot en ejecución...")

	    # 🔁 This sequence avoids closing the event loop
	    await app.initialize()
	    await app.start()
	    await app.updater.start_polling()