Spaces:
Sleeping
Sleeping
| # ============================================================================= | |
| # LibreTranslate ProDoc — app.py | |
| # Desarrollado para Hugging Face Spaces (SDK: Streamlit) | |
| # Motor de traducción: Helsinki-NLP OPUS-MT (HuggingFace Transformers) | |
| # Autor: AdVision AI | Versión: 2.0.0 | |
| # ============================================================================= | |
| # DESCRIPCIÓN GENERAL: | |
| # Aplicación que traduce archivos PDF y DOCX manteniendo el diseño original. | |
| # Utiliza los modelos Helsinki-NLP/opus-mt de HuggingFace para funcionar | |
| # con alta calidad, cubriendo los 11 idiomas más relevantes del mundo. | |
| # ============================================================================= | |
| import streamlit as st | |
| import os | |
| import sys | |
| import time | |
| import threading | |
| import tempfile | |
| import shutil | |
| import logging | |
| from pathlib import Path | |
| from datetime import datetime | |
# ─── Logging configuration ─────────────────────────────────────────────────────
# Root logging is set to INFO with a "timestamp [LEVEL] message" layout;
# `logger` is the module-level logger used throughout this file.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)
# ─── PAGE CONFIGURATION (MUST COME FIRST) ──────────────────────────────────────
# Wide layout with the sidebar expanded on load; title and icon are what the
# browser tab shows.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="LibreTranslate ProDoc",
    page_icon="🌐",
)
| # ============================================================================= | |
| # SECCIÓN 1: INYECCIÓN DE CSS PERSONALIZADO | |
| # Paleta de colores extraída del logotipo AdVision AI: | |
| # - Fondo principal: #0a0a0f (negro profundo) | |
| # - Cian eléctrico: #00e5ff | |
| # - Azul vibrante: #3d5afe | |
| # - Violeta: #9c27b0 | |
| # - Magenta: #e91e8c | |
| # ============================================================================= | |
| CUSTOM_CSS = """ | |
| <style> | |
| /* ── Importar fuentes de Google Fonts ─────────────────────────────────────── */ | |
| @import url('https://fonts.googleapis.com/css2?family=Exo+2:wght@300;400;600;700;900&family=Space+Mono:wght@400;700&display=swap'); | |
| /* ── Variables CSS globales ───────────────────────────────────────────────── */ | |
| :root { | |
| --bg-primary: #0a0a0f; | |
| --bg-secondary: #12121a; | |
| --bg-card: #1a1a2e; | |
| --cyan: #00e5ff; | |
| --blue: #3d5afe; | |
| --violet: #9c27b0; | |
| --magenta: #e91e8c; | |
| --white: #f0f0f5; | |
| --gray: #8888aa; | |
| --gradient-main: linear-gradient(135deg, #00e5ff 0%, #3d5afe 35%, #9c27b0 65%, #e91e8c 100%); | |
| --gradient-glow: linear-gradient(135deg, rgba(0,229,255,0.15) 0%, rgba(233,30,140,0.15) 100%); | |
| --radius-lg: 16px; | |
| --radius-md: 10px; | |
| --radius-sm: 6px; | |
| --shadow-glow: 0 0 30px rgba(0,229,255,0.12), 0 0 60px rgba(156,39,176,0.08); | |
| } | |
| /* ── Reset general de Streamlit ───────────────────────────────────────────── */ | |
| html, body, [data-testid="stAppViewContainer"] { | |
| background-color: var(--bg-primary) !important; | |
| font-family: 'Exo 2', sans-serif !important; | |
| color: var(--white) !important; | |
| } | |
| /* ── Fondo con patrón de cuadrícula sutil ─────────────────────────────────── */ | |
| [data-testid="stAppViewContainer"]::before { | |
| content: ""; | |
| position: fixed; | |
| top: 0; left: 0; right: 0; bottom: 0; | |
| background-image: | |
| linear-gradient(rgba(0,229,255,0.03) 1px, transparent 1px), | |
| linear-gradient(90deg, rgba(0,229,255,0.03) 1px, transparent 1px); | |
| background-size: 40px 40px; | |
| pointer-events: none; | |
| z-index: 0; | |
| } | |
| /* ── Sidebar ──────────────────────────────────────────────────────────────── */ | |
| [data-testid="stSidebar"] { | |
| background: var(--bg-secondary) !important; | |
| border-right: 1px solid rgba(0,229,255,0.15) !important; | |
| box-shadow: 4px 0 20px rgba(0,0,0,0.4) !important; | |
| } | |
| [data-testid="stSidebar"] * { | |
| color: var(--white) !important; | |
| } | |
| /* ── Título principal con degradado ──────────────────────────────────────── */ | |
| .app-title { | |
| font-family: 'Exo 2', sans-serif; | |
| font-weight: 900; | |
| font-size: 2.6rem; | |
| background: var(--gradient-main); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| text-align: center; | |
| letter-spacing: -0.5px; | |
| margin-bottom: 0.2rem; | |
| } | |
| .app-subtitle { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.78rem; | |
| color: var(--cyan); | |
| text-align: center; | |
| letter-spacing: 3px; | |
| text-transform: uppercase; | |
| opacity: 0.7; | |
| margin-bottom: 1.5rem; | |
| } | |
| /* ── Contenedores / cards ─────────────────────────────────────────────────── */ | |
| .pro-card { | |
| background: var(--bg-card); | |
| border: 1px solid rgba(0,229,255,0.12); | |
| border-radius: var(--radius-lg); | |
| padding: 1.8rem; | |
| margin: 1rem 0; | |
| box-shadow: var(--shadow-glow); | |
| position: relative; | |
| overflow: hidden; | |
| transition: border-color 0.3s ease; | |
| } | |
| .pro-card::before { | |
| content: ""; | |
| position: absolute; | |
| top: 0; left: 0; right: 0; | |
| height: 2px; | |
| background: var(--gradient-main); | |
| border-radius: var(--radius-lg) var(--radius-lg) 0 0; | |
| } | |
| .pro-card:hover { | |
| border-color: rgba(0,229,255,0.3); | |
| } | |
| /* ── Separador con degradado ──────────────────────────────────────────────── */ | |
| .gradient-divider { | |
| height: 1px; | |
| background: var(--gradient-main); | |
| margin: 1.2rem 0; | |
| opacity: 0.4; | |
| border: none; | |
| } | |
| /* ── Etiquetas de sección ─────────────────────────────────────────────────── */ | |
| .section-label { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.68rem; | |
| color: var(--cyan); | |
| letter-spacing: 2.5px; | |
| text-transform: uppercase; | |
| margin-bottom: 0.6rem; | |
| opacity: 0.85; | |
| } | |
| /* ── Badge de estado ──────────────────────────────────────────────────────── */ | |
| .status-badge { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| padding: 4px 12px; | |
| border-radius: 20px; | |
| font-size: 0.75rem; | |
| font-weight: 600; | |
| font-family: 'Space Mono', monospace; | |
| } | |
| .status-badge.ready { | |
| background: rgba(0,229,255,0.12); | |
| border: 1px solid rgba(0,229,255,0.3); | |
| color: var(--cyan); | |
| } | |
| .status-badge.processing { | |
| background: rgba(156,39,176,0.15); | |
| border: 1px solid rgba(156,39,176,0.4); | |
| color: #ce93d8; | |
| } | |
| .status-badge.success { | |
| background: rgba(76,175,80,0.12); | |
| border: 1px solid rgba(76,175,80,0.35); | |
| color: #a5d6a7; | |
| } | |
| .status-badge.error { | |
| background: rgba(244,67,54,0.12); | |
| border: 1px solid rgba(244,67,54,0.35); | |
| color: #ef9a9a; | |
| } | |
| /* ── Botones de Streamlit ─────────────────────────────────────────────────── */ | |
| .stButton > button { | |
| background: var(--gradient-main) !important; | |
| color: #000 !important; | |
| font-family: 'Exo 2', sans-serif !important; | |
| font-weight: 700 !important; | |
| font-size: 0.95rem !important; | |
| border: none !important; | |
| border-radius: var(--radius-md) !important; | |
| padding: 0.65rem 2rem !important; | |
| letter-spacing: 0.5px !important; | |
| transition: all 0.3s ease !important; | |
| box-shadow: 0 4px 20px rgba(0,229,255,0.25) !important; | |
| } | |
| .stButton > button:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 8px 30px rgba(0,229,255,0.4) !important; | |
| } | |
| .stButton > button:active { | |
| transform: translateY(0) !important; | |
| } | |
| /* ── Selectbox y otros widgets ────────────────────────────────────────────── */ | |
| .stSelectbox > div > div { | |
| background: var(--bg-card) !important; | |
| border: 1px solid rgba(0,229,255,0.2) !important; | |
| border-radius: var(--radius-sm) !important; | |
| color: var(--white) !important; | |
| } | |
| /* ── Progress bar ─────────────────────────────────────────────────────────── */ | |
| .stProgress > div > div > div { | |
| background: var(--gradient-main) !important; | |
| border-radius: 4px !important; | |
| } | |
| /* ── File uploader ────────────────────────────────────────────────────────── */ | |
| [data-testid="stFileUploader"] { | |
| background: rgba(0,229,255,0.03) !important; | |
| border: 2px dashed rgba(0,229,255,0.25) !important; | |
| border-radius: var(--radius-lg) !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| [data-testid="stFileUploader"]:hover { | |
| border-color: rgba(0,229,255,0.5) !important; | |
| background: rgba(0,229,255,0.06) !important; | |
| } | |
| /* ── Texto de info/warning/error ──────────────────────────────────────────── */ | |
| .stAlert { | |
| border-radius: var(--radius-md) !important; | |
| border: 1px solid rgba(0,229,255,0.15) !important; | |
| } | |
| /* ── Sidebar botón de PayPal ──────────────────────────────────────────────── */ | |
| .paypal-btn-container { | |
| text-align: center; | |
| margin: 1rem 0; | |
| } | |
| .paypal-btn-container a { | |
| display: inline-block; | |
| background: linear-gradient(135deg, #003087, #009cde, #012169); | |
| color: #fff !important; | |
| font-family: 'Exo 2', sans-serif; | |
| font-weight: 700; | |
| font-size: 0.85rem; | |
| padding: 10px 20px; | |
| border-radius: 25px; | |
| text-decoration: none !important; | |
| letter-spacing: 0.5px; | |
| box-shadow: 0 4px 15px rgba(0,156,222,0.4); | |
| transition: all 0.3s ease; | |
| } | |
| .paypal-btn-container a:hover { | |
| transform: translateY(-2px); | |
| box-shadow: 0 8px 25px rgba(0,156,222,0.6); | |
| } | |
| /* ── Botón de WhatsApp ────────────────────────────────────────────────────── */ | |
| .whatsapp-btn { | |
| text-align: center; | |
| margin: 0.5rem 0; | |
| } | |
| .whatsapp-btn a { | |
| display: inline-block; | |
| background: linear-gradient(135deg, #25d366, #128c7e); | |
| color: #fff !important; | |
| font-family: 'Exo 2', sans-serif; | |
| font-weight: 700; | |
| font-size: 0.82rem; | |
| padding: 9px 18px; | |
| border-radius: 25px; | |
| text-decoration: none !important; | |
| box-shadow: 0 4px 15px rgba(37,211,102,0.3); | |
| transition: all 0.3s ease; | |
| } | |
| .whatsapp-btn a:hover { | |
| transform: translateY(-2px); | |
| box-shadow: 0 8px 25px rgba(37,211,102,0.5); | |
| } | |
| /* ── Sección de donaciones en sidebar ────────────────────────────────────── */ | |
| .donation-section { | |
| background: linear-gradient(135deg, rgba(0,229,255,0.06), rgba(233,30,140,0.06)); | |
| border: 1px solid rgba(0,229,255,0.15); | |
| border-radius: var(--radius-md); | |
| padding: 1rem; | |
| margin: 0.8rem 0; | |
| } | |
| /* ── Logo sidebar ─────────────────────────────────────────────────────────── */ | |
| .sidebar-logo-container { | |
| text-align: center; | |
| padding: 0.5rem 0 1rem 0; | |
| } | |
| /* ── Footer ───────────────────────────────────────────────────────────────── */ | |
| .footer-text { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.65rem; | |
| color: var(--gray); | |
| text-align: center; | |
| letter-spacing: 1px; | |
| margin-top: 2rem; | |
| opacity: 0.6; | |
| } | |
| /* ── Scrollbar personalizado ──────────────────────────────────────────────── */ | |
| ::-webkit-scrollbar { width: 6px; } | |
| ::-webkit-scrollbar-track { background: var(--bg-primary); } | |
| ::-webkit-scrollbar-thumb { | |
| background: linear-gradient(var(--cyan), var(--magenta)); | |
| border-radius: 3px; | |
| } | |
| /* ── Animación de pulso para badges ───────────────────────────────────────── */ | |
| @keyframes pulse-glow { | |
| 0% { box-shadow: 0 0 5px rgba(0,229,255,0.3); } | |
| 50% { box-shadow: 0 0 20px rgba(0,229,255,0.6); } | |
| 100% { box-shadow: 0 0 5px rgba(0,229,255,0.3); } | |
| } | |
| .pulse { animation: pulse-glow 2s infinite; } | |
| /* ── Métricas ─────────────────────────────────────────────────────────────── */ | |
| [data-testid="stMetric"] { | |
| background: var(--bg-card) !important; | |
| border: 1px solid rgba(0,229,255,0.1) !important; | |
| border-radius: var(--radius-md) !important; | |
| padding: 0.8rem !important; | |
| } | |
| </style> | |
| """ | |
# Inject the custom CSS at the start of the app; unsafe_allow_html=True is needed
# so the <style> block is emitted as HTML instead of being escaped.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
| # ============================================================================= | |
| # SECCIÓN 2: CONSTANTES Y CONFIGURACIÓN GLOBAL | |
| # ============================================================================= | |
# Cache directory for Hugging Face models: an already-set HF_HOME wins,
# otherwise /tmp/hf_cache is used. The directory is created eagerly and then
# exported through both cache environment variables.
MODELS_DIR = Path(os.environ.get("HF_HOME", "/tmp/hf_cache"))
MODELS_DIR.mkdir(parents=True, exist_ok=True)
for _cache_var in ("HF_HOME", "TRANSFORMERS_CACHE"):
    os.environ[_cache_var] = str(MODELS_DIR)

# Lifetime of generated files, in seconds (5 minutes)
FILE_TTL_SECONDS = 5 * 60

# Maximum accepted upload size (80 MB)
MAX_FILE_SIZE_MB = 80
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 ** 2
# Supported languages: ISO code -> label shown in the UI (flag + Spanish name).
# Insertion order is the order of this literal.
SUPPORTED_LANGUAGES = {
    "es": "🇪🇸 Español",
    "en": "🇺🇸 Inglés",
    "zh": "🇨🇳 Chino",
    "ja": "🇯🇵 Japonés",
    "ru": "🇷🇺 Ruso",
    "it": "🇮🇹 Italiano",
    "fr": "🇫🇷 Francés",
    "pt": "🇵🇹 Portugués",
    "ko": "🇰🇷 Coreano",
    "ar": "🇸🇦 Árabe",
    "de": "🇩🇪 Alemán",
}

# Reverse lookup: UI label -> ISO code
LANG_NAME_TO_CODE = {
    label: iso_code for iso_code, label in SUPPORTED_LANGUAGES.items()
}
# Helsinki-NLP (OPUS-MT) models available per language pair.
# Key: (from_code, to_code) -> model id on the Hugging Face Hub.
# Pairs missing from this table are translated by pivoting through English.
# NOTE(review): ids follow the "Helsinki-NLP/opus-mt-<src>-<dst>" pattern and
# were not checked against the Hub here -- confirm each one actually exists.

# Language codes whose tag inside the model id differs from the ISO code.
_HUB_LANG_TAGS = {"ja": "jap"}

# Each hub language is paired, in both directions, with every listed partner.
_DIRECT_PAIR_PARTNERS = (
    ("es", ("en", "fr", "pt", "it", "de")),
    ("en", ("fr", "de", "it", "pt", "ru", "zh", "ja", "ko", "ar")),
)


def _opus_mt_model_id(src: str, dst: str) -> str:
    """Build the Hub model id used for translating ``src`` into ``dst``."""
    src_tag = _HUB_LANG_TAGS.get(src, src)
    dst_tag = _HUB_LANG_TAGS.get(dst, dst)
    return f"Helsinki-NLP/opus-mt-{src_tag}-{dst_tag}"


HELSINKI_MODEL_MAP: dict[tuple[str, str], str] = {}
for _hub_lang, _partners in _DIRECT_PAIR_PARTNERS:
    for _partner in _partners:
        # Forward direction first, then the reverse one.
        for _pair in ((_hub_lang, _partner), (_partner, _hub_lang)):
            HELSINKI_MODEL_MAP[_pair] = _opus_mt_model_id(*_pair)
| # ============================================================================= | |
| # SECCIÓN 3: SISTEMA DE LIMPIEZA AUTOMÁTICA (AUTO-CLEANUP) | |
| # Cada archivo generado se elimina automáticamente 5 minutos después | |
| # usando threading.Timer para no bloquear la interfaz. | |
| # ============================================================================= | |
def schedule_file_deletion(filepath: str, delay: int = FILE_TTL_SECONDS) -> None:
    """
    Arrange for a file to be removed automatically after ``delay`` seconds.

    The removal runs on a ``threading.Timer`` so this function returns
    immediately. Deletion is best effort: a missing file is ignored and any
    error is logged as a warning instead of being raised.

    Args:
        filepath: Path of the file to delete.
        delay: Seconds to wait before deleting (defaults to FILE_TTL_SECONDS).
    """
    def _remove_if_present() -> None:
        try:
            if not os.path.exists(filepath):
                return
            os.remove(filepath)
            logger.info(f"🗑️ Archivo eliminado automáticamente: {filepath}")
        except Exception as e:
            logger.warning(f"⚠️ No se pudo eliminar {filepath}: {e}")

    # Daemon timer: a pending deletion never keeps the process alive.
    cleanup_timer = threading.Timer(delay, _remove_if_present)
    cleanup_timer.daemon = True
    cleanup_timer.start()
    logger.info(f"⏱️ Eliminación programada en {delay}s para: {filepath}")
| # ============================================================================= | |
| # SECCIÓN 4: MOTOR DE TRADUCCIÓN — HELSINKI-NLP (OPUS-MT) + CTRANSLATE2 | |
| # Implementa Lazy Loading con conversión automática a CTranslate2 INT8. | |
| # CTranslate2 es 2-3x más rápido que Transformers puro en CPU y usa menos RAM. | |
| # El batching agrupa TODOS los fragmentos de texto antes de llamar al modelo, | |
| # eliminando el overhead de llamadas individuales (el principal cuello de botella). | |
| # ============================================================================= | |
@st.cache_resource(show_spinner=False)
def _load_ct2_components(model_name: str):
    """
    Build the (tokenizer, translator) pair for one model; raises on failure.

    Kept separate from ``_get_ct2_components`` so that only successful loads
    are stored by ``st.cache_resource`` -- a raised exception is not cached,
    whereas a returned ``(None, None)`` would be.
    """
    import ctranslate2
    from transformers import MarianTokenizer

    ct2_dir = MODELS_DIR / "ct2" / model_name.replace("/", "_")

    # The tokenizer always comes from the Transformers checkpoint.
    tokenizer = MarianTokenizer.from_pretrained(model_name)

    # `model.bin` is the weights file written by the converter; checking for it
    # (instead of the bare directory) means a half-finished conversion is
    # detected and redone rather than loaded.
    if not (ct2_dir / "model.bin").exists():
        logger.info(f"⬇️ Descargando y convirtiendo {model_name} → CTranslate2 INT8...")
        staging_dir = ct2_dir.with_name(ct2_dir.name + ".partial")
        shutil.rmtree(str(staging_dir), ignore_errors=True)
        shutil.rmtree(str(ct2_dir), ignore_errors=True)  # leftovers of a failed attempt
        ct2_dir.parent.mkdir(parents=True, exist_ok=True)

        # Hub checkpoints are in Transformers format, so the Transformers
        # converter is used (OpusMTConverter reads native OPUS-MT directories).
        # It fetches the model itself; INT8 quantisation shrinks the weights.
        converter = ctranslate2.converters.TransformersConverter(model_name)
        converter.convert(str(staging_dir), quantization="int8", force=True)

        # Publish the finished model in one step.
        os.replace(str(staging_dir), str(ct2_dir))
        logger.info(f"✅ Modelo CT2 listo en: {ct2_dir}")

    translator = ctranslate2.Translator(
        str(ct2_dir),
        device="cpu",
        inter_threads=2,  # up to 2 batches in parallel
        intra_threads=4,  # 4 threads per batch
    )
    return tokenizer, translator


def _get_ct2_components(model_name: str):
    """
    Load (or convert and cache) a Helsinki-NLP model as a CTranslate2 INT8 model.

    First call for a given model:
        1. Downloads the tokenizer from the Hub.
        2. Converts the Transformers checkpoint to CTranslate2 with INT8
           quantisation under ``MODELS_DIR / "ct2"`` (skipped when a converted
           model is already on disk).
        3. Creates the CPU ``ctranslate2.Translator``.
    Later calls reuse the objects held by ``st.cache_resource``.

    Args:
        model_name: Model id on the HF Hub (e.g. 'Helsinki-NLP/opus-mt-es-en').

    Returns:
        Tuple (tokenizer, translator), or (None, None) if anything fails.
        Failures are logged and not cached, so a later call retries.
    """
    try:
        return _load_ct2_components(model_name)
    except Exception as e:
        logger.error(f"Error al cargar/convertir {model_name}: {e}", exc_info=True)
        return None, None
def _resolve_model_name(from_code: str, to_code: str) -> str | None:
    """Look up the direct HF model for a language pair; ``None`` if the table has no entry."""
    language_pair = (from_code, to_code)
    return HELSINKI_MODEL_MAP.get(language_pair)
def ensure_language_pair(from_code: str, to_code: str, status_placeholder) -> bool:
    """
    Pre-load the CTranslate2 model for a language pair, reporting progress.

    Args:
        from_code: Source language code.
        to_code: Target language code.
        status_placeholder: Object whose ``markdown`` method receives the
            status badges (HTML).

    Returns:
        True when the direct model for the pair was loaded. False when loading
        failed, and also when the pair has no direct model (it will be handled
        by the English pivot); in that second case nothing is loaded and no
        badge is shown.
    """
    model_name = _resolve_model_name(from_code, to_code)
    if not model_name:
        logger.info(f"Par {from_code}→{to_code} no en mapa directo. Se usará pivote inglés.")
        return False

    def _show_badge(badge_html: str) -> None:
        status_placeholder.markdown(badge_html, unsafe_allow_html=True)

    _show_badge(
        f'<div class="status-badge processing pulse">⬇️ Preparando modelo {from_code}→{to_code} (CTranslate2 INT8)...</div>'
    )
    tokenizer, translator = _get_ct2_components(model_name)
    engine_ready = tokenizer is not None and translator is not None

    if engine_ready:
        _show_badge('<div class="status-badge success">✅ Motor listo</div>')
    else:
        _show_badge(
            f'<div class="status-badge error">❌ Error al preparar modelo {model_name}</div>'
        )
    return engine_ready
def translate_batch(texts: list[str], from_code: str, to_code: str) -> list[str]:
    """
    Translate a whole list of texts with CTranslate2, preserving order.

    Entries that are empty, whitespace-only or shorter than two characters
    (after stripping) are passed through untouched. If the pair has a direct
    model it is used; otherwise the texts go source → English → target.
    Any entry that cannot be translated (model unavailable, missing pivot leg,
    inference error) keeps its ORIGINAL text -- in particular, source-language
    text is never fed into the English→target model.

    Args:
        texts: Texts to translate.
        from_code: Source language code.
        to_code: Target language code.

    Returns:
        A list of the same length and order as ``texts``. An empty input list
        is returned as-is.
    """
    if not texts:
        return texts

    results = list(texts)

    # Same language on both sides: nothing to translate (and no round trip
    # through English, which would only degrade the text).
    if from_code == to_code:
        return results

    # (original_index, text) for the entries worth sending to the model
    pending = [(i, t) for i, t in enumerate(texts) if t and len(t.strip()) >= 2]
    if not pending:
        return results

    def _run_ct2_batch(model_name: str, batch: list[str]) -> list[str | None]:
        """Translate ``batch`` with one model; ``None`` marks entries that failed."""
        tokenizer, translator = _get_ct2_components(model_name)
        if tokenizer is None or translator is None:
            return [None] * len(batch)

        translated: list[str | None] = []
        # Small sub-batches keep memory bounded on very large documents.
        chunk_size = 120
        for start in range(0, len(batch), chunk_size):
            chunk = batch[start:start + chunk_size]
            try:
                tokenized = [
                    tokenizer.convert_ids_to_tokens(
                        tokenizer.encode(t, truncation=True, max_length=512)
                    )
                    for t in chunk
                ]
                outputs = translator.translate_batch(
                    tokenized,
                    beam_size=2,
                    max_batch_size=64,
                    max_decoding_length=512,
                )
                decoded = [
                    tokenizer.convert_tokens_to_string(r.hypotheses[0])
                    for r in outputs
                ]
                if len(decoded) != len(chunk):
                    raise RuntimeError("el motor devolvió un número inesperado de resultados")
            except Exception as e:
                # Only this chunk is lost; earlier chunks keep their translations.
                logger.warning(f"Error en batch CT2 ({model_name}): {e}")
                decoded = [None] * len(chunk)
            translated.extend(decoded)
        return translated

    # ── Choose the chain of models: one direct hop, or two hops via English ─
    direct_model = _resolve_model_name(from_code, to_code)
    if direct_model:
        legs = [direct_model]
    else:
        logger.info(f"Batch pivote: {from_code}→en→{to_code}")
        legs = [
            _resolve_model_name(from_code, "en"),
            _resolve_model_name("en", to_code),
        ]
        if not all(legs):
            logger.warning(f"Sin modelos para el pivote {from_code}→en→{to_code}")
            return results

    # ── Run every leg, carrying forward only the entries that succeeded ────
    current: list[str | None] = [t for _, t in pending]
    for model_name in legs:
        live = [(pos, t) for pos, t in enumerate(current) if t is not None]
        if not live:
            break
        leg_output = _run_ct2_batch(model_name, [t for _, t in live])
        following: list[str | None] = [None] * len(current)
        for (pos, _), t in zip(live, leg_output):
            following[pos] = t
        current = following

    for (orig_idx, _), t in zip(pending, current):
        if t is not None:
            results[orig_idx] = t
    return results
def translate_text(text: str, from_code: str, to_code: str) -> str:
    """
    Translate a single string by delegating to ``translate_batch``.

    Falsy or whitespace-only input is returned unchanged without calling the
    translation engine.
    """
    is_blank = not text or not text.strip()
    if is_blank:
        return text
    batch_result = translate_batch([text], from_code, to_code)
    return batch_result[0]
| # ============================================================================= | |
| # SECCIÓN 5: PROCESAMIENTO DE PDF | |
| # Pipeline profesional: PDF → DOCX estructural → DOCX Traducido | |
| # ============================================================================= | |
def _pdf_to_docx(pdf_path: str, progress_bar, status_text) -> str:
    """
    Convert a PDF into a temporary DOCX with pdf2docx.

    Args:
        pdf_path: Path of the PDF to convert.
        progress_bar: Object whose ``progress(value, text=...)`` is called once
            the conversion is done (set to 0.40).
        status_text: Object whose ``markdown`` method receives the status badge.

    Returns:
        Path of the generated DOCX, already scheduled for automatic deletion.

    Raises:
        RuntimeError: If the conversion produced no file or an empty one.
        Exception: Anything raised by pdf2docx is propagated.
    """
    from pdf2docx import Converter

    # mkstemp reserves a unique name, so two conversions started within the
    # same second can never write to the same file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fd, docx_path = tempfile.mkstemp(prefix=f"converted_{timestamp}_", suffix=".docx")
    os.close(fd)

    status_text.markdown(
        '<div class="status-badge processing pulse">📐 Convirtiendo PDF a DOCX (Reconstruyendo diseño)...</div>',
        unsafe_allow_html=True
    )
    cpu_cores = max(1, os.cpu_count() or 1)

    converted = False
    try:
        cv = Converter(pdf_path)
        try:
            # Can take a long time on large, multi-column documents.
            cv.convert(docx_path, start=0, end=None, multi_processing=True, cpu_count=cpu_cores)
        finally:
            # Release the PDF handle even when the conversion raises.
            cv.close()
        if not os.path.exists(docx_path) or os.path.getsize(docx_path) == 0:
            raise RuntimeError("Fallo al reconstruir la maquetación. DOCX vacío.")
        converted = True
    finally:
        if not converted:
            # Do not leave the reserved (empty or partial) file behind.
            try:
                os.remove(docx_path)
            except OSError:
                pass

    schedule_file_deletion(docx_path)
    logger.info(f"✅ Estructura extraída a DOCX temporal: {docx_path}")
    progress_bar.progress(0.40, text="✅ Estructura recuperada. Iniciando traducción...")
    return docx_path
def translate_pdf(
    input_path: str,
    from_code: str,
    to_code: str,
    progress_bar,
    status_text
) -> str:
    """
    Translate a PDF through the pipeline PDF → structural DOCX → translated DOCX.

    Args:
        input_path: Path of the source PDF.
        from_code: Source language code.
        to_code: Target language code.
        progress_bar: Progress widget forwarded to both pipeline steps.
        status_text: Status placeholder forwarded to both pipeline steps.

    Returns:
        Path of the translated DOCX.

    Raises:
        MemoryError: Re-raised with a user-facing hint to split the PDF.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    try:
        # Step 1: rebuild the layout as a DOCX.
        interim_docx = _pdf_to_docx(input_path, progress_bar, status_text)
        # Step 2: translate that DOCX with the regular Word pipeline.
        translated_path = translate_docx(
            input_path=interim_docx,
            from_code=from_code,
            to_code=to_code,
            progress_bar=progress_bar,
            status_text=status_text,
        )
    except MemoryError:
        raise MemoryError("Proceso abortado. Divide el PDF en menos páginas (Ej: max 100).")
    except Exception as e:
        logger.error(f"Falla fatal en la reconstrucción PDF->DOCX: {e}", exc_info=True)
        raise
    else:
        return translated_path
| # ============================================================================= | |
| # SECCIÓN 6: PROCESAMIENTO DE WORD (DOCX) | |
| # Usa python-docx para preservar: | |
| # - Negritas, cursivas, subrayado | |
| # - Alineación de párrafos | |
| # - Contenido de tablas (celda por celda) | |
| # ============================================================================= | |
def _collect_docx_paragraphs(doc) -> list:
    """Return every paragraph of the body, tables (nested too), headers and footers, once each."""
    collected: list = []
    seen_elements: set[int] = set()

    def _add(paragraphs) -> None:
        for para in paragraphs:
            # Merged cells and headers linked to a previous section expose the
            # same underlying XML paragraph more than once; keep the first.
            key = id(getattr(para, "_element", para))
            if key not in seen_elements:
                seen_elements.add(key)
                collected.append(para)

    def _add_tables(tables) -> None:
        for table in tables:
            for row in table.rows:
                for cell in row.cells:
                    _add(cell.paragraphs)
                    _add_tables(cell.tables)  # tables nested inside the cell

    _add(doc.paragraphs)
    _add_tables(doc.tables)
    for section in doc.sections:
        if section.header:
            _add(section.header.paragraphs)
        if section.footer:
            _add(section.footer.paragraphs)
    return collected


def _apply_docx_translation(para, original: str, translated: str) -> None:
    """Write ``translated`` into ``para`` without touching runs that hold no text."""
    if not translated or not translated.strip() or translated == original:
        # Nothing new to write: leave the paragraph and its run formatting intact.
        return
    # Assigning run.text wipes everything else stored in that run (inline
    # pictures, for instance), so only runs that actually carry text are used.
    text_runs = [run for run in para.runs if run.text]
    if not text_runs:
        return
    # The whole translation goes into the first text run (keeping that run's
    # formatting); the remaining text runs are emptied.
    text_runs[0].text = translated
    for run in text_runs[1:]:
        run.text = ""


def translate_docx(
    input_path: str,
    from_code: str,
    to_code: str,
    progress_bar,
    status_text
) -> str:
    """
    Translate a Word (.docx) file, keeping its structure.

    Process:
        1. Collect every paragraph: body, tables (including nested ones),
           section headers and footers -- each paragraph once.
        2. Translate all paragraph texts with a single ``translate_batch`` call.
        3. Write each translation into the paragraph's first text run and empty
           the other text runs. Paragraphs whose text did not change, and runs
           without text, are left untouched.

    Args:
        input_path: Path of the source .docx.
        from_code: Source language code.
        to_code: Target language code.
        progress_bar: Object exposing ``progress(value, text=...)``.
        status_text: Object exposing ``markdown(html, unsafe_allow_html=...)``.

    Returns:
        Path of the translated .docx (uniquely named, in the temp directory,
        scheduled for automatic deletion).

    Raises:
        MemoryError: Re-raised with a user-facing hint to split the document.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    try:
        from docx import Document
        doc = Document(input_path)

        status_text.markdown(
            '<div class="status-badge processing">📝 Recopilando textos del documento...</div>',
            unsafe_allow_html=True
        )

        # ── STEP 1: gather all paragraphs into one flat list ───────────────
        all_paragraphs = _collect_docx_paragraphs(doc)

        # ── STEP 2: full text of each paragraph (concatenation of its runs) ─
        original_texts = [
            "".join(run.text for run in para.runs)
            for para in all_paragraphs
        ]
        total_elements = max(len(all_paragraphs), 1)
        status_text.markdown(
            f'<div class="status-badge processing pulse">🔄 Traduciendo {total_elements} elementos en lote...</div>',
            unsafe_allow_html=True
        )
        progress_bar.progress(0.05, text="Enviando lote al motor CTranslate2...")

        # ── STEP 3: one batched call to the translation engine ─────────────
        translated_texts = translate_batch(original_texts, from_code, to_code)
        progress_bar.progress(0.90, text="Aplicando traducciones al documento...")

        # ── STEP 4: write the translations back ────────────────────────────
        for para, original, translated in zip(all_paragraphs, original_texts, translated_texts):
            _apply_docx_translation(para, original, translated)
        progress_bar.progress(1.0, text="✅ Documento procesado")

        # ── Save under a unique name (no clashes between concurrent runs) ──
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fd, output_path = tempfile.mkstemp(
            prefix=f"traducido_{from_code}_to_{to_code}_{timestamp}_",
            suffix=".docx",
        )
        os.close(fd)
        try:
            doc.save(output_path)
        except BaseException:
            # Do not leave the reserved empty file behind if saving fails.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        schedule_file_deletion(output_path)
        logger.info(f"✅ DOCX traducido guardado: {output_path}")
        return output_path
    except MemoryError:
        raise MemoryError(
            "El documento Word es demasiado pesado. "
            "Por favor, divídelo en documentos más pequeños."
        )
    except Exception as e:
        logger.error(f"Error al traducir DOCX: {e}", exc_info=True)
        raise
| # ============================================================================= | |
| # SECCIÓN 7: VALIDACIÓN DE ARCHIVOS | |
| # Verifica formato, extensión y tamaño antes de procesar. | |
| # ============================================================================= | |
def validate_uploaded_file(uploaded_file) -> tuple[bool, str]:
    """
    Check that an uploaded file can be processed.

    Checks, in order:
        1. A file was provided.
        2. The name ends in .pdf or .docx (case-insensitive).
        3. The size does not exceed MAX_FILE_SIZE_BYTES.
        4. The file is not empty.

    Args:
        uploaded_file: Object exposing ``name`` and ``size`` (as returned by
            st.file_uploader()), or None.

    Returns:
        Tuple (is_valid, error_message); the message is "" when valid.
    """
    if uploaded_file is None:
        return False, "No se ha subido ningún archivo."

    # Extension
    lowered_name = uploaded_file.name.lower()
    if not lowered_name.endswith((".pdf", ".docx")):
        return False, (
            "❌ Formato no permitido. Solo se aceptan archivos `.pdf` y `.docx`.\n"
            "Si tienes un `.doc` antiguo, conviértelo primero a `.docx` con Word o LibreOffice."
        )

    # Size limit
    size_bytes = uploaded_file.size
    if size_bytes > MAX_FILE_SIZE_BYTES:
        size_mb = size_bytes / (1024 * 1024)
        return False, (
            f"❌ El archivo pesa {size_mb:.1f} MB, excede el límite de {MAX_FILE_SIZE_MB} MB.\n"
            "Por favor, divide el archivo en partes más pequeñas."
        )

    # Empty file
    if size_bytes == 0:
        return False, "❌ El archivo está vacío."

    return True, ""
| # ============================================================================= | |
| # SECCIÓN 8: INTERFAZ DE USUARIO — SIDEBAR | |
| # Contiene: logo, selectores de idioma, donaciones, soporte. | |
| # ============================================================================= | |
def render_sidebar() -> tuple[str, str]:
    """
    Render the complete sidebar and report the language pair the user chose.

    The sidebar contains, top to bottom: the company logo (or a styled text
    fallback), the source- and target-language selectors, a donation link,
    a WhatsApp support link and a short informational footer.

    Returns:
        A ``(source_language_code, target_language_code)`` tuple, looked up
        in ``LANG_NAME_TO_CODE`` from the two selectbox values.
    """
    with st.sidebar:
        # ── Company logo ───────────────────────────────────────────────────
        st.markdown('<div class="sidebar-logo-container">', unsafe_allow_html=True)
        logo_path = Path("LOGO_ADVISION_AI_TRANSPARENTE.png")
        logo_shown = False
        try:
            if logo_path.exists():
                st.image(str(logo_path), use_container_width=True)
                logo_shown = True
        except Exception:
            # Showing the logo is best-effort: a broken image must not take
            # the sidebar down, but the failure is logged instead of being
            # silently discarded.
            logger.warning(
                "No se pudo mostrar el logo %s; se usará el texto de respaldo.",
                logo_path,
                exc_info=True,
            )
        if not logo_shown:
            # Fallback: styled text when the image is missing or failed to load.
            st.markdown(
                '<div style="font-family:\'Exo 2\',sans-serif;font-weight:900;'
                'font-size:1.4rem;background:linear-gradient(135deg,#00e5ff,#e91e8c);'
                '-webkit-background-clip:text;-webkit-text-fill-color:transparent;'
                'background-clip:text;text-align:center;">AdVision AI</div>',
                unsafe_allow_html=True
            )
        st.markdown('</div>', unsafe_allow_html=True)

        # ── Visual divider ─────────────────────────────────────────────────
        st.markdown('<hr class="gradient-divider">', unsafe_allow_html=True)

        # ── Source-language selector ───────────────────────────────────────
        st.markdown('<p class="section-label">🔤 Idioma Origen</p>', unsafe_allow_html=True)
        lang_names = list(SUPPORTED_LANGUAGES.values())
        # Default to Spanish when it is offered, otherwise the first entry.
        default_from_idx = lang_names.index("🇪🇸 Español") if "🇪🇸 Español" in lang_names else 0
        from_lang_name = st.selectbox(
            "Origen",
            options=lang_names,
            index=default_from_idx,
            label_visibility="collapsed",
            key="select_from_lang"
        )

        # ── Direction arrow ────────────────────────────────────────────────
        st.markdown(
            '<div style="text-align:center;font-size:1.3rem;margin:4px 0;'
            'background:linear-gradient(135deg,#00e5ff,#e91e8c);'
            '-webkit-background-clip:text;-webkit-text-fill-color:transparent;">⬇️</div>',
            unsafe_allow_html=True
        )

        # ── Target-language selector ───────────────────────────────────────
        st.markdown('<p class="section-label">🎯 Idioma Destino</p>', unsafe_allow_html=True)
        # Default to English when it is offered, otherwise the second entry.
        default_to_idx = lang_names.index("🇺🇸 Inglés") if "🇺🇸 Inglés" in lang_names else 1
        to_lang_name = st.selectbox(
            "Destino",
            options=lang_names,
            index=default_to_idx,
            label_visibility="collapsed",
            key="select_to_lang"
        )

        # Warn when the same language was chosen on both sides.
        from_code = LANG_NAME_TO_CODE[from_lang_name]
        to_code = LANG_NAME_TO_CODE[to_lang_name]
        if from_code == to_code:
            st.warning("⚠️ Selecciona idiomas diferentes para origen y destino.")

        # ── Divider ────────────────────────────────────────────────────────
        st.markdown('<hr class="gradient-divider">', unsafe_allow_html=True)

        # ── Support & donations section ────────────────────────────────────
        st.markdown(
            '<div class="donation-section">'
            '<p class="section-label" style="text-align:center;">💎 Soporte & Donaciones</p>'
            '<p style="font-size:0.78rem;color:#aaa;text-align:center;margin-bottom:0.8rem;">'
            'Si esta herramienta te resulta útil, considera apoyar su desarrollo:</p>',
            unsafe_allow_html=True
        )
        # PayPal donation link (the account handle is hard-coded in the URL).
        st.markdown(
            '<div class="paypal-btn-container">'
            '<a href="https://www.paypal.me/Noru3D" target="_blank" rel="noopener">'
            '💙 Donar con PayPal'
            '</a>'
            '</div>',
            unsafe_allow_html=True
        )
        st.markdown('</div>', unsafe_allow_html=True)

        # ── WhatsApp support link ──────────────────────────────────────────
        # The phone number (with country code) and the pre-filled message are
        # hard-coded in the wa.me URL below.
        st.markdown(
            '<div class="whatsapp-btn" style="margin-top:0.6rem;">'
            '<a href="https://wa.me/5215537494034?text=Hola%2C%20necesito%20soporte%20con%20LibreTranslate%20ProDoc" '
            'target="_blank" rel="noopener">'
            '💬 Soporte por WhatsApp'
            '</a>'
            '</div>',
            unsafe_allow_html=True
        )

        # ── Additional information ─────────────────────────────────────────
        st.markdown('<hr class="gradient-divider">', unsafe_allow_html=True)
        st.markdown(
            '<p style="font-size:0.68rem;color:#555;text-align:center;line-height:1.5;">'
            '🔒 Sin telemetría · 🌐 Offline · 🗑️ Auto-limpieza 5 min<br>'
            '<span style="color:#333;">Motor: Helsinki-NLP OPUS-MT v2</span>'
            '</p>',
            unsafe_allow_html=True
        )
    return from_code, to_code
| # ============================================================================= | |
| # SECCIÓN 9: INTERFAZ DE USUARIO — ÁREA PRINCIPAL | |
| # ============================================================================= | |
def render_main_area(from_code: str, to_code: str) -> None:
    """
    Render the main area of the application.

    Sections, top to bottom:
        - Header with title and subtitle.
        - Three informational cards.
        - The currently active translation pair.
        - The file-upload zone.
        - Once a valid file is uploaded: a file summary and the translate
          button, which hands over to ``_process_translation`` when clicked.

    Args:
        from_code: Source-language code selected in the sidebar.
        to_code: Target-language code selected in the sidebar.
    """
    # Local import: the uploaded file name is user-controlled and is embedded
    # in HTML rendered with unsafe_allow_html=True, so it must be escaped.
    import html

    # ── Main header ────────────────────────────────────────────────────────
    st.markdown('<h1 class="app-title">LibreTranslate ProDoc</h1>', unsafe_allow_html=True)
    st.markdown(
        '<p class="app-subtitle">Traducción offline · Preserva el diseño · Sin límites</p>',
        unsafe_allow_html=True
    )

    # ── Intro cards ────────────────────────────────────────────────────────
    col_info1, col_info2, col_info3 = st.columns(3)
    with col_info1:
        st.markdown(
            '<div class="pro-card" style="text-align:center;">'
            '<div style="font-size:2rem;">📄</div>'
            '<div style="font-weight:700;margin:6px 0;font-size:0.9rem;">PDF & Word</div>'
            '<div style="font-size:0.75rem;color:#888;">Soporte completo para .pdf y .docx con preservación de diseño</div>'
            '</div>',
            unsafe_allow_html=True
        )
    with col_info2:
        st.markdown(
            '<div class="pro-card" style="text-align:center;">'
            '<div style="font-size:2rem;">🔒</div>'
            '<div style="font-weight:700;margin:6px 0;font-size:0.9rem;">100% Offline</div>'
            '<div style="font-size:0.75rem;color:#888;">Motor Helsinki-NLP OPUS-MT. Alta calidad · Tus documentos no salen del servidor</div>'
            '</div>',
            unsafe_allow_html=True
        )
    with col_info3:
        st.markdown(
            '<div class="pro-card" style="text-align:center;">'
            '<div style="font-size:2rem;">🗑️</div>'
            '<div style="font-weight:700;margin:6px 0;font-size:0.9rem;">Auto-limpieza</div>'
            '<div style="font-size:0.75rem;color:#888;">Los archivos se eliminan automáticamente del servidor en 5 minutos</div>'
            '</div>',
            unsafe_allow_html=True
        )
    st.markdown("<br>", unsafe_allow_html=True)

    # ── Active translation pair ────────────────────────────────────────────
    # Fall back to the raw code when it has no display name.
    from_name = SUPPORTED_LANGUAGES.get(from_code, from_code)
    to_name = SUPPORTED_LANGUAGES.get(to_code, to_code)
    st.markdown(
        f'<div class="pro-card">'
        f'<p class="section-label">Par de traducción activo</p>'
        f'<div style="display:flex;align-items:center;gap:12px;flex-wrap:wrap;">'
        f'<span class="status-badge ready">{from_name}</span>'
        f'<span style="font-size:1.2rem;opacity:0.6;">→</span>'
        f'<span class="status-badge ready">{to_name}</span>'
        f'</div>'
        f'</div>',
        unsafe_allow_html=True
    )

    # ── File-upload zone ───────────────────────────────────────────────────
    st.markdown(
        '<div class="pro-card">'
        '<p class="section-label">📤 Cargar Documento</p>',
        unsafe_allow_html=True
    )
    uploaded_file = st.file_uploader(
        label="Arrastra tu archivo aquí o haz clic para seleccionarlo",
        type=["pdf", "docx"],
        help=f"Formatos permitidos: PDF y DOCX · Tamaño máximo: {MAX_FILE_SIZE_MB} MB",
        accept_multiple_files=False
    )
    st.markdown('</div>', unsafe_allow_html=True)

    # Nothing else to render until a file has been uploaded.
    if uploaded_file is None:
        return

    # ── Validation ─────────────────────────────────────────────────────────
    is_valid, error_msg = validate_uploaded_file(uploaded_file)
    if not is_valid:
        st.error(error_msg)
        return

    # ── Uploaded-file summary ──────────────────────────────────────────────
    file_size_mb = uploaded_file.size / (1024 * 1024)
    file_ext = Path(uploaded_file.name).suffix.lower()
    kind_badge = "📄 PDF" if file_ext == ".pdf" else "📝 DOCX"
    # Escape the name so characters such as < > & " in a file name are shown
    # literally instead of being interpreted as markup.
    safe_name = html.escape(uploaded_file.name)
    st.markdown(
        f'<div class="pro-card">'
        f'<p class="section-label">📁 Archivo Cargado</p>'
        f'<div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap;">'
        f'<span class="status-badge ready">{kind_badge}</span>'
        f'<span style="font-size:0.85rem;color:#ccc;">{safe_name}</span>'
        f'<span style="font-size:0.8rem;color:#888;">{file_size_mb:.2f} MB</span>'
        f'</div>'
        f'</div>',
        unsafe_allow_html=True
    )

    # Translating a language into itself makes no sense; stop here.
    if from_code == to_code:
        st.warning("⚠️ Selecciona idiomas diferentes en la barra lateral para continuar.")
        return

    # ── Main translate button ──────────────────────────────────────────────
    # The second (wider) column only acts as a spacer.
    col_btn, _ = st.columns([1, 3])
    with col_btn:
        translate_btn = st.button(
            "🌐 Traducir Documento",
            use_container_width=True,
            type="primary"
        )
    if translate_btn:
        _process_translation(
            uploaded_file=uploaded_file,
            from_code=from_code,
            to_code=to_code,
            file_ext=file_ext
        )
def _process_translation(
    uploaded_file,
    from_code: str,
    to_code: str,
    file_ext: str
) -> None:
    """
    Orchestrate one complete translation run and render its outcome.

    Steps:
        1. Ensure the language model(s) are available via
           ``ensure_language_pair`` (direct pair first, then an English
           pivot when the direct pair is reported unavailable).
        2. Write the uploaded bytes to a uniquely named file in the system
           temp directory and schedule its deletion.
        3. Call ``translate_pdf`` or ``translate_docx`` depending on
           ``file_ext``.
        4. Offer the translated document for download.

    Every exception raised inside these steps is caught and shown to the
    user as an error message; nothing is re-raised from the try block.

    Args:
        uploaded_file: File uploaded by the user; its ``name`` attribute and
            ``getbuffer()`` method are used.
        from_code: Source-language code.
        to_code: Target-language code.
        file_ext: File extension, ``'.pdf'`` or ``'.docx'``.
    """
    # Local import: the download name derives from the user-supplied file
    # name and is embedded in HTML rendered with unsafe_allow_html=True.
    import html

    # ── Status containers for visual feedback ──────────────────────────────
    status_placeholder = st.empty()
    progress_placeholder = st.empty()
    progress_bar = progress_placeholder.progress(0, text="Iniciando traducción...")

    try:
        # ── STEP 1: check / download the language model ────────────────────
        status_placeholder.markdown(
            '<div class="status-badge processing pulse">⚙️ Verificando modelos de idioma...</div>',
            unsafe_allow_html=True
        )
        model_ok = ensure_language_pair(from_code, to_code, status_placeholder)
        if not model_ok:
            # Direct pair unavailable: fall back to English as pivot language.
            st.info(
                "ℹ️ El par directo no está disponible. "
                "Se intentará la traducción vía inglés como idioma pivote. "
                "Esto puede requerir descargar hasta 2 modelos adicionales."
            )
            # NOTE(review): the results of the two pivot calls are not
            # checked; presumably the translators cope with (or raise on) a
            # missing model — confirm against ensure_language_pair.
            if from_code != "en":
                ensure_language_pair(from_code, "en", status_placeholder)
            if to_code != "en":
                ensure_language_pair("en", to_code, status_placeholder)

        # ── STEP 2: persist the upload in the temp directory ───────────────
        # mkstemp guarantees a unique file name. A name built only from a
        # second-resolution timestamp would let two sessions uploading in the
        # same second overwrite each other's input file.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fd, input_path = tempfile.mkstemp(
            prefix=f"input_{timestamp}_",
            suffix=file_ext,
        )
        with os.fdopen(fd, "wb") as f:
            f.write(uploaded_file.getbuffer())
        # The input file is scheduled for deletion as well.
        schedule_file_deletion(input_path)
        progress_bar.progress(0.05, text="Archivo cargado al servidor (se eliminará en 5 min)...")

        # ── STEP 3: run the translator matching the file type ──────────────
        start_time = time.time()
        if file_ext == ".pdf":
            status_placeholder.markdown(
                '<div class="status-badge processing pulse">📐 PDF detectado: convirtiendo estructura a Word primero...</div>',
                unsafe_allow_html=True
            )
            output_path = translate_pdf(
                input_path=input_path,
                from_code=from_code,
                to_code=to_code,
                progress_bar=progress_bar,
                status_text=status_placeholder
            )
            # The result is delivered as .docx even though the input was .pdf.
            output_ext = ".docx"
        elif file_ext == ".docx":
            status_placeholder.markdown(
                '<div class="status-badge processing pulse">🔄 Traduciendo DOCX...</div>',
                unsafe_allow_html=True
            )
            output_path = translate_docx(
                input_path=input_path,
                from_code=from_code,
                to_code=to_code,
                progress_bar=progress_bar,
                status_text=status_placeholder
            )
            output_ext = ".docx"
        else:
            raise ValueError(f"Extensión no soportada: {file_ext}")
        elapsed_time = time.time() - start_time
        progress_bar.progress(1.0, text="✅ ¡Traducción completada!")

        # ── STEP 4: show the result and the download button ────────────────
        status_placeholder.markdown(
            f'<div class="status-badge success">✅ Traducción completada en {elapsed_time:.1f}s</div>',
            unsafe_allow_html=True
        )
        # Read the translated file into memory for the download button.
        with open(output_path, "rb") as f:
            translated_bytes = f.read()

        # Suggested download name, based on the original file name.
        original_stem = Path(uploaded_file.name).stem
        download_name = f"{original_stem}_traducido_{to_code}{output_ext}"
        # Escaped copy for HTML display only; the download itself keeps the
        # raw name.
        safe_download_name = html.escape(download_name)

        # ── Result card ────────────────────────────────────────────────────
        st.markdown(
            '<div class="pro-card">'
            '<p class="section-label">✨ Documento Traducido</p>',
            unsafe_allow_html=True
        )
        col_dl, col_info = st.columns([1, 2])
        with col_dl:
            # Always DOCX: PDF input is also delivered as a Word document.
            mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            st.download_button(
                label="⬇️ Descargar DOCX Traducido",
                data=translated_bytes,
                file_name=download_name,
                mime=mime_type,
                use_container_width=True
            )
        with col_info:
            output_size_mb = len(translated_bytes) / (1024 * 1024)
            st.markdown(
                f'<div style="font-size:0.8rem;color:#888;line-height:1.8;">'
                f'📁 <b style="color:#ccc;">{safe_download_name}</b><br>'
                f'📦 Tamaño: <b style="color:#ccc;">{output_size_mb:.2f} MB</b><br>'
                f'⏱️ Tiempo: <b style="color:#ccc;">{elapsed_time:.1f} segundos</b><br>'
                f'🗑️ Se eliminará del servidor en <b style="color:#00e5ff;">5 minutos</b>'
                f'</div>',
                unsafe_allow_html=True
            )
        st.markdown('</div>', unsafe_allow_html=True)

        # ── Note about known limitations ───────────────────────────────────
        st.info(
            "📌 **Nota:** La preservación del diseño depende de la complejidad del documento. "
            "PDFs con fuentes incrustadas no estándar o con mucho contenido de imagen "
            "pueden mostrar diferencias visuales. El texto traducido puede ser más largo "
            "que el original, lo que ocasionalmente afecta el layout."
        )
    except MemoryError as me:
        progress_bar.progress(0, text="Error")
        st.error(str(me))
        status_placeholder.markdown(
            '<div class="status-badge error">❌ Error: Archivo demasiado grande</div>',
            unsafe_allow_html=True
        )
    except Exception as e:
        progress_bar.progress(0, text="Error")
        error_detail = str(e)
        # Only the first 300 characters of the error are shown to the user.
        st.error(
            f"❌ **Error durante la traducción:**\n\n"
            f"```\n{error_detail[:300]}\n```\n\n"
            "Por favor, intenta con un archivo más pequeño o verifica que no esté corrupto."
        )
        status_placeholder.markdown(
            '<div class="status-badge error">❌ Error en el procesamiento</div>',
            unsafe_allow_html=True
        )
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("Error en _process_translation: %s", e)
| # ============================================================================= | |
| # SECCIÓN 10: PUNTO DE ENTRADA PRINCIPAL | |
| # ============================================================================= | |
def main():
    """
    Entry point that renders the whole application.

    Execution flow:
        1. Render the sidebar and read the language pair chosen by the user.
        2. Render the main area (upload and translation logic) for that pair.
        3. Render the footer.
    """
    # Sidebar first: it yields the language selection the main area needs.
    source_lang, target_lang = render_sidebar()
    render_main_area(source_lang, target_lang)

    # Footer markup, assembled once and rendered as raw HTML.
    footer_html = (
        '<div class="footer-text">'
        'LibreTranslate ProDoc · Powered by Helsinki-NLP OPUS-MT & PyMuPDF · '
        'Desarrollado con ❤️ por AdVision AI · '
        'Versión 2.0.0'
        '</div>'
    )
    st.markdown(footer_html, unsafe_allow_html=True)


# ── Run the application ────────────────────────────────────────────────────
if __name__ == "__main__":
    main()