Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Script para pre-descargar modelos de WhisperX/Faster-Whisper | |
| Esto permite que el sistema funcione offline después de la primera ejecución. | |
| """ | |
| import os | |
| import sys | |
| import pathlib | |
# Point the process at the local folders before WhisperX gets imported:
# the script's own directory goes first on the import path so the main
# application module can be found.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, SCRIPT_DIR)

# Reuse the portable-environment setup function from the main application file.
try:
    from desgrabador_usuario_final import setup_portable_environment

    APP_TMP_DIR = setup_portable_environment()
    print("✅ Entorno portátil configurado")
except Exception as exc:
    print(f"⚠️ Error configurando entorno portátil: {exc}")
    # Minimal fallback configuration: make sure a local cache folder exists
    # and disable Hugging Face Hub symlinks unless the caller already chose
    # a value for that variable.
    app_cache_dir = os.path.join(SCRIPT_DIR, "app_cache")
    pathlib.Path(app_cache_dir).mkdir(parents=True, exist_ok=True)
    if "HF_HUB_DISABLE_SYMLINKS" not in os.environ:
        os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"

# WhisperX is mandatory: without it there is nothing to download, so stop.
try:
    import whisperx
except ImportError as exc:
    print(f"❌ Error importando WhisperX: {exc}")
    print("Asegúrate de que WhisperX esté instalado en el entorno embebido")
    sys.exit(1)
else:
    print("✅ WhisperX importado correctamente")
def download_model(model_name, device="cpu"):
    """Download one Whisper model by loading it once through WhisperX.

    Loading the model forces the download when it is not available locally
    yet. The loaded model is discarded right away.

    Args:
        model_name: Model identifier handed to ``whisperx.load_model``.
        device: Device handed to ``whisperx.load_model``.

    Returns:
        True when the model loaded without raising, False on any exception
        (the error is printed, never propagated).
    """
    import gc

    # int8 is used to keep memory usage low while the model is loaded.
    load_options = {"device": device, "compute_type": "int8"}
    try:
        print(f"📥 Descargando modelo: {model_name}")
        loaded = whisperx.load_model(model_name, **load_options)
        print(f"✅ Modelo {model_name} descargado exitosamente")
        # Free the memory held by the model before the next download.
        del loaded
        gc.collect()
    except Exception as err:
        print(f"❌ Error descargando {model_name}: {err}")
        return False
    return True
def download_alignment_models(languages=None, device="cpu"):
    """Download the WhisperX alignment models for a set of languages.

    Each model is loaded once through ``whisperx.load_align_model`` and then
    discarded. A failure for one language is printed and does not stop the
    remaining languages from being processed.

    Args:
        languages: Iterable of language codes to process. ``None`` selects
            the default set: es, en, fr, de, it, pt.
        device: Device handed to ``whisperx.load_align_model``.

    Returns:
        The number of languages whose alignment model loaded without raising.
    """
    import gc

    if languages is None:
        languages = ("es", "en", "fr", "de", "it", "pt")

    downloaded = 0
    for lang in languages:
        try:
            print(f"📥 Descargando modelo de alineación para: {lang}")
            # The metadata returned alongside the model is not needed here.
            alignment_model, _metadata = whisperx.load_align_model(
                language_code=lang,
                device=device,
            )
            print(f"✅ Modelo de alineación {lang} descargado")
            # Free the memory held by the model before the next language.
            del alignment_model
            gc.collect()
            downloaded += 1
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"⚠️ Error descargando alineación {lang}: {e}")
    return downloaded
def main():
    """Download every Whisper model and the alignment models.

    Returns:
        0 when at least one Whisper model was downloaded, 1 when none was.
    """
    banner = "=" * 60
    print("🚀 Iniciando descarga de modelos WhisperX para uso offline")
    print(banner)

    # Models to download, ordered from smallest to largest.
    models = (
        "tiny",      # ~39 MB
        "base",      # ~74 MB
        "small",     # ~244 MB
        "medium",    # ~769 MB
        "large-v2",  # ~1550 MB
        "large-v3",  # ~1550 MB
    )

    successful_downloads = 0
    for model in models:
        print(f"\n📦 Procesando modelo: {model}")
        if not download_model(model):
            print(f"⚠️ Saltando modelo {model} por error")
            continue
        successful_downloads += 1

    print("\n🌐 Descargando modelos de alineación...")
    download_alignment_models()

    print("\n" + banner)
    print(f"✅ Descarga completada: {successful_downloads}/{len(models)} modelos")

    # Nothing was downloaded: report it and signal failure to the caller.
    if successful_downloads == 0:
        print("❌ No se pudo descargar ningún modelo")
        return 1

    print("🎉 ¡Sistema listo para uso offline!")
    print("📁 Los modelos están guardados en las carpetas app_cache/")
    return 0
if __name__ == "__main__":
    import traceback

    # Run the downloader and translate every outcome into a process exit code.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\n⚠️ Descarga interrumpida por el usuario")
        exit_code = 1
    except Exception as err:
        print(f"\n❌ Error inesperado: {err}")
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)