import html
import json
import os
import re
import shutil
import tempfile
import time

import streamlit as st
import whisper
import yt_dlp
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
|
|
| |
# Page-level configuration; kept as the first Streamlit call in the script.
st.set_page_config(page_title="Vimeo Transcriptor Pro", layout="centered")

# Base64-encoded PNG logo. The literal "..." placeholder means "no logo".
logo_base64 = "..."

# Global CSS theme plus the copyToClipboard() helper referenced by the
# per-transcription "Copiar" buttons.
# NOTE(review): confirm that Streamlit actually executes a <script> element
# injected through st.markdown; if it does not, copyToClipboard is never defined.
_page_assets = f"""
<style>
/* ... (tu CSS aquí, no es necesario cambiarlo) ... */
:root {{ --gunmetal: #14323C; --cambridge-blue: #94CBB1; --charcoal: #2F4952; --silver: #C1C7CA; --white: #FFFFFF; }}
.main {{ background-color: #F0F2F6; color: var(--charcoal); }} .stApp {{ max-width: 800px; margin: auto; }}
.stButton>button {{ width: 100%; background-color: var(--gunmetal); color: var(--white); border: none; padding: 12px; border-radius: 8px; font-size: 16px; font-weight: bold; }}
.stButton>button:hover {{ background-color: var(--charcoal); color: var(--white); }}
.logo-container {{ display: flex; justify-content: center; align-items: center; padding: 10px; background-color: var(--white); border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); margin-bottom: 2rem; }}
.logo-container img {{ width: 200px; }} .stProgress > div > div > div > div {{ background-color: var(--cambridge-blue); }}
.copy-btn {{ background-color: var(--silver); color: var(--charcoal); border: none; padding: 5px 10px; border-radius: 5px; font-size: 14px; font-weight: bold; cursor: pointer; transition: background-color 0.2s; float: right; }}
.copy-btn:hover {{ background-color: var(--cambridge-blue); }}
</style>
<script>
function copyToClipboard(text, buttonId) {{
navigator.clipboard.writeText(text).then(function() {{
var button = document.getElementById(buttonId);
var originalText = button.innerText;
button.innerText = "¡Copiado!";
setTimeout(function() {{ button.innerText = originalText; }}, 2000);
}}, function(err) {{ console.error('Error al copiar texto: ', err); }});
}}
</script>
"""
st.markdown(_page_assets, unsafe_allow_html=True)

# Render the logo banner only once a real image has replaced the placeholder.
_has_logo = logo_base64 != "..."
if _has_logo:
    _logo_html = f'<div class="logo-container"><img src="data:image/png;base64,{logo_base64}" alt="Logo de la empresa"></div>'
    st.markdown(_logo_html, unsafe_allow_html=True)

# Page heading and short usage instructions.
st.title("Transcriptor Profesional de Vídeos")
st.markdown(
    "Pega las URLs de los vídeos de Vimeo (públicos o privados) que quieres transcribir. **Límite de 30 minutos por vídeo.**"
)
|
|
@st.cache_resource(show_spinner="Cargando modelo de IA por primera vez...")
def load_whisper_model(model_name: str):
    """Load the Whisper model called *model_name* on the CPU.

    The function is wrapped in ``st.cache_resource`` and shows a spinner
    message while it runs.

    Args:
        model_name: Name passed straight through to ``whisper.load_model``.

    Returns:
        Whatever ``whisper.load_model`` returns for that name.
    """
    loaded_model = whisper.load_model(model_name, device="cpu")
    return loaded_model
|
|
# Free-form input: one Vimeo URL per line.
urls_input = st.text_area(
    "📝 Pega aquí las URLs de Vimeo (una por línea):",
    height=100,
    placeholder="https://vimeo.com/...",
)

# Two side-by-side options: Whisper model size (left column, "base" selected
# by default) and the Spanish language override (right column, on by default).
col1, col2 = st.columns(2)
whisper_model = col1.selectbox(
    "🤖 Modelo de IA:",
    ["tiny", "base", "small"],
    index=1,
    help="El modelo 'base' ofrece un excelente equilibrio entre velocidad y precisión.",
)
force_language = col2.checkbox(
    "🇪🇸 Forzar idioma español",
    value=True,
    help="Márcalo si todos los vídeos están en español.",
)
|
|
# Longest video accepted for transcription, in seconds (30 minutes).
MAX_DURATION_SECONDS = 1800

# Main action: download, verify and transcribe every URL, then offer a ZIP.
if st.button("🚀 Iniciar Transcripción"):
    # One URL per non-empty line of the text area.
    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
    if not urls:
        st.error("❌ Por favor, introduce al menos una URL de Vimeo.")
    else:
        try:
            model = load_whisper_model(whisper_model)
            with tempfile.TemporaryDirectory() as temp_dir:
                transcriptions_dir = os.path.join(temp_dir, "transcripciones")
                os.makedirs(transcriptions_dir)
                st.markdown("---")
                progress_bar = st.progress(0, text="Progreso general")

                for i, raw_url in enumerate(urls):
                    st.markdown(f"### 🎬 Procesando Vídeo {i+1} de {len(urls)}")
                    status_placeholder = st.empty()
                    video_dir = None
                    video_title = "Título desconocido"

                    try:
                        # Drop the query string before handing the URL to yt-dlp.
                        # NOTE(review): this also removes a "?h=<hash>" parameter if
                        # private links carry one — confirm that is intended.
                        url = raw_url.split('?')[0]

                        # Each video downloads into its own directory so that the
                        # ".mp3" lookup below can never pick up a file left behind
                        # by a previously failed video.
                        video_dir = tempfile.mkdtemp(prefix=f"video_{i}_", dir=temp_dir)

                        # NOTE(review): 'impersonate' is passed as a plain string;
                        # verify that the installed yt-dlp accepts this form.
                        ydl_opts = {
                            'format': 'bestaudio/best',
                            'outtmpl': os.path.join(video_dir, '%(title)s.%(ext)s'),
                            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '64'}],
                            'quiet': True, 'no_warnings': True, 'retries': 2, 'socket_timeout': 30,
                            'impersonate': 'chrome120',
                            'postprocessor_args': ['-ar', '16000', '-ac', '1'],
                        }

                        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                            status_placeholder.info("1/5 - Obteniendo información del vídeo (modo suplantación)...")
                            info_dict = ydl.extract_info(url, download=False)
                            video_title = info_dict.get('title', 'Título desconocido')
                            duration = info_dict.get('duration')

                            # A missing duration and an over-long video are both
                            # skipped, but each gets its own message.
                            if not duration:
                                status_placeholder.warning(f"⚠️ **'{video_title}' omitido:** No se pudo determinar la duración del vídeo.")
                                continue
                            if duration > MAX_DURATION_SECONDS:
                                status_placeholder.warning(f"⚠️ **'{video_title}' omitido:** El vídeo es demasiado largo (límite {MAX_DURATION_SECONDS // 60} min).")
                                continue

                            status_placeholder.info(f"2/5 - Descargando audio de '{video_title}'...")
                            ydl.download([url])

                        # The post-processor is configured to emit an ".mp3" file.
                        audio_file_path = next(
                            (os.path.join(video_dir, name) for name in os.listdir(video_dir) if name.endswith('.mp3')),
                            None,
                        )
                        # Anything under 1 KiB is treated as a failed download.
                        if not audio_file_path or os.path.getsize(audio_file_path) < 1024:
                            status_placeholder.error(f"❌ **Error con '{video_title}':** La descarga falló. Se omite.")
                            continue

                        status_placeholder.info("3/5 - Verificando integridad del audio...")
                        try:
                            audio = AudioSegment.from_mp3(audio_file_path)
                            if len(audio) == 0:
                                raise ValueError("Duración cero.")
                        except (CouldntDecodeError, ValueError):
                            status_placeholder.error(f"❌ **Error con '{video_title}':** No se pudo decodificar el audio. Se omite.")
                            continue

                        status_placeholder.info(f"4/5 - Transcribiendo '{video_title}'...")
                        result = model.transcribe(
                            audio_file_path,
                            fp16=False,
                            language="es" if force_language else None,
                            task="transcribe",
                        )
                        transcription_text = result['text']

                        status_placeholder.success(f"5/5 - ✅ Transcripción de '{video_title}' completada.")

                        with st.expander(f"Ver Transcripción de '{video_title}'", expanded=True):
                            # json.dumps yields a valid JavaScript string literal and
                            # html.escape makes it safe inside the double-quoted
                            # onclick attribute, so quotes, backslashes or "${" in
                            # the transcription can no longer break the markup.
                            # NOTE(review): the button only works if copyToClipboard()
                            # is defined on the page and Streamlit keeps the inline
                            # onclick handler — confirm in the browser.
                            copy_payload = html.escape(json.dumps(transcription_text), quote=True)
                            button_id = f"copy-btn-{i}"
                            st.markdown(
                                f'<button id="{button_id}" class="copy-btn" '
                                f'onclick="copyToClipboard({copy_payload}, \'{button_id}\')">Copiar</button>',
                                unsafe_allow_html=True,
                            )
                            st.text_area("Transcripción:", value=transcription_text, height=200, key=f"text_{i}", label_visibility="collapsed")

                        # Remove characters that are not allowed in file names; fall
                        # back to a positional name if nothing usable is left.
                        clean_title = re.sub(r'[\\/*?:"<>|]', "", video_title).strip() or f"video_{i + 1}"
                        transcript_path = os.path.join(transcriptions_dir, f"{clean_title}.txt")
                        # Two videos may share a title; never overwrite an earlier file.
                        if os.path.exists(transcript_path):
                            transcript_path = os.path.join(transcriptions_dir, f"{clean_title}_{i + 1}.txt")
                        with open(transcript_path, "w", encoding="utf-8") as out:
                            out.write(transcription_text)

                    except Exception as e:
                        # Per-video boundary: report the failure and move on.
                        status_placeholder.error(f"❌ Error al procesar '{video_title}'. Causa: {e}")

                    finally:
                        # Best-effort cleanup; the enclosing TemporaryDirectory
                        # removes anything that could not be deleted here.
                        if video_dir:
                            shutil.rmtree(video_dir, ignore_errors=True)
                        # Updated here so that skipped videos (which leave the try
                        # block through `continue`) still advance the progress bar.
                        progress_bar.progress((i + 1) / len(urls), text=f"Progreso: {i+1}/{len(urls)} vídeos completados")

                st.markdown("---")
                transcription_files = os.listdir(transcriptions_dir)
                if transcription_files:
                    st.success("🎉 ¡Proceso completado!")
                    # make_archive appends ".zip" to the base name it is given.
                    zip_path = os.path.join(temp_dir, "transcripciones_vimeo")
                    shutil.make_archive(zip_path, 'zip', transcriptions_dir)
                    with open(f"{zip_path}.zip", "rb") as fp:
                        st.download_button(
                            label=f"📥 Descargar {len(transcription_files)} transcripciones (.zip)",
                            data=fp,
                            file_name="transcripciones_vimeo.zip",
                            mime="application/zip",
                        )
                else:
                    st.warning("⚠️ No se generó ninguna transcripción.")
        except Exception as e:
            # Top-level boundary for failures outside the per-video handling.
            st.error(f"🚨 Ha ocurrido un error crítico durante la configuración: {e}")