Update app.py
Browse files
app.py
CHANGED
|
@@ -16,44 +16,19 @@ st.set_page_config(
|
|
| 16 |
)
|
| 17 |
|
| 18 |
# --- IDENTIDAD CORPORATIVA, ESTILOS Y SCRIPTS ---
|
| 19 |
-
|
| 20 |
-
# 1. Logo de la empresa en Base64
|
| 21 |
-
# Asegúrate de que esta variable contiene tu logo en Base64
|
| 22 |
logo_base64 = "..." # Pega aquí el código base64 de tu logo
|
| 23 |
-
|
| 24 |
-
# 2. Estilos y el Script de copiado
|
| 25 |
st.markdown(f"""
|
| 26 |
<style>
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
--silver: #C1C7CA;
|
| 32 |
-
--white: #FFFFFF;
|
| 33 |
-
}}
|
| 34 |
-
.main {{ background-color: #F0F2F6; color: var(--charcoal); }}
|
| 35 |
-
.stApp {{ max-width: 800px; margin: auto; }}
|
| 36 |
-
.stButton>button {{
|
| 37 |
-
width: 100%; background-color: var(--gunmetal); color: var(--white);
|
| 38 |
-
border: none; padding: 12px; border-radius: 8px; font-size: 16px; font-weight: bold;
|
| 39 |
-
}}
|
| 40 |
.stButton>button:hover {{ background-color: var(--charcoal); color: var(--white); }}
|
| 41 |
-
.logo-container {{
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
margin-bottom: 2rem;
|
| 45 |
-
}}
|
| 46 |
-
.logo-container img {{ width: 200px; }}
|
| 47 |
-
.stProgress > div > div > div > div {{ background-color: var(--cambridge-blue); }}
|
| 48 |
-
|
| 49 |
-
.copy-btn {{
|
| 50 |
-
background-color: var(--silver); color: var(--charcoal); border: none;
|
| 51 |
-
padding: 5px 10px; border-radius: 5px; font-size: 14px; font-weight: bold;
|
| 52 |
-
cursor: pointer; transition: background-color 0.2s; float: right;
|
| 53 |
-
}}
|
| 54 |
.copy-btn:hover {{ background-color: var(--cambridge-blue); }}
|
| 55 |
</style>
|
| 56 |
-
|
| 57 |
<script>
|
| 58 |
function copyToClipboard(text, buttonId) {{
|
| 59 |
navigator.clipboard.writeText(text).then(function() {{
|
|
@@ -66,43 +41,25 @@ st.markdown(f"""
|
|
| 66 |
</script>
|
| 67 |
""", unsafe_allow_html=True)
|
| 68 |
|
| 69 |
-
# 3. Mostrar el logo y título
|
| 70 |
if logo_base64 != "...":
|
| 71 |
-
|
| 72 |
-
st.markdown(f"""
|
| 73 |
-
<div class="logo-container">
|
| 74 |
-
<img src="data:image/png;base64,{logo_base64}" alt="Logo de la empresa">
|
| 75 |
-
</div>
|
| 76 |
-
""", unsafe_allow_html=True)
|
| 77 |
|
| 78 |
st.title("Transcriptor Profesional de Vídeos")
|
| 79 |
st.markdown("Pega las URLs de los vídeos de Vimeo (públicos o privados) que quieres transcribir. **Límite de 30 minutos por vídeo.**")
|
| 80 |
|
| 81 |
-
# --- Funciones Principales ---
|
| 82 |
@st.cache_resource(show_spinner="Cargando modelo de IA por primera vez...")
|
| 83 |
def load_whisper_model(model_name):
|
| 84 |
-
|
| 85 |
-
return model
|
| 86 |
|
| 87 |
-
|
| 88 |
-
urls_input = st.text_area(
|
| 89 |
-
"📝 Pega aquí las URLs de Vimeo (una por línea):",
|
| 90 |
-
height=100,
|
| 91 |
-
placeholder="https://vimeo.com/..."
|
| 92 |
-
)
|
| 93 |
col1, col2 = st.columns(2)
|
| 94 |
with col1:
|
| 95 |
whisper_model = st.selectbox("🤖 Modelo de IA:", ["tiny", "base", "small"], index=1, help="El modelo 'base' ofrece un excelente equilibrio entre velocidad y precisión.")
|
| 96 |
with col2:
|
| 97 |
force_language = st.checkbox("🇪🇸 Forzar idioma español", value=True, help="Márcalo si todos los vídeos están en español.")
|
| 98 |
|
| 99 |
-
# --- Lógica de Procesamiento ---
|
| 100 |
if st.button("🚀 Iniciar Transcripción"):
|
| 101 |
urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
|
| 102 |
-
total_urls = len(urls)
|
| 103 |
-
|
| 104 |
-
MAX_DURATION_SECONDS = 1800
|
| 105 |
-
|
| 106 |
if not urls:
|
| 107 |
st.error("❌ Por favor, introduce al menos una URL de Vimeo.")
|
| 108 |
else:
|
|
@@ -114,50 +71,44 @@ if st.button("🚀 Iniciar Transcripción"):
|
|
| 114 |
st.markdown("---")
|
| 115 |
progress_bar = st.progress(0, text="Progreso general")
|
| 116 |
|
| 117 |
-
for i,
|
| 118 |
-
st.markdown(f"### 🎬 Procesando Vídeo {i+1} de {
|
| 119 |
status_placeholder = st.empty()
|
| 120 |
audio_file_path = None
|
| 121 |
video_title = "Título desconocido"
|
| 122 |
|
| 123 |
try:
|
| 124 |
-
#
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
ydl_opts = {
|
| 128 |
'format': 'bestaudio/best',
|
| 129 |
'outtmpl': os.path.join(temp_dir, '%(title)s.%(ext)s'),
|
| 130 |
'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '64'}],
|
| 131 |
-
'quiet': True,
|
| 132 |
-
'
|
| 133 |
-
'retries': 2,
|
| 134 |
-
'socket_timeout': 30,
|
| 135 |
-
'http_headers': {
|
| 136 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
| 137 |
-
'Authorization': 'bearer 27d734898683a4b1f863c3453b3b4331',
|
| 138 |
-
'Referer': url # El Referer ahora es la URL del vídeo
|
| 139 |
-
},
|
| 140 |
'postprocessor_args': ['-ar', '16000', '-ac', '1']
|
| 141 |
}
|
| 142 |
-
# --- FIN DE LA MODIFICACIÓN CLAVE ---
|
| 143 |
|
| 144 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 145 |
-
status_placeholder.info("1/5 - Obteniendo información del vídeo...")
|
| 146 |
info_dict = ydl.extract_info(url, download=False)
|
| 147 |
video_title = info_dict.get('title', 'Título desconocido')
|
| 148 |
duration = info_dict.get('duration')
|
| 149 |
|
|
|
|
| 150 |
if not duration or duration > MAX_DURATION_SECONDS:
|
| 151 |
-
status_placeholder.warning(f"⚠️ **'{video_title}' omitido:** El vídeo es demasiado largo (límite {int(MAX_DURATION_SECONDS/60)} min)
|
| 152 |
continue
|
| 153 |
|
| 154 |
status_placeholder.info(f"2/5 - Descargando audio de '{video_title}'...")
|
| 155 |
ydl.download([url])
|
| 156 |
|
| 157 |
audio_file_path = next((os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith('.mp3')), None)
|
| 158 |
-
|
| 159 |
if not audio_file_path or os.path.getsize(audio_file_path) < 1024:
|
| 160 |
-
status_placeholder.error(f"❌ **Error con '{video_title}':** La descarga falló
|
| 161 |
continue
|
| 162 |
|
| 163 |
status_placeholder.info(f"3/5 - Verificando integridad del audio...")
|
|
@@ -168,37 +119,31 @@ if st.button("🚀 Iniciar Transcripción"):
|
|
| 168 |
status_placeholder.error(f"❌ **Error con '{video_title}':** No se pudo decodificar el audio. Se omite.")
|
| 169 |
continue
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
result = model.transcribe(audio_file_path, fp16=False, language="es" if force_language else None, task="transcribe")
|
| 175 |
-
transcription_text = result['text']
|
| 176 |
-
|
| 177 |
-
status_placeholder.success(f"5/5 - ✅ Transcripción de '{video_title}' completada en {time.time() - start_time:.2f} segundos.")
|
| 178 |
-
|
| 179 |
-
with st.expander(f"Ver Transcripción de '{video_title}'", expanded=True):
|
| 180 |
-
js_text = transcription_text.replace("`", "\\`").replace("'", "\\'").replace("\n", "\\n")
|
| 181 |
-
button_id = f"copy-btn-{i}"
|
| 182 |
-
button_html = f'<button id="{button_id}" class="copy-btn" onclick="copyToClipboard(`{js_text}`, \'{button_id}\')">Copiar</button>'
|
| 183 |
-
st.markdown(button_html, unsafe_allow_html=True)
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
except Exception as e:
|
| 191 |
-
|
| 192 |
-
status_placeholder.error(f"❌ **Error con '{video_title}':** La IA no pudo procesar el audio (posiblemente silencio). Se omite.")
|
| 193 |
-
else:
|
| 194 |
-
status_placeholder.error(f"❌ Error inesperado al procesar el vídeo '{video_title}'. Causa: {e}")
|
| 195 |
|
| 196 |
finally:
|
| 197 |
-
if audio_file_path and os.path.exists(audio_file_path):
|
| 198 |
-
os.remove(audio_file_path)
|
| 199 |
|
| 200 |
-
progress_bar.progress((i + 1) /
|
| 201 |
|
|
|
|
| 202 |
st.markdown("---")
|
| 203 |
transcription_files = os.listdir(transcriptions_dir)
|
| 204 |
if transcription_files:
|
|
@@ -208,6 +153,6 @@ if st.button("🚀 Iniciar Transcripción"):
|
|
| 208 |
with open(f"{zip_path}.zip", "rb") as fp:
|
| 209 |
st.download_button(label=f"📥 Descargar {len(transcription_files)} transcripciones (.zip)", data=fp, file_name="transcripciones_vimeo.zip", mime="application/zip")
|
| 210 |
else:
|
| 211 |
-
st.warning("⚠️ No se generó ninguna transcripción.
|
| 212 |
except Exception as e:
|
| 213 |
st.error(f"🚨 Ha ocurrido un error crítico durante la configuración: {e}")
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
# --- IDENTIDAD CORPORATIVA, ESTILOS Y SCRIPTS ---
|
|
|
|
|
|
|
|
|
|
| 19 |
logo_base64 = "..." # Pega aquí el código base64 de tu logo
|
|
|
|
|
|
|
| 20 |
st.markdown(f"""
|
| 21 |
<style>
|
| 22 |
+
/* ... (tu CSS aquí, no es necesario cambiarlo) ... */
|
| 23 |
+
:root {{ --gunmetal: #14323C; --cambridge-blue: #94CBB1; --charcoal: #2F4952; --silver: #C1C7CA; --white: #FFFFFF; }}
|
| 24 |
+
.main {{ background-color: #F0F2F6; color: var(--charcoal); }} .stApp {{ max-width: 800px; margin: auto; }}
|
| 25 |
+
.stButton>button {{ width: 100%; background-color: var(--gunmetal); color: var(--white); border: none; padding: 12px; border-radius: 8px; font-size: 16px; font-weight: bold; }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
.stButton>button:hover {{ background-color: var(--charcoal); color: var(--white); }}
|
| 27 |
+
.logo-container {{ display: flex; justify-content: center; align-items: center; padding: 10px; background-color: var(--white); border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); margin-bottom: 2rem; }}
|
| 28 |
+
.logo-container img {{ width: 200px; }} .stProgress > div > div > div > div {{ background-color: var(--cambridge-blue); }}
|
| 29 |
+
.copy-btn {{ background-color: var(--silver); color: var(--charcoal); border: none; padding: 5px 10px; border-radius: 5px; font-size: 14px; font-weight: bold; cursor: pointer; transition: background-color 0.2s; float: right; }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
.copy-btn:hover {{ background-color: var(--cambridge-blue); }}
|
| 31 |
</style>
|
|
|
|
| 32 |
<script>
|
| 33 |
function copyToClipboard(text, buttonId) {{
|
| 34 |
navigator.clipboard.writeText(text).then(function() {{
|
|
|
|
| 41 |
</script>
|
| 42 |
""", unsafe_allow_html=True)
|
| 43 |
|
|
|
|
| 44 |
if logo_base64 != "...":
|
| 45 |
+
st.markdown(f'<div class="logo-container"><img src="data:image/png;base64,{logo_base64}" alt="Logo de la empresa"></div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
st.title("Transcriptor Profesional de Vídeos")
|
| 48 |
st.markdown("Pega las URLs de los vídeos de Vimeo (públicos o privados) que quieres transcribir. **Límite de 30 minutos por vídeo.**")
|
| 49 |
|
|
|
|
| 50 |
@st.cache_resource(show_spinner="Cargando modelo de IA por primera vez...")
def load_whisper_model(model_name):
    """Load the Whisper model named ``model_name`` on the CPU and return it.

    The function is wrapped in ``st.cache_resource``, so the spinner text is
    shown while the model is being loaded.

    Args:
        model_name: Name of the Whisper model to load; the UI offers
            "tiny", "base" and "small".

    Returns:
        The object returned by ``whisper.load_model``.
    """
    cpu_model = whisper.load_model(model_name, device="cpu")
    return cpu_model
|
|
|
|
| 53 |
|
| 54 |
+
urls_input = st.text_area("📝 Pega aquí las URLs de Vimeo (una por línea):", height=100, placeholder="https://vimeo.com/...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
col1, col2 = st.columns(2)
|
| 56 |
with col1:
|
| 57 |
whisper_model = st.selectbox("🤖 Modelo de IA:", ["tiny", "base", "small"], index=1, help="El modelo 'base' ofrece un excelente equilibrio entre velocidad y precisión.")
|
| 58 |
with col2:
|
| 59 |
force_language = st.checkbox("🇪🇸 Forzar idioma español", value=True, help="Márcalo si todos los vídeos están en español.")
|
| 60 |
|
|
|
|
| 61 |
if st.button("🚀 Iniciar Transcripción"):
|
| 62 |
urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
if not urls:
|
| 64 |
st.error("❌ Por favor, introduce al menos una URL de Vimeo.")
|
| 65 |
else:
|
|
|
|
| 71 |
st.markdown("---")
|
| 72 |
progress_bar = st.progress(0, text="Progreso general")
|
| 73 |
|
| 74 |
+
for i, raw_url in enumerate(urls):
|
| 75 |
+
st.markdown(f"### 🎬 Procesando Vídeo {i+1} de {len(urls)}")
|
| 76 |
status_placeholder = st.empty()
|
| 77 |
audio_file_path = None
|
| 78 |
video_title = "Título desconocido"
|
| 79 |
|
| 80 |
try:
|
| 81 |
+
# Clean the pasted URL: drop tracking/query noise and any fragment, but keep
# the ``h`` query parameter. NOTE(review): Vimeo unlisted/private links can
# carry their access hash as ``?h=<hash>``; cutting everything after '?'
# (the previous behaviour) would make those videos unreachable - confirm
# against the links users actually paste.
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

url_parts = urlsplit(raw_url)
unlisted_hash = parse_qs(url_parts.query).get('h')
kept_query = urlencode({'h': unlisted_hash[0]}) if unlisted_hash else ''
url = urlunsplit((url_parts.scheme, url_parts.netloc, url_parts.path, kept_query, ''))

# Browser impersonation: yt-dlp's Python API expects an ImpersonateTarget
# object for the 'impersonate' option; the bare string form ('chrome120')
# only belongs to other tools/CLIs and is rejected here. No version is
# pinned so yt-dlp can pick whichever Chrome target the installed
# impersonation backend provides.
# NOTE(review): impersonation needs yt-dlp's optional curl_cffi dependency
# to be installed - confirm it is listed in the deployment requirements.
from yt_dlp.networking.impersonate import ImpersonateTarget

ydl_opts = {
    # Download the best audio-only stream, falling back to the best overall.
    'format': 'bestaudio/best',
    'outtmpl': os.path.join(temp_dir, '%(title)s.%(ext)s'),
    # Convert the download to a low-bitrate MP3 with ffmpeg.
    'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '64'}],
    'quiet': True,
    'no_warnings': True,
    'retries': 2,
    'socket_timeout': 30,
    'impersonate': ImpersonateTarget('chrome'),
    # Extra ffmpeg arguments: 16 kHz sample rate, single (mono) channel.
    'postprocessor_args': ['-ar', '16000', '-ac', '1'],
}
|
|
|
|
| 94 |
|
| 95 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 96 |
+
status_placeholder.info("1/5 - Obteniendo información del vídeo (modo suplantación)...")
|
| 97 |
info_dict = ydl.extract_info(url, download=False)
|
| 98 |
video_title = info_dict.get('title', 'Título desconocido')
|
| 99 |
duration = info_dict.get('duration')
|
| 100 |
|
| 101 |
+
MAX_DURATION_SECONDS = 1800
|
| 102 |
if not duration or duration > MAX_DURATION_SECONDS:
|
| 103 |
+
status_placeholder.warning(f"⚠️ **'{video_title}' omitido:** El vídeo es demasiado largo (límite {int(MAX_DURATION_SECONDS/60)} min).")
|
| 104 |
continue
|
| 105 |
|
| 106 |
status_placeholder.info(f"2/5 - Descargando audio de '{video_title}'...")
|
| 107 |
ydl.download([url])
|
| 108 |
|
| 109 |
audio_file_path = next((os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith('.mp3')), None)
|
|
|
|
| 110 |
if not audio_file_path or os.path.getsize(audio_file_path) < 1024:
|
| 111 |
+
status_placeholder.error(f"❌ **Error con '{video_title}':** La descarga falló. Se omite.")
|
| 112 |
continue
|
| 113 |
|
| 114 |
status_placeholder.info(f"3/5 - Verificando integridad del audio...")
|
|
|
|
| 119 |
status_placeholder.error(f"❌ **Error con '{video_title}':** No se pudo decodificar el audio. Se omite.")
|
| 120 |
continue
|
| 121 |
|
| 122 |
+
status_placeholder.info(f"4/5 - Transcribiendo '{video_title}'...")
|
| 123 |
+
result = model.transcribe(audio_file_path, fp16=False, language="es" if force_language else None, task="transcribe")
|
| 124 |
+
transcription_text = result['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
status_placeholder.success(f"5/5 - ✅ Transcripción de '{video_title}' completada.")
|
| 127 |
+
|
| 128 |
+
with st.expander(f"Ver Transcripción de '{video_title}'", expanded=True):
|
| 129 |
+
# Render the "Copiar" button for this transcription.
# The transcription is arbitrary text, so it must not be spliced into the
# markup by hand: json.dumps produces a valid JavaScript string literal
# (quotes, backslashes and newlines escaped), and html.escape then makes
# that literal safe inside the double-quoted onclick attribute. The previous
# backtick-based escaping let text containing `"`, `\` or `${...}` break the
# attribute or be interpreted as JavaScript.
import html
import json

js_text = html.escape(json.dumps(transcription_text), quote=True)
button_id = f"copy-btn-{i}"
st.markdown(f'<button id="{button_id}" class="copy-btn" onclick="copyToClipboard({js_text}, \'{button_id}\')">Copiar</button>', unsafe_allow_html=True)
|
| 133 |
+
st.text_area("Transcripción:", value=transcription_text, height=200, key=f"text_{i}", label_visibility="collapsed")
|
| 134 |
+
|
| 135 |
+
clean_title = re.sub(r'[\\/*?:"<>|]', "", video_title)
|
| 136 |
+
with open(os.path.join(transcriptions_dir, f"{clean_title}.txt"), "w", encoding="utf-8") as f: f.write(transcription_text)
|
| 137 |
|
| 138 |
except Exception as e:
|
| 139 |
+
status_placeholder.error(f"❌ Error al procesar '{video_title}'. Causa: {e}")
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
finally:
|
| 142 |
+
if audio_file_path and os.path.exists(audio_file_path): os.remove(audio_file_path)
|
|
|
|
| 143 |
|
| 144 |
+
progress_bar.progress((i + 1) / len(urls), text=f"Progreso: {i+1}/{len(urls)} vídeos completados")
|
| 145 |
|
| 146 |
+
# ... (resto del código para el zip, sin cambios)
|
| 147 |
st.markdown("---")
|
| 148 |
transcription_files = os.listdir(transcriptions_dir)
|
| 149 |
if transcription_files:
|
|
|
|
| 153 |
with open(f"{zip_path}.zip", "rb") as fp:
|
| 154 |
st.download_button(label=f"📥 Descargar {len(transcription_files)} transcripciones (.zip)", data=fp, file_name="transcripciones_vimeo.zip", mime="application/zip")
|
| 155 |
else:
|
| 156 |
+
st.warning("⚠️ No se generó ninguna transcripción.")
|
| 157 |
except Exception as e:
|
| 158 |
st.error(f"🚨 Ha ocurrido un error crítico durante la configuración: {e}")
|