joseluisthepower committed on
Commit
d6698e2
·
verified ·
1 Parent(s): 8c61f1a

implementacion mejoras claude. antes funcionaba

Browse files
Files changed (1) hide show
  1. app.py +323 -85
app.py CHANGED
@@ -8,6 +8,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
8
  import moviepy.editor as mp
9
  import langdetect
10
  import uuid
 
 
11
 
12
  # --- CONFIGURACIÓN INICIAL ---
13
  print("Starting the program...")
@@ -20,6 +22,64 @@ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float
20
  model = model.eval()
21
  print("Model successfully loaded.")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # --- FUNCIONES AUXILIARES ---
24
  def generate_unique_filename(extension):
25
  return f"{uuid.uuid4()}{extension}"
@@ -33,150 +93,328 @@ def cleanup_files(*files):
33
  except OSError as e:
34
  print(f"Error removing file {file}: {e}")
35
 
36
- # --- LÓGICA PRINCIPAL DE PROCESAMIENTO ---
37
- def download_youtube_audio(url):
38
- print(f"Downloading audio from YouTube: {url}")
39
- # Usar un nombre de archivo temporal sin la extensión final en outtmpl
 
 
 
 
 
 
 
 
40
  temp_filename = generate_unique_filename("")
41
  output_path = f"{temp_filename}.wav"
42
 
43
- ydl_opts = {
44
- 'format': 'bestaudio/best',
45
- 'postprocessors': [{
46
- 'key': 'FFmpegExtractAudio',
47
- 'preferredcodec': 'wav',
48
- }],
49
- 'outtmpl': temp_filename, # yt-dlp añadirá la extensión
50
- 'keepvideo': False,
51
- }
52
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
53
- ydl.download([url])
54
 
55
- if not os.path.exists(output_path):
56
- raise FileNotFoundError(f"Error: Expected file {output_path} was not found after download.")
57
-
58
- print(f"Audio download completed. File saved at: {output_path}")
59
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- def transcribe_audio(file_path):
 
62
  print(f"Starting transcription of file: {file_path}")
63
  temp_audio = None
64
  original_file_to_clean = file_path
65
 
66
  try:
 
67
  if not file_path.endswith('.wav'):
68
- print("Non-WAV file detected. Extracting audio...")
69
  video = mp.VideoFileClip(file_path)
70
  temp_audio = generate_unique_filename(".wav")
71
- video.audio.write_audiofile(temp_audio)
72
- file_path = temp_audio # Usar el archivo de audio extraído para la transcripción
 
73
 
74
  output_file = generate_unique_filename(".json")
 
 
75
  command = [
76
  "insanely-fast-whisper",
77
  "--file-name", file_path,
78
- "--device-id", "cpu", # Configurado para CPU
79
  "--model-name", "openai/whisper-large-v3",
80
  "--task", "transcribe",
81
  "--timestamp", "chunk",
82
- "--transcript-path", output_file
 
83
  ]
84
 
85
- print(f"Executing command: {' '.join(command)}")
86
- subprocess.run(command, check=True, capture_output=True, text=True)
 
 
 
 
 
 
87
 
88
- print(f"Reading transcription file: {output_file}")
89
- with open(output_file, "r") as f:
 
 
 
90
  transcription_data = json.load(f)
91
 
92
- result_text = transcription_data.get("text", " ".join([chunk["text"] for chunk in transcription_data.get("chunks", [])]))
93
- print("Transcription completed.")
 
 
 
94
 
 
95
  cleanup_files(output_file)
96
- return result_text
97
 
 
 
 
98
  except Exception as e:
99
- print(f"An error occurred during transcription: {e}")
100
- raise # Vuelve a lanzar la excepción para que sea manejada arriba
101
-
102
  finally:
103
- # Limpieza de archivos temporales
104
  if temp_audio:
105
  cleanup_files(temp_audio)
106
- # Si el archivo original era una subida, Gradio lo borra. Si era de YT, lo borramos nosotros.
107
  if original_file_to_clean != file_path:
108
- cleanup_files(original_file_to_clean)
109
-
110
 
111
  def generate_summary_stream(transcription):
 
112
  if not transcription or len(transcription.strip()) < 20:
113
  return "Transcription is too short to summarize."
114
 
115
- print("Starting summary generation...")
116
- detected_language = langdetect.detect(transcription)
117
 
118
- prompt = f"""Summarize the following video transcription in 150-300 words. The summary should be in the same language as the transcription ({detected_language}). Please capture the main points and key ideas of the text:
119
- {transcription[:20000]}..."""
 
 
 
120
 
121
- response, _ = model.chat(tokenizer, prompt, history=[])
122
- print("Summary generation completed.")
123
- return response
 
 
 
 
 
124
 
125
- # --- FUNCIONES DE INTERFAZ PARA GRADIO ---
126
- def process_youtube_url(url):
127
- if not url:
128
- return "Please enter a YouTube URL.", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- print(f"Processing YouTube URL: {url}")
131
  audio_file = None
132
  try:
133
- audio_file = download_youtube_audio(url)
134
- transcription = transcribe_audio(audio_file)
135
- return transcription, ""
 
 
 
 
 
 
136
  except Exception as e:
137
- print(f"Error processing YouTube: {e}")
138
- return f"Error processing YouTube: {str(e)}", ""
 
 
 
 
 
 
 
 
 
 
 
 
139
  finally:
140
- cleanup_files(audio_file)
 
141
 
142
- def process_uploaded_video(video_path):
 
143
  if video_path is None:
144
  return "Please upload a video file first.", ""
145
 
146
- print(f"Processing uploaded video at: {video_path}")
 
147
  try:
148
- transcription = transcribe_audio(video_path)
149
- return transcription, ""
 
 
 
 
 
150
  except Exception as e:
151
- print(f"Error processing video: {e}")
152
- return f"Error processing video: {str(e)}", ""
 
 
 
 
153
 
154
- # --- CONSTRUCCIÓN DE LA INTERFAZ DE GRADIO ---
155
- print("Setting up Gradio interface...")
156
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
157
- gr.Markdown("# 🎥 Video Transcription and Smart Summary")
158
- gr.Markdown("Upload a video or provide a YouTube link to get a transcription and AI-generated summary.")
 
 
 
 
 
 
 
159
 
160
  with gr.Tabs():
161
- with gr.TabItem("📤 Video Upload"):
162
- video_input = gr.Video(label="Upload Video")
163
- video_button = gr.Button("🚀 Process Video", variant="primary")
 
 
 
 
 
164
 
165
- with gr.TabItem("🔗 YouTube Link"):
166
- url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
167
- url_button = gr.Button("🚀 Process URL", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  with gr.Row():
170
- transcription_output = gr.Textbox(label="📝 Transcription", lines=10, interactive=True)
171
- summary_output = gr.Textbox(label="📊 Summary", lines=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- summary_button = gr.Button("📝 Generate Summary", variant="secondary")
 
 
 
 
174
 
175
- # Conexiones de la UI
176
- video_button.click(fn=process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
177
- url_button.click(fn=process_youtube_url, inputs=[url_input], outputs=[transcription_output, summary_output])
178
- summary_button.click(fn=generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
 
179
 
180
- print("Launching Gradio interface...")
181
- # Configuración de launch() para ser compatible con Docker y Hugging Face Spaces
182
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
8
  import moviepy.editor as mp
9
  import langdetect
10
  import uuid
11
+ import time
12
+ import random
13
 
14
  # --- CONFIGURACIÓN INICIAL ---
15
  print("Starting the program...")
 
22
  model = model.eval()
23
  print("Model successfully loaded.")
24
 
25
+ # --- CONFIGURACIÓN AVANZADA DE YT-DLP PARA VIMEO ---
26
def get_enhanced_ydl_opts():
    """Build the yt-dlp options used for audio downloads.

    The options ask yt-dlp for the best audio stream, convert it to WAV
    with the FFmpeg post-processor, and add browser-like headers, request
    pacing and retry settings intended to reduce blocking by video hosts
    such as Vimeo.

    Returns:
        dict: A new options dictionary on every call (the sleep intervals
        are re-randomized each time). The caller is expected to add
        ``outtmpl`` before passing it to ``yt_dlp.YoutubeDL``.
    """
    # Local import: only needed for the optional impersonation probe below.
    import importlib.util

    opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'keepvideo': False,

        # === ANTI-BLOCKING SETTINGS ===
        # Realistic desktop Chrome headers.
        # NOTE: 'Accept-Encoding' is deliberately NOT set here. yt-dlp
        # advertises only the encodings it can actually decode; forcing
        # "br" without a brotli decoder installed yields unreadable bodies.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        },

        # Rate limiting: random pauses so traffic looks less automated.
        'sleep_interval': random.uniform(2, 5),  # always below max_sleep_interval
        'max_sleep_interval': 8,
        'sleep_interval_requests': random.uniform(0.5, 2),  # between requests

        # Network settings.
        'socket_timeout': 60,
        'retries': 5,
        'fragment_retries': 10,
        # Exponential backoff with jitter for HTTP retries.
        'retry_sleep_functions': {'http': lambda n: 2 ** n + random.uniform(0, 1)},

        # Geographic restriction bypass.
        'geo_bypass': True,
        'geo_bypass_country': 'US',

        # Vimeo-specific extractor arguments. The Python API expects each
        # value to be a LIST of strings; a bare string would be iterated
        # character by character.
        'extractor_args': {
            'vimeo': {
                'client': ['web'],  # web client instead of android/ios
                'original_format_policy': ['auto'],
            }
        },

        # Additional stability options.
        'no_warnings': False,
        'ignoreerrors': False,
        'abort_on_unavailable_fragments': False,
        'keep_fragments': False,
    }

    # Browser impersonation, only if available. The Python API needs an
    # ImpersonateTarget object (a plain 'chrome' string is only valid on the
    # command line), and impersonation requires the optional curl_cffi
    # backend; requesting it without that backend makes YoutubeDL fail.
    if importlib.util.find_spec('curl_cffi') is not None:
        try:
            from yt_dlp.networking.impersonate import ImpersonateTarget
        except ImportError:
            pass  # yt-dlp build without impersonation support
        else:
            opts['impersonate'] = ImpersonateTarget.from_str('chrome')

    return opts
82
+
83
  # --- FUNCIONES AUXILIARES ---
def generate_unique_filename(extension):
    """Return a random, collision-resistant file name.

    Args:
        extension: Suffix appended verbatim to the generated UUID, e.g.
            ``".wav"``; pass ``""`` for a bare name.

    Returns:
        str: ``"<uuid4><extension>"``.
    """
    return f"{uuid.uuid4()}{extension}"
 
93
  except OSError as e:
94
  print(f"Error removing file {file}: {e}")
95
 
96
def human_like_delay(min_seconds=1, max_seconds=3):
    """Sleep for a random interval to imitate human pacing.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.

    The chosen delay is printed before sleeping. Defaults reproduce the
    original fixed 1-3 second window.
    """
    delay = random.uniform(min_seconds, max_seconds)
    print(f"Waiting {delay:.1f} seconds...")
    time.sleep(delay)
101
+
102
+ # --- LÓGICA PRINCIPAL DE PROCESAMIENTO MEJORADA ---
103
def download_video_audio_enhanced(url, max_retries=3):
    """Download a video's audio track as a WAV file, retrying on failure.

    Each attempt first extracts the video metadata without downloading,
    pauses briefly, then downloads. Failed attempts are retried after an
    exponentially growing delay with random jitter.

    Args:
        url: Video page URL handed to yt-dlp.
        max_retries: Total number of attempts (must be >= 1).

    Returns:
        str: Path of the downloaded ``.wav`` file.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        Exception: If every attempt fails. The message contains the text of
            the last error and the original exception is chained as
            ``__cause__``.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    print(f"Downloading audio from: {url}")
    temp_filename = generate_unique_filename("")
    # yt-dlp's FFmpeg post-processor appends the extension itself.
    output_path = f"{temp_filename}.wav"

    # Initial pause so the request pattern looks less automated.
    human_like_delay()

    ydl_opts = get_enhanced_ydl_opts()
    ydl_opts['outtmpl'] = temp_filename

    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1}/{max_retries}")

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                # Extract metadata first, without downloading.
                print("Extracting video information...")
                info = ydl.extract_info(url, download=False)

                if not info:
                    raise Exception("Could not extract video information")

                print(f"Video found: {info.get('title', 'Unknown')}")

                # Extra pause before the actual download.
                human_like_delay()

                print("Starting download...")
                ydl.download([url])

            if os.path.exists(output_path):
                print(f"Download completed successfully: {output_path}")
                return output_path
            raise FileNotFoundError(f"Expected file {output_path} was not found")

        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {str(e)}")

            if attempt >= max_retries - 1:
                # All attempts failed: keep the original error text in the
                # message (callers inspect it) and chain the cause.
                raise Exception(
                    f"Failed to download after {max_retries} attempts: {str(e)}"
                ) from e

            # Exponential backoff with jitter between retries.
            delay = (2 ** attempt) + random.uniform(1, 3)
            print(f"Retrying in {delay:.1f} seconds...")
            time.sleep(delay)
158
 
159
def transcribe_audio_enhanced(file_path):
    """Transcribe an audio or video file with the insanely-fast-whisper CLI.

    Files whose name does not end in ``.wav`` are first converted to a
    temporary WAV file with moviepy. The CLI writes its result to a
    temporary JSON file, which is read back and then removed.

    Args:
        file_path: Path of the media file to transcribe. This function does
            not delete it; the caller owns the input file.

    Returns:
        str: The transcription text with surrounding whitespace stripped.
        Falls back to joining the per-chunk texts when the top-level
        ``"text"`` field is missing or empty.

    Raises:
        ValueError: If a non-WAV input has no audio track.
        RuntimeError: If the Whisper command exits with a non-zero status
            (the tail of its stderr is included in the message).
        FileNotFoundError: If the command produced no output file.
        Exception: If the command exceeds the 10-minute timeout.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None
    output_file = None

    try:
        # Convert to WAV if necessary.
        if not file_path.endswith('.wav'):
            print("Non-WAV file detected. Converting...")
            video = mp.VideoFileClip(file_path)
            try:
                if video.audio is None:
                    raise ValueError("The video file has no audio track to transcribe")
                temp_audio = generate_unique_filename(".wav")
                video.audio.write_audiofile(temp_audio, verbose=False, logger=None)
            finally:
                # Always release the reader, even if extraction fails.
                video.close()
            file_path = temp_audio

        output_file = generate_unique_filename(".json")

        command = [
            "insanely-fast-whisper",
            "--file-name", file_path,
            "--device-id", "cpu",
            "--model-name", "openai/whisper-large-v3",
            "--task", "transcribe",
            "--timestamp", "chunk",
            "--transcript-path", output_file,
            "--batch-size", "4",  # small batch size to avoid running out of memory
        ]

        print("Executing transcription command...")
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            timeout=600,  # 10-minute timeout
        )

        if not os.path.exists(output_file):
            raise FileNotFoundError("Transcription output file not found")

        with open(output_file, "r", encoding='utf-8') as f:
            transcription_data = json.load(f)

        result_text = transcription_data.get("text", "")
        if not result_text:
            # Fallback: concatenate the per-chunk texts.
            chunks = transcription_data.get("chunks", [])
            result_text = " ".join(chunk.get("text") or "" for chunk in chunks)

        print("Transcription completed successfully.")
        return result_text.strip()

    except subprocess.TimeoutExpired as e:
        print("Transcription timed out")
        raise Exception("Transcription process timed out") from e
    except subprocess.CalledProcessError as e:
        # Surface the CLI's own diagnostics; otherwise they are lost because
        # stderr is captured.
        details = (e.stderr or "").strip()[-500:]
        print(f"Transcription error: {e}")
        raise RuntimeError(
            f"Transcription command failed with exit code {e.returncode}: {details}"
        ) from e
    except Exception as e:
        print(f"Transcription error: {e}")
        raise
    finally:
        # Remove only the temporary files created here. The input file is
        # left alone: uploads are managed by Gradio and downloaded files are
        # removed by the caller.
        for temp_path in (temp_audio, output_file):
            if temp_path and os.path.exists(temp_path):
                cleanup_files(temp_path)
 
227
 
def generate_summary_stream(transcription):
    """Generate a 150-300 word summary of a transcription with the chat model.

    Args:
        transcription: Full transcription text.

    Returns:
        str: The model's summary, a fixed notice when the transcription is
        empty or shorter than 20 characters, or an error message if the
        model call fails. This function does not raise for model errors so
        the UI always receives a string.
    """
    if not transcription or len(transcription.strip()) < 20:
        return "Transcription is too short to summarize."

    print("Generating summary...")

    try:
        detected_language = langdetect.detect(transcription)
        print(f"Detected language: {detected_language}")
    except Exception:
        # Language detection is best-effort; fall back to English. A bare
        # ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        detected_language = "en"

    # Truncate very long transcriptions before building the prompt.
    max_chars = 15000
    truncated_text = transcription[:max_chars]
    if len(transcription) > max_chars:
        truncated_text += "..."

    prompt = (
        f"Please create a comprehensive summary of the following video transcription in {detected_language}.\n"
        "The summary should be 150-300 words and capture the main points, key ideas, and important details:\n"
        "\n"
        f"{truncated_text}"
    )

    try:
        response, _ = model.chat(tokenizer, prompt, history=[])
        print("Summary generated successfully.")
        return response
    except Exception as e:
        print(f"Summary generation error: {e}")
        return f"Error generating summary: {str(e)}"
259
+
260
+ # --- FUNCIONES DE INTERFAZ MEJORADAS ---
261
def process_video_url_enhanced(url):
    """Download and transcribe a video URL (YouTube, Vimeo, etc.) for the UI.

    Args:
        url: URL typed by the user; may be empty or ``None``.

    Returns:
        tuple[str, str]: ``(transcription_or_error_message, status)``. The
        status is non-empty only on success. Errors are reported in the
        first element instead of being raised.
    """
    if not url or not url.strip():
        return "Please enter a valid video URL.", ""

    url = url.strip()
    print(f"Processing video URL: {url}")

    # Detect the platform (used only for log and status messages).
    platform = "Unknown"
    if "youtube.com" in url or "youtu.be" in url:
        platform = "YouTube"
    elif "vimeo.com" in url:
        platform = "Vimeo"

    print(f"Detected platform: {platform}")

    # Known failure markers mapped to friendlier messages, checked in order.
    friendly_errors = (
        ("HTTP Error 401", "❌ Access denied. The video might be private or require authentication."),
        ("HTTP Error 403", "❌ Video blocked. Try again in a few minutes or check if the video is publicly accessible."),
        ("HTTP Error 429", "❌ Rate limited. Please wait a few minutes before trying again."),
        ("TLS fingerprint", "❌ Connection blocked by security measures. Try again later."),
    )

    audio_file = None
    try:
        audio_file = download_video_audio_enhanced(url)
        transcription = transcribe_audio_enhanced(audio_file)

        if not transcription:
            return "No transcription could be generated from this video.", ""

        return transcription, f"✅ Successfully processed {platform} video"

    except Exception as e:
        error_msg = str(e)
        print(f"Error processing {platform} video: {error_msg}")

        for marker, message in friendly_errors:
            if marker in error_msg:
                return message, ""
        return f"❌ Error processing video: {error_msg}", ""
    finally:
        # The downloaded audio is ours to remove, whatever the outcome.
        if audio_file:
            cleanup_files(audio_file)
307
 
308
def process_uploaded_video_enhanced(video_path):
    """Transcribe a video uploaded through the Gradio interface.

    Args:
        video_path: Path of the uploaded file, or ``None`` when nothing has
            been uploaded.

    Returns:
        tuple[str, str]: ``(transcription_or_error_message, status)``. The
        status is non-empty only on success. Errors are reported in the
        first element instead of being raised.
    """
    if video_path is None:
        return "Please upload a video file first.", ""

    print(f"Processing uploaded video: {video_path}")

    try:
        transcription = transcribe_audio_enhanced(video_path)
    except Exception as e:
        error_msg = str(e)
        print(f"Error processing uploaded video: {error_msg}")
        return f"❌ Error processing video: {error_msg}", ""

    if not transcription:
        return "No transcription could be generated from this video.", ""

    return transcription, "✅ Successfully processed uploaded video"
327
+
328
+ # --- CONSTRUCCIÓN DE LA INTERFAZ MEJORADA ---
329
# Build the Gradio UI: two input tabs (URL / file upload), side-by-side
# transcription and summary boxes, a status line, and a summary button.
# NOTE(review): the nesting below is reconstructed from a flattened diff
# view; confirm the layout against the real app.py.
print("Setting up enhanced Gradio interface...")

with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Enhanced Video Transcription") as demo:
    gr.Markdown("# 🎥 Enhanced Video Transcription & AI Summary")
    gr.Markdown("""
    Upload a video or provide a video URL (YouTube, Vimeo, etc.) to get a transcription and AI-generated summary.

    **✨ Enhanced features:**
    - 🛡️ Anti-blocking measures for Vimeo and other platforms
    - 🔄 Automatic retry with exponential backoff
    - 🌍 Geographic restriction bypass
    - 🤖 Human-like behavior simulation
    - 📊 Better error handling and reporting
    """)

    with gr.Tabs():
        with gr.TabItem("🔗 Video URL (YouTube, Vimeo, etc.)"):
            with gr.Row():
                url_input = gr.Textbox(
                    label="Video URL",
                    placeholder="https://www.youtube.com/watch?v=... or https://vimeo.com/...",
                    scale=4
                )
                url_button = gr.Button("🚀 Process URL", variant="primary", scale=1)

        with gr.TabItem("📤 Upload Video File"):
            with gr.Row():
                video_input = gr.Video(label="Upload Video File", scale=4)
                video_button = gr.Button("🚀 Process Video", variant="primary", scale=1)

    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(
                label="📝 Transcription",
                lines=12,
                interactive=True,
                placeholder="Transcription will appear here..."
            )
        with gr.Column():
            summary_output = gr.Textbox(
                label="📊 AI Summary",
                lines=12,
                placeholder="AI-generated summary will appear here..."
            )

    with gr.Row():
        status_output = gr.Textbox(
            label="📊 Status",
            interactive=False,
            placeholder="Ready to process videos..."
        )
        summary_button = gr.Button("📝 Generate Summary", variant="secondary")

    # Additional usage information.
    with gr.Accordion("ℹ️ Usage Tips", open=False):
        gr.Markdown("""
        **For best results:**
        - ✅ Use public videos (private videos may not work)
        - ✅ If you get blocked, wait 5-10 minutes before trying again
        - ✅ Vimeo links work best in format: `https://vimeo.com/VIDEO_ID`
        - ✅ For YouTube, both long and short URLs are supported
        - ✅ The system includes automatic retries with delays to avoid blocks

        **Supported formats:** MP4, AVI, MOV, MKV, WEBM, and most video formats
        """)

    # Event wiring: both processing buttons fill the transcription and
    # status boxes; the summary button reads the (editable) transcription.
    url_button.click(
        fn=process_video_url_enhanced,
        inputs=[url_input],
        outputs=[transcription_output, status_output]
    )

    video_button.click(
        fn=process_uploaded_video_enhanced,
        inputs=[video_input],
        outputs=[transcription_output, status_output]
    )

    summary_button.click(
        fn=generate_summary_stream,
        inputs=[transcription_output],
        outputs=[summary_output]
    )

print("Launching enhanced Gradio interface...")
# Bind to all interfaces on port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True,
    share=False
)