Spaces:

joseluisthepower
/

test33_transcriptor

Sleeping

App Files Files Community

joseluisthepower committed on Aug 1, 2025

Commit

d6698e2

verified ·

1 Parent(s): 8c61f1a

implementacion mejoras claude. antes funcionaba

Browse files

Files changed (1) hide show

app.py +323 -85

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import moviepy.editor as mp
 import langdetect
 import uuid
 # --- CONFIGURACIÓN INICIAL ---
 print("Starting the program...")
@@ -20,6 +22,64 @@ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float
 model = model.eval()
 print("Model successfully loaded.")
 # --- FUNCIONES AUXILIARES ---
 def generate_unique_filename(extension):
     return f"{uuid.uuid4()}{extension}"
@@ -33,150 +93,328 @@ def cleanup_files(*files):
             except OSError as e:
                 print(f"Error removing file {file}: {e}")
-# --- LÓGICA PRINCIPAL DE PROCESAMIENTO ---
-def download_youtube_audio(url):
-    print(f"Downloading audio from YouTube: {url}")
-    # Usar un nombre de archivo temporal sin la extensión final en outtmpl
     temp_filename = generate_unique_filename("")
     output_path = f"{temp_filename}.wav"
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'wav',
-        }],
-        'outtmpl': temp_filename, # yt-dlp añadirá la extensión
-        'keepvideo': False,
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([url])
-    if not os.path.exists(output_path):
-        raise FileNotFoundError(f"Error: Expected file {output_path} was not found after download.")
-    print(f"Audio download completed. File saved at: {output_path}")
-    return output_path
-def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
     original_file_to_clean = file_path
     try:
         if not file_path.endswith('.wav'):
-            print("Non-WAV file detected. Extracting audio...")
             video = mp.VideoFileClip(file_path)
             temp_audio = generate_unique_filename(".wav")
-            video.audio.write_audiofile(temp_audio)
-            file_path = temp_audio # Usar el archivo de audio extraído para la transcripción
         output_file = generate_unique_filename(".json")
         command = [
             "insanely-fast-whisper",
             "--file-name", file_path,
-            "--device-id", "cpu",  # Configurado para CPU
             "--model-name", "openai/whisper-large-v3",
             "--task", "transcribe",
             "--timestamp", "chunk",
-            "--transcript-path", output_file
         ]
-        print(f"Executing command: {' '.join(command)}")
-        subprocess.run(command, check=True, capture_output=True, text=True)
-        print(f"Reading transcription file: {output_file}")
-        with open(output_file, "r") as f:
             transcription_data = json.load(f)
-        result_text = transcription_data.get("text", " ".join([chunk["text"] for chunk in transcription_data.get("chunks", [])]))
-        print("Transcription completed.")
         cleanup_files(output_file)
-        return result_text
     except Exception as e:
-        print(f"An error occurred during transcription: {e}")
-        raise # Vuelve a lanzar la excepción para que sea manejada arriba
     finally:
-        # Limpieza de archivos temporales
         if temp_audio:
             cleanup_files(temp_audio)
-        # Si el archivo original era una subida, Gradio lo borra. Si era de YT, lo borramos nosotros.
         if original_file_to_clean != file_path:
-             cleanup_files(original_file_to_clean)
 def generate_summary_stream(transcription):
     if not transcription or len(transcription.strip()) < 20:
         return "Transcription is too short to summarize."
-    print("Starting summary generation...")
-    detected_language = langdetect.detect(transcription)
-    prompt = f"""Summarize the following video transcription in 150-300 words. The summary should be in the same language as the transcription ({detected_language}). Please capture the main points and key ideas of the text:
-    {transcription[:20000]}..."""
-    response, _ = model.chat(tokenizer, prompt, history=[])
-    print("Summary generation completed.")
-    return response
-# --- FUNCIONES DE INTERFAZ PARA GRADIO ---
-def process_youtube_url(url):
-    if not url:
-        return "Please enter a YouTube URL.", ""
-    print(f"Processing YouTube URL: {url}")
     audio_file = None
     try:
-        audio_file = download_youtube_audio(url)
-        transcription = transcribe_audio(audio_file)
-        return transcription, ""
     except Exception as e:
-        print(f"Error processing YouTube: {e}")
-        return f"Error processing YouTube: {str(e)}", ""
     finally:
-        cleanup_files(audio_file)
-def process_uploaded_video(video_path):
     if video_path is None:
         return "Please upload a video file first.", ""
-    print(f"Processing uploaded video at: {video_path}")
     try:
-        transcription = transcribe_audio(video_path)
-        return transcription, ""
     except Exception as e:
-        print(f"Error processing video: {e}")
-        return f"Error processing video: {str(e)}", ""
-# --- CONSTRUCCIÓN DE LA INTERFAZ DE GRADIO ---
-print("Setting up Gradio interface...")
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎥 Video Transcription and Smart Summary")
-    gr.Markdown("Upload a video or provide a YouTube link to get a transcription and AI-generated summary.")
     with gr.Tabs():
-        with gr.TabItem("📤 Video Upload"):
-            video_input = gr.Video(label="Upload Video")
-            video_button = gr.Button("🚀 Process Video", variant="primary")
-        with gr.TabItem("🔗 YouTube Link"):
-            url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
-            url_button = gr.Button("🚀 Process URL", variant="primary")
     with gr.Row():
-        transcription_output = gr.Textbox(label="📝 Transcription", lines=10, interactive=True)
-        summary_output = gr.Textbox(label="📊 Summary", lines=10)
-    summary_button = gr.Button("📝 Generate Summary", variant="secondary")
-    # Conexiones de la UI
-    video_button.click(fn=process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
-    url_button.click(fn=process_youtube_url, inputs=[url_input], outputs=[transcription_output, summary_output])
-    summary_button.click(fn=generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
-print("Launching Gradio interface...")
-# Configuración de launch() para ser compatible con Docker y Hugging Face Spaces
-demo.launch(server_name="0.0.0.0", server_port=7860)

 import moviepy.editor as mp
 import langdetect
 import uuid
+import time
+import random
 # --- CONFIGURACIÓN INICIAL ---
 print("Starting the program...")
 model = model.eval()
 print("Model successfully loaded.")
+# --- CONFIGURACIÓN AVANZADA DE YT-DLP PARA VIMEO ---
def get_enhanced_ydl_opts():
    """Build the yt-dlp options used for audio downloads.

    The options request the best audio stream, convert it to WAV through the
    FFmpegExtractAudio post-processor, and add browser-like headers, sleeps,
    retries and Vimeo extractor arguments intended to reduce blocking.

    Returns:
        dict: A freshly built options dictionary. The random sleep values are
        drawn on every call, and callers may mutate the result (for example
        to set ``outtmpl``).
    """
    opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'keepvideo': False,
        # === ANTI-BLOCKING SETTINGS ===
        # Realistic desktop Chrome request headers.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        },
        # Rate limiting: random pauses before downloads and between requests.
        'sleep_interval': random.uniform(2, 5),
        'max_sleep_interval': 8,
        'sleep_interval_requests': random.uniform(0.5, 2),
        # Network settings.
        'socket_timeout': 60,
        'retries': 5,
        'fragment_retries': 10,
        # Exponential backoff with up to one second of jitter for HTTP retries.
        'retry_sleep_functions': {'http': lambda n: 2 ** n + random.uniform(0, 1)},
        # Geographic restriction bypass.
        'geo_bypass': True,
        'geo_bypass_country': 'US',
        # Vimeo-specific extractor arguments. yt-dlp iterates over each value,
        # so they must be lists: a bare string such as 'web' would be split
        # into the single characters 'w', 'e', 'b'.
        'extractor_args': {
            'vimeo': {
                'client': ['web'],
                'original_format_policy': ['auto'],
            }
        },
        # Additional stability options.
        'no_warnings': False,
        'ignoreerrors': False,
        'abort_on_unavailable_fragments': False,
        'keep_fragments': False,
    }

    # Browser impersonation is optional. yt-dlp expects an ImpersonateTarget
    # object for this option (not the string 'chrome') and refuses to build a
    # YoutubeDL instance when the requested target is unavailable, so it is
    # only enabled when the curl_cffi backend appears to be installed.
    # NOTE(review): an unsupported curl_cffi version can still make the target
    # unavailable; confirm against the deployed environment.
    import importlib.util

    if importlib.util.find_spec("curl_cffi") is not None:
        try:
            from yt_dlp.networking.impersonate import ImpersonateTarget
        except ImportError:
            pass
        else:
            opts['impersonate'] = ImpersonateTarget.from_str('chrome')

    return opts
 # --- FUNCIONES AUXILIARES ---
 def generate_unique_filename(extension):
     return f"{uuid.uuid4()}{extension}"
             except OSError as e:
                 print(f"Error removing file {file}: {e}")
def human_like_delay():
    """Sleep for a random duration between 1 and 3 seconds, logging it first."""
    pause_seconds = random.uniform(1, 3)
    print("Waiting {:.1f} seconds...".format(pause_seconds))
    time.sleep(pause_seconds)
+# --- LÓGICA PRINCIPAL DE PROCESAMIENTO MEJORADA ---
def download_video_audio_enhanced(url):
    """Download the audio track of a video URL as a WAV file.

    Makes up to three attempts. Each attempt first extracts the video
    metadata, waits a short random delay, and then downloads. Failed attempts
    are followed by an exponential delay with jitter.

    Args:
        url: Video page URL handed to yt-dlp.

    Returns:
        str: Path of the downloaded ``.wav`` file in the working directory.

    Raises:
        Exception: When every attempt failed. The message contains the text of
            the last error, and the original error is attached as the cause.
    """
    import glob

    print(f"Downloading audio from: {url}")
    temp_filename = generate_unique_filename("")
    output_path = f"{temp_filename}.wav"

    # Initial delay so requests do not start instantly.
    human_like_delay()

    ydl_opts = get_enhanced_ydl_opts()
    # Use an explicit extension placeholder so the downloaded media is named
    # "<uuid>.<ext>" and the audio post-processor reliably produces
    # "<uuid>.wav", which is the path checked below.
    ydl_opts['outtmpl'] = f"{temp_filename}.%(ext)s"

    max_retries = 3
    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1}/{max_retries}")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                # Extract the metadata first, without downloading.
                print("Extracting video information...")
                info = ydl.extract_info(url, download=False)
                if not info:
                    raise Exception("Could not extract video information")
                print(f"Video found: {info.get('title', 'Unknown')}")
                # Extra delay before the actual download.
                human_like_delay()
                print("Starting download...")
                ydl.download([url])

            if not os.path.exists(output_path):
                raise FileNotFoundError(f"Expected file {output_path} was not found")
            print(f"Download completed successfully: {output_path}")
            return output_path
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            if attempt < max_retries - 1:
                # Exponential delay with jitter between attempts. Partial
                # files are kept so the next attempt can resume them.
                delay = (2 ** attempt) + random.uniform(1, 3)
                print(f"Retrying in {delay:.1f} seconds...")
                time.sleep(delay)
            else:
                # Every attempt failed: remove leftovers of this download
                # (partial or unconverted media) before reporting the error.
                for leftover in glob.glob(f"{glob.escape(temp_filename)}.*"):
                    try:
                        os.remove(leftover)
                    except OSError as cleanup_error:
                        print(f"Error removing file {leftover}: {cleanup_error}")
                raise Exception(
                    f"Failed to download after {max_retries} attempts: {str(e)}"
                ) from e
def transcribe_audio_enhanced(file_path):
    """Transcribe an audio or video file with the insanely-fast-whisper CLI.

    Inputs whose name does not end in ``.wav`` are first converted to a
    temporary WAV file with MoviePy. The CLI writes a JSON transcript that is
    read back, and the temporary files are removed afterwards.

    Args:
        file_path: Path of the media file to transcribe.

    Returns:
        str: The stripped transcript text. Falls back to joining the
        per-chunk texts when the top-level ``text`` field is empty.

    Raises:
        Exception: With the message "Transcription process timed out" when
            the CLI exceeds the 10 minute limit.
        ValueError: When a video input has no audio track.
        FileNotFoundError: When the CLI produced no transcript file.
        subprocess.CalledProcessError: When the CLI exits with an error.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None
    output_file = None
    original_file_to_clean = file_path
    try:
        # Convert to WAV if needed.
        if not file_path.endswith('.wav'):
            print("Non-WAV file detected. Converting...")
            video = mp.VideoFileClip(file_path)
            try:
                if video.audio is None:
                    raise ValueError("The video does not contain an audio track")
                temp_audio = generate_unique_filename(".wav")
                video.audio.write_audiofile(temp_audio, verbose=False, logger=None)
            finally:
                # Release the clip's readers even when extraction fails.
                video.close()
            file_path = temp_audio

        output_file = generate_unique_filename(".json")
        command = [
            "insanely-fast-whisper",
            "--file-name", file_path,
            "--device-id", "cpu",
            "--model-name", "openai/whisper-large-v3",
            "--task", "transcribe",
            "--timestamp", "chunk",
            "--transcript-path", output_file,
            "--batch-size", "4",  # Smaller batch size to avoid running out of memory
        ]
        print(f"Executing transcription command...")
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
        )

        if not os.path.exists(output_file):
            raise FileNotFoundError("Transcription output file not found")
        with open(output_file, "r", encoding='utf-8') as f:
            transcription_data = json.load(f)

        result_text = transcription_data.get("text", "")
        if not result_text:
            # Fallback: concatenate the per-chunk texts.
            chunks = transcription_data.get("chunks", [])
            result_text = " ".join([chunk.get("text", "") for chunk in chunks])
        print("Transcription completed successfully.")
        return result_text.strip()
    except subprocess.TimeoutExpired as e:
        print("Transcription timed out")
        raise Exception("Transcription process timed out") from e
    except subprocess.CalledProcessError as e:
        print(f"Transcription error: {e}")
        # The CLI output is captured, so log stderr or the cause is lost.
        if e.stderr:
            print(f"Transcription stderr: {e.stderr.strip()}")
        raise
    except Exception as e:
        print(f"Transcription error: {e}")
        raise
    finally:
        # Remove temporary artifacts on success and on failure alike.
        if output_file and os.path.exists(output_file):
            cleanup_files(output_file)
        if temp_audio:
            cleanup_files(temp_audio)
        # NOTE(review): this removes the caller's input file whenever a
        # conversion took place; confirm that is intended for uploaded files.
        if original_file_to_clean != file_path:
            cleanup_files(original_file_to_clean)
 def generate_summary_stream(transcription):
+    """Función mejorada de generación de resumen"""
     if not transcription or len(transcription.strip()) < 20:
         return "Transcription is too short to summarize."
+    print("Generating summary...")
+    try:
+        detected_language = langdetect.detect(transcription)
+        print(f"Detected language: {detected_language}")
+    except:
+        detected_language = "en"  # Fallback a inglés
+    # Truncar transcripción si es muy larga
+    max_chars = 15000
+    truncated_text = transcription[:max_chars]
+    if len(transcription) > max_chars:
+        truncated_text += "..."
+    prompt = f"""Please create a comprehensive summary of the following video transcription in {detected_language}.
+    The summary should be 150-300 words and capture the main points, key ideas, and important details:
+    {truncated_text}"""
+    try:
+        response, _ = model.chat(tokenizer, prompt, history=[])
+        print("Summary generated successfully.")
+        return response
+    except Exception as e:
+        print(f"Summary generation error: {e}")
+        return f"Error generating summary: {str(e)}"
+# --- FUNCIONES DE INTERFAZ MEJORADAS ---
def process_video_url_enhanced(url):
    """Download and transcribe a video URL (YouTube, Vimeo, etc.).

    Args:
        url: Video URL typed by the user; surrounding whitespace is ignored.

    Returns:
        tuple[str, str]: ``(transcription, status)``. On success the first
        item is the transcript and the second a confirmation. On invalid
        input or failure the transcription is empty and the message is in the
        status slot, so it never lands in the transcription field that feeds
        the summary step.
    """
    if not url or not url.strip():
        return "", "Please enter a valid video URL."
    url = url.strip()
    print(f"Processing video URL: {url}")

    # Detect the platform (used only for log and status messages).
    platform = "Unknown"
    if "youtube.com" in url or "youtu.be" in url:
        platform = "YouTube"
    elif "vimeo.com" in url:
        platform = "Vimeo"
    print(f"Detected platform: {platform}")

    # Known failure markers mapped to friendlier messages, checked in order.
    known_errors = (
        ("HTTP Error 401", "❌ Access denied. The video might be private or require authentication."),
        ("HTTP Error 403", "❌ Video blocked. Try again in a few minutes or check if the video is publicly accessible."),
        ("HTTP Error 429", "❌ Rate limited. Please wait a few minutes before trying again."),
        ("TLS fingerprint", "❌ Connection blocked by security measures. Try again later."),
    )

    audio_file = None
    try:
        audio_file = download_video_audio_enhanced(url)
        transcription = transcribe_audio_enhanced(audio_file)
        if not transcription:
            return "", "No transcription could be generated from this video."
        return transcription, f"✅ Successfully processed {platform} video"
    except Exception as e:
        error_msg = str(e)
        print(f"Error processing {platform} video: {error_msg}")
        for marker, friendly_message in known_errors:
            if marker in error_msg:
                return "", friendly_message
        return "", f"❌ Error processing video: {error_msg}"
    finally:
        # The downloaded audio is temporary; remove it whatever the outcome.
        if audio_file:
            cleanup_files(audio_file)
def process_uploaded_video_enhanced(video_path):
    """Transcribe a video file uploaded through the interface.

    Args:
        video_path: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        tuple[str, str]: ``(transcription, status)``. On success the first
        item is the transcript and the second a confirmation. On missing
        input or failure the transcription is empty and the message is in the
        status slot, so it never lands in the transcription field that feeds
        the summary step.
    """
    if video_path is None:
        return "", "Please upload a video file first."
    print(f"Processing uploaded video: {video_path}")
    try:
        transcription = transcribe_audio_enhanced(video_path)
        if not transcription:
            return "", "No transcription could be generated from this video."
        return transcription, "✅ Successfully processed uploaded video"
    except Exception as e:
        error_msg = str(e)
        print(f"Error processing uploaded video: {error_msg}")
        return "", f"❌ Error processing video: {error_msg}"
+# --- CONSTRUCCIÓN DE LA INTERFAZ MEJORADA ---
# Build the Gradio UI: input tabs (URL / upload), transcription and summary
# panes, a status line, usage tips, and the event wiring; then start the server.
print("Setting up enhanced Gradio interface...")
with gr.Blocks(title="🎥 Enhanced Video Transcription", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Enhanced Video Transcription & AI Summary")
    gr.Markdown("""
    Upload a video or provide a video URL (YouTube, Vimeo, etc.) to get a transcription and AI-generated summary.
    **✨ Enhanced features:**
    - 🛡️ Anti-blocking measures for Vimeo and other platforms
    - 🔄 Automatic retry with exponential backoff
    - 🌍 Geographic restriction bypass
    - 🤖 Human-like behavior simulation
    - 📊 Better error handling and reporting
    """)

    # Input tabs: a remote URL or a local upload.
    with gr.Tabs():
        with gr.TabItem("🔗 Video URL (YouTube, Vimeo, etc.)"):
            with gr.Row():
                url_input = gr.Textbox(label="Video URL", placeholder="https://www.youtube.com/watch?v=... or https://vimeo.com/...", scale=4)
                url_button = gr.Button("🚀 Process URL", variant="primary", scale=1)
        with gr.TabItem("📤 Upload Video File"):
            with gr.Row():
                video_input = gr.Video(label="Upload Video File", scale=4)
                video_button = gr.Button("🚀 Process Video", variant="primary", scale=1)

    # Result panes, side by side.
    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(label="📝 Transcription", lines=12, interactive=True, placeholder="Transcription will appear here...")
        with gr.Column():
            summary_output = gr.Textbox(label="📊 AI Summary", lines=12, placeholder="AI-generated summary will appear here...")

    # Status line and the summary trigger.
    with gr.Row():
        status_output = gr.Textbox(label="📊 Status", interactive=False, placeholder="Ready to process videos...")
        summary_button = gr.Button("📝 Generate Summary", variant="secondary")

    # Additional information.
    with gr.Accordion("ℹ️ Usage Tips", open=False):
        gr.Markdown("""
        **For best results:**
        - ✅ Use public videos (private videos may not work)
        - ✅ If you get blocked, wait 5-10 minutes before trying again
        - ✅ Vimeo links work best in format: `https://vimeo.com/VIDEO_ID`
        - ✅ For YouTube, both long and short URLs are supported
        - ✅ The system includes automatic retries with delays to avoid blocks
        **Supported formats:** MP4, AVI, MOV, MKV, WEBM, and most video formats
        """)

    # Event wiring: both processors fill the transcription and status boxes;
    # the summary button reads the transcription box.
    url_button.click(process_video_url_enhanced, [url_input], [transcription_output, status_output])
    video_button.click(process_uploaded_video_enhanced, [video_input], [transcription_output, status_output])
    summary_button.click(generate_summary_stream, [transcription_output], [summary_output])

print("Launching enhanced Gradio interface...")
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=False)