Spaces:

joseluisthepower
/

test33_transcriptor

Sleeping

App Files Files Community

joseluisthepower committed on Aug 1, 2025

Commit

652ee18

verified ·

1 Parent(s): aa9793c

prueba nueva app. antes funcionaba.

Browse files

Files changed (1) hide show

app.py +446 -159

app.py CHANGED Viewed

@@ -10,9 +10,21 @@ import langdetect
 import uuid
 import time
 import random
 # --- CONFIGURACIÓN INICIAL ---
-print("Starting the program...")
 # Carga del modelo en CPU
 model_path = "Qwen/Qwen2.5-7B-Instruct"
@@ -20,64 +32,154 @@ print(f"Loading model {model_path}...")
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)
 model = model.eval()
-print("Model successfully loaded.")
-# --- CONFIGURACIÓN AVANZADA DE YT-DLP PARA VIMEO ---
-def get_enhanced_ydl_opts():
-    """
-    Configuración optimizada para evitar bloqueos de Vimeo
-    """
-    return {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'wav',
         }],
         'keepvideo': False,
-        # === CONFIGURACIONES ANTI-BLOQUEO ===
-        # User Agent realista (Chrome más reciente)
         'http_headers': {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-            'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
             'Accept-Encoding': 'gzip, deflate, br',
             'DNT': '1',
             'Connection': 'keep-alive',
             'Upgrade-Insecure-Requests': '1',
         },
-        # Rate limiting y sleep para parecer humano
-        'sleep_interval': random.uniform(2, 5),  # Espera aleatoria entre 2-5 segundos
-        'max_sleep_interval': 8,
-        'sleep_interval_requests': random.uniform(0.5, 2),  # Entre requests
-        # Configuraciones de red
-        'socket_timeout': 60,
-        'retries': 5,
-        'fragment_retries': 10,
-        'retry_sleep_functions': {'http': lambda n: 2 ** n + random.uniform(0, 1)},
-        # Bypass de restricciones geográficas
-        'geo_bypass': True,
-        'geo_bypass_country': 'US',
         # Configuraciones específicas para Vimeo
         'extractor_args': {
             'vimeo': {
-                'client': 'web',  # Usar cliente web en lugar de android/ios
-                'original_format_policy': 'auto',  # Política automática para formatos originales
             }
         },
-        # Opciones adicionales para estabilidad
         'no_warnings': False,
         'ignoreerrors': False,
         'abort_on_unavailable_fragments': False,
-        'keep_fragments': False,
-        # Impersonación de navegador (si está disponible)
-        'impersonate': 'chrome',  # Impersonar Chrome
     }
 # --- FUNCIONES AUXILIARES ---
@@ -89,92 +191,189 @@ def cleanup_files(*files):
         if file and os.path.exists(file):
             try:
                 os.remove(file)
-                print(f"Removed file: {file}")
             except OSError as e:
-                print(f"Error removing file {file}: {e}")
-def human_like_delay():
     """Simula comportamiento humano con delays aleatorios"""
-    delay = random.uniform(1, 3)
-    print(f"Waiting {delay:.1f} seconds...")
     time.sleep(delay)
-# --- LÓGICA PRINCIPAL DE PROCESAMIENTO MEJORADA ---
-def download_video_audio_enhanced(url):
     """
-    Función mejorada para descargar audio de videos con anti-bloqueo
     """
-    print(f"Downloading audio from: {url}")
     temp_filename = generate_unique_filename("")
     output_path = f"{temp_filename}.wav"
-    # Delay inicial para parecer humano
-    human_like_delay()
-    # Configuración optimizada
-    ydl_opts = get_enhanced_ydl_opts()
-    ydl_opts['outtmpl'] = temp_filename
-    max_retries = 3
-    for attempt in range(max_retries):
-        try:
-            print(f"Attempt {attempt + 1}/{max_retries}")
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                # Extraer información primero sin descargar
-                print("Extracting video information...")
-                info = ydl.extract_info(url, download=False)
-                # Verificar si el video está disponible
-                if not info:
-                    raise Exception("Could not extract video information")
-                print(f"Video found: {info.get('title', 'Unknown')}")
-                # Delay adicional antes de la descarga
-                human_like_delay()
-                # Proceder con la descarga
-                print("Starting download...")
-                ydl.download([url])
-            if os.path.exists(output_path):
-                print(f"Download completed successfully: {output_path}")
-                return output_path
-            else:
-                raise FileNotFoundError(f"Expected file {output_path} was not found")
-        except Exception as e:
-            print(f"Attempt {attempt + 1} failed: {str(e)}")
-            if attempt < max_retries - 1:
-                # Delay exponencial con jitter entre reintentos
-                delay = (2 ** attempt) + random.uniform(1, 3)
-                print(f"Retrying in {delay:.1f} seconds...")
-                time.sleep(delay)
-            else:
-                # Todos los intentos fallaron
-                raise Exception(f"Failed to download after {max_retries} attempts: {str(e)}")
 def transcribe_audio_enhanced(file_path):
     """Función mejorada de transcripción con mejor manejo de errores"""
-    print(f"Starting transcription of file: {file_path}")
     temp_audio = None
     original_file_to_clean = file_path
     try:
         # Convertir a WAV si es necesario
         if not file_path.endswith('.wav'):
-            print("Non-WAV file detected. Converting...")
             video = mp.VideoFileClip(file_path)
             temp_audio = generate_unique_filename(".wav")
             video.audio.write_audiofile(temp_audio, verbose=False, logger=None)
-            video.close()  # Cerrar explícitamente
             file_path = temp_audio
         output_file = generate_unique_filename(".json")
-        # Comando mejorado de Whisper
         command = [
             "insanely-fast-whisper",
             "--file-name", file_path,
@@ -183,66 +382,78 @@ def transcribe_audio_enhanced(file_path):
             "--task", "transcribe",
             "--timestamp", "chunk",
             "--transcript-path", output_file,
-            "--batch-size", "4",  # Reducir batch size para evitar OOM
         ]
-        print(f"Executing transcription command...")
         result = subprocess.run(
             command,
             check=True,
             capture_output=True,
             text=True,
-            timeout=600  # Timeout de 10 minutos
         )
-        # Leer resultado
         if not os.path.exists(output_file):
             raise FileNotFoundError("Transcription output file not found")
         with open(output_file, "r", encoding='utf-8') as f:
             transcription_data = json.load(f)
-        result_text = transcription_data.get("text", "")
         if not result_text:
-            # Fallback: concatenar chunks
             chunks = transcription_data.get("chunks", [])
-            result_text = " ".join([chunk.get("text", "") for chunk in chunks])
-        print("Transcription completed successfully.")
         cleanup_files(output_file)
-        return result_text.strip()
     except subprocess.TimeoutExpired:
-        print("Transcription timed out")
-        raise Exception("Transcription process timed out")
     except Exception as e:
-        print(f"Transcription error: {e}")
         raise
     finally:
-        # Limpieza mejorada
-        if temp_audio:
             cleanup_files(temp_audio)
-        if original_file_to_clean != file_path:
             cleanup_files(original_file_to_clean)
 def generate_summary_stream(transcription):
     """Función mejorada de generación de resumen"""
     if not transcription or len(transcription.strip()) < 20:
-        return "Transcription is too short to summarize."
-    print("Generating summary...")
     try:
         detected_language = langdetect.detect(transcription)
-        print(f"Detected language: {detected_language}")
     except:
-        detected_language = "en"  # Fallback a inglés
     # Truncar transcripción si es muy larga
-    max_chars = 15000
     truncated_text = transcription[:max_chars]
     if len(transcription) > max_chars:
         truncated_text += "..."
     prompt = f"""Please create a comprehensive summary of the following video transcription in {detected_language}.
     The summary should be 150-300 words and capture the main points, key ideas, and important details:
@@ -251,20 +462,23 @@ def generate_summary_stream(transcription):
     try:
         response, _ = model.chat(tokenizer, prompt, history=[])
-        print("Summary generated successfully.")
         return response
     except Exception as e:
-        print(f"Summary generation error: {e}")
-        return f"Error generating summary: {str(e)}"
 # --- FUNCIONES DE INTERFAZ MEJORADAS ---
 def process_video_url_enhanced(url):
-    """Función unificada para procesar URLs de video (YouTube, Vimeo, etc.)"""
     if not url or not url.strip():
-        return "Please enter a valid video URL.", ""
     url = url.strip()
-    print(f"Processing video URL: {url}")
     # Detectar plataforma
     platform = "Unknown"
@@ -273,72 +487,114 @@ def process_video_url_enhanced(url):
     elif "vimeo.com" in url:
         platform = "Vimeo"
-    print(f"Detected platform: {platform}")
     audio_file = None
     try:
-        # Usar función mejorada de descarga
-        audio_file = download_video_audio_enhanced(url)
         transcription = transcribe_audio_enhanced(audio_file)
         if not transcription:
-            return "No transcription could be generated from this video.", ""
-        return transcription, f"✅ Successfully processed {platform} video"
     except Exception as e:
         error_msg = str(e)
-        print(f"Error processing {platform} video: {error_msg}")
-        # Mensajes de error más informativos
         if "HTTP Error 401" in error_msg:
-            return "❌ Access denied. The video might be private or require authentication.", ""
-        elif "HTTP Error 403" in error_msg:
-            return "❌ Video blocked. Try again in a few minutes or check if the video is publicly accessible.", ""
         elif "HTTP Error 429" in error_msg:
-            return "❌ Rate limited. Please wait a few minutes before trying again.", ""
         elif "TLS fingerprint" in error_msg:
-            return "❌ Connection blocked by security measures. Try again later.", ""
         else:
-            return f"❌ Error processing video: {error_msg}", ""
     finally:
-        if audio_file:
             cleanup_files(audio_file)
 def process_uploaded_video_enhanced(video_path):
     """Función mejorada para procesar videos subidos"""
     if video_path is None:
-        return "Please upload a video file first.", ""
-    print(f"Processing uploaded video: {video_path}")
     try:
         transcription = transcribe_audio_enhanced(video_path)
         if not transcription:
-            return "No transcription could be generated from this video.", ""
-        return transcription, "✅ Successfully processed uploaded video"
     except Exception as e:
         error_msg = str(e)
-        print(f"Error processing uploaded video: {error_msg}")
-        return f"❌ Error processing video: {error_msg}", ""
 # --- CONSTRUCCIÓN DE LA INTERFAZ MEJORADA ---
-print("Setting up enhanced Gradio interface...")
-with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Enhanced Video Transcription") as demo:
-    gr.Markdown("# 🎥 Enhanced Video Transcription & AI Summary")
-    gr.Markdown("""
-    Upload a video or provide a video URL (YouTube, Vimeo, etc.) to get a transcription and AI-generated summary.
-    **✨ Enhanced features:**
-    - 🛡️ Anti-blocking measures for Vimeo and other platforms
-    - 🔄 Automatic retry with exponential backoff
-    - 🌍 Geographic restriction bypass
-    - 🤖 Human-like behavior simulation
-    - 📊 Better error handling and reporting
     """)
     with gr.Tabs():
@@ -346,7 +602,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Enhanced Video Transcription"
             with gr.Row():
                 url_input = gr.Textbox(
                     label="Video URL",
-                    placeholder="https://www.youtube.com/watch?v=... or https://vimeo.com/...",
                     scale=4
                 )
                 url_button = gr.Button("🚀 Process URL", variant="primary", scale=1)
@@ -360,39 +616,67 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Enhanced Video Transcription"
         with gr.Column():
             transcription_output = gr.Textbox(
                 label="📝 Transcription",
-                lines=12,
                 interactive=True,
                 placeholder="Transcription will appear here..."
             )
         with gr.Column():
             summary_output = gr.Textbox(
                 label="📊 AI Summary",
-                lines=12,
                 placeholder="AI-generated summary will appear here..."
             )
     with gr.Row():
         status_output = gr.Textbox(
-            label="📊 Status",
             interactive=False,
-            placeholder="Ready to process videos..."
         )
         summary_button = gr.Button("📝 Generate Summary", variant="secondary")
-    # Información adicional
-    with gr.Accordion("ℹ️ Usage Tips", open=False):
-        gr.Markdown("""
-        **For best results:**
-        - ✅ Use public videos (private videos may not work)
-        - ✅ If you get blocked, wait 5-10 minutes before trying again
-        - ✅ Vimeo links work best in format: `https://vimeo.com/VIDEO_ID`
-        - ✅ For YouTube, both long and short URLs are supported
-        - ✅ The system includes automatic retries with delays to avoid blocks
-        **Supported formats:** MP4, AVI, MOV, MKV, WEBM, and most video formats
         """)
-    # Conexiones de eventos
     url_button.click(
         fn=process_video_url_enhanced,
         inputs=[url_input],
@@ -411,7 +695,10 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Enhanced Video Transcription"
         outputs=[summary_output]
     )
-print("Launching enhanced Gradio interface...")
 demo.launch(
     server_name="0.0.0.0",
     server_port=7860,

 import uuid
 import time
 import random
+import re
+from urllib.parse import urlparse
 # --- CONFIGURACIÓN INICIAL ---
+print("Starting the enhanced program...")
+print("Checking dependencies...")
+# Probe for the optional curl-cffi package. CURL_CFFI_AVAILABLE records
+# whether the import succeeded so later code can decide whether to request
+# browser impersonation.
+try:
+    import curl_cffi
+    CURL_CFFI_AVAILABLE = True
+    print("✅ curl-cffi is available - Advanced impersonation enabled")
+except ImportError:
+    CURL_CFFI_AVAILABLE = False
+    print("⚠️ curl-cffi not available - Using fallback methods")
 # Load the model (the original comment says "on CPU").
+# NOTE(review): the weights are requested as torch.float16 while the comment
+# above mentions CPU - confirm half precision is intended for the target device.
 model_path = "Qwen/Qwen2.5-7B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)
 model = model.eval()
+print("✅ Model successfully loaded.")
+# --- CONFIGURACIONES ESPECIALIZADAS POR MÉTODO ---
+def get_vimeo_player_url(vimeo_url):
+    """Convert a Vimeo URL to its ``player.vimeo.com`` form.
+
+    The video ID is obtained through ``extract_vimeo_id`` so that both helpers
+    share a single set of URL patterns instead of two copies that can drift.
+
+    Args:
+        vimeo_url: Any Vimeo page or player URL.
+
+    Returns:
+        str: ``https://player.vimeo.com/video/<id>`` when an ID is found,
+        otherwise ``vimeo_url`` unchanged.
+    """
+    video_id = extract_vimeo_id(vimeo_url)
+    if video_id is None:
+        # No recognizable ID: hand the URL back untouched.
+        return vimeo_url
+    return f"https://player.vimeo.com/video/{video_id}"
+def get_primary_ydl_opts(output_path):
+    """Build the preferred yt-dlp options for extracting a video's audio as WAV.
+
+    Args:
+        output_path: Value used for yt-dlp's ``outtmpl`` option.
+
+    Returns:
+        dict: Options for ``yt_dlp.YoutubeDL``. An ``impersonate`` target is
+        added only when ``CURL_CFFI_AVAILABLE`` is true and the installed
+        yt-dlp provides ``ImpersonateTarget``.
+    """
+    opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'wav',
         }],
+        'outtmpl': output_path,
         'keepvideo': False,
+        # Basic network settings
+        'socket_timeout': 60,
+        'retries': 3,
+        'fragment_retries': 5,
+        # Browser-like request headers
         'http_headers': {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.9',
             'Accept-Encoding': 'gzip, deflate, br',
             'DNT': '1',
             'Connection': 'keep-alive',
+            'Sec-Fetch-Dest': 'document',
+            'Sec-Fetch-Mode': 'navigate',
+            'Sec-Fetch-Site': 'none',
+            'Sec-Fetch-User': '?1',
             'Upgrade-Insecure-Requests': '1',
         },
+        # Rate limiting (the random values are drawn once, when the dict is built)
+        'sleep_interval': random.uniform(3, 6),
+        'max_sleep_interval': 10,
+        'sleep_interval_requests': random.uniform(1, 2),
+        # Vimeo-specific extractor arguments
         'extractor_args': {
             'vimeo': {
+                # yt-dlp reads every extractor argument as a list of strings;
+                # a bare string would be iterated character by character.
+                'client': ['web'],
+                # 'never' avoids extra requests that can trigger blocking
+                'original_format_policy': ['never'],
             }
         },
+        # Geo bypass
+        'geo_bypass': True,
+        'geo_bypass_country': 'US',
+        # Additional settings
         'no_warnings': False,
         'ignoreerrors': False,
         'abort_on_unavailable_fragments': False,
+    }
+    # Add impersonation only when curl-cffi is available
+    if CURL_CFFI_AVAILABLE:
+        try:
+            # The embedding API expects an ImpersonateTarget object, not the
+            # plain string accepted by the command-line --impersonate flag.
+            from yt_dlp.networking.impersonate import ImpersonateTarget
+        except ImportError:
+            print("⚠️ Using basic user agent (yt-dlp has no impersonation support)")
+        else:
+            opts['impersonate'] = ImpersonateTarget.from_str('chrome')
+            print("🔐 Using Chrome impersonation")
+    else:
+        print("⚠️ Using basic user agent (curl-cffi not available)")
+    return opts
+def get_fallback_ydl_opts(output_path, method="player"):
+    """Build the more conservative yt-dlp options used after the primary ones fail.
+
+    Args:
+        output_path: Value used for yt-dlp's ``outtmpl`` option.
+        method: ``"android"`` selects the Vimeo ``android`` client; any other
+            value (default ``"player"``) selects the ``web`` client.
+
+    Returns:
+        dict: Options for ``yt_dlp.YoutubeDL``.
+    """
+    vimeo_client = 'android' if method == "android" else 'web'
+    opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'wav',
+        }],
+        'outtmpl': output_path,
+        'keepvideo': False,
+        # Longer timeout, fewer retries than the primary configuration
+        'socket_timeout': 120,
+        'retries': 2,
+        'fragment_retries': 3,
+        # Longer pauses between requests (random values drawn once per call)
+        'sleep_interval': random.uniform(5, 10),
+        'max_sleep_interval': 15,
+        'sleep_interval_requests': random.uniform(2, 4),
+        # Simplified headers
+        'http_headers': {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate',
+            'DNT': '1',
+        },
+        # Method-specific extractor arguments. yt-dlp reads each value as a
+        # list of strings; a bare string would be split into characters.
+        'extractor_args': {
+            'vimeo': {
+                'client': [vimeo_client],
+                'original_format_policy': ['never'],
+            }
+        },
+        # NOTE(review): with 'ignoreerrors' enabled yt-dlp reports failures
+        # without raising, so callers must check results themselves.
+        'no_warnings': True,
+        'ignoreerrors': True,
+        'abort_on_unavailable_fragments': True,
+    }
+    return opts
+def get_generic_ydl_opts(output_path):
+    """Return the minimal, last-resort yt-dlp options.
+
+    Args:
+        output_path: Value used for yt-dlp's ``outtmpl`` option.
+
+    Returns:
+        dict: Options for ``yt_dlp.YoutubeDL`` with a long socket timeout,
+        a single retry and errors ignored.
+    """
+    wav_extractor = {
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'wav',
+    }
+    generic_opts = dict(
+        format='bestaudio/best',
+        postprocessors=[wav_extractor],
+        outtmpl=output_path,
+        keepvideo=False,
+        socket_timeout=180,
+        retries=1,
+        http_headers={
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        },
+        sleep_interval=10,
+        no_warnings=True,
+        ignoreerrors=True,
+        geo_bypass=True,
+    )
+    return generic_opts
 # --- FUNCIONES AUXILIARES ---
         if file and os.path.exists(file):
             try:
                 os.remove(file)
+                print(f"🗑️ Removed file: {file}")
             except OSError as e:
+                print(f"❌ Error removing file {file}: {e}")
+def human_like_delay(min_sec=2, max_sec=5):
+    """Sleep for a random duration to mimic a human pause.
+
+    Args:
+        min_sec: Lower bound of the pause, in seconds.
+        max_sec: Upper bound of the pause, in seconds.
+    """
+    pause = random.uniform(min_sec, max_sec)
+    print(f"⏳ Waiting {pause:.1f} seconds...")
+    time.sleep(pause)
+def is_vimeo_url(url):
+    """Tell whether a URL points at vimeo.com or one of its subdomains.
+
+    The decision is made on the URL's host name, not on a substring of the
+    whole URL, so look-alike hosts (``notvimeo.com``) and URLs that merely
+    mention ``vimeo.com`` in their path or query are rejected.
+
+    Args:
+        url: URL to inspect; a missing scheme and ``None``/empty are tolerated.
+
+    Returns:
+        bool: ``True`` when the host is ``vimeo.com`` or ends in ``.vimeo.com``.
+    """
+    if not url:
+        return False
+    candidate = url.strip().lower()
+    if '://' not in candidate:
+        # urlparse only recognizes a host when a "//" authority marker is present.
+        candidate = '//' + candidate
+    try:
+        host = urlparse(candidate).hostname or ''
+    except ValueError:
+        # Malformed authority (e.g. a broken IPv6 literal)
+        return False
+    return host == 'vimeo.com' or host.endswith('.vimeo.com')
+def extract_vimeo_id(url):
+    """Extract the numeric video ID from a Vimeo URL.
+
+    Matching is case-insensitive, and the query string and fragment are
+    ignored so that digits inside them cannot be mistaken for the video ID.
+
+    Args:
+        url: Vimeo page or player URL; ``None``/empty is tolerated.
+
+    Returns:
+        str | None: The ID digits, or ``None`` when no pattern matches.
+    """
+    if not url:
+        return None
+    # Drop "#fragment" and "?query": the greedy third pattern would otherwise
+    # capture the last "/<digits>" found anywhere in the URL.
+    base = url.split('#', 1)[0].split('?', 1)[0]
+    patterns = (
+        r'vimeo\.com/(\d+)',
+        r'player\.vimeo\.com/video/(\d+)',
+        r'vimeo\.com/.*/(\d+)',
+    )
+    for pattern in patterns:
+        match = re.search(pattern, base, re.IGNORECASE)
+        if match:
+            return match.group(1)
+    return None
+# --- LÓGICA PRINCIPAL MEJORADA CON MÚLTIPLES FALLBACKS ---
+def download_video_audio_multi_fallback(url):
+    """
+    Download a video's audio as WAV, trying several URL/option combinations.
+
+    Every candidate URL (the original one plus, for Vimeo, player-URL variants)
+    is tried with each option builder in turn until one produces a WAV file
+    larger than 1000 bytes.
+
+    Args:
+        url: Video URL to download.
+
+    Returns:
+        str: Path of the downloaded ``.wav`` file.
+
+    Raises:
+        Exception: When every attempt fails. The message ends with the last
+            underlying error so callers that inspect the text (for example
+            for "HTTP Error 403") can still recognize the cause.
+    """
+    print(f"🎯 Processing URL: {url}")
     temp_filename = generate_unique_filename("")
     output_path = f"{temp_filename}.wav"
+    # Initial delay
+    human_like_delay(1, 3)
+    # Detect Vimeo and prepare alternative URLs
+    urls_to_try = [url]
+    if is_vimeo_url(url):
+        print("🎬 Vimeo video detected - Preparing fallback URLs")
+        video_id = extract_vimeo_id(url)
+        if video_id:
+            player_url = f"https://player.vimeo.com/video/{video_id}"
+            if player_url != url:
+                urls_to_try.append(player_url)
+            # Player URL with extra query parameters as a further variant
+            urls_to_try.append(f"https://player.vimeo.com/video/{video_id}?color=ffffff&title=0&byline=0&portrait=0")
+    # Option builders to try, in order of preference
+    methods = [
+        ("primary", "🔐 Primary method (with impersonation)", get_primary_ydl_opts),
+        ("player", "🌐 Player URL method", get_fallback_ydl_opts),
+        ("android", "📱 Android client method", lambda p: get_fallback_ydl_opts(p, "android")),
+        ("generic", "🛠️ Generic fallback method", get_generic_ydl_opts),
+    ]
+    total_attempts = 0
+    # Derived from the actual plan: non-Vimeo URLs have one candidate, not three
+    max_total_attempts = len(urls_to_try) * len(methods)
+    last_error = None
+    for current_url in urls_to_try:
+        print(f"\n🔄 Trying URL: {current_url}")
+        for method_name, method_desc, get_opts_func in methods:
+            total_attempts += 1
+            print(f"\n📊 Attempt {total_attempts}/{max_total_attempts}")
+            print(f"🛡️ Using: {method_desc}")
+            try:
+                ydl_opts = get_opts_func(temp_filename)
+                # Capped exponential backoff before every attempt but the first
+                if total_attempts > 1:
+                    delay_time = min(2 ** (total_attempts // 3), 15)
+                    human_like_delay(delay_time, delay_time + 2)
+                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                    print("📋 Extracting video information...")
+                    info = ydl.extract_info(current_url, download=False)
+                    if not info:
+                        raise Exception("Could not extract video information")
+                    title = info.get('title', 'Unknown')
+                    duration = info.get('duration', 'Unknown')
+                    print(f"✅ Video found: {title} (Duration: {duration})")
+                    # Short pause before downloading
+                    human_like_delay(1, 2)
+                    print("⬇️ Starting download...")
+                    ydl.download([current_url])
+                # Verify that the expected file was produced
+                if not os.path.exists(output_path):
+                    raise FileNotFoundError(f"Expected file {output_path} was not found")
+                file_size = os.path.getsize(output_path)
+                print(f"✅ Download successful! File size: {file_size} bytes")
+                if file_size <= 1000:  # Require more than ~1 KB
+                    raise Exception("Downloaded file is too small (possible error)")
+                return output_path
+            except Exception as e:
+                last_error = e
+                error_msg = str(e).lower()
+                print(f"❌ Method '{method_name}' failed: {str(e)}")
+                # Error-specific diagnostics
+                if "http error 401" in error_msg:
+                    print("🔐 Authentication issue detected")
+                elif "http error 403" in error_msg:
+                    print("🚫 Access forbidden - likely blocked")
+                elif "http error 429" in error_msg:
+                    print("⏰ Rate limited - increasing delay")
+                    human_like_delay(10, 15)  # Extra delay when rate limited
+                elif "tls fingerprint" in error_msg:
+                    print("🛡️ TLS fingerprinting detected")
+                elif "oauth token" in error_msg:
+                    print("🔑 OAuth token issue")
+                # Remove any partial file; cleanup_files ignores missing paths
+                cleanup_files(output_path)
+                if total_attempts < max_total_attempts:
+                    print(f"🔄 Trying next method...")
+    # Every combination failed: surface the last underlying error as well
+    raise Exception(
+        f"All {total_attempts} download attempts failed. "
+        "Vimeo may be blocking this IP or the video is not accessible. "
+        f"Last error: {last_error}"
+    ) from last_error
 def transcribe_audio_enhanced(file_path):
     """Función mejorada de transcripción con mejor manejo de errores"""
+    print(f"🎤 Starting transcription of file: {file_path}")
     temp_audio = None
     original_file_to_clean = file_path
     try:
+        # Verificar que el archivo existe y tiene contenido
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Audio file not found: {file_path}")
+        file_size = os.path.getsize(file_path)
+        print(f"📊 Audio file size: {file_size} bytes")
+        if file_size < 1000:
+            raise Exception("Audio file is too small - may be corrupted")
         # Convertir a WAV si es necesario
         if not file_path.endswith('.wav'):
+            print("🔄 Converting to WAV format...")
             video = mp.VideoFileClip(file_path)
+            if not video.audio:
+                raise Exception("No audio track found in video file")
             temp_audio = generate_unique_filename(".wav")
             video.audio.write_audiofile(temp_audio, verbose=False, logger=None)
+            video.close()
             file_path = temp_audio
         output_file = generate_unique_filename(".json")
+        # Comando de Whisper con configuraciones robustas
         command = [
             "insanely-fast-whisper",
             "--file-name", file_path,
             "--task", "transcribe",
             "--timestamp", "chunk",
             "--transcript-path", output_file,
+            "--batch-size", "2",  # Batch size más pequeño para estabilidad
+            "--hf-token", "dummy",  # Token dummy para evitar warnings
         ]
+        print(f"🤖 Executing transcription...")
         result = subprocess.run(
             command,
             check=True,
             capture_output=True,
             text=True,
+            timeout=900  # 15 minutos de timeout
         )
+        print(f"✅ Transcription command completed")
+        # Verificar que el archivo de salida existe
         if not os.path.exists(output_file):
             raise FileNotFoundError("Transcription output file not found")
+        # Leer y procesar resultado
         with open(output_file, "r", encoding='utf-8') as f:
             transcription_data = json.load(f)
+        result_text = transcription_data.get("text", "").strip()
+        # Fallback: concatenar chunks si no hay texto principal
         if not result_text:
             chunks = transcription_data.get("chunks", [])
+            if chunks:
+                result_text = " ".join([chunk.get("text", "").strip() for chunk in chunks])
+        # Validar resultado
+        if not result_text or len(result_text) < 10:
+            raise Exception("Transcription produced no meaningful text")
+        print(f"✅ Transcription completed. Length: {len(result_text)} characters")
         cleanup_files(output_file)
+        return result_text
     except subprocess.TimeoutExpired:
+        print("⏰ Transcription timed out")
+        raise Exception("Transcription process timed out (15 minutes)")
     except Exception as e:
+        print(f"❌ Transcription error: {e}")
         raise
     finally:
+        # Limpieza
+        if temp_audio and os.path.exists(temp_audio):
             cleanup_files(temp_audio)
+        if original_file_to_clean != file_path and os.path.exists(original_file_to_clean):
             cleanup_files(original_file_to_clean)
 def generate_summary_stream(transcription):
     """Función mejorada de generación de resumen"""
     if not transcription or len(transcription.strip()) < 20:
+        return "⚠️ Transcription is too short to summarize (less than 20 characters)."
+    print("🤖 Generating AI summary...")
     try:
         detected_language = langdetect.detect(transcription)
+        print(f"🌍 Detected language: {detected_language}")
     except:
+        detected_language = "en"
+        print("🌍 Language detection failed, defaulting to English")
     # Truncar transcripción si es muy larga
+    max_chars = 12000  # Reducido para evitar problemas de memoria
     truncated_text = transcription[:max_chars]
     if len(transcription) > max_chars:
         truncated_text += "..."
+        print(f"📝 Transcription truncated to {max_chars} characters")
     prompt = f"""Please create a comprehensive summary of the following video transcription in {detected_language}.
     The summary should be 150-300 words and capture the main points, key ideas, and important details:
     try:
         response, _ = model.chat(tokenizer, prompt, history=[])
+        print("✅ Summary generated successfully")
         return response
     except Exception as e:
+        print(f"❌ Summary generation error: {e}")
+        return f"⚠️ Error generating summary: {str(e)}\n\nOriginal transcription:\n{transcription[:1000]}..."
 # --- FUNCIONES DE INTERFAZ MEJORADAS ---
 def process_video_url_enhanced(url):
+    """Función mejorada para procesar URLs con diagnóstico detallado"""
     # English summary: download the audio for a video URL, transcribe it, and
     # return a (text, status) tuple. On success `text` is the transcription;
     # on failure it is a user-facing error message and `status` is a short label.
     # Reject a missing or whitespace-only URL before doing any work.
     if not url or not url.strip():
+        return "❌ Please enter a valid video URL.", "⚠️ No URL provided"
     url = url.strip()
+    print(f"\n{'='*50}")
+    print(f"🎯 PROCESSING VIDEO URL")
+    print(f"{'='*50}")
+    print(f"URL: {url}")
     # Detect the platform
     platform = "Unknown"
     # NOTE(review): the `if` branch(es) that precede this `elif` are not
     # visible in this diff view.
     elif "vimeo.com" in url:
         platform = "Vimeo"
+    print(f"Platform: {platform}")
+    print(f"curl-cffi available: {CURL_CFFI_AVAILABLE}")
     # Kept outside the try so the finally clause can see whether a file was produced.
     audio_file = None
     try:
+        # Use the robust download helper with multiple fallbacks
+        print(f"\n🚀 Starting download process...")
+        audio_file = download_video_audio_multi_fallback(url)
+        print(f"\n🎤 Starting transcription process...")
         transcription = transcribe_audio_enhanced(audio_file)
         if not transcription:
+            return "❌ No transcription could be generated from this video.", "⚠️ Transcription failed"
+        print(f"\n✅ Process completed successfully!")
+        success_msg = f"✅ Successfully processed {platform} video ({len(transcription)} chars transcribed)"
+        return transcription, success_msg
     except Exception as e:
         error_msg = str(e)
+        print(f"\n❌ ERROR: {error_msg}")
+        # Map known error signatures (substring matches on the message) to user-facing text
         if "HTTP Error 401" in error_msg:
+            return ("❌ ACCESS DENIED: The video might be private, require authentication, or have restricted access. "
+                   "Try with a public video or check if the URL is correct."), "🔐 Authentication Required"
         # NOTE(review): the messages below name Vimeo even when `platform` is something else.
+        elif "HTTP Error 403" in error_msg or "blocked" in error_msg.lower():
+            return ("❌ BLOCKED: Your IP or this server has been temporarily blocked by Vimeo. "
+                   "This is common with datacenter IPs. Please try again in 10-15 minutes."), "🚫 Temporarily Blocked"
         elif "HTTP Error 429" in error_msg:
+            return ("❌ RATE LIMITED: Too many requests sent to Vimeo. "
+                   "Please wait 5-10 minutes before trying again."), "⏰ Rate Limited"
         elif "TLS fingerprint" in error_msg:
+            return ("❌ TLS BLOCKED: Vimeo detected automated access. "
+                   f"curl-cffi status: {'✅ Available' if CURL_CFFI_AVAILABLE else '❌ Missing'}. "
+                   "Try again later or contact support."), "🛡️ Security Block"
+        elif "oauth token" in error_msg or "Bad Request" in error_msg:
+            return ("❌ API ERROR: Vimeo's API is experiencing issues or the video format is not supported. "
+                   "Try with a different Vimeo video."), "🔑 API Issue"
+        elif "not accessible" in error_msg.lower():
+            return ("❌ VIDEO NOT ACCESSIBLE: All download methods failed. The video might be: "
+                   "1) Private/Password protected, 2) Geo-restricted, 3) Deleted, or 4) Not a valid video URL."), "🚫 Not Accessible"
+        elif "timeout" in error_msg.lower():
+            return ("❌ TIMEOUT: The process took too long. This might be due to: "
+                   "1) Very long video, 2) Network issues, or 3) Server overload. Try with a shorter video."), "⏰ Timeout"
         else:
+            return f"❌ UNEXPECTED ERROR: {error_msg}", "❌ Unknown Error"
     finally:
+        # Final cleanup: runs on every exit path, including the early returns above
+        if audio_file and os.path.exists(audio_file):
             cleanup_files(audio_file)
 def process_uploaded_video_enhanced(video_path):
     """Transcribe an uploaded video file and report the outcome.
     Returns a ``(text, status)`` tuple: on success the transcription and a
     confirmation message; otherwise a user-facing error message and a
     short status label.
     """
     # Nothing was uploaded: bail out before logging anything.
     if video_path is None:
         return "❌ Please upload a video file first.", "⚠️ No file uploaded"
     divider = "=" * 50
     print(f"\n{divider}")
     print("📤 PROCESSING UPLOADED VIDEO")
     print(divider)
     print(f"File path: {video_path}")
     try:
         # Sanity-check the upload before handing it to the transcriber.
         if not os.path.exists(video_path):
             return "❌ Uploaded file not found.", "❌ File not found"
         size_in_bytes = os.path.getsize(video_path)
         print(f"File size: {size_in_bytes} bytes")
         if size_in_bytes < 1000:
             return "❌ Uploaded file is too small or corrupted.", "❌ Invalid file"
         print("🎤 Starting transcription...")
         text = transcribe_audio_enhanced(video_path)
         if text:
             print("✅ Process completed successfully!")
             summary_line = f"✅ Successfully processed uploaded video ({len(text)} chars transcribed)"
             return text, summary_line
         return "❌ No transcription could be generated from this video.", "⚠️ Transcription failed"
     except Exception as exc:
         reason = str(exc)
         print(f"❌ ERROR: {reason}")
         # Translate known failure signatures into friendlier messages.
         if "No audio track" in reason:
             return "❌ NO AUDIO: The uploaded video doesn't contain an audio track.", "🔇 No Audio"
         if "timeout" in reason.lower():
             return "❌ TIMEOUT: Video processing took too long. Try with a shorter video.", "⏰ Timeout"
         return f"❌ ERROR: {reason}", "❌ Processing Error"
 # --- ENHANCED INTERFACE CONSTRUCTION ---
 # Builds the Gradio Blocks UI as `demo` and prints startup diagnostics.
+print("🎨 Setting up enhanced Gradio interface...")
+with gr.Blocks(theme=gr.themes.Soft(), title="🎥 Anti-Block Video Transcription") as demo:
+    gr.Markdown("# 🎥 Anti-Block Video Transcription & AI Summary")
     # Status banner; the curl-cffi lines reflect CURL_CFFI_AVAILABLE at build time.
+    gr.Markdown(f"""
+    Advanced video transcription with **anti-blocking technology** for Vimeo and other platforms.
+    **🛡️ Current Status:**
+    - curl-cffi (Advanced Impersonation): {'✅ Available' if CURL_CFFI_AVAILABLE else '❌ Not Available'}
+    - Multiple Fallback Methods: ✅ Enabled
+    - Rate Limiting Protection: ✅ Enabled
+    - TLS Fingerprint Evasion: {'✅ Enabled' if CURL_CFFI_AVAILABLE else '⚠️ Basic Protection'}
     """)
     with gr.Tabs():
             # NOTE(review): the container between `gr.Tabs()` and this row
             # is not visible in this diff view (indentation skips a level).
             with gr.Row():
                 url_input = gr.Textbox(
                     label="Video URL",
+                    placeholder="https://vimeo.com/123456789 or https://www.youtube.com/watch?v=...",
                     scale=4
                 )
                 url_button = gr.Button("🚀 Process URL", variant="primary", scale=1)
         # Editable transcription box.
         with gr.Column():
             transcription_output = gr.Textbox(
                 label="📝 Transcription",
+                lines=15,
                 interactive=True,
                 placeholder="Transcription will appear here..."
             )
         # Summary box.
         with gr.Column():
             summary_output = gr.Textbox(
                 label="📊 AI Summary",
+                lines=15,
                 placeholder="AI-generated summary will appear here..."
             )
     # Read-only status line plus the button that requests a summary.
     with gr.Row():
         status_output = gr.Textbox(
+            label="📊 Status & Diagnostics",
             interactive=False,
+            placeholder="Ready to process videos...",
+            lines=2
         )
         summary_button = gr.Button("📝 Generate Summary", variant="secondary")
+    # Expanded help section
+    with gr.Accordion("ℹ️ Troubleshooting & Tips", open=False):
+        gr.Markdown(f"""
+        ## 🛠️ System Status
+        - **curl-cffi Library**: {'✅ Installed' if CURL_CFFI_AVAILABLE else '❌ Missing (using fallback methods)'}
+        - **Fallback Methods**: ✅ 4 different methods available
+        - **Error Recovery**: ✅ Automatic retry with exponential backoff
+        ## 🎯 Best Practices for Vimeo
+        1. **Public Videos Work Best**: Private/password-protected videos may fail
+        2. **Wait Between Requests**: If blocked, wait 10-15 minutes before retrying
+        3. **Use Standard URLs**: Format like `https://vimeo.com/123456789`
+        4. **Check Video Accessibility**: Ensure video plays in your browser first
+        ## 🚨 Common Error Solutions
+        **"Access Denied" or "HTTP 401"**
+        - Video is private or requires login
+        - Try with a public video
+        **"Blocked" or "HTTP 403"**
+        - Temporary IP block (common with datacenter IPs)
+        - Wait 10-15 minutes and try again
+        **"Rate Limited" or "HTTP 429"**
+        - Too many requests sent
+        - Wait 5-10 minutes before retrying
+        **"TLS Fingerprint Blocked"**
+        - Advanced anti-bot protection detected
+        - System will try multiple fallback methods automatically
+        **"All download attempts failed"**
+        - Video may be geo-restricted or deleted
+        - Try a different video to test if service is working
+        ## 📞 Support
+        If problems persist, check if the video plays normally in your browser and try with a different public video.
         """)
+    # Interface events
     # NOTE(review): as shown, process_video_url_enhanced returns two values
     # (text, status) but only one output component is wired here. Lines of
     # the event wiring appear to be elided in this diff view; confirm the
     # real outputs list against the full file.
     url_button.click(
         fn=process_video_url_enhanced,
         inputs=[url_input],
         outputs=[summary_output]
     )
 # Startup diagnostics printed just before the app is launched.
+print("🌟 Enhanced Gradio interface ready!")
+print(f"🔧 curl-cffi status: {'Available' if CURL_CFFI_AVAILABLE else 'Not available'}")
+print("🚀 Launching application...")
 demo.launch(
     server_name="0.0.0.0",
     server_port=7860,