Spaces:

ayloll
/

VideoToText_YouTube

Sleeping

App Files Community

ayloll committed on Jul 13, 2025

Commit

d185723

verified ·

1 Parent(s): 9246621

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -147

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from transformers import pipeline
 import yt_dlp
 import whisper
 import os
-import re
 from urllib.parse import urlparse
 # Delete temporary files
@@ -13,7 +12,7 @@ def clean_temp_files():
         if os.path.exists(file):
             os.remove(file)
-# Download YouTube video with improved options
 def download_video(video_url):
     try:
         ydl_opts = {
@@ -25,78 +24,36 @@ def download_video(video_url):
             'retries': 3,
             'socket_timeout': 30,
             'extract_flat': False,
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(video_url, download=True)
-            filename = ydl.prepare_filename(info)
-            return filename if os.path.exists(filename) else None
     except Exception as e:
-        print(f"Download error: {str(e)}")
-        return None
-# Extract audio with better error handling
-def extract_audio(video_path):
-    try:
-        if not os.path.exists(video_path):
-            return None
-        audio_path = "temp_audio.mp3"
-        os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 2 \"{audio_path}\" -y -loglevel error")
-        return audio_path if os.path.exists(audio_path) else None
-    except Exception as e:
-        print(f"Audio extraction error: {str(e)}")
-        return None
-# Transcribe audio with model caching
-def transcribe_audio(audio_path):
-    try:
-        if not os.path.exists(audio_path):
-            return None
-        model = whisper.load_model("base")
-        result = model.transcribe(audio_path, fp16=False)  # fp16=False for better compatibility
-        return result['text']
-    except Exception as e:
-        print(f"Transcription error: {str(e)}")
-        return None
-# Classify content with fallback
-def classify_content(text):
-    try:
-        if not text or len(text.strip()) == 0:
-            return None, None
-        classifier = pipeline("zero-shot-classification",
-                           model="facebook/bart-large-mnli")
-        labels = ["educational", "entertainment", "news", "political",
-                "religious", "technical", "advertisement", "social"]
-        result = classifier(text,
-                          candidate_labels=labels,
-                          hypothesis_template="This text is about {}.")
-        return result['labels'][0], result['scores'][0]
-    except Exception as e:
-        print(f"Classification error: {str(e)}")
-        return None, None
-# Validate YouTube URL
-def is_valid_youtube_url(url):
-    youtube_domains = ['youtube.com', 'www.youtube.com', 'youtu.be', 'www.youtu.be']
-    try:
-        parsed = urlparse(url)
-        if not parsed.scheme in ('http', 'https'):
-            return False
-        if not any(domain in parsed.netloc for domain in youtube_domains):
-            return False
-        return True
-    except:
-        return False
-# Main processing function with better error handling
 def process_video(video_url):
     clean_temp_files()
@@ -104,87 +61,15 @@ def process_video(video_url):
         return "Please enter a valid YouTube URL", ""
     if not is_valid_youtube_url(video_url):
-        return "Please enter a valid YouTube URL (should start with https://youtube.com or https://youtu.be)", ""
-    try:
-        # Download video
-        video_path = download_video(video_url)
-        if not video_path:
-            return "Failed to download video (may be private, age-restricted, or unavailable)", ""
-        # Extract audio
-        audio_path = extract_audio(video_path)
-        if not audio_path:
-            clean_temp_files()
-            return "Failed to extract audio from video", ""
-        # Transcribe
-        transcription = transcribe_audio(audio_path)
-        if not transcription:
-            clean_temp_files()
-            return "Failed to transcribe audio (may be no speech detected)", ""
-        # Classify
-        category, confidence = classify_content(transcription)
-        if not category:
-            clean_temp_files()
-            return transcription, "Failed to classify content"
-        # Clean up
-        clean_temp_files()
-        # Format classification result
-        classification_result = f"{category} (confidence: {confidence:.2f})"
-        return transcription, classification_result
-    except Exception as e:
         clean_temp_files()
-        return f"An error occurred: {str(e)}", ""
-# Gradio interface
-with gr.Blocks(title="YouTube Content Analyzer") as demo:
-    gr.Markdown("""
-    # ▶️ YouTube Content Analyzer
-    Enter a YouTube video URL to get transcription and content classification
-    """)
-    with gr.Row():
-        url_input = gr.Textbox(
-            label="YouTube URL",
-            placeholder="Enter YouTube video URL here...",
-            max_lines=1
-        )
-    with gr.Row():
-        transcription_output = gr.Textbox(
-            label="Transcription",
-            interactive=True,
-            lines=10,
-            max_lines=20
-        )
-    with gr.Row():
-        category_output = gr.Textbox(
-            label="Content Category",
-            interactive=False
-        )
-    submit_btn = gr.Button("Analyze Video", variant="primary")
-    # Examples
-    gr.Examples(
-        examples=[
-            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],  # Rick Astley - Never Gonna Give You Up
-            ["https://youtu.be/J---aiyznGQ"]  # Keyboard Cat
-        ],
-        inputs=url_input
-    )
-    submit_btn.click(
-        fn=process_video,
-        inputs=url_input,
-        outputs=[transcription_output, category_output]
-    )
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import yt_dlp
 import whisper
 import os
 from urllib.parse import urlparse
 # Delete temporary files
         if os.path.exists(file):
             os.remove(file)
+# Enhanced YouTube downloader with error handling
 def download_video(video_url):
     try:
         ydl_opts = {
             'retries': 3,
             'socket_timeout': 30,
             'extract_flat': False,
+            'ignoreerrors': True,
+            'cookiefile': 'cookies.txt' if os.path.exists('cookies.txt') else None,
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            # First check if video is available
+            try:
+                info = ydl.extract_info(video_url, download=False)
+                if info.get('availability') == 'unavailable':
+                    return None, "Video is unavailable (private, deleted, or region-locked)"
+                if info.get('age_limit', 0) > 0:
+                    return None, "Age-restricted content (try with cookies)"
+            except:
+                pass
+            # Try to download
+            try:
+                ydl.download([video_url])
+                filename = 'temp_video.mp4' if os.path.exists('temp_video.mp4') else None
+                return filename, None
+            except yt_dlp.utils.DownloadError as e:
+                return None, f"Download failed: {str(e)}"
     except Exception as e:
+        return None, f"Error: {str(e)}"
+# [Rest of your functions (extract_audio, transcribe_audio, classify_content) remain the same...]
+# Main processing function with enhanced error handling
 def process_video(video_url):
     clean_temp_files()
         return "Please enter a valid YouTube URL", ""
     if not is_valid_youtube_url(video_url):
+        return "Please enter a valid YouTube URL", ""
+    # Download video
+    video_path, download_error = download_video(video_url)
+    if not video_path:
         clean_temp_files()
+        error_msg = download_error or "Failed to download video"
+        return error_msg, ""
+    # [Rest of your processing logic remains the same...]
+# [Rest of your Gradio interface code remains the same...]