Spaces:

jonathanagustin
/

video_analyzer

Runtime error

Claude committed on Dec 27, 2025

Commit

93bbd17

unverified ·

1 Parent(s): 8aa153c

feat: Add YouTube download and Whisper transcription

- Add yt-dlp for downloading YouTube videos and playlists
- Add transformers with Whisper for speech-to-text
- Add URL input box that accepts video or playlist URLs
- Require login to use transcription feature
- Show progress during download and transcription

Files changed (3) hide show

app.py +114 -0
pyproject.toml +5 -1
uv.lock +0 -0

app.py CHANGED Viewed

@@ -1,7 +1,14 @@
 from __future__ import annotations
 import gradio as gr
 from huggingface_hub import whoami
 def hello(profile: gr.OAuthProfile | None) -> str:
@@ -19,11 +26,118 @@ def list_organizations(oauth_token: gr.OAuthToken | None) -> str:
     return "You don't belong to any organizations."
 with gr.Blocks() as demo:
     gr.Markdown("# Video Analyzer")
     gr.LoginButton()
     # Empty Markdown placeholders; they are the outputs of the two
     # demo.load calls below.
     m1 = gr.Markdown()
     m2 = gr.Markdown()
     # hello writes into m1 and list_organizations into m2. Neither call lists
     # explicit inputs (inputs=None).
     demo.load(hello, inputs=None, outputs=m1)
     demo.load(list_organizations, inputs=None, outputs=m2)

 from __future__ import annotations
+import os
+import tempfile
+from pathlib import Path
 import gradio as gr
+import torch
+import yt_dlp
 from huggingface_hub import whoami
+from transformers import pipeline
 def hello(profile: gr.OAuthProfile | None) -> str:
     return "You don't belong to any organizations."
+def get_whisper_model():
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    return pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-base",
+        device=device,
+    )
+def download_audio(url: str, output_dir: str) -> list[dict]:
+    """Download audio from YouTube URL (video or playlist)."""
+    ydl_opts = {
+        "format": "bestaudio/best",
+        "postprocessors": [{
+            "key": "FFmpegExtractAudio",
+            "preferredcodec": "mp3",
+            "preferredquality": "192",
+        }],
+        "outtmpl": os.path.join(output_dir, "%(title)s.%(ext)s"),
+        "quiet": True,
+        "no_warnings": True,
+    }
+    downloaded = []
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=True)
+        if "entries" in info:
+            for entry in info["entries"]:
+                if entry:
+                    downloaded.append({
+                        "title": entry.get("title", "Unknown"),
+                        "path": os.path.join(output_dir, f"{entry['title']}.mp3"),
+                    })
+        else:
+            downloaded.append({
+                "title": info.get("title", "Unknown"),
+                "path": os.path.join(output_dir, f"{info['title']}.mp3"),
+            })
+    return downloaded
+def transcribe_audio(audio_path: str, whisper_model) -> str:
+    """Transcribe audio file using Whisper."""
+    result = whisper_model(audio_path, return_timestamps=True)
+    return result["text"]
+def process_youtube(
+    url: str,
+    profile: gr.OAuthProfile | None,
+    progress: gr.Progress = gr.Progress(),
+) -> str:
+    if profile is None:
+        return "Please log in to use this feature."
+    if not url or not url.strip():
+        return "Please enter a YouTube URL."
+    try:
+        progress(0, desc="Initializing...")
+        whisper_model = get_whisper_model()
+        with tempfile.TemporaryDirectory() as tmpdir:
+            progress(0.1, desc="Downloading audio...")
+            downloaded = download_audio(url.strip(), tmpdir)
+            results = []
+            total = len(downloaded)
+            for i, item in enumerate(downloaded):
+                progress((0.1 + 0.9 * (i / total)), desc=f"Transcribing: {item['title']}")
+                if os.path.exists(item["path"]):
+                    transcript = transcribe_audio(item["path"], whisper_model)
+                    results.append(f"## {item['title']}\n\n{transcript}")
+                else:
+                    audio_files = list(Path(tmpdir).glob("*.mp3"))
+                    if audio_files:
+                        transcript = transcribe_audio(str(audio_files[0]), whisper_model)
+                        results.append(f"## {item['title']}\n\n{transcript}")
+            progress(1.0, desc="Done!")
+            return "\n\n---\n\n".join(results) if results else "No audio found to transcribe."
+    except Exception as e:
+        return f"Error: {e!s}"
 with gr.Blocks() as demo:
     gr.Markdown("# Video Analyzer")
+    gr.Markdown("Download and transcribe YouTube videos using Whisper AI")
     gr.LoginButton()
+    # Empty Markdown placeholders; they are the outputs of the two
+    # demo.load calls at the end of this block.
     m1 = gr.Markdown()
     m2 = gr.Markdown()
+    gr.Markdown("---")
+    # URL box and submit button share one row; scale=4 vs scale=1 sets their
+    # relative widths.
+    with gr.Row():
+        url_input = gr.Textbox(
+            label="YouTube URL",
+            placeholder="Enter a YouTube video or playlist URL",
+            scale=4,
+        )
+        submit_btn = gr.Button("Transcribe", variant="primary", scale=1)
+    output = gr.Markdown(label="Transcription")
+    # Only the URL is wired as an explicit input. process_youtube's `profile`
+    # and `progress` parameters are not listed here -- presumably Gradio
+    # injects them from the signature; confirm against the Gradio docs.
+    submit_btn.click(
+        fn=process_youtube,
+        inputs=[url_input],
+        outputs=[output],
+    )
     demo.load(hello, inputs=None, outputs=m1)
     demo.load(list_organizations, inputs=None, outputs=m2)

pyproject.toml CHANGED Viewed

@@ -1,10 +1,14 @@
 [project]
 name = "video-analyzer"
 version = "0.1.0"
-description = "A Gradio application"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "gradio>=6.0.0",
     "huggingface_hub>=0.20.0",
 ]

 [project]
 name = "video-analyzer"
 version = "0.1.0"
+description = "A Gradio application for downloading and transcribing YouTube videos"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "gradio>=6.0.0",
     "huggingface_hub>=0.20.0",
+    "yt-dlp>=2024.1.0",
+    "transformers>=4.36.0",
+    "torch>=2.0.0",
+    "accelerate>=0.25.0",
 ]

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff