Claude committed on
Commit
93bbd17
·
unverified ·
1 Parent(s): 8aa153c

feat: Add YouTube download and Whisper transcription

Browse files

- Add yt-dlp for downloading YouTube videos and playlists
- Add transformers with Whisper for speech-to-text
- Add URL input box that accepts video or playlist URLs
- Require login to use transcription feature
- Show progress during download and transcription

Files changed (3) hide show
  1. app.py +114 -0
  2. pyproject.toml +5 -1
  3. uv.lock +0 -0
app.py CHANGED
@@ -1,7 +1,14 @@
1
  from __future__ import annotations
2
 
 
 
 
 
3
  import gradio as gr
 
 
4
  from huggingface_hub import whoami
 
5
 
6
 
7
  def hello(profile: gr.OAuthProfile | None) -> str:
@@ -19,11 +26,118 @@ def list_organizations(oauth_token: gr.OAuthToken | None) -> str:
19
  return "You don't belong to any organizations."
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  with gr.Blocks() as demo:
23
  gr.Markdown("# Video Analyzer")
 
 
24
  gr.LoginButton()
25
  m1 = gr.Markdown()
26
  m2 = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  demo.load(hello, inputs=None, outputs=m1)
28
  demo.load(list_organizations, inputs=None, outputs=m2)
29
 
 
1
  from __future__ import annotations
2
 
3
+ import os
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
  import gradio as gr
8
+ import torch
9
+ import yt_dlp
10
  from huggingface_hub import whoami
11
+ from transformers import pipeline
12
 
13
 
14
  def hello(profile: gr.OAuthProfile | None) -> str:
 
26
  return "You don't belong to any organizations."
27
 
28
 
29
# Lazily-built speech-recognition pipeline, shared by every request so the
# model weights are loaded only once per process.
_WHISPER_PIPELINE = None


def get_whisper_model():
    """Return the Whisper speech-recognition pipeline, building it on first use.

    The pipeline wraps the ``openai/whisper-base`` checkpoint and is placed on
    CUDA when ``torch.cuda.is_available()`` reports a GPU, otherwise on CPU.
    The first call constructs the pipeline; later calls return the same
    object instead of reloading the model.

    Returns:
        The ``transformers`` automatic-speech-recognition pipeline.
    """
    global _WHISPER_PIPELINE
    if _WHISPER_PIPELINE is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        _WHISPER_PIPELINE = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-base",
            device=device,
        )
    return _WHISPER_PIPELINE
36
+
37
+
38
def download_audio(url: str, output_dir: str) -> list[dict]:
    """Download audio from a YouTube URL (single video or playlist) as MP3.

    Args:
        url: Video or playlist URL handed to yt-dlp.
        output_dir: Directory that receives the extracted MP3 files.

    Returns:
        One ``{"title": ..., "path": ...}`` dict per downloaded entry. ``path``
        is the MP3 file the FFmpeg post-processor is expected to have written
        for that entry. The list is empty when yt-dlp returns no metadata.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
        # Name files by video id rather than title: ids are unique within a
        # playlist, so two entries sharing a title cannot overwrite each other.
        "outtmpl": os.path.join(output_dir, "%(id)s.%(ext)s"),
        "quiet": True,
        "no_warnings": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        if not info:
            return []

        # A playlist result carries its videos under "entries"; a single video
        # is its own (only) entry.
        entries = info["entries"] if "entries" in info else [info]

        # Ask yt-dlp for the filename it actually used (it sanitizes template
        # fields) instead of rebuilding it by hand, then swap the source
        # extension for the ".mp3" produced by the post-processor.
        return [
            {
                "title": entry.get("title") or "Unknown",
                "path": str(Path(ydl.prepare_filename(entry)).with_suffix(".mp3")),
            }
            for entry in entries
            if entry  # yt-dlp may leave falsy placeholders for failed entries
        ]
68
+
69
+
70
def transcribe_audio(audio_path: str, whisper_model) -> str:
    """Run *whisper_model* on one audio file and return the recognized text.

    The model is called as ``whisper_model(audio_path, return_timestamps=True)``
    and only the ``"text"`` field of its result is returned; any timestamp
    data in the result is discarded.
    """
    transcription = whisper_model(audio_path, return_timestamps=True)
    return transcription["text"]
74
+
75
+
76
def process_youtube(
    url: str,
    profile: gr.OAuthProfile | None,
    progress: gr.Progress = gr.Progress(),
) -> str:
    """Download the audio behind *url* and return its transcription as Markdown.

    Args:
        url: YouTube video or playlist URL typed by the user.
        profile: OAuth profile of the signed-in user; ``None`` when nobody is
            logged in, in which case nothing is downloaded.
        progress: Gradio progress tracker updated during download and
            transcription.

    Returns:
        One ``## <title>`` section per transcribed entry, separated by
        horizontal rules, or a short human-readable message when the user is
        not logged in, the URL is blank, no audio was produced, or an error
        occurred.
    """
    if profile is None:
        return "Please log in to use this feature."

    if not url or not url.strip():
        return "Please enter a YouTube URL."

    try:
        progress(0, desc="Initializing...")
        whisper_model = get_whisper_model()

        # Audio lives only for the duration of the request; the directory and
        # its files are removed when the block exits.
        with tempfile.TemporaryDirectory() as tmpdir:
            progress(0.1, desc="Downloading audio...")
            downloaded = download_audio(url.strip(), tmpdir)

            # MP3 files on disk that no entry's expected path accounts for.
            # They serve as a best-effort fallback when an expected path is
            # missing; each one is handed out at most once so two entries can
            # never be given the transcript of the same file.
            expected = {os.path.abspath(item["path"]) for item in downloaded}
            unclaimed = sorted(
                str(mp3)
                for mp3 in Path(tmpdir).glob("*.mp3")
                if os.path.abspath(mp3) not in expected
            )

            results = []
            total = len(downloaded)
            for i, item in enumerate(downloaded):
                progress(0.1 + 0.9 * (i / total), desc=f"Transcribing: {item['title']}")
                audio_path = item["path"]
                if not os.path.exists(audio_path):
                    if not unclaimed:
                        # Nothing on disk can stand in for this entry; skip it.
                        continue
                    audio_path = unclaimed.pop(0)
                transcript = transcribe_audio(audio_path, whisper_model)
                results.append(f"## {item['title']}\n\n{transcript}")

            progress(1.0, desc="Done!")
            return "\n\n---\n\n".join(results) if results else "No audio found to transcribe."

    except Exception as e:
        # UI boundary: report any download/transcription failure to the user
        # as text instead of letting the event handler raise.
        return f"Error: {e!s}"
113
+
114
+
115
# Page layout and event wiring for the app.
with gr.Blocks() as demo:
    # Header.
    gr.Markdown("# Video Analyzer")
    gr.Markdown("Download and transcribe YouTube videos using Whisper AI")

    # Login control plus two placeholders filled in on page load.
    gr.LoginButton()
    m1 = gr.Markdown()
    m2 = gr.Markdown()

    gr.Markdown("---")

    # URL box and submit button share one row; the box gets 4/5 of the width.
    with gr.Row():
        url_input = gr.Textbox(
            label="YouTube URL",
            placeholder="Enter a YouTube video or playlist URL",
            scale=4,
        )
        submit_btn = gr.Button("Transcribe", variant="primary", scale=1)

    output = gr.Markdown(label="Transcription")

    # Only the URL is passed explicitly as an input to process_youtube.
    submit_btn.click(
        fn=process_youtube,
        inputs=[url_input],
        outputs=[output],
    )

    demo.load(hello, inputs=None, outputs=m1)
    demo.load(list_organizations, inputs=None, outputs=m2)
143
 
pyproject.toml CHANGED
@@ -1,10 +1,14 @@
1
  [project]
2
  name = "video-analyzer"
3
  version = "0.1.0"
4
- description = "A Gradio application"
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "gradio>=6.0.0",
9
  "huggingface_hub>=0.20.0",
 
 
 
 
10
  ]
 
1
  [project]
2
  name = "video-analyzer"
3
  version = "0.1.0"
4
+ description = "A Gradio application for downloading and transcribing YouTube videos"
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "gradio>=6.0.0",
9
  "huggingface_hub>=0.20.0",
10
+ "yt-dlp>=2024.1.0",
11
+ "transformers>=4.36.0",
12
+ "torch>=2.0.0",
13
+ "accelerate>=0.25.0",
14
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff