Kalp97 committed on
Commit
1b24485
·
verified ·
1 Parent(s): a2e872f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +125 -0
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import whisper
 
3
 
4
  models = {}
5
 
@@ -34,6 +35,42 @@ def devanagari_to_roman(text):
34
  result.append(DEVA_MAP.get(ch, ch))
35
  return ''.join(result)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def transcribe(file, model_name, language, show_timestamps, translate):
38
  if file is None:
39
  return "Please upload a video or audio file.", ""
@@ -104,6 +141,67 @@ def save_transcript(text):
104
  f.write(text)
105
  return path
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  custom_css = """
108
  @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
109
 
@@ -312,6 +410,16 @@ by Kalpi Edition
312
 
313
  with gr.Row():
314
  with gr.Column(scale=5):
 
 
 
 
 
 
 
 
 
 
315
  file_input = gr.File(
316
  label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
317
  )
@@ -368,6 +476,23 @@ by Kalpi Edition
368
  inputs=[file_input, model_choice, language, timestamps, translate],
369
  outputs=[output, plain_output]
370
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
372
 
373
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import whisper
3
+ import yt_dlp
4
 
5
  models = {}
6
 
 
35
  result.append(DEVA_MAP.get(ch, ch))
36
  return ''.join(result)
37
 
38
+
39
def download_from_url(url):
    """Download the audio track behind ``url`` as an MP3 using yt-dlp.

    The best available audio stream is fetched into a fresh temporary
    directory and converted to MP3 by yt-dlp's ``FFmpegExtractAudio``
    post-processor.

    Args:
        url: Address of the media page to download from.

    Returns:
        A ``(path, title)`` tuple: the path of the MP3 inside the temporary
        directory and the title reported by yt-dlp (``'video'`` when the
        metadata has no title). The caller owns the file and its directory
        and is responsible for deleting them.

    Raises:
        RuntimeError: If yt-dlp fails, or if it finishes without leaving an
            MP3 in the temporary directory. The temporary directory is
            removed before raising, so failures do not leak disk space.
    """
    # Imported locally so the function does not depend on module-level
    # imports that are not guaranteed to exist in this file.
    import os
    import shutil
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        'format': 'bestaudio/best',
        # yt-dlp fills in the real extension; the post-processor then
        # rewrites the result to .mp3.
        'outtmpl': os.path.join(tmp_dir, 'audio.%(ext)s'),
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # A single fixed desktop-browser User-Agent (no rotation happens).
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        'cookiefile': None,
        'socket_timeout': 30,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            title = info.get('title', 'video')
    except Exception as e:
        # Nothing usable was produced: drop the directory and keep the
        # original exception attached as the cause for debugging.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise RuntimeError(f"Could not download from URL: {str(e)}") from e

    # Find the downloaded mp3
    for f in os.listdir(tmp_dir):
        if f.endswith('.mp3'):
            return os.path.join(tmp_dir, f), title

    shutil.rmtree(tmp_dir, ignore_errors=True)
    raise RuntimeError("Download succeeded but audio file not found.")
73
+
74
  def transcribe(file, model_name, language, show_timestamps, translate):
75
  if file is None:
76
  return "Please upload a video or audio file.", ""
 
141
  f.write(text)
142
  return path
143
 
144
+
145
+
146
def download_video_only(url):
    """Download the video behind ``url`` as an MP4 and return its path.

    The URL must point at one of the supported platforms. The check is made
    against the parsed hostname (exact domain or a subdomain of it), not
    against the raw string, so look-alike hosts such as ``netflix.com`` or
    a supported domain hidden in a query string are rejected.

    Args:
        url: Address pasted by the user; surrounding whitespace is ignored.

    Returns:
        ``(path, title)`` on success, where ``path`` is the MP4 inside a
        fresh temporary directory that the caller owns. On any failure
        ``(None, message)`` is returned, where ``message`` describes the
        problem; the temporary directory is removed in that case.
    """
    # Imported locally so the function does not depend on module-level
    # imports that are not guaranteed to exist in this file.
    import os
    import shutil
    import tempfile
    from urllib.parse import urlparse

    if not url or not url.strip():
        return None, "Please paste a valid URL."
    url = url.strip()

    supported = ('instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
                 'x.com', 'facebook.com', 'fb.watch')
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # Scheme-less input such as "youtube.com/watch?v=...".
            parsed = urlparse('//' + url)
        host = parsed.hostname or ''
    except ValueError:
        host = ''
    if not any(host == d or host.endswith('.' + d) for d in supported):
        return None, "Unsupported URL."

    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(tmp_dir, 'video.%(ext)s'),
        'quiet': True,
        'no_warnings': True,
        'merge_output_format': 'mp4',
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            title = info.get('title', 'video')
        for f in os.listdir(tmp_dir):
            if f.endswith('.mp4'):
                return os.path.join(tmp_dir, f), title
        # No MP4 was produced; do not leave the directory behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return None, "Download succeeded but file not found."
    except Exception as e:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return None, f"Download failed: {str(e)}"
177
+
178
def transcribe_url(url, model_name, language, show_timestamps, translate):
    """Download the audio behind ``url`` and run it through ``transcribe``.

    The URL must point at one of the supported platforms. The check is made
    against the parsed hostname (exact domain or a subdomain of it), not
    against the raw string, so look-alike hosts such as ``netflix.com`` or
    a supported domain hidden in a query string are rejected.

    Args:
        url: Address pasted by the user; surrounding whitespace is ignored.
        model_name, language, show_timestamps, translate: Passed through
            unchanged to ``transcribe``.

    Returns:
        Whatever ``transcribe`` returns for the downloaded audio. For a
        missing or unsupported URL, or when downloading or transcription
        raises, a ``(message, "")`` tuple is returned instead.
    """
    # Imported locally so the function does not depend on module-level
    # imports that are not guaranteed to exist in this file.
    import contextlib
    import os
    from types import SimpleNamespace
    from urllib.parse import urlparse

    if not url or not url.strip():
        return "Please paste a valid URL.", ""
    url = url.strip()

    # Validate URL is from supported platforms
    supported = ('instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
                 'x.com', 'facebook.com', 'fb.watch', 'fb.com', 'tiktok.com')
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # Scheme-less input such as "youtube.com/watch?v=...".
            parsed = urlparse('//' + url)
        host = parsed.hostname or ''
    except ValueError:
        host = ''
    if not any(host == d or host.endswith('.' + d) for d in supported):
        return "Unsupported URL. Please use Instagram, YouTube, Twitter/X, or Facebook links.", ""

    tmp_path = None
    try:
        tmp_path, _title = download_from_url(url)
        # transcribe() is handed an object exposing the path as ``.name``,
        # mirroring how the uploaded-file input is passed to it.
        return transcribe(SimpleNamespace(name=tmp_path), model_name,
                          language, show_timestamps, translate)
    except Exception as e:
        return f"Error: {str(e)}", ""
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
            # Also drop the per-download directory. rmdir only succeeds on
            # an empty directory, so nothing else can be deleted by mistake.
            with contextlib.suppress(OSError):
                os.rmdir(os.path.dirname(tmp_path))
204
+
205
  custom_css = """
206
  @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
207
 
 
410
 
411
  with gr.Row():
412
  with gr.Column(scale=5):
413
+ url_input = gr.Textbox(
414
+ label="Paste URL (Instagram · YouTube · Twitter/X · Facebook)",
415
+ placeholder="https://www.instagram.com/reel/...",
416
+ lines=1
417
+ )
418
+ with gr.Row():
419
+ url_btn = gr.Button("Transcribe URL →", variant="primary")
420
+ download_btn_url = gr.Button("Download video", variant="secondary")
421
+ download_output = gr.File(label="Download", visible=False)
422
+ gr.Markdown("<div style='text-align:center;font-size:11px;color:#555;margin:4px 0'>— or upload a file —</div>")
423
  file_input = gr.File(
424
  label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
425
  )
 
476
  inputs=[file_input, model_choice, language, timestamps, translate],
477
  outputs=[output, plain_output]
478
  )
479
+ url_btn.click(
480
+ fn=transcribe_url,
481
+ inputs=[url_input, model_choice, language, timestamps, translate],
482
+ outputs=[output, plain_output]
483
+ )
484
+
485
def handle_download(url):
    """Download the video for ``url`` and update the download file widget.

    Args:
        url: Address taken from the URL textbox.

    Returns:
        A ``gr.File`` made visible and pointing at the downloaded file when
        the download succeeds; otherwise a hidden ``gr.File``.
    """
    path, detail = download_video_only(url)
    if path:
        # On success the second element is the video title.
        return gr.File(value=path, visible=True, label=f"Download: {detail}")
    # On failure the second element is the error message; show it to the
    # user instead of silently hiding the widget.
    gr.Warning(detail)
    return gr.File(visible=False)
490
+
491
+ download_btn_url.click(
492
+ fn=handle_download,
493
+ inputs=[url_input],
494
+ outputs=[download_output]
495
+ )
496
  download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
497
 
498
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -4,3 +4,5 @@ openai-whisper
4
  gunicorn
5
 
6
  indic-transliteration
 
 
 
4
  gunicorn
5
 
6
  indic-transliteration
7
+
8
+ yt-dlp