LiamKhoaLe committed on
Commit
bef4025
·
1 Parent(s): 07b891e

Update YouTube video router and chunker

Browse files
Files changed (1) hide show
  1. app.py +30 -9
app.py CHANGED
@@ -214,7 +214,7 @@ def _return_yt_html_embed(yt_url):
214
  )
215
  return HTML_str
216
 
217
- def download_yt_audio(yt_url, filename):
218
  info_loader = youtube_dl.YoutubeDL()
219
 
220
  try:
@@ -237,21 +237,41 @@ def download_yt_audio(yt_url, filename):
237
  file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
238
  raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
239
 
240
- ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
243
- try:
244
- ydl.download([yt_url])
245
- except youtube_dl.utils.ExtractorError as err:
246
- raise gr.Error(str(err))
 
 
 
 
247
 
248
  @spaces.GPU(duration=120)
249
- def yt_transcribe(yt_url, task, summarize=False, max_filesize=75.0):
250
  html_embed_str = _return_yt_html_embed(yt_url)
251
 
252
  with tempfile.TemporaryDirectory() as tmpdirname:
253
  filepath = os.path.join(tmpdirname, "video.mp4")
254
- download_yt_audio(yt_url, filepath)
 
 
 
255
  with open(filepath, "rb") as f:
256
  inputs = f.read()
257
 
@@ -329,6 +349,7 @@ yt_transcribe = gr.Interface(
329
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
330
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
331
  gr.Checkbox(label="Summarize with Gemini", value=False),
 
332
  ],
333
  outputs=[
334
  "html",
 
214
  )
215
  return HTML_str
216
 
217
+ def download_yt_audio(yt_url, filename, cookies_txt: str | None = None):
218
  info_loader = youtube_dl.YoutubeDL()
219
 
220
  try:
 
237
  file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
238
  raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
239
 
240
+ ydl_opts = {
241
+ "outtmpl": filename,
242
+ "format": "bestaudio/best",
243
+ "quiet": True,
244
+ "noplaylist": True,
245
+ "retries": 3,
246
+ }
247
+ cookie_path = None
248
+ if cookies_txt and cookies_txt.strip():
249
+ tf = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
250
+ tf.write(cookies_txt)
251
+ tf.close()
252
+ cookie_path = tf.name
253
+ ydl_opts["cookiefile"] = cookie_path
254
 
255
+ try:
256
+ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
257
+ try:
258
+ ydl.download([yt_url])
259
+ except youtube_dl.utils.ExtractorError as err:
260
+ raise gr.Error(str(err))
261
+ finally:
262
+ if cookie_path and os.path.exists(cookie_path):
263
+ os.unlink(cookie_path)
264
 
265
  @spaces.GPU(duration=120)
266
+ def yt_transcribe(yt_url, task, summarize=False, cookies_txt=None, max_filesize=75.0):
267
  html_embed_str = _return_yt_html_embed(yt_url)
268
 
269
  with tempfile.TemporaryDirectory() as tmpdirname:
270
  filepath = os.path.join(tmpdirname, "video.mp4")
271
+ try:
272
+ download_yt_audio(yt_url, filepath, cookies_txt=cookies_txt)
273
+ except gr.Error as e:
274
+ raise gr.Error(str(e) + "\n\nTip: Provide exported YouTube cookies (Netscape format) in the optional cookies box if the video requires sign-in or captcha.")
275
  with open(filepath, "rb") as f:
276
  inputs = f.read()
277
 
 
349
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
350
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
351
  gr.Checkbox(label="Summarize with Gemini", value=False),
352
+ gr.Textbox(lines=4, placeholder="Optional: paste exported YouTube cookies in Netscape format here if the video requires sign-in.", label="YouTube cookies (optional)"),
353
  ],
354
  outputs=[
355
  "html",