rlackey committed on
Commit
49b9f7c
·
1 Parent(s): f2efdf2

Improve YouTube download with CLI subprocess and multi-browser cookie support

Browse files
Files changed (1) hide show
  1. app.py +80 -31
app.py CHANGED
@@ -266,43 +266,92 @@ footer { display: none !important; }
266
  # ============================================================================
267
 
def download_youtube(url: str) -> tuple:
    """Fetch the audio of a YouTube video as WAV through the yt_dlp Python API.

    Args:
        url: Address of the video to download.

    Returns:
        ``(path, title)`` on success, where ``path`` is the string form of the
        timestamped ``.wav`` path under ``OUTPUT_DIR`` and ``title`` is the
        video title reported by yt_dlp (``'Unknown'`` when absent).
        ``(None, message)`` when yt_dlp is unavailable or the download raises.
    """
    if not HAS_YTDLP:
        return None, "yt-dlp not installed"

    try:
        stamp = datetime.now().strftime('%H%M%S')
        output_path = OUTPUT_DIR / f"yt_{stamp}.wav"

        # The output template is the target path without its ".wav" ending;
        # the FFmpegExtractAudio post-processor is asked for the wav codec.
        wav_extractor = {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }
        youtube_args = {
            'player_client': ['web', 'android', 'ios'],
        }
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': str(output_path).replace('.wav', ''),
            'postprocessors': [wav_extractor],
            'quiet': True,
            'no_warnings': True,
            'socket_timeout': 60,
            'retries': 5,
            'extractor_args': {'youtube': youtube_args},
        }

        # Browser cookies are only requested when not running on an HF Space.
        if not IS_HF_SPACE:
            ydl_opts['cookiesfrombrowser'] = ('chrome',)

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            title = info.get('title', 'Unknown')
            return str(output_path), title
    except Exception as exc:
        message = str(exc)
        needs_auth = 'Sign in to confirm' in message or 'bot' in message.lower()
        if not needs_auth:
            return None, message
        # Authentication / bot-check failures get a friendlier hint.
        return None, "YouTube requires authentication. Please upload the audio file directly instead, or try a different video."
306
 
307
  @spaces.GPU(duration=120)
308
  def separate_stems(audio_path: str, progress=None) -> dict:
 
266
  # ============================================================================
267
 
def download_youtube(url: str) -> tuple:
    """Download the audio track of a YouTube video as WAV using the yt-dlp CLI.

    Cookie sources are tried in order: a ``cookies.txt`` next to this file
    (if present), then several browsers (skipped on an HF Space), and finally
    no cookies at all. The first attempt whose download succeeds wins.

    Args:
        url: Address of the video to download. It is passed after a ``--``
            separator so a value starting with ``-`` cannot be parsed as a
            yt-dlp option.

    Returns:
        ``(path, title)`` on success, where ``path`` is the string path of
        ``audio.wav`` inside a fresh directory under ``OUTPUT_DIR`` and
        ``title`` is the video title (``'Unknown'`` if it could not be read).
        ``(None, message)`` on any failure; this function does not raise.
    """
    import subprocess
    import shutil

    yt_dlp_path = shutil.which('yt-dlp')
    if not yt_dlp_path:
        return None, "yt-dlp not installed"

    try:
        # Microseconds keep two requests in the same second (or a run at the
        # same clock time on another day) from sharing a directory that may
        # already hold a stale audio.wav.
        output_dir = OUTPUT_DIR / f"yt_{datetime.now().strftime('%H%M%S_%f')}"
        output_dir.mkdir(parents=True, exist_ok=True)
        audio_path = output_dir / "audio.wav"
        output_template = str(output_dir / "audio.%(ext)s")

        # Optional cookies.txt file shipped alongside this module.
        cookies_file = Path(__file__).parent / "cookies.txt"

        def build_cmd(with_cookies=None):
            """Return the shared yt-dlp arguments for one cookie source."""
            cmd = [yt_dlp_path, '--socket-timeout', '60', '--retries', '5', '--no-warnings']
            if with_cookies == 'file':
                cmd.extend(['--cookies', str(cookies_file)])
            elif with_cookies and not IS_HF_SPACE:
                cmd.extend(['--cookies-from-browser', with_cookies])
            return cmd

        # Try different cookie sources (skip browser cookies on HF Space).
        cookie_sources = []
        if cookies_file.exists():
            cookie_sources.append('file')
        if not IS_HF_SPACE:
            cookie_sources.extend(['chrome', 'edge', 'firefox', 'brave'])
        cookie_sources.append(None)  # Try without cookies last

        title = 'Unknown'
        download_success = False
        last_error = ""

        for cookie_source in cookie_sources:
            source_name = "cookies.txt" if cookie_source == 'file' else (cookie_source or 'no cookies')
            print(f"Trying YouTube download with {source_name}...")

            base_cmd = build_cmd(cookie_source)

            # The title is cosmetic: a slow or failed lookup must not abort
            # the download itself, so its timeout is handled locally.
            title_cmd = base_cmd + ['--print', 'title', '--no-download', '--', url]
            try:
                title_result = subprocess.run(title_cmd, capture_output=True, text=True, timeout=30)
            except subprocess.TimeoutExpired:
                title_result = None
            if title_result is not None and title_result.returncode == 0:
                fetched_title = title_result.stdout.strip()
                if fetched_title:
                    title = fetched_title

            # Download audio and let yt-dlp convert it to WAV.
            audio_cmd = base_cmd + [
                '-f', 'bestaudio/best',
                '-x', '--audio-format', 'wav',
                '-o', output_template,
                '--', url,
            ]

            print(f"Downloading: {title}")
            result = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=300)

            if result.returncode == 0:
                download_success = True
                break

            last_error = result.stderr if result.stderr else "Unknown error"
            # Only move on to the next cookie source when the failure looks
            # related to authentication or cookie loading; any other error
            # (bad URL, removed video, ...) would fail the same way again.
            if not _is_retryable_youtube_error(last_error):
                break

        if not download_success:
            if IS_HF_SPACE:
                return None, "YouTube requires authentication on HuggingFace. Please upload the audio file directly."
            return None, f"{last_error}\n\nTry uploading the audio file directly instead."

        # Never hand back a path that was not actually written.
        if not audio_path.exists():
            return None, "Download finished but no WAV file was produced. Try uploading the audio file directly instead."

        return str(audio_path), title

    except subprocess.TimeoutExpired:
        return None, "Download timed out. Try a shorter video."
    except Exception as e:
        return None, str(e)


def _is_retryable_youtube_error(error_text: str) -> bool:
    """Return True when a yt-dlp failure justifies trying another cookie source.

    Matches the sign-in / bot-check wording the caller already looked for and,
    additionally, any mention of cookies so that a browser whose cookie store
    cannot be read does not end the fallback chain.
    NOTE(review): assumes yt-dlp's cookie-extraction errors contain the word
    "cookie" -- confirm against the yt-dlp version in use.
    """
    lowered = error_text.lower()
    return 'Sign in' in error_text or 'bot' in lowered or 'cookie' in lowered
 
 
 
355
 
356
  @spaces.GPU(duration=120)
357
  def separate_stems(audio_path: str, progress=None) -> dict: