Improve YouTube download with CLI subprocess and multi-browser cookie support
Browse files
app.py
CHANGED
|
@@ -266,43 +266,92 @@ footer { display: none !important; }
|
|
| 266 |
# ============================================================================
|
| 267 |
|
| 268 |
def download_youtube(url: str) -> tuple:
|
| 269 |
-
"""Download audio from YouTube"""
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
return None, "yt-dlp not installed"
|
| 272 |
|
| 273 |
try:
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
'
|
| 283 |
-
'
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
# Try with browser cookies on local, skip on HF Space
|
| 294 |
if not IS_HF_SPACE:
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
title = info.get('title', 'Unknown')
|
| 300 |
-
return str(output_path), title
|
| 301 |
except Exception as e:
|
| 302 |
-
|
| 303 |
-
if 'Sign in to confirm' in error_msg or 'bot' in error_msg.lower():
|
| 304 |
-
return None, "YouTube requires authentication. Please upload the audio file directly instead, or try a different video."
|
| 305 |
-
return None, error_msg
|
| 306 |
|
| 307 |
@spaces.GPU(duration=120)
|
| 308 |
def separate_stems(audio_path: str, progress=None) -> dict:
|
|
|
|
| 266 |
# ============================================================================
|
| 267 |
|
| 268 |
def download_youtube(url: str) -> tuple:
    """Download the audio track of a YouTube video as WAV via the yt-dlp CLI.

    Cookie sources are tried in order until one download succeeds: a
    ``cookies.txt`` file next to this module (if present), then the cookie
    stores of several browsers (skipped when ``IS_HF_SPACE`` is set), and
    finally no cookies at all.

    Args:
        url: Address of the video to download. It is passed to yt-dlp after a
            ``--`` separator so it can never be parsed as a command-line option.

    Returns:
        ``(audio_path, title)`` on success, where ``audio_path`` is the WAV
        file path as a string and ``title`` is the video title (``'Unknown'``
        if it could not be fetched). On failure, ``(None, error_message)``.
    """
    import subprocess
    import shutil

    yt_dlp_path = shutil.which('yt-dlp')
    if not yt_dlp_path:
        return None, "yt-dlp not installed"

    try:
        output_dir = OUTPUT_DIR / f"yt_{datetime.now().strftime('%H%M%S')}"
        output_dir.mkdir(parents=True, exist_ok=True)
        audio_path = output_dir / "audio.wav"
        # Build the template from the directory instead of string-replacing
        # '.wav', which would also rewrite a '.wav' inside a directory name.
        output_template = str(output_dir / "audio.%(ext)s")

        # Check for cookies.txt file
        cookies_file = Path(__file__).parent / "cookies.txt"

        def build_cmd(with_cookies=None):
            cmd = [yt_dlp_path, '--socket-timeout', '60', '--retries', '5', '--no-warnings']
            if with_cookies == 'file':
                # Never fall through to '--cookies-from-browser file' if the
                # file vanished after the source list was built.
                if cookies_file.exists():
                    cmd.extend(['--cookies', str(cookies_file)])
            elif with_cookies and not IS_HF_SPACE:
                cmd.extend(['--cookies-from-browser', with_cookies])
            return cmd

        # Try different cookie sources (skip browser cookies on HF Space)
        cookie_sources = []
        if cookies_file.exists():
            cookie_sources.append('file')
        if not IS_HF_SPACE:
            cookie_sources.extend(['chrome', 'edge', 'firefox', 'brave'])
        cookie_sources.append(None)  # Try without cookies last

        title = 'Unknown'
        download_success = False
        last_error = ""

        for cookie_source in cookie_sources:
            source_name = "cookies.txt" if cookie_source == 'file' else (cookie_source or 'no cookies')
            print(f"Trying YouTube download with {source_name}...")

            base_cmd = build_cmd(cookie_source)

            # The title is cosmetic: a slow or failed lookup must not abort
            # the download attempt itself.
            try:
                title_result = subprocess.run(
                    base_cmd + ['--print', 'title', '--no-download', '--', url],
                    capture_output=True, text=True, timeout=30,
                )
            except subprocess.TimeoutExpired:
                title_result = None
            if title_result is not None and title_result.returncode == 0:
                fetched_title = title_result.stdout.strip()
                if fetched_title:
                    title = fetched_title

            # Download audio
            audio_cmd = base_cmd + [
                '-f', 'bestaudio/best',
                '-x', '--audio-format', 'wav',
                '-o', output_template,
                '--', url,
            ]

            print(f"Downloading: {title}")
            result = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=300)

            if result.returncode == 0:
                download_success = True
                break

            last_error = result.stderr if result.stderr else "Unknown error"
            auth_failure = 'Sign in' in last_error or 'bot' in last_error.lower()
            # A browser attempt can fail only because that browser is missing
            # or its cookie store is unreadable, so always move on to the next
            # source. For cookies.txt / no cookies, a non-auth error will not
            # be cured by different cookies, so stop early.
            used_browser_cookies = cookie_source not in (None, 'file')
            if not auth_failure and not used_browser_cookies:
                break

        if not download_success:
            if IS_HF_SPACE:
                return None, "YouTube requires authentication on HuggingFace. Please upload the audio file directly."
            return None, f"{last_error}\n\nTry uploading the audio file directly instead."

        # yt-dlp reported success; make sure the converted file really exists
        # rather than handing callers a path that points nowhere.
        if not audio_path.exists():
            return None, "Download finished but no WAV file was produced.\n\nTry uploading the audio file directly instead."

        return str(audio_path), title

    except subprocess.TimeoutExpired:
        return None, "Download timed out. Try a shorter video."
    except Exception as e:
        return None, str(e)
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
@spaces.GPU(duration=120)
|
| 357 |
def separate_stems(audio_path: str, progress=None) -> dict:
|