Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -351,14 +351,8 @@ def download_audio(url, cookie_file=None):
|
|
| 351 |
return download_from_youtube(url, cookie_path)
|
| 352 |
|
| 353 |
def download_from_youtube(url, cookie_path):
|
| 354 |
-
#
|
| 355 |
-
|
| 356 |
-
'format': 'bestvideo+bestaudio/best',
|
| 357 |
-
'postprocessors': [{
|
| 358 |
-
'key': 'FFmpegExtractAudio',
|
| 359 |
-
'preferredcodec': 'wav',
|
| 360 |
-
'preferredquality': '192',
|
| 361 |
-
}],
|
| 362 |
'outtmpl': 'ytdl/%(title)s.%(ext)s',
|
| 363 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
|
| 364 |
'geo_bypass': True,
|
|
@@ -370,69 +364,116 @@ def download_from_youtube(url, cookie_path):
|
|
| 370 |
'ignoreerrors': False,
|
| 371 |
'no_check_certificate': True,
|
| 372 |
'verbose': True,
|
| 373 |
-
'merge_output_format': 'mp4',
|
| 374 |
}
|
| 375 |
|
|
|
|
| 376 |
try:
|
| 377 |
-
logger.info("Attempting video+audio download
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
info_dict = ydl.extract_info(url, download=True)
|
| 380 |
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
|
| 381 |
|
| 382 |
-
if
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
logger.
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
'format':
|
| 394 |
'postprocessors': [{
|
| 395 |
'key': 'FFmpegExtractAudio',
|
| 396 |
'preferredcodec': 'wav',
|
| 397 |
-
'preferredquality': '192',
|
| 398 |
}],
|
| 399 |
-
|
| 400 |
-
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
|
| 401 |
-
'geo_bypass': True,
|
| 402 |
-
'force_ipv4': True,
|
| 403 |
-
'referer': 'https://www.youtube.com/',
|
| 404 |
-
'noplaylist': True,
|
| 405 |
-
'cookiefile': cookie_path,
|
| 406 |
-
'extractor_retries': 5,
|
| 407 |
-
'ignoreerrors': False,
|
| 408 |
-
'no_check_certificate': True,
|
| 409 |
-
'verbose': True,
|
| 410 |
-
}
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
if not os.path.exists(file_path):
|
| 419 |
-
return None, "Downloaded audio file not found after audio-only processing", None
|
| 420 |
-
|
| 421 |
sample_rate, data = scipy.io.wavfile.read(file_path)
|
| 422 |
-
return file_path, "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
return None, f"YouTube download error: {str(e)}", None
|
| 431 |
-
|
| 432 |
-
except yt_dlp.utils.GeoRestrictedError:
|
| 433 |
-
return None, "Video is geo-restricted in your region", None
|
| 434 |
except Exception as e:
|
| 435 |
-
|
|
|
|
|
|
|
| 436 |
|
| 437 |
def download_from_google_drive(url):
|
| 438 |
temp_output_path = 'ytdl/gdrive_temp_audio'
|
|
|
|
| 351 |
return download_from_youtube(url, cookie_path)
|
| 352 |
|
| 353 |
def download_from_youtube(url, cookie_path):
|
| 354 |
+
# Common options
|
| 355 |
+
base_opts = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
'outtmpl': 'ytdl/%(title)s.%(ext)s',
|
| 357 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
|
| 358 |
'geo_bypass': True,
|
|
|
|
| 364 |
'ignoreerrors': False,
|
| 365 |
'no_check_certificate': True,
|
| 366 |
'verbose': True,
|
|
|
|
| 367 |
}
|
| 368 |
|
| 369 |
+
# Strategy 1: Video+audio (best quality)
|
| 370 |
try:
|
| 371 |
+
logger.info("Attempting video+audio download")
|
| 372 |
+
ydl_opts = base_opts.copy()
|
| 373 |
+
ydl_opts.update({
|
| 374 |
+
'format': 'bestvideo+bestaudio/best',
|
| 375 |
+
'postprocessors': [{
|
| 376 |
+
'key': 'FFmpegExtractAudio',
|
| 377 |
+
'preferredcodec': 'wav',
|
| 378 |
+
}],
|
| 379 |
+
'merge_output_format': 'mp4',
|
| 380 |
+
})
|
| 381 |
+
|
| 382 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 383 |
info_dict = ydl.extract_info(url, download=True)
|
| 384 |
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
|
| 385 |
|
| 386 |
+
if os.path.exists(file_path):
|
| 387 |
+
sample_rate, data = scipy.io.wavfile.read(file_path)
|
| 388 |
+
return file_path, "YouTube video+audio download successful", (sample_rate, data)
|
| 389 |
+
else:
|
| 390 |
+
logger.warning("Video+audio download succeeded but output file missing")
|
| 391 |
+
except Exception as e:
|
| 392 |
+
logger.warning(f"Video+audio download failed: {str(e)}")
|
| 393 |
+
|
| 394 |
+
# Strategy 2: Audio-only (best quality)
|
| 395 |
+
try:
|
| 396 |
+
logger.info("Attempting audio-only download")
|
| 397 |
+
ydl_opts = base_opts.copy()
|
| 398 |
+
ydl_opts.update({
|
| 399 |
+
'format': 'bestaudio/best',
|
| 400 |
+
'postprocessors': [{
|
| 401 |
+
'key': 'FFmpegExtractAudio',
|
| 402 |
+
'preferredcodec': 'wav',
|
| 403 |
+
}],
|
| 404 |
+
})
|
| 405 |
+
|
| 406 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 407 |
+
info_dict = ydl.extract_info(url, download=True)
|
| 408 |
+
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
|
| 409 |
|
| 410 |
+
if os.path.exists(file_path):
|
| 411 |
+
sample_rate, data = scipy.io.wavfile.read(file_path)
|
| 412 |
+
return file_path, "YouTube audio-only download successful", (sample_rate, data)
|
| 413 |
+
else:
|
| 414 |
+
logger.warning("Audio-only download succeeded but output file missing")
|
| 415 |
+
except Exception as e:
|
| 416 |
+
logger.warning(f"Audio-only download failed: {str(e)}")
|
| 417 |
+
|
| 418 |
+
# Strategy 3: Specific format IDs (common audio formats)
|
| 419 |
+
format_ids = [
|
| 420 |
+
'140', # m4a 128k
|
| 421 |
+
'139', # m4a 48k
|
| 422 |
+
'251', # webm 160k (opus)
|
| 423 |
+
'250', # webm 70k (opus)
|
| 424 |
+
'249', # webm 50k (opus)
|
| 425 |
+
]
|
| 426 |
|
| 427 |
+
for fid in format_ids:
|
| 428 |
+
try:
|
| 429 |
+
logger.info(f"Attempting download with format ID: {fid}")
|
| 430 |
+
ydl_opts = base_opts.copy()
|
| 431 |
+
ydl_opts.update({
|
| 432 |
+
'format': fid,
|
| 433 |
'postprocessors': [{
|
| 434 |
'key': 'FFmpegExtractAudio',
|
| 435 |
'preferredcodec': 'wav',
|
|
|
|
| 436 |
}],
|
| 437 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 440 |
+
info_dict = ydl.extract_info(url, download=True)
|
| 441 |
+
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
|
| 442 |
+
|
| 443 |
+
if os.path.exists(file_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
sample_rate, data = scipy.io.wavfile.read(file_path)
|
| 445 |
+
return file_path, f"Download successful with format {fid}", (sample_rate, data)
|
| 446 |
+
except Exception as e:
|
| 447 |
+
logger.warning(f"Download with format {fid} failed: {str(e)}")
|
| 448 |
+
|
| 449 |
+
# Strategy 4: Direct URL extraction
|
| 450 |
+
try:
|
| 451 |
+
logger.info("Attempting direct URL extraction")
|
| 452 |
+
ydl_opts = base_opts.copy()
|
| 453 |
+
ydl_opts.update({
|
| 454 |
+
'format': 'best',
|
| 455 |
+
'forceurl': True,
|
| 456 |
+
'quiet': True,
|
| 457 |
+
})
|
| 458 |
+
|
| 459 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 460 |
+
info_dict = ydl.extract_info(url, download=False)
|
| 461 |
+
direct_url = info_dict.get('url')
|
| 462 |
|
| 463 |
+
if direct_url:
|
| 464 |
+
temp_path = 'ytdl/direct_audio.wav'
|
| 465 |
+
ffmpeg_command = [
|
| 466 |
+
"ffmpeg", "-i", direct_url, "-c", "copy", temp_path
|
| 467 |
+
]
|
| 468 |
+
subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
|
| 469 |
|
| 470 |
+
if os.path.exists(temp_path):
|
| 471 |
+
sample_rate, data = scipy.io.wavfile.read(temp_path)
|
| 472 |
+
return temp_path, "Direct URL download successful", (sample_rate, data)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
except Exception as e:
|
| 474 |
+
logger.warning(f"Direct URL extraction failed: {str(e)}")
|
| 475 |
+
|
| 476 |
+
return None, "All download strategies failed. This video may not be available in your region or requires authentication.", None
|
| 477 |
|
| 478 |
def download_from_google_drive(url):
|
| 479 |
temp_output_path = 'ytdl/gdrive_temp_audio'
|