|
|
import os, os.path as osp |
|
|
import json |
|
|
import yt_dlp |
|
|
|
|
|
|
|
|
def ytb_download(url, json_info=None, output_dir="ytb_videos/"):
    """Download a single video with yt-dlp and store it as ``<uid>.mp4``.

    The ``uid`` used for file names is the last path component of ``url``
    with everything up to and including ``"?v="`` removed.

    Args:
        url: URL of the video to download.
        json_info: Optional JSON-serialisable object. When it is not
            ``None`` it is written to ``<output_dir>/<uid>.json`` after the
            download has finished.
        output_dir: Directory that receives the video (and the optional
            metadata file). It is created when missing.

    Returns:
        ``0`` when the video already exists in ``output_dir`` or was
        downloaded successfully, ``-1`` when the download or the metadata
        write raised an error.
    """
    uid = osp.basename(url).split("?v=")[-1]
    os.makedirs(output_dir, exist_ok=True)

    video_path = osp.join(output_dir, f"{uid}.mp4")
    meta_path = osp.join(output_dir, f"{uid}.json")

    # Skip the work entirely when the converted file is already present.
    if osp.exists(video_path):
        print(f"{uid} already downloaded.")
        return 0

    yt_opts = {
        "format": "best",
        "outtmpl": osp.join(output_dir, f"{uid}.%(ext)s"),
        "postprocessors": [
            {
                "key": "FFmpegVideoConvertor",
                # "preferedformat" (sic) is the option name yt-dlp expects.
                "preferedformat": "mp4",
            }
        ],
    }

    try:
        with yt_dlp.YoutubeDL(yt_opts) as ydl:
            ydl.download([url])
        if json_info is not None:
            with open(meta_path, "w") as fp:
                json.dump(json_info, fp, indent=2)
        return 0
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop a batch run instead of
        # being reported as one more failed URL.
        print(f"{uid} download failed: {exc}")
        return -1
|
|
|
|
|
from tqdm import tqdm

# Read the list of entries to process; each entry is indexed by 'url' below.
with open("Video-MME.json", "r") as index_file:
    info = json.load(index_file)

urls = []
errors = []

# Attempt every entry in order and remember the URLs whose download did not
# report success (any return value other than 0).
for entry in tqdm(info):
    status = ytb_download(entry['url'])
    if status == 0:
        continue
    errors.append(entry['url'])

# Persist the failed URLs, one per line, so they can be inspected or retried.
with open("errors.txt", "w") as report:
    report.write("\n".join(errors))
|
|
|