"""Batch-download the videos listed in ``Video-MME.json`` with yt-dlp.

Every entry of the JSON list must carry a ``url`` key.  URLs whose download
fails are collected and written, one per line, to ``errors.txt``.
"""

import json
import os
import os.path as osp

import yt_dlp
from tqdm import tqdm


def ytb_download(url, json_info=None, output_dir="ytb_videos/"):
    """Download one video with yt-dlp and store it as ``<uid>.mp4``.

    The uid is the text following ``?v=`` in the last path component of
    *url* (the whole component when it contains no ``?v=``).

    Args:
        url: Video URL handed to yt-dlp.
        json_info: Optional JSON-serialisable metadata.  When given, it is
            written to ``<output_dir>/<uid>.json`` after the download.
        output_dir: Directory receiving the files; created if missing.

    Returns:
        0 when ``<uid>.mp4`` already existed or the download (and optional
        metadata write) completed; -1 when either step raised an exception.
    """
    uid = osp.basename(url).split("?v=")[-1]
    os.makedirs(output_dir, exist_ok=True)

    video_path = osp.join(output_dir, f"{uid}.mp4")
    meta_path = osp.join(output_dir, f"{uid}.json")

    # Skip work already done so an interrupted batch can simply be re-run.
    if osp.exists(video_path):
        print(f"{uid} already downloaded.")
        return 0

    yt_opts = {
        "format": "best",  # Download the best quality available
        "outtmpl": osp.join(output_dir, f"{uid}.%(ext)s"),  # Output template
        "postprocessors": [
            {
                "key": "FFmpegVideoConvertor",
                # NOTE: "preferedformat" (sic) is the option name yt-dlp
                # expects; do not "fix" the spelling.
                "preferedformat": "mp4",  # Convert video to mp4 format
            }
        ],
    }

    try:
        with yt_dlp.YoutubeDL(yt_opts) as ydl:
            ydl.download([url])
        if json_info is not None:
            with open(meta_path, "w") as fp:
                json.dump(json_info, fp, indent=2)
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit
        # must propagate so the user can abort the whole batch with Ctrl-C.
        print(f"{uid} failed: {exc}")
        return -1
    return 0


# NOTE: the batch below runs at module level (no ``__main__`` guard), exactly
# as before, so importing this file still triggers the downloads.
with open("Video-MME.json", "r", encoding="utf-8") as fp:
    info = json.load(fp)

errors = []
for d in tqdm(info):
    r = ytb_download(d["url"])
    if r != 0:
        errors.append(d["url"])

with open("errors.txt", "w") as fp:
    fp.write("\n".join(errors))