Spaces:
Build error
Build error
| import os | |
| import shutil | |
| import pandas as pd | |
| from vidfetch import compress_folder, pull_from_hf | |
def download_video_links(hf_token: str, filename: str, save_dir: str) -> None:
    """Fetch one file from the Panda-70M original-links repo into ``save_dir``.

    Args:
        hf_token: Token forwarded to ``pull_from_hf`` as ``hf_token``.
        filename: Name of the file to request from the
            ``OpenVideo/Panda-70M-Original-Links`` repository.
        save_dir: Destination directory; created (with parents) if missing.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several workers share the same save_dir.
    os.makedirs(save_dir, exist_ok=True)

    # download
    pull_from_hf(
        hf_token=hf_token,
        hf_repo_id="OpenVideo/Panda-70M-Original-Links",
        filename=filename,
        save_dir=save_dir,
    )
def download_videos_by_csv(
    csv_file_path: str,
    save_dir: str,
    targz_filename: str,
) -> None:
    """Download the videos listed in a CSV file and compress the result.

    The CSV is read with pandas and must contain ``url`` and ``videoID``
    columns. Videos are written to ``<save_dir>/<folder>/download_raw`` where
    ``<folder>`` is ``targz_filename`` with ``.tar.gz`` removed. A video whose
    target file already exists is skipped, so a rerun only fetches what is
    missing. Afterwards every file in the download folder larger than 500MB
    is deleted, a ``log.txt`` listing failed links and deleted files is
    written there, and the folder is passed to ``compress_folder`` together
    with ``<save_dir>/<targz_filename>``.

    Args:
        csv_file_path: Path of the CSV file; a copy is placed in the download
            folder.
        save_dir: Root directory for the download folder and the archive.
        targz_filename: File name of the archive, e.g. ``"part0.tar.gz"``.

    Raises:
        ModuleNotFoundError: If ``youtube_dl`` cannot be imported.
    """
    try:
        import youtube_dl
    except ImportError as err:
        raise ModuleNotFoundError(
            "youtube_dl missed, please install it by ``vidfetch.package.youtube.youtube_dl_install_helper``"
        ) from err

    # path/dir
    folder_name = targz_filename.replace(".tar.gz", "")
    download_videos_dir = os.path.join(save_dir, folder_name, "download_raw")
    log_path = os.path.join(download_videos_dir, "log.txt")
    targz_path = os.path.join(save_dir, targz_filename)

    # make dirs (exist_ok avoids a check-then-create race)
    os.makedirs(download_videos_dir, exist_ok=True)

    # keep a copy of the CSV next to the videos, then read it
    csv_filename = os.path.basename(csv_file_path)
    shutil.copy(src=csv_file_path, dst=os.path.join(download_videos_dir, csv_filename))
    data = pd.read_csv(csv_file_path)
    links = data["url"].tolist()
    videos_id = data["videoID"].tolist()

    failed_links = []  # record failed links
    for link, video_id in zip(links, videos_id):
        # NOTE(review): the first character of videoID is dropped, presumably
        # a prefix used in the links CSV -- confirm against the data.
        video_save_path = os.path.join(download_videos_dir, video_id[1:] + ".mp4")

        # check if downloaded
        if os.path.exists(video_save_path):
            continue

        # download
        ydl_opts = {
            "format": "best",
            "quiet": False,
            "outtmpl": video_save_path,
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            try:
                ydl.download([link])
            except Exception:
                # Best effort: remember the link and move on. Catching
                # Exception (not a bare except) lets Ctrl-C / SystemExit
                # still stop the run.
                failed_links.append(link)

    # delete files larger than the size limit
    max_size_mb = 500
    deleted_videos = []
    for entry in os.listdir(download_videos_dir):
        file_path = os.path.join(download_videos_dir, entry)
        if not os.path.isfile(file_path):
            continue  # only regular files have a meaningful size to check
        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)  # bytes -> MB
        if file_size_mb > max_size_mb:
            os.remove(file_path)
            deleted_videos.append(file_path)

    # Write to log file: one entry per line under each header
    log_lines = [
        "Fail to download",
        *failed_links,
        f"Delete videos larger than {max_size_mb}MB",
        *deleted_videos,
    ]
    with open(log_path, "w", encoding="utf-8") as log_file:
        log_file.write("\n".join(log_lines) + "\n")

    compress_folder(download_videos_dir, targz_path)