Spaces:
Configuration error
Configuration error
| from pathlib import PosixPath | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| import decord | |
| from tqdm import tqdm | |
# Fail early with an actionable message when decord is not installed.
# NOTE(review): `decord` is also imported unconditionally in the import block
# at the top of this file; that import raises first, so this guard only takes
# effect once the unguarded import is removed.
try:
    import decord
except ImportError as exc:
    # Chain the original error so the underlying cause stays in the traceback.
    raise ImportError(
        "The `decord` package is required for loading the video dataset. Install with `pip install decord`"
    ) from exc

# Select decord's "torch" bridge (presumably so decoded frames come back as
# torch tensors -- confirm against the decord documentation).
decord.bridge.set_bridge("torch")
def read_lines_from_file(file_path):
    """Print every line of ``file_path`` with surrounding whitespace stripped.

    The whole file is read before anything is printed. Any ``Exception``
    raised while opening, reading or printing is caught and reported on
    stdout as ``An error occurred: ...``; nothing is re-raised.

    Args:
        file_path: Path of the text file to dump.

    Returns:
        None.
    """
    try:
        with open(file_path, 'r') as handle:
            # Materialise all lines first so a read error prints nothing.
            raw_lines = list(handle)
        for raw in raw_lines:
            # strip() removes the trailing newline and any other edge whitespace.
            print(raw.strip())
    except Exception as e:
        print(f"An error occurred: {e}")
def read_lines_to_list(file_path):
    """Return the lines of ``file_path`` as a list of stripped strings.

    Each line has its leading and trailing whitespace (including the newline)
    removed. Blank lines are kept and appear as empty strings.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The list of stripped lines, or an empty list if any ``Exception``
        occurred while opening or reading the file. In that case the error is
        printed to stdout and not re-raised.
    """
    try:
        with open(file_path, 'r') as handle:
            return list(map(str.strip, handle))
    except Exception as e:
        print(f"An error occurred: {e}")
        # A failure part-way through reading discards any partial result.
        return []
def process_video(file, error_file_path):
    """Check that decord can open one video; record it as unreadable if not.

    The path is normalised through ``PosixPath`` and handed to
    ``decord.VideoReader``. The reader itself is not needed, only whether
    constructing it succeeds, so it is not kept.

    Args:
        file: Path of the video file, as listed in the manifest.
        error_file_path: Text file to which the original ``file`` string is
            appended (one per line) when the video cannot be opened.

    Returns:
        None.

    Raises:
        Whatever ``PosixPath(file)`` raises for an invalid argument, and any
        error from writing to ``error_file_path``; only failures from
        ``decord.VideoReader`` are caught here.
    """
    video_uri = PosixPath(file).as_posix()
    try:
        decord.VideoReader(uri=video_uri)
    except Exception as e:
        # Log first, then report, so the entry is recorded even if stdout fails.
        with open(error_file_path, 'a') as error_log:
            error_log.write(f"{file}\n")
        print(f"Could not read video: {file}. Error: {e}")
# Example usage: check every video listed in the manifest file.
file_path = '/home/cn/Datasets/SakugaDataset/output_81.txt'
# Drop blank manifest lines: read_lines_to_list returns them as empty strings,
# which would otherwise be submitted as video paths (and, on failure, logged
# as empty lines in the error file).
file_list = [entry for entry in read_lines_to_list(file_path) if entry]
error_file_path = 'unreadable_videos_81.txt'

# Fan the checks out over a thread pool; adjust max_workers as needed.
with ThreadPoolExecutor(max_workers=16) as executor:
    # Map each future back to the path it was submitted for.
    futures = {executor.submit(process_video, file, error_file_path): file for file in file_list}
    for future in tqdm(as_completed(futures), total=len(futures)):
        try:
            # Surface anything process_video did not handle itself.
            future.result()
        except Exception as e:
            print(f"An error occurred: {e}")