"""Copy the images referenced by a COCO caption annotation file.

Reads the caption annotations, takes the first ``MAX_ANNOTATIONS`` records
and copies the image each record refers to from ``source_folder`` into
``target_folder``.
"""
import json
import os
import shutil

source_folder = '/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017'
target_folder = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
anno_json_path = "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"

# Only this many leading annotation records are considered.
MAX_ANNOTATIONS = 200000


def copy_annotated_images(annotations, source_dir, target_dir):
    """Copy each distinct image referenced by *annotations*.

    Args:
        annotations: Iterable of mappings that each carry an integer
            ``"image_id"`` key.
        source_dir: Directory holding the ``<image_id, 12 digits>.jpg`` files.
        target_dir: Directory to copy into; created if it does not exist.

    Returns:
        The number of distinct images copied.

    Raises:
        FileNotFoundError: If a referenced image is missing from
            ``source_dir`` (propagated from ``shutil.copy``).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_dir, exist_ok=True)

    # Several annotation records may share one image_id (caption files
    # usually hold more than one caption per image), so remember what has
    # already been copied instead of re-copying the same file.
    seen_ids = set()
    for annotation in annotations:
        image_id = annotation["image_id"]
        if image_id in seen_ids:
            continue
        seen_ids.add(image_id)

        file_name = f'{image_id:012}.jpg'
        source_path = os.path.join(source_dir, file_name)
        target_path = os.path.join(target_dir, file_name)
        # Progress output: the path being copied and the running image count.
        print(source_path, len(seen_ids))
        shutil.copy(source_path, target_path)

    return len(seen_ids)


def main():
    """Load the annotation file and copy the referenced images."""
    with open(anno_json_path, 'r', encoding='utf-8') as f:
        annotation_data = json.load(f)
    annotations = annotation_data["annotations"][:MAX_ANNOTATIONS]
    copy_annotated_images(annotations, source_folder, target_folder)


if __name__ == "__main__":
    main()