| import os | |
| import shutil | |
| import json | |
| import random | |
# Path to the folder containing the generated images.
source_image_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large"
# Path to the target image folder.
target_image_folder = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
)
# Path to the COCO captions annotation file.
annotations_coco_path = (
    "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
)

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open(annotations_coco_path, "r", encoding="utf-8") as f:
    annotations = json.load(f)
# Accumulator for annotation records; nothing is appended to it in this section.
new_annotations = []

# shutil.copy raises FileNotFoundError when the destination directory is
# missing, so create it up front (no-op if it already exists).
os.makedirs(target_image_folder, exist_ok=True)

# Only the first 200000 entries of the "annotations" list are processed.
for index, annotation in enumerate(annotations["annotations"][:200000]):
    print(index)
    image_id, pid = annotation["image_id"], annotation["id"]
    # Source file name: image id zero-padded to 12 digits, the annotation id,
    # then the "_gen.jpg" suffix.
    source_image_path = os.path.join(
        source_image_folder, f"{image_id:012d}_{pid}_gen.jpg"
    )
    # The copy is renamed to the annotation's position in the sliced list.
    target_image_path = os.path.join(target_image_folder, f"{index}.jpg")
    # Annotations with no matching source image are skipped.
    if os.path.exists(source_image_path):
        shutil.copy(source_image_path, target_image_path)