"""Export the leading images of a LLaVA-Pretrain annotation file as JPEGs.

Reads the annotation list at ``annotations_path``, and for each of the first
``MAX_IMAGES`` entries opens the image named by the entry's ``'image'`` key
(relative to ``image_folder``) and re-saves it as ``<index>.jpg`` inside
``target_folder``.
"""
import os
import shutil
import json

from PIL import Image

image_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
target_folder = "/mnt/petrelfs/zhuchenglin/select_gen_100k"
annotations_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/select_gen_100k.json"
# NOTE(review): not read by the active code. A previously disabled pass
# (verify each image with Image.verify() and dump the surviving annotations)
# wrote its result here; the name is kept in case that pass is restored.
target_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/sharegpt4v_100k.json"

# How many annotations, counted from the start of the list, are exported.
MAX_IMAGES = 10000

# Modes that are handed to the JPEG writer unchanged; anything else (RGBA, P,
# LA, ...) is converted to RGB first because saving it as JPEG raises OSError.
# NOTE(review): list taken from Pillow's JPEG plugin documentation/source --
# confirm against the installed Pillow version.
_JPEG_SAFE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def export_images(annotations, source_dir, dest_dir, limit=MAX_IMAGES):
    """Re-save the first ``limit`` annotated images as ``<index>.jpg``.

    Args:
        annotations: Sequence of dicts, each with an ``'image'`` key holding a
            path relative to ``source_dir``.
        source_dir: Directory the annotation image paths are relative to.
        dest_dir: Directory that receives the numbered JPEG files; created if
            it does not exist.
        limit: Maximum number of annotations to process.

    Returns:
        A list of ``(index, source_path)`` tuples for images that could not be
        opened or saved. The corresponding ``<index>.jpg`` may be missing or
        incomplete.
    """
    # Image.save() does not create missing parent directories.
    os.makedirs(dest_dir, exist_ok=True)

    failures = []
    for index, annotation in enumerate(annotations[:limit]):
        print(index)
        source_image_path = os.path.join(source_dir, annotation['image'])
        target_image_path = os.path.join(dest_dir, f'{index}.jpg')
        try:
            with Image.open(source_image_path) as source_img:
                if source_img.mode in _JPEG_SAFE_MODES:
                    jpeg_ready = source_img
                else:
                    # Drops alpha / expands palettes so the JPEG writer
                    # accepts the image.
                    jpeg_ready = source_img.convert("RGB")
                jpeg_ready.save(target_image_path)
        except (OSError, SyntaxError) as e:
            # One unreadable file should not abort the whole batch; report it
            # and keep going so the remaining indices are still exported.
            print(f"Image {source_image_path} could not be exported. Error: {e}")
            failures.append((index, source_image_path))
    return failures


def main():
    """Load the annotation file and export its leading images."""
    with open(annotations_path, "r", encoding="utf-8") as f:
        annotations = json.load(f)

    failures = export_images(annotations, image_folder, target_folder)
    if failures:
        print(f"{len(failures)} image(s) failed to export.")


if __name__ == "__main__":
    main()