| import json | |
| from pycocotools.coco import COCO | |
| from tqdm import tqdm | |
| import concurrent.futures | |
def group_annotations_by_class(anns, cat_id_to_name):
    """Group one image's annotations by class name.

    Args:
        anns: Annotation dicts of a single image; each must have a
            'category_id' key.
        cat_id_to_name: Mapping from category id to class name.

    Returns:
        Dict mapping class name to the list of that class's annotations, in
        the order they appear in ``anns``. Annotations whose category id is
        not a key of ``cat_id_to_name`` are dropped.
    """
    grouped = {}
    for ann in anns:
        class_name = cat_id_to_name.get(ann['category_id'])
        if class_name is not None:
            grouped.setdefault(class_name, []).append(ann)
    return grouped


def build_record(img_info, instruction, classes, class_to_anns,
                 name_to_cat_ids, new_img_id):
    """Build one dataset entry for an (image, instruction) pair.

    Args:
        img_info: Image dict; must have a 'file_name' key.
        instruction: Instruction text stored with the entry.
        classes: Class names covered by the instruction; their order decides
            the order of the emitted annotations.
        class_to_anns: Mapping from class name to that class's annotations
            in this image (classes without annotations may be absent).
        name_to_cat_ids: Mapping from class name to its list of category ids.
        new_img_id: Running index stored in the entry.

    Returns:
        Dict with keys 'image', 'image_info', 'instruction', 'new_img_id',
        'anns', 'mask_classes' and 'mask_classes_id'. The last three lists
        are parallel (one element per annotation) and are empty when none of
        ``classes`` occurs in the image.
    """
    anns = []
    mask_classes = []
    mask_classes_id = []
    for class_name in classes:
        for ann in class_to_anns.get(class_name, ()):
            anns.append(ann)
            mask_classes.append(class_name)
            # Each element is itself a list of ids: this keeps the shape the
            # script has always written (it stored the getCatIds() result).
            mask_classes_id.append(list(name_to_cat_ids.get(class_name, ())))
    return {
        'image': img_info['file_name'],
        'image_info': img_info,
        'instruction': instruction,
        'new_img_id': new_img_id,
        'anns': anns,
        'mask_classes': mask_classes,
        'mask_classes_id': mask_classes_id,
    }


if __name__ == '__main__':
    # Instruction text -> class names whose instances answer that instruction.
    instructions = {
        'Please segment all of objects in this image': [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat',
            'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
            'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
            'sports ball', 'kite', 'baseball bat', 'baseball glove',
            'skateboard', 'surfboard', 'tennis racket', 'bottle',
            'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
            'couch', 'potted plant', 'bed', 'dining table', 'toilet',
            'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
            'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]
    }
    # Category ids and their mapping to contiguous 0-based indices.
    # NOTE(review): neither name is referenced in the visible part of this
    # script; kept in case code outside this section relies on them.
    coco_class_ids = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 84, 85, 86, 87, 88, 89, 90
    ]
    coco_id_to_cont_id = {coco_id: cont_id for cont_id, coco_id in enumerate(coco_class_ids)}

    # Every class mentioned by any instruction; independent of the split.
    all_classes = {class_name for classes in instructions.values() for class_name in classes}

    splits = ['train', 'val']
    for split in splits:
        coco_path = 'datasets/coco/annotations/instances_{}2017.json'.format(split)
        print(coco_path)
        output_file = 'datasets/coco/instance_{}_psalm.json'.format(split)
        coco = COCO(coco_path)

        # Resolve category names once per split instead of once per image,
        # class and mask.
        name_to_cat_ids = {}
        cat_id_to_name = {}
        for cat in coco.loadCats(coco.getCatIds()):
            if cat['name'] in all_classes:
                name_to_cat_ids.setdefault(cat['name'], []).append(cat['id'])
                cat_id_to_name[cat['id']] = cat['name']

        # A name unknown to the annotation file must match nothing. Querying
        # getAnnIds with an empty catIds list disables the category filter
        # and would attribute every annotation of the image to that class.
        missing = sorted(all_classes - set(name_to_cat_ids))
        if missing:
            print('warning: classes not found in {}: {}'.format(coco_path, missing))

        custom_dataset = []
        new_img_id = 0
        for img_id in tqdm(coco.imgs):
            img_info = coco.imgs[img_id]
            # One annotation lookup per image; grouping keeps the per-class
            # annotation order of the image.
            anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
            class_to_anns = group_annotations_by_class(anns, cat_id_to_name)
            for instruction, classes in instructions.items():
                # Images without any matching annotation still get an entry
                # (with empty lists), and still consume a new_img_id.
                custom_dataset.append(build_record(
                    img_info, instruction, classes, class_to_anns,
                    name_to_cat_ids, new_img_id))
                new_img_id += 1

        with open(output_file, 'w') as f:
            json.dump(custom_dataset, f, indent=2)
        print('dataset save in {}, max new_img_id: {}'.format(output_file, new_img_id))