ObjectRelator-Original / datasets /build_COCO_instance.py
YuqianFu's picture
Upload folder using huggingface_hub
625a17f verified
import json
from pycocotools.coco import COCO
from tqdm import tqdm
import concurrent.futures
def build_instruction_record(img_info, instruction, classes, class_to_anns,
                             class_to_cat_ids, new_img_id):
    """Assemble one output record for a single (image, instruction) pair.

    Args:
        img_info: Image-info dict taken from ``coco.imgs``; must contain the
            key ``'file_name'``.
        instruction: Instruction text stored verbatim in the record.
        classes: Class names associated with ``instruction``; their order
            determines the order of the per-annotation output lists.
        class_to_anns: Maps a class name to the non-empty list of annotations
            of that class found in this image. Classes that do not occur in
            the image are absent from the mapping.
        class_to_cat_ids: Maps a class name to the list returned by
            ``coco.getCatIds(catNms=[class_name])``.
        new_img_id: Sequential id assigned to this record.

    Returns:
        A dict with the keys ``image``, ``image_info``, ``instruction``,
        ``new_img_id``, ``anns``, ``mask_classes`` and ``mask_classes_id``.
        The last three are parallel lists with one entry per annotation and
        are all empty when none of ``classes`` occurs in the image.
    """
    anns = []
    mask_classes = []
    mask_classes_id = []
    for class_name in classes:
        for ann in class_to_anns.get(class_name, ()):
            anns.append(ann)
            mask_classes.append(class_name)
            # Each entry stays a list (the raw getCatIds result), so the
            # emitted JSON keeps the same nesting as before. A copy is stored
            # so the records do not share one list object.
            mask_classes_id.append(list(class_to_cat_ids[class_name]))
    return {
        'image': img_info['file_name'],
        'image_info': img_info,
        'instruction': instruction,
        'new_img_id': new_img_id,
        'anns': anns,
        'mask_classes': mask_classes,
        'mask_classes_id': mask_classes_id,
    }


if __name__ == '__main__':
    # One instruction prompt mapped to the class names it covers.
    instructions = {
        'Please segment all of objects in this image': [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat',
            'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
            'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
            'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
            'sports ball', 'kite', 'baseball bat', 'baseball glove',
            'skateboard', 'surfboard', 'tennis racket', 'bottle',
            'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli',
            'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
            'couch', 'potted plant', 'bed', 'dining table', 'toilet',
            'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
            'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]
    }
    # Category ids (with gaps) and their mapping to contiguous indices.
    # NOTE(review): neither name is referenced again in the visible script;
    # kept in case other code relies on them.
    coco_class_ids = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 84, 85, 86, 87, 88, 89, 90
    ]
    coco_id_to_cont_id = {coco_id: cont_id for cont_id, coco_id in enumerate(coco_class_ids)}

    splits = ['train', 'val']
    for split in splits:
        coco_path = 'datasets/coco/annotations/instances_{}2017.json'.format(split)
        print(coco_path)
        output_file = 'datasets/coco/instance_{}_psalm.json'.format(split)
        coco = COCO(coco_path)

        all_classes = {
            class_name
            for classes in instructions.values()
            for class_name in classes
        }
        # The name -> category-id lookup does not depend on the image, so it
        # is resolved once per split instead of once per image/annotation.
        class_to_cat_ids = {
            class_name: coco.getCatIds(catNms=[class_name])
            for class_name in all_classes
        }

        custom_dataset = []
        new_img_id = 0
        for img_id in tqdm(coco.imgs):
            img_info = coco.imgs[img_id]

            # Collect, per class, the annotations present in this image.
            class_to_anns = {}
            for class_name, cat_ids in class_to_cat_ids.items():
                ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids)
                anns = coco.loadAnns(ann_ids)
                if anns:
                    class_to_anns[class_name] = anns

            # Every (image, instruction) pair yields exactly one record;
            # images without any matching class get empty annotation lists.
            for instruction, classes in instructions.items():
                custom_dataset.append(
                    build_instruction_record(
                        img_info, instruction, classes,
                        class_to_anns, class_to_cat_ids, new_img_id,
                    )
                )
                new_img_id += 1

        with open(output_file, 'w') as f:
            json.dump(custom_dataset, f, indent=2)
        print('dataset save in {}, max new_img_id: {}'.format(output_file, new_img_id))