# tools/data/convert_refexp_to_coco.py
# Converts RefCOCO / RefCOCO+ / RefCOCOg referring-expression data to COCO-format JSON.
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
import numpy as np
import os
from datasets.refer import REFER
import cv2
from tqdm import tqdm
import json
import pickle
import json
def convert_to_coco(data_root='data/coco', output_root='data/coco', dataset='refcoco', dataset_split='unc'):
    """Convert a RefExp dataset (RefCOCO/RefCOCO+/RefCOCOg) into COCO-format JSON.

    Each referring expression becomes its own pseudo-image entry paired with
    exactly one annotation (the referred object's box/mask). One JSON file is
    written per split as ``instances_<dataset>_<split>.json`` under
    ``<output_root>/<dataset>``.

    Args:
        data_root: Root directory holding the REFER data and ``instances.json``.
        output_root: Root directory under which converted JSONs are written.
        dataset: One of ``'refcoco'``, ``'refcoco+'``, ``'refcocog'``.
        dataset_split: REFER split-by identifier (e.g. ``'unc'`` or ``'umd'``).

    Raises:
        ValueError: If ``dataset`` is not a recognized RefExp dataset name.
    """
    dataset_dir = os.path.join(data_root, dataset)
    output_dir = os.path.join(output_root, dataset)  # .json save path
    # exist_ok avoids the check-then-create race of the original exists()+makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Load the REFER API and its lookup tables.
    refer = REFER(data_root, dataset, dataset_split)
    refs = refer.Refs
    anns = refer.Anns
    imgs = refer.Imgs
    cats = refer.Cats    # unused below; categories are copied from instances.json instead
    sents = refer.Sents  # unused below; sentences are read via each ref
    # REFER structures (as documented by the original author):
    #   Refs[ref_id]: "sent_ids", "file_name", "ann_id", "ref_id", "image_id",
    #                 "category_id", "split", "sentences"
    #       "sentences": list of dicts with "tokens" (list), "raw", "sent_id", "sent"
    #   Anns[ann_id]: "segmentation", "area", "iscrowd", "image_id", "bbox",
    #                 "category_id", "id"
    #   Imgs[image_id]: "license", "file_name", "coco_url", "height", "width",
    #                   "date_captured", "flickr_url", "id"
    #   Cats[category_id]: category name
    #   Sents[sent_id]: "tokens" (list), "raw", "sent_id", "sent"
    #   Plus the derived maps: imgToRefs, imgToAnns, refToAnn, annToRef,
    #   catToRefs, sentToRef, sentToTokens.

    print('Dataset [%s_%s] contains: ' % (dataset, dataset_split))
    ref_ids = refer.getRefIds()
    image_ids = refer.getImgIds()
    print('There are %s expressions for %s referred objects in %s images.' % (len(refer.Sents), len(ref_ids), len(image_ids)))

    print('\nAmong them:')
    if dataset in ('refcoco', 'refcoco+'):
        splits = ['train', 'val', 'testA', 'testB']
    elif dataset == 'refcocog':
        splits = ['train', 'val', 'test']  # we don't have test split for refcocog right now.
    else:
        # Original code fell through here and crashed later with NameError on
        # `splits`; fail loudly with a clear message instead.
        raise ValueError("Unknown dataset '%s'; expected refcoco, refcoco+, or refcocog" % dataset)
    for split in splits:
        ref_ids = refer.getRefIds(split=split)
        print(' %s referred objects are in split [%s].' % (len(ref_ids), split))

    # instances.json supplies the COCO info/licenses/categories blocks verbatim.
    with open(os.path.join(dataset_dir, "instances.json"), "r") as f:
        ann_json = json.load(f)

    # 1. Build one COCO-format annotation file per split.
    for split in splits:
        max_length = 0  # longest tokenized sentence seen in this split
        coco_ann = {
            "info": ann_json['info'],
            "licenses": ann_json['licenses'],
            "images": [],       # one entry per caption (expression), not per real image
            "annotations": [],  # one box/mask per caption entry
            "categories": ann_json['categories'],
        }
        # Each caption is its own sample: it gets a fresh sequential image id and
        # a single annotation, since every expression refers to exactly one box.
        num_images = 0
        ref_ids = refer.getRefIds(split=split)

        # 2. For each referred object in this split...
        for ref_id in tqdm(ref_ids):
            ref = refs[ref_id]
            img = imgs[ref["image_id"]]
            ann = anns[ref["ann_id"]]
            # 3. ...each of its sentences becomes one (image, annotation) pair.
            for sentence in ref["sentences"]:
                num_images += 1
                coco_ann["images"].append({
                    "file_name": img["file_name"],
                    "height": img["height"],
                    "width": img["width"],
                    "original_id": img["id"],  # real COCO image id
                    "id": num_images,          # per-caption pseudo image id
                    "caption": sentence["sent"],
                    "dataset_name": dataset,
                })
                coco_ann["annotations"].append({
                    "segmentation": ann["segmentation"],
                    "area": ann["area"],
                    "iscrowd": ann["iscrowd"],
                    "bbox": ann["bbox"],
                    "image_id": num_images,        # links to the pseudo image above
                    "category_id": ann["category_id"],
                    "id": num_images,
                    "original_id": ann["id"],      # real COCO annotation id
                })
                max_length = max(max_length, len(sentence["tokens"]))

        print("Total expression: {} in split {}".format(num_images, split))
        print("Max sentence length of the split: ", max_length)

        # Save the converted split.
        save_file = "instances_{}_{}.json".format(dataset, split)
        with open(os.path.join(output_dir, save_file), 'w') as f:
            json.dump(coco_ann, f)
if __name__ == '__main__':
    # Convert every RefExp variant: refcoco and refcoco+ use the UNC split-by,
    # refcocog uses the UMD split-by.
    configurations = [
        ("refcoco", "unc"),
        ("refcoco+", "unc"),
        ("refcocog", "umd"),
    ]
    for name, split_by in configurations:
        convert_to_coco(dataset=name, dataset_split=split_by)
        print("")
"""
# original mapping
{'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9,
'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17,
'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27,
'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37,
'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44,
'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 'sandwich': 54,
'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'couch': 63,
'potted plant': 64, 'bed': 65, 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 'mouse': 74, 'remote': 75,
'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 'refrigerator': 82, 'book': 84,
'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 'toothbrush': 90}
"""