# ObjectRelator-Original/datasets/build_gRefCOCO.py
# Uploaded by YuqianFu using huggingface_hub (commit 625a17f, verified).
import json
import pickle
import os
from tqdm import tqdm
def _load_refs(refs_path):
    """Load the referring-expression records stored at ``refs_path``.

    Files whose extension is ``.json`` are parsed as JSON; any other file is
    unpickled, which is what this script did for every input before.
    """
    if os.path.splitext(refs_path)[1].lower() == '.json':
        with open(refs_path, encoding='utf-8') as f:
            return json.load(f)
    # NOTE(security): unpickling can execute arbitrary code. Only point this
    # at reference files obtained from a trusted source.
    with open(refs_path, 'rb') as f:
        return pickle.load(f)


def build_referring_dataset(instance_path, refs_path, split, save_path):
    """Build the per-split referring-segmentation JSON file.

    Every reference record whose ``'split'`` equals ``split`` becomes one
    output record holding the image file name, the full image info dict, the
    record's sentences, a running index (``new_img_id``) and the annotation
    dicts the record points to.

    Args:
        instance_path: JSON file with ``'images'`` and ``'annotations'``
            lists; each entry must carry an ``'id'`` key.
        refs_path: File with the reference records. ``.json`` files are read
            as JSON, anything else is unpickled.
        split: Only records with this ``'split'`` value are kept.
        save_path: Destination of the resulting JSON list.

    Raises:
        FileNotFoundError: If ``instance_path`` or ``refs_path`` is missing.
        KeyError: If a record references an image id or annotation id that
            is absent from the instance file.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    for path in (instance_path, refs_path):
        if not os.path.exists(path):
            raise FileNotFoundError(f'Path not found: {path}')

    with open(instance_path, encoding='utf-8') as f:
        instance = json.load(f)
    refs = _load_refs(refs_path)

    # Id -> record lookup tables for images and annotations.
    img_id2info = {image['id']: image for image in instance['images']}
    anno_id2info = {anno['id']: anno for anno in instance['annotations']}

    outputs = []
    for sample in tqdm(refs):
        if sample['split'] != split:
            continue

        ann_ids = sample['ann_id']
        # A -1 among the annotation ids yields an empty annotation list
        # (presumably a "no target" expression -- confirm against the
        # dataset documentation).
        if -1 in ann_ids:
            anns = []
        else:
            anns = [anno_id2info[ann_id] for ann_id in ann_ids]

        outputs.append({
            'image': sample['file_name'],
            'image_info': img_id2info[sample['image_id']],
            'instruction': sample['sentences'],
            # Position of this record in the output list.
            'new_img_id': len(outputs),
            'anns': anns,
        })

    with open(save_path, 'w') as f:
        json.dump(outputs, f)
    print(f'Saving at {save_path}. Total sample: {len(outputs)}.')
if __name__ == '__main__':
    # Change root path to your own directory
    root_path = 'datasets/refer_seg'
    dataset_dir = os.path.join(root_path, 'grefcoco')

    # The two input files are the same for every split, so resolve them once.
    instance_path = os.path.join(dataset_dir, 'instances.json')
    refs_path = os.path.join(dataset_dir, 'grefs(unc).json')

    # One output file per split, written next to the inputs.
    for split in ('train', 'val', 'testA', 'testB'):
        save_path = os.path.join(dataset_dir, f'{split}_psalm.json')
        print(f'Processing gRefCOCO: {split}...')
        build_referring_dataset(instance_path, refs_path, split, save_path)