|
|
import json
|
|
|
import pickle
|
|
|
import os
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
def build_referring_dataset(instance_path, refs_path, split, save_path):
    """Build the flat JSON sample list for one split of a referring dataset.

    Loads an instances JSON file (an object with ``images`` and
    ``annotations`` lists) and a pickled iterable of referring records,
    keeps the records whose ``'split'`` field equals *split*, joins each of
    them with its image and annotation entry by id, and dumps the resulting
    list to *save_path*. A one-line summary is printed when done.

    Args:
        instance_path: Path to the instances JSON file.
        refs_path: Path to the pickled referring records.
        split: Split name to keep; compared against each record's ``'split'``.
        save_path: Destination path of the generated JSON file.

    Raises:
        AssertionError: If *instance_path* or *refs_path* does not exist
            (only when assertions are enabled).
        KeyError: If a kept record references an annotation or image id that
            is not present in the instances file.
    """
    assert os.path.exists(instance_path), f'Path not found: {instance_path}'
    assert os.path.exists(refs_path), f'Path not found: {refs_path}'

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(instance_path, encoding='utf-8') as f:
        instance = json.load(f)
    # NOTE(security): unpickling can execute arbitrary code. Only point
    # refs_path at files obtained from a trusted source.
    with open(refs_path, 'rb') as f:
        refs = pickle.load(f)

    # Index images and annotations by id for O(1) joins below.
    img_id2info = {image['id']: image for image in instance['images']}
    anno_id2info = {
        annotation['id']: annotation
        for annotation in instance['annotations']
    }

    outputs = []
    for sample in tqdm(refs):
        if sample['split'] != split:
            continue
        # Annotation is resolved before the image, as in the original order.
        sample_annotation = anno_id2info[sample['ann_id']]
        sample_image = img_id2info[sample['image_id']]
        outputs.append(
            {
                'image': sample_image['file_name'],
                'image_info': sample_image,
                'instruction': sample['sentences'],
                'anns': [sample_annotation],
                # Running index over kept samples: 0, 1, 2, ...
                'new_img_id': len(outputs),
            }
        )

    # json.dump emits ASCII-only text by default; UTF-8 is stated for clarity.
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(outputs, f)
    print(f'Saving at {save_path}. Total sample: {len(outputs)}.')
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Generate one '<split>_psalm.json' file per split for every dataset
    # stored under root_path/<dataset>/.
    root_path = 'datasets/refer_seg'
    datasets = ['refcoco', 'refcoco+', 'refcocog']
    default_splits = ['train', 'val', 'testA', 'testB']

    for dataset in datasets:
        # refcocog is the special case: it reads 'refs(umd).p' and has a
        # single 'test' split. The split list is chosen per dataset rather
        # than by rebinding a shared variable, so the result does not depend
        # on the order of `datasets`.
        is_refcocog = dataset == 'refcocog'
        splits = ['train', 'val', 'test'] if is_refcocog else default_splits
        refs_name = 'refs(umd).p' if is_refcocog else 'refs(unc).p'

        # These paths are the same for every split of a dataset.
        dataset_dir = os.path.join(root_path, dataset)
        instance_path = os.path.join(dataset_dir, 'instances.json')
        refs_path = os.path.join(dataset_dir, refs_name)

        for split in splits:
            save_path = os.path.join(dataset_dir, f'{split}_psalm.json')
            print(f'Processing {dataset}: {split}...')

            build_referring_dataset(instance_path, refs_path, split, save_path)

    print('Done')
|
|
|
|