File size: 5,768 Bytes
625a17f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
if __name__ == '__main__':
    # Build a PSALM-style semi-supervised VOS evaluation json for DAVIS 2017:
    # each sample pairs a reference frame (whose ground-truth instance masks
    # are the model input) with a target frame GAP frames later in the video.
    GAP = 20  # frame offset between reference and target frame

    root_path = '/work/yuqian_fu/Data/datasets/DAVIS'
    splits = ['trainval', 'test-dev']
    # we only do val evaluation
    annotation_path = os.path.join(root_path, f'2017/{splits[0]}/Annotations/480p')
    image_path = os.path.join(root_path, f'2017/{splits[0]}/JPEGImages/480p')
    set_path = os.path.join(root_path, f'2017/{splits[0]}/ImageSets/2017/val.txt')
    save_path = os.path.join(root_path, f'2017/{splits[0]}_test_psalm_{GAP}gap.json')

    def _mask_to_coco_anns(annotation):
        """Convert a 2-D instance-ID mask (0 = background) into COCO-style anns.

        Returns:
            (anns, instance_ids): `anns` is a list of dicts with json-safe
            RLE 'segmentation', float 'area', and float 'category_id' (the
            instance id); `instance_ids` is the array of non-zero ids present.
        """
        instance_ids = np.unique(annotation)
        instance_ids = instance_ids[instance_ids != 0]  # drop background
        anns = []
        for instance_value in instance_ids:
            binary_mask = (annotation == instance_value).astype(np.uint8)
            rle = encode(np.asfortranarray(binary_mask))  # pycocotools needs F-order
            anns.append({
                'segmentation': {
                    # bytes -> str so the result is json-serializable
                    'counts': rle['counts'].decode('ascii'),
                    'size': rle['size'],
                },
                'area': float(binary_mask.sum()),
                'category_id': float(instance_value),
            })
        return anns, instance_ids

    with open(set_path, 'r') as f:
        val_set = [line.strip() for line in f]

    new_img_id = 0
    DAVIS_dataset = []
    for val_name in tqdm(val_set):
        vid_path = os.path.join(image_path, val_name)
        anno_path = os.path.join(annotation_path, val_name)
        # Frames and annotations are matched positionally after sorting.
        frame_list = sorted(os.listdir(vid_path))
        anno_list = sorted(os.listdir(anno_path))
        video_len = len(frame_list)
        # Real exception (not `assert`) so the check survives `python -O`.
        if video_len != len(anno_list):
            raise ValueError(
                f"Mismatch in {val_name}: {video_len} frames vs {len(anno_list)} annotations")

        # range(video_len - GAP) reproduces the original "break once i + GAP
        # passes the last index" loop, including the empty case for short videos.
        for i in range(video_len - GAP):
            target_idx = i + GAP

            # Reference frame: its GT masks are the semi-supervised VOS input.
            first_frame_img_path = os.path.join(vid_path, frame_list[i])
            first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)
            first_frame_annotation_path = os.path.join(anno_path, anno_list[i])
            first_frame_annotation = np.array(Image.open(first_frame_annotation_path))
            # NOTE(review): assumes the annotation PNG decodes to a 2-D
            # (palette) array — true for DAVIS — otherwise this unpack raises.
            height, width = first_frame_annotation.shape
            first_frame_anns, ref_ids = _mask_to_coco_anns(first_frame_annotation)

            # Target frame: its GT is stored for evaluation.
            sample_img_path = os.path.join(vid_path, frame_list[target_idx])
            sample_img_relpath = os.path.relpath(sample_img_path, root_path)
            sample_annotation_path = os.path.join(anno_path, anno_list[target_idx])
            sample_annotation = np.array(Image.open(sample_annotation_path))
            anns, sample_ids = _mask_to_coco_anns(sample_annotation)

            # Skip pairs where the target frame shows an object that is not
            # visible in the reference frame (no GT mask to prompt with).
            if not set(sample_ids.tolist()) <= set(ref_ids.tolist()):
                print(f"Skip {sample_img_relpath}: new instance not in reference frame")
                continue

            # Objects may disappear between reference and target; keep only
            # reference anns whose instance still exists in the target frame.
            if len(anns) < len(first_frame_anns):
                first_frame_anns = [
                    ann for ann in first_frame_anns
                    if ann['category_id'] in sample_ids
                ]
            if len(anns) != len(first_frame_anns):
                raise ValueError(f"Annotation mismatch at {sample_img_relpath}")

            DAVIS_dataset.append({
                'image': sample_img_relpath,
                'image_info': {
                    'file_name': sample_img_relpath,
                    # NOTE(review): height/width come from the reference
                    # frame's mask; assumes one resolution per video (holds
                    # for DAVIS 480p) — confirm if reused elsewhere.
                    'height': height,
                    'width': width,
                },
                'anns': anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': new_img_id,
                'video_name': val_name,
            })
            new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(DAVIS_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')