File size: 5,768 Bytes
625a17f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy

# Gap (in frames) between the reference frame and the target frame of a pair.
FRAME_GAP = 20


def get_instance_ids(annotation):
    """Return the sorted unique non-zero instance ids present in a label mask.

    DAVIS annotation PNGs are palette images where pixel value 0 is background
    and each positive value identifies one object instance.
    """
    ids = np.unique(annotation)
    return ids[ids != 0]


def encode_instance_masks(annotation, instance_ids):
    """Encode each requested instance of a label mask as a COCO-style RLE dict.

    Args:
        annotation: 2D integer array, one instance id per pixel (0 = background).
        instance_ids: iterable of instance values to encode.

    Returns:
        List of dicts with 'segmentation' (ascii-decoded RLE), 'area' (float
        pixel count) and 'category_id' (instance id as float, so it stays
        JSON-serializable).
    """
    coco_anns = []
    for instance_value in instance_ids:
        binary_mask = (annotation == instance_value).astype(np.uint8)
        # pycocotools' encode() requires a Fortran-contiguous uint8 array.
        rle = encode(np.asfortranarray(binary_mask))
        coco_anns.append(
            {
                'segmentation': {
                    'counts': rle['counts'].decode('ascii'),
                    'size': rle['size'],
                },
                'area': binary_mask.sum().astype(float),
                'category_id': instance_value.astype(float),
            }
        )
    return coco_anns


if __name__ == '__main__':
    root_path = '/work/yuqian_fu/Data/datasets/DAVIS'
    splits = ['trainval', 'test-dev']
    # we only do val evaluation
    annotation_path = os.path.join(root_path, f'2017/{splits[0]}/Annotations/480p')
    image_path = os.path.join(root_path, f'2017/{splits[0]}/JPEGImages/480p')

    set_path = os.path.join(root_path, f'2017/{splits[0]}/ImageSets/2017/val.txt')
    # FRAME_GAP=20 reproduces the original '..._test_psalm_20gap.json' path.
    save_path = os.path.join(root_path, f'2017/{splits[0]}_test_psalm_{FRAME_GAP}gap.json')

    with open(set_path, 'r') as f:
        val_set = [line.strip() for line in f]

    new_img_id = 0
    DAVIS_dataset = []
    for val_name in tqdm(val_set):
        vid_path = os.path.join(image_path, val_name)
        anno_path = os.path.join(annotation_path, val_name)

        # Get all frames in the video (sorted so index == temporal order).
        frame_list = sorted(os.listdir(vid_path))
        anno_list = sorted(os.listdir(anno_path))
        video_len = len(frame_list)

        # Sanity check: every JPEG frame must have a matching annotation PNG.
        assert len(frame_list) == len(anno_list), f"Mismatch in {val_name}: {len(frame_list)} frames vs {len(anno_list)} annotations"

        # Pair each frame i (reference) with frame i + FRAME_GAP (target);
        # the range is empty for videos shorter than the gap.
        for i in range(video_len - FRAME_GAP):
            target_idx = i + FRAME_GAP

            # Reference frame: for semi-supervised VOS its GT masks are the input.
            first_frame_img_path = os.path.join(vid_path, frame_list[i])
            first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)

            first_frame_annotation = np.array(
                Image.open(os.path.join(anno_path, anno_list[i]))
            )
            height, width = first_frame_annotation.shape

            unique_instances = get_instance_ids(first_frame_annotation)
            coco_format_annotations = encode_instance_masks(
                first_frame_annotation, unique_instances
            )

            # Target frame (the sample to be predicted).
            sample_img_path = os.path.join(vid_path, frame_list[target_idx])
            sample_img_relpath = os.path.relpath(sample_img_path, root_path)
            image_info = {
                'file_name': sample_img_relpath,
                'height': height,
                'width': width,
            }

            sample_annotation = np.array(
                Image.open(os.path.join(anno_path, anno_list[target_idx]))
            )
            sample_unique_instances = get_instance_ids(sample_annotation)

            # Skip pairs where the target frame contains an instance that is
            # absent from the reference frame: it cannot be tracked from the
            # reference GT.
            if not np.all(np.isin(sample_unique_instances, unique_instances)):
                print(f"Skip {sample_img_relpath}: new instance not in reference frame")
                continue

            anns = encode_instance_masks(sample_annotation, sample_unique_instances)

            # Keep only reference annotations whose instance also appears in
            # the target frame so both lists stay aligned one-to-one.
            first_frame_anns = copy.deepcopy(coco_format_annotations)
            if len(anns) < len(first_frame_anns):
                first_frame_anns = [
                    ann for ann in first_frame_anns
                    if ann['category_id'] in sample_unique_instances
                ]
            assert len(anns) == len(first_frame_anns), f"Annotation mismatch at {sample_img_relpath}"

            DAVIS_dataset.append(
                {
                    'image': sample_img_relpath,
                    'image_info': image_info,
                    'anns': anns,
                    'first_frame_image': first_frame_img_relpath,
                    'first_frame_anns': first_frame_anns,
                    'new_img_id': new_img_id,
                    'video_name': val_name,
                }
            )
            new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(DAVIS_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')