# File size: 4,926 Bytes
# 625a17f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy

def _rle_instance_annotations(label_map, instance_values):
    """Encode one COCO-style RLE annotation per instance id in *label_map*.

    Args:
        label_map: 2-D integer array of per-pixel instance ids (0 = background).
        instance_values: iterable of numpy scalar instance ids to encode.

    Returns:
        List of dicts with 'segmentation' (ascii-decoded RLE), 'area', and
        'category_id' (the instance id, cast to float for JSON).
    """
    annotations = []
    for instance_value in instance_values:
        binary_mask = (label_map == instance_value).astype(np.uint8)
        rle = encode(np.asfortranarray(binary_mask))
        annotations.append({
            'segmentation': {
                # RLE 'counts' comes back as bytes; decode so json.dump works
                'counts': rle['counts'].decode('ascii'),
                'size': rle['size'],
            },
            'area': binary_mask.sum().astype(float),
            'category_id': instance_value.astype(float),
        })
    return annotations


def main():
    """Build a PSALM-style JSON index for semi-supervised DAVIS-2017 VOS.

    For every video listed in the split's train.txt, the first frame's
    ground-truth masks become the prompt ('first_frame_anns'); every later
    frame becomes one sample whose own masks are the targets ('anns').
    Frames containing an instance id absent from the first frame are skipped
    (there is no prompt mask for them).
    """
    root_path = '/work/yuqian_fu/Data/datasets/DAVIS'
    splits = ['trainval', 'test-dev']
    # we only do val evaluation, so only splits[0] is consumed below
    split = splits[0]
    annotation_path = os.path.join(root_path, f'2017/{split}/Annotations/480p')
    image_path = os.path.join(root_path, f'2017/{split}/JPEGImages/480p')

    set_path = os.path.join(root_path, f'2017/{split}/ImageSets/2017/train.txt')
    save_path = os.path.join(root_path, f'2017/{split}_val_psalm_train_newnewnew.json')

    with open(set_path, 'r') as f:
        val_set = [line.strip() for line in f]

    new_img_id = 0
    DAVIS_dataset = []
    for val_name in tqdm(val_set):
        vid_path = os.path.join(image_path, val_name)
        anno_path = os.path.join(annotation_path, val_name)

        # list/sort each directory once per video instead of per frame
        frame_files = sorted(os.listdir(vid_path))
        anno_files = sorted(os.listdir(anno_path))

        first_frame_annotation_path = os.path.join(anno_path, anno_files[0])
        first_frame_img_relpath = os.path.relpath(
            os.path.join(vid_path, frame_files[0]), root_path)

        # DAVIS annotations are palette PNGs: pixel value == instance id
        first_frame_annotation = np.array(Image.open(first_frame_annotation_path))
        height, width = first_frame_annotation.shape
        unique_instances = np.unique(first_frame_annotation)
        unique_instances = unique_instances[unique_instances != 0]  # drop background
        # for semi-supervised VOS, we use first frame's GT for input
        coco_format_annotations = _rle_instance_annotations(
            first_frame_annotation, unique_instances)

        # every frame after the first becomes one sample
        for filename, annfilename in zip(frame_files[1:], anno_files[1:]):
            sample_img_relpath = os.path.relpath(
                os.path.join(vid_path, filename), root_path)
            image_info = {
                'file_name': sample_img_relpath,
                'height': height,
                'width': width,
            }

            sample_annotation = np.array(
                Image.open(os.path.join(anno_path, annfilename)))
            sample_unique_instances = np.unique(sample_annotation)
            sample_unique_instances = sample_unique_instances[sample_unique_instances != 0]

            # an instance id not present in the first frame has no prompt
            # mask; report the frame and skip it (short-circuits like the
            # original break, before any masks are encoded)
            if any(iv not in unique_instances for iv in sample_unique_instances):
                print(sample_img_relpath)
                continue

            anns = _rle_instance_annotations(sample_annotation, sample_unique_instances)

            # no deepcopy needed: the annotation dicts are never mutated and
            # json.dump is unaffected by aliasing
            first_frame_anns = coco_format_annotations
            if len(anns) < len(first_frame_anns):
                # some first-frame instances vanished in this frame; keep only
                # the prompts whose instance is still present
                first_frame_anns = [ann for ann in first_frame_anns
                                    if ann['category_id'] in sample_unique_instances]
            assert len(anns) == len(first_frame_anns)

            DAVIS_dataset.append({
                'image': sample_img_relpath,
                'image_info': image_info,
                'anns': anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': new_img_id,
                'video_name': val_name,
            })
            new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(DAVIS_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')


if __name__ == '__main__':
    main()