File size: 4,819 Bytes
625a17f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
from natsort import natsorted

# Number of frames between the reference ("first") frame and the target frame
# that is paired with it.
FRAME_GAP = 100


def iter_frame_pairs(frame_ids, gap=FRAME_GAP):
    """Yield ``(reference_id, target_id)`` pairs that are ``gap`` frames apart.

    Args:
        frame_ids: Ordered list of frame identifiers of one video.
        gap: Index distance between the reference and the target frame.

    Returns:
        An iterator over ``(frame_ids[i], frame_ids[i + gap])`` for every
        ``i`` where both indices exist; empty when the video is too short.

    Raises:
        ValueError: If ``gap`` is negative.
    """
    if gap < 0:
        raise ValueError(f'gap must be non-negative, got {gap}')
    return zip(frame_ids, frame_ids[gap:])


def nonzero_instance_ids(mask):
    """Return the sorted unique values of ``mask``, excluding 0."""
    ids = np.unique(mask)
    return ids[ids != 0]


def load_mask(mask_path):
    """Read the mask image at ``mask_path`` into a numpy array.

    The image is opened in a ``with`` block so the file handle is released
    as soon as the pixel data has been copied.
    """
    with Image.open(mask_path) as mask_img:
        return np.array(mask_img)


def encode_instances(mask, instance_ids):
    """Build one annotation dict per instance value found in ``mask``.

    Args:
        mask: 2-D integer array whose pixel values are instance ids.
        instance_ids: Iterable of the instance values to encode.

    Returns:
        A list of dicts with keys ``segmentation`` (RLE with text ``counts``
        and ``size``), ``area`` (pixel count as float) and ``category_id``
        (the instance value as float).
    """
    annotations = []
    for instance_value in instance_ids:
        binary_mask = (mask == instance_value).astype(np.uint8)
        rle = encode(np.asfortranarray(binary_mask))
        annotations.append(
            {
                'segmentation': {
                    # 'counts' is decoded to text so json.dump can write it.
                    'counts': rle['counts'].decode('ascii'),
                    'size': rle['size'],
                },
                'area': float(binary_mask.sum()),
                'category_id': float(instance_value),
            }
        )
    return annotations


def build_video_samples(root_path, video_name, first_img_id, gap=FRAME_GAP):
    """Create all (reference frame, target frame) samples of one video.

    For semi-supervised VOS the reference frame's ground truth is the input
    and the frame ``gap`` positions later is the frame to be segmented.

    Args:
        root_path: Dataset split directory; stored paths are relative to it.
        video_name: Name of the video directory inside ``root_path``. It must
            contain an ``rgb`` folder and a ``mask`` folder.
        first_img_id: ``new_img_id`` given to the first sample of this video;
            later samples count up from it.
        gap: Frame distance between reference and target frame.

    Returns:
        A list of sample dicts ready for JSON serialization.

    Raises:
        ValueError: If the target mask contains an instance value that is
            absent from the reference mask.
    """
    vid_path = os.path.join(root_path, video_name)
    img_path = os.path.join(vid_path, "rgb")
    anno_path = os.path.join(vid_path, "mask")
    frame_ids = [f.split(".")[0] for f in natsorted(os.listdir(img_path))]

    samples = []
    for reference_id, target_id in iter_frame_pairs(frame_ids, gap):
        reference_mask = load_mask(
            os.path.join(anno_path, reference_id + "_000000.png")
        )
        # Unpacking into two names requires a single-channel (2-D) mask.
        height, width = reference_mask.shape
        reference_ids = nonzero_instance_ids(reference_mask)

        target_mask_path = os.path.join(anno_path, target_id + "_000000.png")
        target_mask = load_mask(target_mask_path)
        target_ids = nonzero_instance_ids(target_mask)

        # Explicit raise instead of assert: asserts vanish under `python -O`.
        unseen = np.setdiff1d(target_ids, reference_ids)
        if unseen.size:
            raise ValueError(
                'Found new target not in the first frame: '
                f'{unseen.tolist()} in {target_mask_path}'
            )

        anns = encode_instances(target_mask, target_ids)
        # Keep only reference instances that are still present in the target
        # frame, so both annotation lists describe the same instances.
        visible = {float(value) for value in target_ids}
        first_frame_anns = [
            ann
            for ann in encode_instances(reference_mask, reference_ids)
            if ann['category_id'] in visible
        ]

        reference_img_relpath = os.path.relpath(
            os.path.join(img_path, reference_id + ".jpg"), root_path
        )
        target_img_relpath = os.path.relpath(
            os.path.join(img_path, target_id + ".jpg"), root_path
        )
        samples.append(
            {
                'image': target_img_relpath,
                'image_info': {
                    'file_name': target_img_relpath,
                    # Size is taken from the reference mask, as before.
                    'height': height,
                    'width': width,
                },
                'anns': anns,
                'first_frame_image': reference_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': first_img_id + len(samples),
                'video_name': video_name,
            }
        )
    return samples


def main():
    """Build the sample list for every video under the root and save it as JSON."""
    root_path = '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_dataset_mugs/test'
    save_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_dataset_mugs/handal_datasets_mugs_test.json"

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order and the new_img_id assignment reproducible. Stray files in the
    # root are skipped because only directories can hold rgb/ and mask/.
    video_names = natsorted(
        [
            name
            for name in os.listdir(root_path)
            if os.path.isdir(os.path.join(root_path, name))
        ]
    )

    handal_dataset = []
    for video_name in tqdm(video_names):
        handal_dataset.extend(
            build_video_samples(
                root_path, video_name, first_img_id=len(handal_dataset)
            )
        )

    with open(save_path, 'w') as f:
        json.dump(handal_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(handal_dataset)}')


if __name__ == '__main__':
    main()