# File size: 5,570 Bytes
# 36c1e62
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
from natsort import natsorted
import argparse
# Defaults mirror the values that used to be hard-coded in the script.
DEFAULT_FRAME_GAP = 100
DEFAULT_MAX_OBJECTS = 1


def parse_args(argv=None):
    """Parse the command-line options of the dataset builder.

    Args:
        argv: Optional list of argument strings; ``None`` makes argparse
            read ``sys.argv``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root_path", type=str, required=True, help="Root path of the dataset.")
    parser.add_argument("--save_path", type=str, required=True, help="Path to save the output JSON file.")
    parser.add_argument("--split", type=str, choices=["train", "test"], default="train", help="Dataset split to build.")
    parser.add_argument(
        "--frame_gap",
        type=int,
        default=DEFAULT_FRAME_GAP,
        help="Number of frames between the reference (first) frame and the target frame.",
    )
    parser.add_argument(
        "--max_objects",
        type=int,
        default=DEFAULT_MAX_OBJECTS,
        help="Maximum number of object directories to process; 0 or a negative value processes all.",
    )
    args = parser.parse_args(argv)
    if args.frame_gap < 0:
        parser.error("--frame_gap must be non-negative")
    return args


def frame_pairs(frame_ids, frame_gap):
    """Return ``(reference_id, target_id)`` pairs lying ``frame_gap`` positions apart.

    The result is empty when the sequence has ``frame_gap`` or fewer entries.
    """
    return list(zip(frame_ids, frame_ids[frame_gap:]))


def nonzero_instance_ids(mask):
    """Return the sorted unique values of ``mask`` with 0 excluded."""
    values = np.unique(mask)
    return values[values != 0]


def load_mask(path):
    """Read the image at ``path`` into a numpy array and close the file."""
    with Image.open(path) as img:
        return np.array(img)


def encode_instances(mask):
    """Build one annotation dict per non-zero value found in ``mask``.

    Each dict holds the RLE ``segmentation`` (counts decoded to ``str`` so it
    can be written as JSON), the pixel ``area`` and the ``category_id`` (the
    mask value), the latter two as floats.
    """
    annotations = []
    for instance_value in nonzero_instance_ids(mask):
        binary_mask = (mask == instance_value).astype(np.uint8)
        rle = encode(np.asfortranarray(binary_mask))
        annotations.append(
            {
                'segmentation': {
                    'counts': rle['counts'].decode('ascii'),
                    'size': rle['size'],
                },
                'area': float(binary_mask.sum()),
                'category_id': float(instance_value),
            }
        )
    return annotations


def build_video_samples(vid_path, root_path, frame_gap, first_img_id):
    """Build the samples of one video directory.

    The directory must contain ``rgb/<frame>.jpg`` images and
    ``mask/<frame>_000000.png`` masks. Every frame is paired with the frame
    ``frame_gap`` positions later (natural sort order of the ``rgb`` listing).

    Args:
        vid_path: Path of the video directory.
        root_path: Dataset root; stored paths are relative to it.
        frame_gap: Distance between reference and target frame.
        first_img_id: ``new_img_id`` given to the first sample; later samples
            count up from it.

    Returns:
        A list of sample dicts.

    Raises:
        ValueError: If a target mask contains a value absent from its
            reference mask.
    """
    img_dir = os.path.join(vid_path, "rgb")
    mask_dir = os.path.join(vid_path, "mask")
    frame_ids = [name.split(".")[0] for name in natsorted(os.listdir(img_dir))]

    # Every frame serves as a target first and as a reference frame_gap
    # iterations later, so encoded masks are kept until their reference use.
    cache = {}

    def frame_record(frame_id):
        if frame_id not in cache:
            mask = load_mask(os.path.join(mask_dir, frame_id + "_000000.png"))
            cache[frame_id] = (mask.shape, encode_instances(mask))
        return cache[frame_id]

    samples = []
    for ref_id, target_id in frame_pairs(frame_ids, frame_gap):
        ref_shape, ref_anns = frame_record(ref_id)
        _, target_anns = frame_record(target_id)
        height, width = ref_shape

        ref_categories = {ann['category_id'] for ann in ref_anns}
        target_categories = {ann['category_id'] for ann in target_anns}
        # Explicit raise instead of assert so the check survives `python -O`.
        if not target_categories <= ref_categories:
            raise ValueError('Found new target not in the first frame')
        # Keep only the reference instances that are still present in the
        # target, so both annotation lists describe the same instances.
        first_frame_anns = [ann for ann in ref_anns if ann['category_id'] in target_categories]

        target_img_rel = os.path.relpath(os.path.join(img_dir, target_id + ".jpg"), root_path)
        ref_img_rel = os.path.relpath(os.path.join(img_dir, ref_id + ".jpg"), root_path)
        samples.append(
            {
                'image': target_img_rel,
                'image_info': {
                    'file_name': target_img_rel,
                    'height': height,
                    'width': width,
                },
                'anns': target_anns,
                'first_frame_image': ref_img_rel,
                'first_frame_anns': first_frame_anns,
                'new_img_id': first_img_id + len(samples),
                # First path component below root_path, i.e. the object directory.
                'video_name': target_img_rel.split(os.sep)[0],
            }
        )
        # The reference frame is not needed by any later pair.
        cache.pop(ref_id, None)
    return samples


def main():
    """Walk ``<root>/<object>/<split>/<video>`` and dump all samples as JSON."""
    args = parse_args()
    root_path = args.root_path
    save_path = args.save_path

    # Create the output directory up front so a bad path fails before the
    # (potentially long) processing rather than after it.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Filter to directories before applying the limit, so a stray file listed
    # first cannot consume the whole quota. os.listdir order is arbitrary.
    object_dirs = [os.path.join(root_path, name) for name in os.listdir(root_path)]
    object_dirs = [path for path in object_dirs if os.path.isdir(path)]
    if args.max_objects > 0:
        object_dirs = object_dirs[:args.max_objects]

    handal_dataset = []
    for full_path in tqdm(object_dirs):
        data_path = os.path.join(full_path, args.split)
        for val_name in os.listdir(data_path):
            vid_path = os.path.join(data_path, val_name)
            if not os.path.isdir(vid_path):
                continue
            handal_dataset.extend(
                build_video_samples(vid_path, root_path, args.frame_gap, len(handal_dataset))
            )

    with open(save_path, 'w') as f:
        json.dump(handal_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(handal_dataset)}')


if __name__ == '__main__':
    main()