"""Convert VOS instance-mask PNGs into a one-sample COCO-RLE JSON dataset.

Reads the first-frame ground-truth mask (frame 000334 of the "teacup"
sequence) and one sample-frame mask (frame 000345), encodes every non-zero
instance id as a COCO run-length-encoded segmentation, and writes a single
JSON sample pairing the sample-frame annotations with the first-frame ones
(semi-supervised VOS setup: the first frame's GT is the model input).
"""
import copy
import json
import os

import numpy as np
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm

# Root the three hard-coded absolute paths shared; resolved paths are
# byte-identical to the originals.
PROJECT_ROOT = "/home/yuqian_fu/Projects/sam2"


def build_coco_annotations(annotation, instance_values):
    """Encode each requested instance of an id-mask as a COCO RLE annotation.

    Parameters
    ----------
    annotation : np.ndarray
        2-D array of per-pixel instance ids (0 = background).
    instance_values : iterable of numpy scalars
        Instance ids to encode, in the desired output order.

    Returns
    -------
    list of dict
        One dict per instance with keys 'segmentation' (RLE with ASCII
        counts so it is JSON-serializable), 'area', and 'category_id'
        (the instance id cast to float).
    """
    anns = []
    for instance_value in instance_values:
        binary_mask = (annotation == instance_value).astype(np.uint8)
        # pycocotools requires a Fortran-ordered array for RLE encoding.
        segmentation = encode(np.asfortranarray(binary_mask))
        segmentation = {
            # 'counts' comes back as bytes; decode it for json.dump.
            'counts': segmentation['counts'].decode('ascii'),
            'size': segmentation['size'],
        }
        area = binary_mask.sum().astype(float)
        anns.append(
            {
                'segmentation': segmentation,
                'area': area,
                'category_id': instance_value.astype(float),
            }
        )
    return anns


def main():
    new_img_id = 0
    nerf_dataset = []

    first_frame_img_relpath = "teacup/JPEGImages/000334.png"
    first_frame_annotation_path = os.path.join(
        PROJECT_ROOT, "teacup/Annotations/000334.png"
    )
    first_frame_annotation = np.array(Image.open(first_frame_annotation_path))
    # Assumes a single-channel (palette/grayscale) mask PNG; an RGB mask
    # would make this unpacking fail.
    height, width = first_frame_annotation.shape

    unique_instances = np.unique(first_frame_annotation)
    unique_instances = unique_instances[unique_instances != 0]  # drop background
    print("unique_instances:", unique_instances)

    # For semi-supervised VOS, we use the first frame's GT as input.
    coco_format_annotations = build_coco_annotations(
        first_frame_annotation, unique_instances
    )
    print("coco_format", len(coco_format_annotations))

    sample_img_relpath = "teacup/JPEGImages/000345.png"
    image_info = {
        'file_name': sample_img_relpath,
        'height': height,
        'width': width,
    }

    sample_annotation_path = os.path.join(
        PROJECT_ROOT, "teacup/Annotations/000345.png"
    )
    sample_annotation = np.array(Image.open(sample_annotation_path))
    sample_ids = np.unique(sample_annotation)
    sample_ids = sample_ids[sample_ids != 0]
    # Keep first-frame ordering, restricted to ids visible in this sample.
    sample_unique_instances = [i for i in unique_instances if i in sample_ids]
    print("sample_unique_instances:", sample_unique_instances)

    # Explicit check instead of `assert` (asserts vanish under `python -O`).
    for instance_value in sample_unique_instances:
        if instance_value not in unique_instances:
            raise ValueError('Found new target not in the first frame')
    anns = build_coco_annotations(sample_annotation, sample_unique_instances)

    first_frame_anns = copy.deepcopy(coco_format_annotations)
    if len(anns) < len(first_frame_anns):
        # Some first-frame targets vanished in the sample frame; drop them so
        # the two annotation lists stay aligned one-to-one.
        first_frame_anns = [
            ann
            for ann in first_frame_anns
            if ann['category_id'] in sample_unique_instances
        ]
    print("anns:", len(anns))
    print("first_frame_anns", len(first_frame_anns))
    if len(anns) != len(first_frame_anns):
        raise ValueError(
            f"Annotation count mismatch: {len(anns)} sample vs "
            f"{len(first_frame_anns)} first-frame"
        )

    sample = {
        'image': sample_img_relpath,
        'image_info': image_info,
        'anns': anns,
        'first_frame_image': first_frame_img_relpath,
        'first_frame_anns': first_frame_anns,
        'new_img_id': new_img_id,
        'video_name': "teacup",
    }
    nerf_dataset.append(sample)

    # NOTE(review): the output is named kitti_1.json but the data is the
    # "teacup" sequence — confirm the intended filename. Kept as-is.
    save_path = os.path.join(PROJECT_ROOT, "predicted_mask/kitti_1.json")
    with open(save_path, 'w') as f:
        json.dump(nerf_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(nerf_dataset)}')


if __name__ == '__main__':
    main()