import json
import os
import copy

import numpy as np
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm

# Gap (in frames) between the reference frame and the target frame of each
# training/evaluation pair.  Also embedded in the output filename.
FRAME_GAP = 20


def mask_to_coco_anns(annotation, instance_values):
    """Encode per-instance binary masks of a label map as COCO-style RLE anns.

    Args:
        annotation: 2-D integer array; each pixel holds an instance id
            (0 = background).
        instance_values: iterable of non-zero instance ids to encode.

    Returns:
        A list of dicts with keys 'segmentation' (RLE with ASCII counts),
        'area' (float pixel count) and 'category_id' (instance id as float).
    """
    anns = []
    for instance_value in instance_values:
        binary_mask = (annotation == instance_value).astype(np.uint8)
        # pycocotools requires Fortran-ordered arrays; counts come back as
        # bytes and must be decoded to be JSON-serializable.
        rle = encode(np.asfortranarray(binary_mask))
        segmentation = {
            'counts': rle['counts'].decode('ascii'),
            'size': rle['size'],
        }
        anns.append(
            {
                'segmentation': segmentation,
                'area': float(binary_mask.sum()),
                'category_id': float(instance_value),
            }
        )
    return anns


def main():
    """Build (reference frame, target frame) pairs with a FRAME_GAP offset
    from the DAVIS 2017 val split and dump them as a PSALM-style JSON file."""
    root_path = '/work/yuqian_fu/Data/datasets/DAVIS'
    splits = ['trainval', 'test-dev']  # we only do val evaluation
    split = splits[0]
    annotation_path = os.path.join(root_path, f'2017/{split}/Annotations/480p')
    image_path = os.path.join(root_path, f'2017/{split}/JPEGImages/480p')
    set_path = os.path.join(root_path, f'2017/{split}/ImageSets/2017/val.txt')
    save_path = os.path.join(root_path, f'2017/{split}_test_psalm_{FRAME_GAP}gap.json')

    # One video name per line in the split file.
    with open(set_path, 'r') as f:
        val_set = [line.strip() for line in f]

    new_img_id = 0
    DAVIS_dataset = []
    for val_name in tqdm(val_set):
        vid_path = os.path.join(image_path, val_name)
        anno_path = os.path.join(annotation_path, val_name)

        # All frames / annotation PNGs of the video, in temporal order.
        frame_list = sorted(os.listdir(vid_path))
        anno_list = sorted(os.listdir(anno_path))
        video_len = len(frame_list)

        # Sanity check: every frame must have a matching annotation file.
        assert len(frame_list) == len(anno_list), f"Mismatch in {val_name}: {len(frame_list)} frames vs {len(anno_list)} annotations"

        # Pair frame i with frame i + FRAME_GAP; videos shorter than the gap
        # simply yield no pairs.
        for i in range(video_len - FRAME_GAP):
            target_idx = i + FRAME_GAP

            # --- Reference frame: for semi-supervised VOS the first frame's
            # GT masks are the model input.
            first_frame_img_path = os.path.join(vid_path, frame_list[i])
            first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)
            first_frame_annotation_path = os.path.join(anno_path, anno_list[i])
            # DAVIS annotations are palette PNGs, so this yields a 2-D id map.
            first_frame_annotation = np.array(Image.open(first_frame_annotation_path))
            height, width = first_frame_annotation.shape
            unique_instances = np.unique(first_frame_annotation)
            unique_instances = unique_instances[unique_instances != 0]
            coco_format_annotations = mask_to_coco_anns(first_frame_annotation, unique_instances)

            # --- Target frame (the sample to predict).
            sample_img_path = os.path.join(vid_path, frame_list[target_idx])
            sample_img_relpath = os.path.relpath(sample_img_path, root_path)
            image_info = {
                'file_name': sample_img_relpath,
                'height': height,
                'width': width,
            }
            sample_annotation_path = os.path.join(anno_path, anno_list[target_idx])
            sample_annotation = np.array(Image.open(sample_annotation_path))
            sample_unique_instances = np.unique(sample_annotation)
            sample_unique_instances = sample_unique_instances[sample_unique_instances != 0]

            # Skip pairs where the target frame contains an instance that is
            # absent from the reference frame (it cannot be tracked from it).
            if not set(sample_unique_instances).issubset(set(unique_instances)):
                print(f"Skip {sample_img_relpath}: new instance not in reference frame")
                continue

            anns = mask_to_coco_anns(sample_annotation, sample_unique_instances)

            # Drop reference-frame instances that disappeared by the target
            # frame so both annotation lists describe the same instance set.
            first_frame_anns = copy.deepcopy(coco_format_annotations)
            if len(anns) < len(first_frame_anns):
                first_frame_anns = [ann for ann in first_frame_anns if ann['category_id'] in sample_unique_instances]
            assert len(anns) == len(first_frame_anns), f"Annotation mismatch at {sample_img_relpath}"

            sample = {
                'image': sample_img_relpath,
                'image_info': image_info,
                'anns': anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': new_img_id,
                'video_name': val_name,
            }
            DAVIS_dataset.append(sample)
            new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(DAVIS_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')


if __name__ == '__main__':
    main()