ObjectRelator-Original / datasets /build_DAVIS_trainset_20gap.py

Upload folder using huggingface_hub

625a17f verified 3 months ago

5.78 kB

	import json
	import os
	from PIL import Image
	import numpy as np
	from pycocotools.mask import encode, decode, frPyObjects
	from tqdm import tqdm
	import copy

	def _load_label_map(png_path):
	    """Read an annotation PNG and return its pixels as a NumPy array.

	    The image is opened in a ``with`` block so the file handle is released
	    as soon as the pixels are copied, rather than staying open until
	    garbage collection while many frames are processed.
	    """
	    with Image.open(png_path) as img:
	        return np.array(img)


	def _instance_ids(label_map):
	    """Return the sorted unique values of *label_map*, excluding 0."""
	    ids = np.unique(label_map)
	    return ids[ids != 0]


	def _encode_instances(label_map, instance_ids):
	    """Build one RLE annotation dict per id in *instance_ids*.

	    Each dict has the keys ``segmentation`` (``counts``/``size``), ``area``
	    and ``category_id``; ``area`` and ``category_id`` are stored as floats.
	    """
	    annotations = []
	    for instance_value in instance_ids:
	        binary_mask = (label_map == instance_value).astype(np.uint8)
	        rle = encode(np.asfortranarray(binary_mask))
	        annotations.append(
	            {
	                'segmentation': {
	                    # ``counts`` comes back as bytes; decode it so the
	                    # annotation can be written with json.dump.
	                    'counts': rle['counts'].decode('ascii'),
	                    'size': rle['size'],
	                },
	                'area': float(binary_mask.sum()),
	                'category_id': float(instance_value),
	            }
	        )
	    return annotations


	def main(root_path='/work/yuqian_fu/Data/datasets/DAVIS', gap=20):
	    """Build (reference frame, target frame) training pairs and save them.

	    For every video listed in ``ImageSets/2017/train.txt`` of the
	    ``trainval`` split, frame ``i`` is paired with frame ``i + gap``. The
	    reference frame contributes its masks (``first_frame_anns``) and the
	    target frame contributes the masks to predict (``anns``). A pair is
	    dropped when the target frame contains an instance id that is absent
	    from the reference frame.

	    Args:
	        root_path: Directory that contains the ``2017`` folder.
	        gap: Number of frames between reference and target (>= 1).

	    Raises:
	        ValueError: If ``gap`` is smaller than 1, or if a video has a
	            different number of image frames and annotation frames.
	    """
	    if gap < 1:
	        raise ValueError(f"gap must be >= 1, got {gap}")

	    splits = ['trainval', 'test-dev']
	    # Only the first split is used; the video names come from its train list.
	    annotation_path = os.path.join(root_path, f'2017/{splits[0]}/Annotations/480p')
	    image_path = os.path.join(root_path, f'2017/{splits[0]}/JPEGImages/480p')

	    set_path = os.path.join(root_path, f'2017/{splits[0]}/ImageSets/2017/train.txt')
	    save_path = os.path.join(root_path, f'2017/{splits[0]}_val_psalm_train_{gap}gap.json')

	    with open(set_path, 'r', encoding='utf-8') as f:
	        # Ignore blank lines: an empty name would resolve to the parent
	        # directory and be treated as a video.
	        video_names = [name for name in (line.strip() for line in f) if name]

	    new_img_id = 0
	    DAVIS_dataset = []
	    for video_name in tqdm(video_names):
	        vid_path = os.path.join(image_path, video_name)
	        anno_path = os.path.join(annotation_path, video_name)

	        # Sorted listings are paired by index, so both must have equal length.
	        frame_list = sorted(os.listdir(vid_path))
	        anno_list = sorted(os.listdir(anno_path))
	        if len(frame_list) != len(anno_list):
	            # Raised explicitly (not asserted) so the check survives ``python -O``.
	            raise ValueError(
	                f"Mismatch in {video_name}: {len(frame_list)} frames vs {len(anno_list)} annotations"
	            )

	        # Every reference index whose target (index + gap) still exists.
	        for ref_idx in range(len(frame_list) - gap):
	            target_idx = ref_idx + gap

	            first_frame_img_relpath = os.path.relpath(
	                os.path.join(vid_path, frame_list[ref_idx]), root_path
	            )
	            sample_img_relpath = os.path.relpath(
	                os.path.join(vid_path, frame_list[target_idx]), root_path
	            )

	            first_frame_annotation = _load_label_map(os.path.join(anno_path, anno_list[ref_idx]))
	            sample_annotation = _load_label_map(os.path.join(anno_path, anno_list[target_idx]))
	            # The image size is taken from the reference annotation.
	            height, width = first_frame_annotation.shape

	            reference_ids = _instance_ids(first_frame_annotation)
	            target_ids = _instance_ids(sample_annotation)

	            # A target instance without a reference mask cannot be used.
	            if not np.isin(target_ids, reference_ids).all():
	                print(f"Skip {sample_img_relpath}: new instance not in reference frame")
	                continue

	            # target_ids is a subset of reference_ids here, so encoding the
	            # reference frame for target_ids yields exactly the reference
	            # masks that have a counterpart in the target frame, in the
	            # same (sorted) order as ``anns``.
	            DAVIS_dataset.append(
	                {
	                    'image': sample_img_relpath,
	                    'image_info': {
	                        'file_name': sample_img_relpath,
	                        'height': height,
	                        'width': width,
	                    },
	                    'anns': _encode_instances(sample_annotation, target_ids),
	                    'first_frame_image': first_frame_img_relpath,
	                    'first_frame_anns': _encode_instances(first_frame_annotation, target_ids),
	                    'new_img_id': new_img_id,
	                    'video_name': video_name,
	                }
	            )
	            new_img_id += 1

	    with open(save_path, 'w', encoding='utf-8') as f:
	        json.dump(DAVIS_dataset, f)
	    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')


	if __name__ == '__main__':
	    main()