# ObjectRelator-Original/datasets/build_egoexo_competition.py
#
# Upload folder using huggingface_hub
#
# 625a17f verified 4 months ago
#
# 5.77 kB

	import json
	import os
	from PIL import Image
	import numpy as np
	from pycocotools.mask import encode, decode, frPyObjects
	from tqdm import tqdm
	import copy
	from natsort import natsorted
	import cv2

	# Build the frame-level Ego-Exo test JSON: every ego (query) frame of a take
	# is paired with the same-named frame of each exo (target) camera.

	# Root folder holding one sub-folder per take; the output JSON is saved here too.
	ROOT_PATH = '/scratch/yuqian_fu/data_segswap_test'
	# Split file; only its "test" list of takes is used.
	SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
	# File name of the generated dataset inside ROOT_PATH.
	SAVE_NAME = 'egoexo_test_framelevel.json'
	# Ego annotation masks are shrunk by this factor before being re-encoded.
	# NOTE(review): presumably matches the resolution of the stored ego frames - confirm.
	EGO_MASK_DOWNSCALE = 2


	def split_cameras(cam_names):
	    """Split camera folder names into ``(ego_cams, exo_cams)``.

	    A name containing ``'aria'`` is an ego camera; every other name is an
	    exo camera. The input order is preserved in both lists.
	    """
	    ego_cams = [cam for cam in cam_names if 'aria' in cam]
	    exo_cams = [cam for cam in cam_names if 'aria' not in cam]
	    return ego_cams, exo_cams


	def objects_in_frame(masks, objs, cam, frame_idx):
	    """Return the objects from ``objs`` annotated for ``cam`` at ``frame_idx``.

	    ``masks`` is indexed as ``masks[obj][cam][frame_idx]``. An object that has
	    no entry at all for ``cam`` is skipped instead of raising ``KeyError``.
	    """
	    return [obj for obj in objs if frame_idx in masks[obj].get(cam, {})]


	def rle_to_json(binary_mask):
	    """Encode a 2-D uint8 mask with pycocotools and make the result JSON-friendly.

	    The RLE ``counts`` bytes are decoded to an ASCII string so that
	    ``json.dump`` can serialise the returned dict.
	    """
	    rle = encode(np.asfortranarray(binary_mask))
	    return {
	        'counts': rle['counts'].decode('ascii'),
	        'size': rle["size"],
	    }


	def build_ego_anns(masks, ego_obj, ego, frame_idx, coco_id_to_cont_id):
	    """Build the query (ego) annotations of one frame.

	    Each mask is decoded, shrunk by ``EGO_MASK_DOWNSCALE`` with
	    nearest-neighbour interpolation (keeps it binary) and re-encoded.
	    Returns one dict per object with ``segmentation``, ``area`` and
	    ``category_id`` (stored as float, as in the original output).
	    """
	    first_frame_anns = []
	    for obj in ego_obj:
	        binary_mask = decode(masks[obj][ego][frame_idx])
	        h, w = binary_mask.shape
	        binary_mask = cv2.resize(
	            binary_mask,
	            (w // EGO_MASK_DOWNSCALE, h // EGO_MASK_DOWNSCALE),
	            interpolation=cv2.INTER_NEAREST,
	        )
	        first_frame_anns.append(
	            {
	                'segmentation': rle_to_json(binary_mask),
	                'area': float(binary_mask.sum()),
	                'category_id': float(coco_id_to_cont_id[obj]),
	            }
	        )
	    return first_frame_anns


	def build_take_samples(root_path, take, next_img_id):
	    """Build every ego->exo sample of one take.

	    Args:
	        root_path: folder that contains the take folder; stored image paths
	            are made relative to it.
	        take: name of the take folder (must contain ``annotation.json`` and
	            one sub-folder of frames per camera).
	        next_img_id: ``new_img_id`` given to the first sample; later samples
	            count up from it.

	    Returns:
	        A list of sample dicts, ordered by exo camera and then by frame.

	    Raises:
	        RuntimeError: if the take has no ego (``aria``) camera folder.
	        FileNotFoundError: if an exo frame cannot be read.
	    """
	    vid_root_path = os.path.join(root_path, take)
	    with open(os.path.join(vid_root_path, "annotation.json"), 'r') as fp:
	        annotations = json.load(fp)
	    masks = annotations["masks"]

	    # All objects of the take, mapped to contiguous 1-based category ids.
	    objs = natsorted(masks)
	    coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}
	    print(f"coco_id_to_cont_id:{coco_id_to_cont_id}")

	    # Camera folders. Sorting makes the chosen ego camera and the sample order
	    # reproducible (os.listdir order is arbitrary); keeping directories only
	    # drops annotation.json and any other stray file.
	    cam_names = [
	        name
	        for name in natsorted(os.listdir(vid_root_path))
	        if os.path.isdir(os.path.join(vid_root_path, name))
	    ]
	    ego_cams, exo_cams = split_cameras(cam_names)
	    if not ego_cams:
	        raise RuntimeError(f"{take} has no ego (aria) camera folder in {vid_root_path}")
	    if len(ego_cams) > 1:
	        print(f"Warning: {take} has more than one ego camera, only the first one will be used.")
	    ego = ego_cams[0]
	    if not exo_cams:
	        print(f"Warning: {take} has no exo camera, no sample is generated.")
	        return []

	    # Frame indices are the ego frame file names without extension.
	    vid_ego_path = os.path.join(vid_root_path, ego)
	    idxs = [f.split(".")[0] for f in natsorted(os.listdir(vid_ego_path))]

	    # The query side does not depend on the exo camera, so it is built once
	    # per frame and reused for every exo camera.
	    queries = []
	    for idx in idxs:
	        filename = f"{idx}.jpg"
	        first_frame_img_path = os.path.join(vid_ego_path, filename)
	        first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)
	        # Objects annotated in the ego view at this frame.
	        ego_obj = objects_in_frame(masks, objs, ego, idx)
	        first_frame_anns = build_ego_anns(masks, ego_obj, ego, idx, coco_id_to_cont_id)
	        queries.append((filename, first_frame_img_relpath, ego_obj, first_frame_anns))

	    samples = []
	    # An all-zero mask encodes identically for a given size: encode it once.
	    null_seg_by_size = {}
	    for exo in exo_cams:
	        vid_exo_path = os.path.join(vid_root_path, exo)

	        for filename, first_frame_img_relpath, ego_obj, first_frame_anns in queries:
	            # Target image: same file name in the exo camera folder.
	            sample_img_path = os.path.join(vid_exo_path, filename)
	            sample_img_relpath = os.path.relpath(sample_img_path, root_path)

	            # The image is read only to get its height and width.
	            exo_img = cv2.imread(sample_img_path)
	            if exo_img is None:
	                # cv2.imread signals a missing/unreadable file by returning None.
	                raise FileNotFoundError(f"Cannot read exo frame: {sample_img_path}")
	            h, w = exo_img.shape[:2]

	            # Target annotations are not needed for inference: every object
	            # gets an empty (null) mask of the target image size.
	            if (h, w) not in null_seg_by_size:
	                null_seg_by_size[(h, w)] = rle_to_json(np.zeros((h, w), dtype=np.uint8))
	            null_seg = null_seg_by_size[(h, w)]
	            anns = [
	                {
	                    'segmentation': dict(null_seg),
	                    'area': float(0),
	                    'category_id': float(coco_id_to_cont_id[obj]),
	                }
	                for obj in ego_obj
	            ]

	            image_info = {
	                'file_name': sample_img_relpath,
	                'height': h,
	                'width': w,
	            }

	            # Everything describing one (ego frame, exo frame) pair.
	            samples.append(
	                {
	                    'image': sample_img_relpath,
	                    'image_info': image_info,
	                    'anns': anns,
	                    'first_frame_image': first_frame_img_relpath,
	                    'first_frame_anns': first_frame_anns,
	                    'new_img_id': next_img_id + len(samples),
	                    'video_name': take,
	                }
	            )
	    return samples


	def main():
	    """Build the samples of all test takes and save them as one JSON file."""
	    root_path = ROOT_PATH
	    save_path = os.path.join(root_path, SAVE_NAME)
	    with open(SPLIT_PATH, "r") as fp:
	        takes = json.load(fp)["test"]

	    egoexo_dataset = []
	    for take in tqdm(takes):
	        # new_img_id keeps counting across takes: it equals the sample's index.
	        egoexo_dataset.extend(build_take_samples(root_path, take, len(egoexo_dataset)))

	    with open(save_path, 'w') as f:
	        json.dump(egoexo_dataset, f)
	    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')


	if __name__ == '__main__':
	    main()