| import json |
| import os |
| from PIL import Image |
| import numpy as np |
| from pycocotools.mask import encode, decode, frPyObjects |
| from tqdm import tqdm |
| import copy |
| from natsort import natsorted |
| import cv2 |
|
|
if __name__ == '__main__':

    # Build a frame-level ego<->exo correspondence dataset for the test split.
    # For every take, every exo camera, and every ego frame index, one sample
    # pairs the ego frame (query, with its half-resolution object masks)
    # against the same-index exo frame (target, with all-zero placeholder
    # masks — the targets are to be predicted downstream).
    root_path = '/scratch/yuqian_fu/data_segswap_test'
    save_path = os.path.join(root_path, 'egoexo_test_framelevel.json')
    split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    takes = data_split["test"]

    new_img_id = 0  # globally unique, monotonically increasing sample id
    egoexo_dataset = []

    for take in tqdm(takes):
        vid_root_path = os.path.join(root_path, take)
        anno_path = os.path.join(vid_root_path, "annotation.json")
        with open(anno_path, 'r') as fp:
            annotations = json.load(fp)

        # Map each (natsorted) object id to a contiguous 1-based category id.
        objs = natsorted(annotations["masks"].keys())
        coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}
        print(f"coco_id_to_cont_id:{coco_id_to_cont_id}")

        # Camera folders live next to annotation.json. Keep only directories
        # instead of list.remove("annotation.json"), which raises ValueError
        # if any other stray file appears in the take folder.
        valid_cams = [d for d in os.listdir(vid_root_path)
                      if os.path.isdir(os.path.join(vid_root_path, d))]
        ego_cams = [vc for vc in valid_cams if 'aria' in vc]
        exo_cams = [vc for vc in valid_cams if 'aria' not in vc]
        if not ego_cams:
            # Original code would crash with IndexError here; skip instead.
            print(f"Warning: {take} has no ego camera, skipping take.")
            continue
        if len(ego_cams) > 1:
            print(f"Warning: {take} has more than one ego camera, only the first one will be used.")
        ego = ego_cams[0]

        vid_ego_path = os.path.join(vid_root_path, ego)
        ego_frames = natsorted(os.listdir(vid_ego_path))
        # Frame indices, e.g. "0001" from "0001.jpg".
        idxs = [f.split(".")[0] for f in ego_frames]

        for exo in exo_cams:
            vid_exo_path = os.path.join(vid_root_path, exo)

            for idx in idxs:
                '''query (ego) side'''
                filename = f"{idx}.jpg"
                first_frame_img_path = os.path.join(vid_ego_path, filename)
                first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)

                # Objects annotated on this ego frame. .get(ego, {}) guards
                # against objects that were never annotated on the ego camera
                # (the original direct indexing raised KeyError for those).
                ego_obj = [obj for obj in objs
                           if idx in annotations['masks'][obj].get(ego, {})]
                print("len of ego_obj:", len(ego_obj))

                first_frame_anns = []
                for obj in ego_obj:
                    # Decode the stored RLE, downsample the mask to half
                    # resolution (nearest-neighbour keeps it binary), then
                    # re-encode as an ASCII RLE that is JSON-serializable.
                    binary_mask = decode(annotations["masks"][obj][ego][idx])
                    mh, mw = binary_mask.shape
                    binary_mask = cv2.resize(binary_mask, (mw // 2, mh // 2),
                                             interpolation=cv2.INTER_NEAREST)
                    area = float(binary_mask.sum())  # plain float for json.dump
                    rle = encode(np.asfortranarray(binary_mask))
                    segmentation = {
                        'counts': rle['counts'].decode('ascii'),
                        'size': rle["size"],
                    }
                    first_frame_anns.append(
                        {
                            'segmentation': segmentation,
                            'area': area,
                            'category_id': float(coco_id_to_cont_id[obj]),
                        }
                    )

                '''target (exo) side'''
                sample_img_path = os.path.join(vid_exo_path, filename)
                sample_img_relpath = os.path.relpath(sample_img_path, root_path)

                exo_img = cv2.imread(sample_img_path)
                if exo_img is None:
                    # cv2.imread returns None on a missing/corrupt file;
                    # fail loudly instead of crashing on the unpack below.
                    raise FileNotFoundError(f"Cannot read exo frame: {sample_img_path}")
                h, w, _ = exo_img.shape

                # Target annotations are empty placeholders at the exo
                # frame's full size: one all-zero RLE per query object.
                anns = []
                for obj in ego_obj:
                    empty_mask = np.zeros((h, w), dtype=np.uint8)
                    rle = encode(np.asfortranarray(empty_mask))
                    segmentation = {
                        'counts': rle['counts'].decode('ascii'),
                        'size': rle["size"],
                    }
                    anns.append(
                        {
                            'segmentation': segmentation,
                            'area': 0.0,
                            'category_id': float(coco_id_to_cont_id[obj]),
                        }
                    )

                '''remaining image metadata'''
                image_info = {
                    'file_name': sample_img_relpath,
                    'height': h,
                    'width': w,
                }

                '''assemble one pair's sample'''
                sample = {
                    'image': sample_img_relpath,
                    'image_info': image_info,
                    'anns': anns,
                    'first_frame_image': first_frame_img_relpath,
                    'first_frame_anns': first_frame_anns,
                    'new_img_id': new_img_id,
                    'video_name': take,
                }
                egoexo_dataset.append(sample)
                new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')
|
|