| import os | |
| import json | |
| from lzstring import LZString | |
| from pycocotools import mask as mask_utils | |
| import numpy as np | |
| from PIL import Image | |
| from decord import VideoReader | |
| from decord import cpu | |
| import argparse | |
| import cv2 | |
| from time import time | |
def save_frames(frames, frame_idxes, output_folder, is_aria=False):
    """Downscale frames and write each one into ``output_folder`` as a JPEG.

    Args:
        frames: Iterable of image arrays whose ``shape`` unpacks into exactly
            three values (height, width, channels).
        frame_idxes: Frame indices paired positionally with ``frames``; each
            index becomes the output file name ``<index>.jpg``.
        output_folder: Directory the JPEG files are written into.
        is_aria: When truthy, both dimensions are divided by 2 instead of 4.

    Note:
        Writing stops at the first frame whose height is below 1408 pixels;
        that frame and every frame after it are not saved.
    """
    downscale = 2 if is_aria else 4

    for frame, frame_idx in zip(frames, frame_idxes):
        height, width, _ = frame.shape

        # A too-small frame ends the whole loop, not just this iteration.
        if height < 1408:
            break

        # cv2.resize expects the target size as (width, height).
        target_size = (width // downscale, height // downscale)
        resized = cv2.resize(frame, target_size)

        out_path = os.path.join(output_folder, f'{frame_idx}.jpg')
        cv2.imwrite(out_path, resized)
def processVideo(takepath, take_name, ego_cam, exo_cams, outputpath, take_id):
    """Extract a fixed window of frames from each exo camera video of a take.

    For every camera name in ``exo_cams`` the video
    ``{takepath}/{take_name}/frame_aligned_videos/{cam}.mp4`` is opened,
    frames 3510 through 4110 (inclusive) are read, and the frames are handed
    to ``save_frames`` to be written under ``{outputpath}/{take_id}/{cam}``.

    Args:
        takepath: Root directory that contains the take folders.
        take_name: Name of the take folder below ``takepath``.
        ego_cam: Ego camera name. Currently unused because the ego-video
            extraction below is commented out.
        exo_cams: Iterable of exo camera names to process.
        outputpath: Root directory for the extracted frames.
        take_id: Identifier used as the per-take output sub-directory.

    Returns:
        list[int]: The frame indices that were requested from each video.
        In its current state the function never returns ``-1``; the
        existence check that did so is commented out.
    """
    # if not os.path.exists(f"{takepath}/{take_name}/frame_aligned_videos/{ego_cam}.mp4"):
    #     return -1
    print("take_name:", take_name)  # debug

    # --- Ego video subsampling (disabled) -----------------------------------
    # vr = VideoReader(
    #     f"{takepath}/{take_name}/frame_aligned_videos/{ego_cam}.mp4", ctx=cpu(0)
    # )
    # len_video = len(vr)
    # subsampling at 1fps -- none of the videos are annotated at more than 1 fps
    # subsample_idx = np.arange(0, len_video, 1) # debug

    # Debug configuration: a hard-coded frame window instead of the full video.
    # NOTE(review): a video with fewer than 4111 frames presumably makes
    # get_batch fail on the out-of-range indices -- confirm before reuse.
    subsample_idx = np.arange(3510, 4111, 1)  # debug

    # if not os.path.exists(f"{outputpath}/{take_id}/{ego_cam}"):
    #     os.makedirs(f"{outputpath}/{take_id}/{ego_cam}")
    # frames = vr.get_batch(subsample_idx).asnumpy()[...,::-1]
    # save_frames(frames=frames, frame_idxes=subsample_idx, output_folder=f"{outputpath}/{take_id}/{ego_cam}", is_aria=True)

    # --- Exo video subsampling ----------------------------------------------
    # Iterate the cameras the caller asked for rather than a hard-coded list,
    # so the ``exo_cams`` argument is actually honoured.
    for exo_cam in exo_cams:
        vr = VideoReader(
            f"{takepath}/{take_name}/frame_aligned_videos/{exo_cam}.mp4", ctx=cpu(0)
        )
        exo_output_folder = f"{outputpath}/{take_id}/{exo_cam}"
        os.makedirs(exo_output_folder, exist_ok=True)

        # Reverse the last (channel) axis; presumably RGB -> BGR so the
        # colours come out right when cv2 writes the files -- TODO confirm.
        frames = vr.get_batch(subsample_idx).asnumpy()[..., ::-1]
        save_frames(
            frames=frames,
            frame_idxes=subsample_idx,
            output_folder=exo_output_folder,
            is_aria=False,
        )

    return subsample_idx.tolist()
| # def decode_mask(width, height, encoded_mask): | |
| # try: | |
| # decomp_string = LZString.decompressFromEncodedURIComponent(encoded_mask) | |
| # except: | |
| # return None | |
| # decomp_encoded = decomp_string.encode() | |
| # rle_obj = { | |
| # "size": [height, width], | |
| # "counts": decomp_encoded, | |
| # } | |
| # rle_obj['counts'] = rle_obj['counts'].decode('ascii') | |
| # return rle_obj | |
| # def processMask(anno, new_anno): | |
| # for object_id in anno.keys(): | |
| # new_anno[object_id] = {} | |
| # for cam_id in anno[object_id].keys(): | |
| # new_anno[object_id][cam_id] = {} | |
| # for frame_id in anno[object_id][cam_id]["annotation"].keys(): | |
| # width = anno[object_id][cam_id]["annotation"][frame_id]["width"] | |
| # height = anno[object_id][cam_id]["annotation"][frame_id]["height"] | |
| # encoded_mask = anno[object_id][cam_id]["annotation"][frame_id]["encodedMask"] | |
| # coco_mask = decode_mask(width, height, encoded_mask) | |
| # new_anno[object_id][cam_id][frame_id] = coco_mask | |
if __name__ == "__main__":
    # Command-line interface: all five options are mandatory, even though the
    # current debug configuration does not use every one of them.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--takepath", "EgoExo take data root"),
        ("--annotationpath", "Annotations json file path"),
        ("--split_path", "path to split.json"),
        ("--split", "train/val/test split to process"),
        ("--outputpath", "Output data root"),
    ):
        cli.add_argument(flag, help=description, required=True)
    args = cli.parse_args()

    # The split file is still loaded, but its contents are not used while the
    # take list below is hard-coded.
    with open(args.split_path, "r") as split_file:
        data_split = json.load(split_file)
    # take_list = data_split[args.split]
    take_list = ['3c744ca5-c64a-4de3-8235-c2f542ac5056']  # debug

    os.makedirs(args.outputpath, exist_ok=True)

    # Reading the annotation file is disabled:
    # with open(args.annotationpath, "r") as f:
    #     annos = json.load(f)
    # annos = annos['annotations']

    started_at = time()
    for take_id in take_list:
        # Skipping takes that were already processed is disabled:
        # if os.path.exists(f"{args.outputpath}/{take_id}"):
        #     print(f"{take_id} already done!")
        #     continue

        # Per-take output folder.
        take_output_dir = f"{args.outputpath}/{take_id}"
        os.makedirs(take_output_dir, exist_ok=True)

        new_anno = {}

        # Take name and camera lists are hard-coded; deriving them from the
        # annotations is disabled:
        # anno = annos[take_id]
        # take_name = anno["take_name"]
        take_name = "sfu_cooking022_2"

        # valid_cams = set()
        # for x in anno['object_masks'].keys():
        #     valid_cams.update(set(anno['object_masks'][x].keys()))
        # ego_cams = []
        # exo_cams = []
        # for vc in valid_cams:
        #     if 'aria' in vc:
        #         ego_cams.append(vc)
        #     else:
        #         exo_cams.append(vc)
        ego_cams = ["aria"]
        exo_cams = ["cam03"]  # debug
        # if len(ego_cams) > 1:
        #     print(take_id, 'HAS MORE THAN ONE EGO')
        #     breakpoint()

        print(f"Processing take {take_id} {take_name}")

        # Mask processing: only the progress message remains active.
        print("Start processing masks")
        # new_anno["masks"] = {} # debug
        # processMask(anno['object_masks'], new_anno["masks"])

        # Video processing.
        print("Start processing Videos")
        subsample_idx = processVideo(
            args.takepath,
            take_name,
            ego_cam=ego_cams[0],
            exo_cams=exo_cams,
            outputpath=args.outputpath,
            take_id=take_id,
        )
        if subsample_idx == -1:
            print(f"{args.takepath}/{take_name}/frame_aligned_videos/{ego_cams[0]}.mp4 does not exist")
            continue

        # Saving the annotation is disabled:
        # new_anno["subsample_idx"] = subsample_idx # debug
        # with open(f"{args.outputpath}/{take_id}/annotation.json", "w") as f: # debug
        #     json.dump(new_anno, f)

    finished_at = time()
    print(f"Total time: {finished_at-started_at} seconds")