"""Overlay decoded masklet annotations on video frames and save demo clips.

The script loads one annotation JSON, decodes its ``masklet`` entries with
pycocotools, tints each object's mask onto the matching video frames and
writes one ``demo_<i>.mp4`` per object.
"""
import copy
import json
import os

import cv2
import numpy as np
import pycocotools.mask as maskUtils


def get_video_frames(video_path, stride=1):
    """Decode a video file into a list of frames.

    Args:
        video_path: Path of the video file to read.
        stride: Keep only frames whose index is a multiple of ``stride``
            (0, stride, 2 * stride, ...). The default of 1 keeps every frame.

    Returns:
        A list of frames as returned by ``cv2.VideoCapture.read``, or ``None``
        when the video cannot be opened (an error message is printed).

    Raises:
        ValueError: If ``stride`` is smaller than 1.
    """
    if stride < 1:
        raise ValueError('stride must be >= 1, got {}'.format(stride))

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Cannot open video file.")
            return None

        frames = []
        frame_id = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Dropping unwanted frames here avoids holding the full video
            # in memory only to slice it afterwards.
            if frame_id % stride == 0:
                frames.append(frame)
            frame_id += 1
        return frames
    finally:
        # Release the capture on every path, including the failed-open one.
        cap.release()


def images_to_video(frames, video_name, fps=6):
    """Write a sequence of frames to an ``mp4v``-encoded video file.

    Args:
        frames: Non-empty sequence of ``(height, width, channels)`` arrays;
            the size of the first frame defines the output size.
        video_name: Path of the video file to create.
        fps: Frame rate of the written video.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if len(frames) == 0:
        raise ValueError(
            'frames is empty; cannot write {}'.format(video_name))

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # cv2.VideoWriter takes the frame size as (width, height).
    video = cv2.VideoWriter(video_name, fourcc, fps, (width, height))
    try:
        for frame in frames:
            video.write(frame)
    finally:
        # Finalize the file even if a write fails part-way through.
        video.release()


def decode_masklet(masklet):
    """Decode every entry of ``masklet`` with ``pycocotools.mask.decode``.

    The shape of each decoded mask and the total number of decoded entries
    are printed for inspection.

    Args:
        masklet: Iterable of RLE-encoded entries.
            NOTE(review): presumably one entry per annotated frame, each
            decoding to an ``(H, W, n_objects)`` array - confirm against the
            annotation format.

    Returns:
        List with one decoded mask array per entry of ``masklet``.
    """
    masks = []
    for _rle in masklet:
        mask = maskUtils.decode(_rle)
        print('mask_shape: ', mask.shape)
        masks.append(mask)
    print(len(masks))
    return masks


def draw_mask(image, mask, color=(255, 0, 0), alpha=0.5):
    """Blend a tinted binary mask over an image.

    The tint layer is ``color`` where ``mask`` is 1 and black elsewhere, so
    pixels outside the mask are darkened by the factor ``1 - alpha``.

    Args:
        image: ``(H, W, 3)`` image array; it is not modified.
        mask: ``(H, W)`` binary mask (values 0 / 1).
        color: Per-channel tint in the channel order of ``image``. The
            default tints the first channel only.
        alpha: Weight of the tint layer; the image gets ``1 - alpha``.

    Returns:
        A new ``uint8`` array with the same shape as ``image``.
    """
    tint = np.asarray(color, dtype=np.float64)
    overlay = np.asarray(mask)[..., None] * tint
    # The arithmetic allocates a new float array, so no explicit copy of
    # ``image`` is needed to keep the input untouched.
    blended = overlay * alpha + image * (1.0 - alpha)
    return blended.astype(np.uint8)


def add_mask2images(frames, masklets):
    """Create one overlaid frame sequence per object.

    Args:
        frames: Sequence of image arrays.
        masklets: Sequence of per-frame mask arrays, paired with ``frames``
            by position and indexed as ``masks[:, :, i_obj]``. The object
            count is taken from the last axis of the first frame's masks.

    Returns:
        A list with one entry per object; each entry is the list of frames
        with that object's mask drawn on them. Empty when there are no
        frame / mask pairs.
    """
    show_videos = []
    for i_frames, (frame, masks) in enumerate(zip(frames, masklets)):
        n_obj = masks.shape[-1]
        if i_frames == 0:
            show_videos = [[] for _ in range(n_obj)]
        for i_obj in range(n_obj):
            # draw_mask returns a fresh array and never mutates ``frame``,
            # so the same frame can be shared across objects.
            show_videos[i_obj].append(draw_mask(frame, masks[:, :, i_obj]))
    return show_videos


demo_video_anno = '/mnt/bn/xiangtai-training-data-video/dataset/segmentation_datasets/sam_v_full/sav_000/sav_train/sav_000/sav_000001_manual.json'
video_root = '/mnt/bn/xiangtai-training-data-video/dataset/segmentation_datasets/sam_v_full/sav_000/sav_train/sav_000'
video_save_path = '/mnt/bn/xiangtai-training-data/project/xiangtai-windows/tt_vlm/work_dirs/sam_v_demos/demo.mp4'

# Only every 4th video frame is paired with a masklet entry.
# NOTE(review): presumably the annotations are sampled at a quarter of the
# video frame rate - confirm against the dataset documentation.
FRAME_STRIDE = 4

with open(demo_video_anno, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Dump the annotation metadata; the masklet payload is skipped because it
# holds the encoded masks themselves.
print(data.keys())
for key in data:
    if key == 'masklet':
        continue
    print(key, ': ')
    print(data[key])

video_path = os.path.join(video_root, '{}.mp4'.format(data['video_id']))
frames = get_video_frames(video_path, stride=FRAME_STRIDE)
if frames is None:
    # Fail with a clear message instead of a TypeError on a later use.
    raise RuntimeError('Cannot read frames from {}'.format(video_path))
masklents = decode_masklet(data['masklet'])

# Explicit check rather than ``assert`` so it still runs under ``python -O``.
if len(frames) != len(masklents):
    raise ValueError(
        'frame / annotation count mismatch: {} frames vs {} masklet '
        'entries'.format(len(frames), len(masklents)))

show_videos = add_mask2images(frames, masklents)

# cv2.VideoWriter does not create missing directories.
os.makedirs(os.path.dirname(video_save_path), exist_ok=True)
for i, show_video in enumerate(show_videos):
    video_save_path_ = video_save_path.replace('demo.mp4', 'demo_{}.mp4'.format(i))
    images_to_video(show_video, video_save_path_)