import argparse
import glob
import os
from pathlib import Path
from typing import Union, Optional, Tuple, List

import cv2
import imageio
import lightning
import numpy as np
import torch
import tqdm
from pytorch3d.renderer import PointLights

from models.modules.ehm import EHM_v2
from models.modules.renderer.body_renderer import Renderer2 as BodyRenderer
from models.pipeline.ehm_pipeline import Ehm_Pipeline
from utils.general_utils import (
    ConfigDict, rtqdm, device_parser, add_extra_cfgs
)
from utils.graphics_utils import GS_Camera
from utils.pipeline_utils import to_tensor
|
|
|
|
|
|
def flex_resize_video(
    frames : np.ndarray,
    tgt_wh : Union[Tuple[int, int], None] = None,
    ratio  : Union[float, None] = None,
    kp_mod : int = 1,
):
    '''
    Resize the frames to the target width and height. Set one of width and height to -1 to keep the aspect ratio.
    At least one of `tgt_wh` and `ratio` must be set; if both are set, `tgt_wh` will be used.

    ### Args
    - frames: np.ndarray, (L, H, W, 3)
    - tgt_wh: Tuple[int, int], default=None
        - The target (width, height), set one of them to -1 to keep the aspect ratio.
    - ratio: float, default=None
        - The ratio to resize the frames. It will be used if `tgt_wh` is not set.
    - kp_mod: int, default 1
        - Keep the width and height as multiples of `kp_mod`.
        - For example, if `kp_mod=16`, the width and height will be rounded to the nearest multiple of 16.

    ### Returns
    - np.ndarray, (L, H', W', 3)
        - The resized frames.

    ### Raises
    - AssertionError: if the arguments are inconsistent (see the checks below).
    '''
    assert tgt_wh is not None or ratio is not None, 'At least one of tgt_wh and ratio must be set.'
    if tgt_wh is not None:
        assert len(tgt_wh) == 2, 'tgt_wh must be a tuple of 2 elements.'
        assert tgt_wh[0] > 0 or tgt_wh[1] > 0, 'At least one of width and height must be positive.'
        # Anything other than -1 (auto) or a positive size would only fail later inside cv2.
        assert all(v == -1 or v > 0 for v in tgt_wh), 'width and height must each be positive or -1.'
    if ratio is not None:
        assert ratio > 0, 'ratio must be positive.'
    assert kp_mod > 0, 'kp_mod must be positive.'
    assert len(frames.shape) == 4, 'frames must have 4 dimensions (L, H, W, C).'

    def align_size(val:float):
        ''' Round the value to the nearest multiple of `kp_mod`, never collapsing to zero. '''
        aligned = int(round(val / kp_mod) * kp_mod)
        # A zero-sized target makes cv2.resize fail, so fall back to the smallest valid size.
        return aligned if aligned > 0 else max(1, int(kp_mod))

    orig_h, orig_w = frames.shape[1], frames.shape[2]
    tgt_wh = (int(orig_w * ratio), int(orig_h * ratio)) if tgt_wh is None else tgt_wh
    tgt_w, tgt_h = tgt_wh
    # Resolve the "-1" (auto) side from the other one; at most one side can be -1 here.
    tgt_w = align_size(orig_w * tgt_h / orig_h) if tgt_w == -1 else align_size(tgt_w)
    tgt_h = align_size(orig_h * tgt_w / orig_w) if tgt_h == -1 else align_size(tgt_h)

    # cv2.resize takes the destination size as (width, height).
    resized_frames = np.stack([cv2.resize(frame, (tgt_w, tgt_h)) for frame in frames])

    return resized_frames
|
|
|
|
|
|
def flex_resize_img(
    img    : np.ndarray,
    tgt_wh : Union[Tuple[int, int], None] = None,
    ratio  : Union[float, None] = None,
    kp_mod : int = 1,
):
    '''
    Single-image variant of `flex_resize_video`: the image is treated as a one-frame clip.

    ### Args
    - img: np.ndarray, (H, W, 3)
    - tgt_wh: Tuple[int, int], default=None
        - The target width and height, set one of them to -1 to keep the aspect ratio.
    - ratio: float, default=None
        - The ratio to resize the image. It will be used if `tgt_wh` is not set.
    - kp_mod: int, default 1
        - Keep the width and height as multiples of `kp_mod`.

    ### Returns
    - np.ndarray, (H', W', 3)
        - The resized image.
    '''
    n_dims = len(img.shape)
    assert n_dims == 3, 'img must have 3 dimensions.'
    # Add a leading frame axis, resize, then drop it again.
    single_frame_clip = img[None]
    resized_clip = flex_resize_video(single_frame_clip, tgt_wh, ratio, kp_mod)
    return resized_clip[0]
|
|
|
|
def lurb_to_cwh(
    lurb : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Turn [left, upper, right, bottom] boxes into [center_x, center_y, width, height] boxes.

    ### Args
    - lurb: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The center-width-height format bounding box, converted back through the
          callback returned by `to_tensor`.
    '''
    boxes, restore = to_tensor(lurb, device=None, temporary=True)
    assert boxes.shape[-1] == 4, f"Invalid shape: {boxes.shape}, should be (..., 4)"

    top_left, bottom_right = boxes[..., :2], boxes[..., 2:]
    center = (top_left + bottom_right) / 2
    size = bottom_right - top_left

    return restore(torch.cat([center, size], dim=-1))
|
|
|
|
def cwh_to_lurb(
    cwh : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Turn [center_x, center_y, width, height] boxes into [left, upper, right, bottom] boxes.

    ### Args
    - cwh: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The center-width-height format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.
    '''
    boxes, restore = to_tensor(cwh, device=None, temporary=True)
    assert boxes.shape[-1] == 4, f"Invalid shape: {boxes.shape}, should be (..., 4)"

    center = boxes[..., :2]
    half_size = boxes[..., 2:] / 2
    corners = torch.cat([center - half_size, center + half_size], dim=-1)

    return restore(corners)
|
|
|
|
def cwh_to_cs(
    cwh    : Union[list, np.ndarray, torch.Tensor],
    reduce : Optional[str] = None,
):
    '''
    Convert the center-width-height format to the center-scale format.
    The scale is the larger of width and height. With `reduce=None` a message is
    printed when width and height differ.

    ### Args
    - cwh: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The center-width-height format bounding box.
    - reduce: Optional[str], default None, valid values: None, 'max'
        - Determine how to reduce the width and height to a single scale.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 3)
        - The center-scale format bounding box.
    '''
    boxes, restore = to_tensor(cwh, device=None, temporary=True)
    assert boxes.shape[-1] == 4, f"Invalid shape: {boxes.shape}, should be (..., 4)"

    # Only warn when the caller did not explicitly ask for a reduction.
    if reduce is None and (boxes[..., 2] != boxes[..., 3]).any():
        print(f"Width and height are supposed to be the same, but they're not. The larger one will be used.")

    center = boxes[..., :2]
    scale = boxes[..., 2:].max(dim=-1).values.unsqueeze(-1)

    return restore(torch.cat([center, scale], dim=-1))
|
|
|
|
def cs_to_cwh(
    cs : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Convert the center-scale format to the center-width-height format.
    The scale is used for both the width and the height.

    ### Args
    - cs: Union[list, np.ndarray, torch.Tensor], (..., 3)
        - The center-scale format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The center-width-height format bounding box.
    '''
    boxes, restore = to_tensor(cs, device=None, temporary=True)
    assert boxes.shape[-1] == 3, f"Invalid shape: {boxes.shape}, should be (..., 3)"

    center = boxes[..., :2]
    side = boxes[..., 2:3]  # keeps the trailing axis so it can be concatenated directly

    return restore(torch.cat([center, side, side], dim=-1))
|
|
|
|
def lurb_to_cs(
    lurb : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Convert the left-upper-right-bottom format to the center-scale format.
    Goes through the center-width-height format and reduces with 'max', i.e. the
    larger side becomes the scale.

    ### Args
    - lurb: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 3)
        - The center-scale format bounding box.
    '''
    as_cwh = lurb_to_cwh(lurb)
    return cwh_to_cs(as_cwh, reduce='max')
|
|
|
|
def cs_to_lurb(
    cs : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Convert the center-scale format to the left-upper-right-bottom format,
    going through the center-width-height format.

    ### Args
    - cs: Union[list, np.ndarray, torch.Tensor], (..., 3)
        - The center-scale format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.
    '''
    as_cwh = cs_to_cwh(cs)
    return cwh_to_lurb(as_cwh)
|
|
|
|
def lurb_to_luwh(
    lurb : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Turn [left, upper, right, bottom] boxes into [left, upper, width, height] boxes.

    ### Args
    - lurb: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-width-height format bounding box.
    '''
    boxes, restore = to_tensor(lurb, device=None, temporary=True)
    assert boxes.shape[-1] == 4, f"Invalid shape: {boxes.shape}, should be (..., 4)"

    top_left, bottom_right = boxes[..., :2], boxes[..., 2:]
    converted = torch.cat([top_left, bottom_right - top_left], dim=-1)

    return restore(converted)
|
|
|
|
def luwh_to_lurb(
    luwh : Union[list, np.ndarray, torch.Tensor],
):
    '''
    Turn [left, upper, width, height] boxes into [left, upper, right, bottom] boxes.

    ### Args
    - luwh: Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-width-height format bounding box.

    ### Returns
    - Union[list, np.ndarray, torch.Tensor], (..., 4)
        - The left-upper-right-bottom format bounding box.
    '''
    boxes, restore = to_tensor(luwh, device=None, temporary=True)
    assert boxes.shape[-1] == 4, f"Invalid shape: {boxes.shape}, should be (..., 4)"

    top_left, size = boxes[..., :2], boxes[..., 2:]
    converted = torch.cat([top_left, top_left + size], dim=-1)

    return restore(converted)
|
|
|
|
def crop_with_lurb(data, lurb, padding=0):
    """
    Crop the img-like data according to the lurb bounding box.
    Parts of the box that fall outside the data are filled with `padding`; a box
    that does not overlap the data at all yields a patch made only of padding.

    ### Args
    - data: Union[np.ndarray, torch.Tensor], shape (H, W, C)
        - Data like image.
    - lurb: Union[list, np.ndarray, torch.Tensor], shape (4,)
        - Bounding box with [left, upper, right, bottom] coordinates (cast to integers).
    - padding: int, default 0
        - Padding value for out-of-bound areas.

    ### Returns
    - Union[np.ndarray, torch.Tensor], shape (H', W', C)
        - Cropped image with padding if necessary, converted back through the
          callback returned by `to_tensor`.
    """
    # NOTE(review): assumes `to_tensor` always hands back a torch.Tensor — confirm against utils.pipeline_utils.
    data, recover_type_back = to_tensor(data, device=None, temporary=True)

    # Plain Python ints keep the slicing and size arithmetic below free of numpy scalar types.
    l_, u_, r_, b_ = (int(v) for v in np.array(lurb).astype(np.int64))

    H_raw, W_raw, C_raw = data.shape

    H_patch = b_ - u_
    W_patch = r_ - l_

    # new_full keeps the dtype *and* device of `data`, so the copy below never crosses devices.
    output = data.new_full((H_patch, W_patch, C_raw), padding)

    # Part of the requested box that actually lies inside the data.
    valid_l_ = max(0, l_)
    valid_u_ = max(0, u_)
    valid_r_ = min(W_raw, r_)
    valid_b_ = min(H_raw, b_)

    # Without overlap there is nothing to copy. Skipping also avoids negative stop
    # indices being interpreted as "count from the end" by the slices below.
    if valid_r_ > valid_l_ and valid_b_ > valid_u_:
        # Where the valid part lands inside the output patch.
        target_l_ = valid_l_ - l_
        target_u_ = valid_u_ - u_
        target_r_ = target_l_ + (valid_r_ - valid_l_)
        target_b_ = target_u_ + (valid_b_ - valid_u_)

        output[target_u_:target_b_, target_l_:target_r_, :] = data[valid_u_:valid_b_, valid_l_:valid_r_, :]

    return recover_type_back(output)
|
|
|
|
def fit_bbox_to_aspect_ratio(
    bbox      : np.ndarray,
    tgt_ratio : Optional[Tuple[int, int]] = None,
    bbox_type : str = 'lurb'
):
    '''
    Enlarge a bounding box so that it matches a target aspect ratio. The center is
    kept and only the width/height are grown (see `expand_wh_to_aspect_ratio`).

    NOTE: a second definition with the same name appears later in this file and
    replaces this one once the module has been imported.

    ### Args
    - bbox: np.ndarray, shape is determined by `bbox_type`, e.g. for 'lurb', shape is (4,)
        - The bounding box to be modified. The format is determined by `bbox_type`.
    - tgt_ratio: Optional[Tuple[int, int]], default None
        - The target aspect ratio to be matched.
    - bbox_type: str, default 'lurb', valid values: 'lurb', 'cwh'.

    ### Returns
    - np.ndarray, shape is determined by `bbox_type`, e.g. for 'lurb', shape is (4,)
        - The modified bounding box.

    ### Raises
    - ValueError: if `bbox_type` is neither 'lurb' nor 'cwh'.
    '''
    box = bbox.copy()  # never touch the caller's array

    if bbox_type == 'cwh':
        grown_wh = expand_wh_to_aspect_ratio(box[2:], tgt_ratio)
        return np.concatenate([box[:2], grown_wh])

    if bbox_type == 'lurb':
        # Work in center/size form, grow the size, then convert back to corners.
        box_cwh = lurb_to_cwh(box)
        box_cwh[2:] = expand_wh_to_aspect_ratio(box_cwh[2:], tgt_ratio)
        return cwh_to_lurb(box_cwh)

    raise ValueError(f"Unsupported bbox type: {bbox_type}")
|
|
def to_numpy(x, temporary:bool=False):
    '''
    Convert a tensor, array, list or tuple to a numpy array.

    ### Args
    - x: Union[torch.Tensor, np.ndarray, list, tuple]
        - The data to convert.
    - temporary: bool, default False
        - If True, also return a callback that converts a numpy array back to the
          original container type of `x`.

    ### Returns
    - np.ndarray if `temporary` is False, else Tuple[np.ndarray, Callable]
        - torch.Tensor: detached CPU copy; the callback restores dtype and device of `x`.
        - np.ndarray: `x` itself, or a copy of it when `temporary` is True; the callback is the identity.
        - list / tuple: `np.array(x)`; the callback returns a list / tuple.

    ### Raises
    - ValueError: if the type of `x` is not supported.
    '''
    if isinstance(x, torch.Tensor):
        arr = x.detach().cpu().numpy()
        if not temporary:
            return arr
        return arr, lambda x_: torch.from_numpy(x_).type_as(x).to(x.device)

    if isinstance(x, np.ndarray):
        if not temporary:
            return x
        # Hand out a copy so temporary edits never leak into the caller's array.
        return x.copy(), lambda x_: x_

    if isinstance(x, (list, tuple)):
        arr = np.array(x)
        if not temporary:
            return arr
        if isinstance(x, tuple):
            return arr, lambda x_: tuple(x_.tolist())
        return arr, lambda x_: x_.tolist()

    raise ValueError(f"Unsupported type: {type(x)}")
|
|
|
|
def expand_wh_to_aspect_ratio(bbx_wh:np.ndarray, tgt_aspect_ratio:Optional[Tuple[int, int]]=None):
    '''
    Increase the size of the bounding box to match the target shape.
    Modified from https://github.com/shubham-goel/4D-Humans/blob/6ec79656a23c33237c724742ca2a0ec00b398b53/hmr2/datasets/utils.py#L14-L33

    ### Args
    - bbx_wh: np.ndarray, (2,)
        - The (width, height) of the bounding box.
    - tgt_aspect_ratio: Optional[Tuple[int, int]], default None
        - The target (width, height) ratio. If None, `bbx_wh` is returned unchanged.

    ### Returns
    - np.ndarray, (2,)
        - The enlarged (width, height). Only one side is grown, the other is kept.
        - If `bbx_wh` can not be unpacked into two values, a message is printed and
          `bbx_wh` is returned as is.
    '''
    if tgt_aspect_ratio is None:
        return bbx_wh

    try:
        bbx_w, bbx_h = bbx_wh
    except (ValueError, TypeError):
        print(f"Invalid bbox_wh content: {bbx_wh}")
        return bbx_wh

    tgt_w, tgt_h = tgt_aspect_ratio
    # A zero width would divide by zero; such a box is never "too wide", so it takes
    # the else branch (the same result the division gives for numpy inputs).
    too_wide = bbx_w != 0 and bbx_h / bbx_w < tgt_h / tgt_w
    if too_wide:
        new_h = max(bbx_w * tgt_h / tgt_w, bbx_h)
        new_w = bbx_w
    else:
        new_h = bbx_h
        new_w = max(bbx_h * tgt_w / tgt_h, bbx_w)
    assert new_h >= bbx_h and new_w >= bbx_w

    return np.array([new_w, new_h])
|
|
def fit_bbox_to_aspect_ratio(
    bbox      : np.ndarray,
    tgt_ratio : Optional[Tuple[int, int]] = None,
    bbox_type : str = 'lurb'
):
    '''
    Fit a bounding box to a target aspect ratio by enlarging it with the least change.

    NOTE: an identical definition appears earlier in this file; being the later one,
    this is the definition the module-level name ends up bound to.

    ### Args
    - bbox: np.ndarray, shape is determined by `bbox_type`, e.g. for 'lurb', shape is (4,)
        - The bounding box to be modified. The format is determined by `bbox_type`.
    - tgt_ratio: Optional[Tuple[int, int]], default None
        - The target aspect ratio to be matched.
    - bbox_type: str, default 'lurb', valid values: 'lurb', 'cwh'.

    ### Returns
    - np.ndarray, shape is determined by `bbox_type`, e.g. for 'lurb', shape is (4,)
        - The modified bounding box.

    ### Raises
    - ValueError: if `bbox_type` is neither 'lurb' nor 'cwh'.
    '''
    box = bbox.copy()

    from_corners = bbox_type == 'lurb'
    if not from_corners and bbox_type != 'cwh':
        raise ValueError(f"Unsupported bbox type: {bbox_type}")

    # Both formats are handled in center/size form; corners are converted first.
    box_cwh = lurb_to_cwh(box) if from_corners else box
    grown_wh = expand_wh_to_aspect_ratio(box_cwh[2:], tgt_ratio)

    if from_corners:
        box_cwh[2:] = grown_wh
        return cwh_to_lurb(box_cwh)
    return np.concatenate([box[:2], grown_wh])
|
|
def _img_det2patches(imgs, det_instances, downsample_ratio:float, max_instances:int=5):
    '''
    1. Filter out the trusted human detections.
    2. Enlarge the bounding boxes to aspect ratio (ViT backbone only use 192*256 pixels, make sure these
       pixels can capture main contents) and then to squares (to adapt the data module).
    3. Crop the image with the bounding boxes and resize them to 256x256.

    ### Args
    - imgs: the image to crop from, passed to `crop_with_lurb`.
    - det_instances: mapping with 'pred_classes', 'scores' and 'pred_boxes', or None.
        - NOTE(review): the entries are used as torch tensors here — confirm with the detector output.
    - downsample_ratio: float
        - The predicted boxes are divided by this value.
    - max_instances: int, default 5
        - At most this many detections are kept (the highest-scoring humans).

    ### Returns
    - Tuple[np.ndarray, list]
        - The 256x256 patches and the center-scale boxes they were cropped with.
        - Two empty arrays if `det_instances` is None.
    '''
    if det_instances is None:
        return to_numpy([]), to_numpy([])
    CLASS_HUMAN_ID, DET_THRESHOLD_SCORE = 0, 0.5

    # Keep detections that are both human and confident enough.
    scores = det_instances['scores']
    is_human_mask = det_instances['pred_classes'] == CLASS_HUMAN_ID
    reliable_mask = scores > DET_THRESHOLD_SCORE
    active_mask = is_human_mask & reliable_mask

    # Too many candidates: keep only the top-scoring humans.
    if active_mask.sum().item() > max_instances:
        humans_scores = scores * is_human_mask.float()
        _, top_idx = humans_scores.topk(max_instances)
        valid_mask = torch.zeros_like(active_mask).bool()
        valid_mask[top_idx] = True
    else:
        valid_mask = active_mask

    # detach/cpu first: a bare .numpy() raises for CUDA or grad-tracking tensors.
    lurb_all = det_instances['pred_boxes'][valid_mask].detach().cpu().numpy() / downsample_ratio
    lurb_all = [fit_bbox_to_aspect_ratio(bbox=lurb, tgt_ratio=(192, 256)) for lurb in lurb_all]

    # Round trip through center-scale turns every box into a square.
    cs_all = [lurb_to_cs(lurb) for lurb in lurb_all]
    lurb_all = [cs_to_lurb(cs) for cs in cs_all]

    cropped_imgs = [crop_with_lurb(imgs, lurb) for lurb in lurb_all]
    patches = to_numpy([flex_resize_img(cropped_img, (256, 256)) for cropped_img in cropped_imgs])
    return patches, cs_all
|
|
def imgs_det2patches(imgs, dets, downsample_ratios, max_instances_per_img):
    '''
    Given the raw images and the detection results, return the image patches of human instances.

    ### Args
    - imgs: sequence of images.
    - dets: sequence of detection results, one per image (same length as `imgs`).
    - downsample_ratios: sequence of ratios, indexed like `imgs`.
    - max_instances_per_img: int
        - Upper bound of instances kept per image.

    ### Returns
    - Tuple[list, dict]
        - patches: one float32 array per image that has at least one detection.
        - det_meta: 'n_patch_per_img' has one entry per image; 'bbx_cs' has one entry
          per image that has at least one detection.
    '''
    assert len(imgs) == len(dets), f'L_img = {len(imgs)}, L_det = {len(dets)}'
    patches, n_patch_per_img, bbx_cs = [], [], []
    # `tqdm` is bound to the module by `import tqdm`, so the progress bar is `tqdm.tqdm`.
    for i, (img, det) in tqdm.tqdm(enumerate(zip(imgs, dets)), total=len(imgs)):
        patches_i, bbx_cs_i = _img_det2patches(img, det, downsample_ratios[i], max_instances_per_img)
        n_patch_per_img.append(len(patches_i))
        if len(patches_i) > 0:
            patches.append(patches_i.astype(np.float32))
            bbx_cs.append(bbx_cs_i)
        else:
            print(f'No human detection results on image No.{i}.')
    det_meta = {
        'n_patch_per_img' : n_patch_per_img,
        'bbx_cs'          : bbx_cs,
    }
    return patches, det_meta
|
|
|
|
|
|
def load_img_meta(
    img_path : Union[str, 'Path'],
):
    '''
    Read the image meta from the given path through `imageio.v3.improps`.

    ### Args
    - img_path: Union[str, Path]
        - Path of the image file.

    ### Returns
    - dict with keys 'w' and 'h', the width and height of the image.

    ### Raises
    - AssertionError: if the path does not exist.
    '''
    # The annotation is quoted so that defining this function does not need `Path`
    # to be imported yet; the body only needs it at call time.
    assert Path(img_path).exists(), f'Image not found: {img_path}'
    H, W = imageio.v3.improps(img_path).shape[:2]
    meta = {'w': W, 'h': H}
    return meta
|
|
|
|
def load_img(
    img_path : Union[str, 'Path'],
    mode     : str = 'RGB',
):
    '''
    Read the image from the given path.

    ### Args
    - img_path: Union[str, Path]
        - Path of the image file.
    - mode: str, default 'RGB'
        - Passed to `imageio.v3.imread` (pillow plugin) as `mode`.

    ### Returns
    - Tuple[np.ndarray, dict]
        - The image and a dict with keys 'w' and 'h', taken from the image shape.

    ### Raises
    - AssertionError: if the path does not exist.
    '''
    # The annotation is quoted so that defining this function does not need `Path`
    # to be imported yet; the body only needs it at call time.
    assert Path(img_path).exists(), f'Image not found: {img_path}'

    img = imageio.v3.imread(img_path, plugin='pillow', mode=mode)

    meta = {
        'w': img.shape[1],
        'h': img.shape[0],
    }
    return img, meta
|
|
|
|
| from pathlib import Path |
|
|
def load_inputs(input_path, input_type, MAX_IMG_W=1920, MAX_IMG_H=1080):
    '''
    Load a video or a folder of images as a list of frames, shrinking anything
    larger than the given limits.

    ### Args
    - input_path: Union[str, Path]
        - A video file or a folder of images.
    - input_type: str, valid values: 'auto', 'video', 'imgs'
        - With 'auto', a file is regarded as a video and anything else as an image folder.
    - MAX_IMG_W: int, default 1920
        - Frames wider than this are resized to this width (aspect ratio kept).
    - MAX_IMG_H: int, default 1080
        - Frames taller than this are resized to this height (aspect ratio kept).

    ### Returns
    - Tuple[list, dict]
        - raw_imgs: the loaded frames / images.
        - inputs_meta: 'type', 'seq_name' and, for image folders, 'img_fns'.

    ### Raises
    - ValueError: if the resolved input type is neither 'video' nor 'imgs'.
    '''
    inputs_path = Path(input_path)
    if input_type != 'auto':
        inputs_type = input_type
    else:
        inputs_type = 'video' if Path(input_path).is_file() else 'imgs'
    print(f'🚚 Loading inputs from: {inputs_path}, regarded as <{inputs_type}>.')

    inputs_meta = {'type': inputs_type}
    if inputs_type == 'video':
        inputs_meta['seq_name'] = inputs_path.stem
        frames, _ = load_video(inputs_path)
        # The resize helpers take (width, height); -1 means "derive from the aspect ratio".
        if frames.shape[1] > MAX_IMG_H:
            frames = flex_resize_video(frames, (-1, MAX_IMG_H), kp_mod=4)
        if frames.shape[2] > MAX_IMG_W:
            frames = flex_resize_video(frames, (MAX_IMG_W, -1), kp_mod=4)
        raw_imgs = [frame for frame in frames]
    elif inputs_type == 'imgs':
        # Sorted so that the image order does not depend on the file system.
        img_fns = sorted(
            fn for fn in inputs_path.glob('*.*')
            if fn.suffix.lower() in ['.jpg', '.jpeg', '.png', '.webp']
        )
        inputs_meta['seq_name'] = f'{inputs_path.stem}-img_cnt={len(img_fns)}'
        raw_imgs = []
        for fn in img_fns:
            img, _ = load_img(fn)
            if img.shape[0] > MAX_IMG_H:
                img = flex_resize_img(img, (-1, MAX_IMG_H), kp_mod=4)
            if img.shape[1] > MAX_IMG_W:
                img = flex_resize_img(img, (MAX_IMG_W, -1), kp_mod=4)
            raw_imgs.append(img)
        inputs_meta['img_fns'] = img_fns
    else:
        raise ValueError(f'Unsupported inputs type: {inputs_type}.')
    print(f'📦 Totally {len(raw_imgs)} images are loaded.')

    return raw_imgs, inputs_meta
|
|
|
|
| import imageio |
|
|
def load_video(
    video_path : Union[str, Path],
):
    '''
    Read the video from the given path. A directory is read as a folder of frame
    images (sorted by path, fps set to 30), a file is decoded with the FFMPEG reader.

    ### Args
    - video_path: Union[str, Path]
        - A video file or a directory of images.

    ### Returns
    - Tuple[np.ndarray, dict]
        - frames: the frames stacked along a new leading axis.
        - meta: 'fps', 'w', 'h' and 'L' (number of frames).

    ### Raises
    - AssertionError: if the path does not exist.
    '''
    if isinstance(video_path, str):
        video_path = Path(video_path)

    assert video_path.exists(), f'Video not found: {video_path}'

    # `glob` and `tqdm` are bound to the modules by `import glob` / `import tqdm`,
    # hence `glob.glob` and `tqdm.tqdm` below.
    if video_path.is_dir():
        print(f'Found {video_path} is a directory. It will be regarded as a image folder.')
        imgs_path = sorted(glob.glob(str(video_path / '*')))
        frames = []
        for img_path in tqdm.tqdm(imgs_path):
            frames.append(imageio.imread(img_path))
        fps = 30
    else:
        print(f'Found {video_path} is a file. It will be regarded as a video file.')
        reader = imageio.get_reader(video_path, format='FFMPEG')
        try:
            frames = []
            for frame in tqdm.tqdm(reader, total=reader.count_frames()):
                frames.append(frame)
            fps = reader.get_meta_data()['fps']
        finally:
            # Release the reader even if decoding fails midway.
            reader.close()
    frames = np.stack(frames, axis=0)
    meta = {
        'fps': fps,
        'w'  : frames.shape[2],
        'h'  : frames.shape[1],
        'L'  : frames.shape[0],
    }

    return frames, meta
|
|