Spaces:
Runtime error
Runtime error
| # yapf: disable | |
| import copy | |
| import glob | |
| import os | |
| import os.path as osp | |
| import shutil | |
| import warnings | |
| from functools import partial | |
| from pathlib import Path | |
| from typing import List, Optional, Tuple, Union | |
| import mmcv | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| from colormap import Color | |
| from detrsmpl.core.cameras import ( | |
| WeakPerspectiveCameras, | |
| compute_orbit_cameras, | |
| ) | |
| from detrsmpl.core.cameras.builder import build_cameras | |
| from detrsmpl.core.conventions.cameras.convert_convention import \ | |
| convert_camera_matrix # prevent yapf isort conflict | |
| from detrsmpl.core.conventions.segmentation import body_segmentation | |
| from detrsmpl.core.renderer.torch3d_renderer import render_runner | |
| from detrsmpl.core.renderer.torch3d_renderer.meshes import \ | |
| ParametricMeshes # noqa: E501 | |
| from detrsmpl.core.renderer.torch3d_renderer.render_smpl_config import ( | |
| RENDER_CONFIGS, | |
| ) | |
| from detrsmpl.core.renderer.torch3d_renderer.smpl_renderer import SMPLRenderer | |
| from detrsmpl.core.renderer.torch3d_renderer.utils import \ | |
| align_input_to_padded # noqa: E501 | |
| from detrsmpl.models.body_models.builder import build_body_model | |
| from detrsmpl.utils.demo_utils import ( | |
| convert_bbox_to_intrinsic, | |
| convert_crop_cam_to_orig_img, | |
| convert_kp2d_to_bbox, | |
| get_default_hmr_intrinsic, | |
| get_different_colors, | |
| ) | |
| from detrsmpl.utils.ffmpeg_utils import ( | |
| check_input_path, | |
| images_to_array, | |
| prepare_output_path, | |
| vid_info_reader, | |
| video_to_array, | |
| video_to_images, | |
| ) | |
| from detrsmpl.utils.mesh_utils import save_meshes_as_objs, save_meshes_as_plys | |
| from detrsmpl.utils.path_utils import check_path_suffix | |
| # yapf: enable | |
| try: | |
| from typing import Literal | |
| except ImportError: | |
| from typing_extensions import Literal | |
def _prepare_background(image_array, frame_list, origin_frames, output_path,
                        start, end, img_format, overwrite, num_frames,
                        read_frames_batch):
    """Compare among `image_array`, `frame_list` and `origin_frames` and decide
    whether to save the temp background images.

    Priority of the three background sources:
    ``image_array`` > ``frame_list`` > ``origin_frames``.

    Args:
        image_array: in-memory background frames as ``np.ndarray`` or
            ``torch.Tensor``; ndim 3 is treated as one frame and repeated.
        frame_list: list of background image file paths.
        origin_frames: a video file or an image folder path.
        output_path: destination video/gif/image path; suffix checked when
            not None.
        start, end: temporal slice applied to the background frames.
        img_format: frame filename pattern, e.g. ``'%06d.png'``.
        overwrite: whether an existing ``output_path`` may be overwritten.
        num_frames: total number of frames to render.
        read_frames_batch: read frames from disk during rendering instead of
            loading them all into memory; forced True for long sequences.

    Returns:
        Tuple ``(image_array, remove_folder, frames_folder)``:
        ``image_array`` tensor of frames or None; ``frames_folder`` folder to
        read frames from during rendering or None; ``remove_folder`` whether
        ``frames_folder`` is temporary and should be deleted afterwards.
    """
    # Long sequences are always read in batches to bound memory usage.
    if num_frames > 300:
        read_frames_batch = True
    frames_folder = None
    remove_folder = False
    if isinstance(image_array, np.ndarray):
        image_array = torch.Tensor(image_array)
    if image_array is not None:
        # A single (H, W, C) frame gets a batch dim and is repeated so every
        # output frame shares the same background.
        if image_array.ndim == 3:
            image_array = image_array[None]
        if image_array.shape[0] == 1:
            image_array = image_array.repeat(num_frames, 1, 1, 1)
        # in-memory frames win over the other two sources
        frame_list = None
        origin_frames = None
        image_array = image_array[start:end]
    # check the output path and get the image_array
    if output_path is not None:
        # NOTE(review): 'gif' lacks the leading dot the other suffixes have —
        # confirm prepare_output_path matches it as intended.
        prepare_output_path(output_path=output_path,
                            allowed_suffix=['.mp4', 'gif', '.png', '.jpg','.jpeg'],
                            tag='output video',
                            path_type='auto',
                            overwrite=overwrite)
        if image_array is None:
            # choose in frame_list or origin_frames
            # if all None, will use pure white background
            if frame_list is None and origin_frames is None:
                print(
                    'No background provided, will use pure white background.')
            elif frame_list is not None and origin_frames is not None:
                warnings.warn('Redundant input, will only use frame_list.')
                origin_frames = None
            # read the origin frames as array if any.
            if frame_list is None and origin_frames is not None:
                check_input_path(input_path=origin_frames,
                                 allowed_suffix=['.mp4', '.gif', ''],
                                 tag='origin frames',
                                 path_type='auto')
                # if origin_frames is a video, write it as a folder of images
                # if read_frames_batch is True, else read directly as an array.
                if Path(origin_frames).is_file():
                    if read_frames_batch:
                        # temp folder next to the output; flagged for removal
                        frames_folder = osp.join(
                            Path(output_path).parent,
                            Path(output_path).name + '_input_temp')
                        os.makedirs(frames_folder, exist_ok=True)
                        video_to_images(origin_frames,
                                        frames_folder,
                                        img_format=img_format,
                                        start=start,
                                        end=end)
                        remove_folder = True
                    else:
                        remove_folder = False
                        frames_folder = None
                        image_array = video_to_array(origin_frames,
                                                     start=start,
                                                     end=end)
                # if origin_frames is a folder, write it as a folder of images
                # read the folder as an array if read_frames_batch is True
                # else return frames_folder for reading during rendering.
                else:
                    if read_frames_batch:
                        frames_folder = origin_frames
                        remove_folder = False
                        image_array = None
                    else:
                        image_array = images_to_array(origin_frames,
                                                      img_format=img_format,
                                                      start=start,
                                                      end=end)
                        remove_folder = False
                        frames_folder = origin_frames
            # if frame_list is not None, move the images into a folder
            # read the folder as an array if read_frames_batch is True
            # else return frames_folder for reading during rendering.
            elif frame_list is not None and origin_frames is None:
                frames_folder = osp.join(
                    Path(output_path).parent,
                    Path(output_path).name + '_input_temp')
                os.makedirs(frames_folder, exist_ok=True)
                # copy only valid images, renamed sequentially so the folder
                # can be read back with a uniform '%06d.png' pattern
                for frame_idx, frame_path in enumerate(frame_list):
                    if check_path_suffix(
                            path_str=frame_path,
                            allowed_suffix=['.jpg', '.png', '.jpeg']):
                        shutil.copy(
                            frame_path,
                            os.path.join(frames_folder,
                                         '%06d.png' % frame_idx))
                        img_format = '%06d.png'
                if not read_frames_batch:
                    # NOTE(review): no start/end slice here — presumably
                    # frame_list is already the desired slice; verify.
                    image_array = images_to_array(frames_folder,
                                                  img_format=img_format,
                                                  remove_raw_files=True)
                    frames_folder = None
                    remove_folder = False
                else:
                    image_array = None
                    remove_folder = True
    return image_array, remove_folder, frames_folder
| def _prepare_body_model(body_model, body_model_config): | |
| """Prepare `body_model` from `body_model_config` or existing | |
| `body_model`.""" | |
| if body_model is None: | |
| if body_model_config is not None: | |
| body_model_config = copy.deepcopy(body_model_config) | |
| model_path = body_model_config.get('model_path', None) | |
| model_type = body_model_config.get('type').lower() | |
| if model_type not in ['smpl', 'smplx']: | |
| raise ValueError(f'Do not support {model_type}, please choose' | |
| f' in `smpl` or `smplx.') | |
| if model_path and osp.isdir(model_path): | |
| model_path = osp.join(model_path, model_type) | |
| body_model_config.update(model_path=model_path) | |
| body_model = build_body_model(body_model_config) | |
| assert os.path.isdir(model_path) | |
| else: | |
| raise FileNotFoundError('Wrong model_path.' | |
| ' File or directory does not exist.') | |
| else: | |
| raise ValueError('Please input body_model_config.') | |
| else: | |
| if body_model_config is not None: | |
| warnings.warn('Redundant input, will take body_model directly' | |
| 'and ignore body_model_config.') | |
| return body_model | |
| def _prepare_input_pose(verts, poses, betas, transl): | |
| """Prepare input pose data as tensor and ensure correct temporal slice.""" | |
| if verts is None and poses is None: | |
| raise ValueError('Please input valid poses or verts.') | |
| elif (verts is not None) and (poses is not None): | |
| warnings.warn('Redundant input, will take verts and ignore poses & ' | |
| 'betas & transl.') | |
| poses = None | |
| transl = None | |
| betas = None | |
| elif isinstance(poses, dict): | |
| transl = poses.get('transl', transl) | |
| betas = poses.get('betas', betas) | |
| if isinstance(verts, np.ndarray): | |
| verts = torch.Tensor(verts) | |
| num_frames = verts.shape[0] | |
| elif isinstance(verts, torch.Tensor): | |
| num_frames = verts.shape[0] | |
| if isinstance(poses, np.ndarray): | |
| poses = torch.Tensor(poses) | |
| num_frames = poses.shape[0] | |
| elif isinstance(poses, torch.Tensor): | |
| num_frames = poses.shape[0] | |
| elif isinstance(poses, dict): | |
| for k, v in poses.items(): | |
| if isinstance(v, np.ndarray): | |
| poses[k] = torch.tensor(v) | |
| num_frames = poses['body_pose'].shape[0] | |
| if isinstance(betas, np.ndarray): | |
| betas = torch.Tensor(betas) | |
| if betas is not None: | |
| if betas.shape[0] != num_frames: | |
| times = num_frames // betas.shape[0] | |
| if betas.ndim == 2: | |
| betas = betas.repeat(times, 1)[:num_frames] | |
| elif betas.ndim == 3: | |
| betas = betas.repeat(times, 1, 1)[:num_frames] | |
| print(f'betas will be repeated by dim 0 for {times} times.') | |
| if isinstance(transl, np.ndarray): | |
| transl = torch.Tensor(transl) | |
| return verts, poses, betas, transl | |
def _prepare_mesh(poses, betas, transl, verts, start, end, body_model):
    """Prepare the mesh info for rendering.

    Either runs `body_model` forward on `poses`/`betas`/`transl`, or uses the
    pre-computed `verts` directly and regresses joints from them.

    Args:
        poses: smplx-style dict or tensor whose last dim is
            ``3 * (NUM_JOINTS + 1)``; takes effect when not None.
        betas: shape parameters, may be None.
        transl: translations, may be None.
        verts: pre-computed vertices; used only when `poses` is None.
        start, end: temporal slice applied to the inputs.
        body_model: SMPL(-X) module providing joint-count constants,
            dict/tensor conversion helpers and the forward pass.

    Returns:
        Tuple ``(vertices, joints, num_frames, num_person)``.

    Raises:
        KeyError: pose dict missing a required body-pose key.
        ValueError: wrong pose type/shape, or both poses and verts are None.
    """
    NUM_JOINTS = body_model.NUM_JOINTS
    NUM_BODY_JOINTS = body_model.NUM_BODY_JOINTS
    # full-pose dimension: all joints plus global orient, 3 rotation
    # parameters each
    NUM_DIM = 3 * (NUM_JOINTS + 1)
    body_pose_keys = body_model.body_pose_keys
    joints = None
    if poses is not None:
        if isinstance(poses, dict):
            if not body_pose_keys.issubset(poses):
                raise KeyError(
                    f'{str(poses.keys())}, Please make sure that your '
                    f'input dict has all of {", ".join(body_pose_keys)}')
            num_frames = poses['body_pose'].shape[0]
            # infer num_person from the flattened body_pose layout
            _, num_person, _ = poses['body_pose'].view(
                num_frames, -1, NUM_BODY_JOINTS * 3).shape
            full_pose = body_model.dict2tensor(poses)
            full_pose = full_pose[start:end]
        elif isinstance(poses, torch.Tensor):
            if poses.shape[-1] != NUM_DIM:
                raise ValueError(
                    f'Please make sure your poses is {NUM_DIM} dims in'
                    f'the last axis. Your input shape: {poses.shape}')
            # reshape to (frame, person, pose_dim); later transl handling
            # relies on this reshaped poses
            poses = poses.view(poses.shape[0], -1, (NUM_JOINTS + 1) * 3)
            num_frames, num_person, _ = poses.shape
            full_pose = poses[start:end]
        else:
            raise ValueError('Wrong pose type, should be `dict` or `tensor`.')
        # multi person check: broadcast, trim or reject betas/transl whose
        # person dim disagrees with the poses
        if num_person > 1:
            if betas is not None:
                num_betas = betas.shape[-1]
                betas = betas.view(num_frames, -1, num_betas)
                if betas.shape[1] == 1:
                    betas = betas.repeat(1, num_person, 1)
                    warnings.warn(
                        'Only one betas for multi-person, will all be the '
                        'same body shape.')
                elif betas.shape[1] > num_person:
                    betas = betas[:, :num_person]
                    warnings.warn(
                        f'Betas shape exceed, will be sliced as {betas.shape}.'
                    )
                elif betas.shape[1] == num_person:
                    pass
                else:
                    raise ValueError(
                        f'Odd betas shape: {betas.shape}, inconsistent'
                        f'with poses in num_person: {poses.shape}.')
            else:
                warnings.warn('None betas for multi-person, will all be the '
                              'default body shape.')
            if transl is not None:
                transl = transl.view(poses.shape[0], -1, 3)
                if transl.shape[1] == 1:
                    transl = transl.repeat(1, num_person, 1)
                    warnings.warn(
                        'Only one transl for multi-person, will all be the '
                        'same translation.')
                elif transl.shape[1] > num_person:
                    transl = transl[:, :num_person]
                    warnings.warn(f'Transl shape exceed, will be sliced as'
                                  f'{transl.shape}.')
                elif transl.shape[1] == num_person:
                    pass
                else:
                    raise ValueError(
                        f'Odd transl shape: {transl.shape}, inconsistent'
                        f'with poses in num_person: {poses.shape}.')
            else:
                warnings.warn('None transl for multi-person, will all be the '
                              'default translation.')
        # slice the input poses, betas, and transl.
        betas = betas[start:end] if betas is not None else None
        transl = transl[start:end] if transl is not None else None
        pose_dict = body_model.tensor2dict(full_pose=full_pose,
                                           betas=betas,
                                           transl=transl)
        # get new num_frames
        num_frames = full_pose.shape[0]
        model_output = body_model(**pose_dict)
        vertices = model_output['vertices']
        joints = model_output['joints'][0]  # hardcode here
    elif verts is not None:
        if isinstance(verts, np.ndarray):
            verts = torch.Tensor(verts)
        verts = verts[start:end]
        # dummy zero pose only to satisfy tensor2dict's signature
        pose_dict = body_model.tensor2dict(torch.zeros(1,
                                                       (NUM_JOINTS + 1) * 3))
        # regress joints from vertices; einsum pattern depends on whether a
        # person dim is present (ndim 3 vs 4)
        if verts.ndim == 3:
            joints = torch.einsum('bik,ji->bjk',
                                  [verts, body_model.J_regressor])
        elif verts.ndim == 4:
            joints = torch.einsum('fpik,ji->fpjk',
                                  [verts, body_model.J_regressor])
        num_verts = body_model.NUM_VERTS
        assert verts.shape[-2] == num_verts, 'Wrong input verts shape.'
        num_frames = verts.shape[0]
        vertices = verts.view(num_frames, -1, num_verts, 3)
        num_joints = joints.shape[-2]
        joints = joints.view(num_frames, -1, num_joints, 3)
        num_person = vertices.shape[1]
    else:
        raise ValueError('Poses and verts are all None.')
    return vertices, joints, num_frames, num_person
def _prepare_colors(palette, render_choice, num_person, num_verts, model_type):
    """Build per-vertex colors of shape (num_person, num_verts, 3) from
    `palette`.

    This is to make the identity in video clear.
    """
    if len(palette) != num_person:
        raise ValueError('Please give the right number of palette.')
    seg_dict = body_segmentation(model_type)

    if render_choice == 'silhouette':
        # uniform white for every person
        return torch.ones(num_person, num_verts, 3)

    if render_choice == 'part_silhouette':
        # encode (part index + 1) as the "color" so parts stay separable
        colors = torch.zeros(num_person, num_verts, 3)
        for part_idx, part_name in enumerate(seg_dict.keys()):
            colors[:, seg_dict[part_name]] = part_idx + 1
        return colors

    if isinstance(palette, torch.Tensor):
        # numeric palette: normalize 8-bit values, then broadcast per vertex
        if palette.max() > 1:
            palette = palette / 255.0
        palette = torch.clip(palette, min=0, max=1)
        return palette.view(num_person,
                            3).unsqueeze(1).repeat(1, num_verts, 1)

    if isinstance(palette, list):
        per_person = []
        for idx in range(num_person):
            choice = palette[idx]
            if choice == 'random':
                # deterministic "random" color per person index
                rgb = torch.FloatTensor(
                    get_different_colors(num_person, int_dtype=False)[idx])
                rgb = torch.clip(rgb * 1.5, min=0.6, max=1)
                person_colors = rgb.view(1, 1, 3).repeat(1, num_verts, 1)
            elif choice == 'segmentation':
                verts_labels = torch.zeros(num_verts)
                person_colors = torch.ones(1, num_verts, 3)
                part_palette = get_different_colors(len(seg_dict),
                                                    int_dtype=False)
                for part_idx, part_name in enumerate(seg_dict.keys()):
                    index = seg_dict[part_name]
                    verts_labels[index] = part_idx
                    person_colors[:, index] = torch.FloatTensor(
                        part_palette[part_idx])
            elif choice in Color.color_names:
                person_colors = torch.FloatTensor(
                    Color(choice).rgb).view(1, 1, 3).repeat(1, num_verts, 1)
            else:
                raise ValueError('Wrong palette string. '
                                 'Please choose in the pre-defined range.')
            per_person.append(person_colors)
        colors = torch.cat(per_person, 0)
        assert colors.shape == (num_person, num_verts, 3)
        # the color passed to renderer will be (num_person, num_verts, 3)
        return colors

    raise ValueError(
        'Palette should be tensor, array or list of strs.')
| def render_smpl( | |
| # smpl parameters | |
| poses: Optional[Union[torch.Tensor, np.ndarray, dict]] = None, | |
| betas: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| transl: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| verts: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| body_model: Optional[nn.Module] = None, | |
| body_model_config: Optional[dict] = None, | |
| # camera parameters | |
| R: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| T: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| K: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| orig_cam: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| Ks: Optional[Union[torch.Tensor, np.ndarray]] = None, | |
| in_ndc: bool = True, | |
| convention: str = 'pytorch3d', | |
| projection: Literal['weakperspective', 'perspective', 'fovperspective', | |
| 'orthographics', | |
| 'fovorthographics'] = 'perspective', | |
| orbit_speed: Union[float, Tuple[float, float]] = 0.0, | |
| # render choice parameters | |
| render_choice: Literal['lq', 'mq', 'hq', 'silhouette', 'depth', | |
| 'normal', 'pointcloud', | |
| 'part_silhouette'] = 'hq', | |
| palette: Union[List[str], str, np.ndarray, torch.Tensor] = 'white', | |
| texture_image: Union[torch.Tensor, np.ndarray] = None, | |
| resolution: Optional[Union[List[int], Tuple[int, int]]] = None, | |
| start: int = 0, | |
| end: Optional[int] = None, | |
| alpha: float = 1.0, | |
| no_grad: bool = True, | |
| batch_size: int = 10, | |
| device: Union[torch.device, str] = 'cuda', | |
| # file io parameters | |
| return_tensor: bool = False, | |
| output_path: str = None, | |
| origin_frames: Optional[str] = None, | |
| frame_list: Optional[List[str]] = None, | |
| image_array: Optional[Union[np.ndarray, torch.Tensor]] = None, | |
| img_format: str = '%06d.png', | |
| overwrite: bool = False, | |
| mesh_file_path: Optional[str] = None, | |
| read_frames_batch: bool = False, | |
| # visualize keypoints | |
| plot_kps: bool = False, | |
| kp3d: Optional[Union[np.ndarray, torch.Tensor]] = None, | |
| mask: Optional[Union[np.ndarray, List[int]]] = None, | |
| vis_kp_index: bool = False, | |
| verbose: bool = False) -> Union[None, torch.Tensor]: | |
| """Render SMPL or SMPL-X mesh or silhouette into differentiable tensors, | |
| and export video or images. | |
| Args: | |
| # smpl parameters: | |
| poses (Union[torch.Tensor, np.ndarray, dict]): | |
| 1). `tensor` or `array` and ndim is 2, shape should be | |
| (frame, 72). | |
| 2). `tensor` or `array` and ndim is 3, shape should be | |
| (frame, num_person, 72/165). num_person equals 1 means | |
| single-person. | |
| Rendering predicted multi-person should feed together with | |
| multi-person weakperspective cameras. meshes would be computed | |
| and use an identity intrinsic matrix. | |
| 3). `dict`, standard dict format defined in smplx.body_models. | |
| will be treated as single-person. | |
| Lower priority than `verts`. | |
| Defaults to None. | |
| betas (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| 1). ndim is 2, shape should be (frame, 10). | |
| 2). ndim is 3, shape should be (frame, num_person, 10). num_person | |
| equals 1 means single-person. If poses are multi-person, betas | |
| should be set to the same person number. | |
| None will use default betas. | |
| Defaults to None. | |
| transl (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| translations of smpl(x). | |
| 1). ndim is 2, shape should be (frame, 3). | |
| 2). ndim is 3, shape should be (frame, num_person, 3). num_person | |
| equals 1 means single-person. If poses are multi-person, | |
| transl should be set to the same person number. | |
| Defaults to None. | |
| verts (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| 1). ndim is 3, shape should be (frame, num_verts, 3). | |
| 2). ndim is 4, shape should be (frame, num_person, num_verts, 3). | |
| num_person equals 1 means single-person. | |
| Higher priority over `poses` & `betas` & `transl`. | |
| Defaults to None. | |
| body_model (nn.Module, optional): body_model created from smplx.create. | |
| Higher priority than `body_model_config`. If `body_model` is not | |
| None, it will override `body_model_config`. | |
| Should not both be None. | |
| Defaults to None. | |
| body_model_config (dict, optional): body_model_config for build_model. | |
| Lower priority than `body_model`. Should not both be None. | |
| Defaults to None. | |
| # camera parameters: | |
| K (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| shape should be (frame, 4, 4) or (frame, 3, 3), frame could be 1. | |
| if (4, 4) or (3, 3), dim 0 will be added automatically. | |
| Will be default `FovPerspectiveCameras` intrinsic if None. | |
| Lower priority than `orig_cam`. | |
| R (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| shape should be (frame, 3, 3), If f equals 1, camera will have | |
| identical rotation. | |
| If `K` and `orig_cam` is None, will be generated by `look_at_view`. | |
| If have `K` or `orig_cam` and `R` is None, will be generated by | |
| `convert_camera_matrix`. | |
| Defaults to None. | |
| T (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| shape should be (frame, 3). If f equals 1, camera will have | |
| identical translation. | |
| If `K` and `orig_cam` is None, will be generated by `look_at_view`. | |
| If have `K` or `orig_cam` and `T` is None, will be generated by | |
| `convert_camera_matrix`. | |
| Defaults to None. | |
| orig_cam (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| shape should be (frame, 4) or (frame, num_person, 4). If f equals | |
| 1, will be repeated to num_frames. num_person should be 1 if single | |
| person. Usually for HMR, VIBE predicted cameras. | |
| Higher priority than `K` & `R` & `T`. | |
| Defaults to None. | |
| Ks (Optional[Union[torch.Tensor, np.ndarray]], optional): | |
| shape should be (frame, 4, 4). | |
| This is for HMR or SPIN multi-person demo. | |
| in_ndc (bool, optional): . Defaults to True. | |
| convention (str, optional): If want to use an existing convention, | |
| choose in ['opengl', 'opencv', 'pytorch3d', 'pyrender', 'open3d', | |
| 'maya', 'blender', 'unity']. | |
| If want to use a new convention, define your convention in | |
| (CAMERA_CONVENTION_FACTORY)[mmhuman3d/core/conventions/cameras/ | |
| __init__.py] by the order of right, front and up. | |
| Defaults to 'pytorch3d'. | |
| projection (Literal[, optional): projection mode of camers. Choose in | |
| ['orthographics, fovperspective', 'perspective', 'weakperspective', | |
| 'fovorthographics'] | |
| Defaults to 'perspective'. | |
| orbit_speed (float, optional): orbit speed for viewing when no `K` | |
| provided. `float` for only azim speed and Tuple for `azim` and | |
| `elev`. | |
| # render choice parameters: | |
| render_choice (Literal[, optional): | |
| choose in ['lq', 'mq', 'hq', 'silhouette', 'depth', 'normal', | |
| 'pointcloud', 'part_silhouette'] . | |
| `lq`, `mq`, `hq` would output (frame, h, w, 4) FloatTensor. | |
| `lq` means low quality, `mq` means medium quality, | |
| h`q means high quality. | |
| `silhouette` would output (frame, h, w) soft binary FloatTensor. | |
| `part_silhouette` would output (frame, h, w, 1) LongTensor. | |
| Every pixel stores a class index. | |
| `depth` will output a depth map of (frame, h, w, 1) FloatTensor | |
| and 'normal' will output a normal map of (frame, h, w, 1). | |
| `pointcloud` will output a (frame, h, w, 4) FloatTensor. | |
| Defaults to 'mq'. | |
| palette (Union[List[str], str, np.ndarray], optional): | |
| color theme str or list of color str or `array`. | |
| 1). If use str to represent the color, | |
| should choose in ['segmentation', 'random'] or color from | |
| Colormap https://en.wikipedia.org/wiki/X11_color_names. | |
| If choose 'segmentation', will get a color for each part. | |
| 2). If you have multi-person, better give a list of str or all | |
| will be in the same color. | |
| 3). If you want to define your specific color, use an `array` | |
| of shape (3,) for single person and (N, 3) for multiple persons. | |
| If (3,) for multiple persons, all will be in the same color. | |
| Your `array` should be in range [0, 255] for 8 bit color. | |
| Defaults to 'white'. | |
| texture_image (Union[torch.Tensor, np.ndarray], optional): | |
| Texture image to be wrapped on the smpl mesh. If not None, | |
| the `palette` will be ignored, and the `body_model` is required | |
| to have `uv_param_path`. | |
| Should pass list or tensor of shape (num_person, H, W, 3). | |
| The color channel should be `RGB`. | |
| Defaults to None. | |
| resolution (Union[Iterable[int], int], optional): | |
| 1). If iterable, should be (height, width) of output images. | |
| 2). If int, would be taken as (resolution, resolution). | |
| Defaults to (1024, 1024). | |
| This will influence the overlay results when render with | |
| backgrounds. The output video will be rendered following the | |
| size of background images and finally resized to resolution. | |
| start (int, optional): start frame index. Defaults to 0. | |
| end (int, optional): end frame index. Exclusive. | |
| Could be positive int or negative int or None. | |
| None represents include all the frames. | |
| Defaults to None. | |
| alpha (float, optional): Transparency of the mesh. | |
| Range in [0.0, 1.0] | |
| Defaults to 1.0. | |
| no_grad (bool, optional): Set to True if do not need differentiable | |
| render. | |
| Defaults to False. | |
| batch_size (int, optional): Batch size for render. | |
| Related to your gpu memory. | |
| Defaults to 10. | |
| # file io parameters: | |
| return_tensor (bool, optional): Whether return the result tensors. | |
| Defaults to False, will return None. | |
| output_path (str, optional): output video or gif or image folder. | |
| Defaults to None, pass export procedure. | |
| # background frames, priority: image_array > frame_list > origin_frames | |
| origin_frames (Optional[str], optional): origin background frame path, | |
| could be `.mp4`, `.gif`(will be sliced into a folder) or an image | |
| folder. | |
| Defaults to None. | |
| frame_list (Optional[List[str]], optional): list of origin background | |
| frame paths, element in list each should be a image path like | |
| `*.jpg` or `*.png`. | |
| Use this when your file names is hard to sort or you only want to | |
| render a small number frames. | |
| Defaults to None. | |
| image_array: (Optional[Union[np.ndarray, torch.Tensor]], optional): | |
| origin background frame `tensor` or `array`, use this when you | |
| want your frames in memory as array or tensor. | |
| overwrite (bool, optional): whether overwriting the existing files. | |
| Defaults to False. | |
| mesh_file_path (bool, optional): the directory path to store the `.ply` | |
| or '.ply' files. Will be named like 'frame_idx_person_idx.ply'. | |
| Defaults to None. | |
| read_frames_batch (bool, optional): Whether read frames by batch. | |
| Set it as True if your video is large in size. | |
| Defaults to False. | |
| # visualize keypoints | |
| plot_kps (bool, optional): whether plot keypoints on the output video. | |
| Defaults to False. | |
| kp3d (Optional[Union[np.ndarray, torch.Tensor]], optional): | |
| the keypoints of any convention, should pass `mask` if have any | |
| none-sense points. Shape should be (frame, ) | |
| Defaults to None. | |
| mask (Optional[Union[np.ndarray, List[int]]], optional): | |
| Mask of keypoints existence. | |
| Defaults to None. | |
| vis_kp_index (bool, optional): | |
| Whether plot keypoint index number on human mesh. | |
| Defaults to False. | |
| # visualize render progress | |
| verbose (bool, optional): | |
| Whether print the progress bar for rendering. | |
| Returns: | |
| Union[None, torch.Tensor]: return the rendered image tensors or None. | |
| """ | |
| # initialize the device | |
| device = torch.device(device) if isinstance(device, str) else device | |
| if isinstance(resolution, int): | |
| resolution = (resolution, resolution) | |
| elif isinstance(resolution, list): | |
| resolution = tuple(resolution) | |
| verts, poses, betas, transl = _prepare_input_pose(verts, poses, betas, | |
| transl) | |
| body_model = _prepare_body_model(body_model, body_model_config) | |
| model_type = body_model.name().replace('-', '').lower() | |
| assert model_type in ['smpl', 'smplx'] | |
| vertices, joints, num_frames, num_person = _prepare_mesh( | |
| poses, betas, transl, verts, start, end, body_model) | |
| end = num_frames if end is None else end | |
| vertices = vertices.view(num_frames, num_person, -1, 3) | |
| num_verts = vertices.shape[-2] | |
| if not plot_kps: | |
| joints = None | |
| if kp3d is not None: | |
| warnings.warn('`plot_kps` is False, `kp3d` will be set as None.') | |
| kp3d = None | |
| image_array, remove_folder, frames_folder = _prepare_background( | |
| image_array, frame_list, origin_frames, output_path, start, end, | |
| img_format, overwrite, num_frames, read_frames_batch) | |
| render_resolution = None | |
| if image_array is not None: | |
| render_resolution = (image_array.shape[1], image_array.shape[2]) | |
| elif frames_folder is not None: | |
| frame_path_list = glob.glob(osp.join( | |
| frames_folder, '*.jpg')) + glob.glob( | |
| osp.join(frames_folder, '*.png')) + glob.glob( | |
| osp.join(frames_folder, '*.jpeg')) | |
| vid_info = vid_info_reader(frame_path_list[0]) | |
| render_resolution = (int(vid_info['height']), int(vid_info['width'])) | |
| if resolution is not None: | |
| if render_resolution is not None: | |
| if render_resolution != resolution: | |
| warnings.warn( | |
| f'Size of background: {render_resolution} !=' | |
| f' resolution: {resolution}, the output video will be ' | |
| f'resized as {resolution}') | |
| final_resolution = resolution | |
| elif render_resolution is None: | |
| render_resolution = final_resolution = resolution | |
| elif resolution is None: | |
| if render_resolution is None: | |
| render_resolution = final_resolution = (1024, 1024) | |
| elif render_resolution is not None: | |
| final_resolution = render_resolution | |
| if isinstance(kp3d, np.ndarray): | |
| kp3d = torch.Tensor(kp3d) | |
| if kp3d is not None: | |
| if mask is not None: | |
| map_index = np.where(np.array(mask) != 0)[0] | |
| kp3d = kp3d[map_index.tolist()] | |
| kp3d = kp3d[start:end] | |
| kp3d = kp3d.view(num_frames, -1, 3) | |
| # prepare render_param_dict | |
| render_param_dict = copy.deepcopy(RENDER_CONFIGS[render_choice.lower()]) | |
| if model_type == 'smpl': | |
| render_param_dict.update(num_class=24) | |
| elif model_type == 'smplx': | |
| render_param_dict.update(num_class=27) | |
| if render_choice not in [ | |
| 'hq', 'mq', 'lq', 'silhouette', 'part_silhouette', 'depth', | |
| 'pointcloud', 'normal' | |
| ]: | |
| raise ValueError('Please choose the right render_choice.') | |
| # body part colorful visualization should use flat shader to be sharper. | |
| if texture_image is None: | |
| if isinstance(palette, str): | |
| palette = [palette] * num_person | |
| elif isinstance(palette, np.ndarray): | |
| palette = torch.Tensor(palette) | |
| palette = palette.view(-1, 3) | |
| if palette.shape[0] != num_person: | |
| _times = num_person // palette.shape[0] | |
| palette = palette.repeat(_times, 1)[:num_person] | |
| if palette.shape[0] == 1: | |
| print(f'Same color for all the {num_person} people') | |
| else: | |
| print('Repeat palette for multi-person.') | |
| else: | |
| raise ValueError('Wrong input palette type. ' | |
| 'Palette should be tensor, array or list of strs') | |
| colors_all = _prepare_colors(palette, render_choice, num_person, | |
| num_verts, model_type) | |
| colors_all = colors_all.view(-1, num_person * num_verts, 3) | |
| # verts of ParametricMeshes should be in (N, V, 3) | |
| vertices = vertices.view(num_frames, -1, 3) | |
| meshes = ParametricMeshes( | |
| body_model=body_model, | |
| verts=vertices, | |
| N_individual_overdide=num_person, | |
| model_type=model_type, | |
| texture_image=texture_image, | |
| use_nearest=bool(render_choice == 'part_silhouette'), | |
| vertex_color=colors_all) | |
| # write .ply or .obj files | |
| if mesh_file_path is not None: | |
| mmcv.mkdir_or_exist(mesh_file_path) | |
| for person_idx in range(meshes.shape[1]): | |
| mesh_person = meshes[:, person_idx] | |
| if texture_image is None: | |
| ply_paths = [ | |
| f'{mesh_file_path}/frame{frame_idx}_' | |
| f'person{person_idx}.ply' | |
| for frame_idx in range(num_frames) | |
| ] | |
| save_meshes_as_plys(meshes=mesh_person, files=ply_paths) | |
| else: | |
| obj_paths = [ | |
| f'{mesh_file_path}/frame{frame_idx}_' | |
| f'person{person_idx}.obj' | |
| for frame_idx in range(num_frames) | |
| ] | |
| save_meshes_as_objs(meshes=mesh_person, files=obj_paths) | |
| vertices = meshes.verts_padded().view(num_frames, num_person, -1, 3) | |
| # prepare camera matrixs | |
| if Ks is not None: | |
| projection = 'perspective' | |
| orig_cam = None | |
| if isinstance(Ks, np.ndarray): | |
| Ks = torch.Tensor(Ks) | |
| Ks = Ks.view(-1, num_person, 3, 3) | |
| Ks = Ks[start:end] | |
| Ks = Ks.view(-1, 3, 3) | |
| K = K.repeat(num_frames * num_person, 1, 1) | |
| Ks = K.inverse() @ Ks @ K | |
| vertices = vertices.view(num_frames * num_person, -1, 3) | |
| if T is None: | |
| T = torch.zeros(num_frames, num_person, 1, 3) | |
| elif isinstance(T, np.ndarray): | |
| T = torch.Tensor(T) | |
| T = T[start:end] | |
| T = T.view(num_frames * num_person, 1, 3) | |
| vertices = torch.einsum('blc,bvc->bvl', Ks, vertices + T) | |
| R = None | |
| T = None | |
| vertices = vertices.view(num_frames, num_person, -1, 3) | |
| if orig_cam is not None: | |
| if isinstance(orig_cam, np.ndarray): | |
| orig_cam = torch.Tensor(orig_cam) | |
| projection = 'weakperspective' | |
| r = render_resolution[1] / render_resolution[0] | |
| orig_cam = orig_cam[start:end] | |
| orig_cam = orig_cam.view(num_frames, num_person, 4) | |
| # if num_person > 1: | |
| sx, sy, tx, ty = torch.unbind(orig_cam, -1) | |
| vertices[..., 0] += tx.view(num_frames, num_person, 1) | |
| vertices[..., 1] += ty.view(num_frames, num_person, 1) | |
| vertices[..., 0] *= sx.view(num_frames, num_person, 1) | |
| vertices[..., 1] *= sy.view(num_frames, num_person, 1) | |
| orig_cam = torch.tensor([1.0, 1.0, 0.0, | |
| 0.0]).view(1, 4).repeat(num_frames, 1) | |
| K, R, T = WeakPerspectiveCameras.convert_orig_cam_to_matrix( | |
| orig_cam=orig_cam, | |
| znear=torch.min(vertices[..., 2] - 1), | |
| aspect_ratio=r) | |
| if num_person > 1: | |
| vertices = vertices.reshape(num_frames, -1, 3) | |
| else: | |
| vertices = vertices.view(num_frames, -1, 3) | |
| meshes = meshes.update_padded(new_verts_padded=vertices) | |
| # orig_cam and K are None, use look_at_view | |
| if K is None: | |
| projection = 'fovperspective' | |
| K, R, T = compute_orbit_cameras(at=(torch.mean(vertices.view(-1, 3), | |
| 0)).detach().cpu(), | |
| orbit_speed=orbit_speed, | |
| batch_size=num_frames, | |
| convention=convention) | |
| convention = 'pytorch3d' | |
| if isinstance(R, np.ndarray): | |
| R = torch.Tensor(R).view(-1, 3, 3) | |
| elif isinstance(R, torch.Tensor): | |
| R = R.view(-1, 3, 3) | |
| elif isinstance(R, list): | |
| R = torch.Tensor(R).view(-1, 3, 3) | |
| elif R is None: | |
| pass | |
| else: | |
| raise ValueError(f'Wrong type of R: {type(R)}!') | |
| if R is not None: | |
| if len(R) > num_frames: | |
| R = R[start:end] | |
| if isinstance(T, np.ndarray): | |
| T = torch.Tensor(T).view(-1, 3) | |
| elif isinstance(T, torch.Tensor): | |
| T = T.view(-1, 3) | |
| elif isinstance(T, list): | |
| T = torch.Tensor(T).view(-1, 3) | |
| elif T is None: | |
| pass | |
| else: | |
| raise ValueError(f'Wrong type of T: {type(T)}!') | |
| if T is not None: | |
| if len(T) > num_frames: | |
| T = T[start:end] | |
| if isinstance(K, np.ndarray): | |
| K = torch.Tensor(K).view(-1, K.shape[-2], K.shape[-1]) | |
| elif isinstance(K, torch.Tensor): | |
| K = K.view(-1, K.shape[-2], K.shape[-1]) | |
| elif isinstance(K, list): | |
| K = torch.Tensor(K) | |
| K = K.view(-1, K.shape[-2], K.shape[-1]) | |
| else: | |
| raise ValueError(f'Wrong type of K: {type(K)}!') | |
| if K is not None: | |
| if len(K) > num_frames: | |
| K = K[start:end] | |
| assert projection in [ | |
| 'perspective', 'weakperspective', 'orthographics', 'fovorthographics', | |
| 'fovperspective' | |
| ], f'Wrong camera projection: {projection}' | |
| if projection in ['fovperspective', 'perspective']: | |
| is_perspective = True | |
| elif projection in [ | |
| 'fovorthographics', 'weakperspective', 'orthographics' | |
| ]: | |
| is_perspective = False | |
| if projection in ['fovperspective', 'fovorthographics', 'weakperspective']: | |
| assert in_ndc | |
| K, R, T = convert_camera_matrix(convention_dst='pytorch3d', | |
| K=K, | |
| R=R, | |
| T=T, | |
| is_perspective=is_perspective, | |
| convention_src=convention, | |
| resolution_src=render_resolution, | |
| in_ndc_src=in_ndc, | |
| in_ndc_dst=in_ndc) | |
| # initialize the renderer. | |
| renderer = SMPLRenderer(resolution=render_resolution, | |
| device=device, | |
| output_path=output_path, | |
| return_tensor=return_tensor, | |
| alpha=alpha, | |
| read_img_format=img_format, | |
| render_choice=render_choice, | |
| frames_folder=frames_folder, | |
| plot_kps=plot_kps, | |
| vis_kp_index=vis_kp_index, | |
| final_resolution=final_resolution, | |
| **render_param_dict) | |
| cameras = build_cameras( | |
| dict(type=projection, | |
| in_ndc=in_ndc, | |
| device=device, | |
| K=K, | |
| R=R, | |
| T=T, | |
| resolution=render_resolution)) | |
| if image_array is not None: | |
| image_array = torch.Tensor(image_array) | |
| image_array = align_input_to_padded(image_array, | |
| ndim=4, | |
| batch_size=num_frames, | |
| padding_mode='ones') | |
| # prepare the render data. | |
| render_data = dict( | |
| images=image_array, | |
| meshes=meshes, | |
| cameras=cameras, | |
| joints=joints, | |
| joints_gt=kp3d, | |
| ) | |
| results = render_runner.render(renderer=renderer, | |
| device=device, | |
| batch_size=batch_size, | |
| output_path=output_path, | |
| return_tensor=return_tensor, | |
| no_grad=no_grad, | |
| verbose=verbose, | |
| **render_data) | |
| if remove_folder: | |
| if Path(frames_folder).is_dir(): | |
| shutil.rmtree(frames_folder) | |
| if return_tensor: | |
| return results | |
| else: | |
| return None | |
def visualize_smpl_calibration(
    K,
    R,
    T,
    resolution,
    **kwargs,
) -> None:
    """Visualize a smpl mesh which has opencv calibration matrix defined in
    screen."""
    assert K is not None, '`K` is required.'
    assert resolution is not None, '`resolution`(h, w) is required.'
    # These settings are fixed for a calibrated opencv-screen camera;
    # caller-supplied duplicates are discarded so they cannot conflict.
    fixed_kwargs = dict(projection='perspective',
                        convention='opencv',
                        orig_cam=None,
                        in_ndc=False)
    for name in fixed_kwargs:
        kwargs.pop(name, None)
    return render_smpl(K=K,
                       R=R,
                       T=T,
                       resolution=resolution,
                       **fixed_kwargs,
                       **kwargs)
def visualize_smpl_hmr(cam_transl,
                       bbox=None,
                       kp2d=None,
                       focal_length=5000,
                       det_width=224,
                       det_height=224,
                       bbox_format='xyxy',
                       **kwargs) -> None:
    """Simplest way to visualize HMR or SPIN or Smplify pred smpl with origin
    frames and predicted cameras.

    Args:
        cam_transl: predicted crop-space camera, array or tensor whose last
            dim is (s, tx, ty) — presumably the HMR weak-perspective output;
            confirm against the caller.
        bbox: detection boxes used for the crop. Required when ``kp2d`` is
            not given.
        kp2d: 2D keypoints; when provided, ``bbox`` is derived from them
            and any passed ``bbox`` is overridden.
        focal_length: focal length of the default HMR intrinsic.
        det_width: width of the detector crop.
        det_height: height of the detector crop.
        bbox_format: box layout, e.g. 'xyxy'.
        **kwargs: forwarded to ``render_smpl``; keys that would conflict
            with the fixed perspective setup are dropped.
    """
    # Fail early with a clear message instead of letting
    # convert_bbox_to_intrinsic crash on a None bbox.
    assert bbox is not None or kp2d is not None, \
        'Pass either `bbox` or `kp2d`.'
    if kp2d is not None:
        bbox = convert_kp2d_to_bbox(kp2d, bbox_format=bbox_format)
    Ks = convert_bbox_to_intrinsic(bbox, bbox_format=bbox_format)
    K = torch.Tensor(
        get_default_hmr_intrinsic(focal_length=focal_length,
                                  det_height=det_height,
                                  det_width=det_width))
    func = partial(
        render_smpl,
        projection='perspective',
        convention='opencv',
        in_ndc=False,
        K=None,
        R=None,
        orig_cam=None,
    )
    if isinstance(cam_transl, np.ndarray):
        cam_transl = torch.Tensor(cam_transl)
    # Convert (s, tx, ty) to a camera translation (tx, ty, tz) with
    # tz = 2 * f / (w * s); 1e-9 guards against a zero scale.
    T = torch.cat([
        cam_transl[..., [1]], cam_transl[..., [2]], 2 * focal_length /
        (det_width * cam_transl[..., [0]] + 1e-9)
    ], -1)
    for k in func.keywords.keys():
        if k in kwargs:
            kwargs.pop(k)
    return func(Ks=Ks, K=K, T=T, **kwargs)
def visualize_smpl_vibe(orig_cam=None,
                        pred_cam=None,
                        bbox=None,
                        output_path='sample.mp4',
                        resolution=None,
                        aspect_ratio=1.0,
                        bbox_scale_factor=1.25,
                        bbox_format='xyxy',
                        **kwargs) -> None:
    """Simplest way to visualize pred smpl with origin frames and predicted
    cameras."""
    assert resolution is not None
    # Lift the crop-space camera to full-image space when both the
    # predicted camera and its boxes are available.
    if pred_cam is not None and bbox is not None:
        orig_cam = torch.Tensor(
            convert_crop_cam_to_orig_img(pred_cam, bbox, resolution[1],
                                         resolution[0], aspect_ratio,
                                         bbox_scale_factor, bbox_format))
    assert orig_cam is not None, '`orig_cam` is required.'
    # Fixed weak-perspective setup; conflicting caller kwargs are dropped.
    fixed_kwargs = dict(projection='weakperspective',
                        convention='opencv',
                        in_ndc=True)
    for name in fixed_kwargs:
        kwargs.pop(name, None)
    return render_smpl(orig_cam=orig_cam,
                       output_path=output_path,
                       resolution=resolution,
                       **fixed_kwargs,
                       **kwargs)
def visualize_T_pose(num_frames,
                     body_model_config=None,
                     body_model=None,
                     orbit_speed=1.0,
                     **kwargs) -> None:
    """Simplest way to visualize a sequence of T pose."""
    assert num_frames > 0, '`num_frames` is required.'
    assert body_model_config is not None or body_model is not None
    # Resolve the model type from the config, falling back to the model's
    # own name (e.g. 'SMPL-X' -> 'smplx').
    if body_model_config is not None:
        model_type = body_model_config['type']
    else:
        model_type = body_model.name().replace('-', '').lower()
    # Zero pose = T pose: 72 params for smpl, 165 for other model types.
    pose_dim = 72 if model_type == 'smpl' else 165
    poses = torch.zeros(num_frames, pose_dim)
    fixed_kwargs = dict(betas=None,
                        transl=None,
                        verts=None,
                        convention='pytorch3d',
                        projection='fovperspective',
                        K=None,
                        R=None,
                        T=None,
                        origin_frames=None)
    for name in fixed_kwargs:
        kwargs.pop(name, None)
    return render_smpl(poses=poses,
                       body_model_config=body_model_config,
                       body_model=body_model,
                       orbit_speed=orbit_speed,
                       **fixed_kwargs,
                       **kwargs)
def visualize_smpl_pose(poses=None, verts=None, **kwargs) -> None:
    """Simplest way to visualize a sequence of smpl pose.

    Cameras will focus on the center of smpl mesh. `orbit speed` is
    recommended.
    """
    assert poses is not None or verts is not None, \
        'Pass either `poses` or `verts`.'
    # Orbit-style fov camera; conflicting caller kwargs are dropped so the
    # fixed setup always wins.
    fixed_kwargs = dict(convention='opencv',
                        projection='fovperspective',
                        K=None,
                        R=None,
                        T=None,
                        in_ndc=True,
                        origin_frames=None,
                        frame_list=None,
                        image_array=None)
    for name in fixed_kwargs:
        kwargs.pop(name, None)
    return render_smpl(poses=poses, verts=verts, **fixed_kwargs, **kwargs)