Spaces:
Runtime error
Runtime error
| """ | |
| Copyright (c) Meta Platforms, Inc. and affiliates. | |
| All rights reserved. | |
| This source code is licensed under the license found in the | |
| LICENSE file in the root directory of this source tree. | |
| """ | |
| import warnings | |
| from typing import Dict, Final, List, Optional, overload, Sequence, Tuple, Union | |
| import cv2 | |
| import numpy as np | |
| import torch as th | |
| import torch.nn.functional as thf | |
# An 8-bit color triple, e.g. for OpenCV drawing calls.
# NOTE(review): presumably RGB order given the rest of this module — confirm at call sites.
Color = Tuple[np.uint8, np.uint8, np.uint8]

# Default per-channel white-balance gains used by `linear2displayBatch`.
__DEFAULT_WB_SCALE: np.ndarray = np.array([1.05, 0.95, 1.45], dtype=np.float32)
# These are typing-only overload stubs for `linear2srgb` below: a torch input
# yields a torch output, a numpy input yields a numpy output. The `@overload`
# decorators were missing, so the stubs merely shadowed each other and the
# `overload` import went unused.
@overload
def linear2srgb(img: th.Tensor, gamma: float = 2.4) -> th.Tensor:
    ...


@overload
def linear2srgb(img: np.ndarray, gamma: float = 2.4) -> np.ndarray:
    ...
def linear2srgb(
    img: Union[th.Tensor, np.ndarray], gamma: float = 2.4
) -> Union[th.Tensor, np.ndarray]:
    """Converts linear RGB values to sRGB encoding.

    Values at or below the sRGB knee (0.0031308) use the linear segment of
    the curve; everything above uses the exponential segment. Accepts either
    a torch tensor or a numpy array and returns the same type.
    """
    knee = 0.0031308
    if isinstance(img, th.Tensor):
        # Clamping before pow keeps NaN values (and NaN gradients) away from
        # negative inputs, which take the linear branch anyway.
        curved = 1.055 * th.clamp(img, min=knee).pow(1 / gamma) - 0.055
        return th.where(img <= knee, img * 12.92, curved)
    curved = 1.055 * (np.maximum(img, knee) ** (1 / gamma)) - 0.055
    return np.where(img <= knee, img * 12.92, curved)
# Typing-only overload stubs for `linear2color_corr` below (torch in -> torch
# out, numpy in -> numpy out). The `@overload` decorators were missing, so the
# stubs had no effect and the `overload` import went unused.
@overload
def linear2color_corr(img: th.Tensor, dim: int = -1) -> th.Tensor:
    ...


@overload
def linear2color_corr(img: np.ndarray, dim: int = -1) -> np.ndarray:
    ...
def linear2color_corr(
    img: Union[th.Tensor, np.ndarray], dim: int = -1
) -> Union[th.Tensor, np.ndarray]:
    """Applies ad-hoc 'color correction' to a linear RGB Mugsy image along
    color channel `dim` and returns the gamma-corrected result.

    Args:
        img: Linear RGB image, as a torch tensor or numpy array.
        dim: Index of the color channel (default -1, i.e. the last axis).

    Returns:
        Color-corrected, gamma-encoded image of the same type as `img`,
        clamped to [0, 2].
    """
    gamma = 2.0
    black = 3.0 / 255.0
    color_scale = [1.4, 1.1, 1.6]

    # Normalize a negative channel index exactly once. (The original code
    # performed this both before and after the assert; the second pass was a
    # dead no-op.)
    if dim == -1:
        dim = len(img.shape) - 1
    assert img.shape[dim] == 3

    if isinstance(img, th.Tensor):
        # Per-channel gains, shaped to broadcast along `dim`.
        scale = th.FloatTensor(color_scale).view([3 if i == dim else 1 for i in range(img.dim())])
        img = img * scale.to(img) / 1.1
        # Black-level subtraction, renormalization, then gamma encoding.
        return th.clamp(
            (((1.0 / (1 - black)) * 0.95 * th.clamp(img - black, 0, 2)).pow(1.0 / gamma))
            - 15.0 / 255.0,
            0,
            2,
        )
    else:
        scale = np.array(color_scale).reshape([3 if i == dim else 1 for i in range(img.ndim)])
        img = img * scale / 1.1
        return np.clip(
            (((1.0 / (1 - black)) * 0.95 * np.clip(img - black, 0, 2)) ** (1.0 / gamma))
            - 15.0 / 255.0,
            0,
            2,
        )
def linear2displayBatch(
    val: th.Tensor,
    gamma: float = 1.5,
    wbscale: np.ndarray = __DEFAULT_WB_SCALE,
    black: float = 5.0 / 255.0,
    mode: str = "srgb",
) -> th.Tensor:
    """Converts a batch of linear NCHW images in [0, 255] to display range.

    Per-channel white-balance gains are applied, the black level is
    subtracted, then either the sRGB transfer ("srgb" mode) or a plain
    1/gamma power curve is applied. Output is clamped back to [0, 255].
    """
    gains = th.from_numpy(wbscale).to(val.device)
    balanced = val.float() / 255.0 * gains[None, :, None, None] - black
    if mode == "srgb":
        display = linear2srgb(balanced, gamma=gamma)
    else:
        display = balanced ** th.tensor(1.0 / gamma)
    return th.clamp(display, 0, 1) * 255.0
def linear2color_corr_inv(img: th.Tensor, dim: int) -> th.Tensor:
    """Inverse of linear2color_corr.

    Removes ad-hoc 'color correction' from a gamma-corrected RGB Mugsy image
    along color channel `dim` and returns the linear RGB result, clamped to
    [0, 1]."""
    gamma = 2.0
    black = 3.0 / 255.0
    color_scale = [1.4, 1.1, 1.6]

    assert img.shape[dim] == 3
    if dim == -1:
        dim = len(img.shape) - 1

    # Per-channel gains, shaped to broadcast along `dim`.
    broadcast_shape = [3 if axis == dim else 1 for axis in range(img.dim())]
    scale = th.FloatTensor(color_scale).view(broadcast_shape)

    # Undo the gamma encoding and black-level compensation...
    decoded = (img + 15.0 / 255.0).pow(gamma) / (0.95 / (1 - black)) + black
    # ...then divide out the per-channel color scaling.
    return th.clamp(decoded / (scale.to(decoded) / 1.1), 0, 1)
# Identity color-correction matrix: `mapped2linear` leaves colors unchanged by default.
DEFAULT_CCM: List[List[float]] = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
# Zero black level / DC offset per channel.
DEFAULT_DC_OFFSET: List[float] = [0, 0, 0]
# Unit encoding gamma: no gamma expansion by default.
DEFAULT_GAMMA: float = 1.0
# Typing-only overload stubs for `mapped2linear` below (torch in -> torch out,
# numpy in -> numpy out). The `@overload` decorators were missing, so the
# stubs merely shadowed each other and the `overload` import went unused.
@overload
def mapped2linear(
    img: th.Tensor,
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> th.Tensor:
    ...


@overload
def mapped2linear(
    img: np.ndarray,
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> np.ndarray:
    ...
def mapped2linear(
    img: Union[th.Tensor, np.ndarray],
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> Union[th.Tensor, np.ndarray]:
    """Maps a previously-characterized camera color space into a linear
    color space. IMPORTANT: This function assumes RGB channel order,
    not BGR.

    The characterization is specified by `ccm`, `dc_offset`, and `gamma`.
    The dimension index of the color channel is specified with `dim` (default
    is -1, i.e. the last dimension).

    The function accepts both [0, 255] integer and [0, 1] float formats.
    However, the return value is always floating point in [0, 1]-range.

    FIXME(swirajaya) -
    This is a reimplementation of `RGBMapping::map_to_lin_rgb` in
    `//arvr/projects/codec_avatar/calibration/colorcal:colorspace`. To
    figure out a C++ / Py binding solution that works for both DGX and
    PROD, as well as `np.ndarray` and `th.Tensor`.

    Args:
        @param img the image in RGB, as th.Tensor or np.ndarray
        @param dim dimension of color channel
        @param ccm 3x3 color correction matrix
        @param dc_offset camera black level/dc offset
        @param gamma encoding gamma

    Returns:
        @return the corrected image as float th.Tensor or np.ndarray
    """
    assert img.shape[dim] == 3
    # Normalize a negative channel index to an absolute one.
    if dim == -1:
        dim = len(img.shape) - 1

    # Shape that broadcasts a per-channel (length-3) vector against `img`.
    ndim: int = img.dim() if th.is_tensor(img) else img.ndim
    pixel_shape: List[int] = [3 if i == dim else 1 for i in range(ndim)]

    # Summation indices for CCM matrix multiplication
    # e.g. [sum_j] CCM_ij * Img_kljnpq -> ImgCorr_klinpq if say, dim == 2
    # (sublist-format einsum: operands are interleaved with their index lists).
    ein_ccm: List[int] = [0, 1]
    ein_inp: List[int] = [1 if i == dim else i + 2 for i in range(ndim)]
    ein_out: List[int] = [0 if i == dim else i + 2 for i in range(ndim)]

    # Float inputs within EPS of 1.0 (or int inputs equal to 255) count as
    # saturated; EPS also acts as the clamp floor before the 1/gamma power.
    EPS: float = 1e-7
    if isinstance(img, th.Tensor):
        if th.is_floating_point(img):
            input_saturated = img > (1.0 - EPS)
            imgf = img.double()
        else:
            input_saturated = img == 255
            imgf = img.double() / 255.0
        # Black-level removal, then linearization with the encoding gamma.
        dc_offset = th.DoubleTensor(dc_offset).view(pixel_shape).to(img.device)
        img_linear = th.clamp(
            imgf - dc_offset,
            min=EPS,
        ).pow(1.0 / gamma)
        img_corr = th.clamp(  # CCM * img_linear
            th.einsum(th.DoubleTensor(ccm).to(img.device), ein_ccm, img_linear, ein_inp, ein_out),
            min=0.0,
            max=1.0,
        )
        # Saturated input pixels are forced to exactly 1.0 in the output.
        img_corr = th.where(input_saturated, 1.0, img_corr)
    else:
        if np.issubdtype(img.dtype, np.floating):
            input_saturated = img > (1.0 - EPS)
            imgf = img.astype(float)
        else:
            input_saturated = img == 255
            imgf = img.astype(float) / 255.0
        dc_offset = np.array(dc_offset).reshape(pixel_shape)
        img_linear = np.clip(imgf - dc_offset, a_min=EPS, a_max=None) ** (1.0 / gamma)
        img_corr: np.ndarray = np.clip(  # CCM * img_linear
            np.einsum(np.array(ccm), ein_ccm, img_linear, ein_inp, ein_out),
            a_min=0.0,
            a_max=1.0,
        )
        # Saturated input pixels are forced to exactly 1.0 in the output.
        img_corr: np.ndarray = np.where(input_saturated, 1.0, img_corr)

    return img_corr
# Typing-only overload stubs for `mapped2srgb` below (torch in -> torch out,
# numpy in -> numpy out). The `@overload` decorators were missing, so the
# stubs merely shadowed each other and the `overload` import went unused.
@overload
def mapped2srgb(
    img: th.Tensor,
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> th.Tensor:
    ...


@overload
def mapped2srgb(
    img: np.ndarray,
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> np.ndarray:
    ...
def mapped2srgb(
    img: Union[th.Tensor, np.ndarray],
    dim: int = -1,
    ccm: Union[List[List[float]], th.Tensor, np.ndarray] = DEFAULT_CCM,
    dc_offset: Union[List[float], th.Tensor, np.ndarray] = DEFAULT_DC_OFFSET,
    gamma: float = DEFAULT_GAMMA,
) -> Union[th.Tensor, np.ndarray]:
    """Maps a previously-characterized camera color space into sRGB co-
    lor space (assuming mapped to Rec709). IMPORTANT: This function
    assumes RGB channel order, not BGR.

    The characterization is specified by `ccm`, `dc_offset`, and `gamma`.
    The dimension index of the color channel is specified with `dim`
    (default is -1, i.e. the last dimension). Note that the sRGB encoding
    step always uses gamma 2.4; the `gamma` argument only describes the
    camera's encoding and is consumed by `mapped2linear`.
    """
    # Note: The redundant if-statement below is due to a Pyre bug.
    # Currently Pyre fails to handle arguments into overloaded functions that are typed
    # as a union of the overloaded method parameter types. Both branches are
    # intentionally identical — do not "simplify" this away.
    if isinstance(img, th.Tensor):
        return linear2srgb(mapped2linear(img, dim, ccm, dc_offset, gamma), gamma=2.4)
    else:
        return linear2srgb(mapped2linear(img, dim, ccm, dc_offset, gamma), gamma=2.4)
# Typing-only overload stubs for `srgb2linear` below (torch in -> torch out,
# numpy in -> numpy out). The `@overload` decorators were missing, so the
# stubs merely shadowed each other and the `overload` import went unused.
@overload
def srgb2linear(img: th.Tensor, gamma: float = 2.4) -> th.Tensor:
    ...


@overload
def srgb2linear(img: np.ndarray, gamma: float = 2.4) -> np.ndarray:
    ...
def srgb2linear(
    img: Union[th.Tensor, np.ndarray], gamma: float = 2.4
) -> Union[th.Tensor, np.ndarray]:
    """Converts sRGB-encoded values back to linear RGB.

    Values at or below the sRGB knee (0.04045) use the linear segment;
    everything above uses the exponential segment. Accepts a torch tensor or
    a numpy array and returns the same type.
    """
    knee = 0.04045
    straight = img / 12.92  # linear part of the sRGB curve
    if isinstance(img, th.Tensor):
        # Clamping before pow keeps NaN values (and NaN gradients) away from
        # negative inputs, which take the linear branch anyway.
        curved = th.pow((th.clamp(img, min=knee) + 0.055) / 1.055, gamma)
        return th.where(img <= knee, straight, curved)
    curved = ((np.maximum(img, knee) + 0.055) / 1.055) ** gamma
    return np.where(img <= knee, straight, curved)
def scale_diff_image(diff_img: th.Tensor) -> th.Tensor:
    """Takes a difference image and returns a new version scaled s.t. its
    values are remapped from [-IMG_MAX, IMG_MAX] -> [0, IMG_MAX], where
    IMG_MAX is either 1 or 255 depending on the range of the input."""
    peak = abs(diff_img).max().item()
    # Any value beyond 1 means the input is in 8-bit range.
    mid = 128 if peak > 1 else 0.5
    top = 255 if peak > 1 else 1
    return (mid * (diff_img / peak) + mid).clamp(0, top)
class LaplacianTexture(th.nn.Module):
    """A texture parameterized as a Laplacian pyramid: one learnable map per
    level, each level twice the resolution of the previous (level L is
    2^L x 2^L). The rendered texture is the sum of all levels, each
    bilinearly upsampled to the full resolution."""

    def __init__(
        self, n_levels: int, n_channels: int = 3, init_scalar: Optional[float] = None
    ) -> None:
        super().__init__()
        self.n_levels = n_levels
        self.n_channels = n_channels

        # Split the initial value evenly across levels so the rendered sum
        # equals `init_scalar`; without it, all levels start at zero.
        per_level = None if init_scalar is None else init_scalar / n_levels

        params = []
        for lvl in range(n_levels):
            shape = (1, n_channels, 2**lvl, 2**lvl)
            if per_level is None:
                params.append(th.nn.Parameter(th.zeros(*shape)))
            else:
                params.append(th.nn.Parameter(per_level * th.ones(*shape)))
        self.pyr_texs = th.nn.ParameterList(params)

    def forward(self) -> th.Tensor:
        # Coarse-to-fine: upsample the running sum and add the next level.
        out = self.pyr_texs[0]
        for lvl in range(1, self.n_levels):
            up = thf.interpolate(out, scale_factor=2, mode="bilinear", align_corners=False)
            out = up + self.pyr_texs[lvl]
        return out

    def init_from_tex(self, tex: th.Tensor) -> None:
        """Sets the pyramid so that forward() reconstructs `tex` exactly."""
        # Downsampled copies of `tex`, finest first, then reversed so
        # pyramid[lvl] matches the resolution of pyr_texs[lvl].
        pyramid = [tex] + [thf.avg_pool2d(tex, 2**lvl) for lvl in range(1, self.n_levels)]
        pyramid.reverse()

        self.pyr_texs[0].data[:] = pyramid[0].data
        # Each level stores the residual w.r.t. the upsampled coarser level.
        for lvl in range(1, self.n_levels):
            up = thf.interpolate(
                pyramid[lvl - 1].data,
                scale_factor=2,
                mode="bilinear",
                align_corners=False,
            )
            self.pyr_texs[lvl].data[:] = pyramid[lvl].data - up

    def render_grad(self) -> th.Tensor:
        """Renders the accumulated parameter gradients the same way forward()
        renders the parameters."""
        gtex = self.pyr_texs[0].grad
        for lvl in range(1, self.n_levels):
            up = thf.interpolate(gtex, scale_factor=2, mode="bilinear", align_corners=False)
            gtex = up + self.pyr_texs[lvl].grad
        return gtex
| morph_cache: Dict[Tuple[int, th.device], th.Tensor] = {} | |
def dilate(x: th.Tensor, ks: int) -> th.Tensor:
    """Morphological dilation with a ks x ks all-ones structuring element.

    bool/int inputs are converted to float internally; 3-D inputs get a
    channel dim inserted. The result is cast back to the input's dtype.
    """
    assert (ks % 2) == 1  # odd kernel so `ks // 2` padding preserves H x W
    out_dtype = x.dtype
    if out_dtype in [th.bool, th.int64, th.int32]:
        x = x.float()
    if x.dim() == 3:
        x = x[:, None]

    # Reuse one all-ones kernel per (size, device) from the module cache.
    key = (ks, x.device)
    w = morph_cache.get(key)
    if w is None:
        w = th.ones(1, 1, ks, ks, device=x.device)
        morph_cache[key] = w

    # Any nonzero response under the kernel marks the pixel as set.
    return (thf.conv2d(x, w, padding=ks // 2) > 0).to(dtype=out_dtype)
def erode(x: th.Tensor, ks: int) -> th.Tensor:
    """Morphological erosion, implemented as dilation of the complement."""
    complement = ~x if x.dtype is th.bool else 1 - x
    dilated = dilate(complement, ks)
    return ~dilated if dilated.dtype is th.bool else 1 - dilated
def smoothstep(e0: np.ndarray, e1: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Hermite smoothstep: 0 below e0, 1 above e1, 3t^2 - 2t^3 in between."""
    ramp = np.clip((x - e0) / (e1 - e0), 0, 1)
    return ramp * ramp * (3.0 - 2.0 * ramp)
def smootherstep(e0: np.ndarray, e1: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Perlin's smootherstep: like smoothstep but with zero first and second
    derivatives at both edges (6t^5 - 15t^4 + 10t^3)."""
    ramp = np.clip((x - e0) / (e1 - e0), 0, 1)
    return (ramp**3) * (ramp * (ramp * 6 - 15) + 10)
def tensor2rgbjet(
    tensor: th.Tensor, x_max: Optional[float] = None, x_min: Optional[float] = None
) -> np.ndarray:
    """Converts a tensor to an uint8 image Numpy array with `cv2.COLORMAP_JET` applied.

    Args:
        tensor: Input tensor to be converted.
        x_max: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_max = tensor.max() if None is given.
        x_min: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_min = tensor.min() if None is given.
    """
    normalized = tensor2rgb(tensor, x_max=x_max, x_min=x_min)
    return cv2.applyColorMap(normalized, cv2.COLORMAP_JET)
def tensor2rgb(
    tensor: th.Tensor, x_max: Optional[float] = None, x_min: Optional[float] = None
) -> np.ndarray:
    """Converts a tensor to an uint8 image Numpy array.

    Args:
        tensor: Input tensor to be converted.
        x_max: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_max = tensor.max() if None is given.
        x_min: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_min = tensor.min() if None is given.
    """
    arr = tensor.data.cpu().numpy()
    lo = arr.min() if x_min is None else x_min
    hi = arr.max() if x_max is None else x_max
    # Floor the range at 1e-3 so a constant input doesn't divide by zero.
    gain = 255 / np.clip(hi - lo, 1e-3, None)
    scaled = ((arr - lo) * gain).clip(0.0, 255.0)
    return scaled.astype(np.uint8)
def tensor2image(
    tensor: th.Tensor,
    x_max: Optional[float] = 1.0,
    x_min: Optional[float] = 0.0,
    mode: str = "rgb",
    mask: Optional[th.Tensor] = None,
    label: Optional[str] = None,
) -> np.ndarray:
    """Converts a tensor to an image.

    Args:
        tensor: Input tensor to be converted.
            The shape of the tensor should be CxHxW or HxW. The channels are
            assumed to be in RGB format.
        x_max: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_max = tensor.max() if None is explicitly given.
        x_min: The output color will be normalized as (x-x_min)/(x_max-x_min)*255.
            x_min = tensor.min() if None is explicitly given.
        mode: Can be `rgb` or `jet`. If `jet` is given, cv2.COLORMAP_JET would be applied.
        mask: Optional mask to be applied to the input tensor.
        label: Optional text to be added to the output image.

    Returns:
        The converted uint8 HxWx3 Numpy image.
    """
    tensor = tensor.detach()

    # Apply mask
    if mask is not None:
        tensor = tensor * mask

    if len(tensor.size()) == 2:
        tensor = tensor[None]

    # Make three channel image
    assert len(tensor.size()) == 3, tensor.size()
    n_channels = tensor.shape[0]
    if n_channels == 1:
        tensor = tensor.repeat(3, 1, 1)
    elif n_channels != 3:
        raise ValueError(f"Unsupported number of channels {n_channels}.")

    # Convert to display format
    img = tensor.permute(1, 2, 0)

    if mode == "rgb":
        img = tensor2rgb(img, x_max=x_max, x_min=x_min)
    elif mode == "jet":
        # `cv2.applyColorMap` assumes input format in BGR. Reorder the
        # channels out-of-place: `img` is a view sharing storage with the
        # caller's tensor (`detach()` does not copy), so the previous
        # in-place swap (`img[:, :, :3] = img[:, :, [2, 1, 0]]`) silently
        # mutated the input tensor.
        img = img[:, :, [2, 1, 0]]
        img = tensor2rgbjet(img, x_max=x_max, x_min=x_min)
        # Convert the colormapped result back to RGB.
        img = img[:, :, [2, 1, 0]]
    else:
        raise ValueError(f"Unsupported mode {mode}.")

    if label is not None:
        img = add_label_centered(img, label)

    return img
def add_label_centered(
    img: np.ndarray,
    text: str,
    font_scale: float = 1.0,
    thickness: int = 2,
    alignment: str = "top",
    color: Tuple[int, int, int] = (0, 255, 0),
) -> np.ndarray:
    """Adds a horizontally centered text label to a copy of an image.

    Args:
        img: Input image.
        text: Text to be added on the image.
        font_scale: The scale of the font.
        thickness: Thickness of the lines.
        alignment: Can be `top` or `bottom`. The vertical placement of the text.
        color: The color of the text. Assumes the same color space as `img`.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    text_w, text_h = cv2.getTextSize(text, font, font_scale, thickness=thickness)[0]
    canvas = img.astype(np.uint8).copy()

    # Center horizontally; pick the baseline row from the alignment.
    x = (canvas.shape[1] - text_w) // 2
    if alignment == "top":
        y = 50
    elif alignment == "bottom":
        y = canvas.shape[0] - text_h
    else:
        raise ValueError("Unknown text alignment")

    cv2.putText(
        canvas,
        text,
        (x, y),
        font,
        font_scale,
        color,
        thickness=thickness,
        lineType=cv2.LINE_AA,
    )
    return canvas
def get_color_map(name: str = "COLORMAP_JET") -> np.ndarray:
    """Return a 256 x 3 array representing a color map from OpenCV.

    The BGR output of `cv2.applyColorMap` is flipped to RGB."""
    ramp = np.arange(256, dtype=np.uint8)[None, :]
    mapped = cv2.applyColorMap(ramp, getattr(cv2, name))
    return mapped[0, :, ::-1].copy()
def feature2rgb(x: Union[th.Tensor, np.ndarray], scale: int = -1) -> np.ndarray:
    """Collapses a 3-dim (C, H, W) feature map to an uint8 pseudo-color image
    by summing every third channel into one of three planes, normalizing the
    result to [0, 255]. If `scale` != -1 the image is upsampled by it."""
    planes = [x[off::3].sum(0).data.cpu().numpy()[:, :, None] for off in range(3)]
    stacked = np.concatenate(planes, axis=2)
    normalized = (stacked - stacked.min()) / (stacked.max() - stacked.min())
    out = (normalized * 255).astype(np.uint8)
    if scale != -1:
        out = cv2.resize(out, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    return out
def kpts2delta(kpts: th.Tensor, size: Sequence[int]) -> th.Tensor:
    """Computes, for every cell of an (h, w) grid, the 2D vector from that
    grid location to each keypoint.

    Args:
        kpts: B x N x 2 keypoints; last dim is (x, y) = (column, row).
        size: (h, w) of the target grid.

    Returns:
        B x N x H x W x 2 tensor of (keypoint - grid location) vectors.
    """
    h, w = size
    # With indexing="xy", meshgrid(x-range, y-range) yields (h, w)-shaped
    # grids. The previous code passed (h-range, w-range), which produces
    # (w, h)-shaped grids and made the unflatten below fail for any
    # non-square size; behavior for square sizes is unchanged.
    grid_x, grid_y = th.meshgrid(
        th.arange(w, dtype=kpts.dtype, device=kpts.device),
        th.arange(h, dtype=kpts.dtype, device=kpts.device),
        indexing="xy",
    )
    grid = th.stack((grid_x, grid_y), dim=-1)  # (h, w, 2), entries (x, y)
    # Broadcast: (B, N, 1, 1, 2) - (1, 1, h, w, 2) -> (B, N, h, w, 2).
    delta = kpts.unflatten(-1, (1, 1, 2)) - grid.unflatten(0, (1, 1, h))
    return delta
def kpts2heatmap(kpts: th.Tensor, size: Sequence[int], sigma: int = 7) -> th.Tensor:
    """Renders an isotropic Gaussian heatmap (std `sigma`) around each of the
    B x N keypoints on an (h, w) grid; returns B x N x H x W."""
    sq_dist = kpts2delta(kpts, size).pow(2).sum(-1)
    return th.exp(-sq_dist / (2 * sigma**2))
def make_image_grid(
    data: Union[th.Tensor, Dict[str, th.Tensor]],
    keys_to_draw: Optional[List[str]] = None,
    scale_factor: Optional[float] = None,
    draw_labels: bool = True,
    grid_size: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
    """Arranges a tensor of images (or a dict with labeled image tensors) into
    a grid.

    Params:
        data: Either a single image tensor [N, {1, 3}, H, W] containing images to
            arrange in a grid layout, or a dict with tensors of the same shape.
            If a dict is given, assume each entry in the dict is a batch of
            images, and form a grid where each cell contains one sample from
            each entry in the dict. Images should be in the range [0, 255].

        keys_to_draw: Select which keys in the dict should be included in each
            grid cell. If none are given, draw all keys.

        scale_factor: Optional scale factor applied to each image.

        draw_labels: Whether or not to draw the keys on each image.

        grid_size: Optionally specify the size of the resulting grid.
    """
    # Normalize the single-tensor case to a one-entry dict with an empty label.
    if isinstance(data, th.Tensor):
        data = {"": data}
        keys_to_draw = [""]

    if keys_to_draw is None:
        keys_to_draw = list(data.keys())

    # The first drawn key defines the cell count and the reference image size.
    n_cells = data[keys_to_draw[0]].shape[0]
    img_h = data[keys_to_draw[0]].shape[2]
    img_w = data[keys_to_draw[0]].shape[3]

    # Resize all images to match the shape of the first image, and convert
    # Greyscale -> RGB.
    for key in keys_to_draw:
        if data[key].shape[1] == 1:
            data[key] = data[key].expand(-1, 3, -1, -1)
        elif data[key].shape[1] != 3:
            raise ValueError(
                f"Image data must all be of shape [N, {1,3}, H, W]. Got shape {data[key].shape}."
            )

        data[key] = data[key].clamp(min=0, max=255)

        if data[key].shape[2] != img_h or data[key].shape[3] != img_w:
            data[key] = thf.interpolate(data[key], size=(img_h, img_w), mode="area")
        if scale_factor is not None:
            data[key] = thf.interpolate(data[key], scale_factor=scale_factor, mode="area")

    # Make an image for each grid cell by labeling and concatenating a sample
    # from each key in the data.
    cell_imgs = []
    for i in range(n_cells):
        imgs = [data[key][i].byte().cpu().numpy().transpose(1, 2, 0) for key in keys_to_draw]
        imgs = [np.ascontiguousarray(img) for img in imgs]
        if draw_labels:
            for img, label in zip(imgs, keys_to_draw):
                # Each label is drawn twice — black offset by 1px, then white —
                # so it stays legible on any background.
                cv2.putText(
                    img, label, (31, 31), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 2, cv2.LINE_AA
                )
                cv2.putText(
                    img,
                    label,
                    (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75,
                    (255, 255, 255),
                    2,
                    cv2.LINE_AA,
                )
        cell_imgs.append(np.concatenate(imgs, axis=1))

    cell_h, cell_w = cell_imgs[0].shape[:2]

    # Find the most-square grid layout.
    if grid_size is not None:
        gh, gw = grid_size
        if gh * gw < n_cells:
            raise ValueError(
                f"Requested grid size ({gh}, {gw}) (H, W) cannot hold {n_cells} images."
            )
    else:
        # Exhaustive search over layouts that fit all cells; tie-break by the
        # most-square pixel footprint, then smaller max side, then fewest
        # empty cells.
        best_diff = np.inf
        best_side = np.inf
        best_leftover = np.inf
        gw = 0
        for gh_ in range(1, n_cells + 1):
            for gw_ in range(1, n_cells + 1):
                if gh_ * gw_ < n_cells:
                    continue

                h = gh_ * cell_h
                w = gw_ * cell_w
                diff = abs(h - w)
                max_side = max(gh_, gw_)
                leftover = gh_ * gw_ - n_cells
                if diff <= best_diff and max_side <= best_side and leftover <= best_leftover:
                    gh = gh_
                    gw = gw_
                    best_diff = diff
                    best_side = max_side
                    best_leftover = leftover

    # Put the images into the grid (row-major order; unused cells stay black).
    img = np.zeros((gh * cell_h, gw * cell_w, 3), dtype=np.uint8)
    for i in range(n_cells):
        gr = i // gw
        gc = i % gw
        img[gr * cell_h : (gr + 1) * cell_h, gc * cell_w : (gc + 1) * cell_w] = cell_imgs[i]
    return img
def make_image_grid_batched(
    data: Dict[str, th.Tensor],
    max_row_hight: Optional[int] = None,
    draw_labels: bool = True,
    input_is_in_0_1: bool = False,
) -> np.ndarray:
    """A simpler version of `make_image_grid` that works for the whole batch at once.

    Use case: a dict containing diagnostic output. All tensors in the dict have
    a shape of [N, {1, 3}, H, W] where N coincides for all entries. The goal is
    to arrange the images into a grid so that each column corresponds to a key,
    and each row corresponds to an index in the batch.

    Example:
        Data:
            dict = {"A": A, "B": B, "C": C}
        Grid:
            | A[0] | B[0] | C[0] |
            | A[1] | B[1] | C[1] |
            | A[2] | B[2] | C[2] |

    The grid will be arranged such a way, that:
    - Each row corresponds to an index in the batch.
    - Each column corresponds to a key in the dict.
    - For each row, images are resized so that the vertical edge matches the
      largest image.

    Args:
        data (Dict[str, th.Tensor]): Diagnostic data.
        max_row_hight (int): The maximum allowed height of a row.
        draw_labels (bool): Whether the keys should be drawn as labels.
        input_is_in_0_1 (bool): If true, input data is assumed to be in range
            0..1, otherwise in range 0..255.
    """
    data_list = list(data.values())
    keys_to_draw = data.keys()

    if not all(x.ndim == 4 and (x.shape[1] == 1 or x.shape[1] == 3) for x in data_list):
        raise ValueError(
            f"Image data must all be of shape [N, {1, 3}, H, W]. Got shapes {[x.shape for x in data_list]}."
        )

    if not all(x.shape[0] == data_list[0].shape[0] for x in data_list):
        raise ValueError("Batch sizes must be the same.")

    # Give every entry the same height (optionally capped at `max_row_hight`).
    data_list = resize_to_match(data_list, edge="vertical", max_size=max_row_hight)
    if not all(x.shape[2] == data_list[0].shape[2] for x in data_list):
        raise ValueError("Heights must be the same.")

    with th.no_grad():
        # Make all images contain 3 channels
        data_list = [x.expand(-1, 3, -1, -1) if x.shape[1] == 1 else x for x in data_list]

        # Convert to byte
        scale = 255.0 if input_is_in_0_1 else 1.0
        data_list = [x.mul(scale).round().clamp(min=0, max=255).byte() for x in data_list]

        # Convert to numpy and make it BHWC
        data_list = [x.cpu().numpy().transpose(0, 2, 3, 1) for x in data_list]

    rows = []
    # Iterate by key
    for j, label in zip(range(len(data_list)), keys_to_draw):
        col = []
        # Iterate by batch index
        for i in range(data_list[0].shape[0]):
            img = np.ascontiguousarray(data_list[j][i])
            if draw_labels:
                # Shadowed text: black offset by 1px, then white on top.
                cv2.putText(
                    img, label, (31, 31), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 2, cv2.LINE_AA
                )
                cv2.putText(
                    img,
                    label,
                    (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75,
                    (255, 255, 255),
                    2,
                    cv2.LINE_AA,
                )
            col.append(img)
        # Stack this key's batch vertically (one grid column per key).
        rows.append(np.concatenate(col, axis=0))
    return np.concatenate(rows, axis=1)
def resize_to_match(
    tensors: List[th.Tensor],
    edge: str = "long",
    mode: str = "nearest",
    max_size: Optional[int] = None,
) -> List[th.Tensor]:
    """Resizes a list of image tensors s.t. a chosen edge ("long", "short",
    "vertical", or "horizontal") matches that edge on the largest image in
    the list (optionally capped at `max_size`)."""
    assert edge in {"short", "long", "vertical", "horizontal"}

    # Per-dimension maximum across all tensors; only the last two are used.
    largest = [max(sizes) for sizes in zip(*(t.shape for t in tensors))]

    out = []
    for tensor in tensors:
        if edge == "vertical":
            edge_idx = 0
        elif edge == "horizontal":
            edge_idx = 1
        elif edge == "long":
            edge_idx = np.argmax(tensor.shape[-2:])
        else:  # edge == "short"
            edge_idx = np.argmin(tensor.shape[-2:])

        target = largest[-2:][edge_idx]
        if max_size is not None:
            target = min(max_size, target)

        current = tensor.shape[-2:][edge_idx]
        if current != target:
            tensor = thf.interpolate(
                tensor,
                scale_factor=target / current,
                align_corners=False if mode in ["bilinear", "bicubic"] else None,
                recompute_scale_factor=True,
                mode=mode,
            )
        out.append(tensor)
    return out
def draw_text(
    canvas: th.Tensor,
    text: Union[str, List[str]],
    loc: Tuple[float, float],
    font: int = cv2.FONT_HERSHEY_SIMPLEX,
    scale: float = 2,
    color: Tuple[float, float, float] = (0, 0, 0),
    thickness: float = 3,
) -> th.Tensor:
    """Helper used by Rosetta to draw text on tensors using OpenCV.

    Args:
        canvas: NCHW image batch tensor.
        text: A single string drawn on every image, or a list with one string
            per batch element. (The annotation previously said `str` only,
            but the implementation has always accepted a list.)
        loc: Position passed straight to `cv2.putText`.
        font/scale/color/thickness: Forwarded to `cv2.putText`.

    Returns:
        A byte tensor (NCHW) with the text rendered, on `canvas`'s device.
    """
    device = canvas.device
    # OpenCV draws on HWC numpy arrays, so round-trip through NHWC.
    canvas_new = canvas.cpu().numpy().transpose(0, 2, 3, 1)
    for i in range(canvas_new.shape[0]):
        image = canvas_new[i].copy()
        if isinstance(text, list):
            cv2.putText(image, text[i], loc, font, scale, color, thickness)
        else:
            cv2.putText(image, text, loc, font, scale, color, thickness)
        canvas_new[i] = image
    canvas_tensor = th.ByteTensor(canvas_new.transpose(0, 3, 1, 2)).to(device)
    return canvas_tensor
# TODO(T153410551): Deprecate this function
def visualize_scalar_image(
    img: np.ndarray,
    min_val: float,
    val_range: float,
    color_map: int = cv2.COLORMAP_JET,
    convert_to_rgb: bool = True,
) -> np.ndarray:
    """
    Visualizes a scalar image using the specified color map: values are
    normalized as (img - min_val) / val_range before the map is applied.
    """
    normalized = (img.astype(np.float32) - min_val) / val_range
    vis = cv2.applyColorMap((normalized * 255).clip(0, 255).astype(np.uint8), color_map)
    if convert_to_rgb:
        # cv2.applyColorMap emits BGR; flip to RGB for display pipelines.
        vis = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
    return vis
def process_depth_image(
    depth_img: np.ndarray, depth_min: float, depth_max: float, depth_err_range: float
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Process the depth image within the range for visualization: values
    outside (0, depth_max] are zeroed, and the amount zeroed out is returned
    as a normalized error image.

    NOTE(review): `depth_min` is accepted but never used — the lower bound is
    hard-coded to 0; confirm whether that is intentional.
    """
    in_range = np.logical_and(depth_img > 0, depth_img <= depth_max)
    filtered = np.zeros_like(depth_img)
    filtered[in_range] = depth_img[in_range]
    err_image = np.abs(filtered - depth_img).astype(np.float32) / depth_err_range
    return filtered, err_image
def draw_keypoints(img: np.ndarray, kpt: np.ndarray, kpt_w: float) -> np.ndarray:
    """
    Draws keypoints (rows of x, y, weight) onto `img` in place as small
    filled circles whose color is scaled by weight * kpt_w.
    """
    weights = kpt[:, 2] * kpt_w
    colors = np.array([-255.0, 255.0, -255.0]) * weights[:, np.newaxis]
    centers = np.column_stack((kpt[:, 0].astype(np.int32), kpt[:, 1].astype(np.int32)))
    for center, clr in zip(centers, colors):
        cv2.circle(img, tuple(center), 2, tuple(clr), -1)
    return img
def tensor_to_rgb_array(tensor: th.Tensor) -> np.ndarray:
    """Converts an NCHW tensor to an NHWC numpy array (channels last), which
    is the layout visualization code expects."""
    return tensor.detach().permute(0, 2, 3, 1).cpu().numpy()
def draw_keypoints_with_color(
    image: np.ndarray, keypoints_uvw: np.ndarray, color: Color
) -> np.ndarray:
    """Renders keypoints onto a copy of `image` in a single color.

    The circles are drawn into a separate float canvas that is added to the
    image afterwards, so overlapping keypoints blend additively (saturating
    at 255) instead of overwriting each other.
    """
    assert len(image.shape) == 3
    assert image.shape[-1] == 3
    overlay = np.zeros(image.shape, dtype=np.float32)
    for uv in keypoints_uvw[:, :2].astype(np.int32):
        cv2.circle(overlay, tuple(uv), 2, color, -1)
    return (image + overlay).clip(0.0, 255.0).astype(np.uint8)
def draw_contour(img: np.ndarray, contour_corrs: np.ndarray) -> np.ndarray:
    """
    Draws contour correspondences onto `img` in place: a dot at each mesh
    point and a line from it to the matching segmentation point.

    Each row of `contour_corrs` is read as (_, mesh_u, mesh_v, seg_u, seg_v);
    the first column is unused here.
    """
    for corr in contour_corrs:
        mesh_uv = corr[1:3]
        seg_uv = corr[3:]

        # Round the dot position to the nearest pixel.
        center = (int(mesh_uv[0] + 0.5), int(mesh_uv[1] + 0.5))
        cv2.circle(img, center, 1, (255, 0, 0), -1)

        cv2.line(
            img,
            (int(mesh_uv[0]), int(mesh_uv[1])),
            (int(seg_uv[0]), int(seg_uv[1])),
            (-255, -255, 255),
            1,
        )
    return img