import torch
import torch.nn.functional as F


def rot6d2mat(x):
    """Convert 6D rotation representation to 3x3 rotation matrix.

    Based on Zhou et al., "On the Continuity of Rotation Representations
    in Neural Networks", CVPR 2019.

    Args:
        x: Tensor indexed as ``x[:, 0:3]`` and ``x[:, 3:6]``, i.e. the first
            three entries of each row are the first raw axis and the next
            three the second raw axis.

    Returns:
        Tensor of shape [B, 3, 3] whose columns are the orthonormal basis
        (b1, b2, b3) obtained by Gram-Schmidt plus a cross product.
    """
    a1 = x[:, 0:3]
    a2 = x[:, 3:6]
    # First basis vector: normalised first axis.
    b1 = F.normalize(a1)
    # Second basis vector: remove the b1 component from a2, then normalise.
    proj = (b1 * a2).sum(dim=1, keepdim=True)
    b2 = F.normalize(a2 - proj * b1)
    # Third basis vector completes the right-handed frame.
    b3 = torch.cross(b1, b2, dim=1)
    return torch.stack((b1, b2, b3), dim=-1)  # [B,3,3]


def quat2mat(quat):
    """Convert quaternion coefficients to rotation matrix.

    Args:
        quat: Tensor of shape [B, 4] holding (w, x, y, z) per row. The rows
            are L2-normalised internally before conversion.

    Returns:
        Tensor of shape [B, 3, 3].
    """
    norm_quat = quat / quat.norm(p=2, dim=1, keepdim=True)
    w = norm_quat[:, 0]
    x = norm_quat[:, 1]
    y = norm_quat[:, 2]
    z = norm_quat[:, 3]

    B = quat.size(0)

    w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
    wx, wy, wz = w * x, w * y, w * z
    xy, xz, yz = x * y, x * z, y * z

    # Row-major entries of the rotation matrix.
    rotMat = torch.stack(
        [
            w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz,
            2 * wz + 2 * xy, w2 - x2 + y2 - z2, 2 * yz - 2 * wx,
            2 * xz - 2 * wy, 2 * wx + 2 * yz, w2 - x2 - y2 + z2,
        ],
        dim=1,
    ).view(B, 3, 3)
    return rotMat


def mat2quat(x):
    """Convert a batch of transforms to quaternion + translation vectors.

    Args:
        x: SE3 matrix in shape [B,4,4]. Only the top-left 3x3 block
            (rotation) and the first three rows of the last column
            (translation) are read.

    Returns:
        Tensor of shape [B, 7]: the quaternion from ``mat2quat_transform``
        followed by the 3D translation.
    """
    trans = x[:, :3, 3]
    rot = x[:, :3, :3]
    quat = mat2quat_transform(rot)
    return torch.cat([quat, trans], dim=1)


def mat2quat_transform(rotation_matrix, eps=1e-6):
    """Convert 3x3 rotation matrix to 4d quaternion vector.

    Args:
        rotation_matrix: Tensor of shape [N, 3, 3].
        eps: Threshold on the (2, 2) diagonal entry used to pick which of
            the four candidate formulas is evaluated for each matrix.

    Returns:
        Tensor of shape [N, 4] with quaternions ordered (w, x, y, z).

    Raises:
        TypeError: If ``rotation_matrix`` is not a ``torch.Tensor``.
        ValueError: If ``rotation_matrix`` is not of shape [N, 3, 3].
    """
    if not torch.is_tensor(rotation_matrix):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(
            type(rotation_matrix)))

    # Require exactly three dimensions: the indexing below assumes a batch
    # axis, so a bare (3, 3) matrix must be rejected here rather than fail
    # later with an opaque indexing error.
    if rotation_matrix.dim() != 3:
        raise ValueError(
            "Input size must be a three dimensional tensor. Got {}".format(
                rotation_matrix.shape))
    if rotation_matrix.shape[-2:] != (3, 3):
        raise ValueError(
            "Input size must be a N x 3 x 3 tensor. Got {}".format(
                rotation_matrix.shape))

    rmat_t = torch.transpose(rotation_matrix, 1, 2)

    # Branch selectors based on the diagonal, chosen so that the divisor
    # t_k of the selected branch stays well away from zero.
    mask_d2 = rmat_t[:, 2, 2] < eps
    mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
    mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]

    t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q0 = torch.stack([rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
                      t0,
                      rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
                      rmat_t[:, 2, 0] + rmat_t[:, 0, 2]], -1)

    t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q1 = torch.stack([rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
                      rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
                      t1,
                      rmat_t[:, 1, 2] + rmat_t[:, 2, 1]], -1)

    t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q2 = torch.stack([rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
                      rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
                      rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
                      t2], -1)

    t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q3 = torch.stack([t3,
                      rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
                      rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
                      rmat_t[:, 0, 1] - rmat_t[:, 1, 0]], -1)

    # Exactly one of the four masks is set for each batch element.
    mask_c0 = (mask_d2 & mask_d0_d1).view(-1, 1).type_as(q0)
    mask_c1 = (mask_d2 & ~mask_d0_d1).view(-1, 1).type_as(q1)
    mask_c2 = (~mask_d2 & mask_d0_nd1).view(-1, 1).type_as(q2)
    mask_c3 = (~mask_d2 & ~mask_d0_nd1).view(-1, 1).type_as(q3)

    q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
    # The [N, 1] divisor broadcasts across the four quaternion components.
    selected_t = (t0.unsqueeze(-1) * mask_c0 + t1.unsqueeze(-1) * mask_c1
                  + t2.unsqueeze(-1) * mask_c2 + t3.unsqueeze(-1) * mask_c3)
    q /= torch.sqrt(selected_t)
    q *= 0.5
    return q


def compute_scene_scale(cam_translations):
    """
    Computes the scale for each scene based on camera translations.

    Args:
        cam_translations (Tensor): Tensor of shape [B, V, 3] representing
            camera translations for B scenes with V cameras per scene.

    Returns:
        Tensor: A tensor of shape [B] containing the scale for each scene.
    """
    # Compute the camera center for each scene as the mean translation
    # across all cameras.
    cam_centers = cam_translations.mean(dim=1)  # shape: [B, 3]

    # Compute the Euclidean distances from each camera to the scene's center.
    # Expand cam_centers to [B, 1, 3] so that it can be broadcasted against
    # cam_translations [B, V, 3].
    offsets = cam_translations - cam_centers.unsqueeze(1)
    distances = torch.norm(offsets, dim=2)  # shape: [B, V]

    # The scale is the maximum distance from the center for each scene.
    scene_scales = distances.max(dim=1)[0]  # shape: [B]
    return scene_scales