import cv2
import numpy as np
import torch
import torch.nn.functional as F
from einops import einsum

import genmo.utils.matrix as matrix
from genmo.utils.pylogger import Log
from genmo.utils.rotation_conversions import (
    euler_angles_to_matrix,
    matrix_to_quaternion,
    matrix_to_rotation_6d,
    quaternion_to_axis_angle,
)
from genmo.utils.so3 import so3_exp_map, so3_log_map
from third_party.GVHMR.hmr4d.utils.geo.quaternion import qbetween


def homo_points(points):
    """
    Args:
        points: (..., C)
    Returns:
        (..., C+1), with 1 padded
    """
    return F.pad(points, [0, 1], value=1.0)


def apply_Ts_on_seq_points(points, Ts):
    """Apply a per-point transform to each point in a sequence.

    Args:
        points: (..., N, 3)
        Ts: (..., N, 4, 4), one transform per point
    Returns:
        (..., N, 3)
    """
    # FIX: was `torch.torch.einsum` (accidental doubled attribute access)
    points = (
        torch.einsum("...ki,...i->...k", Ts[..., :3, :3], points) + Ts[..., :3, 3]
    )
    return points


def apply_T_on_points(points, T):
    """Apply one transform to a batch of points.

    Args:
        points: (..., N, 3)
        T: (..., 4, 4)
    Returns:
        (..., N, 3)
    """
    points_T = (
        torch.einsum("...ki,...ji->...jk", T[..., :3, :3], points)
        + T[..., None, :3, 3]
    )
    return points_T


def T_transforms_points(T, points, pattern):
    """Manual mode of apply_T_on_points.

    Args:
        T: (..., 4, 4)
        points: (..., 3)
        pattern: einsum pattern, e.g. "... c d, ... d -> ... c"
    """
    return einsum(T, homo_points(points), pattern)[..., :3]


def project_p2d(points, K=None, is_pinhole=True):
    """Project 3D points to 2D (pinhole or orthographic).

    Args:
        points: (..., (N), 3)
        K: (..., 3, 3), optional intrinsics
    Returns:
        shape is similar to points but without z
    """
    points = points.clone()
    if is_pinhole:
        z = points[..., [-1]]
        # Avoid division by (near-)zero depth
        z.masked_fill_(z.abs() < 1e-6, 1e-6)
        points_proj = points / z
    else:  # orthogonal
        points_proj = F.pad(points[..., :2], (0, 1), value=1)

    if K is not None:
        # Handle optional N dimension
        if len(points_proj.shape) == len(K.shape):
            p2d_h = torch.einsum("...ki,...ji->...jk", K, points_proj)
        else:
            p2d_h = torch.einsum("...ki,...i->...k", K, points_proj)
    else:
        p2d_h = points_proj[..., :2]
    return p2d_h[..., :2]


def gen_uv_from_HW(H, W, device="cpu"):
    """Returns: (H, W, 2), as float.
    Note: uv not ij
    """
    # indexing="ij" matches the old torch.meshgrid default (silences deprecation warning)
    grid_v, grid_u = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    return (
        torch.stack(
            [grid_u, grid_v],
            dim=-1,
        )
        .float()
        .to(device)
    )  # (H, W, 2)


def unproject_p2d(uv, z, K):
    """We assume a pinhole camera for unprojection.

    Args:
        uv: (B, N, 2)
        z: (B, N, 1)
        K: (B, 3, 3)
    Returns:
        (B, N, 3)
    """
    xy_atz1 = (uv - K[:, None, :2, 2]) / K[:, None, [0, 1], [0, 1]]  # (B, N, 2)
    xyz = torch.cat([xy_atz1 * z, z], dim=-1)
    return xyz


def cvt_p2d_from_i_to_c(uv, K):
    """Image coordinates -> normalized camera coordinates.

    Args:
        uv: (..., 2) or (..., N, 2)
        K: (..., 3, 3)
    Returns:
        the same shape as input uv
    """
    if len(uv.shape) == len(K.shape):
        xy = (uv - K[..., None, :2, 2]) / K[..., None, [0, 1], [0, 1]]
    else:  # without N
        xy = (uv - K[..., :2, 2]) / K[..., [0, 1], [0, 1]]
    return xy


def cvt_to_bi01_p2d(p2d, bbx_lurb):
    """Normalize image points to [0, 1] inside a bbx.

    Args:
        p2d: (..., (N), 2)
        bbx_lurb: (..., 4)
    """
    if len(p2d.shape) == len(bbx_lurb.shape) + 1:
        bbx_lurb = bbx_lurb[..., None, :]
    bbx_wh = bbx_lurb[..., 2:] - bbx_lurb[..., :2]
    bi01_p2d = (p2d - bbx_lurb[..., :2]) / bbx_wh
    return bi01_p2d


def cvt_from_bi01_p2d(bi01_p2d, bbx_lurb):
    """Use bbx_lurb to resize bi01_p2d to p2d (image-coordinates).

    Args:
        bi01_p2d: (..., 2) or (..., N, 2)
        bbx_lurb: (..., 4)
    Returns:
        p2d: shape is the same as input
    """
    bbx_wh = bbx_lurb[..., 2:] - bbx_lurb[..., :2]  # (..., 2)
    if len(bi01_p2d.shape) == len(bbx_wh.shape) + 1:
        p2d = (bi01_p2d * bbx_wh.unsqueeze(-2)) + bbx_lurb[..., None, :2]
    else:
        p2d = (bi01_p2d * bbx_wh) + bbx_lurb[..., :2]
    return p2d


def cvt_p2d_from_bi01_to_c(bi01, bbxs_lurb, Ks):
    """
    Args:
        bi01: (..., (N), 2), value in range (0,1), the point in the bbx image
        bbxs_lurb: (..., 4)
        Ks: (..., 3, 3)
    Returns:
        c: (..., (N), 2)
    """
    i = cvt_from_bi01_p2d(bi01, bbxs_lurb)
    c = cvt_p2d_from_i_to_c(i, Ks)
    return c


def cvt_p2d_from_pm1_to_i(p2d_pm1, bbx_xys):
    """
    Args:
        p2d_pm1: (..., (N), 2), value in range (-1,1), the point in the bbx image
        bbx_xys: (..., 3)
    Returns:
        p2d: (..., (N), 2)
    """
    return bbx_xys[..., :2] + p2d_pm1 * bbx_xys[..., [2]] / 2


def uv2l_index(uv, W):
    """Flatten (u, v) pixel coordinates to a linear index (row-major)."""
    return uv[..., 0] + uv[..., 1] * W


def l2uv_index(L, W):
    """Inverse of uv2l_index: linear index -> stacked (u, v)."""
    v = torch.div(L, W, rounding_mode="floor")
    u = L % W
    return torch.stack([u, v], dim=-1)


def transform_mat(R, t):
    """
    Args:
        R: Bx3x3 array of a batch of rotation matrices
        t: Bx3x(1) array of a batch of translation vectors
    Returns:
        T: Bx4x4 Transformation matrix
    """
    # No padding left or right, only add an extra row
    if len(R.shape) > len(t.shape):
        t = t[..., None]
    return torch.cat([F.pad(R, [0, 0, 0, 1]), F.pad(t, [0, 0, 0, 1], value=1)], dim=-1)


def axis_angle_to_matrix_exp_map(aa):
    """use pytorch3d so3_exp_map

    Args:
        aa: (*, 3)
    Returns:
        R: (*, 3, 3)
    """
    print("Use pytorch3d.transforms.axis_angle_to_matrix instead!!!")
    ori_shape = aa.shape[:-1]
    return so3_exp_map(aa.reshape(-1, 3)).reshape(*ori_shape, 3, 3)


def matrix_to_axis_angle_log_map(R):
    """use pytorch3d so3_log_map

    Args:
        R: (*, 3, 3)
    Returns:
        aa: (*, 3)
    """
    print(
        "WARINING! I met singularity problem with this function, use matrix_to_axis_angle instead!"
    )
    ori_shape = R.shape[:-2]
    return so3_log_map(R.reshape(-1, 3, 3)).reshape(*ori_shape, 3)


def matrix_to_axis_angle(R):
    """Rotation matrix -> axis-angle via quaternion (avoids log-map singularities).

    Args:
        R: (*, 3, 3)
    Returns:
        aa: (*, 3)
    """
    return quaternion_to_axis_angle(matrix_to_quaternion(R))


def ransac_PnP(K, pts_2d, pts_3d, err_thr=10):
    """solve pnp

    Args:
        K: (3, 3) intrinsics
        pts_2d: (N, 2) image points
        pts_3d: (N, 3) object points
        err_thr: RANSAC reprojection error threshold (pixels)
    Returns:
        (pose (3, 4), pose_homo (4, 4), inliers); identity pose on cv2 failure
    """
    dist_coeffs = np.zeros(shape=[8, 1], dtype="float64")

    pts_2d = np.ascontiguousarray(pts_2d.astype(np.float64))
    pts_3d = np.ascontiguousarray(pts_3d.astype(np.float64))
    K = K.astype(np.float64)

    try:
        _, rvec, tvec, inliers = cv2.solvePnPRansac(
            pts_3d,
            pts_2d,
            K,
            dist_coeffs,
            reprojectionError=err_thr,
            iterationsCount=10000,
            flags=cv2.SOLVEPNP_EPNP,
        )
        rotation = cv2.Rodrigues(rvec)[0]

        pose = np.concatenate([rotation, tvec], axis=-1)
        pose_homo = np.concatenate([pose, np.array([[0, 0, 0, 1]])], axis=0)

        inliers = [] if inliers is None else inliers
        return pose, pose_homo, inliers
    except cv2.error:
        print("CV ERROR")
        return np.eye(4)[:3], np.eye(4), []


def ransac_PnP_batch(K_raw, pts_2d, pts_3d, err_thr=10):
    """Run ransac_PnP per batch element; returns stacked (R (B,3,3), t (B,3))."""
    fit_R, fit_t = [], []
    for b in range(K_raw.shape[0]):
        pose, _, inliers = ransac_PnP(K_raw[b], pts_2d[b], pts_3d[b], err_thr=err_thr)
        fit_R.append(pose[:3, :3])
        fit_t.append(pose[:3, 3])
    fit_R = np.stack(fit_R, axis=0)
    fit_t = np.stack(fit_t, axis=0)
    return fit_R, fit_t


def get_nearby_points(points, query_verts, padding=0.0, p=1):
    """Select points near query_verts.

    Args:
        points: (S, 3)
        query_verts: (V, 3)
        padding: margin (p=1: per-axis box margin; p=2: euclidean radius)
        p: 1 -> axis-aligned bounding-box test, 2 -> kNN distance test
    """
    if p == 1:
        max_xyz = query_verts.max(0)[0] + padding
        min_xyz = query_verts.min(0)[0] - padding
        idx = (
            (
                ((points - min_xyz) > 0).all(dim=-1)
                * ((points - max_xyz) < 0).all(dim=-1)
            )
            .nonzero()
            .squeeze(-1)
        )
        nearby_points = points[idx]
    elif p == 2:
        # Only needed for this branch; keep the heavy import local
        import pytorch3d.ops.knn as knn

        squared_dist, _, _ = knn.knn_points(
            points[None], query_verts[None], K=1, return_nn=False
        )
        mask = squared_dist[0, :, 0] < padding**2  # (S,)
        nearby_points = points[mask]
    return nearby_points


def unproj_bbx_to_fst(bbx_lurb, K, near_z=0.5, far_z=12.5):
    """Unproject a bbx to a camera-frame frustum (near plane then far plane).

    Args:
        bbx_lurb: (B, 4)
        K: (B, 3, 3)
        near_z, far_z: float or (B,) tensors
    Returns:
        c_frustum_points: (B, 8, 3)
    """
    B = bbx_lurb.size(0)
    # 4 corners repeated for near and far planes
    uv = bbx_lurb[:, [[0, 1], [2, 1], [2, 3], [0, 3], [0, 1], [2, 1], [2, 3], [0, 3]]]
    if isinstance(near_z, float):
        z = uv.new([near_z] * 4 + [far_z] * 4).reshape(1, 8, 1).repeat(B, 1, 1)
    else:
        z = torch.cat(
            [
                near_z[:, None, None].repeat(1, 4, 1),
                far_z[:, None, None].repeat(1, 4, 1),
            ],
            dim=1,
        )
    c_frustum_points = unproject_p2d(uv, z, K)  # (B, 8, 3)
    return c_frustum_points


def convert_bbx_xys_to_lurb(bbx_xys):
    """
    Args:
        bbx_xys (..., 3) -> bbx_lurb (..., 4)
    """
    size = bbx_xys[..., 2:]
    center = bbx_xys[..., :2]
    lurb = torch.cat([center - size / 2, center + size / 2], dim=-1)
    return lurb


def convert_lurb_to_bbx_xys(bbx_lurb):
    """
    Args:
        bbx_lurb (..., 4) -> bbx_xys (..., 3) be aware that it is squared
    """
    size = (bbx_lurb[..., 2:] - bbx_lurb[..., :2]).max(-1, keepdim=True)[0]
    center = (bbx_lurb[..., :2] + bbx_lurb[..., 2:]) / 2
    return torch.cat([center, size], dim=-1)


def get_bbx_xys(
    i_j2d, i_j2d_mask=None, bbx_ratio=[192, 256], do_augment=False, base_enlarge=1.2
):
    """
    Args:
        i_j2d: (B, L, J, 3) [x,y,c] or (B, L, J, 2) [x,y]
        i_j2d_mask: (B, L, J) boolean mask indicating valid joints, if None use all joints
        bbx_ratio: [width, height] ratio for the bounding box
        do_augment: whether to apply random augmentation
        base_enlarge: factor to enlarge the bounding box
    Returns:
        bbx_xys: (B, L, 3) [center_x, center_y, size]
    """
    # Apply mask if provided
    if i_j2d_mask is not None:
        # Create a masked version of i_j2d for min/max calculations
        # For min calculation, set masked-out joints to large positive values
        # For max calculation, set masked-out joints to large negative values
        mask_expanded = i_j2d_mask.unsqueeze(-1)  # (B, L, J, 1)

        # Create copies for min and max calculations
        i_j2d_for_min = i_j2d.clone()
        i_j2d_for_max = i_j2d.clone()

        # Set coordinates of masked joints appropriately
        invalid_mask = ~mask_expanded.expand_as(i_j2d[..., :2])
        i_j2d_for_min[..., :2][invalid_mask] = float(
            "inf"
        )  # For min, set to large positive
        i_j2d_for_max[..., :2][invalid_mask] = float(
            "-inf"
        )  # For max, set to large negative

        # Calculate min/max using the filtered joints
        min_x = i_j2d_for_min[..., 0].min(-1)[0]
        max_x = i_j2d_for_max[..., 0].max(-1)[0]
        min_y = i_j2d_for_min[..., 1].min(-1)[0]
        max_y = i_j2d_for_max[..., 1].max(-1)[0]
    else:
        # Use all joints
        min_x = i_j2d[..., 0].min(-1)[0]
        max_x = i_j2d[..., 0].max(-1)[0]
        min_y = i_j2d[..., 1].min(-1)[0]
        max_y = i_j2d[..., 1].max(-1)[0]

    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2

    # Size
    h = max_y - min_y  # (B, L)
    w = max_x - min_x  # (B, L)

    if True:  # fit w and h into aspect-ratio
        aspect_ratio = bbx_ratio[0] / bbx_ratio[1]
        mask1 = w > aspect_ratio * h
        h[mask1] = w[mask1] / aspect_ratio
        mask2 = w < aspect_ratio * h
        w[mask2] = h[mask2] * aspect_ratio

    # apply a common factor to enlarge the bounding box
    bbx_size = torch.max(h, w) * base_enlarge

    if do_augment:
        B, L = bbx_size.shape[:2]
        device = bbx_size.device
        if True:  # per-frame augmentation
            scaleFactor = torch.rand((B, L), device=device) * 0.3 + 1.05  # 1.05~1.35
            txFactor = torch.rand((B, L), device=device) * 1.6 - 0.8  # -0.8~0.8
            tyFactor = torch.rand((B, L), device=device) * 1.6 - 0.8  # -0.8~0.8
        else:  # per-sequence augmentation
            scaleFactor = torch.rand((B, 1), device=device) * 0.3 + 1.05  # 1.05~1.35
            txFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8  # -0.8~0.8
            tyFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8  # -0.8~0.8
        raw_bbx_size = bbx_size / base_enlarge
        bbx_size = raw_bbx_size * scaleFactor
        center_x += raw_bbx_size / 2 * ((scaleFactor - 1) * txFactor)
        center_y += raw_bbx_size / 2 * ((scaleFactor - 1) * tyFactor)

    return torch.stack([center_x, center_y, bbx_size], dim=-1)


def get_bbx_xys_from_xyxy(bbx_xyxy, base_enlarge=1.2):
    """
    Args:
        bbx_xyxy: (N, 4) [x1, y1, x2, y2]
    Returns:
        bbx_xys: (N, 3) [center_x, center_y, size]
    """
    i_p2d = torch.stack([bbx_xyxy[:, [0, 1]], bbx_xyxy[:, [2, 3]]], dim=1)  # (L, 2, 2)
    bbx_xys = get_bbx_xys(i_p2d[None], base_enlarge=base_enlarge)[0]
    return bbx_xys


def normalize_kp2d(obs_kp2d, bbx_xys, clamp_scale_min=False):
    """
    Args:
        obs_kp2d: (B, L, J, 3) [x, y, c]
        bbx_xys: (B, L, 3)
    Returns:
        obs: (B, L, J, 3) [x, y, c]
    """
    obs_xy = obs_kp2d[..., :2]  # (B, L, J, 2)
    center = bbx_xys[..., :2]
    scale = bbx_xys[..., [2]]

    # Mark keypoints outside the bounding box as invisible
    xy_max = center + scale / 2
    xy_min = center - scale / 2
    invisible_mask = (
        (obs_xy[..., 0] < xy_min[..., None, 0])
        + (obs_xy[..., 0] > xy_max[..., None, 0])
        + (obs_xy[..., 1] < xy_min[..., None, 1])
        + (obs_xy[..., 1] > xy_max[..., None, 1])
    )

    scale = scale.clamp(min=1e-2)
    normalized_obs_xy = 2 * (obs_xy - center.unsqueeze(-2)) / scale.unsqueeze(-2)

    if obs_kp2d.shape[-1] > 2:
        obs_conf = obs_kp2d[..., 2]  # (B, L, J)
        obs_conf = obs_conf * ~invisible_mask
        return torch.cat([normalized_obs_xy, obs_conf[..., None]], dim=-1)
    else:
        return normalized_obs_xy


# ================== AZ/AY Transformations ================== #


def compute_T_ayf2az(joints, inverse=False):
    """
    Args:
        joints: (B, J, 3), in the start-frame, az-coordinate
    Returns:
        if inverse == False: T_af2az: (B, 4, 4)
        else               : T_az2af: (B, 4, 4)
    """
    t_ayf2az = joints[:, 0, :].detach().clone()
    t_ayf2az[:, 2] = 0  # do not modify z

    RL_xy_h = (
        joints[:, 1, [0, 1]] - joints[:, 2, [0, 1]]
    )  # (B, 2), hip point to left side
    RL_xy_s = (
        joints[:, 16, [0, 1]] - joints[:, 17, [0, 1]]
    )  # (B, 2), shoulder point to left side
    RL_xy = RL_xy_h + RL_xy_s
    I_mask = (
        RL_xy.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))
    x_dir = F.pad(F.normalize(RL_xy, 2, -1), (0, 1), value=0)  # (B, 3)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 2] = 1
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayf2az = torch.stack([x_dir, y_dir, z_dir], dim=-1)  # (B, 3, 3)
    R_ayf2az[I_mask] = torch.eye(3).to(R_ayf2az)

    if inverse:
        R_az2ayf = R_ayf2az.transpose(1, 2)  # (B, 3, 3)
        t_az2ayf = -einsum(R_ayf2az, t_ayf2az, "b i j , b i -> b j")  # (B, 3)
        return transform_mat(R_az2ayf, t_az2ayf)
    else:
        return transform_mat(R_ayf2az, t_ayf2az)


def compute_T_ayfz2ay(joints, inverse=False):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        if inverse == False: T_ayfz2ay: (B, 4, 4)
        else               : T_ay2ayfz: (B, 4, 4)
    """
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0  # do not modify y

    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )  # (B, 2), hip point to left side
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )  # (B, 2), shoulder point to left side
    RL_xz = RL_xz_h + RL_xz_s
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))

    x_dir = torch.zeros_like(t_ayfz2ay)  # (B, 3)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1  # (B, 3)
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayfz2ay = torch.stack([x_dir, y_dir, z_dir], dim=-1)  # (B, 3, 3)
    R_ayfz2ay[I_mask] = torch.eye(3).to(R_ayfz2ay)

    if inverse:
        R_ay2ayfz = R_ayfz2ay.transpose(1, 2)
        t_ay2ayfz = -einsum(R_ayfz2ay, t_ayfz2ay, "b i j , b i -> b j")
        return transform_mat(R_ay2ayfz, t_ay2ayfz)
    else:
        return transform_mat(R_ayfz2ay, t_ayfz2ay)


def compute_T_ay2ayrot(joints):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        T_ay2ayrot: (B, 4, 4)
    """
    t_ayrot2ay = joints[:, 0, :].detach().clone()
    t_ayrot2ay[:, 1] = 0  # do not modify y

    B = joints.shape[0]
    euler_angle = torch.zeros((B, 3), device=joints.device)
    yrot_angle = torch.rand((B,), device=joints.device) * 2 * torch.pi
    euler_angle[:, 0] = yrot_angle
    R_ay2ayrot = euler_angles_to_matrix(euler_angle, "YXZ")  # (B, 3, 3)

    R_ayrot2ay = R_ay2ayrot.transpose(1, 2)
    t_ay2ayrot = -einsum(R_ayrot2ay, t_ayrot2ay, "b i j , b i -> b j")
    return transform_mat(R_ay2ayrot, t_ay2ayrot)


def compute_root_quaternion_ay(joints):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        root_quat: (B, 4) from z-axis to fz
    """
    joints_shape = joints.shape
    joints = joints.reshape((-1,) + joints_shape[-2:])
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0  # do not modify y

    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )  # (B, 2), hip point to left side
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )  # (B, 2), shoulder point to left side
    RL_xz = RL_xz_h + RL_xz_s
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))

    x_dir = torch.zeros_like(t_ayfz2ay)  # (B, 3)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1  # (B, 3)
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    z_dir[..., 2] += 1e-9  # avoid a degenerate zero vector for qbetween

    pos_z_vec = torch.tensor([0, 0, 1]).to(joints.device).float()  # (3,)
    root_quat = qbetween(pos_z_vec[None], z_dir)  # (B, 4)
    root_quat = root_quat.reshape(joints_shape[:-2] + (4,))
    return root_quat


# ================== Transformations between two sets of features ================== #


def similarity_transform_batch(S1, S2):
    """
    Computes a similarity transform (sR, t) that solves the orthogonal
    Procrutes problem.

    Args:
        S1, S2: (*, L, 3)
    Returns:
        ((scale, R), t) with scale (*, 1, 1), R (*, 3, 3), t (*, 3, 1)
    """
    assert S1.shape == S2.shape
    S_shape = S1.shape
    S1 = S1.reshape(-1, *S_shape[-2:])
    S2 = S2.reshape(-1, *S_shape[-2:])
    S1 = S1.transpose(-2, -1)
    S2 = S2.transpose(-2, -1)

    # --- The code is borrowed from WHAM ---
    # 1. Remove mean.
    mu1 = S1.mean(axis=-1, keepdims=True)  # axis is along N, S1(B, 3, N)
    mu2 = S2.mean(axis=-1, keepdims=True)
    X1 = S1 - mu1
    X2 = S2 - mu2

    # 2. Compute variance of X1 used for scale.
    var1 = torch.sum(X1**2, dim=1).sum(dim=1)

    # 3. The outer product of X1 and X2.
    K = X1.bmm(X2.permute(0, 2, 1))

    # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are
    # singular vectors of K.
    U, s, V = torch.svd(K)

    # Construct Z that fixes the orientation of R to get det(R)=1.
    Z = torch.eye(U.shape[1], device=S1.device).unsqueeze(0)
    Z = Z.repeat(U.shape[0], 1, 1)
    Z[:, -1, -1] *= torch.sign(torch.det(U.bmm(V.permute(0, 2, 1))))

    # Construct R.
    R = V.bmm(Z.bmm(U.permute(0, 2, 1)))

    # 5. Recover scale.
    scale = torch.cat([torch.trace(x).unsqueeze(0) for x in R.bmm(K)]) / var1

    # 6. Recover translation.
    t = mu2 - (scale.unsqueeze(-1).unsqueeze(-1) * (R.bmm(mu1)))
    # -------

    # reshape back
    # sR = scale[:, None, None] * R
    # sR = sR.reshape(*S_shape[:-2], 3, 3)
    scale = scale.reshape(*S_shape[:-2], 1, 1)
    R = R.reshape(*S_shape[:-2], 3, 3)
    t = t.reshape(*S_shape[:-2], 3, 1)

    return (scale, R), t


def kabsch_algorithm_batch(X1, X2):
    """
    Computes a rigid transform (R, t) via the Kabsch algorithm.

    Args:
        X1, X2: (*, L, 3)
    Returns:
        R: (*, 3, 3), t: (*, 3, 1) such that R @ X1 + t ~= X2
    """
    assert X1.shape == X2.shape
    X_shape = X1.shape
    X1 = X1.reshape(-1, *X_shape[-2:])
    X2 = X2.reshape(-1, *X_shape[-2:])

    # 1. Compute centroids
    centroid_X1 = torch.mean(X1, dim=-2, keepdim=True)
    centroid_X2 = torch.mean(X2, dim=-2, keepdim=True)

    # 2. Center the point sets
    X1_centered = X1 - centroid_X1
    X2_centered = X2 - centroid_X2

    # 3. Compute the covariance matrix
    H = torch.matmul(X1_centered.transpose(-2, -1), X2_centered)

    # 4. Singular value decomposition
    U, S, Vt = torch.linalg.svd(H)

    # 5. Compute the rotation matrix
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))

    # Fix reflections (ensure det(R) = +1)
    d = (torch.det(R) < 0).unsqueeze(-1).unsqueeze(-1)
    Vt = torch.where(d, -Vt, Vt)
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))

    # 6. Compute the translation vector
    t = centroid_X2.transpose(-2, -1) - torch.matmul(R, centroid_X1.transpose(-2, -1))
    # -------

    # reshape back
    R = R.reshape(*X_shape[:-2], 3, 3)
    t = t.reshape(*X_shape[:-2], 3, 1)

    return R, t


# ===== WHAM cam_angvel ===== #


def compute_cam_angvel(R_w2c, padding_last=True):
    """
    Args:
        R_w2c : (F, 3, 3)
    Returns:
        (F, 6) rotation-6d angular velocity (last frame repeated)
    """
    # R @ R0 = R1, so R = R1 @ R0^T
    cam_angvel = matrix_to_rotation_6d(
        R_w2c[1:] @ R_w2c[:-1].transpose(-1, -2)
    )  # (F-1, 6)
    # cam_angvel = (cam_angvel - torch.tensor([[1, 0, 0, 0, 1, 0]])) * FPS
    assert padding_last
    cam_angvel = torch.cat([cam_angvel, cam_angvel[-1:]], dim=0)  # (F, 6)
    return cam_angvel.float()


def compute_cam_tvel(t_w2c, padding_last=True):
    """
    Args:
        t_w2c : (F, 3)
    Returns:
        (F, 3) translation velocity (last frame repeated)
    """
    cam_tvel = t_w2c[1:] - t_w2c[:-1]
    assert padding_last
    cam_tvel = torch.cat([cam_tvel, cam_tvel[-1:]], dim=0)  # (F, 3)
    return cam_tvel.float()


def compute_cam_tcw2_vel(T_w2c, padding_last=True):
    """
    Args:
        T_w2c : (F, 4, 4)
    Returns:
        (F, 3) camera-center velocity in world frame (last frame repeated)
    """
    T_c2w = T_w2c.inverse()
    t_c2w = T_c2w[:, :3, 3]
    cam_tvel = t_c2w[1:] - t_c2w[:-1]
    assert padding_last
    cam_tvel = torch.cat([cam_tvel, cam_tvel[-1:]], dim=0)  # (F, 3)
    return cam_tvel.float()


def ransac_gravity_vec(xyz, num_iterations=100, threshold=0.05, verbose=False):
    """RANSAC a consensus direction from a set of vectors.

    Args:
        xyz: (L, 3)
        threshold: inlier angle threshold in radians
    Returns:
        (mean inlier vector (3,), inlier vectors)
    """
    N = xyz.shape[0]
    max_inliers = []
    # best_model = None
    norms = xyz.norm(dim=-1)  # (L,)
    for _ in range(num_iterations):
        # random select a sample
        sample_index = np.random.randint(N)
        sample = xyz[sample_index]  # (3,)

        # compute the angle difference between all points and the sample
        dot_product = (xyz * sample).sum(dim=-1)  # (L,)
        # FIX: cos(angle) = dot / (|a| * |b|); the previous
        # `dot_product / norms * norms[sample_index]` multiplied instead of
        # dividing by the sample norm due to operator precedence.
        angles = dot_product / (norms * norms[sample_index])  # (L,)
        angles = torch.clamp(angles, -1, 1)  # prevent numerical errors
        angles = torch.acos(angles)

        # determine the inliers
        inliers = xyz[angles < threshold]
        if len(inliers) > len(max_inliers):
            max_inliers = inliers
            # best_model = sample
        if len(max_inliers) == N:
            break

    if verbose:
        print(f"Inliers: {len(max_inliers)} / {N}")
    result = max_inliers.mean(dim=0)
    return result, max_inliers


def sequence_best_cammat(w_j3d, c_j3d, cam_rot):
    # get best camera estimation along the sequence, requires static camera
    # w_j3d: (L, J, 3)
    # c_j3d: (L, J, 3)
    # cam_rot: (L, 3, 3)
    L, J, _ = w_j3d.shape
    root_in_w = w_j3d[:, 0]  # (L, 3)
    root_in_c = c_j3d[:, 0]  # (L, 3)
    cam_mat = matrix.get_TRS(cam_rot, root_in_w)  # (L, 4, 4)
    cam_pos = matrix.get_position_from(-root_in_c[:, None], cam_mat)[:, 0]  # (L, 3)
    cam_mat = matrix.set_position(cam_mat, cam_pos)  # (L, 4, 4)

    w_j3d_expand = w_j3d[None].expand(L, -1, -1, -1)  # (L, L, J, 3)
    w_j3d_expand = w_j3d_expand.reshape(L, -1, 3)  # (L, L*J, 3)

    # get reproject error
    w_j3d_expand_in_c = matrix.get_relative_position_to(
        w_j3d_expand, cam_mat
    )  # (L, L*J, 3)
    w_j2d_expand_in_c = project_p2d(w_j3d_expand_in_c)  # (L, L*J, 2)
    w_j2d_expand_in_c = w_j2d_expand_in_c.reshape(L, L, J, 2)  # (L, L, J, 2)

    c_j2d = project_p2d(c_j3d)  # (L, J, 2)

    error = w_j2d_expand_in_c - c_j2d[None]  # (L, L, J, 2)
    error = error.norm(dim=-1).mean(dim=-1)  # (L, L)
    error = error.mean(dim=-1)  # (L,)
    ind = error.argmin()
    return cam_mat[ind], ind


def get_sequence_cammat(w_j3d, c_j3d, cam_rot):
    # w_j3d: (L, J, 3)
    # c_j3d: (L, J, 3)
    # cam_rot: (L, 3, 3)
    L, J, _ = w_j3d.shape
    root_in_w = w_j3d[:, 0]  # (L, 3)
    root_in_c = c_j3d[:, 0]  # (L, 3)
    cam_mat = matrix.get_TRS(cam_rot, root_in_w)  # (L, 4, 4)
    cam_pos = matrix.get_position_from(-root_in_c[:, None], cam_mat)[:, 0]  # (L, 3)
    cam_mat = matrix.set_position(cam_mat, cam_pos)  # (L, 4, 4)
    return cam_mat


def ransac_vec(vel, min_multiply=20, verbose=False):
    # vel: (L, 3)
    # remove outlier velocity
    N = vel.shape[0]
    vel_1 = vel[None].expand(N, -1, -1)  # (L, L, 3)
    vel_2 = vel[:, None].expand(-1, N, -1)  # (L, L, 3)
    dist_mat = (vel_1 - vel_2).norm(dim=-1)  # (L, L)
    # mask the diagonal (self-distance) with a huge value before taking min
    big_identity = torch.eye(N, device=vel.device) * 1e6
    dist_mat_ = dist_mat + big_identity
    threshold = dist_mat_.min() * min_multiply
    inner_mask = dist_mat < threshold  # (L, L)
    inner_num = inner_mask.sum(dim=-1)  # (L, )
    ind = inner_num.argmax()
    result = vel[inner_mask[ind]].mean(dim=0)  # (3,)
    if verbose:
        print(inner_mask[ind].sum().item())
    return result, inner_mask[ind]


def as_identity(R):
    """Snap near-identity rotations to the exact identity (in place)."""
    is_I = matrix_to_axis_angle(R).norm(dim=-1) < 1e-5
    R[is_I] = torch.eye(3)[None].expand(is_I.sum(), -1, -1).to(R)
    return R


def normalize_T_w2c(T_w2c):
    """Re-express a w2c trajectory relative to its first frame.

    Args:
        T_w2c: (L, 4, 4) or (4, 4)
    Returns:
        norm_T_w2c: (L, 4, 4), first frame becomes identity
    """
    if T_w2c.ndim == 2:
        T_w2c = T_w2c[None]
    L = T_w2c.shape[0]
    device = T_w2c.device
    norm_T_c2w = torch.eye(4)[None].repeat(L, 1, 1).to(device)

    T_c2w = T_w2c.inverse()
    R_c2w = as_identity(T_c2w[:, :3, :3])
    t_c2w = T_c2w[:, :3, 3]

    # align the first frame
    R0_c2w = R_c2w[:1]
    t0_c2w = t_c2w[:1]
    norm_R_c2w = R0_c2w.mT @ R_c2w
    norm_t_c2w = (R0_c2w.mT @ (t_c2w - t0_c2w)[..., None])[..., 0]

    norm_T_c2w[:, :3, :3] = norm_R_c2w
    norm_T_c2w[:, :3, 3] = norm_t_c2w
    norm_T_w2c = norm_T_c2w.inverse()
    # clean up numerical noise from the double inversion
    norm_T_w2c[:, :3, :3] = as_identity(norm_T_w2c[:, :3, :3])
    norm_T_w2c[:, 3, :3] = 0
    return norm_T_w2c