|
|
import cv2 |
|
|
import numpy as np |
|
|
import torch |
|
|
import torch.nn.functional as F |
|
|
from einops import einsum |
|
|
|
|
|
import genmo.utils.matrix as matrix |
|
|
from genmo.utils.pylogger import Log |
|
|
from genmo.utils.rotation_conversions import ( |
|
|
euler_angles_to_matrix, |
|
|
matrix_to_quaternion, |
|
|
matrix_to_rotation_6d, |
|
|
quaternion_to_axis_angle, |
|
|
) |
|
|
from genmo.utils.so3 import so3_exp_map, so3_log_map |
|
|
from third_party.GVHMR.hmr4d.utils.geo.quaternion import qbetween |
|
|
|
|
|
|
|
|
def homo_points(points):
    """Append a homogeneous coordinate of 1 to the last dimension.

    Args:
        points: (..., C)

    Returns: (..., C+1), with 1 padded
    """
    ones = torch.ones_like(points[..., :1])
    return torch.cat([points, ones], dim=-1)
|
|
|
|
|
|
|
|
def apply_Ts_on_seq_points(points, Ts):
    """Apply one rigid transform per point to a sequence of points.

    Point i is transformed by its own matrix Ts[..., i, :, :].

    Args:
        points: (..., N, 3)
        Ts: (..., N, 4, 4), only the rotation and translation parts are used

    Returns: (..., N, 3)
    """
    # Rotate then translate.  The original called `torch.torch.einsum`
    # (an accidental double attribute access); `torch.einsum` is the API.
    points = (
        torch.einsum("...ki,...i->...k", Ts[..., :3, :3], points) + Ts[..., :3, 3]
    )
    return points
|
|
|
|
|
|
|
|
def apply_T_on_points(points, T):
    """Transform a point set by a single rigid transform (per batch element).

    Args:
        points: (..., N, 3)
        T: (..., 4, 4)

    Returns: (..., N, 3)
    """
    rot = T[..., :3, :3]
    trans = T[..., None, :3, 3]
    rotated = torch.einsum("...ki,...ji->...jk", rot, points)
    return rotated + trans
|
|
|
|
|
|
|
|
def T_transforms_points(T, points, pattern):
    """manual mode of apply_T_on_points

    Applies a homogeneous 4x4 transform with a caller-supplied einsum pattern,
    then drops the homogeneous coordinate.

    T: (..., 4, 4)
    points: (..., 3)
    pattern: "... c d, ... d -> ... c"
    """
    # Pad points to homogeneous (x, y, z, 1), contract with T, keep xyz.
    return einsum(T, homo_points(points), pattern)[..., :3]
|
|
|
|
|
|
|
|
def project_p2d(points, K=None, is_pinhole=True):
    """Project 3D camera-frame points to 2D.

    Args:
        points: (..., (N), 3)
        K: (..., 3, 3), optional intrinsics; if None, normalized plane
            coordinates are returned
        is_pinhole: divide by depth if True, otherwise drop z (orthographic)

    Returns: shape is similar to points but without z
    """
    pts = points.clone()
    if is_pinhole:
        # Guard against division by (near-)zero depth.
        depth = pts[..., [-1]]
        depth.masked_fill_(depth.abs() < 1e-6, 1e-6)
        proj = pts / depth
    else:
        proj = F.pad(pts[..., :2], (0, 1), value=1)

    if K is None:
        return proj[..., :2]

    # Pick the einsum pattern depending on whether points carry an extra N dim.
    if len(proj.shape) == len(K.shape):
        projected = torch.einsum("...ki,...ji->...jk", K, proj)
    else:
        projected = torch.einsum("...ki,...i->...k", K, proj)
    return projected[..., :2]
|
|
|
|
|
|
|
|
def gen_uv_from_HW(H, W, device="cpu"):
    """Returns: (H, W, 2), as float. Note: uv not ij.

    uv[..., 0] is the column index (u) and uv[..., 1] the row index (v).
    """
    # indexing="ij" is the historical default; passing it explicitly keeps
    # behavior identical and silences the torch.meshgrid deprecation warning.
    grid_v, grid_u = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    uv = torch.stack([grid_u, grid_v], dim=-1)
    return uv.float().to(device)
|
|
|
|
|
|
|
|
def unproject_p2d(uv, z, K):
    """Unproject pixel coordinates with known depth (pinhole camera assumed).

    Args:
        uv: (B, N, 2) pixel coordinates
        z: (B, N, 1) depth per point
        K: (B, 3, 3) intrinsics

    Returns: (B, N, 3) points in camera coordinates
    """
    principal = K[:, None, :2, 2]
    focal = K[:, None, [0, 1], [0, 1]]
    xy_at_unit_depth = (uv - principal) / focal
    return torch.cat([xy_at_unit_depth * z, z], dim=-1)
|
|
|
|
|
|
|
|
def cvt_p2d_from_i_to_c(uv, K):
    """Convert image-plane pixel coords to normalized camera-plane coords.

    Args:
        uv: (..., 2) or (..., N, 2)
        K: (..., 3, 3)

    Returns: the same shape as input uv
    """
    # When uv carries an extra point dimension, broadcast K over it.
    if len(uv.shape) == len(K.shape):
        cxy = K[..., None, :2, 2]
        fxy = K[..., None, [0, 1], [0, 1]]
    else:
        cxy = K[..., :2, 2]
        fxy = K[..., [0, 1], [0, 1]]
    return (uv - cxy) / fxy
|
|
|
|
|
|
|
|
def cvt_to_bi01_p2d(p2d, bbx_lurb):
    """Normalize image points into the [0, 1] frame of a bounding box.

    p2d: (..., (N), 2)
    bbx_lurb: (..., 4), boxes as (left, up, right, bottom)
    """
    if len(p2d.shape) == len(bbx_lurb.shape) + 1:
        # p2d carries a point dim the boxes lack: broadcast each box over it.
        bbx_lurb = bbx_lurb[..., None, :]

    left_up = bbx_lurb[..., :2]
    wh = bbx_lurb[..., 2:] - left_up
    return (p2d - left_up) / wh
|
|
|
|
|
|
|
|
def cvt_from_bi01_p2d(bi01_p2d, bbx_lurb):
    """Use bbx_lurb to resize bi01_p2d to p2d (image-coordinates)

    Args:
        bi01_p2d: (..., 2) or (..., N, 2), box-normalized points in [0, 1]
        bbx_lurb: (..., 4)

    Returns:
        p2d: shape is the same as input
    """
    wh = bbx_lurb[..., 2:] - bbx_lurb[..., :2]
    has_point_dim = len(bi01_p2d.shape) == len(wh.shape) + 1
    if has_point_dim:
        return bi01_p2d * wh.unsqueeze(-2) + bbx_lurb[..., None, :2]
    return bi01_p2d * wh + bbx_lurb[..., :2]
|
|
|
|
|
|
|
|
def cvt_p2d_from_bi01_to_c(bi01, bbxs_lurb, Ks):
    """Lift box-normalized points to normalized camera-plane coordinates.

    Args:
        bi01: (..., (N), 2), value in range (0,1), the point in the bbx image
        bbxs_lurb: (..., 4)
        Ks: (..., 3, 3)

    Returns:
        c: (..., (N), 2)
    """
    # bbx-normalized -> image pixels -> normalized camera plane
    in_image = cvt_from_bi01_p2d(bi01, bbxs_lurb)
    return cvt_p2d_from_i_to_c(in_image, Ks)
|
|
|
|
|
|
|
|
def cvt_p2d_from_pm1_to_i(p2d_pm1, bbx_xys):
    """Map [-1, 1] box-normalized points to image coordinates.

    Args:
        p2d_pm1: (..., (N), 2), value in range (-1,1), the point in the bbx image
        bbx_xys: (..., 3), square boxes as (center_x, center_y, size)

    Returns:
        p2d: (..., (N), 2)
    """
    center = bbx_xys[..., :2]
    half_size = bbx_xys[..., [2]] / 2
    return center + p2d_pm1 * half_size
|
|
|
|
|
|
|
|
def uv2l_index(uv, W):
    """Flatten (u, v) pixel coordinates into row-major linear indices (width W)."""
    u, v = uv[..., 0], uv[..., 1]
    return v * W + u
|
|
|
|
|
|
|
|
def l2uv_index(L, W):
    """Inverse of uv2l_index: split row-major linear indices into (u, v)."""
    row = torch.div(L, W, rounding_mode="floor")
    col = L % W
    return torch.stack([col, row], dim=-1)
|
|
|
|
|
|
|
|
def transform_mat(R, t):
    """Assemble 4x4 homogeneous transforms from rotations and translations.

    Args:
        R: Bx3x3 array of a batch of rotation matrices
        t: Bx3x(1) array of a batch of translation vectors

    Returns:
        T: Bx4x4 Transformation matrix
    """
    if len(R.shape) > len(t.shape):
        # Promote (B, 3) translations to column vectors (B, 3, 1).
        t = t[..., None]
    # The padding values make the bottom row [0, 0, 0, 1].
    left_cols = F.pad(R, [0, 0, 0, 1])
    right_col = F.pad(t, [0, 0, 0, 1], value=1)
    return torch.cat([left_cols, right_col], dim=-1)
|
|
|
|
|
|
|
|
def axis_angle_to_matrix_exp_map(aa):
    """Convert axis-angle vectors to rotation matrices via so3_exp_map.

    Args:
        aa: (*, 3)

    Returns:
        R: (*, 3, 3)
    """
    print("Use pytorch3d.transforms.axis_angle_to_matrix instead!!!")
    batch_shape = aa.shape[:-1]
    flat_aa = aa.reshape(-1, 3)
    return so3_exp_map(flat_aa).reshape(*batch_shape, 3, 3)
|
|
|
|
|
|
|
|
def matrix_to_axis_angle_log_map(R):
    """Convert rotation matrices to axis-angle via pytorch3d's so3_log_map.

    Args:
        R: (*, 3, 3)

    Returns:
        aa: (*, 3)
    """
    # Known to hit singularities (rotations near pi); the quaternion-based
    # matrix_to_axis_angle is the safer path.  Fixed "WARINING" typo.
    print(
        "WARNING! I met singularity problem with this function, use matrix_to_axis_angle instead!"
    )
    ori_shape = R.shape[:-2]
    return so3_log_map(R.reshape(-1, 3, 3)).reshape(*ori_shape, 3)
|
|
|
|
|
|
|
|
def matrix_to_axis_angle(R):
    """Convert rotation matrices to axis-angle, going through quaternions.

    Args:
        R: (*, 3, 3)

    Returns:
        aa: (*, 3)
    """
    quat = matrix_to_quaternion(R)
    return quaternion_to_axis_angle(quat)
|
|
|
|
|
|
|
|
def ransac_PnP(K, pts_2d, pts_3d, err_thr=10):
    """Solve PnP with RANSAC to recover a camera pose from 2D-3D matches.

    Args:
        K: (3, 3) camera intrinsics.
        pts_2d: (N, 2) image points.
        pts_3d: (N, 3) corresponding 3D points.
        err_thr: RANSAC reprojection-error threshold in pixels.

    Returns:
        pose: (3, 4) [R|t] from cv2.solvePnPRansac.
        pose_homo: (4, 4) homogeneous version of pose.
        inliers: RANSAC inlier indices; [] when none found or on failure.
    """
    # Zero lens distortion assumed.
    dist_coeffs = np.zeros(shape=[8, 1], dtype="float64")

    # OpenCV requires contiguous float64 inputs.
    pts_2d = np.ascontiguousarray(pts_2d.astype(np.float64))
    pts_3d = np.ascontiguousarray(pts_3d.astype(np.float64))
    K = K.astype(np.float64)

    try:
        _, rvec, tvec, inliers = cv2.solvePnPRansac(
            pts_3d,
            pts_2d,
            K,
            dist_coeffs,
            reprojectionError=err_thr,
            iterationsCount=10000,
            flags=cv2.SOLVEPNP_EPNP,
        )

        # Rodrigues vector -> 3x3 rotation matrix.
        rotation = cv2.Rodrigues(rvec)[0]

        pose = np.concatenate([rotation, tvec], axis=-1)
        pose_homo = np.concatenate([pose, np.array([[0, 0, 0, 1]])], axis=0)

        inliers = [] if inliers is None else inliers

        return pose, pose_homo, inliers
    except cv2.error:
        # Degenerate input (e.g. too few points): fall back to identity pose.
        print("CV ERROR")
        return np.eye(4)[:3], np.eye(4), []
|
|
|
|
|
|
|
|
def ransac_PnP_batch(K_raw, pts_2d, pts_3d, err_thr=10):
    """Run ransac_PnP independently for every batch element.

    Args:
        K_raw: (B, 3, 3); pts_2d: (B, N, 2); pts_3d: (B, N, 3)

    Returns:
        fit_R: (B, 3, 3) rotations; fit_t: (B, 3) translations
    """
    rotations, translations = [], []
    for K, p2d, p3d in zip(K_raw, pts_2d, pts_3d):
        pose, _, _ = ransac_PnP(K, p2d, p3d, err_thr=err_thr)
        rotations.append(pose[:3, :3])
        translations.append(pose[:3, 3])
    return np.stack(rotations, axis=0), np.stack(translations, axis=0)
|
|
|
|
|
|
|
|
def get_nearby_points(points, query_verts, padding=0.0, p=1):
    """Select the subset of `points` lying near `query_verts`.

    Fixes vs. the original: the docstring was a dead string expression placed
    after an import; pytorch3d was imported even on the p=1 path; an invalid
    `p` raised NameError on an undefined variable.

    Args:
        points: (S, 3)
        query_verts: (V, 3)
        padding: box margin (p=1) or Euclidean radius (p=2)
        p: 1 -> axis-aligned bounding-box test; 2 -> nearest-neighbor distance

    Returns:
        (S', 3) subset of `points`.

    Raises:
        ValueError: if `p` is not 1 or 2.
    """
    if p == 1:
        # Keep points strictly inside the padded AABB of the query vertices.
        max_xyz = query_verts.max(0)[0] + padding
        min_xyz = query_verts.min(0)[0] - padding
        inside = ((points - min_xyz) > 0).all(dim=-1) * ((points - max_xyz) < 0).all(
            dim=-1
        )
        idx = inside.nonzero().squeeze(-1)
        return points[idx]
    elif p == 2:
        # Local import: heavy optional dependency, only needed for this path.
        import pytorch3d.ops.knn as knn

        squared_dist, _, _ = knn.knn_points(
            points[None], query_verts[None], K=1, return_nn=False
        )
        mask = squared_dist[0, :, 0] < padding**2
        return points[mask]
    raise ValueError(f"Unsupported p={p}, expected 1 or 2.")
|
|
|
|
|
|
|
|
def unproj_bbx_to_fst(bbx_lurb, K, near_z=0.5, far_z=12.5):
    """Unproject a 2D bbx to its 3D viewing frustum (8 corner points).

    Args:
        bbx_lurb: (B, 4) boxes as (left, up, right, bottom).
        K: (B, 3, 3) intrinsics.
        near_z, far_z: frustum plane depths. Either python floats shared over
            the batch, or (B,) tensors (NOTE(review): if near_z is a tensor,
            far_z must be one too — confirm callers).

    Returns:
        c_frustum_points: (B, 8, 3) in camera coords; first 4 corners on the
        near plane, last 4 on the far plane.
    """
    B = bbx_lurb.size(0)
    # The 4 bbx corners, listed twice (near plane copy then far plane copy).
    uv = bbx_lurb[:, [[0, 1], [2, 1], [2, 3], [0, 3], [0, 1], [2, 1], [2, 3], [0, 3]]]
    if isinstance(near_z, float):
        # Shared scalar depths: 4 near values followed by 4 far values.
        z = uv.new([near_z] * 4 + [far_z] * 4).reshape(1, 8, 1).repeat(B, 1, 1)
    else:
        # Per-sample depth tensors.
        z = torch.cat(
            [
                near_z[:, None, None].repeat(1, 4, 1),
                far_z[:, None, None].repeat(1, 4, 1),
            ],
            dim=1,
        )
    c_frustum_points = unproject_p2d(uv, z, K)
    return c_frustum_points
|
|
|
|
|
|
|
|
def convert_bbx_xys_to_lurb(bbx_xys):
    """Convert center-size boxes to corner (left, up, right, bottom) form.

    Args: bbx_xys (..., 3) -> bbx_lurb (..., 4)
    """
    half = bbx_xys[..., 2:] / 2
    xy = bbx_xys[..., :2]
    return torch.cat([xy - half, xy + half], dim=-1)
|
|
|
|
|
|
|
|
def convert_lurb_to_bbx_xys(bbx_lurb):
    """Convert corner boxes to center-size form; note the result is squared
    (size = max of width and height).

    Args: bbx_lurb (..., 4) -> bbx_xys (..., 3)
    """
    left_up = bbx_lurb[..., :2]
    right_bottom = bbx_lurb[..., 2:]
    side = (right_bottom - left_up).max(-1, keepdim=True)[0]
    midpoint = (left_up + right_bottom) / 2
    return torch.cat([midpoint, side], dim=-1)
|
|
|
|
|
|
|
|
def get_bbx_xys(
    i_j2d, i_j2d_mask=None, bbx_ratio=[192, 256], do_augment=False, base_enlarge=1.2
):
    """
    Args:
        i_j2d: (B, L, J, 3) [x,y,c] or (B, L, J, 2) [x,y]
        i_j2d_mask: (B, L, J) boolean mask indicating valid joints, if None use all joints
        bbx_ratio: [width, height] ratio for the bounding box
        do_augment: whether to apply random augmentation
        bbx_enlarge: factor to enlarge the bounding box

    Returns:
        bbx_xys: (B, L, 3) [center_x, center_y, size]
    """

    if i_j2d_mask is not None:
        # Min/max over valid joints only: invalid joints are pushed to
        # +/- inf so they never win the per-frame min/max reductions.
        mask_expanded = i_j2d_mask.unsqueeze(-1)

        i_j2d_for_min = i_j2d.clone()
        i_j2d_for_max = i_j2d.clone()

        invalid_mask = ~mask_expanded.expand_as(i_j2d[..., :2])
        i_j2d_for_min[..., :2][invalid_mask] = float("inf")
        i_j2d_for_max[..., :2][invalid_mask] = float("-inf")

        min_x = i_j2d_for_min[..., 0].min(-1)[0]
        max_x = i_j2d_for_max[..., 0].max(-1)[0]
        min_y = i_j2d_for_min[..., 1].min(-1)[0]
        max_y = i_j2d_for_max[..., 1].max(-1)[0]
    else:
        # No mask: all joints participate.
        min_x = i_j2d[..., 0].min(-1)[0]
        max_x = i_j2d[..., 0].max(-1)[0]
        min_y = i_j2d[..., 1].min(-1)[0]
        max_y = i_j2d[..., 1].max(-1)[0]

    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2

    h = max_y - min_y
    w = max_x - min_x

    # Grow the short side of the tight box to match the requested aspect ratio.
    if True:
        aspect_ratio = bbx_ratio[0] / bbx_ratio[1]
        mask1 = w > aspect_ratio * h
        h[mask1] = w[mask1] / aspect_ratio
        mask2 = w < aspect_ratio * h
        w[mask2] = h[mask2] * aspect_ratio

    # Square size with a safety margin.
    bbx_size = torch.max(h, w) * base_enlarge

    if do_augment:
        # Random scale and center jitter, drawn independently per (batch, frame).
        B, L = bbx_size.shape[:2]
        device = bbx_size.device
        if True:
            scaleFactor = torch.rand((B, L), device=device) * 0.3 + 1.05
            txFactor = torch.rand((B, L), device=device) * 1.6 - 0.8
            tyFactor = torch.rand((B, L), device=device) * 1.6 - 0.8
        else:
            # (disabled alternative) per-sequence jitter shared across frames
            scaleFactor = torch.rand((B, 1), device=device) * 0.3 + 1.05
            txFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8
            tyFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8

        # Jitter is expressed relative to the un-enlarged box size.
        raw_bbx_size = bbx_size / base_enlarge
        bbx_size = raw_bbx_size * scaleFactor
        center_x += raw_bbx_size / 2 * ((scaleFactor - 1) * txFactor)
        center_y += raw_bbx_size / 2 * ((scaleFactor - 1) * tyFactor)

    return torch.stack([center_x, center_y, bbx_size], dim=-1)
|
|
|
|
|
|
|
|
def get_bbx_xys_from_xyxy(bbx_xyxy, base_enlarge=1.2):
    """Convert xyxy boxes to (enlarged, squared) center-size boxes.

    Args:
        bbx_xyxy: (N, 4) [x1, y1, x2, y2]

    Returns:
        bbx_xys: (N, 3) [center_x, center_y, size]
    """
    # Treat the two box corners as a 2-point keypoint set and reuse get_bbx_xys.
    corners = torch.stack([bbx_xyxy[:, [0, 1]], bbx_xyxy[:, [2, 3]]], dim=1)
    return get_bbx_xys(corners[None], base_enlarge=base_enlarge)[0]
|
|
|
|
|
|
|
|
def normalize_kp2d(obs_kp2d, bbx_xys, clamp_scale_min=False):
    """Normalize 2D keypoints into the [-1, 1] frame of square bounding boxes.

    Joints falling outside their box get their confidence zeroed.

    Args:
        obs_kp2d: (B, L, J, 3) [x, y, c] (or (B, L, J, 2) without confidence)
        bbx_xys: (B, L, 3) [center_x, center_y, size]
        clamp_scale_min: currently unused; the scale is always clamped to
            1e-2 to avoid division by zero.

    Returns:
        obs: (B, L, J, 3) [x, y, c] (or (B, L, J, 2) when no confidence given)
    """
    obs_xy = obs_kp2d[..., :2]
    center = bbx_xys[..., :2]
    scale = bbx_xys[..., [2]]

    # Joints outside the box are marked invisible.  Combine the boolean masks
    # with logical `|` rather than arithmetic `+` on bool tensors.
    xy_max = center + scale / 2
    xy_min = center - scale / 2
    invisible_mask = (
        (obs_xy[..., 0] < xy_min[..., None, 0])
        | (obs_xy[..., 0] > xy_max[..., None, 0])
        | (obs_xy[..., 1] < xy_min[..., None, 1])
        | (obs_xy[..., 1] > xy_max[..., None, 1])
    )
    scale = scale.clamp(min=1e-2)  # guard against degenerate (zero-size) boxes
    normalized_obs_xy = 2 * (obs_xy - center.unsqueeze(-2)) / scale.unsqueeze(-2)

    if obs_kp2d.shape[-1] > 2:
        # Zero out the confidence of out-of-box joints.
        obs_conf = obs_kp2d[..., 2] * ~invisible_mask
        return torch.cat([normalized_obs_xy, obs_conf[..., None]], dim=-1)
    else:
        return normalized_obs_xy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_T_ayf2az(joints, inverse=False):
    """
    Args:
        joints: (B, J, 3), in the start-frame, az-coordinate
    Returns:
        if inverse == False:
            T_af2az: (B, 4, 4)
        else :
            T_az2af: (B, 4, 4)
    """
    # Translation: root joint projected onto the ground plane (z = 0).
    t_ayf2az = joints[:, 0, :].detach().clone()
    t_ayf2az[:, 2] = 0

    # Left-right body axis from joints (1, 2) plus (16, 17), measured in the
    # ground (x, y) plane.  NOTE(review): indices look like SMPL hips and
    # shoulders — confirm against the skeleton definition.
    RL_xy_h = (
        joints[:, 1, [0, 1]] - joints[:, 2, [0, 1]]
    )
    RL_xy_s = (
        joints[:, 16, [0, 1]] - joints[:, 17, [0, 1]]
    )
    RL_xy = RL_xy_h + RL_xy_s
    # Degenerate samples: left/right joints coincide, facing is undecidable.
    I_mask = (
        RL_xy.pow(2).sum(-1) < 1e-4
    )
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))
    # Orthonormal frame: first axis along left-right, second along +z (up in
    # az), third their cross product; columns of R are these axes.
    x_dir = F.pad(F.normalize(RL_xy, 2, -1), (0, 1), value=0)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 2] = 1
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayf2az = torch.stack([x_dir, y_dir, z_dir], dim=-1)
    # Undecidable samples fall back to the identity rotation.
    R_ayf2az[I_mask] = torch.eye(3).to(R_ayf2az)

    if inverse:
        # Rigid inverse: R^T and -R^T t.
        R_az2ayf = R_ayf2az.transpose(1, 2)
        t_az2ayf = -einsum(R_ayf2az, t_ayf2az, "b i j , b i -> b j")
        return transform_mat(R_az2ayf, t_az2ayf)
    else:
        return transform_mat(R_ayf2az, t_ayf2az)
|
|
|
|
|
|
|
|
def compute_T_ayfz2ay(joints, inverse=False):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        if inverse == False:
            T_ayfz2ay: (B, 4, 4)
        else :
            T_ay2ayfz: (B, 4, 4)
    """
    # Translation: root joint projected onto the ground plane (y = 0).
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0

    # Left-right body axis from joints (1, 2) plus (16, 17), measured in the
    # ground (x, z) plane.  NOTE(review): indices look like SMPL hips and
    # shoulders — confirm against the skeleton definition.
    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )
    RL_xz = RL_xz_h + RL_xz_s
    # Degenerate samples: left/right joints coincide, facing is undecidable.
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))

    # Orthonormal frame: first axis along left-right, second along +y (up),
    # third their cross product; columns of R are these axes.
    x_dir = torch.zeros_like(t_ayfz2ay)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayfz2ay = torch.stack([x_dir, y_dir, z_dir], dim=-1)
    # Undecidable samples fall back to the identity rotation.
    R_ayfz2ay[I_mask] = torch.eye(3).to(R_ayfz2ay)

    if inverse:
        # Rigid inverse: R^T and -R^T t.
        R_ay2ayfz = R_ayfz2ay.transpose(1, 2)
        t_ay2ayfz = -einsum(R_ayfz2ay, t_ayfz2ay, "b i j , b i -> b j")
        return transform_mat(R_ay2ayfz, t_ay2ayfz)
    else:
        return transform_mat(R_ayfz2ay, t_ayfz2ay)
|
|
|
|
|
|
|
|
def compute_T_ay2ayrot(joints):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        T_ay2ayrot: (B, 4, 4)
    """
    # Anchor translation: root joint projected onto the ground plane (y = 0).
    t_ayrot2ay = joints[:, 0, :].detach().clone()
    t_ayrot2ay[:, 1] = 0

    # Random rotation about the up (Y) axis, one angle per batch element;
    # X and Z euler angles stay zero.
    B = joints.shape[0]
    euler_angle = torch.zeros((B, 3), device=joints.device)
    yrot_angle = torch.rand((B,), device=joints.device) * 2 * torch.pi
    euler_angle[:, 0] = yrot_angle
    R_ay2ayrot = euler_angles_to_matrix(euler_angle, "YXZ")

    # Translation that maps the anchor into the rotated frame: -R^{-1} t.
    R_ayrot2ay = R_ay2ayrot.transpose(1, 2)
    t_ay2ayrot = -einsum(R_ayrot2ay, t_ayrot2ay, "b i j , b i -> b j")
    return transform_mat(R_ay2ayrot, t_ay2ayrot)
|
|
|
|
|
|
|
|
def compute_root_quaternion_ay(joints):
    """
    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        root_quat: (B, 4) from z-axis to fz
    """
    # Flatten any leading batch dims to a single batch axis.
    joints_shape = joints.shape
    joints = joints.reshape((-1,) + joints_shape[-2:])
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0

    # Left-right body axis from joints (1, 2) plus (16, 17) in the ground
    # (x, z) plane.  NOTE(review): indices look like SMPL hips/shoulders.
    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )
    RL_xz = RL_xz_h + RL_xz_s
    # Degenerate samples: facing direction undecidable.
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))

    # Facing direction z = x × y with x along left-right and y straight up.
    x_dir = torch.zeros_like(t_ayfz2ay)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1
    z_dir = torch.cross(x_dir, y_dir, dim=-1)

    # Tiny epsilon nudges exactly-antiparallel cases away from the qbetween
    # singularity.
    z_dir[..., 2] += 1e-9
    pos_z_vec = torch.tensor([0, 0, 1]).to(joints.device).float()
    root_quat = qbetween(pos_z_vec[None], z_dir)
    root_quat = root_quat.reshape(joints_shape[:-2] + (4,))
    return root_quat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def similarity_transform_batch(S1, S2):
    """
    Computes a similarity transform (sR, t) that solves the orthogonal Procrutes problem.

    Args:
        S1, S2: (*, L, 3)

    Returns:
        ((scale (*, 1, 1), R (*, 3, 3)), t (*, 3, 1)) such that
        scale * R @ S1 + t best aligns S1 with S2 (least squares).
    """
    assert S1.shape == S2.shape
    S_shape = S1.shape
    S1 = S1.reshape(-1, *S_shape[-2:])
    S2 = S2.reshape(-1, *S_shape[-2:])

    # Work with (B, 3, L): points as columns.
    S1 = S1.transpose(-2, -1)
    S2 = S2.transpose(-2, -1)

    # 1. Remove centroids.
    mu1 = S1.mean(axis=-1, keepdims=True)
    mu2 = S2.mean(axis=-1, keepdims=True)

    X1 = S1 - mu1
    X2 = S2 - mu2

    # 2. Total variance of the source cloud (normalizer for the scale).
    var1 = torch.sum(X1**2, dim=1).sum(dim=1)

    # 3. Cross-covariance matrix.
    K = X1.bmm(X2.permute(0, 2, 1))

    # 4. SVD; note torch.svd returns V, not V^T (unlike torch.linalg.svd).
    U, s, V = torch.svd(K)

    # Z flips the last singular direction when needed so that det(R) = +1.
    Z = torch.eye(U.shape[1], device=S1.device).unsqueeze(0)
    Z = Z.repeat(U.shape[0], 1, 1)
    Z[:, -1, -1] *= torch.sign(torch.det(U.bmm(V.permute(0, 2, 1))))

    # 5. Optimal rotation.
    R = V.bmm(Z.bmm(U.permute(0, 2, 1)))

    # 6. Optimal scale = trace(R K) / var1.
    scale = torch.cat([torch.trace(x).unsqueeze(0) for x in R.bmm(K)]) / var1

    # 7. Optimal translation.
    t = mu2 - (scale.unsqueeze(-1).unsqueeze(-1) * (R.bmm(mu1)))

    # Restore the caller's leading batch shape.
    scale = scale.reshape(*S_shape[:-2], 1, 1)
    R = R.reshape(*S_shape[:-2], 3, 3)
    t = t.reshape(*S_shape[:-2], 3, 1)

    return (scale, R), t
|
|
|
|
|
|
|
|
def kabsch_algorithm_batch(X1, X2):
    """
    Computes a rigid transform (R, t) aligning X1 to X2 (Kabsch algorithm).

    Args:
        X1, X2: (*, L, 3) corresponding point sets

    Returns:
        R: (*, 3, 3), t: (*, 3, 1) minimizing ||R @ X1 + t - X2||
    """
    assert X1.shape == X2.shape
    X_shape = X1.shape
    X1 = X1.reshape(-1, *X_shape[-2:])
    X2 = X2.reshape(-1, *X_shape[-2:])

    # Center both point sets on their centroids.
    centroid_X1 = torch.mean(X1, dim=-2, keepdim=True)
    centroid_X2 = torch.mean(X2, dim=-2, keepdim=True)
    X1_centered = X1 - centroid_X1
    X2_centered = X2 - centroid_X2

    # Cross-covariance matrix and its SVD give the candidate rotation.
    H = torch.matmul(X1_centered.transpose(-2, -1), X2_centered)
    U, S, Vt = torch.linalg.svd(H)
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))

    # Reflection correction: flip ONLY the last right-singular vector
    # (last row of Vt).  The original negated the whole Vt, which also makes
    # det(R) = +1 but does not give the RMSD-minimizing rotation.
    d = torch.det(R) < 0
    Vt = Vt.clone()
    Vt[d, -1, :] = -Vt[d, -1, :]
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))

    # Translation maps the first centroid onto the second.
    t = centroid_X2.transpose(-2, -1) - torch.matmul(R, centroid_X1.transpose(-2, -1))

    # Restore the caller's leading batch shape.
    R = R.reshape(*X_shape[:-2], 3, 3)
    t = t.reshape(*X_shape[:-2], 3, 1)

    return R, t
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_cam_angvel(R_w2c, padding_last=True):
    """Camera angular velocity as 6D rotation features.

    Args:
        R_w2c: (F, 3, 3) world-to-camera rotations per frame.
        padding_last: must be True; the final velocity is repeated so the
            output keeps F rows.

    Returns: (F, 6) float tensor.
    """
    # Relative rotation between consecutive frames: R_{t+1} @ R_t^T.
    rel_rot = R_w2c[1:] @ R_w2c[:-1].transpose(-1, -2)
    angvel = matrix_to_rotation_6d(rel_rot)

    assert padding_last
    angvel = torch.cat([angvel, angvel[-1:]], dim=0)
    return angvel.float()
|
|
|
|
|
|
|
|
def compute_cam_tvel(t_w2c, padding_last=True):
    """Camera translational velocity per frame.

    Args:
        t_w2c: (F, 3) camera translations.
        padding_last: must be True; last velocity duplicated to keep F rows.

    Returns: (F, 3) float tensor of frame-to-frame deltas.
    """
    deltas = t_w2c[1:] - t_w2c[:-1]
    assert padding_last
    return torch.cat([deltas, deltas[-1:]], dim=0).float()
|
|
|
|
|
|
|
|
def compute_cam_tcw2_vel(T_w2c, padding_last=True):
    """Camera-center velocity computed in world coordinates.

    Args:
        T_w2c: (F, 4, 4) world-to-camera transforms.
        padding_last: must be True; last velocity duplicated to keep F rows.

    Returns: (F, 3) float tensor.
    """
    # Camera centers live in the translation column of the inverse transforms.
    centers = T_w2c.inverse()[:, :3, 3]
    vel = centers[1:] - centers[:-1]
    assert padding_last
    return torch.cat([vel, vel[-1:]], dim=0).float()
|
|
|
|
|
|
|
|
def ransac_gravity_vec(xyz, num_iterations=100, threshold=0.05, verbose=False):
    """RANSAC consensus direction from a set of 3D vectors.

    Repeatedly samples one vector, counts how many vectors point within
    `threshold` radians of it, and returns the mean of the largest inlier set.

    Args:
        xyz: (N, 3) candidate vectors.
        num_iterations: number of RANSAC samples.
        threshold: inlier angular threshold in radians.
        verbose: print inlier statistics.

    Returns:
        (mean of the inlier vectors (3,), inlier vectors (M, 3))
    """
    N = xyz.shape[0]
    max_inliers = []

    norms = xyz.norm(dim=-1)

    for _ in range(num_iterations):
        sample_index = np.random.randint(N)
        sample = xyz[sample_index]

        # Angle between each vector and the sample:
        #   cos(theta) = <a, b> / (|a| * |b|).
        # BUGFIX: the original computed dot / |a| * |b| (division binds
        # tighter than the trailing multiply), which is wrong for non-unit
        # vectors and could leave max_inliers empty, crashing on .mean below.
        dot_product = (xyz * sample).sum(dim=-1)
        cos_angle = dot_product / (norms * norms[sample_index])
        angles = torch.acos(torch.clamp(cos_angle, -1, 1))

        inliers = xyz[angles < threshold]

        if len(inliers) > len(max_inliers):
            max_inliers = inliers

        # All vectors agree: no better consensus is possible.
        if len(max_inliers) == N:
            break
    if verbose:
        print(f"Inliers: {len(max_inliers)} / {N}")
    result = max_inliers.mean(dim=0)

    return result, max_inliers
|
|
|
|
|
|
|
|
def sequence_best_cammat(w_j3d, c_j3d, cam_rot):
    """Pick the best per-frame camera matrix by 2D reprojection error.

    Builds one candidate camera per frame (rotation from `cam_rot`, anchored
    so the root joint lands at its observed camera-space position), reprojects
    the whole world sequence through every candidate, and returns the
    candidate with the lowest mean 2D error.

    Args:
        w_j3d: (L, J, 3) world-space joints.
        c_j3d: (L, J, 3) camera-space joints.
        cam_rot: per-frame camera rotations (consumed by matrix.get_TRS).

    Returns:
        (cam_mat of the winning frame, index of that frame)
    """
    L, J, _ = w_j3d.shape

    root_in_w = w_j3d[:, 0]
    root_in_c = c_j3d[:, 0]
    # Candidate cameras: rotation at the world root, then shifted so that the
    # root reprojects to its camera-space position (via the matrix helpers).
    cam_mat = matrix.get_TRS(cam_rot, root_in_w)
    cam_pos = matrix.get_position_from(-root_in_c[:, None], cam_mat)[:, 0]
    cam_mat = matrix.set_position(cam_mat, cam_pos)

    # Evaluate every candidate camera against every frame: (L_cam, L*J, 3).
    w_j3d_expand = w_j3d[None].expand(L, -1, -1, -1)
    w_j3d_expand = w_j3d_expand.reshape(L, -1, 3)

    w_j3d_expand_in_c = matrix.get_relative_position_to(
        w_j3d_expand, cam_mat
    )
    w_j2d_expand_in_c = project_p2d(w_j3d_expand_in_c)
    w_j2d_expand_in_c = w_j2d_expand_in_c.reshape(L, L, J, 2)
    c_j2d = project_p2d(c_j3d)
    # Mean 2D error per candidate, averaged over joints then over frames.
    error = w_j2d_expand_in_c - c_j2d[None]
    error = error.norm(dim=-1).mean(dim=-1)
    error = error.mean(dim=-1)
    ind = error.argmin()
    return cam_mat[ind], ind
|
|
|
|
|
|
|
|
def get_sequence_cammat(w_j3d, c_j3d, cam_rot):
    """Build one camera matrix per frame, anchored at the root joint.

    Args:
        w_j3d: (L, J, 3) world-space joints.
        c_j3d: (L, J, 3) camera-space joints.
        cam_rot: per-frame camera rotations (consumed by matrix.get_TRS).

    Returns:
        cam_mat: per-frame camera matrices.
    """
    L, J, _ = w_j3d.shape

    world_root = w_j3d[:, 0]
    cam_root = c_j3d[:, 0]
    # Rotation placed at the world root, then shifted so the root lands at
    # its observed camera-space position.
    cam_mat = matrix.get_TRS(cam_rot, world_root)
    cam_pos = matrix.get_position_from(-cam_root[:, None], cam_mat)[:, 0]
    return matrix.set_position(cam_mat, cam_pos)
|
|
|
|
|
|
|
|
def ransac_vec(vel, min_multiply=20, verbose=False):
    """Robust mean of vectors via a pairwise-distance consensus.

    The inlier threshold is the smallest non-zero pairwise distance times
    `min_multiply`; the row with the most inliers defines the consensus set.

    Args:
        vel: (N, C) vectors.
        min_multiply: threshold multiplier.
        verbose: print the consensus size.

    Returns:
        (mean vector (C,), boolean inlier mask (N,))
    """
    N = vel.shape[0]
    diff = vel[None].expand(N, -1, -1) - vel[:, None].expand(-1, N, -1)
    dist_mat = diff.norm(dim=-1)
    # Mask the zero diagonal with a huge value so the min is over distinct pairs.
    off_diag = dist_mat + torch.eye(N, device=vel.device) * 1e6
    threshold = off_diag.min() * min_multiply
    inner_mask = dist_mat < threshold
    best_row = inner_mask.sum(dim=-1).argmax()
    consensus = inner_mask[best_row]
    result = vel[consensus].mean(dim=0)
    if verbose:
        print(consensus.sum().item())

    return result, consensus
|
|
|
|
|
|
|
|
def as_identity(R):
    """Snap near-identity rotations to the exact identity, IN PLACE.

    Args:
        R: (*, 3, 3) rotation matrices; modified in place and also returned.

    Returns:
        R, with every rotation whose axis-angle norm is < 1e-5 replaced by I.
    """
    # Rotations this close to identity are treated as numerical noise.
    is_I = matrix_to_axis_angle(R).norm(dim=-1) < 1e-5
    R[is_I] = torch.eye(3)[None].expand(is_I.sum(), -1, -1).to(R)
    return R
|
|
|
|
|
|
|
|
def normalize_T_w2c(T_w2c):
    """Re-anchor a camera trajectory so the first frame becomes the identity.

    Args:
        T_w2c: (L, 4, 4) or (4, 4) world-to-camera transforms.

    Returns:
        norm_T_w2c: (L, 4, 4) transforms expressed relative to frame 0.
    """
    if T_w2c.ndim == 2:
        T_w2c = T_w2c[None]
    L = T_w2c.shape[0]
    device = T_w2c.device
    norm_T_c2w = torch.eye(4)[None].repeat(L, 1, 1).to(device)

    # Work in camera-to-world, where anchoring is a left-multiply by the
    # inverse of frame 0.  as_identity snaps numerical-noise rotations to I.
    T_c2w = T_w2c.inverse()
    R_c2w = as_identity(T_c2w[:, :3, :3])
    t_c2w = T_c2w[:, :3, 3]

    # Express every frame relative to the first: R0^T R, R0^T (t - t0).
    R0_c2w = R_c2w[:1]
    t0_c2w = t_c2w[:1]
    norm_R_c2w = R0_c2w.mT @ R_c2w
    norm_t_c2w = (R0_c2w.mT @ (t_c2w - t0_c2w)[..., None])[..., 0]
    norm_T_c2w[:, :3, :3] = norm_R_c2w
    norm_T_c2w[:, :3, 3] = norm_t_c2w
    norm_T_w2c = norm_T_c2w.inverse()
    # Clean numerical noise: snap near-identity rotations, zero the bottom row.
    norm_T_w2c[:, :3, :3] = as_identity(norm_T_w2c[:, :3, :3])
    norm_T_w2c[:, 3, :3] = 0

    return norm_T_w2c
|
|
|