# genmo/utils/geo_transform.py
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from einops import einsum
import genmo.utils.matrix as matrix
from genmo.utils.pylogger import Log
from genmo.utils.rotation_conversions import (
euler_angles_to_matrix,
matrix_to_quaternion,
matrix_to_rotation_6d,
quaternion_to_axis_angle,
)
from genmo.utils.so3 import so3_exp_map, so3_log_map
from third_party.GVHMR.hmr4d.utils.geo.quaternion import qbetween
def homo_points(points):
    """Append a homogeneous coordinate (value 1) to the last dimension.

    Args:
        points: (..., C)
    Returns: (..., C+1), with 1 padded
    """
    ones = torch.ones_like(points[..., :1])
    return torch.cat([points, ones], dim=-1)
def apply_Ts_on_seq_points(points, Ts):
    """Apply one rigid transform per point to a sequence of points.

    Args:
        points: (..., N, 3)
        Ts: (..., N, 4, 4), one transform per point
    Returns: (..., N, 3)
    """
    # fix: the original called torch.torch.einsum (a typo that only worked
    # because the torch package re-exports itself)
    points = (
        torch.einsum("...ki,...i->...k", Ts[..., :3, :3], points) + Ts[..., :3, 3]
    )
    return points
def apply_T_on_points(points, T):
    """Apply a rigid transform to a set of points.

    Args:
        points: (..., N, 3)
        T: (..., 4, 4)
    Returns: (..., N, 3)
    """
    rotated = torch.einsum("...ki,...ji->...jk", T[..., :3, :3], points)
    translation = T[..., None, :3, 3]
    return rotated + translation
def T_transforms_points(T, points, pattern):
    """Manual-pattern variant of apply_T_on_points.

    T: (..., 4, 4)
    points: (..., 3)
    pattern: "... c d, ... d -> ... c"
    """
    transformed_h = einsum(T, homo_points(points), pattern)
    return transformed_h[..., :3]
def project_p2d(points, K=None, is_pinhole=True):
    """Project camera-space 3D points to 2D.

    Args:
        points: (..., (N), 3)
        K: (..., 3, 3) intrinsics; if None, normalized coordinates are returned
        is_pinhole: perspective divide if True, else orthographic
    Returns: shape is similar to points but without z
    """
    points = points.clone()
    if is_pinhole:
        # advanced indexing yields a copy, so the fill does not touch `points`
        depth = points[..., [-1]]
        depth = depth.masked_fill(depth.abs() < 1e-6, 1e-6)  # guard near-zero z
        projected = points / depth
    else:  # orthographic: keep xy, append a homogeneous 1
        projected = F.pad(points[..., :2], (0, 1), value=1)
    if K is None:
        return projected[..., :2]
    if len(projected.shape) == len(K.shape):  # points carry an extra N dim
        homo = torch.einsum("...ki,...ji->...jk", K, projected)
    else:
        homo = torch.einsum("...ki,...i->...k", K, projected)
    return homo[..., :2]
def gen_uv_from_HW(H, W, device="cpu"):
    """Generate a per-pixel (u, v) coordinate grid.

    Args:
        H, W: image height and width
        device: target device
    Returns: (H, W, 2), as float. Note: uv (x, y) order, not ij
    """
    # indexing="ij" matches the historical default and silences the
    # torch.meshgrid deprecation warning
    grid_v, grid_u = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    return torch.stack([grid_u, grid_v], dim=-1).float().to(device)  # (H, W, 2)
def unproject_p2d(uv, z, K):
    """Back-project pixel coordinates with depth, assuming a pinhole camera.

    uv: (B, N, 2)
    z: (B, N, 1)
    K: (B, 3, 3)
    Returns: (B, N, 3)
    """
    principal = K[:, None, :2, 2]
    focal = K[:, None, [0, 1], [0, 1]]
    xy_at_depth = (uv - principal) / focal * z  # (B, N, 2)
    return torch.cat([xy_at_depth, z], dim=-1)
def cvt_p2d_from_i_to_c(uv, K):
    """Convert image-plane pixels to normalized camera-plane coordinates.

    Args:
        uv: (..., 2) or (..., N, 2)
        K: (..., 3, 3)
    Returns: the same shape as input uv
    """
    if len(uv.shape) == len(K.shape):  # uv carries an extra N dim
        center = K[..., None, :2, 2]
        focal = K[..., None, [0, 1], [0, 1]]
    else:
        center = K[..., :2, 2]
        focal = K[..., [0, 1], [0, 1]]
    return (uv - center) / focal
def cvt_to_bi01_p2d(p2d, bbx_lurb):
    """Normalize p2d into [0,1] coordinates of the bbx (left-up origin).

    p2d: (..., (N), 2)
    bbx_lurb: (..., 4)
    """
    if len(p2d.shape) == len(bbx_lurb.shape) + 1:
        bbx_lurb = bbx_lurb.unsqueeze(-2)
    left_up = bbx_lurb[..., :2]
    wh = bbx_lurb[..., 2:] - left_up
    return (p2d - left_up) / wh
def cvt_from_bi01_p2d(bi01_p2d, bbx_lurb):
    """Map bbx-normalized [0,1] points back to image coordinates.

    Args:
        bi01_p2d: (..., 2) or (..., N, 2)
        bbx_lurb: (..., 4)
    Returns:
        p2d: shape is the same as input
    """
    left_up = bbx_lurb[..., :2]
    wh = bbx_lurb[..., 2:] - left_up  # (..., 2)
    if len(bi01_p2d.shape) == len(wh.shape) + 1:  # extra N dim on points
        return bi01_p2d * wh.unsqueeze(-2) + left_up.unsqueeze(-2)
    return bi01_p2d * wh + left_up
def cvt_p2d_from_bi01_to_c(bi01, bbxs_lurb, Ks):
    """Chain bbx-normalized [0,1] coords -> image pixels -> camera plane.

    Args:
        bi01: (..., (N), 2), value in range (0,1), the point in the bbx image
        bbxs_lurb: (..., 4)
        Ks: (..., 3, 3)
    Returns:
        c: (..., (N), 2)
    """
    p2d_i = cvt_from_bi01_p2d(bi01, bbxs_lurb)
    return cvt_p2d_from_i_to_c(p2d_i, Ks)
def cvt_p2d_from_pm1_to_i(p2d_pm1, bbx_xys):
    """Map bbx-normalized [-1,1] points to image coordinates.

    Args:
        p2d_pm1: (..., (N), 2), value in range (-1,1), the point in the bbx image
        bbx_xys: (..., 3) [center_x, center_y, size]
    Returns:
        p2d: (..., (N), 2)
    """
    center = bbx_xys[..., :2]
    offset = p2d_pm1 * bbx_xys[..., [2]] / 2
    return center + offset
def uv2l_index(uv, W):
    """Flatten (u, v) pixel coordinates to a linear index: l = v * W + u."""
    u, v = uv[..., 0], uv[..., 1]
    return v * W + u
def l2uv_index(L, W):
    """Inverse of uv2l_index: linear index -> stacked (u, v)."""
    u = L % W
    v = torch.div(L, W, rounding_mode="floor")
    return torch.stack([u, v], dim=-1)
def transform_mat(R, t):
    """Assemble a homogeneous transform from rotation and translation.

    Args:
        R: Bx3x3 array of a batch of rotation matrices
        t: Bx3 or Bx3x1 array of a batch of translation vectors
    Returns:
        T: Bx4x4 Transformation matrix
    """
    if len(R.shape) > len(t.shape):
        t = t.unsqueeze(-1)  # promote (B, 3) -> (B, 3, 1)
    R_padded = F.pad(R, [0, 0, 0, 1])            # add a zero bottom row
    t_padded = F.pad(t, [0, 0, 0, 1], value=1)   # append the homogeneous 1
    return torch.cat([R_padded, t_padded], dim=-1)
def axis_angle_to_matrix_exp_map(aa):
    """Convert axis-angle vectors to rotation matrices via so3_exp_map.

    Args:
        aa: (*, 3)
    Returns:
        R: (*, 3, 3)
    """
    print("Use pytorch3d.transforms.axis_angle_to_matrix instead!!!")
    batch_shape = aa.shape[:-1]
    flat_R = so3_exp_map(aa.reshape(-1, 3))
    return flat_R.reshape(*batch_shape, 3, 3)
def matrix_to_axis_angle_log_map(R):
    """Convert rotation matrices to axis-angle via pytorch3d so3_log_map.

    Args:
        R: (*, 3, 3)
    Returns:
        aa: (*, 3)
    """
    # fix: warning message previously read "WARINING"
    print(
        "WARNING! I met singularity problem with this function, use matrix_to_axis_angle instead!"
    )
    ori_shape = R.shape[:-2]
    return so3_log_map(R.reshape(-1, 3, 3)).reshape(*ori_shape, 3)
def matrix_to_axis_angle(R):
    """Convert rotation matrices to axis-angle via the quaternion route.

    Preferred over matrix_to_axis_angle_log_map, which hits singularities.
    Args:
        R: (*, 3, 3) rotation matrices
    Returns:
        aa: (*, 3) axis-angle vectors
    """
    return quaternion_to_axis_angle(matrix_to_quaternion(R))
def ransac_PnP(K, pts_2d, pts_3d, err_thr=10):
    """Estimate a camera pose from 2D-3D correspondences via RANSAC PnP.

    Args:
        K: (3, 3) intrinsics
        pts_2d: (N, 2) image points
        pts_3d: (N, 3) object points
        err_thr: reprojection-error threshold (pixels)
    Returns:
        (pose (3, 4), pose_homo (4, 4), inliers); identity pose on cv2 failure.
    """
    dist_coeffs = np.zeros(shape=[8, 1], dtype="float64")
    pts_2d = np.ascontiguousarray(pts_2d.astype(np.float64))
    pts_3d = np.ascontiguousarray(pts_3d.astype(np.float64))
    K = K.astype(np.float64)
    try:
        _, rvec, tvec, inlier_ids = cv2.solvePnPRansac(
            pts_3d,
            pts_2d,
            K,
            dist_coeffs,
            reprojectionError=err_thr,
            iterationsCount=10000,
            flags=cv2.SOLVEPNP_EPNP,
        )
        R_mat = cv2.Rodrigues(rvec)[0]
        pose = np.concatenate([R_mat, tvec], axis=-1)
        pose_homo = np.concatenate([pose, np.array([[0, 0, 0, 1]])], axis=0)
        return pose, pose_homo, [] if inlier_ids is None else inlier_ids
    except cv2.error:
        print("CV ERROR")
        return np.eye(4)[:3], np.eye(4), []
def ransac_PnP_batch(K_raw, pts_2d, pts_3d, err_thr=10):
    """Run ransac_PnP per batch element.

    Returns:
        fit_R: (B, 3, 3) rotations; fit_t: (B, 3) translations
    """
    poses = [
        ransac_PnP(K_raw[b], pts_2d[b], pts_3d[b], err_thr=err_thr)[0]
        for b in range(K_raw.shape[0])
    ]
    fit_R = np.stack([p[:3, :3] for p in poses], axis=0)
    fit_t = np.stack([p[:3, 3] for p in poses], axis=0)
    return fit_R, fit_t
def get_nearby_points(points, query_verts, padding=0.0, p=1):
    """Select the subset of points near query_verts.

    Fixes: the docstring was placed after an import statement (making it a
    dead string, not a docstring); pytorch3d is now imported lazily only for
    p == 2; an invalid p now raises instead of UnboundLocalError.

    Args:
        points: (S, 3)
        query_verts: (V, 3)
        padding: p=1 -> axis-aligned box enlargement; p=2 -> inclusion radius
        p: 1 -> bounding-box test; 2 -> nearest-neighbor radius test
    Returns:
        nearby_points: (M, 3) subset of points
    """
    if p == 1:
        max_xyz = query_verts.max(0)[0] + padding
        min_xyz = query_verts.min(0)[0] - padding
        # strictly inside the (padded) axis-aligned bounding box
        in_box = ((points > min_xyz).all(dim=-1)) & ((points < max_xyz).all(dim=-1))
        nearby_points = points[in_box.nonzero().squeeze(-1)]
    elif p == 2:
        import pytorch3d.ops.knn as knn  # lazy: heavy optional dependency

        squared_dist, _, _ = knn.knn_points(
            points[None], query_verts[None], K=1, return_nn=False
        )
        mask = squared_dist[0, :, 0] < padding**2  # (S,)
        nearby_points = points[mask]
    else:
        raise ValueError(f"Unsupported p={p}, expected 1 or 2")
    return nearby_points
def unproj_bbx_to_fst(bbx_lurb, K, near_z=0.5, far_z=12.5):
    """Unproject a bbx into an 8-corner camera-space frustum.

    Args:
        bbx_lurb: (B, 4) [left, up, right, bottom]
        K: (B, 3, 3) intrinsics
        near_z, far_z: floats or (B,) tensors of frustum depths
    Returns:
        c_frustum_points: (B, 8, 3); first 4 corners at near_z, last 4 at far_z
    """
    B = bbx_lurb.size(0)
    # 4 bbx corners duplicated: first copy gets near_z, second copy far_z
    uv = bbx_lurb[:, [[0, 1], [2, 1], [2, 3], [0, 3], [0, 1], [2, 1], [2, 3], [0, 3]]]
    if isinstance(near_z, float):
        # new_tensor replaces the deprecated legacy Tensor.new(data) constructor
        z = uv.new_tensor([near_z] * 4 + [far_z] * 4).reshape(1, 8, 1).repeat(B, 1, 1)
    else:
        z = torch.cat(
            [
                near_z[:, None, None].repeat(1, 4, 1),
                far_z[:, None, None].repeat(1, 4, 1),
            ],
            dim=1,
        )
    c_frustum_points = unproject_p2d(uv, z, K)  # (B, 8, 3)
    return c_frustum_points
def convert_bbx_xys_to_lurb(bbx_xys):
    """
    Args: bbx_xys (..., 3) -> bbx_lurb (..., 4)
    """
    half = bbx_xys[..., 2:] / 2
    center = bbx_xys[..., :2]
    return torch.cat([center - half, center + half], dim=-1)
def convert_lurb_to_bbx_xys(bbx_lurb):
    """
    Args: bbx_lurb (..., 4) -> bbx_xys (..., 3); size is the longer side (squared bbx)
    """
    left_up, right_bottom = bbx_lurb[..., :2], bbx_lurb[..., 2:]
    size = (right_bottom - left_up).max(-1, keepdim=True)[0]
    center = (left_up + right_bottom) / 2
    return torch.cat([center, size], dim=-1)
def get_bbx_xys(
    i_j2d, i_j2d_mask=None, bbx_ratio=[192, 256], do_augment=False, base_enlarge=1.2
):
    """
    Args:
        i_j2d: (B, L, J, 3) [x,y,c] or (B, L, J, 2) [x,y]
        i_j2d_mask: (B, L, J) boolean mask indicating valid joints, if None use all joints
        bbx_ratio: [width, height] ratio for the bounding box
        do_augment: whether to apply random augmentation (non-deterministic)
        base_enlarge: factor to enlarge the bounding box
    Returns:
        bbx_xys: (B, L, 3) [center_x, center_y, size]
    """
    # NOTE: bbx_ratio is a mutable default argument, but it is only read here.
    # Apply mask if provided
    if i_j2d_mask is not None:
        # Create a masked version of i_j2d for min/max calculations
        # For min calculation, set masked-out joints to large positive values
        # For max calculation, set masked-out joints to large negative values
        mask_expanded = i_j2d_mask.unsqueeze(-1)  # (B, L, J, 1)
        # Create copies for min and max calculations
        i_j2d_for_min = i_j2d.clone()
        i_j2d_for_max = i_j2d.clone()
        # Set coordinates of masked joints appropriately
        # NOTE(review): a frame with ALL joints masked produces +/-inf extremes
        # and a non-finite bbx — confirm callers guarantee >= 1 valid joint.
        invalid_mask = ~mask_expanded.expand_as(i_j2d[..., :2])
        i_j2d_for_min[..., :2][invalid_mask] = float(
            "inf"
        )  # For min, set to large positive
        i_j2d_for_max[..., :2][invalid_mask] = float(
            "-inf"
        )  # For max, set to large negative
        # Calculate min/max using the filtered joints
        min_x = i_j2d_for_min[..., 0].min(-1)[0]
        max_x = i_j2d_for_max[..., 0].max(-1)[0]
        min_y = i_j2d_for_min[..., 1].min(-1)[0]
        max_y = i_j2d_for_max[..., 1].max(-1)[0]
    else:
        # Use all joints
        min_x = i_j2d[..., 0].min(-1)[0]
        max_x = i_j2d[..., 0].max(-1)[0]
        min_y = i_j2d[..., 1].min(-1)[0]
        max_y = i_j2d[..., 1].max(-1)[0]
    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2
    # Size
    h = max_y - min_y  # (B, L)
    w = max_x - min_x  # (B, L)
    if True:  # fit w and h into aspect-ratio
        # grow the shorter side so that w/h == bbx_ratio[0]/bbx_ratio[1]
        aspect_ratio = bbx_ratio[0] / bbx_ratio[1]
        mask1 = w > aspect_ratio * h
        h[mask1] = w[mask1] / aspect_ratio
        mask2 = w < aspect_ratio * h
        w[mask2] = h[mask2] * aspect_ratio
    # apply a common factor to enlarge the bounding box
    bbx_size = torch.max(h, w) * base_enlarge
    if do_augment:
        B, L = bbx_size.shape[:2]
        device = bbx_size.device
        if True:  # per-frame random factors (the else branch is per-sequence)
            scaleFactor = torch.rand((B, L), device=device) * 0.3 + 1.05  # 1.05~1.35
            txFactor = torch.rand((B, L), device=device) * 1.6 - 0.8  # -0.8~0.8
            tyFactor = torch.rand((B, L), device=device) * 1.6 - 0.8  # -0.8~0.8
        else:
            scaleFactor = torch.rand((B, 1), device=device) * 0.3 + 1.05  # 1.05~1.35
            txFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8  # -0.8~0.8
            tyFactor = torch.rand((B, 1), device=device) * 1.6 - 0.8  # -0.8~0.8
        # jitter scale first, then shift the center within the extra margin
        raw_bbx_size = bbx_size / base_enlarge
        bbx_size = raw_bbx_size * scaleFactor
        center_x += raw_bbx_size / 2 * ((scaleFactor - 1) * txFactor)
        center_y += raw_bbx_size / 2 * ((scaleFactor - 1) * tyFactor)
    return torch.stack([center_x, center_y, bbx_size], dim=-1)
def get_bbx_xys_from_xyxy(bbx_xyxy, base_enlarge=1.2):
    """
    Args:
        bbx_xyxy: (N, 4) [x1, y1, x2, y2]
    Returns:
        bbx_xys: (N, 3) [center_x, center_y, size]
    """
    # treat the two bbx corners as a 2-point "skeleton" and reuse get_bbx_xys
    corners = torch.stack([bbx_xyxy[:, [0, 1]], bbx_xyxy[:, [2, 3]]], dim=1)  # (N, 2, 2)
    return get_bbx_xys(corners[None], base_enlarge=base_enlarge)[0]
def normalize_kp2d(obs_kp2d, bbx_xys, clamp_scale_min=False):
    """Normalize 2D keypoints into the bbx frame (roughly [-1, 1]).

    Keypoints outside the bbx get confidence 0.

    Args:
        obs_kp2d: (B, L, J, 3) [x, y, c] or (B, L, J, 2)
        bbx_xys: (B, L, 3)
    Returns:
        obs: (B, L, J, 3) [x, y, c], or (B, L, J, 2) if no confidence given
    """
    xy = obs_kp2d[..., :2]  # (B, L, J, 2)
    center = bbx_xys[..., :2]
    scale = bbx_xys[..., [2]]
    # Mark keypoints outside the bounding box as invisible
    half = scale / 2
    lo = center - half
    hi = center + half
    outside = (
        (xy[..., 0] < lo[..., None, 0])
        | (xy[..., 0] > hi[..., None, 0])
        | (xy[..., 1] < lo[..., None, 1])
        | (xy[..., 1] > hi[..., None, 1])
    )
    safe_scale = scale.clamp(min=1e-2)  # avoid divide-by-zero on tiny bbxs
    norm_xy = 2 * (xy - center.unsqueeze(-2)) / safe_scale.unsqueeze(-2)
    if obs_kp2d.shape[-1] <= 2:
        return norm_xy
    conf = obs_kp2d[..., 2] * ~outside  # zero out out-of-bbx confidences
    return torch.cat([norm_xy, conf[..., None]], dim=-1)
# ================== AZ/AY Transformations ================== #
def compute_T_ayf2az(joints, inverse=False):
    """Transform between the az frame and an "az + facing-forward" frame.

    The rotation aligns x with the person's right-to-left direction (sum of
    hip and shoulder vectors, z-up convention); the translation is the root's
    ground-plane position.

    Args:
        joints: (B, J, 3), in the start-frame, az-coordinate.
            NOTE(review): assumes joints 1/2 are the hips and 16/17 the
            shoulders (SMPL-style ordering) — confirm with the caller.
        inverse: if True, return the inverse transform
    Returns:
        if inverse == False:
            T_ayf2az: (B, 4, 4)
        else :
            T_az2ayf: (B, 4, 4)
    """
    # root xy position; z stays 0 so height is untouched
    t_ayf2az = joints[:, 0, :].detach().clone()
    t_ayf2az[:, 2] = 0  # do not modify z
    RL_xy_h = (
        joints[:, 1, [0, 1]] - joints[:, 2, [0, 1]]
    )  # (B, 2), hip point to left side
    RL_xy_s = (
        joints[:, 16, [0, 1]] - joints[:, 17, [0, 1]]
    )  # (B, 2), shoulder point to left side
    RL_xy = RL_xy_h + RL_xy_s
    I_mask = (
        RL_xy.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))
    # orthonormal basis: x = right->left, y = world z (up), z = x × y
    x_dir = F.pad(F.normalize(RL_xy, 2, -1), (0, 1), value=0)  # (B, 3)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 2] = 1
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayf2az = torch.stack([x_dir, y_dir, z_dir], dim=-1)  # (B, 3, 3)
    R_ayf2az[I_mask] = torch.eye(3).to(R_ayf2az)  # ambiguous samples: identity
    if inverse:
        R_az2ayf = R_ayf2az.transpose(1, 2)  # (B, 3, 3)
        t_az2ayf = -einsum(R_ayf2az, t_ayf2az, "b i j , b i -> b j")  # (B, 3)
        return transform_mat(R_az2ayf, t_az2ayf)
    else:
        return transform_mat(R_ayf2az, t_ayf2az)
def compute_T_ayfz2ay(joints, inverse=False):
    """Transform between the ay frame and an "ay + facing-z" frame.

    Same construction as compute_T_ayf2az but with y as the up axis: x is
    the person's right-to-left direction measured in the xz-plane.

    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate.
            NOTE(review): assumes joints 1/2 are the hips and 16/17 the
            shoulders (SMPL-style ordering) — confirm with the caller.
        inverse: if True, return the inverse transform
    Returns:
        if inverse == False:
            T_ayfz2ay: (B, 4, 4)
        else :
            T_ay2ayfz: (B, 4, 4)
    """
    # root xz position; y stays 0 so height is untouched
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0  # do not modify y
    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )  # (B, 2), hip point to left side
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )  # (B, 2), shoulder point to left side
    RL_xz = RL_xz_h + RL_xz_s
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))
    # orthonormal basis: x = right->left (xz-plane), y = up, z = x × y
    x_dir = torch.zeros_like(t_ayfz2ay)  # (B, 3)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1  # (B, 3)
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    R_ayfz2ay = torch.stack([x_dir, y_dir, z_dir], dim=-1)  # (B, 3, 3)
    R_ayfz2ay[I_mask] = torch.eye(3).to(R_ayfz2ay)  # ambiguous samples: identity
    if inverse:
        R_ay2ayfz = R_ayfz2ay.transpose(1, 2)
        t_ay2ayfz = -einsum(R_ayfz2ay, t_ayfz2ay, "b i j , b i -> b j")
        return transform_mat(R_ay2ayfz, t_ay2ayfz)
    else:
        return transform_mat(R_ayfz2ay, t_ayfz2ay)
def compute_T_ay2ayrot(joints):
    """Build a random-yaw transform pivoting on the root's ground position.

    Non-deterministic: draws a uniform yaw angle per batch element.

    Args:
        joints: (B, J, 3), in the start-frame, ay-coordinate
    Returns:
        T_ay2ayrot: (B, 4, 4)
    """
    # rotation pivot: root projected onto the ground (y zeroed)
    t_ayrot2ay = joints[:, 0, :].detach().clone()
    t_ayrot2ay[:, 1] = 0  # do not modify y
    B = joints.shape[0]
    euler_angle = torch.zeros((B, 3), device=joints.device)
    yrot_angle = torch.rand((B,), device=joints.device) * 2 * torch.pi
    euler_angle[:, 0] = yrot_angle  # "YXZ" convention: first slot is the Y angle
    R_ay2ayrot = euler_angles_to_matrix(euler_angle, "YXZ")  # (B, 3, 3)
    R_ayrot2ay = R_ay2ayrot.transpose(1, 2)
    # einsum contracts the first index (R^T t), i.e. t = -R_ay2ayrot @ t_ayrot2ay
    t_ay2ayrot = -einsum(R_ayrot2ay, t_ayrot2ay, "b i j , b i -> b j")
    return transform_mat(R_ay2ayrot, t_ay2ayrot)
def compute_root_quaternion_ay(joints):
    """Compute the root facing direction as a quaternion in ay-coordinates.

    Args:
        joints: (*, J, 3), in the start-frame, ay-coordinate.
            NOTE(review): assumes joints 1/2 are the hips and 16/17 the
            shoulders (SMPL-style ordering) — confirm with the caller.
    Returns:
        root_quat: (*, 4) rotation taking the +z axis to the facing direction
    """
    joints_shape = joints.shape
    joints = joints.reshape((-1,) + joints_shape[-2:])  # flatten leading dims
    t_ayfz2ay = joints[:, 0, :].detach().clone()
    t_ayfz2ay[:, 1] = 0  # do not modify y
    RL_xz_h = (
        joints[:, 1, [0, 2]] - joints[:, 2, [0, 2]]
    )  # (B, 2), hip point to left side
    RL_xz_s = (
        joints[:, 16, [0, 2]] - joints[:, 17, [0, 2]]
    )  # (B, 2), shoulder point to left side
    RL_xz = RL_xz_h + RL_xz_s
    I_mask = (
        RL_xz.pow(2).sum(-1) < 1e-4
    )  # do not rotate, when can't decided the face direction
    if I_mask.sum() > 0:
        Log.warn("{} samples can't decide the face direction".format(I_mask.sum()))
    # facing direction z = x × y with x = right->left and y = up
    x_dir = torch.zeros_like(t_ayfz2ay)  # (B, 3)
    x_dir[:, [0, 2]] = F.normalize(RL_xz, 2, -1)
    y_dir = torch.zeros_like(x_dir)
    y_dir[..., 1] = 1  # (B, 3)
    z_dir = torch.cross(x_dir, y_dir, dim=-1)
    z_dir[..., 2] += 1e-9  # nudge to avoid a degenerate zero vector in qbetween
    pos_z_vec = torch.tensor([0, 0, 1]).to(joints.device).float()  # (3,)
    root_quat = qbetween(pos_z_vec[None], z_dir)  # (B, 4)
    root_quat = root_quat.reshape(joints_shape[:-2] + (4,))
    return root_quat
# ================== Transformations between two sets of features ================== #
def similarity_transform_batch(S1, S2):
    """
    Computes a similarity transform (sR, t) that solves the orthogonal Procrustes problem.

    Args:
        S1, S2: (*, L, 3) corresponding point sets
    Returns:
        ((scale, R), t): scale (*, 1, 1), R (*, 3, 3), t (*, 3, 1) such that
        scale * R @ S1 + t best aligns S1 to S2 in the least-squares sense
    """
    assert S1.shape == S2.shape
    S_shape = S1.shape
    # flatten leading batch dims and move points to columns: (B, 3, L)
    S1 = S1.reshape(-1, *S_shape[-2:])
    S2 = S2.reshape(-1, *S_shape[-2:])
    S1 = S1.transpose(-2, -1)
    S2 = S2.transpose(-2, -1)
    # --- The code is borrowed from WHAM ---
    # 1. Remove mean.
    mu1 = S1.mean(axis=-1, keepdims=True)  # axis is along N, S1(B, 3, N)
    mu2 = S2.mean(axis=-1, keepdims=True)
    X1 = S1 - mu1
    X2 = S2 - mu2
    # 2. Compute variance of X1 used for scale.
    var1 = torch.sum(X1**2, dim=1).sum(dim=1)
    # 3. The outer product of X1 and X2.
    K = X1.bmm(X2.permute(0, 2, 1))
    # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are
    # singular vectors of K.
    U, s, V = torch.svd(K)
    # Construct Z that fixes the orientation of R to get det(R)=1.
    Z = torch.eye(U.shape[1], device=S1.device).unsqueeze(0)
    Z = Z.repeat(U.shape[0], 1, 1)
    Z[:, -1, -1] *= torch.sign(torch.det(U.bmm(V.permute(0, 2, 1))))
    # Construct R.
    R = V.bmm(Z.bmm(U.permute(0, 2, 1)))
    # 5. Recover scale.
    scale = torch.cat([torch.trace(x).unsqueeze(0) for x in R.bmm(K)]) / var1
    # 6. Recover translation.
    t = mu2 - (scale.unsqueeze(-1).unsqueeze(-1) * (R.bmm(mu1)))
    # -------
    # reshape back
    # sR = scale[:, None, None] * R
    # sR = sR.reshape(*S_shape[:-2], 3, 3)
    scale = scale.reshape(*S_shape[:-2], 1, 1)
    R = R.reshape(*S_shape[:-2], 3, 3)
    t = t.reshape(*S_shape[:-2], 3, 1)
    return (scale, R), t
def kabsch_algorithm_batch(X1, X2):
    """
    Computes a rigid transform (R, t) aligning X1 to X2 (Kabsch algorithm).

    Args:
        X1, X2: (*, L, 3) corresponding point sets
    Returns:
        R: (*, 3, 3), t: (*, 3, 1) such that R @ X1 + t best aligns X1 to X2
    """
    assert X1.shape == X2.shape
    X_shape = X1.shape
    X1 = X1.reshape(-1, *X_shape[-2:])
    X2 = X2.reshape(-1, *X_shape[-2:])
    # 1. Compute centroids
    centroid_X1 = torch.mean(X1, dim=-2, keepdim=True)
    centroid_X2 = torch.mean(X2, dim=-2, keepdim=True)
    # 2. Center both point sets
    X1_centered = X1 - centroid_X1
    X2_centered = X2 - centroid_X2
    # 3. Cross-covariance matrix
    H = torch.matmul(X1_centered.transpose(-2, -1), X2_centered)
    # 4. Singular value decomposition
    U, S, Vt = torch.linalg.svd(H)
    # 5. Rotation matrix
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))
    # Fix reflections so det(R) = +1
    d = (torch.det(R) < 0).unsqueeze(-1).unsqueeze(-1)
    Vt = torch.where(d, -Vt, Vt)
    R = torch.matmul(Vt.transpose(-2, -1), U.transpose(-2, -1))
    # 6. Translation vector
    t = centroid_X2.transpose(-2, -1) - torch.matmul(R, centroid_X1.transpose(-2, -1))
    # -------
    # reshape back
    R = R.reshape(*X_shape[:-2], 3, 3)
    t = t.reshape(*X_shape[:-2], 3, 1)
    return R, t
# ===== WHAM cam_angvel ===== #
def compute_cam_angvel(R_w2c, padding_last=True):
    """Per-frame camera angular velocity encoded as rotation-6d (WHAM-style).

    R_w2c : (F, 3, 3)
    Returns: (F, 6) float; the last frame is repeated to keep length F.
    """
    # relative rotation R with R @ R[t] = R[t+1], hence R = R[t+1] @ R[t]^T
    rel_rot = R_w2c[1:] @ R_w2c[:-1].transpose(-1, -2)
    cam_angvel = matrix_to_rotation_6d(rel_rot)  # (F-1, 6)
    assert padding_last
    padded = torch.cat([cam_angvel, cam_angvel[-1:]], dim=0)  # (F, 6)
    return padded.float()
def compute_cam_tvel(t_w2c, padding_last=True):
    """Per-frame camera translational velocity (finite difference).

    t_w2c : (F, 3)
    Returns: (F, 3) float; the last frame is repeated to keep length F.
    """
    assert padding_last
    diffs = t_w2c[1:] - t_w2c[:-1]
    return torch.cat([diffs, diffs[-1:]], dim=0).float()  # (F, 3)
def compute_cam_tcw2_vel(T_w2c, padding_last=True):
    """Velocity of the camera center (c2w translation) across frames.

    T_w2c : (F, 4, 4)
    Returns: (F, 3) float; the last frame is repeated to keep length F.
    """
    assert padding_last
    cam_centers = T_w2c.inverse()[:, :3, 3]  # camera positions in world coords
    diffs = cam_centers[1:] - cam_centers[:-1]
    return torch.cat([diffs, diffs[-1:]], dim=0).float()  # (F, 3)
def ransac_gravity_vec(xyz, num_iterations=100, threshold=0.05, verbose=False):
    """Estimate a consensus direction from candidate vectors via RANSAC.

    Args:
        xyz: (L, 3) candidate gravity vectors
        num_iterations: number of random sampling rounds
        threshold: inlier angular threshold in radians
        verbose: print the final inlier count
    Returns:
        (mean of the largest inlier set (3,), inlier vectors (M, 3))
    """
    # xyz: (L, 3)
    N = xyz.shape[0]
    max_inliers = []
    norms = xyz.norm(dim=-1)  # (L,)
    for _ in range(num_iterations):
        # random select a sample
        sample_index = np.random.randint(N)
        sample = xyz[sample_index]  # (3,)
        # angle between each vector and the sample via the cosine formula
        dot_product = (xyz * sample).sum(dim=-1)  # (L,)
        # fix: cos = <a,b> / (|a| |b|); the original's operator precedence
        # computed dot / |a| * |b| (multiplying instead of dividing by |b|)
        cos_sim = dot_product / (norms * norms[sample_index])  # (L,)
        cos_sim = torch.clamp(cos_sim, -1, 1)  # prevent numerical errors
        angles = torch.acos(cos_sim)
        # determine the inliers
        inliers = xyz[angles < threshold]
        if len(inliers) > len(max_inliers):
            max_inliers = inliers
        if len(max_inliers) == N:
            break
    if verbose:
        print(f"Inliers: {len(max_inliers)} / {N}")
    result = max_inliers.mean(dim=0)
    return result, max_inliers
def sequence_best_cammat(w_j3d, c_j3d, cam_rot):
    """Pick the single camera matrix that best explains the whole sequence.

    Assumes a static camera: a candidate camera is built from every frame,
    each is scored by its 2D reprojection error over ALL frames, and the
    lowest-error candidate is returned.

    Args:
        w_j3d: (L, J, 3) joints in world coordinates
        c_j3d: (L, J, 3) joints in camera coordinates
        cam_rot: (L, 3, 3) per-frame camera rotations
    Returns:
        (cam_mat (4, 4) of the winning frame, winning frame index)
    """
    L, J, _ = w_j3d.shape
    root_in_w = w_j3d[:, 0]  # (L, 3)
    root_in_c = c_j3d[:, 0]  # (L, 3)
    # candidate camera per frame: rotation placed at the world root, then
    # shifted so the root lands at its observed camera-space position
    # (see genmo.utils.matrix helpers for the exact conventions)
    cam_mat = matrix.get_TRS(cam_rot, root_in_w)  # (L, 4, 4)
    cam_pos = matrix.get_position_from(-root_in_c[:, None], cam_mat)[:, 0]  # (L, 3)
    cam_mat = matrix.set_position(cam_mat, cam_pos)  # (L, 4, 4)
    # evaluate every candidate camera against every frame's joints
    w_j3d_expand = w_j3d[None].expand(L, -1, -1, -1)  # (L, L, J, 3)
    w_j3d_expand = w_j3d_expand.reshape(L, -1, 3)  # (L, L*J, 3)
    # get reproject error
    w_j3d_expand_in_c = matrix.get_relative_position_to(
        w_j3d_expand, cam_mat
    )  # (L, L*J, 3)
    w_j2d_expand_in_c = project_p2d(w_j3d_expand_in_c)  # (L, L*J, 2)
    w_j2d_expand_in_c = w_j2d_expand_in_c.reshape(L, L, J, 2)  # (L, L, J, 2)
    c_j2d = project_p2d(c_j3d)  # (L, J, 2)
    error = w_j2d_expand_in_c - c_j2d[None]  # (L, L, J, 2)
    error = error.norm(dim=-1).mean(dim=-1)  # (L, L)
    error = error.mean(dim=-1)  # (L,)
    ind = error.argmin()
    return cam_mat[ind], ind
def get_sequence_cammat(w_j3d, c_j3d, cam_rot):
    """Build per-frame camera matrices from world/camera joints and rotations.

    w_j3d: (L, J, 3); c_j3d: (L, J, 3); cam_rot: (L, 3, 3)
    Returns: (L, 4, 4) camera matrices
    """
    world_root = w_j3d[:, 0]  # (L, 3)
    cam_root = c_j3d[:, 0]  # (L, 3)
    T = matrix.get_TRS(cam_rot, world_root)  # (L, 4, 4)
    cam_pos = matrix.get_position_from(-cam_root[:, None], T)[:, 0]  # (L, 3)
    return matrix.set_position(T, cam_pos)  # (L, 4, 4)
def ransac_vec(vel, min_multiply=20, verbose=False):
    """Remove outlier velocities: average the row with the most close neighbors.

    vel: (L, 3)
    Returns: (mean inlier velocity (3,), inlier mask (L,) of the winning row)
    """
    N = vel.shape[0]
    pairwise = (vel[None] - vel[:, None]).norm(dim=-1)  # (L, L)
    # mask the zero diagonal when searching for the smallest pairwise distance
    off_diag = pairwise + torch.eye(N, device=vel.device) * 1e6
    threshold = off_diag.min() * min_multiply
    inner_mask = pairwise < threshold  # (L, L)
    ind = inner_mask.sum(dim=-1).argmax()  # row with most neighbors wins
    result = vel[inner_mask[ind]].mean(dim=0)  # (3,)
    if verbose:
        print(inner_mask[ind].sum().item())
    return result, inner_mask[ind]
def as_identity(R):
    """Snap near-identity rotations to the exact identity (in place).

    R: (..., 3, 3); returns the same tensor, modified in place.
    """
    near_identity = matrix_to_axis_angle(R).norm(dim=-1) < 1e-5
    eye = torch.eye(3)[None].expand(near_identity.sum(), -1, -1).to(R)
    R[near_identity] = eye
    return R
def normalize_T_w2c(T_w2c):
    """Re-anchor a world-to-camera trajectory so frame 0 becomes the identity.

    Args:
        T_w2c: (L, 4, 4) or (4, 4) world-to-camera transforms
    Returns:
        norm_T_w2c: (L, 4, 4) expressed relative to the first camera pose,
        with near-identity rotations snapped to exact identity.
    """
    if T_w2c.ndim == 2:
        T_w2c = T_w2c[None]  # promote a single pose to a length-1 sequence
    L = T_w2c.shape[0]
    device = T_w2c.device
    norm_T_c2w = torch.eye(4)[None].repeat(L, 1, 1).to(device)
    T_c2w = T_w2c.inverse()
    R_c2w = as_identity(T_c2w[:, :3, :3])  # snap tiny rotations to identity
    t_c2w = T_c2w[:, :3, 3]
    # align the first frame
    R0_c2w = R_c2w[:1]
    t0_c2w = t_c2w[:1]
    norm_R_c2w = R0_c2w.mT @ R_c2w
    norm_t_c2w = (R0_c2w.mT @ (t_c2w - t0_c2w)[..., None])[..., 0]
    norm_T_c2w[:, :3, :3] = norm_R_c2w
    norm_T_c2w[:, :3, 3] = norm_t_c2w
    norm_T_w2c = norm_T_c2w.inverse()
    norm_T_w2c[:, :3, :3] = as_identity(norm_T_w2c[:, :3, :3])
    norm_T_w2c[:, 3, :3] = 0  # clean numerical noise in the bottom row
    return norm_T_w2c