|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import List |
|
|
|
|
|
import numpy as np |
|
|
import torch |
|
|
from evo.core.trajectory import PosePath3D |
|
|
|
|
|
from depth_anything_3.utils.geometry import affine_inverse, affine_inverse_np |
|
|
|
|
|
|
|
|
def batch_apply_alignment_to_enc( |
|
|
rots: torch.Tensor, trans: torch.Tensor, scales: torch.Tensor, enc_list: List[torch.Tensor] |
|
|
): |
|
|
pass |
|
|
|
|
|
|
|
|
def batch_apply_alignment_to_ext( |
|
|
rots: torch.Tensor, trans: torch.Tensor, scales: torch.Tensor, ext: torch.Tensor |
|
|
): |
|
|
device, _ = ext.device, ext.dtype |
|
|
if ext.shape[-2:] == (3, 4): |
|
|
pad = torch.zeros((*ext.shape[:-2], 4, 4), dtype=ext.dtype, device=device) |
|
|
pad[..., :3, :4] = ext |
|
|
pad[..., 3, 3] = 1.0 |
|
|
ext = pad |
|
|
pose_est = affine_inverse(ext) |
|
|
pose_new_align_rot = rots[:, None] @ pose_est[..., :3, :3] |
|
|
pose_new_align_trans = ( |
|
|
scales[:, None, None] * (rots[:, None] @ pose_est[..., :3, 3:])[..., 0] + trans[:, None] |
|
|
) |
|
|
pose_new_align = torch.zeros_like(ext) |
|
|
pose_new_align[..., :3, :3] = pose_new_align_rot |
|
|
pose_new_align[..., :3, 3] = pose_new_align_trans |
|
|
pose_new_align[..., 3, 3] = 1.0 |
|
|
return affine_inverse(pose_new_align)[:, :3] |
|
|
|
|
|
|
|
|
def batch_align_poses_umeyama(ext_ref: torch.Tensor, ext_est: torch.Tensor): |
|
|
device, dtype = ext_ref.device, ext_ref.dtype |
|
|
assert ext_ref.dtype in [torch.float32, torch.float64] |
|
|
assert ext_est.dtype in [torch.float32, torch.float64] |
|
|
assert ext_ref.requires_grad is False |
|
|
assert ext_est.requires_grad is False |
|
|
rots, trans, scales = [], [], [] |
|
|
for b in range(ext_ref.shape[0]): |
|
|
r, t, s = align_poses_umeyama(ext_ref[b].cpu().numpy(), ext_est[b].cpu().numpy()) |
|
|
rots.append(torch.from_numpy(r).to(device=device, dtype=dtype)) |
|
|
trans.append(torch.from_numpy(t).to(device=device, dtype=dtype)) |
|
|
scales.append(torch.tensor(s, device=device, dtype=dtype)) |
|
|
return torch.stack(rots), torch.stack(trans), torch.stack(scales) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _to44(ext): |
|
|
if ext.shape[1] == 3: |
|
|
out = np.eye(4)[None].repeat(len(ext), 0) |
|
|
out[:, :3, :4] = ext |
|
|
return out |
|
|
return ext |
|
|
|
|
|
|
|
|
def _poses_from_ext(ext_ref, ext_est): |
|
|
ext_ref = _to44(ext_ref) |
|
|
ext_est = _to44(ext_est) |
|
|
pose_ref = affine_inverse_np(ext_ref) |
|
|
pose_est = affine_inverse_np(ext_est) |
|
|
return pose_ref, pose_est |
|
|
|
|
|
|
|
|
def _umeyama_sim3_from_paths(pose_ref, pose_est): |
|
|
path_ref = PosePath3D(poses_se3=pose_ref.copy()) |
|
|
path_est = PosePath3D(poses_se3=pose_est.copy()) |
|
|
r, t, s = path_est.align(path_ref, correct_scale=True) |
|
|
pose_est_aligned = np.stack(path_est.poses_se3) |
|
|
return r, t, s, pose_est_aligned |
|
|
|
|
|
|
|
|
def _apply_sim3_to_poses(poses, r, t, s): |
|
|
out = poses.copy() |
|
|
Ri = poses[:, :3, :3] |
|
|
ti = poses[:, :3, 3] |
|
|
out[:, :3, :3] = r @ Ri |
|
|
out[:, :3, 3] = (r @ (s * ti.T)).T + t |
|
|
return out |
|
|
|
|
|
|
|
|
def _median_nn_thresh(pose_ref, pose_est_aligned): |
|
|
P_ref = pose_ref[:, :3, 3] |
|
|
P_est = pose_est_aligned[:, :3, 3] |
|
|
dists = [] |
|
|
for p in P_est: |
|
|
dd = np.linalg.norm(P_ref - p[None, :], axis=1) |
|
|
dists.append(dd.min()) |
|
|
return float(np.median(dists)) if dists else 0.0 |
|
|
|
|
|
|
|
|
def _ransac_align_sim3( |
|
|
pose_ref, pose_est, sub_n=None, inlier_thresh=None, max_iters=10, random_state=None |
|
|
): |
|
|
rng = np.random.default_rng(random_state) |
|
|
N = pose_ref.shape[0] |
|
|
idx_all = np.arange(N) |
|
|
if sub_n is None: |
|
|
sub_n = max(3, (N + 1) // 2) |
|
|
else: |
|
|
sub_n = max(3, min(sub_n, N)) |
|
|
|
|
|
|
|
|
r0, t0, s0, pose_est0 = _umeyama_sim3_from_paths(pose_ref, pose_est) |
|
|
if inlier_thresh is None: |
|
|
inlier_thresh = _median_nn_thresh(pose_ref, pose_est0) |
|
|
|
|
|
P_ref_all = pose_ref[:, :3, 3] |
|
|
|
|
|
best_model = (r0, t0, s0) |
|
|
best_inliers = None |
|
|
best_score = (-1, np.inf) |
|
|
|
|
|
for _ in range(max_iters): |
|
|
sample = rng.choice(idx_all, size=sub_n, replace=False) |
|
|
try: |
|
|
r, t, s, _ = _umeyama_sim3_from_paths(pose_ref[sample], pose_est[sample]) |
|
|
except Exception: |
|
|
continue |
|
|
pose_h = _apply_sim3_to_poses(pose_est, r, t, s) |
|
|
P_h = pose_h[:, :3, 3] |
|
|
errs = np.linalg.norm(P_h - P_ref_all, axis=1) |
|
|
inliers = errs <= inlier_thresh |
|
|
k = int(inliers.sum()) |
|
|
mean_err = float(errs[inliers].mean()) if k > 0 else np.inf |
|
|
if (k > best_score[0]) or (k == best_score[0] and mean_err < best_score[1]): |
|
|
best_score = (k, mean_err) |
|
|
best_model = (r, t, s) |
|
|
best_inliers = inliers |
|
|
|
|
|
|
|
|
if best_inliers is not None and best_inliers.sum() >= 3: |
|
|
r, t, s, _ = _umeyama_sim3_from_paths(pose_ref[best_inliers], pose_est[best_inliers]) |
|
|
else: |
|
|
r, t, s = best_model |
|
|
return r, t, s |
|
|
|
|
|
|
|
|
def align_poses_umeyama( |
|
|
ext_ref: np.ndarray, |
|
|
ext_est: np.ndarray, |
|
|
return_aligned=False, |
|
|
ransac=False, |
|
|
sub_n=None, |
|
|
inlier_thresh=None, |
|
|
ransac_max_iters=10, |
|
|
random_state=None, |
|
|
): |
|
|
""" |
|
|
Align estimated trajectory to reference using Umeyama Sim(3). |
|
|
Default no RANSAC; if ransac=True, use RANSAC (max iterations default 10). |
|
|
- sub_n defaults to half the number of frames (rounded up, at least 3) |
|
|
- inlier_thresh defaults to median of "distance from each estimated pose to |
|
|
nearest reference pose after pre-alignment" |
|
|
Returns rotation (3x3), translation (3,), scale; optionally returns aligned extrinsics (4x4). |
|
|
""" |
|
|
pose_ref, pose_est = _poses_from_ext(ext_ref, ext_est) |
|
|
|
|
|
if not ransac: |
|
|
r, t, s, pose_est_aligned = _umeyama_sim3_from_paths(pose_ref, pose_est) |
|
|
else: |
|
|
r, t, s = _ransac_align_sim3( |
|
|
pose_ref, |
|
|
pose_est, |
|
|
sub_n=sub_n, |
|
|
inlier_thresh=inlier_thresh, |
|
|
max_iters=ransac_max_iters, |
|
|
random_state=random_state, |
|
|
) |
|
|
pose_est_aligned = _apply_sim3_to_poses(pose_est, r, t, s) |
|
|
|
|
|
if return_aligned: |
|
|
ext_est_aligned = affine_inverse_np(pose_est_aligned) |
|
|
return r, t, s, ext_est_aligned |
|
|
return r, t, s |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def apply_umeyama_alignment_to_ext( |
|
|
rot: np.ndarray, |
|
|
trans: np.ndarray, |
|
|
scale: float, |
|
|
ext_est: np.ndarray, |
|
|
) -> np.ndarray: |
|
|
""" |
|
|
Apply Sim(3) (R, t, s) to a batch of world-to-camera extrinsics ext_est. |
|
|
Returns the aligned extrinsics, with the same shape as input. |
|
|
""" |
|
|
|
|
|
|
|
|
if ext_est.shape[-2:] == (3, 4): |
|
|
pad = np.zeros((*ext_est.shape[:-2], 4, 4), dtype=ext_est.dtype) |
|
|
pad[..., :3, :4] = ext_est |
|
|
pad[..., 3, 3] = 1.0 |
|
|
ext_est = pad |
|
|
|
|
|
|
|
|
pose_est = affine_inverse_np(ext_est) |
|
|
R_e = pose_est[..., :3, :3] |
|
|
t_e = pose_est[..., :3, 3] |
|
|
|
|
|
|
|
|
R_a = np.einsum("ij,...jk->...ik", rot, R_e) |
|
|
t_a = scale * np.einsum("ij,...j->...i", rot, t_e) + trans |
|
|
|
|
|
|
|
|
pose_a = np.zeros_like(pose_est) |
|
|
pose_a[..., :3, :3] = R_a |
|
|
pose_a[..., :3, 3] = t_a |
|
|
pose_a[..., 3, 3] = 1.0 |
|
|
|
|
|
|
|
|
return affine_inverse_np(pose_a) |
|
|
|
|
|
|
|
|
def transform_points_sim3(points, rot, trans, scale, inverse=False): |
|
|
""" |
|
|
Sim(3) transform point cloud |
|
|
points: (N, 3) |
|
|
rot: (3, 3) |
|
|
trans: (3,) or (1, 3) |
|
|
scale: float |
|
|
inverse: Whether to do inverse transform (ref->est) |
|
|
Returns: (N, 3) |
|
|
""" |
|
|
if not inverse: |
|
|
|
|
|
return scale * (points @ rot.T) + trans |
|
|
else: |
|
|
|
|
|
return ((points - trans) @ rot) / scale |
|
|
|
|
|
|
|
|
def _rand_rot(): |
|
|
u1, u2, u3 = np.random.rand(3) |
|
|
q = np.array( |
|
|
[ |
|
|
np.sqrt(1 - u1) * np.sin(2 * np.math.pi * u2), |
|
|
np.sqrt(1 - u1) * np.cos(2 * np.math.pi * u2), |
|
|
np.sqrt(u1) * np.sin(2 * np.math.pi * u3), |
|
|
np.sqrt(u1) * np.cos(2 * np.math.pi * u3), |
|
|
] |
|
|
) |
|
|
w, x, y, z = q |
|
|
return np.array( |
|
|
[ |
|
|
[1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)], |
|
|
[2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)], |
|
|
[2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)], |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
def _rand_pose(): |
|
|
R, t = _rand_rot(), np.random.randn(3) |
|
|
P = np.eye(4) |
|
|
P[:3, :3] = R |
|
|
P[:3, 3] = t |
|
|
return P |
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
np.random.seed(42) |
|
|
|
|
|
N = 8 |
|
|
pose_ref = np.stack([_rand_pose() for _ in range(N)]) |
|
|
rot_gt = _rand_rot() |
|
|
scale_gt = 2.3 |
|
|
trans_gt = np.random.randn(3) |
|
|
|
|
|
pose_est = np.zeros_like(pose_ref) |
|
|
for i in range(N): |
|
|
R = pose_ref[i][:3, :3] |
|
|
t = pose_ref[i][:3, 3] |
|
|
pose_est[i][:3, :3] = rot_gt @ R |
|
|
pose_est[i][:3, 3] = scale_gt * (rot_gt @ t) + trans_gt |
|
|
pose_est[i][3, 3] = 1.0 |
|
|
|
|
|
ext_ref = affine_inverse_np(pose_ref) |
|
|
ext_est = affine_inverse_np(pose_est) |
|
|
|
|
|
r_est, t_est, s_est = align_poses_umeyama(ext_ref, ext_est) |
|
|
print("GT scale:", scale_gt, "Estimated:", s_est) |
|
|
print("GT trans:", trans_gt, "Estimated:", t_est) |
|
|
print("GT rot:\n", rot_gt, "\nEstimated:\n", r_est) |
|
|
|
|
|
num_points = 100 |
|
|
points_ref = np.random.randn(num_points, 3) |
|
|
|
|
|
points_est = transform_points_sim3(points_ref, rot_gt, trans_gt, scale_gt, inverse=True) |
|
|
|
|
|
points_ref_recovered = transform_points_sim3(points_est, r_est, t_est, s_est, inverse=False) |
|
|
|
|
|
err = np.abs(points_ref_recovered - points_ref) |
|
|
print("Point cloud sim3 transform error (mean abs):", err.mean()) |
|
|
print("Point cloud sim3 transform error (max abs):", err.max()) |
|
|
assert err.mean() < 1e-6, "Mean sim3 transform error too large!" |
|
|
assert err.max() < 1e-5, "Max sim3 transform error too large!" |
|
|
print("Sim(3) point cloud transform & alignment test passed!") |
|
|
|