| |
| from dataclasses import dataclass |
| from typing import Literal |
|
|
| import torch |
| import copy |
| from jaxtyping import Float, Int64 |
| from torch import Tensor |
| import random |
| from .view_sampler import ViewSampler |
|
|
|
|
| @dataclass |
| class ViewSamplerRankCfg: |
| name: Literal["rank"] |
| num_context_views: int |
| num_target_views: int |
| min_distance_between_context_views: int |
| max_distance_between_context_views: int |
| min_distance_to_context_views: int |
| warm_up_steps: int |
| initial_min_distance_between_context_views: int |
| initial_max_distance_between_context_views: int |
| max_img_per_gpu: int |
|
|
|
|
| def rotation_angle(R1, R2): |
| |
| R = R1.T @ R2 |
| |
| val = (torch.trace(R) - 1) / 2 |
| val = torch.clamp(val, -1.0, 1.0) |
| angle_rad = torch.acos(val) |
| angle_deg = angle_rad * 180 / torch.pi |
| return angle_deg |
|
|
| def extrinsic_distance(extrinsic1, extrinsic2, lambda_t=1.0): |
| R1, t1 = extrinsic1[:3, :3], extrinsic1[:3, 3] |
| R2, t2 = extrinsic2[:3, :3], extrinsic2[:3, 3] |
| rot_diff = rotation_angle(R1, R2) / 180 |
| |
| center_diff = torch.norm(t1 - t2) |
| return rot_diff + lambda_t * center_diff |
|
|
| def rotation_angle_batch(R1, R2): |
| |
| |
| |
| |
| |
| |
| R1_t = R1.transpose(-2, -1)[:, None, :, :] |
| R2_b = R2[None, :, :, :] |
| R_mult = torch.matmul(R1_t, R2_b) |
| |
| trace_vals = R_mult[..., 0, 0] + R_mult[..., 1, 1] + R_mult[..., 2, 2] |
| val = (trace_vals - 1) / 2 |
| val = torch.clamp(val, -1.0, 1.0) |
| angle_rad = torch.acos(val) |
| angle_deg = angle_rad * 180 / torch.pi |
| return angle_deg / 180.0 |
|
|
| def extrinsic_distance_batch(extrinsics, lambda_t=1.0): |
| |
| |
| R = extrinsics[:, :3, :3] |
| t = extrinsics[:, :3, 3] |
| |
| rot_diff = rotation_angle_batch(R, R) |
| |
| |
| |
| t_i = t[:, None, :] |
| t_j = t[None, :, :] |
| trans_diff = torch.norm(t_i - t_j, dim=2) |
| dists = rot_diff + lambda_t * trans_diff |
| return dists |
|
|
|
|
| def compute_ranking(extrinsics, lambda_t=1.0, normalize=True, batched=True): |
| |
| if normalize: |
| extrinsics = copy.deepcopy(extrinsics) |
| camera_center = copy.deepcopy(extrinsics[:, :3, 3]) |
| camera_center_scale = torch.norm(camera_center, dim=1) |
| avg_scale = torch.mean(camera_center_scale) |
| extrinsics[:, :3, 3] = extrinsics[:, :3, 3] / avg_scale |
|
|
| |
| if batched: |
| dists = extrinsic_distance_batch(extrinsics, lambda_t=lambda_t) |
| else: |
| N = extrinsics.shape[0] |
| dists = torch.zeros((N, N), device=extrinsics.device) |
| for i in range(N): |
| for j in range(N): |
| dists[i,j] = extrinsic_distance(extrinsics[i], extrinsics[j], lambda_t=lambda_t) |
| ranking = torch.argsort(dists, dim=1) |
| return ranking, dists |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| class ViewSamplerRank(ViewSampler[ViewSamplerRankCfg]): |
| |
| def sample( |
| self, |
| scene: str, |
| num_context_views: int, |
| extrinsics: Float[Tensor, "view 4 4"], |
| intrinsics: Float[Tensor, "view 3 3"], |
| device: torch.device = torch.device("cpu"), |
| ) -> tuple[ |
| Int64[Tensor, " context_view"], |
| Int64[Tensor, " target_view"], |
| Float[Tensor, " overlap"], |
| ]: |
| num_views, _, _ = extrinsics.shape |
| |
| extrinsics = extrinsics.clone() |
| |
| ranking, dists = compute_ranking(extrinsics, lambda_t=1.0, normalize=True, batched=True) |
| reference_view = random.sample(range(num_views), 1)[0] |
| |
| refview_ranking = ranking[reference_view] |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| min_gap, max_gap = self.num_ctxt_gap_mapping[num_context_views] |
|
|
| |
| |
| |
| max_gap = min(max_gap, num_views-1) |
| |
| index_context_left = reference_view |
| rightmost_index = random.sample(range(min_gap, max_gap + 1), 1)[0] |
| |
| |
| |
|
|
| index_context_right = refview_ranking[rightmost_index].item() |
| |
| middle_indices = refview_ranking[1: rightmost_index].tolist() |
| index_target = random.sample(middle_indices, self.num_target_views) |
| |
| remaining_indices = [idx for idx in middle_indices if idx not in index_target] |
| |
| |
| extra_views = [] |
| num_extra_views = num_context_views - 2 |
| if num_extra_views > 0 and remaining_indices: |
| extra_views = random.sample(remaining_indices, min(num_extra_views, len(remaining_indices))) |
| else: |
| extra_views = [] |
| |
| overlap = torch.zeros(1) |
|
|
| return ( |
| torch.tensor((index_context_left, *extra_views, index_context_right)), |
| torch.tensor(index_target), |
| overlap |
| ) |
| |
|
|
| @property |
| def num_context_views(self) -> int: |
| return self.cfg.num_context_views |
|
|
| @property |
| def num_target_views(self) -> int: |
| return self.cfg.num_target_views |
| |
| @property |
| def num_ctxt_gap_mapping_target(self) -> dict: |
| mapping = dict() |
| for num_ctxt in range(2, self.cfg.num_context_views + 1): |
| mapping[num_ctxt] = [max(num_ctxt * 2, self.cfg.num_target_views + num_ctxt), max(self.cfg.num_target_views + num_ctxt, min(num_ctxt ** 2, self.cfg.max_distance_between_context_views))] |
| return mapping |
| |
| @property |
| def num_ctxt_gap_mapping(self) -> dict: |
| mapping = dict() |
| for num_ctxt in range(2, self.cfg.num_context_views + 1): |
| mapping[num_ctxt] = [min(num_ctxt * 3, self.cfg.min_distance_between_context_views), min(max(num_ctxt * 5, num_ctxt ** 2), self.cfg.max_distance_between_context_views)] |
| return mapping |
|
|
| |
|
|