Spaces:
Running on Zero
Running on Zero
| import torch | |
| from typing import Tuple, Union | |
| from .rotation import quat_to_mat, mat_to_quat | |
def extri_to_pose_encoding360(
    extrinsics: torch.Tensor,
    pose_encoding_type: str = "absT_quaR",
) -> torch.Tensor:
    """Encode camera extrinsics as a translation vector followed by a quaternion.

    The top-left 3x3 block of each extrinsic matrix is treated as the rotation
    R and the first three entries of column 3 as the translation t. R is passed
    to ``mat_to_quat`` and the result is concatenated after t.

    Args:
        extrinsics: Extrinsic matrices whose last two dimensions are 3x4 or
            4x4 ([R|t], optionally with a homogeneous bottom row), typically
            shaped [B, S, 3, 4] or [B, S, 4, 4] (B = batch, S = frames).
            Only the leading 3 rows are read, so both layouts are handled the
            same way. Any number of leading dimensions is accepted by the
            indexing here; support for leading shapes other than [B, S]
            additionally depends on ``mat_to_quat`` (not visible in this
            file - confirm before relying on it).
        pose_encoding_type: Encoding format. Only ``"absT_quaR"`` (absolute
            translation + quaternion rotation) is implemented.

    Returns:
        A float32 tensor of shape [..., 7], assuming ``mat_to_quat`` returns
        4 components per matrix:
            - [..., :3]: the translation column t of the extrinsic matrix,
              copied unchanged (i.e. the extrinsic translation itself, not a
              camera position derived from it).
            - [..., 3:7]: the quaternion produced by ``mat_to_quat``; its
              component order and normalisation are defined by that helper.

    Raises:
        NotImplementedError: If ``pose_encoding_type`` is not ``"absT_quaR"``.
    """
    # Reject unsupported formats before touching the tensor so the caller gets
    # the intended error rather than an unrelated indexing failure.
    if pose_encoding_type != "absT_quaR":
        raise NotImplementedError(
            f"Pose encoding type '{pose_encoding_type}' not supported. "
            "Only 'absT_quaR' is implemented."
        )

    # Ellipsis indexing keeps the [B, S, ...] behaviour and also tolerates
    # other leading shapes; `:3` drops the homogeneous row of 4x4 inputs.
    rotation = extrinsics[..., :3, :3]  # [..., 3, 3]
    translation = extrinsics[..., :3, 3]  # [..., 3]

    quat = mat_to_quat(rotation)

    # Translation first, quaternion second; always returned as float32.
    return torch.cat([translation, quat], dim=-1).float()
def pose_encoding_to_extri360(
    pose_encoding: torch.Tensor,
    pose_encoding_type: str = "absT_quaR",
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Rebuild 4x4 homogeneous extrinsics from a pose encoding.

    Channels 0-2 are read as the translation t and channels 3-6 as a
    quaternion, which ``quat_to_mat`` converts to a rotation matrix R. The
    result is assembled as [R|t] with a [0, 0, 0, 1] row appended. The last
    two channels of the encoding are returned alongside as confidences.

    Args:
        pose_encoding: Encoded poses, typically shaped [B, S, 9]
            (B = batch, S = frames):
                - [..., :3]: translation vector
                - [..., 3:7]: rotation quaternion (layout as expected by
                  ``quat_to_mat``)
                - [..., -2:]: confidence values (translation, rotation)
            Any number of leading dimensions is accepted by the code here,
            provided ``quat_to_mat`` supports them (not visible in this
            file - confirm before relying on it).
            NOTE: if a 7-channel encoding is passed (e.g. the output of
            ``extri_to_pose_encoding360``), ``[..., -2:]`` overlaps the last
            two quaternion components, so the returned ``conf`` is not a
            meaningful confidence in that case.
        pose_encoding_type: Encoding format. Only ``"absT_quaR"`` (absolute
            translation + quaternion rotation) is implemented.

    Returns:
        A tuple ``(extrinsics, conf)``:
            - extrinsics: [..., 4, 4] homogeneous matrices [R|t; 0 0 0 1].
            - conf: [..., 2] slice of the last two encoding channels
              (index 0 translation, index 1 rotation, per the layout above).

    Raises:
        NotImplementedError: If ``pose_encoding_type`` is not ``"absT_quaR"``.
    """
    if pose_encoding_type != "absT_quaR":
        raise NotImplementedError(
            f"Pose encoding type '{pose_encoding_type}' not supported. "
            "Only 'absT_quaR' is implemented."
        )

    translation = pose_encoding[..., :3]  # [..., 3]
    quat = pose_encoding[..., 3:7]  # [..., 4]

    rotation = quat_to_mat(quat)  # expected [..., 3, 3]

    # [R|t]: append the translation as a fourth column.
    extri_3x4 = torch.cat([rotation, translation[..., None]], dim=-1)

    # Build the homogeneous row from the actual leading shape instead of
    # assuming exactly two leading dims, so it matches the ellipsis indexing
    # used above. For [B, S, ...] input this is the same [B, S, 1, 4] view.
    leading_shape = extri_3x4.shape[:-2]
    homogeneous_row = torch.tensor(
        [0, 0, 0, 1],
        device=extri_3x4.device,
        dtype=extri_3x4.dtype,
    ).expand(*leading_shape, 1, 4)

    # Stack along the row axis (dim=-2 is dim 2 for the [B, S, 3, 4] case).
    extrinsics = torch.cat((extri_3x4, homogeneous_row), dim=-2)

    conf = pose_encoding[..., -2:]
    return extrinsics, conf