File size: 3,817 Bytes
7a87926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
Coordinate system conversion utilities.
ARKit uses Y-up, right-handed.
DA3/COLMAP typically use Z-up, right-handed (OpenCV convention).
"""

import numpy as np


def arkit_to_opencv_transform() -> np.ndarray:
    """
    Transform from ARKit coordinate system to OpenCV/COLMAP convention.

    ARKit: Y-up, right-handed
    OpenCV: Z-up, right-handed

    Conversion: Rotate -90° around X axis (Y -> Z)
    """
    # Rotation matrix: -90° around X axis
    # This maps: Y -> Z, Z -> -Y
    R = np.array(
        [
            [1, 0, 0],
            [0, 0, 1],
            [0, -1, 0],
        ]
    )
    return R


def convert_arkit_to_opencv(pose_c2w: np.ndarray) -> np.ndarray:
    """
    Convert ARKit camera-to-world pose to OpenCV/COLMAP convention.

    Args:
        pose_c2w: (4, 4) ARKit c2w pose (Y-up)

    Returns:
        (4, 4) OpenCV c2w pose (Z-up)
    """
    if pose_c2w.shape != (4, 4):
        raise ValueError(f"Expected (4, 4) pose, got {pose_c2w.shape}")

    R_convert = arkit_to_opencv_transform()

    # Extract rotation and translation
    R_arkit = pose_c2w[:3, :3]
    t_arkit = pose_c2w[:3, 3]

    # Convert rotation: R_opencv = R_convert @ R_arkit @ R_convert^T
    R_opencv = R_convert @ R_arkit @ R_convert.T

    # Convert translation: t_opencv = R_convert @ t_arkit
    t_opencv = R_convert @ t_arkit

    # Build new pose
    pose_opencv = np.eye(4)
    pose_opencv[:3, :3] = R_opencv
    pose_opencv[:3, 3] = t_opencv

    return pose_opencv


def convert_opencv_to_arkit(pose_c2w: np.ndarray) -> np.ndarray:
    """
    Convert OpenCV/COLMAP camera-to-world pose to ARKit convention.

    Args:
        pose_c2w: (4, 4) OpenCV c2w pose (Z-up)

    Returns:
        (4, 4) ARKit c2w pose (Y-up)
    """
    R_convert = arkit_to_opencv_transform()
    R_convert_inv = R_convert.T  # Inverse rotation

    R_opencv = pose_c2w[:3, :3]
    t_opencv = pose_c2w[:3, 3]

    # Convert rotation: R_arkit = R_convert_inv @ R_opencv @ R_convert
    R_arkit = R_convert_inv @ R_opencv @ R_convert

    # Convert translation: t_arkit = R_convert_inv @ t_opencv
    t_arkit = R_convert_inv @ t_opencv

    pose_arkit = np.eye(4)
    pose_arkit[:3, :3] = R_arkit
    pose_arkit[:3, 3] = t_arkit

    return pose_arkit


def convert_arkit_c2w_to_w2c(pose_c2w: np.ndarray, convert_coords: bool = True) -> np.ndarray:
    """
    Convert ARKit c2w pose to w2c (for DA3 compatibility).

    Args:
        pose_c2w: (4, 4) ARKit c2w pose
        convert_coords: If True, convert from ARKit to OpenCV coordinate system

    Returns:
        (3, 4) w2c pose in OpenCV convention (if convert_coords=True) or ARKit convention
    """
    if convert_coords:
        pose_c2w = convert_arkit_to_opencv(pose_c2w)

    # Invert to get w2c
    pose_w2c = np.linalg.inv(pose_c2w)

    # Extract 3x4
    return pose_w2c[:3, :]


def test_coordinate_conversion():
    """Test coordinate conversion."""
    # Create a test ARKit pose (Y-up)
    # Camera at origin, looking down +Z (in ARKit convention)
    pose_arkit = np.eye(4)
    pose_arkit[:3, 3] = [1.0, 2.0, 3.0]  # Translation

    print("ARKit pose (Y-up):")
    print(pose_arkit)
    print(f"  Translation: {pose_arkit[:3, 3]}")

    # Convert to OpenCV
    pose_opencv = convert_arkit_to_opencv(pose_arkit)
    print("\nOpenCV pose (Z-up):")
    print(pose_opencv)
    print(f"  Translation: {pose_opencv[:3, 3]}")

    # Convert back
    pose_arkit_back = convert_opencv_to_arkit(pose_opencv)
    print("\nARKit pose (converted back):")
    print(pose_arkit_back)
    print(f"  Translation: {pose_arkit_back[:3, 3]}")

    # Check round-trip
    assert np.allclose(pose_arkit, pose_arkit_back), "Round-trip conversion failed!"
    print("\n✓ Round-trip conversion successful!")


if __name__ == "__main__":
    test_coordinate_conversion()