File size: 8,036 Bytes
434b0b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# -*- coding: utf-8 -*-
# @Organization  : Tongyi Lab, Alibaba
# @Author        : Lingteng Qiu
# @Email         : 220019047@link.cuhk.edu.cn
# @Time          : 2025-10-15 16:04:19
# @Function      : Camera and Projection Matrix

import math

import numpy as np
import torch


def generate_rotation_matrix_y(degrees):
    """
    Build the 3x3 matrix describing a rotation about the Y-axis.

    Args:
        degrees (float): Rotation angle, expressed in degrees.

    Returns:
        numpy.ndarray: float32 array of shape (3, 3) laid out as
            [[cos, 0, sin], [0, 1, 0], [-sin, 0, cos]].
    """

    angle = math.radians(degrees)
    c, s = math.cos(angle), math.sin(angle)

    # The Y row/column stay untouched; X and Z mix through cos/sin.
    rows = (
        (c, 0, s),
        (0, 1, 0),
        (-s, 0, c),
    )
    return np.array(rows, dtype=np.float32)


def getWorld2View2(R, t, translate=np.array([0.0, 0.0, 0.0]), scale=1.0):
    """
    Build a 4x4 world-to-view matrix, optionally shifting/scaling the camera center.

    The matrix [R^T | t] is assembled, inverted to obtain the camera-to-world
    transform, its translation column (the camera center) is replaced by
    ``(center + translate) * scale``, and the result is inverted back.

    Args:
        R (numpy.ndarray): 3x3 rotation matrix; its transpose fills the rotation block.
        t (numpy.ndarray): 3-element translation vector placed in the last column.
        translate (numpy.ndarray, optional): Offset added to the camera center. Defaults to zeros.
        scale (float, optional): Factor multiplied into the shifted camera center. Defaults to 1.0.

    Returns:
        numpy.ndarray: float32 4x4 world-to-view transformation matrix.
    """

    # Start from identity so the homogeneous row [0, 0, 0, 1] is already in place.
    world_to_view = np.eye(4)
    world_to_view[:3, :3] = R.transpose()
    world_to_view[:3, 3] = t

    # The camera center is the translation column of the inverse transform.
    view_to_world = np.linalg.inv(world_to_view)
    view_to_world[:3, 3] = (view_to_world[:3, 3] + translate) * scale

    return np.linalg.inv(view_to_world).astype(np.float32)


def getProjectionMatrix(znear, zfar, fovX, fovY):
    """
    Build a 4x4 perspective projection matrix from clip planes and fields of view.

    The frustum is symmetric about the optical axis, so the off-center terms
    in column 2 evaluate to zero. Depth is mapped with a positive z sign.

    Args:
        znear (float): Distance to the near clipping plane.
        zfar (float): Distance to the far clipping plane.
        fovX (float): Horizontal field of view, in radians.
        fovY (float): Vertical field of view, in radians.

    Returns:
        torch.Tensor: 4x4 projection matrix (default torch dtype, CPU).
    """

    # Extents of the near plane, derived from the half-angle tangents.
    top = math.tan(fovY / 2) * znear
    right = math.tan(fovX / 2) * znear
    bottom, left = -top, -right

    span_x = right - left
    span_y = top - bottom

    z_sign = 1.0
    proj = torch.zeros(4, 4)

    # Row 0: x scaling and (zero) horizontal offset.
    proj[0, 0] = 2.0 * znear / span_x
    proj[0, 2] = (right + left) / span_x
    # Row 1: y scaling and (zero) vertical offset.
    proj[1, 1] = 2.0 * znear / span_y
    proj[1, 2] = (top + bottom) / span_y
    # Row 2: depth remapping between the clip planes.
    proj[2, 2] = z_sign * zfar / (zfar - znear)
    proj[2, 3] = -(zfar * znear) / (zfar - znear)
    # Row 3: copies view-space z into w for the perspective divide.
    proj[3, 2] = z_sign
    return proj


def intrinsic_to_fov(intrinsic, w, h):
    """
    Derive horizontal and vertical fields of view from pinhole intrinsics.

    Each field of view is computed as ``2 * atan2(size, 2 * focal)``, with the
    focal lengths read from ``intrinsic[0, 0]`` (fx) and ``intrinsic[1, 1]``
    (fy). The principal point entries are not used.

    Args:
        intrinsic (torch.Tensor): Intrinsic matrix holding fx at [0, 0] and fy at [1, 1].
        w (int | float | torch.Tensor): Image width in pixels. Plain numbers are
            converted to a tensor on the device of ``intrinsic``; tensors are used as given.
        h (int | float | torch.Tensor): Image height in pixels, handled like ``w``.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``(fov_x, fov_y)`` in radians.
    """
    fx, fy = intrinsic[0, 0], intrinsic[1, 1]

    # torch.arctan2 only accepts tensors; promote plain numbers so callers can
    # pass image sizes directly. Existing tensor inputs are left untouched.
    if not torch.is_tensor(w):
        w = torch.as_tensor(w, device=fx.device)
    if not torch.is_tensor(h):
        h = torch.as_tensor(h, device=fy.device)

    fov_x = 2 * torch.arctan2(w, 2 * fx)
    fov_y = 2 * torch.arctan2(h, 2 * fy)
    return fov_x, fov_y


class Camera:
    """
    Pinhole/perspective camera bundling view and projection transforms.

    All 4x4 matrices are stored transposed relative to the matrices passed in
    (``w2c`` and the output of ``getProjectionMatrix`` are both transposed
    before being stored).

    Attributes:
        FoVx (float | torch.Tensor): Horizontal field of view in radians
            (a 0-dim tensor when built through the ``from_c2w*`` factories).
        FoVy (float | torch.Tensor): Vertical field of view in radians.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        world_view_transform (torch.Tensor): Transposed 4x4 world-to-camera matrix.
        zfar (float): Far clipping plane distance.
        znear (float): Near clipping plane distance.
        trans (np.ndarray): Translation vector; stored only, not used by any
            transform computed in this class.
        scale (float): Scale factor; stored only, not used by any transform
            computed in this class.
        projection_matrix (torch.Tensor): Transposed 4x4 projection matrix.
        full_proj_transform (torch.Tensor): ``world_view_transform @ projection_matrix``.
        camera_center (torch.Tensor): Camera position in world coordinates.
        intrinsic (torch.Tensor): Camera intrinsic matrix, stored as given.

    Methods:
        from_c2w(c2w, intrinsic, height, width):
            Instantiates a Camera object from a camera-to-world matrix and intrinsics.
        from_c2w_center_modfied(c2w, intrinsic, height, width):
            Same as ``from_c2w`` but with the principal point moved to the image center.
    """

    def __init__(
        self,
        w2c,
        intrinsic,
        FoVx,
        FoVy,
        height,
        width,
        trans=np.array([0.0, 0.0, 0.0]),
        scale=1.0,
        znear=0.01,
        zfar=100.0,
    ) -> None:
        """
        Initializes the Camera object with extrinsics, intrinsics, field of view and additional parameters.

        Args:
            w2c (torch.Tensor): 4x4 world-to-camera extrinsic matrix (stored transposed as the view transform).
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            FoVx (float | torch.Tensor): Horizontal field of view in radians.
            FoVy (float | torch.Tensor): Vertical field of view in radians.
            height (int): Image height in pixels.
            width (int): Image width in pixels.
            trans (np.ndarray, optional): Translation vector kept on the instance (default: [0.0, 0.0, 0.0]).
            scale (float, optional): Scale factor kept on the instance (default: 1.0).
            znear (float, optional): Near clipping plane distance (default: 0.01).
            zfar (float, optional): Far clipping plane distance (default: 100.0).
        """

        self.FoVx = FoVx
        self.FoVy = FoVy
        self.height = height
        self.width = width
        self.world_view_transform = w2c.transpose(0, 1)

        self.zfar = zfar
        self.znear = znear

        # Copy array inputs so instances never share (and cannot accidentally
        # mutate) the module-level default array.
        self.trans = trans.copy() if isinstance(trans, np.ndarray) else trans
        self.scale = scale

        self.projection_matrix = (
            getProjectionMatrix(
                znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy
            )
            .transpose(0, 1)
            .to(w2c.device)
        )
        # Both factors are transposed, so this product equals (P @ w2c)^T.
        self.full_proj_transform = (
            self.world_view_transform.unsqueeze(0).bmm(
                self.projection_matrix.unsqueeze(0)
            )
        ).squeeze(0)
        # inverse(w2c^T) == c2w^T, whose last row holds the camera position.
        self.camera_center = self.world_view_transform.inverse()[3, :3]

        self.intrinsic = intrinsic

    @staticmethod
    def from_c2w(c2w, intrinsic, height, width, znear=0.01, zfar=100.0):
        """
        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters.

        The fields of view are derived from the focal lengths in ``intrinsic``
        and the image size via ``intrinsic_to_fov``.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            height (int): Image height in pixels.
            width (int): Image width in pixels.
            znear (float, optional): Near clipping plane distance (default: 0.01).
            zfar (float, optional): Far clipping plane distance (default: 100.0).

        Returns:
            Camera: An instance of the Camera class constructed from the provided parameters.
        """

        w2c = torch.inverse(c2w)
        FoVx, FoVy = intrinsic_to_fov(
            intrinsic,
            w=torch.tensor(width, device=w2c.device),
            h=torch.tensor(height, device=w2c.device),
        )

        return Camera(
            w2c=w2c,
            intrinsic=intrinsic,
            FoVx=FoVx,
            FoVy=FoVy,
            height=height,
            width=width,
            znear=znear,
            zfar=zfar,
        )

    @staticmethod
    def from_c2w_center_modfied(c2w, intrinsic, height, width, znear=0.01, zfar=100.0):
        """
        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters,
        but modifies the intrinsic matrix so that the principal point is set to the image center.

        The caller's ``intrinsic`` tensor is cloned before modification and is
        therefore left unchanged.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            height (int): Image height in pixels.
            width (int): Image width in pixels.
            znear (float, optional): Near clipping plane distance (default: 0.01).
            zfar (float, optional): Far clipping plane distance (default: 100.0).

        Returns:
            Camera: An instance of the Camera class constructed from the provided parameters, with adjusted intrinsic center.
        """

        centered = intrinsic.clone()
        centered[0, 2] = width / 2.0
        centered[1, 2] = height / 2.0

        # Everything else is identical to the plain factory, so reuse it.
        return Camera.from_c2w(
            c2w, centered, height, width, znear=znear, zfar=zfar
        )