# LHMPP/core/structures/camera.py
# Lingteng Qiu (邱陵腾)
# rm assets & wheels
# 434b0b0
# -*- coding: utf-8 -*-
# @Organization : Tongyi Lab, Alibaba
# @Author : Lingteng Qiu
# @Email : 220019047@link.cuhk.edu.cn
# @Time : 2025-10-15 16:04:19
# @Function : Camera and Projection Matrix
import math
import numpy as np
import torch
def generate_rotation_matrix_y(degrees):
    """
    Build the 3x3 matrix that rotates about the Y-axis by an angle given in degrees.

    Args:
        degrees (float): Rotation angle, in degrees (converted to radians internally).

    Returns:
        numpy.ndarray: float32 array of shape (3, 3) laid out as
            [[cos, 0, sin], [0, 1, 0], [-sin, 0, cos]].
    """
    angle = math.radians(degrees)
    c, s = math.cos(angle), math.sin(angle)
    rows = (
        (c, 0, s),
        (0, 1, 0),
        (-s, 0, c),
    )
    return np.asarray(rows, dtype=np.float32)
def getWorld2View2(R, t, translate=np.array([0.0, 0.0, 0.0]), scale=1.0):
    """
    Build a 4x4 world-to-view matrix, optionally shifting and scaling the camera center.

    The matrix [R^T | t] is assembled, inverted to obtain the camera-to-world
    matrix, the camera center (its translation column) is replaced by
    ``(center + translate) * scale``, and the result is inverted back.

    Args:
        R (numpy.ndarray): 3x3 rotation matrix; its transpose fills the upper-left block.
        t (numpy.ndarray): 3-element translation placed in the last column.
        translate (numpy.ndarray, optional): Offset added to the camera center
            before scaling. Defaults to np.array([0.0, 0.0, 0.0]).
        scale (float, optional): Factor applied to the shifted camera center.
            Defaults to 1.0.

    Returns:
        numpy.ndarray: float32 4x4 world-to-view transformation matrix.
    """
    # Homogeneous [R^T | t] with a trailing [0, 0, 0, 1] row.
    world_to_cam = np.zeros((4, 4))
    world_to_cam[3, 3] = 1.0
    world_to_cam[:3, 3] = t
    world_to_cam[:3, :3] = R.transpose()

    # The camera center lives in the translation column of the inverse.
    cam_to_world = np.linalg.inv(world_to_cam)
    cam_to_world[:3, 3] = (cam_to_world[:3, 3] + translate) * scale

    return np.linalg.inv(cam_to_world).astype(np.float32)
def getProjectionMatrix(znear, zfar, fovX, fovY):
    """
    Build a 4x4 perspective projection matrix from clip distances and fields of view.

    The frustum is symmetric about the optical axis, so the off-center terms
    P[0, 2] and P[1, 2] evaluate to zero. P[3, 2] is 1, so the clip-space w
    equals the view-space z, and the depth row maps z = znear to 0 and
    z = zfar to 1 after the perspective divide.

    Args:
        znear (float): Near clipping plane distance.
        zfar (float): Far clipping plane distance.
        fovX (float): Horizontal field of view in radians.
        fovY (float): Vertical field of view in radians.

    Returns:
        torch.Tensor: 4x4 projection matrix created with ``torch.zeros`` (default dtype/device).
    """
    # Half-extents of the near plane along each image axis.
    half_h = math.tan(fovY / 2) * znear
    half_w = math.tan(fovX / 2) * znear

    y_max, y_min = half_h, -half_h
    x_max, x_min = half_w, -half_w
    span_x = x_max - x_min
    span_y = y_max - y_min
    depth = zfar - znear
    sign = 1.0  # +1: the camera looks down the positive z axis

    proj = torch.zeros(4, 4)
    # Row 0: x scaling and horizontal off-center term.
    proj[0, 0] = 2.0 * znear / span_x
    proj[0, 2] = (x_max + x_min) / span_x
    # Row 1: y scaling and vertical off-center term.
    proj[1, 1] = 2.0 * znear / span_y
    proj[1, 2] = (y_max + y_min) / span_y
    # Row 2: depth mapping.
    proj[2, 2] = sign * zfar / depth
    proj[2, 3] = -(zfar * znear) / depth
    # Row 3: copies z into w for the perspective divide.
    proj[3, 2] = sign
    return proj
def intrinsic_to_fov(intrinsic, w, h):
    """
    Derive the horizontal and vertical fields of view from a pinhole intrinsic matrix.

    Each angle is computed as ``2 * atan2(size, 2 * focal)``, using the focal
    lengths stored at ``intrinsic[0, 0]`` (fx) and ``intrinsic[1, 1]`` (fy).
    The principal point is not read.

    Args:
        intrinsic (torch.Tensor): Intrinsic matrix; only its [0, 0] and [1, 1]
            entries are used.
        w (torch.Tensor | int | float): Image width in pixels.
        h (torch.Tensor | int | float): Image height in pixels.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``(fov_x, fov_y)`` in radians.
    """
    fx, fy = intrinsic[0, 0], intrinsic[1, 1]
    # torch.arctan2 only accepts tensors, so promote plain Python numbers and
    # place them on the intrinsic's device. Tensor inputs pass through untouched.
    if not torch.is_tensor(w):
        w = torch.as_tensor(w, device=fx.device)
    if not torch.is_tensor(h):
        h = torch.as_tensor(h, device=fy.device)
    fov_x = 2 * torch.arctan2(w, 2 * fx)
    fov_y = 2 * torch.arctan2(h, 2 * fy)
    return fov_x, fov_y
class Camera:
    """
    Pinhole / perspective camera holding extrinsics, intrinsics and derived matrices.

    All 4x4 matrices are stored transposed relative to the ``w2c`` argument,
    so the translation sits in the last row.

    Attributes:
        FoVx (float | torch.Tensor): Horizontal field of view in radians.
        FoVy (float | torch.Tensor): Vertical field of view in radians.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        world_view_transform (torch.Tensor): Transpose of the 4x4 world-to-camera matrix.
        zfar (float): Far clipping plane distance (fixed at 100.0).
        znear (float): Near clipping plane distance (fixed at 0.01).
        trans (np.ndarray): Translation vector; stored only, not applied to any
            matrix computed by this class.
        scale (float): Scale factor; stored only, not applied to any matrix
            computed by this class.
        projection_matrix (torch.Tensor): Transposed 4x4 projection matrix on ``w2c``'s device.
        full_proj_transform (torch.Tensor): ``world_view_transform @ projection_matrix``.
        camera_center (torch.Tensor): Camera position in world coordinates, shape (3,).
        intrinsic (torch.Tensor): Camera intrinsic matrix, stored as given.

    Methods:
        from_c2w(c2w, intrinsic, height, width):
            Builds a Camera from a camera-to-world matrix and intrinsics.
        from_c2w_center_modfied(c2w, intrinsic, height, width):
            Same as ``from_c2w`` but with the principal point moved to the image center.
    """

    def __init__(
        self,
        w2c,
        intrinsic,
        FoVx,
        FoVy,
        height,
        width,
        trans=None,
        scale=1.0,
    ) -> None:
        """
        Initialize the camera and precompute its view / projection matrices.

        Args:
            w2c (torch.Tensor): 4x4 world-to-camera extrinsic matrix. It is
                transposed before being stored as the view transform.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            FoVx (float | torch.Tensor): Horizontal field of view in radians.
            FoVy (float | torch.Tensor): Vertical field of view in radians.
            height (int): Image height in pixels.
            width (int): Image width in pixels.
            trans (np.ndarray, optional): Translation vector kept on the instance.
                When omitted, a fresh ``[0.0, 0.0, 0.0]`` array is created for
                this instance.
            scale (float, optional): Scale factor kept on the instance (default: 1.0).
        """
        self.FoVx = FoVx
        self.FoVy = FoVy
        self.height = height
        self.width = width
        self.world_view_transform = w2c.transpose(0, 1)
        self.zfar = 100.0
        self.znear = 0.01
        # A per-instance default avoids sharing one mutable array between all
        # cameras constructed without an explicit `trans`.
        self.trans = np.array([0.0, 0.0, 0.0]) if trans is None else trans
        self.scale = scale

        projection = getProjectionMatrix(
            znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy
        )
        # Transposed to match the view transform, then moved to w2c's device.
        self.projection_matrix = projection.transpose(0, 1).to(w2c.device)

        # With both factors transposed, this equals (P @ w2c)^T.
        self.full_proj_transform = (
            self.world_view_transform.unsqueeze(0).bmm(
                self.projection_matrix.unsqueeze(0)
            )
        ).squeeze(0)

        # The inverse of the transposed w2c is the transposed c2w, whose last
        # row holds the camera position in world space.
        self.camera_center = self.world_view_transform.inverse()[3, :3]
        self.intrinsic = intrinsic

    @staticmethod
    def from_c2w(c2w, intrinsic, height, width):
        """
        Create a Camera from a camera-to-world (c2w) matrix and intrinsic parameters.

        The world-to-camera matrix is obtained by inverting ``c2w`` and the
        fields of view are derived from the focal lengths in ``intrinsic``.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            height (int): Image height in pixels.
            width (int): Image width in pixels.

        Returns:
            Camera: Camera built from the provided parameters.
        """
        w2c = torch.inverse(c2w)
        # The image size is passed as tensors on w2c's device.
        FoVx, FoVy = intrinsic_to_fov(
            intrinsic,
            w=torch.tensor(width, device=w2c.device),
            h=torch.tensor(height, device=w2c.device),
        )
        return Camera(
            w2c=w2c,
            intrinsic=intrinsic,
            FoVx=FoVx,
            FoVy=FoVy,
            height=height,
            width=width,
        )

    @staticmethod
    def from_c2w_center_modfied(c2w, intrinsic, height, width):
        """
        Create a Camera like ``from_c2w``, with the principal point set to the image center.

        The caller's ``intrinsic`` is left untouched: a clone is modified so
        that ``[0, 2] = width / 2`` and ``[1, 2] = height / 2``.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            height (int): Image height in pixels.
            width (int): Image width in pixels.

        Returns:
            Camera: Camera built from the provided parameters, with the
            re-centered intrinsic matrix.
        """
        centered = intrinsic.clone()
        centered[0, 2] = width / 2.0
        centered[1, 2] = height / 2.0
        # Everything else is identical to from_c2w, so delegate to it.
        return Camera.from_c2w(c2w, centered, height, width)