Spaces:

Lingteng
/

LHMPP

Running on Zero

File size: 8,036 Bytes

434b0b0

# -*- coding: utf-8 -*-
# @Organization  : Tongyi Lab, Alibaba
# @Author        : Lingteng Qiu
# @Email         : 220019047@link.cuhk.edu.cn
# @Time          : 2025-10-15 16:04:19
# @Function      : Camera and Projection Matrix

import math

import numpy as np
import torch


def generate_rotation_matrix_y(degrees):
    """
    Build the 3x3 matrix that rotates about the Y-axis by ``degrees``.

    Args:
        degrees (float): Rotation angle, expressed in degrees.

    Returns:
        numpy.ndarray: A float32 array of shape (3, 3) laid out row by row as
            ``[[c, 0, s], [0, 1, 0], [-s, 0, c]]`` where ``c`` and ``s`` are
            the cosine and sine of the angle.
    """

    # The trigonometric functions work in radians, so convert once up front.
    angle = math.radians(degrees)
    c, s = math.cos(angle), math.sin(angle)

    rows = (
        (c, 0, s),
        (0, 1, 0),
        (-s, 0, c),
    )
    return np.array(rows, dtype=np.float32)


def getWorld2View2(R, t, translate=np.array([0.0, 0.0, 0.0]), scale=1.0):
    """
    Build a 4x4 world-to-view matrix, optionally re-positioning the camera center.

    The matrix is first assembled from ``R`` (transposed) and ``t``, inverted to
    obtain the camera center, the center is shifted by ``translate`` and
    multiplied by ``scale``, and the result is inverted back.

    Args:
        R (numpy.ndarray): A 3x3 rotation matrix; its transpose fills the
            upper-left 3x3 block.
        t (numpy.ndarray): A 3-element translation vector placed in the last column.
        translate (numpy.ndarray, optional): Offset added to the camera center
            before scaling. Defaults to np.array([0.0, 0.0, 0.0]).
        scale (float, optional): Factor multiplied into the shifted camera
            center. Defaults to 1.0.

    Returns:
        numpy.ndarray: The adjusted 4x4 world-to-view matrix as float32.
    """

    # Start from identity so the homogeneous bottom row [0, 0, 0, 1] is in place.
    world_to_view = np.eye(4)
    world_to_view[:3, :3] = R.transpose()
    world_to_view[:3, 3] = t

    # The camera center is the translation column of the inverse transform.
    view_to_world = np.linalg.inv(world_to_view)
    view_to_world[:3, 3] = (view_to_world[:3, 3] + translate) * scale

    # Invert again to return to world-to-view with the adjusted center.
    return np.linalg.inv(view_to_world).astype(np.float32)


def getProjectionMatrix(znear, zfar, fovX, fovY):
    """
    Build a 4x4 perspective projection matrix from clip distances and fields of view.

    Args:
        znear (float): Distance to the near clipping plane.
        zfar (float): Distance to the far clipping plane.
        fovX (float): Horizontal field of view, in radians.
        fovY (float): Vertical field of view, in radians.

    Returns:
        torch.Tensor: A 4x4 tensor created with ``torch.zeros`` whose non-zero
            entries are the x/y scale terms, the x/y offset terms, the two
            depth terms in row 2, and ``P[3, 2] = 1``.
    """

    # Extents of the view frustum on the near plane, symmetric about the axis.
    y_max = math.tan((fovY / 2)) * znear
    y_min = -y_max
    x_max = math.tan((fovX / 2)) * znear
    x_min = -x_max

    x_span = x_max - x_min
    y_span = y_max - y_min
    depth_span = zfar - znear

    # Sign applied to the depth terms and to the row-3 entry.
    z_sign = 1.0

    proj = torch.zeros(4, 4)

    # Horizontal scale and offset.
    proj[0, 0] = 2.0 * znear / x_span
    proj[0, 2] = (x_max + x_min) / x_span

    # Vertical scale and offset.
    proj[1, 1] = 2.0 * znear / y_span
    proj[1, 2] = (y_max + y_min) / y_span

    # Depth mapping and the row that carries z into the last output component.
    proj[2, 2] = z_sign * zfar / depth_span
    proj[2, 3] = -(zfar * znear) / depth_span
    proj[3, 2] = z_sign
    return proj


def intrinsic_to_fov(intrinsic, w, h):
    """
    Derive horizontal and vertical fields of view from an intrinsic matrix.

    Args:
        intrinsic (torch.Tensor): Intrinsic matrix; ``intrinsic[0, 0]`` and
            ``intrinsic[1, 1]`` are read as the focal lengths fx and fy.
        w (torch.Tensor): Image width. It is passed straight to
            ``torch.arctan2``, so it is expected to be a tensor.
        h (torch.Tensor): Image height, with the same expectation as ``w``.

    Returns:
        tuple: ``(fov_x, fov_y)`` where ``fov_x = 2 * arctan2(w, 2 * fx)`` and
            ``fov_y = 2 * arctan2(h, 2 * fy)``.
    """
    focal_x = intrinsic[0, 0]
    focal_y = intrinsic[1, 1]

    horizontal = 2 * torch.arctan2(w, 2 * focal_x)
    vertical = 2 * torch.arctan2(h, 2 * focal_y)
    return horizontal, vertical


class Camera:
    """
    Camera class for representing a pinhole or perspective camera model.

    Attributes:
        FoVx (float | torch.Tensor): Horizontal field of view in radians.
        FoVy (float | torch.Tensor): Vertical field of view in radians.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        world_view_transform (torch.Tensor): Transpose of the 4x4 world-to-camera matrix.
        zfar (float): Far clipping plane distance (fixed at 100.0).
        znear (float): Near clipping plane distance (fixed at 0.01).
        trans (np.ndarray): Camera translation vector. Stored only; it is not
            applied to any matrix computed by this class.
        scale (float): Scale factor. Stored only; it is not applied to any
            matrix computed by this class.
        projection_matrix (torch.Tensor): Transposed 4x4 projection matrix built
            from the fields of view and the clip distances, on the same device
            and with the same dtype as ``w2c``.
        full_proj_transform (torch.Tensor): Product of ``world_view_transform``
            and ``projection_matrix``.
        camera_center (torch.Tensor): First three entries of row 3 of the inverse
            of ``world_view_transform``, i.e. the camera position in world coordinates.
        intrinsic (torch.Tensor): Camera intrinsic matrix.

    Methods:
        from_c2w(c2w, intrinsic, height, width):
            Instantiates a Camera object from a camera-to-world matrix and intrinsics.
        from_c2w_center_modfied(c2w, intrinsic, height, width):
            Same as ``from_c2w`` but with the principal point of a cloned
            intrinsic matrix moved to the image center.
    """

    def __init__(
        self,
        w2c,
        intrinsic,
        FoVx,
        FoVy,
        height,
        width,
        trans=None,
        scale=1.0,
    ) -> None:
        """
        Initializes the Camera object with extrinsics, intrinsics, field of view and additional parameters.

        Args:
            w2c (torch.Tensor): 4x4 world-to-camera extrinsic transformation matrix (transposed and used as view transform).
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            FoVx (float | torch.Tensor): Horizontal field of view in radians.
            FoVy (float | torch.Tensor): Vertical field of view in radians.
            height (int): Image height in pixels.
            width (int): Image width in pixels.
            trans (np.ndarray, optional): Camera translation vector to store on
                the instance. When omitted, a fresh ``[0.0, 0.0, 0.0]`` array is
                created for this instance.
            scale (float, optional): Scale factor to store on the instance (default: 1.0).
        """

        self.FoVx = FoVx
        self.FoVy = FoVy
        self.height = height
        self.width = width
        self.world_view_transform = w2c.transpose(0, 1)

        self.zfar = 100.0
        self.znear = 0.01

        # A default array in the signature would be shared by every instance;
        # allocate one per camera so in-place edits of `cam.trans` stay local.
        self.trans = np.array([0.0, 0.0, 0.0]) if trans is None else trans
        self.scale = scale

        # getProjectionMatrix always yields a float32 CPU tensor. Match both the
        # device and the dtype of the extrinsics, otherwise the bmm below raises
        # on a dtype mismatch (e.g. when w2c is float64).
        self.projection_matrix = (
            getProjectionMatrix(
                znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy
            )
            .transpose(0, 1)
            .to(device=w2c.device, dtype=w2c.dtype)
        )
        self.full_proj_transform = (
            self.world_view_transform.unsqueeze(0).bmm(
                self.projection_matrix.unsqueeze(0)
            )
        ).squeeze(0)
        # The inverse of the transposed w2c is the transposed c2w, so its last
        # row holds the camera position.
        self.camera_center = self.world_view_transform.inverse()[3, :3]

        self.intrinsic = intrinsic

    @staticmethod
    def from_c2w(c2w, intrinsic, height, width):
        """
        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
            height (int): Image height in pixels.
            width (int): Image width in pixels.

        Returns:
            Camera: An instance of the Camera class constructed from the provided parameters.
        """

        w2c = torch.inverse(c2w)
        # intrinsic_to_fov feeds width/height to torch.arctan2, so wrap them
        # as tensors on the same device as the extrinsics.
        FoVx, FoVy = intrinsic_to_fov(
            intrinsic,
            w=torch.tensor(width, device=w2c.device),
            h=torch.tensor(height, device=w2c.device),
        )

        return Camera(
            w2c=w2c,
            intrinsic=intrinsic,
            FoVx=FoVx,
            FoVy=FoVy,
            height=height,
            width=width,
        )

    @staticmethod
    def from_c2w_center_modfied(c2w, intrinsic, height, width):
        """
        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters,
        but modifies the intrinsic matrix so that the principal point is set to the image center.

        Args:
            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix. It is cloned,
                so the caller's tensor is left untouched.
            height (int): Image height in pixels.
            width (int): Image width in pixels.

        Returns:
            Camera: An instance of the Camera class constructed from the provided parameters, with adjusted intrinsic center.
        """

        centered = intrinsic.clone()
        centered[0, 2] = width / 2.0
        centered[1, 2] = height / 2.0

        # Everything else is identical to the plain factory, so reuse it.
        return Camera.from_c2w(centered, height=height, width=width, c2w=c2w)