Spaces:

Lingteng
/

LHMPP

Running on Zero

Lingteng Qiu (邱陵腾)

rm assets & wheels

434b0b0 3 days ago

8.04 kB

	# -*- coding: utf-8 -*-
	# @Organization : Tongyi Lab, Alibaba
	# @Author : Lingteng Qiu
	# @Email : 220019047@link.cuhk.edu.cn
	# @Time : 2025-10-15 16:04:19
	# @Function : Camera and Projection Matrix

	import math

	import numpy as np
	import torch


	def generate_rotation_matrix_y(degrees):
	    """
	    Build the 3x3 matrix describing a rotation about the Y-axis.

	    Args:
	        degrees (float): Rotation angle, expressed in degrees.

	    Returns:
	        numpy.ndarray: A float32 array of shape (3, 3).
	    """
	    angle = math.radians(degrees)
	    c, s = math.cos(angle), math.sin(angle)

	    # The Y row/column stay untouched; X and Z are mixed by the angle.
	    rows = (
	        (c, 0, s),
	        (0, 1, 0),
	        (-s, 0, c),
	    )
	    return np.array(rows, dtype=np.float32)


	def getWorld2View2(R, t, translate=np.array([0.0, 0.0, 0.0]), scale=1.0):
	    """
	    Assemble a 4x4 world-to-view matrix, optionally re-positioning the camera center.

	    The transpose of ``R`` and the vector ``t`` are packed into a homogeneous
	    matrix. That matrix is inverted to expose the camera center, which is
	    shifted by ``translate`` and multiplied by ``scale``; the result is then
	    inverted back into a world-to-view matrix.

	    Args:
	        R (numpy.ndarray): 3x3 rotation matrix (stored transposed).
	        t (numpy.ndarray): 3-element translation vector.
	        translate (numpy.ndarray, optional): Offset added to the camera center
	            before scaling. Defaults to np.array([0.0, 0.0, 0.0]).
	        scale (float, optional): Factor applied to the shifted camera center.
	            Defaults to 1.0.

	    Returns:
	        numpy.ndarray: The 4x4 world-to-view matrix as float32.
	    """
	    # Start from identity so the homogeneous bottom row is already [0, 0, 0, 1].
	    world_to_view = np.eye(4)
	    world_to_view[:3, :3] = R.transpose()
	    world_to_view[:3, 3] = t

	    # The translation column of the inverse is the camera center.
	    cam_to_world = np.linalg.inv(world_to_view)
	    cam_to_world[:3, 3] = (cam_to_world[:3, 3] + translate) * scale

	    return np.linalg.inv(cam_to_world).astype(np.float32)


	def getProjectionMatrix(znear, zfar, fovX, fovY):
	    """
	    Build a 4x4 perspective projection matrix from clip planes and fields of view.

	    Args:
	        znear (float): Distance to the near clipping plane.
	        zfar (float): Distance to the far clipping plane.
	        fovX (float): Horizontal field of view, in radians.
	        fovY (float): Vertical field of view, in radians.

	    Returns:
	        torch.Tensor: A 4x4 projection matrix created with ``torch.zeros``.
	    """
	    half_tan_y = math.tan(fovY / 2)
	    half_tan_x = math.tan(fovX / 2)

	    # Frustum extents measured on the near plane; symmetric about the axis.
	    top = half_tan_y * znear
	    bottom = -top
	    right = half_tan_x * znear
	    left = -right

	    frustum_width = right - left
	    frustum_height = top - bottom
	    depth_range = zfar - znear
	    z_sign = 1.0

	    proj = torch.zeros(4, 4)
	    proj[0, 0] = 2.0 * znear / frustum_width
	    proj[1, 1] = 2.0 * znear / frustum_height
	    # These two entries are zero for a symmetric frustum.
	    proj[0, 2] = (right + left) / frustum_width
	    proj[1, 2] = (top + bottom) / frustum_height
	    proj[3, 2] = z_sign
	    proj[2, 2] = z_sign * zfar / depth_range
	    proj[2, 3] = -(zfar * znear) / depth_range
	    return proj


	def intrinsic_to_fov(intrinsic, w, h):
	    """
	    Derive the horizontal and vertical fields of view from an intrinsic matrix.

	    Each field of view is computed as ``2 * atan2(size, 2 * focal)``, where the
	    focal lengths are read from ``intrinsic[0, 0]`` and ``intrinsic[1, 1]``.
	    No other entry of the matrix is used.

	    Args:
	        intrinsic (torch.Tensor): Camera intrinsic matrix.
	        w (torch.Tensor | int | float): Image width in pixels.
	        h (torch.Tensor | int | float): Image height in pixels.

	    Returns:
	        tuple[torch.Tensor, torch.Tensor]: ``(fov_x, fov_y)`` in radians.
	    """
	    fx, fy = intrinsic[0, 0], intrinsic[1, 1]

	    # torch.arctan2 only accepts tensors, so wrap plain Python numbers;
	    # tensors supplied by the caller are passed through untouched.
	    if not torch.is_tensor(w):
	        w = torch.as_tensor(w, device=fx.device)
	    if not torch.is_tensor(h):
	        h = torch.as_tensor(h, device=fy.device)

	    fov_x = 2 * torch.arctan2(w, 2 * fx)
	    fov_y = 2 * torch.arctan2(h, 2 * fy)
	    return fov_x, fov_y


	class Camera:
	    """
	    Camera class for representing a pinhole or perspective camera model.

	    Attributes:
	        FoVx (float): Horizontal field of view in radians.
	        FoVy (float): Vertical field of view in radians.
	        height (int): Image height in pixels.
	        width (int): Image width in pixels.
	        world_view_transform (torch.Tensor): Transpose of the 4x4 world-to-camera matrix.
	        zfar (float): Far clipping plane distance (fixed at 100.0).
	        znear (float): Near clipping plane distance (fixed at 0.01).
	        trans (np.ndarray): Camera translation vector (stored, not applied here).
	        scale (float): Camera center scale factor (stored, not applied here).
	        projection_matrix (torch.Tensor): Transposed 4x4 projection matrix.
	        full_proj_transform (torch.Tensor): ``world_view_transform @ projection_matrix``.
	        camera_center (torch.Tensor): 3D location of the camera center in world coordinates.
	        intrinsic (torch.Tensor): Camera intrinsic matrix.

	    Methods:
	        from_c2w(c2w, intrinsic, height, width):
	            Instantiates a Camera object from a camera-to-world matrix and intrinsics.
	        from_c2w_center_modfied(c2w, intrinsic, height, width):
	            Same as ``from_c2w`` but with the principal point moved to the image center.
	    """

	    def __init__(
	        self,
	        w2c,
	        intrinsic,
	        FoVx,
	        FoVy,
	        height,
	        width,
	        trans=np.array([0.0, 0.0, 0.0]),
	        scale=1.0,
	    ) -> None:
	        """
	        Initializes the Camera object with extrinsics, intrinsics, field of view and additional parameters.

	        Args:
	            w2c (torch.Tensor): 4x4 world-to-camera extrinsic matrix (stored transposed as the view transform).
	            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
	            FoVx (float): Horizontal field of view in radians (the factory methods pass 0-dim tensors).
	            FoVy (float): Vertical field of view in radians (the factory methods pass 0-dim tensors).
	            height (int): Image height in pixels.
	            width (int): Image width in pixels.
	            trans (np.ndarray, optional): Camera translation vector (default: [0.0, 0.0, 0.0]).
	            scale (float, optional): Scale factor for the camera center (default: 1.0).
	        """

	        self.FoVx = FoVx
	        self.FoVy = FoVy
	        self.height = height
	        self.width = width
	        self.world_view_transform = w2c.transpose(0, 1)

	        self.zfar = 100.0
	        self.znear = 0.01

	        # The default array is created once at definition time; copy ndarrays so
	        # an in-place edit of one camera's `trans` cannot leak into other cameras.
	        self.trans = trans.copy() if isinstance(trans, np.ndarray) else trans
	        self.scale = scale

	        # getProjectionMatrix always yields a default-dtype CPU tensor; match both
	        # device and dtype of w2c so bmm below does not fail on e.g. float64 input.
	        self.projection_matrix = (
	            getProjectionMatrix(
	                znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy
	            )
	            .transpose(0, 1)
	            .to(device=w2c.device, dtype=w2c.dtype)
	        )
	        self.full_proj_transform = (
	            self.world_view_transform.unsqueeze(0).bmm(
	                self.projection_matrix.unsqueeze(0)
	            )
	        ).squeeze(0)
	        # With the transposed layout the translation sits in the last row.
	        self.camera_center = self.world_view_transform.inverse()[3, :3]

	        self.intrinsic = intrinsic

	    @staticmethod
	    def from_c2w(c2w, intrinsic, height, width):
	        """
	        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters.

	        Args:
	            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
	            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
	            height (int): Image height in pixels.
	            width (int): Image width in pixels.

	        Returns:
	            Camera: An instance of the Camera class constructed from the provided parameters.
	        """

	        w2c = torch.inverse(c2w)
	        # The image size is wrapped in tensors because intrinsic_to_fov feeds it
	        # to torch.arctan2.
	        FoVx, FoVy = intrinsic_to_fov(
	            intrinsic,
	            w=torch.tensor(width, device=w2c.device),
	            h=torch.tensor(height, device=w2c.device),
	        )

	        return Camera(
	            w2c=w2c,
	            intrinsic=intrinsic,
	            FoVx=FoVx,
	            FoVy=FoVy,
	            height=height,
	            width=width,
	        )

	    @staticmethod
	    def from_c2w_center_modfied(c2w, intrinsic, height, width):
	        """
	        Creates a Camera object from a camera-to-world (c2w) matrix and intrinsic parameters,
	        but modifies the intrinsic matrix so that the principal point is set to the image center.

	        The caller's ``intrinsic`` tensor is cloned first and is not modified.

	        Args:
	            c2w (torch.Tensor): 4x4 camera-to-world extrinsic matrix.
	            intrinsic (torch.Tensor): 3x3 camera intrinsic matrix.
	            height (int): Image height in pixels.
	            width (int): Image width in pixels.

	        Returns:
	            Camera: An instance of the Camera class constructed from the provided parameters, with adjusted intrinsic center.
	        """

	        centered = intrinsic.clone()
	        centered[0, 2] = width / 2.0
	        centered[1, 2] = height / 2.0

	        # Everything else is identical to from_c2w, so delegate instead of
	        # repeating the inversion / field-of-view / construction steps.
	        return Camera.from_c2w(c2w, centered, height, width)