Spaces:

XinxuanLu
/

viewtoken-harmon-demo

Running on Zero

App Files Files Community

viewtoken-harmon-demo / src /datasets /camera_utils.py

XinxuanLu

Initial demo

becf13a verified 1 day ago

raw

history blame contribute delete

27.9 kB

	"""
	Camera transformation utilities for relative pose dataset.

	Provides unified camera transformation functions for converting between
	different camera representations and computing relative poses.

	Coordinate System: OpenCV/COLMAP convention
	- X: right
	- Y: down
	- Z: forward (viewing direction, into the scene)
	- Transformation: X_cam = R @ X_world + T (left-multiply convention)

	Note: This differs from:
	- PyTorch3D: X left, Y up, Z forward (same Z direction, flipped X and Y)
	- Blender camera: X right, Y up, Z backward (same X direction, flipped Y and Z)
	(Blender world coordinates use Z-up: X right, Y forward, Z up)
	"""

	import torch
	import numpy as np
	from typing import Dict, List, Tuple
	from scipy.spatial.transform import Rotation


	class CameraTransformUtils:
	    """Unified camera transformation utilities for relative pose training.

	    Every method is a ``@staticmethod`` operating on NumPy arrays. Unless a
	    method states otherwise, rotations are world-to-camera matrices used as
	    ``X_cam = R @ X_world + T`` (left-multiply convention).
	    """

	    @staticmethod
	    def rotation_matrix_to_6d(R: np.ndarray) -> np.ndarray:
	        """
	        Convert a 3x3 rotation matrix to a 6D representation.

	        Takes the first two columns of ``R`` as a (3, 2) slice and flattens
	        it in row-major order, so the two columns end up interleaved:
	        ``[R00, R01, R10, R11, R20, R21]``.

	        NOTE(review): the layout is interleaved, NOT the two columns
	        concatenated. A decoder must use ``rot_6d.reshape(3, 2)`` to get the
	        columns back. Confirm this matches what downstream consumers expect.

	        Args:
	            R: (3, 3) rotation matrix

	        Returns:
	            rot_6d: (6,) vector, row-major flattening of ``R[:, :2]``

	        Example:
	            >>> rot_6d = CameraTransformUtils.rotation_matrix_to_6d(np.eye(3))
	            >>> np.allclose(rot_6d, [1, 0, 0, 1, 0, 0])
	            True
	        """
	        return R[:, :2].flatten()

	    @staticmethod
	    def rotation_matrix_to_axis_angle(R: np.ndarray) -> np.ndarray:
	        """
	        Convert a 3x3 rotation matrix to axis-angle (rotation vector) form.

	        Delegates to ``scipy.spatial.transform.Rotation``.

	        Args:
	            R: (3, 3) rotation matrix

	        Returns:
	            axis_angle: (3,) vector where:
	                - direction is the rotation axis
	                - magnitude is the rotation angle in radians

	        Example:
	            >>> aa = CameraTransformUtils.rotation_matrix_to_axis_angle(np.eye(3))
	            >>> np.allclose(aa, [0, 0, 0])
	            True
	        """
	        return Rotation.from_matrix(R).as_rotvec()

	    @staticmethod
	    def axis_angle_to_rotation_matrix(axis_angle: np.ndarray) -> np.ndarray:
	        """
	        Convert an axis-angle (rotation vector) to a 3x3 rotation matrix.

	        Args:
	            axis_angle: (3,) vector [rot_x, rot_y, rot_z]
	                - direction is the rotation axis
	                - magnitude is the rotation angle in radians

	        Returns:
	            R: (3, 3) rotation matrix

	        Example:
	            >>> aa = np.array([0, 0, np.pi / 2])  # 90° rotation around Z
	            >>> R = CameraTransformUtils.axis_angle_to_rotation_matrix(aa)
	            >>> np.allclose(R, [[0, -1, 0], [1, 0, 0], [0, 0, 1]])
	            True
	        """
	        return Rotation.from_rotvec(axis_angle).as_matrix()

	    @staticmethod
	    def compute_relative_pose(
	        R_src: np.ndarray,
	        T_src: np.ndarray,
	        R_tgt: np.ndarray,
	        T_tgt: np.ndarray,
	    ) -> Dict[str, np.ndarray]:
	        """
	        Compute the relative pose from the source camera to the target camera.

	        The relative pose maps points from the source camera frame into the
	        target camera frame.

	        Math:
	            R_rel = R_tgt @ R_src.T
	            T_rel = T_tgt - R_rel @ T_src

	            so that: X_tgt = R_rel @ X_src + T_rel

	        Args:
	            R_src: (3, 3) source camera rotation matrix
	            T_src: (3,) source camera translation vector
	            R_tgt: (3, 3) target camera rotation matrix
	            T_tgt: (3,) target camera translation vector

	        Returns:
	            relative_pose: {
	                'rotation_6d': (6,) output of rotation_matrix_to_6d(R_rel),
	                'rotation': (3,) axis-angle [rot_x, rot_y, rot_z],
	                'translation': (3,) [trans_x, trans_y, trans_z]
	            }

	        Example:
	            >>> # Same camera -> identity transformation
	            >>> R = np.eye(3)
	            >>> T = np.array([1.0, 2.0, 3.0])
	            >>> rel = CameraTransformUtils.compute_relative_pose(R, T, R, T)
	            >>> np.allclose(rel['rotation'], [0, 0, 0])
	            True
	            >>> np.allclose(rel['translation'], [0, 0, 0])
	            True
	        """
	        R_rel = R_tgt @ R_src.T
	        T_rel = T_tgt - R_rel @ T_src

	        return {
	            'rotation_6d': CameraTransformUtils.rotation_matrix_to_6d(R_rel),
	            'rotation': CameraTransformUtils.rotation_matrix_to_axis_angle(R_rel),
	            'translation': T_rel,
	        }

	    @staticmethod
	    def apply_relative_pose(
	        R_src: np.ndarray,
	        T_src: np.ndarray,
	        relative_pose: Dict[str, np.ndarray]
	    ) -> Tuple[np.ndarray, np.ndarray]:
	        """
	        Apply a relative pose to the source camera to recover the target camera.

	        Inverse of compute_relative_pose; useful as a round-trip check. Only
	        the 'rotation' (axis-angle) and 'translation' entries are read; any
	        other key (e.g. 'rotation_6d') is ignored.

	        Args:
	            R_src: (3, 3) source camera rotation matrix
	            T_src: (3,) source camera translation vector
	            relative_pose: dict with:
	                - 'rotation': (3,) axis-angle
	                - 'translation': (3,) relative translation

	        Returns:
	            R_tgt: (3, 3) target camera rotation matrix
	            T_tgt: (3,) target camera translation vector

	        Example:
	            >>> U = CameraTransformUtils
	            >>> R1 = U.axis_angle_to_rotation_matrix(np.array([0.1, -0.2, 0.3]))
	            >>> R2 = U.axis_angle_to_rotation_matrix(np.array([-0.4, 0.5, 0.2]))
	            >>> T1, T2 = np.array([1.0, 2.0, 3.0]), np.array([-1.0, 0.5, 2.0])
	            >>> rel = U.compute_relative_pose(R1, T1, R2, T2)
	            >>> R2_rec, T2_rec = U.apply_relative_pose(R1, T1, rel)
	            >>> bool(np.allclose(R2, R2_rec) and np.allclose(T2, T2_rec))
	            True
	        """
	        R_rel = CameraTransformUtils.axis_angle_to_rotation_matrix(relative_pose['rotation'])
	        T_rel = relative_pose['translation']

	        # R_rel = R_tgt @ R_src.T          =>  R_tgt = R_rel @ R_src
	        R_tgt = R_rel @ R_src
	        # T_rel = T_tgt - R_rel @ T_src    =>  T_tgt = R_rel @ T_src + T_rel
	        T_tgt = R_rel @ T_src + T_rel

	        return R_tgt, T_tgt

	    @staticmethod
	    def normalize_scene_translations(
	        translations: List[np.ndarray],
	        scale_factor: float = 2.0
	    ) -> Tuple[List[np.ndarray], float]:
	        """
	        Normalize the translations of one scene by its bounding-sphere radius.

	        The radius is the largest Euclidean norm among the translations (a
	        sphere centred at the origin). Each translation is divided by
	        ``scale_factor * radius``.

	        Args:
	            translations: List of (3,) translation vectors
	            scale_factor: Divisor for normalization (default: 2.0)
	                - scale_factor=2.0 → norms ≤ 0.5
	                - scale_factor=1.0 → norms ≤ 1.0

	        Returns:
	            normalized_translations: List of (3,) normalized vectors
	            scene_radius: Float, radius of bounding sphere. ``1.0`` is
	                returned for an empty list or when every translation is
	                (numerically) at the origin; in those cases the
	                translations are returned unscaled.

	        Example:
	            >>> ts = [np.array([1.0, 0, 0]), np.array([0, 2.0, 0])]
	            >>> norm_ts, radius = CameraTransformUtils.normalize_scene_translations(ts)
	            >>> float(radius)
	            2.0
	        """
	        if len(translations) == 0:
	            # Nothing to normalize; avoids max() on an empty sequence.
	            return [], 1.0

	        scene_radius = max(np.linalg.norm(T) for T in translations)

	        if scene_radius < 1e-6:
	            # Degenerate case: all cameras at origin, scaling would divide by ~0.
	            return translations, 1.0

	        divisor = scale_factor * scene_radius
	        normalized = [T / divisor for T in translations]

	        return normalized, scene_radius

	    @staticmethod
	    def create_lookat_rotation(
	        camera_pos: np.ndarray,
	        target_pos: np.ndarray,
	        up_vector: np.ndarray = np.array([0.0, 0.0, 1.0])
	    ) -> np.ndarray:
	        """
	        Create a look-at rotation matrix for the left-multiply convention.

	        NOTE: This function produces Blender convention (+X right, +Y up, +Z backward).
	        Must apply Y,Z flip (diag(1, -1, -1)) to convert to the OpenCV standard.

	        Constructs a world-to-camera rotation matrix where:
	        - Camera -Z axis points from camera to target (viewing direction)
	        - Camera +Y axis aligns with the up vector as much as possible
	        - Camera +X axis is the right direction (right-handed system)

	        Args:
	            camera_pos: (3,) camera position in world coordinates
	            target_pos: (3,) target position to look at (usually origin)
	            up_vector: (3,) approximate up direction in world
	                (default: [0,0,1] for Blender Z-up). The default array is
	                only read, never mutated.

	        Returns:
	            R: (3, 3) float32 world-to-camera rotation matrix (Blender convention).
	                Rows are the camera axes (X, Y, Z) expressed in world coordinates.

	        Raises:
	            ValueError: if camera_pos and target_pos are closer than 1e-6.

	        Example:
	            >>> cam = np.array([4.0, 0.0, 2.0])
	            >>> R = CameraTransformUtils.create_lookat_rotation(cam, np.zeros(3))
	            >>> # Viewing direction maps to camera -Z
	            >>> np.allclose(R @ (-cam / np.linalg.norm(cam)), [0, 0, -1], atol=1e-6)
	            True
	        """
	        # Accept plain sequences as well as arrays.
	        camera_pos = np.asarray(camera_pos)
	        target_pos = np.asarray(target_pos)
	        up_vector = np.asarray(up_vector)

	        # Forward direction (from camera toward target)
	        forward = target_pos - camera_pos
	        forward_norm = np.linalg.norm(forward)
	        if forward_norm < 1e-6:
	            raise ValueError("Camera position and target position are too close")
	        forward = forward / forward_norm

	        # Camera -Z points toward target, so camera +Z points away from it.
	        z_axis = -forward

	        # X = up × Z gives the right direction.
	        right = np.cross(up_vector, z_axis)
	        right_norm = np.linalg.norm(right)
	        if right_norm < 1e-6:
	            # Looking straight along the up vector: the cross product vanishes,
	            # so pick world X (then world Y) and remove its component along Z.
	            right = np.array([1.0, 0.0, 0.0])
	            right = right - np.dot(right, z_axis) * z_axis
	            right_norm = np.linalg.norm(right)
	            if right_norm < 1e-6:
	                right = np.array([0.0, 1.0, 0.0])
	                right = right - np.dot(right, z_axis) * z_axis
	                right_norm = np.linalg.norm(right)
	        right = right / right_norm

	        # Y = Z × X re-derives "up" so the basis is orthonormal and right-handed.
	        up_actual = np.cross(z_axis, right)

	        # Rows are camera axes in world coordinates (left-multiply convention).
	        R = np.array([
	            right,      # Camera X axis (right)
	            up_actual,  # Camera Y axis (up)
	            z_axis,     # Camera Z axis (backward, away from target)
	        ], dtype=np.float32)

	        return R

	    @staticmethod
	    def spherical_to_rotation_matrix(
	        azimuth: float,
	        elevation: float,
	        roll: float = 0.0
	    ) -> np.ndarray:
	        """
	        Convert spherical camera angles to a rotation matrix.

	        DEPRECATED: Use create_lookat_rotation instead for accurate look-at cameras.

	        NOTE: Documented as producing Blender convention (+X right, +Y up,
	        +Z backward); apply the Y,Z flip (diag(1, -1, -1)) to convert to the
	        OpenCV standard.

	        The result is ``R_z(roll) @ R_x(elevation) @ R_y(azimuth)``, i.e. the
	        azimuth rotation is applied first, then elevation, then roll, each
	        about a fixed axis.

	        Args:
	            azimuth: Rotation about the Y axis in radians [-π, π]
	            elevation: Rotation about the X axis in radians [-π/2, π/2]
	            roll: Rotation about the Z axis in radians (default: 0)

	        Returns:
	            R: (3, 3) rotation matrix

	        Example:
	            >>> R = CameraTransformUtils.spherical_to_rotation_matrix(0.0, 0.0)
	            >>> np.allclose(R, np.eye(3))
	            True
	        """
	        # Rotation around Y-axis (azimuth)
	        cos_az, sin_az = np.cos(azimuth), np.sin(azimuth)
	        R_y = np.array([
	            [cos_az, 0, sin_az],
	            [0, 1, 0],
	            [-sin_az, 0, cos_az]
	        ])

	        # Rotation around X-axis (elevation)
	        cos_el, sin_el = np.cos(elevation), np.sin(elevation)
	        R_x = np.array([
	            [1, 0, 0],
	            [0, cos_el, -sin_el],
	            [0, sin_el, cos_el]
	        ])

	        # Rotation around Z-axis (roll)
	        cos_roll, sin_roll = np.cos(roll), np.sin(roll)
	        R_z = np.array([
	            [cos_roll, -sin_roll, 0],
	            [sin_roll, cos_roll, 0],
	            [0, 0, 1]
	        ])

	        # Combined rotation: first azimuth (Y), then elevation (X), then roll (Z)
	        return R_z @ R_x @ R_y

	    @staticmethod
	    def rotation_matrix_to_spherical(R: np.ndarray) -> Tuple[float, float, float]:
	        """
	        Extract azimuth, elevation, and roll from a rotation matrix.

	        Inverse of spherical_to_rotation_matrix, which builds
	        ``R = R_z(roll) @ R_x(elevation) @ R_y(azimuth)``.

	        Args:
	            R: (3, 3) rotation matrix

	        Returns:
	            azimuth: Rotation about Y in radians [-π, π]
	            elevation: Rotation about X in radians [-π/2, π/2]
	            roll: Rotation about Z in radians [-π, π]

	        Example:
	            >>> U = CameraTransformUtils
	            >>> R_orig = U.spherical_to_rotation_matrix(0.5, 0.3, 0.1)
	            >>> az, el, roll = U.rotation_matrix_to_spherical(R_orig)
	            >>> np.allclose(R_orig, U.spherical_to_rotation_matrix(az, el, roll))
	            True
	        """
	        # In scipy, LOWERCASE axis letters mean EXTRINSIC rotations (about
	        # fixed axes) and uppercase mean intrinsic. Extrinsic 'yxz' applies
	        # Y first, then X, then Z, which composes to R_z @ R_x @ R_y — exactly
	        # the construction used by spherical_to_rotation_matrix.
	        azimuth, elevation, roll = Rotation.from_matrix(R).as_euler('yxz', degrees=False)

	        return azimuth, elevation, roll

	    @staticmethod
	    def rotation_matrix_to_camera_angles(R: np.ndarray) -> Dict[str, float]:
	        """
	        Extract viewing direction angles from a rotation matrix.

	        Azimuth and elevation are computed from the camera's forward
	        direction, taken as row 2 of ``R`` (+Z axis in world coordinates).

	        NOTE(review): the zero-roll reference frame is obtained from
	        spherical_to_rotation_matrix, which is documented as Blender
	        convention while ``R`` here is expected in the OpenCV convention.
	        The reference frame's Z row does not in general equal ``forward``,
	        so treat the returned roll with caution — confirm the intended
	        convention before relying on it.

	        Args:
	            R: (3, 3) rotation matrix (world-to-camera)
	                - Rows are camera axes in world coordinates
	                - Expected convention: +X right, +Y down, +Z forward

	        Returns:
	            dict with:
	                - 'azimuth': atan2(forward_z, forward_x), radians in [-π, π]
	                - 'elevation': atan2(-forward_y, hypot(forward_x, forward_z)),
	                  radians in [-π/2, π/2]
	                - 'roll': signed angle about ``forward`` between the reference
	                  up row and the actual up row (row 1 of R), radians in
	                  [-π, π]; 0.0 when either projection is degenerate

	        Example:
	            >>> angles = CameraTransformUtils.rotation_matrix_to_camera_angles(np.eye(3))
	            >>> bool(np.isclose(angles['azimuth'], np.pi / 2))
	            True
	        """
	        # Camera viewing direction in world coords (row 2 = camera +Z)
	        forward = R[2, :]

	        # Azimuth: angle in the XZ plane
	        azimuth = np.arctan2(forward[2], forward[0])

	        # Elevation: angle from the XZ plane. +Y is down, so a negative Y
	        # component means looking up; atan2 is stabler than arcsin here.
	        elevation = np.arctan2(-forward[1], np.hypot(forward[0], forward[2]))

	        # Roll: compare the actual up row against the up row of a rotation
	        # with the same azimuth/elevation but zero roll.
	        R_no_roll = CameraTransformUtils.spherical_to_rotation_matrix(azimuth, elevation, roll=0.0)
	        expected_up = R_no_roll[1, :]
	        actual_up = R[1, :]

	        # Project both onto the plane perpendicular to forward
	        expected_up_proj = expected_up - np.dot(expected_up, forward) * forward
	        actual_up_proj = actual_up - np.dot(actual_up, forward) * forward

	        expected_up_norm = np.linalg.norm(expected_up_proj)
	        actual_up_norm = np.linalg.norm(actual_up_proj)

	        if expected_up_norm > 1e-6 and actual_up_norm > 1e-6:
	            expected_up_proj = expected_up_proj / expected_up_norm
	            actual_up_proj = actual_up_proj / actual_up_norm

	            # atan2(sin, cos) resolves the quadrant; the cross product lies
	            # along forward and its sign gives the rotation direction.
	            cos_roll = np.dot(expected_up_proj, actual_up_proj)
	            sin_roll = np.dot(np.cross(expected_up_proj, actual_up_proj), forward)
	            roll = np.arctan2(sin_roll, cos_roll)
	        else:
	            # Degenerate case: an up vector is (anti)parallel to forward
	            roll = 0.0

	        return {
	            'azimuth': azimuth,
	            'elevation': elevation,
	            'roll': roll
	        }

	    @staticmethod
	    def clip_and_warn(
	        values: np.ndarray,
	        min_val: float,
	        max_val: float,
	        name: str = "values"
	    ) -> np.ndarray:
	        """
	        Clip values to a range and print a warning if anything was clipped.

	        Useful for detecting normalization issues.

	        Args:
	            values: Array (or array-like) to clip
	            min_val: Minimum value
	            max_val: Maximum value
	            name: Name used in the warning message

	        Returns:
	            clipped: Clipped array
	        """
	        # Coerce so comparisons and .min()/.max() also work for plain lists.
	        values = np.asarray(values)
	        clipped = np.clip(values, min_val, max_val)

	        num_clipped_low = np.sum(values < min_val)
	        num_clipped_high = np.sum(values > max_val)

	        if num_clipped_low > 0 or num_clipped_high > 0:
	            print(f"Warning: {name} clipped:")
	            if num_clipped_low > 0:
	                print(f" {num_clipped_low} values below {min_val} (min: {values.min():.4f})")
	            if num_clipped_high > 0:
	                print(f" {num_clipped_high} values above {max_val} (max: {values.max():.4f})")

	        return clipped


	def compute_angular_offset(rotation: 'torch.Tensor', translation: 'torch.Tensor', normalizer: float = 7.0) -> 'torch.Tensor':
	    """
	    Compute angular offset (pitch, yaw) between actual and expected looking directions.

	    The "expected" direction is the unit vector from the camera position to
	    the world origin. It is rotated into the camera frame and compared with
	    the camera's own viewing axis, which this function takes to be
	    (0, 0, -1), i.e. X=right, Y=up, Z=backward (OpenGL/Blender style).

	    Args:
	        rotation: (3, 3) rotation matrix [R] from world-to-camera
	        translation: (3,) translation vector [T] from world-to-camera
	        normalizer: Currently unused; kept so existing call sites keep working.

	    Returns:
	        angular_offset: (2,) float32 tensor with [pitch, yaw] in radians
	            - pitch: atan2(y, -z) of the expected direction in the camera frame
	              (positive = camera tilted UP, object appears below center)
	            - yaw: atan2(x, -z) of the expected direction in the camera frame
	              (positive = camera turned RIGHT, object appears left of center)

	    Raises:
	        ValueError: if the camera sits at the world origin, where the
	            direction toward the origin is undefined (previously this
	            produced NaN silently).
	    """
	    import torch

	    # Camera position in world coordinates: C = -R^T @ T
	    camera_position = -rotation.T @ translation

	    distance = torch.norm(camera_position)
	    if distance < 1e-8:
	        raise ValueError("Camera position coincides with the origin; expected direction is undefined")

	    # Unit vector from the camera toward the origin, in world coordinates
	    expected_dir_world = -camera_position / distance

	    # Same direction in the camera frame; equals (0, 0, -1) when the camera
	    # looks exactly at the origin.
	    expected_dir_cam = rotation @ expected_dir_world

	    # Pitch: angle in the camera YZ plane (rotation about the camera X axis)
	    pitch = torch.atan2(expected_dir_cam[1], -expected_dir_cam[2])

	    # Yaw: angle in the camera XZ plane (rotation about the camera Y axis)
	    yaw = torch.atan2(expected_dir_cam[0], -expected_dir_cam[2])

	    # stack() is the supported way to combine 0-d tensors; torch.tensor() on a
	    # list of tensors round-trips every element through a Python scalar.
	    return torch.stack([pitch, yaw]).to(torch.float32)


	def reconstruct_camera_from_factorized(
	    azimuth: float,
	    elevation: float,
	    radius: float,
	    pitch: float = 0.0,
	    yaw: float = 0.0,
	    return_numpy: bool = True
	):
	    """
	    Reconstruct camera rotation and translation from factorized parameters.

	    Inverse of compute_angular_offset: the camera is placed on a sphere
	    around the origin (Z-up world), oriented with
	    CameraTransformUtils.create_lookat_rotation so it looks at the origin,
	    and then rotated in its own frame so that the direction toward the
	    origin has the requested pitch/yaw.

	    The rotation therefore follows create_lookat_rotation's convention
	    (+X right, +Y up, +Z backward; the camera looks along -Z).

	    Args:
	        azimuth: Azimuth angle in radians (angle in the world XY plane)
	        elevation: Elevation angle in radians (angle above the world XY plane)
	        radius: Distance from origin
	        pitch: Pitch offset in radians (rotation around camera X-axis, up/down).
	            Positive = camera tilted UP from origin-pointing.
	        yaw: Yaw offset in radians (rotation around camera Y-axis, left/right).
	            Positive = camera turned RIGHT from origin-pointing.
	        return_numpy: If True, return numpy arrays; if False, return torch tensors

	        pitch and yaw are mapped through tan(), so the round trip with
	        compute_angular_offset only holds for values strictly inside
	        (-π/2, π/2).

	    Returns:
	        R: (3, 3) float32 world-to-camera rotation matrix
	        T: (3,) float32 world-to-camera translation vector, ``T = -R @ C``

	    Raises:
	        ValueError: propagated from create_lookat_rotation when ``radius``
	            is (numerically) zero.

	    Example:
	        >>> # Camera at azimuth=45°, elevation=30°, radius=7, looking at origin
	        >>> R, T = reconstruct_camera_from_factorized(
	        ...     azimuth=np.pi/4, elevation=np.pi/6, radius=7.0, pitch=0.0, yaw=0.0
	        ... )
	        >>> # Camera position -R.T @ T is
	        >>> # (7*cos(30°)*cos(45°), 7*cos(30°)*sin(45°), 7*sin(30°))
	    """
	    import torch
	    import numpy as np

	    # 1. Camera position in world coordinates (spherical to Cartesian, Z-up)
	    cos_el = np.cos(elevation)
	    sin_el = np.sin(elevation)
	    cos_az = np.cos(azimuth)
	    sin_az = np.sin(azimuth)

	    camera_pos = np.array([
	        radius * cos_el * cos_az,
	        radius * cos_el * sin_az,
	        radius * sin_el
	    ], dtype=np.float32)

	    # 2. Base rotation: camera looking exactly at the origin (pitch = yaw = 0)
	    target_pos = np.array([0.0, 0.0, 0.0], dtype=np.float32)
	    up_vector = np.array([0.0, 0.0, 1.0], dtype=np.float32)
	    R_base = CameraTransformUtils.create_lookat_rotation(camera_pos, target_pos, up_vector)

	    # 3. Offset rotation in the camera frame.
	    #    With R_base alone, the direction toward the origin maps to (0, 0, -1).
	    if abs(pitch) < 1e-9 and abs(yaw) < 1e-9:
	        # No offset requested
	        R = R_base
	    else:
	        # Invert the extraction formulas of compute_angular_offset:
	        #   pitch = atan2(y, -z)  →  tan(pitch) = y / (-z)
	        #   yaw   = atan2(x, -z)  →  tan(yaw)   = x / (-z)
	        # The unit vector satisfying both is (tan_yaw, tan_pitch, -1) / norm.
	        tan_pitch = np.tan(pitch)
	        tan_yaw = np.tan(yaw)
	        norm_factor = np.sqrt(1 + tan_pitch**2 + tan_yaw**2)

	        target_dir_cam = np.array([
	            tan_yaw / norm_factor,    # x component
	            tan_pitch / norm_factor,  # y component
	            -1.0 / norm_factor        # z component (negative = in front of camera)
	        ], dtype=np.float32)

	        # Direction toward the origin after R_base only
	        initial_dir_cam = np.array([0.0, 0.0, -1.0], dtype=np.float32)

	        # dot = 1 / norm_factor, which is always positive, so the two
	        # directions can never be opposite; only the near-identical case
	        # needs special handling before normalizing the rotation axis.
	        dot_product = np.dot(initial_dir_cam, target_dir_cam)

	        if dot_product > 0.999999:
	            # Directions are nearly identical, no rotation needed
	            R_offset = np.eye(3, dtype=np.float32)
	        else:
	            # Rotation taking initial_dir_cam onto target_dir_cam:
	            # axis = initial × target, angle = acos(initial · target)
	            axis = np.cross(initial_dir_cam, target_dir_cam)
	            axis = axis / np.linalg.norm(axis)
	            angle = np.arccos(np.clip(dot_product, -1.0, 1.0))

	            # Rodrigues' formula: R = I + sin(θ)K + (1-cos(θ))K^2
	            # where K is the skew-symmetric matrix of the axis
	            K = np.array([
	                [0, -axis[2], axis[1]],
	                [axis[2], 0, -axis[0]],
	                [-axis[1], axis[0], 0]
	            ], dtype=np.float32)

	            R_offset = (np.eye(3, dtype=np.float32) +
	                        np.sin(angle) * K +
	                        (1 - np.cos(angle)) * (K @ K))

	        # Apply the offset in the camera frame (left of R_base)
	        R = R_offset @ R_base

	    # 4. World-to-camera translation
	    T = -R @ camera_pos

	    if return_numpy:
	        return R, T
	    return torch.from_numpy(R).float(), torch.from_numpy(T).float()