# Source: 4dgs-dpm / gs/utils/camera_utils.py
# Uploaded by dxm21 using huggingface_hub (commit 8c48cce, verified).
import os
import json
import numpy as np
from utils.math_utils import world_to_view, projection_matrix
# Y down, Z forward
def load_camera(camera_info):
    """Build the camera parameter dictionary from a camera info dictionary.

    The pose is read as a camera-to-world matrix in OpenGL/Blender camera
    axes (Y up, Z back) and converted to COLMAP axes (Y down, Z forward) by
    negating the camera's Y and Z axis columns.

    Args:
        camera_info: Mapping with the following entries:
            - "camera_to_world": 4x4 camera-to-world matrix (nested list or
              array). The input is copied and never modified in place.
            - "width", "height": image dimensions.
            - "focal": focal length, used for both fx and fy.
            - "camera_model" (optional): "OPENCV" (default), None, or
              "OPENCV_FISHEYE".
            - "k1", "k2", "p1", "p2", "k3", "k4" (optional): distortion
              coefficients, each defaulting to 0.0.
            A "camera_id" entry may be present but is not used.

    Returns:
        dict with keys 'R', 'T', 'camera_center', 'view_matrix',
        'proj_matrix', 'full_proj_matrix', 'tan_fovx', 'tan_fovy', 'fx',
        'fy', 'cx', 'cy', 'width', 'height', 'camera_to_world',
        'world_to_camera', 'camera_type' and 'distortion_params'.
        'world_to_camera' and 'proj_matrix' are returned transposed, and
        'full_proj_matrix' is their product in that order.

    Raises:
        KeyError: If "camera_to_world" is missing.
        TypeError: If "width", "height" or "focal" is missing (the value is
            None and the arithmetic on it fails).
        ValueError: If "camera_model" is not a supported model.
    """
    # np.array always copies (np.asarray would alias an existing float64
    # array), so the axis flip below cannot leak into the caller's data.
    camera_to_world = np.array(camera_info["camera_to_world"], dtype=np.float64)
    # Change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward)
    camera_to_world[:3, 1:3] *= -1

    # World-to-camera transform; R and T are views into its upper 3x4 part.
    world_to_camera = np.linalg.inv(camera_to_world).astype(np.float32)
    R = world_to_camera[:3, :3]
    T = world_to_camera[:3, 3]
    world_to_camera[3, 3] = 1.
    # From here on the matrix is kept transposed (translation in the last row).
    world_to_camera = world_to_camera.T

    # Intrinsics: a single focal length and a principal point at the image centre.
    width = camera_info.get("width")
    height = camera_info.get("height")
    fx = camera_info.get("focal")
    fy = camera_info.get("focal")
    cx = width / 2
    cy = height / 2

    # Field of view from focal length
    fovx = 2 * np.arctan(width / (2 * fx))
    fovy = 2 * np.arctan(height / (2 * fy))

    view_matrix = world_to_view(R=R, t=T)

    znear = 0.01
    zfar = 100.0
    proj_matrix = projection_matrix(fovx=fovx, fovy=fovy, znear=znear, zfar=zfar).T
    # Both factors are transposed, so this is (projection @ world_to_camera).T.
    full_proj_matrix = world_to_camera @ proj_matrix

    tan_fovx = np.tan(fovx * 0.5)
    tan_fovy = np.tan(fovy * 0.5)
    # With the transposed layout the camera position sits in the last row
    # of the inverse.
    camera_center = np.linalg.inv(world_to_camera)[3, :3]

    # Camera type: 0 = PERSPECTIVE, 1 = FISHEYE
    camera_model = camera_info.get("camera_model", "OPENCV")
    if camera_model == "OPENCV" or camera_model is None:
        camera_type = 0  # PERSPECTIVE
    elif camera_model == "OPENCV_FISHEYE":
        camera_type = 1  # FISHEYE
    else:
        raise ValueError(f"Unsupported camera_model '{camera_model}'")

    # Distortion coefficients in a fixed order, missing ones default to 0.0
    distortion_params = [
        camera_info.get(param_name, 0.0)
        for param_name in ("k1", "k2", "p1", "p2", "k3", "k4")
    ]

    camera_params = {
        'R': R,
        'T': T,
        'camera_center': camera_center,
        'view_matrix': view_matrix,
        'proj_matrix': proj_matrix,
        'full_proj_matrix': full_proj_matrix,
        'tan_fovx': tan_fovx,
        'tan_fovy': tan_fovy,
        'fx': fx,
        'fy': fy,
        'cx': cx,
        'cy': cy,
        'width': width,
        'height': height,
        'camera_to_world': camera_to_world,
        'world_to_camera': world_to_camera,
        'camera_type': camera_type,
        'distortion_params': np.array(distortion_params, dtype=np.float32),
    }
    return camera_params
def load_camera_from_json(input_path, camera_id=0):
    """Load camera parameters from the cameras.json file next to ``input_path``.

    Args:
        input_path: Path whose parent directory is searched for cameras.json.
        camera_id: Value matched against each entry's "id" field. When no
            entry matches (or entries carry no "id"), the first entry is used.

    Returns:
        The dictionary produced by load_camera for the selected entry, or
        None when cameras.json is missing or cannot be loaded. A message is
        printed in both failure cases.
    """
    camera_dir = os.path.dirname(input_path)
    camera_file = os.path.join(camera_dir, "cameras.json")
    if not os.path.exists(camera_file):
        print(f"Warning: No cameras.json found in {camera_dir}, using default camera")
        return None
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(camera_file, 'r', encoding="utf-8") as f:
            cameras = json.load(f)
        # Find camera with specified ID, or use the first one. .get() keeps an
        # entry without an "id" field from aborting the whole search.
        camera = next((cam for cam in cameras if cam.get("id") == camera_id), None)
        if camera is None:
            camera = cameras[0]
        # Use load_camera to process the camera parameters
        return load_camera(camera)
    except Exception as e:
        # Deliberate best-effort: any failure is reported and mapped to None.
        print(f"Error loading camera from cameras.json: {e}")
        return None
def load_camera_colmap(cam_info):
    """Load a camera given in COLMAP format (dust3r output).

    The pose is already world-to-camera with COLMAP axes (+Z forward), so no
    axis flip is applied.

    Args:
        cam_info: Dictionary containing:
            - R: 3x3 rotation matrix (world-to-camera rotation)
            - T: translation vector (world-to-camera translation)
            - width, height: image dimensions
            - fx, fy (optional): focal lengths; fall back to "focal", then
              to 0.7 * width and 0.7 * height respectively
            - cx, cy (optional): principal point; defaults to the image centre
            - camera_model (optional): "OPENCV_FISHEYE" selects the fisheye
              type; any other value is treated as perspective
            - k1, k2, p1, p2, k3, k4 (optional): distortion coefficients,
              each defaulting to 0.0
            A "camera_id" entry may be present but is not used.

    Returns:
        dict with the same keys as load_camera: 'R', 'T', 'camera_center',
        'view_matrix', 'proj_matrix', 'full_proj_matrix', 'tan_fovx',
        'tan_fovy', 'fx', 'fy', 'cx', 'cy', 'width', 'height',
        'camera_to_world', 'world_to_camera', 'camera_type' and
        'distortion_params'. 'world_to_camera' is returned untransposed
        (translation in the last column), while 'proj_matrix' is transposed.

    Raises:
        KeyError: If "R" or "T" is missing.
        TypeError: If "width" or "height" is missing (the value is None and
            the arithmetic on it fails).
    """
    # Use provided R and T directly (COLMAP convention - world to camera)
    R = cam_info['R']
    T = cam_info['T']

    # Assemble the 4x4 world-to-camera matrix in float64 and invert it once
    # to obtain camera-to-world.
    world_to_camera_f64 = np.eye(4, dtype=np.float64)
    world_to_camera_f64[:3, :3] = R
    world_to_camera_f64[:3, 3] = T
    camera_to_world = np.linalg.inv(world_to_camera_f64)
    # Cast the assembled matrix directly rather than inverting the inverse,
    # which only added round-off.
    world_to_camera = world_to_camera_f64.astype(np.float32)

    # Intrinsics (the fallbacks are evaluated eagerly, so width and height
    # are always required).
    width = cam_info.get("width")
    height = cam_info.get("height")
    fx = cam_info.get("fx", cam_info.get("focal", width * 0.7))
    fy = cam_info.get("fy", cam_info.get("focal", height * 0.7))
    cx = cam_info.get("cx", width / 2)
    cy = cam_info.get("cy", height / 2)

    # Field of view from focal length
    fovx = 2 * np.arctan(width / (2 * fx))
    fovy = 2 * np.arctan(height / (2 * fy))

    # View matrix from the original R and T
    view_matrix = world_to_view(R=R, t=T)

    znear = 0.01
    zfar = 100.0
    proj_matrix = projection_matrix(fovx=fovx, fovy=fovy, znear=znear, zfar=zfar).T
    # proj_matrix is transposed, so the world-to-camera factor must be
    # transposed as well; the product is then
    # (projection @ world_to_camera).T, matching load_camera. Multiplying
    # the untransposed matrix mixed the two conventions.
    full_proj_matrix = world_to_camera.T @ proj_matrix

    tan_fovx = np.tan(fovx * 0.5)
    tan_fovy = np.tan(fovy * 0.5)
    # Camera position in world space is the translation column of camera-to-world.
    camera_center = camera_to_world[:3, 3]

    # Camera type: 1 = FISHEYE; everything else (including unknown models)
    # falls back to 0 = PERSPECTIVE.
    camera_model = cam_info.get("camera_model", "OPENCV")
    camera_type = 1 if camera_model == "OPENCV_FISHEYE" else 0

    # Distortion coefficients in a fixed order, missing ones default to 0.0
    distortion_params = [
        cam_info.get(param_name, 0.0)
        for param_name in ("k1", "k2", "p1", "p2", "k3", "k4")
    ]

    camera_params = {
        'R': R,
        'T': T,
        'camera_center': camera_center,
        'view_matrix': view_matrix,
        'proj_matrix': proj_matrix,
        'full_proj_matrix': full_proj_matrix,
        'tan_fovx': tan_fovx,
        'tan_fovy': tan_fovy,
        'fx': fx,
        'fy': fy,
        'cx': cx,
        'cy': cy,
        'width': width,
        'height': height,
        'camera_to_world': camera_to_world,
        'world_to_camera': world_to_camera,
        'camera_type': camera_type,
        'distortion_params': np.array(distortion_params, dtype=np.float32),
    }
    return camera_params