import os import json import numpy as np from utils.math_utils import world_to_view, projection_matrix # Y down, Z forward def load_camera(camera_info): """Load camera parameters from camera info dictionary""" # Extract camera parameters camera_id = camera_info["camera_id"] camera_to_world = np.asarray(camera_info["camera_to_world"], dtype=np.float64) # Change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) camera_to_world[:3, 1:3] *= -1 # Calculate world to camera transform world_to_camera = np.linalg.inv(camera_to_world).astype(np.float32) # Extract rotation and translation R = world_to_camera[:3, :3] T = world_to_camera[:3, 3] world_to_camera[3, 3] = 1. world_to_camera = world_to_camera.T width = camera_info.get("width") height = camera_info.get("height") fx = camera_info.get("focal") fy = camera_info.get("focal") cx = width / 2 cy = height / 2 # Calculate field of view from focal length fovx = 2 * np.arctan(width / (2 * fx)) fovy = 2 * np.arctan(height / (2 * fy)) # Create view matrix view_matrix = world_to_view(R=R, t=T) # Create projection matrix znear = 0.01 zfar = 100.0 proj_matrix = projection_matrix(fovx=fovx, fovy=fovy, znear=znear, zfar=zfar).T full_proj_matrix = world_to_camera @ proj_matrix # Calculate other parameters tan_fovx = np.tan(fovx * 0.5) tan_fovy = np.tan(fovy * 0.5) camera_center = np.linalg.inv(world_to_camera)[3, :3] # Handle camera type and distortion camera_model = camera_info.get("camera_model", "OPENCV") if camera_model == "OPENCV" or camera_model is None: camera_type = 0 # PERSPECTIVE elif camera_model == "OPENCV_FISHEYE": camera_type = 1 # FISHEYE else: raise ValueError(f"Unsupported camera_model '{camera_model}'") # Get distortion parameters distortion_params = [] for param_name in ["k1", "k2", "p1", "p2", "k3", "k4"]: distortion_params.append(camera_info.get(param_name, 0.0)) camera_params = { 'R': R, 'T': T, 'camera_center': camera_center, 'view_matrix': view_matrix, 'proj_matrix': proj_matrix, 'full_proj_matrix': full_proj_matrix, 'tan_fovx': tan_fovx, 'tan_fovy': tan_fovy, 'fx': fx, 'fy': fy, 'cx': cx, 'cy': cy, 'width': width, 'height': height, 'camera_to_world': camera_to_world, 'world_to_camera': world_to_camera, 'camera_type': camera_type, 'distortion_params': np.array(distortion_params, dtype=np.float32) } return camera_params def load_camera_from_json(input_path, camera_id=0): """Load camera parameters from camera.json file""" camera_file = os.path.join(os.path.dirname(input_path), "cameras.json") if not os.path.exists(camera_file): print(f"Warning: No cameras.json found in {os.path.dirname(input_path)}, using default camera") return None try: with open(camera_file, 'r') as f: cameras = json.load(f) # Find camera with specified ID, or use the first one camera = next((cam for cam in cameras if cam["id"] == camera_id), cameras[0]) # Use load_camera to process the camera parameters return load_camera(camera) except Exception as e: print(f"Error loading camera from cameras.json: {e}") return None def load_camera_colmap(cam_info): """ Load camera from COLMAP format (dust3r output) with exact compatibility to original load_camera. Args: cam_info: Dictionary containing: - width, height: image dimensions - fx, fy: focal lengths - cx, cy: principal point - camera_id: unique identifier - R: rotation matrix (world-to-camera rotation) - T: translation vector (world-to-camera translation) - Optional: camera_model, distortion params """ # Extract camera parameters camera_id = cam_info["camera_id"] # Use provided R and T directly (COLMAP convention - world to camera) R = cam_info['R'] T = cam_info['T'] # This is world-to-camera translation # Build world-to-camera matrix world_to_camera = np.eye(4, dtype=np.float64) world_to_camera[:3, :3] = R world_to_camera[:3, 3] = T # Invert to get camera-to-world camera_to_world = np.linalg.inv(world_to_camera).astype(np.float64) # IMPORTANT FIX: Ensure Z direction is correctly oriented for COLMAP convention # COLMAP uses +Z forward, so no need to flip Z axis # If frustums are still backwards, uncomment this line: # camera_to_world[:3, 2] *= -1 # Flip Z axis if needed # Recalculate world_to_camera after any modifications world_to_camera = np.linalg.inv(camera_to_world).astype(np.float32) # Extract intrinsics width = cam_info.get("width") height = cam_info.get("height") fx = cam_info.get("fx", cam_info.get("focal", width * 0.7)) fy = cam_info.get("fy", cam_info.get("focal", height * 0.7)) cx = cam_info.get("cx", width / 2) cy = cam_info.get("cy", height / 2) # Calculate field of view from focal length fovx = 2 * np.arctan(width / (2 * fx)) fovy = 2 * np.arctan(height / (2 * fy)) # Create view matrix using the original R and T view_matrix = world_to_view(R=R, t=T) # Create projection matrix znear = 0.01 zfar = 100.0 proj_matrix = projection_matrix(fovx=fovx, fovy=fovy, znear=znear, zfar=zfar).T full_proj_matrix = world_to_camera @ proj_matrix # Calculate other parameters tan_fovx = np.tan(fovx * 0.5) tan_fovy = np.tan(fovy * 0.5) # IMPORTANT FIX: Correctly calculate camera center camera_center = camera_to_world[:3, 3] # Extract translation from c2w matrix # Handle camera type and distortion camera_model = cam_info.get("camera_model", "OPENCV") if camera_model == "OPENCV" or camera_model is None: camera_type = 0 # PERSPECTIVE elif camera_model == "OPENCV_FISHEYE": camera_type = 1 # FISHEYE else: camera_type = 0 # Default to PERSPECTIVE # Get distortion parameters distortion_params = [] for param_name in ["k1", "k2", "p1", "p2", "k3", "k4"]: distortion_params.append(cam_info.get(param_name, 0.0)) # Return camera parameters camera_params = { 'R': R, 'T': T, 'camera_center': camera_center, 'view_matrix': view_matrix, 'proj_matrix': proj_matrix, 'full_proj_matrix': full_proj_matrix, 'tan_fovx': tan_fovx, 'tan_fovy': tan_fovy, 'fx': fx, 'fy': fy, 'cx': cx, 'cy': cy, 'width': width, 'height': height, 'camera_to_world': camera_to_world, 'world_to_camera': world_to_camera, 'camera_type': camera_type, 'distortion_params': np.array(distortion_params, dtype=np.float32) } return camera_params