Spaces:
Runtime error
Runtime error
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import numpy as np | |
| import pycolmap | |
| # TODO: frame_idx should start from 1 instead of 0 in colmap | |
def batch_matrix_to_pycolmap(
    points3d,
    extrinsics,
    intrinsics,
    tracks,
    image_size,
    masks=None,
    max_reproj_error=None,
    max_points3D_val=3000,
    shared_camera=False,
    camera_type="SIMPLE_PINHOLE",
    extra_params=None,
):
    """
    Convert Batched Pytorch Tensors to PyCOLMAP

    Check https://github.com/colmap/pycolmap for more details about its format

    Args:
        points3d (torch.Tensor): 3D points, Px3.
        extrinsics (torch.Tensor): camera-from-world matrices, Nx3x4.
        intrinsics (torch.Tensor): calibration matrices, Nx3x3.
        tracks (torch.Tensor): 2D track locations, NxPx2.
        image_size (torch.Tensor): 2 values used as (width, height); all
            frames are assumed to have been padded to the same size.
        masks (torch.Tensor, optional): NxP validity mask of the tracks.
        max_reproj_error (float, optional): when given, observations whose
            reprojection error is not below this value, or whose 3D point
            does not have positive depth in that frame, are dropped. When
            None, no reprojection filtering is applied.
        max_points3D_val (float): a 3D point is only observed if all of its
            coordinates are below this value.
        shared_camera (bool): if True, a single camera (built from frame 0)
            is shared by all images; otherwise one camera per frame.
        camera_type (str): "SIMPLE_PINHOLE" or "SIMPLE_RADIAL".
        extra_params (torch.Tensor, optional): per-frame distortion
            parameters; element [fidx][0] is used for "SIMPLE_RADIAL".

    Returns:
        pycolmap.Reconstruction: reconstruction holding the cameras, images
        and 3D points (with their tracks).

    Raises:
        ValueError: if camera_type is unsupported, or if "SIMPLE_RADIAL" is
            requested without extra_params.

    N is the number of frames and P is the number of tracks.
    """
    N, P, _ = tracks.shape
    assert len(extrinsics) == N
    assert len(intrinsics) == N
    assert len(points3d) == P
    assert image_size.shape[0] == 2

    if camera_type not in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL"):
        raise ValueError(f"Camera type {camera_type} is not supported yet")
    if camera_type == "SIMPLE_RADIAL" and extra_params is None:
        raise ValueError("extra_params is required for camera type SIMPLE_RADIAL")

    if max_reproj_error is not None:
        # NOTE(review): extra_params is not forwarded here, so the check uses
        # the undistorted projection even for SIMPLE_RADIAL -- confirm intended.
        projected_points_2d, projected_points_cam = project_3D_points(
            points3d, extrinsics, intrinsics, return_points_cam=True
        )
        projected_diff = (projected_points_2d - tracks).norm(dim=-1)
        # The depth row of the Bx3xN camera-space points; observations of
        # points at or behind the camera plane must never count as inliers.
        behind_camera = projected_points_cam[:, -1] <= 0
        reproj_mask = torch.logical_and(
            projected_diff < max_reproj_error, torch.logical_not(behind_camera)
        )
        if masks is not None:
            masks = torch.logical_and(masks, reproj_mask)
        else:
            masks = reproj_mask
    elif masks is None:
        # No filtering requested at all: every observation is kept.
        masks = torch.ones((N, P), dtype=torch.bool, device=tracks.device)

    extrinsics = extrinsics.cpu().numpy()
    intrinsics = intrinsics.cpu().numpy()
    if extra_params is not None:
        extra_params = extra_params.cpu().numpy()
    tracks = tracks.cpu().numpy()
    points3d = points3d.cpu().numpy()
    image_size = image_size.cpu().numpy()
    masks = masks.cpu().numpy()

    # Reconstruction object, following the format of PyCOLMAP/COLMAP
    reconstruction = pycolmap.Reconstruction()

    inlier_num = masks.sum(0)
    valid_mask = inlier_num >= 2  # a track is invalid if without two inliers
    valid_idx = np.nonzero(valid_mask)[0]

    # Only add 3D points that have sufficient 2D points
    for vidx in valid_idx:
        reconstruction.add_point3D(points3d[vidx], pycolmap.Track(), np.zeros(3))

    # The coordinate bound does not depend on the frame, so evaluate it once
    # instead of once per (frame, point) pair.
    within_bound = (points3d[valid_idx] < max_points3D_val).all(axis=1)

    camera = None
    # frame idx
    for fidx in range(N):
        # set camera
        if camera is None or (not shared_camera):
            focal = (intrinsics[fidx][0, 0] + intrinsics[fidx][1, 1]) / 2
            camera_params = [
                focal,
                intrinsics[fidx][0, 2],
                intrinsics[fidx][1, 2],
            ]
            if camera_type == "SIMPLE_RADIAL":
                camera_params.append(extra_params[fidx][0])
            pycolmap_intri = np.array(camera_params)

            camera = pycolmap.Camera(
                model=camera_type,
                width=image_size[0],
                height=image_size[1],
                params=pycolmap_intri,
                camera_id=fidx,
            )
            # add camera
            reconstruction.add_camera(camera)

        # set image
        cam_from_world = pycolmap.Rigid3d(
            pycolmap.Rotation3d(extrinsics[fidx][:3, :3]),
            extrinsics[fidx][:3, 3],
        )  # Rot and Trans
        image = pycolmap.Image(
            id=fidx,
            name=f"image_{fidx}",
            camera_id=camera.camera_id,
            cam_from_world=cam_from_world,
        )

        points2D_list = []
        # NOTE point3D_id start by 1: this relies on the reconstruction
        # numbering the points added above consecutively from 1.
        for point3D_id, (original_track_idx, in_bound) in enumerate(
            zip(valid_idx, within_bound), start=1
        ):
            if not in_bound or not masks[fidx][original_track_idx]:
                continue
            # It seems we don't need +0.5 for BA
            point2D_xy = tracks[fidx][original_track_idx]
            # Index this observation will have inside the image's point list.
            point2D_idx = len(points2D_list)
            # Please note when adding the Point2D object
            # It not only requires the 2D xy location, but also the id to 3D point
            points2D_list.append(pycolmap.Point2D(point2D_xy, point3D_id))
            # add element
            track = reconstruction.points3D[point3D_id].track
            track.add_element(fidx, point2D_idx)

        try:
            image.points2D = pycolmap.ListPoint2D(points2D_list)
            image.registered = True
        except Exception:
            # Best effort: a frame whose 2D points cannot be attached is kept
            # in the reconstruction but flagged as unregistered.
            print(f"frame {fidx} is out of BA")
            image.registered = False

        # add image
        reconstruction.add_image(image)

    return reconstruction
def pycolmap_to_batch_matrix(
    reconstruction, device="cuda", camera_type="SIMPLE_PINHOLE"
):
    """
    Convert a PyCOLMAP Reconstruction Object to batched PyTorch tensors.

    Args:
        reconstruction (pycolmap.Reconstruction): The reconstruction object from PyCOLMAP.
            Images are looked up by the ids 0 .. num_images - 1.
        device (str): The device to place the tensors on (default: "cuda").
        camera_type (str): The type of camera model used (default: "SIMPLE_PINHOLE").

    Returns:
        tuple: (points3D, extrinsics, intrinsics, extra_params).
            points3D has one row per point id up to the largest id present
            (row ``id - 1``); ids that are absent keep a zero row, and a
            reconstruction without 3D points yields a 0x3 tensor.
            extrinsics and intrinsics stack the per-image matrices.
            extra_params is an (num_images, 1) tensor holding the last camera
            parameter of each image for "SIMPLE_RADIAL", otherwise None.
    """
    num_images = len(reconstruction.images)

    # default=0 keeps a reconstruction without any 3D point from raising.
    max_points3D_id = max(reconstruction.point3D_ids(), default=0)
    points3D = np.zeros((max_points3D_id, 3))
    for point3D_id in reconstruction.points3D:
        # Point ids are 1-based, rows are 0-based.
        points3D[point3D_id - 1] = reconstruction.points3D[point3D_id].xyz
    points3D = torch.from_numpy(points3D).to(device)

    is_radial = camera_type == "SIMPLE_RADIAL"
    extrinsics = []
    intrinsics = []
    extra_params = [] if is_radial else None

    for i in range(num_images):
        pyimg = reconstruction.images[i]
        pycam = reconstruction.cameras[pyimg.camera_id]

        # Extract and append extrinsics
        extrinsics.append(pyimg.cam_from_world.matrix())

        # Extract and append intrinsics
        intrinsics.append(pycam.calibration_matrix())

        if is_radial:
            extra_params.append(pycam.params[-1])

    # Convert lists to torch tensors
    extrinsics = torch.from_numpy(np.stack(extrinsics)).to(device)
    intrinsics = torch.from_numpy(np.stack(intrinsics)).to(device)

    if is_radial:
        extra_params = torch.from_numpy(np.stack(extra_params)).to(device)
        extra_params = extra_params[:, None]

    return points3D, extrinsics, intrinsics, extra_params
def project_3D_points(
    points3D,
    extrinsics,
    intrinsics=None,
    extra_params=None,
    return_points_cam=False,
    default=0,
    only_points_cam=False,
):
    """
    Transforms 3D points to 2D using extrinsic and intrinsic parameters.

    Args:
        points3D (torch.Tensor): 3D points of shape Px3.
        extrinsics (torch.Tensor): Extrinsic parameters of shape Bx3x4.
        intrinsics (torch.Tensor): Intrinsic parameters of shape Bx3x3.
            Required unless only_points_cam is True.
        extra_params (torch.Tensor): Extra parameters of shape BxN, which is used for radial distortion.
        return_points_cam (bool): If True, also return the camera-space points.
        default (float): Value substituted for NaNs in the projected 2D points.
        only_points_cam (bool): If True, skip the projection and return only
            the camera-space points.

    Returns:
        torch.Tensor: Transformed 2D points of shape BxPx2; or the tuple
        (points2D, points_cam) when return_points_cam is True; or only
        points_cam (Bx3xP) when only_points_cam is True.

    Raises:
        ValueError: if a projection is requested but intrinsics is None.
    """
    if intrinsics is None and not only_points_cam:
        raise ValueError("intrinsics is required unless only_points_cam is True")

    # NOTE(review): torch.double is not a regular autocast dtype; this context
    # presumably ends up disabling autocast for the block -- confirm intent.
    with torch.cuda.amp.autocast(dtype=torch.double):
        B = extrinsics.shape[0]  # Batch size, i.e., number of cameras

        points3D_homogeneous = torch.cat(
            [points3D, torch.ones_like(points3D[..., 0:1])], dim=1
        )  # Px4
        # Reshape for batch processing
        points3D_homogeneous = points3D_homogeneous.unsqueeze(0).expand(
            B, -1, -1
        )  # BxPx4

        # Step 1: Apply extrinsic parameters
        # Transform 3D points to camera coordinate system for all cameras
        points_cam = torch.bmm(
            extrinsics, points3D_homogeneous.transpose(-1, -2)
        )  # Bx3xP

        if only_points_cam:
            return points_cam

        # Step 2: Apply intrinsic parameters and (optional) distortion.
        # Forward `default` so the caller's NaN replacement value is honored.
        points2D = img_from_cam(
            intrinsics, points_cam, extra_params, default=default
        )

        if return_points_cam:
            return points2D, points_cam
        return points2D
def img_from_cam(intrinsics, points_cam, extra_params=None, default=0.0):
    """
    Project camera-space points to pixel coordinates.

    The points are divided by their third coordinate, optionally distorted,
    and then multiplied by the intrinsic matrices.

    Args:
        intrinsics (torch.Tensor): Intrinsic camera parameters of shape Bx3x3.
        points_cam (torch.Tensor): 3D points in camera coordinates of shape Bx3xN.
        extra_params (torch.Tensor, optional): Distortion parameters of shape BxN, where N can be 1, 2, or 4.
            When None, no distortion is applied.
        default (float, optional): Default value to replace NaNs in the output.

    Returns:
        points2D (torch.Tensor): 2D points in pixel coordinates of shape BxNx2.
    """
    # Perspective division by the depth row (kept 3-D so it broadcasts).
    depth = points_cam[:, 2:3, :]
    normalized = points_cam / depth

    # Normalized image-plane coordinates, Bx2xN.
    uv = normalized[:, :2, :]

    if extra_params is not None:
        # Distortion works on the u and v rows separately.
        uu, vv = apply_distortion(extra_params, uv[:, 0], uv[:, 1])
        uv = torch.stack([uu, vv], dim=1)

    # Append a row of ones so the intrinsics can be applied with one bmm.
    ones_row = torch.ones_like(uv[:, :1, :])
    uv_homogeneous = torch.cat((uv, ones_row), dim=1)  # Bx3xN

    # Keep only the x and y rows of the projected points, Bx2xN.
    pixels = torch.bmm(intrinsics, uv_homogeneous)[:, :2, :]

    # NaNs (e.g. from a 0/0 perspective division) become `default`.
    pixels = torch.nan_to_num(pixels, nan=default)

    return pixels.transpose(1, 2)  # BxNx2