File size: 5,263 Bytes
18b382b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pycolmap
from PIL import Image
from depth_anything_3.specs import Prediction
from depth_anything_3.utils.logger import logger
from .glb import _depths_to_world_points_with_colors
def export_to_colmap(
prediction: Prediction,
export_dir: str,
image_paths: list[str],
conf_thresh_percentile: float = 40.0,
process_res_method: str = "upper_bound_resize",
) -> None:
# 1. Data preparation
conf_thresh = np.percentile(prediction.conf, conf_thresh_percentile)
points, colors = _depths_to_world_points_with_colors(
prediction.depth,
prediction.intrinsics,
prediction.extrinsics, # w2c
prediction.processed_images,
prediction.conf,
conf_thresh,
)
num_points = len(points)
logger.info(f"Exporting to COLMAP with {num_points} points")
num_frames = len(prediction.processed_images)
h, w = prediction.processed_images.shape[1:3]
points_xyf = _create_xyf(num_frames, h, w)
points_xyf = points_xyf[prediction.conf >= conf_thresh]
# 2. Set Reconstruction
reconstruction = pycolmap.Reconstruction()
point3d_ids = []
for vidx in range(num_points):
point3d_id = reconstruction.add_point3D(points[vidx], pycolmap.Track(), colors[vidx])
point3d_ids.append(point3d_id)
for fidx in range(num_frames):
orig_w, orig_h = Image.open(image_paths[fidx]).size
intrinsic = prediction.intrinsics[fidx]
if process_res_method.endswith("resize"):
intrinsic[:1] *= orig_w / w
intrinsic[1:2] *= orig_h / h
elif process_res_method == "crop":
raise NotImplementedError("COLMAP export for crop method is not implemented")
else:
raise ValueError(f"Unknown process_res_method: {process_res_method}")
pycolmap_intri = np.array(
[intrinsic[0, 0], intrinsic[1, 1], intrinsic[0, 2], intrinsic[1, 2]]
)
extrinsic = prediction.extrinsics[fidx]
cam_from_world = pycolmap.Rigid3d(pycolmap.Rotation3d(extrinsic[:3, :3]), extrinsic[:3, 3])
# set and add camera
camera = pycolmap.Camera()
camera.camera_id = fidx + 1
camera.model = pycolmap.CameraModelId.PINHOLE
camera.width = orig_w
camera.height = orig_h
camera.params = pycolmap_intri
reconstruction.add_camera(camera)
# set and add rig (from camera)
rig = pycolmap.Rig()
rig.rig_id = camera.camera_id
rig.add_ref_sensor(camera.sensor_id)
reconstruction.add_rig(rig)
# set image
image = pycolmap.Image()
image.image_id = fidx + 1
image.camera_id = camera.camera_id
# set and add frame (from image)
frame = pycolmap.Frame()
frame.frame_id = image.image_id
frame.rig_id = camera.camera_id
frame.add_data_id(image.data_id)
frame.rig_from_world = cam_from_world
reconstruction.add_frame(frame)
# set point2d and update track
point2d_list = []
points_in_frame = points_xyf[:, 2].astype(np.int32) == fidx
for vidx in np.where(points_in_frame)[0]:
point2d = points_xyf[vidx][:2]
point2d[0] *= orig_w / w
point2d[1] *= orig_h / h
point3d_id = point3d_ids[vidx]
point2d_list.append(pycolmap.Point2D(point2d, point3d_id))
reconstruction.point3D(point3d_id).track.add_element(
image.image_id, len(point2d_list) - 1
)
# set and add image
image.frame_id = image.image_id
image.name = os.path.basename(image_paths[fidx])
image.points2D = pycolmap.Point2DList(point2d_list)
reconstruction.add_image(image)
# 3. Export
reconstruction.write(export_dir)
def _create_xyf(num_frames, height, width):
"""
Creates a grid of pixel coordinates and frame indices (fidx) for all frames.
"""
# Create coordinate grids for a single frame
y_grid, x_grid = np.indices((height, width), dtype=np.int32)
x_grid = x_grid[np.newaxis, :, :]
y_grid = y_grid[np.newaxis, :, :]
# Broadcast to all frames
x_coords = np.broadcast_to(x_grid, (num_frames, height, width))
y_coords = np.broadcast_to(y_grid, (num_frames, height, width))
# Create frame indices and broadcast
f_idx = np.arange(num_frames, dtype=np.int32)[:, np.newaxis, np.newaxis]
f_coords = np.broadcast_to(f_idx, (num_frames, height, width))
# Stack coordinates and frame indices
points_xyf = np.stack((x_coords, y_coords, f_coords), axis=-1)
return points_xyf
|