|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
|
|
import numpy as np |
|
|
import pycolmap |
|
|
from PIL import Image |
|
|
|
|
|
from depth_anything_3.specs import Prediction |
|
|
from depth_anything_3.utils.logger import logger |
|
|
|
|
|
from .glb import _depths_to_world_points_with_colors |
|
|
|
|
|
|
|
|
def export_to_colmap(
    prediction: Prediction,
    export_dir: str,
    image_paths: list[str],
    conf_thresh_percentile: float = 40.0,
    process_res_method: str = "upper_bound_resize",
) -> None:
    """Export a prediction as a COLMAP reconstruction written to ``export_dir``.

    Pixels whose confidence is at least the ``conf_thresh_percentile``-th
    percentile of ``prediction.conf`` are turned into 3D points (via
    ``_depths_to_world_points_with_colors``). For every frame one PINHOLE
    camera, rig, frame and image are registered; intrinsics and 2D
    observations are rescaled from the processed resolution to the size of
    the original image on disk.

    The arrays held by ``prediction`` are not modified.

    Args:
        prediction: Model output providing ``depth``, ``conf``, ``intrinsics``,
            ``extrinsics`` and ``processed_images``. ``extrinsics[i]`` is handed
            to COLMAP as the camera-from-world pose of frame ``i``.
        export_dir: Directory the reconstruction is written to; created if it
            does not exist.
        image_paths: Path of the original image for each frame, in frame order.
            Only the image size and the file's base name are used.
        conf_thresh_percentile: Percentile of ``prediction.conf`` used as the
            confidence cut-off.
        process_res_method: How the inputs were brought to the processed
            resolution. Only methods whose name ends in ``"resize"`` are
            supported.

    Raises:
        NotImplementedError: If ``process_res_method`` is ``"crop"``.
        ValueError: If ``process_res_method`` is not recognised.
    """
    # Validate the resize mode once, before any expensive work is done.
    if process_res_method == "crop":
        raise NotImplementedError("COLMAP export for crop method is not implemented")
    if not process_res_method.endswith("resize"):
        raise ValueError(f"Unknown process_res_method: {process_res_method}")

    conf_thresh = np.percentile(prediction.conf, conf_thresh_percentile)
    points, colors = _depths_to_world_points_with_colors(
        prediction.depth,
        prediction.intrinsics,
        prediction.extrinsics,
        prediction.processed_images,
        prediction.conf,
        conf_thresh,
    )
    num_points = len(points)
    logger.info(f"Exporting to COLMAP with {num_points} points")

    num_frames = len(prediction.processed_images)
    h, w = prediction.processed_images.shape[1:3]

    # (x, y, frame) of every pixel that passes the confidence threshold.
    # NOTE(review): this assumes the helper above keeps exactly the pixels with
    # conf >= conf_thresh, in the same order, so that row i here corresponds to
    # points[i] - confirm against _depths_to_world_points_with_colors.
    points_xyf = _create_xyf(num_frames, h, w)
    points_xyf = points_xyf[prediction.conf >= conf_thresh]
    frame_of_point = points_xyf[:, 2]
    # Float copy so that rescaling keeps sub-pixel precision instead of being
    # truncated back into the int32 grid.
    pixel_xy = points_xyf[:, :2].astype(np.float64)

    reconstruction = pycolmap.Reconstruction()

    # Register every 3D point with an empty track; observations are attached
    # frame by frame below.
    point3d_ids = [
        reconstruction.add_point3D(points[vidx], pycolmap.Track(), colors[vidx])
        for vidx in range(num_points)
    ]

    for fidx in range(num_frames):
        # Only the size is needed; close the file handle right away.
        with Image.open(image_paths[fidx]) as source_image:
            orig_w, orig_h = source_image.size
        scale_x = orig_w / w
        scale_y = orig_h / h

        # Work on a copy: scaling in place would alter prediction.intrinsics.
        intrinsic = np.array(prediction.intrinsics[fidx], dtype=np.float64)
        intrinsic[0] *= scale_x
        intrinsic[1] *= scale_y

        # PINHOLE parameter order: fx, fy, cx, cy.
        pycolmap_intri = np.array(
            [intrinsic[0, 0], intrinsic[1, 1], intrinsic[0, 2], intrinsic[1, 2]]
        )

        extrinsic = prediction.extrinsics[fidx]
        cam_from_world = pycolmap.Rigid3d(
            pycolmap.Rotation3d(extrinsic[:3, :3]), extrinsic[:3, 3]
        )

        # Camera: one per frame, ids are 1-based.
        camera = pycolmap.Camera()
        camera.camera_id = fidx + 1
        camera.model = pycolmap.CameraModelId.PINHOLE
        camera.width = orig_w
        camera.height = orig_h
        camera.params = pycolmap_intri
        reconstruction.add_camera(camera)

        # Rig: a single-sensor rig wrapping the camera.
        rig = pycolmap.Rig()
        rig.rig_id = camera.camera_id
        rig.add_ref_sensor(camera.sensor_id)
        reconstruction.add_rig(rig)

        # Image: ids are set first because the frame below refers to them.
        image = pycolmap.Image()
        image.image_id = fidx + 1
        image.camera_id = camera.camera_id

        # Frame: carries the pose of the rig (and hence of the camera).
        frame = pycolmap.Frame()
        frame.frame_id = image.image_id
        frame.rig_id = camera.camera_id
        frame.add_data_id(image.data_id)
        frame.rig_from_world = cam_from_world
        reconstruction.add_frame(frame)

        # 2D observations of this frame, rescaled to the original image size.
        point_indices = np.flatnonzero(frame_of_point == fidx)
        frame_xy = pixel_xy[point_indices] * (scale_x, scale_y)
        point2d_list = []
        for point2d_idx, (vidx, xy) in enumerate(zip(point_indices, frame_xy)):
            point3d_id = point3d_ids[vidx]
            point2d_list.append(pycolmap.Point2D(xy, point3d_id))
            reconstruction.point3D(point3d_id).track.add_element(
                image.image_id, point2d_idx
            )

        image.frame_id = image.image_id
        image.name = os.path.basename(image_paths[fidx])
        image.points2D = pycolmap.Point2DList(point2d_list)
        reconstruction.add_image(image)

    # Make sure the target directory exists before writing the model files.
    os.makedirs(export_dir, exist_ok=True)
    reconstruction.write(export_dir)
|
|
|
|
|
|
|
|
def _create_xyf(num_frames, height, width):
    """
    Build the per-pixel (x, y, frame index) lookup grid for a stack of frames.

    Returns an int32 array of shape (num_frames, height, width, 3) whose last
    axis holds the column (x), the row (y) and the frame index of each pixel.
    """
    grid = np.empty((num_frames, height, width, 3), dtype=np.int32)

    # Each channel is filled by broadcasting a 1-D range along its own axis.
    grid[..., 0] = np.arange(width, dtype=np.int32)
    grid[..., 1] = np.arange(height, dtype=np.int32)[:, np.newaxis]
    grid[..., 2] = np.arange(num_frames, dtype=np.int32)[:, np.newaxis, np.newaxis]

    return grid
|
|
|