File size: 24,624 Bytes
66003a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import os
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2
import math
import numpy as np
from PIL import Image
import PIL
# Resolve the resampling filters once at import time.  When this Pillow build
# exposes the ``PIL.Image.Resampling`` namespace the filters are taken from
# there; otherwise fall back to the constants defined directly on ``PIL.Image``.
lanczos = getattr(PIL.Image, "Resampling", PIL.Image).LANCZOS
bicubic = getattr(PIL.Image, "Resampling", PIL.Image).BICUBIC
from vggt.utils.geometry import closed_form_inverse_se3
#####################################################################################################################
def crop_image_depth_and_intrinsic_by_pp(
    image, depth_map, intrinsic, target_shape, track=None, filepath=None, strict=False
):
    """
    Crops the given image and depth map around the camera's principal point, as defined by `intrinsic`.

    Specifically:
      - Ensures that the crop is centered on the principal point.
      - Optionally pads the image (and depth map) with zeros at the bottom/right if `strict=True`
        and the cropped result is smaller than `target_shape`.
      - Shifts the camera intrinsic matrix (and `track` if provided) accordingly.

    Neither `intrinsic` nor `track` is modified in place; shifted copies are returned.

    Args:
        image (np.ndarray):
            Input image array of shape (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map array of shape (H, W), or None if not available.
        intrinsic (np.ndarray):
            Camera intrinsic matrix (3x3). The principal point row is read from
            `intrinsic[1, 2]` and its column from `intrinsic[0, 2]`.
        target_shape (tuple[int, int] or np.ndarray):
            Desired output shape as (height, width), i.e. (rows, columns).
        track (np.ndarray or None):
            Optional array of shape (N, 2). Interpreted as (x, y) pixel coordinates
            (column, row). A shifted copy is returned.
        filepath (str or None):
            An optional file path for debug logging (only printed if strict mode has to pad).
        strict (bool):
            If True, will zero-pad to ensure the exact target_shape even if the cropped region is smaller.

    Raises:
        AssertionError:
            If the input image is smaller than `target_shape` along either axis.
        ValueError:
            If the cropped image is larger than `target_shape` (in strict mode), which should not normally happen.

    Returns:
        tuple:
            (cropped_image, cropped_depth_map, updated_intrinsic, updated_track)
            - cropped_image (np.ndarray): Cropped (and optionally padded) image.
            - cropped_depth_map (np.ndarray or None): Cropped (and optionally padded) depth map.
            - updated_intrinsic (np.ndarray): Intrinsic matrix adjusted for the crop.
            - updated_track (np.ndarray or None): Track array adjusted for the crop, or None if track was not provided.
    """
    original_size = np.array(image.shape)
    intrinsic = np.copy(intrinsic)
    # Accept both tuples and arrays for target_shape by unpacking it once.
    target_h, target_w = int(target_shape[0]), int(target_shape[1])

    # Axis 0 of the image is its height (rows), axis 1 its width (columns).
    if original_size[0] < target_h:
        error_message = (
            f"Height check failed: original height {original_size[0]} "
            f"is less than target height {target_h}."
        )
        print(error_message)
        raise AssertionError(error_message)
    if original_size[1] < target_w:
        error_message = (
            f"Width check failed: original width {original_size[1]} "
            f"is less than target width {target_w}."
        )
        print(error_message)
        raise AssertionError(error_message)

    # Principal point expressed in array order: row (cy) first, column (cx) second.
    pp_row = intrinsic[1, 2]
    pp_col = intrinsic[0, 2]

    # Half extent of the crop along each axis.  In strict mode only the top/left
    # border limits it; any overshoot past the bottom/right border is padded below.
    if strict:
        half_rows = min(target_h / 2, pp_row)
        half_cols = min(target_w / 2, pp_col)
    else:
        half_rows = min(target_h / 2, pp_row, original_size[0] - pp_row)
        half_cols = min(target_w / 2, pp_col, original_size[1] - pp_col)

    # Compute starting indices
    start_row = math.floor(pp_row) - math.floor(half_rows)
    start_col = math.floor(pp_col) - math.floor(half_cols)
    assert start_row >= 0
    assert start_col >= 0

    # Compute ending indices
    if strict:
        end_row = start_row + target_h
        end_col = start_col + target_w
    else:
        end_row = start_row + 2 * math.floor(half_rows)
        end_col = start_col + 2 * math.floor(half_cols)

    # Perform the crop (slicing silently truncates at the image border)
    image = image[start_row:end_row, start_col:end_col, :]
    if depth_map is not None:
        depth_map = depth_map[start_row:end_row, start_col:end_col]

    # Shift the principal point in the intrinsic
    intrinsic[1, 2] = intrinsic[1, 2] - start_row
    intrinsic[0, 2] = intrinsic[0, 2] - start_col

    # Shift a copy of the track so the caller's array is left untouched
    if track is not None:
        track = np.array(track, copy=True)
        track[:, 1] = track[:, 1] - start_row
        track[:, 0] = track[:, 0] - start_col

    # If strict, zero-pad if the new shape is smaller than target_shape
    if strict:
        current_h, current_w = image.shape[:2]
        if (current_h, current_w) != (target_h, target_w):
            print(f"{filepath} does not meet the target shape")
            pad_h = target_h - current_h
            pad_w = target_w - current_w
            if pad_h < 0 or pad_w < 0:
                raise ValueError(
                    f"The cropped image is bigger than the target shape: "
                    f"cropped=({current_h},{current_w}), "
                    f"target=({target_h},{target_w})."
                )
            image = np.pad(
                image,
                pad_width=((0, pad_h), (0, pad_w), (0, 0)),
                mode="constant",
                constant_values=0,
            )
            if depth_map is not None:
                depth_map = np.pad(
                    depth_map,
                    pad_width=((0, pad_h), (0, pad_w)),
                    mode="constant",
                    constant_values=0,
                )

    return image, depth_map, intrinsic, track
def resize_image_depth_and_intrinsic(
    image,
    depth_map,
    intrinsic,
    target_shape,
    original_size,
    track=None,
    pixel_center=True,
    safe_bound=4,
    rescale_aug=True,
):
    """
    Resizes the given image and depth map (if provided) to slightly larger than `target_shape`,
    updating the intrinsic matrix (and track array if present). Optionally uses random rescaling
    to create some additional margin (based on `rescale_aug`).

    Steps:
      1. Compute a scaling factor so that the resized result is at least `target_shape + safe_bound`.
      2. If `rescale_aug=True`, first enlarge `safe_bound` by a random fraction of the larger
         target side, drawn from a triangular distribution on [0, 0.3] with its mode at 0.
      3. Resize the image with LANCZOS if downscaling, BICUBIC otherwise.
      4. Resize the depth map with nearest-neighbor.
      5. Update the camera intrinsic and track coordinates (if any).

    Args:
        image (np.ndarray):
            Input image array (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map array (H, W), or None if unavailable.
        intrinsic (np.ndarray):
            Camera intrinsic matrix (3x3). Not modified; a scaled copy is returned.
        target_shape (np.ndarray or tuple[int, int]):
            Desired final shape (height, width).
        original_size (np.ndarray or tuple[int, int]):
            Original size of the image in (height, width).
        track (np.ndarray or None):
            Optional (N, 2) array of pixel coordinates. Will be scaled.
        pixel_center (bool):
            If True, accounts for 0.5 pixel center shift when scaling the principal point.
        safe_bound (int or float):
            Additional margin (in pixels) to add to target_shape before resizing.
        rescale_aug (bool):
            If True, randomly increase the `safe_bound` within a certain range to simulate augmentation.

    Returns:
        tuple:
            (resized_image, resized_depth_map, updated_intrinsic, updated_track)
            - resized_image (np.ndarray): The resized image.
            - resized_depth_map (np.ndarray or None): The resized depth map.
            - updated_intrinsic (np.ndarray): Camera intrinsic updated for new resolution.
            - updated_track (np.ndarray or None): Track array updated or None if not provided.

    Raises:
        AssertionError:
            If a depth map is given and its resized shape does not match the resized image.
    """
    # Allow plain tuples/lists as documented; the arithmetic below needs arrays.
    target_shape = np.asarray(target_shape)
    original_size = np.asarray(original_size)

    if rescale_aug:
        random_boundary = np.random.triangular(0, 0, 0.3)
        safe_bound = safe_bound + random_boundary * target_shape.max()

    # Use the larger per-axis scale so both sides end up >= target + safe_bound.
    resize_scales = (target_shape + safe_bound) / original_size
    max_resize_scale = np.max(resize_scales)
    intrinsic = np.copy(intrinsic)

    # Convert image to PIL for resizing; PIL reports size as (width, height).
    pil_image = Image.fromarray(image)
    input_resolution = np.array(pil_image.size)
    output_resolution = tuple(
        int(side) for side in np.floor(input_resolution * max_resize_scale)
    )
    resample = lanczos if max_resize_scale < 1 else bicubic
    image = np.array(pil_image.resize(output_resolution, resample=resample))

    if depth_map is not None:
        # An explicit (width, height) dsize fully determines the output size,
        # so no fx/fy scale factors are passed.
        depth_map = cv2.resize(
            depth_map,
            output_resolution,
            interpolation=cv2.INTER_NEAREST,
        )

    # The floor() above makes the effective scale differ slightly from the
    # requested one, so recompute it from the actual output size.
    actual_size = np.array(image.shape[:2])
    actual_resize_scale = np.max(actual_size / original_size)

    if pixel_center:
        intrinsic[0, 2] = intrinsic[0, 2] + 0.5
        intrinsic[1, 2] = intrinsic[1, 2] + 0.5

    intrinsic[:2, :] = intrinsic[:2, :] * actual_resize_scale

    # NOTE(review): the track is scaled without the half-pixel shift that is
    # applied to the principal point -- confirm this is intended.
    if track is not None:
        track = track * actual_resize_scale

    if pixel_center:
        intrinsic[0, 2] = intrinsic[0, 2] - 0.5
        intrinsic[1, 2] = intrinsic[1, 2] - 0.5

    if depth_map is not None:
        assert image.shape[:2] == depth_map.shape[:2]

    return image, depth_map, intrinsic, track
def threshold_depth_map(
    depth_map: np.ndarray,
    max_percentile: float = 99,
    min_percentile: float = 1,
    max_depth: float = -1,
) -> np.ndarray:
    """
    Zero out depth outliers using an optional absolute cap and percentile limits.

    The input is never modified; all work happens on a float copy.

    Steps:
      1. If `max_depth > 0`, set every value above `max_depth` to zero.
      2. Compute the `max_percentile` and `min_percentile` thresholds with
         `np.nanpercentile` on the (possibly already capped) map.
      3. Zero out values above the upper / below the lower threshold, but only
         for thresholds that are strictly positive.

    Args:
        depth_map (np.ndarray):
            Input depth map (H, W), or None.
        max_percentile (float):
            Upper percentile (0-100). Values above it are set to zero. Skipped if <= 0.
        min_percentile (float):
            Lower percentile (0-100). Values below it are set to zero. Skipped if <= 0.
        max_depth (float):
            Absolute maximum depth. If > 0, any depth above this is set to zero.
            If <= 0, no maximum-depth cap is applied.

    Returns:
        np.ndarray:
            Float depth map after thresholding, or None if `depth_map` is None.
    """
    if depth_map is None:
        return None

    cleaned = depth_map.astype(float, copy=True)

    # Absolute cap comes first so it influences the percentiles below.
    if max_depth > 0:
        np.putmask(cleaned, cleaned > max_depth, 0.0)

    def _percentile_or_none(q):
        # A non-positive percentile disables that side of the filter.
        return np.nanpercentile(cleaned, q) if q > 0 else None

    # Both limits are measured before either of them is applied.
    upper_limit = _percentile_or_none(max_percentile)
    lower_limit = _percentile_or_none(min_percentile)

    if upper_limit is not None and upper_limit > 0:
        np.putmask(cleaned, cleaned > upper_limit, 0.0)

    if lower_limit is not None and lower_limit > 0:
        np.putmask(cleaned, cleaned < lower_limit, 0.0)

    return cleaned
def depth_to_world_coords_points(
    depth_map: np.ndarray,
    extrinsic: np.ndarray,
    intrinsic: np.ndarray,
    eps=1e-8,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Lift a depth map to 3D points in the world frame.

    The depth map is first unprojected into the camera frame with
    `depth_to_cam_coords_points`, then moved into the world frame using the
    inverse of `extrinsic` as returned by `closed_form_inverse_se3`.

    Args:
        depth_map (np.ndarray):
            Depth map of shape (H, W), or None.
        extrinsic (np.ndarray):
            Extrinsic matrix of shape (3, 4), representing the camera pose in OpenCV convention (camera-from-world).
        intrinsic (np.ndarray):
            Intrinsic matrix of shape (3, 3).
        eps (float):
            Depth values must be strictly greater than this to count as valid.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray]:
            (world_coords_points, cam_coords_points, point_mask)
            - world_coords_points: (H, W, 3) array of 3D points in world frame.
            - cam_coords_points: (H, W, 3) array of 3D points in camera frame.
            - point_mask: (H, W) boolean array, True where depth > eps.
            All three are None when `depth_map` is None.
    """
    if depth_map is None:
        return None, None, None

    # Unproject every pixel into the camera frame.
    points_in_cam = depth_to_cam_coords_points(depth_map, intrinsic)

    # The helper is called on a batch of one (leading axis added, then removed).
    world_from_cam = closed_form_inverse_se3(extrinsic[None])[0]
    rotation = world_from_cam[:3, :3]
    translation = world_from_cam[:3, 3]

    # Row-vector form of R @ p + t, applied to all H*W points at once.
    points_in_world = points_in_cam.dot(rotation.T) + translation

    # Pixels whose depth does not exceed eps are flagged as invalid.
    valid_mask = depth_map > eps

    return points_in_world, points_in_cam, valid_mask
def depth_to_cam_coords_points(
    depth_map: np.ndarray, intrinsic: np.ndarray
) -> np.ndarray:
    """
    Back-project a depth map into per-pixel 3D points in the camera frame.

    For the pixel in column u and row v with depth z, the point is
    ((u - cx) * z / fx, (v - cy) * z / fy, z).

    Args:
        depth_map (np.ndarray):
            Depth map of shape (H, W).
        intrinsic (np.ndarray):
            3x3 camera intrinsic matrix.
            Assumes zero skew and standard OpenCV layout:
                [ fx  0   cx ]
                [ 0   fy  cy ]
                [ 0   0   1  ]

    Returns:
        np.ndarray:
            A float32 (H, W, 3) array, where each pixel is mapped to (x, y, z) in the camera frame.

    Raises:
        AssertionError:
            If `intrinsic` is not 3x3 or has a non-zero skew entry.
    """
    rows, cols = depth_map.shape
    assert intrinsic.shape == (3, 3), "Intrinsic matrix must be 3x3"
    assert (
        intrinsic[0, 1] == 0 and intrinsic[1, 0] == 0
    ), "Intrinsic matrix must have zero skew"

    focal_u = intrinsic[0, 0]
    focal_v = intrinsic[1, 1]
    center_u = intrinsic[0, 2]
    center_v = intrinsic[1, 2]

    # Column indices as a row vector and row indices as a column vector;
    # broadcasting against the (H, W) depth map yields the full pixel grid.
    col_idx = np.arange(cols)[None, :]
    row_idx = np.arange(rows)[:, None]

    cam_x = (col_idx - center_u) * depth_map / focal_u
    cam_y = (row_idx - center_v) * depth_map / focal_v

    points = np.stack((cam_x, cam_y, depth_map), axis=-1)
    return points.astype(np.float32)
def rotate_90_degrees(
    image, depth_map, extri_opencv, intri_opencv, clockwise=True, track=None
):
    """
    Rotate an image by 90 degrees together with its depth map, camera and track.

    This is a thin coordinator: the pixel data, intrinsic matrix, extrinsic
    matrix and (optional) track are each delegated to their dedicated
    `*_rot90` helper, all using the width/height of the input image.

    Args:
        image (np.ndarray):
            Input image of shape (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map of shape (H, W), or None if not available.
        extri_opencv (np.ndarray):
            Extrinsic matrix (3x4) in OpenCV convention.
        intri_opencv (np.ndarray):
            Intrinsic matrix (3x3).
        clockwise (bool):
            If True, rotates the image 90 degrees clockwise; else 90 degrees counterclockwise.
        track (np.ndarray or None):
            Optional (N, 2) track array. Will be rotated accordingly.

    Returns:
        tuple:
            (rotated_image, rotated_depth_map, new_extri_opencv, new_intri_opencv, new_track)
            where each entry is the updated version after the rotation;
            `new_track` is None when no track was given.
    """
    # Dimensions of the image before rotation; the helpers expect these.
    src_height, src_width = image.shape[:2]

    rotated_image, rotated_depth_map = rotate_image_and_depth_rot90(
        image, depth_map, clockwise
    )
    rotated_intrinsic = adjust_intrinsic_matrix_rot90(
        intri_opencv, src_width, src_height, clockwise
    )
    rotated_track = (
        None
        if track is None
        else adjust_track_rot90(track, src_width, src_height, clockwise)
    )
    rotated_extrinsic = adjust_extrinsic_matrix_rot90(extri_opencv, clockwise)

    return (
        rotated_image,
        rotated_depth_map,
        rotated_extrinsic,
        rotated_intrinsic,
        rotated_track,
    )
def rotate_image_and_depth_rot90(image, depth_map, clockwise):
    """
    Rotates the given image and depth map by 90 degrees (clockwise or counterclockwise)
    in the plane of their first two axes.

    Args:
        image (np.ndarray):
            Input image of shape (H, W, 3). A 2-D (H, W) image is accepted as well.
        depth_map (np.ndarray or None):
            Depth map of shape (H, W), or None if not available.
        clockwise (bool):
            If True, rotate 90 degrees clockwise; else 90 degrees counterclockwise.

    Returns:
        tuple:
            (rotated_image, rotated_depth_map)
            Both arrays are fresh copies with the first two axes swapped, i.e. (W, H, ...).
            `rotated_depth_map` is None when `depth_map` is None.
    """
    # np.rot90 rotates counterclockwise for positive k, so clockwise is k = -1.
    quarter_turns = -1 if clockwise else 1

    def _rotate(array):
        # rot90 returns a view; copy so the result owns its data.
        return np.copy(np.rot90(array, k=quarter_turns, axes=(0, 1)))

    rotated_image = _rotate(image)
    # Keep a missing depth map as a real None rather than wrapping it in an array.
    rotated_depth_map = None if depth_map is None else _rotate(depth_map)
    return rotated_image, rotated_depth_map
def adjust_extrinsic_matrix_rot90(extri_opencv, clockwise):
    """
    Update a 3x4 extrinsic matrix for a 90-degree in-plane rotation of the image.

    Both the rotation part (first three columns) and the translation part
    (fourth column) are left-multiplied by a rotation of +/-90 degrees about
    the camera z axis.

    Args:
        extri_opencv (np.ndarray):
            Extrinsic matrix (3x4) in OpenCV convention.
        clockwise (bool):
            If True, rotate extrinsic for a 90-degree clockwise image rotation;
            otherwise, counterclockwise.

    Returns:
        np.ndarray:
            A new 3x4 extrinsic matrix after the rotation.
    """
    rotation_part = extri_opencv[:, :3]
    translation_part = extri_opencv[:, 3]

    # The two cases differ only in the sign of the off-diagonal entries.
    sign = 1 if clockwise else -1
    z_rotation = np.array(
        [
            [0, -sign, 0],
            [sign, 0, 0],
            [0, 0, 1],
        ]
    )

    rotated_R = z_rotation @ rotation_part
    rotated_t = z_rotation @ translation_part

    # Re-attach the translation as the fourth column.
    return np.concatenate((rotated_R, rotated_t[:, None]), axis=1)
def adjust_intrinsic_matrix_rot90(intri_opencv, image_width, image_height, clockwise):
    """
    Build the intrinsic matrix that matches an image rotated by 90 degrees.

    The focal lengths swap axes in both directions. The principal point becomes
    (image_height - cy, cx) for a clockwise rotation and (cy, image_width - cx)
    for a counterclockwise one. All other entries are those of the identity.

    Args:
        intri_opencv (np.ndarray):
            Intrinsic matrix (3x3).
        image_width (int):
            Original width of the image.
        image_height (int):
            Original height of the image.
        clockwise (bool):
            If True, rotate 90 degrees clockwise; else 90 degrees counterclockwise.

    Returns:
        np.ndarray:
            A new 3x3 float intrinsic matrix after the rotation.
    """
    focal_x = intri_opencv[0, 0]
    focal_y = intri_opencv[1, 1]
    center_x = intri_opencv[0, 2]
    center_y = intri_opencv[1, 2]

    # NOTE(review): the mirrored coordinate uses `size - c` here while
    # adjust_track_rot90 uses `size - 1 - c`; confirm the intended pixel convention.
    if clockwise:
        rotated_cx, rotated_cy = image_height - center_y, center_x
    else:
        rotated_cx, rotated_cy = center_y, image_width - center_x

    return np.array(
        [
            [focal_y, 0.0, rotated_cx],
            [0.0, focal_x, rotated_cy],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )
def adjust_track_rot90(track, image_width, image_height, clockwise):
    """
    Adjusts a track (N, 2) for a 90-degree rotation of the image in the image plane.

    The mapping follows the pixel-index convention of `rotate_image_and_depth_rot90`:
    a point at column x, row y of the original image lands at
      - (image_height - 1 - y, x) after a clockwise rotation, and
      - (y, image_width - 1 - x)  after a counterclockwise rotation,
    so `rotated_image[new_y, new_x]` is the pixel that was at `image[y, x]`.

    Args:
        track (np.ndarray):
            (N, 2) array of pixel coordinates, each row is (x, y).
        image_width (int):
            Original image width.
        image_height (int):
            Original image height.
        clockwise (bool):
            Whether the rotation is 90 degrees clockwise or counterclockwise.

    Returns:
        np.ndarray:
            A new track of shape (N, 2) after rotation. The input is not modified.
    """
    track = np.asarray(track)
    xs, ys = track[:, 0], track[:, 1]

    if clockwise:
        # The old top edge becomes the right edge: (x, y) -> (image_height - 1 - y, x)
        new_track = np.stack((image_height - 1 - ys, xs), axis=-1)
    else:
        # The old top edge becomes the left edge: (x, y) -> (y, image_width - 1 - x)
        new_track = np.stack((ys, image_width - 1 - xs), axis=-1)

    return new_track
def read_image_cv2(path: str, rgb: bool = True) -> np.ndarray:
    """
    Load an image from disk with OpenCV, optionally converting it for RGB use.

    A missing or zero-byte file is reported and skipped without touching
    OpenCV. A failed read is retried exactly once before giving up.

    Args:
        path (str):
            File path to the image.
        rgb (bool):
            If True, convert the loaded image with `cv2.cvtColor`
            (BGR -> RGB, or gray -> 3 channels for a 2-D result).
            If False, return the array exactly as `cv2.imread` produced it.

    Returns:
        np.ndarray or None:
            The loaded image array, or None if the file does not exist, is empty,
            or could not be read.
    """
    file_is_usable = os.path.exists(path) and os.path.getsize(path) != 0
    if not file_is_usable:
        print(f"File does not exist or is empty: {path}")
        return None

    loaded = cv2.imread(path)
    if loaded is None:
        # One more attempt before giving up on this file.
        print(f"Could not load image={path}. Retrying...")
        loaded = cv2.imread(path)
        if loaded is None:
            print("Retry failed.")
            return None

    if not rgb:
        return loaded

    # A 2-D array has no channel axis and is expanded; anything else is
    # treated as BGR and reordered.
    conversion = cv2.COLOR_GRAY2BGR if len(loaded.shape) == 2 else cv2.COLOR_BGR2RGB
    return cv2.cvtColor(loaded, conversion)
def read_depth(path: str, scale_adjustment=1.0) -> np.ndarray:
    """
    Reads a depth map from disk in either .exr or .png format. The .exr is loaded using OpenCV
    with the environment variable OPENCV_IO_ENABLE_OPENEXR=1. The .png is loaded through
    `load_16big_png_depth` (16-bit PNG holding half-float bits).

    Args:
        path (str):
            File path to the depth image. Must end with .exr or .png (case-insensitive).
        scale_adjustment (float):
            A multiplier for adjusting the loaded depth values (default=1.0).

    Returns:
        np.ndarray:
            A (H, W) array containing the loaded depth. Values above 1e9 (EXR only)
            and non-finite values are replaced by zero.

    Raises:
        ValueError:
            If the file extension is not supported.
        OSError:
            If OpenCV could not read the .exr file.
    """
    lowered = path.lower()
    if lowered.endswith(".exr"):
        # Requires OPENCV_IO_ENABLE_OPENEXR to be set to "1" before cv2 is imported.
        d = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
        if d is None:
            # imread signals failure by returning None instead of raising.
            raise OSError(f'could not read depth file "{path}"')
        # Multi-channel EXRs: keep the first channel.  A single-channel file
        # already comes back as (H, W) and must not be indexed again.
        if d.ndim == 3:
            d = d[..., 0]
        # Treat absurdly large values as invalid depth.
        d[d > 1e9] = 0.0
    elif lowered.endswith(".png"):
        d = load_16big_png_depth(path)
    else:
        raise ValueError(f'unsupported depth file name "{path}"')

    d = d * scale_adjustment
    d[~np.isfinite(d)] = 0.0
    return d
def load_16big_png_depth(depth_png: str) -> np.ndarray:
    """
    Load a PNG whose 16-bit pixel values hold half-float depth bit patterns.

    Implementation detail:
      - The image is converted to a uint16 NumPy array.
      - Those 16-bit values are reinterpreted (not numerically converted) as
        float16, then cast to float32.

    Args:
        depth_png (str):
            File path to the 16-bit PNG.

    Returns:
        np.ndarray:
            A float32 depth array of shape (H, W).
    """
    with Image.open(depth_png) as depth_pil:
        # PIL reports size as (width, height); NumPy wants (rows, cols).
        width, height = depth_pil.size
        raw_bits = np.array(depth_pil, dtype=np.uint16)
        # Same bytes, viewed as half-precision floats (flattened by frombuffer).
        half_floats = np.frombuffer(raw_bits, dtype=np.float16)
        depth = half_floats.astype(np.float32).reshape((height, width))
    return depth
|