|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" |
|
|
import cv2 |
|
|
import math |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import PIL |
|
|
# Pillow >= 9.1 moved the resampling filters into the Image.Resampling enum;
# fall back to the old top-level constants for older Pillow versions.
try:
    lanczos = PIL.Image.Resampling.LANCZOS
    bicubic = PIL.Image.Resampling.BICUBIC
except AttributeError:
    lanczos = PIL.Image.LANCZOS
    bicubic = PIL.Image.BICUBIC
|
|
|
|
|
from vggt.utils.geometry import closed_form_inverse_se3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def crop_image_depth_and_intrinsic_by_pp(
    image, depth_map, intrinsic, target_shape, track=None, filepath=None, strict=False
):
    """
    Crops the given image and depth map around the camera's principal point, as defined by `intrinsic`.

    Specifically:
    - Ensures that the crop is centered on the principal point.
    - Optionally pads the image (and depth map) if `strict=True` and the result is smaller than `target_shape`.
    - Shifts the camera intrinsic matrix (and `track` if provided) accordingly.

    Naming note: this function works in numpy (row, col) order, so axis 0 is the
    image height and axis 1 the width. The local names `cx`/`cy` follow the
    original code and denote the principal point's row (axis-0) and column
    (axis-1) coordinates respectively.

    Args:
        image (np.ndarray):
            Input image array of shape (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map array of shape (H, W), or None if not available.
        intrinsic (np.ndarray):
            Camera intrinsic matrix (3x3). The principal point is assumed to be
            at row = intrinsic[1, 2], col = intrinsic[0, 2].
        target_shape (tuple[int, int] or np.ndarray):
            Desired output shape (height, width).
        track (np.ndarray or None):
            Optional array of shape (N, 2). Interpreted as (x, y) pixel coordinates.
            Copied and shifted after cropping; the caller's array is not mutated.
        filepath (str or None):
            An optional file path for debug logging (only used if strict mode triggers warnings).
        strict (bool):
            If True, will zero-pad to ensure the exact target_shape even if the cropped region is smaller.

    Raises:
        AssertionError:
            If the input image is smaller than `target_shape`.
        ValueError:
            If the cropped image is larger than `target_shape` (in strict mode), which should not normally happen.

    Returns:
        tuple:
            (cropped_image, cropped_depth_map, updated_intrinsic, updated_track)

            - cropped_image (np.ndarray): Cropped (and optionally padded) image.
            - cropped_depth_map (np.ndarray or None): Cropped (and optionally padded) depth map.
            - updated_intrinsic (np.ndarray): Intrinsic matrix adjusted for the crop.
            - updated_track (np.ndarray or None): Track array adjusted for the crop, or None if track was not provided.
    """
    original_size = np.array(image.shape)
    # Defensive copies so the caller's arrays are never mutated
    # (the original copied `intrinsic` but wrote into `track` in place).
    intrinsic = np.copy(intrinsic)
    if track is not None:
        track = np.copy(track)

    # Axis 0 of a numpy image is the height; the original messages called it
    # "width" (and axis 1 "height"), which made failures confusing to debug.
    if original_size[0] < target_shape[0]:
        error_message = (
            f"Height check failed: original height {original_size[0]} "
            f"is less than target height {target_shape[0]}."
        )
        print(error_message)
        raise AssertionError(error_message)

    if original_size[1] < target_shape[1]:
        error_message = (
            f"Width check failed: original width {original_size[1]} "
            f"is less than target width {target_shape[1]}."
        )
        print(error_message)
        raise AssertionError(error_message)

    # Principal point in (row, col) order under this function's naming.
    cx = intrinsic[1, 2]
    cy = intrinsic[0, 2]

    if strict:
        half_x = min((target_shape[0] / 2), cx)
        half_y = min((target_shape[1] / 2), cy)
    else:
        # Non-strict: also bounded by the distance to the far edge so the
        # crop never runs past the image.
        half_x = min((target_shape[0] / 2), cx, original_size[0] - cx)
        half_y = min((target_shape[1] / 2), cy, original_size[1] - cy)

    start_x = math.floor(cx) - math.floor(half_x)
    start_y = math.floor(cy) - math.floor(half_y)

    assert start_x >= 0
    assert start_y >= 0

    if strict:
        end_x = start_x + target_shape[0]
        end_y = start_y + target_shape[1]
    else:
        end_x = start_x + 2 * math.floor(half_x)
        end_y = start_y + 2 * math.floor(half_y)

    image = image[start_x:end_x, start_y:end_y, :]
    if depth_map is not None:
        depth_map = depth_map[start_x:end_x, start_y:end_y]

    # Shift the principal point into the cropped frame.
    intrinsic[1, 2] = intrinsic[1, 2] - start_x
    intrinsic[0, 2] = intrinsic[0, 2] - start_y

    if track is not None:
        # track is (x, y) = (col, row), so column 1 shifts by the row offset.
        track[:, 1] = track[:, 1] - start_x
        track[:, 0] = track[:, 0] - start_y

    if strict:
        # Robust comparison that works for both tuple and ndarray target_shape;
        # the original `(image.shape[:2] != target_shape).any()` raised
        # AttributeError when a plain tuple was passed (as the docstring allows).
        if tuple(image.shape[:2]) != tuple(target_shape):
            print(f"{filepath} does not meet the target shape")
            current_h, current_w = image.shape[:2]
            target_h, target_w = target_shape[0], target_shape[1]
            pad_h = target_h - current_h
            pad_w = target_w - current_w
            if pad_h < 0 or pad_w < 0:
                raise ValueError(
                    f"The cropped image is bigger than the target shape: "
                    f"cropped=({current_h},{current_w}), "
                    f"target=({target_h},{target_w})."
                )
            # Zero-pad on the bottom/right to reach the exact target shape.
            image = np.pad(
                image,
                pad_width=((0, pad_h), (0, pad_w), (0, 0)),
                mode="constant",
                constant_values=0,
            )
            if depth_map is not None:
                depth_map = np.pad(
                    depth_map,
                    pad_width=((0, pad_h), (0, pad_w)),
                    mode="constant",
                    constant_values=0,
                )

    return image, depth_map, intrinsic, track
|
|
|
|
|
|
|
|
def resize_image_depth_and_intrinsic(
    image,
    depth_map,
    intrinsic,
    target_shape,
    original_size,
    track=None,
    pixel_center=True,
    safe_bound=4,
    rescale_aug=True,
):
    """
    Resizes the given image and depth map (if provided) to slightly larger than `target_shape`,
    updating the intrinsic matrix (and track array if present). Optionally uses random rescaling
    to create some additional margin (based on `rescale_aug`).

    Steps:
    1. Compute a scaling factor so that the resized result is at least `target_shape + safe_bound`.
    2. Apply an optional triangular random factor if `rescale_aug=True`.
    3. Resize the image with LANCZOS if downscaling, BICUBIC if upscaling.
    4. Resize the depth map with nearest-neighbor.
    5. Update the camera intrinsic and track coordinates (if any).

    Args:
        image (np.ndarray):
            Input image array (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map array (H, W), or None if unavailable.
        intrinsic (np.ndarray):
            Camera intrinsic matrix (3x3).
        target_shape (np.ndarray or tuple[int, int]):
            Desired final shape (height, width).
        original_size (np.ndarray or tuple[int, int]):
            Original size of the image in (height, width).
        track (np.ndarray or None):
            Optional (N, 2) array of pixel coordinates. Will be scaled.
        pixel_center (bool):
            If True, accounts for 0.5 pixel center shift during resizing.
        safe_bound (int or float):
            Additional margin (in pixels) to add to target_shape before resizing.
        rescale_aug (bool):
            If True, randomly increase the `safe_bound` within a certain range to simulate augmentation.

    Returns:
        tuple:
            (resized_image, resized_depth_map, updated_intrinsic, updated_track)

            - resized_image (np.ndarray): The resized image.
            - resized_depth_map (np.ndarray or None): The resized depth map.
            - updated_intrinsic (np.ndarray): Camera intrinsic updated for new resolution.
            - updated_track (np.ndarray or None): Track array updated or None if not provided.

    Raises:
        AssertionError:
            If the shapes of the resized image and depth map do not match.
    """
    # Accept tuples as documented: the arithmetic below needs ndarrays.
    target_shape = np.asarray(target_shape)
    original_size = np.asarray(original_size)

    if rescale_aug:
        # Triangular distribution peaked at 0: usually a small extra margin,
        # occasionally up to 30% of the largest target dimension.
        random_boundary = np.random.triangular(0, 0, 0.3)
        safe_bound = safe_bound + random_boundary * target_shape.max()

    resize_scales = (target_shape + safe_bound) / original_size
    max_resize_scale = np.max(resize_scales)

    intrinsic = np.copy(intrinsic)

    image = Image.fromarray(image)
    input_resolution = np.array(image.size)  # PIL size is (W, H)
    output_resolution = np.floor(input_resolution * max_resize_scale).astype(int)
    # LANCZOS preserves detail when shrinking; BICUBIC is smoother for enlarging.
    image = image.resize(
        tuple(output_resolution),
        resample=lanczos if max_resize_scale < 1 else bicubic,
    )
    image = np.array(image)

    if depth_map is not None:
        depth_map = cv2.resize(
            depth_map,
            # cv2.resize requires dsize as a tuple of ints (W, H); the original
            # passed a numpy array, which older OpenCV versions reject.
            (int(output_resolution[0]), int(output_resolution[1])),
            fx=max_resize_scale,
            fy=max_resize_scale,
            # Nearest-neighbor avoids blending depth values across boundaries.
            interpolation=cv2.INTER_NEAREST,
        )

    # Use the realized scale (after flooring) so intrinsics match exactly.
    actual_size = np.array(image.shape[:2])
    actual_resize_scale = np.max(actual_size / original_size)

    if pixel_center:
        # Shift to continuous pixel-center coordinates before scaling.
        intrinsic[0, 2] = intrinsic[0, 2] + 0.5
        intrinsic[1, 2] = intrinsic[1, 2] + 0.5

    intrinsic[:2, :] = intrinsic[:2, :] * actual_resize_scale

    if track is not None:
        track = track * actual_resize_scale

    if pixel_center:
        intrinsic[0, 2] = intrinsic[0, 2] - 0.5
        intrinsic[1, 2] = intrinsic[1, 2] - 0.5

    # Only compare shapes when a depth map exists; the original asserted
    # unconditionally and raised AttributeError for the documented None case.
    if depth_map is not None:
        assert image.shape[:2] == depth_map.shape[:2]
    return image, depth_map, intrinsic, track
|
|
|
|
|
|
|
|
def threshold_depth_map(
    depth_map: np.ndarray,
    max_percentile: float = 99,
    min_percentile: float = 1,
    max_depth: float = -1,
) -> np.ndarray:
    """
    Thresholds a depth map using percentile-based limits and optional maximum depth clamping.

    Steps:
    1. If `max_depth > 0`, clamp all values above `max_depth` to zero.
    2. Compute `max_percentile` and `min_percentile` thresholds using nanpercentile.
    3. Zero out values above/below these thresholds, if thresholds are > 0.

    Args:
        depth_map (np.ndarray):
            Input depth map (H, W).
        max_percentile (float):
            Upper percentile (0-100). Values above this will be set to zero.
        min_percentile (float):
            Lower percentile (0-100). Values below this will be set to zero.
        max_depth (float):
            Absolute maximum depth. If > 0, any depth above this is set to zero.
            If <= 0, no maximum-depth clamp is applied.

    Returns:
        np.ndarray:
            Depth map (H, W) after thresholding. Some or all values may be zero.
            Returns None if depth_map is None.
    """
    if depth_map is None:
        return None

    # Work on a float copy so the caller's array is never modified.
    depth_map = depth_map.astype(float, copy=True)

    # Hard clamp first: anything beyond the absolute limit becomes invalid (0).
    if max_depth > 0:
        depth_map[depth_map > max_depth] = 0.0

    # Percentiles are taken after the clamp, so zeroed pixels participate in
    # the statistics (this matches the established pipeline order).
    upper = (
        np.nanpercentile(depth_map, max_percentile) if max_percentile > 0 else None
    )
    lower = (
        np.nanpercentile(depth_map, min_percentile) if min_percentile > 0 else None
    )

    if upper is not None and upper > 0:
        depth_map[depth_map > upper] = 0.0
    if lower is not None and lower > 0:
        depth_map[depth_map < lower] = 0.0

    return depth_map
|
|
|
|
|
|
|
|
def depth_to_world_coords_points(
    depth_map: np.ndarray,
    extrinsic: np.ndarray,
    intrinsic: np.ndarray,
    eps=1e-8,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Converts a depth map to world coordinates (HxWx3) given the camera extrinsic and intrinsic.
    Returns both the world coordinates and the intermediate camera coordinates,
    as well as a mask for valid depth.

    Args:
        depth_map (np.ndarray):
            Depth map of shape (H, W).
        extrinsic (np.ndarray):
            Extrinsic matrix of shape (3, 4), representing the camera pose in OpenCV convention (camera-from-world).
        intrinsic (np.ndarray):
            Intrinsic matrix of shape (3, 3).
        eps (float):
            Small epsilon for thresholding valid depth.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray]:
            (world_coords_points, cam_coords_points, point_mask)

            - world_coords_points: (H, W, 3) array of 3D points in world frame.
            - cam_coords_points: (H, W, 3) array of 3D points in camera frame.
            - point_mask: (H, W) boolean array where True indicates valid (non-zero) depth.
    """
    if depth_map is None:
        return None, None, None

    # Valid pixels have strictly positive (above-eps) depth.
    point_mask = depth_map > eps

    # Unproject into the camera frame first.
    cam_coords_points = depth_to_cam_coords_points(depth_map, intrinsic)

    # Invert the camera-from-world pose to get world-from-camera.
    cam_to_world = closed_form_inverse_se3(extrinsic[None])[0]
    rotation = cam_to_world[:3, :3]
    translation = cam_to_world[:3, 3]

    # p_world = R @ p_cam + t, applied to every pixel at once via broadcasting.
    world_coords_points = cam_coords_points @ rotation.T + translation

    return world_coords_points, cam_coords_points, point_mask
|
|
|
|
|
|
|
|
def depth_to_cam_coords_points(
    depth_map: np.ndarray, intrinsic: np.ndarray
) -> np.ndarray:
    """
    Unprojects a depth map into camera coordinates, returning (H, W, 3).

    Args:
        depth_map (np.ndarray):
            Depth map of shape (H, W).
        intrinsic (np.ndarray):
            3x3 camera intrinsic matrix.
            Assumes zero skew and standard OpenCV layout:
                [ fx  0  cx ]
                [  0 fy  cy ]
                [  0  0   1 ]

    Returns:
        np.ndarray:
            An (H, W, 3) float32 array, where each pixel is mapped to (x, y, z)
            in the camera frame.
    """
    height, width = depth_map.shape
    assert intrinsic.shape == (3, 3), "Intrinsic matrix must be 3x3"
    assert (
        intrinsic[0, 1] == 0 and intrinsic[1, 0] == 0
    ), "Intrinsic matrix must have zero skew"

    focal_u = intrinsic[0, 0]
    focal_v = intrinsic[1, 1]
    center_u = intrinsic[0, 2]
    center_v = intrinsic[1, 2]

    # Pixel grid: `cols` holds the u (x) index and `rows` the v (y) index.
    cols, rows = np.meshgrid(np.arange(width), np.arange(height))

    # Standard pinhole unprojection: back-project each pixel ray scaled by depth.
    x_cam = (cols - center_u) * depth_map / focal_u
    y_cam = (rows - center_v) * depth_map / focal_v

    cam_points = np.stack((x_cam, y_cam, depth_map), axis=-1)
    return cam_points.astype(np.float32)
|
|
|
|
|
|
|
|
def rotate_90_degrees(
    image, depth_map, extri_opencv, intri_opencv, clockwise=True, track=None
):
    """
    Rotates the input image, depth map, and camera parameters by 90 degrees.

    Applies one of two 90-degree rotations:
    - Clockwise
    - Counterclockwise (if clockwise=False)

    The extrinsic and intrinsic matrices are adjusted accordingly to maintain
    correct camera geometry. Track coordinates are also updated if provided.

    Args:
        image (np.ndarray):
            Input image of shape (H, W, 3).
        depth_map (np.ndarray or None):
            Depth map of shape (H, W), or None if not available.
        extri_opencv (np.ndarray):
            Extrinsic matrix (3x4) in OpenCV convention.
        intri_opencv (np.ndarray):
            Intrinsic matrix (3x3).
        clockwise (bool):
            If True, rotates the image 90 degrees clockwise; else 90 degrees counterclockwise.
        track (np.ndarray or None):
            Optional (N, 2) track array. Will be rotated accordingly.

    Returns:
        tuple:
            (rotated_image, rotated_depth_map, new_extri_opencv, new_intri_opencv, new_track)

            Where each is the updated version after the rotation.
    """
    height, width = image.shape[:2]

    # Rotate the raster data, then fix up camera geometry to match.
    rotated_image, rotated_depth_map = rotate_image_and_depth_rot90(
        image, depth_map, clockwise
    )
    new_intri_opencv = adjust_intrinsic_matrix_rot90(
        intri_opencv, width, height, clockwise
    )
    new_extri_opencv = adjust_extrinsic_matrix_rot90(extri_opencv, clockwise)
    new_track = (
        adjust_track_rot90(track, width, height, clockwise)
        if track is not None
        else None
    )

    return (
        rotated_image,
        rotated_depth_map,
        new_extri_opencv,
        new_intri_opencv,
        new_track,
    )
|
|
|
|
|
|
|
|
def rotate_image_and_depth_rot90(image, depth_map, clockwise):
    """
    Rotates the given image and depth map by 90 degrees (clockwise or counterclockwise),
    using a transpose+flip pattern.

    Args:
        image (np.ndarray):
            Input image of shape (H, W, C).
        depth_map (np.ndarray or None):
            Depth map of shape (H, W), or None if not available.
        clockwise (bool):
            If True, rotate 90 degrees clockwise; else 90 degrees counterclockwise.

    Returns:
        tuple:
            (rotated_image, rotated_depth_map)
            rotated_depth_map is None when depth_map is None.
    """
    rotated_depth_map = None
    # transpose + flip implements a 90-degree rotation:
    # flipping axis 1 after transposing rotates clockwise; axis 0 counterclockwise.
    flip_axis = 1 if clockwise else 0
    rotated_image = np.flip(np.transpose(image, (1, 0, 2)), axis=flip_axis)
    if depth_map is not None:
        rotated_depth_map = np.flip(np.transpose(depth_map, (1, 0)), axis=flip_axis)
        # Copy so the result is contiguous and detached from the input buffer.
        rotated_depth_map = np.copy(rotated_depth_map)
    # BUG FIX: the original returned np.copy(rotated_depth_map) unconditionally;
    # np.copy(None) yields a 0-d object array (not None), which silently breaks
    # downstream `depth_map is not None` checks. Keep None as None instead.
    return np.copy(rotated_image), rotated_depth_map
|
|
|
|
|
|
|
|
def adjust_extrinsic_matrix_rot90(extri_opencv, clockwise):
    """
    Adjusts the extrinsic matrix (3x4) for a 90-degree rotation of the image.

    The rotation is in the image plane. This modifies the camera orientation
    accordingly. The function applies either a clockwise or counterclockwise
    90-degree rotation.

    Args:
        extri_opencv (np.ndarray):
            Extrinsic matrix (3x4) in OpenCV convention.
        clockwise (bool):
            If True, rotate extrinsic for a 90-degree clockwise image rotation;
            otherwise, counterclockwise.

    Returns:
        np.ndarray:
            A new 3x4 extrinsic matrix after the rotation.
    """
    rotation_part = extri_opencv[:, :3]
    translation_part = extri_opencv[:, 3]

    # In-plane (camera z-axis) rotation by +/- 90 degrees, applied on the
    # camera side of the pose.
    if clockwise:
        plane_rot = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    else:
        plane_rot = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])

    updated_rotation = plane_rot @ rotation_part
    updated_translation = (plane_rot @ translation_part).reshape(-1, 1)
    return np.hstack((updated_rotation, updated_translation))
|
|
|
|
|
|
|
|
def adjust_intrinsic_matrix_rot90(intri_opencv, image_width, image_height, clockwise):
    """
    Adjusts the intrinsic matrix (3x3) for a 90-degree rotation of the image in the image plane.

    Args:
        intri_opencv (np.ndarray):
            Intrinsic matrix (3x3).
        image_width (int):
            Original width of the image.
        image_height (int):
            Original height of the image.
        clockwise (bool):
            If True, rotate 90 degrees clockwise; else 90 degrees counterclockwise.

    Returns:
        np.ndarray:
            A new 3x3 intrinsic matrix after the rotation.
    """
    fx = intri_opencv[0, 0]
    fy = intri_opencv[1, 1]
    cx = intri_opencv[0, 2]
    cy = intri_opencv[1, 2]

    result = np.eye(3)
    # Focal lengths swap axes under a 90-degree rotation, regardless of direction.
    result[0, 0] = fy
    result[1, 1] = fx
    if clockwise:
        # Principal point maps (cx, cy) -> (H - cy, cx).
        result[0, 2] = image_height - cy
        result[1, 2] = cx
    else:
        # Principal point maps (cx, cy) -> (cy, W - cx).
        result[0, 2] = cy
        result[1, 2] = image_width - cx

    return result
|
|
|
|
|
|
|
|
def adjust_track_rot90(track, image_width, image_height, clockwise):
    """
    Adjusts a track (N, 2) for a 90-degree rotation of the image in the image plane.

    Args:
        track (np.ndarray):
            (N, 2) array of pixel coordinates, each row is (x, y).
        image_width (int):
            Original image width.
        image_height (int):
            Original image height.
        clockwise (bool):
            Whether the rotation is 90 degrees clockwise or counterclockwise.

    Returns:
        np.ndarray:
            A new track of shape (N, 2) after rotation.
    """
    xs = track[:, 0]
    ys = track[:, 1]

    if clockwise:
        # (x, y) -> (y, W - 1 - x)
        rotated = np.stack((ys, image_width - 1 - xs), axis=-1)
    else:
        # (x, y) -> (H - 1 - y, x)
        rotated = np.stack((image_height - 1 - ys, xs), axis=-1)

    return rotated
|
|
|
|
|
|
|
|
def read_image_cv2(path: str, rgb: bool = True) -> np.ndarray:
    """
    Reads an image from disk using OpenCV, returning it as an RGB image array (H, W, 3).

    Args:
        path (str):
            File path to the image.
        rgb (bool):
            If True, convert the image to RGB.
            If False, leave the image in BGR/grayscale.

    Returns:
        np.ndarray or None:
            A numpy array of shape (H, W, 3) if successful,
            or None if the file does not exist or could not be read.
    """
    # Guard against missing or zero-byte files before asking OpenCV.
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        print(f"File does not exist or is empty: {path}")
        return None

    # Attempt the read twice: transient failures sometimes succeed on retry.
    img = None
    for attempt in range(2):
        img = cv2.imread(path)
        if img is not None:
            break
        if attempt == 0:
            print(f"Could not load image={path}. Retrying...")
    if img is None:
        print("Retry failed.")
        return None

    if rgb:
        # Grayscale input is replicated to 3 channels; color converts BGR -> RGB.
        if len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        else:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    return img
|
|
|
|
|
|
|
|
def read_depth(path: str, scale_adjustment=1.0) -> np.ndarray:
    """
    Reads a depth map from disk in either .exr or .png format. The .exr is loaded using OpenCV
    with the environment variable OPENCV_IO_ENABLE_OPENEXR=1. The .png is assumed to be a 16-bit
    PNG (converted from half float).

    Args:
        path (str):
            File path to the depth image. Must end with .exr or .png.
        scale_adjustment (float):
            A multiplier for adjusting the loaded depth values (default=1.0).

    Returns:
        np.ndarray:
            A float32 array (H, W) containing the loaded depth. Zeros or non-finite values
            may indicate invalid regions.

    Raises:
        ValueError:
            If the file extension is not supported.
    """
    lowered = path.lower()
    if lowered.endswith(".exr"):
        # Take the first channel only; EXR depth is replicated across channels.
        d = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)[..., 0]
        # Absurdly large values encode invalid pixels; zero them out.
        d[d > 1e9] = 0.0
    elif lowered.endswith(".png"):
        d = load_16big_png_depth(path)
    else:
        raise ValueError(f'unsupported depth file name "{path}"')

    d = d * scale_adjustment
    # NaN/inf are treated as invalid depth.
    d[~np.isfinite(d)] = 0.0

    return d
|
|
|
|
|
|
|
|
def load_16big_png_depth(depth_png: str) -> np.ndarray:
    """
    Loads a 16-bit PNG as a half-float depth map (H, W), returning a float32 NumPy array.

    Implementation detail:
    - PIL loads 16-bit data as 32-bit "I" mode.
    - We reinterpret the bits as float16, then cast to float32.

    Args:
        depth_png (str):
            File path to the 16-bit PNG.

    Returns:
        np.ndarray:
            A float32 depth array of shape (H, W).
    """
    with Image.open(depth_png) as depth_pil:
        width, height = depth_pil.size
        raw = np.array(depth_pil, dtype=np.uint16)
        # Reinterpret the 16-bit integer payload as IEEE half floats, then
        # widen to float32 and restore the (H, W) layout.
        depth = (
            np.frombuffer(raw, dtype=np.float16)
            .astype(np.float32)
            .reshape((height, width))
        )
    return depth
|
|
|