|
|
import numpy as np |
|
|
import os |
|
|
import cv2 |
|
|
from pathlib import Path |
|
|
import trimesh as tm |
|
|
from sklearn.neighbors import KDTree |
|
|
|
|
|
def unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics):
    """
    Back-project pixels with known depth into world coordinates.

    Each pixel is lifted to a camera-space ray with the inverse intrinsic
    matrix, scaled by its depth, and then moved into the world frame with
    the inverse of ``extrinsics``.

    Args:
        depth_x, depth_y: pixel coordinates, one entry per point (length N)
        depths: depth values in metres (length N)
        intrinsics: camera intrinsic matrix (3x3); a 4x4 homogeneous
            intrinsic matrix is also accepted, in which case only its
            upper-left 3x3 block is used
        extrinsics: camera extrinsic matrix (4x4, world-to-camera); it is
            inverted here to obtain the camera-to-world transform

    Returns:
        points: world coordinates of the points (Nx3)

    Raises:
        ValueError: if ``intrinsics`` is neither 3x3 nor 4x4.
    """
    depth_x = np.asarray(depth_x, dtype=np.float64)
    depth_y = np.asarray(depth_y, dtype=np.float64)
    depths = np.asarray(depths, dtype=np.float64)
    intrinsics = np.asarray(intrinsics, dtype=np.float64)
    extrinsics = np.asarray(extrinsics, dtype=np.float64)

    # Intrinsics are frequently stored as a 4x4 homogeneous matrix; only the
    # 3x3 pinhole part can be applied to (x, y, 1) pixel vectors.
    if intrinsics.shape == (4, 4):
        intrinsics = intrinsics[:3, :3]
    elif intrinsics.shape != (3, 3):
        raise ValueError(
            f"intrinsics must be 3x3 or 4x4, got shape {intrinsics.shape}"
        )

    num_points = len(depth_x)

    # Homogeneous pixel coordinates, one column per point: shape (3, N).
    pixel_coords = np.vstack([depth_x, depth_y, np.ones(num_points)])

    # Rays in the camera frame with z == 1, then scaled to the measured depth.
    camera_rays = np.linalg.inv(intrinsics) @ pixel_coords
    camera_points = camera_rays * depths[np.newaxis, :]

    # Append w == 1 so the 4x4 rigid transform can be applied: shape (4, N).
    camera_points_homogeneous = np.vstack([camera_points, np.ones(num_points)])

    # extrinsics maps world -> camera, so its inverse maps camera -> world.
    world_points_homogeneous = np.linalg.inv(extrinsics) @ camera_points_homogeneous

    # De-homogenise (w is 1 for a rigid transform, but divide to be safe).
    world_points = world_points_homogeneous[:3, :] / world_points_homogeneous[3, :]

    return world_points.T
|
|
|
|
|
def process_mask_with_morphology(mask, kernel_size=5,
                                 post_process_erosion=True,
                                 post_process_dilation=True,
                                 post_process_component=True,
                                 post_process_component_num=1):
    """
    Clean up a binary mask with morphological operations.

    The mask is optionally eroded, then optionally dilated with the same
    square kernel, and finally (optionally) reduced to its largest
    connected components.

    Args:
        mask: 2D array that is truthy where the object is present
        kernel_size: half-size of the square structuring element; the
            kernel actually used is (2 * kernel_size + 1) pixels per side
        post_process_erosion: apply one erosion pass
        post_process_dilation: apply one dilation pass (after the erosion)
        post_process_component: keep only the largest connected components
        post_process_component_num: how many of the largest components to
            keep when ``post_process_component`` is enabled

    Returns:
        Boolean array with the same shape as ``mask``.
    """
    # OpenCV morphology works on uint8 images; use 0 / 255 as off / on.
    img = np.uint8(mask) * 255

    side = kernel_size * 2 + 1
    kernel = np.ones((side, side), np.uint8)

    if post_process_erosion:
        img = cv2.erode(img, kernel, iterations=1)

    if post_process_dilation:
        img = cv2.dilate(img, kernel, iterations=1)

    if post_process_component:
        num_labels, labels_im = cv2.connectedComponents(img)

        # Label 0 is the background, so more than one label means at least
        # one foreground component survived the morphology.
        if num_labels > 1:
            # Pixel count of every label in a single pass over the image
            # (index 0, the background, is dropped).
            areas = np.bincount(labels_im.ravel(), minlength=num_labels)[1:]

            # Largest first; the stable sort keeps the lower label ahead on
            # ties. +1 converts positions back to label ids.
            by_area_desc = np.argsort(-areas, kind="stable") + 1
            largest_components = by_area_desc[:post_process_component_num]

            img = np.isin(labels_im, largest_components)

    return img.astype(bool)
|
|
|
|
|
if __name__ == "__main__":
    # Single-scene check: project one 2D instance mask of one frame into 3D
    # and report how many mesh vertices it covers.
    scene_id = "scene0011_00"
    path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/scannet/test/{scene_id}.npz")
    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
    scan_path = Path(f"data/scannet/processed/{scene_id}/{scene_id}_vh_clean_2.ply")
    info_path = base_path / "infos.npy"

    # Fail early with a clear message when a required input is missing.
    if not scan_path.exists():
        raise FileNotFoundError(f"Mesh file not found: {scan_path}")
    if not info_path.exists():
        raise FileNotFoundError(f"Info file not found: {info_path}")

    mesh = tm.load(scan_path)
    vertices = mesh.vertices
    # NOTE(review): `data` is loaded but never used in the visible part of
    # this script — confirm whether it is still needed.
    # allow_pickle=True can execute code embedded in the file; only load
    # files from trusted sources.
    data = np.load(path, allow_pickle=True)
    info_data = np.load(info_path, allow_pickle=True).item()

    if not info_data:
        raise ValueError(f"Info file contains no objects: {info_path}")

    # Only the first object of the info dictionary is processed.
    key, item = next(iter(info_data.items()))
    print(f"Processing object {key}")
    print(f"Object info: {item}")

    frames = item['frames']
    if len(frames) == 0:
        raise ValueError(f"Object {key} has no frames")
    intrinsics = np.loadtxt(source_path / 'intrinsic.txt')

    # Only the first frame in which the object appears is used.
    frame = frames[0]
    frame_id = str(frame['frame_id']).zfill(5)
    mask_path = base_path / frame['mask_path']

    depth_file = source_path / f'{frame_id}.png'
    extrinsics_file = source_path / f'{frame_id}.txt'

    if not depth_file.exists():
        raise FileNotFoundError(f"Depth file not found: {depth_file}")
    if not extrinsics_file.exists():
        raise FileNotFoundError(f"Extrinsics file not found: {extrinsics_file}")
    if not mask_path.exists():
        raise FileNotFoundError(f"Mask file not found: {mask_path}")

    # The mask file holds a label image; keep the pixels of this object only.
    mask = np.load(mask_path, allow_pickle=True)
    mask = mask == key

    # cv2.imread signals a failed read by returning None instead of raising.
    depth_raw = cv2.imread(str(depth_file), cv2.IMREAD_UNCHANGED)
    if depth_raw is None:
        raise OSError(f"Could not read depth image: {depth_file}")
    # Stored depth is divided by 1000 (millimetres -> metres, presumably;
    # confirm against the dataset export).
    depth = depth_raw / 1000.0

    # NOTE(review): unproject_depth_to_world inverts this matrix, i.e. it
    # treats it as world-to-camera. If the per-frame .txt files store
    # camera-to-world poses, that inversion is wrong — confirm.
    extrinsics = np.loadtxt(extrinsics_file)
    if not np.isfinite(extrinsics).all():
        raise ValueError(f"Extrinsics contain non-finite values: {extrinsics_file}")

    print(f"Original mask shape: {mask.shape}")
    print(f"Depth shape: {depth.shape}")
    print(f"Mask pixels count: {mask.sum()}")

    processed_mask = process_mask_with_morphology(
        mask,
        kernel_size=5,
        post_process_erosion=True,
        post_process_dilation=True,
        post_process_component=True,
        post_process_component_num=1
    )

    # Bring the mask to the depth resolution; cv2.resize expects (width,
    # height), hence the reversed shape. Nearest-neighbour keeps it binary.
    final_mask = cv2.resize(
        processed_mask.astype(np.uint8),
        depth.shape[::-1],
        interpolation=cv2.INTER_NEAREST_EXACT
    ).astype(bool)

    print(f"Final mask shape: {final_mask.shape}")
    print(f"Final mask pixels count: {final_mask.sum()}")

    # np.where returns (rows, cols), i.e. (y, x).
    depth_y, depth_x = np.where(final_mask)
    depths = depth[final_mask]

    # Drop missing readings (0) and implausibly far ones (>= 10 m).
    valid_depth = (depths > 0) & (depths < 10.0)
    depth_x = depth_x[valid_depth]
    depth_y = depth_y[valid_depth]
    depths = depths[valid_depth]

    print(f"Valid depth points: {len(depths)}")

    if len(depths) == 0:
        print("No valid depth points found!")
    else:
        world_points = unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics)

        print(f"World points shape: {world_points.shape}")
        print("World points range:")
        print(f" X: [{world_points[:, 0].min():.3f}, {world_points[:, 0].max():.3f}]")
        print(f" Y: [{world_points[:, 1].min():.3f}, {world_points[:, 1].max():.3f}]")
        print(f" Z: [{world_points[:, 2].min():.3f}, {world_points[:, 2].max():.3f}]")

        # Nearest mesh vertex for every back-projected point.
        tree = KDTree(vertices)
        distances, indices = tree.query(world_points, k=1)

        # Accept a match only if the vertex is closer than this (metres).
        max_distance = 0.05
        valid_matches = distances.flatten() < max_distance

        print(f"Points within {max_distance}m: {valid_matches.sum()}/{len(valid_matches)}")

        # Per-vertex mask; indexing with an empty selection is a no-op, so
        # no separate "any matches" guard is needed.
        point_mask = np.zeros(len(vertices), dtype=bool)
        point_mask[indices.flatten()[valid_matches]] = True

        print(f"Final point mask sum: {point_mask.sum()}")
        print(f"Mesh vertices total: {len(vertices)}")
        print(f"Coverage: {point_mask.sum()/len(vertices)*100:.2f}%")