| import numpy as np |
| import os |
| import cv2 |
| from pathlib import Path |
| import trimesh as tm |
| from sklearn.neighbors import KDTree |
| from tqdm import tqdm |
| from tqdm.contrib.concurrent import thread_map |
|
|
def process_frame(frame, vertices, intrinsics, source_path, base_path, key):
    """Back-project one frame's 2D instance mask onto the scene point cloud.

    Loads the per-frame segmentation mask, keeps only pixels labelled ``key``,
    cleans the mask morphologically, lifts the masked depth pixels into world
    coordinates via the frame extrinsics, and marks every scene vertex that
    lies within 5 cm of a back-projected point.

    Args:
        frame: dict with at least 'frame_id' and 'mask_path' entries.
        vertices: (N, 3) float array of scene point coordinates.
        intrinsics: (3, 3) camera intrinsic matrix, already rescaled by the
            caller to the depth-image resolution.
        source_path: directory holding '<frame_id>.png' depth maps
            (millimetres) and '<frame_id>.txt' camera-to-world extrinsics.
        base_path: directory that the 'mask_path' entries are relative to.
        key: integer instance label to select in the mask.

    Returns:
        (N,) boolean array: True for vertices covered by this frame's mask.
    """
    frame_id = str(frame['frame_id']).zfill(5)
    mask = np.load(base_path / frame['mask_path'], allow_pickle=True) == key

    empty = np.zeros(len(vertices), dtype=bool)

    # cv2.imread needs a plain string path (older bindings reject PathLike);
    # IMREAD_UNCHANGED keeps the raw 16-bit depth values in millimetres.
    depth_raw = cv2.imread(str(source_path / f'{frame_id}.png'),
                           cv2.IMREAD_UNCHANGED)
    if depth_raw is None:
        # Missing/corrupt depth image: this frame contributes nothing instead
        # of crashing the whole worker with a TypeError on `None / 1000.`.
        return empty
    depth = depth_raw / 1000.

    extrinsics = np.loadtxt(source_path / f'{frame_id}.txt')

    img = _clean_mask(np.uint8(mask) * 255)

    # Resize the (possibly lower-resolution) mask to the depth-map size;
    # cv2.resize takes dsize as (width, height), hence the shape reversal.
    # Keep only pixels that also carry a valid (> 0) depth measurement.
    mask = cv2.resize(img, depth.shape[::-1]) > 0.5
    mask = mask & (depth > 0)

    # NOTE(review): the original code wrote a debug "mask.png" here; that was
    # removed because every thread_map worker overwrote the same file.

    depth_y, depth_x = np.where(mask)
    depths = depth[mask]
    if len(depth_x) == 0:
        return empty

    points = _backproject(depth_x, depth_y, depths, intrinsics, extrinsics)
    points = points[~np.isnan(points).any(axis=1)]
    if len(points) == 0:
        return empty

    # Match each back-projected point to its nearest scene vertex and accept
    # matches closer than 5 cm.
    tree = KDTree(vertices)
    dist, ind = tree.query(points, k=1)
    dist = dist.flatten()
    ind = ind.flatten()

    max_distance = 0.05
    point_mask = empty.copy()
    point_mask[np.unique(ind[dist < max_distance])] = True
    return point_mask


def _clean_mask(img):
    """Morphologically clean a binary (0/255) uint8 mask.

    Applies erosion and keeps only the largest connected component, matching
    the original hard-coded post-processing configuration (dilation is
    available but disabled).
    """
    kernel_size = 3
    post_process_erosion = True
    post_process_dilation = False
    post_process_component = True
    post_process_component_num = 1

    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE,
        (2 * kernel_size + 1, 2 * kernel_size + 1),
        (kernel_size, kernel_size))

    if post_process_erosion:
        img = cv2.erode(img, kernel, iterations=1)
    if post_process_dilation:
        img = cv2.dilate(img, kernel, iterations=1)

    num_labels, labels_im = cv2.connectedComponents(img)
    if post_process_component and num_labels > 1:
        # Label 0 is the background; rank foreground components by pixel area
        # and keep the post_process_component_num largest ones.
        component_areas = [
            (label, np.sum(labels_im == label)) for label in range(1, num_labels)
        ]
        component_areas.sort(key=lambda x: x[1], reverse=True)
        largest = [x[0] for x in component_areas[:post_process_component_num]]
        img = np.isin(labels_im, largest).astype(np.uint8)
    return img


def _backproject(depth_x, depth_y, depths, intrinsics, extrinsics):
    """Lift masked depth pixels into world-space 3D points.

    Pixel coordinates (u=x, v=y) are unprojected with the inverse intrinsics,
    scaled by their depth, and transformed by the camera-to-world extrinsics.
    Returns an (M, 3) array of world coordinates.
    """
    ones = np.ones(len(depth_x))
    pixel_coords = np.vstack([depth_x, depth_y, ones])
    normalized = np.linalg.inv(intrinsics) @ pixel_coords
    camera_points = normalized * depths[np.newaxis, :]
    world_h = extrinsics @ np.vstack([camera_points, ones])
    # Divide by the homogeneous coordinate (identity for rigid transforms,
    # but kept for generality with arbitrary 4x4 extrinsics).
    return (world_h[:3, :] / world_h[3, :]).T
|
|
def process_object(data):
    """Aggregate per-frame visibility masks for a single object instance.

    ``data`` is a tuple of (key, item, vertices, intrinsics, source_path,
    base_path, num_frames). At most ``num_frames`` entries of item['frames']
    are processed and their per-frame vertex masks are OR-combined.

    Returns an (N,) boolean array over the scene vertices.
    """
    key, item, vertices, intrinsics, source_path, base_path, num_frames = data
    accumulated = np.zeros(len(vertices), dtype=bool)
    for frame in item['frames'][:num_frames]:
        accumulated |= process_frame(
            frame, vertices, intrinsics, source_path, base_path, key)
    return accumulated
|
|
|
|
def load_scan(pcd_path):
    """Load scene point coordinates from a raw binary scan file.

    The file stores float32 records with 6 values per point (xyz plus three
    extra channels — presumably color; confirm against the writer). Only the
    xyz columns are returned, as an (N, 3) float32 array.
    """
    records = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, 6)
    return records[:, 0:3]
|
|
def process_scene(data):
    """Refine one scene's predicted instance masks using per-frame 2D masks.

    ``data`` is a (scene_id, exp_name) tuple. Loads the baseline prediction,
    the scene point cloud, and the per-object frame info; recomputes each
    instance's point mask by back-projecting its 2D masks (see
    process_object); overwrites the 'pred_masks' columns accordingly; exports
    a colored point-cloud visualization; and saves the updated prediction npz
    under the experiment name.
    """
    scene_id, exp_name = data
    pred_path = Path(f"data/prediction/scannet/baseline_scannet200/{scene_id}.npz")
    out_path = Path(f"data/prediction/scannet/{exp_name}/{scene_id}.npz")
    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
    scan_path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannet200/points/{scene_id}.bin")
    info_path = base_path / "infos.npy"

    vertices = load_scan(scan_path)
    info_data = np.load(info_path, allow_pickle=True).item()
    base_data = np.load(pred_path, allow_pickle=True)

    print(f"Mesh vertices shape: {vertices.shape}")
    print(f"Mesh vertices range:")
    print(f"  X: [{vertices[:, 0].min():.3f}, {vertices[:, 0].max():.3f}]")
    print(f"  Y: [{vertices[:, 1].min():.3f}, {vertices[:, 1].max():.3f}]")
    print(f"  Z: [{vertices[:, 2].min():.3f}, {vertices[:, 2].max():.3f}]")

    # The stored intrinsics correspond to the 1296x968 color images; rescale
    # them to the 640x480 depth resolution used for back-projection.
    intrinsics = np.loadtxt(source_path / 'intrinsic.txt')[:3, :3]
    intrinsics[0, :] *= 640 / 1296
    intrinsics[1, :] *= 480 / 968

    num_frames = 500  # cap on frames considered per object
    object_data = [
        [key, item, vertices, intrinsics, source_path, base_path, num_frames]
        for key, item in info_data.items()
    ]
    total_points_masks = thread_map(process_object, object_data, chunksize=100)

    # Copy the baseline prediction and overwrite each instance's point mask
    # (columns of 'pred_masks' are assumed to follow info_data key order —
    # TODO confirm against the baseline writer).
    new_data = {k: v for k, v in base_data.items()}
    for i, key in enumerate(info_data.keys()):
        new_data['pred_masks'][:, i] = total_points_masks[i]

    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Debug visualization: every instance's points in a random color.
    os.makedirs("pred_masks", exist_ok=True)  # hoisted out of the loop below
    vs = []
    cs = []
    for i in range(new_data['pred_masks'].shape[1]):
        v = vertices[new_data['pred_masks'][:, i]]
        c = np.repeat(np.random.rand(3)[np.newaxis, :], len(v), axis=0)
        vs.append(v)
        cs.append(c)
    if vs:  # np.concatenate raises on an empty list (zero-instance scene)
        tm.PointCloud(
            np.concatenate(vs, axis=0), colors=np.concatenate(cs, axis=0)
        ).export(f"pred_masks/{scene_id}_mask.ply")

    print("uniques", np.unique(new_data['pred_masks'].sum(1)),
          [[k, v.shape] for k, v in new_data.items()])
    np.savez(out_path, **new_data)
|
|
| |
|
|
if __name__ == "__main__":
    # Run the mask-refinement pipeline for every scene in the subset split.
    exp_name = "erode_mask"
    scenes = np.loadtxt(
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/splits/scannet200_subset.txt",
        dtype=str,
    )
    for scene_id in scenes:
        process_scene((scene_id, exp_name))
| |
| |
|
|
|
|