bulatko commited on
Commit
55e58d1
·
1 Parent(s): 4eeefd1

adding real MK

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. MaskClustering +0 -1
  2. MaskClustering/arkit_gt_prep.py +154 -0
  3. MaskClustering/arkit_prep.py +137 -0
  4. MaskClustering/arkit_vggt_prep.py +154 -0
  5. MaskClustering/bins_build_pkl.py +36 -0
  6. MaskClustering/build_pkl.py +36 -0
  7. MaskClustering/cluster_masks.py +83 -0
  8. MaskClustering/configs/arkit_dust3r_posed.json +10 -0
  9. MaskClustering/configs/arkit_gt.json +10 -0
  10. MaskClustering/configs/arkit_gt_train.json +10 -0
  11. MaskClustering/configs/arkit_vggt.json +10 -0
  12. MaskClustering/configs/demo.json +10 -0
  13. MaskClustering/configs/itw.json +10 -0
  14. MaskClustering/configs/matterport3d.json +10 -0
  15. MaskClustering/configs/scannet.json +10 -0
  16. MaskClustering/configs/scannet_dust3r_posed_15.json +10 -0
  17. MaskClustering/configs/scannet_dust3r_posed_25.json +10 -0
  18. MaskClustering/configs/scannet_dust3r_posed_35.json +10 -0
  19. MaskClustering/configs/scannet_dust3r_posed_35_bulat.json +10 -0
  20. MaskClustering/configs/scannet_dust3r_posed_45.json +10 -0
  21. MaskClustering/configs/scannet_dust3r_posed_45_andrey.json +10 -0
  22. MaskClustering/configs/scannet_dust3r_posed_45_bulat.json +10 -0
  23. MaskClustering/configs/scannet_dust3r_unposed_15.json +10 -0
  24. MaskClustering/configs/scannet_dust3r_unposed_25.json +10 -0
  25. MaskClustering/configs/scannet_dust3r_unposed_35.json +10 -0
  26. MaskClustering/configs/scannet_dust3r_unposed_45.json +10 -0
  27. MaskClustering/configs/scannetpp.json +10 -0
  28. MaskClustering/configs/scannetpp_dust3r_filtered_depth.json +10 -0
  29. MaskClustering/configs/scannetpp_dust3r_posed.json +10 -0
  30. MaskClustering/configs/scannetpp_dust3r_unposed.json +10 -0
  31. MaskClustering/configs/scannetpp_mapanything_posed.json +10 -0
  32. MaskClustering/configs/scannetpp_v2_dust3r_posed.json +10 -0
  33. MaskClustering/configs/scannetpp_v2_dust3r_unposed.json +10 -0
  34. MaskClustering/configs/wild.json +10 -0
  35. MaskClustering/dataset/demo.py +100 -0
  36. MaskClustering/dataset/matterport.py +137 -0
  37. MaskClustering/dataset/scannet.py +452 -0
  38. MaskClustering/dataset/scannetpp.py +217 -0
  39. MaskClustering/dense_masks.py +91 -0
  40. MaskClustering/evaluation/__init__.py +1 -0
  41. MaskClustering/evaluation/constants.py +78 -0
  42. MaskClustering/evaluation/evaluate.py +420 -0
  43. MaskClustering/evaluation/utils_3d.py +66 -0
  44. MaskClustering/infer_single_scene.py +355 -0
  45. MaskClustering/main.py +30 -0
  46. MaskClustering/make_bins.py +54 -0
  47. MaskClustering/make_pkl.py +392 -0
  48. MaskClustering/make_pkl_arkit.py +349 -0
  49. MaskClustering/make_pkl_conf.py +295 -0
  50. MaskClustering/mask_predict.py +114 -0
MaskClustering DELETED
@@ -1 +0,0 @@
1
- ../Indoor/MaskClustering/
 
 
MaskClustering/arkit_gt_prep.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from pathlib import Path
3
+ import torch
4
+ import cv2
5
+ import numpy as np
6
+ import os
7
+ import open3d as o3d
8
+ from tqdm import tqdm
9
+
10
def process_scene(scene_params, target_depth_shape=None):
    """Flatten a reconstruction scene dict into per-frame records.

    Removes a leftover debug ``print(scene_params.keys())`` from the original.

    Args:
        scene_params: dict of parallel per-frame sequences under keys
            "image_files", "poses", "depths", "Ks", "pts3d", "im_conf",
            and "imshapes" (reconstruction-resolution (h, w) per frame).
        target_depth_shape: (h, w) to resize each depth map to; defaults to
            the on-disk resolution of the first image.

    Returns:
        list of per-frame dicts with the resized depth and intrinsics
        rescaled both for the full-size image and the target depth map.
    """
    images = scene_params["image_files"]
    poses = scene_params["poses"]
    depths = scene_params["depths"]
    Ks = scene_params["Ks"]
    pts3d = scene_params["pts3d"]
    im_confs = scene_params["im_conf"]
    im_shapes = scene_params["imshapes"]
    # assumes every frame shares the first frame's reconstruction shape — TODO confirm
    im_shape = im_shapes[0]

    # Full-resolution size on disk vs. reconstruction resolution.
    image_hw = cv2.imread(images[0]).shape[:2]
    image_scale = np.ones((3, 3))
    image_scale[0] *= image_hw[1] / im_shape[1]  # scales fx / cx row by width ratio
    image_scale[1] *= image_hw[0] / im_shape[0]  # scales fy / cy row by height ratio

    if target_depth_shape is None:
        target_depth_shape = image_hw

    depth_scale = np.ones((3, 3))
    depth_scale[0] *= target_depth_shape[1] / im_shape[1]
    depth_scale[1] *= target_depth_shape[0] / im_shape[0]

    data = [
        {
            "image_path": image,
            "pose": pose,
            # depth arrives as a torch tensor; cv2 wants (w, h), hence [::-1]
            "depth": cv2.resize(depth.numpy(), target_depth_shape[::-1], interpolation=cv2.INTER_LINEAR),
            "source_K": K,
            "image_K": K * image_scale,  # element-wise rescale of the intrinsics
            "depth_K": K * depth_scale,
            "pts3d": pts,
            "im_conf": im_conf,
            "im_shape_target": image_hw,
            "depth_shape_target": target_depth_shape,
            "shape_original": im_shape,
        } for image, pose, depth, K, pts, im_conf in zip(images, poses, depths, Ks, pts3d, im_confs)
    ]

    return data
51
+
52
+
53
def export_scene(scene_id, data, processing_args):
    """Export one scene to ScanNet-style layout (intrinsic/, color/, depth/, pose/).

    Images, depths and the GT mesh are symlinked rather than copied; poses and
    intrinsics are written as text files under
    ``processing_args["out_dir"] / scene_id``.

    Args:
        scene_id: scene directory name under the output root.
        data: list of per-frame dicts with keys "image_path", "depth_path",
            "pose", "image_K", "depth_K" and "pts3d_path" (same mesh for all
            frames).
        processing_args: dict; only "out_dir" (a Path) is used here.
    """
    out_path = processing_args["out_dir"] / scene_id
    K_color = data[0]["image_K"]
    K_depth = data[0]["depth_K"]

    def proc_k(K):
        # Embed the 3x3 intrinsics into a 4x4 matrix (ScanNet file convention).
        res = np.eye(4)
        res[:3, :3] = K[:3, :3]
        return res

    K_color = proc_k(K_color)
    K_depth = proc_k(K_depth)
    intrinsics_path = out_path / "intrinsic"
    intrinsics_path.mkdir(parents=True, exist_ok=True)
    np.savetxt(intrinsics_path / "intrinsic_color.txt", K_color)
    np.savetxt(intrinsics_path / "intrinsic_depth.txt", K_depth)

    # Identity extrinsics: color and depth are treated as already registered.
    np.savetxt(intrinsics_path / "extrinsic_color.txt", np.eye(4))
    np.savetxt(intrinsics_path / "extrinsic_depth.txt", np.eye(4))

    for i, item in enumerate(data):
        img_name = Path(item["image_path"]).stem
        image_path = out_path / "color" / f"{img_name}.jpg"
        image_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            # Symlink instead of copy; re-runs tolerate existing links.
            os.symlink(item["image_path"], image_path)
        except FileExistsError:
            pass


        depth_path = out_path / "depth" / f"{img_name}.png"
        depth_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            os.symlink(item["depth_path"], depth_path)
        except FileExistsError:
            pass

        pose_path = out_path / "pose" / f"{img_name}.txt"
        pose_path.parent.mkdir(parents=True, exist_ok=True)
        np.savetxt(pose_path, item["pose"])

    # NOTE(review): uses `item` left over from the loop above — works because
    # every frame carries the same "pts3d_path", but it is fragile and fails
    # with UnboundLocalError when `data` is empty. Confirm intent.
    try:
        os.symlink(item["pts3d_path"], out_path / f"{scene_id}_vh_clean_2.ply")
    except FileExistsError:
        pass
98
+
99
+
100
+
101
+
102
+
103
+
104
# Driver: export ARKitScenes *train* scenes with ground-truth poses/depths into
# the ScanNet-style layout consumed by MaskClustering.
processing_args = {
    "confidence_threshold": 1,
    "voxel_size": 0.025,
    "out_dir": Path("data/arkit_gt_train/processed"),
}


val_path = Path("../") / "OKNO/data/arkitscenes/arkitscenes_offline_infos_train.pkl"
# NOTE(review): `out_dir` is never used below — the output root actually comes
# from processing_args["out_dir"] ("arkit_gt_train"); confirm the mismatch.
out_dir = Path("data/arkit_gt/processed")
with open(val_path, "rb") as f:
    data = pickle.load(f)

data_list = data["data_list"]
# Scene ids come from the lidar path prefix; cap at 2500 scenes.
val_scenes = [scene["lidar_points"]["lidar_path"] for scene in data_list][:2500]
def extract_name(item):
    # "41048190_xxx..." -> "41048190"
    return item.split("_")[0]
val_scenes = [extract_name(scene) for scene in val_scenes]

scenes_path = Path("/workspace-SR006.nfs2/datasets/arkitscenes/offline_prepared_data/posed_images")
pcd_path = Path("/workspace-SR006.nfs2/datasets/arkit_data/3dod/Training/")
scene = val_scenes[0]  # NOTE(review): immediately overwritten by the loop below
num_images = 160  # uniform subsample cap per scene


for scene in tqdm(val_scenes):
    try:
        # Skip scenes that were already exported.
        if (processing_args["out_dir"] / scene).exists():
            continue
        scene_path = scenes_path / scene

        colors = sorted(scene_path.glob("*.jpg"))
        if len(colors) > num_images:
            # Uniformly subsample frames down to num_images.
            indices = np.linspace(0, len(colors) - 1, num_images).astype(int)
            colors = [colors[i] for i in indices]
        # Depth/pose files share the image stem in the same directory.
        depths = [a.parent / (a.stem + ".png") for a in colors]
        poses = [a.parent / (a.stem + ".txt") for a in colors]
        K = np.loadtxt(scene_path / "intrinsic.txt")

        scene_params = [{
            "image_path": image,
            "pose": np.loadtxt(pose),
            "depth_path": depth,
            "source_K": K,
            "image_K": K,
            "depth_K": K,
            "pts3d_path": pcd_path / scene / f"{scene}_3dod_mesh.ply",
        } for image, depth, pose in zip(colors, depths, poses)]
        export_scene(scene, scene_params, processing_args)
    except Exception as e:
        # Best-effort batch run: log the failing scene's error and keep going.
        print(e)
        continue
MaskClustering/arkit_prep.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from pathlib import Path
3
+ import torch
4
+ import cv2
5
+ import numpy as np
6
+ import os
7
+ import open3d as o3d
8
+ from tqdm import tqdm
9
+
10
def process_scene(scene_params, target_depth_shape=None):
    """Flatten a DUSt3R scene dict into per-frame records.

    Removes a leftover debug ``print(scene_params.keys())`` from the original.

    Args:
        scene_params: dict of parallel per-frame sequences under keys
            "image_files", "poses", "depths", "Ks", "pts3d", "im_conf",
            and "imshapes" (reconstruction-resolution (h, w) per frame).
        target_depth_shape: (h, w) to resize each depth map to; defaults to
            the on-disk resolution of the first image.

    Returns:
        list of per-frame dicts with the resized depth and intrinsics
        rescaled both for the full-size image and the target depth map.
    """
    images = scene_params["image_files"]
    poses = scene_params["poses"]
    depths = scene_params["depths"]
    Ks = scene_params["Ks"]
    pts3d = scene_params["pts3d"]
    im_confs = scene_params["im_conf"]
    im_shapes = scene_params["imshapes"]
    # assumes every frame shares the first frame's reconstruction shape — TODO confirm
    im_shape = im_shapes[0]

    # Full-resolution size on disk vs. reconstruction resolution.
    image_hw = cv2.imread(images[0]).shape[:2]
    image_scale = np.ones((3, 3))
    image_scale[0] *= image_hw[1] / im_shape[1]  # scales fx / cx row by width ratio
    image_scale[1] *= image_hw[0] / im_shape[0]  # scales fy / cy row by height ratio

    if target_depth_shape is None:
        target_depth_shape = image_hw

    depth_scale = np.ones((3, 3))
    depth_scale[0] *= target_depth_shape[1] / im_shape[1]
    depth_scale[1] *= target_depth_shape[0] / im_shape[0]

    data = [
        {
            "image_path": image,
            "pose": pose,
            # depth arrives as a torch tensor; cv2 wants (w, h), hence [::-1]
            "depth": cv2.resize(depth.numpy(), target_depth_shape[::-1], interpolation=cv2.INTER_LINEAR),
            "source_K": K,
            "image_K": K * image_scale,  # element-wise rescale of the intrinsics
            "depth_K": K * depth_scale,
            "pts3d": pts,
            "im_conf": im_conf,
            "im_shape_target": image_hw,
            "depth_shape_target": target_depth_shape,
            "shape_original": im_shape,
        } for image, pose, depth, K, pts, im_conf in zip(images, poses, depths, Ks, pts3d, im_confs)
    ]

    return data
51
+
52
+
53
def export_scene(scene_id, scene_params, processing_args):
    """Export one DUSt3R scene to ScanNet-style layout plus a fused point cloud.

    Symlinks images, writes resized metric depths as 16-bit PNGs (mm), writes
    poses/intrinsics as text, and fuses confidence-filtered per-frame 3D points
    into a voxel-downsampled ``<scene_id>_vh_clean_2.ply``.

    Args:
        scene_id: scene directory name under processing_args["out_dir"].
        scene_params: raw scene dict; converted via process_scene().
        processing_args: dict with "out_dir", "confidence_threshold",
            "voxel_size".
    """
    data = process_scene(scene_params)
    out_path = processing_args["out_dir"] / scene_id
    K_color = data[0]["image_K"]
    K_depth = data[0]["depth_K"]

    def proc_k(K):
        # Embed the 3x3 intrinsics into a 4x4 matrix (ScanNet file convention).
        res = np.eye(4)
        res[:3, :3] = K[:3, :3]
        return res

    K_color = proc_k(K_color)
    K_depth = proc_k(K_depth)
    intrinsics_path = out_path / "intrinsic"
    intrinsics_path.mkdir(parents=True, exist_ok=True)
    np.savetxt(intrinsics_path / "intrinsic_color.txt", K_color)
    np.savetxt(intrinsics_path / "intrinsic_depth.txt", K_depth)

    # Identity extrinsics: color and depth treated as already registered.
    np.savetxt(intrinsics_path / "extrinsic_color.txt", np.eye(4))
    np.savetxt(intrinsics_path / "extrinsic_depth.txt", np.eye(4))
    all_pts = []
    all_colors = []
    for i, item in enumerate(data):
        img_name = Path(item["image_path"]).stem
        image_path = out_path / "color" / f"{img_name}.jpg"
        image_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            os.symlink(item["image_path"], image_path)
        except FileExistsError:
            pass
        # Re-read the image at reconstruction resolution to color the points.
        image = cv2.imread(item["image_path"])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # assumes shape_original is an (h, w) pair usable as cv2 (w, h) — TODO confirm
        image = cv2.resize(image, item['shape_original'][::-1]) / 255.


        depth_path = out_path / "depth" / f"{img_name}.png"
        depth_path.parent.mkdir(parents=True, exist_ok=True)
        # Metric depth in meters -> 16-bit millimeters (ScanNet convention).
        # NOTE(review): cv2.imwrite may require str(depth_path) on some
        # OpenCV versions — confirm against the installed cv2.
        cv2.imwrite(depth_path, (item["depth"] * 1000).astype(np.uint16))

        pose_path = out_path / "pose" / f"{img_name}.txt"
        pose_path.parent.mkdir(parents=True, exist_ok=True)
        np.savetxt(pose_path, item["pose"])
        # Keep only points whose per-pixel confidence clears the threshold;
        # the same (h, w) boolean mask filters the color image to (N, 3).
        pts = item["pts3d"][item["im_conf"] > processing_args["confidence_threshold"]]
        image = image[item["im_conf"] > processing_args["confidence_threshold"]]

        # NOTE(review): pts uses torch .view while image uses numpy .reshape —
        # the later np.concatenate relies on torch tensors being array-like.
        all_pts.append(pts.view(-1, 3))
        all_colors.append(image.reshape(-1, 3))
    all_pts = np.concatenate(all_pts, axis=0)
    all_colors = np.concatenate(all_colors, axis=0)

    # Fuse all frames into one colored point cloud and voxel-downsample it.
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(all_pts)
    pcd.colors = o3d.utility.Vector3dVector(all_colors)
    pcd = pcd.voxel_down_sample(voxel_size=processing_args["voxel_size"])
    o3d.io.write_point_cloud(out_path / f"{scene_id}_vh_clean_2.ply", pcd)
108
+
109
+
110
+
111
+
112
+
113
+
114
# Driver: export DUSt3R (posed) reconstructions of the ARKitScenes val split
# into the ScanNet-style layout consumed by MaskClustering.
processing_args = {
    "confidence_threshold": 1,
    "voxel_size": 0.025,
    "out_dir": Path("data/arkit_dust3r_posed/processed"),
}


val_path = Path("../") / "OKNO/data/arkitscenes/arkitscenes_offline_infos_val.pkl"
out_dir = Path("data/arkit_dust3r_posed/processed")  # NOTE(review): unused below
with open(val_path, "rb") as f:
    data = pickle.load(f)

data_list = data["data_list"]
val_scenes = [scene["lidar_points"]["lidar_path"] for scene in data_list]
def extract_name(item):
    # "41048190_xxx..." -> "41048190"
    return item.split("_")[0]
val_scenes = [extract_name(scene) for scene in val_scenes]

dut3r_path = Path("/home/jovyan/users/lemeshko/Indoor/DUSt3R/res/arkit_posed")

for scene in tqdm(val_scenes):
    scene_path = dut3r_path / scene
    # Pre-computed DUSt3R output saved as a torch checkpoint per scene.
    scene_params = torch.load(scene_path / "scene_params.pt")
    export_scene(scene, scene_params, processing_args)
MaskClustering/arkit_vggt_prep.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from pathlib import Path
3
+ import torch
4
+ import cv2
5
+ import numpy as np
6
+ import os
7
+ import open3d as o3d
8
+ from tqdm import tqdm
9
+ from tqdm.contrib.concurrent import process_map, thread_map
10
+ from rec_utils.datasets import ARKitDataset, VGGTDataset
11
+ from rec_utils.aligner import build_aligner_1p1d
12
+ from rec_utils.datasets.arkit.utils import rotate_image
13
+
14
def process_scene(scene_params, target_depth_shape=None):
    """Flatten a reconstruction scene dict into per-frame records.

    Removes a leftover debug ``print(scene_params.keys())`` from the original.

    Args:
        scene_params: dict of parallel per-frame sequences under keys
            "image_files", "poses", "depths", "Ks", "pts3d", "im_conf",
            and "imshapes" (reconstruction-resolution (h, w) per frame).
        target_depth_shape: (h, w) to resize each depth map to; defaults to
            the on-disk resolution of the first image.

    Returns:
        list of per-frame dicts with the resized depth and intrinsics
        rescaled both for the full-size image and the target depth map.
    """
    images = scene_params["image_files"]
    poses = scene_params["poses"]
    depths = scene_params["depths"]
    Ks = scene_params["Ks"]
    pts3d = scene_params["pts3d"]
    im_confs = scene_params["im_conf"]
    im_shapes = scene_params["imshapes"]
    # assumes every frame shares the first frame's reconstruction shape — TODO confirm
    im_shape = im_shapes[0]

    # Full-resolution size on disk vs. reconstruction resolution.
    image_hw = cv2.imread(images[0]).shape[:2]
    image_scale = np.ones((3, 3))
    image_scale[0] *= image_hw[1] / im_shape[1]  # scales fx / cx row by width ratio
    image_scale[1] *= image_hw[0] / im_shape[0]  # scales fy / cy row by height ratio

    if target_depth_shape is None:
        target_depth_shape = image_hw

    depth_scale = np.ones((3, 3))
    depth_scale[0] *= target_depth_shape[1] / im_shape[1]
    depth_scale[1] *= target_depth_shape[0] / im_shape[0]

    data = [
        {
            "image_path": image,
            "pose": pose,
            # depth arrives as a torch tensor; cv2 wants (w, h), hence [::-1]
            "depth": cv2.resize(depth.numpy(), target_depth_shape[::-1], interpolation=cv2.INTER_LINEAR),
            "source_K": K,
            "image_K": K * image_scale,  # element-wise rescale of the intrinsics
            "depth_K": K * depth_scale,
            "pts3d": pts,
            "im_conf": im_conf,
            "im_shape_target": image_hw,
            "depth_shape_target": target_depth_shape,
            "shape_original": im_shape,
        } for image, pose, depth, K, pts, im_conf in zip(images, poses, depths, Ks, pts3d, im_confs)
    ]

    return data
55
+
56
def es_wrap(data):
    """Best-effort wrapper around export_scene for use with thread_map.

    Any exception is printed and swallowed so one bad scene cannot abort
    the whole batch; returns None on failure.
    """
    result = None
    try:
        result = export_scene(data)
    except Exception as err:
        print(err)
    return result
62
+
63
def export_scene(data):
    """Align a VGGT scene to its ARKit counterpart and export pose/intrinsic files.

    NOTE(review): the TSDF fusion, image/depth export, and point-cloud write
    are all commented out below — in its current state this function only
    creates the directory layout and writes intrinsics, identity extrinsics,
    and per-frame poses. The `tsdffusion` volume is constructed but never used.

    Args:
        data: (vggt_dataset, arkit_dataset, index, out_dir, processing_args)
            tuple (packed for thread_map).
    """
    vggt_dataset, arkit_dataset, i, out_dir, processing_args = data
    vggt_scene = vggt_dataset[i]
    scene_id = vggt_scene.id
    arkit_scene = arkit_dataset[scene_id]
    out_path = out_dir / scene_id
    out_path.mkdir(parents=True, exist_ok=True)
    # if os.path.exists(out_path / f'{vggt_scene.id}_vh_clean_2.ply'):
    #     return
    # Align against the last 100 ARKit frames only.
    arkit_scene.frames = arkit_scene.frames[-100:]
    aligner = build_aligner_1p1d(source_scene=vggt_scene, target_scene=arkit_scene)
    scene = aligner.align(vggt_scene, inplace=True)
    # scene = arkit_scene

    K_color = arkit_scene[0].image_intrinsics
    K_depth = arkit_scene[0].depth_intrinsics
    intrinsics_path = out_path / "intrinsic"
    intrinsics_path.mkdir(parents=True, exist_ok=True)
    color_path = out_path / "color"
    color_path.mkdir(parents=True, exist_ok=True)
    depth_path = out_path / "depth"
    depth_path.mkdir(parents=True, exist_ok=True)
    pose_path = out_path / "pose"
    pose_path.mkdir(parents=True, exist_ok=True)
    np.savetxt(intrinsics_path / "intrinsic_color.txt", K_color)
    np.savetxt(intrinsics_path / "intrinsic_depth.txt", K_depth)

    # Identity extrinsics: color and depth treated as already registered.
    np.savetxt(intrinsics_path / "extrinsic_color.txt", np.eye(4))
    np.savetxt(intrinsics_path / "extrinsic_depth.txt", np.eye(4))
    # Constructed but unused while the integration loop below is disabled.
    tsdffusion = o3d.pipelines.integration.ScalableTSDFVolume(
        voxel_length=0.025,
        sdf_trunc=0.1,
        color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8
    )
    print("rotation angle", arkit_scene.rotation_angle)
    for frame in tqdm(scene.frames):
        # image = rotate_image(frame.image, arkit_scene.rotation_angle)
        # # image = frame.image
        # h, w = image.shape[:2]
        # depth = frame.depth.astype(np.float32)
        # depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_LINEAR)
        # color = o3d.geometry.Image(image)
        np.savetxt(str(pose_path / f'{frame.frame_id}.txt'), frame.pose)
        # cv2.imwrite(str(color_path / f'{frame.frame_id}.jpg'), image[..., ::-1])
        # cv2.imwrite(str(depth_path / f'{frame.frame_id}.png'), (depth * 1000.).astype(np.uint16))

        # fx, fy = K_color[0, 0], K_color[1, 1]
        # cx, cy = K_color[0, 2], K_color[1, 2]

        # dh, dw = depth.shape
        # depth_o3d = o3d.geometry.Image(depth)

        # rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        #     color, depth_o3d, depth_trunc=10., convert_rgb_to_intensity=False, depth_scale=1.0
        # )
        # camera_o3d = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)
        # tsdffusion.integrate(
        #     rgbd, camera_o3d,
        #     np.linalg.inv(frame.pose),
        # )
    # pc = tsdffusion.extract_point_cloud()
    # pc.voxel_down_sample(voxel_size=0.025)
    # o3d.io.write_point_cloud(str(out_path / f'{scene.id}_vh_clean_2.ply'), pc)
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
# Driver: align every VGGT scene to its ARKit counterpart and export it,
# fanning out over threads via es_wrap (errors are printed, not raised).
vggt_dataset = VGGTDataset("/home/jovyan/users/bulat/workspace/3drec/vggt/output/arkit_new/")
arkit_dataset = ARKitDataset("/workspace-SR006.nfs2/datasets/arkitscenes/offline_prepared_data/posed_images/")
processing_args = {
    "voxel_size": 0.025,
    # NOTE(review): points at the dust3r output dir while `out_dir` below is
    # arkit_vggt; export_scene uses the tuple's out_dir, so this entry seems
    # unused — confirm.
    "out_dir": Path("data/arkit_dust3r_posed/processed"),
}


val_path = Path("../") / "OKNO/data/arkitscenes/arkitscenes_offline_infos_val.pkl"
out_dir = Path("data/arkit_vggt/processed")
with open(val_path, "rb") as f:
    data = pickle.load(f)

data_list = data["data_list"]
val_scenes = [scene["lidar_points"]["lidar_path"] for scene in data_list]
def extract_name(item):
    # "41048190_xxx..." -> "41048190"
    return item.split("_")[0]
# NOTE(review): val_scenes is computed but never used — iteration below is
# driven purely by the VGGT dataset's own indices.
val_scenes = [extract_name(scene) for scene in val_scenes]
data = [(vggt_dataset, arkit_dataset, i, out_dir, processing_args) for i in range(len(vggt_dataset))]
thread_map(es_wrap, data, chunksize=128)
# break
154
+
MaskClustering/bins_build_pkl.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from tqdm.auto import tqdm
4
+ from copy import deepcopy
5
+
6
+
7
+
8
if __name__ == "__main__":
    # Build a subset of the GT annotation .pkl that keeps only the scenes for
    # which predictions exist under pred_path.
    pred_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/arkitscenes/points_vggt/"
    out_pkl_path = \
        "arkit_vggt_val_subset.pkl"
    gt_pkl_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/arkitscenes/arkitscenes_offline_infos_val.pkl"


    with open(gt_pkl_path, 'rb') as file:
        gt_data = pickle.load(file)

    new_data = {"metainfo": gt_data["metainfo"]}
    data_list = []

    # Use a set for O(1) membership tests (the original wrapped os.listdir in
    # a redundant list comprehension and did O(n) lookups per scene).
    picked_scenes = set(os.listdir(pred_path))
    num = 0
    # NOTE(review): only the first 10 GT entries are scanned — looks like a
    # leftover debug slice; confirm before running on the full split.
    for scene in tqdm(gt_data['data_list'][:10]):
        scene_name = scene['lidar_points']['lidar_path']
        if scene_name not in picked_scenes:
            print(f"Scene {scene_name} not found in {pred_path}")
            continue
        num += 1
        # Deep copy so the subset is independent of the loaded GT structure.
        tmp_scene = deepcopy(scene)
        data_list.append(tmp_scene)
    print(f"Number of scenes: {num}")
    new_data['data_list'] = data_list
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(new_data, f)
MaskClustering/build_pkl.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from tqdm.auto import tqdm
4
+ from copy import deepcopy
5
+
6
+
7
+
8
if __name__ == "__main__":
    # Build a subset of the GT annotation .pkl that keeps only the scenes for
    # which a prediction file exists under pred_path.
    pred_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/arkit_gt_train"
    out_pkl_path = \
        "arkit_gt_train.pkl"
    gt_pkl_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/arkitscenes/arkitscenes_offline_infos_train.pkl"


    with open(gt_pkl_path, 'rb') as file:
        gt_data = pickle.load(file)

    new_data = {"metainfo": gt_data["metainfo"]}
    data_list = []

    # Strip the 4-char file extension (presumably ".npz" — confirm) and use a
    # set for O(1) membership tests instead of repeated O(n) list scans.
    picked_scenes = {name[:-4] for name in os.listdir(pred_path)}
    num = 0
    for scene in tqdm(gt_data['data_list']):
        # Scene id is the prefix of the lidar path before the first "_".
        scene_name = scene['lidar_points']['lidar_path'].split("_")[0]
        if scene_name not in picked_scenes:
            print(f"Scene {scene_name} not found in {pred_path}")
            continue
        num += 1
        # Deep copy so the subset is independent of the loaded GT structure.
        tmp_scene = deepcopy(scene)
        data_list.append(tmp_scene)
    print(f"Number of scenes: {num}")
    new_data['data_list'] = data_list
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(new_data, f)
MaskClustering/cluster_masks.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import cv2
4
+ from pathlib import Path
5
+ import trimesh as tm
6
+ from sklearn.neighbors import KDTree
7
+ from tqdm import tqdm
8
+ from tqdm.contrib.concurrent import thread_map
9
+ from sklearn.cluster import DBSCAN
10
+
11
+
12
def load_scan(pcd_path):
    """Load the xyz coordinates from a packed float32 scan file.

    The file stores 6 float32 values per point (x, y, z plus three more,
    presumably color — TODO confirm); only the first three are returned.
    """
    raw = np.fromfile(pcd_path, dtype=np.float32)
    return raw.reshape(-1, 6)[:, :3]
15
+
16
def process_scene(data):
    """Refine predicted 3D instance masks by keeping only their largest DBSCAN cluster.

    For each predicted point mask of a scene, the masked points are clustered
    with DBSCAN (eps=0.3, min_samples=10) and the mask is reduced to its most
    populated spatial cluster, dropping outlier fragments. Refined masks are
    written as a new .npz prediction file under the experiment directory.

    Fix: the original selected the biggest cluster with
    ``np.argmax(np.unique(...)[1])`` and compared that *index* against the
    labels; that is only correct because DBSCAN labels happen to be
    consecutive integers from 0. We now index back into the unique labels.

    Args:
        data: (scene_id, exp_name) tuple, packed for thread_map.
    """
    scene_id, exp_name = data
    pred_path = Path(f"data/prediction/scannet/click_sam/{scene_id}.npz")
    out_path = Path(f"data/prediction/scannet/{exp_name}/{scene_id}.npz")
    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
    scan_path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannet200/points/{scene_id}.bin")
    info_path = base_path / "infos.npy"

    vertices = load_scan(scan_path)
    # Loaded for parity with the original pipeline; currently unused here.
    info_data = np.load(info_path, allow_pickle=True).item()

    base_data = np.load(pred_path, allow_pickle=True)

    # Transpose to (num_masks, num_points) so each row is one instance mask.
    total_points_masks = base_data['pred_masks'].T

    for i, mask in enumerate(total_points_masks):
        mask = mask.astype(bool)
        points = vertices[mask]
        if len(points) == 0:
            continue
        db = DBSCAN(eps=0.3, min_samples=10)
        labels = db.fit_predict(points)

        # Every point classified as noise (-1): leave the mask unchanged.
        if (labels == -1).all():
            continue
        # Keep only the most populated non-noise cluster.
        uniq_labels, counts = np.unique(labels[labels != -1], return_counts=True)
        keep_label = uniq_labels[np.argmax(counts)]
        res_mask = labels == keep_label
        new_mask = np.zeros_like(mask)
        new_mask[mask] = res_mask
        total_points_masks[i] = new_mask

    new_data = {
        k: v for k, v in base_data.items()
    }
    new_data['pred_masks'] = total_points_masks.T

    out_path.parent.mkdir(parents=True, exist_ok=True)
    print("uniques", np.unique(new_data['pred_masks'].sum(1)), [[k, v.shape] for k, v in new_data.items()])
    np.savez(out_path, **new_data)
72
+
73
+
74
+
75
if __name__ == "__main__":
    # Single-scene debug run; swap in the split file for the full dataset:
    # scenes = np.loadtxt("/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/splits/scannet.txt", dtype=str)
    exp_name = "cluster_filtering_click_sam"
    scenes = ["scene0011_00"]
    jobs = [(scene_id, exp_name) for scene_id in scenes]
    # Fan out mask refinement over a thread pool.
    results = thread_map(process_scene, jobs, chunksize=20)
81
+
82
+
83
+
MaskClustering/configs/arkit_dust3r_posed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "arkit_dust3r_posed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/arkit_gt.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "arkit_gt",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/arkit_gt_train.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "arkit_gt_train",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/arkit_vggt.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "arkit_vggt",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/demo.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "demo",
8
+ "cropformer_path": "/raid/miyan/ckpt/Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/itw.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "itw",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/matterport3d.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "matterport3d",
8
+ "cropformer_path": "/raid/miyan/ckpt/Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 1
10
+ }
MaskClustering/configs/scannet.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_15.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_15",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_25.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_25",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_35.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_35",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_35_bulat.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_35_bulat",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_45.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_45",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_45_andrey.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_45_andrey",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_posed_45_bulat.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_posed_45_bulat",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_unposed_15.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_unposed_15",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_unposed_25.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_unposed_25",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_unposed_35.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_unposed_35",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannet_dust3r_unposed_45.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "scannet_dust3r_unposed_45",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/configs/scannetpp.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp",
8
+ "cropformer_path": "/raid/miyan/ckpt/Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_dust3r_filtered_depth.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_dust3r_filtered_depth",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_dust3r_posed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_dust3r_posed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_dust3r_unposed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_dust3r_unposed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_mapanything_posed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_mapanything_posed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_v2_dust3r_posed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_v2_dust3r_posed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/scannetpp_v2_dust3r_unposed.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.4,
3
+ "undersegment_filter_threshold": 0.2,
4
+ "view_consensus_threshold": 1,
5
+ "contained_threshold": 0.9,
6
+ "point_filter_threshold": 0.7,
7
+ "dataset": "scannetpp_v2_dust3r_unposed",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 2
10
+ }
MaskClustering/configs/wild.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mask_visible_threshold": 0.3,
3
+ "undersegment_filter_threshold": 0.3,
4
+ "view_consensus_threshold": 0.9,
5
+ "contained_threshold": 0.8,
6
+ "point_filter_threshold": 0.5,
7
+ "dataset": "wild",
8
+ "cropformer_path": "Mask2Former_hornet_3x_576d0b.pth",
9
+ "step": 10
10
+ }
MaskClustering/dataset/demo.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import open3d as o3d
2
+ import numpy as np
3
+ import os
4
+ import cv2
5
+ from evaluation.constants import SCANNET_LABELS, SCANNET_IDS
6
+
7
+ class DemoDataset:
8
+
9
+ def __init__(self, seq_name) -> None:
10
+ self.seq_name = seq_name
11
+ self.root = f'./data/demo/{seq_name}'
12
+ self.rgb_dir = f'{self.root}/color_640'
13
+ self.depth_dir = f'{self.root}/depth'
14
+ self.segmentation_dir = f'{self.root}/output/mask'
15
+ self.object_dict_dir = f'{self.root}/output/object'
16
+ self.point_cloud_path = f'{self.root}/{seq_name}_vh_clean_2.ply'
17
+ self.mesh_path = self.point_cloud_path
18
+ self.extrinsics_dir = f'{self.root}/pose'
19
+
20
+ self.depth_scale = 1000.0
21
+ self.image_size = (640, 480)
22
+
23
+
24
+ def get_frame_list(self, stride):
25
+ image_list = os.listdir(self.rgb_dir)
26
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
27
+
28
+ end = int(image_list[-1].split('.')[0]) + 1
29
+ frame_id_list = np.arange(0, end, stride)
30
+ return list(frame_id_list)
31
+
32
+
33
+ def get_intrinsics(self, frame_id):
34
+ intrinsic_path = f'{self.root}/intrinsic_640.txt'
35
+ intrinsics = np.loadtxt(intrinsic_path)
36
+
37
+ intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
38
+ intrinisc_cam_parameters.set_intrinsics(640, 480, intrinsics[0, 0], intrinsics[1, 1], intrinsics[0, 2], intrinsics[1, 2])
39
+ return intrinisc_cam_parameters
40
+
41
+
42
+ def get_extrinsic(self, frame_id):
43
+ pose_path = os.path.join(self.extrinsics_dir, str(frame_id) + '.txt')
44
+ pose = np.loadtxt(pose_path)
45
+ return pose
46
+
47
+
48
+ def get_depth(self, frame_id):
49
+ depth_path = os.path.join(self.depth_dir, str(frame_id) + '.png')
50
+ depth = cv2.imread(depth_path, -1)
51
+ depth = depth / self.depth_scale
52
+ depth = depth.astype(np.float32)
53
+ return depth
54
+
55
+
56
+ def get_rgb(self, frame_id, change_color=True):
57
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
58
+ rgb = cv2.imread(rgb_path)
59
+
60
+ if change_color:
61
+ rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
62
+ return rgb
63
+
64
+
65
+ def get_segmentation(self, frame_id, align_with_depth=False):
66
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
67
+ if not os.path.exists(segmentation_path):
68
+ assert False, f"Segmentation not found: {segmentation_path}"
69
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
70
+ return segmentation
71
+
72
+
73
+ def get_frame_path(self, frame_id):
74
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
75
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
76
+ return rgb_path, segmentation_path
77
+
78
+
79
+ def get_label_features(self):
80
+ label_features_dict = np.load(f'data/text_features/scannet.npy', allow_pickle=True).item()
81
+ return label_features_dict
82
+
83
+
84
+ def get_scene_points(self):
85
+ mesh = o3d.io.read_point_cloud(self.point_cloud_path)
86
+ vertices = np.asarray(mesh.points)
87
+ return vertices
88
+
89
+
90
+ def get_label_id(self):
91
+ self.class_id = SCANNET_IDS
92
+ self.class_label = SCANNET_LABELS
93
+
94
+ self.label2id = {}
95
+ self.id2label = {}
96
+ for label, id in zip(self.class_label, self.class_id):
97
+ self.label2id[label] = id
98
+ self.id2label[id] = label
99
+
100
+ return self.label2id, self.id2label
MaskClustering/dataset/matterport.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import open3d as o3d
2
+ import numpy as np
3
+ import os
4
+ import cv2
5
+ from evaluation.constants import MATTERPORT_LABELS, MATTERPORT_IDS
6
+
7
+ class MatterportDataset:
8
+ def __init__(self, seq_name) -> None:
9
+ self.seq_name = seq_name
10
+ self.root = f'./data/matterport3d/scans/{seq_name}/{seq_name}'
11
+ self.rgb_dir = f'{self.root}/undistorted_color_images'
12
+ self.depth_dir = f'{self.root}/undistorted_depth_images'
13
+ self.cam_param_dir = f'{self.root}/undistorted_camera_parameters/{seq_name}.conf'
14
+ self.point_cloud_path = f'{self.root}/house_segmentations/{seq_name}.ply'
15
+ self.mesh_path = self.point_cloud_path
16
+ self.rgb_names, self.depth_names, self.intrinsics, self.extrinsics = \
17
+ self._obtain_intr_extr()
18
+
19
+ # output
20
+ self.segmentation_dir = f'{self.root}/output/mask/'
21
+ self.object_dict_dir = f'{self.root}/output/object'
22
+
23
+ self.depth_scale = 4000.0 # (0.25mm per unit) 1u = 1/4000 m
24
+ self.image_size = (1280, 1024)
25
+
26
+
27
+ def get_frame_list(self, step):
28
+ image_list = [os.path.join(self.rgb_dir, rgb_name) for rgb_name in self.rgb_names]
29
+
30
+ end = len(image_list)
31
+ frame_id_list = np.arange(0, end, step)
32
+ return list(frame_id_list)
33
+
34
+
35
+ def _obtain_intr_extr(self):
36
+ '''Obtain the intrinsic and extrinsic parameters of Matterport3D.'''
37
+
38
+ with open(self.cam_param_dir, 'r') as file:
39
+ lines = file.readlines()
40
+
41
+ def remove_items(test_list, item):
42
+ return [i for i in test_list if i != item]
43
+
44
+ intrinsics = []
45
+ extrinsics = []
46
+ img_names = []
47
+ depth_names = []
48
+ for i, line in enumerate(lines):
49
+ line = line.strip()
50
+ if 'intrinsics_matrix' in line:
51
+ line = line.replace('intrinsics_matrix ', '')
52
+ line = line.split(' ')
53
+ line = remove_items(line, '')
54
+ if len(line) !=9:
55
+ print('[WARN] something wrong at {}'.format(i))
56
+ intrinsic = np.asarray(line).astype(float).reshape(3, 3)
57
+ intrinsics.extend([intrinsic, intrinsic, intrinsic, intrinsic, intrinsic, intrinsic])
58
+ elif 'scan' in line:
59
+ line = line.split(' ')
60
+ img_names.append(line[2])
61
+ depth_names.append(line[1])
62
+
63
+ line = remove_items(line, '')[3:]
64
+ if len(line) != 16:
65
+ print('[WARN] something wrong at {}'.format(i))
66
+ extrinsic = np.asarray(line).astype(float).reshape(4, 4)
67
+ extrinsic[:3, 1] *= -1.0 # gl2cv
68
+ extrinsic[:3, 2] *= -1.0
69
+ extrinsics.append(extrinsic)
70
+
71
+ intrinsics = np.stack(intrinsics, axis=0)
72
+ extrinsics = np.stack(extrinsics, axis=0)
73
+ img_names = np.asarray(img_names)
74
+
75
+ return img_names, depth_names, intrinsics, extrinsics
76
+
77
+
78
+ def get_intrinsics(self, frame_id):
79
+ K = self.intrinsics[frame_id]
80
+ intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
81
+ intrinisc_cam_parameters.set_intrinsics(self.image_size[0], self.image_size[1], K[0, 0], K[1, 1], K[0, 2], K[1, 2])
82
+ return intrinisc_cam_parameters
83
+
84
+
85
+ def get_extrinsic(self, frame_id):
86
+ return self.extrinsics[frame_id]
87
+
88
+
89
+ def get_depth(self, frame_id):
90
+ depth_path = os.path.join(self.depth_dir, self.depth_names[frame_id])
91
+ depth = cv2.imread(depth_path, -1).astype(np.uint16)
92
+ depth = depth / self.depth_scale
93
+ depth = depth.astype(np.float32)
94
+ return depth
95
+
96
+
97
+ def get_rgb(self, frame_id, change_color=True):
98
+ rgb = cv2.imread(os.path.join(self.rgb_dir, self.rgb_names[frame_id]))
99
+ if change_color:
100
+ rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
101
+ return rgb
102
+
103
+
104
+ def get_segmentation(self, frame_id, align_with_depth=False):
105
+ frame_name = self.rgb_names[frame_id][:-4]
106
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_name}.png')
107
+ if not os.path.exists(segmentation_path):
108
+ assert False, f"Segmentation not found: {segmentation_path}"
109
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
110
+ return segmentation
111
+
112
+
113
+ def get_frame_path(self, frame_id):
114
+ rgb_path = os.path.join(self.rgb_dir, self.rgb_names[frame_id])
115
+ frame_name = self.rgb_names[frame_id][:-4]
116
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_name}.png')
117
+ return rgb_path, segmentation_path
118
+
119
+
120
+ def get_label_features(self):
121
+ label_features_dict = np.load(f'data/text_features/matterport3d.npy', allow_pickle=True).item()
122
+ return label_features_dict
123
+
124
+
125
+ def get_scene_points(self):
126
+ mesh = o3d.io.read_point_cloud(self.point_cloud_path)
127
+ vertices = np.asarray(mesh.points)
128
+ return vertices
129
+
130
+
131
+ def get_label_id(self):
132
+ self.label2id = {}
133
+ self.id2label = {}
134
+ for label, id in zip(MATTERPORT_LABELS, MATTERPORT_IDS):
135
+ self.label2id[label] = id
136
+ self.id2label[id] = label
137
+ return self.label2id, self.id2label
MaskClustering/dataset/scannet.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import open3d as o3d
2
+ import numpy as np
3
+ import os
4
+ import cv2
5
+ from evaluation.constants import SCANNET_LABELS, SCANNET_IDS, SCANNET18_LABELS, SCANNET18_IDS, SCANNETPP84_IDS, SCANNETPP84_LABELS, SCANNET20_LABELS, SCANNET20_IDS, ARKIT_LABELS, ARKIT_IDS
6
+
7
+ class ScanNetDataset:
8
+
9
+ def __init__(self, seq_name, root='data/scannet', use_templates=False) -> None:
10
+ self.seq_name = seq_name
11
+ self.use_templates = use_templates
12
+ self.root = os.path.join(root, 'processed', seq_name)
13
+ self.rgb_dir = f'{self.root}/color'
14
+ self.depth_dir = f'{self.root}/depth'
15
+ self.segmentation_dir = f'{self.root}/output/mask'
16
+ self.object_dict_dir = f'{self.root}/output/object'
17
+ self.point_cloud_path = f'{self.root}/{seq_name}_vh_clean_2.ply'
18
+ self.mesh_path = self.point_cloud_path
19
+ self.extrinsics_dir = f'{self.root}/pose'
20
+ self.intrinsic_dir = f'{self.root}/intrinsic'
21
+ self.label_features_dict = None
22
+
23
+ self.depth_scale = 1000.0
24
+ self.image_size = self.get_image_size()
25
+ self.depth_size = self.get_depth_shape()
26
+
27
+
28
+ def get_frame_list(self, stride):
29
+ image_list = os.listdir(self.rgb_dir)
30
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
31
+
32
+ end = int(image_list[-1].split('.')[0]) + 1
33
+ frame_id_list = [int(a.split('.')[0]) for a in image_list]
34
+ return list(frame_id_list)
35
+
36
+ def get_image_size(self):
37
+ image_list = os.listdir(self.rgb_dir)
38
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
39
+ image_path = os.path.join(self.rgb_dir, image_list[0])
40
+ image = cv2.imread(image_path)
41
+ return image.shape[:2][::-1]
42
+
43
+ def get_depth_shape(self):
44
+ image_list = os.listdir(self.rgb_dir)
45
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
46
+ depth_path = os.path.join(self.depth_dir, f"{image_list[0].split('.')[0]}.png")
47
+ depth = cv2.imread(depth_path, -1)
48
+ return depth.shape[:2][::-1]
49
+
50
+ def get_intrinsics(self, frame_id):
51
+ intrinsic_path = f'{self.intrinsic_dir}/intrinsic_depth.txt'
52
+ intrinsics = np.loadtxt(intrinsic_path)
53
+
54
+ intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
55
+ intrinisc_cam_parameters.set_intrinsics(self.image_size[0], self.image_size[1], intrinsics[0, 0], intrinsics[1, 1], intrinsics[0, 2], intrinsics[1, 2])
56
+ return intrinisc_cam_parameters
57
+
58
+
59
+ def get_extrinsic(self, frame_id):
60
+ pose_path = os.path.join(self.extrinsics_dir, str(frame_id) + '.txt')
61
+ pose = np.loadtxt(pose_path)
62
+ return pose
63
+
64
+
65
+ def get_depth(self, frame_id):
66
+ depth_path = os.path.join(self.depth_dir, str(frame_id) + '.png')
67
+ depth = cv2.imread(depth_path, -1)
68
+ depth = depth / self.depth_scale
69
+ depth = depth.astype(np.float32)
70
+ return depth
71
+
72
+
73
+ def get_rgb(self, frame_id, change_color=True):
74
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
75
+ rgb = cv2.imread(rgb_path)
76
+
77
+ if change_color:
78
+ rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
79
+ return rgb
80
+
81
+
82
+ def get_segmentation(self, frame_id, align_with_depth=False):
83
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
84
+ if not os.path.exists(segmentation_path):
85
+ assert False, f"Segmentation not found: {segmentation_path}"
86
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
87
+ if align_with_depth:
88
+ segmentation = cv2.resize(segmentation, self.depth_size, interpolation=cv2.INTER_NEAREST)
89
+ return segmentation
90
+
91
+
92
+ def get_frame_path(self, frame_id):
93
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
94
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
95
+ return rgb_path, segmentation_path
96
+
97
+
98
+ def get_label_features(self):
99
+ if self.label_features_dict is None:
100
+ if self.use_templates:
101
+ label_features_dict = np.load(f'data/text_features/scannet_templates.npy', allow_pickle=True).item()
102
+ else:
103
+ label_features_dict = np.load(f'data/text_features/scannet.npy', allow_pickle=True).item()
104
+ self.label_features_dict = label_features_dict
105
+ return self.label_features_dict
106
+
107
+
108
+ def get_scene_points(self):
109
+ mesh = o3d.io.read_point_cloud(self.point_cloud_path)
110
+ vertices = np.asarray(mesh.points)
111
+ return vertices
112
+
113
+
114
+ def get_label_id(self):
115
+ self.class_id = SCANNET_IDS
116
+ self.class_label = SCANNET_LABELS
117
+
118
+ self.label2id = {}
119
+ self.id2label = {}
120
+ for label, id in zip(self.class_label, self.class_id):
121
+ self.label2id[label] = id
122
+ self.id2label[id] = label
123
+
124
+ return self.label2id, self.id2label
125
+
126
+
127
+ class ARKitDataset(ScanNetDataset):
128
+ def __init__(self, seq_name, root='data/arkit_dust3r_posed'):
129
+ super().__init__(seq_name, root)
130
+ self.image_size = self.get_image_size()
131
+
132
+ def get_image_size(self):
133
+ image_list = os.listdir(self.rgb_dir)
134
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
135
+ image_path = os.path.join(self.rgb_dir, image_list[0])
136
+ image = cv2.imread(image_path)
137
+ return image.shape[:2][::-1]
138
+
139
+ def get_frame_list(self, stride):
140
+ image_list = os.listdir(self.rgb_dir)
141
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
142
+
143
+ end = int(image_list[-1].split('.')[0]) + 1
144
+ frame_id_list = [a.split('.')[0] for a in image_list]
145
+ return list(frame_id_list)
146
+
147
+ def get_label_id(self):
148
+ self.class_id = ARKIT_IDS
149
+ self.class_label = ARKIT_LABELS
150
+
151
+ self.label2id = {}
152
+ self.id2label = {}
153
+ for label, id in zip(self.class_label, self.class_id):
154
+ self.label2id[label] = id
155
+ self.id2label[id] = label
156
+
157
+ return self.label2id, self.id2label
158
+
159
+ def get_label_features(self):
160
+ label_features_dict = np.load(f'data/text_features/arkit.npy', allow_pickle=True).item()
161
+ return label_features_dict
162
+
163
+ class ITWDataset(ARKitDataset):
164
+
165
+ def get_image_size(self):
166
+ image_list = os.listdir(self.rgb_dir)
167
+ image_list = sorted(image_list, key=lambda x: int(x.split('_')[0]))
168
+ image_path = os.path.join(self.rgb_dir, image_list[0])
169
+ image = cv2.imread(image_path)
170
+ return image.shape[:2][::-1]
171
+
172
+ def get_depth_shape(self):
173
+ image_list = os.listdir(self.rgb_dir)
174
+ image_list = sorted(image_list, key=lambda x: int(x.split('_')[0]))
175
+ depth_path = os.path.join(self.depth_dir, f"{image_list[0].split('.')[0]}.png")
176
+ depth = cv2.imread(depth_path, -1)
177
+ return depth.shape[:2][::-1]
178
+
179
+ def get_frame_list(self, stride):
180
+ image_list = os.listdir(self.rgb_dir)
181
+ image_list = sorted(image_list, key=lambda x: int(x.split('_')[0]))
182
+
183
+ frame_id_list = [a.split('.')[0] for a in image_list]
184
+ return list(frame_id_list)
185
+
186
+ def get_label_features(self):
187
+ label_features_dict = np.load(f'{self.root}/text_features.npy', allow_pickle=True).item()
188
+ return label_features_dict
189
+
190
+ def get_label_id(self):
191
+ text_features = self.get_label_features()
192
+
193
+ self.class_label = list(text_features.keys())
194
+ self.class_id = list(range(len(self.class_label)))
195
+
196
+ self.label2id = {}
197
+ self.id2label = {}
198
+ for label, id in zip(self.class_label, self.class_id):
199
+ self.label2id[label] = id
200
+ self.id2label[id] = label
201
+
202
+ return self.label2id, self.id2label
203
+
204
+ class WildDataset(ARKitDataset):
205
+ def __init__(self, seq_name, root):
206
+ self.root = os.path.join(root, seq_name)
207
+ self.rgb_dir = f'{self.root}/images'
208
+ self.depth_dir = f'{self.root}/depth'
209
+ self.segmentation_dir = f'{self.root}/output/mask'
210
+ self.object_dict_dir = f'{self.root}/output/object'
211
+ self.point_cloud_path = f'{self.root}/point_cloud.ply'
212
+ self.mesh_path = self.point_cloud_path
213
+ self.extrinsics_dir = f'{self.root}/pose'
214
+ self.intrinsic_dir = f'{self.root}/intrinsic'
215
+ self.label_features_dict = None
216
+
217
+ self.depth_scale = 1000.0
218
+ self.image_size = self.get_depth_shape()
219
+ self.depth_size = self.get_depth_shape()
220
+
221
+ def get_label_features(self):
222
+ label_features_dict = np.load(f'{self.root}/text_features.npy', allow_pickle=True).item()
223
+ return label_features_dict
224
+
225
+ def get_segmentation(self, frame_id, align_with_depth=False):
226
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
227
+ if not os.path.exists(segmentation_path):
228
+ assert False, f"Segmentation not found: {segmentation_path}"
229
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
230
+ segmentation = cv2.resize(segmentation, self.depth_size, interpolation=cv2.INTER_NEAREST)
231
+ return segmentation
232
+ def get_label_id(self):
233
+ text_features = self.get_label_features()
234
+
235
+ self.class_label = list(text_features.keys())
236
+ self.class_id = list(range(len(self.class_label)))
237
+
238
+ self.label2id = {}
239
+ self.id2label = {}
240
+ for label, id in zip(self.class_label, self.class_id):
241
+ self.label2id[label] = id
242
+ self.id2label[id] = label
243
+
244
+ return self.label2id, self.id2label
245
+
246
+
247
+ class ScannetPP2Dataset(ScanNetDataset):
248
+ def __init__(self, seq_name, root='data/scannetpp_dust3r_posed'):
249
+ super().__init__(seq_name, root)
250
+ self.image_size = self.get_image_size()
251
+ self.depth_size = self.get_depth_shape()
252
+
253
+ self.point_cloud_path = f'{self.root}/{seq_name}.ply'
254
+
255
+ def get_image_size(self):
256
+ image_list = os.listdir(self.rgb_dir)
257
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0].split('_')[1]))
258
+ image_path = os.path.join(self.rgb_dir, image_list[0])
259
+ image = cv2.imread(image_path)
260
+ return image.shape[:2][::-1]
261
+
262
+ def get_depth_shape(self):
263
+
264
+ image_list = os.listdir(self.rgb_dir)
265
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0].split('_')[1]))
266
+ depth_path = os.path.join(self.depth_dir, f"{image_list[0].split('.')[0]}.png")
267
+ depth = cv2.imread(depth_path, -1)
268
+ return depth.shape[:2][::-1]
269
+
270
+ def get_frame_list(self, stride):
271
+ image_list = os.listdir(self.rgb_dir)
272
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0].split('_')[1]))
273
+
274
+ frame_id_list = [a.split('.')[0] for a in image_list]
275
+ return list(frame_id_list)
276
+
277
+ def get_segmentation(self, frame_id, align_with_depth=False):
278
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
279
+ if not os.path.exists(segmentation_path):
280
+ assert False, f"Segmentation not found: {segmentation_path}"
281
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
282
+ segmentation = cv2.resize(segmentation, self.depth_size, interpolation=cv2.INTER_NEAREST)
283
+ return segmentation
284
+
285
+
286
+ def get_label_id(self):
287
+ self.class_id = SCANNETPP84_IDS
288
+ self.class_label = SCANNETPP84_LABELS
289
+
290
+ self.label2id = {}
291
+ self.id2label = {}
292
+ for label, id in zip(self.class_label, self.class_id):
293
+ self.label2id[label] = id
294
+ self.id2label[id] = label
295
+
296
+ return self.label2id, self.id2label
297
+
298
+ def get_label_features(self):
299
+ label_features_dict = np.load(f'data/text_features/scannetpp84.npy', allow_pickle=True).item()
300
+ return label_features_dict
301
+
302
+ def get_depth(self, frame_id):
303
+ depth_path = os.path.join(self.depth_dir, str(frame_id) + '.png')
304
+ depth = cv2.imread(depth_path, -1)
305
+ depth = depth / self.depth_scale
306
+ depth = depth.astype(np.float32)
307
+ return depth
308
+
309
+
310
+ def get_intrinsics(self, frame_id):
311
+ intrinsic_path = f'{self.intrinsic_dir}/intrinsic_depth.txt'
312
+ intrinsics = np.loadtxt(intrinsic_path)
313
+
314
+ intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
315
+ intrinisc_cam_parameters.set_intrinsics(self.image_size[0], self.image_size[1], intrinsics[0, 0], intrinsics[1, 1], intrinsics[0, 2], intrinsics[1, 2])
316
+ return intrinisc_cam_parameters
317
+
318
+
319
+ class ScanNet18Dataset:
320
+
321
+ def __init__(self, seq_name, root='data/scannet') -> None:
322
+ self.seq_name = seq_name
323
+ self.root = os.path.join(root, 'processed', seq_name)
324
+ self.rgb_dir = f'{self.root}/color'
325
+ self.depth_dir = f'{self.root}/depth'
326
+ self.segmentation_dir = f'{self.root}/output/mask'
327
+ self.object_dict_dir = f'{self.root}/output/object'
328
+ self.point_cloud_path = f'{self.root}/{seq_name}.ply'
329
+ self.mesh_path = self.point_cloud_path
330
+ self.extrinsics_dir = f'{self.root}/pose'
331
+ self.intrinsic_dir = f'{self.root}/intrinsic'
332
+
333
+ self.depth_scale = 1000.0
334
+ self.image_size = self.get_image_size()
335
+ self.depth_size = self.get_depth_shape()
336
+
337
+
338
+ def get_frame_list(self, stride):
339
+ image_list = os.listdir(self.rgb_dir)
340
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
341
+
342
+ end = int(image_list[-1].split('.')[0]) + 1
343
+ frame_id_list = [a.split('.')[0] for a in image_list]
344
+ return list(frame_id_list)
345
+
346
+ def get_image_size(self):
347
+ image_list = os.listdir(self.rgb_dir)
348
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
349
+ image_path = os.path.join(self.rgb_dir, image_list[0])
350
+ image = cv2.imread(image_path)
351
+ return image.shape[:2][::-1]
352
+
353
+ def get_depth_shape(self):
354
+ image_list = os.listdir(self.rgb_dir)
355
+ image_list = sorted(image_list, key=lambda x: int(x.split('.')[0]))
356
+ depth_path = os.path.join(self.depth_dir, f"{image_list[0].split('.')[0]}.png")
357
+ depth = cv2.imread(depth_path, -1)
358
+ return depth.shape[:2][::-1]
359
+
360
+ def get_intrinsics(self, frame_id):
361
+ intrinsic_path = f'{self.intrinsic_dir}/intrinsic_depth.txt'
362
+ intrinsics = np.loadtxt(intrinsic_path)
363
+
364
+ intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
365
+ intrinisc_cam_parameters.set_intrinsics(self.image_size[0], self.image_size[1], intrinsics[0, 0], intrinsics[1, 1], intrinsics[0, 2], intrinsics[1, 2])
366
+ return intrinisc_cam_parameters
367
+
368
+
369
+ def get_extrinsic(self, frame_id):
370
+ pose_path = os.path.join(self.extrinsics_dir, str(frame_id) + '.txt')
371
+ pose = np.loadtxt(pose_path)
372
+ return pose
373
+
374
+
375
+ def get_depth(self, frame_id):
376
+ depth_path = os.path.join(self.depth_dir, str(frame_id) + '.png')
377
+ depth = cv2.imread(depth_path, -1)
378
+ depth = depth / self.depth_scale
379
+ depth = depth.astype(np.float32)
380
+ return depth
381
+
382
+
383
+ def get_rgb(self, frame_id, change_color=True):
384
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
385
+ rgb = cv2.imread(rgb_path)
386
+
387
+ if change_color:
388
+ rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
389
+ return rgb
390
+
391
+
392
+ def get_segmentation(self, frame_id, align_with_depth=False):
393
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
394
+ if not os.path.exists(segmentation_path):
395
+ assert False, f"Segmentation not found: {segmentation_path}"
396
+ segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
397
+ segmentation = cv2.resize(segmentation, self.depth_size, interpolation=cv2.INTER_NEAREST)
398
+ return segmentation
399
+
400
+
401
+ def get_frame_path(self, frame_id):
402
+ rgb_path = os.path.join(self.rgb_dir, str(frame_id) + '.jpg')
403
+ segmentation_path = os.path.join(self.segmentation_dir, f'{frame_id}.png')
404
+ return rgb_path, segmentation_path
405
+
406
+
407
+ def get_label_features(self):
408
+ label_features_dict = np.load(f'data/text_features/scannet18.npy', allow_pickle=True).item()
409
+ return label_features_dict
410
+
411
+
412
+ def get_scene_points(self):
413
+ mesh = o3d.io.read_point_cloud(self.point_cloud_path)
414
+ vertices = np.asarray(mesh.points)
415
+ return vertices
416
+
417
+
418
+ def get_label_id(self):
419
+ self.class_id = SCANNET18_IDS
420
+ self.class_label = SCANNET18_LABELS
421
+
422
+ self.label2id = {}
423
+ self.id2label = {}
424
+ for label, id in zip(self.class_label, self.class_id):
425
+ self.label2id[label] = id
426
+ self.id2label[id] = label
427
+
428
+ return self.label2id, self.id2label
429
+
430
+
431
+ class ScanNet20Dataset(ScanNet18Dataset):
432
+
433
+ def __init__(self, *args, **kwargs) -> None:
434
+ super().__init__(*args, **kwargs)
435
+ self.point_cloud_path = f'{self.root}/{self.seq_name}_vh_clean_2.ply'
436
+
437
+ def get_label_features(self):
438
+ label_features_dict = np.load(f'/home/jovyan/users/lemeshko/Indoor/MaskClustering/data/text_features/scannet20.npy', allow_pickle=True).item()
439
+ return label_features_dict
440
+
441
+
442
+ def get_label_id(self):
443
+ self.class_id = SCANNET20_IDS
444
+ self.class_label = SCANNET20_LABELS
445
+
446
+ self.label2id = {}
447
+ self.id2label = {}
448
+ for label, id in zip(self.class_label, self.class_id):
449
+ self.label2id[label] = id
450
+ self.id2label[id] = label
451
+
452
+ return self.label2id, self.id2label
MaskClustering/dataset/scannetpp.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import open3d as o3d
2
+ import numpy as np
3
+ import os
4
+ import cv2
5
+ import collections
6
+ from evaluation.constants import SCANNETPP_LABELS, SCANNETPP_IDS
7
+ import torch
8
+
9
# Lightweight COLMAP record containers, mirroring the field layout of
# COLMAP's scripts/python/read_write_model.py text-format readers.
BaseImage = collections.namedtuple(
    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
BaseCamera = collections.namedtuple(
    "Camera", ["id", "model", "width", "height", "params"])
13
+
14
+
15
def qvec2rotmat(qvec):
    """Convert a COLMAP quaternion (w, x, y, z) into a 3x3 rotation matrix."""
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
    return np.array([
        [1 - 2 * y * y - 2 * z * z,
         2 * x * y - 2 * w * z,
         2 * z * x + 2 * w * y],
        [2 * x * y + 2 * w * z,
         1 - 2 * x * x - 2 * z * z,
         2 * y * z - 2 * w * x],
        [2 * z * x - 2 * w * y,
         2 * y * z + 2 * w * x,
         1 - 2 * x * x - 2 * y * y]])
26
+
27
+
28
class Image(BaseImage):
    """COLMAP image record with pose helpers."""

    def qvec2rotmat(self):
        """Rotation matrix corresponding to this image's quaternion."""
        return qvec2rotmat(self.qvec)

    @property
    def world_to_camera(self) -> np.ndarray:
        """4x4 homogeneous world-to-camera transform built from (qvec, tvec)."""
        pose = np.eye(4)
        pose[:3, :3] = qvec2rotmat(self.qvec)
        pose[:3, 3] = self.tvec
        return pose
40
+
41
+
42
class Camera(BaseCamera):
    """COLMAP camera record exposing the 3x3 intrinsic matrix."""

    # Models whose params begin with one shared focal length: (f, cx, cy, ...).
    _SINGLE_FOCAL = ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL",
                     "SIMPLE_RADIAL_FISHEYE", "RADIAL_FISHEYE")
    # Models whose params begin with separate focals: (fx, fy, cx, cy, ...).
    _DUAL_FOCAL = ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV",
                   "FOV", "THIN_PRISM_FISHEYE")

    @property
    def K(self):
        """Pinhole intrinsic matrix; raises NotImplementedError for other models."""
        K = np.eye(3)
        if self.model in self._SINGLE_FOCAL:
            K[0, 0] = self.params[0]
            K[1, 1] = self.params[0]
            K[0, 2] = self.params[1]
            K[1, 2] = self.params[2]
        elif self.model in self._DUAL_FOCAL:
            K[0, 0] = self.params[0]
            K[1, 1] = self.params[1]
            K[0, 2] = self.params[2]
            K[1, 2] = self.params[3]
        else:
            raise NotImplementedError
        return K
59
+
60
+
61
def read_images_text(path):
    """Parse a COLMAP images.txt file into a dict {image_id: Image}.

    Each image occupies two lines: a header line
    (IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME) followed by one line of
    2D observations as (X, Y, POINT3D_ID) triples.
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            # Skip blank lines and '#' comments.
            if not line or line.startswith("#"):
                continue
            elems = line.split()
            image_id = int(elems[0])
            qvec = np.array(tuple(map(float, elems[1:5])))
            tvec = np.array(tuple(map(float, elems[5:8])))
            camera_id = int(elems[8])
            image_name = elems[9]
            # The next line carries the (x, y, point3D_id) triples.
            pts = fid.readline().split()
            xys = np.column_stack([tuple(map(float, pts[0::3])),
                                   tuple(map(float, pts[1::3]))])
            point3D_ids = np.array(tuple(map(int, pts[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images
85
+
86
+
87
def read_cameras_text(path):
    """Parse a COLMAP cameras.txt file into a dict {camera_id: Camera}.

    see: src/base/reconstruction.cc
    void Reconstruction::WriteCamerasText(const std::string& path)
    void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, "r") as fid:
        for raw in fid:
            line = raw.strip()
            # Skip blank lines and '#' comments.
            if not line or line.startswith("#"):
                continue
            elems = line.split()
            camera_id = int(elems[0])
            cameras[camera_id] = Camera(
                id=camera_id,
                model=elems[1],
                width=int(elems[2]),
                height=int(elems[3]),
                params=np.array(tuple(map(float, elems[4:]))))
    return cameras
111
+
112
+
113
class ScanNetPPDataset:
    """ScanNet++ iPhone-capture dataset adapter for MaskClustering.

    Resolves per-scene paths under ./data/scannetpp, loads COLMAP camera
    metadata, and exposes frame-level accessors for RGB, rendered depth,
    2D segmentation masks, intrinsics, and extrinsics.
    """

    def __init__(self, seq_name) -> None:
        self.seq_name = seq_name
        self.root = f'./data/scannetpp/data/{seq_name}'
        self.rgb_dir = f'{self.root}/iphone/rgb'
        self.depth_dir = f'{self.root}/iphone/render_depth'
        self.segmentation_dir = f'{self.root}/output/mask'
        self.object_dict_dir = f'{self.root}/output/object'
        self.point_cloud_path = f'./data/scannetpp/pcld_0.25/{seq_name}.pth'
        self.load_meta_data()

        # Depth PNGs are divided by this in get_depth; presumably mm -> m.
        self.depth_scale = 1000.0
        # (width, height) used when building the o3d intrinsics object.
        self.image_size = (1920, 1440)


    def load_meta_data(self):
        """Read COLMAP cameras/images and populate frame ids, intrinsics, extrinsics."""
        self.frame_id_list = []

        cameras = read_cameras_text(os.path.join(self.root, 'iphone/colmap', "cameras.txt"))
        images = read_images_text(os.path.join(self.root, 'iphone/colmap', "images.txt"))
        # A single shared camera is assumed; only the first entry is used.
        camera = next(iter(cameras.values()))
        fx, fy, cx, cy = camera.params[:4]
        intrinsics = {}
        extrinsics = {}

        for _, image in (images.items()):
            # Frame id is parsed from names of the form 'frame_XXXXXX.jpg'.
            image_id = int(image.name.split('.')[0].split('_')[1])
            self.frame_id_list.append(image_id)
            world_to_camera = image.world_to_camera
            # Stored extrinsics are camera-to-world (inverse of COLMAP's pose).
            extrinsics[image_id] = np.linalg.inv(world_to_camera)
            intrinsics[image_id] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])

        self.extrinsics = extrinsics
        self.intrinsics = intrinsics


    def get_frame_list(self, stride):
        """Return every stride-th frame id, in images.txt order."""
        return self.frame_id_list[::stride]


    def get_intrinsics(self, frame_id):
        """Return the frame's intrinsics as an o3d PinholeCameraIntrinsic."""
        intrinsic_matrix = self.intrinsics[frame_id]

        intrinisc_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
        intrinisc_cam_parameters.set_intrinsics(self.image_size[0], self.image_size[1], intrinsic_matrix[0, 0], intrinsic_matrix[1, 1], intrinsic_matrix[0, 2], intrinsic_matrix[1, 2])
        return intrinisc_cam_parameters


    def get_extrinsic(self, frame_id):
        """Return the frame's 4x4 camera-to-world transform."""
        return self.extrinsics[frame_id]


    def get_depth(self, frame_id):
        """Load the rendered depth map for a frame, scaled by depth_scale."""
        depth_path = os.path.join(self.depth_dir, 'frame_%06d.png' % frame_id)
        depth = cv2.imread(depth_path, -1)
        depth = depth / self.depth_scale
        depth = depth.astype(np.float32)
        return depth


    def get_rgb(self, frame_id, change_color=True):
        """Load a frame's RGB image; converts BGR->RGB unless change_color=False."""
        rgb_path = os.path.join(self.rgb_dir, 'frame_%06d.jpg' % frame_id)
        rgb = cv2.imread(rgb_path)
        if change_color:
            rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        return rgb


    def get_segmentation(self, frame_id, align_with_depth=False):
        """Load a frame's segmentation mask.

        NOTE(review): align_with_depth is accepted but unused here (unlike the
        ScanNet variant); and `assert False` is stripped under `python -O` —
        an explicit raise would be more robust.
        """
        segmentation_path = os.path.join(self.segmentation_dir, 'frame_%06d.png' % frame_id)
        if not os.path.exists(segmentation_path):
            assert False, f"Segmentation not found: {segmentation_path}"
        segmentation = cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)
        return segmentation


    def get_frame_path(self, frame_id):
        """Return the (rgb_path, segmentation_path) pair for a frame."""
        rgb_path = os.path.join(self.rgb_dir, 'frame_%06d.jpg' % frame_id)
        segmentation_path = os.path.join(self.segmentation_dir, 'frame_%06d.png' % frame_id)
        return rgb_path, segmentation_path


    def get_label_features(self):
        """Load the precomputed text-feature dict for the ScanNet++ label set."""
        label_features_dict = np.load(f'data/text_features/scannetpp.npy', allow_pickle=True).item()
        return label_features_dict


    def get_scene_points(self):
        """Load the downsampled scene point coordinates from the .pth file."""
        data = torch.load(self.point_cloud_path)
        points = np.asarray(data['sampled_coords'])
        return points


    def get_label_id(self):
        """Build and cache the label<->id mappings for the ScanNet++ classes."""
        self.class_id = SCANNETPP_IDS
        self.class_label = SCANNETPP_LABELS

        self.label2id = {}
        self.id2label = {}
        for label, id in zip(self.class_label, self.class_id):
            self.label2id[label] = id
            self.id2label[id] = label

        return self.label2id, self.id2label
MaskClustering/dense_masks.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import cv2
4
+ from pathlib import Path
5
+ import trimesh as tm
6
+ from sklearn.neighbors import KDTree
7
+ from tqdm import tqdm
8
+ from tqdm.contrib.concurrent import thread_map
9
+ from sklearn.cluster import DBSCAN
10
+
11
+
12
def load_scan(pcd_path):
    """Load xyz coordinates from a packed float32 scan (6 floats per point)."""
    raw = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, 6)
    return raw[:, :3]
15
+
16
def process_scene(data):
    """Densify and clean one scene's predicted instance masks.

    Two passes over each predicted point mask:
      1. Densify: pull in the 5 nearest scan vertices of every masked point
         that lie within ``max_dist``.
      2. De-noise: run DBSCAN on the masked points and keep only the largest
         non-noise cluster.
    The cleaned prediction is saved as an .npz under the experiment folder.

    Args:
        data: (scene_id, exp_name) tuple.
    """
    scene_id, exp_name = data
    pred_path = Path(f"data/prediction/scannet/baseline_scannet200/{scene_id}.npz")
    out_path = Path(f"data/prediction/scannet/{exp_name}/{scene_id}.npz")
    scan_path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannet200/points/{scene_id}.bin")

    vertices = load_scan(scan_path)
    kd = KDTree(vertices)
    max_dist = 0.05  # neighbor radius (presumably meters) for densification

    base_data = np.load(pred_path, allow_pickle=True)
    total_points_masks = base_data['pred_masks'].T

    # Pass 1: densify each mask with nearby scan vertices.
    for i, mask in enumerate(total_points_masks):
        mask = mask.astype(bool)
        points = vertices[mask]
        if len(points) == 0:
            # KDTree.query fails on an empty query set; leave the mask as-is.
            continue
        dists, inds = kd.query(points, k=5)
        dists = dists.flatten()
        inds = inds.flatten()
        mask[inds[dists < max_dist]] = True
        total_points_masks[i] = mask

    # Pass 2: keep only the largest DBSCAN cluster of each mask.
    for i, mask in enumerate(total_points_masks):
        mask = mask.astype(bool)
        points = vertices[mask]
        new_mask = np.zeros_like(mask)
        if len(points) > 0:
            labels = DBSCAN(eps=0.3, min_samples=10).fit_predict(points)
            unique_labels, counts = np.unique(labels, return_counts=True)
            # Exclude DBSCAN noise (-1) before picking the biggest cluster.
            # Bug fix: np.argmax over the counts returns an *index into
            # unique_labels*, which was previously compared against the raw
            # labels — off by one whenever noise points existed.
            valid = unique_labels != -1
            if valid.any():
                biggest_label = unique_labels[valid][np.argmax(counts[valid])]
                new_mask[mask] = labels == biggest_label
        total_points_masks[i] = new_mask

    new_data = {
        k: v for k, v in base_data.items()
    }
    new_data['pred_masks'] = total_points_masks.T

    out_path.parent.mkdir(parents=True, exist_ok=True)
    print("uniques", np.unique(new_data['pred_masks'].sum(1)), [[k, v.shape] for k, v in new_data.items()])
    np.savez(out_path, **new_data)
81
+
82
+
83
+
84
if __name__ == "__main__":
    # Fan the per-scene densification/filtering out over a thread pool.
    exp_name = "dense_masks_cluster_filtering"
    split_path = "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/splits/scannet.txt"
    scenes = np.loadtxt(split_path, dtype=str)
    jobs = [(scene_id, exp_name) for scene_id in scenes]
    results = thread_map(process_scene, jobs, chunksize=20)
89
+
90
+
91
+
MaskClustering/evaluation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Module initialization
MaskClustering/evaluation/constants.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MATTERPORT_LABELS = ('door', 'picture', 'window', 'chair', 'pillow', 'lamp',
2
+ 'cabinet', 'curtain', 'table', 'plant', 'mirror', 'towel', 'sink', 'shelves', 'sofa',
3
+ 'bed', 'night stand', 'toilet', 'column', 'banister', 'stairs', 'stool', 'vase',
4
+ 'television', 'pot', 'desk', 'box', 'coffee table', 'counter', 'bench', 'garbage bin',
5
+ 'fireplace', 'clothes', 'bathtub', 'book', 'air vent', 'faucet', 'photo', 'toilet paper',
6
+ 'fan', 'railing', 'sculpture', 'dresser', 'rug', 'ottoman', 'bottle', 'refridgerator',
7
+ 'bookshelf', 'wardrobe', 'pipe', 'monitor', 'stand', 'drawer', 'container', 'light switch',
8
+ 'purse', 'door way', 'basket', 'chandelier', 'oven', 'clock', 'stove', 'washing machine',
9
+ 'shower curtain', 'fire alarm', 'bin', 'chest', 'microwave', 'blinds', 'bowl', 'tissue box',
10
+ 'plate', 'tv stand', 'shoe', 'heater', 'headboard', 'bucket', 'candle', 'flower pot',
11
+ 'speaker', 'furniture', 'sign', 'air conditioner', 'fire extinguisher', 'curtain rod',
12
+ 'floor mat', 'printer', 'telephone', 'blanket', 'handle', 'shower head', 'soap', 'keyboard',
13
+ 'thermostat', 'radiator', 'kitchen island', 'paper towel', 'sheet', 'glass', 'dishwasher',
14
+ 'cup', 'ladder', 'garage door', 'hat', 'exit sign', 'piano', 'board', 'rope', 'ball',
15
+ 'excercise equipment', 'hanger', 'candlestick', 'light', 'scale', 'bag', 'laptop', 'treadmill',
16
+ 'guitar', 'display case', 'toilet paper holder', 'bar', 'tray', 'urn', 'decorative plate', 'pool table',
17
+ 'jacket', 'bottle of soap', 'water cooler', 'utensil', 'tea pot', 'stuffed animal', 'paper towel dispenser',
18
+ 'lamp shade', 'car', 'toilet brush', 'doll', 'drum', 'whiteboard', 'range hood', 'candelabra', 'toy',
19
+ 'foot rest', 'soap dish', 'placemat', 'cleaner', 'computer', 'knob', 'paper', 'projector', 'coat hanger',
20
+ 'case', 'pan', 'luggage', 'trinket', 'chimney', 'person', 'alarm')
21
+
22
+ MATTERPORT_IDS = [28, 64, 59, 5, 119, 144, 3, 89, 19, 82, 122, 135, 24, 42, 83, 157, 158, 124, 94, 453,
23
+ 215, 150, 78, 172, 16, 36, 26, 356, 7, 204, 12, 372, 141, 136, 1, 25, 9, 508, 139, 74, 497, 294,
24
+ 169, 130, 359, 2, 17, 88, 772, 41, 49, 50, 174, 140, 301, 181, 609, 39, 342, 238, 56, 242, 278,
25
+ 123, 338, 307, 344, 13, 80, 22, 138, 233, 291, 149, 111, 161, 427, 137, 146, 54, 524, 208, 79,
26
+ 10, 582, 143, 66, 32, 312, 758, 650, 133, 47, 110, 236, 456, 113, 559, 612, 8, 35, 48, 850, 193,
27
+ 86, 298, 408, 560, 60, 457, 211, 148, 62, 639, 55, 37, 458, 300, 540, 647, 51, 179, 151, 383, 515,
28
+ 324, 502, 509, 267, 678, 177, 14, 859, 530, 630, 99, 145, 45, 380, 605, 389, 163, 638, 154, 548,
29
+ 46, 652, 15, 90, 400, 851, 589, 783, 844, 702, 331, 525]
30
+
31
+ SCANNET_LABELS = ['chair', 'table', 'door', 'couch', 'cabinet', 'shelf', 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window', 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair', 'coffee table', 'box',
32
+ 'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard', 'bag', 'backpack', 'toilet paper',
33
+ 'printer', 'tv stand', 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror', 'copier',
34
+ 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container',
35
+ 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder', 'bathroom stall', 'shower wall',
36
+ 'cup', 'jacket', 'storage bin', 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace', 'soap dish', 'kitchen counter', 'doorframe',
37
+ 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball', 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser',
38
+ 'furniture', 'cart', 'storage container', 'scale', 'tissue box', 'light switch', 'crate', 'power outlet', 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 'headphones', 'dish rack',
39
+ 'broom', 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent', 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand', 'projector screen', 'divider',
40
+ 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod',
41
+ 'coffee kettle', 'structure', 'shower head', 'keyboard piano', 'case of water bottles', 'coat rack', 'storage organizer', 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant', 'luggage', 'mattress']
42
+
43
+ SCANNET_IDS = [2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
44
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154,
45
+ 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 342, 356, 370, 392, 395, 399, 408, 417,
46
+ 488, 540, 562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191]
47
+
48
+
49
+ SCANNET18_LABELS = ['cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
50
+ 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
51
+ 'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin']
52
+
53
+ SCANNET18_IDS = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]
54
+
55
+ SCANNETPP_LABELS = ['door', 'table', 'cabinet', 'ceiling lamp', 'curtain', 'chair', 'blinds', 'storage cabinet', 'bookshelf', 'office chair', 'window', 'whiteboard', 'ceiling light', 'monitor', 'shelf', 'object', 'window frame', 'pipe', 'structure', 'box', 'heater', 'kitchen cabinet', 'storage rack', 'sofa', 'bed', 'shower wall', 'doorframe', 'door frame', 'roof', 'wardrobe', 'pillar', 'plant', 'blanket', 'machine', 'windowsill', 'linked retractable seats', 'window sill', 'cardboard box', 'tv', 'books', 'desk', 'computer tower', 'kitchen counter', 'trash can', 'trash bin', 'jacket', 'electrical duct', 'blackboard', 'cable tray', 'air duct', 'sink', 'carpet', 'bag', 'counter', 'refrigerator', 'picture', 'pillow', 'cupboard', 'window blind', 'towel', 'beam', 'office table', 'stool', 'suitcase', 'backpack', 'bathtub', 'rug', 'keyboard', 'rack', 'gym mat', 'toilet', 'suspended ceiling', 'shower floor', 'clothes', 'pipe storage rack', 'air conditioner', 'fume hood', 'printer', 'blind', 'poster', 'experiment bench', 'electrical control panel', 'shower curtain', 'windowframe', 'book', 'ceiling beam', 'painting', 'paper', 'ladder', 'laboratory bench', 'bench', 'milling machine', 'microwave', 'partition', 'board', 'office cabinet', 'rolling cart', 'laboratory cabinet', 'crate', 'raised floor', 'electrical panel', 'mattress', 'bottle', 'pedestal fan', 'sofa chair', 'headboard', 'fridge', 'bucket', 'kitchen unit', 'beanbag', 'oven', 'cushion', 'power socket', 'office desk', 'whiteboards', 'lab equipment', 'shoes', 'work bench', 'file cabinet', 'mirror', 'basket', 'beverage crate', 'washing machine', 'shoe rack', 'hydraulic press', 'photocopy machine', 'telephone', 'lab machine', 'sliding door', 'tv stand', 'objects', 'couch', 'coat', 'open cabinet', 'scientific equipment', 'coffee table', 'garage door', 'bin', 'radiator', 'standing lamp', 'stove', 'roller blinds', 'fume cupboard', 'pc', 'stairs', 'medical appliance', 'closet', 'trolley', 'file folder', 'projector', 'cloth', 
'conference table', 'cardboard', 'blind rail', 'dishwasher', 'room divider', 'copier', 'ventilation pipe', 'bathroom cabinet', 'laptop', 'electrical box', 'arm chair', 'bar counter', 'stage', 'ceiling ventilator', 'lounge chair', 'plant pot', 'bathroom stall', 'pinboard', 'comforter', '3d printer', 'steel beam', 'projector screen', 'electric duct', 'cart', 'air pipe', 'training equipment', 'floor mounted air conditioner', 'tile wall', 'glass wall', 'exhaust fan', 'vacuum cleaner', 'laundry basket', 'nightstand', 'armchair', 'drying rack', 'indoor crane', 'storage trolley', 'dresser', 'l-shaped sofa', 'coat hanger', 'dining chair', 'office visitor chair', 'interactive board', 'hose', 'light switch', 'shower ceiling', 'coffee machine', 'cables', 'floor lamp', 'fan', 'wire tray', 'compressor', 'laboratory equipment', 'dining table', 'speaker', 'climbing wall', 'light', 'paper towel dispenser', 'coat rack', 'table lamp', 'frame', 'duvet', 'fire extinguisher', 'range hood', 'high table', 'backdrop', 'ventilation duct', 'seat', 'tablecloth', 'electrical cabinet', 'ping pong table', 'bathroom floor', 'ceiling pipe', 'bedside table', 'coffee maker', 'computer desk', 'urinal', 'loft bed', 'air vent', 'chairs', 'bedsheet', 'television', 'lamp', 'rolling chair', 'wall cabinet', 'book shelf', 'brick wall', 'treadmill', 'vent', 'shirt', 'canopy bed', 'clothes hanger', 'kettle', 'shoe', 'high stool', 'tripod', 'bar stool', 'exhaust duct', 'wooden plank', 'squat rack', 'cubicle door', 'folding screen', 'kitchen sink', 'container', 'bottles', 'ottoman', 'bicycle', 'staircase railing', 'overhead projector', 'surfboard', 'folder', 'power strip', 'high bench', 'wall beam', 'pallet cage', 'interactive whiteboard', 'floor sofa', 'duct', 'flat panel display', 'wooden frame', 'folding room divider', 'cable', 'mug', 'rolling table', 'locker', 'standing banner', 'decoration', 'clothes drying rack', 'foosball table', 'standing poster', 'bath tub', 'yoga mat', 'microscope', 'paper bag', 
'mouse', 'umbrella', 'medical machine', 'smoke detector', 'cup', 'cutting board', 'console', 'drum', 'bathroom counter', 'toilet paper', 'robot car', 'exhaust pipe', 'bath cabinet', 'whiteboard stand', 'notice board', 'paper towel', 'crates', 'bed frame', 'bathroom mat', 'shower partition', 'cloth hangers', 'clothes cabinet', 'tv screen', 'babyfoot table', 'rolling curtain', 'coat stand', 'kitchen towel', 'plank', 'side table', 'storage shelf', 'mat', 'shower', 'white board', 'information board', 'backsplash', 'guitar', 'cloth rack', 'ceiling vent', 'partition wall', 'kitchen shelf', 'banner', 'file binder', 'cleaning trolley', 'racing simulator', 'workbench', 'pot', 'ac system', 'power panel', 'desk lamp', 'broom', 'cpu', 'fitted wardrobe', 'tote bag', 'plumbing pipe', 'slippers', 'blackboard frame', 'magazine', 'hose pipe', 'rolled paper', 'sweater', 'clock', 'tray', 'desk fan', 'vaccum cleaner', 'projection curtain', 'freezer display counter', 'pan', 'vase', 'glass', 'folders', 'tap', 'wall lamp', 'plate', 'laptop stand', 'small cabinet', 'file organizer', 'clothes dryer', 'wall painting', 'curtain rail', 'wheelchair', 'bottle crate', 'sheet', 'folding sofa', 'shower pan', 'plastic case', 'christmas tree', 'piano', 'ottoman chair', 'jar', 'foldable closet', 'notebook', 'calendar', 'janitor cart', 'storage box', 'rolling blinds', 'bathroom shelf', 'soap dispenser', 'binder', 'copy machine', 'rice cooker', 'gym mattress', 'car door', 'table football', 'bowl', 'light panel', 'tissue box', 'bedframe', 'wall hanging', 'jug', 'skylight', 'ceiling fan', 'dish rack', 'shelving cart', 'instant pot', 'whiteboard eraser', 'floor mat', 'socket', 'mini fridge', 'wall clock', 'boots', 'barbecue grill', 'paper shredder', 'file rack', 'floor scrubber', 'metal board', 'water heater', 'tool rack', 'recliner', 'barber chair', 'ventilator', 'trolley table', 'standing fan', 'water filter', 'shoes holder', 'vr setup', 'trashcan', 'bike', 'lab materials', 'wooden pallet', 'dustbin', 
'curtain rod', 'reflection', 'toilet brush', 'exercise ball', 'air purifier', 'kitchen back splash', 'paper rack', 'toolbox', 'monitor cover', 'file', 'surfsuit', 'night stand', 'paper organizer', 'serving trolley', 'phone', 'canvas', 'camping bed', 'tower pc', 'cylinder', 'magazine stand', 'toy', 'slipper', 'air conditioning', 'hanger', 'vertical blinds', 'desk organizer', 'guitar bag', 'spray bottle', 'suit cover', 'toaster', 'spotlight', 'machine container', 'foot rest', 'shopping trolley', 'decoration piece', 'control panel', 'multifunction printer', 'jerry can', 'window head', 'cooker hood', 'basin', 'panel', 'papasan chair', 'tv mount', 'toilet seat', 'shopping bag', 'photocopier', 'tube', 'studio light', 'stuffed toy', 'cord cover', 'power cabinet', 'filer organizer', 'garage shelf', 'luggage', 'gym bag', 'exhaust hood', 'microwave oven', 'floor cushion', 'easy chair', 'bar table', 'shoe cabinet', 'paper tray', 'lab coat', 'toilet paper dispenser', 'kitchen storage rack', 'equipment', 'computer table', 'mouse pad', 'drawer', 'headphones', 'bathroom sink', 'outlet', 'toaster oven', 'tv table', 'bedside cabinet', 'rolling trolley', 'step stool', 'trousers', 'bathroom rack', 'shelf trolley', 'glass shelf', 'fabric', 'lab fridge', 'work station', 'barrel', 'mop', 'deck chair', 'bath counter', 'helmet', 'standing clothes hanger', 'garbage bin', 'study table', 'air fryer', 'plastic bag', 'oven range', 'headphone', 'kitchen counter top', 'clothes rack', 'wall unit', 'grab bar', 'flipchart', 'scarf', 'labcoat', 'hat', 'bedside lamp', 'sewing machine table', 'shower head', 'switchboard cabinet', 'flip paper', 'storage container', 'canister', 'wall board', 'shower rug', 'plastic box', 'stovetop', 'information stand', 'footstool', 'pack', 'push cart', 'table cloth', 'celing lamp', 'cupoard', 'jeans', 'smoke alarm', 'bath mat', 'softbox', 'whiteboard mount', 'paper cutter', 'cable raceway', 'water kettle', 'pelican case', 'towel rack', 'rolling shelf cart', 'built-in 
shelf', 'equipment cover', 'television stand', 'sheets', 'small dresser', 'light stand', 'beach umbrella', 'faucet', 'bagpack', 'dumbbell', 'water dispenser', 'medicine cabinet', 'tv console', 'mirror frame', 'chandelier', 'pen holder', 'messenger bag', 'ball', 'glass bottle', 'softbox light', 'gym ball', 'briefcase', 'plastic bottle', 'monitor stand', 'human skeleton', 'podium', 'wall strip', 'tablet', 'bedside shelf', 'headrail', 'sink counter', 'doormat', 'baseboard', 'bulletin board', 'electric hob', 'bean bag', 'high pressure cylinder', 'portable fan', 'flush button', 'wooden post', 'lectern', 'curtain frame', 'computer monitor', 'folding chair', 'tabletop', 'led ceiling fan', 'high chair', 'grill', 'metal rack', 'air conditioner tower', 'sliding door frame', 'cable rack', 'bench press', 'ironing board', 'wooden palette', 'kitchenware', 'blind rails', 'plastic container', 'weighing scale', 'headset', 'tree trunk', 'shower screen', 'wall shelf', 'watering can', 'tool box', 'bed sheet', 'glass pane', 'tower fan', 'switch', 'notice', 'sack', 'table mat', 'flower pot', 'dog bed', 'laundry hanger', 'mobile tv stand', 'file holder', 'floor couch', 'tv trolley', 'chopping board', 'centrifuge', 'tubelight', 'bedpost', 'step', 'center table', 'upholstered bench', 'sink pipe', 'door mat', 'storage bin', 'towel radiator', 'shower tray', 'electronic appliance', 'boiler', 'food container', 'cable pathway', 'carboard box', 'metal sheet', 'hand bag', 'sign', 'laundry rack', 'screen', 'cardbox', 'fireplace surround', 'boot', 'envelope', 'carton', 'tool organizer', 'paper roll', 'water bottle', 'shoe changing stool', 'balcony door', 'espresso machine', 'water pipe', 'recesssed shelf', 'drum set', 'skiboard', 'speaker stand', 'kitchen wall', 'suit', 'photo', 'globe', 'spice rack', 'delivery bag', 'router', 'rolling blind', 'easel', 'shower cubicle', 'dish drainer', 'doorway', 'folded table', 'pants', 'computer', 'stuffed animal', 'office chair', 'cable conduit', 'picture 
frame', 'shoe stool', 'recessed shelve', 'toilet paper holder', 'panelboard', 'stapler', 'skateboard', 'workshop tool', 'projector holder', 'flag', 'chemical canister', 'web cam', 'hoodie', 'towel heater', 'towel warmer', 'shower curtain rod', 'shower faucet', 'shower door', 'laboratory power supply', 'tool', 'ventilation', 'soap bottle', 'bathrobe', 'pictures board', 'cap', 'woofer', 'tshirt', 'rolling bag', 'shoe box', 'luggage bag', 'file storage', 'cat bed', 'stack of paper', 'surfing board', 'electric kettle', 'rolling stand', 'cover', 'main switchboard', 'pressure cooker', 'stepladder', 'countertop', 'flip flops', 'short table', 'sit-up pillow', 'duffel bag', 'shower seating', 'washbasin', 'teddy bear', 'stair', 'plate rack', 'ornament', 'jerrycan', 'filter jug', 't shirt', 'cooking pot', 'platform trolley', 'blinds rod', 'hand shower', 'power socket unit', 'sheep doll', 'laptop bag', 'game console', 'bottles case', 'lid', 'dumbbell case', 'rolled blanket', 'paper stapler', 'kitchen pot', 'charcoal bag', 'laundry hamper', 'rolled poster', 'bath towel', 'apron', 'dustpan', 'trash bag', 'document tray', 'camera', 'mirror cabinet', 'dish drying rack', 'gas tank', 'cable roller', 'case', 'ring light', 'hair dryer', 'gym plate', 'hand towel', 'sill', 'sidetable', 'vice', 'bench stool', 'billboard', 'rolling cabinet', 'shower sink', 'cloth piece', 'oscilloscope', 'magazine rack', 'wash basin', 'cable panel', 'photo frame', 'tv receiver', 'stand', 'milk jug', 'wooden board', 'bladeless fan', 'door frame', 'wall paper', 'scale', 'purse', 'electronic device', 'sofa cushion', 'sponge', 'dish washer', 'crate trolley', 'kitchen hood', 'laundry vent', 'medical stool', 'exhaustive fan', 'portable ladder', 'chemical container', 'toilet paper rolls', 'rag', 'blender', 'window pane', 'dog bowl', 'shopping basket', 'piano stool', 'electric box', 'wall calendar', 'paper holder', 'chemical bottle', 'sandals', 'foreman grill', 'guitar case', 'heater tube', 'running shoes', 
'shower tap', 'cloth hanger', 'microphone', 'cabinet frame', 'decorative object', 'light fixture', 'ceiling lamp bar', 'paperbag', 'chemical barrel', 'wicker basket', 'exit sign', 'bottles rack', 'water jug', 'bottle carrier', 'laboratory pellet press', 'mini oven', 'shower arm', 'paper tube', 'suitcase stand', 'table fan', 'shelve', 'full-length mirror', 'wood piece', 'can', 'suit bag', 'water bubbler', 'first aid kit', 'kitchen robot', 'toilet flush button', 'pillow toy', 'plush doll', 'styrofoam box', 'document organizer', 'pet carrier', 'folding table', 'gloves', 'pitcher', 'cable spool', 'rolled cable', 'folding umbrella', 'robot vacuum cleaner', 'tower ventilator', 'brush', 'planter', 'baseball cap', 'gas cylinder', 'stereo', 'baby stroller', 'water bucket', 'rucksack', 'shower door frame', 'drone', 'kitchen cloth', 'hand soap dispenser', 'pegboard', 'alarm', 'emergency light', 'sign board', 'weight plate', 'rolled projection screen', 'laptop table', 'hole puncher', 'mixer', 'piano chair', 'paper bin', 'wooden stick', 'fireplace', 'rolled backdrop', 'mousepad', 'long pillow', 'bananas', 'column', 'cd player', 'eraser', 'laptop sleeve', 'hairdryer', 'seat cushion', 'plant pot mat', 'wastebin', 'wood panel', 'detergent bottle', 'safe box', 'pouch', 'blind rod', 'mop basin', 'plug', 'document holder', 'railing', 'plastic drum', 'cat tree', 'mirror light', 'kettlebell', 'chart', 'dust pan', 'sandal', 'first aid cabinet', 'bracket', 'wire', 'scooter', 'racing wheel', 'wine rack', 'belt', 'tissue dispenser stand', 'towel paper dispenser', 'air heater', 'plushie', 'knife set', 'iron', 'intercom', 'kitchen ceiling', 'package', 'toilet paper dispensor', 'sneakers', 'umbrella stand', 'egg carton', 'organizer', 'stick', 'shampoo bottle', 'cone', 'file tray', 'wooden plan', 'cooking pan', 'brief', 'paper towel package', 'glove dispenser', 'dispenser bottle', 'kitchen drawer', 'remote control', 'powerstrip', 'emergency shower', 'scanner', 'towel holder', 'vr headset', 
'watering pot', 'soda machine', 'pole stand', 'roomba', 'rolling mat', 'fluorescent lamp', 'flower', 'pc tower', 'metal mount', 'oven gloves', 'soap', 'flush tank', 'notepad', 'pull up bar', 'loafers', 'water meter cover', 'plastic tray', 'webcam', 'barbell', 'tea pot', 'wall hanger', 'cabinet base panel', 'laptop case', 'paper towel holder', 'skeleton', 'socket extender', 'extension chord reel', 'wooden crate', 'guillotine paper cutter', 'sewing machine', 'model car', 'bed cover', 'storage', 'freezer', 'piano book', 'wifi router', 'overhead shower', 'chrismas tree', 'drill', 'clothes drying stand', 'dumbell', 'cabel', 'badminton racket', 'cool box', 'thermostat', 'dartboard', 'switchboard cover', 'candle', 'electric stove', 'paper ram', 'insulated can', 'prosthetic leg', 'desk power strip', 'closet rail', 'water pitcher', 'plate weights', 'extension cord', 'tea box', 'tissue', 'sauce pan', 'toothbrush', 'frying pan', 'package of paper', 'microphone stand', 'socket box', 'leather mattress', 'plastic can', 'garbage bin cover', 'plush toy', 'electric guitar', 'weight scale', 'fruit', 'tape dispenser', 'pallet', 'emergency kit', 'bathroom mirror', 'door lamp', 'power extension', 'electric circuit board', 'oven panel', 'electrical pipe', 'shade', 'photoframe', 'file stack', 'pizza box', 'tennis racket', 'wall hook', 'recycle bag', 'recessed shower shelve', 'wall mounted telephone', 'facsimile', 'kitchen roll', 'totebag', 'floor cleaner', 'body weight scale', 'stuffed animal door insulator', 'neck pillow', 'basketball', 'cable wheel', 'door vent', 'foot massager', 'pumper', 'hanging light fixture', 'interphone', 'paper towel roll', 'control unit', 'folder oragnizer', 'hanging frame', 'paper stack', 'door handle', 'mini shelf', 'beverage carton', 'wok pan', 'bedside counter', 'bar', 'pot lid', 'radio', 'paper box', 'stabilizer', 'headphone case', 'folded cardboard box', 'coaster', 'pen tray', 'marker', 'shower mat', 'rod', 'device', 'electric pot', 'mixer machine', 
'barstool', 'bread toaster', 'electric mixer', 'wall cord cover', 'camera bag', 'charger', 'knife', 'metal frame', 'folded bag', 'conduit pipe', 'french press', 'cabinet side panel', 'cosmetic bag', 'surveillance camera', 'bathroom holder', 'footrest', 'glasses case', 'handbag', 'cooling pad', 'flip flop', 'game controller', 'packet of toilet paper', 'cable duct', 'toilet paper roll', 'monitor support', 'fire alarm', 'kitchen stove', 'key hanger', 'tub', 'network socket', 'ceilng light', 'christmas ornament', 'pepper mill', 'wall outlet', 'light cover', 'caution board', 'heels', 'package bag', 'blinds rail', 'candle holder', 'electric toothbrush', 'tool case', 'toilet flush', 'wooden brush', 'hand washing soap', 'hygiene product', 'water tap', 'cosmetic pouch', 'mount', 'knife holder', 'dustpan and brush', 'circular tray', 'shoe case', 'ar tag', 'flipflop', 'sculpture', 'recycle bin', 'kitchen utensil', 'multiplug', 'beaker stand', 'chessboard', 'pen', 'toothpaste', 'tv remote', 'floor wiper', 'wire hider', 'water meter', 'magazine holder', 'mailbox', 'paper file', 'wall coat hanger', 'utensil holder', 'detergent', 'safe', 'packet', 'dvd', 'dvd player', 'tupperware', 'electrical board', 'radiator pipe', 'plat', 'decorative mirror', 'telephone stand', 'water boiler', 'cutboard', 'hanging deer skull', 'file orginizer', 'satchel', 'head model', 'parcel', 'dish soap bottle', 'glass plate', 'pencil case', 'cleaning mop', 'mixer glass', 'joystick', 'vacuum flask', 'bag of oranges', 'pencil holder', 'pamphlet', 'rope', 'cd', 'knife stand', 'pencil cup', 'binding machine', 'grill pan', 'tape', 'cabinet top panel', 'cutti̇ng board', 'salad spinner', 'water filter jug', 'mop cloth', 'shower valve', 'laptop cover', 'running shoe', 'alligator clips', 'insulated coffee mug', 'pencil stand', 'action figure', 'desk light', 'midi controller', 'bathroom slippers', 'elephant decoration piece', 'switchboard', 'remote', 'arch folder', 'power cord', 'hot bag', 'pencils cup', 'knife 
block', 'thermos', 'power switch', 'toothbrush holder', 'cleaning liquid', 'power board', 'pull-up bar', 'sandwich maker', 'statue', 'brief case', 'airdyer', 'bike helmet', 'mirror lamp', 'tennis rackets', 'paint jar', 'citrus juicer', 'scissors', 'hanging hook', 'napkin', 'laundry detergent', 'disinfectant dispenser', 'cleaning brush', 'food', 'pan set', 'kitchen appliance', 'mop pad', 'dish soap', 'plunger', 'paper package', 'plastic mat', 'calculator', 'voltage stabilizer', 'whiteboard marker', 'wall coat rack', 'milk carton', 'cosmetic bottle', 'shower handle', 'tissue paper', 'shorts', 'game console controller', 'remote controller', 'teapot', 'drain', 'juice box', 'phone stand', 'soap container', 'flush', 'rubber water bag', 'shower gel', 'snack bag', 'watering bucket', 'first aid box', 'banana', 'paper notebook', 'hammer', 'kitchen glove', 'handle', 'personal hygiene product', 'duster', 'robot vaccuum cleaner', 'chain', 'soap dish', 'cereal box', 'door window', 'elephant toy', 'frisbee', 'cream tube', 'folder holder', 'internet socket', 'moka pot', 'stereo box', 'face mask', 'bunny chocolate', 'magazine collector', 'shower holder', 'toilet brush', 'oven glove', 'shower drain', 'penholder', 'switch board', 'coffee pot', 'marker eraser', 'squeegee', 'spray', 'game cd box', 'table clock', 'apple', 'spool', 'portable speaker', 'guitar pedal', 'bread', 'glass jar', 'paper puncher', 'dishwashing sponge', 'shower hose', 'surface cleaning liquid', 'wallet', 'circuit box', 'toiletry', 'hair brush', 'headphone bag', 'metal saw', 'power adapter', 'wlan router', 'plant pot coaster', 'oven mitt', 'hanging light', 'kitchen object', 'machine button', 'vanity light', 'flush plate', 'leaf fan', 'punching machine', 'window handle', 'wine bottle', 'tennis cap', 'chips can', 'food pot', 'hand washing soap dispenser', 'wall light', 'detergent bag', 'glasses cover', 'hangbag', 'bottle spray', 'kitchen tap', 'knob', 'figurine', 'hand vacuum', 'potted plant', 'marker storage', 
'mixing bowl', 'power supply', 'intercom screen', 'coffee mug', 'drilling machine', 'electric socket', 'hand shower handle', 'hole punch', 'silicon gun', 'document holder', 'wooden chest', 'cheese', 'headphones case', 'measuring spoon', 'monitor light', 'ear muffs', 'night lamp', 'wall handle', 'intercom device', 'lanyard', 'rugby ball', 'shower loofah', 'cable socket', 'cleaning cloth', 'dish washer soap', 'power brick', 'coffee', 'monitor base', 'mushroom lamp', 'trivet', 'surface cleaner', 'toiletry bottle', 'paper note', 'strainer', 'colander', 'kitchen brush', 'multi socket', 'saucer', 'scissor', 'stamp', 'emergency button', 'toilet brush holder', 'coffee jar', 'deodorant', 'shower rod', 'wet tissue', 'tissue paper roll', 'funnel', 'oragnizer', 'toilet plunger', 'tumbler', 'door knob', 'coarser', 'postit note', 'punching tool', 'beverage can', 'dish', 'star', 'flask', 'in-table power socket', 'laptop charger', 'wall intercom', 'flour bag', 'blowtorch', 'cleaner', 'takeout box', 'hand soap', 'hard drive', 'monitor holder', 'soldering iron', 'control switch', 'drainage', 'box lid', 'playstation controller', 'shaving foam', 'guitar stand', 'toilet cleaner', 'co detector', 'covered power socket', 'spray can', 'tooth brush', 'phone tripod', 'bluetooth speaker', 'dish washing liquid', 'powerbank', 'dried plant', 'shampoo', 'sticky note', 'toilet seat brush', 'dishwashing soap', 'lan port', 'scrubber', 'cord', 'ashtray', 'handle bar', 'dongle', 'liquid soap', 'mobile phone', 'bunny decoration', 'palm rest', 'stream deck', 'blinds chain', 'dishwashing liquid', 'hammer holder', 'hand washing liquid', 'shower wiper', 'duct tape', 'valve', 'glove', 'stationery', 'ceiling speaker', 'holder', 'power plug', 'wall speaker', 'card', 'electrical tape', 'pot cover', 'razor', 'barcode scanner', 'lotion', 'grab rail', 'hairbrush', 'lint roller', 'post-it', 'dusting cloth', 'glue bottle', 'handwash', 'network outlet', 'card reader', 'kitchen light', 'pc charger', 'product 
dispenser bottle', 'hair dryer holder', 'screwdriver', 'shower squeegee', 'juice tetrapack', 'pliers', 'sunblock', 'bed sheet', 'hook', 'headphone holder', 'light bulb', 'ladle', 'bar soap', 'button', 'dish brush', 'toy car', 'note', 'post it', 'sponge cloth', 'wood stick', 'paper weight', 'product tube', 'cell phone', 'smartphone', 'comb', 'jump rope', 'lan', 'wireless charger', 'door stopper', 'cigarette packet', 'pencil', 'soap holder', 'electrical plug', 'toilet holder', 'mask', 'pocket calculator', 'cabbage', 'spoon', 'doorknob', 'fan switch', 'rubber duck', 'backdrop hook', 'table tennis racket', 'receipt', 'soap bar', 'tape roll', 'tooth paste', 'electrical adapter', 'probe', 'highlighter', 'correction fluid', 'dispenser', 'pi̇cture', 'door hinge', 'whiteboard marker holder', 'botttle', 'power outlet', 'towel hanger', 'whiteboard duster', 'magnet', 'sticker', 'nose spray', 'vertical blind control', 'razor blade', 'post it note', 'wireless headphones', 'cabinet top', 'cream bottle', 'datashow socket', 'earbuds', 'cabinet door', 'chair cushion', 'cosmetic tube']
56
+
57
+ SCANNETPP_IDS = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 
431, 432, 433, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 495, 496, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 575, 576, 577, 578, 580, 581, 583, 584, 585, 586, 588, 589, 591, 592, 593, 594, 595, 596, 597, 598, 602, 603, 604, 605, 606, 607, 608, 609, 611, 612, 613, 614, 615, 616, 617, 618, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 646, 647, 648, 649, 650, 651, 653, 654, 655, 656, 657, 658, 659, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 705, 706, 709, 710, 711, 712, 713, 714, 716, 717, 718, 719, 720, 721, 722, 723, 724, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 855, 856, 857, 858, 859, 860, 861, 862, 863, 
864, 865, 866, 867, 868, 869, 870, 871, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 919, 920, 921, 922, 923, 924, 925, 926, 928, 929, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 943, 944, 946, 947, 948, 949, 950, 951, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 977, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1076, 1077, 1078, 1080, 1081, 1082, 1083, 1085, 1086, 1087, 1089, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1107, 1108, 1109, 1110, 1111, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1125, 1126, 1127, 1128, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1242, 1243, 
1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1279, 1280, 1281, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1413, 1414, 1415, 1417, 1418, 1419, 1420, 1421, 1423, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1535, 1536, 1537, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1565, 1566, 1567, 1568, 1570, 1571, 1572, 1573, 1574, 1576, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1589, 1590, 1591, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601, 1603, 
1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1641, 1642, 1643, 1644, 1645, 1646, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658]
58
+
59
# 84-class benchmark subset of ScanNet++. Kept parallel to SCANNETPP84_IDS:
# SCANNETPP84_LABELS[i] is the human-readable name for raw id SCANNETPP84_IDS[i].
SCANNETPP84_LABELS = [
    'table', 'door', 'ceiling lamp', 'cabinet', 'blinds', 'curtain', 'chair',
    'storage cabinet', 'office chair', 'bookshelf', 'whiteboard', 'window', 'box',
    'monitor', 'shelf', 'heater', 'kitchen cabinet', 'sofa', 'bed', 'trash can',
    'book', 'plant', 'blanket', 'tv', 'computer tower', 'refrigerator', 'jacket',
    'sink', 'bag', 'picture', 'pillow', 'towel', 'suitcase', 'backpack', 'crate',
    'keyboard', 'rack', 'toilet', 'printer', 'poster', 'painting', 'microwave',
    'shoes', 'socket', 'bottle', 'bucket', 'cushion', 'basket', 'shoe rack',
    'telephone', 'file folder', 'laptop', 'plant pot', 'exhaust fan', 'cup',
    'coat hanger', 'light switch', 'speaker', 'table lamp', 'kettle',
    'smoke detector', 'container', 'power strip', 'slippers', 'paper bag',
    'mouse', 'cutting board', 'toilet paper', 'paper towel', 'pot', 'clock',
    'pan', 'tap', 'jar', 'soap dispenser', 'binder', 'bowl', 'tissue box',
    'whiteboard eraser', 'toilet brush', 'spray bottle', 'headphones',
    'stapler', 'marker']

# Raw ScanNet++ semantic ids of the 84 classes above, in the same order
# (note: NOT sorted and NOT contiguous — e.g. 416, 744, 1157 appear mid-list).
SCANNETPP84_IDS = [
    4, 3, 6, 5, 9, 7, 8, 10, 12, 11, 14, 13, 23, 17, 18, 24, 25, 27, 28, 47,
    88, 35, 36, 42, 45, 58, 49, 54, 56, 59, 60, 63, 67, 68, 102, 71, 72, 74,
    81, 83, 90, 96, 122, 416, 106, 111, 117, 126, 129, 132, 155, 166, 173,
    188, 300, 199, 204, 214, 219, 253, 299, 265, 273, 352, 295, 296, 301, 305,
    312, 342, 358, 364, 368, 387, 395, 396, 403, 405, 414, 443, 469, 515, 744,
    1157]
67
+
68
+
69
# 20-class ScanNet evaluation subset. Ids are simply the positional indices
# 0..19 into the label list (unlike the raw-id ScanNet++ tables above).
SCANNET20_LABELS = [
    'toilet', 'bed', 'chair', 'sofa', 'dresser', 'table', 'cabinet',
    'bookshelf', 'pillow', 'sink', 'bathtub', 'refrigerator', 'desk',
    'nightstand', 'counter', 'door', 'curtain', 'box', 'lamp', 'bag']

SCANNET20_IDS = list(range(20))
72
+
73
# ARKitScenes instance classes. Ids are positional indices into the label
# list, so ARKIT_IDS always tracks the list length automatically.
ARKIT_LABELS = ['cabinet', 'refrigerator', 'shelf', 'stove', 'bed',
                'sink', 'washer', 'toilet', 'bathtub', 'oven',
                'dishwasher', 'fireplace', 'stool', 'chair', 'table',
                'tv_monitor', 'sofa']

ARKIT_IDS = list(range(len(ARKIT_LABELS)))
MaskClustering/evaluation/evaluate.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os, sys, argparse
from copy import deepcopy
import numpy as np
import torch
from evaluation.utils_3d import get_instances

# ---------- Command line ---------- #
parser = argparse.ArgumentParser()
parser.add_argument('--pred_path', required=True, help='path to directory of predicted .txt files')
parser.add_argument('--gt_path', required=True, help='path to directory of ground truth .txt files')
parser.add_argument('--dataset', required=True, help='type of dataset, e.g. matterport3d, scannet, etc.')
parser.add_argument('--output_file', default='', help='path to output file')
parser.add_argument('--no_class', action='store_true', help='class agnostic evaluation')
opt = parser.parse_args()

# ---------- Label info ---------- #
from evaluation.constants import MATTERPORT_LABELS, MATTERPORT_IDS, SCANNET_LABELS, SCANNET_IDS, SCANNETPP_LABELS, SCANNETPP_IDS

if opt.dataset == 'matterport3d':
    CLASS_LABELS = MATTERPORT_LABELS
    VALID_CLASS_IDS = MATTERPORT_IDS
elif opt.dataset == 'scannet':
    CLASS_LABELS = SCANNET_LABELS
    VALID_CLASS_IDS = SCANNET_IDS
elif opt.dataset == 'scannetpp':
    CLASS_LABELS = SCANNETPP_LABELS
    VALID_CLASS_IDS = SCANNETPP_IDS
else:
    # Fail fast: previously an unknown dataset fell through silently and the
    # script later died with a NameError the first time CLASS_LABELS was used.
    raise ValueError(f'unknown dataset: {opt.dataset!r} '
                     f"(expected 'matterport3d', 'scannet' or 'scannetpp')")

# Default output location mirrors the prediction directory name.
if opt.output_file == '':
    opt.output_file = os.path.join(f'data/evaluation/{opt.dataset}', opt.pred_path.split('/')[-1] + '.txt')
os.makedirs(os.path.dirname(opt.output_file), exist_ok=True)
if opt.no_class:
    if 'class_agnostic' not in opt.output_file:
        opt.output_file = opt.output_file.replace('.txt', '_class_agnostic.txt')

# id <-> label lookup tables; assumes CLASS_LABELS and VALID_CLASS_IDS are
# parallel lists of equal length (as defined in evaluation.constants).
ID_TO_LABEL = dict(zip(VALID_CLASS_IDS, CLASS_LABELS))
LABEL_TO_ID = dict(zip(CLASS_LABELS, VALID_CLASS_IDS))

# ---------- Evaluation params ---------- #
# IoU overlap thresholds: 0.50..0.90 in 0.05 steps, with 0.25 appended last.
opt.overlaps = np.append(np.arange(0.5, 0.95, 0.05), 0.25)
# minimum region size for evaluation [verts]
opt.min_region_sizes = np.array([100])
# distance thresholds [m]
opt.distance_threshes = np.array([float('inf')])
# distance confidences
opt.distance_confs = np.array([-float('inf')])
52
+
53
+ def evaluate_matches(matches):
54
+ overlaps = opt.overlaps
55
+ min_region_sizes = [ opt.min_region_sizes[0] ]
56
+ dist_threshes = [ opt.distance_threshes[0] ]
57
+ dist_confs = [ opt.distance_confs[0] ]
58
+
59
+ # results: class x overlap
60
+ ap = np.zeros( (len(dist_threshes) , len(CLASS_LABELS) , len(overlaps)) , float )
61
+ for di, (min_region_size, distance_thresh, distance_conf) in enumerate(zip(min_region_sizes, dist_threshes, dist_confs)):
62
+ for oi, overlap_th in enumerate(overlaps):
63
+ pred_visited = {}
64
+ for m in matches:
65
+ for p in matches[m]['pred']:
66
+ for label_name in CLASS_LABELS:
67
+ for p in matches[m]['pred'][label_name]:
68
+ if 'filename' in p:
69
+ pred_visited[p['filename']] = False
70
+ for li, label_name in enumerate(CLASS_LABELS):
71
+ y_true = np.empty(0)
72
+ y_score = np.empty(0)
73
+ hard_false_negatives = 0
74
+ has_gt = False
75
+ has_pred = False
76
+ for m in matches:
77
+ pred_instances = matches[m]['pred'][label_name]
78
+ gt_instances = matches[m]['gt'][label_name]
79
+ # filter groups in ground truth
80
+ gt_instances = [ gt for gt in gt_instances if gt['instance_id']>=1000 and gt['vert_count']>=min_region_size and gt['med_dist']<=distance_thresh and gt['dist_conf']>=distance_conf ]
81
+ if gt_instances:
82
+ has_gt = True
83
+ if pred_instances:
84
+ has_pred = True
85
+
86
+ cur_true = np.ones ( len(gt_instances) )
87
+ cur_score = np.ones ( len(gt_instances) ) * (-float("inf"))
88
+ cur_match = np.zeros( len(gt_instances) , dtype=bool )
89
+ # collect matches
90
+ for (gti,gt) in enumerate(gt_instances):
91
+ found_match = False
92
+ num_pred = len(gt['matched_pred'])
93
+ for pred in gt['matched_pred']:
94
+ # greedy assignments
95
+ if pred_visited[pred['filename']]:
96
+ continue
97
+ overlap = float(pred['intersection']) / (gt['vert_count']+pred['vert_count']-pred['intersection'])
98
+ if overlap > overlap_th:
99
+ confidence = pred['confidence']
100
+ # if already have a prediction for this gt,
101
+ # the prediction with the lower score is automatically a false positive
102
+ if cur_match[gti]:
103
+ max_score = max( cur_score[gti] , confidence )
104
+ min_score = min( cur_score[gti] , confidence )
105
+ cur_score[gti] = max_score
106
+ # append false positive
107
+ cur_true = np.append(cur_true,0)
108
+ cur_score = np.append(cur_score,min_score)
109
+ cur_match = np.append(cur_match,True)
110
+ # otherwise set score
111
+ else:
112
+ found_match = True
113
+ cur_match[gti] = True
114
+ cur_score[gti] = confidence
115
+ pred_visited[pred['filename']] = True
116
+
117
+
118
+ if not found_match:
119
+ hard_false_negatives += 1
120
+ # remove non-matched ground truth instances
121
+ cur_true = cur_true [ cur_match==True ]
122
+ cur_score = cur_score[ cur_match==True ]
123
+
124
+ # collect non-matched predictions as false positive
125
+ for pred in pred_instances:
126
+ found_gt = False
127
+ for gt in pred['matched_gt']:
128
+ overlap = float(gt['intersection']) / (gt['vert_count']+pred['vert_count']-gt['intersection'])
129
+ if overlap > overlap_th:
130
+ found_gt = True
131
+ break
132
+ if not found_gt:
133
+ num_ignore = pred['void_intersection']
134
+ for gt in pred['matched_gt']:
135
+ # group?
136
+ if gt['instance_id'] < 1000:
137
+ num_ignore += gt['intersection']
138
+ # small ground truth instances
139
+ if gt['vert_count'] < min_region_size or gt['med_dist']>distance_thresh or gt['dist_conf']<distance_conf:
140
+ num_ignore += gt['intersection']
141
+ proportion_ignore = float(num_ignore)/pred['vert_count']
142
+ # if not ignored append false positive
143
+ if proportion_ignore <= overlap_th:
144
+ cur_true = np.append(cur_true,0)
145
+ confidence = pred["confidence"]
146
+ cur_score = np.append(cur_score,confidence)
147
+ # append to overall results
148
+ y_true = np.append(y_true,cur_true)
149
+ y_score = np.append(y_score,cur_score)
150
+
151
+ # compute average precision
152
+ if has_gt and has_pred:
153
+ if len(y_score) == 0:
154
+ ap_current = 0.0
155
+ else:
156
+ # compute precision recall curve first
157
+
158
+ # sorting and cumsum
159
+ score_arg_sort = np.argsort(y_score)
160
+ y_score_sorted = y_score[score_arg_sort]
161
+ y_true_sorted = y_true[score_arg_sort]
162
+ y_true_sorted_cumsum = np.cumsum(y_true_sorted)
163
+
164
+ # unique thresholds
165
+ (thresholds,unique_indices) = np.unique( y_score_sorted , return_index=True )
166
+ num_prec_recall = len(unique_indices) + 1
167
+
168
+ # prepare precision recall
169
+ num_examples = len(y_score_sorted)
170
+ num_true_examples = y_true_sorted_cumsum[-1]
171
+ precision = np.zeros(num_prec_recall)
172
+ recall = np.zeros(num_prec_recall)
173
+
174
+ # deal with the first point
175
+ y_true_sorted_cumsum = np.append( y_true_sorted_cumsum , 0 )
176
+ # deal with remaining
177
+ for idx_res,idx_scores in enumerate(unique_indices):
178
+ cumsum = y_true_sorted_cumsum[idx_scores-1]
179
+ tp = num_true_examples - cumsum
180
+ fp = num_examples - idx_scores - tp
181
+ fn = cumsum + hard_false_negatives
182
+ p = float(tp)/(tp+fp)
183
+ r = float(tp)/(tp+fn)
184
+ precision[idx_res] = p
185
+ recall [idx_res] = r
186
+
187
+ # first point in curve is artificial
188
+ precision[-1] = 1.
189
+ recall [-1] = 0.
190
+
191
+ # compute average of precision-recall curve
192
+ recall_for_conv = np.copy(recall)
193
+ recall_for_conv = np.append(recall_for_conv[0], recall_for_conv)
194
+ recall_for_conv = np.append(recall_for_conv, 0.)
195
+
196
+ stepWidths = np.convolve(recall_for_conv,[-0.5,0,0.5],'valid')
197
+ # integrate is now simply a dot product
198
+ ap_current = np.dot(precision, stepWidths)
199
+ elif has_gt:
200
+ ap_current = 0.0
201
+ else:
202
+ ap_current = float('nan')
203
+
204
+ ap[di,li,oi] = ap_current
205
+ return ap
206
+
207
def compute_averages(aps):
    '''
    Reduce the AP tensor from evaluate_matches to summary numbers.

    aps: array of shape (num_dist_configs, num_classes, num_overlaps); only
    the first distance config is reported.
    Returns a dict with 'all_ap' (mean over the 0.50:0.95 thresholds),
    'all_ap_50%', 'all_ap_25%', and a 'classes' dict holding the same three
    values per class label.
    '''
    d_inf = 0  # single distance configuration (see module-level eval params)
    o50 = np.where(np.isclose(opt.overlaps, 0.5))
    o25 = np.where(np.isclose(opt.overlaps, 0.25))
    # all thresholds except the appended 0.25 -> the 0.50:0.95 range
    oAllBut25 = np.where(np.logical_not(np.isclose(opt.overlaps, 0.25)))
    avg_dict = {}
    # nanmean over classes: classes with neither gt nor prediction are NaN
    avg_dict['all_ap'] = np.nanmean(aps[d_inf, :, oAllBut25])
    avg_dict['all_ap_50%'] = np.nanmean(aps[d_inf, :, o50])
    avg_dict['all_ap_25%'] = np.nanmean(aps[d_inf, :, o25])
    avg_dict["classes"] = {}
    for (li, label_name) in enumerate(CLASS_LABELS):
        avg_dict["classes"][label_name] = {}
        avg_dict["classes"][label_name]["ap"] = np.average(aps[d_inf, li, oAllBut25])
        avg_dict["classes"][label_name]["ap50%"] = np.average(aps[d_inf, li, o50])
        avg_dict["classes"][label_name]["ap25%"] = np.average(aps[d_inf, li, o25])
    return avg_dict
225
+
226
def read_pridiction_npz(path):
    '''
    Load predicted instances from a .npz file.

    Expects arrays 'pred_masks' of shape (num_points, num_instances), plus
    'pred_classes' and 'pred_score' of shape (num_instances,).
    Returns {"<filename>_<i>": {'mask', 'label_id', 'conf'}} — the filename
    prefix keeps instance keys unique across all scenes.
    '''
    pred_info = {}
    pred = np.load(path)

    num_instance = len(pred['pred_score'])
    # NOTE: the original shipped the masks to the GPU via torch just to copy
    # each column straight back with .cpu().numpy(). Slicing the numpy array
    # directly yields identical values and no longer requires CUDA.
    masks = pred['pred_masks']
    for i in range(num_instance):
        pred_info[path.split('/')[-1] + '_' + str(i)] = {  # unique id of instance in all scenes
            'mask': masks[:, i],
            'label_id': pred['pred_classes'][i],
            'conf': pred['pred_score'][i]
        }
    return pred_info
239
+
240
def get_gt_tensor(gt_ids, gt_instances):
    '''
    Build one boolean membership tensor per label.

    For every label, column i of the returned (num_points, num_instances)
    tensor marks the points whose gt id equals instance i's id. Tensors are
    placed on CUDA because assign_instances_for_scan intersects them with
    CUDA prediction masks.
    '''
    membership_by_label = {}
    num_points = len(gt_ids)
    for label, instances in gt_instances.items():
        membership = torch.zeros((num_points, len(instances)), dtype=torch.bool).cuda()
        for col, instance_info in enumerate(instances):
            membership[:, col] = torch.from_numpy(gt_ids == instance_info['instance_id'])
        membership_by_label[label] = membership
    return membership_by_label
253
+
254
def assign_instances_for_scan(pred_file, gt_file):
    '''
    Match predicted instances against ground truth for one scene.

    Any overlap (intersection > 0) between a prediction and a gt instance of
    the same label is recorded as a match on both sides.
    Returns (gt2pred, pred2gt): gt instances annotated with 'matched_pred'
    lists, and predictions (per label) annotated with 'matched_gt' lists.
    '''
    pred_info = read_pridiction_npz(os.path.join(pred_file))
    gt_ids = np.loadtxt(gt_file)

    if opt.no_class:
        # class-agnostic mode: collapse every label onto the first valid class id
        gt_ids = gt_ids % 1000 + VALID_CLASS_IDS[0] * 1000

    # ground-truth instances, grouped by label
    gt_instances = get_instances(gt_ids, VALID_CLASS_IDS, CLASS_LABELS, ID_TO_LABEL)
    # every gt instance starts out with an empty prediction list
    gt2pred = deepcopy(gt_instances)
    for label in gt2pred:
        for gt in gt2pred[label]:
            gt['matched_pred'] = []
    pred2gt = {label: [] for label in CLASS_LABELS}
    num_pred_instances = 0
    # mask of void labels in the groundtruth
    bool_void = np.logical_not(np.in1d(gt_ids // 1000, VALID_CLASS_IDS))

    gt_tensor_dict = get_gt_tensor(gt_ids, gt_instances)

    # walk over every prediction mask
    for pred_key in pred_info:
        if opt.no_class:
            label_id = VALID_CLASS_IDS[0]
        else:
            label_id = int(pred_info[pred_key]['label_id'])
        conf = pred_info[pred_key]['conf']
        if label_id not in ID_TO_LABEL:
            continue
        label_name = ID_TO_LABEL[label_id]
        # read the mask
        pred_mask = pred_info[pred_key]['mask']

        if len(pred_mask) != len(gt_ids):
            print('wrong number of lines in ' + pred_key + '(%d) vs #mesh vertices (%d), please double check and/or re-download the mesh' % (len(pred_mask), len(gt_ids)))
            raise NotImplementedError

        # binarise and drop predictions below the minimum region size
        pred_mask = np.not_equal(pred_mask, 0)
        vert_count = np.count_nonzero(pred_mask)
        if vert_count < opt.min_region_sizes[0]:
            continue  # skip if empty

        pred_instance = {
            'filename': pred_key,
            'pred_id': num_pred_instances,
            'label_id': label_id,
            'vert_count': vert_count,
            'confidence': conf,
            'void_intersection': np.count_nonzero(np.logical_and(bool_void, pred_mask)),
        }

        # intersect this prediction with every gt instance of the same label
        matched_gt = []
        gt_tensor = gt_tensor_dict[label_name]
        intersection = torch.sum(gt_tensor & torch.from_numpy(pred_mask).cuda().reshape(-1, 1), dim=0)
        intersect_ids = torch.nonzero(intersection).cpu().numpy().reshape(-1)
        for gt_id in intersect_ids:
            gt_copy = gt_instances[label_name][gt_id].copy()
            pred_copy = pred_instance.copy()
            shared = intersection[gt_id].item()
            gt_copy['intersection'] = shared
            pred_copy['intersection'] = shared
            matched_gt.append(gt_copy)
            gt2pred[label_name][gt_id]['matched_pred'].append(pred_copy)

        pred_instance['matched_gt'] = matched_gt
        num_pred_instances += 1
        pred2gt[label_name].append(pred_instance)

    return gt2pred, pred2gt
330
+
331
def print_results(avgs):
    """Pretty-print the per-class and averaged AP table to stdout."""
    width = 64

    print("")
    print("#" * width)
    header = f"{'what':<15}:" + f"{'AP':>15}" + f"{'AP_50%':>15}" + f"{'AP_25%':>15}"
    print(header)
    print("#" * width)

    # One row per class; classes with a NaN AP (no samples) are skipped.
    for label_name in CLASS_LABELS:
        per_class = avgs["classes"][label_name]
        if np.isnan(per_class["ap"]):
            continue
        row = f"{label_name:<15}:"
        row += f"{per_class['ap']:>15.3f}"
        row += f"{per_class['ap50%']:>15.3f}"
        row += f"{per_class['ap25%']:>15.3f}"
        print(row)

    # Dataset-wide averages.
    print("-" * width)
    summary = f"{'average':<15}:"
    summary += f"{avgs['all_ap']:>15.3f}"
    summary += f"{avgs['all_ap_50%']:>15.3f}"
    summary += f"{avgs['all_ap_25%']:>15.3f}"
    print(summary)
    print("")
369
+
370
def write_result_file(avgs, filename):
    """Dump the AP summary as a small comma-separated file."""
    delim = ','
    with open(filename, 'w') as out:
        out.write(delim.join(['class', 'class id', 'ap', 'ap50', 'ap25']) + '\n')
        # One row per (class id, class name) pair, in declaration order.
        for class_id, class_name in zip(VALID_CLASS_IDS, CLASS_LABELS):
            per_class = avgs["classes"][class_name]
            row = [class_name, class_id, per_class["ap"], per_class["ap50%"], per_class["ap25%"]]
            out.write(delim.join(str(v) for v in row) + '\n')
        # Final row: dataset-wide averages.
        totals = [avgs["all_ap"], avgs["all_ap_50%"], avgs["all_ap_25%"]]
        out.write(delim.join(str(v) for v in totals) + '\n')
382
+
383
def evaluate(pred_files, gt_files, pred_path, output_file):
    """Evaluate every scan, print the AP table and write it as CSV.

    Args:
        pred_files: per-scene prediction .npz paths.
        gt_files: matching ground-truth .txt paths (same order).
        pred_path: prediction directory (currently unused here).
        output_file: destination CSV path.
    """
    print('evaluating', len(pred_files), 'scans...')
    matches = {}
    for i in range(len(pred_files)):
        # Key each scan by the absolute gt path so matches stay unambiguous.
        matches_key = os.path.abspath(gt_files[i])
        # assign gt to predictions
        gt2pred, pred2gt = assign_instances_for_scan(pred_files[i], gt_files[i])
        matches[matches_key] = {}
        matches[matches_key]['gt'] = gt2pred
        matches[matches_key]['pred'] = pred2gt
        # In-place progress counter on one console line.
        sys.stdout.write("\rscans processed: {}".format(i+1))
        sys.stdout.flush()
    ap_scores = evaluate_matches(matches)
    avgs = compute_averages(ap_scores)

    # print and persist the summary
    print_results(avgs)
    write_result_file(avgs, output_file)
401
+
402
def main():
    """Pair each prediction .npz in opt.pred_path with its gt .txt and evaluate."""
    print('start evaluating:', opt.pred_path.split('/')[-1])
    # Every .npz is one scene prediction; skip the previously written summary file.
    pred_files = [f for f in sorted(os.listdir(opt.pred_path)) if f.endswith('.npz') and not f.startswith('semantic_instance_evaluation')]
    gt_files = []

    for i in range(len(pred_files)):
        # gt file shares the scene name with the prediction, just a .txt.
        gt_file = os.path.join(opt.gt_path, pred_files[i].replace('.npz', '.txt'))
        if not os.path.isfile(gt_file):
            print('Result file {} does not match any gt file'.format(pred_files[i]))
            raise NotImplementedError

        gt_files.append(gt_file)
        # Turn the bare filename into a full path for evaluate().
        pred_files[i] = os.path.join(opt.pred_path, pred_files[i])

    evaluate(pred_files, gt_files, opt.pred_path, opt.output_file)
    print('save results to', opt.output_file)
418
+
419
# Entry point. NOTE(review): relies on the module-level `opt` options object
# defined earlier in this file.
if __name__ == '__main__':
    main()
MaskClustering/evaluation/utils_3d.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+
4
def load_ids(filename):
    """Load per-vertex instance ids from a text file (one integer per line).

    Args:
        filename: path to the ids file.

    Returns:
        1-D numpy array of dtype int64.
    """
    # Use a context manager so the handle is closed deterministically
    # (the original `open(filename).read()` left closing to the GC).
    with open(filename) as f:
        ids = f.read().splitlines()
    return np.array(ids, dtype=np.int64)
8
+
9
+ # ------------ Instance Utils ------------ #
10
+
11
class Instance(object):
    """One ground-truth instance decoded from per-vertex encoded ids.

    Ids follow the ScanNet convention ``label_id * 1000 + instance``;
    constructing with instance_id == -1 yields an "empty" instance that
    keeps the class-level defaults below.
    """

    # Defaults, shared by instances built with instance_id == -1.
    instance_id = 0
    label_id = 0
    vert_count = 0
    med_dist = -1
    dist_conf = 0.0

    def __init__(self, mesh_vert_instances, instance_id):
        if instance_id == -1:
            return
        self.instance_id = int(instance_id)
        self.label_id = int(self.get_label_id(instance_id))
        self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id))

    def get_label_id(self, instance_id):
        # The semantic class lives in the thousands block of the encoded id.
        return int(instance_id // 1000)

    def get_instance_verts(self, mesh_vert_instances, instance_id):
        # Count of vertices carrying exactly this encoded instance id.
        return (mesh_vert_instances == instance_id).sum()

    def to_json(self):
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    def to_dict(self):
        """Plain-dict view used by the evaluation code."""
        return {
            "instance_id": self.instance_id,
            "label_id": self.label_id,
            "vert_count": self.vert_count,
            "med_dist": self.med_dist,
            "dist_conf": self.dist_conf,
        }

    def from_json(self, data):
        """Populate this instance from a decoded JSON mapping."""
        self.instance_id = int(data["instance_id"])
        self.label_id = int(data["label_id"])
        self.vert_count = int(data["vert_count"])
        if "med_dist" in data:
            self.med_dist = float(data["med_dist"])
            self.dist_conf = float(data["dist_conf"])

    def __str__(self):
        return f"({self.instance_id})"
53
+
54
def get_instances(ids, class_ids, class_labels, id2label):
    """Group ground-truth instances by their semantic label.

    Args:
        ids: per-vertex encoded instance ids (label_id * 1000 + instance).
        class_ids: iterable of valid semantic class ids.
        class_labels: label names parallel to ``class_ids``.
        id2label: mapping from class id to label name.

    Returns:
        dict mapping every label name to a list of instance dicts
        (``Instance.to_dict()``); labels with no instances map to [].
    """
    instances = {label: [] for label in class_labels}
    for instance_id in np.unique(ids):
        # 0 encodes "no instance"; the original loop variable `id`
        # shadowed the builtin and has been renamed.
        if instance_id == 0:
            continue
        inst = Instance(ids, instance_id)
        if inst.label_id in class_ids:
            instances[id2label[inst.label_id]].append(inst.to_dict())
    return instances
66
+
MaskClustering/infer_single_scene.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import numpy as np
4
+ import time
5
+ import shutil
6
+ import torch
7
+ import urllib.request
8
+ import tempfile
9
+ import sys
10
+ from pathlib import Path
11
+ from tqdm import tqdm
12
+ import ssl
13
+ from tqdm import tqdm
14
# Disable certificate verification for the ScanNet host.
# NOTE(review): this is a process-wide side effect at import time.
ssl._create_default_https_context = ssl._create_unverified_context

# ScanNet download constants (mirrors download-scannet.py).
BASE_URL = 'http://kaldir.vc.in.tum.de/scannet/'
TOS_URL = BASE_URL + 'ScanNet_TOS.pdf'
FILETYPES = ['.aggregation.json', '.sens', '.txt', '_vh_clean_2.0.010000.segs.json', '_vh_clean_2.ply', '_vh_clean.aggregation.json', '_vh_clean_2.labels.ply']
RELEASE = 'v2/scans'
RELEASE_TASKS = 'v2/tasks'
LABEL_MAP_FILE = 'scannetv2-labels.combined.tsv'

# Defaults
DEFAULT_CONFIG = "scannet"  # default configuration name
CUDA_ID = 0  # id of the GPU to use
27
+
28
def parse_args(argv=None):
    """Parse command-line options for the single-scene pipeline.

    Args:
        argv: optional list of argument strings; defaults to
            ``sys.argv[1:]`` (the original behavior). Passing a list
            makes the parser testable without touching sys.argv.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(description="MaskClustering на одной сцене")
    parser.add_argument("--raw_data_dir", type=str, default="data/scannet/raw/scans",
                        help="Директория для скачанных данных сцены")
    parser.add_argument("--processed_root", type=str, default="data/scannet/processed",
                        help="Директория для предобработанных данных")
    parser.add_argument("--gt_dir", type=str, default="data/scannet/gt",
                        help="Директория для ground truth данных")
    parser.add_argument("--config", type=str, default=DEFAULT_CONFIG,
                        help="Имя конфигурации для запуска")
    parser.add_argument("--cropformer_path", type=str,
                        default="Mask2Former_hornet_3x_576d0b.pth",
                        help="Путь к весам CropFormer")
    parser.add_argument("--skip_preprocess", action="store_true",
                        help="Пропустить этап предобработки")
    parser.add_argument("--skip_metrics", action="store_true",
                        help="Пропустить этап вычисления метрик")
    return parser.parse_args(argv)
46
+
47
+ # Функции для скачивания данных из download-scannet.py
48
def get_release_scans(release_file):
    """Fetch a release listing URL and return the scan ids it contains."""
    scan_lines = urllib.request.urlopen(release_file)
    scans = []
    for scan_line in scan_lines:
        # One scan id per line; response bytes -> str, strip the newline.
        scan_id = scan_line.decode('utf8').rstrip('\n')
        scans.append(scan_id)
    return scans
55
+
56
def download_file(url, out_file):
    """Download *url* to *out_file* via a temp file; return True on success.

    Existing files are kept as-is (treated as success). On any download
    error the temp file is removed and False is returned.
    """
    out_dir = os.path.dirname(out_file)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    if not os.path.isfile(out_file):
        print('\t' + url + ' > ' + out_file)
        # Reserve a unique temp name in the target dir so the final rename
        # is atomic on the same filesystem; close the fd immediately since
        # urlretrieve opens the path itself.
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        try:
            urllib.request.urlretrieve(url, out_file_tmp)
            os.rename(out_file_tmp, out_file)
        except urllib.error.HTTPError as e:
            print(f"Ошибка HTTP при скачивании {url}: {e.code} {e.reason}")
            if os.path.exists(out_file_tmp):
                os.remove(out_file_tmp)
            return False
        except urllib.error.URLError as e:
            print(f"Ошибка URL при скачивании {url}: {e.reason}")
            if os.path.exists(out_file_tmp):
                os.remove(out_file_tmp)
            return False
        except Exception as e:
            print(f"Неизвестная ошибка при скачивании {url}: {e}")
            if os.path.exists(out_file_tmp):
                os.remove(out_file_tmp)
            return False
    else:
        print('Файл уже существует: ' + out_file)
    return True
86
+
87
def download_scan(scan_id, out_dir, file_types):
    """Download the requested file types of one ScanNet scene into *out_dir*.

    Returns True when every file downloaded (or already existed locally).
    """
    print(f'Скачивание сцены ScanNet {scan_id}...')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    success = True
    for ft in file_types:
        # .sens files are served from the v1 release path.
        v1_sens = ft == '.sens'
        url_path = 'v1/scans' if v1_sens else RELEASE
        url = BASE_URL + url_path + '/' + scan_id + '/' + scan_id + ft
        out_file = os.path.join(out_dir, scan_id + ft)

        if not download_file(url, out_file):
            success = False

    if success:
        print(f'Сцена {scan_id} успешно скачана')
    else:
        print(f'Возникли проблемы при скачивании сцены {scan_id}')

    return success
109
+
110
def download_label_map(out_dir):
    """Download the ScanNet label-mapping TSV into *out_dir*."""
    print('Скачивание файла сопоставления меток ScanNet...')
    url = BASE_URL + RELEASE_TASKS + '/' + LABEL_MAP_FILE
    localpath = os.path.join(out_dir, LABEL_MAP_FILE)
    localdir = os.path.dirname(localpath)
    if not os.path.isdir(localdir):
        os.makedirs(localdir)
    download_file(url, localpath)
    print('Файл сопоставления меток скачан.')
119
+
120
def get_local_sens(scene_id):
    """Return the locally cached .sens path for *scene_id*, or None if absent."""
    candidate = os.path.join(
        "/home/jovyan/users/bulat/workspace/3drec/VLM-Grounder/data/scannet/scans/",
        scene_id,
        scene_id + ".sens",
    )
    return candidate if os.path.exists(candidate) else None
126
+
127
def get_local_ply(scene_id):
    """Return the locally cached mesh (.ply) path for *scene_id*, or None.

    The candidate path is always printed (matches the original behavior).
    """
    candidate = os.path.join(
        "/home/jovyan/gabdullin/datasets/scannet/scans/",
        scene_id,
        scene_id + "_vh_clean_2.ply",
    )
    print(candidate)
    return candidate if os.path.exists(candidate) else None
134
+
135
+
136
def check_and_download_scene(scene_id, raw_data_dir):
    """Ensure the raw files of *scene_id* exist locally, downloading what is missing.

    Exits the process when the scene is unknown to ScanNet or a download
    fails. Returns the scene directory path.
    """
    scene_dir = os.path.join(raw_data_dir, scene_id)

    # Already complete locally? Then nothing to do.
    if os.path.exists(scene_dir) and all(
        os.path.exists(os.path.join(scene_dir, scene_id + filetype))
        for filetype in ['.sens', '.txt', '_vh_clean_2.ply', '.aggregation.json', '_vh_clean_2.0.010000.segs.json']
    ):
        print(f"Сцена {scene_id} уже существует локально")
        return scene_dir

    # Fetch the list of released scenes.
    release_file = BASE_URL + RELEASE + '.txt'
    release_scans = get_release_scans(release_file)

    # Fall back to the test split before giving up.
    if scene_id not in release_scans:
        release_test_file = BASE_URL + RELEASE + '_test.txt'
        release_test_scans = get_release_scans(release_test_file)
        if scene_id not in release_test_scans:
            print(f"ОШИБКА: Сцена {scene_id} не найдена в репозитории ScanNet")
            sys.exit(1)


    # Download the scene.
    print(f"Скачивание сцены {scene_id}...")
    os.makedirs(os.path.dirname(raw_data_dir), exist_ok=True)

    # Fetch the label-map file once if it is not cached yet.
    label_map_dir = os.path.join(os.path.dirname(raw_data_dir), "raw")
    if not os.path.exists(os.path.join(label_map_dir, LABEL_MAP_FILE)):
        download_label_map(label_map_dir)

    fts = FILETYPES
    # If a cached copy exists locally, reuse it and drop the file type from
    # the download list. NOTE(review): the .sens branch uses shutil.move,
    # i.e. it removes the cached original.
    local_sens = get_local_sens(scene_id)
    os.makedirs(scene_dir, exist_ok=True)
    if local_sens is not None:
        print(f"Сцена {scene_id} найдена локально, копируем её...")
        shutil.move(local_sens, os.path.join(scene_dir + '/'))
        fts = [ft for ft in FILETYPES if ft != '.sens']
    local_ply = get_local_ply(scene_id)
    if local_ply is not None:
        print(f"Облако точек {scene_id} найдено локально, копируем его...")
        shutil.copy(local_ply, os.path.join(scene_dir + '/'))
        fts = [ft for ft in fts if ft != '_vh_clean_2.ply']

    # Download whatever is still missing.
    success = download_scan(scene_id, scene_dir, fts)
    if not success:
        print(f"Не удалось скачать сцену {scene_id}")
        sys.exit(1)

    return scene_dir
191
+
192
def preprocess_scene(scene_id, raw_scene_dir, processed_dir):
    """Export color/depth/pose/intrinsics from the .sens file and copy the mesh.

    When the inputs or the reader script are missing, tiny placeholder
    files are written instead so the downstream steps can still run.
    """
    target_dir = os.path.join(processed_dir, scene_id)

    # Base directory for the scene.
    os.makedirs(target_dir, exist_ok=True)

    # Expected output layout.
    color_dir = os.path.join(target_dir, "color")
    depth_dir = os.path.join(target_dir, "depth")
    pose_dir = os.path.join(target_dir, "pose")
    intrinsic_dir = os.path.join(target_dir, "intrinsic")

    os.makedirs(color_dir, exist_ok=True)
    os.makedirs(depth_dir, exist_ok=True)
    os.makedirs(pose_dir, exist_ok=True)
    os.makedirs(intrinsic_dir, exist_ok=True)

    # Skip scenes that already have every artifact.
    if os.path.exists(os.path.join(target_dir, f"{scene_id}_vh_clean_2.ply")) and \
        len(os.listdir(color_dir)) > 0 and \
        len(os.listdir(depth_dir)) > 0 and \
        len(os.listdir(pose_dir)) > 0 and \
        os.path.exists(os.path.join(intrinsic_dir, "intrinsic_depth.txt")):
        print(f"Сцена {scene_id} уже предобработана")
        return

    print(f"Предобработка сцены {scene_id}...")

    # Absolute path to the .sens container.
    sens_file = os.path.abspath(os.path.join(raw_scene_dir, f"{scene_id}.sens"))

    # ScanNet's sensor-stream exporter.
    reader_path = "preprocess/scannet/reader.py"

    if os.path.exists(sens_file) and os.path.exists(reader_path):
        # Run reader.py from its own directory, with absolute paths.
        output_path = os.path.abspath(target_dir)
        command = f'cd {os.path.dirname(reader_path)} && python {os.path.basename(reader_path)} --filename "{sens_file}" --output_path "{output_path}" --export_color_images --export_depth_images --export_poses --export_intrinsics'

        print(f"Выполняем команду: {command}")
        os.system(command)

        # Fallback placeholder when the reader produced nothing.
        if not os.listdir(color_dir):
            print(f"ВНИМАНИЕ: Директория цветных изображений пуста: {color_dir}")
            print("Создаем тестовые файлы для продолжения процесса...")

            with open(os.path.join(color_dir, "0.jpg"), "w") as f:
                f.write("test")
    else:
        if not os.path.exists(sens_file):
            print(f"ВНИМАНИЕ: Файл .sens не найден: {sens_file}")
        if not os.path.exists(reader_path):
            print(f"ВНИМАНИЕ: reader.py не найден по пути: {reader_path}")

        print("Создаем базовую структуру директорий для продолжения процесса...")

        # Placeholder files: dummy frame, identity pose, default intrinsics.
        with open(os.path.join(color_dir, "0.jpg"), "w") as f:
            f.write("test")
        with open(os.path.join(depth_dir, "0.png"), "w") as f:
            f.write("test")
        with open(os.path.join(pose_dir, "0.txt"), "w") as f:
            f.write("1 0 0 0\n0 1 0 0\n0 0 1 0\n0 0 0 1")
        with open(os.path.join(intrinsic_dir, "intrinsic_depth.txt"), "w") as f:
            f.write("525.0 0.0 319.5\n0.0 525.0 239.5\n0.0 0.0 1.0")

    # Copy the point cloud next to the exports (placeholder if missing).
    ply_file = os.path.join(raw_scene_dir, f"{scene_id}_vh_clean_2.ply")
    if os.path.exists(ply_file):
        shutil.copyfile(ply_file, os.path.join(target_dir, f"{scene_id}_vh_clean_2.ply"))
        print(f"Облако точек скопировано в {target_dir}")
    else:
        print(f"ВНИМАНИЕ: Файл облака точек {ply_file} не найден!")
        print("Создаем пустое облако точек для продолжения процесса...")

        # Minimal 3-vertex ascii PLY, just enough for downstream loaders.
        with open(os.path.join(target_dir, f"{scene_id}_vh_clean_2.ply"), "w") as f:
            f.write("ply\nformat ascii 1.0\nelement vertex 3\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nend_header\n0 0 0 255 0 0\n1 0 0 0 255 0\n0 1 0 0 0 255\n")
273
+
274
def predict_masks(scene_id, processed_dir, cropformer_path):
    """Run CropFormer over the scene's color frames to produce 2D masks."""
    print(f"Предсказание 2D масок для сцены {scene_id}...")

    # ScanNet uses frames 0, 10, 20, ...
    scene_dir = os.path.join(processed_dir, scene_id)
    mask_dir = os.path.join(scene_dir, "output/mask")
    os.makedirs(mask_dir, exist_ok=True)

    # Dataset root = parent directory of processed_dir.
    root = os.path.dirname(processed_dir)

    # CropFormer demo entry point inside third_party.
    mask_predict_path = "third_party/detectron2/projects/CropFormer/demo_cropformer/mask_predict.py"

    if os.path.exists(mask_predict_path):
        # "*0.jpg" selects every 10th ScanNet frame.
        image_path_pattern = "color/*0.jpg"

        command = f'CUDA_VISIBLE_DEVICES={CUDA_ID} python {mask_predict_path} '\
            f'--config-file third_party/detectron2/projects/CropFormer/configs/entityv2/entity_segmentation/mask2former_hornet_3x.yaml '\
            f'--root {root} --image_path_pattern {image_path_pattern} --dataset scannet --seq_name_list {scene_id} '\
            f'--opts MODEL.WEIGHTS {cropformer_path}'

        print(f"Выполняем команду: {command}")
        os.system(command)

        # Verify the run actually produced mask files.
        if not os.listdir(mask_dir):
            print(f"ОШИБКА: CropFormer не создал маски в директории {mask_dir}")
            print("Проверьте, что CropFormer установлен и работает корректно.")
    else:
        print(f"ОШИБКА: mask_predict.py не найден по пути: {mask_predict_path}")
        print("Убедитесь, что CropFormer установлен правильно.")
308
+
309
def run_mask_clustering(scene_id, config):
    """Run the main MaskClustering pipeline (main.py) for one scene."""
    print(f"Запуск MaskClustering для сцены {scene_id}...")
    command = f'CUDA_VISIBLE_DEVICES={CUDA_ID} python main.py --config {config} --seq_name_list {scene_id}'
    print(f"Выполняем команду: {command}")
    os.system(command)
315
+
316
def evaluate_results_class_agnostic(gt_dir, config, dataset):
    """Run the class-agnostic evaluation script over the predictions."""
    print("Оценка class-agnostic результатов...")
    command = f'python -m evaluation.evaluate --pred_path data/prediction/{config}_class_agnostic --gt_path {gt_dir} --dataset {dataset} --no_class'
    print(f"Выполняем команду: {command}")
    os.system(command)
322
+
323
def main(scene_id, raw_data_dir, processed_dir, gt_dir, config, dataset):
    """Download + preprocess one scene and report the elapsed time.

    NOTE(review): reads the module-level ``args`` (set in the __main__
    block) for --skip_preprocess instead of taking it as a parameter;
    ``gt_dir``, ``config`` and ``dataset`` are currently unused here.
    """

    t_start = time.time()

    # Step 1: download raw data and preprocess the scene if requested.
    if not args.skip_preprocess:
        raw_scene_dir = check_and_download_scene(scene_id, raw_data_dir)
        preprocess_scene(scene_id, raw_scene_dir, processed_dir)

    t_end = time.time()
    print(f"Общее время обработки: {(t_end - t_start)/60:.2f} минут")
338
+
339
+ if __name__ == "__main__":
340
+
341
+ with open("/home/jovyan/users/bulat/workspace/3drec/MaskClustering/splits/scannet_all.txt") as f:
342
+ scene_ids = f.read().splitlines()
343
+ args = parse_args()
344
+ raw_data_dir = args.raw_data_dir
345
+ processed_dir = args.processed_root
346
+ gt_dir = args.gt_dir
347
+ config = args.config
348
+ dataset = "scannet"
349
+
350
+ for scene_id in tqdm(scene_ids):
351
+ main(scene_id, raw_data_dir, processed_dir, gt_dir, config, dataset)
352
+
353
+
354
+
355
+
MaskClustering/main.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from utils.config import get_dataset, get_args
3
+ from utils.post_process import post_process
4
+ from graph.construction import mask_graph_construction
5
+ from graph.iterative_clustering import iterative_clustering
6
+ from tqdm import tqdm
7
+ import os
8
+
9
def main(args):
    """Cluster the 2D masks of one sequence into 3D objects and save results.

    Args:
        args: parsed options; must provide ``seq_name`` (set by the caller),
            ``config``, ``step``, ``view_consensus_threshold`` and ``debug``.
    """
    dataset = get_dataset(args)
    # Early-out for sequences that were already processed. The original code
    # ran this check only AFTER loading the scene points and frame list,
    # wasting that work for every finished sequence.
    if os.path.exists(os.path.join(dataset.object_dict_dir, args.config, 'object_dict.npy')):
        return

    scene_points = dataset.get_scene_points()
    frame_list = dataset.get_frame_list(args.step)

    # Inference only: no gradients needed anywhere below.
    with torch.no_grad():
        nodes, observer_num_thresholds, mask_point_clouds, point_frame_matrix = mask_graph_construction(args, scene_points, frame_list, dataset)

    object_list = iterative_clustering(nodes, observer_num_thresholds, args.view_consensus_threshold, args.debug)

    post_process(dataset, object_list, mask_point_clouds, scene_points, point_frame_matrix, frame_list, args)
22
+
23
if __name__ == '__main__':
    args = get_args()
    # The CLI accepts a '+'-separated list of sequence names.
    seq_name_list = args.seq_name_list.split('+')

    for seq_name in tqdm(seq_name_list):
        # main() reads args.seq_name, mutated here per sequence.
        args.seq_name = seq_name
        main(args)
30
+
MaskClustering/make_bins.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+ import open3d as o3d
4
+ from tqdm.contrib.concurrent import thread_map, process_map
5
+ import numpy as np
6
+
7
def process_scene(data):
    """Convert one scene .ply into a packed float32 .bin of (x, y, z, r, g, b).

    Args:
        data: tuple (scene_dir, output_dir) where ``scene_dir`` is a Path to
            a .ply file and ``output_dir`` a Path to the destination folder.

    Returns:
        Path of the written .bin file, or None when the cloud has no colors.
    """
    scene_dir, output_dir = data
    point_cloud = o3d.io.read_point_cloud(scene_dir)
    xyz = np.asarray(point_cloud.points)

    rgb = np.array(point_cloud.colors)

    # NOTE(review): open3d colors are usually floats in [0, 1], in which case
    # this clip is a no-op; it only matters for already-8bit color input.
    rgb = np.clip(rgb, 0, 255)[:, :3]

    # Scenes without colors are skipped entirely.
    if not len(rgb):
        return None

    # If rgb is in [0, 1], rescale it to [0, 255].
    if rgb.max() <= 1:
        rgb = (rgb * 255)

    points = np.concatenate([xyz, rgb], axis=1).astype(np.float32)
    # Name the output after the scene's parent directory.
    output_path = output_dir / f"{scene_dir.parent.name}_point.bin"
    print(f"saving {scene_dir} to {output_path}")
    points.tofile(output_path)

    return output_path
31
+
32
def load_scan(pcd_path):
    """Read a packed float32 point file back as an (N, 6) xyz+rgb array."""
    raw = np.fromfile(pcd_path, dtype=np.float32)
    return raw.reshape(-1, 6)
35
+
36
def main():
    """Convert every processed scene's .ply into a packed .bin point file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str, required=True)
    parser.add_argument("-o", "--output", type=str, required=True)
    args = parser.parse_args()

    # Scenes live under <input>/processed/<scene_id>/<mesh>.ply
    input_dir = Path(args.input) / "processed"
    output_dir = Path(args.output)

    output_dir.mkdir(parents=True, exist_ok=True)

    input_files = list(input_dir.glob("*/*.ply"))
    # Pair every input file with the shared output directory.
    data = [*zip(input_files, [output_dir] * len(input_files))]

    # Convert scenes in parallel worker processes.
    process_map(process_scene, data, max_workers=16)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()
MaskClustering/make_pkl.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import pickle
4
+ from tqdm.auto import tqdm
5
+ from collections import defaultdict
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ from copy import deepcopy
9
+ import torch
10
+
11
class PredBBoxDistrPP:
    """Analyse predicted-bbox score distributions on ScanNet++ (84 classes)."""

    # Raw ScanNet++ semantic class ids, parallel to SCANNET_LABELS below.
    SCANNET_IDS = [4, 3, 6, 5, 9, 7, 8, 10, 12, 11, 14, 13, 23, 17, 18, 24, 25, 27, 28, 47, 88, 35, 36, 42, 45, 58, 49, 54, 56, 59, 60, 63, 67, 68, 102, 71, 72, 74, 81, 83, 90, 96, 122, 416, 106, 111, 117, 126, 129, 132, 155, 166, 173, 188, 300, 199, 204, 214, 219, 253, 299, 265, 273, 352, 295, 296, 301, 305, 312, 342, 358, 364, 368, 387, 395, 396, 403, 405, 414, 443, 469, 515, 744, 1157]

    # Human-readable class names, parallel to SCANNET_IDS.
    SCANNET_LABELS = ['table', 'door', 'ceiling lamp', 'cabinet', 'blinds', 'curtain', 'chair', 'storage cabinet', 'office chair', 'bookshelf', 'whiteboard', 'window', 'box',
                      'monitor', 'shelf', 'heater', 'kitchen cabinet', 'sofa', 'bed', 'trash can', 'book', 'plant', 'blanket', 'tv', 'computer tower', 'refrigerator', 'jacket',
                      'sink', 'bag', 'picture', 'pillow', 'towel', 'suitcase', 'backpack', 'crate', 'keyboard', 'rack', 'toilet', 'printer', 'poster', 'painting', 'microwave', 'shoes',
                      'socket', 'bottle', 'bucket', 'cushion', 'basket', 'shoe rack', 'telephone', 'file folder', 'laptop', 'plant pot', 'exhaust fan', 'cup', 'coat hanger', 'light switch',
                      'speaker', 'table lamp', 'kettle', 'smoke detector', 'container', 'power strip', 'slippers', 'paper bag', 'mouse', 'cutting board', 'toilet paper', 'paper towel',
                      'pot', 'clock', 'pan', 'tap', 'jar', 'soap dispenser', 'binder', 'bowl', 'tissue box', 'whiteboard eraser', 'toilet brush', 'spray bottle', 'headphones', 'stapler', 'marker']

    # id -> name and name -> id lookup tables.
    ID2LABEL = dict(zip(SCANNET_IDS, SCANNET_LABELS))

    LABEL2ID = dict(zip(SCANNET_LABELS, SCANNET_IDS))

    # Maps a raw class id to its positional index within SCANNET_IDS.
    INV_SCANNET_IDS = {idx: i for i, idx in enumerate(SCANNET_IDS)}
27
+
28
+ @staticmethod
29
+ def _normalize_scene_id(value):
30
+ base = os.path.basename(value)
31
+ if base.endswith('.bin'):
32
+ base = base[:-4]
33
+ else:
34
+ base = os.path.splitext(base)[0]
35
+ return base
36
+
37
    def __init__(self, path, bins_path, gt_pkl_path):
        """
        Args:
            path: directory containing per-scene prediction .npz files.
            bins_path: directory with packed .bin point files (stored only).
            gt_pkl_path: pkl file with ground-truth scene records.
        """
        self.path = path
        self.bins_path = bins_path
        self.gt_pkl_path = gt_pkl_path
        # get_scenes() populates self.scene_ids and self.gt_sample_counts.
        self.get_scenes()
        self.class_scores = defaultdict(list)
        for scene_id in self.scene_ids:
            self.get_scene_inst(scene_id)
        # Label names ordered by how common they are in the ground truth.
        self.sorted_names = sorted(self.SCANNET_LABELS, key=lambda x: self.gt_sample_counts[x])
47
+
48
    def load_pkl_scene_by_id(self, scene_id):
        """Return the scene record from the GT pkl matching *scene_id*.

        Accepts ids like ``sceneXXXX_YY`` as well as paths/filenames
        ending in ``.bin``. Returns None when no record matches.
        """
        target_id = self._normalize_scene_id(scene_id)
        with open(self.gt_pkl_path, 'rb') as file:
            data = pickle.load(file)
        for scene in data.get('data_list', []):
            lidar_path = scene.get('lidar_points', {}).get('lidar_path')
            if not lidar_path:
                continue
            # Compare ids after stripping directories and extensions.
            candidate_id = self._normalize_scene_id(lidar_path)
            if candidate_id == target_id:
                return scene
        return None
64
+
65
    def get_scenes(self):
        """Collect scene ids with predictions and count GT instances per label.

        NOTE(review): reads a hard-coded train pkl path for this
        environment; parameterise before reusing elsewhere.
        """
        self.scene_ids = []
        self.gt_sample_counts = defaultdict(int)
        with open('/home/jovyan/users/lemeshko/TMP/my_pkls/scannetpp_infos_84class_train.pkl', 'rb') as file:
            data = pickle.load(file)
        # Scenes that actually have a prediction file in self.path.
        picked_scenes = set(map(lambda x: x[:-4], os.listdir(self.path)))
        for scene in data['data_list']:
            scene_name = scene['lidar_points']['lidar_path'][:-4]
            if scene_name not in picked_scenes:
                continue
            self.scene_ids.append(scene_name)
            for instance in scene['instances']:
                # bbox_label_3d indexes into SCANNET_LABELS.
                inst_id = instance['bbox_label_3d']
                self.gt_sample_counts[self.SCANNET_LABELS[inst_id]] += 1
79
+
80
    def get_scene_inst(self, scene_id):
        """Accumulate per-class prediction scores from one scene's .npz file."""
        cls_path = f'{self.path}/{scene_id}.npz'
        cls_data = np.load(cls_path, allow_pickle=True)
        # Each prediction contributes its confidence to its class bucket.
        for class_id, class_score in zip(cls_data['pred_classes'], cls_data['pred_score']):
            self.class_scores[self.ID2LABEL[class_id]].append(class_score)
85
+
86
    def plot_class_distr(self, class_name='all'):
        """Plot the score distribution for one class, several, or all pooled.

        Parameters:
            class_name: str or list — a single class name, 'all' for every
                class pooled together, or a list of class names.

        Returns the list of scores that was plotted (None when the class is
        unknown or has no scores).
        """
        if class_name == 'all':
            # Pool the scores of every class.
            all_scores = []
            for scores in self.class_scores.values():
                all_scores.extend(scores)
            scores = all_scores
            display_name = 'All Classes'
        elif isinstance(class_name, list):
            # Pool the scores of the requested classes only.
            selected_scores = []
            for cls in class_name:
                if cls in self.class_scores:
                    selected_scores.extend(self.class_scores[cls])
                else:
                    print(f"Warning: Class '{cls}' not found in class_scores")
            scores = selected_scores
            display_name = f'Classes: {", ".join(class_name[:3])}{"..." if len(class_name) > 3 else ""}'
        else:
            # A single class name.
            if class_name not in self.class_scores:
                print(f"Class '{class_name}' not found in class_scores")
                print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
                return
            scores = self.class_scores[class_name]
            display_name = class_name

        if not scores:
            print(f"No scores available for: {display_name}")
            return

        fig, ax = plt.subplots(figsize=(12, 8))

        # Histogram with a KDE overlay; y-axis normalised to a density.
        sns.histplot(scores, bins=30, kde=True, ax=ax, color='skyblue',
                     stat='density', alpha=0.7)
        ax.set_title(f'Distribution of scores for {display_name}', fontsize=14, fontweight='bold')
        ax.set_xlabel('Score', fontsize=12)
        ax.set_ylabel('Density', fontsize=12)
        ax.grid(True, alpha=0.3)

        # Vertical markers: the median and the empirical "size bound"
        # percentile (32.45 — presumably tuned elsewhere; TODO confirm).
        median_score = np.median(scores)
        ax.axvline(median_score, color='green', linestyle='--', linewidth=2,
                   label=f'Median: {median_score:.3f}')
        ax.axvline(np.percentile(scores, 32.45), color='red', linestyle='-', linewidth=2,
                   label=f'Size bound: {np.percentile(scores, 32.45):.3f}')
        ax.legend()

        # Summary-statistics text block.
        if class_name == 'all':
            class_info = f"Total classes: {len(self.class_scores)}"
        elif isinstance(class_name, list):
            class_info = f"Selected classes: {len(class_name)}"
        else:
            class_info = f"Class: {class_name}"

        stats_text = f"""Statistics for {display_name}:
{class_info}
Total instances: {len(scores):,}
Mean: {np.mean(scores):.3f}
Median: {np.median(scores):.3f}
Std: {np.std(scores):.3f}
Min: {np.min(scores):.3f}
Max: {np.max(scores):.3f}
Q1: {np.percentile(scores, 25):.3f}
Q : {np.percentile(scores, 32.45):.3f}
Q3: {np.percentile(scores, 75):.3f}"""

        # Place the stats box in the upper-left corner of the axes.
        props = dict(boxstyle="round,pad=0.5", facecolor="lightgray", alpha=0.8)
        ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontfamily='monospace',
                verticalalignment='top', bbox=props, fontsize=10)

        plt.tight_layout()
        plt.show()

        # Echo the same statistics to the console.
        print(stats_text)

        return scores
181
+
182
# Extra helper: compare several classes on one figure
def plot_multiple_classes(self, class_names: list):
    """Overlay KDE score distributions of several classes on a single axis."""
    fig, ax = plt.subplots(figsize=(12, 8))

    palette = ['skyblue', 'lightcoral', 'lightgreen', 'gold', 'lightpink']

    for idx, cls in enumerate(class_names):
        if cls not in self.class_scores:
            print(f"Warning: Class '{cls}' not found, skipping")
            continue

        cls_scores = self.class_scores[cls]
        if cls_scores:
            # cycle through the palette when there are more classes than colors
            sns.kdeplot(cls_scores, ax=ax, label=cls, color=palette[idx % len(palette)],
                        linewidth=2, alpha=0.8)

    ax.set_title('Score Distribution Comparison', fontsize=14, fontweight='bold')
    ax.set_xlabel('Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()
209
+
210
def get_class_lowerbound(self, class_name='all', percentile=32.45):
    """Return the given percentile of scores for a class, list of classes, or all classes."""
    if class_name == 'all':
        # pool every recorded score
        pooled = [s for vals in self.class_scores.values() for s in vals]
    elif isinstance(class_name, list):
        pooled = []
        for cls in class_name:
            if cls in self.class_scores:
                pooled.extend(self.class_scores[cls])
            else:
                print(f"Warning: Class '{cls}' not found in class_scores")
    else:
        # a single specific class
        if class_name not in self.class_scores:
            print(f"Class '{class_name}' not found in class_scores")
            print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
            return
        pooled = self.class_scores[class_name]

    return np.percentile(pooled, percentile)
234
+
235
def get_bboxes_by_masks(self, masks, points):
    """Convert per-point instance masks into axis-aligned (center, size) boxes.

    The per-axis 1%/99% quantiles are used instead of min/max so that a few
    outlier points do not inflate the box extent.
    """
    per_mask_boxes = []
    for mask in masks:
        xyz = points[mask][:, :3]
        lo = xyz.quantile(0.01, dim=0)
        hi = xyz.quantile(0.99, dim=0)
        # box layout: (cx, cy, cz, dx, dy, dz)
        per_mask_boxes.append(torch.cat(((hi + lo) / 2, hi - lo)))
    assert len(per_mask_boxes) != 0, "Why 0 masks in scene?"
    return torch.stack(per_mask_boxes)
250
+
251
def get_scene_instances(self, scene_name, score_bounds, class_agnostic):
    """Build mmdet3d-style instances for one scene, keeping only predictions
    whose score exceeds the per-class lower bound in `score_bounds`."""
    points = torch.from_numpy(
        np.fromfile(f'{self.bins_path}/{scene_name}.bin', dtype=np.float32).reshape((-1, 6)))
    # bring the points into the GT coordinate frame: xyz @ R^T + t
    gt_scene = self.load_pkl_scene_by_id(scene_name)
    if gt_scene is not None and 'axis_align_matrix' in gt_scene:
        align = torch.as_tensor(np.array(gt_scene['axis_align_matrix'], dtype=np.float32))
        points[:, :3] = points[:, :3] @ align[:3, :3].T + align[:3, 3]

    cls_data = np.load(f'{self.path}/{scene_name}.npz', allow_pickle=True)
    pred_masks = torch.from_numpy(cls_data['pred_masks']).T
    boxes = self.get_bboxes_by_masks(pred_masks, points)

    instances = []
    for box, pred_class, pred_score in zip(boxes, cls_data['pred_classes'], cls_data['pred_score']):
        # classes absent from score_bounds get a bound of 0 (kept if score > 0)
        if pred_score > score_bounds.get(pred_class, 0):
            label = 0 if class_agnostic else self.INV_SCANNET_IDS[pred_class]
            instances.append({'bbox_3d': box.numpy().tolist(), 'bbox_label_3d': label})
    return instances
274
+
275
def filter_instances_topk_by_gt(self, scene_name, class_agnostic=True):
    """Keep only the top-K predicted instances, where K = number of GT instances.

    Steps:
      1) convert every predicted mask into a 3D bbox
      2) sort predictions by descending score
      3) keep the top-K, K being the GT instance count from the PKL
    Returns instances in mmdet3d format (bbox_3d, bbox_label_3d).
    """
    scene_id = self._normalize_scene_id(scene_name)
    gt_scene = self.load_pkl_scene_by_id(scene_name)
    gt_count = len(gt_scene.get('instances', [])) if gt_scene else 0
    if gt_count <= 0:
        return []

    points_path = f'{self.bins_path}/{scene_id}.bin'
    points = torch.from_numpy(np.fromfile(points_path, dtype=np.float32).reshape((-1, 6)))
    # bring the points into the GT coordinate frame: xyz @ R^T + t
    # (BUGFIX: removed a stray debug `print(a)` of the 4x4 alignment matrix)
    if gt_scene is not None and 'axis_align_matrix' in gt_scene:
        a = torch.as_tensor(np.array(gt_scene['axis_align_matrix'], dtype=np.float32))
        R = a[:3, :3]
        t = a[:3, 3]
        points[:, :3] = points[:, :3] @ R.T + t

    cls_data = np.load(f'{self.path}/{scene_id}.npz', allow_pickle=True)
    pred_masks = torch.from_numpy(cls_data['pred_masks']).T
    pred_classes = cls_data['pred_classes']
    pred_scores = cls_data['pred_score']

    if len(pred_scores) == 0:
        return []

    topk = int(min(gt_count, len(pred_scores)))
    np_topk_indices = np.argsort(-pred_scores)[:topk]

    # compute boxes only for the selected masks
    torch_topk_indices = torch.as_tensor(np_topk_indices, dtype=torch.long)
    boxes = self.get_bboxes_by_masks(pred_masks[torch_topk_indices], points)
    selected_classes = pred_classes[np_topk_indices]

    instances = []
    for box, pred_class in zip(boxes, selected_classes):
        write_class = 0 if class_agnostic else self.INV_SCANNET_IDS[pred_class]
        instances.append({'bbox_3d': box.numpy().tolist(), 'bbox_label_3d': write_class})
    return instances
323
+
324
+
325
def make_pkl(self, percentiles, pkl_path, class_agnostic=True):
    """Write a new PKL whose instances are score-filtered predictions.

    `percentiles` is an iterable of (classes, percentile) pairs; each pair
    defines the score lower bound for the named classes via
    get_class_lowerbound. Scenes without a prediction .npz are skipped.
    """
    # build a per-class-id score lower bound table
    score_bounds = {}
    for classes, percentile in percentiles:
        bound = self.get_class_lowerbound(classes, percentile)
        if classes == 'all':
            classes = self.sorted_names
        if isinstance(classes, list):
            for class_ in classes:
                score_bounds[self.LABEL2ID[class_]] = bound
        else:
            score_bounds[self.LABEL2ID[classes]] = bound
    print(score_bounds)

    with open(self.gt_pkl_path, 'rb') as file:
        data = pickle.load(file)

    new_data = {}
    new_data['metainfo'] = data['metainfo']
    picked_scenes = {fname[:-4] for fname in os.listdir(self.path)}
    data_list = []
    for scene in tqdm(data['data_list']):
        scene_name = scene['lidar_points']['lidar_path'][:-4]
        if scene_name not in picked_scenes:
            continue
        scene_copy = deepcopy(scene)
        scene_copy['instances'] = self.get_scene_instances(scene_name, score_bounds, class_agnostic)
        data_list.append(scene_copy)
    new_data['data_list'] = data_list

    with open(pkl_path, 'wb') as file:
        pickle.dump(new_data, file)
356
+
357
@property
def scores(self):
    """Read-only accessor for the accumulated per-class score lists."""
    return self.class_scores
360
+
361
+
362
if __name__ == "__main__":
    # Hard-coded experiment paths (scannetpp, dust3r posed, train split)
    pred_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/scannetpp_dust3r_posed"
    bins_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannetpp/bins/points_dust3r_posed"
    out_pkl_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannetpp/bins/scannetpp84_dust3r_posed_train10.pkl"
    gt_pkl_path = \
        "/home/jovyan/users/lemeshko/TMP/my_pkls/scannetpp_infos_84class_train.pkl"

    distr = PredBBoxDistrPP(pred_path, bins_path, gt_pkl_path)

    with open(gt_pkl_path, 'rb') as file:
        gt_data = pickle.load(file)

    # only keep GT scenes for which a prediction file exists
    picked_scenes = {fname[:-4] for fname in os.listdir(distr.path)}
    data_list = []
    for scene in tqdm(gt_data['data_list']):
        scene_name = distr._normalize_scene_id(scene['lidar_points']['lidar_path'])
        if scene_name not in picked_scenes:
            continue
        scene_copy = deepcopy(scene)
        # replace GT instances with top-K predictions (class-aware labels)
        scene_copy['instances'] = distr.filter_instances_topk_by_gt(scene_name, class_agnostic=False)
        data_list.append(scene_copy)

    new_data = {"metainfo": gt_data["metainfo"], "data_list": data_list}
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(new_data, f)
MaskClustering/make_pkl_arkit.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import pickle
4
+ from tqdm.auto import tqdm
5
+ from collections import defaultdict
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ from copy import deepcopy
9
+ import torch
10
+ from tqdm.contrib.concurrent import thread_map
11
+
12
+ class PredBBoxDistrPP:
13
+
14
+
15
@staticmethod
def _normalize_scene_id(value):
    """Return the ARKit scene id: everything before the first underscore."""
    return value.partition("_")[0]
18
+
19
def __init__(self, path, bins_path, gt_pkl_path, confidence_threshold=0.0, topk=True):
    """Store paths and filtering settings.

    path: directory with per-scene prediction .npz files
    bins_path: directory with per-scene point .bin files
    gt_pkl_path: mmdet3d GT annotation pickle
    confidence_threshold: minimum prediction score to keep
    topk: when True, additionally cap predictions at the GT instance count
    """
    self.path = path
    self.bins_path = bins_path
    self.gt_pkl_path = gt_pkl_path
    # scores are accumulated lazily, keyed by class (class-agnostic here: key 0)
    self.class_scores = defaultdict(list)
    self.confidence_threshold = confidence_threshold
    self.topk = topk
27
+
28
def load_pkl_scene_by_id(self, scene_id):
    """Return the scene dict from the GT PKL matching scene_id, or None.

    Accepts bare scene ids as well as paths/filenames (normalised through
    _normalize_scene_id before comparison).
    """
    target_id = self._normalize_scene_id(scene_id)
    with open(self.gt_pkl_path, 'rb') as file:
        data = pickle.load(file)
    for scene in data.get('data_list', []):
        lidar_path = scene.get('lidar_points', {}).get('lidar_path')
        if not lidar_path:
            continue
        if self._normalize_scene_id(lidar_path) == target_id:
            return scene
    return None
44
+
45
def get_scenes(self):
    """Collect scene ids that have predictions and count GT instances.

    Fills self.scene_ids (scenes present both in the GT PKL and in self.path)
    and self.gt_sample_counts — class-agnostic, so every instance is counted
    under the single bucket 0.
    (Cleanup: removed an unused `inst_id` local and the per-instance loop.)
    """
    self.scene_ids = []
    self.gt_sample_counts = defaultdict(int)
    with open(self.gt_pkl_path, 'rb') as file:
        data = pickle.load(file)
    # scene names derived from prediction filenames (strip the .npz suffix)
    picked_scenes = {fname[:-4] for fname in os.listdir(self.path)}
    for scene in data['data_list']:
        scene_name = scene['lidar_points']['lidar_path'][:-4]
        if scene_name not in picked_scenes:
            continue
        self.scene_ids.append(scene_name)
        self.gt_sample_counts[0] += len(scene['instances'])
59
+
60
def get_scene_inst(self, scene_id):
    """Accumulate every predicted score of the scene into bucket 0.

    Class-agnostic: the predicted class ids are read but deliberately ignored.
    """
    cls_data = np.load(f'{self.path}/{scene_id}.npz', allow_pickle=True)
    for _class_id, score in zip(cls_data['pred_classes'], cls_data['pred_score']):
        self.class_scores[0].append(score)
65
+
66
def plot_class_distr(self, class_name='all'):
    """Plot the score distribution for one class, a list of classes, or all classes.

    Parameters:
        class_name: str or list - a class name, 'all' for every class pooled
            together, or a list of class names.

    Returns the list of scores that was plotted, or None when nothing matched.
    """
    if class_name == 'all':
        # pool scores across every known class
        scores = [s for vals in self.class_scores.values() for s in vals]
        display_name = 'All Classes'
    elif isinstance(class_name, list):
        # pool scores from the requested classes only
        scores = []
        for cls in class_name:
            if cls in self.class_scores:
                scores.extend(self.class_scores[cls])
            else:
                print(f"Warning: Class '{cls}' not found in class_scores")
        display_name = f'Classes: {", ".join(class_name[:3])}{"..." if len(class_name) > 3 else ""}'
    else:
        # a single specific class
        if class_name not in self.class_scores:
            print(f"Class '{class_name}' not found in class_scores")
            print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
            return
        scores = self.class_scores[class_name]
        display_name = class_name

    if not scores:
        print(f"No scores available for: {display_name}")
        return

    fig, ax = plt.subplots(figsize=(12, 8))

    # histogram with a KDE overlay, normalised to a density
    sns.histplot(scores, bins=30, kde=True, ax=ax, color='skyblue',
                 stat='density', alpha=0.7)
    ax.set_title(f'Distribution of scores for {display_name}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.grid(True, alpha=0.3)

    # vertical marker at the median
    median_score = np.median(scores)
    ax.axvline(median_score, color='green', linestyle='--', linewidth=2,
               label=f'Median: {median_score:.3f}')
    # marker at the 32.45th percentile used as the score lower bound elsewhere
    ax.axvline(np.percentile(scores, 32.45), color='red', linestyle='-', linewidth=2,
               label=f'Size bound: {np.percentile(scores, 32.45):.3f}')
    ax.legend()

    if class_name == 'all':
        class_info = f"Total classes: {len(self.class_scores)}"
    elif isinstance(class_name, list):
        class_info = f"Selected classes: {len(class_name)}"
    else:
        class_info = f"Class: {class_name}"

    stats_text = f"""Statistics for {display_name}:
{class_info}
Total instances: {len(scores):,}
Mean: {np.mean(scores):.3f}
Median: {np.median(scores):.3f}
Std: {np.std(scores):.3f}
Min: {np.min(scores):.3f}
Max: {np.max(scores):.3f}
Q1: {np.percentile(scores, 25):.3f}
Q : {np.percentile(scores, 32.45):.3f}
Q3: {np.percentile(scores, 75):.3f}"""

    # summary statistics box in the axes corner
    props = dict(boxstyle="round,pad=0.5", facecolor="lightgray", alpha=0.8)
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontfamily='monospace',
            verticalalignment='top', bbox=props, fontsize=10)

    plt.tight_layout()
    plt.show()

    # echo the same statistics to the console
    print(stats_text)

    return scores
161
+
162
# Extra helper: compare several classes on one figure
def plot_multiple_classes(self, class_names: list):
    """Overlay KDE score distributions of several classes on a single axis."""
    fig, ax = plt.subplots(figsize=(12, 8))

    palette = ['skyblue', 'lightcoral', 'lightgreen', 'gold', 'lightpink']

    for idx, cls in enumerate(class_names):
        if cls not in self.class_scores:
            print(f"Warning: Class '{cls}' not found, skipping")
            continue

        cls_scores = self.class_scores[cls]
        if cls_scores:
            # cycle through the palette when there are more classes than colors
            sns.kdeplot(cls_scores, ax=ax, label=cls, color=palette[idx % len(palette)],
                        linewidth=2, alpha=0.8)

    ax.set_title('Score Distribution Comparison', fontsize=14, fontweight='bold')
    ax.set_xlabel('Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()
189
+
190
def get_class_lowerbound(self, class_name='all', percentile=32.45):
    """Return the given percentile of scores for a class, list of classes, or all classes."""
    if class_name == 'all':
        # pool every recorded score
        pooled = [s for vals in self.class_scores.values() for s in vals]
    elif isinstance(class_name, list):
        pooled = []
        for cls in class_name:
            if cls in self.class_scores:
                pooled.extend(self.class_scores[cls])
            else:
                print(f"Warning: Class '{cls}' not found in class_scores")
    else:
        # a single specific class
        if class_name not in self.class_scores:
            print(f"Class '{class_name}' not found in class_scores")
            print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
            return
        pooled = self.class_scores[class_name]

    return np.percentile(pooled, percentile)
214
+
215
def get_bboxes_by_masks(self, masks, points):
    """Convert per-point instance masks into 7-DoF boxes (cx, cy, cz, dx, dy, dz, yaw).

    The per-axis 1%/99% quantiles are used instead of min/max so that a few
    outlier points do not inflate the box extent; yaw is always 0 because the
    boxes are axis-aligned.
    """
    per_mask_boxes = []
    for mask in masks:
        xyz = points[mask][:, :3]
        lo = xyz.quantile(0.01, dim=0)
        hi = xyz.quantile(0.99, dim=0)
        # one trailing zero element encodes the (fixed) yaw angle
        per_mask_boxes.append(torch.cat(((hi + lo) / 2, hi - lo, torch.zeros_like(lo)[:1])))
    assert len(per_mask_boxes) != 0, "Why 0 masks in scene?"
    return torch.stack(per_mask_boxes)
230
+
231
def get_scene_instances(self, scene_name, score_bounds, class_agnostic):
    """Build mmdet3d-style instances for one scene, keeping only predictions
    whose score exceeds the per-class lower bound in `score_bounds`.

    Output labels are always 0 (class-agnostic); the `class_agnostic` flag is
    accepted for interface parity but does not change the label here.
    """
    points = torch.from_numpy(
        np.fromfile(f'{self.bins_path}/{scene_name}.bin', dtype=np.float32).reshape((-1, 6)))
    # bring the points into the GT coordinate frame: xyz @ R^T + t
    gt_scene = self.load_pkl_scene_by_id(scene_name)
    if gt_scene is not None and 'axis_align_matrix' in gt_scene:
        align = torch.as_tensor(np.array(gt_scene['axis_align_matrix'], dtype=np.float32))
        points[:, :3] = points[:, :3] @ align[:3, :3].T + align[:3, 3]

    cls_data = np.load(f'{self.path}/{scene_name}.npz', allow_pickle=True)
    pred_masks = torch.from_numpy(cls_data['pred_masks']).T
    boxes = self.get_bboxes_by_masks(pred_masks, points)

    instances = []
    for box, pred_class, pred_score in zip(boxes, cls_data['pred_classes'], cls_data['pred_score']):
        # classes absent from score_bounds get a bound of 0 (kept if score > 0)
        if pred_score > score_bounds.get(pred_class, 0):
            instances.append({'bbox_3d': box.numpy().tolist(), 'bbox_label_3d': 0})
    return instances
254
+
255
def filter_instances_topk_by_gt(self, scene_name, class_agnostic=True):
    """Filter predicted instances by confidence, then optionally keep the top-K.

    Steps:
      1) drop predictions whose score is below self.confidence_threshold
         (BUGFIX: the mask used to be computed but never applied, so the
         constructor's confidence_threshold had no effect)
      2) sort the survivors by descending score
      3) keep the top-K where K = GT instance count when self.topk is True,
         otherwise keep all survivors
    Returns instances in mmdet3d format (bbox_3d, bbox_label_3d); labels are
    always 0 (class-agnostic).
    """
    scene_id = self._normalize_scene_id(scene_name)
    gt_scene = self.load_pkl_scene_by_id(scene_name)
    gt_count = len(gt_scene.get('instances', [])) if gt_scene else 0
    if gt_count <= 0:
        return []

    points_path = f'{self.bins_path}/{scene_id}_point.bin'
    points = torch.from_numpy(np.fromfile(points_path, dtype=np.float32).reshape((-1, 6)))
    # bring the points into the GT coordinate frame: xyz @ R^T + t
    # (removed a stray debug `print(a)` of the alignment matrix)
    if gt_scene is not None and 'axis_align_matrix' in gt_scene:
        a = torch.as_tensor(np.array(gt_scene['axis_align_matrix'], dtype=np.float32))
        R = a[:3, :3]
        t = a[:3, 3]
        points[:, :3] = points[:, :3] @ R.T + t

    cls_data = np.load(f'{self.path}/{scene_id}.npz', allow_pickle=True)
    pred_masks = torch.from_numpy(cls_data['pred_masks']).T
    pred_classes = cls_data['pred_classes']
    pred_scores = cls_data['pred_score']

    # confidence filter — now actually applied to all three arrays
    keep = pred_scores >= self.confidence_threshold
    pred_masks = pred_masks[torch.as_tensor(keep, dtype=torch.bool)]
    pred_classes = pred_classes[keep]
    pred_scores = pred_scores[keep]

    if len(pred_scores) == 0:
        return []

    topk = int(min(gt_count, len(pred_scores))) if self.topk else len(pred_scores)
    np_topk_indices = np.argsort(-pred_scores)[:topk]

    # compute boxes only for the selected masks
    torch_topk_indices = torch.as_tensor(np_topk_indices, dtype=torch.long)
    boxes = self.get_bboxes_by_masks(pred_masks[torch_topk_indices], points)
    selected_classes = pred_classes[np_topk_indices]

    instances = []
    for box, pred_class in zip(boxes, selected_classes):
        instances.append({'bbox_3d': box.numpy().tolist(), 'bbox_label_3d': 0})
    return instances
312
+
313
+
314
@property
def scores(self):
    """Read-only accessor for the accumulated per-class score lists."""
    return self.class_scores
317
+
318
+
319
if __name__ == "__main__":
    # Hard-coded experiment paths (ARKitScenes, VGGT points, train split)
    pred_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/arkit_vggt"
    bins_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/arkitscenes/points_vggt"
    out_pkl_path = \
        "arkit_vggt_ca_ct05_topk_false.pkl"
    gt_pkl_path = \
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/arkitscenes/arkitscenes_offline_infos_train.pkl"
    confidence_threshold = 0.5
    distr = PredBBoxDistrPP(pred_path, bins_path, gt_pkl_path,
                            confidence_threshold=confidence_threshold, topk=False)

    with open(gt_pkl_path, 'rb') as file:
        gt_data = pickle.load(file)

    # only keep GT scenes whose point bin exists; remember their positions
    picked_scenes = {fname.split("_")[0] for fname in os.listdir(bins_path)}
    scene_names = [distr._normalize_scene_id(s['lidar_points']['lidar_path'])
                   for s in gt_data['data_list']]
    indices = [i for i, name in enumerate(scene_names) if name in picked_scenes]
    data = [name for name in scene_names if name in picked_scenes]

    # parallel per-scene filtering (I/O bound -> threads)
    instances = thread_map(distr.filter_instances_topk_by_gt, data, chunksize=128)

    data_list = []
    for i, inst in enumerate(instances):
        scene_copy = deepcopy(gt_data['data_list'][indices[i]])
        scene_copy['instances'] = inst
        data_list.append(scene_copy)

    new_data = {"metainfo": gt_data["metainfo"], "data_list": data_list}
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(new_data, f)
MaskClustering/make_pkl_conf.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import pickle
4
+ from tqdm.auto import tqdm
5
+ from collections import defaultdict
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import torch
9
+
10
+ class PredBBoxDistrPP:
11
+
12
+ SCANNET_IDS = [4, 3, 6, 5, 9, 7, 8, 10, 12, 11, 14, 13, 23, 17, 18, 24, 25, 27, 28, 47, 88, 35, 36, 42, 45, 58, 49, 54, 56, 59, 60, 63, 67, 68, 102, 71, 72, 74, 81, 83, 90, 96, 122, 416, 106, 111, 117, 126, 129, 132, 155, 166, 173, 188, 300, 199, 204, 214, 219, 253, 299, 265, 273, 352, 295, 296, 301, 305, 312, 342, 358, 364, 368, 387, 395, 396, 403, 405, 414, 443, 469, 515, 744, 1157]
13
+
14
+ SCANNET_LABELS = ['table', 'door', 'ceiling lamp', 'cabinet', 'blinds', 'curtain', 'chair', 'storage cabinet', 'office chair', 'bookshelf', 'whiteboard', 'window', 'box',
15
+ 'monitor', 'shelf', 'heater', 'kitchen cabinet', 'sofa', 'bed', 'trash can', 'book', 'plant', 'blanket', 'tv', 'computer tower', 'refrigerator', 'jacket',
16
+ 'sink', 'bag', 'picture', 'pillow', 'towel', 'suitcase', 'backpack', 'crate', 'keyboard', 'rack', 'toilet', 'printer', 'poster', 'painting', 'microwave', 'shoes',
17
+ 'socket', 'bottle', 'bucket', 'cushion', 'basket', 'shoe rack', 'telephone', 'file folder', 'laptop', 'plant pot', 'exhaust fan', 'cup', 'coat hanger', 'light switch',
18
+ 'speaker', 'table lamp', 'kettle', 'smoke detector', 'container', 'power strip', 'slippers', 'paper bag', 'mouse', 'cutting board', 'toilet paper', 'paper towel',
19
+ 'pot', 'clock', 'pan', 'tap', 'jar', 'soap dispenser', 'binder', 'bowl', 'tissue box', 'whiteboard eraser', 'toilet brush', 'spray bottle', 'headphones', 'stapler', 'marker']
20
+
21
+ ID2LABEL = dict(zip(SCANNET_IDS, SCANNET_LABELS))
22
+
23
+ LABEL2ID = dict(zip(SCANNET_LABELS, SCANNET_IDS))
24
+
25
+ INV_SCANNET_IDS = {idx: i for i, idx in enumerate(SCANNET_IDS)}
26
+
27
@staticmethod
def _normalize_scene_id(value):
    """Strip directory and extension from a scene path or filename."""
    base = os.path.basename(value)
    if base.endswith('.bin'):
        return base[:-4]
    return os.path.splitext(base)[0]
35
+
36
def __init__(self, path, bins_path):
    """Scan predictions and point bins, then accumulate per-class score lists.

    path: directory with per-scene prediction .npz files
    bins_path: directory with per-scene point .bin files
    """
    self.path = path
    self.bins_path = bins_path
    self.get_scenes()
    self.class_scores = defaultdict(list)
    for scene_id in self.scene_ids:
        self.get_scene_inst(scene_id)
    # class names ordered by descending number of predictions
    self.sorted_names = sorted(self.SCANNET_LABELS,
                               key=lambda name: -len(self.class_scores.get(name, [])))
45
+
46
+ # GT-PKL больше не используется
47
+
48
def get_scenes(self):
    """Fill self.scene_ids with scenes that have both a prediction .npz and a point .bin."""
    self.scene_ids = []
    # tolerate a missing prediction directory: leave the list empty
    if not os.path.isdir(self.path):
        return
    for fname in os.listdir(self.path):
        if not fname.endswith('.npz'):
            continue
        scene_name = os.path.splitext(fname)[0]
        if os.path.exists(os.path.join(self.bins_path, f"{scene_name}.bin")):
            self.scene_ids.append(scene_name)
58
+
59
def get_scene_inst(self, scene_id):
    """Accumulate the scene's predicted scores into class_scores, keyed by class label."""
    cls_data = np.load(f'{self.path}/{scene_id}.npz', allow_pickle=True)
    for class_id, score in zip(cls_data['pred_classes'], cls_data['pred_score']):
        self.class_scores[self.ID2LABEL[class_id]].append(score)
64
+
65
def plot_class_distr(self, class_name='all'):
    """Plot the score distribution for one class, a list of classes, or all classes.

    Parameters:
        class_name: str or list - a class name, 'all' for every class pooled
            together, or a list of class names.

    Returns the list of scores that was plotted, or None when nothing matched.
    """
    if class_name == 'all':
        # pool scores across every known class
        scores = [s for vals in self.class_scores.values() for s in vals]
        display_name = 'All Classes'
    elif isinstance(class_name, list):
        # pool scores from the requested classes only
        scores = []
        for cls in class_name:
            if cls in self.class_scores:
                scores.extend(self.class_scores[cls])
            else:
                print(f"Warning: Class '{cls}' not found in class_scores")
        display_name = f'Classes: {", ".join(class_name[:3])}{"..." if len(class_name) > 3 else ""}'
    else:
        # a single specific class
        if class_name not in self.class_scores:
            print(f"Class '{class_name}' not found in class_scores")
            print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
            return
        scores = self.class_scores[class_name]
        display_name = class_name

    if not scores:
        print(f"No scores available for: {display_name}")
        return

    fig, ax = plt.subplots(figsize=(12, 8))

    # histogram with a KDE overlay, normalised to a density
    sns.histplot(scores, bins=30, kde=True, ax=ax, color='skyblue',
                 stat='density', alpha=0.7)
    ax.set_title(f'Distribution of scores for {display_name}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.grid(True, alpha=0.3)

    # vertical marker at the median
    median_score = np.median(scores)
    ax.axvline(median_score, color='green', linestyle='--', linewidth=2,
               label=f'Median: {median_score:.3f}')
    # marker at the 32.45th percentile used as the score lower bound elsewhere
    ax.axvline(np.percentile(scores, 32.45), color='red', linestyle='-', linewidth=2,
               label=f'Size bound: {np.percentile(scores, 32.45):.3f}')
    ax.legend()

    if class_name == 'all':
        class_info = f"Total classes: {len(self.class_scores)}"
    elif isinstance(class_name, list):
        class_info = f"Selected classes: {len(class_name)}"
    else:
        class_info = f"Class: {class_name}"

    stats_text = f"""Statistics for {display_name}:
{class_info}
Total instances: {len(scores):,}
Mean: {np.mean(scores):.3f}
Median: {np.median(scores):.3f}
Std: {np.std(scores):.3f}
Min: {np.min(scores):.3f}
Max: {np.max(scores):.3f}
Q1: {np.percentile(scores, 25):.3f}
Q : {np.percentile(scores, 32.45):.3f}
Q3: {np.percentile(scores, 75):.3f}"""

    # summary statistics box in the axes corner
    props = dict(boxstyle="round,pad=0.5", facecolor="lightgray", alpha=0.8)
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontfamily='monospace',
            verticalalignment='top', bbox=props, fontsize=10)

    plt.tight_layout()
    plt.show()

    # echo the same statistics to the console
    print(stats_text)

    return scores
160
+
161
+ # Дополнительный метод для сравнения нескольких классов
162
def plot_multiple_classes(self, class_names: list):
    """Overlay the score KDE curves of several classes on one figure.

    Unknown class names are reported to stdout and skipped; classes with
    an empty score list are silently ignored.

    Args:
        class_names: list of class names to look up in ``self.class_scores``.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Fixed palette, cycled when more classes than colors are requested.
    palette = ['skyblue', 'lightcoral', 'lightgreen', 'gold', 'lightpink']

    for idx, name in enumerate(class_names):
        if name not in self.class_scores:
            print(f"Warning: Class '{name}' not found, skipping")
            continue
        per_class = self.class_scores[name]
        if per_class:
            sns.kdeplot(per_class, ax=ax, label=name,
                        color=palette[idx % len(palette)],
                        linewidth=2, alpha=0.8)

    ax.set_title('Score Distribution Comparison', fontsize=14, fontweight='bold')
    ax.set_xlabel('Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()
188
+
189
def get_class_lowerbound(self, class_name='all', percentile=32.45):
    """Return the given percentile of confidence scores for a class selection.

    Args:
        class_name: ``'all'`` to pool scores over every class, a list of
            class names, or a single class name from ``self.class_scores``.
        percentile: percentile in [0, 100] computed over the pooled scores.

    Returns:
        The percentile value as a float, or ``None`` when the requested
        class is unknown or the selection contains no scores.
    """
    if class_name == 'all':
        # Pool scores across every known class.
        scores = []
        for per_class in self.class_scores.values():
            scores.extend(per_class)
    elif isinstance(class_name, list):
        scores = []
        for cls in class_name:
            if cls in self.class_scores:
                scores.extend(self.class_scores[cls])
            else:
                print(f"Warning: Class '{cls}' not found in class_scores")
    else:
        # A single specific class.
        if class_name not in self.class_scores:
            print(f"Class '{class_name}' not found in class_scores")
            print(f"Available classes: {list(self.class_scores.keys())[:10]}...")
            return None
        scores = self.class_scores[class_name]

    if not scores:
        # np.percentile raises on an empty sequence; mirror the
        # "class not found" branch and return None instead of crashing.
        print("Warning: no scores available for the requested selection")
        return None
    return np.percentile(scores, percentile)
213
+
214
def get_bboxes_by_masks(self, masks, points):
    """Compute an axis-aligned 3D box (center + size) per instance mask.

    The 1st/99th coordinate quantiles are used as box extents, so a few
    outlier points do not inflate the box.

    Args:
        masks: iterable of boolean point masks, one per instance.
        points: (N, >=3) tensor; only the first three columns (xyz) are used.

    Returns:
        (num_masks, 6) tensor of [cx, cy, cz, sx, sy, sz] rows.
    """
    boxes = []
    for instance_mask in masks:
        xyz = points[instance_mask][:, :3]
        # Robust extents via quantiles instead of raw min/max.
        lo = xyz.quantile(0.01, dim=0)
        hi = xyz.quantile(0.99, dim=0)
        boxes.append(torch.cat(((hi + lo) / 2, hi - lo)))
    assert len(boxes) != 0, "Why 0 masks in scene?"
    return torch.stack(boxes)
229
+
230
def filter_instances_by_confidence(self, scene_name, threshold=0.5, class_agnostic=False):
    """Load per-scene predictions and keep instances scoring >= threshold.

    Args:
        scene_name: scene id used to locate ``<bins_path>/<scene>.bin`` and
            ``<path>/<scene>.npz``.
        threshold: minimum prediction confidence for an instance to be kept.
        class_agnostic: when True every instance is labelled 0, otherwise the
            predicted class is remapped through ``self.INV_SCANNET_IDS``.

    Returns:
        List of ``{'bbox_3d': [cx, cy, cz, sx, sy, sz], 'bbox_label_3d': int}``
        dicts; empty when inputs are missing or nothing passes the threshold.
    """
    points_path = f'{self.bins_path}/{scene_name}.bin'
    cls_path = f'{self.path}/{scene_name}.npz'
    if not os.path.exists(points_path) or not os.path.exists(cls_path):
        return []

    # Point cloud is stored as flat float32 with 6 features per point.
    raw = np.fromfile(points_path, dtype=np.float32).reshape((-1, 6))
    points = torch.from_numpy(raw)

    preds = np.load(cls_path, allow_pickle=True)
    scores = preds['pred_score']
    if len(scores) == 0:
        return []
    keep = np.where(scores >= threshold)[0]
    if keep.size == 0:
        return []

    keep_idx = torch.as_tensor(keep, dtype=torch.long)
    # Masks are stored (num_points, num_instances); transpose to per-instance rows.
    kept_masks = torch.from_numpy(preds['pred_masks']).T[keep_idx]
    boxes = self.get_bboxes_by_masks(kept_masks, points)
    kept_classes = preds['pred_classes'][keep]

    instances = []
    for box, pred_class in zip(boxes, kept_classes):
        label = 0 if class_agnostic else self.INV_SCANNET_IDS[pred_class]
        instances.append({'bbox_3d': box.numpy().tolist(), 'bbox_label_3d': label})
    return instances
256
+
257
+ # GT-based filtering was removed; use filter_instances_by_confidence instead
258
+
259
+
260
def build_pkl_by_confidence(self, pkl_path, threshold=0.5, class_agnostic=False):
    """Write an mmdet3d-style annotation pickle of confidence-filtered instances.

    Iterates over ``self.scene_ids``, filters each scene's predictions with
    :meth:`filter_instances_by_confidence`, and dumps a
    ``{'metainfo', 'data_list'}`` dict to ``pkl_path``.

    Args:
        pkl_path: destination path for the pickle file.
        threshold: minimum prediction confidence per instance.
        class_agnostic: forwarded to ``filter_instances_by_confidence``.
    """
    # ScanNet++ 84-class label map (fixed by the downstream dataset config).
    metainfo = {'categories': {'table': 0, 'door': 1, 'ceiling lamp': 2, 'cabinet': 3, 'blinds': 4, 'curtain': 5, 'chair': 6, 'storage cabinet': 7, 'office chair': 8, 'bookshelf': 9, 'whiteboard': 10, 'window': 11, 'box': 12, 'monitor': 13, 'shelf': 14, 'heater': 15, 'kitchen cabinet': 16, 'sofa': 17, 'bed': 18, 'trash can': 19, 'book': 20, 'plant': 21, 'blanket': 22, 'tv': 23, 'computer tower': 24, 'refrigerator': 25, 'jacket': 26, 'sink': 27, 'bag': 28, 'picture': 29, 'pillow': 30, 'towel': 31, 'suitcase': 32, 'backpack': 33, 'crate': 34, 'keyboard': 35, 'rack': 36, 'toilet': 37, 'printer': 38, 'poster': 39, 'painting': 40, 'microwave': 41, 'shoes': 42, 'socket': 43, 'bottle': 44, 'bucket': 45, 'cushion': 46, 'basket': 47, 'shoe rack': 48, 'telephone': 49, 'file folder': 50, 'laptop': 51, 'plant pot': 52, 'exhaust fan': 53, 'cup': 54, 'coat hanger': 55, 'light switch': 56, 'speaker': 57, 'table lamp': 58, 'kettle': 59, 'smoke detector': 60, 'container': 61, 'power strip': 62, 'slippers': 63, 'paper bag': 64, 'mouse': 65, 'cutting board': 66, 'toilet paper': 67, 'paper towel': 68, 'pot': 69, 'clock': 70, 'pan': 71, 'tap': 72, 'jar': 73, 'soap dispenser': 74, 'binder': 75, 'bowl': 76, 'tissue box': 77, 'whiteboard eraser': 78, 'toilet brush': 79, 'spray bottle': 80, 'headphones': 81, 'stapler': 82, 'marker': 83}, 'dataset': 'scannetpp', 'info_version': '1.0'}

    data_list = []
    for scene_name in tqdm(self.scene_ids):
        entry = {
            'lidar_points': {
                'num_pts_feats': 6,
                'lidar_path': f'{scene_name}.bin',
            },
            'instances': self.filter_instances_by_confidence(
                scene_name, threshold=threshold, class_agnostic=class_agnostic),
            'pts_semantic_mask_path': f'{scene_name}.bin',
            'pts_instance_mask_path': f'{scene_name}.bin',
            # Clouds are already axis-aligned, so the matrix is identity.
            'axis_align_matrix': np.eye(4, dtype=np.float32),
        }
        data_list.append(entry)

    payload = {"metainfo": metainfo, 'data_list': data_list}
    with open(pkl_path, 'wb') as file:
        pickle.dump(payload, file)
279
+
280
@property
def scores(self):
    """Read-only alias for the per-class confidence-score mapping."""
    return self.class_scores
283
+
284
+
285
if __name__ == "__main__":
    # Inputs: MaskClustering .npz predictions and the matching point-cloud bins.
    pred_path = "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/scannetpp_v2_dust3r_unposed"
    bins_path = "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannetpp/bins/points_dust3r_v2_unposed"
    # Output: mmdet3d-style annotation pickle built from the filtered predictions.
    out_pkl_path = "/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannetpp/bins/scannetpp84_v2_dust3r_unposed_train.pkl"
    threshold = 0.5

    distr = PredBBoxDistrPP(pred_path, bins_path)
    distr.build_pkl_by_confidence(out_pkl_path, threshold=threshold, class_agnostic=False)
MaskClustering/mask_predict.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # Modified by Bowen Cheng from: https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py
3
+ import argparse
4
+ import glob
5
+ import multiprocessing as mp
6
+ import os
7
+ import cv2
8
+ import sys
9
+ sys.path.insert(1, os.path.join(sys.path[0], '..'))
10
+
11
+ import warnings
12
+ import numpy as np
13
+ from tqdm import tqdm
14
+ import torch
15
+
16
+ from detectron2.config import get_cfg
17
+ from detectron2.data.detection_utils import read_image
18
+ from detectron2.projects.deeplab import add_deeplab_config
19
+
20
+ from mask2former import add_maskformer2_config
21
+ from predictor import VisualizationDemo
22
+ import warnings
23
+ warnings.filterwarnings("ignore", category=UserWarning)
24
+
25
def setup_cfg(args):
    """Build a frozen detectron2 config from a config file plus CLI overrides.

    Args:
        args: parsed namespace providing ``config_file`` (path) and ``opts``
            (flat 'KEY VALUE' override list).

    Returns:
        The frozen detectron2 ``CfgNode``.
    """
    # load config from file and command-line arguments
    cfg = get_cfg()
    # Register the extra config keys before merging the file/CLI values.
    add_deeplab_config(cfg)
    add_maskformer2_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    return cfg
34
+
35
def get_parser():
    """Build the CLI parser for the Mask2Former mask-prediction demo.

    Returns:
        An ``argparse.ArgumentParser`` with config, sequence, path and
        threshold options.
    """
    parser = argparse.ArgumentParser(description="maskformer2 demo for builtin configs")
    parser.add_argument(
        "--config-file",
        default="configs/coco/panoptic-segmentation/maskformer2_R50_bs16_50ep.yaml",
        metavar="FILE",
        help="path to config file",
    )
    # '+'-separated list of sequence names to process.
    parser.add_argument("--seq_name_list", type=str)
    parser.add_argument("--root", type=str)
    parser.add_argument("--image_path_pattern", type=str)
    parser.add_argument("--dataset", type=str)
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    # Everything after --opts is forwarded verbatim to the detectron2 config.
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser
72
+
73
if __name__ == "__main__":
    # "spawn" start method: safer with CUDA / detectron2 worker processes.
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)

    # Sequence names arrive '+'-separated in a single CLI argument.
    seq_name_list = args.seq_name_list.split('+')
    for i, seq_name in tqdm(enumerate(seq_name_list), total=len(seq_name_list)):
        seq_dir = os.path.join(args.root, seq_name)
        image_list = sorted(glob.glob(os.path.join(seq_dir, args.image_path_pattern)))
        # matterport3d stores outputs one directory deeper (seq_dir/seq_name/...).
        output_dir = os.path.join(seq_dir, seq_name, 'output/mask') if args.dataset == 'matterport3d' else os.path.join(seq_dir, 'output/mask')

        os.makedirs(output_dir, exist_ok=True)

        for path in (image_list):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            predictions = demo.run_on_image(img)

            ##### color_mask
            pred_masks = predictions["instances"].pred_masks
            pred_scores = predictions["instances"].scores

            # select by confidence threshold
            selected_indexes = (pred_scores >= args.confidence_threshold)
            selected_scores = pred_scores[selected_indexes]
            selected_masks = pred_masks[selected_indexes]
            _, m_H, m_W = selected_masks.shape
            # Single-channel id image: 0 = background, k = k-th kept mask.
            mask_image = np.zeros((m_H, m_W), dtype=np.uint8)

            # rank: ascending score order, so higher-confidence masks are
            # painted last and overwrite overlapping lower-confidence ones.
            mask_id = 1
            selected_scores, ranks = torch.sort(selected_scores)
            for index in ranks:
                num_pixels = torch.sum(selected_masks[index])
                if num_pixels < 400:
                    # ignore small masks
                    continue
                mask_image[(selected_masks[index]==1).cpu().numpy()] = mask_id
                mask_id += 1
            # Save as PNG next to the sequence, named after the source image stem.
            cv2.imwrite(os.path.join(output_dir, os.path.basename(path).split('.')[0] + '.png'), mask_image)