Spaces:
Runtime error
Runtime error
JeffLiang
committed on
Commit
·
fcdbf88
1
Parent(s):
d66c7c7
init
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +88 -0
- requirements.txt +9 -0
- sam_3d.py +266 -0
- sam_vit_b_01ec64.pth +3 -0
- scannet_data/scene0000_00/color/0.jpg +0 -0
- scannet_data/scene0000_00/color/100.jpg +0 -0
- scannet_data/scene0000_00/color/1000.jpg +0 -0
- scannet_data/scene0000_00/color/1020.jpg +0 -0
- scannet_data/scene0000_00/color/1040.jpg +0 -0
- scannet_data/scene0000_00/color/1060.jpg +0 -0
- scannet_data/scene0000_00/color/1080.jpg +0 -0
- scannet_data/scene0000_00/color/1100.jpg +0 -0
- scannet_data/scene0000_00/color/1120.jpg +0 -0
- scannet_data/scene0000_00/color/1140.jpg +0 -0
- scannet_data/scene0000_00/color/1160.jpg +0 -0
- scannet_data/scene0000_00/color/1180.jpg +0 -0
- scannet_data/scene0000_00/color/120.jpg +0 -0
- scannet_data/scene0000_00/color/1200.jpg +0 -0
- scannet_data/scene0000_00/color/1220.jpg +0 -0
- scannet_data/scene0000_00/color/1240.jpg +0 -0
- scannet_data/scene0000_00/color/1260.jpg +0 -0
- scannet_data/scene0000_00/color/1280.jpg +0 -0
- scannet_data/scene0000_00/color/1300.jpg +0 -0
- scannet_data/scene0000_00/color/1320.jpg +0 -0
- scannet_data/scene0000_00/color/1340.jpg +0 -0
- scannet_data/scene0000_00/color/1360.jpg +0 -0
- scannet_data/scene0000_00/color/1380.jpg +0 -0
- scannet_data/scene0000_00/color/140.jpg +0 -0
- scannet_data/scene0000_00/color/1400.jpg +0 -0
- scannet_data/scene0000_00/color/1420.jpg +0 -0
- scannet_data/scene0000_00/color/1440.jpg +0 -0
- scannet_data/scene0000_00/color/1460.jpg +0 -0
- scannet_data/scene0000_00/color/1480.jpg +0 -0
- scannet_data/scene0000_00/color/1500.jpg +0 -0
- scannet_data/scene0000_00/color/1520.jpg +0 -0
- scannet_data/scene0000_00/color/1540.jpg +0 -0
- scannet_data/scene0000_00/color/1560.jpg +0 -0
- scannet_data/scene0000_00/color/1580.jpg +0 -0
- scannet_data/scene0000_00/color/160.jpg +0 -0
- scannet_data/scene0000_00/color/1600.jpg +0 -0
- scannet_data/scene0000_00/color/1620.jpg +0 -0
- scannet_data/scene0000_00/color/1640.jpg +0 -0
- scannet_data/scene0000_00/color/1660.jpg +0 -0
- scannet_data/scene0000_00/color/1680.jpg +0 -0
- scannet_data/scene0000_00/color/1700.jpg +0 -0
- scannet_data/scene0000_00/color/1720.jpg +0 -0
- scannet_data/scene0000_00/color/1740.jpg +0 -0
- scannet_data/scene0000_00/color/1760.jpg +0 -0
- scannet_data/scene0000_00/color/1780.jpg +0 -0
- scannet_data/scene0000_00/color/180.jpg +0 -0
app.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
# Copyright (c) Meta Platforms, Inc. All Rights Reserved
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import ast
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import numpy as np
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
import matplotlib.pyplot as plt
|
| 14 |
+
from plyfile import PlyData
|
| 15 |
+
import gradio as gr
|
| 16 |
+
import plotly.graph_objs as go
|
| 17 |
+
|
| 18 |
+
from sam_3d import SAM3DDemo
|
| 19 |
+
|
| 20 |
+
def pc_to_plot(pc):
    """Build an interactive Plotly 3D scatter figure from a ply point-cloud record.

    `pc` must expose 'x'/'y'/'z' coordinate arrays and 'red'/'green'/'blue'
    per-point color channels (0-255 integers).
    """
    # Per-point CSS color strings for the scatter markers.
    point_colors = [
        'rgb({},{},{})'.format(r, g, b)
        for r, g, b in zip(pc['red'], pc['green'], pc['blue'])
    ]
    scatter = go.Scatter3d(
        x=pc['x'], y=pc['y'], z=pc['z'],
        mode='markers',
        marker=dict(
            size=2,
            color=point_colors,
        ),
    )
    # Hide all three axes so only the cloud itself is rendered.
    layout = dict(
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False),
        ),
    )
    return go.Figure(data=[scatter], layout=layout)
|
| 36 |
+
|
| 37 |
+
def inference(scene_name, granularity, coords, plot):
    """Gradio callback: run SAM-3D segmentation around a user-supplied 3D point.

    Returns four outputs matching the Interface wiring: the point-selection
    preview plot, the projected-points image, the SAM-mask image, and the
    final segmented point-cloud plot.
    """
    print(scene_name, coords)
    demo = SAM3DDemo('vit_b', 'sam_vit_b_01ec64.pth', scene_name)
    # The textbox delivers the coordinate as a string like "[0, -2.5, 0.7]".
    anchor = ast.literal_eval(coords)
    pc_selected, img_points, img_masks, pc_final = demo.run_with_coord(anchor, int(granularity))
    return (
        pc_to_plot(pc_selected),
        Image.fromarray(img_points),
        Image.fromarray(img_masks),
        pc_to_plot(pc_final),
    )
|
| 43 |
+
|
| 44 |
+
# Pre-load the point clouds of the three supported scenes once at startup so
# the gradio example gallery can show them without re-reading the .ply files.
plydatas = []
for scene_name in ['scene0000_00', 'scene0001_00', 'scene0002_00']:
    plydata = PlyData.read(f"./scannet_data/{scene_name}/{scene_name}.ply")
    # elements[0] is the vertex element: x/y/z plus red/green/blue fields.
    data = plydata.elements[0].data
    plydatas.append(data)

# Each example row mirrors the Interface inputs:
# [scene name, mask granularity, anchor coordinate, preview plot].
examples = [['scene0000_00', 0, [0, -2.5, 0.7], pc_to_plot(plydatas[0])],
            ['scene0001_00', 0, [0, -2.5, 1], pc_to_plot(plydatas[1])],
            ['scene0002_00', 0, [0, -2.5, 1], pc_to_plot(plydatas[2])],]

title = 'Segment_Anything on 3D in-door point clouds'

description = """
Gradio Demo for Segment Anything on 3D indoor scenes (ScanNet supported). \n
The logic is straighforward: 1) Find a point in 3D; 2) project the 3D point to valid images; 3) perform 2D SAM on valid images; 4) reproject 2D results back to 3D; 5) Visualization.
Unfortunatly, it does not support click the point cloud to generate coordinates automatically. You may want to write down the coordinates and put it manually. \n
"""

article = """
<p style='text-align: center'>
<a href='https://arxiv.org/abs/2210.04150' target='_blank'>
Open-Vocabulary Semantic Segmentation with Mask-adapted CLIP
</a>

<a href='https://github.com/facebookresearch/ov-seg' target='_blank'>Github Repo</a></p>
"""


# Wire the inference callback to the UI and launch the app (blocking call).
gr.Interface(
    inference,
    inputs=[
        gr.Dropdown(choices=['scene0000_00', 'scene0001_00', 'scene0002_00'], label="Scannet scene name (limited scenes supported)"),
        gr.Dropdown(choices=[0, 1, 2], label="Mask granularity from 0 (most coarse) to 2 (most precise)"),
        gr.Textbox(lines=1, label='Coordinates'),
        gr.Plot(label="Input Point cloud (For visualization and point finding only, click responce not supported yet.)"),
    ],
    outputs=[gr.Plot(label='Selected point(s): red points show the top 10 cloest points for your input anchor point'),
             gr.Image(label='Selected image with projected points'),
             gr.Image(label='Selected image processed after SAM'),
             gr.Plot(label='Output Point cloud: blue points represent the mask')],
    title=title,
    description=description,
    article=article,
    examples=examples).launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
numpy
|
| 3 |
+
plyfile
|
| 4 |
+
plotly
|
| 5 |
+
matplotlib
|
| 6 |
+
opencv-python
|
| 7 |
+
torch==1.10.1+cu113
|
| 8 |
+
torchvision==0.11.2+cu113
|
| 9 |
+
git+https://github.com/facebookresearch/segment-anything.git
|
sam_3d.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import copy
|
| 3 |
+
import random
|
| 4 |
+
from PIL import Image
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import numpy as np
|
| 9 |
+
from plyfile import PlyData
|
| 10 |
+
|
| 11 |
+
from segment_anything import SamPredictor, sam_model_registry
|
| 12 |
+
|
| 13 |
+
def get_image_ids(path):
    """Return the sorted stem names (e.g. '100' from '100.jpg') of files in *path*.

    Directories are skipped. NOTE(review): ids are strings, so the sort is
    lexicographic ('1000' sorts before '120'), not numeric — confirm this
    ordering is intended by downstream consumers.
    """
    stems = [
        entry.split('.')[0]
        for entry in os.listdir(path)
        if os.path.isfile(path + '/' + entry)  # keep only regular files
    ]
    return sorted(stems)
| 17 |
+
|
| 18 |
+
def load_align_matrix_from_txt(path):
    """Parse the 4x4 axis-alignment matrix from a ScanNet scene-info .txt file.

    Falls back to the identity matrix when no 'axisAlignment' entry exists
    (test-set scenes do not ship one).

    Args:
        path: path to the scene's `<scene>.txt` metadata file.

    Returns:
        (4, 4) float numpy array.
    """
    axis_align_matrix = np.eye(4)
    # `with` closes the file handle; the original left it open until GC.
    with open(path) as f:
        for line in f:
            if 'axisAlignment' in line:
                # Take the right-hand side of "axisAlignment = v0 v1 ... v15".
                # split() is whitespace-robust, unlike the original
                # strip('axisAlignment = ') char-set strip + split(' ').
                values = line.split('=', 1)[1].split()
                axis_align_matrix = np.array(
                    [float(v) for v in values]).reshape((4, 4))
                break
    return axis_align_matrix
|
| 31 |
+
|
| 32 |
+
def load_matrix_from_txt(path, shape=(4, 4)):
    """Read whitespace-separated floats from *path* into an array of *shape*."""
    with open(path) as f:
        # read().split() treats newlines and spaces uniformly, matching the
        # original's replace('\n', ' ') + split().
        tokens = f.read().split()
    values = [float(t) for t in tokens]
    return np.array(values).reshape(shape)
|
| 38 |
+
|
| 39 |
+
def load_image(path):
    """Load an image file from disk as a numpy array (H, W[, C])."""
    with Image.open(path) as img:
        return np.array(img)
|
| 42 |
+
|
| 43 |
+
def convert_from_uvd(u, v, d, intr, pose, align):
    """Back-project depth pixel (u, v) with raw depth d into aligned world coords.

    Args:
        u, v: pixel column / row in the depth image.
        d: raw depth value; divided by depth_scale=1000, so presumably
           millimeters (ScanNet convention) — TODO confirm.
        intr: 4x4 depth-camera intrinsic matrix (fx, fy, cx, cy are read
              from the top-left 3x3 part).
        pose: 4x4 camera-to-world pose matrix.
        align: 4x4 axis-alignment matrix for the scene.

    Returns:
        Length-3 array of world coordinates, or (None, None, None) when the
        depth reading is zero (no measurement at this pixel).
    """
    if d == 0:
        return None, None, None

    # Removed the original's unused `extr = np.linalg.inv(pose)` — a wasted
    # 4x4 inverse per call in a per-pixel hot path.
    fx = intr[0, 0]
    fy = intr[1, 1]
    cx = intr[0, 2]
    cy = intr[1, 2]
    depth_scale = 1000

    # Pinhole back-projection into the camera frame.
    z = d / depth_scale
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy

    # Camera -> world -> axis-aligned world, then dehomogenize.
    world = (align @ pose @ np.array([x, y, z, 1]))
    return world[:3] / world[3]
|
| 60 |
+
|
| 61 |
+
# Find the cloest point in the cloud with select
|
| 62 |
+
def find_closest_point(point, point_cloud, num=1):
    """Return indices and coordinates of the *num* points nearest to *point*.

    Distances are Euclidean, computed row-wise against *point_cloud* (N, 3);
    results are ordered nearest-first.
    """
    dists = np.linalg.norm(point_cloud - point, axis=1)
    nearest_idx = np.argsort(dists)[:num]
    return nearest_idx, point_cloud[nearest_idx]
|
| 73 |
+
|
| 74 |
+
# Quick-look matplotlib 3D scatter of a point cloud (debug helper; the demo
# UI itself renders point clouds with plotly, not this).
def plot_3d(xdata, ydata, zdata, color=None, b_min=2, b_max=8, view=(45, 45)):
    # NOTE(review): 'rgb' is not a built-in matplotlib colormap; this likely
    # only works when `color` is an explicit per-point RGB array (cmap is then
    # ignored) — confirm callers never pass scalar color values.
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, dpi=200)
    ax.view_init(view[0], view[1])  # (elevation, azimuth) in degrees
    # Clamp all three axes to the same range so the cloud keeps its aspect.
    ax.set_xlim(b_min, b_max)
    ax.set_ylim(b_min, b_max)
    ax.set_zlim(b_min, b_max)
    ax.scatter3D(xdata, ydata, zdata, c=color, cmap='rgb', s=0.1)
|
| 81 |
+
|
| 82 |
+
class SAM3DDemo(object):
    """End-to-end SAM-on-3D pipeline for a single ScanNet scene.

    Workflow (see run_with_coord): pick the points nearest a user anchor,
    project them into the scene's RGB-D frames, run 2D SAM on frames where
    the projection is valid, then lift the 2D masks back onto the
    ground-truth point cloud via voxel matching.
    """

    def __init__(self, sam_model, sam_ckpt, scene_name):
        """Load the SAM checkpoint and all per-frame data for *scene_name*.

        NOTE(review): .cuda() requires a GPU; loads every color/depth/pose
        frame of the scene into memory up front.
        """
        sam = sam_model_registry[sam_model](checkpoint=sam_ckpt).cuda()
        self.predictor = SamPredictor(sam)
        self.scene_name = scene_name
        scene_path = os.path.join('./scannet_data', scene_name)
        self.color_path = os.path.join(scene_path, 'color')
        self.depth_path = os.path.join(scene_path, 'depth')
        self.pose_path = os.path.join(scene_path, 'pose')
        self.intrinsic_path = os.path.join(scene_path, 'intrinsic')
        # (sic) attribute name keeps the original's 'matirx' spelling.
        self.align_matirx_path = f'{scene_path}/{scene_name}.txt'
        # Frame ids are string stems sorted lexicographically (see
        # get_image_ids); poses/images below are index-aligned with img_ids.
        self.img_ids = get_image_ids(self.color_path)
        self.align_matrix = load_align_matrix_from_txt(self.align_matirx_path)
        self.intrinsic_depth = load_matrix_from_txt(os.path.join(self.intrinsic_path, 'intrinsic_depth.txt'))
        self.poses = [load_matrix_from_txt(os.path.join(self.pose_path, f'{i}.txt')) for i in self.img_ids]
        self.rgb_images = [load_image(os.path.join(self.color_path, f'{i}.jpg')) for i in self.img_ids]
        self.depth_images = [load_image(os.path.join(self.depth_path, f'{i}.png'))for i in self.img_ids]

    def project_3D_to_images(self, select_points, valid_margin=20):
        """Project the selected 3D points into every frame; keep valid frames.

        A frame is valid when ALL projected points land at least
        *valid_margin* pixels inside the image, have positive projected
        depth, and the first point's projected depth agrees with the sensor
        depth within +/-20%.

        Returns:
            (valid frame indices, {frame index -> 2xN pixel coords},
             a rendered preview image of the last valid frame with the
             points drawn in red).

        NOTE(review): `valid_img_ids[-1]` raises IndexError when no frame
        passes the checks — there is no fallback for that case.
        """
        valid_img_ids = []
        valid_points = {}
        for img_i in range(len(self.img_ids)):
            rgb_img = self.rgb_images[img_i]
            depth_img = self.depth_images[img_i]
            extrinsics = self.poses[img_i]
            # World (axis-aligned) -> camera -> depth-image pixel space.
            projection_matrix = self.intrinsic_depth @ np.linalg.inv(self.align_matrix @ extrinsics)
            raw_points = np.vstack((select_points.T, np.ones((1, select_points.T.shape[1]))))
            raw_points = np.dot(projection_matrix, raw_points)
            # bounding simplest
            points = raw_points[:2, :] / raw_points[2, :]
            points = np.round(points).astype(np.int32)
            valid = (points[0] >= valid_margin).all() & (points[1] >= valid_margin).all() \
                & (points[0] < (rgb_img.shape[1] - valid_margin)).all() & (points[1] < (rgb_img.shape[0] - valid_margin)).all() \
                & (raw_points[2, :] > 0).all()
            if valid:
                # Occlusion check: projected depth of the first point must be
                # within +/- depth_margin/2 (relative) of the sensor depth.
                depth_margin = 0.4
                gt_depths = depth_img[points[1], points[0]] / 1000
                proj_depths = raw_points[2, :]
                if (proj_depths[0] > (1 - depth_margin / 2.0) * gt_depths[0]) & (proj_depths[0] < (1 + depth_margin / 2.0) * gt_depths[0]):
                    valid_img_ids.append(img_i)
                    valid_points[img_i] = points

        # Render a preview of the last valid frame with the projected points.
        show_id = valid_img_ids[-1]
        show_points = valid_points[show_id]
        rgb_img = self.rgb_images[show_id]
        fig, ax = plt.subplots()
        ax.imshow(rgb_img)
        for x, y in zip(show_points[0], show_points[1]):
            ax.plot(x, y, 'ro')
        canvas = fig.canvas
        canvas.draw()
        w, h = canvas.get_width_height()
        # Rasterize the matplotlib canvas into an RGB uint8 array.
        rgb_img_w_points = np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape(h, w, 3)
        print("projecting 3D point to images successfully...")
        return valid_img_ids, valid_points, rgb_img_w_points

    def process_img_w_sam(self, valid_img_ids, valid_points, granularity):
        """Run SAM on every valid frame, prompted by its first projected point.

        Returns per-frame color overlays (gray 0.5 background, blue where the
        chosen-granularity mask is on) and a 3-panel preview image of the
        masks at all granularities.

        NOTE(review): the preview below the loop reuses `masks` and `rgb_img`
        from the LAST loop iteration — it only shows the last valid frame if
        that frame happens to be processed last, and raises NameError when
        valid_img_ids is empty. Confirm this is acceptable for the demo.
        """
        mask_colors = []
        for img_i in range(len(self.img_ids)):
            rgb_img = self.rgb_images[img_i]
            # Default overlay: uniform 0.5 gray (meaning "not in mask").
            msk_color = np.full(rgb_img.shape, 0.5)
            if img_i in valid_img_ids:
                self.predictor.set_image(rgb_img)
                # Prompt SAM with the first projected point, shape (1, 2).
                point_coor = valid_points[img_i].T[0][None]
                masks, _, _ = self.predictor.predict(point_coords=point_coor, point_labels=np.array([1]))
                m = masks[granularity]
                msk_color[m] = [0, 0, 1.0]  # blue marks masked pixels
            mask_colors.append(msk_color)
        # Preview: the three SAM granularities side by side (see NOTE above).
        fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(24, 8))
        for i in range(3):
            mask_img = masks[i][:,:,None] * rgb_img
            axs[i].set_title(f'granularity {i}')
            axs[i].imshow(mask_img)
        canvas = fig.canvas
        canvas.draw()
        w, h = canvas.get_width_height()
        rgb_img_w_masks = np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape(h, w, 3)
        print("processing images with SAM successfully...")
        return mask_colors, rgb_img_w_masks

    def project_mask_to_3d(self, mask_colors, sample_ratio=0.002):
        """Back-project a random subset of depth pixels to 3D with mask colors.

        Each depth pixel is kept with probability *sample_ratio*; kept pixels
        are unprojected via convert_from_uvd and tagged with the overlay
        color sampled from the (rescaled) RGB-resolution mask overlay.

        Returns parallel lists: x, y, z coordinates and [color] entries.
        """
        x_data, y_data, z_data, c_data = [], [], [], []
        for img_i in range(len(self.img_ids)):
            id = self.img_ids[img_i]
            # RGBD
            d = self.depth_images[img_i]
            c = self.rgb_images[img_i]
            p = self.poses[img_i]
            msk_color = mask_colors[img_i]
            # Projecting RGB features into the point space
            for i in range(d.shape[0]):
                for j in range(d.shape[1]):
                    # Random subsampling keeps the reprojection tractable.
                    if random.random() < sample_ratio:
                        x, y, z = convert_from_uvd(j, i, d[i, j], self.intrinsic_depth, p, self.align_matrix)
                        if x is None:
                            continue  # zero-depth pixel: no measurement
                        x_data.append(x)
                        y_data.append(y)
                        z_data.append(z)
                        # Map depth-image coords to RGB-image coords (the two
                        # streams have different resolutions).
                        ci = int(i * c.shape[0] / d.shape[0])
                        cj = int(j * c.shape[1] / d.shape[1])
                        c_data.append([msk_color[ci, cj]])
        print("reprojecting images to 3D points successfully...")
        return x_data, y_data, z_data, c_data

    def match_projected_point_to_gt_point(self, x_data, y_data, z_data, c_data, gt_coords):
        """Transfer colors from reprojected points onto the GT cloud via voxels.

        Both clouds are quantized into 0.2-unit voxels; each GT voxel is
        matched to the image-side voxel with identical quantized coords, and
        receives the mean color of that voxel's samples. Unmatched GT voxels
        reuse the most recent matched voxel (simple carry-forward fill).

        Returns:
            torch tensor of per-GT-point colors, aligned with gt_coords rows.

        NOTE(review): `int(loc)` below assumes each matched location array
        has exactly one element; multiple image voxels colliding on the same
        quantized coords would raise TypeError — confirm this cannot happen.
        """

        c_data = torch.tensor(np.concatenate(c_data, axis=0))
        img_coords = np.array([x_data, y_data, z_data], dtype=np.float32).T
        # Quantize both clouds into 0.2-unit voxel grid coordinates.
        gt_quant_coords = np.floor_divide(gt_coords, 0.2)
        img_quant_coords = np.floor_divide(img_coords, 0.2)

        # Remove the redundant coords
        unique_gt_coords, gt_inverse_indices = np.unique(gt_quant_coords, axis=0, return_inverse=True)
        unique_img_coords, img_inverse_indices = np.unique(img_quant_coords, axis=0, return_inverse=True)

        # Match the coords in gt_coords to img_coords
        def find_loc(vec):
            # Wrap the (possibly empty) match array in a 0-d object array so
            # apply_along_axis does not try to broadcast ragged results.
            obj = np.empty((), dtype=object)
            out = np.where((unique_img_coords == vec).all(1))[0]
            obj[()] = out
            return obj

        gt_2_img_map = np.apply_along_axis(find_loc, 1, unique_gt_coords)
        # Since some places are empty, use a simple carry-forward interpolation
        gt_2_img_map_filled = []
        start_id = np.array([0])
        for loc in gt_2_img_map:
            if not np.any(loc):
                loc = start_id  # no match: reuse the last matched voxel
            else:
                start_id = loc
            gt_2_img_map_filled.append(int(loc))

        # Mean color of the samples that fell into each image-side voxel.
        mean_colors = []
        for i in range(unique_img_coords.shape[0]):
            valid_locs = np.where(img_inverse_indices == i)
            mean_f = torch.mean(c_data[valid_locs], axis=0)
            mean_colors.append(mean_f.unsqueeze(0))
        mean_colors = torch.cat(mean_colors)
        # Project the averaged features back to groundtruth point clouds
        img_2_gt_colors = mean_colors[gt_2_img_map_filled]
        projected_gt_colors = img_2_gt_colors[gt_inverse_indices]
        print("convert projected points to GT points successfully...")

        return projected_gt_colors

    def render_point_cloud(self, data, color):
        """Return a shallow copy of ply record *data* recolored by *color*.

        *color* holds floats in [0, 1]; channels are scaled to uint8 0-255.
        NOTE(review): copy.copy is shallow — writes below mutate the shared
        underlying numpy record array; confirm callers do not rely on the
        original colors afterwards.
        """
        data_copy = copy.copy(data)
        uint_color = torch.round(torch.tensor(color) * 255).to(torch.uint8)
        data_copy['red'] = uint_color[:, 0]
        data_copy['green'] = uint_color[:, 1]
        data_copy['blue'] = uint_color[:, 2]
        return data_copy

    def run_with_coord(self, point, granularity):
        """Full pipeline for one user click: select, project, SAM, lift to 3D.

        Args:
            point: [x, y, z] anchor coordinate in the aligned GT cloud frame.
            granularity: SAM mask index 0 (coarsest) to 2 (finest).

        Returns:
            (GT cloud with the 10 selected points in red,
             preview image of projected points,
             preview image of SAM masks,
             GT cloud with the segmented region in blue).
        """
        x_data, y_data, z_data, c_data = [], [], [], []

        plydata = PlyData.read(f"./scannet_data/{self.scene_name}/{self.scene_name}.ply")
        data = plydata.elements[0].data

        # gt_coords stand for the groundtruth point cloud coordinates
        gt_coords = np.array([data['x'], data['y'], data['z']], dtype=np.float32).T
        gt_color = np.array([data['red'], data['green'], data['blue']], dtype=np.float32).T
        blank_color = np.full(gt_color.shape, 0.5)

        # Highlight the 10 GT points nearest the anchor in red.
        select_index, select_points = find_closest_point(point, gt_coords, num=10)
        point_select_color = blank_color.copy()
        point_select_color[select_index] = [1.0, 0, 0]
        data_point_select = self.render_point_cloud(data, point_select_color)

        valid_img_ids, valid_points, rgb_img_w_points = self.project_3D_to_images(select_points)
        mask_colors, rgb_img_w_masks = self.process_img_w_sam(valid_img_ids, valid_points, granularity)
        x_data, y_data, z_data, c_data = self.project_mask_to_3d(mask_colors)
        projected_gt_colors = self.match_projected_point_to_gt_point(x_data, y_data, z_data, c_data, gt_coords)

        data_final = self.render_point_cloud(data, projected_gt_colors)

        return data_point_select, rgb_img_w_points, rgb_img_w_masks, data_final
|
sam_vit_b_01ec64.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
|
| 3 |
+
size 375042383
|
scannet_data/scene0000_00/color/0.jpg
ADDED
|
scannet_data/scene0000_00/color/100.jpg
ADDED
|
scannet_data/scene0000_00/color/1000.jpg
ADDED
|
scannet_data/scene0000_00/color/1020.jpg
ADDED
|
scannet_data/scene0000_00/color/1040.jpg
ADDED
|
scannet_data/scene0000_00/color/1060.jpg
ADDED
|
scannet_data/scene0000_00/color/1080.jpg
ADDED
|
scannet_data/scene0000_00/color/1100.jpg
ADDED
|
scannet_data/scene0000_00/color/1120.jpg
ADDED
|
scannet_data/scene0000_00/color/1140.jpg
ADDED
|
scannet_data/scene0000_00/color/1160.jpg
ADDED
|
scannet_data/scene0000_00/color/1180.jpg
ADDED
|
scannet_data/scene0000_00/color/120.jpg
ADDED
|
scannet_data/scene0000_00/color/1200.jpg
ADDED
|
scannet_data/scene0000_00/color/1220.jpg
ADDED
|
scannet_data/scene0000_00/color/1240.jpg
ADDED
|
scannet_data/scene0000_00/color/1260.jpg
ADDED
|
scannet_data/scene0000_00/color/1280.jpg
ADDED
|
scannet_data/scene0000_00/color/1300.jpg
ADDED
|
scannet_data/scene0000_00/color/1320.jpg
ADDED
|
scannet_data/scene0000_00/color/1340.jpg
ADDED
|
scannet_data/scene0000_00/color/1360.jpg
ADDED
|
scannet_data/scene0000_00/color/1380.jpg
ADDED
|
scannet_data/scene0000_00/color/140.jpg
ADDED
|
scannet_data/scene0000_00/color/1400.jpg
ADDED
|
scannet_data/scene0000_00/color/1420.jpg
ADDED
|
scannet_data/scene0000_00/color/1440.jpg
ADDED
|
scannet_data/scene0000_00/color/1460.jpg
ADDED
|
scannet_data/scene0000_00/color/1480.jpg
ADDED
|
scannet_data/scene0000_00/color/1500.jpg
ADDED
|
scannet_data/scene0000_00/color/1520.jpg
ADDED
|
scannet_data/scene0000_00/color/1540.jpg
ADDED
|
scannet_data/scene0000_00/color/1560.jpg
ADDED
|
scannet_data/scene0000_00/color/1580.jpg
ADDED
|
scannet_data/scene0000_00/color/160.jpg
ADDED
|
scannet_data/scene0000_00/color/1600.jpg
ADDED
|
scannet_data/scene0000_00/color/1620.jpg
ADDED
|
scannet_data/scene0000_00/color/1640.jpg
ADDED
|
scannet_data/scene0000_00/color/1660.jpg
ADDED
|
scannet_data/scene0000_00/color/1680.jpg
ADDED
|
scannet_data/scene0000_00/color/1700.jpg
ADDED
|
scannet_data/scene0000_00/color/1720.jpg
ADDED
|
scannet_data/scene0000_00/color/1740.jpg
ADDED
|
scannet_data/scene0000_00/color/1760.jpg
ADDED
|
scannet_data/scene0000_00/color/1780.jpg
ADDED
|
scannet_data/scene0000_00/color/180.jpg
ADDED
|