Spaces:

chongjie
/

MCC_slim

Sleeping

hugoycj committed on Jul 26, 2023

Commit

fa4f45e

1 Parent(s): e08a54a

Add depth image support and point cloud generation

- Added a new function `backproject_depth_to_pointcloud` to convert depth images to point clouds.
- Added a new function `get_intrinsics` to estimate camera intrinsics.
- Replaced the point cloud file input in the `infer` function with a depth image input.
- Updated the `infer` function to generate a point cloud from the depth image using the new `backproject_depth_to_pointcloud` function.
- Updated the Gradio interface to accept a depth image file instead of a point cloud file.
- Added a depth image file to the demo examples.

Files changed (2) hide show

app.py +57 -10
demo/quest2_depth.png +3 -0

app.py CHANGED Viewed

@@ -71,7 +71,52 @@ def pad_image(im, value):
         diff = im.shape[1] - im.shape[0]
         return torch.cat([im, (torch.zeros((diff, im.shape[1], im.shape[2])) + value)], dim=0)
 def normalize(seen_xyz):
     seen_xyz = seen_xyz / (seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].var(dim=0) ** 0.5).mean()
     seen_xyz = seen_xyz - seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].mean(axis=0)
@@ -79,15 +124,18 @@ def normalize(seen_xyz):
 def infer(
           image,
-          point_cloud,
           seg,
           granularity,
           temperature,
           ):
     rgb = image
-    obj = load_obj(point_cloud.name)
     seen_rgb = (torch.tensor(rgb).float() / 255)[..., [2, 1, 0]]
     H, W = seen_rgb.shape[:2]
     seen_rgb = torch.nn.functional.interpolate(
@@ -97,11 +145,10 @@ def infer(
         align_corners=False,
     )[0].permute(1, 2, 0)
-    seen_xyz = obj[0].reshape(H, W, 3)
     seg = cv2.imread(seg.name, cv2.IMREAD_UNCHANGED)
     mask = torch.tensor(cv2.resize(seg, (W, H))).bool()
     seen_xyz[~mask] = float('inf')
     seen_xyz = normalize(seen_xyz)
     bottom, right = mask.nonzero().max(dim=0)[0]
@@ -138,7 +185,7 @@ def infer(
     ]
     pred_colors, pred_occupy, unseen_xyz = run_inference(model, samples, device, temperature, args)
-    _masks = pred_occupy > 0.1
     unseen_xyz = unseen_xyz[_masks]
     pred_colors = pred_colors[None, ...][_masks] * 255
@@ -179,12 +226,12 @@ if __name__ == '__main__':
     demo = gr.Interface(fn=infer,
                         inputs=[gr.Image(label="Input Image"),
-                                gr.File(label="Pointcloud File"),
                                 gr.File(label="Segmentation File"),
-                                gr.Slider(minimum=0.05, maximum=0.5, step=0.05, value=0.2, label="Granularity"),
-                                gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.1, label="Temperature")
                                 ],
                         outputs=[gr.outputs.File(label="Point Cloud")],
-                        examples=[["demo/quest2.jpg", "demo/quest2.obj", "demo/quest2_seg.png", 0.2, 0.1]],
                         cache_examples=True)
     demo.launch(server_name="0.0.0.0", server_port=7860)

         diff = im.shape[1] - im.shape[0]
         return torch.cat([im, (torch.zeros((diff, im.shape[1], im.shape[2])) + value)], dim=0)
+def backproject_depth_to_pointcloud(depth, rotation=np.eye(3), translation=np.zeros(3)):
+    """Back-project a depth map into one 3D point per pixel.
+
+    The camera is modelled as a pinhole whose principal point is the image
+    centre and whose focal length comes from `get_intrinsics`.
+
+    Args:
+        depth: 2D array of shape (height, width) with one depth value per pixel.
+        rotation: 3x3 rotation applied to the camera-space points.
+        translation: length-3 translation applied after the rotation.
+
+    Returns:
+        Array of shape (height, width, 3) holding the XYZ point of every
+        pixel, with the Y and Z components negated.
+
+    Raises:
+        ValueError: if `depth` is not two-dimensional.
+    """
+    if depth.ndim != 2:
+        raise ValueError(f"depth must be a 2D array, got shape {depth.shape}")
+    height, width = depth.shape
+    # The principal point is assumed to sit at the centre of the image.
+    principal_point = [width / 2, height / 2]
+    intrinsics = get_intrinsics(height, width, principal_point)
+    # Homogeneous pixel coordinates (u, v, 1), one row per pixel in row-major order.
+    u, v = np.meshgrid(np.arange(width), np.arange(height))
+    uv_homogeneous = np.stack((u, v, np.ones_like(u)), axis=-1).reshape(-1, 3)
+    # K^-1 maps each pixel to a ray with z == 1; scaling by depth gives camera-space points.
+    inv_intrinsics = np.linalg.inv(intrinsics)
+    points_cam = np.dot(uv_homogeneous, inv_intrinsics.T) * depth.flatten()[:, np.newaxis]
+    # Append w == 1 so rotation and translation apply as a single 3x4 matrix product.
+    points_cam_homogeneous = np.concatenate((points_cam, np.ones((len(points_cam), 1))), axis=1)
+    extrinsics = np.hstack((rotation, translation[:, np.newaxis]))
+    pointcloud = np.dot(points_cam_homogeneous, extrinsics.T)
+    # Negate Y and Z (presumably to match the axis convention the model expects; confirm).
+    pointcloud[:, 1:] *= -1
+    # The product above already has exactly three columns, so reshape directly.
+    return pointcloud.reshape(height, width, 3)
+# estimate camera intrinsics
+def get_intrinsics(H, W, principal_point, fov_degrees=55.0):
+    """Build a 3x3 pinhole intrinsic matrix from an assumed field of view.
+
+    The focal length (in pixels, shared by both axes) is derived from the
+    image width and the horizontal field of view.
+
+    Args:
+        H: image height in pixels. Currently unused; kept for the existing
+            call signature.
+        W: image width in pixels.
+        principal_point: (cx, cy) pair giving the principal point in pixels.
+        fov_degrees: assumed horizontal field of view in degrees.
+
+    Returns:
+        3x3 array [[f, 0, cx], [0, f, cy], [0, 0, 1]].
+    """
+    f = 0.5 * W / np.tan(0.5 * fov_degrees * np.pi / 180.0)
+    cx, cy = principal_point
+    return np.array([[f, 0, cx],
+                     [0, f, cy],
+                     [0, 0, 1]])
 def normalize(seen_xyz):
     seen_xyz = seen_xyz / (seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].var(dim=0) ** 0.5).mean()
     seen_xyz = seen_xyz - seen_xyz[torch.isfinite(seen_xyz.sum(dim=-1))].mean(axis=0)
 def infer(
           image,
+          depth_image,
           seg,
           granularity,
           temperature,
           ):
+    args.viz_granularity = granularity
     rgb = image
+    depth_image = cv2.imread(depth_image.name, -1)
+    depth_image = depth_image.astype(np.float32) / 256
+    seen_xyz = backproject_depth_to_pointcloud(depth_image)
     seen_rgb = (torch.tensor(rgb).float() / 255)[..., [2, 1, 0]]
     H, W = seen_rgb.shape[:2]
     seen_rgb = torch.nn.functional.interpolate(
         align_corners=False,
     )[0].permute(1, 2, 0)
     seg = cv2.imread(seg.name, cv2.IMREAD_UNCHANGED)
     mask = torch.tensor(cv2.resize(seg, (W, H))).bool()
     seen_xyz[~mask] = float('inf')
+    seen_xyz = torch.tensor(seen_xyz).float()
     seen_xyz = normalize(seen_xyz)
     bottom, right = mask.nonzero().max(dim=0)[0]
     ]
     pred_colors, pred_occupy, unseen_xyz = run_inference(model, samples, device, temperature, args)
+    _masks = pred_occupy > 0.1
     unseen_xyz = unseen_xyz[_masks]
     pred_colors = pred_colors[None, ...][_masks] * 255
     demo = gr.Interface(fn=infer,
                         inputs=[gr.Image(label="Input Image"),
+                                gr.File(label="Depth Image"),
                                 gr.File(label="Segmentation File"),
+                                gr.Slider(minimum=0.05, maximum=0.5, step=0.05, value=0.2, label="Grain Size"),
+                                gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.1, label="Color Temperature")
                                 ],
                         outputs=[gr.outputs.File(label="Point Cloud")],
+                        examples=[["demo/quest2.jpg", "demo/quest2_depth.png", "demo/quest2_seg.png", 0.2, 0.1]],
                         cache_examples=True)
     demo.launch(server_name="0.0.0.0", server_port=7860)

demo/quest2_depth.png ADDED Viewed

Git LFS Details

SHA256: 085b84c9f82155c5b1e5d7660d993f9445c08debb82b0867546a15f351c776fd
Pointer size: 131 Bytes
Size of remote file: 117 kB