Spaces:

oxkitsune
/

rerun-ml-depth-pro

Running

App Files Community

oxkitsune committed on Oct 17, 2024

Commit

e9ee731

1 Parent(s): 8465eec

fix visualization

Browse files

Files changed (1) hide show

app.py +21 -10

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ model = model.to(device)
 model.eval()
-def resize_image(image_buffer, max_size=128):
     with Image.fromarray(image_buffer) as img:
         # Calculate the new size while maintaining aspect ratio
         ratio = max_size / max(img.size)
@@ -109,27 +109,30 @@ def run_rerun(path_to_video):
         temp_file = None
         try:
-            temp_file = resize_image(frame, max_size=128)
             depth, focal_length = predict_depth(temp_file)
-            # scale the depth image to the original frame size
-            depth = cv2.resize(
-                depth, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_NEAREST
-            )
             rr.set_time_nanos("video_time", frame_timestamps_ns[i])
             rr.log(
                 "world/camera/depth",
-                rr.DepthImage(depth, meter=1, depth_range=(0, 10)),
             )
             rr.log(
                 "world/camera/frame",
                 rr.VideoFrameReference(
-                    timestamp=rr.components.VideoTimestamp(seconds=1.0),
                     video_reference="world/video",
                 ),
             )
             rr.log(
@@ -170,12 +173,20 @@ with gr.Blocks() as interface:
         """
         # DepthPro Rerun Demo
-        [DepthPro](https://huggingface.co/apple/DepthPro) is a fast metric depth prediction model. Simply upload an image to predict its inverse depth map and focal length. Large images will be automatically resized to 1536x1536 pixels.
         """
     )
     with gr.Row():
         with gr.Column(variant="compact"):
-            video = gr.Video(format="mp4", interactive=True, label="Video")
             visualize = gr.Button("Visualize ML Depth Pro")
         with gr.Column():
             viewer = Rerun(

 model.eval()
+def resize_image(image_buffer, max_size=256):
     with Image.fromarray(image_buffer) as img:
         # Calculate the new size while maintaining aspect ratio
         ratio = max_size / max(img.size)
         temp_file = None
         try:
+            # Resize the image to make the inference faster
+            temp_file = resize_image(frame, max_size=256)
             depth, focal_length = predict_depth(temp_file)
+            # find x and y scale factors, which can be applied to image
+            x_scale = depth.shape[1] / frame.shape[1]
+            y_scale = depth.shape[0] / frame.shape[0]
             rr.set_time_nanos("video_time", frame_timestamps_ns[i])
             rr.log(
                 "world/camera/depth",
+                rr.DepthImage(depth, meter=1),
             )
             rr.log(
                 "world/camera/frame",
                 rr.VideoFrameReference(
+                    timestamp=rr.components.VideoTimestamp(
+                        nanoseconds=frame_timestamps_ns[i]
+                    ),
                     video_reference="world/video",
                 ),
+                rr.Transform3D(scale=(x_scale, y_scale, 1)),
             )
             rr.log(
         """
         # DepthPro Rerun Demo
+        [DepthPro](https://huggingface.co/apple/DepthPro) is a fast metric depth prediction model. Simply upload an image to predict its inverse depth map and focal length.
+        High resolution videos will be automatically resized to 256x256 pixels, to speed up the inference and visualize multiple frames.
         """
     )
     with gr.Row():
         with gr.Column(variant="compact"):
+            video = gr.Video(
+                format="mp4",
+                interactive=True,
+                label="Video",
+                include_audio=False,
+                max_length=10,
+            )
             visualize = gr.Button("Visualize ML Depth Pro")
         with gr.Column():
             viewer = Rerun(