Spaces:

prithivMLmods
/

Map-Anything-v1

Running on Zero

App Files Files Community

prithivMLmods committed 19 days ago

Commit

60c5d65

verified ·

1 Parent(s): e6c43aa

Update app.py

Browse files

Files changed (1) hide show

app.py +809 -84

app.py CHANGED Viewed

@@ -7,7 +7,9 @@ from datetime import datetime
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 import gradio as gr
 import numpy as np
 import spaces
 import torch
@@ -23,14 +25,45 @@ from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
 from mapanything.utils.hf_utils.viz import predictions_to_glb
 from mapanything.utils.image import load_images
 register_heif_opener()
 sys.path.append("mapanything/")
 # ============================================================================
 # Global Configuration
 # ============================================================================
-# MapAnything Configuration
 high_level_config = {
     "path": "configs/train.yaml",
     "hf_model_name": "facebook/map-anything",
@@ -50,11 +83,12 @@ high_level_config = {
     "resolution": 518,
 }
-# Global model variables
 model = None
 # ============================================================================
-# Core Model Inference
 # ============================================================================
 @spaces.GPU(duration=120)
@@ -108,13 +142,13 @@ def run_model(
     images_list = []
     final_mask_list = []
     confidences = []
     for pred in outputs:
         depthmap_torch = pred["depth_z"][0].squeeze(-1)
         intrinsics_torch = pred["intrinsics"][0]
         camera_pose_torch = pred["camera_poses"][0]
         conf = pred["conf"][0].squeeze(-1)
         pts3d_computed, valid_mask = depthmap_to_world_frame(
             depthmap_torch, intrinsics_torch, camera_pose_torch
         )
@@ -139,12 +173,12 @@ def run_model(
     predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
     predictions["world_points"] = np.stack(world_points_list, axis=0)
     predictions["conf"] = np.stack(confidences, axis=0)
     depth_maps = np.stack(depth_maps_list, axis=0)
     if len(depth_maps.shape) == 3:
         depth_maps = depth_maps[..., np.newaxis]
     predictions["depth"] = depth_maps
     predictions["images"] = np.stack(images_list, axis=0)
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
@@ -155,11 +189,362 @@ def run_model(
 # ============================================================================
-# Helper Functions
 # ============================================================================
 def handle_uploads(input_images):
-    """Handle uploaded images"""
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
@@ -175,7 +560,6 @@ def handle_uploads(input_images):
     image_paths = []
-    # Handle images
     if input_images is not None:
         for file_data in input_images:
             if isinstance(file_data, dict) and "name" in file_data:
@@ -211,7 +595,7 @@ def handle_uploads(input_images):
 def update_gallery_on_upload(input_images):
-    """Update gallery on upload"""
     if not input_images:
         return None, None, None, None
     target_dir, image_paths = handle_uploads(input_images)
@@ -223,6 +607,10 @@ def update_gallery_on_upload(input_images):
     )
 @spaces.GPU(duration=120)
 def gradio_demo(
     target_dir,
@@ -234,9 +622,15 @@ def gradio_demo(
     apply_mask=True,
     show_mesh=True,
 ):
-    """Perform reconstruction"""
     if not os.path.isdir(target_dir) or target_dir == "None":
-        return None, "Please upload files first", None
     start_time = time.time()
     gc.collect()
@@ -247,18 +641,19 @@ def gradio_demo(
     all_files_display = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files_display
     print("Running MapAnything model...")
     with torch.no_grad():
         predictions = run_model(target_dir, apply_mask)
-    # Save prediction results
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
     if frame_filter is None:
         frame_filter = "All"
-    # Generate raw GLB
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}.glb",
@@ -275,6 +670,13 @@ def gradio_demo(
     )
     glbscene.export(file_obj=glbfile)
     # Cleanup
     del predictions
     gc.collect()
@@ -285,19 +687,32 @@ def gradio_demo(
     log_msg = f"✅ Reconstruction successful ({len(all_files)} frames)"
     return (
-        glbfile,
-        log_msg,
-        gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
     )
 def clear_fields():
-    """Clear 3D viewer"""
     return None
 def update_log():
-    """Display log message"""
     return "Loading and reconstructing..."
@@ -311,7 +726,10 @@ def update_visualization(
     filter_white_bg=False,
     show_mesh=True,
 ):
-    """Update visualization"""
     if is_example == "True":
         return gr.update(), "No reconstruction available. Please click the reconstruct button first."
@@ -344,12 +762,72 @@ def update_visualization(
     return glbfile, "Visualization updated."
 # ============================================================================
-# Example Scenes
 # ============================================================================
 def get_scene_info(examples_dir):
-    """Get information about scenes in the examples directory"""
     import glob
     scenes = []
@@ -384,7 +862,7 @@ def get_scene_info(examples_dir):
 def load_example_scene(scene_name, examples_dir="examples"):
-    """Load a scene from examples directory"""
     scenes = get_scene_info(examples_dir)
     selected_scene = None
@@ -407,12 +885,11 @@ def load_example_scene(scene_name, examples_dir="examples"):
 # ============================================================================
-# Gradio UI
 # ============================================================================
 theme = get_gradio_theme()
-# Custom CSS to prevent UI jitter
 APP_CSS = GRADIO_CSS + """
 /* Prevent components from expanding the layout */
 .gradio-container {
@@ -440,57 +917,150 @@ APP_CSS = GRADIO_CSS + """
 .tab-content {
     min-height: 550px !important;
 }
 """
 with gr.Blocks() as demo:
     is_example = gr.Textbox(label="is_example", visible=False, value="None")
     target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
     with gr.Row(equal_height=False):
-        # Left Side: Input Area
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📤 Input")
             input_images = gr.File(
-                file_count="multiple",
-                label="Upload multiple images (3-10 recommended)",
                 interactive=True,
-                height=200
             )
             image_gallery = gr.Gallery(
-                label="Image Preview", columns=3, height=350,
-                object_fit="contain", preview=True
             )
             with gr.Row():
-                submit_btn = gr.Button("🚀 Start Reconstruction", variant="primary", scale=2)
                 clear_btn = gr.ClearButton(
                     [input_images, target_dir_output, image_gallery],
-                    value="🗑️ Clear", scale=1
                 )
-        # Right Side: Output Area
         with gr.Column(scale=2, min_width=600):
             gr.Markdown("### 🎯 Output")
             with gr.Tabs():
                 with gr.Tab("🏗️ Raw 3D"):
                     reconstruction_output = gr.Model3D(
-                        height=550, zoom_speed=0.5, pan_speed=0.5,
-                        clear_color=[0.0, 0.0, 0.0, 0.0]
                     )
             log_output = gr.Textbox(
                 value="📌 Please upload images, then click 'Start Reconstruction'",
                 label="Status Information",
                 interactive=False,
                 lines=1,
-                max_lines=1
             )
-    # Advanced Options (Collapsible)
     with gr.Accordion("⚙️ Advanced Options", open=False):
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=300):
@@ -499,21 +1069,28 @@ with gr.Blocks() as demo:
                     choices=["All"], value="All", label="Display Frame"
                 )
                 conf_thres = gr.Slider(
-                    minimum=0, maximum=100, value=0, step=0.1,
-                    label="Confidence Threshold (Percentile)"
                 )
                 show_cam = gr.Checkbox(label="Show Camera", value=True)
                 show_mesh = gr.Checkbox(label="Show Mesh", value=True)
-                filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
-                filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
             with gr.Column(scale=1, min_width=300):
                 gr.Markdown("#### Reconstruction Parameters")
                 apply_mask_checkbox = gr.Checkbox(
                     label="Apply Depth Mask", value=True
                 )
-    # Example Scenes (Collapsible)
     with gr.Accordion("🖼️ Example Scenes", open=False):
         scenes = get_scene_info("examples")
         if scenes:
@@ -525,68 +1102,216 @@ with gr.Blocks() as demo:
                             scene = scenes[scene_idx]
                             with gr.Column(scale=1, min_width=150):
                                 scene_img = gr.Image(
-                                    value=scene["thumbnail"],
                                     height=150,
-                                    interactive=False,
-                                    show_label=False,
                                     sources=[],
-                                    container=False
                                 )
                                 gr.Markdown(
                                     f"**{scene['name']}** ({scene['num_images']} images)",
-                                    elem_classes=["text-center"]
                                 )
                                 scene_img.select(
-                                    fn=lambda name=scene["name"]: load_example_scene(name),
                                     outputs=[
                                         reconstruction_output,
-                                        target_dir_output, image_gallery, log_output
-                                    ]
                                 )
-    # === Event Binding ===
-    # Auto update on file upload
     input_images.change(
         fn=update_gallery_on_upload,
         inputs=[input_images],
-        outputs=[reconstruction_output, target_dir_output, image_gallery, log_output]
     )
-    # Reconstruction button
     submit_btn.click(
-        fn=clear_fields,
-        outputs=[reconstruction_output]
     ).then(
         fn=update_log,
-        outputs=[log_output]
     ).then(
         fn=gradio_demo,
         inputs=[
-            target_dir_output, frame_filter, show_cam,
-            filter_black_bg, filter_white_bg, conf_thres,
-            apply_mask_checkbox, show_mesh
         ],
         outputs=[
-            reconstruction_output, log_output, frame_filter
-        ]
     ).then(
         fn=lambda: "False",
-        outputs=[is_example]
     )
-    # Clear button
-    clear_btn.add([reconstruction_output, log_output])
-    # Visualization parameters real-time update
-    for component in [frame_filter, show_cam, conf_thres, show_mesh, filter_black_bg, filter_white_bg]:
         component.change(
-            fn=update_visualization,
             inputs=[
-                target_dir_output, frame_filter, show_cam, is_example,
-                conf_thres, filter_black_bg, filter_white_bg, show_mesh
             ],
-            outputs=[reconstruction_output, log_output]
         )
-demo.queue(max_size=20).launch(theme=theme, css=APP_CSS, show_error=True, share=True, ssr_mode=False)

 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+import cv2
 import gradio as gr
+import matplotlib.pyplot as plt
 import numpy as np
 import spaces
 import torch
 from mapanything.utils.hf_utils.viz import predictions_to_glb
 from mapanything.utils.image import load_images
+# Optional imports with fallbacks
+try:
+    from mapanything.utils.geometry import points_to_normals
+except ImportError:
+    def points_to_normals(points3d, mask=None):
+        """Fallback normal estimator used when mapanything cannot supply one.
+
+        Normals are the normalized cross product of the forward differences
+        taken along axis 1 and axis 0 of a 3-D point array. Returns
+        ``(normals, valid)``; ``valid`` flags pixels whose cross product has a
+        non-negligible length and, when ``mask`` is given, that are also set
+        in ``mask``.
+        """
+        _rows, _cols, _channels = points3d.shape  # raises unless the input is 3-D
+        step_x = np.zeros_like(points3d)
+        step_y = np.zeros_like(points3d)
+        # Forward differences; the last column / last row keep a zero step.
+        step_x[:, :-1] = points3d[:, 1:] - points3d[:, :-1]
+        step_y[:-1, :] = points3d[1:, :] - points3d[:-1, :]
+        cross = np.cross(step_x, step_y)
+        # Clamp the length so degenerate pixels do not divide by zero.
+        length = np.maximum(np.linalg.norm(cross, axis=-1, keepdims=True), 1e-8)
+        unit_normals = cross / length
+        is_valid = length.squeeze(-1) > 1e-6
+        if mask is not None:
+            is_valid = is_valid & mask
+        return unit_normals, is_valid
+# Prefer the instruction text shipped with mapanything; define a local copy
+# when that import fails.
+try:
+    from mapanything.utils.hf_utils.css_and_html import MEASURE_INSTRUCTIONS_HTML
+except ImportError:
+    # NOTE(review): despite the name, this fallback text uses Markdown syntax
+    # (bold markers, numbered list) rather than HTML tags.
+    MEASURE_INSTRUCTIONS_HTML = """
+**📏 Measurement Tool:**
+1. Click on the **first point** in the image to mark it
+2. Click on the **second point** to measure the 3D distance between them
+3. The depth of each point and the computed 3D distance will be displayed below
+4. After each measurement, click two new points for a new measurement
+"""
 register_heif_opener()
 sys.path.append("mapanything/")
 # ============================================================================
 # Global Configuration
 # ============================================================================
 high_level_config = {
     "path": "configs/train.yaml",
     "hf_model_name": "facebook/map-anything",
     "resolution": 518,
 }
+# Global model variable
 model = None
 # ============================================================================
+# Core Model Inference (KEPT AS-IS)
 # ============================================================================
 @spaces.GPU(duration=120)
     images_list = []
     final_mask_list = []
     confidences = []
     for pred in outputs:
         depthmap_torch = pred["depth_z"][0].squeeze(-1)
         intrinsics_torch = pred["intrinsics"][0]
         camera_pose_torch = pred["camera_poses"][0]
         conf = pred["conf"][0].squeeze(-1)
         pts3d_computed, valid_mask = depthmap_to_world_frame(
             depthmap_torch, intrinsics_torch, camera_pose_torch
         )
     predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
     predictions["world_points"] = np.stack(world_points_list, axis=0)
     predictions["conf"] = np.stack(confidences, axis=0)
     depth_maps = np.stack(depth_maps_list, axis=0)
     if len(depth_maps.shape) == 3:
         depth_maps = depth_maps[..., np.newaxis]
     predictions["depth"] = depth_maps
     predictions["images"] = np.stack(images_list, axis=0)
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
 # ============================================================================
+# Visualization Processing Functions (NEW - for Depth, Normal, Measure tabs)
+# ============================================================================
+def process_predictions_for_visualization(
+    predictions, filter_black_bg=False, filter_white_bg=False
+):
+    """Build the per-view data consumed by the Depth, Normal and Measure tabs.
+
+    For every view in ``predictions`` this gathers the image, world points,
+    squeezed depth map and a copy of the validity mask, optionally removes
+    near-black and/or near-white pixels from that mask, and computes normals
+    from the world points.
+
+    Returns a dict keyed by view index; each value holds the keys ``image``,
+    ``points3d``, ``depth``, ``normal`` and ``mask``.
+    """
+    per_view = {}
+    for view_idx, rgb in enumerate(predictions["images"]):
+        world_pts = predictions["world_points"][view_idx]
+        depth_2d = predictions["depth"][view_idx].squeeze()
+        keep = predictions["final_mask"][view_idx].copy()
+        if filter_black_bg or filter_white_bg:
+            # The colour thresholds below are expressed on a 0-255 scale.
+            rgb_255 = rgb * 255 if rgb.max() <= 1.0 else rgb.copy()
+            if filter_black_bg:
+                # Keep only pixels whose channel sum reaches 16.
+                keep = keep & (rgb_255.sum(axis=2) >= 16)
+            if filter_white_bg:
+                # Drop pixels whose first three channels all exceed 240.
+                keep = keep & ~(rgb_255[:, :, :3] > 240).all(axis=2)
+        normal_map, _ = points_to_normals(world_pts, mask=keep)
+        per_view[view_idx] = {
+            "image": rgb,
+            "points3d": world_pts,
+            "depth": depth_2d,
+            "normal": normal_map,
+            "mask": keep,
+        }
+    return per_view
+def colorize_depth(depth_map, mask=None):
+    """Render a depth map as an RGB uint8 image with the ``turbo_r`` colormap.
+
+    Valid pixels (depth > 0 and, if given, inside ``mask``) are rescaled so
+    the 5th/95th percentiles map to 0/1 before colouring; when both
+    percentiles coincide every valid pixel is set to 0.5. Invalid pixels are
+    painted white. Returns ``None`` when ``depth_map`` is ``None``.
+    """
+    if depth_map is None:
+        return None
+    scaled = depth_map.copy()
+    usable = scaled > 0
+    if mask is not None:
+        usable = usable & mask
+    if usable.sum() > 0:
+        samples = scaled[usable]
+        low = np.percentile(samples, 5)
+        high = np.percentile(samples, 95)
+        # A flat depth range cannot be stretched; pin it to mid-scale instead.
+        scaled[usable] = (samples - low) / (high - low) if high > low else 0.5
+    rgba = plt.cm.turbo_r(np.clip(scaled, 0, 1))
+    rgb = (rgba[:, :, :3] * 255).astype(np.uint8)
+    rgb[~usable] = [255, 255, 255]
+    return rgb
+def colorize_normal(normal_map, mask=None):
+    """Render a normal map as an RGB uint8 image.
+
+    Components are mapped from [-1, 1] to [0, 255]; pixels outside ``mask``
+    are zeroed first, so they come out as mid-grey. Returns ``None`` when
+    ``normal_map`` is ``None``.
+    """
+    if normal_map is None:
+        return None
+    shaded = normal_map.copy()
+    if mask is not None:
+        shaded[~mask] = [0, 0, 0]
+    unit_range = np.clip((shaded + 1.0) / 2.0, 0, 1)
+    return (unit_range * 255).astype(np.uint8)
+def update_view_selectors(processed_data):
+    """Return three dropdown updates listing ``View 1`` .. ``View N``.
+
+    The tuple is ordered depth, normal, measure selector. With no processed
+    data a single ``View 1`` entry is offered; the first entry is selected.
+    """
+    if processed_data is not None and len(processed_data) > 0:
+        labels = [f"View {number}" for number in range(1, len(processed_data) + 1)]
+    else:
+        labels = ["View 1"]
+    return tuple(gr.Dropdown(choices=labels, value=labels[0]) for _ in range(3))
+def get_view_data_by_index(processed_data, view_index):
+    """Return the data of the ``view_index``-th view in insertion order.
+
+    An out-of-range index falls back to the first view; ``None`` is returned
+    when there is no processed data at all.
+    """
+    if not (processed_data is not None and len(processed_data) > 0):
+        return None
+    ordered_keys = list(processed_data)
+    position = view_index if 0 <= view_index < len(ordered_keys) else 0
+    return processed_data[ordered_keys[position]]
+def update_depth_view(processed_data, view_index):
+    """Return the colourized depth image of one view, or ``None`` if unavailable."""
+    selected = get_view_data_by_index(processed_data, view_index)
+    if selected is not None and selected["depth"] is not None:
+        return colorize_depth(selected["depth"], mask=selected.get("mask"))
+    return None
+def update_normal_view(processed_data, view_index):
+    """Return the colourized normal image of one view, or ``None`` if unavailable."""
+    selected = get_view_data_by_index(processed_data, view_index)
+    if selected is not None and selected["normal"] is not None:
+        return colorize_normal(selected["normal"], mask=selected.get("mask"))
+    return None
+def update_measure_view(processed_data, view_index):
+    """Return ``(image, [])`` for the measure tab of one view.
+
+    The view image is copied, converted to uint8 if needed, and pixels
+    outside the view's mask are blended 50/50 with a light pink tint. The
+    empty list is the reset set of measurement points. Returns ``(None, [])``
+    when the view cannot be resolved.
+    """
+    selected = get_view_data_by_index(processed_data, view_index)
+    if selected is None:
+        return None, []
+    canvas = selected["image"].copy()
+    if canvas.dtype != np.uint8:
+        # Values within [.., 1.0] are treated as normalized and scaled up first.
+        if canvas.max() <= 1.0:
+            canvas = canvas * 255
+        canvas = canvas.astype(np.uint8)
+    valid = selected["mask"]
+    if valid is not None:
+        outside = ~valid
+        if outside.any():
+            tint = np.array([255, 220, 220], dtype=np.uint8)
+            alpha = 0.5
+            # Blend only the first three channels of the invalid pixels.
+            blended = (1 - alpha) * canvas[outside, :3] + alpha * tint
+            canvas[outside, :3] = blended.astype(np.uint8)
+    return canvas, []
+def navigate_depth_view(processed_data, current_selector_value, direction):
+    """Step the depth tab to the previous (-1) or next (+1) view, wrapping around.
+
+    Returns ``(selector_label, depth_image)``; without processed data the
+    result is ``("View 1", None)``. An unparsable label counts as the first view.
+    """
+    total_views = len(processed_data) if processed_data is not None else 0
+    if total_views == 0:
+        return "View 1", None
+    try:
+        label_number = int(current_selector_value.split()[1])
+    except Exception:
+        label_number = 1
+    target = (label_number - 1 + direction) % total_views
+    return f"View {target + 1}", update_depth_view(processed_data, target)
+def navigate_normal_view(processed_data, current_selector_value, direction):
+    """Step the normal tab to the previous (-1) or next (+1) view, wrapping around.
+
+    Returns ``(selector_label, normal_image)``; without processed data the
+    result is ``("View 1", None)``. An unparsable label counts as the first view.
+    """
+    total_views = len(processed_data) if processed_data is not None else 0
+    if total_views == 0:
+        return "View 1", None
+    try:
+        label_number = int(current_selector_value.split()[1])
+    except Exception:
+        label_number = 1
+    target = (label_number - 1 + direction) % total_views
+    return f"View {target + 1}", update_normal_view(processed_data, target)
+def navigate_measure_view(processed_data, current_selector_value, direction):
+    """Step the measure tab to the previous (-1) or next (+1) view, wrapping around.
+
+    Returns ``(selector_label, measure_image, measure_points)``; without
+    processed data the result is ``("View 1", None, [])``. An unparsable
+    label counts as the first view.
+    """
+    total_views = len(processed_data) if processed_data is not None else 0
+    if total_views == 0:
+        return "View 1", None, []
+    try:
+        label_number = int(current_selector_value.split()[1])
+    except Exception:
+        label_number = 1
+    target = (label_number - 1 + direction) % total_views
+    label = f"View {target + 1}"
+    overlay_image, pending_points = update_measure_view(processed_data, target)
+    return label, overlay_image, pending_points
+def populate_visualization_tabs(processed_data):
+    """Return the initial (first view) content for the per-view tabs.
+
+    The result is ``(depth_image, normal_image, measure_image, [])``, where the
+    trailing list is the empty set of measurement points; all images are
+    ``None`` when there is no processed data.
+    """
+    has_views = processed_data is not None and len(processed_data) > 0
+    if not has_views:
+        return None, None, None, []
+    first_depth = update_depth_view(processed_data, 0)
+    first_normal = update_normal_view(processed_data, 0)
+    first_measure = update_measure_view(processed_data, 0)[0]
+    return first_depth, first_normal, first_measure, []
+def _draw_measure_markers(image, points):
+    """Draw a red circle on ``image`` at every in-bounds ``(x, y)`` point."""
+    for p in points:
+        if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
+            image = cv2.circle(image, tuple(p), radius=5, color=(255, 0, 0), thickness=2)
+    return image
+def measure(processed_data, measure_points, current_view_selector, event: gr.SelectData):
+    """Handle a click on the measure image: two clicks yield a 3D distance.
+
+    Args:
+        processed_data: Per-view dict; each view provides ``image``,
+            ``points3d``, ``depth`` and ``mask``.
+        measure_points: ``(x, y)`` pixels already marked for the pending
+            measurement. ``None`` is treated as empty and the list is copied,
+            never modified in place.
+        current_view_selector: Label such as ``"View 2"``; an unparsable or
+            out-of-range label selects the first view.
+        event: Gradio select event; ``event.index`` is read as ``(x, y)``.
+
+    Returns:
+        ``image, points, text``: the annotated image, the points still
+        pending (emptied once a two-point measurement completes) and the
+        status text. Any unexpected error yields ``None, [], message``.
+    """
+    try:
+        if processed_data is None or len(processed_data) == 0:
+            return None, [], "No data available"
+        # Work on a copy so the caller's state list is only replaced via the return value.
+        points = list(measure_points) if measure_points else []
+        # Determine which view is currently active
+        try:
+            current_view_index = int(current_view_selector.split()[1]) - 1
+        except Exception:
+            current_view_index = 0
+        if current_view_index < 0 or current_view_index >= len(processed_data):
+            current_view_index = 0
+        view_keys = list(processed_data.keys())
+        current_view = processed_data[view_keys[current_view_index]]
+        if current_view is None:
+            return None, [], "No view data available"
+        point2d = event.index[0], event.index[1]
+        # Reject clicks on masked (invalid) areas
+        if (
+            current_view["mask"] is not None
+            and 0 <= point2d[1] < current_view["mask"].shape[0]
+            and 0 <= point2d[0] < current_view["mask"].shape[1]
+        ):
+            if not current_view["mask"][point2d[1], point2d[0]]:
+                masked_image, _ = update_measure_view(processed_data, current_view_index)
+                if masked_image is not None:
+                    # Keep the markers of still-pending points visible.
+                    masked_image = _draw_measure_markers(masked_image, points)
+                return (
+                    masked_image,
+                    points,
+                    '<span style="color: red; font-weight: bold;">Cannot measure on masked areas (shown with a light pink tint)</span>',
+                )
+        points.append(point2d)
+        # Get base image with mask overlay
+        image, _ = update_measure_view(processed_data, current_view_index)
+        if image is None:
+            return None, [], "No image available"
+        image = image.copy()
+        points3d = current_view["points3d"]
+        # Ensure uint8
+        if image.dtype != np.uint8:
+            if image.max() <= 1.0:
+                image = (image * 255).astype(np.uint8)
+            else:
+                image = image.astype(np.uint8)
+        # Draw circles on marked points
+        image = _draw_measure_markers(image, points)
+        # Build depth info text
+        depth_text = ""
+        for i, p in enumerate(points):
+            if (
+                current_view["depth"] is not None
+                and 0 <= p[1] < current_view["depth"].shape[0]
+                and 0 <= p[0] < current_view["depth"].shape[1]
+            ):
+                d = current_view["depth"][p[1], p[0]]
+                depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n"
+            elif (
+                points3d is not None
+                and 0 <= p[1] < points3d.shape[0]
+                and 0 <= p[0] < points3d.shape[1]
+            ):
+                z = points3d[p[1], p[0], 2]
+                depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n"
+        # If two points are marked, compute distance
+        if len(points) == 2:
+            point1, point2 = points
+            # Draw line between the two points
+            if (
+                0 <= point1[0] < image.shape[1]
+                and 0 <= point1[1] < image.shape[0]
+                and 0 <= point2[0] < image.shape[1]
+                and 0 <= point2[1] < image.shape[0]
+            ):
+                image = cv2.line(
+                    image, tuple(point1), tuple(point2), color=(255, 0, 0), thickness=2
+                )
+            # Compute 3D Euclidean distance
+            distance_text = "- **Distance: Unable to compute**"
+            if (
+                points3d is not None
+                and 0 <= point1[1] < points3d.shape[0]
+                and 0 <= point1[0] < points3d.shape[1]
+                and 0 <= point2[1] < points3d.shape[0]
+                and 0 <= point2[0] < points3d.shape[1]
+            ):
+                try:
+                    p1_3d = points3d[point1[1], point1[0]]
+                    p2_3d = points3d[point2[1], point2[0]]
+                    distance = np.linalg.norm(p1_3d - p2_3d)
+                    distance_text = f"- **Distance: {distance:.2f}m**"
+                except Exception as e:
+                    distance_text = f"- **Distance computation error: {e}**"
+            # Reset points after measurement
+            text = depth_text + distance_text
+            return [image, [], text]
+        else:
+            return [image, points, depth_text]
+    except Exception as e:
+        # UI boundary: report the failure in the status text instead of raising.
+        print(f"Measure error: {e}")
+        return None, [], f"Measure error: {e}"
+def reset_measure(processed_data):
+    """Clear pending measurement points and return the unmarked first-view image.
+
+    The image comes from ``update_measure_view`` so it carries the same uint8
+    conversion and masked-area tint as every other path that fills the
+    measure image; only the click markers are removed.
+
+    Returns:
+        ``(image, [], "")``, or ``(None, [], "")`` without processed data.
+    """
+    if processed_data is None or len(processed_data) == 0:
+        return None, [], ""
+    image, _ = update_measure_view(processed_data, 0)
+    return image, [], ""
+# ============================================================================
+# Helper Functions (KEPT AS-IS)
 # ============================================================================
 def handle_uploads(input_images):
+    """Handle uploaded images."""
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
     image_paths = []
     if input_images is not None:
         for file_data in input_images:
             if isinstance(file_data, dict) and "name" in file_data:
 def update_gallery_on_upload(input_images):
+    """Update gallery on upload."""
     if not input_images:
         return None, None, None, None
     target_dir, image_paths = handle_uploads(input_images)
     )
+# ============================================================================
+# Main Reconstruction Function (Extended for new tabs)
+# ============================================================================
 @spaces.GPU(duration=120)
 def gradio_demo(
     target_dir,
     apply_mask=True,
     show_mesh=True,
 ):
+    """Perform reconstruction and populate all tabs."""
     if not os.path.isdir(target_dir) or target_dir == "None":
+        return (
+            None, None,
+            "Please upload files first",
+            None, None,
+            None, None, None, "",
+            None, None, None,
+        )
     start_time = time.time()
     gc.collect()
     all_files_display = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files_display
+    # ---- Run model (KEPT AS-IS) ----
     print("Running MapAnything model...")
     with torch.no_grad():
         predictions = run_model(target_dir, apply_mask)
+    # ---- Save predictions (KEPT AS-IS) ----
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
     if frame_filter is None:
         frame_filter = "All"
+    # ---- Generate GLB (KEPT AS-IS) ----
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}.glb",
     )
     glbscene.export(file_obj=glbfile)
+    # ---- NEW: Process data for Depth / Normal / Measure tabs ----
+    processed_data = process_predictions_for_visualization(
+        predictions, filter_black_bg, filter_white_bg
+    )
+    depth_vis, normal_vis, measure_img, _ = populate_visualization_tabs(processed_data)
+    depth_selector, normal_selector, measure_selector = update_view_selectors(processed_data)
     # Cleanup
     del predictions
     gc.collect()
     log_msg = f"✅ Reconstruction successful ({len(all_files)} frames)"
     return (
+        glbfile,                                                                      # reconstruction_output  (Raw 3D)
+        glbfile,                                                                      # reconstruction_output_3d (3D View)
+        log_msg,                                                                      # log_output
+        gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),  # frame_filter
+        processed_data,                                                               # processed_data_state
+        depth_vis,                                                                    # depth_map
+        normal_vis,                                                                   # normal_map
+        measure_img,                                                                  # measure_image
+        "",                                                                           # measure_text
+        depth_selector,                                                               # depth_view_selector
+        normal_selector,                                                              # normal_view_selector
+        measure_selector,                                                             # measure_view_selector
     )
+# ============================================================================
+# UI Helper Functions
+# ============================================================================
 def clear_fields():
+    """
+    Clear the 3D viewer by returning None.
+    NOTE(review): the event binding that uses this callback is not visible
+    in this part of the diff - confirm which output component it feeds.
+    """
     return None
 def update_log():
+    """
+    Return the status text displayed while reconstruction is in progress.
+    Takes no inputs; the submit-button chain writes the returned string to
+    log_output just before the reconstruction step runs.
+    """
     return "Loading and reconstructing..."
     filter_white_bg=False,
     show_mesh=True,
 ):
+    """
+    Reload saved predictions from npz, create (or reuse) the GLB for new parameters.
+    KEPT AS-IS from original code.
+    """
     if is_example == "True":
         return gr.update(), "No reconstruction available. Please click the reconstruct button first."
     return glbfile, "Visualization updated."
+def update_all_3d_views(
+    target_dir, frame_filter, show_cam, is_example,
+    conf_thres, filter_black_bg, filter_white_bg, show_mesh,
+):
+    """Wrapper: update both Raw 3D and 3D View tabs simultaneously."""
+    glb_result, log_msg = update_visualization(
+        target_dir, frame_filter, show_cam, is_example,
+        conf_thres, filter_black_bg, filter_white_bg, show_mesh,
+    )
+    return glb_result, glb_result, log_msg
+def update_all_views_on_filter_change(
+    target_dir, filter_black_bg, filter_white_bg, processed_data,
+    depth_view_selector, normal_view_selector, measure_view_selector,
+):
+    """
+    Re-process per-view visualization (depth / normal / measure) when
+    background filter checkboxes change.
+    """
+    if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
+        return processed_data, None, None, None, []
+    predictions_path = os.path.join(target_dir, "predictions.npz")
+    if not os.path.exists(predictions_path):
+        return processed_data, None, None, None, []
+    try:
+        loaded = np.load(predictions_path, allow_pickle=True)
+        predictions = {key: loaded[key] for key in loaded.keys()}
+        new_processed_data = process_predictions_for_visualization(
+            predictions, filter_black_bg, filter_white_bg
+        )
+        # Determine current view indices
+        try:
+            depth_idx = int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
+        except Exception:
+            depth_idx = 0
+        try:
+            normal_idx = int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
+        except Exception:
+            normal_idx = 0
+        try:
+            measure_idx = int(measure_view_selector.split()[1]) - 1 if measure_view_selector else 0
+        except Exception:
+            measure_idx = 0
+        depth_vis = update_depth_view(new_processed_data, depth_idx)
+        normal_vis = update_normal_view(new_processed_data, normal_idx)
+        measure_img, _ = update_measure_view(new_processed_data, measure_idx)
+        return new_processed_data, depth_vis, normal_vis, measure_img, []
+    except Exception as e:
+        print(f"Error updating views on filter change: {e}")
+        return processed_data, None, None, None, []
 # ============================================================================
+# Example Scenes (KEPT AS-IS)
 # ============================================================================
 def get_scene_info(examples_dir):
+    """Get information about scenes in the examples directory."""
     import glob
     scenes = []
 def load_example_scene(scene_name, examples_dir="examples"):
+    """Load a scene from examples directory."""
     scenes = get_scene_info(examples_dir)
     selected_scene = None
 # ============================================================================
+# Gradio UI — 5 Tabs: Raw 3D · 3D View · Depth · Normal · Measure
 # ============================================================================
 theme = get_gradio_theme()
 APP_CSS = GRADIO_CSS + """
 /* Prevent components from expanding the layout */
 .gradio-container {
 .tab-content {
     min-height: 550px !important;
 }
+/* Navigation row styling */
+.navigation-row {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+}
 """
 with gr.Blocks() as demo:
+    # Hidden state variables
     is_example = gr.Textbox(label="is_example", visible=False, value="None")
     target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
+    processed_data_state = gr.State(value=None)
+    measure_points_state = gr.State(value=[])
     with gr.Row(equal_height=False):
+        # ==================== Left Side: Input Area ====================
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📤 Input")
             input_images = gr.File(
+                file_count="multiple",
+                label="Upload multiple images (3-10 recommended)",
                 interactive=True,
+                height=200,
             )
             image_gallery = gr.Gallery(
+                label="Image Preview",
+                columns=3,
+                height=350,
+                object_fit="contain",
+                preview=True,
             )
             with gr.Row():
+                submit_btn = gr.Button(
+                    "🚀 Start Reconstruction", variant="primary", scale=2
+                )
                 clear_btn = gr.ClearButton(
                     [input_images, target_dir_output, image_gallery],
+                    value="🗑️ Clear",
+                    scale=1,
                 )
+        # ==================== Right Side: Output Area ====================
         with gr.Column(scale=2, min_width=600):
             gr.Markdown("### 🎯 Output")
             with gr.Tabs():
+                # ---------- Tab 1: Raw 3D (KEPT AS-IS) ----------
                 with gr.Tab("🏗️ Raw 3D"):
                     reconstruction_output = gr.Model3D(
+                        height=550,
+                        zoom_speed=0.5,
+                        pan_speed=0.5,
+                        clear_color=[0.0, 0.0, 0.0, 0.0],
+                    )
+                # ---------- Tab 2: 3D View (NEW) ----------
+                with gr.Tab("🌐 3D View"):
+                    reconstruction_output_3d = gr.Model3D(
+                        height=550,
+                        zoom_speed=0.5,
+                        pan_speed=0.5,
+                        clear_color=[0.05, 0.05, 0.05, 1.0],
+                    )
+                # ---------- Tab 3: Depth (NEW) ----------
+                with gr.Tab("🔵 Depth"):
+                    with gr.Row(elem_classes=["navigation-row"]):
+                        prev_depth_btn = gr.Button("◀ Prev", size="sm", scale=1)
+                        depth_view_selector = gr.Dropdown(
+                            choices=["View 1"],
+                            value="View 1",
+                            label="Select View",
+                            scale=2,
+                            interactive=True,
+                            allow_custom_value=True,
+                        )
+                        next_depth_btn = gr.Button("Next ▶", size="sm", scale=1)
+                    depth_map = gr.Image(
+                        type="numpy",
+                        label="Colorized Depth Map",
+                        format="png",
+                        interactive=False,
+                    )
+                # ---------- Tab 4: Normal (NEW) ----------
+                with gr.Tab("🟢 Normal"):
+                    with gr.Row(elem_classes=["navigation-row"]):
+                        prev_normal_btn = gr.Button("◀ Prev", size="sm", scale=1)
+                        normal_view_selector = gr.Dropdown(
+                            choices=["View 1"],
+                            value="View 1",
+                            label="Select View",
+                            scale=2,
+                            interactive=True,
+                            allow_custom_value=True,
+                        )
+                        next_normal_btn = gr.Button("Next ▶", size="sm", scale=1)
+                    normal_map = gr.Image(
+                        type="numpy",
+                        label="Normal Map",
+                        format="png",
+                        interactive=False,
+                    )
+                # ---------- Tab 5: Measure (NEW) ----------
+                with gr.Tab("📏 Measure"):
+                    gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
+                    with gr.Row(elem_classes=["navigation-row"]):
+                        prev_measure_btn = gr.Button("◀ Prev", size="sm", scale=1)
+                        measure_view_selector = gr.Dropdown(
+                            choices=["View 1"],
+                            value="View 1",
+                            label="Select View",
+                            scale=2,
+                            interactive=True,
+                            allow_custom_value=True,
+                        )
+                        next_measure_btn = gr.Button("Next ▶", size="sm", scale=1)
+                    measure_image = gr.Image(
+                        type="numpy",
+                        show_label=False,
+                        format="webp",
+                        interactive=False,
+                        sources=[],
                     )
+                    gr.Markdown(
+                        "**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken."
+                    )
+                    measure_text = gr.Markdown("")
             log_output = gr.Textbox(
                 value="📌 Please upload images, then click 'Start Reconstruction'",
                 label="Status Information",
                 interactive=False,
                 lines=1,
+                max_lines=1,
             )
+    # ==================== Advanced Options (Collapsible) ====================
     with gr.Accordion("⚙️ Advanced Options", open=False):
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=300):
                     choices=["All"], value="All", label="Display Frame"
                 )
                 conf_thres = gr.Slider(
+                    minimum=0,
+                    maximum=100,
+                    value=0,
+                    step=0.1,
+                    label="Confidence Threshold (Percentile)",
                 )
                 show_cam = gr.Checkbox(label="Show Camera", value=True)
                 show_mesh = gr.Checkbox(label="Show Mesh", value=True)
+                filter_black_bg = gr.Checkbox(
+                    label="Filter Black Background", value=False
+                )
+                filter_white_bg = gr.Checkbox(
+                    label="Filter White Background", value=False
+                )
             with gr.Column(scale=1, min_width=300):
                 gr.Markdown("#### Reconstruction Parameters")
                 apply_mask_checkbox = gr.Checkbox(
                     label="Apply Depth Mask", value=True
                 )
+    # ==================== Example Scenes (Collapsible) ====================
     with gr.Accordion("🖼️ Example Scenes", open=False):
         scenes = get_scene_info("examples")
         if scenes:
                             scene = scenes[scene_idx]
                             with gr.Column(scale=1, min_width=150):
                                 scene_img = gr.Image(
+                                    value=scene["thumbnail"],
                                     height=150,
+                                    interactive=False,
+                                    show_label=False,
                                     sources=[],
+                                    container=False,
                                 )
                                 gr.Markdown(
                                     f"**{scene['name']}** ({scene['num_images']} images)",
+                                    elem_classes=["text-center"],
                                 )
                                 scene_img.select(
+                                    fn=lambda name=scene["name"]: load_example_scene(
+                                        name
+                                    ),
                                     outputs=[
                                         reconstruction_output,
+                                        target_dir_output,
+                                        image_gallery,
+                                        log_output,
+                                    ],
                                 )
+    # ====================================================================
+    # Event Binding
+    # ====================================================================
+    # ---- Auto-update gallery on file upload ----
     input_images.change(
         fn=update_gallery_on_upload,
         inputs=[input_images],
+        outputs=[
+            reconstruction_output,
+            target_dir_output,
+            image_gallery,
+            log_output,
+        ],
+    ).then(
+        fn=lambda: None,
+        outputs=[reconstruction_output_3d],
     )
+    # ---- Reconstruction button ----
     submit_btn.click(
+        fn=lambda: (None, None),
+        outputs=[reconstruction_output, reconstruction_output_3d],
     ).then(
         fn=update_log,
+        outputs=[log_output],
     ).then(
         fn=gradio_demo,
         inputs=[
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            filter_black_bg,
+            filter_white_bg,
+            conf_thres,
+            apply_mask_checkbox,
+            show_mesh,
         ],
         outputs=[
+            reconstruction_output,       # Raw 3D
+            reconstruction_output_3d,    # 3D View
+            log_output,
+            frame_filter,
+            processed_data_state,
+            depth_map,
+            normal_map,
+            measure_image,
+            measure_text,
+            depth_view_selector,
+            normal_view_selector,
+            measure_view_selector,
+        ],
     ).then(
         fn=lambda: "False",
+        outputs=[is_example],
     )
+    # ---- Clear button: also clear new tabs ----
+    clear_btn.add([reconstruction_output, reconstruction_output_3d, log_output])
+    # ---- 3D visualization param changes (frame_filter, show_cam, conf, mesh) ----
+    for component in [frame_filter, show_cam, conf_thres, show_mesh]:
         component.change(
+            fn=update_all_3d_views,
             inputs=[
+                target_dir_output,
+                frame_filter,
+                show_cam,
+                is_example,
+                conf_thres,
+                filter_black_bg,
+                filter_white_bg,
+                show_mesh,
+            ],
+            outputs=[
+                reconstruction_output,
+                reconstruction_output_3d,
+                log_output,
             ],
         )
+    # ---- Background filter changes: update 3D viewers AND per-view tabs ----
+    for filter_component in [filter_black_bg, filter_white_bg]:
+        filter_component.change(
+            fn=update_all_3d_views,
+            inputs=[
+                target_dir_output,
+                frame_filter,
+                show_cam,
+                is_example,
+                conf_thres,
+                filter_black_bg,
+                filter_white_bg,
+                show_mesh,
+            ],
+            outputs=[
+                reconstruction_output,
+                reconstruction_output_3d,
+                log_output,
+            ],
+        ).then(
+            fn=update_all_views_on_filter_change,
+            inputs=[
+                target_dir_output,
+                filter_black_bg,
+                filter_white_bg,
+                processed_data_state,
+                depth_view_selector,
+                normal_view_selector,
+                measure_view_selector,
+            ],
+            outputs=[
+                processed_data_state,
+                depth_map,
+                normal_map,
+                measure_image,
+                measure_points_state,
+            ],
+        )
+    # ---- Depth tab navigation ----
+    prev_depth_btn.click(
+        fn=lambda pd, cs: navigate_depth_view(pd, cs, -1),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_view_selector, depth_map],
+    )
+    next_depth_btn.click(
+        fn=lambda pd, cs: navigate_depth_view(pd, cs, 1),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_view_selector, depth_map],
+    )
+    depth_view_selector.change(
+        fn=lambda pd, sv: (
+            update_depth_view(pd, int(sv.split()[1]) - 1) if sv else None
+        ),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_map],
+    )
+    # ---- Normal tab navigation ----
+    prev_normal_btn.click(
+        fn=lambda pd, cs: navigate_normal_view(pd, cs, -1),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_view_selector, normal_map],
+    )
+    next_normal_btn.click(
+        fn=lambda pd, cs: navigate_normal_view(pd, cs, 1),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_view_selector, normal_map],
+    )
+    normal_view_selector.change(
+        fn=lambda pd, sv: (
+            update_normal_view(pd, int(sv.split()[1]) - 1) if sv else None
+        ),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_map],
+    )
+    # ---- Measure tab navigation ----
+    prev_measure_btn.click(
+        fn=lambda pd, cs: navigate_measure_view(pd, cs, -1),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_view_selector, measure_image, measure_points_state],
+    )
+    next_measure_btn.click(
+        fn=lambda pd, cs: navigate_measure_view(pd, cs, 1),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_view_selector, measure_image, measure_points_state],
+    )
+    measure_view_selector.change(
+        fn=lambda pd, sv: (
+            update_measure_view(pd, int(sv.split()[1]) - 1)
+            if sv
+            else (None, [])
+        ),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_image, measure_points_state],
+    )
+    # ---- Measure click handler ----
+    measure_image.select(
+        fn=measure,
+        inputs=[processed_data_state, measure_points_state, measure_view_selector],
+        outputs=[measure_image, measure_points_state, measure_text],
+    )
+# Enable request queuing (max_size=20) and start the app.
+# NOTE(review): theme and css are passed to launch() here while gr.Blocks()
+# is created without them - confirm the installed Gradio version accepts
+# these keyword arguments on launch().
+demo.queue(max_size=20).launch(
+    theme=theme, css=APP_CSS, show_error=True, share=True, ssr_mode=False
+)