Spaces:

prithivMLmods
/

Map-Anything-v1

Running on Zero

App Files Files Community

prithivMLmods committed 20 days ago

Commit

fee1c33

verified ·

1 Parent(s): eb7073f

Update app.py

Browse files

Files changed (1) hide show

app.py +1122 -416

app.py CHANGED Viewed

@@ -1,10 +1,3 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-# conda activate hf3.10
 import gc
 import os
 import shutil
@@ -27,19 +20,16 @@ register_heif_opener()
 sys.path.append("mapanything/")
 from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
 from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
 from mapanything.utils.hf_utils.viz import predictions_to_glb
 from mapanything.utils.image import load_images, rgb
-import rerun as rr
-# Attempt to import blueprint for advanced view configuration
-try:
-    import rerun.blueprint as rrb
-except ImportError:
-    rrb = None
-from gradio_rerun import Rerun
 # MapAnything Configuration
 high_level_config = {
@@ -61,177 +51,10 @@ high_level_config = {
     "resolution": 518,
 }
-# Initialize model
 model = None
-# -------------------------------------------------------------------------
-# Custom Modern CSS
-# -------------------------------------------------------------------------
-modern_css = r"""
-@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
-*{box-sizing:border-box;margin:0;padding:0}
-body, .gradio-container {
-    background:#0f0f13!important;
-    font-family:'Inter',system-ui,-apple-system,sans-serif!important;
-    font-size:14px!important;
-    color:#e4e4e7!important;
-    min-height:100vh;
-}
-.dark body, .dark .gradio-container {
-    background:#0f0f13!important;
-    color:#e4e4e7!important;
-}
-footer{display:none!important}
-.app-shell {
-    background:#18181b;
-    border:1px solid #27272a;
-    border-radius:16px;
-    margin:12px auto;
-    max-width:1400px;
-    overflow:hidden;
-    box-shadow:0 25px 50px -12px rgba(0,0,0,.6), 0 0 0 1px rgba(255,255,255,.03);
-    display: flex;
-    flex-direction: column;
-}
-.app-header {
-    background:linear-gradient(135deg,#18181b 0%,#1e1e24 100%);
-    border-bottom:1px solid #27272a;
-    padding:14px 24px;
-    display:flex;
-    align-items:center;
-    justify-content:space-between;
-}
-.app-header-left {
-    display:flex;
-    align-items:center;
-    gap:12px;
-}
-.app-logo {
-    width:36px;height:36px;
-    background:linear-gradient(135deg,#6366f1,#8b5cf6,#a78bfa);
-    border-radius:10px;
-    display:flex;align-items:center;justify-content:center;
-    font-size:18px;font-weight:800;color:#fff;
-    box-shadow:0 4px 12px rgba(99,102,241,.35);
-}
-.app-title {
-    font-size:18px;font-weight:700;
-    background:linear-gradient(135deg,#e4e4e7,#a1a1aa);
-    -webkit-background-clip:text;
-    -webkit-text-fill-color:transparent;
-    letter-spacing:-.3px;
-}
-.app-badge {
-    font-size:11px;
-    font-weight:600;
-    padding:3px 10px;
-    border-radius:20px;
-    background:rgba(99,102,241,.15);
-    color:#818cf8;
-    border:1px solid rgba(99,102,241,.25);
-    letter-spacing:.3px;
-}
-.app-main-row {
-    display:flex;
-    gap:0;
-    flex:1;
-    overflow:hidden;
-    flex-wrap: wrap;
-}
-.app-main-left {
-    flex: 1;
-    min-width: 350px;
-    border-right:1px solid #27272a;
-    padding: 20px;
-    display: flex;
-    flex-direction: column;
-    gap: 16px;
-}
-.app-main-right {
-    flex: 2;
-    min-width: 500px;
-    display:flex;
-    flex-direction:column;
-    background:#18181b;
-}
-.modern-btn {
-    display:flex;align-items:center;justify-content:center;gap:8px;
-    width:100%;
-    background:linear-gradient(135deg,#6366f1,#7c3aed);
-    border:none;
-    border-radius:10px;
-    padding:12px 24px;
-    cursor:pointer;
-    font-size:15px;
-    font-weight:600;
-    color:#fff!important;
-    transition:all .2s ease;
-    box-shadow:0 4px 16px rgba(99,102,241,.3), inset 0 1px 0 rgba(255,255,255,.1);
-}
-.modern-btn:hover {
-    background:linear-gradient(135deg,#7c7cf5,#8b5cf6);
-    transform:translateY(-1px);
-}
-.modern-btn.secondary {
-    background:#27272a;
-    box-shadow:none;
-    border: 1px solid #3f3f46;
-}
-.modern-btn.secondary:hover {
-    background:#3f3f46;
-}
-.settings-group {
-    border:1px solid #27272a;
-    border-radius:10px;
-    overflow:hidden;
-    background:#18181b;
-    margin-bottom: 12px;
-}
-.settings-group-title {
-    font-size:12px;
-    font-weight:600;
-    color:#71717a;
-    text-transform:uppercase;
-    letter-spacing:.8px;
-    padding:10px 16px;
-    border-bottom:1px solid #27272a;
-    background:rgba(24,24,27,.5);
-}
-.settings-group-body {
-    padding:14px 16px;
-    display:flex;
-    flex-direction:column;
-    gap:12px;
-}
-/* Gradio Overrides */
-.tabs { background: transparent !important; border: none !important; }
-.tab-nav { border-bottom: 1px solid #27272a !important; margin-bottom: 10px; }
-.tab-nav button { color: #a1a1aa !important; border-radius: 8px 8px 0 0 !important; font-weight: 600 !important; }
-.tab-nav button.selected { color: #818cf8 !important; border-bottom: 2px solid #818cf8 !important; background: rgba(99,102,241,.08) !important; }
-.gradio-dropdown { background: #09090b !important; border: 1px solid #27272a !important; color: #fff !important; }
-.gallery-item { border-radius: 8px !important; overflow: hidden !important; border: 1px solid #27272a !important; }
-/* Custom log styling */
-.custom-log { padding: 12px; border-radius: 8px; background: rgba(99, 102, 241, 0.1); border: 1px solid rgba(99, 102, 241, 0.2); color: #c7d2fe; font-family: 'JetBrains Mono', monospace; font-size: 12px; }
-"""
-DOWNLOAD_SVG = '<svg viewBox="0 0 24 24" width="16" height="16" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M12 16l-5-5h3V4h4v7h3l-5 5z"/><path d="M20 18H4v2h16v-2z"/></svg>'
 # -------------------------------------------------------------------------
 # 1) Core model inference
 # -------------------------------------------------------------------------
@@ -243,92 +66,126 @@ def run_model(
     filter_black_bg=False,
     filter_white_bg=False,
 ):
     global model
-    import torch
     device = "cuda" if torch.cuda.is_available() else "cpu"
     device = torch.device(device)
     if model is None:
         model = initialize_mapanything_model(high_level_config, device)
     else:
         model = model.to(device)
     model.eval()
     image_folder_path = os.path.join(target_dir, "images")
     views = load_images(image_folder_path)
     if len(views) == 0:
         raise ValueError("No images found. Check your upload.")
     outputs = model.infer(
         views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
     )
     predictions = {}
-    extrinsic_list, intrinsic_list, world_points_list = [], [], []
-    depth_maps_list, images_list, final_mask_list = [], [], []
     for pred in outputs:
-        depthmap_torch = pred["depth_z"][0].squeeze(-1)
-        intrinsics_torch = pred["intrinsics"][0]
-        camera_pose_torch = pred["camera_poses"][0]
         pts3d_computed, valid_mask = depthmap_to_world_frame(
             depthmap_torch, intrinsics_torch, camera_pose_torch
         )
         if "mask" in pred:
             mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool)
         else:
             mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool)
         mask = mask & valid_mask.cpu().numpy()
         image = pred["img_no_norm"][0].cpu().numpy()
         extrinsic_list.append(camera_pose_torch.cpu().numpy())
         intrinsic_list.append(intrinsics_torch.cpu().numpy())
         world_points_list.append(pts3d_computed.cpu().numpy())
         depth_maps_list.append(depthmap_torch.cpu().numpy())
-        images_list.append(image)
-        final_mask_list.append(mask)
     predictions["extrinsic"] = np.stack(extrinsic_list, axis=0)
     predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
     predictions["world_points"] = np.stack(world_points_list, axis=0)
     depth_maps = np.stack(depth_maps_list, axis=0)
     if len(depth_maps.shape) == 3:
         depth_maps = depth_maps[..., np.newaxis]
     predictions["depth"] = depth_maps
     predictions["images"] = np.stack(images_list, axis=0)
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
     processed_data = process_predictions_for_visualization(
         predictions, views, high_level_config, filter_black_bg, filter_white_bg
     )
     torch.cuda.empty_cache()
     return predictions, processed_data
-def generate_rrd(glb_file, target_dir):
-    """Generates an RRD file from the exported GLB to be visualized via Rerun."""
-    rrd_path = os.path.join(target_dir, "reconstruction.rrd")
-    rr.init("MapAnything", spawn=False)
-    rr.log("scene", rr.Asset3D(path=glb_file))
-    if rrb is not None:
-        blueprint = rrb.Blueprint(
-            rrb.Spatial3DView(origin="scene"),
-            rrb.TimePanel(state="collapsed"),
-        )
-        rr.save(rrd_path, default_blueprint=blueprint)
-    else:
-        rr.save(rrd_path)
-    return rrd_path
 def update_view_selectors(processed_data):
     if processed_data is None or len(processed_data) == 0:
         choices = ["View 1"]
     else:
@@ -336,116 +193,173 @@ def update_view_selectors(processed_data):
         choices = [f"View {i + 1}" for i in range(num_views)]
     return (
-        gr.Dropdown(choices=choices, value=choices[0]),
-        gr.Dropdown(choices=choices, value=choices[0]),
-        gr.Dropdown(choices=choices, value=choices[0]),
     )
 def get_view_data_by_index(processed_data, view_index):
     """Return the per-view entry stored at position ``view_index``.

     Positions follow the key order of ``processed_data``. ``None`` is
     returned when ``processed_data`` is ``None`` or empty; an out-of-range
     position falls back to the first entry.
     """
     if processed_data is None:
         return None
     if len(processed_data) == 0:
         return None
     ordered_keys = list(processed_data.keys())
     out_of_range = view_index < 0 or view_index >= len(ordered_keys)
     position = 0 if out_of_range else view_index
     return processed_data[ordered_keys[position]]
 def update_depth_view(processed_data, view_index):
     """Build the colorized depth output for the view at ``view_index``.

     Returns ``None`` when the view cannot be resolved or when its ``"depth"``
     entry is ``None``; otherwise returns the result of ``colorize_depth``
     called with the view's depth and its optional ``"mask"`` entry.
     """
     selected = get_view_data_by_index(processed_data, view_index)
     if selected is None:
         return None
     depth = selected["depth"]
     if depth is None:
         return None
     return colorize_depth(depth, mask=selected.get("mask"))
 def update_normal_view(processed_data, view_index):
     """Build the colorized normal output for the view at ``view_index``.

     Returns ``None`` when the view cannot be resolved or when its ``"normal"``
     entry is ``None``; otherwise returns the result of ``colorize_normal``
     called with the view's normals and its optional ``"mask"`` entry.
     """
     selected = get_view_data_by_index(processed_data, view_index)
     if selected is None:
         return None
     normals = selected["normal"]
     if normals is None:
         return None
     return colorize_normal(normals, mask=selected.get("mask"))
 def update_measure_view(processed_data, view_index):
     view_data = get_view_data_by_index(processed_data, view_index)
     if view_data is None:
-        return None, []
     image = view_data["image"].copy()
     if image.dtype != np.uint8:
         if image.max() <= 1.0:
             image = (image * 255).astype(np.uint8)
         else:
             image = image.astype(np.uint8)
     if view_data["mask"] is not None:
         mask = view_data["mask"]
-        invalid_mask = ~mask
         if invalid_mask.any():
             overlay_color = np.array([255, 220, 220], dtype=np.uint8)
-            alpha = 0.5
-            for c in range(3):
                 image[:, :, c] = np.where(
                     invalid_mask,
                     (1 - alpha) * image[:, :, c] + alpha * overlay_color[c],
                     image[:, :, c],
                 ).astype(np.uint8)
     return image, []
 def navigate_depth_view(processed_data, current_selector_value, direction):
     """Step the depth tab to a neighbouring view, wrapping around the ends.

     Args:
         processed_data: Per-view data collection; ``None`` or empty means
             there is nothing to display.
         current_selector_value: Selector label such as ``"View 3"``.
         direction: Signed step added to the current zero-based view index.

     Returns:
         A ``(selector_label, depth_output)`` tuple, where ``depth_output`` is
         whatever ``update_depth_view`` returns for the new view. When there
         is no data the result is ``("View 1", None)``.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, ValueError):
         # Missing or malformed label: restart from the first view. Only the
         # parsing failures are caught, so KeyboardInterrupt and SystemExit
         # are no longer swallowed the way a bare ``except:`` would.
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     depth_vis = update_depth_view(processed_data, new_view)
     return new_selector_value, depth_vis
 def navigate_normal_view(processed_data, current_selector_value, direction):
     """Step the normal tab to a neighbouring view, wrapping around the ends.

     Args:
         processed_data: Per-view data collection; ``None`` or empty means
             there is nothing to display.
         current_selector_value: Selector label such as ``"View 3"``.
         direction: Signed step added to the current zero-based view index.

     Returns:
         A ``(selector_label, normal_output)`` tuple, where ``normal_output``
         is whatever ``update_normal_view`` returns for the new view. When
         there is no data the result is ``("View 1", None)``.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, ValueError):
         # Missing or malformed label: restart from the first view. Only the
         # parsing failures are caught, so KeyboardInterrupt and SystemExit
         # are no longer swallowed the way a bare ``except:`` would.
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     normal_vis = update_normal_view(processed_data, new_view)
     return new_selector_value, normal_vis
 def navigate_measure_view(processed_data, current_selector_value, direction):
     """Step the measure tab to a neighbouring view, wrapping around the ends.

     Args:
         processed_data: Per-view data collection; ``None`` or empty means
             there is nothing to display.
         current_selector_value: Selector label such as ``"View 3"``.
         direction: Signed step added to the current zero-based view index.

     Returns:
         A ``(selector_label, measure_image, measure_points)`` tuple, where the
         last two items are what ``update_measure_view`` returns for the new
         view. When there is no data the result is ``("View 1", None, [])``.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None, []
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, ValueError):
         # Missing or malformed label: restart from the first view. Only the
         # parsing failures are caught, so KeyboardInterrupt and SystemExit
         # are no longer swallowed the way a bare ``except:`` would.
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     measure_image, measure_points = update_measure_view(processed_data, new_view)
     return new_selector_value, measure_image, measure_points
 def populate_visualization_tabs(processed_data):
     """Produce the initial depth, normal and measure outputs from the first view.

     Returns a 4-tuple ``(depth, normal, measure_image, [])``. The first three
     items are ``None`` when ``processed_data`` is ``None`` or empty; the last
     item is always a fresh empty list.
     """
     no_data = processed_data is None or len(processed_data) == 0
     if no_data:
         return None, None, None, []
     first_view = 0
     depth_image = update_depth_view(processed_data, first_view)
     normal_image = update_normal_view(processed_data, first_view)
     # Only the image is forwarded; the second item of the pair is discarded.
     measure_image, _unused_points = update_measure_view(processed_data, first_view)
     return depth_image, normal_image, measure_image, []
 # -------------------------------------------------------------------------
-# 2) Handle uploaded video/images
 # -------------------------------------------------------------------------
 def handle_uploads(unified_upload, s_time_interval=1.0):
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     target_dir = f"input_images_{timestamp}"
     target_dir_images = os.path.join(target_dir, "images")
     if os.path.exists(target_dir):
         shutil.rmtree(target_dir)
     os.makedirs(target_dir)
@@ -453,6 +367,7 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
     image_paths = []
     if unified_upload is not None:
         for file_data in unified_upload:
             if isinstance(file_data, dict) and "name" in file_data:
@@ -461,12 +376,24 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
                 file_path = str(file_data)
             file_ext = os.path.splitext(file_path)[1].lower()
-            video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
             if file_ext in video_extensions:
                 vs = cv2.VideoCapture(file_path)
                 fps = vs.get(cv2.CAP_PROP_FPS)
-                frame_interval = int(fps * s_time_interval)
                 count = 0
                 video_frame_num = 0
@@ -476,42 +403,90 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
                         break
                     count += 1
                     if count % frame_interval == 0:
                         base_name = os.path.splitext(os.path.basename(file_path))[0]
-                        image_path = os.path.join(target_dir_images, f"{base_name}_{video_frame_num:06}.png")
                         cv2.imwrite(image_path, frame)
                         image_paths.append(image_path)
                         video_frame_num += 1
                 vs.release()
             else:
                 if file_ext in [".heic", ".heif"]:
                     try:
                         with Image.open(file_path) as img:
                             if img.mode not in ("RGB", "L"):
                                 img = img.convert("RGB")
                             base_name = os.path.splitext(os.path.basename(file_path))[0]
-                            dst_path = os.path.join(target_dir_images, f"{base_name}.jpg")
                             img.save(dst_path, "JPEG", quality=95)
                             image_paths.append(dst_path)
                     except Exception as e:
-                        dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
                         shutil.copy(file_path, dst_path)
                         image_paths.append(dst_path)
                 else:
-                    dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
                     shutil.copy(file_path, dst_path)
                     image_paths.append(dst_path)
     image_paths = sorted(image_paths)
     return target_dir, image_paths
 def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0):
     if not input_video and not input_images:
         return None, None, None, None
     target_dir, image_paths = handle_uploads(input_video, input_images, s_time_interval)
-    return None, target_dir, image_paths, "Upload complete. Click 'Reconstruct' to begin."
 # -------------------------------------------------------------------------
-# 4) Reconstruction
 # -------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def gradio_demo(
@@ -523,133 +498,233 @@ def gradio_demo(
     apply_mask=True,
     show_mesh=True,
 ):
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, "No valid target directory found. Please upload first.", None, None
     gc.collect()
     torch.cuda.empty_cache()
     target_dir_images = os.path.join(target_dir, "images")
-    all_files = sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
     all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files
     with torch.no_grad():
         predictions, processed_data = run_model(target_dir, apply_mask)
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
     if frame_filter is None:
         frame_filter = "All"
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
     )
     glbscene = predictions_to_glb(
         predictions,
         filter_by_frames=frame_filter,
         show_cam=show_cam,
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
-        as_mesh=show_mesh,
     )
     glbscene.export(file_obj=glbfile)
-    # Generate RRD
-    rrd_path = generate_rrd(glbfile, target_dir)
     del predictions
     gc.collect()
     torch.cuda.empty_cache()
-    log_msg = f"Reconstruction Success ({len(all_files)} frames). Visualization active."
-    depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(processed_data)
-    depth_selector, normal_selector, measure_selector = update_view_selectors(processed_data)
     return (
-        rrd_path,
         log_msg,
         gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
         processed_data,
         depth_vis,
         normal_vis,
         measure_img,
-        "",
         depth_selector,
         normal_selector,
         measure_selector,
     )
 def colorize_depth(depth_map, mask=None):
-    if depth_map is None: return None
     depth_normalized = depth_map.copy()
     valid_mask = depth_normalized > 0
-    if mask is not None: valid_mask = valid_mask & mask
     if valid_mask.sum() > 0:
         valid_depths = depth_normalized[valid_mask]
         p5 = np.percentile(valid_depths, 5)
         p95 = np.percentile(valid_depths, 95)
         depth_normalized[valid_mask] = (depth_normalized[valid_mask] - p5) / (p95 - p5)
     import matplotlib.pyplot as plt
     colormap = plt.cm.turbo_r
     colored = colormap(depth_normalized)
     colored = (colored[:, :, :3] * 255).astype(np.uint8)
     colored[~valid_mask] = [255, 255, 255]
     return colored
 def colorize_normal(normal_map, mask=None):
-    if normal_map is None: return None
     normal_vis = normal_map.copy()
     if mask is not None:
         invalid_mask = ~mask
-        normal_vis[invalid_mask] = [0, 0, 0]
     normal_vis = (normal_vis + 1.0) / 2.0
     normal_vis = (normal_vis * 255).astype(np.uint8)
     return normal_vis
 def process_predictions_for_visualization(
     predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
 ):
     processed_data = {}
     for view_idx, view in enumerate(views):
         image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
         pred_pts3d = predictions["world_points"][view_idx]
-        view_data = {"image": image[0], "points3d": pred_pts3d, "depth": None, "normal": None, "mask": None}
         mask = predictions["final_mask"][view_idx].copy()
         if filter_black_bg:
             view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
             black_bg_mask = view_colors.sum(axis=2) >= 16
             mask = mask & black_bg_mask
         if filter_white_bg:
             view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
-            white_bg_mask = ~((view_colors[:, :, 0] > 240) & (view_colors[:, :, 1] > 240) & (view_colors[:, :, 2] > 240))
             mask = mask & white_bg_mask
         view_data["mask"] = mask
         view_data["depth"] = predictions["depth"][view_idx].squeeze()
         normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
         view_data["normal"] = normals
         processed_data[view_idx] = view_data
     return processed_data
-def measure(processed_data, measure_points, current_view_selector, event: gr.SelectData):
     try:
         if processed_data is None or len(processed_data) == 0:
             return None, [], "No data available"
         try:
             current_view_index = int(current_view_selector.split()[1]) - 1
         except:
             current_view_index = 0
         if current_view_index < 0 or current_view_index >= len(processed_data):
             current_view_index = 0
@@ -660,60 +735,146 @@ def measure(processed_data, measure_points, current_view_selector, event: gr.Sel
             return None, [], "No view data available"
         point2d = event.index[0], event.index[1]
-        if (current_view["mask"] is not None and 0 <= point2d[1] < current_view["mask"].shape[0] and 0 <= point2d[0] < current_view["mask"].shape[1]):
             if not current_view["mask"][point2d[1], point2d[0]]:
-                masked_image, _ = update_measure_view(processed_data, current_view_index)
-                return masked_image, measure_points, 'Cannot measure on masked areas (shown in grey)'
         measure_points.append(point2d)
         image, _ = update_measure_view(processed_data, current_view_index)
         image = image.copy()
         points3d = current_view["points3d"]
-        if image.dtype != np.uint8:
-            image = (image * 255).astype(np.uint8) if image.max() <= 1.0 else image.astype(np.uint8)
-        for p in measure_points:
-            if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
-                image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2)
         depth_text = ""
-        for i, p in enumerate(measure_points):
-            if current_view["depth"] is not None and 0 <= p[1] < current_view["depth"].shape[0] and 0 <= p[0] < current_view["depth"].shape[1]:
-                d = current_view["depth"][p[1], p[0]]
-                depth_text += f"P{i + 1} depth: {d:.2f}m.\n"
-            else:
-                if points3d is not None and 0 <= p[1] < points3d.shape[0] and 0 <= p[0] < points3d.shape[1]:
-                    z = points3d[p[1], p[0], 2]
-                    depth_text += f"P{i + 1} Z-coord: {z:.2f}m.\n"
         if len(measure_points) == 2:
-            point1, point2 = measure_points
-            if (0 <= point1[0] < image.shape[1] and 0 <= point1[1] < image.shape[0] and 0 <= point2[0] < image.shape[1] and 0 <= point2[1] < image.shape[0]):
-                image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2)
-            distance_text = "Distance: Unable to compute"
-            if (points3d is not None and 0 <= point1[1] < points3d.shape[0] and 0 <= point1[0] < points3d.shape[1] and 0 <= point2[1] < points3d.shape[0] and 0 <= point2[0] < points3d.shape[1]):
-                p1_3d = points3d[point1[1], point1[0]]
-                p2_3d = points3d[point2[1], point2[0]]
-                distance = np.linalg.norm(p1_3d - p2_3d)
-                distance_text = f"Distance: {distance:.2f}m"
-            measure_points = []
-            return [image, measure_points, depth_text + "\n" + distance_text]
         else:
             return [image, measure_points, depth_text]
     except Exception as e:
         return None, [], f"Measure function error: {e}"
 def clear_fields():
     """Return ``None`` unconditionally."""
     cleared = None
     return cleared
 def update_log():
     """Return the fixed status message ``"Loading and Reconstructing..."``."""
     status_message = "Loading and Reconstructing..."
     return status_message
 def update_visualization(
     target_dir,
     frame_filter,
@@ -723,15 +884,30 @@ def update_visualization(
     filter_white_bg=False,
     show_mesh=True,
 ):
     if is_example == "True":
-        return gr.update(), "Please reconstruct first."
     if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
-        return gr.update(), "Please reconstruct first."
     predictions_path = os.path.join(target_dir, "predictions.npz")
     if not os.path.exists(predictions_path):
-        return gr.update(), "No predictions found."
     loaded = np.load(predictions_path, allow_pickle=True)
     predictions = {key: loaded[key] for key in loaded.keys()}
@@ -743,16 +919,35 @@ def update_visualization(
     if not os.path.exists(glbfile):
         glbscene = predictions_to_glb(
-            predictions, filter_by_frames=frame_filter, show_cam=show_cam, mask_black_bg=filter_black_bg, mask_white_bg=filter_white_bg, as_mesh=show_mesh,
         )
         glbscene.export(file_obj=glbfile)
-    rrd_path = generate_rrd(glbfile, target_dir)
-    return rrd_path, "Visualization updated."
 def update_all_views_on_filter_change(
-    target_dir, filter_black_bg, filter_white_bg, processed_data, depth_view_selector, normal_view_selector, measure_view_selector,
 ):
     if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
         return processed_data, None, None, None, []
@@ -761,211 +956,722 @@ def update_all_views_on_filter_change(
         return processed_data, None, None, None, []
     try:
         loaded = np.load(predictions_path, allow_pickle=True)
         predictions = {key: loaded[key] for key in loaded.keys()}
-        views = load_images(os.path.join(target_dir, "images"))
         new_processed_data = process_predictions_for_visualization(
             predictions, views, high_level_config, filter_black_bg, filter_white_bg
         )
-        depth_view_idx = int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
-        normal_view_idx = int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
-        measure_view_idx = int(measure_view_selector.split()[1]) - 1 if measure_view_selector else 0
         depth_vis = update_depth_view(new_processed_data, depth_view_idx)
         normal_vis = update_normal_view(new_processed_data, normal_view_idx)
         measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
         return new_processed_data, depth_vis, normal_vis, measure_img, []
     except Exception as e:
         return processed_data, None, None, None, []
 # -------------------------------------------------------------------------
-# Build Gradio UI
 # -------------------------------------------------------------------------
-with gr.Blocks() as demo:
-    is_example = gr.Textbox(visible=False, value="None")
-    num_images = gr.Textbox(visible=False, value="None")
     processed_data_state = gr.State(value=None)
     measure_points_state = gr.State(value=[])
-    current_view_index = gr.State(value=0)
-    target_dir_output = gr.Textbox(visible=False, value="None")
-    with gr.Column(elem_classes="app-shell"):
-        # Header
-        gr.HTML(f"""
-        <div class="app-header">
-            <div class="app-header-left">
-                <div class="app-logo">M3D</div>
-                <span class="app-title">MapAnything 3D Reconstruction</span>
-                <span class="app-badge">Rerun View</span>
-            </div>
-        </div>
-        """)
-        with gr.Row(elem_classes="app-main-row"):
-            # Left Panel
-            with gr.Column(elem_classes="app-main-left"):
-                gr.Markdown("### Input Media")
-                unified_upload = gr.File(
-                    file_count="multiple",
-                    label="Upload Video or Images",
                     interactive=True,
-                    file_types=["image", "video"],
                 )
-                with gr.Row():
-                    s_time_interval = gr.Slider(
-                        minimum=0.1, maximum=5.0, value=1.0, step=0.1,
-                        label="Video Sample Interval (seconds)",
-                        interactive=True,
-                        scale=3,
-                    )
-                    resample_btn = gr.Button("Resample", visible=False, elem_classes="modern-btn secondary", scale=1)
-                image_gallery = gr.Gallery(
-                    label="Preview",
-                    columns=4,
-                    height="200px",
-                    show_download_button=True,
-                    object_fit="contain",
-                    preview=True,
                 )
-                with gr.Row():
-                    clear_uploads_btn = gr.ClearButton([unified_upload, image_gallery], value="Clear Media", elem_classes="modern-btn secondary")
-                    submit_btn = gr.Button("Reconstruct", elem_classes="modern-btn")
-                log_output = gr.Markdown("Ready to reconstruct.", elem_classes="custom-log")
-                with gr.Column(elem_classes="settings-group"):
-                    gr.HTML("<div class='settings-group-title'>Reconstruction Options</div>")
-                    with gr.Column(elem_classes="settings-group-body"):
-                        frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
-                        show_cam = gr.Checkbox(label="Show Camera", value=True)
-                        show_mesh = gr.Checkbox(label="Show Mesh", value=True)
-                        filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
-                        filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
-                        apply_mask_checkbox = gr.Checkbox(label="Apply mask for predicted depth & edges", value=True)
-            # Right Panel
-            with gr.Column(elem_classes="app-main-right"):
                 with gr.Tabs():
                     with gr.Tab("3D View"):
-                        rerun_output = Rerun(label="Rerun 3D Viewer")
                     with gr.Tab("Depth"):
-                        with gr.Row():
-                            prev_depth_btn = gr.Button("Previous", size="sm", elem_classes="modern-btn secondary")
-                            depth_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", label="Select View", interactive=True)
-                            next_depth_btn = gr.Button("Next", size="sm", elem_classes="modern-btn secondary")
-                        depth_map = gr.Image(type="numpy", label="Colorized Depth Map", interactive=False)
                     with gr.Tab("Normal"):
-                        with gr.Row():
-                            prev_normal_btn = gr.Button("Previous", size="sm", elem_classes="modern-btn secondary")
-                            normal_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", label="Select View", interactive=True)
-                            next_normal_btn = gr.Button("Next", size="sm", elem_classes="modern-btn secondary")
-                        normal_map = gr.Image(type="numpy", label="Normal Map", interactive=False)
                     with gr.Tab("Measure"):
-                        with gr.Row():
-                            prev_measure_btn = gr.Button("Previous", size="sm", elem_classes="modern-btn secondary")
-                            measure_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", label="Select View", interactive=True)
-                            next_measure_btn = gr.Button("Next", size="sm", elem_classes="modern-btn secondary")
-                        measure_image = gr.Image(type="numpy", show_label=False, interactive=False)
-                        measure_text = gr.Markdown("Select points to measure depth and distance.")
     # -------------------------------------------------------------------------
-    # Event Listeners
     # -------------------------------------------------------------------------
-    submit_btn.click(
-        fn=clear_fields, inputs=[], outputs=[rerun_output]
-    ).then(
         fn=update_log, inputs=[], outputs=[log_output]
     ).then(
         fn=gradio_demo,
         inputs=[
-            target_dir_output, frame_filter, show_cam, filter_black_bg, filter_white_bg, apply_mask_checkbox, show_mesh,
         ],
         outputs=[
-            rerun_output, log_output, frame_filter, processed_data_state,
-            depth_map, normal_map, measure_image, measure_text,
-            depth_view_selector, normal_view_selector, measure_view_selector,
         ],
-    ).then(fn=lambda: "False", inputs=[], outputs=[is_example])
     frame_filter.change(
-        update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [rerun_output, log_output]
     )
     show_cam.change(
-        update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [rerun_output, log_output]
-    )
-    show_mesh.change(
-        update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [rerun_output, log_output]
     )
     filter_black_bg.change(
-        update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [rerun_output, log_output]
     ).then(
-        update_all_views_on_filter_change,
-        [target_dir_output, filter_black_bg, filter_white_bg, processed_data_state, depth_view_selector, normal_view_selector, measure_view_selector],
-        [processed_data_state, depth_map, normal_map, measure_image, measure_points_state]
     )
     filter_white_bg.change(
-        update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [rerun_output, log_output]
     ).then(
-        update_all_views_on_filter_change,
-        [target_dir_output, filter_black_bg, filter_white_bg, processed_data_state, depth_view_selector, normal_view_selector, measure_view_selector],
-        [processed_data_state, depth_map, normal_map, measure_image, measure_points_state]
     )
     def update_gallery_on_unified_upload(files, interval):
-        if not files: return None, None, None
         target_dir, image_paths = handle_uploads(files, interval)
-        return target_dir, image_paths, "Upload complete. Click 'Reconstruct'."
     def show_resample_button(files):
-        if not files: return gr.update(visible=False)
-        video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
         has_video = False
         for file_data in files:
-            file_path = file_data["name"] if isinstance(file_data, dict) else str(file_data)
-            if os.path.splitext(file_path)[1].lower() in video_extensions:
                 has_video = True
                 break
         return gr.update(visible=has_video)
     def resample_video_with_new_interval(files, new_interval, current_target_dir):
-        if not files: return current_target_dir, None, "No files.", gr.update(visible=False)
-        if current_target_dir and os.path.exists(current_target_dir): shutil.rmtree(current_target_dir)
         target_dir, image_paths = handle_uploads(files, new_interval)
-        return target_dir, image_paths, "Video resampled.", gr.update(visible=False)
     unified_upload.change(
-        update_gallery_on_unified_upload, [unified_upload, s_time_interval], [target_dir_output, image_gallery, log_output]
-    ).then(show_resample_button, [unified_upload], [resample_btn])
-    s_time_interval.change(show_resample_button, [unified_upload], [resample_btn])
-    resample_btn.click(resample_video_with_new_interval, [unified_upload, s_time_interval, target_dir_output], [target_dir_output, image_gallery, log_output, resample_btn])
-    measure_image.select(measure, [processed_data_state, measure_points_state, measure_view_selector], [measure_image, measure_points_state, measure_text])
-    prev_depth_btn.click(lambda pd, sel: navigate_depth_view(pd, sel, -1), [processed_data_state, depth_view_selector], [depth_view_selector, depth_map])
-    next_depth_btn.click(lambda pd, sel: navigate_depth_view(pd, sel, 1), [processed_data_state, depth_view_selector], [depth_view_selector, depth_map])
-    depth_view_selector.change(lambda pd, sel: update_depth_view(pd, int(sel.split()[1])-1) if sel else None, [processed_data_state, depth_view_selector], [depth_map])
-    prev_normal_btn.click(lambda pd, sel: navigate_normal_view(pd, sel, -1), [processed_data_state, normal_view_selector], [normal_view_selector, normal_map])
-    next_normal_btn.click(lambda pd, sel: navigate_normal_view(pd, sel, 1), [processed_data_state, normal_view_selector], [normal_view_selector, normal_map])
-    normal_view_selector.change(lambda pd, sel: update_normal_view(pd, int(sel.split()[1])-1) if sel else None, [processed_data_state, normal_view_selector], [normal_map])
-    prev_measure_btn.click(lambda pd, sel: navigate_measure_view(pd, sel, -1), [processed_data_state, measure_view_selector], [measure_view_selector, measure_image, measure_points_state])
-    next_measure_btn.click(lambda pd, sel: navigate_measure_view(pd, sel, 1), [processed_data_state, measure_view_selector], [measure_view_selector, measure_image, measure_points_state])
-    measure_view_selector.change(lambda pd, sel: update_measure_view(pd, int(sel.split()[1])-1) if sel else (None, []), [processed_data_state, measure_view_selector], [measure_image, measure_points_state])
-if __name__ == "__main__":
-    demo.queue(max_size=20).launch(css=modern_css, show_error=True, share=True, ssr_mode=False)

 import gc
 import os
 import shutil
 sys.path.append("mapanything/")
 from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
+from mapanything.utils.hf_utils.css_and_html import (
+    GRADIO_CSS,
+    MEASURE_INSTRUCTIONS_HTML,
+    get_acknowledgements_html,
+    get_gradio_theme,
+)
 from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
 from mapanything.utils.hf_utils.viz import predictions_to_glb
 from mapanything.utils.image import load_images, rgb
 # MapAnything Configuration
 high_level_config = {
     "resolution": 518,
 }
+# Initialize model - this will be done on GPU when needed
 model = None
 # -------------------------------------------------------------------------
 # 1) Core model inference
 # -------------------------------------------------------------------------
     filter_black_bg=False,
     filter_white_bg=False,
 ):
+    """
+    Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
+    """
     global model
+    import torch  # Ensure torch is available in function scope
+    print(f"Processing images from {target_dir}")
+    # Device check
     device = "cuda" if torch.cuda.is_available() else "cpu"
     device = torch.device(device)
+    # Initialize model if not already done
     if model is None:
         model = initialize_mapanything_model(high_level_config, device)
     else:
         model = model.to(device)
     model.eval()
+    # Load images using MapAnything's load_images function
+    print("Loading images...")
     image_folder_path = os.path.join(target_dir, "images")
     views = load_images(image_folder_path)
+    print(f"Loaded {len(views)} images")
     if len(views) == 0:
         raise ValueError("No images found. Check your upload.")
+    # Run model inference
+    print("Running inference...")
+    # apply_mask: Whether to apply the non-ambiguous mask to the output. Defaults to True.
+    # mask_edges: Whether to compute an edge mask based on normals and depth and apply it to the output. Defaults to True.
+    # Use checkbox values - mask_edges is set to True by default since there's no UI control for it
     outputs = model.infer(
         views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
     )
+    # Convert predictions to format expected by visualization
     predictions = {}
+    # Initialize lists for the required keys
+    extrinsic_list = []
+    intrinsic_list = []
+    world_points_list = []
+    depth_maps_list = []
+    images_list = []
+    final_mask_list = []
+    # Loop through the outputs
     for pred in outputs:
+        # Extract data from predictions
+        depthmap_torch = pred["depth_z"][0].squeeze(-1)  # (H, W)
+        intrinsics_torch = pred["intrinsics"][0]  # (3, 3)
+        camera_pose_torch = pred["camera_poses"][0]  # (4, 4)
+        # Compute new pts3d using depth, intrinsics, and camera pose
         pts3d_computed, valid_mask = depthmap_to_world_frame(
             depthmap_torch, intrinsics_torch, camera_pose_torch
         )
+        # Convert to numpy arrays for visualization
+        # Check if mask key exists in pred, if not, fill with boolean trues in the size of depthmap_torch
         if "mask" in pred:
             mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool)
         else:
+            # Fill with boolean trues in the size of depthmap_torch
             mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool)
+        # Combine with valid depth mask
         mask = mask & valid_mask.cpu().numpy()
         image = pred["img_no_norm"][0].cpu().numpy()
+        # Append to lists
         extrinsic_list.append(camera_pose_torch.cpu().numpy())
         intrinsic_list.append(intrinsics_torch.cpu().numpy())
         world_points_list.append(pts3d_computed.cpu().numpy())
         depth_maps_list.append(depthmap_torch.cpu().numpy())
+        images_list.append(image)  # Add image to list
+        final_mask_list.append(mask)  # Add final_mask to list
+    # Convert lists to numpy arrays with required shapes
+    # extrinsic: (S, 3, 4) - batch of camera extrinsic matrices
     predictions["extrinsic"] = np.stack(extrinsic_list, axis=0)
+    # intrinsic: (S, 3, 3) - batch of camera intrinsic matrices
     predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
+    # world_points: (S, H, W, 3) - batch of 3D world points
     predictions["world_points"] = np.stack(world_points_list, axis=0)
+    # depth: (S, H, W, 1) or (S, H, W) - batch of depth maps
     depth_maps = np.stack(depth_maps_list, axis=0)
+    # Add channel dimension if needed to match (S, H, W, 1) format
     if len(depth_maps.shape) == 3:
         depth_maps = depth_maps[..., np.newaxis]
     predictions["depth"] = depth_maps
+    # images: (S, H, W, 3) - batch of input images
     predictions["images"] = np.stack(images_list, axis=0)
+    # final_mask: (S, H, W) - batch of final masks for filtering
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
+    # Process data for visualization tabs (depth, normal, measure)
     processed_data = process_predictions_for_visualization(
         predictions, views, high_level_config, filter_black_bg, filter_white_bg
     )
+    # Clean up
     torch.cuda.empty_cache()
     return predictions, processed_data
 def update_view_selectors(processed_data):
     """Build the view-selector dropdowns for the depth, normal and measure tabs.
     Args:
         processed_data: Sized collection holding one entry per view, or None.
     Returns:
         A 3-tuple of ``gr.Dropdown`` objects (depth, normal, measure). Each
         lists "View 1" .. "View N" and preselects the first entry; a single
         "View 1" choice is used when there is no data.
     """
     if processed_data is None or len(processed_data) == 0:
         choices = ["View 1"]
     else:
         # Bind the view count here: the list below reads num_views, which was
         # otherwise never assigned in this function (NameError at runtime).
         num_views = len(processed_data)
         choices = [f"View {i + 1}" for i in range(num_views)]
     return (
         gr.Dropdown(choices=choices, value=choices[0]),  # depth_view_selector
         gr.Dropdown(choices=choices, value=choices[0]),  # normal_view_selector
         gr.Dropdown(choices=choices, value=choices[0]),  # measure_view_selector
     )
 def get_view_data_by_index(processed_data, view_index):
     """Look up one view's entry by position in ``processed_data``.
     Returns None when ``processed_data`` is None or empty. An index outside
     ``[0, len)`` falls back to the first view instead of raising.
     """
     if processed_data is None or len(processed_data) == 0:
         return None
     ordered_keys = list(processed_data.keys())
     out_of_range = view_index < 0 or view_index >= len(ordered_keys)
     chosen = 0 if out_of_range else view_index
     return processed_data[ordered_keys[chosen]]
 def update_depth_view(processed_data, view_index):
     """Return the colorized depth image for one view, or None if unavailable.
     None is returned when the view cannot be resolved or its "depth" entry
     is None; otherwise the view's optional "mask" is passed to colorize_depth.
     """
     entry = get_view_data_by_index(processed_data, view_index)
     if entry is not None and entry["depth"] is not None:
         return colorize_depth(entry["depth"], mask=entry.get("mask"))
     return None
 def update_normal_view(processed_data, view_index):
     """Return the colorized normal image for one view, or None if unavailable.
     None is returned when the view cannot be resolved or its "normal" entry
     is None; otherwise the view's optional "mask" is passed to colorize_normal.
     """
     entry = get_view_data_by_index(processed_data, view_index)
     if entry is not None and entry["normal"] is not None:
         return colorize_normal(entry["normal"], mask=entry.get("mask"))
     return None
 def update_measure_view(processed_data, view_index):
     """Prepare the measure-tab image for one view and reset its point list.
     Args:
         processed_data: Per-view data; each entry provides an "image" array
             and optionally a boolean "mask" (True where the pixel is valid).
         view_index: Position of the view to show; resolved through
             get_view_data_by_index.
     Returns:
         ``(image, [])``: a uint8 copy of the view's image with masked-out
         pixels tinted, plus an empty measure-point list. ``(None, [])`` when
         the view cannot be resolved.
     """
     view_data = get_view_data_by_index(processed_data, view_index)
     if view_data is None:
         return None, []  # image, measure_points
     # Work on a copy so the stored image is never modified by the overlay.
     image = view_data["image"].copy()
     # Bring the image to uint8; a max <= 1.0 is treated as a [0, 1] float image.
     if image.dtype != np.uint8:
         if image.max() <= 1.0:
             image = (image * 255).astype(np.uint8)
         else:
             image = image.astype(np.uint8)
     # .get() keeps this consistent with the depth/normal views, which also
     # treat the mask as optional.
     mask = view_data.get("mask")
     if mask is not None:
         invalid_mask = ~mask  # pixels where the mask is False
         if invalid_mask.any():
             # Light pink tint (RGB 255, 220, 220) blended at 50% opacity.
             overlay_color = np.array([255, 220, 220], dtype=np.uint8)
             alpha = 0.5
             # Blend only the first three channels, as a view into `image`.
             rgb_channels = image[..., :3]
             blended = (1 - alpha) * rgb_channels + alpha * overlay_color
             rgb_channels[invalid_mask] = blended[invalid_mask].astype(np.uint8)
     return image, []
 def navigate_depth_view(processed_data, current_selector_value, direction):
     """Step the depth tab to the previous or next view, wrapping around.
     Args:
         processed_data: Sized per-view data, or None.
         current_selector_value: Current dropdown label, expected "View <n>"
             with a 1-based n.
         direction: -1 for previous, +1 for next.
     Returns:
         ``(new_selector_value, depth_vis)``; ``("View 1", None)`` when there
         is no data.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None
     # Fall back to the first view when the label cannot be parsed. Only the
     # parse failures are caught so KeyboardInterrupt/SystemExit still propagate.
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, TypeError, ValueError):
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     depth_vis = update_depth_view(processed_data, new_view)
     return new_selector_value, depth_vis
 def navigate_normal_view(processed_data, current_selector_value, direction):
     """Step the normal tab to the previous or next view, wrapping around.
     Args:
         processed_data: Sized per-view data, or None.
         current_selector_value: Current dropdown label, expected "View <n>"
             with a 1-based n.
         direction: -1 for previous, +1 for next.
     Returns:
         ``(new_selector_value, normal_vis)``; ``("View 1", None)`` when there
         is no data.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None
     # Fall back to the first view when the label cannot be parsed. Only the
     # parse failures are caught so KeyboardInterrupt/SystemExit still propagate.
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, TypeError, ValueError):
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     normal_vis = update_normal_view(processed_data, new_view)
     return new_selector_value, normal_vis
 def navigate_measure_view(processed_data, current_selector_value, direction):
     """Step the measure tab to the previous or next view, wrapping around.
     Args:
         processed_data: Sized per-view data, or None.
         current_selector_value: Current dropdown label, expected "View <n>"
             with a 1-based n.
         direction: -1 for previous, +1 for next.
     Returns:
         ``(new_selector_value, measure_image, measure_points)``;
         ``("View 1", None, [])`` when there is no data.
     """
     if processed_data is None or len(processed_data) == 0:
         return "View 1", None, []
     # Fall back to the first view when the label cannot be parsed. Only the
     # parse failures are caught so KeyboardInterrupt/SystemExit still propagate.
     try:
         current_view = int(current_selector_value.split()[1]) - 1
     except (AttributeError, IndexError, TypeError, ValueError):
         current_view = 0
     num_views = len(processed_data)
     new_view = (current_view + direction) % num_views
     new_selector_value = f"View {new_view + 1}"
     measure_image, measure_points = update_measure_view(processed_data, new_view)
     return new_selector_value, measure_image, measure_points
 def populate_visualization_tabs(processed_data):
     """Produce the initial depth, normal and measure outputs from the first view.
     Returns ``(depth_vis, normal_vis, measure_img, [])``; the three images are
     None when ``processed_data`` is None or empty. The trailing list is the
     (empty) measure-point state.
     """
     has_views = processed_data is not None and len(processed_data) != 0
     if not has_views:
         return None, None, None, []
     # Go through the per-view update helpers so the first view is rendered
     # exactly like any later view selection.
     first_depth = update_depth_view(processed_data, 0)
     first_normal = update_normal_view(processed_data, 0)
     first_measure, _unused_points = update_measure_view(processed_data, 0)
     return first_depth, first_normal, first_measure, []
 # -------------------------------------------------------------------------
+# 2) Handle uploaded video/images --> produce target_dir + images
 # -------------------------------------------------------------------------
 def handle_uploads(unified_upload, s_time_interval=1.0):
+    """
+    Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
+    images or extracted frames from video into it. Return (target_dir, image_paths).
+    """
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
+    # Create a unique folder name
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     target_dir = f"input_images_{timestamp}"
     target_dir_images = os.path.join(target_dir, "images")
+    # Clean up if somehow that folder already exists
     if os.path.exists(target_dir):
         shutil.rmtree(target_dir)
     os.makedirs(target_dir)
     image_paths = []
+    # --- Handle uploaded files (both images and videos) ---
     if unified_upload is not None:
         for file_data in unified_upload:
             if isinstance(file_data, dict) and "name" in file_data:
                 file_path = str(file_data)
             file_ext = os.path.splitext(file_path)[1].lower()
+            # Check if it's a video file
+            video_extensions = [
+                ".mp4",
+                ".avi",
+                ".mov",
+                ".mkv",
+                ".wmv",
+                ".flv",
+                ".webm",
+                ".m4v",
+                ".3gp",
+            ]
             if file_ext in video_extensions:
+                # Handle as video
                 vs = cv2.VideoCapture(file_path)
                 fps = vs.get(cv2.CAP_PROP_FPS)
+                frame_interval = int(fps * s_time_interval)  # frames per interval
                 count = 0
                 video_frame_num = 0
                         break
                     count += 1
                     if count % frame_interval == 0:
+                        # Use original filename as prefix for frames
                         base_name = os.path.splitext(os.path.basename(file_path))[0]
+                        image_path = os.path.join(
+                            target_dir_images, f"{base_name}_{video_frame_num:06}.png"
+                        )
                         cv2.imwrite(image_path, frame)
                         image_paths.append(image_path)
                         video_frame_num += 1
                 vs.release()
+                print(
+                    f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}"
+                )
             else:
+                # Handle as image
+                # Check if the file is a HEIC image
                 if file_ext in [".heic", ".heif"]:
+                    # Convert HEIC to JPEG for better gallery compatibility
                     try:
                         with Image.open(file_path) as img:
+                            # Convert to RGB if necessary (HEIC can have different color modes)
                             if img.mode not in ("RGB", "L"):
                                 img = img.convert("RGB")
+                            # Create JPEG filename
                             base_name = os.path.splitext(os.path.basename(file_path))[0]
+                            dst_path = os.path.join(
+                                target_dir_images, f"{base_name}.jpg"
+                            )
+                            # Save as JPEG with high quality
                             img.save(dst_path, "JPEG", quality=95)
                             image_paths.append(dst_path)
+                            print(
+                                f"Converted HEIC to JPEG: {os.path.basename(file_path)} -> {os.path.basename(dst_path)}"
+                            )
                     except Exception as e:
+                        print(f"Error converting HEIC file {file_path}: {e}")
+                        # Fall back to copying as is
+                        dst_path = os.path.join(
+                            target_dir_images, os.path.basename(file_path)
+                        )
                         shutil.copy(file_path, dst_path)
                         image_paths.append(dst_path)
                 else:
+                    # Regular image files - copy as is
+                    dst_path = os.path.join(
+                        target_dir_images, os.path.basename(file_path)
+                    )
                     shutil.copy(file_path, dst_path)
                     image_paths.append(dst_path)
+    # Sort final images for gallery
     image_paths = sorted(image_paths)
+    end_time = time.time()
+    print(
+        f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds"
+    )
     return target_dir, image_paths
+# -------------------------------------------------------------------------
+# 3) Update gallery on upload
+# -------------------------------------------------------------------------
 def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0):
     """Stage newly uploaded media and return the values for the gallery UI.
     Args:
         input_video: Uploaded video (single item or list), or a falsy value.
         input_images: Uploaded images (single item or list), or a falsy value.
         s_time_interval: Video sampling interval in seconds, forwarded to
             handle_uploads.
     Returns:
         ``(None, target_dir, image_paths, status_message)`` after staging, or
         four ``None`` values when nothing was uploaded.
     """
     if not input_video and not input_images:
         return None, None, None, None
     # handle_uploads takes one combined upload list plus the interval, so the
     # two sources are merged instead of being passed as separate positionals
     # (which raised TypeError).
     uploads = []
     for source in (input_video, input_images):
         if not source:
             continue
         if isinstance(source, (list, tuple)):
             uploads.extend(source)
         else:
             uploads.append(source)
     target_dir, image_paths = handle_uploads(uploads, s_time_interval)
     return (
         None,
         target_dir,
         image_paths,
         "Upload complete. Click 'Reconstruct' to begin 3D processing.",
     )
 # -------------------------------------------------------------------------
+# 4) Reconstruction: uses the target_dir plus any viz parameters
 # -------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def gradio_demo(
     apply_mask=True,
     show_mesh=True,
 ):
+    """
+    Perform reconstruction using the already-created target_dir/images.
+    """
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, "No valid target directory found. Please upload first.", None, None
+    start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
+    # Prepare frame_filter dropdown
     target_dir_images = os.path.join(target_dir, "images")
+    all_files = (
+        sorted(os.listdir(target_dir_images))
+        if os.path.isdir(target_dir_images)
+        else []
+    )
     all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files
+    print("Running MapAnything model...")
     with torch.no_grad():
         predictions, processed_data = run_model(target_dir, apply_mask)
+    # Save predictions
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
+    # Handle None frame_filter
     if frame_filter is None:
         frame_filter = "All"
+    # Build a GLB file name
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
     )
+    # Convert predictions to GLB
     glbscene = predictions_to_glb(
         predictions,
         filter_by_frames=frame_filter,
         show_cam=show_cam,
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
+        as_mesh=show_mesh,  # Use the show_mesh parameter
     )
     glbscene.export(file_obj=glbfile)
+    # Cleanup
     del predictions
     gc.collect()
     torch.cuda.empty_cache()
+    end_time = time.time()
+    print(f"Total time: {end_time - start_time:.2f} seconds")
+    log_msg = (
+        f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
+    )
+    # Populate visualization tabs with processed data
+    depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(
+        processed_data
+    )
+    # Update view selectors based on available views
+    depth_selector, normal_selector, measure_selector = update_view_selectors(
+        processed_data
+    )
     return (
+        glbfile,
         log_msg,
         gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
         processed_data,
         depth_vis,
         normal_vis,
         measure_img,
+        "",  # measure_text (empty initially)
         depth_selector,
         normal_selector,
         measure_selector,
     )
+# -------------------------------------------------------------------------
+# 5) Helper functions for UI resets + re-visualization
+# -------------------------------------------------------------------------
 def colorize_depth(depth_map, mask=None):
     """Convert a depth map to a colorized RGB visualization.

     Pixels are considered valid when their depth is strictly positive and,
     if ``mask`` is given, when the mask is also true there. Valid depths are
     rescaled so that their 5th percentile maps to 0 and their 95th
     percentile maps to 1, then passed through matplotlib's reversed
     ``turbo`` colormap. Invalid pixels are painted white.

     Args:
         depth_map: numpy array of depths indexed as [row, col], or None.
         mask: optional boolean array combined with the ``depth > 0`` test
             (used for background filtering).

     Returns:
         A uint8 array with three colour channels in the last axis, or None
         when ``depth_map`` is None.
     """
     if depth_map is None:
         return None

     # Work on a copy so the caller's depth map is left untouched.
     depth_normalized = depth_map.copy()
     valid_mask = depth_normalized > 0

     # Apply additional mask if provided (for background filtering)
     if mask is not None:
         valid_mask = valid_mask & mask

     if valid_mask.any():
         valid_depths = depth_normalized[valid_mask]
         p5 = np.percentile(valid_depths, 5)
         p95 = np.percentile(valid_depths, 95)
         span = p95 - p5
         if span > 0:
             depth_normalized[valid_mask] = (valid_depths - p5) / span
         else:
             # All valid depths share one value: dividing by a zero span would
             # produce NaN/inf, so pin them to the middle of the colormap.
             depth_normalized[valid_mask] = 0.5

     # Imported lazily, as in the original, so matplotlib is only loaded
     # when a depth map is actually rendered.
     import matplotlib.pyplot as plt

     colormap = plt.cm.turbo_r
     colored = colormap(depth_normalized)
     # Drop the alpha channel and convert to 8-bit RGB.
     colored = (colored[:, :, :3] * 255).astype(np.uint8)

     # Set invalid pixels to white
     colored[~valid_mask] = [255, 255, 255]

     return colored
 def colorize_normal(normal_map, mask=None):
     """Convert a normal map to an 8-bit RGB visualization.

     Each component is mapped from [-1, 1] to [0, 255]. Components outside
     that range are clipped rather than being allowed to wrap around in the
     uint8 conversion.

     Args:
         normal_map: numpy array of normal vectors with the three components
             in the last axis, or None.
         mask: optional boolean array; where it is False the normal is
             replaced by [0, 0, 0], which renders as mid-grey.

     Returns:
         A uint8 array of the same shape as ``normal_map``, or None when
         ``normal_map`` is None.
     """
     if normal_map is None:
         return None

     # Work on a copy so the caller's normals are not modified.
     normal_vis = normal_map.copy()

     # Masked-out pixels become the zero vector (grey after the remap below).
     if mask is not None:
         normal_vis[~mask] = [0, 0, 0]

     # Remap [-1, 1] -> [0, 1]; clip so slightly non-unit normals cannot
     # overflow the uint8 cast.
     normal_vis = np.clip((normal_vis + 1.0) / 2.0, 0.0, 1.0)
     return (normal_vis * 255).astype(np.uint8)
 def process_predictions_for_visualization(
     predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
 ):
     """Assemble the per-view data used by the depth, normal and measure tabs.

     For every entry of ``views`` this collects the RGB image, the predicted
     world points, the squeezed depth map, surface normals computed from the
     points, and a validity mask. The mask starts from
     ``predictions["final_mask"]`` and is optionally narrowed by the
     black/white background filters.

     Args:
         predictions: mapping providing "world_points", "final_mask" and
             "depth", each indexable by view index.
         views: iterable of view dicts, each with an "img" entry.
         high_level_config: mapping providing "data_norm_type" for ``rgb``.
         filter_black_bg: drop pixels whose RGB sum is below 16.
         filter_white_bg: drop pixels whose R, G and B are all above 240.

     Returns:
         Dict mapping view index to a dict with keys "image", "points3d",
         "depth", "normal" and "mask".
     """
     per_view = {}

     for idx, view in enumerate(views):
         frame = rgb(view["img"], norm_type=high_level_config["data_norm_type"])[0]
         points = predictions["world_points"][idx]

         # Copy so that narrowing the mask never touches the stored prediction.
         keep = predictions["final_mask"][idx].copy()

         if filter_black_bg or filter_white_bg:
             # Bring colours to the 0-255 scale the thresholds are written for.
             pixels = frame * 255 if frame.max() <= 1.0 else frame

             if filter_black_bg:
                 keep = keep & (pixels.sum(axis=2) >= 16)

             if filter_white_bg:
                 near_white = (
                     (pixels[:, :, 0] > 240)
                     & (pixels[:, :, 1] > 240)
                     & (pixels[:, :, 2] > 240)
                 )
                 keep = keep & ~near_white

         depth = predictions["depth"][idx].squeeze()
         normals, _ = points_to_normals(points, mask=keep)

         per_view[idx] = {
             "image": frame,
             "points3d": points,
             "depth": depth,
             "normal": normals,
             "mask": keep,
         }

     return per_view
def reset_measure(processed_data):
    """Reset the measurement state to the first view with no points selected.

    Args:
        processed_data: mapping of view index to view dict (each with an
            "image" entry), or None.

    Returns:
        A tuple ``(image, points, text)``: the first view's image (None when
        there is no processed data), an empty point list, and an empty
        message string.
    """
    if not processed_data:
        return None, [], ""

    # Only the first view is needed, so avoid materialising every view.
    first_view = next(iter(processed_data.values()))
    return first_view["image"], [], ""
+def measure(
+    processed_data, measure_points, current_view_selector, event: gr.SelectData
+):
+    """Handle measurement on images"""
     try:
+        print(f"Measure function called with selector: {current_view_selector}")
         if processed_data is None or len(processed_data) == 0:
             return None, [], "No data available"
+        # Use the currently selected view instead of always using the first view
         try:
             current_view_index = int(current_view_selector.split()[1]) - 1
         except:
             current_view_index = 0
+        print(f"Using view index: {current_view_index}")
+        # Get view data safely
         if current_view_index < 0 or current_view_index >= len(processed_data):
             current_view_index = 0
             return None, [], "No view data available"
         point2d = event.index[0], event.index[1]
+        print(f"Clicked point: {point2d}")
+        # Check if the clicked point is in a masked area (prevent interaction)
+        if (
+            current_view["mask"] is not None
+            and 0 <= point2d[1] < current_view["mask"].shape[0]
+            and 0 <= point2d[0] < current_view["mask"].shape[1]
+        ):
+            # Check if the point is in a masked (invalid) area
             if not current_view["mask"][point2d[1], point2d[0]]:
+                print(f"Clicked point {point2d} is in masked area, ignoring click")
+                # Always return image with mask overlay
+                masked_image, _ = update_measure_view(
+                    processed_data, current_view_index
+                )
+                return (
+                    masked_image,
+                    measure_points,
+                    '<span style="color: red; font-weight: bold;">Cannot measure on masked areas (shown in grey)</span>',
+                )
         measure_points.append(point2d)
+        # Get image with mask overlay and ensure it's valid
         image, _ = update_measure_view(processed_data, current_view_index)
+        if image is None:
+            return None, [], "No image available"
         image = image.copy()
         points3d = current_view["points3d"]
+        # Ensure image is in uint8 format for proper cv2 operations
+        try:
+            if image.dtype != np.uint8:
+                if image.max() <= 1.0:
+                    # Image is in [0, 1] range, convert to [0, 255]
+                    image = (image * 255).astype(np.uint8)
+                else:
+                    # Image is already in [0, 255] range
+                    image = image.astype(np.uint8)
+        except Exception as e:
+            print(f"Image conversion error: {e}")
+            return None, [], f"Image conversion error: {e}"
+        # Draw circles for points
+        try:
+            for p in measure_points:
+                if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
+                    image = cv2.circle(
+                        image, p, radius=5, color=(255, 0, 0), thickness=2
+                    )
+        except Exception as e:
+            print(f"Drawing error: {e}")
+            return None, [], f"Drawing error: {e}"
         depth_text = ""
+        try:
+            for i, p in enumerate(measure_points):
+                if (
+                    current_view["depth"] is not None
+                    and 0 <= p[1] < current_view["depth"].shape[0]
+                    and 0 <= p[0] < current_view["depth"].shape[1]
+                ):
+                    d = current_view["depth"][p[1], p[0]]
+                    depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n"
+                else:
+                    # Use Z coordinate of 3D points if depth not available
+                    if (
+                        points3d is not None
+                        and 0 <= p[1] < points3d.shape[0]
+                        and 0 <= p[0] < points3d.shape[1]
+                    ):
+                        z = points3d[p[1], p[0], 2]
+                        depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n"
+        except Exception as e:
+            print(f"Depth text error: {e}")
+            depth_text = f"Error computing depth: {e}\n"
         if len(measure_points) == 2:
+            try:
+                point1, point2 = measure_points
+                # Draw line
+                if (
+                    0 <= point1[0] < image.shape[1]
+                    and 0 <= point1[1] < image.shape[0]
+                    and 0 <= point2[0] < image.shape[1]
+                    and 0 <= point2[1] < image.shape[0]
+                ):
+                    image = cv2.line(
+                        image, point1, point2, color=(255, 0, 0), thickness=2
+                    )
+                # Compute 3D distance
+                distance_text = "- **Distance: Unable to compute**"
+                if (
+                    points3d is not None
+                    and 0 <= point1[1] < points3d.shape[0]
+                    and 0 <= point1[0] < points3d.shape[1]
+                    and 0 <= point2[1] < points3d.shape[0]
+                    and 0 <= point2[0] < points3d.shape[1]
+                ):
+                    try:
+                        p1_3d = points3d[point1[1], point1[0]]
+                        p2_3d = points3d[point2[1], point2[0]]
+                        distance = np.linalg.norm(p1_3d - p2_3d)
+                        distance_text = f"- **Distance: {distance:.2f}m**"
+                    except Exception as e:
+                        print(f"Distance computation error: {e}")
+                        distance_text = f"- **Distance computation error: {e}**"
+                measure_points = []
+                text = depth_text + distance_text
+                print(f"Measurement complete: {text}")
+                return [image, measure_points, text]
+            except Exception as e:
+                print(f"Final measurement error: {e}")
+                return None, [], f"Measurement error: {e}"
         else:
+            print(f"Single point measurement: {depth_text}")
             return [image, measure_points, depth_text]
     except Exception as e:
+        print(f"Overall measure function error: {e}")
         return None, [], f"Measure function error: {e}"
 def clear_fields():
     """
     Return None, the value used to blank a single output component.

     NOTE(review): the Reconstruct click handler in this file routes this
     value to the 3D viewer only; the target directory and gallery are not
     cleared by this function.
     """
     return None
 def update_log():
     """
     Return the interim status message displayed while reconstruction runs.
     """
     return "Loading and Reconstructing..."
 def update_visualization(
     target_dir,
     frame_filter,
     filter_white_bg=False,
     show_mesh=True,
 ):
+    """
+    Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
+    and return it for the 3D viewer. If is_example == "True", skip.
+    """
+    # If it's an example click, skip as requested
     if is_example == "True":
+        return (
+            gr.update(),
+            "No reconstruction available. Please click the Reconstruct button first.",
+        )
     if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
+        return (
+            gr.update(),
+            "No reconstruction available. Please click the Reconstruct button first.",
+        )
     predictions_path = os.path.join(target_dir, "predictions.npz")
     if not os.path.exists(predictions_path):
+        return (
+            gr.update(),
+            f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first.",
+        )
     loaded = np.load(predictions_path, allow_pickle=True)
     predictions = {key: loaded[key] for key in loaded.keys()}
     if not os.path.exists(glbfile):
         glbscene = predictions_to_glb(
+            predictions,
+            filter_by_frames=frame_filter,
+            show_cam=show_cam,
+            mask_black_bg=filter_black_bg,
+            mask_white_bg=filter_white_bg,
+            as_mesh=show_mesh,
         )
         glbscene.export(file_obj=glbfile)
+    return (
+        glbfile,
+        "Visualization updated.",
+    )
 def update_all_views_on_filter_change(
+    target_dir,
+    filter_black_bg,
+    filter_white_bg,
+    processed_data,
+    depth_view_selector,
+    normal_view_selector,
+    measure_view_selector,
 ):
+    """
+    Update all individual view tabs when background filtering checkboxes change.
+    This regenerates the processed data with new filtering and updates all views.
+    """
+    # Check if we have a valid target directory and predictions
     if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
         return processed_data, None, None, None, []
         return processed_data, None, None, None, []
     try:
+        # Load the original predictions and views
         loaded = np.load(predictions_path, allow_pickle=True)
         predictions = {key: loaded[key] for key in loaded.keys()}
+        # Load images using MapAnything's load_images function
+        image_folder_path = os.path.join(target_dir, "images")
+        views = load_images(image_folder_path)
+        # Regenerate processed data with new filtering settings
         new_processed_data = process_predictions_for_visualization(
             predictions, views, high_level_config, filter_black_bg, filter_white_bg
         )
+        # Get current view indices
+        try:
+            depth_view_idx = (
+                int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
+            )
+        except:
+            depth_view_idx = 0
+        try:
+            normal_view_idx = (
+                int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
+            )
+        except:
+            normal_view_idx = 0
+        try:
+            measure_view_idx = (
+                int(measure_view_selector.split()[1]) - 1
+                if measure_view_selector
+                else 0
+            )
+        except:
+            measure_view_idx = 0
+        # Update all views with new filtered data
         depth_vis = update_depth_view(new_processed_data, depth_view_idx)
         normal_vis = update_normal_view(new_processed_data, normal_view_idx)
         measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
         return new_processed_data, depth_vis, normal_vis, measure_img, []
     except Exception as e:
+        print(f"Error updating views on filter change: {e}")
         return processed_data, None, None, None, []
+# -------------------------------------------------------------------------
+# Example scene functions
+# -------------------------------------------------------------------------
def get_scene_info(examples_dir):
    """Collect the example scenes found directly under ``examples_dir``.

    Every sub-directory that contains at least one image file (jpg, jpeg,
    png, bmp, tiff or tif, in lower- or upper-case) is reported as a scene.

    Args:
        examples_dir: path of the directory holding one folder per scene.

    Returns:
        A list of dicts, ordered by folder name, each with the keys "name",
        "path", "thumbnail" (first image in sorted order), "num_images" and
        "image_files" (sorted list of image paths). The list is empty when
        ``examples_dir`` is missing or is not a directory.
    """
    import glob

    scenes = []
    # isdir (rather than exists) so a stray file path cannot reach listdir.
    if not os.path.isdir(examples_dir):
        return scenes

    image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif")

    for scene_folder in sorted(os.listdir(examples_dir)):
        scene_path = os.path.join(examples_dir, scene_folder)
        if not os.path.isdir(scene_path):
            continue

        # Escape the directory part so folder names containing glob
        # metacharacters ("[", "*", "?") are matched literally.
        pattern_root = glob.escape(scene_path)

        # A set removes the duplicates produced when the lower- and
        # upper-case patterns match the same file (case-insensitive matching).
        found = set()
        for pattern in image_extensions:
            found.update(glob.glob(os.path.join(pattern_root, pattern)))
            found.update(glob.glob(os.path.join(pattern_root, pattern.upper())))

        if not found:
            continue

        image_files = sorted(found)
        scenes.append(
            {
                "name": scene_folder,
                "path": scene_path,
                "thumbnail": image_files[0],
                "num_images": len(image_files),
                "image_files": image_files,
            }
        )

    return scenes
def load_example_scene(scene_name, examples_dir="examples"):
    """Stage the images of a named example scene for reconstruction.

    Looks the scene up via ``get_scene_info`` and hands its image paths to
    ``handle_uploads`` with a sampling interval of 1.0.

    Args:
        scene_name: folder name of the scene to load.
        examples_dir: directory that holds the example scenes.

    Returns:
        A 4-tuple ``(reconstruction, target_dir, image_paths, message)``.
        The first element is always None (clears the reconstruction output).
        When the scene is unknown the tuple is
        ``(None, None, None, "Scene not found")``.
    """
    match = next(
        (
            candidate
            for candidate in get_scene_info(examples_dir)
            if candidate["name"] == scene_name
        ),
        None,
    )
    if match is None:
        return None, None, None, "Scene not found"

    # Pass a fresh list of paths, in the form the upload handler accepts.
    target_dir, image_paths = handle_uploads(list(match["image_files"]), 1.0)

    message = f"Loaded scene '{scene_name}' with {match['num_images']} images. Click 'Reconstruct' to begin 3D processing."
    return None, target_dir, image_paths, message
 # -------------------------------------------------------------------------
+# 6) Build Gradio UI
 # -------------------------------------------------------------------------
+theme = get_gradio_theme()
+with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
+    # State variables for the tabbed interface
+    is_example = gr.Textbox(label="is_example", visible=False, value="None")
+    num_images = gr.Textbox(label="num_images", visible=False, value="None")
     processed_data_state = gr.State(value=None)
     measure_points_state = gr.State(value=[])
+    current_view_index = gr.State(value=0)  # Track current view index for navigation
+    target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Unified upload component for both videos and images
+            unified_upload = gr.File(
+                file_count="multiple",
+                label="Upload Video or Images",
+                interactive=True,
+                file_types=["image", "video"],
+            )
+            with gr.Row():
+                s_time_interval = gr.Slider(
+                    minimum=0.1,
+                    maximum=5.0,
+                    value=1.0,
+                    step=0.1,
+                    label="Video sample time interval (take a sample every x sec.)",
                     interactive=True,
+                    visible=True,
+                    scale=3,
                 )
+                resample_btn = gr.Button(
+                    "Resample Video",
+                    visible=False,
+                    variant="secondary",
+                    scale=1,
                 )
+            image_gallery = gr.Gallery(
+                label="Preview",
+                columns=4,
+                height="300px",
+                show_download_button=True,
+                object_fit="contain",
+                preview=True,
+            )
+            clear_uploads_btn = gr.ClearButton(
+                [unified_upload, image_gallery],
+                value="Clear Uploads",
+                variant="secondary",
+                size="sm",
+            )
+        with gr.Column(scale=4):
+            with gr.Column():
+                gr.Markdown(
+                    "**Metric 3D Reconstruction (Point Cloud and Camera Poses)**"
+                )
+                log_output = gr.Markdown(
+                    "Please upload a video or images, then click Reconstruct.",
+                    elem_classes=["custom-log"],
+                )
+                # Add tabbed interface similar to MoGe
                 with gr.Tabs():
                     with gr.Tab("3D View"):
+                        reconstruction_output = gr.Model3D(
+                            height=520,
+                            zoom_speed=0.5,
+                            pan_speed=0.5,
+                            clear_color=[0.0, 0.0, 0.0, 0.0],
+                            key="persistent_3d_viewer",
+                            elem_id="reconstruction_3d_viewer",
+                        )
                     with gr.Tab("Depth"):
+                        with gr.Row(elem_classes=["navigation-row"]):
+                            prev_depth_btn = gr.Button("◀ Previous", size="sm", scale=1)
+                            depth_view_selector = gr.Dropdown(
+                                choices=["View 1"],
+                                value="View 1",
+                                label="Select View",
+                                scale=2,
+                                interactive=True,
+                                allow_custom_value=True,
+                            )
+                            next_depth_btn = gr.Button("Next ▶", size="sm", scale=1)
+                        depth_map = gr.Image(
+                            type="numpy",
+                            label="Colorized Depth Map",
+                            format="png",
+                            interactive=False,
+                        )
                     with gr.Tab("Normal"):
+                        with gr.Row(elem_classes=["navigation-row"]):
+                            prev_normal_btn = gr.Button(
+                                "◀ Previous", size="sm", scale=1
+                            )
+                            normal_view_selector = gr.Dropdown(
+                                choices=["View 1"],
+                                value="View 1",
+                                label="Select View",
+                                scale=2,
+                                interactive=True,
+                                allow_custom_value=True,
+                            )
+                            next_normal_btn = gr.Button("Next ▶", size="sm", scale=1)
+                        normal_map = gr.Image(
+                            type="numpy",
+                            label="Normal Map",
+                            format="png",
+                            interactive=False,
+                        )
                     with gr.Tab("Measure"):
+                        gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
+                        with gr.Row(elem_classes=["navigation-row"]):
+                            prev_measure_btn = gr.Button(
+                                "◀ Previous", size="sm", scale=1
+                            )
+                            measure_view_selector = gr.Dropdown(
+                                choices=["View 1"],
+                                value="View 1",
+                                label="Select View",
+                                scale=2,
+                                interactive=True,
+                                allow_custom_value=True,
+                            )
+                            next_measure_btn = gr.Button("Next ▶", size="sm", scale=1)
+                        measure_image = gr.Image(
+                            type="numpy",
+                            show_label=False,
+                            format="webp",
+                            interactive=False,
+                            sources=[],
+                        )
+                        gr.Markdown(
+                            "**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken."
+                        )
+                        measure_text = gr.Markdown("")
+            with gr.Row():
+                submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
+                clear_btn = gr.ClearButton(
+                    [
+                        unified_upload,
+                        reconstruction_output,
+                        log_output,
+                        target_dir_output,
+                        image_gallery,
+                    ],
+                    scale=1,
+                )
+            with gr.Row():
+                frame_filter = gr.Dropdown(
+                    choices=["All"], value="All", label="Show Points from Frame"
+                )
+                with gr.Column():
+                    gr.Markdown("### Pointcloud Options: (live updates)")
+                    show_cam = gr.Checkbox(label="Show Camera", value=True)
+                    show_mesh = gr.Checkbox(label="Show Mesh", value=True)
+                    filter_black_bg = gr.Checkbox(
+                        label="Filter Black Background", value=False
+                    )
+                    filter_white_bg = gr.Checkbox(
+                        label="Filter White Background", value=False
+                    )
+                    gr.Markdown("### Reconstruction Options: (updated on next run)")
+                    apply_mask_checkbox = gr.Checkbox(
+                        label="Apply mask for predicted ambiguous depth classes & edges",
+                        value=True,
+                    )
+    # ---------------------- Example Scenes Section ----------------------
+    gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
+    gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
+    # Get scene information
+    scenes = get_scene_info("examples")
+    # Create thumbnail grid (4 columns, N rows)
+    if scenes:
+        for i in range(0, len(scenes), 4):  # Process 4 scenes per row
+            with gr.Row():
+                for j in range(4):
+                    scene_idx = i + j
+                    if scene_idx < len(scenes):
+                        scene = scenes[scene_idx]
+                        with gr.Column(scale=1, elem_classes=["clickable-thumbnail"]):
+                            # Clickable thumbnail
+                            scene_img = gr.Image(
+                                value=scene["thumbnail"],
+                                height=150,
+                                interactive=False,
+                                show_label=False,
+                                elem_id=f"scene_thumb_{scene['name']}",
+                                sources=[],
+                            )
+                            # Scene name and image count as text below thumbnail
+                            gr.Markdown(
+                                f"**{scene['name']}** \n {scene['num_images']} images",
+                                elem_classes=["scene-info"],
+                            )
+                            # Connect thumbnail click to load scene
+                            scene_img.select(
+                                fn=lambda name=scene["name"]: load_example_scene(name),
+                                outputs=[
+                                    reconstruction_output,
+                                    target_dir_output,
+                                    image_gallery,
+                                    log_output,
+                                ],
+                            )
+                    else:
+                        # Empty column to maintain grid structure
+                        with gr.Column(scale=1):
+                            pass
     # -------------------------------------------------------------------------
+    # "Reconstruct" button logic:
+    #  - Clear fields
+    #  - Update log
+    #  - gradio_demo(...) with the existing target_dir
+    #  - Then set is_example = "False"
     # -------------------------------------------------------------------------
+    submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
         fn=update_log, inputs=[], outputs=[log_output]
     ).then(
         fn=gradio_demo,
         inputs=[
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            filter_black_bg,
+            filter_white_bg,
+            apply_mask_checkbox,
+            show_mesh,
         ],
         outputs=[
+            reconstruction_output,
+            log_output,
+            frame_filter,
+            processed_data_state,
+            depth_map,
+            normal_map,
+            measure_image,
+            measure_text,
+            depth_view_selector,
+            normal_view_selector,
+            measure_view_selector,
         ],
+    ).then(
+        fn=lambda: "False",
+        inputs=[],
+        outputs=[is_example],  # set is_example to "False"
+    )
+    # -------------------------------------------------------------------------
+    # Real-time Visualization Updates
+    # -------------------------------------------------------------------------
     frame_filter.change(
+        update_visualization,
+        [
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            is_example,
+            filter_black_bg,
+            filter_white_bg,
+            show_mesh,
+        ],
+        [reconstruction_output, log_output],
     )
     show_cam.change(
+        update_visualization,
+        [
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            is_example,
+        ],
+        [reconstruction_output, log_output],
     )
     filter_black_bg.change(
+        update_visualization,
+        [
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            is_example,
+            filter_black_bg,
+            filter_white_bg,
+        ],
+        [reconstruction_output, log_output],
     ).then(
+        fn=update_all_views_on_filter_change,
+        inputs=[
+            target_dir_output,
+            filter_black_bg,
+            filter_white_bg,
+            processed_data_state,
+            depth_view_selector,
+            normal_view_selector,
+            measure_view_selector,
+        ],
+        outputs=[
+            processed_data_state,
+            depth_map,
+            normal_map,
+            measure_image,
+            measure_points_state,
+        ],
     )
     filter_white_bg.change(
+        update_visualization,
+        [
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            is_example,
+            filter_black_bg,
+            filter_white_bg,
+            show_mesh,
+        ],
+        [reconstruction_output, log_output],
     ).then(
+        fn=update_all_views_on_filter_change,
+        inputs=[
+            target_dir_output,
+            filter_black_bg,
+            filter_white_bg,
+            processed_data_state,
+            depth_view_selector,
+            normal_view_selector,
+            measure_view_selector,
+        ],
+        outputs=[
+            processed_data_state,
+            depth_map,
+            normal_map,
+            measure_image,
+            measure_points_state,
+        ],
+    )
+    show_mesh.change(
+        update_visualization,
+        [
+            target_dir_output,
+            frame_filter,
+            show_cam,
+            is_example,
+            filter_black_bg,
+            filter_white_bg,
+            show_mesh,
+        ],
+        [reconstruction_output, log_output],
     )
+    # -------------------------------------------------------------------------
+    # Auto-update gallery whenever user uploads or changes their files
+    # -------------------------------------------------------------------------
     def update_gallery_on_unified_upload(files, interval):
         """Process newly uploaded files and refresh the preview gallery.

         Returns ``(target_dir, image_paths, log_message)``; all three are
         None when nothing was uploaded.
         """
         if files:
             new_dir, previews = handle_uploads(files, interval)
             status = "Upload complete. Click 'Reconstruct' to begin 3D processing."
             return new_dir, previews, status
         return None, None, None
     def show_resample_button(files):
         """Make the resample button visible only when an upload contains a video.

         An entry counts as a video when its file extension (case-insensitive)
         is one of the known video extensions. Entries may be dicts carrying a
         "name" key or any object whose ``str()`` is used as the path.
         """
         if not files:
             return gr.update(visible=False)

         video_extensions = [
             ".mp4",
             ".avi",
             ".mov",
             ".mkv",
             ".wmv",
             ".flv",
             ".webm",
             ".m4v",
             ".3gp",
         ]

         def _path_of(entry):
             # Dict-style uploads expose the path under "name"; otherwise
             # fall back to the string form of the entry.
             if isinstance(entry, dict) and "name" in entry:
                 return entry["name"]
             return str(entry)

         has_video = any(
             os.path.splitext(_path_of(entry))[1].lower() in video_extensions
             for entry in files
         )
         return gr.update(visible=has_video)

     def hide_resample_button():
         """Return an update that hides the resample button once it has been used."""
         return gr.update(visible=False)
     def resample_video_with_new_interval(files, new_interval, current_target_dir):
         """Re-process the current uploads using a new frame-sampling interval.

         Args:
             files: Current uploads. Each entry is either a dict whose path is
                 read from its ``"name"`` key, or a value whose ``str()`` is
                 the path.
             new_interval: Interval forwarded to ``handle_uploads`` and shown
                 in the status message (rendered as seconds).
             current_target_dir: Directory produced by the previous upload.
                 It is deleted before re-processing when it is truthy, is not
                 the literal string ``"None"``, and exists on disk.

         Returns:
             A 4-tuple ``(target_dir, image_paths, status_message,
             button_update)``. ``button_update`` always hides the resample
             button. When there is nothing to resample, ``target_dir`` is the
             unchanged ``current_target_dir`` and ``image_paths`` is ``None``.
         """
         hide_button = gr.update(visible=False)

         if not files:
             return current_target_dir, None, "No files to resample.", hide_button

         video_extensions = {
             ".mp4",
             ".avi",
             ".mov",
             ".mkv",
             ".wmv",
             ".flv",
             ".webm",
             ".m4v",
             ".3gp",
         }

         def _upload_path(file_data):
             # Only index "name" when the key is present, matching the check in
             # show_resample_button; a dict without it previously raised
             # KeyError here and is now treated as a non-video entry.
             if isinstance(file_data, dict) and "name" in file_data:
                 return str(file_data["name"])
             return str(file_data)

         has_video = any(
             os.path.splitext(_upload_path(file_data))[1].lower() in video_extensions
             for file_data in files
         )
         if not has_video:
             return (
                 current_target_dir,
                 None,
                 "No videos found to resample.",
                 hide_button,
             )

         # Remove the output of the previous sampling pass so stale frames do
         # not linger. The "None" comparison presumably guards against a text
         # component holding the string "None" as its empty value - TODO confirm.
         if (
             current_target_dir
             and current_target_dir != "None"
             and os.path.exists(current_target_dir)
         ):
             shutil.rmtree(current_target_dir)

         # Re-run the upload pipeline with the new interval.
         target_dir, image_paths = handle_uploads(files, new_interval)
         return (
             target_dir,
             image_paths,
             f"Video resampled with {new_interval}s interval. Click 'Reconstruct' to begin 3D processing.",
             hide_button,
         )
     unified_upload.change(
         # When the upload component changes: pass the uploads and the current
         # slider value to update_gallery_on_unified_upload, whose three return
         # values go to the target-dir field, the gallery and the log.
+        fn=update_gallery_on_unified_upload,
+        inputs=[unified_upload, s_time_interval],
+        outputs=[target_dir_output, image_gallery, log_output],
+    ).then(
         # Chained step: let show_resample_button decide the visibility of
         # resample_btn from the same uploads.
+        fn=show_resample_button,
+        inputs=[unified_upload],
+        outputs=[resample_btn],
+    )
+    # Show resample button when slider changes (only if files are uploaded)
     # More precisely, show_resample_button makes the button visible only when
     # at least one uploaded file has a video extension.
+    s_time_interval.change(
+        fn=show_resample_button,
+        inputs=[unified_upload],
+        outputs=[resample_btn],
+    )
+    # Handle resample button click
     # target_dir_output is both an input and an output: the handler receives the
     # current directory value and returns the value to store afterwards.
     # resample_btn is also an output so the handler can hide it.
+    resample_btn.click(
+        fn=resample_video_with_new_interval,
+        inputs=[unified_upload, s_time_interval, target_dir_output],
+        outputs=[target_dir_output, image_gallery, log_output, resample_btn],
+    )
+    # -------------------------------------------------------------------------
+    # Measure tab functionality
+    # -------------------------------------------------------------------------
+    measure_image.select(
         # Registered on the select event of measure_image. The measure handler
         # receives the processed data, the stored measure points and the
         # currently selected measure view; its return values update the image,
         # the stored points and the measurement text.
         # NOTE(review): measure presumably also takes the Gradio select-event
         # payload (click position) through its own signature - confirm at its
         # definition.
+        fn=measure,
+        inputs=[processed_data_state, measure_points_state, measure_view_selector],
+        outputs=[measure_image, measure_points_state, measure_text],
+    )
+    # -------------------------------------------------------------------------
+    # Navigation functionality for Depth, Normal, and Measure tabs
+    # -------------------------------------------------------------------------
+    # Depth tab navigation
     # Previous/next buttons call navigate_depth_view with a step of -1 or +1 and
     # write back both the selector value and the depth image.
+    prev_depth_btn.click(
+        fn=lambda processed_data, current_selector: navigate_depth_view(
+            processed_data, current_selector, -1
+        ),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_view_selector, depth_map],
+    )
+    next_depth_btn.click(
+        fn=lambda processed_data, current_selector: navigate_depth_view(
+            processed_data, current_selector, 1
+        ),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_view_selector, depth_map],
+    )
     # Changing the selector directly re-renders the depth image. The handler
     # takes the second whitespace-separated token of the selector value, parses
     # it as an integer and subtracts one before passing it to update_depth_view;
     # presumably the value is a label such as "View 3" with 1-based numbering -
     # confirm where the selector choices are built. An empty/falsy selector
     # value yields None for depth_map instead.
+    depth_view_selector.change(
+        fn=lambda processed_data, selector_value: (
+            update_depth_view(
+                processed_data,
+                int(selector_value.split()[1]) - 1,
+            )
+            if selector_value
+            else None
+        ),
+        inputs=[processed_data_state, depth_view_selector],
+        outputs=[depth_map],
+    )
+    # Normal tab navigation
     # Previous/next buttons call navigate_normal_view with a step of -1 or +1
     # and write back both the selector value and the normal image.
+    prev_normal_btn.click(
+        fn=lambda processed_data, current_selector: navigate_normal_view(
+            processed_data, current_selector, -1
+        ),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_view_selector, normal_map],
+    )
+    next_normal_btn.click(
+        fn=lambda processed_data, current_selector: navigate_normal_view(
+            processed_data, current_selector, 1
+        ),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_view_selector, normal_map],
+    )
     # Changing the selector directly re-renders the normal image. The second
     # whitespace-separated token of the selector value is parsed as an integer
     # and reduced by one before being passed to update_normal_view (presumably
     # converting a 1-based label to a 0-based index - confirm against the
     # selector choices). A falsy selector value yields None for normal_map.
+    normal_view_selector.change(
+        fn=lambda processed_data, selector_value: (
+            update_normal_view(
+                processed_data,
+                int(selector_value.split()[1]) - 1,
+            )
+            if selector_value
+            else None
+        ),
+        inputs=[processed_data_state, normal_view_selector],
+        outputs=[normal_map],
+    )
+    # Measure tab navigation
     # Previous/next buttons call navigate_measure_view with a step of -1 or +1;
     # its return values update the selector, the measure image and the stored
     # measure points.
+    prev_measure_btn.click(
+        fn=lambda processed_data, current_selector: navigate_measure_view(
+            processed_data, current_selector, -1
+        ),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_view_selector, measure_image, measure_points_state],
+    )
+    next_measure_btn.click(
+        fn=lambda processed_data, current_selector: navigate_measure_view(
+            processed_data, current_selector, 1
+        ),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_view_selector, measure_image, measure_points_state],
+    )
     # Changing the selector directly calls update_measure_view with the second
     # whitespace-separated token of the selector value parsed as an integer,
     # minus one (presumably a 1-based label to 0-based index conversion -
     # confirm against the selector choices). A falsy selector value clears the
     # image (None) and resets the stored points to an empty list.
+    measure_view_selector.change(
+        fn=lambda processed_data, selector_value: (
+            update_measure_view(processed_data, int(selector_value.split()[1]) - 1)
+            if selector_value
+            else (None, [])
+        ),
+        inputs=[processed_data_state, measure_view_selector],
+        outputs=[measure_image, measure_points_state],
+    )
+    # -------------------------------------------------------------------------
+    # Acknowledgement section
+    # -------------------------------------------------------------------------
+    gr.HTML(get_acknowledgements_html())
     # The statement above adds an HTML component whose markup comes from
     # get_acknowledgements_html().
     # The statement below enables the request queue with max_size=20 and then
     # starts the app with error display on, a share link requested and
     # ssr_mode switched off.
     # NOTE(review): share=True asks Gradio for a public share link; confirm this
     # is wanted for the hosted deployment.
+    demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)