diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -4,7 +4,6 @@ import shutil import sys import time from datetime import datetime - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" import cv2 @@ -20,14 +19,6 @@ register_heif_opener() sys.path.append("mapanything/") from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals -from mapanything.utils.hf_utils.css_and_html import ( - GRADIO_CSS, - MEASURE_INSTRUCTIONS_HTML, - get_acknowledgements_html, - get_description_html, - get_gradio_theme, - get_header_html, -) from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model from mapanything.utils.hf_utils.viz import predictions_to_glb from mapanything.utils.image import load_images, rgb @@ -35,7 +26,7 @@ from mapanything.utils.image import load_images, rgb # MapAnything Configuration high_level_config = { "path": "configs/train.yaml", - "hf_model_name": "facebook/map-anything-v1", # -- facebook/map-anything + "hf_model_name": "facebook/map-anything-v1", "model_str": "mapanything", "config_overrides": [ "machine=aws", @@ -52,7 +43,6 @@ high_level_config = { "resolution": 518, } -# Initialize model - this will be done on GPU when needed model = None @@ -60,35 +50,21 @@ model = None # 1) Core model inference # ------------------------------------------------------------------------- @spaces.GPU(duration=120) -def run_model( - target_dir, - apply_mask=True, - mask_edges=True, - filter_black_bg=False, - filter_white_bg=False, -): - """ - Run the MapAnything model on images in the 'target_dir/images' folder and return predictions. - """ +def run_model(target_dir, apply_mask=True, mask_edges=True, filter_black_bg=False, filter_white_bg=False): global model - import torch # Ensure torch is available in function scope + import torch print(f"Processing images from {target_dir}") - - # Device check device = "cuda" if torch.cuda.is_available() else "cpu" device = torch.device(device) - # Initialize model if not already done if model is None: model = initialize_mapanything_model(high_level_config, device) - else: model = model.to(device) model.eval() - # Load images using MapAnything's load_images function print("Loading images...") image_folder_path = os.path.join(target_dir, "images") views = load_images(image_folder_path) @@ -97,249 +73,149 @@ def run_model( if len(views) == 0: raise ValueError("No images found. Check your upload.") - # Run model inference print("Running inference...") - # apply_mask: Whether to apply the non-ambiguous mask to the output. Defaults to True. - # mask_edges: Whether to compute an edge mask based on normals and depth and apply it to the output. Defaults to True. - # Use checkbox values - mask_edges is set to True by default since there's no UI control for it - outputs = model.infer( - views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False - ) + outputs = model.infer(views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False) - # Convert predictions to format expected by visualization predictions = {} + extrinsic_list, intrinsic_list, world_points_list = [], [], [] + depth_maps_list, images_list, final_mask_list = [], [], [] - # Initialize lists for the required keys - extrinsic_list = [] - intrinsic_list = [] - world_points_list = [] - depth_maps_list = [] - images_list = [] - final_mask_list = [] - - # Loop through the outputs for pred in outputs: - # Extract data from predictions - depthmap_torch = pred["depth_z"][0].squeeze(-1) # (H, W) - intrinsics_torch = pred["intrinsics"][0] # (3, 3) - camera_pose_torch = pred["camera_poses"][0] # (4, 4) - - # Compute new pts3d using depth, intrinsics, and camera pose - pts3d_computed, valid_mask = depthmap_to_world_frame( - depthmap_torch, intrinsics_torch, camera_pose_torch - ) - - # Convert to numpy arrays for visualization - # Check if mask key exists in pred, if not, fill with boolean trues in the size of depthmap_torch + depthmap_torch = pred["depth_z"][0].squeeze(-1) + intrinsics_torch = pred["intrinsics"][0] + camera_pose_torch = pred["camera_poses"][0] + + pts3d_computed, valid_mask = depthmap_to_world_frame(depthmap_torch, intrinsics_torch, camera_pose_torch) + if "mask" in pred: mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool) else: - # Fill with boolean trues in the size of depthmap_torch mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool) - # Combine with valid depth mask mask = mask & valid_mask.cpu().numpy() - image = pred["img_no_norm"][0].cpu().numpy() - # Append to lists extrinsic_list.append(camera_pose_torch.cpu().numpy()) intrinsic_list.append(intrinsics_torch.cpu().numpy()) world_points_list.append(pts3d_computed.cpu().numpy()) depth_maps_list.append(depthmap_torch.cpu().numpy()) - images_list.append(image) # Add image to list - final_mask_list.append(mask) # Add final_mask to list + images_list.append(image) + final_mask_list.append(mask) - # Convert lists to numpy arrays with required shapes - # extrinsic: (S, 3, 4) - batch of camera extrinsic matrices predictions["extrinsic"] = np.stack(extrinsic_list, axis=0) - - # intrinsic: (S, 3, 3) - batch of camera intrinsic matrices predictions["intrinsic"] = np.stack(intrinsic_list, axis=0) - - # world_points: (S, H, W, 3) - batch of 3D world points predictions["world_points"] = np.stack(world_points_list, axis=0) - - # depth: (S, H, W, 1) or (S, H, W) - batch of depth maps depth_maps = np.stack(depth_maps_list, axis=0) - # Add channel dimension if needed to match (S, H, W, 1) format if len(depth_maps.shape) == 3: depth_maps = depth_maps[..., np.newaxis] - predictions["depth"] = depth_maps - - # images: (S, H, W, 3) - batch of input images predictions["images"] = np.stack(images_list, axis=0) - - # final_mask: (S, H, W) - batch of final masks for filtering predictions["final_mask"] = np.stack(final_mask_list, axis=0) - # Process data for visualization tabs (depth, normal, measure) - processed_data = process_predictions_for_visualization( - predictions, views, high_level_config, filter_black_bg, filter_white_bg - ) - - # Clean up + processed_data = process_predictions_for_visualization(predictions, views, high_level_config, filter_black_bg, filter_white_bg) torch.cuda.empty_cache() - return predictions, processed_data def update_view_selectors(processed_data): - """Update view selector dropdowns based on available views""" if processed_data is None or len(processed_data) == 0: choices = ["View 1"] else: num_views = len(processed_data) choices = [f"View {i + 1}" for i in range(num_views)] - return ( - gr.Dropdown(choices=choices, value=choices[0]), # depth_view_selector - gr.Dropdown(choices=choices, value=choices[0]), # normal_view_selector - gr.Dropdown(choices=choices, value=choices[0]), # measure_view_selector + gr.Dropdown(choices=choices, value=choices[0]), + gr.Dropdown(choices=choices, value=choices[0]), + gr.Dropdown(choices=choices, value=choices[0]), ) def get_view_data_by_index(processed_data, view_index): - """Get view data by index, handling bounds""" if processed_data is None or len(processed_data) == 0: return None - view_keys = list(processed_data.keys()) if view_index < 0 or view_index >= len(view_keys): view_index = 0 - return processed_data[view_keys[view_index]] def update_depth_view(processed_data, view_index): - """Update depth view for a specific view index""" view_data = get_view_data_by_index(processed_data, view_index) if view_data is None or view_data["depth"] is None: return None - return colorize_depth(view_data["depth"], mask=view_data.get("mask")) def update_normal_view(processed_data, view_index): - """Update normal view for a specific view index""" view_data = get_view_data_by_index(processed_data, view_index) if view_data is None or view_data["normal"] is None: return None - return colorize_normal(view_data["normal"], mask=view_data.get("mask")) def update_measure_view(processed_data, view_index): - """Update measure view for a specific view index with mask overlay""" view_data = get_view_data_by_index(processed_data, view_index) if view_data is None: - return None, [] # image, measure_points - - # Get the base image + return None, [] image = view_data["image"].copy() - - # Ensure image is in uint8 format if image.dtype != np.uint8: if image.max() <= 1.0: image = (image * 255).astype(np.uint8) else: image = image.astype(np.uint8) - - # Apply mask overlay if mask is available if view_data["mask"] is not None: - mask = view_data["mask"] - - # Create light grey overlay for masked areas - # Masked areas (False values) will be overlaid with light grey - invalid_mask = ~mask # Areas where mask is False - + invalid_mask = ~view_data["mask"] if invalid_mask.any(): - # Create a light grey overlay (RGB: 192, 192, 192) overlay_color = np.array([255, 220, 220], dtype=np.uint8) - - # Apply overlay with some transparency - alpha = 0.5 # Transparency level - for c in range(3): # RGB channels - image[:, :, c] = np.where( - invalid_mask, - (1 - alpha) * image[:, :, c] + alpha * overlay_color[c], - image[:, :, c], - ).astype(np.uint8) - + alpha = 0.5 + for c in range(3): + image[:, :, c] = np.where(invalid_mask, (1 - alpha) * image[:, :, c] + alpha * overlay_color[c], image[:, :, c]).astype(np.uint8) return image, [] def navigate_depth_view(processed_data, current_selector_value, direction): - """Navigate depth view (direction: -1 for previous, +1 for next)""" if processed_data is None or len(processed_data) == 0: return "View 1", None - - # Parse current view number try: current_view = int(current_selector_value.split()[1]) - 1 except: current_view = 0 - num_views = len(processed_data) new_view = (current_view + direction) % num_views - - new_selector_value = f"View {new_view + 1}" - depth_vis = update_depth_view(processed_data, new_view) - - return new_selector_value, depth_vis + return f"View {new_view + 1}", update_depth_view(processed_data, new_view) def navigate_normal_view(processed_data, current_selector_value, direction): - """Navigate normal view (direction: -1 for previous, +1 for next)""" if processed_data is None or len(processed_data) == 0: return "View 1", None - - # Parse current view number try: current_view = int(current_selector_value.split()[1]) - 1 except: current_view = 0 - num_views = len(processed_data) new_view = (current_view + direction) % num_views - - new_selector_value = f"View {new_view + 1}" - normal_vis = update_normal_view(processed_data, new_view) - - return new_selector_value, normal_vis + return f"View {new_view + 1}", update_normal_view(processed_data, new_view) def navigate_measure_view(processed_data, current_selector_value, direction): - """Navigate measure view (direction: -1 for previous, +1 for next)""" if processed_data is None or len(processed_data) == 0: return "View 1", None, [] - - # Parse current view number try: current_view = int(current_selector_value.split()[1]) - 1 except: current_view = 0 - num_views = len(processed_data) new_view = (current_view + direction) % num_views - - new_selector_value = f"View {new_view + 1}" measure_image, measure_points = update_measure_view(processed_data, new_view) - - return new_selector_value, measure_image, measure_points + return f"View {new_view + 1}", measure_image, measure_points def populate_visualization_tabs(processed_data): - """Populate the depth, normal, and measure tabs with processed data""" if processed_data is None or len(processed_data) == 0: return None, None, None, [] - - # Use update functions to ensure confidence filtering is applied from the start depth_vis = update_depth_view(processed_data, 0) normal_vis = update_normal_view(processed_data, 0) measure_img, _ = update_measure_view(processed_data, 0) - return depth_vis, normal_vis, measure_img, [] @@ -347,20 +223,14 @@ def populate_visualization_tabs(processed_data): # 2) Handle uploaded video/images --> produce target_dir + images # ------------------------------------------------------------------------- def handle_uploads(unified_upload, s_time_interval=1.0): - """ - Create a new 'target_dir' + 'images' subfolder, and place user-uploaded - images or extracted frames from video into it. Return (target_dir, image_paths). - """ start_time = time.time() gc.collect() torch.cuda.empty_cache() - # Create a unique folder name timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f") target_dir = f"input_images_{timestamp}" target_dir_images = os.path.join(target_dir, "images") - # Clean up if somehow that folder already exists if os.path.exists(target_dir): shutil.rmtree(target_dir) os.makedirs(target_dir) @@ -368,7 +238,6 @@ def handle_uploads(unified_upload, s_time_interval=1.0): image_paths = [] - # --- Handle uploaded files (both images and videos) --- if unified_upload is not None: for file_data in unified_upload: if isinstance(file_data, dict) and "name" in file_data: @@ -377,145 +246,66 @@ def handle_uploads(unified_upload, s_time_interval=1.0): file_path = str(file_data) file_ext = os.path.splitext(file_path)[1].lower() + video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"] - # Check if it's a video file - video_extensions = [ - ".mp4", - ".avi", - ".mov", - ".mkv", - ".wmv", - ".flv", - ".webm", - ".m4v", - ".3gp", - ] if file_ext in video_extensions: - # Handle as video vs = cv2.VideoCapture(file_path) fps = vs.get(cv2.CAP_PROP_FPS) - frame_interval = int(fps * s_time_interval) # frames per interval - - count = 0 - video_frame_num = 0 + frame_interval = int(fps * s_time_interval) + count, video_frame_num = 0, 0 while True: gotit, frame = vs.read() if not gotit: break count += 1 if count % frame_interval == 0: - # Use original filename as prefix for frames base_name = os.path.splitext(os.path.basename(file_path))[0] - image_path = os.path.join( - target_dir_images, f"{base_name}_{video_frame_num:06}.png" - ) + image_path = os.path.join(target_dir_images, f"{base_name}_{video_frame_num:06}.png") cv2.imwrite(image_path, frame) image_paths.append(image_path) video_frame_num += 1 vs.release() - print( - f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}" - ) - + print(f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}") else: - # Handle as image - # Check if the file is a HEIC image if file_ext in [".heic", ".heif"]: - # Convert HEIC to JPEG for better gallery compatibility try: with Image.open(file_path) as img: - # Convert to RGB if necessary (HEIC can have different color modes) if img.mode not in ("RGB", "L"): img = img.convert("RGB") - - # Create JPEG filename base_name = os.path.splitext(os.path.basename(file_path))[0] - dst_path = os.path.join( - target_dir_images, f"{base_name}.jpg" - ) - - # Save as JPEG with high quality + dst_path = os.path.join(target_dir_images, f"{base_name}.jpg") img.save(dst_path, "JPEG", quality=95) image_paths.append(dst_path) - print( - f"Converted HEIC to JPEG: {os.path.basename(file_path)} -> {os.path.basename(dst_path)}" - ) except Exception as e: print(f"Error converting HEIC file {file_path}: {e}") - # Fall back to copying as is - dst_path = os.path.join( - target_dir_images, os.path.basename(file_path) - ) + dst_path = os.path.join(target_dir_images, os.path.basename(file_path)) shutil.copy(file_path, dst_path) image_paths.append(dst_path) else: - # Regular image files - copy as is - dst_path = os.path.join( - target_dir_images, os.path.basename(file_path) - ) + dst_path = os.path.join(target_dir_images, os.path.basename(file_path)) shutil.copy(file_path, dst_path) image_paths.append(dst_path) - # Sort final images for gallery image_paths = sorted(image_paths) - end_time = time.time() - print( - f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds" - ) + print(f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds") return target_dir, image_paths # ------------------------------------------------------------------------- -# 3) Update gallery on upload -# ------------------------------------------------------------------------- -def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0): - """ - Whenever user uploads or changes files, immediately handle them - and show in the gallery. Return (target_dir, image_paths). - If nothing is uploaded, returns "None" and empty list. - """ - if not input_video and not input_images: - return None, None, None, None - target_dir, image_paths = handle_uploads(input_video, input_images, s_time_interval) - return ( - None, - target_dir, - image_paths, - "Upload complete. Click 'Reconstruct' to begin 3D processing.", - ) - - -# ------------------------------------------------------------------------- -# 4) Reconstruction: uses the target_dir plus any viz parameters +# 3) Reconstruction # ------------------------------------------------------------------------- @spaces.GPU(duration=120) -def gradio_demo( - target_dir, - frame_filter="All", - show_cam=True, - filter_black_bg=False, - filter_white_bg=False, - apply_mask=True, - show_mesh=True, -): - """ - Perform reconstruction using the already-created target_dir/images. - """ +def gradio_demo(target_dir, frame_filter="All", show_cam=True, filter_black_bg=False, filter_white_bg=False, apply_mask=True, show_mesh=True): if not os.path.isdir(target_dir) or target_dir == "None": - return None, "No valid target directory found. Please upload first.", None, None + return None, "No valid target directory found. Please upload first.", None, None, None, None, None, "", None, None, None start_time = time.time() gc.collect() torch.cuda.empty_cache() - # Prepare frame_filter dropdown target_dir_images = os.path.join(target_dir, "images") - all_files = ( - sorted(os.listdir(target_dir_images)) - if os.path.isdir(target_dir_images) - else [] - ) + all_files = sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else [] all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)] frame_filter_choices = ["All"] + all_files @@ -523,484 +313,214 @@ def gradio_demo( with torch.no_grad(): predictions, processed_data = run_model(target_dir, apply_mask) - # Save predictions prediction_save_path = os.path.join(target_dir, "predictions.npz") np.savez(prediction_save_path, **predictions) - # Handle None frame_filter if frame_filter is None: frame_filter = "All" - # Build a GLB file name - glbfile = os.path.join( - target_dir, - f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb", - ) - - # Convert predictions to GLB - glbscene = predictions_to_glb( - predictions, - filter_by_frames=frame_filter, - show_cam=show_cam, - mask_black_bg=filter_black_bg, - mask_white_bg=filter_white_bg, - as_mesh=show_mesh, # Use the show_mesh parameter - ) + glbfile = os.path.join(target_dir, f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb") + glbscene = predictions_to_glb(predictions, filter_by_frames=frame_filter, show_cam=show_cam, mask_black_bg=filter_black_bg, mask_white_bg=filter_white_bg, as_mesh=show_mesh) glbscene.export(file_obj=glbfile) - # Cleanup del predictions gc.collect() torch.cuda.empty_cache() end_time = time.time() print(f"Total time: {end_time - start_time:.2f} seconds") - log_msg = ( - f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization." - ) - - # Populate visualization tabs with processed data - depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs( - processed_data - ) + log_msg = f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization." - # Update view selectors based on available views - depth_selector, normal_selector, measure_selector = update_view_selectors( - processed_data - ) + depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(processed_data) + depth_selector, normal_selector, measure_selector = update_view_selectors(processed_data) - return ( - glbfile, - log_msg, - gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True), - processed_data, - depth_vis, - normal_vis, - measure_img, - "", # measure_text (empty initially) - depth_selector, - normal_selector, - measure_selector, - ) + return (glbfile, log_msg, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True), processed_data, depth_vis, normal_vis, measure_img, "", depth_selector, normal_selector, measure_selector) # ------------------------------------------------------------------------- -# 5) Helper functions for UI resets + re-visualization +# 4) Helper functions # ------------------------------------------------------------------------- def colorize_depth(depth_map, mask=None): - """Convert depth map to colorized visualization with optional mask""" if depth_map is None: return None - - # Normalize depth to 0-1 range depth_normalized = depth_map.copy() valid_mask = depth_normalized > 0 - - # Apply additional mask if provided (for background filtering) if mask is not None: valid_mask = valid_mask & mask - if valid_mask.sum() > 0: valid_depths = depth_normalized[valid_mask] p5 = np.percentile(valid_depths, 5) p95 = np.percentile(valid_depths, 95) - depth_normalized[valid_mask] = (depth_normalized[valid_mask] - p5) / (p95 - p5) - - # Apply colormap import matplotlib.pyplot as plt - colormap = plt.cm.turbo_r colored = colormap(depth_normalized) colored = (colored[:, :, :3] * 255).astype(np.uint8) - - # Set invalid pixels to white colored[~valid_mask] = [255, 255, 255] - return colored def colorize_normal(normal_map, mask=None): - """Convert normal map to colorized visualization with optional mask""" if normal_map is None: return None - - # Create a copy for modification normal_vis = normal_map.copy() - - # Apply mask if provided (set masked areas to [0, 0, 0] which becomes grey after normalization) if mask is not None: invalid_mask = ~mask - normal_vis[invalid_mask] = [0, 0, 0] # Set invalid areas to zero - - # Normalize normals to [0, 1] range for visualization + normal_vis[invalid_mask] = [0, 0, 0] normal_vis = (normal_vis + 1.0) / 2.0 normal_vis = (normal_vis * 255).astype(np.uint8) - return normal_vis -def process_predictions_for_visualization( - predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False -): - """Extract depth, normal, and 3D points from predictions for visualization""" +def process_predictions_for_visualization(predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False): processed_data = {} - - # Process each view for view_idx, view in enumerate(views): - # Get image image = rgb(view["img"], norm_type=high_level_config["data_norm_type"]) - - # Get predicted points pred_pts3d = predictions["world_points"][view_idx] - - # Initialize data for this view - view_data = { - "image": image[0], - "points3d": pred_pts3d, - "depth": None, - "normal": None, - "mask": None, - } - - # Start with the final mask from predictions + view_data = {"image": image[0], "points3d": pred_pts3d, "depth": None, "normal": None, "mask": None} mask = predictions["final_mask"][view_idx].copy() - - # Apply black background filtering if enabled if filter_black_bg: - # Get the image colors (ensure they're in 0-255 range) view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0] - # Filter out black background pixels (sum of RGB < 16) black_bg_mask = view_colors.sum(axis=2) >= 16 mask = mask & black_bg_mask - - # Apply white background filtering if enabled if filter_white_bg: - # Get the image colors (ensure they're in 0-255 range) view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0] - # Filter out white background pixels (all RGB > 240) - white_bg_mask = ~( - (view_colors[:, :, 0] > 240) - & (view_colors[:, :, 1] > 240) - & (view_colors[:, :, 2] > 240) - ) + white_bg_mask = ~((view_colors[:, :, 0] > 240) & (view_colors[:, :, 1] > 240) & (view_colors[:, :, 2] > 240)) mask = mask & white_bg_mask - view_data["mask"] = mask view_data["depth"] = predictions["depth"][view_idx].squeeze() - normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"]) view_data["normal"] = normals - processed_data[view_idx] = view_data - return processed_data def reset_measure(processed_data): - """Reset measure points""" if processed_data is None or len(processed_data) == 0: return None, [], "" - - # Return the first view image first_view = list(processed_data.values())[0] return first_view["image"], [], "" -def measure( - processed_data, measure_points, current_view_selector, event: gr.SelectData -): - """Handle measurement on images""" +def measure(processed_data, measure_points, current_view_selector, event: gr.SelectData): try: - print(f"Measure function called with selector: {current_view_selector}") - if processed_data is None or len(processed_data) == 0: return None, [], "No data available" - - # Use the currently selected view instead of always using the first view try: current_view_index = int(current_view_selector.split()[1]) - 1 except: current_view_index = 0 - - print(f"Using view index: {current_view_index}") - - # Get view data safely if current_view_index < 0 or current_view_index >= len(processed_data): current_view_index = 0 - view_keys = list(processed_data.keys()) current_view = processed_data[view_keys[current_view_index]] - if current_view is None: return None, [], "No view data available" point2d = event.index[0], event.index[1] - print(f"Clicked point: {point2d}") - - # Check if the clicked point is in a masked area (prevent interaction) - if ( - current_view["mask"] is not None - and 0 <= point2d[1] < current_view["mask"].shape[0] - and 0 <= point2d[0] < current_view["mask"].shape[1] - ): - # Check if the point is in a masked (invalid) area + if current_view["mask"] is not None and 0 <= point2d[1] < current_view["mask"].shape[0] and 0 <= point2d[0] < current_view["mask"].shape[1]: if not current_view["mask"][point2d[1], point2d[0]]: - print(f"Clicked point {point2d} is in masked area, ignoring click") - # Always return image with mask overlay - masked_image, _ = update_measure_view( - processed_data, current_view_index - ) - return ( - masked_image, - measure_points, - 'Cannot measure on masked areas (shown in grey)', - ) + masked_image, _ = update_measure_view(processed_data, current_view_index) + return masked_image, measure_points, 'Cannot measure on masked areas (shown in grey)' measure_points.append(point2d) - - # Get image with mask overlay and ensure it's valid image, _ = update_measure_view(processed_data, current_view_index) if image is None: return None, [], "No image available" - image = image.copy() points3d = current_view["points3d"] - # Ensure image is in uint8 format for proper cv2 operations - try: - if image.dtype != np.uint8: - if image.max() <= 1.0: - # Image is in [0, 1] range, convert to [0, 255] - image = (image * 255).astype(np.uint8) - else: - # Image is already in [0, 255] range - image = image.astype(np.uint8) - except Exception as e: - print(f"Image conversion error: {e}") - return None, [], f"Image conversion error: {e}" + if image.dtype != np.uint8: + if image.max() <= 1.0: + image = (image * 255).astype(np.uint8) + else: + image = image.astype(np.uint8) - # Draw circles for points - try: - for p in measure_points: - if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]: - image = cv2.circle( - image, p, radius=5, color=(255, 0, 0), thickness=2 - ) - except Exception as e: - print(f"Drawing error: {e}") - return None, [], f"Drawing error: {e}" + for p in measure_points: + if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]: + image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2) depth_text = "" - try: - for i, p in enumerate(measure_points): - if ( - current_view["depth"] is not None - and 0 <= p[1] < current_view["depth"].shape[0] - and 0 <= p[0] < current_view["depth"].shape[1] - ): - d = current_view["depth"][p[1], p[0]] - depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n" - else: - # Use Z coordinate of 3D points if depth not available - if ( - points3d is not None - and 0 <= p[1] < points3d.shape[0] - and 0 <= p[0] < points3d.shape[1] - ): - z = points3d[p[1], p[0], 2] - depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n" - except Exception as e: - print(f"Depth text error: {e}") - depth_text = f"Error computing depth: {e}\n" + for i, p in enumerate(measure_points): + if current_view["depth"] is not None and 0 <= p[1] < current_view["depth"].shape[0] and 0 <= p[0] < current_view["depth"].shape[1]: + d = current_view["depth"][p[1], p[0]] + depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n" + elif points3d is not None and 0 <= p[1] < points3d.shape[0] and 0 <= p[0] < points3d.shape[1]: + z = points3d[p[1], p[0], 2] + depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n" if len(measure_points) == 2: - try: - point1, point2 = measure_points - # Draw line - if ( - 0 <= point1[0] < image.shape[1] - and 0 <= point1[1] < image.shape[0] - and 0 <= point2[0] < image.shape[1] - and 0 <= point2[1] < image.shape[0] - ): - image = cv2.line( - image, point1, point2, color=(255, 0, 0), thickness=2 - ) - - # Compute 3D distance - distance_text = "- **Distance: Unable to compute**" - if ( - points3d is not None - and 0 <= point1[1] < points3d.shape[0] - and 0 <= point1[0] < points3d.shape[1] - and 0 <= point2[1] < points3d.shape[0] - and 0 <= point2[0] < points3d.shape[1] - ): - try: - p1_3d = points3d[point1[1], point1[0]] - p2_3d = points3d[point2[1], point2[0]] - distance = np.linalg.norm(p1_3d - p2_3d) - distance_text = f"- **Distance: {distance:.2f}m**" - except Exception as e: - print(f"Distance computation error: {e}") - distance_text = f"- **Distance computation error: {e}**" - - measure_points = [] - text = depth_text + distance_text - print(f"Measurement complete: {text}") - return [image, measure_points, text] - except Exception as e: - print(f"Final measurement error: {e}") - return None, [], f"Measurement error: {e}" + point1, point2 = measure_points + if 0 <= point1[0] < image.shape[1] and 0 <= point1[1] < image.shape[0] and 0 <= point2[0] < image.shape[1] and 0 <= point2[1] < image.shape[0]: + image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2) + distance_text = "- **Distance: Unable to compute**" + if points3d is not None and 0 <= point1[1] < points3d.shape[0] and 0 <= point1[0] < points3d.shape[1] and 0 <= point2[1] < points3d.shape[0] and 0 <= point2[0] < points3d.shape[1]: + p1_3d = points3d[point1[1], point1[0]] + p2_3d = points3d[point2[1], point2[0]] + distance = np.linalg.norm(p1_3d - p2_3d) + distance_text = f"- **Distance: {distance:.2f}m**" + measure_points = [] + return [image, measure_points, depth_text + distance_text] else: - print(f"Single point measurement: {depth_text}") return [image, measure_points, depth_text] - except Exception as e: - print(f"Overall measure function error: {e}") - return None, [], f"Measure function error: {e}" + print(f"Measure error: {e}") + return None, [], f"Measure error: {e}" def clear_fields(): - """ - Clears the 3D viewer, the stored target_dir, and empties the gallery. - """ return None def update_log(): - """ - Display a quick log message while waiting. - """ return "Loading and Reconstructing..." -def update_visualization( - target_dir, - frame_filter, - show_cam, - is_example, - filter_black_bg=False, - filter_white_bg=False, - show_mesh=True, -): - """ - Reload saved predictions from npz, create (or reuse) the GLB for new parameters, - and return it for the 3D viewer. If is_example == "True", skip. - """ - - # If it's an example click, skip as requested +def update_visualization(target_dir, frame_filter, show_cam, is_example, filter_black_bg=False, filter_white_bg=False, show_mesh=True): if is_example == "True": - return ( - gr.update(), - "No reconstruction available. Please click the Reconstruct button first.", - ) - + return gr.update(), "No reconstruction available. Please click Reconstruct first." if not target_dir or target_dir == "None" or not os.path.isdir(target_dir): - return ( - gr.update(), - "No reconstruction available. Please click the Reconstruct button first.", - ) - + return gr.update(), "No reconstruction available. Please click Reconstruct first." predictions_path = os.path.join(target_dir, "predictions.npz") if not os.path.exists(predictions_path): - return ( - gr.update(), - f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first.", - ) - + return gr.update(), f"No reconstruction available. Please run Reconstruct first." loaded = np.load(predictions_path, allow_pickle=True) predictions = {key: loaded[key] for key in loaded.keys()} - - glbfile = os.path.join( - target_dir, - f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb", - ) - + glbfile = os.path.join(target_dir, f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb") if not os.path.exists(glbfile): - glbscene = predictions_to_glb( - predictions, - filter_by_frames=frame_filter, - show_cam=show_cam, - mask_black_bg=filter_black_bg, - mask_white_bg=filter_white_bg, - as_mesh=show_mesh, - ) + glbscene = predictions_to_glb(predictions, filter_by_frames=frame_filter, show_cam=show_cam, mask_black_bg=filter_black_bg, mask_white_bg=filter_white_bg, as_mesh=show_mesh) glbscene.export(file_obj=glbfile) - - return ( - glbfile, - "Visualization updated.", - ) + return glbfile, "Visualization updated." -def update_all_views_on_filter_change( - target_dir, - filter_black_bg, - filter_white_bg, - processed_data, - depth_view_selector, - normal_view_selector, - measure_view_selector, -): - """ - Update all individual view tabs when background filtering checkboxes change. - This regenerates the processed data with new filtering and updates all views. - """ - # Check if we have a valid target directory and predictions +def update_all_views_on_filter_change(target_dir, filter_black_bg, filter_white_bg, processed_data, depth_view_selector, normal_view_selector, measure_view_selector): if not target_dir or target_dir == "None" or not os.path.isdir(target_dir): return processed_data, None, None, None, [] - predictions_path = os.path.join(target_dir, "predictions.npz") if not os.path.exists(predictions_path): return processed_data, None, None, None, [] - try: - # Load the original predictions and views loaded = np.load(predictions_path, allow_pickle=True) predictions = {key: loaded[key] for key in loaded.keys()} - - # Load images using MapAnything's load_images function image_folder_path = os.path.join(target_dir, "images") views = load_images(image_folder_path) - - # Regenerate processed data with new filtering settings - new_processed_data = process_predictions_for_visualization( - predictions, views, high_level_config, filter_black_bg, filter_white_bg - ) - - # Get current view indices + new_processed_data = process_predictions_for_visualization(predictions, views, high_level_config, filter_black_bg, filter_white_bg) try: - depth_view_idx = ( - int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0 - ) + depth_view_idx = int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0 except: depth_view_idx = 0 - try: - normal_view_idx = ( - int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0 - ) + normal_view_idx = int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0 except: normal_view_idx = 0 - try: - measure_view_idx = ( - int(measure_view_selector.split()[1]) - 1 - if measure_view_selector - else 0 - ) + measure_view_idx = int(measure_view_selector.split()[1]) - 1 if measure_view_selector else 0 except: measure_view_idx = 0 - - # Update all views with new filtered data depth_vis = update_depth_view(new_processed_data, depth_view_idx) normal_vis = update_normal_view(new_processed_data, normal_view_idx) measure_img, _ = update_measure_view(new_processed_data, measure_view_idx) - return new_processed_data, depth_vis, normal_vis, measure_img, [] - except Exception as e: print(f"Error updating views on filter change: {e}") return processed_data, None, None, None, [] @@ -1010,665 +530,627 @@ def update_all_views_on_filter_change( # Example scene functions # ------------------------------------------------------------------------- def get_scene_info(examples_dir): - """Get information about scenes in the examples directory""" import glob - scenes = [] if not os.path.exists(examples_dir): return scenes - for scene_folder in sorted(os.listdir(examples_dir)): scene_path = os.path.join(examples_dir, scene_folder) if os.path.isdir(scene_path): - # Find all image files in the scene folder image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"] image_files = [] for ext in image_extensions: image_files.extend(glob.glob(os.path.join(scene_path, ext))) image_files.extend(glob.glob(os.path.join(scene_path, ext.upper()))) - if image_files: - # Sort images and get the first one for thumbnail image_files = sorted(image_files) - first_image = image_files[0] - num_images = len(image_files) - - scenes.append( - { - "name": scene_folder, - "path": scene_path, - "thumbnail": first_image, - "num_images": num_images, - "image_files": image_files, - } - ) - + scenes.append({"name": scene_folder, "path": scene_path, "thumbnail": image_files[0], "num_images": len(image_files), "image_files": image_files}) return scenes def load_example_scene(scene_name, examples_dir="examples"): - """Load a scene from examples directory""" scenes = get_scene_info(examples_dir) - - # Find the selected scene selected_scene = None for scene in scenes: if scene["name"] == scene_name: selected_scene = scene break - if selected_scene is None: return None, None, None, "Scene not found" + file_objects = [image_path for image_path in selected_scene["image_files"]] + target_dir, image_paths = handle_uploads(file_objects, 1.0) + return (None, target_dir, image_paths, f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. Click Reconstruct to begin 3D processing.") - # Create file-like objects for the unified upload system - # Convert image file paths to the format expected by unified_upload - file_objects = [] - for image_path in selected_scene["image_files"]: - file_objects.append(image_path) - # Create target directory and copy images using the unified upload system - target_dir, image_paths = handle_uploads(file_objects, 1.0) +# ------------------------------------------------------------------------- +# CSS - Lime Theme +# ------------------------------------------------------------------------- +css = r""" +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap'); +*{box-sizing:border-box;margin:0;padding:0} +body,.gradio-container{background:#0a0a0a!important;font-family:'Inter',system-ui,sans-serif!important;font-size:14px!important;color:#e4e4e7!important;min-height:100vh} +.dark body,.dark .gradio-container{background:#0a0a0a!important;color:#e4e4e7!important} +footer{display:none!important} +.hidden-input{display:none!important;height:0!important;overflow:hidden!important;margin:0!important;padding:0!important} + +.app-shell{background:#111111;border:1px solid #1a2e1a;border-radius:16px;margin:12px auto;max-width:1440px;overflow:hidden;box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(0,255,0,.04)} +.app-header{background:linear-gradient(135deg,#111111 0%,#141a14 100%);border-bottom:1px solid #1a2e1a;padding:14px 24px;display:flex;align-items:center;justify-content:space-between} +.app-header-left{display:flex;align-items:center;gap:12px} +.app-logo{width:36px;height:36px;background:linear-gradient(135deg,#00cc00,#00ff00,#66ff66);border-radius:10px;display:flex;align-items:center;justify-content:center;font-size:18px;font-weight:800;color:#000;box-shadow:0 4px 12px rgba(0,255,0,.3)} +.app-title{font-size:18px;font-weight:700;background:linear-gradient(135deg,#e4e4e7,#a1a1aa);-webkit-background-clip:text;-webkit-text-fill-color:transparent;letter-spacing:-.3px} +.app-badge{font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;background:rgba(0,255,0,.1);color:#00ff00;border:1px solid rgba(0,255,0,.2);letter-spacing:.3px} + +.app-toolbar{background:#111111;border-bottom:1px solid #1a2e1a;padding:8px 16px;display:flex;gap:4px;align-items:center;flex-wrap:wrap} +.tb-sep{width:1px;height:28px;background:#1a2e1a;margin:0 8px} +.modern-tb-btn{display:inline-flex;align-items:center;justify-content:center;gap:6px;min-width:32px;height:34px;background:transparent;border:1px solid transparent;border-radius:8px;cursor:pointer;font-size:13px;font-weight:600;padding:0 12px;font-family:'Inter',sans-serif;color:#fff!important;transition:all .15s ease} +.modern-tb-btn:hover{background:rgba(0,255,0,.1);color:#fff!important;border-color:rgba(0,255,0,.25)} +.modern-tb-btn:active,.modern-tb-btn.active{background:rgba(0,255,0,.18);color:#fff!important;border-color:rgba(0,255,0,.4)} +.modern-tb-btn .tb-icon{font-size:15px;line-height:1;color:#fff!important} +.modern-tb-btn .tb-label{font-size:13px;color:#fff!important;font-weight:600} + +.app-main-row{display:flex;gap:0;flex:1;overflow:hidden} +.app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #1a2e1a} +.app-main-right{width:460px;display:flex;flex-direction:column;flex-shrink:0;background:#111111;overflow-y:auto;max-height:calc(100vh - 120px)} + +.upload-area-wrap{position:relative;background:#090909;min-height:480px;overflow:hidden;display:flex;align-items:center;justify-content:center} +.upload-prompt-modern{position:absolute;z-index:20} +.upload-click-area{display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;padding:36px 44px;border:2px dashed #1a3a1a;border-radius:16px;background:rgba(0,255,0,.02);transition:all .2s ease} +.upload-click-area:hover{background:rgba(0,255,0,.06);border-color:#00ff00;transform:scale(1.03)} +.upload-click-area:active{background:rgba(0,255,0,.1);transform:scale(.98)} +.upload-click-area svg{width:80px;height:80px} +.upload-help-text{margin-top:12px;font-size:13px;color:#52525b;text-align:center} + +.gallery-section{border-top:1px solid #1a2e1a;background:#0d0d0d} +.gallery-section-title{padding:10px 16px;font-size:12px;font-weight:600;color:#52525b;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid #1a2e1a;display:flex;align-items:center;gap:8px} +.gallery-grid{display:flex;flex-wrap:wrap;gap:4px;padding:8px;max-height:160px;overflow-y:auto} +.gallery-thumb{width:80px;height:60px;object-fit:cover;border-radius:6px;border:1px solid #1a2e1a;cursor:pointer;transition:all .15s} +.gallery-thumb:hover{border-color:#00ff00;box-shadow:0 0 8px rgba(0,255,0,.2)} +.gallery-empty{padding:20px;text-align:center;color:#3f3f46;font-size:13px} +.gallery-grid::-webkit-scrollbar{width:6px} +.gallery-grid::-webkit-scrollbar-track{background:#0d0d0d} +.gallery-grid::-webkit-scrollbar-thumb{background:#1a2e1a;border-radius:3px} + +.panel-card{border-bottom:1px solid #1a2e1a} +.panel-card-title{padding:12px 20px;font-size:12px;font-weight:600;color:#52525b;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(26,46,26,.6)} +.panel-card-body{padding:16px 20px;display:flex;flex-direction:column;gap:10px} + +.settings-group{border:1px solid #1a2e1a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden} +.settings-group-title{font-size:12px;font-weight:600;color:#52525b;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #1a2e1a;background:rgba(17,17,17,.5)} +.settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px} + +.checkbox-row{display:flex;align-items:center;gap:8px;font-size:13px;cursor:default;color:#a1a1aa} +.checkbox-row input[type="checkbox"]{accent-color:#00ff00;width:16px;height:16px;cursor:pointer} +.checkbox-row label{color:#a1a1aa;font-size:13px;cursor:pointer} + +.slider-row{display:flex;align-items:center;gap:10px;min-height:28px} +.slider-row label,.slider-row .dim-label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:72px;flex-shrink:0} +.slider-row input[type="range"]{flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#1a2e1a;border-radius:3px;outline:none;min-width:0} +.slider-row input[type="range"]::-webkit-slider-thumb{-webkit-appearance:none;appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#00cc00,#00ff00);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(0,255,0,.35);transition:transform .15s} +.slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)} +.slider-row input[type="range"]::-moz-range-thumb{width:16px;height:16px;background:linear-gradient(135deg,#00cc00,#00ff00);border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(0,255,0,.35)} +.slider-row .slider-val{min-width:52px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#090909;border:1px solid #1a2e1a;border-radius:6px;color:#a1a1aa;flex-shrink:0} + +.btn-run{display:flex;align-items:center;justify-content:center;gap:8px;width:100%;background:linear-gradient(135deg,#00aa00,#00dd00);border:none;border-radius:10px;padding:12px 24px;cursor:pointer;font-size:15px;font-weight:700;font-family:'Inter',sans-serif;color:#000;transition:all .2s ease;box-shadow:0 4px 16px rgba(0,255,0,.25),inset 0 1px 0 rgba(255,255,255,.15);letter-spacing:-.2px} +.btn-run:hover{background:linear-gradient(135deg,#00cc00,#00ff00);box-shadow:0 6px 24px rgba(0,255,0,.4),inset 0 1px 0 rgba(255,255,255,.2);transform:translateY(-1px)} +.btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(0,255,0,.25)} +.btn-run:disabled{opacity:.5;cursor:not-allowed;transform:none} + +.output-frame{border-bottom:1px solid #1a2e1a;display:flex;flex-direction:column;position:relative} +.output-frame .out-title{padding:10px 20px;font-size:13px;font-weight:700;color:#fff!important;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(26,46,26,.6);display:flex;align-items:center;justify-content:space-between} +.output-frame .out-body{flex:1;background:#090909;display:flex;align-items:center;justify-content:center;overflow:hidden;min-height:220px;position:relative} +.output-frame .out-placeholder{color:#3f3f46;font-size:13px;text-align:center;padding:20px} + +.modern-loader{display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,9,.92);z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px)} +.modern-loader.active{display:flex} +.modern-loader .loader-spinner{width:36px;height:36px;border:3px solid #1a2e1a;border-top-color:#00ff00;border-radius:50%;animation:spin .8s linear infinite} +@keyframes spin{to{transform:rotate(360deg)}} +.modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500} +.loader-bar-track{width:200px;height:4px;background:#1a2e1a;border-radius:2px;overflow:hidden} +.loader-bar-fill{height:100%;background:linear-gradient(90deg,#00aa00,#00ff00,#00aa00);background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px} +@keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}} + +.toast-notification{position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);z-index:9999;padding:10px 24px;border-radius:10px;font-family:'Inter',sans-serif;font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;box-shadow:0 8px 24px rgba(0,0,0,.5);transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none} +.toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto} +.toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)} +.toast-notification.warning{background:linear-gradient(135deg,#d97706,#b45309);color:#fff;border:1px solid rgba(255,255,255,.15)} +.toast-notification.info{background:linear-gradient(135deg,#00aa00,#008800);color:#fff;border:1px solid rgba(255,255,255,.15)} +.toast-notification .toast-icon{font-size:16px;line-height:1} +.toast-notification .toast-text{line-height:1.3} + +.tab-bar{display:flex;gap:0;border-bottom:1px solid #1a2e1a;background:#111111} +.tab-btn{padding:10px 20px;font-size:13px;font-weight:600;color:#71717a;background:transparent;border:none;border-bottom:2px solid transparent;cursor:pointer;transition:all .15s;font-family:'Inter',sans-serif} +.tab-btn:hover{color:#a1a1aa;background:rgba(0,255,0,.03)} +.tab-btn.active{color:#00ff00;border-bottom-color:#00ff00;background:rgba(0,255,0,.05)} +.tab-content{display:none} +.tab-content.active{display:block} + +.nav-row{display:flex;align-items:center;gap:8px;padding:8px 12px;border-bottom:1px solid #1a2e1a} +.nav-btn{background:rgba(0,255,0,.08);border:1px solid rgba(0,255,0,.2);border-radius:6px;padding:4px 14px;font-size:12px;font-weight:600;color:#00ff00;cursor:pointer;font-family:'Inter',sans-serif;transition:all .15s} +.nav-btn:hover{background:rgba(0,255,0,.15);border-color:rgba(0,255,0,.35)} +.view-select{flex:1;background:#090909;border:1px solid #1a2e1a;border-radius:6px;padding:4px 10px;font-size:12px;color:#a1a1aa;font-family:'Inter',sans-serif;outline:none} +.view-select:focus{border-color:#00ff00} + +.view-image-area{background:#090909;min-height:300px;display:flex;align-items:center;justify-content:center;overflow:hidden;position:relative} +.view-image-area img{max-width:100%;max-height:460px;image-rendering:auto} +.measure-info{padding:10px 16px;font-size:13px;color:#a1a1aa;border-top:1px solid #1a2e1a} +.measure-result{padding:8px 16px;font-size:13px;color:#00ff00;font-weight:500;border-top:1px solid #1a2e1a} + +.examples-section{border-top:1px solid #1a2e1a;background:#0d0d0d} +.examples-title{padding:14px 20px;font-size:14px;font-weight:700;color:#a1a1aa;border-bottom:1px solid #1a2e1a} +.examples-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(140px,1fr));gap:8px;padding:12px 16px;max-height:280px;overflow-y:auto} +.example-card{border:1px solid #1a2e1a;border-radius:8px;overflow:hidden;cursor:pointer;transition:all .2s;background:#111111} +.example-card:hover{border-color:#00ff00;box-shadow:0 0 12px rgba(0,255,0,.15);transform:translateY(-2px)} +.example-card img{width:100%;height:90px;object-fit:cover;display:block} +.example-card-info{padding:6px 8px;font-size:11px;color:#71717a;font-weight:500;text-align:center;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} +.examples-grid::-webkit-scrollbar{width:6px} +.examples-grid::-webkit-scrollbar-track{background:#0d0d0d} +.examples-grid::-webkit-scrollbar-thumb{background:#1a2e1a;border-radius:3px} + +.app-statusbar{background:#111111;border-top:1px solid #1a2e1a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center;font-size:12px} +.app-statusbar .sb-section{padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap} +.app-statusbar .sb-section.sb-fixed{flex:0 0 auto;min-width:90px;text-align:center;justify-content:center;padding:3px 12px;background:rgba(0,255,0,.06);border-radius:6px;color:#00ff00;font-weight:500} + +#gradio-run-btn{position:absolute;left:-9999px;top:-9999px;width:1px;height:1px;opacity:0.01;pointer-events:none;overflow:hidden} +::-webkit-scrollbar{width:8px;height:8px} +::-webkit-scrollbar-track{background:#090909} +::-webkit-scrollbar-thumb{background:#1a2e1a;border-radius:4px} +::-webkit-scrollbar-thumb:hover{background:#2a4a2a} +@media(max-width:840px){.app-main-row{flex-direction:column}.app-main-right{width:100%}.app-main-left{border-right:none;border-bottom:1px solid #1a2e1a}} +""" - return ( - None, # Clear reconstruction output - target_dir, # Set target directory - image_paths, # Set gallery - f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. Click 'Reconstruct' to begin 3D processing.", - ) +# ------------------------------------------------------------------------- +# JavaScript +# ------------------------------------------------------------------------- +app_js = r""" +() => { +function initApp() { + if (window.__mapInitDone) return; + const fileInput = document.getElementById('custom-file-input'); + const uploadPrompt = document.getElementById('upload-prompt'); + const uploadClickArea = document.getElementById('upload-click-area'); + const galleryGrid = document.getElementById('gallery-grid'); + const galleryEmpty = document.getElementById('gallery-empty'); + const statusText = document.getElementById('sb-status-text'); + const fileCountText = document.getElementById('sb-file-count'); + if (!fileInput || !uploadClickArea) { setTimeout(initApp, 250); return; } + window.__mapInitDone = true; + window.__uploadedFiles = []; + window.__hasUpload = false; + let toastTimer = null; + + function showToast(message, type) { + let toast = document.getElementById('app-toast'); + if (!toast) { + toast = document.createElement('div'); + toast.id = 'app-toast'; + toast.className = 'toast-notification'; + toast.innerHTML = ''; + document.body.appendChild(toast); + } + const icon = toast.querySelector('.toast-icon'); + const text = toast.querySelector('.toast-text'); + toast.className = 'toast-notification ' + (type || 'error'); + if (type === 'warning') icon.textContent = '\u26A0'; + else if (type === 'info') icon.textContent = '\u2713'; + else icon.textContent = '\u2717'; + text.textContent = message; + if (toastTimer) clearTimeout(toastTimer); + void toast.offsetWidth; + toast.classList.add('visible'); + toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500); + } + window.__showToast = showToast; + + function showLoaders() { + document.querySelectorAll('.modern-loader').forEach(l => l.classList.add('active')); + if (statusText) statusText.textContent = 'Processing...'; + } + function hideLoaders() { + document.querySelectorAll('.modern-loader').forEach(l => l.classList.remove('active')); + if (statusText) statusText.textContent = 'Done'; + } + window.__showLoaders = showLoaders; + window.__hideLoaders = hideLoaders; + + function updateGallery(files) { + if (!galleryGrid) return; + galleryGrid.innerHTML = ''; + if (!files || files.length === 0) { + if (galleryEmpty) galleryEmpty.style.display = ''; + if (fileCountText) fileCountText.textContent = 'No files'; + return; + } + if (galleryEmpty) galleryEmpty.style.display = 'none'; + if (fileCountText) fileCountText.textContent = files.length + ' file' + (files.length > 1 ? 's' : '') + ' uploaded'; + files.forEach(f => { + if (!f.type || !f.type.startsWith('image/')) return; + const reader = new FileReader(); + reader.onload = (e) => { + const img = document.createElement('img'); + img.className = 'gallery-thumb'; + img.src = e.target.result; + img.title = f.name; + galleryGrid.appendChild(img); + }; + reader.readAsDataURL(f); + }); + } + + function setGradioUpload(files) { + const container = document.getElementById('gradio-file-upload'); + if (!container) return; + const gInput = container.querySelector('input[type="file"]'); + if (!gInput) return; + const dt = new DataTransfer(); + files.forEach(f => dt.items.add(f)); + gInput.files = dt.files; + gInput.dispatchEvent(new Event('change', {bubbles: true})); + } + + function processFiles(fileList) { + const arr = Array.from(fileList); + if (arr.length === 0) return; + window.__uploadedFiles = arr; + window.__hasUpload = true; + updateGallery(arr); + if (uploadPrompt) uploadPrompt.style.display = 'none'; + setGradioUpload(arr); + showToast(arr.length + ' file' + (arr.length > 1 ? 's' : '') + ' uploaded successfully', 'info'); + if (statusText) statusText.textContent = 'Ready'; + } + + uploadClickArea.addEventListener('click', () => fileInput.click()); + const changeImgBtn = document.getElementById('tb-change-img'); + if (changeImgBtn) changeImgBtn.addEventListener('click', () => fileInput.click()); + + fileInput.addEventListener('change', (e) => { + if (e.target.files.length) processFiles(e.target.files); + e.target.value = ''; + }); + + const uploadWrap = document.getElementById('upload-area-wrap'); + if (uploadWrap) { + uploadWrap.addEventListener('dragover', (e) => { e.preventDefault(); uploadWrap.style.outline = '2px solid #00ff00'; uploadWrap.style.outlineOffset = '-2px'; }); + uploadWrap.addEventListener('dragleave', (e) => { e.preventDefault(); uploadWrap.style.outline = ''; }); + uploadWrap.addEventListener('drop', (e) => { e.preventDefault(); uploadWrap.style.outline = ''; if (e.dataTransfer.files.length) processFiles(e.dataTransfer.files); }); + } + + const resetBtn = document.getElementById('tb-reset'); + if (resetBtn) { + resetBtn.addEventListener('click', () => { + window.__uploadedFiles = []; + window.__hasUpload = false; + updateGallery([]); + if (uploadPrompt) uploadPrompt.style.display = ''; + showToast('Workspace cleared', 'info'); + if (statusText) statusText.textContent = 'Ready'; + }); + } + + // Tab switching + document.querySelectorAll('.tab-btn').forEach(btn => { + btn.addEventListener('click', () => { + const group = btn.closest('.app-main-left') || btn.closest('.app-main-right') || document; + const tabBar = btn.parentElement; + tabBar.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active')); + btn.classList.add('active'); + const target = btn.getAttribute('data-tab'); + const container = tabBar.nextElementSibling || tabBar.parentElement; + document.querySelectorAll('.tab-content').forEach(tc => { + if (tc.getAttribute('data-tab-id') === target) tc.classList.add('active'); + else if (tabBar.parentElement.contains(tc)) tc.classList.remove('active'); + }); + }); + }); + + // Custom slider sync + function syncSlider(customId, gradioId) { + const slider = document.getElementById(customId); + const valSpan = document.getElementById(customId + '-val'); + if (!slider) return; + slider.addEventListener('input', () => { + if (valSpan) valSpan.textContent = parseFloat(slider.value).toFixed(slider.step && slider.step.includes('.') ? 1 : 0); + const container = document.getElementById(gradioId); + if (!container) return; + container.querySelectorAll('input[type="range"],input[type="number"]').forEach(el => { + const ns = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value'); + if (ns && ns.set) { ns.set.call(el, slider.value); el.dispatchEvent(new Event('input', {bubbles:true})); el.dispatchEvent(new Event('change', {bubbles:true})); } + }); + }); + } + syncSlider('custom-interval', 'gradio-interval'); + + // Custom checkbox sync + function syncCheckbox(customId, gradioId) { + const cb = document.getElementById(customId); + if (!cb) return; + cb.addEventListener('change', () => { + const container = document.getElementById(gradioId); + if (!container) return; + const gcb = container.querySelector('input[type="checkbox"]'); + if (gcb && gcb.checked !== cb.checked) gcb.click(); + }); + } + syncCheckbox('custom-show-cam', 'gradio-show-cam'); + syncCheckbox('custom-show-mesh', 'gradio-show-mesh'); + syncCheckbox('custom-filter-black', 'gradio-filter-black'); + syncCheckbox('custom-filter-white', 'gradio-filter-white'); + syncCheckbox('custom-apply-mask', 'gradio-apply-mask'); + + // Run button with validation + const customRunBtn = document.getElementById('custom-run-btn'); + if (customRunBtn) { + customRunBtn.addEventListener('click', () => { + if (!window.__hasUpload || window.__uploadedFiles.length === 0) { + showToast('Please upload images or a video first', 'warning'); + return; + } + showLoaders(); + if (statusText) statusText.textContent = 'Reconstructing...'; + setTimeout(() => { + const gradioBtn = document.getElementById('gradio-run-btn'); + if (gradioBtn) { + const btn = gradioBtn.querySelector('button') || gradioBtn; + btn.click(); + } + }, 200); + }); + } + + // Watch for Gradio outputs + function watchOutputs() { + const model3d = document.getElementById('gradio-3d-output'); + if (!model3d) { setTimeout(watchOutputs, 500); return; } + const obs = new MutationObserver(() => { + hideLoaders(); + if (statusText) statusText.textContent = 'Done'; + }); + obs.observe(model3d, {childList:true, subtree:true, attributes:true}); + } + watchOutputs(); + + // Example scene click handler + document.addEventListener('click', (e) => { + const card = e.target.closest('.example-card'); + if (!card) return; + const sceneName = card.getAttribute('data-scene'); + if (!sceneName) return; + window.__hasUpload = true; + if (uploadPrompt) uploadPrompt.style.display = 'none'; + showToast('Loading scene: ' + sceneName, 'info'); + if (statusText) statusText.textContent = 'Loading scene...'; + // Trigger the hidden Gradio button for loading this scene + const sceneInput = document.getElementById('gradio-scene-input'); + if (sceneInput) { + const ta = sceneInput.querySelector('textarea, input'); + if (ta) { + const ns = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value') || Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value'); + if (ns && ns.set) { ns.set.call(ta, sceneName); ta.dispatchEvent(new Event('input', {bubbles:true})); ta.dispatchEvent(new Event('change', {bubbles:true})); } + } + } + setTimeout(() => { + const sceneBtn = document.getElementById('gradio-scene-btn'); + if (sceneBtn) { const b = sceneBtn.querySelector('button') || sceneBtn; b.click(); } + }, 200); + }); + + if (statusText) statusText.textContent = 'Ready'; +} +initApp(); +} +""" # ------------------------------------------------------------------------- -# 6) Build Gradio UI +# Build HTML # ------------------------------------------------------------------------- -theme = get_gradio_theme() +def build_main_html(): + scenes = get_scene_info("examples") + scene_cards = "" + for s in scenes[-12:]: + thumb = s["thumbnail"].replace("\\", "/") + scene_cards += f'
{s[
{s["name"]} ({s["num_images"]})
' + + return f""" +
+
+
+ + MapAnything V1 + 3D Reconstruction +
+
+ +
+ +
+ +
+ +
+
+
+
+
+ + + + + +
Click to upload images or video
or drag and drop files here
+
+
+ +
+ + + +
+
Example Scenes
+
{scene_cards}
+
+
+ +
+
+ +
+ +
+
3D Visualization
+
+
+
+
Running reconstruction...
+
+
+
3D output will appear here after reconstruction
+
+
+ +
+
Settings
+
+
+ + + 1.0 +
+
+
+
+
+
+
+
+
+
+ +
+
No files
+
Ready
+
+
+ """ + -with gr.Blocks() as demo: - # State variables for the tabbed interface +# ------------------------------------------------------------------------- +# Gradio UI +# ------------------------------------------------------------------------- +with gr.Blocks(css=css) as demo: is_example = gr.Textbox(label="is_example", visible=False, value="None") - num_images = gr.Textbox(label="num_images", visible=False, value="None") processed_data_state = gr.State(value=None) measure_points_state = gr.State(value=[]) - current_view_index = gr.State(value=0) # Track current view index for navigation - target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None") - with gr.Row(): - with gr.Column(scale=2): - # Unified upload component for both videos and images - unified_upload = gr.File( - file_count="multiple", - label="Upload Video or Images", - interactive=True, - file_types=["image", "video"], - ) - with gr.Row(): - s_time_interval = gr.Slider( - minimum=0.1, - maximum=5.0, - value=1.0, - step=0.1, - label="Video sample time interval (take a sample every x sec.)", - interactive=True, - visible=True, - scale=3, - ) - resample_btn = gr.Button( - "Resample Video", - visible=False, - variant="secondary", - scale=1, - ) - - image_gallery = gr.Gallery( - label="Preview", - columns=4, - height="300px", - object_fit="contain", - preview=True, - ) - - clear_uploads_btn = gr.ClearButton( - [unified_upload, image_gallery], - value="Clear Uploads", - variant="secondary", - size="sm", - ) - - with gr.Column(scale=4): - with gr.Column(): - gr.Markdown( - "**Metric 3D Reconstruction (Point Cloud and Camera Poses)**" - ) - log_output = gr.Markdown( - "Please upload a video or images, then click Reconstruct.", - elem_classes=["custom-log"], - ) - - # Add tabbed interface similar to MoGe - with gr.Tabs(): - with gr.Tab("3D View"): - reconstruction_output = gr.Model3D( - height=520, - zoom_speed=0.5, - pan_speed=0.5, - clear_color=[0.0, 0.0, 0.0, 0.0], - key="persistent_3d_viewer", - elem_id="reconstruction_3d_viewer", - ) - with gr.Tab("Depth"): - with gr.Row(elem_classes=["navigation-row"]): - prev_depth_btn = gr.Button("◀ Previous", size="sm", scale=1) - depth_view_selector = gr.Dropdown( - choices=["View 1"], - value="View 1", - label="Select View", - scale=2, - interactive=True, - allow_custom_value=True, - ) - next_depth_btn = gr.Button("Next ▶", size="sm", scale=1) - depth_map = gr.Image( - type="numpy", - label="Colorized Depth Map", - format="png", - interactive=False, - ) - with gr.Tab("Normal"): - with gr.Row(elem_classes=["navigation-row"]): - prev_normal_btn = gr.Button( - "◀ Previous", size="sm", scale=1 - ) - normal_view_selector = gr.Dropdown( - choices=["View 1"], - value="View 1", - label="Select View", - scale=2, - interactive=True, - allow_custom_value=True, - ) - next_normal_btn = gr.Button("Next ▶", size="sm", scale=1) - normal_map = gr.Image( - type="numpy", - label="Normal Map", - format="png", - interactive=False, - ) - with gr.Tab("Measure"): - gr.Markdown(MEASURE_INSTRUCTIONS_HTML) - with gr.Row(elem_classes=["navigation-row"]): - prev_measure_btn = gr.Button( - "◀ Previous", size="sm", scale=1 - ) - measure_view_selector = gr.Dropdown( - choices=["View 1"], - value="View 1", - label="Select View", - scale=2, - interactive=True, - allow_custom_value=True, - ) - next_measure_btn = gr.Button("Next ▶", size="sm", scale=1) - measure_image = gr.Image( - type="numpy", - show_label=False, - format="webp", - interactive=False, - sources=[], - ) - gr.Markdown( - "**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken." - ) - measure_text = gr.Markdown("") - - with gr.Row(): - submit_btn = gr.Button("Reconstruct", scale=1, variant="primary") - clear_btn = gr.ClearButton( - [ - unified_upload, - reconstruction_output, - log_output, - target_dir_output, - image_gallery, - ], - scale=1, - ) - - with gr.Row(): - frame_filter = gr.Dropdown( - choices=["All"], value="All", label="Show Points from Frame" - ) - with gr.Column(): - gr.Markdown("### Pointcloud Options: (live updates)") - show_cam = gr.Checkbox(label="Show Camera", value=True) - show_mesh = gr.Checkbox(label="Show Mesh", value=True) - filter_black_bg = gr.Checkbox( - label="Filter Black Background", value=False - ) - filter_white_bg = gr.Checkbox( - label="Filter White Background", value=False - ) - gr.Markdown("### Reconstruction Options: (updated on next run)") - apply_mask_checkbox = gr.Checkbox( - label="Apply mask for predicted ambiguous depth classes & edges", - value=True, - ) - # ---------------------- Example Scenes Section ---------------------- - gr.Markdown("## Example Scenes (lists all scenes in the examples folder)") - gr.Markdown("Click any thumbnail to load the scene for reconstruction.") - - # Get scene information - scenes = get_scene_info("examples") + unified_upload = gr.File(file_count="multiple", elem_id="gradio-file-upload", elem_classes="hidden-input", visible=False, file_types=["image", "video"]) + s_time_interval = gr.Slider(minimum=0.1, maximum=5.0, value=1.0, step=0.1, elem_id="gradio-interval", elem_classes="hidden-input", visible=False) + show_cam = gr.Checkbox(label="Show Camera", value=True, elem_id="gradio-show-cam", elem_classes="hidden-input", visible=False) + show_mesh = gr.Checkbox(label="Show Mesh", value=True, elem_id="gradio-show-mesh", elem_classes="hidden-input", visible=False) + filter_black_bg = gr.Checkbox(label="Filter Black BG", value=False, elem_id="gradio-filter-black", elem_classes="hidden-input", visible=False) + filter_white_bg = gr.Checkbox(label="Filter White BG", value=False, elem_id="gradio-filter-white", elem_classes="hidden-input", visible=False) + apply_mask_checkbox = gr.Checkbox(label="Apply Mask", value=True, elem_id="gradio-apply-mask", elem_classes="hidden-input", visible=False) + frame_filter = gr.Dropdown(choices=["All"], value="All", elem_classes="hidden-input", visible=False) - # Create thumbnail grid (4 columns, N rows) - if scenes: - for i in range(0, len(scenes), 4): # Process 4 scenes per row - with gr.Row(): - for j in range(4): - scene_idx = i + j - if scene_idx < len(scenes): - scene = scenes[scene_idx] - with gr.Column(scale=1, elem_classes=["clickable-thumbnail"]): - # Clickable thumbnail - scene_img = gr.Image( - value=scene["thumbnail"], - height=150, - interactive=False, - show_label=False, - elem_id=f"scene_thumb_{scene['name']}", - sources=[], - ) - - # Scene name and image count as text below thumbnail - gr.Markdown( - f"**{scene['name']}** \n {scene['num_images']} images", - elem_classes=["scene-info"], - ) - - # Connect thumbnail click to load scene - scene_img.select( - fn=lambda name=scene["name"]: load_example_scene(name), - outputs=[ - reconstruction_output, - target_dir_output, - image_gallery, - log_output, - ], - ) - else: - # Empty column to maintain grid structure - with gr.Column(scale=1): - pass - - # ------------------------------------------------------------------------- - # "Reconstruct" button logic: - # - Clear fields - # - Update log - # - gradio_demo(...) with the existing target_dir - # - Then set is_example = "False" - # ------------------------------------------------------------------------- - submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then( - fn=update_log, inputs=[], outputs=[log_output] - ).then( - fn=gradio_demo, - inputs=[ - target_dir_output, - frame_filter, - show_cam, - filter_black_bg, - filter_white_bg, - apply_mask_checkbox, - show_mesh, - ], - outputs=[ - reconstruction_output, - log_output, - frame_filter, - processed_data_state, - depth_map, - normal_map, - measure_image, - measure_text, - depth_view_selector, - normal_view_selector, - measure_view_selector, - ], - ).then( - fn=lambda: "False", - inputs=[], - outputs=[is_example], # set is_example to "False" - ) + scene_name_input = gr.Textbox(value="", elem_id="gradio-scene-input", elem_classes="hidden-input", visible=False) - # ------------------------------------------------------------------------- - # Real-time Visualization Updates - # ------------------------------------------------------------------------- - frame_filter.change( - update_visualization, - [ - target_dir_output, - frame_filter, - show_cam, - is_example, - filter_black_bg, - filter_white_bg, - show_mesh, - ], - [reconstruction_output, log_output], - ) - show_cam.change( - update_visualization, - [ - target_dir_output, - frame_filter, - show_cam, - is_example, - ], - [reconstruction_output, log_output], - ) - filter_black_bg.change( - update_visualization, - [ - target_dir_output, - frame_filter, - show_cam, - is_example, - filter_black_bg, - filter_white_bg, - ], - [reconstruction_output, log_output], - ).then( - fn=update_all_views_on_filter_change, - inputs=[ - target_dir_output, - filter_black_bg, - filter_white_bg, - processed_data_state, - depth_view_selector, - normal_view_selector, - measure_view_selector, - ], - outputs=[ - processed_data_state, - depth_map, - normal_map, - measure_image, - measure_points_state, - ], - ) - filter_white_bg.change( - update_visualization, - [ - target_dir_output, - frame_filter, - show_cam, - is_example, - filter_black_bg, - filter_white_bg, - show_mesh, - ], - [reconstruction_output, log_output], - ).then( - fn=update_all_views_on_filter_change, - inputs=[ - target_dir_output, - filter_black_bg, - filter_white_bg, - processed_data_state, - depth_view_selector, - normal_view_selector, - measure_view_selector, - ], - outputs=[ - processed_data_state, - depth_map, - normal_map, - measure_image, - measure_points_state, - ], - ) + reconstruction_output = gr.Model3D(elem_id="gradio-3d-output", elem_classes="hidden-input", visible=False, height=520, zoom_speed=0.5, pan_speed=0.5, clear_color=[0.0, 0.0, 0.0, 0.0]) + log_output = gr.Markdown(visible=False) + image_gallery = gr.Gallery(visible=False) - show_mesh.change( - update_visualization, - [ - target_dir_output, - frame_filter, - show_cam, - is_example, - filter_black_bg, - filter_white_bg, - show_mesh, - ], - [reconstruction_output, log_output], - ) + depth_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", visible=False, allow_custom_value=True) + normal_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", visible=False, allow_custom_value=True) + measure_view_selector = gr.Dropdown(choices=["View 1"], value="View 1", visible=False, allow_custom_value=True) + depth_map = gr.Image(type="numpy", visible=False, format="png") + normal_map = gr.Image(type="numpy", visible=False, format="png") + measure_image = gr.Image(type="numpy", visible=False, format="webp", sources=[]) + measure_text = gr.Markdown(visible=False) - # ------------------------------------------------------------------------- - # Auto-update gallery whenever user uploads or changes their files - # ------------------------------------------------------------------------- - def update_gallery_on_unified_upload(files, interval): - if not files: - return None, None, None - target_dir, image_paths = handle_uploads(files, interval) - return ( - target_dir, - image_paths, - "Upload complete. Click 'Reconstruct' to begin 3D processing.", - ) - - def show_resample_button(files): - """Show the resample button only if there are uploaded files containing videos""" - if not files: - return gr.update(visible=False) - - # Check if any uploaded files are videos - video_extensions = [ - ".mp4", - ".avi", - ".mov", - ".mkv", - ".wmv", - ".flv", - ".webm", - ".m4v", - ".3gp", - ] - has_video = False - - for file_data in files: - if isinstance(file_data, dict) and "name" in file_data: - file_path = file_data["name"] - else: - file_path = str(file_data) + gr.HTML(build_main_html()) - file_ext = os.path.splitext(file_path)[1].lower() - if file_ext in video_extensions: - has_video = True - break + run_btn = gr.Button("Run", elem_id="gradio-run-btn") + scene_btn = gr.Button("Load Scene", elem_id="gradio-scene-btn", visible=False) - return gr.update(visible=has_video) + demo.load(fn=None, js=app_js) - def hide_resample_button(): - """Hide the resample button after use""" - return gr.update(visible=False) - - def resample_video_with_new_interval(files, new_interval, current_target_dir): - """Resample video with new slider value""" + # Upload handling + def update_gallery_on_unified_upload(files, interval): if not files: - return ( - current_target_dir, - None, - "No files to resample.", - gr.update(visible=False), - ) - - # Check if we have videos to resample - video_extensions = [ - ".mp4", - ".avi", - ".mov", - ".mkv", - ".wmv", - ".flv", - ".webm", - ".m4v", - ".3gp", - ] - has_video = any( - os.path.splitext( - str(file_data["name"] if isinstance(file_data, dict) else file_data) - )[1].lower() - in video_extensions - for file_data in files - ) - - if not has_video: - return ( - current_target_dir, - None, - "No videos found to resample.", - gr.update(visible=False), - ) - - # Clean up old target directory if it exists - if ( - current_target_dir - and current_target_dir != "None" - and os.path.exists(current_target_dir) - ): - shutil.rmtree(current_target_dir) - - # Process files with new interval - target_dir, image_paths = handle_uploads(files, new_interval) - - return ( - target_dir, - image_paths, - f"Video resampled with {new_interval}s interval. Click 'Reconstruct' to begin 3D processing.", - gr.update(visible=False), - ) + return None, None, None + target_dir, image_paths = handle_uploads(files, interval) + return target_dir, image_paths, "Upload complete. Click Reconstruct to begin 3D processing." unified_upload.change( fn=update_gallery_on_unified_upload, inputs=[unified_upload, s_time_interval], outputs=[target_dir_output, image_gallery, log_output], - ).then( - fn=show_resample_button, - inputs=[unified_upload], - outputs=[resample_btn], ) - # Show resample button when slider changes (only if files are uploaded) - s_time_interval.change( - fn=show_resample_button, - inputs=[unified_upload], - outputs=[resample_btn], + # Main reconstruct + run_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then( + fn=update_log, inputs=[], outputs=[log_output] + ).then( + fn=gradio_demo, + inputs=[target_dir_output, frame_filter, show_cam, filter_black_bg, filter_white_bg, apply_mask_checkbox, show_mesh], + outputs=[reconstruction_output, log_output, frame_filter, processed_data_state, depth_map, normal_map, measure_image, measure_text, depth_view_selector, normal_view_selector, measure_view_selector], + ).then(fn=lambda: "False", inputs=[], outputs=[is_example]) + + # Scene loading + def load_scene_handler(scene_name): + if not scene_name or scene_name.strip() == "": + return None, None, None, "No scene selected" + return load_example_scene(scene_name) + + scene_btn.click( + fn=load_scene_handler, + inputs=[scene_name_input], + outputs=[reconstruction_output, target_dir_output, image_gallery, log_output], ) - # Handle resample button click - resample_btn.click( - fn=resample_video_with_new_interval, - inputs=[unified_upload, s_time_interval, target_dir_output], - outputs=[target_dir_output, image_gallery, log_output, resample_btn], - ) + # Visualization updates + frame_filter.change(update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [reconstruction_output, log_output]) + show_cam.change(update_visualization, [target_dir_output, frame_filter, show_cam, is_example], [reconstruction_output, log_output]) + show_mesh.change(update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [reconstruction_output, log_output]) - # ------------------------------------------------------------------------- - # Measure tab functionality - # ------------------------------------------------------------------------- - measure_image.select( - fn=measure, - inputs=[processed_data_state, measure_points_state, measure_view_selector], - outputs=[measure_image, measure_points_state, measure_text], + filter_black_bg.change( + update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg], [reconstruction_output, log_output] + ).then( + fn=update_all_views_on_filter_change, + inputs=[target_dir_output, filter_black_bg, filter_white_bg, processed_data_state, depth_view_selector, normal_view_selector, measure_view_selector], + outputs=[processed_data_state, depth_map, normal_map, measure_image, measure_points_state], ) - # ------------------------------------------------------------------------- - # Navigation functionality for Depth, Normal, and Measure tabs - # ------------------------------------------------------------------------- - - # Depth tab navigation - prev_depth_btn.click( - fn=lambda processed_data, current_selector: navigate_depth_view( - processed_data, current_selector, -1 - ), - inputs=[processed_data_state, depth_view_selector], - outputs=[depth_view_selector, depth_map], + filter_white_bg.change( + update_visualization, [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh], [reconstruction_output, log_output] + ).then( + fn=update_all_views_on_filter_change, + inputs=[target_dir_output, filter_black_bg, filter_white_bg, processed_data_state, depth_view_selector, normal_view_selector, measure_view_selector], + outputs=[processed_data_state, depth_map, normal_map, measure_image, measure_points_state], ) - next_depth_btn.click( - fn=lambda processed_data, current_selector: navigate_depth_view( - processed_data, current_selector, 1 - ), - inputs=[processed_data_state, depth_view_selector], - outputs=[depth_view_selector, depth_map], - ) + # Measure + measure_image.select(fn=measure, inputs=[processed_data_state, measure_points_state, measure_view_selector], outputs=[measure_image, measure_points_state, measure_text]) + # View navigation depth_view_selector.change( - fn=lambda processed_data, selector_value: ( - update_depth_view( - processed_data, - int(selector_value.split()[1]) - 1, - ) - if selector_value - else None - ), - inputs=[processed_data_state, depth_view_selector], - outputs=[depth_map], - ) - - # Normal tab navigation - prev_normal_btn.click( - fn=lambda processed_data, current_selector: navigate_normal_view( - processed_data, current_selector, -1 - ), - inputs=[processed_data_state, normal_view_selector], - outputs=[normal_view_selector, normal_map], + fn=lambda pd, sv: update_depth_view(pd, int(sv.split()[1]) - 1) if sv else None, + inputs=[processed_data_state, depth_view_selector], outputs=[depth_map], ) - - next_normal_btn.click( - fn=lambda processed_data, current_selector: navigate_normal_view( - processed_data, current_selector, 1 - ), - inputs=[processed_data_state, normal_view_selector], - outputs=[normal_view_selector, normal_map], - ) - normal_view_selector.change( - fn=lambda processed_data, selector_value: ( - update_normal_view( - processed_data, - int(selector_value.split()[1]) - 1, - ) - if selector_value - else None - ), - inputs=[processed_data_state, normal_view_selector], - outputs=[normal_map], + fn=lambda pd, sv: update_normal_view(pd, int(sv.split()[1]) - 1) if sv else None, + inputs=[processed_data_state, normal_view_selector], outputs=[normal_map], ) - - # Measure tab navigation - prev_measure_btn.click( - fn=lambda processed_data, current_selector: navigate_measure_view( - processed_data, current_selector, -1 - ), - inputs=[processed_data_state, measure_view_selector], - outputs=[measure_view_selector, measure_image, measure_points_state], - ) - - next_measure_btn.click( - fn=lambda processed_data, current_selector: navigate_measure_view( - processed_data, current_selector, 1 - ), - inputs=[processed_data_state, measure_view_selector], - outputs=[measure_view_selector, measure_image, measure_points_state], - ) - measure_view_selector.change( - fn=lambda processed_data, selector_value: ( - update_measure_view(processed_data, int(selector_value.split()[1]) - 1) - if selector_value - else (None, []) - ), - inputs=[processed_data_state, measure_view_selector], - outputs=[measure_image, measure_points_state], + fn=lambda pd, sv: update_measure_view(pd, int(sv.split()[1]) - 1) if sv else (None, []), + inputs=[processed_data_state, measure_view_selector], outputs=[measure_image, measure_points_state], ) - - demo.queue(max_size=20).launch(theme=theme, css=GRADIO_CSS, show_error=True, share=True, ssr_mode=False) \ No newline at end of file +demo.queue(max_size=20).launch(css=css, show_error=True, share=True, ssr_mode=False, allowed_paths=["examples"]) \ No newline at end of file