prithivMLmods committed on
Commit
97176ea
·
verified ·
1 Parent(s): 082015a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +676 -379
app.py CHANGED
@@ -16,12 +16,12 @@ import torch
16
  from PIL import Image
17
  from pillow_heif import register_heif_opener
18
 
 
19
  import rerun as rr
20
  try:
21
  import rerun.blueprint as rrb
22
  except ImportError:
23
  rrb = None
24
-
25
  from gradio_rerun import Rerun
26
 
27
  register_heif_opener()
@@ -44,7 +44,7 @@ from mapanything.utils.image import load_images, rgb
44
  # MapAnything Configuration
45
  high_level_config = {
46
  "path": "configs/train.yaml",
47
- "hf_model_name": "facebook/map-anything-v1",
48
  "model_str": "mapanything",
49
  "config_overrides": [
50
  "machine=aws",
@@ -61,41 +61,37 @@ high_level_config = {
61
  "resolution": 518,
62
  }
63
 
 
64
  model = None
65
 
66
- TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
67
- os.makedirs(TMP_DIR, exist_ok=True)
68
-
69
 
70
  # -------------------------------------------------------------------------
71
- # Rerun visualization helper
72
  # -------------------------------------------------------------------------
73
- def predictions_to_rrd(predictions, glbfile, target_dir, frame_filter="All", show_cam=True):
74
  """
75
- Log the 3D reconstruction into a Rerun .rrd file and return the path.
76
- Logs the GLB model + camera poses as Rerun entities.
77
  """
78
  run_id = str(uuid.uuid4())
79
- timestamp = datetime.now().strftime("%Y-%m-%dT%H%M%S")
80
- rrd_path = os.path.join(target_dir, f"mapanything_{timestamp}.rrd")
81
-
82
- # Initialize recording
83
  rec = None
84
  if hasattr(rr, "new_recording"):
85
- rec = rr.new_recording(application_id="MapAnything-3D-Viewer", recording_id=run_id)
86
  elif hasattr(rr, "RecordingStream"):
87
- rec = rr.RecordingStream(application_id="MapAnything-3D-Viewer", recording_id=run_id)
88
  else:
89
- rr.init("MapAnything-3D-Viewer", recording_id=run_id, spawn=False)
90
  rec = rr
91
-
92
- # Clear state
93
  rec.log("world", rr.Clear(recursive=True), static=True)
94
-
95
- # Set coordinate system: RIGHT_HAND_Y_UP
96
  rec.log("world", rr.ViewCoordinates.RIGHT_HAND_Y_UP, static=True)
97
 
98
- # Log axes helpers
99
  try:
100
  rec.log("world/axes/x", rr.Arrows3D(vectors=[[0.5, 0, 0]], colors=[[255, 0, 0]]), static=True)
101
  rec.log("world/axes/y", rr.Arrows3D(vectors=[[0, 0.5, 0]], colors=[[0, 255, 0]]), static=True)
@@ -103,117 +99,10 @@ def predictions_to_rrd(predictions, glbfile, target_dir, frame_filter="All", sho
103
  except Exception:
104
  pass
105
 
106
- # Log the GLB model
107
- rec.log("world/model", rr.Asset3D(path=glbfile), static=True)
108
-
109
- # Log camera poses if requested
110
- if show_cam and "extrinsic" in predictions and "intrinsic" in predictions:
111
- try:
112
- extrinsics = predictions["extrinsic"] # (S, 4, 4)
113
- intrinsics = predictions["intrinsic"] # (S, 3, 3)
114
-
115
- for i, (ext, intr) in enumerate(zip(extrinsics, intrinsics)):
116
- # ext is (4,4) camera-to-world pose
117
- translation = ext[:3, 3]
118
- rotation_mat = ext[:3, :3]
119
-
120
- # Log camera transform
121
- rec.log(
122
- f"world/cameras/cam_{i:03d}",
123
- rr.Transform3D(
124
- translation=translation,
125
- mat3x3=rotation_mat,
126
- ),
127
- static=True,
128
- )
129
-
130
- # Log pinhole camera
131
- fx, fy = intr[0, 0], intr[1, 1]
132
- cx, cy = intr[0, 2], intr[1, 2]
133
-
134
- # Get image shape for resolution
135
- if "images" in predictions and i < len(predictions["images"]):
136
- h, w = predictions["images"][i].shape[:2]
137
- else:
138
- h, w = 518, 518
139
-
140
- rec.log(
141
- f"world/cameras/cam_{i:03d}/image",
142
- rr.Pinhole(
143
- focal_length=[fx, fy],
144
- principal_point=[cx, cy],
145
- width=w,
146
- height=h,
147
- ),
148
- static=True,
149
- )
150
-
151
- # Log the actual image on the camera
152
- if "images" in predictions and i < len(predictions["images"]):
153
- img = predictions["images"][i]
154
- if img.dtype != np.uint8:
155
- img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
156
- rec.log(
157
- f"world/cameras/cam_{i:03d}/image/rgb",
158
- rr.Image(img),
159
- static=True,
160
- )
161
- except Exception as e:
162
- print(f"Camera logging failed (non-fatal): {e}")
163
-
164
- # Log point cloud
165
- if "world_points" in predictions and "images" in predictions:
166
- try:
167
- world_points = predictions["world_points"] # (S, H, W, 3)
168
- images = predictions["images"] # (S, H, W, 3)
169
- final_mask = predictions.get("final_mask") # (S, H, W) or None
170
-
171
- all_points = []
172
- all_colors = []
173
-
174
- for i in range(len(world_points)):
175
- pts = world_points[i] # (H, W, 3)
176
- img = images[i] # (H, W, 3)
177
-
178
- # Apply mask
179
- if final_mask is not None:
180
- mask = final_mask[i].astype(bool)
181
- else:
182
- mask = np.ones(pts.shape[:2], dtype=bool)
183
-
184
- pts_flat = pts[mask] # (N, 3)
185
- img_flat = img[mask] # (N, 3)
186
-
187
- if img_flat.dtype != np.uint8:
188
- img_flat = (np.clip(img_flat, 0, 1) * 255).astype(np.uint8)
189
-
190
- all_points.append(pts_flat)
191
- all_colors.append(img_flat)
192
-
193
- if all_points:
194
- all_points = np.concatenate(all_points, axis=0)
195
- all_colors = np.concatenate(all_colors, axis=0)
196
-
197
- # Subsample if too large (Rerun handles large clouds but this keeps it fast)
198
- max_pts = 500_000
199
- if len(all_points) > max_pts:
200
- idx = np.random.choice(len(all_points), max_pts, replace=False)
201
- all_points = all_points[idx]
202
- all_colors = all_colors[idx]
203
-
204
- rec.log(
205
- "world/point_cloud",
206
- rr.Points3D(
207
- positions=all_points,
208
- colors=all_colors,
209
- radii=0.002,
210
- ),
211
- static=True,
212
- )
213
- except Exception as e:
214
- print(f"Point cloud logging failed (non-fatal): {e}")
215
-
216
- # Send blueprint
217
  if rrb is not None:
218
  try:
219
  blueprint = rrb.Blueprint(
@@ -227,7 +116,10 @@ def predictions_to_rrd(predictions, glbfile, target_dir, frame_filter="All", sho
227
  except Exception as e:
228
  print(f"Blueprint creation failed (non-fatal): {e}")
229
 
 
 
230
  rec.save(rrd_path)
 
231
  return rrd_path
232
 
233
 
@@ -242,21 +134,28 @@ def run_model(
242
  filter_black_bg=False,
243
  filter_white_bg=False,
244
  ):
 
 
 
245
  global model
246
- import torch
247
 
248
  print(f"Processing images from {target_dir}")
249
 
 
250
  device = "cuda" if torch.cuda.is_available() else "cpu"
251
  device = torch.device(device)
252
 
 
253
  if model is None:
254
  model = initialize_mapanything_model(high_level_config, device)
 
255
  else:
256
  model = model.to(device)
257
 
258
  model.eval()
259
 
 
260
  print("Loading images...")
261
  image_folder_path = os.path.join(target_dir, "images")
262
  views = load_images(image_folder_path)
@@ -265,12 +164,19 @@ def run_model(
265
  if len(views) == 0:
266
  raise ValueError("No images found. Check your upload.")
267
 
 
268
  print("Running inference...")
 
 
 
269
  outputs = model.infer(
270
  views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
271
  )
272
 
 
273
  predictions = {}
 
 
274
  extrinsic_list = []
275
  intrinsic_list = []
276
  world_points_list = []
@@ -278,52 +184,76 @@ def run_model(
278
  images_list = []
279
  final_mask_list = []
280
 
 
281
  for pred in outputs:
282
- depthmap_torch = pred["depth_z"][0].squeeze(-1)
283
- intrinsics_torch = pred["intrinsics"][0]
284
- camera_pose_torch = pred["camera_poses"][0]
 
285
 
 
286
  pts3d_computed, valid_mask = depthmap_to_world_frame(
287
  depthmap_torch, intrinsics_torch, camera_pose_torch
288
  )
289
 
 
 
290
  if "mask" in pred:
291
  mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool)
292
  else:
 
293
  mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool)
294
 
 
295
  mask = mask & valid_mask.cpu().numpy()
 
296
  image = pred["img_no_norm"][0].cpu().numpy()
297
 
 
298
  extrinsic_list.append(camera_pose_torch.cpu().numpy())
299
  intrinsic_list.append(intrinsics_torch.cpu().numpy())
300
  world_points_list.append(pts3d_computed.cpu().numpy())
301
  depth_maps_list.append(depthmap_torch.cpu().numpy())
302
- images_list.append(image)
303
- final_mask_list.append(mask)
304
 
 
 
305
  predictions["extrinsic"] = np.stack(extrinsic_list, axis=0)
 
 
306
  predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
 
 
307
  predictions["world_points"] = np.stack(world_points_list, axis=0)
308
 
 
309
  depth_maps = np.stack(depth_maps_list, axis=0)
 
310
  if len(depth_maps.shape) == 3:
311
  depth_maps = depth_maps[..., np.newaxis]
 
312
  predictions["depth"] = depth_maps
313
 
 
314
  predictions["images"] = np.stack(images_list, axis=0)
 
 
315
  predictions["final_mask"] = np.stack(final_mask_list, axis=0)
316
 
 
317
  processed_data = process_predictions_for_visualization(
318
  predictions, views, high_level_config, filter_black_bg, filter_white_bg
319
  )
320
 
 
321
  torch.cuda.empty_cache()
322
 
323
  return predictions, processed_data
324
 
325
 
326
  def update_view_selectors(processed_data):
 
327
  if processed_data is None or len(processed_data) == 0:
328
  choices = ["View 1"]
329
  else:
@@ -331,54 +261,73 @@ def update_view_selectors(processed_data):
331
  choices = [f"View {i + 1}" for i in range(num_views)]
332
 
333
  return (
334
- gr.Dropdown(choices=choices, value=choices[0]),
335
- gr.Dropdown(choices=choices, value=choices[0]),
336
- gr.Dropdown(choices=choices, value=choices[0]),
337
  )
338
 
339
 
340
  def get_view_data_by_index(processed_data, view_index):
 
341
  if processed_data is None or len(processed_data) == 0:
342
  return None
 
343
  view_keys = list(processed_data.keys())
344
  if view_index < 0 or view_index >= len(view_keys):
345
  view_index = 0
 
346
  return processed_data[view_keys[view_index]]
347
 
348
 
349
  def update_depth_view(processed_data, view_index):
 
350
  view_data = get_view_data_by_index(processed_data, view_index)
351
  if view_data is None or view_data["depth"] is None:
352
  return None
 
353
  return colorize_depth(view_data["depth"], mask=view_data.get("mask"))
354
 
355
 
356
  def update_normal_view(processed_data, view_index):
 
357
  view_data = get_view_data_by_index(processed_data, view_index)
358
  if view_data is None or view_data["normal"] is None:
359
  return None
 
360
  return colorize_normal(view_data["normal"], mask=view_data.get("mask"))
361
 
362
 
363
  def update_measure_view(processed_data, view_index):
 
364
  view_data = get_view_data_by_index(processed_data, view_index)
365
  if view_data is None:
366
- return None, []
367
 
 
368
  image = view_data["image"].copy()
 
 
369
  if image.dtype != np.uint8:
370
  if image.max() <= 1.0:
371
  image = (image * 255).astype(np.uint8)
372
  else:
373
  image = image.astype(np.uint8)
374
 
 
375
  if view_data["mask"] is not None:
376
  mask = view_data["mask"]
377
- invalid_mask = ~mask
 
 
 
 
378
  if invalid_mask.any():
 
379
  overlay_color = np.array([255, 220, 220], dtype=np.uint8)
380
- alpha = 0.5
381
- for c in range(3):
 
 
382
  image[:, :, c] = np.where(
383
  invalid_mask,
384
  (1 - alpha) * image[:, :, c] + alpha * overlay_color[c],
@@ -389,53 +338,75 @@ def update_measure_view(processed_data, view_index):
389
 
390
 
391
  def navigate_depth_view(processed_data, current_selector_value, direction):
 
392
  if processed_data is None or len(processed_data) == 0:
393
  return "View 1", None
 
 
394
  try:
395
  current_view = int(current_selector_value.split()[1]) - 1
396
  except:
397
  current_view = 0
 
398
  num_views = len(processed_data)
399
  new_view = (current_view + direction) % num_views
 
400
  new_selector_value = f"View {new_view + 1}"
401
  depth_vis = update_depth_view(processed_data, new_view)
 
402
  return new_selector_value, depth_vis
403
 
404
 
405
  def navigate_normal_view(processed_data, current_selector_value, direction):
 
406
  if processed_data is None or len(processed_data) == 0:
407
  return "View 1", None
 
 
408
  try:
409
  current_view = int(current_selector_value.split()[1]) - 1
410
  except:
411
  current_view = 0
 
412
  num_views = len(processed_data)
413
  new_view = (current_view + direction) % num_views
 
414
  new_selector_value = f"View {new_view + 1}"
415
  normal_vis = update_normal_view(processed_data, new_view)
 
416
  return new_selector_value, normal_vis
417
 
418
 
419
  def navigate_measure_view(processed_data, current_selector_value, direction):
 
420
  if processed_data is None or len(processed_data) == 0:
421
  return "View 1", None, []
 
 
422
  try:
423
  current_view = int(current_selector_value.split()[1]) - 1
424
  except:
425
  current_view = 0
 
426
  num_views = len(processed_data)
427
  new_view = (current_view + direction) % num_views
 
428
  new_selector_value = f"View {new_view + 1}"
429
  measure_image, measure_points = update_measure_view(processed_data, new_view)
 
430
  return new_selector_value, measure_image, measure_points
431
 
432
 
433
  def populate_visualization_tabs(processed_data):
 
434
  if processed_data is None or len(processed_data) == 0:
435
  return None, None, None, []
 
 
436
  depth_vis = update_depth_view(processed_data, 0)
437
  normal_vis = update_normal_view(processed_data, 0)
438
  measure_img, _ = update_measure_view(processed_data, 0)
 
439
  return depth_vis, normal_vis, measure_img, []
440
 
441
 
@@ -443,14 +414,20 @@ def populate_visualization_tabs(processed_data):
443
  # 2) Handle uploaded video/images --> produce target_dir + images
444
  # -------------------------------------------------------------------------
445
  def handle_uploads(unified_upload, s_time_interval=1.0):
 
 
 
 
446
  start_time = time.time()
447
  gc.collect()
448
  torch.cuda.empty_cache()
449
 
 
450
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
451
  target_dir = f"input_images_{timestamp}"
452
  target_dir_images = os.path.join(target_dir, "images")
453
 
 
454
  if os.path.exists(target_dir):
455
  shutil.rmtree(target_dir)
456
  os.makedirs(target_dir)
@@ -458,6 +435,7 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
458
 
459
  image_paths = []
460
 
 
461
  if unified_upload is not None:
462
  for file_data in unified_upload:
463
  if isinstance(file_data, dict) and "name" in file_data:
@@ -467,11 +445,24 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
467
 
468
  file_ext = os.path.splitext(file_path)[1].lower()
469
 
470
- video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
 
 
 
 
 
 
 
 
 
 
 
471
  if file_ext in video_extensions:
 
472
  vs = cv2.VideoCapture(file_path)
473
  fps = vs.get(cv2.CAP_PROP_FPS)
474
- frame_interval = int(fps * s_time_interval)
 
475
  count = 0
476
  video_frame_num = 0
477
  while True:
@@ -480,36 +471,65 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
480
  break
481
  count += 1
482
  if count % frame_interval == 0:
 
483
  base_name = os.path.splitext(os.path.basename(file_path))[0]
484
- image_path = os.path.join(target_dir_images, f"{base_name}_{video_frame_num:06}.png")
 
 
485
  cv2.imwrite(image_path, frame)
486
  image_paths.append(image_path)
487
  video_frame_num += 1
488
  vs.release()
489
- print(f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}")
 
 
 
490
  else:
 
 
491
  if file_ext in [".heic", ".heif"]:
 
492
  try:
493
  with Image.open(file_path) as img:
 
494
  if img.mode not in ("RGB", "L"):
495
  img = img.convert("RGB")
 
 
496
  base_name = os.path.splitext(os.path.basename(file_path))[0]
497
- dst_path = os.path.join(target_dir_images, f"{base_name}.jpg")
 
 
 
 
498
  img.save(dst_path, "JPEG", quality=95)
499
  image_paths.append(dst_path)
 
 
 
500
  except Exception as e:
501
  print(f"Error converting HEIC file {file_path}: {e}")
502
- dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
 
 
 
503
  shutil.copy(file_path, dst_path)
504
  image_paths.append(dst_path)
505
  else:
506
- dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
 
 
 
507
  shutil.copy(file_path, dst_path)
508
  image_paths.append(dst_path)
509
 
 
510
  image_paths = sorted(image_paths)
 
511
  end_time = time.time()
512
- print(f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds")
 
 
513
  return target_dir, image_paths
514
 
515
 
@@ -517,6 +537,11 @@ def handle_uploads(unified_upload, s_time_interval=1.0):
517
  # 3) Update gallery on upload
518
  # -------------------------------------------------------------------------
519
  def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0):
 
 
 
 
 
520
  if not input_video and not input_images:
521
  return None, None, None, None
522
  target_dir, image_paths = handle_uploads(input_video, input_images, s_time_interval)
@@ -529,7 +554,7 @@ def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0):
529
 
530
 
531
  # -------------------------------------------------------------------------
532
- # 4) Reconstruction
533
  # -------------------------------------------------------------------------
534
  @spaces.GPU(duration=120)
535
  def gradio_demo(
@@ -541,6 +566,9 @@ def gradio_demo(
541
  apply_mask=True,
542
  show_mesh=True,
543
  ):
 
 
 
544
  if not os.path.isdir(target_dir) or target_dir == "None":
545
  return None, "No valid target directory found. Please upload first.", None, None
546
 
@@ -548,8 +576,13 @@ def gradio_demo(
548
  gc.collect()
549
  torch.cuda.empty_cache()
550
 
 
551
  target_dir_images = os.path.join(target_dir, "images")
552
- all_files = sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
 
 
 
 
553
  all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
554
  frame_filter_choices = ["All"] + all_files
555
 
@@ -557,50 +590,66 @@ def gradio_demo(
557
  with torch.no_grad():
558
  predictions, processed_data = run_model(target_dir, apply_mask)
559
 
 
560
  prediction_save_path = os.path.join(target_dir, "predictions.npz")
561
  np.savez(prediction_save_path, **predictions)
562
 
 
563
  if frame_filter is None:
564
  frame_filter = "All"
565
 
 
566
  glbfile = os.path.join(
567
  target_dir,
568
  f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
569
  )
570
 
 
571
  glbscene = predictions_to_glb(
572
  predictions,
573
  filter_by_frames=frame_filter,
574
  show_cam=show_cam,
575
  mask_black_bg=filter_black_bg,
576
  mask_white_bg=filter_white_bg,
577
- as_mesh=show_mesh,
578
  )
579
  glbscene.export(file_obj=glbfile)
 
 
 
 
 
580
 
581
- # --- Generate Rerun .rrd ---
582
- rrd_path = predictions_to_rrd(predictions, glbfile, target_dir, frame_filter, show_cam)
583
-
584
  del predictions
585
  gc.collect()
586
  torch.cuda.empty_cache()
587
 
588
  end_time = time.time()
589
  print(f"Total time: {end_time - start_time:.2f} seconds")
590
- log_msg = f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
 
 
 
 
 
 
 
591
 
592
- depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(processed_data)
593
- depth_selector, normal_selector, measure_selector = update_view_selectors(processed_data)
 
 
594
 
595
  return (
596
- rrd_path, # <-- now an .rrd path for the Rerun viewer
597
  log_msg,
598
  gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
599
  processed_data,
600
  depth_vis,
601
  normal_vis,
602
  measure_img,
603
- "",
604
  depth_selector,
605
  normal_selector,
606
  measure_selector,
@@ -608,47 +657,76 @@ def gradio_demo(
608
 
609
 
610
  # -------------------------------------------------------------------------
611
- # 5) Helper functions
612
  # -------------------------------------------------------------------------
613
  def colorize_depth(depth_map, mask=None):
 
614
  if depth_map is None:
615
  return None
 
 
616
  depth_normalized = depth_map.copy()
617
  valid_mask = depth_normalized > 0
 
 
618
  if mask is not None:
619
  valid_mask = valid_mask & mask
 
620
  if valid_mask.sum() > 0:
621
  valid_depths = depth_normalized[valid_mask]
622
  p5 = np.percentile(valid_depths, 5)
623
  p95 = np.percentile(valid_depths, 95)
 
624
  depth_normalized[valid_mask] = (depth_normalized[valid_mask] - p5) / (p95 - p5)
 
 
625
  import matplotlib.pyplot as plt
 
626
  colormap = plt.cm.turbo_r
627
  colored = colormap(depth_normalized)
628
  colored = (colored[:, :, :3] * 255).astype(np.uint8)
 
 
629
  colored[~valid_mask] = [255, 255, 255]
 
630
  return colored
631
 
632
 
633
  def colorize_normal(normal_map, mask=None):
 
634
  if normal_map is None:
635
  return None
 
 
636
  normal_vis = normal_map.copy()
 
 
637
  if mask is not None:
638
  invalid_mask = ~mask
639
- normal_vis[invalid_mask] = [0, 0, 0]
 
 
640
  normal_vis = (normal_vis + 1.0) / 2.0
641
  normal_vis = (normal_vis * 255).astype(np.uint8)
 
642
  return normal_vis
643
 
644
 
645
  def process_predictions_for_visualization(
646
  predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
647
  ):
 
648
  processed_data = {}
 
 
649
  for view_idx, view in enumerate(views):
 
650
  image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
 
 
651
  pred_pts3d = predictions["world_points"][view_idx]
 
 
652
  view_data = {
653
  "image": image[0],
654
  "points3d": pred_pts3d,
@@ -656,44 +734,70 @@ def process_predictions_for_visualization(
656
  "normal": None,
657
  "mask": None,
658
  }
 
 
659
  mask = predictions["final_mask"][view_idx].copy()
 
 
660
  if filter_black_bg:
 
661
  view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
 
662
  black_bg_mask = view_colors.sum(axis=2) >= 16
663
  mask = mask & black_bg_mask
 
 
664
  if filter_white_bg:
 
665
  view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
 
666
  white_bg_mask = ~(
667
  (view_colors[:, :, 0] > 240)
668
  & (view_colors[:, :, 1] > 240)
669
  & (view_colors[:, :, 2] > 240)
670
  )
671
  mask = mask & white_bg_mask
 
672
  view_data["mask"] = mask
673
  view_data["depth"] = predictions["depth"][view_idx].squeeze()
 
674
  normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
675
  view_data["normal"] = normals
 
676
  processed_data[view_idx] = view_data
 
677
  return processed_data
678
 
679
 
680
  def reset_measure(processed_data):
 
681
  if processed_data is None or len(processed_data) == 0:
682
  return None, [], ""
 
 
683
  first_view = list(processed_data.values())[0]
684
  return first_view["image"], [], ""
685
 
686
 
687
- def measure(processed_data, measure_points, current_view_selector, event: gr.SelectData):
 
 
 
688
  try:
 
 
689
  if processed_data is None or len(processed_data) == 0:
690
  return None, [], "No data available"
691
 
 
692
  try:
693
  current_view_index = int(current_view_selector.split()[1]) - 1
694
  except:
695
  current_view_index = 0
696
 
 
 
 
697
  if current_view_index < 0 or current_view_index >= len(processed_data):
698
  current_view_index = 0
699
 
@@ -704,14 +808,21 @@ def measure(processed_data, measure_points, current_view_selector, event: gr.Sel
704
  return None, [], "No view data available"
705
 
706
  point2d = event.index[0], event.index[1]
 
707
 
 
708
  if (
709
  current_view["mask"] is not None
710
  and 0 <= point2d[1] < current_view["mask"].shape[0]
711
  and 0 <= point2d[0] < current_view["mask"].shape[1]
712
  ):
 
713
  if not current_view["mask"][point2d[1], point2d[0]]:
714
- masked_image, _ = update_measure_view(processed_data, current_view_index)
 
 
 
 
715
  return (
716
  masked_image,
717
  measure_points,
@@ -719,70 +830,103 @@ def measure(processed_data, measure_points, current_view_selector, event: gr.Sel
719
  )
720
 
721
  measure_points.append(point2d)
 
 
722
  image, _ = update_measure_view(processed_data, current_view_index)
723
  if image is None:
724
  return None, [], "No image available"
 
725
  image = image.copy()
726
  points3d = current_view["points3d"]
727
 
728
- if image.dtype != np.uint8:
729
- if image.max() <= 1.0:
730
- image = (image * 255).astype(np.uint8)
731
- else:
732
- image = image.astype(np.uint8)
 
 
 
 
 
 
 
733
 
734
- for p in measure_points:
735
- if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
736
- image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2)
 
 
 
 
 
 
 
737
 
738
  depth_text = ""
739
- for i, p in enumerate(measure_points):
740
- if (
741
- current_view["depth"] is not None
742
- and 0 <= p[1] < current_view["depth"].shape[0]
743
- and 0 <= p[0] < current_view["depth"].shape[1]
744
- ):
745
- d = current_view["depth"][p[1], p[0]]
746
- depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n"
747
- else:
748
  if (
749
- points3d is not None
750
- and 0 <= p[1] < points3d.shape[0]
751
- and 0 <= p[0] < points3d.shape[1]
752
  ):
753
- z = points3d[p[1], p[0], 2]
754
- depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n"
 
 
 
 
 
 
 
 
 
 
 
 
755
 
756
  if len(measure_points) == 2:
757
- point1, point2 = measure_points
758
- if (
759
- 0 <= point1[0] < image.shape[1]
760
- and 0 <= point1[1] < image.shape[0]
761
- and 0 <= point2[0] < image.shape[1]
762
- and 0 <= point2[1] < image.shape[0]
763
- ):
764
- image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2)
765
-
766
- distance_text = "- **Distance: Unable to compute**"
767
- if (
768
- points3d is not None
769
- and 0 <= point1[1] < points3d.shape[0]
770
- and 0 <= point1[0] < points3d.shape[1]
771
- and 0 <= point2[1] < points3d.shape[0]
772
- and 0 <= point2[0] < points3d.shape[1]
773
- ):
774
- try:
775
- p1_3d = points3d[point1[1], point1[0]]
776
- p2_3d = points3d[point2[1], point2[0]]
777
- distance = np.linalg.norm(p1_3d - p2_3d)
778
- distance_text = f"- **Distance: {distance:.2f}m**"
779
- except Exception as e:
780
- distance_text = f"- **Distance computation error: {e}**"
781
-
782
- measure_points = []
783
- text = depth_text + distance_text
784
- return [image, measure_points, text]
 
 
 
 
 
 
 
 
 
 
785
  else:
 
786
  return [image, measure_points, depth_text]
787
 
788
  except Exception as e:
@@ -791,10 +935,16 @@ def measure(processed_data, measure_points, current_view_selector, event: gr.Sel
791
 
792
 
793
  def clear_fields():
 
 
 
794
  return None
795
 
796
 
797
  def update_log():
 
 
 
798
  return "Loading and Reconstructing..."
799
 
800
 
@@ -807,6 +957,12 @@ def update_visualization(
807
  filter_white_bg=False,
808
  show_mesh=True,
809
  ):
 
 
 
 
 
 
810
  if is_example == "True":
811
  return (
812
  gr.update(),
@@ -844,11 +1000,14 @@ def update_visualization(
844
  as_mesh=show_mesh,
845
  )
846
  glbscene.export(file_obj=glbfile)
 
 
 
847
 
848
- # Re-generate Rerun recording with updated options
849
- rrd_path = predictions_to_rrd(predictions, glbfile, target_dir, frame_filter, show_cam)
850
-
851
- return rrd_path, "Visualization updated."
852
 
853
 
854
  def update_all_views_on_filter_change(
@@ -860,6 +1019,11 @@ def update_all_views_on_filter_change(
860
  normal_view_selector,
861
  measure_view_selector,
862
  ):
 
 
 
 
 
863
  if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
864
  return processed_data, None, None, None, []
865
 
@@ -868,27 +1032,44 @@ def update_all_views_on_filter_change(
868
  return processed_data, None, None, None, []
869
 
870
  try:
 
871
  loaded = np.load(predictions_path, allow_pickle=True)
872
  predictions = {key: loaded[key] for key in loaded.keys()}
 
 
873
  image_folder_path = os.path.join(target_dir, "images")
874
  views = load_images(image_folder_path)
 
 
875
  new_processed_data = process_predictions_for_visualization(
876
  predictions, views, high_level_config, filter_black_bg, filter_white_bg
877
  )
878
 
 
879
  try:
880
- depth_view_idx = int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
 
 
881
  except:
882
  depth_view_idx = 0
 
883
  try:
884
- normal_view_idx = int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
 
 
885
  except:
886
  normal_view_idx = 0
 
887
  try:
888
- measure_view_idx = int(measure_view_selector.split()[1]) - 1 if measure_view_selector else 0
 
 
 
 
889
  except:
890
  measure_view_idx = 0
891
 
 
892
  depth_vis = update_depth_view(new_processed_data, depth_view_idx)
893
  normal_vis = update_normal_view(new_processed_data, normal_view_idx)
894
  measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
@@ -904,41 +1085,69 @@ def update_all_views_on_filter_change(
904
  # Example scene functions
905
  # -------------------------------------------------------------------------
906
  def get_scene_info(examples_dir):
 
907
  import glob
 
908
  scenes = []
909
  if not os.path.exists(examples_dir):
910
  return scenes
 
911
  for scene_folder in sorted(os.listdir(examples_dir)):
912
  scene_path = os.path.join(examples_dir, scene_folder)
913
  if os.path.isdir(scene_path):
 
914
  image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"]
915
  image_files = []
916
  for ext in image_extensions:
917
  image_files.extend(glob.glob(os.path.join(scene_path, ext)))
918
  image_files.extend(glob.glob(os.path.join(scene_path, ext.upper())))
 
919
  if image_files:
 
920
  image_files = sorted(image_files)
921
- scenes.append({
922
- "name": scene_folder,
923
- "path": scene_path,
924
- "thumbnail": image_files[0],
925
- "num_images": len(image_files),
926
- "image_files": image_files,
927
- })
 
 
 
 
 
 
928
  return scenes
929
 
930
 
931
  def load_example_scene(scene_name, examples_dir="examples"):
 
932
  scenes = get_scene_info(examples_dir)
933
- selected_scene = next((s for s in scenes if s["name"] == scene_name), None)
 
 
 
 
 
 
 
934
  if selected_scene is None:
935
  return None, None, None, "Scene not found"
936
- file_objects = selected_scene["image_files"]
 
 
 
 
 
 
 
937
  target_dir, image_paths = handle_uploads(file_objects, 1.0)
 
938
  return (
939
- None,
940
- target_dir,
941
- image_paths,
942
  f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. Click 'Reconstruct' to begin 3D processing.",
943
  )
944
 
@@ -948,124 +1157,173 @@ def load_example_scene(scene_name, examples_dir="examples"):
948
  # -------------------------------------------------------------------------
949
  theme = get_gradio_theme()
950
 
951
- with gr.Blocks() as demo:
 
952
  is_example = gr.Textbox(label="is_example", visible=False, value="None")
953
  num_images = gr.Textbox(label="num_images", visible=False, value="None")
954
  processed_data_state = gr.State(value=None)
955
  measure_points_state = gr.State(value=[])
956
- current_view_index = gr.State(value=0)
957
  target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
958
 
 
 
 
 
 
 
 
 
 
 
 
959
  with gr.Row():
960
- with gr.Column(scale=2):
961
- unified_upload = gr.File(
962
- file_count="multiple",
963
- label="Upload Video or Images",
964
- interactive=True,
965
- file_types=["image", "video"],
966
- )
967
- with gr.Row():
968
- s_time_interval = gr.Slider(
969
- minimum=0.1,
970
- maximum=5.0,
971
- value=1.0,
972
- step=0.1,
973
- label="Video sample time interval (take a sample every x sec.)",
974
  interactive=True,
975
- visible=True,
976
- scale=3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
977
  )
978
- resample_btn = gr.Button("Resample Video", visible=False, variant="secondary", scale=1)
979
-
980
- image_gallery = gr.Gallery(
981
- label="Preview",
982
- columns=4,
983
- height="300px",
984
- object_fit="contain",
985
- preview=True,
986
- )
987
- clear_uploads_btn = gr.ClearButton(
988
- [unified_upload, image_gallery],
989
- value="Clear Uploads",
990
- variant="secondary",
991
- size="sm",
992
- )
993
 
994
- with gr.Column(scale=4):
995
- with gr.Column():
996
- gr.Markdown("**Metric 3D Reconstruction (Point Cloud and Camera Poses)**")
997
- log_output = gr.Markdown(
998
- "Please upload a video or images, then click Reconstruct.",
999
- elem_classes=["custom-log"],
1000
  )
 
 
 
 
 
 
 
 
 
 
 
 
1001
 
1002
- with gr.Tabs():
1003
- with gr.Tab("3D View"):
1004
- # ---- RERUN VIEWER (replaces gr.Model3D) ----
1005
- reconstruction_output = Rerun(
1006
- label="Rerun 3D Viewer",
1007
- height=520,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
  )
1009
- with gr.Tab("Depth"):
1010
- with gr.Row(elem_classes=["navigation-row"]):
1011
- prev_depth_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1012
- depth_view_selector = gr.Dropdown(
1013
- choices=["View 1"], value="View 1", label="Select View",
1014
- scale=2, interactive=True, allow_custom_value=True,
1015
- )
1016
- next_depth_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1017
- depth_map = gr.Image(type="numpy", label="Colorized Depth Map", format="png", interactive=False)
1018
- with gr.Tab("Normal"):
1019
- with gr.Row(elem_classes=["navigation-row"]):
1020
- prev_normal_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1021
- normal_view_selector = gr.Dropdown(
1022
- choices=["View 1"], value="View 1", label="Select View",
1023
- scale=2, interactive=True, allow_custom_value=True,
1024
- )
1025
- next_normal_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1026
- normal_map = gr.Image(type="numpy", label="Normal Map", format="png", interactive=False)
1027
- with gr.Tab("Measure"):
1028
- gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
1029
- with gr.Row(elem_classes=["navigation-row"]):
1030
- prev_measure_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1031
- measure_view_selector = gr.Dropdown(
1032
- choices=["View 1"], value="View 1", label="Select View",
1033
- scale=2, interactive=True, allow_custom_value=True,
1034
- )
1035
- next_measure_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1036
- measure_image = gr.Image(
1037
- type="numpy", show_label=False, format="webp",
1038
- interactive=False, sources=[],
1039
  )
1040
- gr.Markdown("**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken.")
1041
- measure_text = gr.Markdown("")
1042
-
1043
- with gr.Row():
1044
- submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
1045
- clear_btn = gr.ClearButton(
1046
- [unified_upload, reconstruction_output, log_output, target_dir_output, image_gallery],
1047
- scale=1,
1048
- )
1049
-
1050
- with gr.Row():
1051
- frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
1052
- with gr.Column():
1053
- gr.Markdown("### Pointcloud Options: (live updates)")
1054
- show_cam = gr.Checkbox(label="Show Camera", value=True)
1055
- show_mesh = gr.Checkbox(label="Show Mesh", value=True)
1056
- filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
1057
- filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
1058
- gr.Markdown("### Reconstruction Options: (updated on next run)")
1059
- apply_mask_checkbox = gr.Checkbox(
1060
- label="Apply mask for predicted ambiguous depth classes & edges", value=True
1061
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1062
 
1063
- gr.Markdown("## Example Scenes (lists all scenes in the examples folder)")
1064
- gr.Markdown("Click any thumbnail to load the scene for reconstruction.")
 
 
1065
 
1066
  scenes = get_scene_info("examples")
 
1067
  if scenes:
1068
- for i in range(0, len(scenes), 4):
1069
  with gr.Row():
1070
  for j in range(4):
1071
  scene_idx = i + j
@@ -1080,30 +1338,61 @@ with gr.Blocks() as demo:
1080
  elem_id=f"scene_thumb_{scene['name']}",
1081
  sources=[],
1082
  )
1083
- gr.Markdown(f"**{scene['name']}** \n {scene['num_images']} images", elem_classes=["scene-info"])
 
 
 
 
1084
  scene_img.select(
1085
  fn=lambda name=scene["name"]: load_example_scene(name),
1086
- outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
 
 
 
 
 
1087
  )
1088
  else:
1089
  with gr.Column(scale=1):
1090
  pass
1091
 
1092
- # -------------------------------------------------------------------------
1093
- # Event wiring
1094
- # -------------------------------------------------------------------------
 
1095
  submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
1096
  fn=update_log, inputs=[], outputs=[log_output]
1097
  ).then(
1098
  fn=gradio_demo,
1099
- inputs=[target_dir_output, frame_filter, show_cam, filter_black_bg, filter_white_bg, apply_mask_checkbox, show_mesh],
 
 
 
 
 
 
 
 
1100
  outputs=[
1101
- reconstruction_output, log_output, frame_filter, processed_data_state,
1102
- depth_map, normal_map, measure_image, measure_text,
1103
- depth_view_selector, normal_view_selector, measure_view_selector,
 
 
 
 
 
 
 
 
1104
  ],
1105
- ).then(fn=lambda: "False", inputs=[], outputs=[is_example])
 
 
 
 
1106
 
 
1107
  frame_filter.change(
1108
  update_visualization,
1109
  [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh],
@@ -1138,95 +1427,103 @@ with gr.Blocks() as demo:
1138
  [reconstruction_output, log_output],
1139
  )
1140
 
 
1141
  def update_gallery_on_unified_upload(files, interval):
1142
  if not files:
1143
- return None, None, None
1144
  target_dir, image_paths = handle_uploads(files, interval)
1145
- return target_dir, image_paths, "Upload complete. Click 'Reconstruct' to begin 3D processing."
1146
 
1147
  def show_resample_button(files):
1148
- if not files:
1149
- return gr.update(visible=False)
1150
- video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
1151
- has_video = any(
1152
- os.path.splitext(str(f["name"] if isinstance(f, dict) else f))[1].lower() in video_extensions
1153
- for f in files
1154
- )
 
1155
  return gr.update(visible=has_video)
1156
 
1157
- def hide_resample_button():
1158
- return gr.update(visible=False)
1159
-
1160
  def resample_video_with_new_interval(files, new_interval, current_target_dir):
1161
- if not files:
1162
- return current_target_dir, None, "No files to resample.", gr.update(visible=False)
1163
- video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
1164
- has_video = any(
1165
- os.path.splitext(str(f["name"] if isinstance(f, dict) else f))[1].lower() in video_extensions
1166
- for f in files
1167
- )
1168
- if not has_video:
1169
- return current_target_dir, None, "No videos found to resample.", gr.update(visible=False)
1170
  if current_target_dir and current_target_dir != "None" and os.path.exists(current_target_dir):
1171
  shutil.rmtree(current_target_dir)
 
1172
  target_dir, image_paths = handle_uploads(files, new_interval)
1173
- return target_dir, image_paths, f"Video resampled with {new_interval}s interval. Click 'Reconstruct' to begin 3D processing.", gr.update(visible=False)
1174
 
1175
  unified_upload.change(
1176
  fn=update_gallery_on_unified_upload,
1177
  inputs=[unified_upload, s_time_interval],
1178
  outputs=[target_dir_output, image_gallery, log_output],
1179
- ).then(fn=show_resample_button, inputs=[unified_upload], outputs=[resample_btn])
 
 
 
 
 
 
 
 
 
 
1180
 
1181
- s_time_interval.change(fn=show_resample_button, inputs=[unified_upload], outputs=[resample_btn])
1182
  resample_btn.click(
1183
  fn=resample_video_with_new_interval,
1184
  inputs=[unified_upload, s_time_interval, target_dir_output],
1185
  outputs=[target_dir_output, image_gallery, log_output, resample_btn],
1186
  )
1187
 
 
1188
  measure_image.select(
1189
  fn=measure,
1190
  inputs=[processed_data_state, measure_points_state, measure_view_selector],
1191
  outputs=[measure_image, measure_points_state, measure_text],
1192
  )
1193
 
 
1194
  prev_depth_btn.click(
1195
- fn=lambda pd, sel: navigate_depth_view(pd, sel, -1),
1196
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_view_selector, depth_map],
1197
  )
1198
  next_depth_btn.click(
1199
- fn=lambda pd, sel: navigate_depth_view(pd, sel, 1),
1200
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_view_selector, depth_map],
1201
  )
1202
  depth_view_selector.change(
1203
- fn=lambda pd, sel: update_depth_view(pd, int(sel.split()[1]) - 1) if sel else None,
1204
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_map],
1205
  )
1206
 
1207
  prev_normal_btn.click(
1208
- fn=lambda pd, sel: navigate_normal_view(pd, sel, -1),
1209
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_view_selector, normal_map],
1210
  )
1211
  next_normal_btn.click(
1212
- fn=lambda pd, sel: navigate_normal_view(pd, sel, 1),
1213
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_view_selector, normal_map],
1214
  )
1215
  normal_view_selector.change(
1216
- fn=lambda pd, sel: update_normal_view(pd, int(sel.split()[1]) - 1) if sel else None,
1217
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_map],
1218
  )
1219
 
1220
  prev_measure_btn.click(
1221
- fn=lambda pd, sel: navigate_measure_view(pd, sel, -1),
1222
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_view_selector, measure_image, measure_points_state],
1223
  )
1224
  next_measure_btn.click(
1225
- fn=lambda pd, sel: navigate_measure_view(pd, sel, 1),
1226
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_view_selector, measure_image, measure_points_state],
1227
  )
1228
  measure_view_selector.change(
1229
- fn=lambda pd, sel: update_measure_view(pd, int(sel.split()[1]) - 1) if sel else (None, []),
1230
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_image, measure_points_state],
1231
  )
1232
 
 
16
  from PIL import Image
17
  from pillow_heif import register_heif_opener
18
 
19
+ # --- Rerun Imports ---
20
  import rerun as rr
21
  try:
22
  import rerun.blueprint as rrb
23
  except ImportError:
24
  rrb = None
 
25
  from gradio_rerun import Rerun
26
 
27
  register_heif_opener()
 
44
  # MapAnything Configuration
45
  high_level_config = {
46
  "path": "configs/train.yaml",
47
+ "hf_model_name": "facebook/map-anything-v1", # -- facebook/map-anything
48
  "model_str": "mapanything",
49
  "config_overrides": [
50
  "machine=aws",
 
61
  "resolution": 518,
62
  }
63
 
64
+ # Initialize model - this will be done on GPU when needed
65
  model = None
66
 
 
 
 
67
 
68
  # -------------------------------------------------------------------------
69
+ # Rerun Helper Function
70
  # -------------------------------------------------------------------------
71
+ def create_rerun_recording(glb_path, output_dir):
72
  """
73
+ Takes a generated GLB file, wraps it in a Rerun recording (.rrd),
74
+ and returns the path to the .rrd file for the UI to consume.
75
  """
76
  run_id = str(uuid.uuid4())
77
+
78
+ # Robustly handle different Rerun SDK versions
 
 
79
  rec = None
80
  if hasattr(rr, "new_recording"):
81
+ rec = rr.new_recording(application_id="MapAnything-3D", recording_id=run_id)
82
  elif hasattr(rr, "RecordingStream"):
83
+ rec = rr.RecordingStream(application_id="MapAnything-3D", recording_id=run_id)
84
  else:
85
+ rr.init("MapAnything-3D", recording_id=run_id, spawn=False)
86
  rec = rr
87
+
88
+ # Clear previous states
89
  rec.log("world", rr.Clear(recursive=True), static=True)
90
+
91
+ # Set coordinates
92
  rec.log("world", rr.ViewCoordinates.RIGHT_HAND_Y_UP, static=True)
93
 
94
+ # Add optional axes helpers
95
  try:
96
  rec.log("world/axes/x", rr.Arrows3D(vectors=[[0.5, 0, 0]], colors=[[255, 0, 0]]), static=True)
97
  rec.log("world/axes/y", rr.Arrows3D(vectors=[[0, 0.5, 0]], colors=[[0, 255, 0]]), static=True)
 
99
  except Exception:
100
  pass
101
 
102
+ # Log the 3D Model
103
+ rec.log("world/scene", rr.Asset3D(path=glb_path), static=True)
104
+
105
+ # Blueprint for clean layout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  if rrb is not None:
107
  try:
108
  blueprint = rrb.Blueprint(
 
116
  except Exception as e:
117
  print(f"Blueprint creation failed (non-fatal): {e}")
118
 
119
+ # Save the recording to the target directory
120
+ rrd_path = os.path.join(output_dir, f'scene_{run_id}.rrd')
121
  rec.save(rrd_path)
122
+
123
  return rrd_path
124
 
125
 
 
134
  filter_black_bg=False,
135
  filter_white_bg=False,
136
  ):
137
+ """
138
+ Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
139
+ """
140
  global model
141
+ import torch # Ensure torch is available in function scope
142
 
143
  print(f"Processing images from {target_dir}")
144
 
145
+ # Device check
146
  device = "cuda" if torch.cuda.is_available() else "cpu"
147
  device = torch.device(device)
148
 
149
+ # Initialize model if not already done
150
  if model is None:
151
  model = initialize_mapanything_model(high_level_config, device)
152
+
153
  else:
154
  model = model.to(device)
155
 
156
  model.eval()
157
 
158
+ # Load images using MapAnything's load_images function
159
  print("Loading images...")
160
  image_folder_path = os.path.join(target_dir, "images")
161
  views = load_images(image_folder_path)
 
164
  if len(views) == 0:
165
  raise ValueError("No images found. Check your upload.")
166
 
167
+ # Run model inference
168
  print("Running inference...")
169
+ # apply_mask: Whether to apply the non-ambiguous mask to the output. Defaults to True.
170
+ # mask_edges: Whether to compute an edge mask based on normals and depth and apply it to the output. Defaults to True.
171
+ # Use checkbox values - mask_edges is set to True by default since there's no UI control for it
172
  outputs = model.infer(
173
  views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
174
  )
175
 
176
+ # Convert predictions to format expected by visualization
177
  predictions = {}
178
+
179
+ # Initialize lists for the required keys
180
  extrinsic_list = []
181
  intrinsic_list = []
182
  world_points_list = []
 
184
  images_list = []
185
  final_mask_list = []
186
 
187
+ # Loop through the outputs
188
  for pred in outputs:
189
+ # Extract data from predictions
190
+ depthmap_torch = pred["depth_z"][0].squeeze(-1) # (H, W)
191
+ intrinsics_torch = pred["intrinsics"][0] # (3, 3)
192
+ camera_pose_torch = pred["camera_poses"][0] # (4, 4)
193
 
194
+ # Compute new pts3d using depth, intrinsics, and camera pose
195
  pts3d_computed, valid_mask = depthmap_to_world_frame(
196
  depthmap_torch, intrinsics_torch, camera_pose_torch
197
  )
198
 
199
+ # Convert to numpy arrays for visualization
200
+ # Check if mask key exists in pred, if not, fill with boolean trues in the size of depthmap_torch
201
  if "mask" in pred:
202
  mask = pred["mask"][0].squeeze(-1).cpu().numpy().astype(bool)
203
  else:
204
+ # Fill with boolean trues in the size of depthmap_torch
205
  mask = np.ones_like(depthmap_torch.cpu().numpy(), dtype=bool)
206
 
207
+ # Combine with valid depth mask
208
  mask = mask & valid_mask.cpu().numpy()
209
+
210
  image = pred["img_no_norm"][0].cpu().numpy()
211
 
212
+ # Append to lists
213
  extrinsic_list.append(camera_pose_torch.cpu().numpy())
214
  intrinsic_list.append(intrinsics_torch.cpu().numpy())
215
  world_points_list.append(pts3d_computed.cpu().numpy())
216
  depth_maps_list.append(depthmap_torch.cpu().numpy())
217
+ images_list.append(image) # Add image to list
218
+ final_mask_list.append(mask) # Add final_mask to list
219
 
220
+ # Convert lists to numpy arrays with required shapes
221
+ # extrinsic: (S, 3, 4) - batch of camera extrinsic matrices
222
  predictions["extrinsic"] = np.stack(extrinsic_list, axis=0)
223
+
224
+ # intrinsic: (S, 3, 3) - batch of camera intrinsic matrices
225
  predictions["intrinsic"] = np.stack(intrinsic_list, axis=0)
226
+
227
+ # world_points: (S, H, W, 3) - batch of 3D world points
228
  predictions["world_points"] = np.stack(world_points_list, axis=0)
229
 
230
+ # depth: (S, H, W, 1) or (S, H, W) - batch of depth maps
231
  depth_maps = np.stack(depth_maps_list, axis=0)
232
+ # Add channel dimension if needed to match (S, H, W, 1) format
233
  if len(depth_maps.shape) == 3:
234
  depth_maps = depth_maps[..., np.newaxis]
235
+
236
  predictions["depth"] = depth_maps
237
 
238
+ # images: (S, H, W, 3) - batch of input images
239
  predictions["images"] = np.stack(images_list, axis=0)
240
+
241
+ # final_mask: (S, H, W) - batch of final masks for filtering
242
  predictions["final_mask"] = np.stack(final_mask_list, axis=0)
243
 
244
+ # Process data for visualization tabs (depth, normal, measure)
245
  processed_data = process_predictions_for_visualization(
246
  predictions, views, high_level_config, filter_black_bg, filter_white_bg
247
  )
248
 
249
+ # Clean up
250
  torch.cuda.empty_cache()
251
 
252
  return predictions, processed_data
253
 
254
 
255
  def update_view_selectors(processed_data):
256
+ """Update view selector dropdowns based on available views"""
257
  if processed_data is None or len(processed_data) == 0:
258
  choices = ["View 1"]
259
  else:
 
261
  choices = [f"View {i + 1}" for i in range(num_views)]
262
 
263
  return (
264
+ gr.Dropdown(choices=choices, value=choices[0]), # depth_view_selector
265
+ gr.Dropdown(choices=choices, value=choices[0]), # normal_view_selector
266
+ gr.Dropdown(choices=choices, value=choices[0]), # measure_view_selector
267
  )
268
 
269
 
270
  def get_view_data_by_index(processed_data, view_index):
271
+ """Get view data by index, handling bounds"""
272
  if processed_data is None or len(processed_data) == 0:
273
  return None
274
+
275
  view_keys = list(processed_data.keys())
276
  if view_index < 0 or view_index >= len(view_keys):
277
  view_index = 0
278
+
279
  return processed_data[view_keys[view_index]]
280
 
281
 
282
  def update_depth_view(processed_data, view_index):
283
+ """Update depth view for a specific view index"""
284
  view_data = get_view_data_by_index(processed_data, view_index)
285
  if view_data is None or view_data["depth"] is None:
286
  return None
287
+
288
  return colorize_depth(view_data["depth"], mask=view_data.get("mask"))
289
 
290
 
291
  def update_normal_view(processed_data, view_index):
292
+ """Update normal view for a specific view index"""
293
  view_data = get_view_data_by_index(processed_data, view_index)
294
  if view_data is None or view_data["normal"] is None:
295
  return None
296
+
297
  return colorize_normal(view_data["normal"], mask=view_data.get("mask"))
298
 
299
 
300
  def update_measure_view(processed_data, view_index):
301
+ """Update measure view for a specific view index with mask overlay"""
302
  view_data = get_view_data_by_index(processed_data, view_index)
303
  if view_data is None:
304
+ return None, [] # image, measure_points
305
 
306
+ # Get the base image
307
  image = view_data["image"].copy()
308
+
309
+ # Ensure image is in uint8 format
310
  if image.dtype != np.uint8:
311
  if image.max() <= 1.0:
312
  image = (image * 255).astype(np.uint8)
313
  else:
314
  image = image.astype(np.uint8)
315
 
316
+ # Apply mask overlay if mask is available
317
  if view_data["mask"] is not None:
318
  mask = view_data["mask"]
319
+
320
+ # Create light grey overlay for masked areas
321
+ # Masked areas (False values) will be overlaid with light grey
322
+ invalid_mask = ~mask # Areas where mask is False
323
+
324
  if invalid_mask.any():
325
+ # Create a light grey overlay (RGB: 192, 192, 192)
326
  overlay_color = np.array([255, 220, 220], dtype=np.uint8)
327
+
328
+ # Apply overlay with some transparency
329
+ alpha = 0.5 # Transparency level
330
+ for c in range(3): # RGB channels
331
  image[:, :, c] = np.where(
332
  invalid_mask,
333
  (1 - alpha) * image[:, :, c] + alpha * overlay_color[c],
 
338
 
339
 
340
  def navigate_depth_view(processed_data, current_selector_value, direction):
341
+ """Navigate depth view (direction: -1 for previous, +1 for next)"""
342
  if processed_data is None or len(processed_data) == 0:
343
  return "View 1", None
344
+
345
+ # Parse current view number
346
  try:
347
  current_view = int(current_selector_value.split()[1]) - 1
348
  except:
349
  current_view = 0
350
+
351
  num_views = len(processed_data)
352
  new_view = (current_view + direction) % num_views
353
+
354
  new_selector_value = f"View {new_view + 1}"
355
  depth_vis = update_depth_view(processed_data, new_view)
356
+
357
  return new_selector_value, depth_vis
358
 
359
 
360
  def navigate_normal_view(processed_data, current_selector_value, direction):
361
+ """Navigate normal view (direction: -1 for previous, +1 for next)"""
362
  if processed_data is None or len(processed_data) == 0:
363
  return "View 1", None
364
+
365
+ # Parse current view number
366
  try:
367
  current_view = int(current_selector_value.split()[1]) - 1
368
  except:
369
  current_view = 0
370
+
371
  num_views = len(processed_data)
372
  new_view = (current_view + direction) % num_views
373
+
374
  new_selector_value = f"View {new_view + 1}"
375
  normal_vis = update_normal_view(processed_data, new_view)
376
+
377
  return new_selector_value, normal_vis
378
 
379
 
380
  def navigate_measure_view(processed_data, current_selector_value, direction):
381
+ """Navigate measure view (direction: -1 for previous, +1 for next)"""
382
  if processed_data is None or len(processed_data) == 0:
383
  return "View 1", None, []
384
+
385
+ # Parse current view number
386
  try:
387
  current_view = int(current_selector_value.split()[1]) - 1
388
  except:
389
  current_view = 0
390
+
391
  num_views = len(processed_data)
392
  new_view = (current_view + direction) % num_views
393
+
394
  new_selector_value = f"View {new_view + 1}"
395
  measure_image, measure_points = update_measure_view(processed_data, new_view)
396
+
397
  return new_selector_value, measure_image, measure_points
398
 
399
 
400
  def populate_visualization_tabs(processed_data):
401
+ """Populate the depth, normal, and measure tabs with processed data"""
402
  if processed_data is None or len(processed_data) == 0:
403
  return None, None, None, []
404
+
405
+ # Use update functions to ensure confidence filtering is applied from the start
406
  depth_vis = update_depth_view(processed_data, 0)
407
  normal_vis = update_normal_view(processed_data, 0)
408
  measure_img, _ = update_measure_view(processed_data, 0)
409
+
410
  return depth_vis, normal_vis, measure_img, []
411
 
412
 
 
414
  # 2) Handle uploaded video/images --> produce target_dir + images
415
  # -------------------------------------------------------------------------
416
  def handle_uploads(unified_upload, s_time_interval=1.0):
417
+ """
418
+ Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
419
+ images or extracted frames from video into it. Return (target_dir, image_paths).
420
+ """
421
  start_time = time.time()
422
  gc.collect()
423
  torch.cuda.empty_cache()
424
 
425
+ # Create a unique folder name
426
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
427
  target_dir = f"input_images_{timestamp}"
428
  target_dir_images = os.path.join(target_dir, "images")
429
 
430
+ # Clean up if somehow that folder already exists
431
  if os.path.exists(target_dir):
432
  shutil.rmtree(target_dir)
433
  os.makedirs(target_dir)
 
435
 
436
  image_paths = []
437
 
438
+ # --- Handle uploaded files (both images and videos) ---
439
  if unified_upload is not None:
440
  for file_data in unified_upload:
441
  if isinstance(file_data, dict) and "name" in file_data:
 
445
 
446
  file_ext = os.path.splitext(file_path)[1].lower()
447
 
448
+ # Check if it's a video file
449
+ video_extensions = [
450
+ ".mp4",
451
+ ".avi",
452
+ ".mov",
453
+ ".mkv",
454
+ ".wmv",
455
+ ".flv",
456
+ ".webm",
457
+ ".m4v",
458
+ ".3gp",
459
+ ]
460
  if file_ext in video_extensions:
461
+ # Handle as video
462
  vs = cv2.VideoCapture(file_path)
463
  fps = vs.get(cv2.CAP_PROP_FPS)
464
+ frame_interval = int(fps * s_time_interval) # frames per interval
465
+
466
  count = 0
467
  video_frame_num = 0
468
  while True:
 
471
  break
472
  count += 1
473
  if count % frame_interval == 0:
474
+ # Use original filename as prefix for frames
475
  base_name = os.path.splitext(os.path.basename(file_path))[0]
476
+ image_path = os.path.join(
477
+ target_dir_images, f"{base_name}_{video_frame_num:06}.png"
478
+ )
479
  cv2.imwrite(image_path, frame)
480
  image_paths.append(image_path)
481
  video_frame_num += 1
482
  vs.release()
483
+ print(
484
+ f"Extracted {video_frame_num} frames from video: {os.path.basename(file_path)}"
485
+ )
486
+
487
  else:
488
+ # Handle as image
489
+ # Check if the file is a HEIC image
490
  if file_ext in [".heic", ".heif"]:
491
+ # Convert HEIC to JPEG for better gallery compatibility
492
  try:
493
  with Image.open(file_path) as img:
494
+ # Convert to RGB if necessary (HEIC can have different color modes)
495
  if img.mode not in ("RGB", "L"):
496
  img = img.convert("RGB")
497
+
498
+ # Create JPEG filename
499
  base_name = os.path.splitext(os.path.basename(file_path))[0]
500
+ dst_path = os.path.join(
501
+ target_dir_images, f"{base_name}.jpg"
502
+ )
503
+
504
+ # Save as JPEG with high quality
505
  img.save(dst_path, "JPEG", quality=95)
506
  image_paths.append(dst_path)
507
+ print(
508
+ f"Converted HEIC to JPEG: {os.path.basename(file_path)} -> {os.path.basename(dst_path)}"
509
+ )
510
  except Exception as e:
511
  print(f"Error converting HEIC file {file_path}: {e}")
512
+ # Fall back to copying as is
513
+ dst_path = os.path.join(
514
+ target_dir_images, os.path.basename(file_path)
515
+ )
516
  shutil.copy(file_path, dst_path)
517
  image_paths.append(dst_path)
518
  else:
519
+ # Regular image files - copy as is
520
+ dst_path = os.path.join(
521
+ target_dir_images, os.path.basename(file_path)
522
+ )
523
  shutil.copy(file_path, dst_path)
524
  image_paths.append(dst_path)
525
 
526
+ # Sort final images for gallery
527
  image_paths = sorted(image_paths)
528
+
529
  end_time = time.time()
530
+ print(
531
+ f"Files processed to {target_dir_images}; took {end_time - start_time:.3f} seconds"
532
+ )
533
  return target_dir, image_paths
534
 
535
 
 
537
  # 3) Update gallery on upload
538
  # -------------------------------------------------------------------------
539
  def update_gallery_on_upload(input_video, input_images, s_time_interval=1.0):
540
+ """
541
+ Whenever user uploads or changes files, immediately handle them
542
+ and show in the gallery. Return (target_dir, image_paths).
543
+ If nothing is uploaded, returns "None" and empty list.
544
+ """
545
  if not input_video and not input_images:
546
  return None, None, None, None
547
  target_dir, image_paths = handle_uploads(input_video, input_images, s_time_interval)
 
554
 
555
 
556
  # -------------------------------------------------------------------------
557
+ # 4) Reconstruction: uses the target_dir plus any viz parameters
558
  # -------------------------------------------------------------------------
559
  @spaces.GPU(duration=120)
560
  def gradio_demo(
 
566
  apply_mask=True,
567
  show_mesh=True,
568
  ):
569
+ """
570
+ Perform reconstruction using the already-created target_dir/images.
571
+ """
572
  if not os.path.isdir(target_dir) or target_dir == "None":
573
  return None, "No valid target directory found. Please upload first.", None, None
574
 
 
576
  gc.collect()
577
  torch.cuda.empty_cache()
578
 
579
+ # Prepare frame_filter dropdown
580
  target_dir_images = os.path.join(target_dir, "images")
581
+ all_files = (
582
+ sorted(os.listdir(target_dir_images))
583
+ if os.path.isdir(target_dir_images)
584
+ else []
585
+ )
586
  all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
587
  frame_filter_choices = ["All"] + all_files
588
 
 
590
  with torch.no_grad():
591
  predictions, processed_data = run_model(target_dir, apply_mask)
592
 
593
+ # Save predictions
594
  prediction_save_path = os.path.join(target_dir, "predictions.npz")
595
  np.savez(prediction_save_path, **predictions)
596
 
597
+ # Handle None frame_filter
598
  if frame_filter is None:
599
  frame_filter = "All"
600
 
601
+ # Build a GLB file name
602
  glbfile = os.path.join(
603
  target_dir,
604
  f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}_black{filter_black_bg}_white{filter_white_bg}.glb",
605
  )
606
 
607
+ # Convert predictions to GLB
608
  glbscene = predictions_to_glb(
609
  predictions,
610
  filter_by_frames=frame_filter,
611
  show_cam=show_cam,
612
  mask_black_bg=filter_black_bg,
613
  mask_white_bg=filter_white_bg,
614
+ as_mesh=show_mesh, # Use the show_mesh parameter
615
  )
616
  glbscene.export(file_obj=glbfile)
617
+
618
+ # ---------------------------------------------------------
619
+ # Generate the Rerun recording using the new helper
620
+ # ---------------------------------------------------------
621
+ rrd_path = create_rerun_recording(glbfile, target_dir)
622
 
623
+ # Cleanup
 
 
624
  del predictions
625
  gc.collect()
626
  torch.cuda.empty_cache()
627
 
628
  end_time = time.time()
629
  print(f"Total time: {end_time - start_time:.2f} seconds")
630
+ log_msg = (
631
+ f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
632
+ )
633
+
634
+ # Populate visualization tabs with processed data
635
+ depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(
636
+ processed_data
637
+ )
638
 
639
+ # Update view selectors based on available views
640
+ depth_selector, normal_selector, measure_selector = update_view_selectors(
641
+ processed_data
642
+ )
643
 
644
  return (
645
+ rrd_path, # Return the Rerun recording path instead of glbfile
646
  log_msg,
647
  gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True),
648
  processed_data,
649
  depth_vis,
650
  normal_vis,
651
  measure_img,
652
+ "", # measure_text (empty initially)
653
  depth_selector,
654
  normal_selector,
655
  measure_selector,
 
657
 
658
 
659
  # -------------------------------------------------------------------------
660
+ # 5) Helper functions for UI resets + re-visualization
661
  # -------------------------------------------------------------------------
662
  def colorize_depth(depth_map, mask=None):
663
+ """Convert depth map to colorized visualization with optional mask"""
664
  if depth_map is None:
665
  return None
666
+
667
+ # Normalize depth to 0-1 range
668
  depth_normalized = depth_map.copy()
669
  valid_mask = depth_normalized > 0
670
+
671
+ # Apply additional mask if provided (for background filtering)
672
  if mask is not None:
673
  valid_mask = valid_mask & mask
674
+
675
  if valid_mask.sum() > 0:
676
  valid_depths = depth_normalized[valid_mask]
677
  p5 = np.percentile(valid_depths, 5)
678
  p95 = np.percentile(valid_depths, 95)
679
+
680
  depth_normalized[valid_mask] = (depth_normalized[valid_mask] - p5) / (p95 - p5)
681
+
682
+ # Apply colormap
683
  import matplotlib.pyplot as plt
684
+
685
  colormap = plt.cm.turbo_r
686
  colored = colormap(depth_normalized)
687
  colored = (colored[:, :, :3] * 255).astype(np.uint8)
688
+
689
+ # Set invalid pixels to white
690
  colored[~valid_mask] = [255, 255, 255]
691
+
692
  return colored
693
 
694
 
695
  def colorize_normal(normal_map, mask=None):
696
+ """Convert normal map to colorized visualization with optional mask"""
697
  if normal_map is None:
698
  return None
699
+
700
+ # Create a copy for modification
701
  normal_vis = normal_map.copy()
702
+
703
+ # Apply mask if provided (set masked areas to [0, 0, 0] which becomes grey after normalization)
704
  if mask is not None:
705
  invalid_mask = ~mask
706
+ normal_vis[invalid_mask] = [0, 0, 0] # Set invalid areas to zero
707
+
708
+ # Normalize normals to [0, 1] range for visualization
709
  normal_vis = (normal_vis + 1.0) / 2.0
710
  normal_vis = (normal_vis * 255).astype(np.uint8)
711
+
712
  return normal_vis
713
 
714
 
715
  def process_predictions_for_visualization(
716
  predictions, views, high_level_config, filter_black_bg=False, filter_white_bg=False
717
  ):
718
+ """Extract depth, normal, and 3D points from predictions for visualization"""
719
  processed_data = {}
720
+
721
+ # Process each view
722
  for view_idx, view in enumerate(views):
723
+ # Get image
724
  image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
725
+
726
+ # Get predicted points
727
  pred_pts3d = predictions["world_points"][view_idx]
728
+
729
+ # Initialize data for this view
730
  view_data = {
731
  "image": image[0],
732
  "points3d": pred_pts3d,
 
734
  "normal": None,
735
  "mask": None,
736
  }
737
+
738
+ # Start with the final mask from predictions
739
  mask = predictions["final_mask"][view_idx].copy()
740
+
741
+ # Apply black background filtering if enabled
742
  if filter_black_bg:
743
+ # Get the image colors (ensure they're in 0-255 range)
744
  view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
745
+ # Filter out black background pixels (sum of RGB < 16)
746
  black_bg_mask = view_colors.sum(axis=2) >= 16
747
  mask = mask & black_bg_mask
748
+
749
+ # Apply white background filtering if enabled
750
  if filter_white_bg:
751
+ # Get the image colors (ensure they're in 0-255 range)
752
  view_colors = image[0] * 255 if image[0].max() <= 1.0 else image[0]
753
+ # Filter out white background pixels (all RGB > 240)
754
  white_bg_mask = ~(
755
  (view_colors[:, :, 0] > 240)
756
  & (view_colors[:, :, 1] > 240)
757
  & (view_colors[:, :, 2] > 240)
758
  )
759
  mask = mask & white_bg_mask
760
+
761
  view_data["mask"] = mask
762
  view_data["depth"] = predictions["depth"][view_idx].squeeze()
763
+
764
  normals, _ = points_to_normals(pred_pts3d, mask=view_data["mask"])
765
  view_data["normal"] = normals
766
+
767
  processed_data[view_idx] = view_data
768
+
769
  return processed_data
770
 
771
 
772
  def reset_measure(processed_data):
773
+ """Reset measure points"""
774
  if processed_data is None or len(processed_data) == 0:
775
  return None, [], ""
776
+
777
+ # Return the first view image
778
  first_view = list(processed_data.values())[0]
779
  return first_view["image"], [], ""
780
 
781
 
782
+ def measure(
783
+ processed_data, measure_points, current_view_selector, event: gr.SelectData
784
+ ):
785
+ """Handle measurement on images"""
786
  try:
787
+ print(f"Measure function called with selector: {current_view_selector}")
788
+
789
  if processed_data is None or len(processed_data) == 0:
790
  return None, [], "No data available"
791
 
792
+ # Use the currently selected view instead of always using the first view
793
  try:
794
  current_view_index = int(current_view_selector.split()[1]) - 1
795
  except:
796
  current_view_index = 0
797
 
798
+ print(f"Using view index: {current_view_index}")
799
+
800
+ # Get view data safely
801
  if current_view_index < 0 or current_view_index >= len(processed_data):
802
  current_view_index = 0
803
 
 
808
  return None, [], "No view data available"
809
 
810
  point2d = event.index[0], event.index[1]
811
+ print(f"Clicked point: {point2d}")
812
 
813
+ # Check if the clicked point is in a masked area (prevent interaction)
814
  if (
815
  current_view["mask"] is not None
816
  and 0 <= point2d[1] < current_view["mask"].shape[0]
817
  and 0 <= point2d[0] < current_view["mask"].shape[1]
818
  ):
819
+ # Check if the point is in a masked (invalid) area
820
  if not current_view["mask"][point2d[1], point2d[0]]:
821
+ print(f"Clicked point {point2d} is in masked area, ignoring click")
822
+ # Always return image with mask overlay
823
+ masked_image, _ = update_measure_view(
824
+ processed_data, current_view_index
825
+ )
826
  return (
827
  masked_image,
828
  measure_points,
 
830
  )
831
 
832
  measure_points.append(point2d)
833
+
834
+ # Get image with mask overlay and ensure it's valid
835
  image, _ = update_measure_view(processed_data, current_view_index)
836
  if image is None:
837
  return None, [], "No image available"
838
+
839
  image = image.copy()
840
  points3d = current_view["points3d"]
841
 
842
+ # Ensure image is in uint8 format for proper cv2 operations
843
+ try:
844
+ if image.dtype != np.uint8:
845
+ if image.max() <= 1.0:
846
+ # Image is in [0, 1] range, convert to [0, 255]
847
+ image = (image * 255).astype(np.uint8)
848
+ else:
849
+ # Image is already in [0, 255] range
850
+ image = image.astype(np.uint8)
851
+ except Exception as e:
852
+ print(f"Image conversion error: {e}")
853
+ return None, [], f"Image conversion error: {e}"
854
 
855
+ # Draw circles for points
856
+ try:
857
+ for p in measure_points:
858
+ if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
859
+ image = cv2.circle(
860
+ image, p, radius=5, color=(255, 0, 0), thickness=2
861
+ )
862
+ except Exception as e:
863
+ print(f"Drawing error: {e}")
864
+ return None, [], f"Drawing error: {e}"
865
 
866
  depth_text = ""
867
+ try:
868
+ for i, p in enumerate(measure_points):
 
 
 
 
 
 
 
869
  if (
870
+ current_view["depth"] is not None
871
+ and 0 <= p[1] < current_view["depth"].shape[0]
872
+ and 0 <= p[0] < current_view["depth"].shape[1]
873
  ):
874
+ d = current_view["depth"][p[1], p[0]]
875
+ depth_text += f"- **P{i + 1} depth: {d:.2f}m.**\n"
876
+ else:
877
+ # Use Z coordinate of 3D points if depth not available
878
+ if (
879
+ points3d is not None
880
+ and 0 <= p[1] < points3d.shape[0]
881
+ and 0 <= p[0] < points3d.shape[1]
882
+ ):
883
+ z = points3d[p[1], p[0], 2]
884
+ depth_text += f"- **P{i + 1} Z-coord: {z:.2f}m.**\n"
885
+ except Exception as e:
886
+ print(f"Depth text error: {e}")
887
+ depth_text = f"Error computing depth: {e}\n"
888
 
889
  if len(measure_points) == 2:
890
+ try:
891
+ point1, point2 = measure_points
892
+ # Draw line
893
+ if (
894
+ 0 <= point1[0] < image.shape[1]
895
+ and 0 <= point1[1] < image.shape[0]
896
+ and 0 <= point2[0] < image.shape[1]
897
+ and 0 <= point2[1] < image.shape[0]
898
+ ):
899
+ image = cv2.line(
900
+ image, point1, point2, color=(255, 0, 0), thickness=2
901
+ )
902
+
903
+ # Compute 3D distance
904
+ distance_text = "- **Distance: Unable to compute**"
905
+ if (
906
+ points3d is not None
907
+ and 0 <= point1[1] < points3d.shape[0]
908
+ and 0 <= point1[0] < points3d.shape[1]
909
+ and 0 <= point2[1] < points3d.shape[0]
910
+ and 0 <= point2[0] < points3d.shape[1]
911
+ ):
912
+ try:
913
+ p1_3d = points3d[point1[1], point1[0]]
914
+ p2_3d = points3d[point2[1], point2[0]]
915
+ distance = np.linalg.norm(p1_3d - p2_3d)
916
+ distance_text = f"- **Distance: {distance:.2f}m**"
917
+ except Exception as e:
918
+ print(f"Distance computation error: {e}")
919
+ distance_text = f"- **Distance computation error: {e}**"
920
+
921
+ measure_points = []
922
+ text = depth_text + distance_text
923
+ print(f"Measurement complete: {text}")
924
+ return [image, measure_points, text]
925
+ except Exception as e:
926
+ print(f"Final measurement error: {e}")
927
+ return None, [], f"Measurement error: {e}"
928
  else:
929
+ print(f"Single point measurement: {depth_text}")
930
  return [image, measure_points, depth_text]
931
 
932
  except Exception as e:
 
935
 
936
 
937
  def clear_fields():
938
+ """
939
+ Clears the 3D viewer, the stored target_dir, and empties the gallery.
940
+ """
941
  return None
942
 
943
 
944
  def update_log():
945
+ """
946
+ Display a quick log message while waiting.
947
+ """
948
  return "Loading and Reconstructing..."
949
 
950
 
 
957
  filter_white_bg=False,
958
  show_mesh=True,
959
  ):
960
+ """
961
+ Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
962
+ wrap it in a Rerun recording (.rrd), and return it for the Rerun viewer.
963
+ """
964
+
965
+ # If it's an example click, skip as requested
966
  if is_example == "True":
967
  return (
968
  gr.update(),
 
1000
  as_mesh=show_mesh,
1001
  )
1002
  glbscene.export(file_obj=glbfile)
1003
+
1004
+ # Generate the Rerun recording using the helper
1005
+ rrd_path = create_rerun_recording(glbfile, target_dir)
1006
 
1007
+ return (
1008
+ rrd_path, # Was glbfile
1009
+ "Visualization updated.",
1010
+ )
1011
 
1012
 
1013
  def update_all_views_on_filter_change(
 
1019
  normal_view_selector,
1020
  measure_view_selector,
1021
  ):
1022
+ """
1023
+ Update all individual view tabs when background filtering checkboxes change.
1024
+ This regenerates the processed data with new filtering and updates all views.
1025
+ """
1026
+ # Check if we have a valid target directory and predictions
1027
  if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
1028
  return processed_data, None, None, None, []
1029
 
 
1032
  return processed_data, None, None, None, []
1033
 
1034
  try:
1035
+ # Load the original predictions and views
1036
  loaded = np.load(predictions_path, allow_pickle=True)
1037
  predictions = {key: loaded[key] for key in loaded.keys()}
1038
+
1039
+ # Load images using MapAnything's load_images function
1040
  image_folder_path = os.path.join(target_dir, "images")
1041
  views = load_images(image_folder_path)
1042
+
1043
+ # Regenerate processed data with new filtering settings
1044
  new_processed_data = process_predictions_for_visualization(
1045
  predictions, views, high_level_config, filter_black_bg, filter_white_bg
1046
  )
1047
 
1048
+ # Get current view indices
1049
  try:
1050
+ depth_view_idx = (
1051
+ int(depth_view_selector.split()[1]) - 1 if depth_view_selector else 0
1052
+ )
1053
  except:
1054
  depth_view_idx = 0
1055
+
1056
  try:
1057
+ normal_view_idx = (
1058
+ int(normal_view_selector.split()[1]) - 1 if normal_view_selector else 0
1059
+ )
1060
  except:
1061
  normal_view_idx = 0
1062
+
1063
  try:
1064
+ measure_view_idx = (
1065
+ int(measure_view_selector.split()[1]) - 1
1066
+ if measure_view_selector
1067
+ else 0
1068
+ )
1069
  except:
1070
  measure_view_idx = 0
1071
 
1072
+ # Update all views with new filtered data
1073
  depth_vis = update_depth_view(new_processed_data, depth_view_idx)
1074
  normal_vis = update_normal_view(new_processed_data, normal_view_idx)
1075
  measure_img, _ = update_measure_view(new_processed_data, measure_view_idx)
 
1085
  # Example scene functions
1086
  # -------------------------------------------------------------------------
1087
  def get_scene_info(examples_dir):
1088
+ """Get information about scenes in the examples directory"""
1089
  import glob
1090
+
1091
  scenes = []
1092
  if not os.path.exists(examples_dir):
1093
  return scenes
1094
+
1095
  for scene_folder in sorted(os.listdir(examples_dir)):
1096
  scene_path = os.path.join(examples_dir, scene_folder)
1097
  if os.path.isdir(scene_path):
1098
+ # Find all image files in the scene folder
1099
  image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"]
1100
  image_files = []
1101
  for ext in image_extensions:
1102
  image_files.extend(glob.glob(os.path.join(scene_path, ext)))
1103
  image_files.extend(glob.glob(os.path.join(scene_path, ext.upper())))
1104
+
1105
  if image_files:
1106
+ # Sort images and get the first one for thumbnail
1107
  image_files = sorted(image_files)
1108
+ first_image = image_files[0]
1109
+ num_images = len(image_files)
1110
+
1111
+ scenes.append(
1112
+ {
1113
+ "name": scene_folder,
1114
+ "path": scene_path,
1115
+ "thumbnail": first_image,
1116
+ "num_images": num_images,
1117
+ "image_files": image_files,
1118
+ }
1119
+ )
1120
+
1121
  return scenes
1122
 
1123
 
1124
  def load_example_scene(scene_name, examples_dir="examples"):
1125
+ """Load a scene from examples directory"""
1126
  scenes = get_scene_info(examples_dir)
1127
+
1128
+ # Find the selected scene
1129
+ selected_scene = None
1130
+ for scene in scenes:
1131
+ if scene["name"] == scene_name:
1132
+ selected_scene = scene
1133
+ break
1134
+
1135
  if selected_scene is None:
1136
  return None, None, None, "Scene not found"
1137
+
1138
+ # Create file-like objects for the unified upload system
1139
+ # Convert image file paths to the format expected by unified_upload
1140
+ file_objects = []
1141
+ for image_path in selected_scene["image_files"]:
1142
+ file_objects.append(image_path)
1143
+
1144
+ # Create target directory and copy images using the unified upload system
1145
  target_dir, image_paths = handle_uploads(file_objects, 1.0)
1146
+
1147
  return (
1148
+ None, # Clear reconstruction output
1149
+ target_dir, # Set target directory
1150
+ image_paths, # Set gallery
1151
  f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. Click 'Reconstruct' to begin 3D processing.",
1152
  )
1153
 
 
1157
  # -------------------------------------------------------------------------
1158
  theme = get_gradio_theme()
1159
 
1160
+ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
1161
+ # State variables
1162
  is_example = gr.Textbox(label="is_example", visible=False, value="None")
1163
  num_images = gr.Textbox(label="num_images", visible=False, value="None")
1164
  processed_data_state = gr.State(value=None)
1165
  measure_points_state = gr.State(value=[])
1166
+ current_view_index = gr.State(value=0)
1167
  target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
1168
 
1169
+ # --- Header Area ---
1170
+ with gr.Column(elem_id="header-container"):
1171
+ gr.Markdown(
1172
+ "<div style='text-align: center; max-width: 800px; margin: 0 auto; padding-top: 10px;'>"
1173
+ "<h1>๐Ÿ—บ๏ธ Map-Anything-v1</h1>"
1174
+ "<h3 style='color: #666; font-weight: 400;'>Metric 3D Reconstruction (Point Cloud and Camera Poses)</h3>"
1175
+ "</div>"
1176
+ )
1177
+ gr.Markdown("---")
1178
+
1179
+ # --- Main App Layout ---
1180
  with gr.Row():
1181
+
1182
+ # LEFT COLUMN (Sidebar / Controls)
1183
+ with gr.Column(scale=1, min_width=350):
1184
+
1185
+ with gr.Group():
1186
+ gr.Markdown("### ๐Ÿ“ 1. Input Media")
1187
+ unified_upload = gr.File(
1188
+ file_count="multiple",
1189
+ label="Upload Video or Images",
 
 
 
 
 
1190
  interactive=True,
1191
+ file_types=["image", "video"],
1192
+ )
1193
+ with gr.Row():
1194
+ s_time_interval = gr.Slider(
1195
+ minimum=0.1,
1196
+ maximum=5.0,
1197
+ value=1.0,
1198
+ step=0.1,
1199
+ label="Video sample interval (sec)",
1200
+ interactive=True,
1201
+ visible=True,
1202
+ )
1203
+ resample_btn = gr.Button("Resample", visible=False, variant="secondary")
1204
+
1205
+ image_gallery = gr.Gallery(
1206
+ label="Preview",
1207
+ columns=4,
1208
+ height="200px",
1209
+ object_fit="contain",
1210
+ preview=True,
1211
+ )
1212
+ clear_uploads_btn = gr.ClearButton(
1213
+ [unified_upload, image_gallery],
1214
+ value="Clear Uploads",
1215
+ variant="secondary",
1216
+ size="sm",
1217
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1218
 
1219
+ with gr.Group():
1220
+ gr.Markdown("### โš™๏ธ 2. Reconstruction Settings")
1221
+ apply_mask_checkbox = gr.Checkbox(
1222
+ label="Apply mask (depth classes & edges)",
1223
+ value=True,
 
1224
  )
1225
+
1226
+ with gr.Row():
1227
+ submit_btn = gr.Button("๐Ÿš€ Reconstruct", variant="primary", scale=2)
1228
+ clear_btn = gr.ClearButton(
1229
+ [
1230
+ unified_upload,
1231
+ target_dir_output,
1232
+ image_gallery,
1233
+ ],
1234
+ value="Clear All",
1235
+ scale=1,
1236
+ )
1237
 
1238
+ with gr.Accordion("๐ŸŽจ Visualization Options", open=True):
1239
+ gr.Markdown("*Note: Updates automatically applied to viewer.*")
1240
+ frame_filter = gr.Dropdown(
1241
+ choices=["All"], value="All", label="Show Points from Frame"
1242
+ )
1243
+ show_cam = gr.Checkbox(label="Show Camera Paths", value=True)
1244
+ show_mesh = gr.Checkbox(label="Show 3D Mesh", value=True)
1245
+ filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
1246
+ filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
1247
+
1248
+
1249
+ # RIGHT COLUMN (Main Viewer Area)
1250
+ with gr.Column(scale=2, min_width=600):
1251
+ log_output = gr.Markdown("Status: **Ready**. Please upload media or select an example scene below.", elem_classes=["custom-log"])
1252
+
1253
+ with gr.Tabs():
1254
+ with gr.Tab("3D View"):
1255
+ reconstruction_output = Rerun(
1256
+ label="Rerun 3D Viewer",
1257
+ height=600,
1258
+ )
1259
+ with gr.Tab("Depth"):
1260
+ with gr.Row(elem_classes=["navigation-row"]):
1261
+ prev_depth_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1262
+ depth_view_selector = gr.Dropdown(
1263
+ choices=["View 1"],
1264
+ value="View 1",
1265
+ label="Select View",
1266
+ scale=2,
1267
+ interactive=True,
1268
+ allow_custom_value=True,
1269
  )
1270
+ next_depth_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1271
+ depth_map = gr.Image(
1272
+ type="numpy",
1273
+ label="Colorized Depth Map",
1274
+ format="png",
1275
+ interactive=False,
1276
+ )
1277
+ with gr.Tab("Normal"):
1278
+ with gr.Row(elem_classes=["navigation-row"]):
1279
+ prev_normal_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1280
+ normal_view_selector = gr.Dropdown(
1281
+ choices=["View 1"],
1282
+ value="View 1",
1283
+ label="Select View",
1284
+ scale=2,
1285
+ interactive=True,
1286
+ allow_custom_value=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
1287
  )
1288
+ next_normal_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1289
+ normal_map = gr.Image(
1290
+ type="numpy",
1291
+ label="Normal Map",
1292
+ format="png",
1293
+ interactive=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1294
  )
1295
+ with gr.Tab("Measure"):
1296
+ gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
1297
+ with gr.Row(elem_classes=["navigation-row"]):
1298
+ prev_measure_btn = gr.Button("โ—€ Previous", size="sm", scale=1)
1299
+ measure_view_selector = gr.Dropdown(
1300
+ choices=["View 1"],
1301
+ value="View 1",
1302
+ label="Select View",
1303
+ scale=2,
1304
+ interactive=True,
1305
+ allow_custom_value=True,
1306
+ )
1307
+ next_measure_btn = gr.Button("Next โ–ถ", size="sm", scale=1)
1308
+ measure_image = gr.Image(
1309
+ type="numpy",
1310
+ show_label=False,
1311
+ format="webp",
1312
+ interactive=False,
1313
+ sources=[],
1314
+ )
1315
+ gr.Markdown("**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken.")
1316
+ measure_text = gr.Markdown("")
1317
 
1318
+ # --- Footer Area (Example Scenes) ---
1319
+ gr.Markdown("---")
1320
+ gr.Markdown("## ๐ŸŒŸ Example Scenes")
1321
+ gr.Markdown("Click any thumbnail below to load a sample dataset for reconstruction.")
1322
 
1323
  scenes = get_scene_info("examples")
1324
+
1325
  if scenes:
1326
+ for i in range(0, len(scenes), 4):
1327
  with gr.Row():
1328
  for j in range(4):
1329
  scene_idx = i + j
 
1338
  elem_id=f"scene_thumb_{scene['name']}",
1339
  sources=[],
1340
  )
1341
+ gr.Markdown(
1342
+ f"**{scene['name']}** \n {scene['num_images']} images",
1343
+ elem_classes=["scene-info"],
1344
+ )
1345
+ # Clicking an example bypasses the manual process and loads everything automatically
1346
  scene_img.select(
1347
  fn=lambda name=scene["name"]: load_example_scene(name),
1348
+ outputs=[
1349
+ reconstruction_output, # To clear old view
1350
+ target_dir_output,
1351
+ image_gallery,
1352
+ log_output,
1353
+ ],
1354
  )
1355
  else:
1356
  with gr.Column(scale=1):
1357
  pass
1358
 
1359
+ # =========================================================================
1360
+ # Event Bindings & Logic
1361
+ # =========================================================================
1362
+
1363
  submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
1364
  fn=update_log, inputs=[], outputs=[log_output]
1365
  ).then(
1366
  fn=gradio_demo,
1367
+ inputs=[
1368
+ target_dir_output,
1369
+ frame_filter,
1370
+ show_cam,
1371
+ filter_black_bg,
1372
+ filter_white_bg,
1373
+ apply_mask_checkbox,
1374
+ show_mesh,
1375
+ ],
1376
  outputs=[
1377
+ reconstruction_output,
1378
+ log_output,
1379
+ frame_filter,
1380
+ processed_data_state,
1381
+ depth_map,
1382
+ normal_map,
1383
+ measure_image,
1384
+ measure_text,
1385
+ depth_view_selector,
1386
+ normal_view_selector,
1387
+ measure_view_selector,
1388
  ],
1389
+ ).then(
1390
+ fn=lambda: "False",
1391
+ inputs=[],
1392
+ outputs=[is_example],
1393
+ )
1394
 
1395
+ # Real-time Visualization Updates
1396
  frame_filter.change(
1397
  update_visualization,
1398
  [target_dir_output, frame_filter, show_cam, is_example, filter_black_bg, filter_white_bg, show_mesh],
 
1427
  [reconstruction_output, log_output],
1428
  )
1429
 
1430
+ # Auto-update gallery on upload
1431
  def update_gallery_on_unified_upload(files, interval):
1432
  if not files:
1433
+ return None, None, "Ready for upload."
1434
  target_dir, image_paths = handle_uploads(files, interval)
1435
+ return target_dir, image_paths, "Upload complete. Click '๐Ÿš€ Reconstruct' to begin 3D processing."
1436
 
1437
  def show_resample_button(files):
1438
+ if not files: return gr.update(visible=False)
1439
+ video_exts = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
1440
+ has_video = False
1441
+ for f_data in files:
1442
+ f_path = str(f_data["name"] if isinstance(f_data, dict) else f_data)
1443
+ if os.path.splitext(f_path)[1].lower() in video_exts:
1444
+ has_video = True
1445
+ break
1446
  return gr.update(visible=has_video)
1447
 
 
 
 
1448
  def resample_video_with_new_interval(files, new_interval, current_target_dir):
1449
+ if not files: return current_target_dir, None, "No files to resample.", gr.update(visible=False)
1450
+ video_exts = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
1451
+ has_video = any(os.path.splitext(str(f["name"] if isinstance(f, dict) else f))[1].lower() in video_exts for f in files)
1452
+
1453
+ if not has_video: return current_target_dir, None, "No videos found.", gr.update(visible=False)
1454
+
 
 
 
1455
  if current_target_dir and current_target_dir != "None" and os.path.exists(current_target_dir):
1456
  shutil.rmtree(current_target_dir)
1457
+
1458
  target_dir, image_paths = handle_uploads(files, new_interval)
1459
+ return target_dir, image_paths, f"Video resampled ({new_interval}s interval). Click '๐Ÿš€ Reconstruct'.", gr.update(visible=False)
1460
 
1461
  unified_upload.change(
1462
  fn=update_gallery_on_unified_upload,
1463
  inputs=[unified_upload, s_time_interval],
1464
  outputs=[target_dir_output, image_gallery, log_output],
1465
+ ).then(
1466
+ fn=show_resample_button,
1467
+ inputs=[unified_upload],
1468
+ outputs=[resample_btn],
1469
+ )
1470
+
1471
+ s_time_interval.change(
1472
+ fn=show_resample_button,
1473
+ inputs=[unified_upload],
1474
+ outputs=[resample_btn],
1475
+ )
1476
 
 
1477
  resample_btn.click(
1478
  fn=resample_video_with_new_interval,
1479
  inputs=[unified_upload, s_time_interval, target_dir_output],
1480
  outputs=[target_dir_output, image_gallery, log_output, resample_btn],
1481
  )
1482
 
1483
+ # Measure Interactions
1484
  measure_image.select(
1485
  fn=measure,
1486
  inputs=[processed_data_state, measure_points_state, measure_view_selector],
1487
  outputs=[measure_image, measure_points_state, measure_text],
1488
  )
1489
 
1490
+ # Tab Navigations
1491
  prev_depth_btn.click(
1492
+ fn=lambda d, s: navigate_depth_view(d, s, -1),
1493
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_view_selector, depth_map],
1494
  )
1495
  next_depth_btn.click(
1496
+ fn=lambda d, s: navigate_depth_view(d, s, 1),
1497
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_view_selector, depth_map],
1498
  )
1499
  depth_view_selector.change(
1500
+ fn=lambda d, s: update_depth_view(d, int(s.split()[1]) - 1) if s else None,
1501
  inputs=[processed_data_state, depth_view_selector], outputs=[depth_map],
1502
  )
1503
 
1504
  prev_normal_btn.click(
1505
+ fn=lambda d, s: navigate_normal_view(d, s, -1),
1506
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_view_selector, normal_map],
1507
  )
1508
  next_normal_btn.click(
1509
+ fn=lambda d, s: navigate_normal_view(d, s, 1),
1510
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_view_selector, normal_map],
1511
  )
1512
  normal_view_selector.change(
1513
+ fn=lambda d, s: update_normal_view(d, int(s.split()[1]) - 1) if s else None,
1514
  inputs=[processed_data_state, normal_view_selector], outputs=[normal_map],
1515
  )
1516
 
1517
  prev_measure_btn.click(
1518
+ fn=lambda d, s: navigate_measure_view(d, s, -1),
1519
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_view_selector, measure_image, measure_points_state],
1520
  )
1521
  next_measure_btn.click(
1522
+ fn=lambda d, s: navigate_measure_view(d, s, 1),
1523
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_view_selector, measure_image, measure_points_state],
1524
  )
1525
  measure_view_selector.change(
1526
+ fn=lambda d, s: update_measure_view(d, int(s.split()[1]) - 1) if s else (None, []),
1527
  inputs=[processed_data_state, measure_view_selector], outputs=[measure_image, measure_points_state],
1528
  )
1529