Spaces:

BiasLab2025
/

perception

Running on A10G

App Files Files Community

Zhen Ye committed on Jan 10

Commit

91f3b56

1 Parent(s): 94c85d4

Added fallback for NaN

Browse files

Files changed (2) hide show

inference.py +24 -4
models/depth_estimators/depth_pro.py +36 -17

inference.py CHANGED Viewed

@@ -433,8 +433,24 @@ def process_frames_depth(
     # Compute global min/max (using percentiles to handle outliers)
     all_depths = np.concatenate(all_values)
-    global_min = np.percentile(all_depths, 1)  # 1st percentile to clip outliers
-    global_max = np.percentile(all_depths, 99)  # 99th percentile
     logging.info(
         "Depth range: %.2f - %.2f meters (1st-99th percentile)",
@@ -472,11 +488,15 @@ def colorize_depth_map(
     """
     import cv2
     if global_max - global_min < 1e-6:  # Handle uniform depth
-        depth_norm = np.zeros_like(depth_map, dtype=np.uint8)
     else:
         # Clip to global range to handle outliers
-        depth_clipped = np.clip(depth_map, global_min, global_max)
         depth_norm = ((depth_clipped - global_min) / (global_max - global_min) * 255).astype(np.uint8)
     # Apply TURBO colormap for vibrant, perceptually uniform visualization

     # Compute global min/max (using percentiles to handle outliers)
     all_depths = np.concatenate(all_values)
+    # Filter out NaN and inf values
+    valid_depths = all_depths[np.isfinite(all_depths)]
+    if len(valid_depths) == 0:
+        logging.warning("All depth values are NaN/inf - using fallback range")
+        global_min = 0.0
+        global_max = 1.0
+    else:
+        global_min = float(np.percentile(valid_depths, 1))  # 1st percentile to clip outliers
+        global_max = float(np.percentile(valid_depths, 99))  # 99th percentile
+        # Handle edge case where min == max
+        if abs(global_max - global_min) < 1e-6:
+            global_min = float(valid_depths.min())
+            global_max = float(valid_depths.max())
+            if abs(global_max - global_min) < 1e-6:
+                global_max = global_min + 1.0
     logging.info(
         "Depth range: %.2f - %.2f meters (1st-99th percentile)",
     """
     import cv2
+    # Replace NaN/inf with min value for visualization
+    depth_clean = np.copy(depth_map)
+    depth_clean[~np.isfinite(depth_clean)] = global_min
     if global_max - global_min < 1e-6:  # Handle uniform depth
+        depth_norm = np.zeros_like(depth_clean, dtype=np.uint8)
     else:
         # Clip to global range to handle outliers
+        depth_clipped = np.clip(depth_clean, global_min, global_max)
         depth_norm = ((depth_clipped - global_min) / (global_max - global_min) * 255).astype(np.uint8)
     # Apply TURBO colormap for vibrant, perceptually uniform visualization

models/depth_estimators/depth_pro.py CHANGED Viewed

@@ -62,23 +62,42 @@ class DepthProEstimator(DepthEstimator):
         with torch.no_grad():
             outputs = self.model(**inputs)
-        # Post-process to get depth and focal length
-        post_processed = self.image_processor.post_process_depth_estimation(
-            outputs,
-            target_sizes=[(height, width)],
-        )
-        # Extract depth map and focal length
-        depth_tensor = post_processed[0]["predicted_depth"]  # Already at target size
-        focal_length_value = post_processed[0].get("focal_length", 1.0)
-        # Convert to numpy
-        depth_map = depth_tensor.cpu().numpy()
-        # focal_length might be a tensor, convert to float
-        if isinstance(focal_length_value, torch.Tensor):
-            focal_length = float(focal_length_value.item())
         else:
-            focal_length = float(focal_length_value)
         return DepthResult(depth_map=depth_map, focal_length=focal_length)

         with torch.no_grad():
             outputs = self.model(**inputs)
+        # Get raw depth prediction
+        raw_depth = outputs.predicted_depth  # Shape: [1, 1, H, W]
+        # Resize to target size if needed
+        if raw_depth.shape[-2:] != (height, width):
+            import torch.nn.functional as F
+            raw_depth = F.interpolate(
+                raw_depth,
+                size=(height, width),
+                mode='bilinear',
+                align_corners=False
+            )
+        # Convert to numpy and remove batch/channel dims
+        depth_map = raw_depth.squeeze().cpu().numpy()  # Shape: [H, W]
+        # Get focal length from outputs if available
+        if hasattr(outputs, 'fov_deg') and outputs.fov_deg is not None:
+            # Convert field of view to focal length
+            fov_rad = outputs.fov_deg * np.pi / 180.0
+            focal_length = float(width / (2.0 * np.tan(fov_rad / 2.0)))
         else:
+            focal_length = 1.0
+        # Debug: Check for NaN values
+        if np.isnan(depth_map).any():
+            nan_count = np.isnan(depth_map).sum()
+            total = depth_map.size
+            logging.warning(
+                f"Depth map contains {nan_count}/{total} ({100*nan_count/total:.1f}%) NaN values"
+            )
+            logging.warning(f"Depth map shape: {depth_map.shape}, dtype: {depth_map.dtype}")
+            valid_depths = depth_map[np.isfinite(depth_map)]
+            if len(valid_depths) > 0:
+                logging.warning(
+                    f"Valid depth range: {valid_depths.min():.4f} - {valid_depths.max():.4f}"
+                )
         return DepthResult(depth_map=depth_map, focal_length=focal_length)