Spaces:

MLBench
/

Inspectech_segmentation

Sleeping

App Files Files Community

Ayesha-Majeed committed on Feb 25

Commit

034884b

verified ·

1 Parent(s): f4aa803

Update binary_segmentation.py

Browse files

Files changed (1) hide show

binary_segmentation.py +124 -124

binary_segmentation.py CHANGED Viewed

@@ -566,166 +566,166 @@ class BinarySegmenter:
         except ImportError:
             raise ImportError("RMBG requires: pip install transformers")
-    def segment(
-        self,
-        image: np.ndarray,
-        threshold: float = 0.5,
-        return_type: Literal["mask", "rgba", "both"] = "mask"
-    ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
-        """
-        Segment foreground object from image.
-        Args:
-            image: Input image as numpy array (H, W, 3) in RGB or BGR
-            threshold: Threshold for binary mask (0-1)
-            return_type: What to return - "mask", "rgba", or "both"
-        Returns:
-            Tuple of (binary_mask, rgba_image) based on return_type
-        """
-        # Convert BGR to RGB if needed
-        if len(image.shape) == 3 and image.shape[2] == 3:
-            if image[0, 0, 0] != image[0, 0, 2]:  # Simple heuristic
-                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            else:
-                image_rgb = image
-        else:
-            raise ValueError("Input must be a color image (H, W, 3)")
-        # Convert to PIL
-        image_pil = Image.fromarray(image_rgb)
-        original_size = image_pil.size
-        # Transform
-        input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
-        if DEVICE == "cpu":
-            input_tensor = input_tensor.float()
-        # Inference
-        with torch.no_grad():
-            if self.model_type == "u2netp":
-                outputs = self.model(input_tensor)
-                pred = outputs[0]  # Main output
-            else:  # birefnet or rmbg
-                pred = self.model(input_tensor)[-1].sigmoid()
-        # Post-process
-        pred = pred.squeeze().cpu().numpy()
-        # Resize to original
-        pred_resized = cv2.resize(pred, original_size, interpolation=cv2.INTER_LINEAR)
-        # Normalize to 0-255
-        pred_normalized = ((pred_resized - pred_resized.min()) /
-                          (pred_resized.max() - pred_resized.min() + 1e-8) * 255)
-        # Create binary mask
-        binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
-        # Optional: Morphological operations for cleaner mask
-        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
-        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
-        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
-        # Create RGBA if needed
-        rgba_image = None
-        if return_type in ["rgba", "both"]:
-            # Create 4-channel image
-            rgba = np.dstack([image_rgb, binary_mask])
-            rgba_image = Image.fromarray(rgba, mode='RGBA')
-        # Return based on type
-        if return_type == "mask":
-            return binary_mask, None
-        elif return_type == "rgba":
-            return None, rgba_image
-        else:  # both
-            return binary_mask, rgba_image
     # def segment(
     #     self,
     #     image: np.ndarray,
     #     threshold: float = 0.5,
     #     return_type: Literal["mask", "rgba", "both"] = "mask"
     # ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
-    #     # Convert BGR to RGB
     #     if len(image.shape) == 3 and image.shape[2] == 3:
-    #         image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     #     else:
     #         raise ValueError("Input must be a color image (H, W, 3)")
-    #     # Store ORIGINAL dimensions (H, W) from numpy
-    #     orig_h, orig_w = image.shape[:2]
-    #     # Convert to PIL for transforms
     #     image_pil = Image.fromarray(image_rgb)
-    #     # Transform (model resizes internally e.g. 320x320 / 512x512)
     #     input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
     #     if DEVICE == "cpu":
     #         input_tensor = input_tensor.float()
     #     # Inference
     #     with torch.no_grad():
     #         if self.model_type == "u2netp":
     #             outputs = self.model(input_tensor)
-    #             pred = outputs[0]
     #         else:  # birefnet or rmbg
     #             pred = self.model(input_tensor)[-1].sigmoid()
-    #     # Post-process - squeeze to 2D
     #     pred = pred.squeeze().cpu().numpy()
-    #     # ✅ FIX: Resize back to ORIGINAL (width, height) for cv2
-    #     # cv2.resize takes (width, height) = (orig_w, orig_h)
-    #     pred_resized = cv2.resize(
-    #         pred,
-    #         (orig_w, orig_h),          # ← correct order for cv2
-    #         interpolation=cv2.INTER_LINEAR
-    #     )
-    #     # Verify shape matches original
-    #     assert pred_resized.shape == (orig_h, orig_w), \
-    #         f"Shape mismatch! Got {pred_resized.shape}, expected ({orig_h}, {orig_w})"
     #     # Normalize to 0-255
-    #     pred_normalized = (
-    #         (pred_resized - pred_resized.min()) /
-    #         (pred_resized.max() - pred_resized.min() + 1e-8) * 255
-    #     )
-    #     # Binary mask
     #     binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
-    #     # Morphological cleanup
     #     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
     #     binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
     #     binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
-    #     # ✅ Verify final mask dimensions match input
-    #     assert binary_mask.shape == (orig_h, orig_w), \
-    #         f"Final mask mismatch! Got {binary_mask.shape}, expected ({orig_h}, {orig_w})"
-    #     logger.info(f"Input shape: ({orig_h}, {orig_w}) | Output mask shape: {binary_mask.shape} ✅")
     #     # Create RGBA if needed
     #     rgba_image = None
     #     if return_type in ["rgba", "both"]:
     #         rgba = np.dstack([image_rgb, binary_mask])
     #         rgba_image = Image.fromarray(rgba, mode='RGBA')
-    #         # ✅ Verify RGBA dimensions
-    #         assert rgba_image.size == (orig_w, orig_h), \
-    #             f"RGBA size mismatch! Got {rgba_image.size}, expected ({orig_w}, {orig_h})"
     #     if return_type == "mask":
     #         return binary_mask, None
     #     elif return_type == "rgba":
     #         return None, rgba_image
-    #     else:
     #         return binary_mask, rgba_image
     def batch_segment(
         self,

         except ImportError:
             raise ImportError("RMBG requires: pip install transformers")
     # def segment(
     #     self,
     #     image: np.ndarray,
     #     threshold: float = 0.5,
     #     return_type: Literal["mask", "rgba", "both"] = "mask"
     # ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
+    #     """
+    #     Segment foreground object from image.
+    #     Args:
+    #         image: Input image as numpy array (H, W, 3) in RGB or BGR
+    #         threshold: Threshold for binary mask (0-1)
+    #         return_type: What to return - "mask", "rgba", or "both"
+    #     Returns:
+    #         Tuple of (binary_mask, rgba_image) based on return_type
+    #     """
+    #     # Convert BGR to RGB if needed
     #     if len(image.shape) == 3 and image.shape[2] == 3:
+    #         if image[0, 0, 0] != image[0, 0, 2]:  # Simple heuristic
+    #             image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    #         else:
+    #             image_rgb = image
     #     else:
     #         raise ValueError("Input must be a color image (H, W, 3)")
+    #     # Convert to PIL
     #     image_pil = Image.fromarray(image_rgb)
+    #     original_size = image_pil.size
+    #     # Transform
     #     input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
     #     if DEVICE == "cpu":
     #         input_tensor = input_tensor.float()
     #     # Inference
     #     with torch.no_grad():
     #         if self.model_type == "u2netp":
     #             outputs = self.model(input_tensor)
+    #             pred = outputs[0]  # Main output
     #         else:  # birefnet or rmbg
     #             pred = self.model(input_tensor)[-1].sigmoid()
+    #     # Post-process
     #     pred = pred.squeeze().cpu().numpy()
+    #     # Resize to original
+    #     pred_resized = cv2.resize(pred, original_size, interpolation=cv2.INTER_LINEAR)
     #     # Normalize to 0-255
+    #     pred_normalized = ((pred_resized - pred_resized.min()) /
+    #                       (pred_resized.max() - pred_resized.min() + 1e-8) * 255)
+    #     # Create binary mask
     #     binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
+    #     # Optional: Morphological operations for cleaner mask
     #     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
     #     binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
     #     binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
     #     # Create RGBA if needed
     #     rgba_image = None
     #     if return_type in ["rgba", "both"]:
+    #         # Create 4-channel image
     #         rgba = np.dstack([image_rgb, binary_mask])
     #         rgba_image = Image.fromarray(rgba, mode='RGBA')
+    #     # Return based on type
     #     if return_type == "mask":
     #         return binary_mask, None
     #     elif return_type == "rgba":
     #         return None, rgba_image
+    #     else:  # both
     #         return binary_mask, rgba_image
+    def segment(
+        self,
+        image: np.ndarray,
+        threshold: float = 0.5,
+        return_type: Literal["mask", "rgba", "both"] = "mask"
+    ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
+        """
+        Segment the foreground object from an image.
+
+        Args:
+            image: Color image as a numpy array of shape (H, W, 3). The channels
+                are always swapped with cv2.COLOR_BGR2RGB, so the input is
+                treated as BGR.
+            threshold: Cut-off in the 0-1 range, applied to the prediction after
+                it has been min-max normalised for this particular image.
+            return_type: What to return - "mask", "rgba", or "both".
+
+        Returns:
+            Tuple of (binary_mask, rgba_image). binary_mask is a uint8 array of
+            shape (H, W) holding 0 or 255; rgba_image is a PIL image whose alpha
+            channel is that mask. The element that was not requested is None.
+
+        Raises:
+            ValueError: If image is not a 3-dimensional array with 3 channels.
+        """
+        # Guard clause: reject anything that is not (H, W, 3) up front
+        if image.ndim != 3 or image.shape[2] != 3:
+            raise ValueError("Input must be a color image (H, W, 3)")
+        # Convert BGR to RGB
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # Store ORIGINAL dimensions (H, W) from numpy
+        orig_h, orig_w = image.shape[:2]
+        # Convert to PIL for transforms
+        image_pil = Image.fromarray(image_rgb)
+        # Transform and add the batch dimension
+        input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
+        if DEVICE == "cpu":
+            input_tensor = input_tensor.float()
+        # Inference
+        with torch.no_grad():
+            if self.model_type == "u2netp":
+                outputs = self.model(input_tensor)
+                pred = outputs[0]
+            else:  # birefnet or rmbg
+                pred = self.model(input_tensor)[-1].sigmoid()
+        # Post-process - squeeze to 2D and force float32 before handing the
+        # array to OpenCV. Only the CPU path casts the input to float32 above,
+        # so the prediction may arrive in half precision on other devices
+        # (assumption - TODO confirm the model dtype); float32 is always safe
+        # for cv2.resize.
+        pred = pred.squeeze().float().cpu().numpy()
+        # cv2.resize takes dsize as (width, height), i.e. (orig_w, orig_h)
+        pred_resized = cv2.resize(
+            pred,
+            (orig_w, orig_h),
+            interpolation=cv2.INTER_LINEAR
+        )
+        # Internal sanity check: resized prediction must match the input size
+        assert pred_resized.shape == (orig_h, orig_w), \
+            f"Shape mismatch! Got {pred_resized.shape}, expected ({orig_h}, {orig_w})"
+        # Normalize to 0-255. This is a per-image min-max stretch, so the
+        # threshold below is relative to this image's own prediction range;
+        # the 1e-8 term avoids division by zero on a constant prediction.
+        pred_min = pred_resized.min()
+        pred_max = pred_resized.max()
+        pred_normalized = (pred_resized - pred_min) / (pred_max - pred_min + 1e-8) * 255
+        # Binary mask
+        binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
+        # Morphological cleanup: close small holes, then remove small specks
+        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
+        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
+        # Internal sanity check: final mask dimensions must match the input
+        assert binary_mask.shape == (orig_h, orig_w), \
+            f"Final mask mismatch! Got {binary_mask.shape}, expected ({orig_h}, {orig_w})"
+        # Lazy %-style arguments: the message is only formatted if INFO is enabled
+        logger.info(
+            "Input shape: (%s, %s) | Output mask shape: %s ✅",
+            orig_h, orig_w, binary_mask.shape
+        )
+        # Create RGBA if needed (the mask becomes the alpha channel)
+        rgba_image = None
+        if return_type in ["rgba", "both"]:
+            rgba = np.dstack([image_rgb, binary_mask])
+            rgba_image = Image.fromarray(rgba, mode='RGBA')
+            # Internal sanity check: PIL reports size as (width, height)
+            assert rgba_image.size == (orig_w, orig_h), \
+                f"RGBA size mismatch! Got {rgba_image.size}, expected ({orig_w}, {orig_h})"
+        if return_type == "mask":
+            return binary_mask, None
+        elif return_type == "rgba":
+            return None, rgba_image
+        else:
+            # "both"; any other value also lands here, with rgba_image left as None
+            return binary_mask, rgba_image
     def batch_segment(
         self,