Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 15, 2025

Commit

6bc7492

1 Parent(s): b1313ed

Update app.py

Browse files

Files changed (1) hide show

app.py +147 -22

app.py CHANGED Viewed

@@ -58,11 +58,12 @@ def setup_gpu():
 logger.info(f"Device: {DEVICE} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB | Type: {GPU_TYPE}")
-# SAM2 Lazy Loader with Enhanced Performance
-class SAM2EnhancedLazy:
     def __init__(self):
         self.predictor = None
         self.current_model_size = None
         self.model_cache_dir = Path(tempfile.gettempdir()) / "sam2_cache"
         self.model_cache_dir.mkdir(exist_ok=True)
@@ -99,10 +100,114 @@ def clear_model(self):
             self.predictor = None
             self.current_model_size = None
         if CUDA_AVAILABLE:
             torch.cuda.empty_cache()
         gc.collect()
-        logger.info("SAM2 model cleared from memory")
     def download_model(self, model_size, progress_fn=None):
         """Download model with progress tracking and verification"""
@@ -128,7 +233,7 @@ def download_model(self, model_size, progress_fn=None):
                         downloaded += len(chunk)
                         if progress_fn and total_size > 0:
                             progress = downloaded / total_size * 0.15  # 15% of total progress
-                            progress_fn(progress, f"Downloading SAM2 {model_size} ({downloaded/1024/1024:.1f}MB/{total_size/1024/1024:.1f}MB)")
             logger.info(f"SAM2 {model_size} downloaded successfully")
             return model_path
@@ -142,6 +247,9 @@ def download_model(self, model_size, progress_fn=None):
     def load_model(self, model_size, progress_fn=None):
         """Load SAM2 model with optimization"""
         try:
             # Import SAM2 (lazy import to avoid import errors if not available)
             try:
                 from sam2.build_sam import build_sam2
@@ -153,7 +261,7 @@ def load_model(self, model_size, progress_fn=None):
             model_path = self.download_model(model_size, progress_fn)
             if progress_fn:
-                progress_fn(0.2, f"Loading SAM2 {model_size} model...")
             # Build model
             model_config = self.models[model_size]["config"]
@@ -168,9 +276,9 @@ def load_model(self, model_size, progress_fn=None):
             self.current_model_size = model_size
             if progress_fn:
-                progress_fn(0.25, f"SAM2 {model_size} loaded successfully!")
-            logger.info(f"SAM2 {model_size} model loaded and ready")
             return self.predictor
         except Exception as e:
@@ -185,26 +293,35 @@ def get_predictor(self, model_size="small", progress_fn=None):
             return self.load_model(model_size, progress_fn)
         return self.predictor
-    def segment_image(self, image, model_size="small", progress_fn=None):
-        """Segment image with SAM2"""
         predictor = self.get_predictor(model_size, progress_fn)
         try:
             predictor.set_image(image)
-            h, w = image.shape[:2]
-            # Smart point selection for better segmentation
-            center_points = [
-                [w//2, h//2],      # Center
-                [w//2, h//3],      # Upper center
-                [w//2, 2*h//3],    # Lower center
-                [w//3, h//2],      # Left center
-                [2*w//3, h//2]     # Right center
-            ]
-            point_coords = np.array(center_points)
             point_labels = np.ones(len(point_coords))
             masks, scores, logits = predictor.predict(
                 point_coords=point_coords,
                 point_labels=point_labels,
@@ -216,15 +333,23 @@ def segment_image(self, image, model_size="small", progress_fn=None):
             best_mask = masks[best_mask_idx]
             best_score = scores[best_mask_idx]
-            # Post-process mask for better edges
             kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
             best_mask = cv2.morphologyEx(best_mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
             best_mask = cv2.GaussianBlur(best_mask.astype(np.float32), (3, 3), 1.0)
             return best_mask, float(best_score)
         except Exception as e:
-            logger.error(f"Segmentation failed: {e}")
             return None, 0.0
 # MatAnyone Professional Video Matting

 logger.info(f"Device: {DEVICE} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB | Type: {GPU_TYPE}")
+# Enhanced SAM2 with Person Detection and Tracking
+class SAM2WithPersonDetection:
     def __init__(self):
         self.predictor = None
         self.current_model_size = None
+        self.person_detector = None
         self.model_cache_dir = Path(tempfile.gettempdir()) / "sam2_cache"
         self.model_cache_dir.mkdir(exist_ok=True)
             self.predictor = None
             self.current_model_size = None
+        if self.person_detector:
+            del self.person_detector
+            self.person_detector = None
         if CUDA_AVAILABLE:
             torch.cuda.empty_cache()
         gc.collect()
+        logger.info("SAM2 model and person detector cleared from memory")
+    def load_person_detector(self, progress_fn=None):  # progress_fn: optional callable invoked as progress_fn(fraction, message)
+        """Load lightweight person detector"""
+        if self.person_detector is not None:  # already built: reuse the cached instance
+            return self.person_detector
+        try:
+            if progress_fn:
+                progress_fn(0.05, "Loading person detector...")
+            # NOTE(review): no DNN / MobileNet model is loaded here; the object built below is an OpenCV MOG2 background subtractor
+            import cv2
+            # The instance is cached on self.person_detector so later calls return early
+            # NOTE(review): detect_person_bbox, as shown in this diff, does not read self.person_detector - confirm where it is consumed
+            self.person_detector = cv2.createBackgroundSubtractorMOG2(detectShadows=True)
+            if progress_fn:
+                progress_fn(0.1, "Person detector loaded!")
+            logger.info("Person detector loaded successfully")
+            return self.person_detector
+        except Exception as e:  # best-effort: log a warning and return None instead of raising
+            logger.warning(f"Failed to load person detector: {e}")
+            self.person_detector = None
+            return None
+    def detect_person_bbox(self, image, progress_fn=None):  # progress_fn is accepted but never used in this method
+        """Detect person bounding box in image"""
+        try:
+            # Heuristic only: the largest edge contour is treated as the subject; no learned person model is involved
+            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # assumes image is an RGB array - TODO confirm with callers
+            # Smooth with a 5x5 Gaussian kernel before edge detection to reduce noise
+            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+            # Canny edge map with hysteresis thresholds 50 and 150
+            edges = cv2.Canny(blurred, 50, 150)
+            # Outermost contours only (RETR_EXTERNAL). NOTE(review): two-value unpacking presumably targets OpenCV 4.x - confirm the pinned version
+            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            if not contours:  # no edges found -> no detection
+                return None
+            # Candidate subject = contour with the largest cv2.contourArea
+            largest_contour = max(contours, key=cv2.contourArea)
+            # Axis-aligned bounding rectangle as (x, y, width, height)
+            x, y, w, h = cv2.boundingRect(largest_contour)
+            # Reject candidates whose box is implausibly small or large relative to the frame
+            image_area = image.shape[0] * image.shape[1]
+            bbox_area = w * h
+            # Accept only boxes covering between 5% and 80% of the image area
+            if bbox_area < image_area * 0.05 or bbox_area > image_area * 0.8:
+                return None
+            # Aspect-ratio gate: rejects boxes whose height is below 0.8 * width
+            if h < w * 0.8:  # i.e. requires h >= 0.8 * w, so a box slightly wider than tall still passes
+                return None
+            return [x, y, x + w, y + h]  # corners as [x1, y1, x2, y2]
+        except Exception as e:  # any failure is logged and reported as "no person" (None)
+            logger.warning(f"Person detection failed: {e}")
+            return None
+    def get_smart_points_from_bbox(self, bbox, image_shape):  # returns a list of [x, y] integer pairs
+        """Generate smart points within person bounding box"""
+        if bbox is None:
+            # No bbox: 3x3 grid at 1/4, 1/2, 3/4 of the width and 1/3, 1/2, 2/3 of the height
+            h, w = image_shape[:2]  # image_shape is read as (height, width, ...)
+            return [
+                [w//4, h//3], [w//2, h//3], [3*w//4, h//3],
+                [w//4, h//2], [w//2, h//2], [3*w//4, h//2],
+                [w//4, 2*h//3], [w//2, 2*h//3], [3*w//4, 2*h//3]
+            ]
+        x1, y1, x2, y2 = bbox  # [x1, y1, x2, y2], the format detect_person_bbox returns
+        center_x = (x1 + x2) // 2
+        center_y = (y1 + y2) // 2
+        width = x2 - x1
+        height = y2 - y1
+        # Eight points inside the box, all computed with integer (floor) division
+        points = [
+            [center_x, center_y],                           # Center of person
+            [center_x, y1 + height//4],                     # Upper torso/head
+            [center_x, y1 + height//2],                     # Mid torso (same coordinate as the center point for integer bboxes)
+            [center_x, y1 + 3*height//4],                   # Lower torso
+            [x1 + width//4, center_y],                      # Left side
+            [x2 - width//4, center_y],                      # Right side
+            [center_x - width//6, y1 + height//3],          # Left shoulder area
+            [center_x + width//6, y1 + height//3],          # Right shoulder area
+        ]
+        return points
     def download_model(self, model_size, progress_fn=None):
         """Download model with progress tracking and verification"""
                         downloaded += len(chunk)
                         if progress_fn and total_size > 0:
                             progress = downloaded / total_size * 0.15  # 15% of total progress
+                            progress_fn(0.1 + progress, f"Downloading SAM2 {model_size} ({downloaded/1024/1024:.1f}MB/{total_size/1024/1024:.1f}MB)")
             logger.info(f"SAM2 {model_size} downloaded successfully")
             return model_path
     def load_model(self, model_size, progress_fn=None):
         """Load SAM2 model with optimization"""
         try:
+            # Load person detector first
+            self.load_person_detector(progress_fn)
             # Import SAM2 (lazy import to avoid import errors if not available)
             try:
                 from sam2.build_sam import build_sam2
             model_path = self.download_model(model_size, progress_fn)
             if progress_fn:
+                progress_fn(0.25, f"Loading SAM2 {model_size} model...")
             # Build model
             model_config = self.models[model_size]["config"]
             self.current_model_size = model_size
             if progress_fn:
+                progress_fn(0.3, f"SAM2 {model_size} with person detection ready!")
+            logger.info(f"SAM2 {model_size} model with person detection loaded and ready")
             return self.predictor
         except Exception as e:
             return self.load_model(model_size, progress_fn)
         return self.predictor
+    def segment_image_smart(self, image, model_size="small", progress_fn=None):
+        """Smart segmentation: Find person first, then segment"""
         predictor = self.get_predictor(model_size, progress_fn)
         try:
+            if progress_fn:
+                progress_fn(0.32, "Finding person in image...")
+            # Step 1: Detect person bounding box
+            person_bbox = self.detect_person_bbox(image, progress_fn)
+            if progress_fn:
+                if person_bbox:
+                    progress_fn(0.35, f"Person found! Segmenting with high precision...")
+                else:
+                    progress_fn(0.35, f"Using grid search for segmentation...")
+            # Step 2: Generate smart points based on person location
+            smart_points = self.get_smart_points_from_bbox(person_bbox, image.shape)
+            # Step 3: Set image and predict with smart points
             predictor.set_image(image)
+            point_coords = np.array(smart_points)
             point_labels = np.ones(len(point_coords))
+            if progress_fn:
+                progress_fn(0.38, f"SAM2 segmenting with {len(smart_points)} smart points...")
             masks, scores, logits = predictor.predict(
                 point_coords=point_coords,
                 point_labels=point_labels,
             best_mask = masks[best_mask_idx]
             best_score = scores[best_mask_idx]
+            # Enhanced post-processing for better edges
             kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
             best_mask = cv2.morphologyEx(best_mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
+            # Apply gentle blur for smoother edges
             best_mask = cv2.GaussianBlur(best_mask.astype(np.float32), (3, 3), 1.0)
+            # If we found a person bbox, boost confidence
+            if person_bbox and best_score > 0.3:
+                best_score = min(best_score * 1.5, 1.0)  # Boost confidence
+            logger.info(f"Smart segmentation complete: confidence={best_score:.3f}, person_detected={person_bbox is not None}")
             return best_mask, float(best_score)
         except Exception as e:
+            logger.error(f"Smart segmentation failed: {e}")
             return None, 0.0
 # MatAnyone Professional Video Matting