Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 26, 2025

Commit

6fdc616

1 Parent(s): 0d03497

Create utils/segmentation.py

Browse files

Files changed (1) hide show

utils/segmentation.py +293 -0

utils/segmentation.py ADDED Viewed

	@@ -0,0 +1,293 @@

+#!/usr/bin/env python3
+"""
+utils.segmentation
+─────────────────────────────────────────────────────────────────────────────
+All high-quality person-segmentation code for BackgroundFX Pro.
+Exports
+-------
+segment_person_hq(image, predictor, fallback_enabled=True) → np.ndarray
+segment_person_hq_original(image, predictor, fallback_enabled=True) → np.ndarray
+Everything else is prefixed “_” and considered private.
+"""
+from __future__ import annotations
+from typing import Any, Tuple, Optional, Dict
+import logging, os, math
+import cv2
+import numpy as np
+import torch
+log = logging.getLogger(__name__)
+# ============================================================================
+# TUNABLE CONSTANTS
+# ============================================================================
+# Feature switches read by segment_person_hq():
+#   USE_ENHANCED_SEGMENTATION – when False every call is routed to
+#                               segment_person_hq_original().
+#   USE_INTELLIGENT_PROMPTING – saliency-derived prompts instead of the fixed
+#                               basic prompts.
+#   USE_ITERATIVE_REFINEMENT  – pass the SAM-2 mask through
+#                               _auto_refine_mask_iteratively().
+USE_ENHANCED_SEGMENTATION   = True
+USE_INTELLIGENT_PROMPTING   = True
+USE_ITERATIVE_REFINEMENT    = True
+# Accepted foreground share of the frame (pixels > 127 divided by h*w);
+# _validate_mask_quality() rejects masks outside [MIN, MAX].
+MIN_AREA_RATIO = 0.015
+MAX_AREA_RATIO = 0.97
+# Cut-off applied to the [0, 1]-normalised saliency map
+# (_generate_smart_prompts() uses this value minus 0.1).
+SALIENCY_THRESH = 0.65
+# Iteration count handed to cv2.grabCut.
+GRABCUT_ITERS   = 3
+# ----------------------------------------------------------------------------
+# Public -- main entry-points
+# ----------------------------------------------------------------------------
+__all__ = [
+    "segment_person_hq",
+    "segment_person_hq_original",
+]
+# ============================================================================
+# MAIN API
+# ============================================================================
+def segment_person_hq(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
+    """
+    High-quality person segmentation.  Tries SAM-2 with smart prompts first,
+    then a classical CV cascade, then a geometric fallback.
+    Returns uint8 mask (0/255).  Never raises if fallback_enabled=True.
+    """
+    if not USE_ENHANCED_SEGMENTATION:
+        return segment_person_hq_original(image, predictor, fallback_enabled)
+    if image is None or image.size == 0:
+        raise ValueError("Invalid input image")
+    # 1) — SAM-2 path -------------------------------------------------------
+    if predictor and hasattr(predictor, "set_image") and hasattr(predictor, "predict"):
+        try:
+            predictor.set_image(image)
+            mask = (
+                _segment_with_intelligent_prompts(image, predictor)
+                if USE_INTELLIGENT_PROMPTING
+                else _segment_with_basic_prompts(image, predictor)
+            )
+            if USE_ITERATIVE_REFINEMENT:
+                mask = _auto_refine_mask_iteratively(image, mask, predictor)
+            if _validate_mask_quality(mask, image.shape[:2]):
+                return mask
+            log.warning("SAM2 mask failed validation → fallback")
+        except Exception as e:
+            log.warning(f"SAM2 path failed: {e}")
+    # 2) — Classical cascade ----------------------------------------------
+    try:
+        mask = _classical_segmentation_cascade(image)
+        if _validate_mask_quality(mask, image.shape[:2]):
+            return mask
+        log.warning("Classical cascade weak → geometric fallback")
+    except Exception as e:
+        log.debug(f"Classical cascade error: {e}")
+    # 3) — Last-chance geometric ellipse ----------------------------------
+    return _geometric_person_mask(image)
+def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
+    """
+    Very first implementation kept for rollback.  Fewer smarts, still robust.
+    """
+    if image is None or image.size == 0:
+        raise ValueError("Invalid input image")
+    try:
+        if predictor and hasattr(predictor, "set_image") and hasattr(predictor, "predict"):
+            h, w = image.shape[:2]
+            predictor.set_image(image)
+            points = np.array([
+                [w//2, h//4],
+                [w//2, h//2],
+                [w//2, 3*h//4],
+                [w//3, h//2],
+                [2*w//3, h//2],
+            ], dtype=np.float32)
+            labels = np.ones(len(points), np.int32)
+            with torch.no_grad():
+                masks, scores, _ = predictor.predict(
+                    point_coords=points,
+                    point_labels=labels,
+                    multimask_output=True,
+                )
+            if masks is not None and len(masks):
+                mask = _process_mask(masks[int(np.argmax(scores))])
+                if _validate_mask_quality(mask, image.shape[:2]):
+                    return mask
+        if fallback_enabled:
+            return _classical_segmentation_cascade(image)
+        raise RuntimeError("SAM2 failed and fallback disabled")
+    except Exception as e:
+        log.warning(f"segment_person_hq_original error: {e}")
+        return _classical_segmentation_cascade(image)
+# ============================================================================
+# INTELLIGENT + BASIC PROMPTING
+# ============================================================================
+def _segment_with_intelligent_prompts(image: np.ndarray, predictor: Any) -> np.ndarray:
+    pos, neg = _generate_smart_prompts(image)
+    return _sam2_predict(image, predictor, pos, neg)
+def _segment_with_basic_prompts(image: np.ndarray, predictor: Any) -> np.ndarray:
+    h, w = image.shape[:2]
+    pos = np.array([[w//2, h//3], [w//2, h//2], [w//2, 2*h//3]], np.float32)
+    neg = np.array([[10, 10], [w-10, 10], [10, h-10], [w-10, h-10]], np.float32)
+    return _sam2_predict(image, predictor, pos, neg)
+def _sam2_predict(image: np.ndarray, predictor: Any,
+                  pos_points: np.ndarray, neg_points: np.ndarray) -> np.ndarray:
+    if pos_points.size == 0:
+        pos_points = np.array([[image.shape[1]//2, image.shape[0]//2]], np.float32)
+    points = np.vstack([pos_points, neg_points])
+    labels = np.hstack([np.ones(len(pos_points)), np.zeros(len(neg_points))]).astype(np.int32)
+    with torch.no_grad():
+        masks, scores, _ = predictor.predict(
+            point_coords=points,
+            point_labels=labels,
+            multimask_output=True,
+        )
+    if masks is None or len(masks) == 0:
+        raise RuntimeError("SAM2 produced no masks")
+    best = masks[int(np.argmax(scores))] if scores is not None else masks[0]
+    return _process_mask(best)
+def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Simple saliency-based heuristic to auto-place positive / negative points.
+    """
+    h, w = image.shape[:2]
+    sal = _compute_saliency(image)
+    pos, neg = [], []
+    if sal is not None:
+        high = sal > (SALIENCY_THRESH - .1)
+        contours, _ = cv2.findContours((high*255).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        for c in sorted(contours, key=cv2.contourArea, reverse=True)[:3]:
+            M = cv2.moments(c)
+            if M["m00"]:
+                pos.append([int(M["m10"]/M["m00"]), int(M["m01"]/M["m00"])])
+    if not pos:
+        pos = [[w//2, h//2]]
+    neg = [[10, 10], [w-10, 10], [10, h-10], [w-10, h-10]]
+    return np.asarray(pos, np.float32), np.asarray(neg, np.float32)
+# ============================================================================
+# CLASSICAL SEGMENTATION CASCADE
+# ============================================================================
+def _classical_segmentation_cascade(image: np.ndarray) -> np.ndarray:
+    """
+    Edge-median background subtraction → saliency flood-fill → GrabCut.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    edge_px = np.concatenate([gray[0], gray[-1], gray[:, 0], gray[:, -1]])
+    diff = np.abs(gray.astype(float) - np.median(edge_px))
+    mask = (diff > 30).astype(np.uint8) * 255
+    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
+                            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)))
+    if _validate_mask_quality(mask, image.shape[:2]):
+        return mask
+    # Saliency + flood-fill
+    mask = _refine_with_saliency(image, mask)
+    if _validate_mask_quality(mask, image.shape[:2]):
+        return mask
+    # GrabCut
+    mask = _refine_with_grabcut(image, mask)
+    if _validate_mask_quality(mask, image.shape[:2]):
+        return mask
+    # Geometric fallback
+    return _geometric_person_mask(image)
+#  Saliency, GrabCut helpers --------------------------------------------------
+def _compute_saliency(image: np.ndarray) -> Optional[np.ndarray]:
+    try:
+        if hasattr(cv2, "saliency"):
+            s = cv2.saliency.StaticSaliencySpectralResidual_create()
+            ok, smap = s.computeSaliency(image)
+            if ok:
+                smap = (smap - smap.min()) / max(1e-6, smap.max()-smap.min())
+                return smap
+    except Exception:
+        pass
+    return None
+def _auto_person_rect(image):
+    sal = _compute_saliency(image)
+    if sal is None:
+        return None
+    m = (sal > SALIENCY_THRESH).astype(np.uint8)
+    cnts, _ = cv2.findContours(m*255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if not cnts:
+        return None
+    x,y,w,h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
+    H,W = image.shape[:2]
+    pad = 0.05
+    x = max(0, int(x-W*pad)); y = max(0, int(y-H*pad))
+    w = min(W-x, int(w*(1+2*pad))); h = min(H-y, int(h*(1+2*pad)))
+    return x,y,w,h
+def _refine_with_grabcut(image: np.ndarray, seed: np.ndarray) -> np.ndarray:
+    h,w = image.shape[:2]
+    gc = np.full((h,w), cv2.GC_PR_BGD, np.uint8)
+    gc[seed>200] = cv2.GC_FGD
+    rect = _auto_person_rect(image) or (w//4, h//6, w//2, int(h*0.7))
+    bgd, fgd = np.zeros((1,65), np.float64), np.zeros((1,65), np.float64)
+    cv2.grabCut(image, gc, rect, bgd, fgd, GRABCUT_ITERS, cv2.GC_INIT_WITH_MASK)
+    return np.where((gc==cv2.GC_FGD)|(gc==cv2.GC_PR_FGD), 255, 0).astype(np.uint8)
+def _refine_with_saliency(image: np.ndarray, seed: np.ndarray) -> np.ndarray:
+    sal = _compute_saliency(image)
+    if sal is None:
+        return seed
+    high = (sal > SALIENCY_THRESH).astype(np.uint8)*255
+    ys,xs = np.where(seed>127)
+    cy,cx = int(np.mean(ys)) if len(ys) else image.shape[0]//2, int(np.mean(xs)) if len(xs) else image.shape[1]//2
+    ff = high.copy()
+    cv2.floodFill(ff, None, (cx,cy), 255, loDiff=5, upDiff=5)
+    return ff
+# ============================================================================
+# QUALITY / HELPER FUNCTIONS
+# ============================================================================
+def _validate_mask_quality(mask: np.ndarray, shape: Tuple[int,int]) -> bool:
+    h,w = shape
+    ratio = np.sum(mask>127)/(h*w)
+    return MIN_AREA_RATIO <= ratio <= MAX_AREA_RATIO
+def _process_mask(mask: np.ndarray) -> np.ndarray:
+    if mask.dtype in (np.float32, np.float64):
+        if mask.max() <= 1.0:
+            mask = (mask*255).astype(np.uint8)
+    if mask.dtype != np.uint8:
+        mask = mask.astype(np.uint8)
+    if mask.ndim == 3:
+        mask = mask.squeeze()
+        if mask.ndim == 3:            # multi-channel mask → collapse
+            mask = mask[:,:,0]
+    _,mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)
+    return mask
+def _geometric_person_mask(image: np.ndarray) -> np.ndarray:
+    h,w = image.shape[:2]
+    mask = np.zeros((h,w), np.uint8)
+    cv2.ellipse(mask, (w//2,h//2), (w//3,int(h/2.5)), 0, 0,360, 255,-1)
+    return mask
+# ============================================================================
+# OPTIONAL: Iterative auto-refinement (lightweight)
+# ============================================================================
+def _auto_refine_mask_iteratively(image: np.ndarray, mask: np.ndarray, predictor: Any, max_iterations: int = 1) -> np.ndarray:
+    """
+    Refinement hook called by segment_person_hq() when
+    USE_ITERATIVE_REFINEMENT is set.
+
+    This stub returns `mask` unchanged; `image`, `predictor` and
+    `max_iterations` are accepted but not used here.
+    """
+    # Simple one-pass hook (full version lives in refinement.py)
+    return mask