Spaces:

devkunalnaik
/

Swapper

Running

App Files Files Community

devkunalnaik committed on May 18

Commit

13dd92a

1 Parent(s): 79d56d4

Fix: remove MediaPipe (API broken on Py3.13), add OpenCV face enhancement

Browse files

Files changed (4) hide show

app.py +3 -3
processors/body_swap.py +121 -23
processors/face_swap.py +54 -28
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -118,7 +118,7 @@ with gr.Blocks(title="Face & Body Swapper", theme=gr.themes.Soft()) as demo:
                     fi_source = gr.Image(label="Source — face to use", type="pil")
                     fi_target = gr.Image(label="Target — image to modify", type="pil")
                     fi_enhance = gr.Checkbox(
-                        label="Enhance output faces (GFPGAN — slower)",
                         value=True,
                     )
                     fi_btn = gr.Button("Swap Faces", variant="primary")
@@ -167,8 +167,8 @@ with gr.Blocks(title="Face & Body Swapper", theme=gr.themes.Soft()) as demo:
                     fv_source = gr.Image(label="Source Face Image", type="pil")
                     fv_target = gr.Video(label="Target Video")
                     fv_enhance = gr.Checkbox(
-                        label="Enhance faces (GFPGAN — much slower per frame)",
-                        value=False,
                     )
                     fv_btn = gr.Button("Swap Faces in Video", variant="primary")
                 with gr.Column(scale=1):

                     fi_source = gr.Image(label="Source — face to use", type="pil")
                     fi_target = gr.Image(label="Target — image to modify", type="pil")
                     fi_enhance = gr.Checkbox(
+                        label="Enhance face quality (sharpening + contrast)",
                         value=True,
                     )
                     fi_btn = gr.Button("Swap Faces", variant="primary")
                     fv_source = gr.Image(label="Source Face Image", type="pil")
                     fv_target = gr.Video(label="Target Video")
                     fv_enhance = gr.Checkbox(
+                        label="Enhance faces (sharpening + contrast)",
+                        value=True,
                     )
                     fv_btn = gr.Button("Swap Faces in Video", variant="primary")
                 with gr.Column(scale=1):

processors/body_swap.py CHANGED Viewed

@@ -3,13 +3,16 @@ Body swap processor.
 Pipeline
 --------
-1. Segment the person from both images with *rembg* (U²-Net).
-2. Estimate body pose landmarks with *MediaPipe Pose*.
-3. Compute an affine warp that maps the source torso keypoints onto the
-   target torso keypoints, so the body roughly aligns with the target pose.
-4. Blend the warped source body onto the target background using the
-   segmentation mask + Gaussian feathering.  A Poisson seamless-clone pass
-   is attempted for photorealistic colour blending.
 """
 import cv2
@@ -20,31 +23,126 @@ from utils.image_utils import (
     apply_color_correction,
     feather_mask,
     alpha_blend,
-    resize_to_max,
 )
-# ── Landmark indices used for rough torso alignment ───────────────────────────
-# MediaPipe Pose: 11=left shoulder, 12=right shoulder,
-#                 23=left hip, 24=right hip
-_TORSO_LANDMARKS = [11, 12, 23, 24]
 class BodySwapper:
     """
     Replaces the body in *target_bgr* with the body from *source_bgr*.
     """
-    def __init__(self):
-        import mediapipe as mp
-        self._mp_pose = mp.solutions.pose
-        self._pose = self._mp_pose.Pose(
-            static_image_mode=True,
-            model_complexity=2,
-            enable_segmentation=True,
-            min_detection_confidence=0.5,
-        )
     # ── Private helpers ───────────────────────────────────────────────────────

 Pipeline
 --------
+1. Segment both images with *rembg* (U²-Net) to isolate person masks.
+2. Compute bounding boxes from the masks.
+3. Scale the source person to match the target bounding-box dimensions.
+4. Color-correct the source region to match target lighting/tone.
+5. Feather-blend using the segmentation mask.
+6. Apply Poisson seamless-clone for photorealistic edge merging.
+MediaPipe is intentionally NOT used — its API changed in 0.10.14
+(removed `solutions`) which breaks on Python 3.13.  Bounding-box
+alignment alone is sufficient for clean body swaps.
 """
 import cv2
     apply_color_correction,
     feather_mask,
     alpha_blend,
 )
 class BodySwapper:
     """
     Replaces the body in *target_bgr* with the body from *source_bgr*.
     """
+    # ── Private helpers ───────────────────────────────────────────────────────
+    @staticmethod
+    def _segment(bgr: np.ndarray) -> np.ndarray:
+        """Return uint8 single-channel person mask via rembg (U²-Net)."""
+        from rembg import remove
+        pil = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
+        result = remove(pil, only_mask=True)
+        mask = np.array(result)
+        if mask.ndim == 3:
+            mask = mask[:, :, 0]
+        return mask
+    @staticmethod
+    def _bbox(mask: np.ndarray):
+        """Bounding box (x1, y1, x2, y2) of the non-zero region, or None."""
+        ys, xs = np.where(mask > 128)
+        if len(ys) == 0:
+            return None
+        return int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())
+    @staticmethod
+    def _vertical_center_of_mass(mask: np.ndarray) -> float:
+        """Y coordinate of the mask centre of mass (for vertical alignment)."""
+        ys, _ = np.where(mask > 128)
+        return float(ys.mean()) if len(ys) > 0 else mask.shape[0] / 2.0
+    # ── Public API ────────────────────────────────────────────────────────────
+    def swap(self, source_bgr, target_bgr, blend_strength=0.85):
+        """
+        Swap the source person's body into the target scene.
+
+        Both images are segmented, the source person is cropped to its mask
+        bounding box, resized to the target person's bounding-box size,
+        shifted vertically so the two mask centres of mass line up, and then
+        composited onto the target.
+
+        Args:
+            source_bgr: Image containing the person to paste (assumed to be
+                a BGR ndarray, as the name and cv2 calls suggest).
+            target_bgr: Image whose person region is replaced; the composite
+                canvas is allocated with this image's shape.
+            blend_strength: Multiplier applied to the feathered mask before
+                the alpha blend (default 0.85).
+
+        Returns:
+            (result_bgr, status_message)
+            ``result_bgr`` is ``None`` when no person is found in either
+            image, when the aligned body ends up outside the frame, or when
+            any step raises; ``status_message`` then says why.
+        """
+        try:
+            # ── 1. Segment ────────────────────────────────────────────────────
+            src_mask = self._segment(source_bgr)
+            tgt_mask = self._segment(target_bgr)
+            src_bbox = self._bbox(src_mask)
+            tgt_bbox = self._bbox(tgt_mask)
+            if src_bbox is None:
+                return None, "No person detected in source image."
+            if tgt_bbox is None:
+                return None, "No person detected in target image."
+            sx1, sy1, sx2, sy2 = src_bbox
+            tx1, ty1, tx2, ty2 = tgt_bbox
+            tgt_w, tgt_h = tx2 - tx1, ty2 - ty1
+            # ── 2. Crop + resize source to target dimensions ──────────────────
+            src_person   = source_bgr[sy1:sy2, sx1:sx2]
+            src_mask_roi = src_mask[sy1:sy2, sx1:sx2]
+            # The source aspect ratio is not preserved: the crop is stretched
+            # to exactly the target box size.
+            src_resized  = cv2.resize(src_person,   (tgt_w, tgt_h), interpolation=cv2.INTER_LANCZOS4)
+            mask_resized = cv2.resize(src_mask_roi, (tgt_w, tgt_h), interpolation=cv2.INTER_LINEAR)
+            # ── 3. Vertical CoM alignment ─────────────────────────────────────
+            # Both centres of mass are measured relative to the top of their
+            # own crop; scale_y converts the source value into resized rows.
+            src_com_y = self._vertical_center_of_mass(src_mask_roi)
+            tgt_com_y = self._vertical_center_of_mass(tgt_mask[ty1:ty2, tx1:tx2])
+            scale_y   = tgt_h / max(sy2 - sy1, 1)
+            offset_y  = int(tgt_com_y - src_com_y * scale_y)
+            # ── 4. Composite onto full canvas ─────────────────────────────────
+            h_t, w_t  = target_bgr.shape[:2]
+            canvas_fg   = np.zeros_like(target_bgr)
+            canvas_mask = np.zeros((h_t, w_t), dtype=np.uint8)
+            # Destination rectangle, clipped to the canvas after the vertical shift.
+            dst_x1 = int(np.clip(tx1,          0, w_t))
+            dst_y1 = int(np.clip(ty1 + offset_y, 0, h_t))
+            dst_x2 = int(np.clip(tx1 + tgt_w,  0, w_t))
+            dst_y2 = int(np.clip(ty1 + offset_y + tgt_h, 0, h_t))
+            # Matching sub-rectangle inside the resized crop (accounts for clipping).
+            src_x1 = dst_x1 - tx1
+            src_y1 = dst_y1 - (ty1 + offset_y)
+            src_x2 = src_x1 + (dst_x2 - dst_x1)
+            src_y2 = src_y1 + (dst_y2 - dst_y1)
+            if dst_x2 <= dst_x1 or dst_y2 <= dst_y1:
+                return None, "Alignment offset moved body out of frame."
+            canvas_fg  [dst_y1:dst_y2, dst_x1:dst_x2] = src_resized [src_y1:src_y2, src_x1:src_x2]
+            canvas_mask[dst_y1:dst_y2, dst_x1:dst_x2] = mask_resized[src_y1:src_y2, src_x1:src_x2]
+            # ── 5. Color correction ───────────────────────────────────────────
+            # Project helper; presumably matches canvas_fg's tones to the
+            # target inside the mask — confirm in utils.image_utils.
+            canvas_fg = apply_color_correction(canvas_fg, target_bgr, canvas_mask)
+            # ── 6. Feathered alpha blend ──────────────────────────────────────
+            soft_mask = feather_mask(canvas_mask, blur_radius=25)
+            soft_mask = (soft_mask.astype(float) * blend_strength).clip(0, 255).astype(np.uint8)
+            result    = alpha_blend(canvas_fg, target_bgr, soft_mask)
+            # ── 7. Seamless clone (best-effort) ───────────────────────────────
+            # NOTE(review): the clone is made onto target_bgr, not onto the
+            # blended `result`, and its output replaces `result`.  So when the
+            # clone succeeds, the feathered blend and blend_strength have no
+            # effect on the output; they only matter on the fallback path.
+            # NOTE(review): OpenCV centres the mask's bounding rectangle on the
+            # given point; the destination-rectangle centre is used here —
+            # confirm the two coincide closely enough.
+            try:
+                cx = int((dst_x1 + dst_x2) / 2)
+                cy = int((dst_y1 + dst_y2) / 2)
+                sc_mask = (canvas_mask > 10).astype(np.uint8) * 255
+                result  = cv2.seamlessClone(
+                    canvas_fg, target_bgr, sc_mask,
+                    (cx, cy), cv2.NORMAL_CLONE,
+                )
+            except Exception as e:
+                # Deliberate best-effort: keep the alpha-blended result.
+                print(f"[BodySwapper] seamlessClone skipped: {e}")
+            return result, "Body swap completed successfully."
+        except Exception as exc:
+            # Any failure above is reported to the caller as a status string.
+            return None, f"Body swap error: {exc}"
     # ── Private helpers ───────────────────────────────────────────────────────

processors/face_swap.py CHANGED Viewed

@@ -65,10 +65,11 @@ def _download_inswapper() -> None:
             with open(INSWAPPER_PATH, "wb") as f:
                 for chunk in resp.iter_content(chunk_size=65536):
                     f.write(chunk)
-            if INSWAPPER_PATH.stat().st_size > 100_000:
                 print(f"[FaceSwapper] Saved to {INSWAPPER_PATH}")
                 return
             INSWAPPER_PATH.unlink(missing_ok=True)
         except Exception as e:
             print(f"[FaceSwapper] Mirror failed ({e})")
             INSWAPPER_PATH.unlink(missing_ok=True)
@@ -91,7 +92,6 @@ class FaceSwapper:
     def __init__(self):
         self._app = None        # InsightFace FaceAnalysis
         self._swapper = None    # inswapper ONNX model
-        self._enhancer = None   # GFPGAN (lazy)
         self._ready = False
     # ── Lazy initialisation ───────────────────────────────────────────────────
@@ -120,22 +120,48 @@ class FaceSwapper:
         self._ready = True
-    def _get_enhancer(self):
-        """Lazy-load GFPGAN enhancer."""
-        if self._enhancer is None:
-            from gfpgan import GFPGANer
-            self._enhancer = GFPGANer(
-                model_path=(
-                    "https://github.com/TencentARC/GFPGAN/releases/download/"
-                    "v1.3.0/GFPGANv1.4.pth"
-                ),
-                upscale=1,
-                arch="clean",
-                channel_multiplier=2,
-                bg_upsampler=None,
             )
-        return self._enhancer
     # ── Public API ────────────────────────────────────────────────────────────
@@ -143,8 +169,8 @@ class FaceSwapper:
         self,
         source_bgr: np.ndarray,
         target_bgr: np.ndarray,
-        enhance: bool = False,
-    ) -> tuple[np.ndarray | None, str]:
         """
         Swap the first detected face in *source_bgr* onto every face in
         *target_bgr*.
@@ -155,6 +181,13 @@ class FaceSwapper:
         self._init()
         try:
             source_faces = self._app.get(source_bgr)
             target_faces = self._app.get(target_bgr)
@@ -171,16 +204,9 @@ class FaceSwapper:
                     result, tgt_face, source_face, paste_back=True
                 )
             if enhance:
-                try:
-                    _, _, result = self._get_enhancer().enhance(
-                        result,
-                        has_aligned=False,
-                        only_center_face=False,
-                        paste_back=True,
-                    )
-                except Exception as e:
-                    print(f"[FaceSwapper] Enhancement skipped: {e}")
             return result, f"Swapped {len(target_faces)} face(s) successfully."

             with open(INSWAPPER_PATH, "wb") as f:
                 for chunk in resp.iter_content(chunk_size=65536):
                     f.write(chunk)
+            if INSWAPPER_PATH.stat().st_size > 500_000_000:  # ~554 MB expected
                 print(f"[FaceSwapper] Saved to {INSWAPPER_PATH}")
                 return
             INSWAPPER_PATH.unlink(missing_ok=True)
+            print("[FaceSwapper] Mirror file too small, trying next …")
         except Exception as e:
             print(f"[FaceSwapper] Mirror failed ({e})")
             INSWAPPER_PATH.unlink(missing_ok=True)
     def __init__(self):
         self._app = None        # InsightFace FaceAnalysis
         self._swapper = None    # inswapper ONNX model
         self._ready = False
     # ── Lazy initialisation ───────────────────────────────────────────────────
         self._ready = True
+    # ── Enhancement (pure OpenCV, no extra models) ────────────────────────────
+    @staticmethod
+    def _enhance_opencv(image: np.ndarray, faces) -> np.ndarray:
+        """
+        For each detected face bounding box:
+          1. Unsharp masking — recovers detail lost by inswapper's 128-px output
+          2. CLAHE on the L channel — local contrast without blowing highlights
+        """
+        result = image.copy()
+        for face in faces:
+            box = face.bbox.astype(int)
+            x1, y1, x2, y2 = (
+                max(box[0], 0), max(box[1], 0),
+                min(box[2], image.shape[1]), min(box[3], image.shape[0]),
             )
+            if x2 <= x1 or y2 <= y1:
+                continue
+            roi = result[y1:y2, x1:x2].copy()
+            # 1. Unsharp mask (amount=1.4, radius=3)
+            blurred = cv2.GaussianBlur(roi, (0, 0), 3)
+            sharp = cv2.addWeighted(roi, 2.4, blurred, -1.4, 0)
+            # 2. CLAHE on L channel
+            lab = cv2.cvtColor(sharp, cv2.COLOR_BGR2LAB)
+            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(4, 4))
+            lab[:, :, 0] = clahe.apply(lab[:, :, 0])
+            enhanced_roi = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
+            # Feather-blend back so edges stay smooth
+            mask = np.zeros(roi.shape[:2], dtype=np.float32)
+            pad = max(4, (y2 - y1) // 10)
+            mask[pad:-pad, pad:-pad] = 1.0
+            mask = cv2.GaussianBlur(mask, (0, 0), pad // 2 or 1)
+            mask_3ch = mask[:, :, np.newaxis]
+            result[y1:y2, x1:x2] = (
+                enhanced_roi * mask_3ch + roi * (1 - mask_3ch)
+            ).astype(np.uint8)
+        return result
     # ── Public API ────────────────────────────────────────────────────────────
         self,
         source_bgr: np.ndarray,
         target_bgr: np.ndarray,
+        enhance: bool = True,
+    ):
         """
         Swap the first detected face in *source_bgr* onto every face in
         *target_bgr*.
         self._init()
         try:
+            # Downscale oversized targets so the longest side is at most MAX_DIM (large inputs are slow); smaller images are left as-is
+            MAX_DIM = 1280
+            h, w = target_bgr.shape[:2]
+            if max(h, w) > MAX_DIM:
+                scale = MAX_DIM / max(h, w)
+                target_bgr = cv2.resize(target_bgr, (int(w * scale), int(h * scale)))
             source_faces = self._app.get(source_bgr)
             target_faces = self._app.get(target_bgr)
                     result, tgt_face, source_face, paste_back=True
                 )
+            # Optional OpenCV enhancement (runs only when `enhance` is set) — no extra deps needed
             if enhance:
+                result = self._enhance_opencv(result, target_faces)
             return result, f"Swapped {len(target_faces)} face(s) successfully."

requirements.txt CHANGED Viewed

@@ -15,8 +15,7 @@ onnxruntime>=1.16.0
 # Body Segmentation
 rembg>=2.0.50
-# Pose Estimation
-mediapipe>=0.10.0
 # Image / Video Processing
 opencv-python-headless>=4.8.0

 # Body Segmentation
 rembg>=2.0.50
+# Pose Estimation — removed (mediapipe 0.10.14+ drops solutions API on Py3.13)
 # Image / Video Processing
 opencv-python-headless>=4.8.0