devkunalnaik committed on
Commit
13dd92a
·
1 Parent(s): 79d56d4

Fix: remove MediaPipe (API broken on Py3.13), add OpenCV face enhancement

Browse files
Files changed (4) hide show
  1. app.py +3 -3
  2. processors/body_swap.py +121 -23
  3. processors/face_swap.py +54 -28
  4. requirements.txt +1 -2
app.py CHANGED
@@ -118,7 +118,7 @@ with gr.Blocks(title="Face & Body Swapper", theme=gr.themes.Soft()) as demo:
118
  fi_source = gr.Image(label="Source — face to use", type="pil")
119
  fi_target = gr.Image(label="Target — image to modify", type="pil")
120
  fi_enhance = gr.Checkbox(
121
- label="Enhance output faces (GFPGAN — slower)",
122
  value=True,
123
  )
124
  fi_btn = gr.Button("Swap Faces", variant="primary")
@@ -167,8 +167,8 @@ with gr.Blocks(title="Face & Body Swapper", theme=gr.themes.Soft()) as demo:
167
  fv_source = gr.Image(label="Source Face Image", type="pil")
168
  fv_target = gr.Video(label="Target Video")
169
  fv_enhance = gr.Checkbox(
170
- label="Enhance faces (GFPGAN — much slower per frame)",
171
- value=False,
172
  )
173
  fv_btn = gr.Button("Swap Faces in Video", variant="primary")
174
  with gr.Column(scale=1):
 
118
  fi_source = gr.Image(label="Source — face to use", type="pil")
119
  fi_target = gr.Image(label="Target — image to modify", type="pil")
120
  fi_enhance = gr.Checkbox(
121
+ label="Enhance face quality (sharpening + contrast)",
122
  value=True,
123
  )
124
  fi_btn = gr.Button("Swap Faces", variant="primary")
 
167
  fv_source = gr.Image(label="Source Face Image", type="pil")
168
  fv_target = gr.Video(label="Target Video")
169
  fv_enhance = gr.Checkbox(
170
+ label="Enhance faces (sharpening + contrast)",
171
+ value=True,
172
  )
173
  fv_btn = gr.Button("Swap Faces in Video", variant="primary")
174
  with gr.Column(scale=1):
processors/body_swap.py CHANGED
@@ -3,13 +3,16 @@ Body swap processor.
3
 
4
  Pipeline
5
  --------
6
- 1. Segment the person from both images with *rembg* (U²-Net).
7
- 2. Estimate body pose landmarks with *MediaPipe Pose*.
8
- 3. Compute an affine warp that maps the source torso keypoints onto the
9
- target torso keypoints, so the body roughly aligns with the target pose.
10
- 4. Blend the warped source body onto the target background using the
11
- segmentation mask + Gaussian feathering. A Poisson seamless-clone pass
12
- is attempted for photorealistic colour blending.
 
 
 
13
  """
14
 
15
  import cv2
@@ -20,31 +23,126 @@ from utils.image_utils import (
20
  apply_color_correction,
21
  feather_mask,
22
  alpha_blend,
23
- resize_to_max,
24
  )
25
 
26
 
27
- # ── Landmark indices used for rough torso alignment ───────────────────────────
28
- # MediaPipe Pose: 11=left shoulder, 12=right shoulder,
29
- # 23=left hip, 24=right hip
30
- _TORSO_LANDMARKS = [11, 12, 23, 24]
31
-
32
-
33
  class BodySwapper:
34
  """
35
  Replaces the body in *target_bgr* with the body from *source_bgr*.
36
  """
37
 
38
- def __init__(self):
39
- import mediapipe as mp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- self._mp_pose = mp.solutions.pose
42
- self._pose = self._mp_pose.Pose(
43
- static_image_mode=True,
44
- model_complexity=2,
45
- enable_segmentation=True,
46
- min_detection_confidence=0.5,
47
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  # ── Private helpers ───────────────────────────────────────────────────────
50
 
 
3
 
4
  Pipeline
5
  --------
6
+ 1. Segment both images with *rembg* (U²-Net) to isolate person masks.
7
+ 2. Compute bounding boxes from the masks.
8
+ 3. Scale the source person to match the target bounding-box dimensions.
9
+ 4. Color-correct the source region to match target lighting/tone.
10
+ 5. Feather-blend using the segmentation mask.
11
+ 6. Apply Poisson seamless-clone for photorealistic edge merging.
12
+
13
+ MediaPipe is intentionally NOT used — its API changed in 0.10.14
14
+ (removed `solutions`) which breaks on Python 3.13. Bounding-box
15
+ alignment alone is sufficient for clean body swaps.
16
  """
17
 
18
  import cv2
 
23
  apply_color_correction,
24
  feather_mask,
25
  alpha_blend,
 
26
  )
27
 
28
 
 
 
 
 
 
 
29
  class BodySwapper:
30
  """
31
  Replaces the body in *target_bgr* with the body from *source_bgr*.
32
  """
33
 
34
+ # ── Private helpers ───────────────────────────────────────────────────────
35
+
36
+ @staticmethod
37
+ def _segment(bgr: np.ndarray) -> np.ndarray:
38
+ """Return uint8 single-channel person mask via rembg (U²-Net)."""
39
+ from rembg import remove
40
+
41
+ pil = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
42
+ result = remove(pil, only_mask=True)
43
+ mask = np.array(result)
44
+ if mask.ndim == 3:
45
+ mask = mask[:, :, 0]
46
+ return mask
47
+
48
+ @staticmethod
49
+ def _bbox(mask: np.ndarray):
50
+ """Bounding box (x1, y1, x2, y2) of the non-zero region, or None."""
51
+ ys, xs = np.where(mask > 128)
52
+ if len(ys) == 0:
53
+ return None
54
+ return int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())
55
+
56
+ @staticmethod
57
+ def _vertical_center_of_mass(mask: np.ndarray) -> float:
58
+ """Y coordinate of the mask centre of mass (for vertical alignment)."""
59
+ ys, _ = np.where(mask > 128)
60
+ return float(ys.mean()) if len(ys) > 0 else mask.shape[0] / 2.0
61
+
62
+ # ── Public API ────────────────────────────────────────────────────────────
63
+
64
+ def swap(self, source_bgr, target_bgr, blend_strength=0.85):
65
+ """
66
+ Swap the source person's body into the target scene.
67
+
68
+ Returns:
69
+ (result_bgr, status_message)
70
+ """
71
+ try:
72
+ # ── 1. Segment ────────────────────────────────────────────────────
73
+ src_mask = self._segment(source_bgr)
74
+ tgt_mask = self._segment(target_bgr)
75
+
76
+ src_bbox = self._bbox(src_mask)
77
+ tgt_bbox = self._bbox(tgt_mask)
78
+
79
+ if src_bbox is None:
80
+ return None, "No person detected in source image."
81
+ if tgt_bbox is None:
82
+ return None, "No person detected in target image."
83
+
84
+ sx1, sy1, sx2, sy2 = src_bbox
85
+ tx1, ty1, tx2, ty2 = tgt_bbox
86
+ tgt_w, tgt_h = tx2 - tx1, ty2 - ty1
87
+
88
+ # ── 2. Crop + resize source to target dimensions ──────────────────
89
+ src_person = source_bgr[sy1:sy2, sx1:sx2]
90
+ src_mask_roi = src_mask[sy1:sy2, sx1:sx2]
91
 
92
+ src_resized = cv2.resize(src_person, (tgt_w, tgt_h), interpolation=cv2.INTER_LANCZOS4)
93
+ mask_resized = cv2.resize(src_mask_roi, (tgt_w, tgt_h), interpolation=cv2.INTER_LINEAR)
94
+
95
+ # ── 3. Vertical CoM alignment ─────────────────────────────────────
96
+ src_com_y = self._vertical_center_of_mass(src_mask_roi)
97
+ tgt_com_y = self._vertical_center_of_mass(tgt_mask[ty1:ty2, tx1:tx2])
98
+ scale_y = tgt_h / max(sy2 - sy1, 1)
99
+ offset_y = int(tgt_com_y - src_com_y * scale_y)
100
+
101
+ # ── 4. Composite onto full canvas ─────────────────────────────────
102
+ h_t, w_t = target_bgr.shape[:2]
103
+ canvas_fg = np.zeros_like(target_bgr)
104
+ canvas_mask = np.zeros((h_t, w_t), dtype=np.uint8)
105
+
106
+ dst_x1 = int(np.clip(tx1, 0, w_t))
107
+ dst_y1 = int(np.clip(ty1 + offset_y, 0, h_t))
108
+ dst_x2 = int(np.clip(tx1 + tgt_w, 0, w_t))
109
+ dst_y2 = int(np.clip(ty1 + offset_y + tgt_h, 0, h_t))
110
+
111
+ src_x1 = dst_x1 - tx1
112
+ src_y1 = dst_y1 - (ty1 + offset_y)
113
+ src_x2 = src_x1 + (dst_x2 - dst_x1)
114
+ src_y2 = src_y1 + (dst_y2 - dst_y1)
115
+
116
+ if dst_x2 <= dst_x1 or dst_y2 <= dst_y1:
117
+ return None, "Alignment offset moved body out of frame."
118
+
119
+ canvas_fg [dst_y1:dst_y2, dst_x1:dst_x2] = src_resized [src_y1:src_y2, src_x1:src_x2]
120
+ canvas_mask[dst_y1:dst_y2, dst_x1:dst_x2] = mask_resized[src_y1:src_y2, src_x1:src_x2]
121
+
122
+ # ── 5. Color correction ───────────────────────────────────────────
123
+ canvas_fg = apply_color_correction(canvas_fg, target_bgr, canvas_mask)
124
+
125
+ # ── 6. Feathered alpha blend ──────────────────────────────────────
126
+ soft_mask = feather_mask(canvas_mask, blur_radius=25)
127
+ soft_mask = (soft_mask.astype(float) * blend_strength).clip(0, 255).astype(np.uint8)
128
+ result = alpha_blend(canvas_fg, target_bgr, soft_mask)
129
+
130
+ # ── 7. Seamless clone (best-effort) ───────────────────────────────
131
+ try:
132
+ cx = int((dst_x1 + dst_x2) / 2)
133
+ cy = int((dst_y1 + dst_y2) / 2)
134
+ sc_mask = (canvas_mask > 10).astype(np.uint8) * 255
135
+ result = cv2.seamlessClone(
136
+ canvas_fg, target_bgr, sc_mask,
137
+ (cx, cy), cv2.NORMAL_CLONE,
138
+ )
139
+ except Exception as e:
140
+ print(f"[BodySwapper] seamlessClone skipped: {e}")
141
+
142
+ return result, "Body swap completed successfully."
143
+
144
+ except Exception as exc:
145
+ return None, f"Body swap error: {exc}"
146
 
147
  # ── Private helpers ───────────────────────────────────────────────────────
148
 
processors/face_swap.py CHANGED
@@ -65,10 +65,11 @@ def _download_inswapper() -> None:
65
  with open(INSWAPPER_PATH, "wb") as f:
66
  for chunk in resp.iter_content(chunk_size=65536):
67
  f.write(chunk)
68
- if INSWAPPER_PATH.stat().st_size > 100_000:
69
  print(f"[FaceSwapper] Saved to {INSWAPPER_PATH}")
70
  return
71
  INSWAPPER_PATH.unlink(missing_ok=True)
 
72
  except Exception as e:
73
  print(f"[FaceSwapper] Mirror failed ({e})")
74
  INSWAPPER_PATH.unlink(missing_ok=True)
@@ -91,7 +92,6 @@ class FaceSwapper:
91
  def __init__(self):
92
  self._app = None # InsightFace FaceAnalysis
93
  self._swapper = None # inswapper ONNX model
94
- self._enhancer = None # GFPGAN (lazy)
95
  self._ready = False
96
 
97
  # ── Lazy initialisation ───────────────────────────────────────────────────
@@ -120,22 +120,48 @@ class FaceSwapper:
120
 
121
  self._ready = True
122
 
123
- def _get_enhancer(self):
124
- """Lazy-load GFPGAN enhancer."""
125
- if self._enhancer is None:
126
- from gfpgan import GFPGANer
127
-
128
- self._enhancer = GFPGANer(
129
- model_path=(
130
- "https://github.com/TencentARC/GFPGAN/releases/download/"
131
- "v1.3.0/GFPGANv1.4.pth"
132
- ),
133
- upscale=1,
134
- arch="clean",
135
- channel_multiplier=2,
136
- bg_upsampler=None,
 
137
  )
138
- return self._enhancer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # ── Public API ────────────────────────────────────────────────────────────
141
 
@@ -143,8 +169,8 @@ class FaceSwapper:
143
  self,
144
  source_bgr: np.ndarray,
145
  target_bgr: np.ndarray,
146
- enhance: bool = False,
147
- ) -> tuple[np.ndarray | None, str]:
148
  """
149
  Swap the first detected face in *source_bgr* onto every face in
150
  *target_bgr*.
@@ -155,6 +181,13 @@ class FaceSwapper:
155
  self._init()
156
 
157
  try:
 
 
 
 
 
 
 
158
  source_faces = self._app.get(source_bgr)
159
  target_faces = self._app.get(target_bgr)
160
 
@@ -171,16 +204,9 @@ class FaceSwapper:
171
  result, tgt_face, source_face, paste_back=True
172
  )
173
 
 
174
  if enhance:
175
- try:
176
- _, _, result = self._get_enhancer().enhance(
177
- result,
178
- has_aligned=False,
179
- only_center_face=False,
180
- paste_back=True,
181
- )
182
- except Exception as e:
183
- print(f"[FaceSwapper] Enhancement skipped: {e}")
184
 
185
  return result, f"Swapped {len(target_faces)} face(s) successfully."
186
 
 
65
  with open(INSWAPPER_PATH, "wb") as f:
66
  for chunk in resp.iter_content(chunk_size=65536):
67
  f.write(chunk)
68
+ if INSWAPPER_PATH.stat().st_size > 500_000_000: # ~554 MB expected
69
  print(f"[FaceSwapper] Saved to {INSWAPPER_PATH}")
70
  return
71
  INSWAPPER_PATH.unlink(missing_ok=True)
72
+ print("[FaceSwapper] Mirror file too small, trying next …")
73
  except Exception as e:
74
  print(f"[FaceSwapper] Mirror failed ({e})")
75
  INSWAPPER_PATH.unlink(missing_ok=True)
 
92
  def __init__(self):
93
  self._app = None # InsightFace FaceAnalysis
94
  self._swapper = None # inswapper ONNX model
 
95
  self._ready = False
96
 
97
  # ── Lazy initialisation ───────────────────────────────────────────────────
 
120
 
121
  self._ready = True
122
 
123
+ # ── Enhancement (pure OpenCV, no extra models) ────────────────────────────
124
+
125
+ @staticmethod
126
+ def _enhance_opencv(image: np.ndarray, faces) -> np.ndarray:
127
+ """
128
+ For each detected face bounding box:
129
+ 1. Unsharp masking — recovers detail lost by inswapper's 128-px output
130
+ 2. CLAHE on the L channel — local contrast without blowing highlights
131
+ """
132
+ result = image.copy()
133
+ for face in faces:
134
+ box = face.bbox.astype(int)
135
+ x1, y1, x2, y2 = (
136
+ max(box[0], 0), max(box[1], 0),
137
+ min(box[2], image.shape[1]), min(box[3], image.shape[0]),
138
  )
139
+ if x2 <= x1 or y2 <= y1:
140
+ continue
141
+
142
+ roi = result[y1:y2, x1:x2].copy()
143
+
144
+ # 1. Unsharp mask (amount=1.4, radius=3)
145
+ blurred = cv2.GaussianBlur(roi, (0, 0), 3)
146
+ sharp = cv2.addWeighted(roi, 2.4, blurred, -1.4, 0)
147
+
148
+ # 2. CLAHE on L channel
149
+ lab = cv2.cvtColor(sharp, cv2.COLOR_BGR2LAB)
150
+ clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(4, 4))
151
+ lab[:, :, 0] = clahe.apply(lab[:, :, 0])
152
+ enhanced_roi = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
153
+
154
+ # Feather-blend back so edges stay smooth
155
+ mask = np.zeros(roi.shape[:2], dtype=np.float32)
156
+ pad = max(4, (y2 - y1) // 10)
157
+ mask[pad:-pad, pad:-pad] = 1.0
158
+ mask = cv2.GaussianBlur(mask, (0, 0), pad // 2 or 1)
159
+ mask_3ch = mask[:, :, np.newaxis]
160
+ result[y1:y2, x1:x2] = (
161
+ enhanced_roi * mask_3ch + roi * (1 - mask_3ch)
162
+ ).astype(np.uint8)
163
+
164
+ return result
165
 
166
  # ── Public API ────────────────────────────────────────────────────────────
167
 
 
169
  self,
170
  source_bgr: np.ndarray,
171
  target_bgr: np.ndarray,
172
+ enhance: bool = True,
173
+ ):
174
  """
175
  Swap the first detected face in *source_bgr* onto every face in
176
  *target_bgr*.
 
181
  self._init()
182
 
183
  try:
184
+ # Resize to optimal resolution (too large = slow; too small = blurry)
185
+ MAX_DIM = 1280
186
+ h, w = target_bgr.shape[:2]
187
+ if max(h, w) > MAX_DIM:
188
+ scale = MAX_DIM / max(h, w)
189
+ target_bgr = cv2.resize(target_bgr, (int(w * scale), int(h * scale)))
190
+
191
  source_faces = self._app.get(source_bgr)
192
  target_faces = self._app.get(target_bgr)
193
 
 
204
  result, tgt_face, source_face, paste_back=True
205
  )
206
 
207
+ # Always apply OpenCV enhancement — no extra deps needed
208
  if enhance:
209
+ result = self._enhance_opencv(result, target_faces)
 
 
 
 
 
 
 
 
210
 
211
  return result, f"Swapped {len(target_faces)} face(s) successfully."
212
 
requirements.txt CHANGED
@@ -15,8 +15,7 @@ onnxruntime>=1.16.0
15
  # Body Segmentation
16
  rembg>=2.0.50
17
 
18
- # Pose Estimation
19
- mediapipe>=0.10.0
20
 
21
  # Image / Video Processing
22
  opencv-python-headless>=4.8.0
 
15
  # Body Segmentation
16
  rembg>=2.0.50
17
 
18
+ # Pose Estimation — removed (mediapipe 0.10.14+ drops solutions API on Py3.13)
 
19
 
20
  # Image / Video Processing
21
  opencv-python-headless>=4.8.0