Kikut committed on
Commit
fab3293
·
verified ·
1 Parent(s): 167b17d

Deploy IG test profile studio

Browse files
Files changed (3) hide show
  1. README.md +14 -20
  2. app.py +181 -504
  3. requirements.txt +4 -3
README.md CHANGED
@@ -1,33 +1,27 @@
1
  ---
2
- title: Live Upper-Body Swap (FaceFusion)
3
  colorFrom: blue
4
  colorTo: pink
5
- sdk: docker
6
- app_port: 7860
 
 
7
  pinned: false
8
  ---
9
 
10
- # Live Upper-Body Swap (FaceFusion)
11
 
12
- This Space runs FaceFusion in `webcam` mode for realistic live face swap.
13
 
14
- ## How it works
15
 
16
  1. Open the Space in browser.
17
- 2. In FaceFusion UI, set `Source` image to a clear front-facing portrait.
18
- 3. Start webcam target and run with:
19
- - `processors`: `face_swapper`, `face_enhancer`, `expression_restorer`
20
- - `face masker`: `occlusion + region`
21
- 4. Tune blend until movement and identity look natural.
22
-
23
- ## Why this is more realistic than overlay
24
-
25
- - true face swap model (not static image paste)
26
- - temporal consistency from webcam layout
27
- - enhancer and expression restorer for live motion quality
28
 
29
  ## Notes
30
 
31
- - Best result comes from good lighting and neutral background.
32
- - This is designed for consent-based identity effects only.
33
- - A stronger GPU (for example `a10g-large`) is recommended.
 
1
  ---
2
+ title: Live Upper-Body Swap
3
  colorFrom: blue
4
  colorTo: pink
5
+ sdk: gradio
6
+ sdk_version: 5.44.1
7
+ app_file: app.py
8
+ python_version: "3.10"
9
  pinned: false
10
  ---
11
 
12
+ # Live Upper-Body Swap
13
 
14
+ This Space performs real-time face swap from browser webcam frames.
15
 
16
+ ## Workflow
17
 
18
  1. Open the Space in browser.
19
+ 2. Upload source portrait (or use the default one).
20
+ 3. Start webcam stream.
21
+ 4. Adjust swap strength and max faces.
 
 
 
 
 
 
 
 
22
 
23
  ## Notes
24
 
25
+ - Uses `inswapper_128` through `insightface`.
26
+ - Works with browser webcam input on Hugging Face Spaces.
27
+ - Keep strong front lighting for better realism.
app.py CHANGED
@@ -7,555 +7,233 @@ from typing import Any
7
  import cv2
8
  import gradio as gr
9
  import numpy as np
 
 
 
10
 
11
- try:
12
- import mediapipe as mp # type: ignore
13
- except Exception:
14
- mp = None
15
-
16
-
17
- FACE_CASCADE = cv2.CascadeClassifier(
18
- cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
19
- )
20
- UPPER_BODY_CASCADE = cv2.CascadeClassifier(
21
- cv2.data.haarcascades + "haarcascade_upperbody.xml"
22
- )
23
  APP_DIR = Path(__file__).resolve().parent
24
- DEFAULT_AVATAR_PATH = APP_DIR / "assets" / "default_persona.jpg"
25
- DEFAULT_AVATAR_VALUE = str(DEFAULT_AVATAR_PATH) if DEFAULT_AVATAR_PATH.exists() else None
26
-
27
-
28
- def _init_selfie_segmenter() -> tuple[Any | None, str]:
29
- if mp is None:
30
- return None, "mediapipe import failed"
31
- try:
32
- solutions = getattr(mp, "solutions", None)
33
- if solutions is None:
34
- from mediapipe.python import solutions as mp_solutions # type: ignore
35
 
36
- solutions = mp_solutions
37
- segmenter = solutions.selfie_segmentation.SelfieSegmentation(model_selection=0)
38
- return segmenter, ""
39
- except Exception as exc:
40
- return None, f"{type(exc).__name__}: {exc}"
41
 
 
 
42
 
43
- SELFIE_SEGMENTER, SELFIE_SEGMENTER_ERROR = _init_selfie_segmenter()
44
  LAST_FRAME_TS = 0.0
45
  EMA_FPS = 0.0
46
- AVATAR_CACHE_KEY = ""
47
- AVATAR_CACHE_CROP: np.ndarray | None = None
48
- AVATAR_CACHE_MASK: np.ndarray | None = None
49
- TRACK_BOX: tuple[int, int, int, int] | None = None
50
- TRACK_TEMPLATE: np.ndarray | None = None
51
- TRACK_CONFIDENCE = 0.0
52
- FRAME_INDEX = 0
53
-
54
-
55
- def _prepare_bgr(frame: np.ndarray) -> np.ndarray:
56
- if frame.dtype != np.uint8:
57
- frame = np.clip(frame, 0, 255).astype(np.uint8)
58
- if frame.ndim == 2:
59
- return cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
60
- if frame.shape[2] == 4:
61
- return cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
62
- return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
63
-
64
-
65
- def _resize_for_speed(frame_bgr: np.ndarray, max_side: int) -> tuple[np.ndarray, tuple[int, int]]:
66
- height, width = frame_bgr.shape[:2]
67
- if max(height, width) <= max_side:
68
- return frame_bgr, (width, height)
69
- ratio = max_side / float(max(height, width))
70
- resized = cv2.resize(
71
- frame_bgr,
72
- (int(width * ratio), int(height * ratio)),
73
- interpolation=cv2.INTER_AREA,
74
- )
75
- return resized, (width, height)
76
-
77
-
78
- def _detect_faces(frame_bgr: np.ndarray) -> np.ndarray:
79
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
80
- return FACE_CASCADE.detectMultiScale(
81
- gray,
82
- scaleFactor=1.12,
83
- minNeighbors=5,
84
- minSize=(60, 60),
85
- )
86
 
87
 
88
- def _detect_upper_body(frame_bgr: np.ndarray) -> np.ndarray:
89
- if UPPER_BODY_CASCADE.empty():
90
- return np.array([])
91
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
92
- return UPPER_BODY_CASCADE.detectMultiScale(
93
- gray,
94
- scaleFactor=1.08,
95
- minNeighbors=4,
96
- minSize=(80, 80),
97
- )
98
 
99
 
100
- def _clamp_box(
101
- box: tuple[int, int, int, int],
102
- frame_shape: tuple[int, int, int],
103
- ) -> tuple[int, int, int, int]:
104
- x, y, w, h = box
105
- img_h, img_w = frame_shape[:2]
106
- x = max(0, min(x, img_w - 2))
107
- y = max(0, min(y, img_h - 2))
108
- w = max(2, min(w, img_w - x))
109
- h = max(2, min(h, img_h - y))
110
- return x, y, w, h
111
-
112
-
113
- def _box_iou(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> float:
114
- ax, ay, aw, ah = a
115
- bx, by, bw, bh = b
116
- ax2, ay2 = ax + aw, ay + ah
117
- bx2, by2 = bx + bw, by + bh
118
- ix1, iy1 = max(ax, bx), max(ay, by)
119
- ix2, iy2 = min(ax2, bx2), min(ay2, by2)
120
- iw, ih = max(0, ix2 - ix1), max(0, iy2 - iy1)
121
- inter = iw * ih
122
- if inter <= 0:
123
- return 0.0
124
- union = aw * ah + bw * bh - inter
125
- return inter / max(union, 1)
126
-
127
-
128
- def _patch_from_box(
129
- frame_bgr: np.ndarray,
130
- box: tuple[int, int, int, int],
131
- ) -> np.ndarray | None:
132
- x, y, w, h = _clamp_box(box, frame_bgr.shape)
133
- patch = frame_bgr[y : y + h, x : x + w]
134
- if patch.size == 0:
135
- return None
136
- return cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
137
-
138
-
139
- def _set_tracker(
140
- frame_bgr: np.ndarray,
141
- box: tuple[int, int, int, int],
142
- ) -> None:
143
- global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
144
- patch = _patch_from_box(frame_bgr, box)
145
- if patch is None or patch.shape[0] < 12 or patch.shape[1] < 12:
146
- return
147
- TRACK_BOX = _clamp_box(box, frame_bgr.shape)
148
- TRACK_TEMPLATE = patch
149
- TRACK_CONFIDENCE = 1.0
150
-
151
-
152
- def _update_tracker(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, float]:
153
- global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
154
- if TRACK_BOX is None or TRACK_TEMPLATE is None:
155
- return None, 0.0
156
-
157
- x, y, w, h = TRACK_BOX
158
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
159
- margin = int(max(w, h) * 0.55)
160
- sx0 = max(0, x - margin)
161
- sy0 = max(0, y - margin)
162
- sx1 = min(gray.shape[1], x + w + margin)
163
- sy1 = min(gray.shape[0], y + h + margin)
164
-
165
- search = gray[sy0:sy1, sx0:sx1]
166
- tmpl = TRACK_TEMPLATE
167
- if (
168
- search.shape[0] < tmpl.shape[0]
169
- or search.shape[1] < tmpl.shape[1]
170
- or tmpl.shape[0] < 10
171
- or tmpl.shape[1] < 10
172
- ):
173
- TRACK_CONFIDENCE *= 0.75
174
- return None, 0.0
175
-
176
- match = cv2.matchTemplate(search, tmpl, cv2.TM_CCOEFF_NORMED)
177
- _, max_val, _, max_loc = cv2.minMaxLoc(match)
178
- if max_val < 0.35:
179
- TRACK_CONFIDENCE *= 0.7
180
- return None, float(max_val)
181
-
182
- nx = sx0 + int(max_loc[0])
183
- ny = sy0 + int(max_loc[1])
184
- new_box = _clamp_box((nx, ny, w, h), frame_bgr.shape)
185
- new_patch = _patch_from_box(frame_bgr, new_box)
186
- if new_patch is not None and new_patch.shape == tmpl.shape:
187
- TRACK_TEMPLATE = cv2.addWeighted(tmpl, 0.82, new_patch, 0.18, 0)
188
- TRACK_BOX = new_box
189
- TRACK_CONFIDENCE = 0.8 * TRACK_CONFIDENCE + 0.2 * float(max_val)
190
- return new_box, float(max_val)
191
-
192
-
193
- def _body_box_from_face(face_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
194
- x, y, fw, fh = face_box
195
- body_w = int(fw * 2.9)
196
- body_h = int(fh * 5.1)
197
- cx = x + fw // 2
198
- cy = y + int(fh * 2.35)
199
- return cx - body_w // 2, cy - body_h // 2, body_w, body_h
200
-
201
-
202
- def _body_box_from_upper(upper_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
203
- x, y, uw, uh = upper_box
204
- body_w = int(uw * 1.35)
205
- body_h = int(uh * 2.45)
206
- cx = x + uw // 2
207
- cy = y + int(uh * 1.15)
208
- return cx - body_w // 2, cy - body_h // 2, body_w, body_h
209
-
210
-
211
- def _detect_body_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, str]:
212
- faces = _detect_faces(frame_bgr)
213
- if len(faces) > 0:
214
- x, y, w, h = max(faces, key=lambda item: item[2] * item[3])
215
- return _clamp_box(_body_box_from_face((x, y, w, h)), frame_bgr.shape), "face"
216
-
217
- uppers = _detect_upper_body(frame_bgr)
218
- if len(uppers) > 0:
219
- x, y, w, h = max(uppers, key=lambda item: item[2] * item[3])
220
- return _clamp_box(_body_box_from_upper((x, y, w, h)), frame_bgr.shape), "upper-body"
221
-
222
- return None, ""
223
-
224
-
225
- def _resolve_fallback_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int], str]:
226
- global FRAME_INDEX
227
- FRAME_INDEX += 1
228
-
229
- should_detect = FRAME_INDEX % 3 == 0 or TRACK_BOX is None
230
- detected_box: tuple[int, int, int, int] | None = None
231
- detected_source = ""
232
- if should_detect:
233
- detected_box, detected_source = _detect_body_box(frame_bgr)
234
-
235
- tracked_box, track_score = _update_tracker(frame_bgr)
236
-
237
- if detected_box is not None and tracked_box is not None:
238
- if _box_iou(detected_box, tracked_box) >= 0.1:
239
- ax, ay, aw, ah = detected_box
240
- bx, by, bw, bh = tracked_box
241
- blended = (
242
- int(0.6 * ax + 0.4 * bx),
243
- int(0.6 * ay + 0.4 * by),
244
- int(0.6 * aw + 0.4 * bw),
245
- int(0.6 * ah + 0.4 * bh),
246
- )
247
- final_box = _clamp_box(blended, frame_bgr.shape)
248
- else:
249
- final_box = detected_box
250
- _set_tracker(frame_bgr, final_box)
251
- return final_box, f"fallback detect+track ({detected_source})"
252
-
253
- if detected_box is not None:
254
- _set_tracker(frame_bgr, detected_box)
255
- return detected_box, f"fallback detect ({detected_source})"
256
-
257
- if tracked_box is not None:
258
- return tracked_box, f"fallback track ({track_score:.2f})"
259
-
260
- cx, cy, bw, bh = _fallback_body_box(frame_bgr)
261
- x = cx - bw // 2
262
- y = cy - bh // 2
263
- box = _clamp_box((x, y, bw, bh), frame_bgr.shape)
264
- _set_tracker(frame_bgr, box)
265
- return box, "fallback static"
266
-
267
-
268
- def _segment_person_mask(frame_bgr: np.ndarray, threshold: float) -> tuple[np.ndarray | None, np.ndarray | None]:
269
- if SELFIE_SEGMENTER is None:
270
- return None, None
271
- rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
272
- result = SELFIE_SEGMENTER.process(rgb)
273
- if result.segmentation_mask is None:
274
- return None, None
275
- raw_mask = np.clip(result.segmentation_mask.astype(np.float32), 0.0, 1.0)
276
- binary = raw_mask > threshold
277
- return raw_mask, binary
278
-
279
-
280
- def _avatar_key(avatar_bgr: np.ndarray) -> str:
281
- thumb = cv2.resize(avatar_bgr, (32, 32), interpolation=cv2.INTER_AREA)
282
  return (
283
- f"{avatar_bgr.shape[0]}x{avatar_bgr.shape[1]}"
284
  f"-{int(thumb.mean())}"
285
  f"-{int(thumb[::4, ::4].sum())}"
286
  )
287
 
288
 
289
- def _extract_avatar_person(avatar_bgr: np.ndarray) -> tuple[np.ndarray, np.ndarray, str]:
290
- raw_mask, binary = _segment_person_mask(avatar_bgr, threshold=0.22)
291
- if raw_mask is None or binary is None or int(binary.sum()) < 2000:
292
- # Heuristic fallback: separate subject from corner background color.
293
- h, w = avatar_bgr.shape[:2]
294
- patch = max(10, min(h, w) // 18)
295
- corners = [
296
- avatar_bgr[:patch, :patch],
297
- avatar_bgr[:patch, -patch:],
298
- avatar_bgr[-patch:, :patch],
299
- avatar_bgr[-patch:, -patch:],
300
- ]
301
- bg_color = np.mean(np.concatenate([c.reshape(-1, 3) for c in corners], axis=0), axis=0)
302
- dist = np.linalg.norm(avatar_bgr.astype(np.float32) - bg_color[None, None, :], axis=2)
303
- heuristic = (dist > 26.0).astype(np.float32)
304
- if float(heuristic.mean()) < 0.03:
305
- full_mask = np.ones(avatar_bgr.shape[:2], dtype=np.float32)
306
- return avatar_bgr, full_mask, "Avatar segmentation fallback."
307
- heuristic = cv2.GaussianBlur(heuristic, (0, 0), 2.8)
308
- heuristic = np.clip(heuristic, 0.0, 1.0)
309
- return avatar_bgr, heuristic, "Avatar segmentation fallback."
310
-
311
- ys, xs = np.where(binary)
312
- x0, x1 = int(xs.min()), int(xs.max())
313
- y0, y1 = int(ys.min()), int(ys.max())
314
-
315
- crop = avatar_bgr[y0 : y1 + 1, x0 : x1 + 1]
316
- crop_mask = raw_mask[y0 : y1 + 1, x0 : x1 + 1]
317
- crop_mask = cv2.GaussianBlur(crop_mask, (0, 0), 1.8)
318
- crop_mask = np.clip(crop_mask, 0.0, 1.0)
319
- return crop, crop_mask, ""
320
-
321
-
322
- def _get_avatar_assets(avatar_image: np.ndarray | None) -> tuple[np.ndarray | None, np.ndarray | None, str]:
323
- global AVATAR_CACHE_KEY, AVATAR_CACHE_CROP, AVATAR_CACHE_MASK
324
-
325
- if avatar_image is None:
326
- return None, None, "Upload persona image."
327
- avatar_bgr = _prepare_bgr(avatar_image)
328
- key = _avatar_key(avatar_bgr)
329
-
330
- if (
331
- key == AVATAR_CACHE_KEY
332
- and AVATAR_CACHE_CROP is not None
333
- and AVATAR_CACHE_MASK is not None
334
- ):
335
- return AVATAR_CACHE_CROP, AVATAR_CACHE_MASK, ""
336
-
337
- crop, mask, msg = _extract_avatar_person(avatar_bgr)
338
- AVATAR_CACHE_KEY = key
339
- AVATAR_CACHE_CROP = crop
340
- AVATAR_CACHE_MASK = mask
341
- return crop, mask, msg
342
-
343
-
344
- def _place_with_alpha(
345
- canvas_shape: tuple[int, int, int],
346
- src_img: np.ndarray,
347
- src_alpha: np.ndarray,
348
- center_x: int,
349
- center_y: int,
350
- target_w: int,
351
- target_h: int,
352
- ) -> tuple[np.ndarray, np.ndarray]:
353
- canvas_img = np.zeros(canvas_shape, dtype=np.uint8)
354
- canvas_alpha = np.zeros(canvas_shape[:2], dtype=np.float32)
355
-
356
- if target_w < 4 or target_h < 4:
357
- return canvas_img, canvas_alpha
358
-
359
- resized_img = cv2.resize(src_img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
360
- resized_alpha = cv2.resize(src_alpha, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
361
-
362
- x0 = center_x - target_w // 2
363
- y0 = center_y - target_h // 2
364
- x1 = x0 + target_w
365
- y1 = y0 + target_h
366
-
367
- dst_x0 = max(0, x0)
368
- dst_y0 = max(0, y0)
369
- dst_x1 = min(canvas_shape[1], x1)
370
- dst_y1 = min(canvas_shape[0], y1)
371
- if dst_x0 >= dst_x1 or dst_y0 >= dst_y1:
372
- return canvas_img, canvas_alpha
373
-
374
- src_x0 = dst_x0 - x0
375
- src_y0 = dst_y0 - y0
376
- src_x1 = src_x0 + (dst_x1 - dst_x0)
377
- src_y1 = src_y0 + (dst_y1 - dst_y0)
378
-
379
- canvas_img[dst_y0:dst_y1, dst_x0:dst_x1] = resized_img[src_y0:src_y1, src_x0:src_x1]
380
- canvas_alpha[dst_y0:dst_y1, dst_x0:dst_x1] = resized_alpha[src_y0:src_y1, src_x0:src_x1]
381
- return canvas_img, np.clip(canvas_alpha, 0.0, 1.0)
382
-
383
-
384
- def _fallback_body_box(frame_bgr: np.ndarray) -> tuple[int, int, int, int]:
385
- h, w = frame_bgr.shape[:2]
386
- faces = _detect_faces(frame_bgr)
387
- if len(faces) > 0:
388
- x, y, fw, fh = max(faces, key=lambda item: item[2] * item[3])
389
- body_w = int(fw * 2.8)
390
- body_h = int(fh * 5.0)
391
- center_x = x + fw // 2
392
- center_y = y + int(fh * 2.3)
393
- return center_x, center_y, body_w, body_h
394
-
395
- return w // 2, int(h * 0.54), int(w * 0.56), int(h * 0.86)
396
-
397
-
398
- def _full_body_replace(
399
- frame_bgr: np.ndarray,
400
- avatar_image: np.ndarray | None,
401
- replace_strength: float,
402
- mask_threshold: float,
403
- edge_softness: float,
404
- ) -> tuple[np.ndarray, str]:
405
- avatar_crop, avatar_mask, avatar_msg = _get_avatar_assets(avatar_image)
406
- if avatar_crop is None or avatar_mask is None:
407
- return frame_bgr, "Upload persona image to start replacement."
408
-
409
- raw_mask, binary = _segment_person_mask(frame_bgr, threshold=mask_threshold)
410
- use_fallback_box = raw_mask is None or binary is None or int(binary.sum()) < 2500
411
-
412
- if use_fallback_box:
413
- box, source = _resolve_fallback_box(frame_bgr)
414
- bx, by, person_w, person_h = box
415
- center_x = bx + person_w // 2
416
- center_y = by + person_h // 2
417
- status = f"Tracking fallback active ({source})."
418
- else:
419
- ys, xs = np.where(binary)
420
- x0, x1 = int(xs.min()), int(xs.max())
421
- y0, y1 = int(ys.min()), int(ys.max())
422
- person_w = x1 - x0 + 1
423
- person_h = y1 - y0 + 1
424
- center_x = x0 + person_w // 2
425
- center_y = y0 + int(person_h * 0.52)
426
- status = "Full body replace active."
427
-
428
- avatar_h, avatar_w = avatar_crop.shape[:2]
429
- scale = 1.08 + 0.34 * replace_strength
430
- target_h = int(person_h * scale)
431
- target_w = int(target_h * (avatar_w / max(1, avatar_h)))
432
- target_w = max(target_w, int(person_w * 0.98))
433
-
434
- avatar_layer, avatar_alpha = _place_with_alpha(
435
- canvas_shape=frame_bgr.shape,
436
- src_img=avatar_crop,
437
- src_alpha=avatar_mask,
438
- center_x=center_x,
439
- center_y=center_y,
440
- target_w=target_w,
441
- target_h=target_h,
442
- )
443
 
444
- if use_fallback_box:
445
- fallback_mask = np.zeros(frame_bgr.shape[:2], dtype=np.float32)
446
- axes = (max(24, int(person_w * 0.58)), max(24, int(person_h * 0.62)))
447
- cv2.ellipse(fallback_mask, (center_x, center_y), axes, 0, 0, 360, 1.0, -1)
448
- target_alpha = cv2.GaussianBlur(fallback_mask, (0, 0), 12.0)
449
- # Make replacement visually obvious in fallback mode.
450
- avatar_alpha = np.clip(0.35 + 0.65 * avatar_alpha, 0.0, 1.0)
451
- else:
452
- target_alpha = cv2.GaussianBlur(raw_mask, (0, 0), 1.5 + 18.0 * edge_softness)
453
-
454
- alpha = np.clip(target_alpha * avatar_alpha * replace_strength, 0.0, 1.0)
455
- mixed = (
456
- avatar_layer.astype(np.float32) * alpha[:, :, None]
457
- + frame_bgr.astype(np.float32) * (1.0 - alpha[:, :, None])
458
- )
459
 
460
- notes: list[str] = [status]
461
- if avatar_msg:
462
- notes.append(avatar_msg)
463
- if SELFIE_SEGMENTER is None:
464
- notes.append("Live segmentation unavailable on server.")
465
- return np.clip(mixed, 0, 255).astype(np.uint8), " ".join(notes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
 
468
  def transform_live(
469
  frame: np.ndarray,
470
- avatar_image: np.ndarray | None,
471
  mirror: bool,
472
- replace_strength: float,
473
- mask_threshold: float,
474
- edge_softness: float,
475
  ) -> tuple[np.ndarray, str]:
476
  global LAST_FRAME_TS, EMA_FPS
477
 
478
  if frame is None:
479
  return frame, "Waiting for webcam frame."
480
 
481
- started = time.perf_counter()
482
- original_bgr = _prepare_bgr(frame)
483
- resized_bgr, original_size = _resize_for_speed(original_bgr, max_side=640)
484
-
485
- transformed, status_text = _full_body_replace(
486
- resized_bgr,
487
- avatar_image=avatar_image,
488
- replace_strength=replace_strength,
489
- mask_threshold=mask_threshold,
490
- edge_softness=edge_softness,
491
- )
492
 
493
- if mirror:
494
- transformed = cv2.flip(transformed, 1)
 
495
 
496
- if transformed.shape[1] != original_size[0] or transformed.shape[0] != original_size[1]:
497
- transformed = cv2.resize(
498
- transformed,
499
- original_size,
500
- interpolation=cv2.INTER_LINEAR,
 
 
 
 
 
 
 
501
  )
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  now = time.perf_counter()
504
- frame_latency_ms = (now - started) * 1000.0
505
- proc_fps = 1000.0 / max(frame_latency_ms, 1e-4)
506
  if LAST_FRAME_TS > 0.0:
507
- instant_fps = 1.0 / max(now - LAST_FRAME_TS, 1e-4)
508
- EMA_FPS = instant_fps if EMA_FPS <= 0.0 else (0.2 * instant_fps + 0.8 * EMA_FPS)
509
  LAST_FRAME_TS = now
510
 
511
- status_text = (
512
- f"{status_text} | proc {proc_fps:.1f} fps ({frame_latency_ms:.0f} ms) "
513
- f"| stream ~{EMA_FPS:.1f} fps"
514
  )
515
- return cv2.cvtColor(transformed, cv2.COLOR_BGR2RGB), status_text
516
 
517
 
518
- with gr.Blocks(title="Live Full Body Replace") as demo:
519
  gr.Markdown(
520
  """
521
- # Live Full Body Replace
522
- Only one workflow is enabled:
523
- 1. Upload persona image.
524
- 2. Start webcam stream.
525
- 3. Output panel shows live replacement.
526
  """
527
  )
528
 
529
  with gr.Row():
530
- replace_strength = gr.Slider(
531
- label="Replace strength",
532
- minimum=0.35,
533
  maximum=1.0,
534
- value=0.92,
535
  step=0.01,
536
  )
537
- mask_threshold = gr.Slider(
538
- label="Body mask threshold",
539
- minimum=0.1,
540
- maximum=0.8,
541
- value=0.28,
542
- step=0.02,
543
  )
544
- edge_softness = gr.Slider(
545
- label="Edge softness",
546
- minimum=0.0,
547
- maximum=1.0,
548
- value=0.35,
549
- step=0.02,
550
  )
551
  mirror = gr.Checkbox(label="Mirror output", value=True)
552
 
553
- avatar_upload = gr.Image(
554
- label="Persona image (preloaded; replace if you want another look)",
555
  type="numpy",
556
- value=DEFAULT_AVATAR_VALUE,
557
  )
558
 
 
 
 
559
  with gr.Row():
560
  live_input = gr.Image(
561
  label="Webcam input",
@@ -564,25 +242,24 @@ Only one workflow is enabled:
564
  streaming=True,
565
  )
566
  live_output = gr.Image(
567
- label="Live replaced output",
568
  type="numpy",
569
  )
570
 
571
- live_status = gr.Markdown("Waiting for webcam + persona image.")
 
 
 
 
 
 
572
 
573
  live_input.stream(
574
  fn=transform_live,
575
- inputs=[
576
- live_input,
577
- avatar_upload,
578
- mirror,
579
- replace_strength,
580
- mask_threshold,
581
- edge_softness,
582
- ],
583
- outputs=[live_output, live_status],
584
  time_limit=None,
585
- stream_every=0.05,
586
  concurrency_limit=1,
587
  queue=False,
588
  show_progress="hidden",
 
7
  import cv2
8
  import gradio as gr
9
  import numpy as np
10
+ import onnxruntime as ort
11
+ from insightface.app import FaceAnalysis
12
+ from insightface.model_zoo import get_model
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  APP_DIR = Path(__file__).resolve().parent
15
+ DEFAULT_SOURCE_PATH = APP_DIR / "assets" / "default_persona.jpg"
16
+ DEFAULT_SOURCE_VALUE = str(DEFAULT_SOURCE_PATH) if DEFAULT_SOURCE_PATH.exists() else None
 
 
 
 
 
 
 
 
 
17
 
18
+ FACE_ANALYSER: FaceAnalysis | None = None
19
+ FACE_SWAPPER: Any | None = None
20
+ MODEL_PROVIDERS: list[str] = []
21
+ MODEL_ERROR = ""
 
22
 
23
+ SOURCE_FACE: Any | None = None
24
+ SOURCE_FACE_KEY = ""
25
 
 
26
  LAST_FRAME_TS = 0.0
27
  EMA_FPS = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
def _to_bgr(image: np.ndarray) -> np.ndarray:
    """Normalize an incoming frame (gray/RGB/RGBA, any numeric dtype) to uint8 BGR."""
    # OpenCV conversions expect 8-bit data, so clamp and cast first.
    frame = image if image.dtype == np.uint8 else np.clip(image, 0, 255).astype(np.uint8)
    if frame.ndim == 2:
        conversion = cv2.COLOR_GRAY2BGR
    elif frame.shape[2] == 4:
        conversion = cv2.COLOR_RGBA2BGR
    else:
        conversion = cv2.COLOR_RGB2BGR
    return cv2.cvtColor(frame, conversion)
 
 
38
 
39
 
40
def _image_key(image_bgr: np.ndarray) -> str:
    """Cheap fingerprint of an image, used to detect when the cached source changed."""
    # A 32x32 thumbnail keeps the key insensitive to minor noise while staying fast.
    thumb = cv2.resize(image_bgr, (32, 32), interpolation=cv2.INTER_AREA)
    height, width = image_bgr.shape[:2]
    mean_part = int(thumb.mean())
    sum_part = int(thumb[::4, ::4].sum())
    return f"{height}x{width}-{mean_part}-{sum_part}"
47
 
48
 
49
+ def _face_area(face: Any) -> float:
50
+ x0, y0, x1, y1 = face.bbox
51
+ return max(1.0, float(x1 - x0) * float(y1 - y0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
def _ensure_models() -> tuple[bool, str]:
    """Lazily initialize the face analyser and the inswapper model.

    Returns (ok, message). A failed init is latched in MODEL_ERROR so every
    subsequent call fails fast with the same message instead of retrying the
    (slow) download/initialization.
    """
    global FACE_ANALYSER, FACE_SWAPPER, MODEL_PROVIDERS, MODEL_ERROR
    if FACE_ANALYSER is not None and FACE_SWAPPER is not None:
        return True, ""
    if MODEL_ERROR:
        return False, MODEL_ERROR

    try:
        # Prefer CUDA when onnxruntime reports it; fall back to CPU-only.
        use_cuda = "CUDAExecutionProvider" in ort.get_available_providers()
        if use_cuda:
            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        else:
            providers = ["CPUExecutionProvider"]

        analyser = FaceAnalysis(name="buffalo_l", providers=providers)
        analyser.prepare(ctx_id=0 if use_cuda else -1, det_size=(640, 640))
        swapper = get_model("inswapper_128.onnx", download=True, download_zip=True, providers=providers)
    except Exception as exc:
        MODEL_ERROR = f"Model init failed: {type(exc).__name__}: {exc}"
        return False, MODEL_ERROR

    # Publish globals only after both models initialized successfully.
    FACE_ANALYSER = analyser
    FACE_SWAPPER = swapper
    MODEL_PROVIDERS = providers
    return True, ""
78
+
79
+
80
def _ensure_source_face(source_image: np.ndarray | None) -> tuple[bool, str]:
    """Detect and cache the identity face from the uploaded source portrait.

    Returns (ok, message). The detected face is cached keyed on an image
    fingerprint, so repeated calls with the same portrait skip detection.
    """
    global SOURCE_FACE, SOURCE_FACE_KEY
    if source_image is None:
        return False, "Upload source portrait first."

    ok, msg = _ensure_models()
    if not ok:
        return False, msg

    source_bgr = _to_bgr(source_image)
    key = _image_key(source_bgr)
    # Reuse the cached face when the portrait has not changed.
    if SOURCE_FACE is not None and key == SOURCE_FACE_KEY:
        return True, ""

    assert FACE_ANALYSER is not None
    faces = FACE_ANALYSER.get(source_bgr)
    if faces:
        SOURCE_FACE = max(faces, key=_face_area)
        SOURCE_FACE_KEY = key
        return True, ""

    SOURCE_FACE = None
    SOURCE_FACE_KEY = ""
    return False, "No face found in source image. Use clear front-facing portrait."
104
+
105
+
106
def warmup(source_image: np.ndarray | None) -> str:
    """Eagerly initialize models and cache the source face; return a UI status line."""
    ok, msg = _ensure_models()
    if ok:
        # Source-face extraction re-checks models internally; order preserves
        # the original error-message priority (model error before portrait error).
        ok, msg = _ensure_source_face(source_image)
    if not ok:
        return msg
    return f"Model ready. Providers: {', '.join(MODEL_PROVIDERS)}"
114
 
115
 
116
def transform_live(
    frame: np.ndarray,
    source_image: np.ndarray | None,
    mirror: bool,
    swap_strength: float,
    max_faces: int,
    process_side: int,
) -> tuple[np.ndarray, str]:
    """Swap the cached source face onto the faces found in one webcam frame.

    Args:
        frame: Webcam frame from Gradio (RGB/RGBA/gray, any numeric dtype).
        source_image: Source portrait; the identity face is cached across frames.
        mirror: Horizontally flip the output (selfie view).
        swap_strength: Blend factor in [0, 1] between swapped and original frame.
        max_faces: Swap at most this many of the largest detected faces.
        process_side: Maximum length of the longer image side during processing.

    Returns:
        Tuple of (RGB output frame, human-readable status string).
    """
    global LAST_FRAME_TS, EMA_FPS

    if frame is None:
        return frame, "Waiting for webcam frame."

    ok, msg = _ensure_models()
    if not ok:
        return frame, msg
    ok, msg = _ensure_source_face(source_image)
    if not ok:
        return frame, msg

    assert FACE_ANALYSER is not None
    assert FACE_SWAPPER is not None
    assert SOURCE_FACE is not None

    # Gradio sliders can deliver floats even with step=1; slicing and size math
    # below require ints (list[:float] raises TypeError).
    max_faces = max(1, int(max_faces))
    process_side = int(process_side)

    started = time.perf_counter()
    frame_bgr = _to_bgr(frame)
    original_h, original_w = frame_bgr.shape[:2]

    # Downscale for detection/swap speed; the result is upscaled afterwards.
    resized = frame_bgr
    ratio = 1.0
    if max(original_h, original_w) > process_side:
        ratio = process_side / float(max(original_h, original_w))
        resized = cv2.resize(
            frame_bgr,
            (int(original_w * ratio), int(original_h * ratio)),
            interpolation=cv2.INTER_AREA,
        )

    target_faces = FACE_ANALYSER.get(resized)
    swapped = resized.copy()
    applied = 0

    if target_faces:
        # Largest faces first so the main subject is always among those swapped.
        ordered_faces = sorted(target_faces, key=_face_area, reverse=True)[:max_faces]
        for target_face in ordered_faces:
            swapped = FACE_SWAPPER.get(swapped, target_face, SOURCE_FACE, paste_back=True)
            applied += 1

    if swap_strength < 0.999:
        # Blend swapped result back toward the original frame.
        swapped = cv2.addWeighted(
            swapped.astype(np.float32),
            float(swap_strength),
            resized.astype(np.float32),
            float(1.0 - swap_strength),
            0.0,
        ).astype(np.uint8)

    if ratio != 1.0:
        swapped = cv2.resize(swapped, (original_w, original_h), interpolation=cv2.INTER_LINEAR)

    if mirror:
        swapped = cv2.flip(swapped, 1)

    now = time.perf_counter()
    frame_ms = (now - started) * 1000.0
    proc_fps = 1000.0 / max(frame_ms, 1e-4)
    if LAST_FRAME_TS > 0.0:
        inst_fps = 1.0 / max(now - LAST_FRAME_TS, 1e-4)
        # Exponential moving average smooths the displayed stream rate.
        EMA_FPS = inst_fps if EMA_FPS <= 0.0 else (0.2 * inst_fps + 0.8 * EMA_FPS)
    LAST_FRAME_TS = now

    status = (
        f"Swapped faces: {applied} | providers: {', '.join(MODEL_PROVIDERS)} "
        f"| proc {proc_fps:.1f} fps ({frame_ms:.0f} ms) | stream ~{EMA_FPS:.1f} fps"
    )
    return cv2.cvtColor(swapped, cv2.COLOR_BGR2RGB), status
 
193
 
194
+ with gr.Blocks(title="Live Upper-Body Swap") as demo:
195
  gr.Markdown(
196
  """
197
+ # Live Upper-Body Swap
198
+ 1. Upload source portrait (or keep preloaded default).
199
+ 2. Click **Warm up model** once.
200
+ 3. Start webcam stream.
 
201
  """
202
  )
203
 
204
  with gr.Row():
205
+ swap_strength = gr.Slider(
206
+ label="Swap strength",
207
+ minimum=0.45,
208
  maximum=1.0,
209
+ value=0.95,
210
  step=0.01,
211
  )
212
+ max_faces = gr.Slider(
213
+ label="Max faces in frame",
214
+ minimum=1,
215
+ maximum=4,
216
+ value=1,
217
+ step=1,
218
  )
219
+ process_side = gr.Slider(
220
+ label="Process size (speed/quality)",
221
+ minimum=384,
222
+ maximum=960,
223
+ value=640,
224
+ step=32,
225
  )
226
  mirror = gr.Checkbox(label="Mirror output", value=True)
227
 
228
+ source_image = gr.Image(
229
+ label="Source portrait",
230
  type="numpy",
231
+ value=DEFAULT_SOURCE_VALUE,
232
  )
233
 
234
+ warmup_button = gr.Button("Warm up model", variant="primary")
235
+ status = gr.Markdown("Idle.")
236
+
237
  with gr.Row():
238
  live_input = gr.Image(
239
  label="Webcam input",
 
242
  streaming=True,
243
  )
244
  live_output = gr.Image(
245
+ label="Live swapped output",
246
  type="numpy",
247
  )
248
 
249
+ warmup_button.click(
250
+ fn=warmup,
251
+ inputs=[source_image],
252
+ outputs=[status],
253
+ queue=False,
254
+ show_progress="hidden",
255
+ )
256
 
257
  live_input.stream(
258
  fn=transform_live,
259
+ inputs=[live_input, source_image, mirror, swap_strength, max_faces, process_side],
260
+ outputs=[live_output, status],
 
 
 
 
 
 
 
261
  time_limit=None,
262
+ stream_every=0.06,
263
  concurrency_limit=1,
264
  queue=False,
265
  show_progress="hidden",
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio>=5.0.0
2
- numpy>=1.26.4
3
  opencv-python-headless>=4.10.0.84
4
- mediapipe>=0.10.14
 
 
1
+ gradio==5.44.1
2
+ numpy>=1.26.4,<2.3
3
  opencv-python-headless>=4.10.0.84
4
+ onnxruntime-gpu>=1.17.0
5
+ insightface==0.7.3