Deploy IG test profile studio
Browse files- README.md +14 -20
- app.py +181 -504
- requirements.txt +4 -3
README.md
CHANGED
|
@@ -1,33 +1,27 @@
|
|
| 1 |
---
|
| 2 |
-
title: Live Upper-Body Swap
|
| 3 |
colorFrom: blue
|
| 4 |
colorTo: pink
|
| 5 |
-
sdk:
|
| 6 |
-
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
# Live Upper-Body Swap
|
| 11 |
|
| 12 |
-
This Space
|
| 13 |
|
| 14 |
-
##
|
| 15 |
|
| 16 |
1. Open the Space in browser.
|
| 17 |
-
2.
|
| 18 |
-
3. Start webcam
|
| 19 |
-
|
| 20 |
-
- `face masker`: `occlusion + region`
|
| 21 |
-
4. Tune blend until movement and identity look natural.
|
| 22 |
-
|
| 23 |
-
## Why this is more realistic than overlay
|
| 24 |
-
|
| 25 |
-
- true face swap model (not static image paste)
|
| 26 |
-
- temporal consistency from webcam layout
|
| 27 |
-
- enhancer and expression restorer for live motion quality
|
| 28 |
|
| 29 |
## Notes
|
| 30 |
|
| 31 |
-
-
|
| 32 |
-
-
|
| 33 |
-
-
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Live Upper-Body Swap
|
| 3 |
colorFrom: blue
|
| 4 |
colorTo: pink
|
| 5 |
+
sdk: gradio
|
| 6 |
+
sdk_version: 5.44.1
|
| 7 |
+
app_file: app.py
|
| 8 |
+
python_version: "3.10"
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Live Upper-Body Swap
|
| 13 |
|
| 14 |
+
This Space performs real-time face swap from browser webcam frames.
|
| 15 |
|
| 16 |
+
## Workflow
|
| 17 |
|
| 18 |
1. Open the Space in browser.
|
| 19 |
+
2. Upload a source portrait (or use the default one).
|
| 20 |
+
3. Start webcam stream.
|
| 21 |
+
4. Adjust swap strength and max faces.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
## Notes
|
| 24 |
|
| 25 |
+
- Uses `inswapper_128` through `insightface`.
|
| 26 |
+
- Works with browser webcam input on Hugging Face Spaces.
|
| 27 |
+
- Use strong, even frontal lighting on your face for more realistic results.
|
app.py
CHANGED
|
@@ -7,555 +7,233 @@ from typing import Any
|
|
| 7 |
import cv2
|
| 8 |
import gradio as gr
|
| 9 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
try:
|
| 12 |
-
import mediapipe as mp # type: ignore
|
| 13 |
-
except Exception:
|
| 14 |
-
mp = None
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
FACE_CASCADE = cv2.CascadeClassifier(
|
| 18 |
-
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
| 19 |
-
)
|
| 20 |
-
UPPER_BODY_CASCADE = cv2.CascadeClassifier(
|
| 21 |
-
cv2.data.haarcascades + "haarcascade_upperbody.xml"
|
| 22 |
-
)
|
| 23 |
APP_DIR = Path(__file__).resolve().parent
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def _init_selfie_segmenter() -> tuple[Any | None, str]:
|
| 29 |
-
if mp is None:
|
| 30 |
-
return None, "mediapipe import failed"
|
| 31 |
-
try:
|
| 32 |
-
solutions = getattr(mp, "solutions", None)
|
| 33 |
-
if solutions is None:
|
| 34 |
-
from mediapipe.python import solutions as mp_solutions # type: ignore
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
return None, f"{type(exc).__name__}: {exc}"
|
| 41 |
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
SELFIE_SEGMENTER, SELFIE_SEGMENTER_ERROR = _init_selfie_segmenter()
|
| 44 |
LAST_FRAME_TS = 0.0
|
| 45 |
EMA_FPS = 0.0
|
| 46 |
-
AVATAR_CACHE_KEY = ""
|
| 47 |
-
AVATAR_CACHE_CROP: np.ndarray | None = None
|
| 48 |
-
AVATAR_CACHE_MASK: np.ndarray | None = None
|
| 49 |
-
TRACK_BOX: tuple[int, int, int, int] | None = None
|
| 50 |
-
TRACK_TEMPLATE: np.ndarray | None = None
|
| 51 |
-
TRACK_CONFIDENCE = 0.0
|
| 52 |
-
FRAME_INDEX = 0
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def _prepare_bgr(frame: np.ndarray) -> np.ndarray:
|
| 56 |
-
if frame.dtype != np.uint8:
|
| 57 |
-
frame = np.clip(frame, 0, 255).astype(np.uint8)
|
| 58 |
-
if frame.ndim == 2:
|
| 59 |
-
return cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
|
| 60 |
-
if frame.shape[2] == 4:
|
| 61 |
-
return cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
|
| 62 |
-
return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
def _resize_for_speed(frame_bgr: np.ndarray, max_side: int) -> tuple[np.ndarray, tuple[int, int]]:
|
| 66 |
-
height, width = frame_bgr.shape[:2]
|
| 67 |
-
if max(height, width) <= max_side:
|
| 68 |
-
return frame_bgr, (width, height)
|
| 69 |
-
ratio = max_side / float(max(height, width))
|
| 70 |
-
resized = cv2.resize(
|
| 71 |
-
frame_bgr,
|
| 72 |
-
(int(width * ratio), int(height * ratio)),
|
| 73 |
-
interpolation=cv2.INTER_AREA,
|
| 74 |
-
)
|
| 75 |
-
return resized, (width, height)
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
def _detect_faces(frame_bgr: np.ndarray) -> np.ndarray:
|
| 79 |
-
gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
|
| 80 |
-
return FACE_CASCADE.detectMultiScale(
|
| 81 |
-
gray,
|
| 82 |
-
scaleFactor=1.12,
|
| 83 |
-
minNeighbors=5,
|
| 84 |
-
minSize=(60, 60),
|
| 85 |
-
)
|
| 86 |
|
| 87 |
|
| 88 |
-
def
|
| 89 |
-
if
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
minSize=(80, 80),
|
| 97 |
-
)
|
| 98 |
|
| 99 |
|
| 100 |
-
def
|
| 101 |
-
|
| 102 |
-
frame_shape: tuple[int, int, int],
|
| 103 |
-
) -> tuple[int, int, int, int]:
|
| 104 |
-
x, y, w, h = box
|
| 105 |
-
img_h, img_w = frame_shape[:2]
|
| 106 |
-
x = max(0, min(x, img_w - 2))
|
| 107 |
-
y = max(0, min(y, img_h - 2))
|
| 108 |
-
w = max(2, min(w, img_w - x))
|
| 109 |
-
h = max(2, min(h, img_h - y))
|
| 110 |
-
return x, y, w, h
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def _box_iou(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> float:
|
| 114 |
-
ax, ay, aw, ah = a
|
| 115 |
-
bx, by, bw, bh = b
|
| 116 |
-
ax2, ay2 = ax + aw, ay + ah
|
| 117 |
-
bx2, by2 = bx + bw, by + bh
|
| 118 |
-
ix1, iy1 = max(ax, bx), max(ay, by)
|
| 119 |
-
ix2, iy2 = min(ax2, bx2), min(ay2, by2)
|
| 120 |
-
iw, ih = max(0, ix2 - ix1), max(0, iy2 - iy1)
|
| 121 |
-
inter = iw * ih
|
| 122 |
-
if inter <= 0:
|
| 123 |
-
return 0.0
|
| 124 |
-
union = aw * ah + bw * bh - inter
|
| 125 |
-
return inter / max(union, 1)
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
def _patch_from_box(
|
| 129 |
-
frame_bgr: np.ndarray,
|
| 130 |
-
box: tuple[int, int, int, int],
|
| 131 |
-
) -> np.ndarray | None:
|
| 132 |
-
x, y, w, h = _clamp_box(box, frame_bgr.shape)
|
| 133 |
-
patch = frame_bgr[y : y + h, x : x + w]
|
| 134 |
-
if patch.size == 0:
|
| 135 |
-
return None
|
| 136 |
-
return cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def _set_tracker(
|
| 140 |
-
frame_bgr: np.ndarray,
|
| 141 |
-
box: tuple[int, int, int, int],
|
| 142 |
-
) -> None:
|
| 143 |
-
global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
|
| 144 |
-
patch = _patch_from_box(frame_bgr, box)
|
| 145 |
-
if patch is None or patch.shape[0] < 12 or patch.shape[1] < 12:
|
| 146 |
-
return
|
| 147 |
-
TRACK_BOX = _clamp_box(box, frame_bgr.shape)
|
| 148 |
-
TRACK_TEMPLATE = patch
|
| 149 |
-
TRACK_CONFIDENCE = 1.0
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
def _update_tracker(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, float]:
|
| 153 |
-
global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
|
| 154 |
-
if TRACK_BOX is None or TRACK_TEMPLATE is None:
|
| 155 |
-
return None, 0.0
|
| 156 |
-
|
| 157 |
-
x, y, w, h = TRACK_BOX
|
| 158 |
-
gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
|
| 159 |
-
margin = int(max(w, h) * 0.55)
|
| 160 |
-
sx0 = max(0, x - margin)
|
| 161 |
-
sy0 = max(0, y - margin)
|
| 162 |
-
sx1 = min(gray.shape[1], x + w + margin)
|
| 163 |
-
sy1 = min(gray.shape[0], y + h + margin)
|
| 164 |
-
|
| 165 |
-
search = gray[sy0:sy1, sx0:sx1]
|
| 166 |
-
tmpl = TRACK_TEMPLATE
|
| 167 |
-
if (
|
| 168 |
-
search.shape[0] < tmpl.shape[0]
|
| 169 |
-
or search.shape[1] < tmpl.shape[1]
|
| 170 |
-
or tmpl.shape[0] < 10
|
| 171 |
-
or tmpl.shape[1] < 10
|
| 172 |
-
):
|
| 173 |
-
TRACK_CONFIDENCE *= 0.75
|
| 174 |
-
return None, 0.0
|
| 175 |
-
|
| 176 |
-
match = cv2.matchTemplate(search, tmpl, cv2.TM_CCOEFF_NORMED)
|
| 177 |
-
_, max_val, _, max_loc = cv2.minMaxLoc(match)
|
| 178 |
-
if max_val < 0.35:
|
| 179 |
-
TRACK_CONFIDENCE *= 0.7
|
| 180 |
-
return None, float(max_val)
|
| 181 |
-
|
| 182 |
-
nx = sx0 + int(max_loc[0])
|
| 183 |
-
ny = sy0 + int(max_loc[1])
|
| 184 |
-
new_box = _clamp_box((nx, ny, w, h), frame_bgr.shape)
|
| 185 |
-
new_patch = _patch_from_box(frame_bgr, new_box)
|
| 186 |
-
if new_patch is not None and new_patch.shape == tmpl.shape:
|
| 187 |
-
TRACK_TEMPLATE = cv2.addWeighted(tmpl, 0.82, new_patch, 0.18, 0)
|
| 188 |
-
TRACK_BOX = new_box
|
| 189 |
-
TRACK_CONFIDENCE = 0.8 * TRACK_CONFIDENCE + 0.2 * float(max_val)
|
| 190 |
-
return new_box, float(max_val)
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
def _body_box_from_face(face_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
|
| 194 |
-
x, y, fw, fh = face_box
|
| 195 |
-
body_w = int(fw * 2.9)
|
| 196 |
-
body_h = int(fh * 5.1)
|
| 197 |
-
cx = x + fw // 2
|
| 198 |
-
cy = y + int(fh * 2.35)
|
| 199 |
-
return cx - body_w // 2, cy - body_h // 2, body_w, body_h
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
def _body_box_from_upper(upper_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
|
| 203 |
-
x, y, uw, uh = upper_box
|
| 204 |
-
body_w = int(uw * 1.35)
|
| 205 |
-
body_h = int(uh * 2.45)
|
| 206 |
-
cx = x + uw // 2
|
| 207 |
-
cy = y + int(uh * 1.15)
|
| 208 |
-
return cx - body_w // 2, cy - body_h // 2, body_w, body_h
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
def _detect_body_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, str]:
|
| 212 |
-
faces = _detect_faces(frame_bgr)
|
| 213 |
-
if len(faces) > 0:
|
| 214 |
-
x, y, w, h = max(faces, key=lambda item: item[2] * item[3])
|
| 215 |
-
return _clamp_box(_body_box_from_face((x, y, w, h)), frame_bgr.shape), "face"
|
| 216 |
-
|
| 217 |
-
uppers = _detect_upper_body(frame_bgr)
|
| 218 |
-
if len(uppers) > 0:
|
| 219 |
-
x, y, w, h = max(uppers, key=lambda item: item[2] * item[3])
|
| 220 |
-
return _clamp_box(_body_box_from_upper((x, y, w, h)), frame_bgr.shape), "upper-body"
|
| 221 |
-
|
| 222 |
-
return None, ""
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
def _resolve_fallback_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int], str]:
|
| 226 |
-
global FRAME_INDEX
|
| 227 |
-
FRAME_INDEX += 1
|
| 228 |
-
|
| 229 |
-
should_detect = FRAME_INDEX % 3 == 0 or TRACK_BOX is None
|
| 230 |
-
detected_box: tuple[int, int, int, int] | None = None
|
| 231 |
-
detected_source = ""
|
| 232 |
-
if should_detect:
|
| 233 |
-
detected_box, detected_source = _detect_body_box(frame_bgr)
|
| 234 |
-
|
| 235 |
-
tracked_box, track_score = _update_tracker(frame_bgr)
|
| 236 |
-
|
| 237 |
-
if detected_box is not None and tracked_box is not None:
|
| 238 |
-
if _box_iou(detected_box, tracked_box) >= 0.1:
|
| 239 |
-
ax, ay, aw, ah = detected_box
|
| 240 |
-
bx, by, bw, bh = tracked_box
|
| 241 |
-
blended = (
|
| 242 |
-
int(0.6 * ax + 0.4 * bx),
|
| 243 |
-
int(0.6 * ay + 0.4 * by),
|
| 244 |
-
int(0.6 * aw + 0.4 * bw),
|
| 245 |
-
int(0.6 * ah + 0.4 * bh),
|
| 246 |
-
)
|
| 247 |
-
final_box = _clamp_box(blended, frame_bgr.shape)
|
| 248 |
-
else:
|
| 249 |
-
final_box = detected_box
|
| 250 |
-
_set_tracker(frame_bgr, final_box)
|
| 251 |
-
return final_box, f"fallback detect+track ({detected_source})"
|
| 252 |
-
|
| 253 |
-
if detected_box is not None:
|
| 254 |
-
_set_tracker(frame_bgr, detected_box)
|
| 255 |
-
return detected_box, f"fallback detect ({detected_source})"
|
| 256 |
-
|
| 257 |
-
if tracked_box is not None:
|
| 258 |
-
return tracked_box, f"fallback track ({track_score:.2f})"
|
| 259 |
-
|
| 260 |
-
cx, cy, bw, bh = _fallback_body_box(frame_bgr)
|
| 261 |
-
x = cx - bw // 2
|
| 262 |
-
y = cy - bh // 2
|
| 263 |
-
box = _clamp_box((x, y, bw, bh), frame_bgr.shape)
|
| 264 |
-
_set_tracker(frame_bgr, box)
|
| 265 |
-
return box, "fallback static"
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
def _segment_person_mask(frame_bgr: np.ndarray, threshold: float) -> tuple[np.ndarray | None, np.ndarray | None]:
|
| 269 |
-
if SELFIE_SEGMENTER is None:
|
| 270 |
-
return None, None
|
| 271 |
-
rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
|
| 272 |
-
result = SELFIE_SEGMENTER.process(rgb)
|
| 273 |
-
if result.segmentation_mask is None:
|
| 274 |
-
return None, None
|
| 275 |
-
raw_mask = np.clip(result.segmentation_mask.astype(np.float32), 0.0, 1.0)
|
| 276 |
-
binary = raw_mask > threshold
|
| 277 |
-
return raw_mask, binary
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
def _avatar_key(avatar_bgr: np.ndarray) -> str:
|
| 281 |
-
thumb = cv2.resize(avatar_bgr, (32, 32), interpolation=cv2.INTER_AREA)
|
| 282 |
return (
|
| 283 |
-
f"{
|
| 284 |
f"-{int(thumb.mean())}"
|
| 285 |
f"-{int(thumb[::4, ::4].sum())}"
|
| 286 |
)
|
| 287 |
|
| 288 |
|
| 289 |
-
def
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
# Heuristic fallback: separate subject from corner background color.
|
| 293 |
-
h, w = avatar_bgr.shape[:2]
|
| 294 |
-
patch = max(10, min(h, w) // 18)
|
| 295 |
-
corners = [
|
| 296 |
-
avatar_bgr[:patch, :patch],
|
| 297 |
-
avatar_bgr[:patch, -patch:],
|
| 298 |
-
avatar_bgr[-patch:, :patch],
|
| 299 |
-
avatar_bgr[-patch:, -patch:],
|
| 300 |
-
]
|
| 301 |
-
bg_color = np.mean(np.concatenate([c.reshape(-1, 3) for c in corners], axis=0), axis=0)
|
| 302 |
-
dist = np.linalg.norm(avatar_bgr.astype(np.float32) - bg_color[None, None, :], axis=2)
|
| 303 |
-
heuristic = (dist > 26.0).astype(np.float32)
|
| 304 |
-
if float(heuristic.mean()) < 0.03:
|
| 305 |
-
full_mask = np.ones(avatar_bgr.shape[:2], dtype=np.float32)
|
| 306 |
-
return avatar_bgr, full_mask, "Avatar segmentation fallback."
|
| 307 |
-
heuristic = cv2.GaussianBlur(heuristic, (0, 0), 2.8)
|
| 308 |
-
heuristic = np.clip(heuristic, 0.0, 1.0)
|
| 309 |
-
return avatar_bgr, heuristic, "Avatar segmentation fallback."
|
| 310 |
-
|
| 311 |
-
ys, xs = np.where(binary)
|
| 312 |
-
x0, x1 = int(xs.min()), int(xs.max())
|
| 313 |
-
y0, y1 = int(ys.min()), int(ys.max())
|
| 314 |
-
|
| 315 |
-
crop = avatar_bgr[y0 : y1 + 1, x0 : x1 + 1]
|
| 316 |
-
crop_mask = raw_mask[y0 : y1 + 1, x0 : x1 + 1]
|
| 317 |
-
crop_mask = cv2.GaussianBlur(crop_mask, (0, 0), 1.8)
|
| 318 |
-
crop_mask = np.clip(crop_mask, 0.0, 1.0)
|
| 319 |
-
return crop, crop_mask, ""
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
def _get_avatar_assets(avatar_image: np.ndarray | None) -> tuple[np.ndarray | None, np.ndarray | None, str]:
|
| 323 |
-
global AVATAR_CACHE_KEY, AVATAR_CACHE_CROP, AVATAR_CACHE_MASK
|
| 324 |
-
|
| 325 |
-
if avatar_image is None:
|
| 326 |
-
return None, None, "Upload persona image."
|
| 327 |
-
avatar_bgr = _prepare_bgr(avatar_image)
|
| 328 |
-
key = _avatar_key(avatar_bgr)
|
| 329 |
-
|
| 330 |
-
if (
|
| 331 |
-
key == AVATAR_CACHE_KEY
|
| 332 |
-
and AVATAR_CACHE_CROP is not None
|
| 333 |
-
and AVATAR_CACHE_MASK is not None
|
| 334 |
-
):
|
| 335 |
-
return AVATAR_CACHE_CROP, AVATAR_CACHE_MASK, ""
|
| 336 |
-
|
| 337 |
-
crop, mask, msg = _extract_avatar_person(avatar_bgr)
|
| 338 |
-
AVATAR_CACHE_KEY = key
|
| 339 |
-
AVATAR_CACHE_CROP = crop
|
| 340 |
-
AVATAR_CACHE_MASK = mask
|
| 341 |
-
return crop, mask, msg
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
def _place_with_alpha(
|
| 345 |
-
canvas_shape: tuple[int, int, int],
|
| 346 |
-
src_img: np.ndarray,
|
| 347 |
-
src_alpha: np.ndarray,
|
| 348 |
-
center_x: int,
|
| 349 |
-
center_y: int,
|
| 350 |
-
target_w: int,
|
| 351 |
-
target_h: int,
|
| 352 |
-
) -> tuple[np.ndarray, np.ndarray]:
|
| 353 |
-
canvas_img = np.zeros(canvas_shape, dtype=np.uint8)
|
| 354 |
-
canvas_alpha = np.zeros(canvas_shape[:2], dtype=np.float32)
|
| 355 |
-
|
| 356 |
-
if target_w < 4 or target_h < 4:
|
| 357 |
-
return canvas_img, canvas_alpha
|
| 358 |
-
|
| 359 |
-
resized_img = cv2.resize(src_img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
|
| 360 |
-
resized_alpha = cv2.resize(src_alpha, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
|
| 361 |
-
|
| 362 |
-
x0 = center_x - target_w // 2
|
| 363 |
-
y0 = center_y - target_h // 2
|
| 364 |
-
x1 = x0 + target_w
|
| 365 |
-
y1 = y0 + target_h
|
| 366 |
-
|
| 367 |
-
dst_x0 = max(0, x0)
|
| 368 |
-
dst_y0 = max(0, y0)
|
| 369 |
-
dst_x1 = min(canvas_shape[1], x1)
|
| 370 |
-
dst_y1 = min(canvas_shape[0], y1)
|
| 371 |
-
if dst_x0 >= dst_x1 or dst_y0 >= dst_y1:
|
| 372 |
-
return canvas_img, canvas_alpha
|
| 373 |
-
|
| 374 |
-
src_x0 = dst_x0 - x0
|
| 375 |
-
src_y0 = dst_y0 - y0
|
| 376 |
-
src_x1 = src_x0 + (dst_x1 - dst_x0)
|
| 377 |
-
src_y1 = src_y0 + (dst_y1 - dst_y0)
|
| 378 |
-
|
| 379 |
-
canvas_img[dst_y0:dst_y1, dst_x0:dst_x1] = resized_img[src_y0:src_y1, src_x0:src_x1]
|
| 380 |
-
canvas_alpha[dst_y0:dst_y1, dst_x0:dst_x1] = resized_alpha[src_y0:src_y1, src_x0:src_x1]
|
| 381 |
-
return canvas_img, np.clip(canvas_alpha, 0.0, 1.0)
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
def _fallback_body_box(frame_bgr: np.ndarray) -> tuple[int, int, int, int]:
|
| 385 |
-
h, w = frame_bgr.shape[:2]
|
| 386 |
-
faces = _detect_faces(frame_bgr)
|
| 387 |
-
if len(faces) > 0:
|
| 388 |
-
x, y, fw, fh = max(faces, key=lambda item: item[2] * item[3])
|
| 389 |
-
body_w = int(fw * 2.8)
|
| 390 |
-
body_h = int(fh * 5.0)
|
| 391 |
-
center_x = x + fw // 2
|
| 392 |
-
center_y = y + int(fh * 2.3)
|
| 393 |
-
return center_x, center_y, body_w, body_h
|
| 394 |
-
|
| 395 |
-
return w // 2, int(h * 0.54), int(w * 0.56), int(h * 0.86)
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
def _full_body_replace(
|
| 399 |
-
frame_bgr: np.ndarray,
|
| 400 |
-
avatar_image: np.ndarray | None,
|
| 401 |
-
replace_strength: float,
|
| 402 |
-
mask_threshold: float,
|
| 403 |
-
edge_softness: float,
|
| 404 |
-
) -> tuple[np.ndarray, str]:
|
| 405 |
-
avatar_crop, avatar_mask, avatar_msg = _get_avatar_assets(avatar_image)
|
| 406 |
-
if avatar_crop is None or avatar_mask is None:
|
| 407 |
-
return frame_bgr, "Upload persona image to start replacement."
|
| 408 |
-
|
| 409 |
-
raw_mask, binary = _segment_person_mask(frame_bgr, threshold=mask_threshold)
|
| 410 |
-
use_fallback_box = raw_mask is None or binary is None or int(binary.sum()) < 2500
|
| 411 |
-
|
| 412 |
-
if use_fallback_box:
|
| 413 |
-
box, source = _resolve_fallback_box(frame_bgr)
|
| 414 |
-
bx, by, person_w, person_h = box
|
| 415 |
-
center_x = bx + person_w // 2
|
| 416 |
-
center_y = by + person_h // 2
|
| 417 |
-
status = f"Tracking fallback active ({source})."
|
| 418 |
-
else:
|
| 419 |
-
ys, xs = np.where(binary)
|
| 420 |
-
x0, x1 = int(xs.min()), int(xs.max())
|
| 421 |
-
y0, y1 = int(ys.min()), int(ys.max())
|
| 422 |
-
person_w = x1 - x0 + 1
|
| 423 |
-
person_h = y1 - y0 + 1
|
| 424 |
-
center_x = x0 + person_w // 2
|
| 425 |
-
center_y = y0 + int(person_h * 0.52)
|
| 426 |
-
status = "Full body replace active."
|
| 427 |
-
|
| 428 |
-
avatar_h, avatar_w = avatar_crop.shape[:2]
|
| 429 |
-
scale = 1.08 + 0.34 * replace_strength
|
| 430 |
-
target_h = int(person_h * scale)
|
| 431 |
-
target_w = int(target_h * (avatar_w / max(1, avatar_h)))
|
| 432 |
-
target_w = max(target_w, int(person_w * 0.98))
|
| 433 |
-
|
| 434 |
-
avatar_layer, avatar_alpha = _place_with_alpha(
|
| 435 |
-
canvas_shape=frame_bgr.shape,
|
| 436 |
-
src_img=avatar_crop,
|
| 437 |
-
src_alpha=avatar_mask,
|
| 438 |
-
center_x=center_x,
|
| 439 |
-
center_y=center_y,
|
| 440 |
-
target_w=target_w,
|
| 441 |
-
target_h=target_h,
|
| 442 |
-
)
|
| 443 |
|
| 444 |
-
if use_fallback_box:
|
| 445 |
-
fallback_mask = np.zeros(frame_bgr.shape[:2], dtype=np.float32)
|
| 446 |
-
axes = (max(24, int(person_w * 0.58)), max(24, int(person_h * 0.62)))
|
| 447 |
-
cv2.ellipse(fallback_mask, (center_x, center_y), axes, 0, 0, 360, 1.0, -1)
|
| 448 |
-
target_alpha = cv2.GaussianBlur(fallback_mask, (0, 0), 12.0)
|
| 449 |
-
# Make replacement visually obvious in fallback mode.
|
| 450 |
-
avatar_alpha = np.clip(0.35 + 0.65 * avatar_alpha, 0.0, 1.0)
|
| 451 |
-
else:
|
| 452 |
-
target_alpha = cv2.GaussianBlur(raw_mask, (0, 0), 1.5 + 18.0 * edge_softness)
|
| 453 |
-
|
| 454 |
-
alpha = np.clip(target_alpha * avatar_alpha * replace_strength, 0.0, 1.0)
|
| 455 |
-
mixed = (
|
| 456 |
-
avatar_layer.astype(np.float32) * alpha[:, :, None]
|
| 457 |
-
+ frame_bgr.astype(np.float32) * (1.0 - alpha[:, :, None])
|
| 458 |
-
)
|
| 459 |
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
|
| 468 |
def transform_live(
|
| 469 |
frame: np.ndarray,
|
| 470 |
-
|
| 471 |
mirror: bool,
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
) -> tuple[np.ndarray, str]:
|
| 476 |
global LAST_FRAME_TS, EMA_FPS
|
| 477 |
|
| 478 |
if frame is None:
|
| 479 |
return frame, "Waiting for webcam frame."
|
| 480 |
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
avatar_image=avatar_image,
|
| 488 |
-
replace_strength=replace_strength,
|
| 489 |
-
mask_threshold=mask_threshold,
|
| 490 |
-
edge_softness=edge_softness,
|
| 491 |
-
)
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
|
|
|
| 495 |
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
)
|
| 502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
now = time.perf_counter()
|
| 504 |
-
|
| 505 |
-
proc_fps = 1000.0 / max(
|
| 506 |
if LAST_FRAME_TS > 0.0:
|
| 507 |
-
|
| 508 |
-
EMA_FPS =
|
| 509 |
LAST_FRAME_TS = now
|
| 510 |
|
| 511 |
-
|
| 512 |
-
f"{
|
| 513 |
-
f"| stream ~{EMA_FPS:.1f} fps"
|
| 514 |
)
|
| 515 |
-
return cv2.cvtColor(
|
| 516 |
|
| 517 |
|
| 518 |
-
with gr.Blocks(title="Live
|
| 519 |
gr.Markdown(
|
| 520 |
"""
|
| 521 |
-
# Live
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
3. Output panel shows live replacement.
|
| 526 |
"""
|
| 527 |
)
|
| 528 |
|
| 529 |
with gr.Row():
|
| 530 |
-
|
| 531 |
-
label="
|
| 532 |
-
minimum=0.
|
| 533 |
maximum=1.0,
|
| 534 |
-
value=0.
|
| 535 |
step=0.01,
|
| 536 |
)
|
| 537 |
-
|
| 538 |
-
label="
|
| 539 |
-
minimum=
|
| 540 |
-
maximum=
|
| 541 |
-
value=
|
| 542 |
-
step=
|
| 543 |
)
|
| 544 |
-
|
| 545 |
-
label="
|
| 546 |
-
minimum=
|
| 547 |
-
maximum=
|
| 548 |
-
value=
|
| 549 |
-
step=
|
| 550 |
)
|
| 551 |
mirror = gr.Checkbox(label="Mirror output", value=True)
|
| 552 |
|
| 553 |
-
|
| 554 |
-
label="
|
| 555 |
type="numpy",
|
| 556 |
-
value=
|
| 557 |
)
|
| 558 |
|
|
|
|
|
|
|
|
|
|
| 559 |
with gr.Row():
|
| 560 |
live_input = gr.Image(
|
| 561 |
label="Webcam input",
|
|
@@ -564,25 +242,24 @@ Only one workflow is enabled:
|
|
| 564 |
streaming=True,
|
| 565 |
)
|
| 566 |
live_output = gr.Image(
|
| 567 |
-
label="Live
|
| 568 |
type="numpy",
|
| 569 |
)
|
| 570 |
|
| 571 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
|
| 573 |
live_input.stream(
|
| 574 |
fn=transform_live,
|
| 575 |
-
inputs=[
|
| 576 |
-
|
| 577 |
-
avatar_upload,
|
| 578 |
-
mirror,
|
| 579 |
-
replace_strength,
|
| 580 |
-
mask_threshold,
|
| 581 |
-
edge_softness,
|
| 582 |
-
],
|
| 583 |
-
outputs=[live_output, live_status],
|
| 584 |
time_limit=None,
|
| 585 |
-
stream_every=0.
|
| 586 |
concurrency_limit=1,
|
| 587 |
queue=False,
|
| 588 |
show_progress="hidden",
|
|
|
|
| 7 |
import cv2
|
| 8 |
import gradio as gr
|
| 9 |
import numpy as np
|
| 10 |
+
import onnxruntime as ort
|
| 11 |
+
from insightface.app import FaceAnalysis
|
| 12 |
+
from insightface.model_zoo import get_model
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
APP_DIR = Path(__file__).resolve().parent
|
| 15 |
+
DEFAULT_SOURCE_PATH = APP_DIR / "assets" / "default_persona.jpg"
|
| 16 |
+
DEFAULT_SOURCE_VALUE = str(DEFAULT_SOURCE_PATH) if DEFAULT_SOURCE_PATH.exists() else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
FACE_ANALYSER: FaceAnalysis | None = None
|
| 19 |
+
FACE_SWAPPER: Any | None = None
|
| 20 |
+
MODEL_PROVIDERS: list[str] = []
|
| 21 |
+
MODEL_ERROR = ""
|
|
|
|
| 22 |
|
| 23 |
+
SOURCE_FACE: Any | None = None
|
| 24 |
+
SOURCE_FACE_KEY = ""
|
| 25 |
|
|
|
|
| 26 |
LAST_FRAME_TS = 0.0
|
| 27 |
EMA_FPS = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
+
def _to_bgr(image: np.ndarray) -> np.ndarray:
    """Convert an incoming image array to 8-bit BGR for OpenCV.

    Non-uint8 input is clipped to [0, 255] and cast to uint8. A 2-D array is
    treated as grayscale, a 4-channel array as RGBA, and anything else as RGB.

    Args:
        image: Image array; 2-D (gray) or 3-D with the channel axis last.

    Returns:
        The image converted to BGR channel order.
    """
    # NOTE(review): float images scaled to [0, 1] would come out almost black
    # after this cast -- presumably callers always pass 0-255 data; confirm.
    pixels = image if image.dtype == np.uint8 else np.clip(image, 0, 255).astype(np.uint8)

    if pixels.ndim == 2:
        conversion = cv2.COLOR_GRAY2BGR
    elif pixels.shape[2] == 4:
        conversion = cv2.COLOR_RGBA2BGR
    else:
        conversion = cv2.COLOR_RGB2BGR
    return cv2.cvtColor(pixels, conversion)
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
+
def _image_key(image_bgr: np.ndarray) -> str:
    """Return a cheap fingerprint used to detect when an image has changed.

    The key combines the image height/width with a short hash of a 32x32
    area-averaged thumbnail. Hashing the thumbnail bytes (rather than only its
    mean and one subsampled sum) keeps distinct images of the same size from
    sharing a key, e.g. images that differ only by a channel permutation.

    Args:
        image_bgr: Image array accepted by ``cv2.resize``.

    Returns:
        A string of the form ``"<height>x<width>-<hex digest>"``. The value is
        only meaningful for equality comparison within one process.
    """
    import hashlib  # local import: only this helper needs it

    thumb = cv2.resize(image_bgr, (32, 32), interpolation=cv2.INTER_AREA)
    digest = hashlib.blake2b(thumb.tobytes(), digest_size=8).hexdigest()
    return f"{image_bgr.shape[0]}x{image_bgr.shape[1]}-{digest}"
|
| 47 |
|
| 48 |
|
| 49 |
+
def _face_area(face: Any) -> float:
|
| 50 |
+
x0, y0, x1, y1 = face.bbox
|
| 51 |
+
return max(1.0, float(x1 - x0) * float(y1 - y0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
def _ensure_models() -> tuple[bool, str]:
    """Load the face analyser and face swapper on first use.

    Results are kept in module globals. Once both models are loaded the call
    returns immediately; once an initialisation error has been recorded, the
    same error is returned again without retrying.

    Returns:
        ``(True, "")`` when both models are available, otherwise
        ``(False, <error message>)``.
    """
    global FACE_ANALYSER, FACE_SWAPPER, MODEL_PROVIDERS, MODEL_ERROR

    models_loaded = FACE_ANALYSER is not None and FACE_SWAPPER is not None
    if models_loaded:
        return True, ""
    if MODEL_ERROR:
        # A previous attempt failed; report it again instead of reloading.
        return False, MODEL_ERROR

    try:
        # Use CUDA (with CPU as second provider) only when onnxruntime lists it.
        if "CUDAExecutionProvider" in ort.get_available_providers():
            provider_chain = ["CUDAExecutionProvider", "CPUExecutionProvider"]
            device_id = 0
        else:
            provider_chain = ["CPUExecutionProvider"]
            device_id = -1

        face_analyser = FaceAnalysis(name="buffalo_l", providers=provider_chain)
        face_analyser.prepare(ctx_id=device_id, det_size=(640, 640))
        face_swapper = get_model(
            "inswapper_128.onnx",
            download=True,
            download_zip=True,
            providers=provider_chain,
        )
    except Exception as exc:
        MODEL_ERROR = f"Model init failed: {type(exc).__name__}: {exc}"
        return False, MODEL_ERROR

    # Publish the globals only after every step above has succeeded.
    FACE_ANALYSER = face_analyser
    FACE_SWAPPER = face_swapper
    MODEL_PROVIDERS = provider_chain
    return True, ""
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _ensure_source_face(source_image: np.ndarray | None) -> tuple[bool, str]:
    """Detect and cache the largest face of the source portrait.

    The detection outcome is cached per image key in ``SOURCE_FACE`` /
    ``SOURCE_FACE_KEY``. A "no face" outcome is cached as well, so a portrait
    without a detectable face is analysed once rather than on every call
    (this function runs for each streamed webcam frame).

    Args:
        source_image: Source portrait array, or ``None`` when nothing is
            uploaded.

    Returns:
        ``(True, "")`` when ``SOURCE_FACE`` holds a face for this image,
        otherwise ``(False, <reason>)``.
    """
    global SOURCE_FACE, SOURCE_FACE_KEY

    no_face_msg = "No face found in source image. Use clear front-facing portrait."

    if source_image is None:
        return False, "Upload source portrait first."

    ok, msg = _ensure_models()
    if not ok:
        return False, msg

    source_bgr = _to_bgr(source_image)
    key = _image_key(source_bgr)
    if key == SOURCE_FACE_KEY:
        # Same image as the last analysed one: reuse the cached outcome,
        # whether it was a face or a failed detection.
        if SOURCE_FACE is not None:
            return True, ""
        return False, no_face_msg

    analyser = FACE_ANALYSER
    if analyser is None:
        # Not expected after a successful _ensure_models(); reported instead
        # of asserted so the caller still gets a status message.
        return False, "Face analyser is not initialised."

    source_faces = analyser.get(source_bgr)

    # Record the key only after detection has run, so an exception above
    # leaves the previous cache entry untouched.
    SOURCE_FACE_KEY = key
    if not source_faces:
        SOURCE_FACE = None
        return False, no_face_msg

    SOURCE_FACE = max(source_faces, key=_face_area)
    return True, ""
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def warmup(source_image: np.ndarray | None) -> str:
    """Load the models and prepare the source face ahead of streaming.

    Args:
        source_image: Source portrait array, or ``None`` when nothing is
            uploaded.

    Returns:
        The first failure message encountered, or a ready message that lists
        the active execution providers.
    """
    # Models are checked first so a model failure is reported even when no
    # source portrait has been provided.
    checks = (_ensure_models, lambda: _ensure_source_face(source_image))
    for check in checks:
        ready, problem = check()
        if not ready:
            return problem
    return f"Model ready. Providers: {', '.join(MODEL_PROVIDERS)}"
|
| 114 |
|
| 115 |
|
| 116 |
def transform_live(
    frame: np.ndarray | None,
    source_image: np.ndarray | None,
    mirror: bool,
    swap_strength: float,
    max_faces: int,
    process_side: int,
) -> tuple[np.ndarray | None, str]:
    """Swap the source face onto the faces found in one webcam frame.

    Args:
        frame: Current webcam frame, or ``None`` before the stream starts.
        source_image: Source portrait whose face is pasted onto the frame.
        mirror: Flip the output horizontally when true.
        swap_strength: Blend weight of the swapped result against the
            unmodified frame; clamped to [0, 1].
        max_faces: Maximum number of faces to swap, largest first.
        process_side: Longest side, in pixels, the frame is downscaled to
            before detection and swapping.

    Returns:
        ``(image, status)``. On any precondition failure the input frame is
        returned unchanged together with the reason; otherwise the processed
        frame converted back from BGR to RGB and a status line with timings.
    """
    global LAST_FRAME_TS, EMA_FPS

    if frame is None:
        return frame, "Waiting for webcam frame."

    ok, msg = _ensure_models()
    if not ok:
        return frame, msg
    ok, msg = _ensure_source_face(source_image)
    if not ok:
        return frame, msg

    assert FACE_ANALYSER is not None
    assert FACE_SWAPPER is not None
    assert SOURCE_FACE is not None

    # UI slider values may arrive as floats; a float cannot be used as a slice
    # bound, so normalise all numeric controls up front.
    face_limit = max(1, int(max_faces))
    side_limit = max(1, int(process_side))
    strength = min(1.0, max(0.0, float(swap_strength)))

    started = time.perf_counter()
    frame_bgr = _to_bgr(frame)
    original_h, original_w = frame_bgr.shape[:2]

    resized = frame_bgr
    longest_side = max(original_h, original_w)
    was_resized = longest_side > side_limit
    if was_resized:
        ratio = side_limit / float(longest_side)
        # Keep both dimensions at least 1 px so very elongated frames cannot
        # produce an empty resize target.
        target_size = (
            max(1, int(original_w * ratio)),
            max(1, int(original_h * ratio)),
        )
        resized = cv2.resize(frame_bgr, target_size, interpolation=cv2.INTER_AREA)

    target_faces = FACE_ANALYSER.get(resized)
    swapped = resized.copy()
    applied = 0

    if target_faces:
        ordered_faces = sorted(target_faces, key=_face_area, reverse=True)[:face_limit]
        for target_face in ordered_faces:
            swapped = FACE_SWAPPER.get(swapped, target_face, SOURCE_FACE, paste_back=True)
            applied += 1

    # Blend only when something was actually swapped. Operating on the uint8
    # arrays directly lets OpenCV round and saturate the result, instead of
    # truncating a float32 intermediate on the cast back to uint8.
    if applied and strength < 0.999:
        swapped = cv2.addWeighted(swapped, strength, resized, 1.0 - strength, 0.0)

    if was_resized:
        swapped = cv2.resize(swapped, (original_w, original_h), interpolation=cv2.INTER_LINEAR)

    if mirror:
        swapped = cv2.flip(swapped, 1)

    now = time.perf_counter()
    frame_ms = (now - started) * 1000.0
    proc_fps = 1000.0 / max(frame_ms, 1e-4)
    if LAST_FRAME_TS > 0.0:
        # Exponential moving average of the rate at which frames arrive.
        inst_fps = 1.0 / max(now - LAST_FRAME_TS, 1e-4)
        EMA_FPS = inst_fps if EMA_FPS <= 0.0 else (0.2 * inst_fps + 0.8 * EMA_FPS)
    LAST_FRAME_TS = now

    status = (
        f"Swapped faces: {applied} | providers: {', '.join(MODEL_PROVIDERS)} "
        f"| proc {proc_fps:.1f} fps ({frame_ms:.0f} ms) | stream ~{EMA_FPS:.1f} fps"
    )
    return cv2.cvtColor(swapped, cv2.COLOR_BGR2RGB), status
|
| 192 |
|
| 193 |
|
| 194 |
+
with gr.Blocks(title="Live Upper-Body Swap") as demo:
|
| 195 |
gr.Markdown(
|
| 196 |
"""
|
| 197 |
+
# Live Upper-Body Swap
|
| 198 |
+
1. Upload source portrait (or keep preloaded default).
|
| 199 |
+
2. Click **Warm up model** once.
|
| 200 |
+
3. Start webcam stream.
|
|
|
|
| 201 |
"""
|
| 202 |
)
|
| 203 |
|
| 204 |
with gr.Row():
|
| 205 |
+
swap_strength = gr.Slider(
|
| 206 |
+
label="Swap strength",
|
| 207 |
+
minimum=0.45,
|
| 208 |
maximum=1.0,
|
| 209 |
+
value=0.95,
|
| 210 |
step=0.01,
|
| 211 |
)
|
| 212 |
+
max_faces = gr.Slider(
|
| 213 |
+
label="Max faces in frame",
|
| 214 |
+
minimum=1,
|
| 215 |
+
maximum=4,
|
| 216 |
+
value=1,
|
| 217 |
+
step=1,
|
| 218 |
)
|
| 219 |
+
process_side = gr.Slider(
|
| 220 |
+
label="Process size (speed/quality)",
|
| 221 |
+
minimum=384,
|
| 222 |
+
maximum=960,
|
| 223 |
+
value=640,
|
| 224 |
+
step=32,
|
| 225 |
)
|
| 226 |
mirror = gr.Checkbox(label="Mirror output", value=True)
|
| 227 |
|
| 228 |
+
source_image = gr.Image(
|
| 229 |
+
label="Source portrait",
|
| 230 |
type="numpy",
|
| 231 |
+
value=DEFAULT_SOURCE_VALUE,
|
| 232 |
)
|
| 233 |
|
| 234 |
+
warmup_button = gr.Button("Warm up model", variant="primary")
|
| 235 |
+
status = gr.Markdown("Idle.")
|
| 236 |
+
|
| 237 |
with gr.Row():
|
| 238 |
live_input = gr.Image(
|
| 239 |
label="Webcam input",
|
|
|
|
| 242 |
streaming=True,
|
| 243 |
)
|
| 244 |
live_output = gr.Image(
|
| 245 |
+
label="Live swapped output",
|
| 246 |
type="numpy",
|
| 247 |
)
|
| 248 |
|
| 249 |
+
warmup_button.click(
|
| 250 |
+
fn=warmup,
|
| 251 |
+
inputs=[source_image],
|
| 252 |
+
outputs=[status],
|
| 253 |
+
queue=False,
|
| 254 |
+
show_progress="hidden",
|
| 255 |
+
)
|
| 256 |
|
| 257 |
live_input.stream(
|
| 258 |
fn=transform_live,
|
| 259 |
+
inputs=[live_input, source_image, mirror, swap_strength, max_faces, process_side],
|
| 260 |
+
outputs=[live_output, status],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
time_limit=None,
|
| 262 |
+
stream_every=0.06,
|
| 263 |
concurrency_limit=1,
|
| 264 |
queue=False,
|
| 265 |
show_progress="hidden",
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
gradio
|
| 2 |
-
numpy>=1.26.4
|
| 3 |
opencv-python-headless>=4.10.0.84
|
| 4 |
-
|
|
|
|
|
|
| 1 |
+
gradio==5.44.1
|
| 2 |
+
numpy>=1.26.4,<2.3
|
| 3 |
opencv-python-headless>=4.10.0.84
|
| 4 |
+
onnxruntime-gpu>=1.17.0
|
| 5 |
+
insightface==0.7.3
|