Spaces:
Running
Running
| import os | |
| import cv2 | |
| import cv2.data | |
| import numpy as np | |
| from .detector import _get_insightface, _ORT_PROVIDERS # reuse shared instance + providers | |
| _swapper_model = None | |
| _swapper_failed = False # True only after a real load error (not "file not there yet") | |
| def _load_swapper(): | |
| global _swapper_model, _swapper_failed | |
| if _swapper_model is not None: | |
| return _swapper_model # already loaded | |
| if _swapper_failed: | |
| return None # previously errored — don't retry | |
| model_path = "models/inswapper_128.onnx" | |
| if not os.path.exists(model_path): | |
| return None # not downloaded yet — retry next call | |
| try: | |
| import insightface | |
| _swapper_model = insightface.model_zoo.get_model( | |
| model_path, providers=_ORT_PROVIDERS | |
| ) | |
| print("[swapper] inswapper_128 loaded OK") | |
| return _swapper_model | |
| except Exception as e: | |
| print(f"[swapper] load failed: {e}") | |
| _swapper_failed = True | |
| return None | |
| def swap_face_insightface( | |
| source: np.ndarray, | |
| target: np.ndarray, | |
| preserve_glasses: bool = False, | |
| ) -> np.ndarray: | |
| """ | |
| Full-identity face swap. The source face replaces every target face. | |
| preserve_glasses: when True, the target's eye/spectacle region is blended | |
| back over the swap (use only for targets that actually wear glasses). It is | |
| OFF by default — restoring that region pastes the target's eyes back, which | |
| is the strongest identity cue and makes the swap look incomplete. | |
| """ | |
| swapper = _load_swapper() | |
| app = _get_insightface() | |
| if swapper is None or app is None: | |
| return _fallback_swap(source, target) | |
| try: | |
| src_faces = app.get(source) # InsightFace expects BGR | |
| tgt_faces = app.get(target) | |
| if not src_faces or not tgt_faces: | |
| print(f"[swapper] faces not detected: src={len(src_faces)} tgt={len(tgt_faces)}") | |
| return _fallback_swap(source, target) | |
| area = lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]) | |
| # Use the largest source face (most frontal/clear) for the best identity. | |
| src_face = max(src_faces, key=area) | |
| # Swap ONLY the main subject — the largest target face. Location photos | |
| # often contain incidental faces (posters, notice boards, reflections); | |
| # swapping every detected face pasted the source onto those too, which | |
| # produced the "mess". We also keep any genuinely large secondary face | |
| # (>= 45% of the main face's area) so real two-person shots still work, | |
| # but exclude the small background faces. | |
| main_area = area(max(tgt_faces, key=area)) | |
| targets = [f for f in tgt_faces if area(f) >= 0.45 * main_area] | |
| result = target.copy() | |
| for tgt_face in targets: | |
| result = swapper.get(result, tgt_face, src_face, paste_back=True) | |
| if preserve_glasses: | |
| result = _restore_glasses_region(result, target, tgt_face) | |
| # Sharpen only the swapped face region(s). | |
| result = _sharpen_face_region(result, targets) | |
| return result | |
| except Exception as e: | |
| print(f"[swapper] swap error: {e}") | |
| return _fallback_swap(source, target) | |
| def _restore_glasses_region(swapped: np.ndarray, original_target: np.ndarray, face) -> np.ndarray: | |
| """ | |
| Restore the glasses/eye region from the original target so spectacles | |
| (frames + lenses) are preserved cleanly regardless of the source face. | |
| Uses InsightFace 5-keypoint or 68/106-landmark eye positions. | |
| """ | |
| h, w = swapped.shape[:2] | |
| result = swapped.copy() | |
| try: | |
| # Get eye centre positions from keypoints [0]=left eye, [1]=right eye | |
| kps = face.kps # shape (5,2): left_eye, right_eye, nose, left_mouth, right_mouth | |
| le, re = kps[0], kps[1] | |
| # Estimate glasses bounding box: wide enough to cover frames + nose bridge | |
| eye_dist = float(np.linalg.norm(re - le)) | |
| cx = int((le[0] + re[0]) / 2) | |
| cy = int((le[1] + re[1]) / 2) | |
| half_w = int(eye_dist * 0.80) | |
| half_h = int(eye_dist * 0.38) | |
| gx1 = max(0, cx - half_w); gx2 = min(w, cx + half_w) | |
| gy1 = max(0, cy - half_h); gy2 = min(h, cy + half_h) | |
| if gx2 <= gx1 or gy2 <= gy1: | |
| return result | |
| rh, rw = gy2 - gy1, gx2 - gx1 | |
| # Feathered blend: original target fully in centre, fade to swapped at edges | |
| mask = np.zeros((rh, rw), dtype=np.float32) | |
| cv2.ellipse(mask, (rw // 2, rh // 2), (rw // 2, rh // 2), | |
| 0, 0, 360, 1.0, -1) | |
| mask = cv2.GaussianBlur(mask, (0, 0), sigmaX=rw * 0.12) | |
| mask = np.stack([mask] * 3, axis=-1) | |
| tgt_crop = original_target[gy1:gy2, gx1:gx2].astype(np.float32) | |
| swp_crop = swapped[gy1:gy2, gx1:gx2].astype(np.float32) | |
| blended = tgt_crop * mask + swp_crop * (1 - mask) | |
| result[gy1:gy2, gx1:gx2] = np.clip(blended, 0, 255).astype(np.uint8) | |
| except Exception: | |
| pass # no landmarks → skip silently | |
| return result | |
| def _sharpen_face_region(image: np.ndarray, tgt_faces: list) -> np.ndarray: | |
| """ | |
| Recover detail lost in InsightFace's 128x128 internal resize. | |
| Strategy: | |
| - Mild bilateral filter to remove compression artefacts (not texture). | |
| - Unsharp mask on the ORIGINAL crop (not the blurred version) for true | |
| high-frequency recovery; 2.3x strength gives clean edges without halos. | |
| - Blend sharp + smooth so skin stays natural while edges are crisp. | |
| """ | |
| result = image.copy() | |
| h, w = image.shape[:2] | |
| for face in tgt_faces: | |
| x1, y1, x2, y2 = [int(v) for v in face.bbox] | |
| pad = int((x2 - x1) * 0.15) | |
| x1p = max(0, x1 - pad); y1p = max(0, y1 - pad) | |
| x2p = min(w, x2 + pad); y2p = min(h, y2 + pad) | |
| crop = result[y1p:y2p, x1p:x2p].copy() | |
| if crop.size == 0: | |
| continue | |
| # 1. Mild bilateral — removes compression blotches while keeping edges | |
| smooth = cv2.bilateralFilter(crop, d=5, sigmaColor=30, sigmaSpace=30) | |
| # 2. Unsharp mask on ORIGINAL crop — true detail recovery | |
| blur = cv2.GaussianBlur(crop, (0, 0), sigmaX=1.5) | |
| sharp = cv2.addWeighted(crop, 2.3, blur, -1.3, 0) | |
| # 3. Composite: 55% sharp detail + 45% smooth skin base | |
| enhanced = cv2.addWeighted(sharp, 0.55, smooth, 0.45, 0) | |
| # 4. Feathered paste — no hard border at crop edges | |
| fh, fw = crop.shape[:2] | |
| feather = np.ones((fh, fw), dtype=np.float32) | |
| border = max(4, pad // 2) | |
| for i in range(border): | |
| v = (i + 1) / (border + 1) | |
| feather[i, :] *= v | |
| feather[fh-1-i, :] *= v | |
| feather[:, i] *= v | |
| feather[:, fw-1-i] *= v | |
| feather = np.stack([feather] * 3, axis=-1) | |
| result[y1p:y2p, x1p:x2p] = ( | |
| enhanced.astype(np.float32) * feather + | |
| result[y1p:y2p, x1p:x2p].astype(np.float32) * (1 - feather) | |
| ).astype(np.uint8) | |
| return result | |
| def _fallback_swap(source: np.ndarray, target: np.ndarray) -> np.ndarray: | |
| gray_src = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY) | |
| gray_tgt = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY) | |
| cascade = cv2.CascadeClassifier( | |
| cv2.data.haarcascades + "haarcascade_frontalface_default.xml" | |
| ) | |
| faces_src = cascade.detectMultiScale(gray_src, 1.1, 5, minSize=(64, 64)) | |
| faces_tgt = cascade.detectMultiScale(gray_tgt, 1.1, 5, minSize=(64, 64)) | |
| if len(faces_src) == 0 or len(faces_tgt) == 0: | |
| return target.copy() | |
| sx, sy, sw, sh = faces_src[0] | |
| tx, ty, tw, th = faces_tgt[0] | |
| face_patch = source[sy:sy+sh, sx:sx+sw] | |
| face_patch_resized = cv2.resize(face_patch, (tw, th)) | |
| result = target.copy() | |
| mask = np.zeros((th, tw), dtype=np.uint8) | |
| cv2.ellipse(mask, (tw // 2, th // 2), (tw // 2, th // 2), 0, 0, 360, 255, -1) | |
| center = (tx + tw // 2, ty + th // 2) | |
| try: | |
| result = cv2.seamlessClone(face_patch_resized, result, mask, center, cv2.NORMAL_CLONE) | |
| except Exception: | |
| result[ty:ty+th, tx:tx+tw] = ( | |
| face_patch_resized * 0.7 + result[ty:ty+th, tx:tx+tw] * 0.3 | |
| ).astype(np.uint8) | |
| return result | |