# FaceSWAP / core /swapper.py
# aditya-rAj19's picture
# Swap only the main subject + always run face swap (fixes the mess)
# 4a2b4cf
# Raw
# History Blame Contribute Delete
# 8.36 kB
import os
import cv2
import cv2.data
import numpy as np
from .detector import _get_insightface, _ORT_PROVIDERS # reuse shared instance + providers
# Process-wide cache for the inswapper model; populated on first successful load.
_swapper_model = None
# Latched to True only after a load attempt raised (a missing file does not set it).
_swapper_failed = False


def _load_swapper():
    """
    Return the cached inswapper model, loading it on first use.

    Returns None in two situations:
      * the model file is not on disk (checked again on the next call), or
      * a previous load attempt raised (latched; never retried).
    """
    global _swapper_model, _swapper_failed

    if _swapper_model is not None:
        return _swapper_model

    # Relative path: resolved against the process working directory.
    weights_file = "models/inswapper_128.onnx"

    # A recorded failure short-circuits before the filesystem is touched.
    if _swapper_failed or not os.path.exists(weights_file):
        return None

    try:
        # Imported lazily so the module can be imported without insightface.
        import insightface

        _swapper_model = insightface.model_zoo.get_model(
            weights_file, providers=_ORT_PROVIDERS
        )
        print("[swapper] inswapper_128 loaded OK")
    except Exception as e:
        print(f"[swapper] load failed: {e}")
        _swapper_failed = True
        return None
    return _swapper_model
def swap_face_insightface(
    source: np.ndarray,
    target: np.ndarray,
    preserve_glasses: bool = False,
) -> np.ndarray:
    """
    Swap the source identity onto the main subject(s) of the target image.

    The largest face found in `source` supplies the identity. In `target`,
    the largest face is always swapped, together with any other face whose
    bounding-box area is at least 45% of it; smaller faces are left alone.
    The swapped face boxes are sharpened afterwards.

    Args:
        source: image containing the identity to transfer (the original
            author notes InsightFace expects BGR).
        target: image whose face(s) are replaced; it is copied, not modified
            in place by this function.
        preserve_glasses: when True, the target's eye/spectacle region is
            blended back over each swapped face. Off by default because it
            also restores the target's eyes.

    Returns:
        The swapped image. Falls back to `_fallback_swap(source, target)`
        when the swapper or analyser is unavailable, when either image has
        no detected face, or when any step raises.
    """
    swapper = _load_swapper()
    app = _get_insightface()
    if swapper is None or app is None:
        return _fallback_swap(source, target)

    def bbox_area(face):
        # bbox is indexed as (x1, y1, x2, y2).
        box = face.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

    try:
        src_faces = app.get(source)
        tgt_faces = app.get(target)
        if not (src_faces and tgt_faces):
            print(f"[swapper] faces not detected: src={len(src_faces)} tgt={len(tgt_faces)}")
            return _fallback_swap(source, target)

        # Identity comes from the biggest face in the source image.
        identity = max(src_faces, key=bbox_area)

        # Only the main subject and comparably large faces are swapped, so
        # incidental background faces (posters, reflections) stay untouched.
        largest_area = bbox_area(max(tgt_faces, key=bbox_area))
        min_area = 0.45 * largest_area
        chosen = [face for face in tgt_faces if bbox_area(face) >= min_area]

        output = target.copy()
        for face in chosen:
            output = swapper.get(output, face, identity, paste_back=True)
            if preserve_glasses:
                output = _restore_glasses_region(output, target, face)

        # Sharpening is limited to the boxes that were actually swapped.
        return _sharpen_face_region(output, chosen)
    except Exception as e:
        print(f"[swapper] swap error: {e}")
        return _fallback_swap(source, target)
def _restore_glasses_region(swapped: np.ndarray, original_target: np.ndarray, face) -> np.ndarray:
    """
    Blend the eye/spectacle band of the original target back over a swap.

    The band is an axis-aligned box centred between the first two entries of
    `face.kps` (taken to be the two eyes), sized from the inter-eye distance
    and clipped to the image. Inside it, a blurred elliptical mask mixes the
    original target (centre) with the swapped image (edges).

    Best effort: if anything raises (e.g. the face has no usable keypoints)
    the error is swallowed and the returned image keeps whatever was written
    before the failure - normally an unchanged copy of `swapped`.

    Returns:
        A new array; `swapped` itself is never modified.
    """
    img_h, img_w = swapped.shape[:2]
    output = swapped.copy()
    try:
        landmarks = face.kps
        left_eye = landmarks[0]
        right_eye = landmarks[1]

        # Box dimensions scale with eye spacing: wide enough for the frames,
        # shallow enough to stay around the eye line.
        eye_span = float(np.linalg.norm(right_eye - left_eye))
        mid_x = int((left_eye[0] + right_eye[0]) / 2)
        mid_y = int((left_eye[1] + right_eye[1]) / 2)
        reach_x = int(eye_span * 0.80)
        reach_y = int(eye_span * 0.38)

        left = max(0, mid_x - reach_x)
        right = min(img_w, mid_x + reach_x)
        top = max(0, mid_y - reach_y)
        bottom = min(img_h, mid_y + reach_y)
        if right <= left or bottom <= top:
            # Box lies outside the image (or is degenerate): nothing to blend.
            return output

        box_h = bottom - top
        box_w = right - left
        region = (slice(top, bottom), slice(left, right))

        # Filled ellipse, then Gaussian blur, gives a soft-edged alpha map.
        alpha = np.zeros((box_h, box_w), dtype=np.float32)
        half_axes = (box_w // 2, box_h // 2)
        cv2.ellipse(alpha, half_axes, half_axes, 0, 0, 360, 1.0, -1)
        alpha = cv2.GaussianBlur(alpha, (0, 0), sigmaX=box_w * 0.12)
        alpha = np.dstack((alpha, alpha, alpha))

        original_px = original_target[region].astype(np.float32)
        swapped_px = swapped[region].astype(np.float32)
        mixed = original_px * alpha + swapped_px * (1 - alpha)
        output[region] = np.clip(mixed, 0, 255).astype(np.uint8)
    except Exception:
        pass  # no landmarks → skip silently
    return output
def _sharpen_face_region(image: np.ndarray, tgt_faces: list) -> np.ndarray:
    """
    Sharpen the padded bounding box of each face and feather it back in.

    Intended to recover detail lost by the low-resolution swap (the original
    author attributes the loss to InsightFace's 128x128 internal resize).

    For every face:
      1. A mild bilateral filter produces a smooth base.
      2. An unsharp mask is computed from the unfiltered crop
         (2.3 * crop - 1.3 * gaussian_blur(crop, sigma=1.5)).
      3. The two are mixed 55% sharp / 45% smooth.
      4. The mix is pasted back through a linear edge feather so the crop
         border does not show.

    Args:
        image: image to enhance (presumably the uint8 BGR swap result);
            it is not modified.
        tgt_faces: objects exposing `.bbox` as (x1, y1, x2, y2).

    Returns:
        A new array of the same shape. Faces whose padded box does not
        intersect the image are skipped.
    """
    result = image.copy()
    h, w = image.shape[:2]
    for face in tgt_faces:
        x1, y1, x2, y2 = [int(v) for v in face.bbox]
        pad = int((x2 - x1) * 0.15)

        # Clamp BOTH ends of each axis into [0, size]. Clamping the end
        # coordinate only from above would leave it negative for a box that
        # lies left of / above the image, and a negative slice end wraps
        # around and selects an unrelated part of the picture.
        x1p = min(w, max(0, x1 - pad))
        y1p = min(h, max(0, y1 - pad))
        x2p = max(0, min(w, x2 + pad))
        y2p = max(0, min(h, y2 + pad))

        crop = result[y1p:y2p, x1p:x2p].copy()
        if crop.size == 0:
            continue

        # 1. Mild bilateral — removes compression blotches while keeping edges
        smooth = cv2.bilateralFilter(crop, d=5, sigmaColor=30, sigmaSpace=30)
        # 2. Unsharp mask on the unfiltered crop — true detail recovery
        blur = cv2.GaussianBlur(crop, (0, 0), sigmaX=1.5)
        sharp = cv2.addWeighted(crop, 2.3, blur, -1.3, 0)
        # 3. Composite: 55% sharp detail + 45% smooth skin base
        enhanced = cv2.addWeighted(sharp, 0.55, smooth, 0.45, 0)

        # 4. Feathered paste — weights ramp up linearly from each crop edge.
        fh, fw = crop.shape[:2]
        feather = np.ones((fh, fw), dtype=np.float32)
        border = max(4, pad // 2)
        for i in range(border):
            v = (i + 1) / (border + 1)
            # Guard each axis: a crop thinner than the border (face clipped
            # by the image edge, tiny detection) must not raise IndexError,
            # which would make the caller throw away the whole swap.
            if i < fh:
                feather[i, :] *= v
                feather[fh - 1 - i, :] *= v
            if i < fw:
                feather[:, i] *= v
                feather[:, fw - 1 - i] *= v
        if crop.ndim == 3:
            # Broadcast the 2-D weights over the channel axis.
            feather = feather[..., np.newaxis]

        # `crop` still equals the untouched region of `result` here.
        blended = (
            enhanced.astype(np.float32) * feather
            + crop.astype(np.float32) * (1 - feather)
        )
        result[y1p:y2p, x1p:x2p] = blended.astype(np.uint8)
    return result
def _largest_rect(rects):
    """Return the (x, y, w, h) detection with the greatest area, as plain ints."""
    x, y, w, h = max(rects, key=lambda r: int(r[2]) * int(r[3]))
    # Plain ints keep OpenCV's size/point argument parsing happy regardless
    # of how a given build treats NumPy integer scalars.
    return int(x), int(y), int(w), int(h)


def _fallback_swap(source: np.ndarray, target: np.ndarray) -> np.ndarray:
    """
    Crude face swap using OpenCV's Haar cascade, used when InsightFace
    cannot be used.

    The largest frontal face found in `source` is resized onto the largest
    frontal face found in `target` (the same largest-face rule the main swap
    path applies) and blended in with `cv2.seamlessClone` through an
    elliptical mask. If seamless cloning fails, a 70/30 alpha blend over the
    target face box is used instead.

    Args:
        source: BGR image supplying the face.
        target: BGR image receiving the face; it is not modified.

    Returns:
        A new image. An unchanged copy of `target` is returned when the
        cascade cannot be loaded or when either image has no detected face.
    """
    cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    if cascade.empty():
        # Without this check detectMultiScale would raise on an unloaded cascade.
        print("[swapper] fallback: Haar cascade could not be loaded")
        return target.copy()

    gray_src = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    gray_tgt = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    faces_src = cascade.detectMultiScale(gray_src, 1.1, 5, minSize=(64, 64))
    faces_tgt = cascade.detectMultiScale(gray_tgt, 1.1, 5, minSize=(64, 64))
    # len() rather than truthiness: the detector may return an ndarray.
    if len(faces_src) == 0 or len(faces_tgt) == 0:
        return target.copy()

    # Detection order is arbitrary, so pick the main subject explicitly.
    sx, sy, sw, sh = _largest_rect(faces_src)
    tx, ty, tw, th = _largest_rect(faces_tgt)

    face_patch = source[sy:sy + sh, sx:sx + sw]
    face_patch_resized = cv2.resize(face_patch, (tw, th))

    result = target.copy()
    mask = np.zeros((th, tw), dtype=np.uint8)
    cv2.ellipse(mask, (tw // 2, th // 2), (tw // 2, th // 2), 0, 0, 360, 255, -1)
    center = (tx + tw // 2, ty + th // 2)
    try:
        result = cv2.seamlessClone(face_patch_resized, result, mask, center, cv2.NORMAL_CLONE)
    except Exception as e:
        # Best effort: keep going with a plain alpha blend, but say so.
        print(f"[swapper] fallback: seamlessClone failed, using alpha blend: {e}")
        result[ty:ty + th, tx:tx + tw] = (
            face_patch_resized * 0.7 + result[ty:ty + th, tx:tx + tw] * 0.3
        ).astype(np.uint8)
    return result