Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# =========================
|
| 2 |
-
# app.py (
|
| 3 |
# =========================
|
| 4 |
import os
|
| 5 |
|
|
@@ -66,7 +66,7 @@ with main_col1:
|
|
| 66 |
help="Frames are resized before detection/swap. Lower = faster."
|
| 67 |
)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
face_blend_percent = st.sidebar.slider(
|
| 71 |
"Face Blending Percentage",
|
| 72 |
min_value=0,
|
|
@@ -83,6 +83,32 @@ with main_col1:
|
|
| 83 |
help="Method for selecting which face to use from the source image"
|
| 84 |
)
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
# For video mode only
|
| 87 |
if st.session_state.mode == "video":
|
| 88 |
fps_cap = st.sidebar.selectbox(
|
|
@@ -202,9 +228,9 @@ def _cv2_to_pil(image):
|
|
| 202 |
def _pil_to_cv2(image):
    """Return `image` as a NumPy array with its channels reordered RGB -> BGR."""
    # np.array() materialises the pixel data; cvtColor then swaps the channel order.
    rgb_pixels = np.array(image)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
|
| 204 |
|
| 205 |
-
#
|
| 206 |
-
def _blend_faces(original_face, swapped_face, blend_percent):
|
| 207 |
-
"""Blend between original and swapped faces
|
| 208 |
if blend_percent == 100:
|
| 209 |
return swapped_face
|
| 210 |
|
|
@@ -213,7 +239,71 @@ def _blend_faces(original_face, swapped_face, blend_percent):
|
|
| 213 |
swapped_face = cv2.resize(swapped_face, (original_face.shape[1], original_face.shape[0]))
|
| 214 |
|
| 215 |
alpha = blend_percent / 100.0
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# Face selection methods
|
| 219 |
def _select_face(faces, method, image_shape=None):
|
|
@@ -235,11 +325,12 @@ def _select_face(faces, method, image_shape=None):
|
|
| 235 |
return faces[0]
|
| 236 |
|
| 237 |
# -------------------------------------
|
| 238 |
-
# Core:
|
| 239 |
# -------------------------------------
|
| 240 |
def swap_faces_in_image(
|
| 241 |
source_image_bgr, target_image_bgr, proc_res, max_faces,
|
| 242 |
-
blend_percent=100, face_selection="Largest"
|
|
|
|
| 243 |
):
|
| 244 |
# Get source face
|
| 245 |
try:
|
|
@@ -286,7 +377,7 @@ def swap_faces_in_image(
|
|
| 286 |
reverse=True
|
| 287 |
)[:max_faces]
|
| 288 |
|
| 289 |
-
# Swap faces -
|
| 290 |
result_image = target_image_proc.copy()
|
| 291 |
for tface in target_faces:
|
| 292 |
try:
|
|
@@ -302,18 +393,18 @@ def swap_faces_in_image(
|
|
| 302 |
# Extract the face region
|
| 303 |
face_region = result_image[y1:y2, x1:x2].copy()
|
| 304 |
|
| 305 |
-
#
|
| 306 |
-
|
|
|
|
|
|
|
| 307 |
|
| 308 |
-
#
|
|
|
|
| 309 |
swapped_face = swapped_region[y1:y2, x1:x2]
|
| 310 |
|
| 311 |
-
# Apply blending
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
result_image[y1:y2, x1:x2] = blended_face
|
| 315 |
-
else:
|
| 316 |
-
result_image[y1:y2, x1:x2] = swapped_face
|
| 317 |
|
| 318 |
except Exception as swap_e:
|
| 319 |
st.error(f"Face swap error: {swap_e}")
|
|
@@ -332,7 +423,8 @@ def swap_faces_in_image(
|
|
| 332 |
|
| 333 |
def swap_faces_in_video(
|
| 334 |
image_bgr, video_path, proc_res, fps_cap, keep_original_res,
|
| 335 |
-
max_faces, blend_percent, face_selection, output_quality, progress
|
|
|
|
| 336 |
):
|
| 337 |
# Get source face
|
| 338 |
try:
|
|
@@ -382,12 +474,13 @@ def swap_faces_in_video(
|
|
| 382 |
|
| 383 |
st.info(
|
| 384 |
f"Processing: {proc_w}×{proc_h} | Output: {out_w}×{out_h} @ {write_fps:.2f} fps | "
|
| 385 |
-
f"Frame step: {frame_step} | Blend: {blend_percent}%"
|
| 386 |
)
|
| 387 |
|
| 388 |
# Process loop
|
| 389 |
read_idx = 0
|
| 390 |
processed_frames = 0
|
|
|
|
| 391 |
|
| 392 |
try:
|
| 393 |
while True:
|
|
@@ -415,6 +508,10 @@ def swap_faces_in_video(
|
|
| 415 |
except Exception as det_e:
|
| 416 |
target_faces = []
|
| 417 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
# Limit faces
|
| 419 |
if target_faces:
|
| 420 |
target_faces = sorted(
|
|
@@ -422,8 +519,9 @@ def swap_faces_in_video(
|
|
| 422 |
key=lambda f: (f.bbox[2]-f.bbox[0])*(f.bbox[3]-f.bbox[1]),
|
| 423 |
reverse=True
|
| 424 |
)[:max_faces]
|
|
|
|
| 425 |
|
| 426 |
-
# Swap faces -
|
| 427 |
result_frame = proc_frame.copy()
|
| 428 |
for tface in target_faces:
|
| 429 |
try:
|
|
@@ -439,18 +537,18 @@ def swap_faces_in_video(
|
|
| 439 |
# Extract the face region
|
| 440 |
face_region = result_frame[y1:y2, x1:x2].copy()
|
| 441 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
# Perform the swap
|
| 443 |
swapped_region = swapper.get(result_frame, tface, source_face, paste_back=True)
|
| 444 |
-
|
| 445 |
-
# Extract just the swapped face region
|
| 446 |
swapped_face = swapped_region[y1:y2, x1:x2]
|
| 447 |
|
| 448 |
-
# Apply blending
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
result_frame[y1:y2, x1:x2] = blended_face
|
| 452 |
-
else:
|
| 453 |
-
result_frame[y1:y2, x1:x2] = swapped_face
|
| 454 |
|
| 455 |
except Exception as swap_e:
|
| 456 |
continue
|
|
@@ -484,6 +582,25 @@ def swap_faces_in_video(
|
|
| 484 |
|
| 485 |
return output_path
|
| 486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
# -------------------------
|
| 488 |
# UI: Improved layout
|
| 489 |
# -------------------------
|
|
@@ -554,7 +671,8 @@ with main_col2:
|
|
| 554 |
output_path = swap_faces_in_video(
|
| 555 |
source_image, tmp_video_path, proc_res, fps_cap,
|
| 556 |
keep_original_res, max_faces, face_blend_percent,
|
| 557 |
-
face_selection_method, output_quality, progress_bar
|
|
|
|
| 558 |
)
|
| 559 |
|
| 560 |
if output_path:
|
|
@@ -601,7 +719,8 @@ with main_col2:
|
|
| 601 |
with st.spinner("Processing image…"):
|
| 602 |
result_image = swap_faces_in_image(
|
| 603 |
source_image, target_image, proc_res, max_faces,
|
| 604 |
-
face_blend_percent, face_selection_method
|
|
|
|
| 605 |
)
|
| 606 |
|
| 607 |
if result_image:
|
|
@@ -667,5 +786,6 @@ with st.expander("🩺 Diagnostics"):
|
|
| 667 |
"- If you see errors: try different source/target images with clear faces\n"
|
| 668 |
"- For better results: use high-quality images with front-facing faces\n"
|
| 669 |
"- If processing is slow: reduce resolution or target FPS\n"
|
| 670 |
-
"- For videos: use MP4 format with H.264 encoding"
|
|
|
|
| 671 |
)
|
|
|
|
| 1 |
# =========================
|
| 2 |
+
# app.py (Enhanced Version with Lip-Sync Optimization)
|
| 3 |
# =========================
|
| 4 |
import os
|
| 5 |
|
|
|
|
| 66 |
help="Frames are resized before detection/swap. Lower = faster."
|
| 67 |
)
|
| 68 |
|
| 69 |
+
# Face blending percentage
|
| 70 |
face_blend_percent = st.sidebar.slider(
|
| 71 |
"Face Blending Percentage",
|
| 72 |
min_value=0,
|
|
|
|
| 83 |
help="Method for selecting which face to use from the source image"
|
| 84 |
)
|
| 85 |
|
| 86 |
+
# NEW: Lip-sync specific settings
|
| 87 |
+
st.sidebar.markdown("---")
|
| 88 |
+
st.sidebar.subheader("🎭 Lip-Sync Optimization")
|
| 89 |
+
|
| 90 |
+
lip_sync_enabled = st.sidebar.checkbox(
|
| 91 |
+
"Enable Lip-Sync Mode",
|
| 92 |
+
value=True,
|
| 93 |
+
help="Reduces glitches and blurriness in mouth movements"
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
mouth_mask_strength = st.sidebar.slider(
|
| 97 |
+
"Mouth Mask Strength",
|
| 98 |
+
min_value=0,
|
| 99 |
+
max_value=100,
|
| 100 |
+
value=80,
|
| 101 |
+
help="How strongly to protect the mouth region from artifacts"
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
frame_consistency = st.sidebar.slider(
|
| 105 |
+
"Frame Consistency",
|
| 106 |
+
min_value=0,
|
| 107 |
+
max_value=100,
|
| 108 |
+
value=70,
|
| 109 |
+
help="Maintain consistency between frames for smoother video"
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
# For video mode only
|
| 113 |
if st.session_state.mode == "video":
|
| 114 |
fps_cap = st.sidebar.selectbox(
|
|
|
|
| 228 |
def _pil_to_cv2(image):
    """Return `image` as a NumPy array with its channels reordered RGB -> BGR."""
    # np.array() materialises the pixel data; cvtColor then swaps the channel order.
    rgb_pixels = np.array(image)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
|
| 230 |
|
| 231 |
+
# Enhanced face blending function with mouth protection
|
| 232 |
+
def _blend_faces(original_face, swapped_face, blend_percent, mouth_mask=None):
|
| 233 |
+
"""Blend between original and swapped faces with optional mouth protection"""
|
| 234 |
if blend_percent == 100:
|
| 235 |
return swapped_face
|
| 236 |
|
|
|
|
| 239 |
swapped_face = cv2.resize(swapped_face, (original_face.shape[1], original_face.shape[0]))
|
| 240 |
|
| 241 |
alpha = blend_percent / 100.0
|
| 242 |
+
|
| 243 |
+
if mouth_mask is not None:
|
| 244 |
+
# Apply different blending for mouth vs non-mouth regions
|
| 245 |
+
if mouth_mask.shape[:2] != original_face.shape[:2]:
|
| 246 |
+
mouth_mask = cv2.resize(mouth_mask, (original_face.shape[1], original_face.shape[0]))
|
| 247 |
+
|
| 248 |
+
# Stronger blending for non-mouth areas, preserve mouth more
|
| 249 |
+
mouth_alpha = min(alpha + 0.2, 1.0) # Less blending in mouth area
|
| 250 |
+
blended = np.zeros_like(original_face)
|
| 251 |
+
|
| 252 |
+
# Mouth area - more preservation of swapped face
|
| 253 |
+
blended[mouth_mask > 0] = cv2.addWeighted(
|
| 254 |
+
swapped_face[mouth_mask > 0], mouth_alpha,
|
| 255 |
+
original_face[mouth_mask > 0], 1 - mouth_alpha, 0
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
# Non-mouth area - normal blending
|
| 259 |
+
blended[mouth_mask == 0] = cv2.addWeighted(
|
| 260 |
+
swapped_face[mouth_mask == 0], alpha,
|
| 261 |
+
original_face[mouth_mask == 0], 1 - alpha, 0
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
return blended
|
| 265 |
+
else:
|
| 266 |
+
# Standard blending if no mouth mask
|
| 267 |
+
return cv2.addWeighted(swapped_face, alpha, original_face, 1 - alpha, 0)
|
| 268 |
+
|
| 269 |
+
# Create mouth mask from facial landmarks
|
| 270 |
+
def _create_mouth_mask(face_landmarks, image_shape, strength=80, offset=(0, 0)):
    """Create a soft-edged uint8 mask covering the mouth region of a face.

    Args:
        face_landmarks: Object exposing a ``landmark_2d_106`` attribute holding
            at least 106 (x, y) points.
        image_shape: Shape of the image the mask is for; only the first two
            entries (height, width) are used.
        strength: Percentage applied to the blurred mask values
            (``mask * strength / 100``).
        offset: ``(x, y)`` subtracted from every landmark before drawing.
            Pass the crop's top-left corner when ``image_shape`` describes a
            crop but the landmarks are in full-frame coordinates. The default
            ``(0, 0)`` leaves the landmarks unchanged.

    Returns:
        A uint8 array of shape ``image_shape[:2]``, or ``None`` when the
        landmarks are missing or fewer than 106 points are available.
    """
    # getattr covers both "attribute absent" and "attribute present but None".
    landmarks = getattr(face_landmarks, 'landmark_2d_106', None)
    if landmarks is None or len(landmarks) < 106:
        return None

    # NOTE(review): 48-67 / 60-67 are the mouth ranges of the 68-point landmark
    # convention; the 106-point layout may number the mouth differently --
    # confirm against the landmark model actually in use.
    mouth_indices = range(48, 68)   # Lips outline
    mouth_inner = range(60, 68)     # Inner mouth

    shift = np.asarray(offset, dtype=np.float64)

    def _hull_for(indices):
        # Shift into the mask's coordinate frame, then truncate to integer pixels.
        pts = np.asarray([landmarks[i] for i in indices], dtype=np.float64) - shift
        return cv2.convexHull(pts.astype(np.int32))

    mask = np.zeros(image_shape[:2], dtype=np.uint8)

    # Outer lips at full intensity, then the inner mouth drawn on top with a
    # lower value.
    cv2.fillPoly(mask, [_hull_for(mouth_indices)], 255)
    cv2.fillPoly(mask, [_hull_for(mouth_inner)], 200)

    # Apply Gaussian blur for smooth edges
    mask = cv2.GaussianBlur(mask, (15, 15), 0)

    # Scale by the strength percentage; the zero image contributes nothing.
    mask = cv2.addWeighted(mask, strength / 100.0, np.zeros_like(mask), 0, 0)

    return mask
|
| 307 |
|
| 308 |
# Face selection methods
|
| 309 |
def _select_face(faces, method, image_shape=None):
|
|
|
|
| 325 |
return faces[0]
|
| 326 |
|
| 327 |
# -------------------------------------
|
| 328 |
+
# Core: Enhanced face swap functions with lip-sync optimization
|
| 329 |
# -------------------------------------
|
| 330 |
def swap_faces_in_image(
|
| 331 |
source_image_bgr, target_image_bgr, proc_res, max_faces,
|
| 332 |
+
blend_percent=100, face_selection="Largest", lip_sync_enabled=True,
|
| 333 |
+
mouth_mask_strength=80
|
| 334 |
):
|
| 335 |
# Get source face
|
| 336 |
try:
|
|
|
|
| 377 |
reverse=True
|
| 378 |
)[:max_faces]
|
| 379 |
|
| 380 |
+
# Swap faces with lip-sync optimization
|
| 381 |
result_image = target_image_proc.copy()
|
| 382 |
for tface in target_faces:
|
| 383 |
try:
|
|
|
|
| 393 |
# Extract the face region
|
| 394 |
face_region = result_image[y1:y2, x1:x2].copy()
|
| 395 |
|
| 396 |
+
# Create mouth mask if lip-sync is enabled
|
| 397 |
+
mouth_mask = None
|
| 398 |
+
if lip_sync_enabled and hasattr(tface, 'landmark_2d_106'):
|
| 399 |
+
mouth_mask = _create_mouth_mask(tface, face_region.shape, mouth_mask_strength)
|
| 400 |
|
| 401 |
+
# Perform the swap
|
| 402 |
+
swapped_region = swapper.get(result_image, tface, source_face, paste_back=True)
|
| 403 |
swapped_face = swapped_region[y1:y2, x1:x2]
|
| 404 |
|
| 405 |
+
# Apply blending with mouth protection
|
| 406 |
+
blended_face = _blend_faces(face_region, swapped_face, blend_percent, mouth_mask)
|
| 407 |
+
result_image[y1:y2, x1:x2] = blended_face
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
except Exception as swap_e:
|
| 410 |
st.error(f"Face swap error: {swap_e}")
|
|
|
|
| 423 |
|
| 424 |
def swap_faces_in_video(
|
| 425 |
image_bgr, video_path, proc_res, fps_cap, keep_original_res,
|
| 426 |
+
max_faces, blend_percent, face_selection, output_quality, progress,
|
| 427 |
+
lip_sync_enabled=True, mouth_mask_strength=80, frame_consistency=70
|
| 428 |
):
|
| 429 |
# Get source face
|
| 430 |
try:
|
|
|
|
| 474 |
|
| 475 |
st.info(
|
| 476 |
f"Processing: {proc_w}×{proc_h} | Output: {out_w}×{out_h} @ {write_fps:.2f} fps | "
|
| 477 |
+
f"Frame step: {frame_step} | Blend: {blend_percent}% | Lip-sync: {'ON' if lip_sync_enabled else 'OFF'}"
|
| 478 |
)
|
| 479 |
|
| 480 |
# Process loop
|
| 481 |
read_idx = 0
|
| 482 |
processed_frames = 0
|
| 483 |
+
previous_faces = {} # For frame consistency
|
| 484 |
|
| 485 |
try:
|
| 486 |
while True:
|
|
|
|
| 508 |
except Exception as det_e:
|
| 509 |
target_faces = []
|
| 510 |
|
| 511 |
+
# Apply frame consistency
|
| 512 |
+
if frame_consistency > 0 and previous_faces and target_faces:
|
| 513 |
+
target_faces = _apply_frame_consistency(target_faces, previous_faces, frame_consistency/100.0)
|
| 514 |
+
|
| 515 |
# Limit faces
|
| 516 |
if target_faces:
|
| 517 |
target_faces = sorted(
|
|
|
|
| 519 |
key=lambda f: (f.bbox[2]-f.bbox[0])*(f.bbox[3]-f.bbox[1]),
|
| 520 |
reverse=True
|
| 521 |
)[:max_faces]
|
| 522 |
+
previous_faces = {i: face for i, face in enumerate(target_faces)}
|
| 523 |
|
| 524 |
+
# Swap faces with lip-sync optimization
|
| 525 |
result_frame = proc_frame.copy()
|
| 526 |
for tface in target_faces:
|
| 527 |
try:
|
|
|
|
| 537 |
# Extract the face region
|
| 538 |
face_region = result_frame[y1:y2, x1:x2].copy()
|
| 539 |
|
| 540 |
+
# Create mouth mask if lip-sync is enabled
|
| 541 |
+
mouth_mask = None
|
| 542 |
+
if lip_sync_enabled and hasattr(tface, 'landmark_2d_106'):
|
| 543 |
+
mouth_mask = _create_mouth_mask(tface, face_region.shape, mouth_mask_strength)
|
| 544 |
+
|
| 545 |
# Perform the swap
|
| 546 |
swapped_region = swapper.get(result_frame, tface, source_face, paste_back=True)
|
|
|
|
|
|
|
| 547 |
swapped_face = swapped_region[y1:y2, x1:x2]
|
| 548 |
|
| 549 |
+
# Apply blending with mouth protection
|
| 550 |
+
blended_face = _blend_faces(face_region, swapped_face, blend_percent, mouth_mask)
|
| 551 |
+
result_frame[y1:y2, x1:x2] = blended_face
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
except Exception as swap_e:
|
| 554 |
continue
|
|
|
|
| 582 |
|
| 583 |
return output_path
|
| 584 |
|
| 585 |
+
def _apply_frame_consistency(current_faces, previous_faces, consistency_strength):
    """Smooth face bounding boxes between frames for steadier video output.

    Faces are paired purely by position: the face at index ``i`` of
    ``current_faces`` is matched with ``previous_faces[i]`` when that key
    exists. Only ``bbox`` is smoothed, and the face objects are modified
    in place.

    Args:
        current_faces: Sequence of face objects exposing a 4-element ``bbox``.
        previous_faces: Mapping of index -> face object from the previous frame.
        consistency_strength: Weight given to the previous box; the current
            box gets ``1 - consistency_strength``.

    Returns:
        ``current_faces`` itself when either input is empty, otherwise a new
        list holding the same (updated) face objects in the same order.
    """
    if not current_faces or not previous_faces:
        return current_faces

    keep_current = 1 - consistency_strength
    for index, face in enumerate(current_faces):
        if index not in previous_faces:
            continue
        prev_box = np.asarray(previous_faces[index].bbox, dtype=np.float64)[:4]
        cur_box = np.asarray(face.bbox, dtype=np.float64)[:4]
        # Keep bbox a NumPy array: assigning a plain list here would break any
        # later ndarray-style use of the box (e.g. ``bbox.astype(int)``).
        face.bbox = prev_box * consistency_strength + cur_box * keep_current

    return list(current_faces)
|
| 603 |
+
|
| 604 |
# -------------------------
|
| 605 |
# UI: Improved layout
|
| 606 |
# -------------------------
|
|
|
|
| 671 |
output_path = swap_faces_in_video(
|
| 672 |
source_image, tmp_video_path, proc_res, fps_cap,
|
| 673 |
keep_original_res, max_faces, face_blend_percent,
|
| 674 |
+
face_selection_method, output_quality, progress_bar,
|
| 675 |
+
lip_sync_enabled, mouth_mask_strength, frame_consistency
|
| 676 |
)
|
| 677 |
|
| 678 |
if output_path:
|
|
|
|
| 719 |
with st.spinner("Processing image…"):
|
| 720 |
result_image = swap_faces_in_image(
|
| 721 |
source_image, target_image, proc_res, max_faces,
|
| 722 |
+
face_blend_percent, face_selection_method,
|
| 723 |
+
lip_sync_enabled, mouth_mask_strength
|
| 724 |
)
|
| 725 |
|
| 726 |
if result_image:
|
|
|
|
| 786 |
"- If you see errors: try different source/target images with clear faces\n"
|
| 787 |
"- For better results: use high-quality images with front-facing faces\n"
|
| 788 |
"- If processing is slow: reduce resolution or target FPS\n"
|
| 789 |
+
"- For videos: use MP4 format with H.264 encoding\n"
|
| 790 |
+
"- For best lip-sync: enable lip-sync mode and adjust mouth mask strength"
|
| 791 |
)
|