Files changed (1) hide show
  1. app.py +397 -89
app.py CHANGED
@@ -1,91 +1,399 @@
1
- # -*- coding:UTF-8 -*-
2
- # !/usr/bin/env python
3
- import spaces
4
- import numpy as np
5
  import gradio as gr
6
- import gradio.exceptions
7
- import roop.globals
8
- from roop.core import (
9
- start,
10
- decode_execution_providers,
11
- )
12
- from roop.processors.frame.core import get_frame_processors_modules
13
- from roop.utilities import normalize_output_path
14
- import os
15
- import random
16
- from PIL import Image
17
- import onnxruntime as ort
18
  import cv2
19
- from roop.face_analyser import get_one_face
20
-
21
- @spaces.GPU
22
- def swap_face(source_file, target_file, doFaceEnhancer):
23
- session_dir = "temp" # Sử dụng thư mục cố định
24
- os.makedirs(session_dir, exist_ok=True)
25
-
26
- # Tạo tên file ngẫu nhiên
27
- source_filename = f"source_{random.randint(1000, 9999)}.jpg"
28
- target_filename = f"target_{random.randint(1000, 9999)}.jpg"
29
- output_filename = f"output_{random.randint(1000, 9999)}.jpg"
30
-
31
- source_path = os.path.join(session_dir, source_filename)
32
- target_path = os.path.join(session_dir, target_filename)
33
-
34
- source_image = Image.fromarray(source_file)
35
- source_image.save(source_path)
36
- target_image = Image.fromarray(target_file)
37
- target_image.save(target_path)
38
-
39
- print("source_path: ", source_path)
40
- print("target_path: ", target_path)
41
-
42
- # Check if a face is detected in the source image
43
- source_face = get_one_face(cv2.imread(source_path))
44
- if source_face is None:
45
- raise gradio.exceptions.Error("No face in source path detected.")
46
-
47
- # Check if a face is detected in the target image
48
- target_face = get_one_face(cv2.imread(target_path))
49
- if target_face is None:
50
- raise gradio.exceptions.Error("No face in target path detected.")
51
-
52
- output_path = os.path.join(session_dir, output_filename)
53
- normalized_output_path = normalize_output_path(source_path, target_path, output_path)
54
-
55
- frame_processors = ["face_swapper", "face_enhancer"] if doFaceEnhancer else ["face_swapper"]
56
-
57
- for frame_processor in get_frame_processors_modules(frame_processors):
58
- if not frame_processor.pre_check():
59
- print(f"Pre-check failed for {frame_processor}")
60
- raise gradio.exceptions.Error(f"Pre-check failed for {frame_processor}")
61
-
62
- roop.globals.source_path = source_path
63
- roop.globals.target_path = target_path
64
- roop.globals.output_path = normalized_output_path
65
- roop.globals.frame_processors = frame_processors
66
- roop.globals.headless = True
67
- roop.globals.keep_fps = True
68
- roop.globals.keep_audio = True
69
- roop.globals.keep_frames = False
70
- roop.globals.many_faces = False
71
- roop.globals.video_encoder = "libx264"
72
- roop.globals.video_quality = 18
73
- roop.globals.execution_providers = decode_execution_providers(['cpu'])
74
- roop.globals.reference_face_position = 0
75
- roop.globals.similar_face_distance = 0.6
76
- roop.globals.max_memory = 60
77
- roop.globals.execution_threads = 8
78
-
79
- start()
80
- return normalized_output_path
81
-
82
- app = gr.Interface(
83
- fn=swap_face,
84
- inputs=[
85
- gr.Image(),
86
- gr.Image(),
87
- gr.Checkbox(label="Face Enhancer?", info="Do face enhancement?")
88
- ],
89
- outputs="image"
90
- )
91
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
3
+ from PIL import Image, ImageDraw
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ import mediapipe as mp
 
 
 
 
 
 
8
  import cv2
9
+ import io
10
+ import base64
11
+
12
# Load the SegFormer segmentation model once at import time (downloads the
# checkpoint on first run). Downstream code treats class id 2 of this
# checkpoint as "hair".
processor = SegformerImageProcessor.from_pretrained("VanNguyen1214/get_face_and_hair")
model = AutoModelForSemanticSegmentation.from_pretrained("VanNguyen1214/get_face_and_hair")
15
+
16
def get_facemesh_mask(image):
    """Build a binary face mask (uint8 of 0/1) with MediaPipe FaceMesh.

    Detects at most one face in the image and fills the convex hull of its
    landmarks. Returns an all-zero mask when no face is found.
    """
    frame = np.array(image)
    h, w, _ = frame.shape
    mask = np.zeros((h, w), dtype=np.uint8)

    mesh_module = mp.solutions.face_mesh
    with mesh_module.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as mesh:
        detection = mesh.process(frame)

    if detection.multi_face_landmarks:
        for landmarks in detection.multi_face_landmarks:
            # Landmarks are normalised [0, 1]; scale to pixel coordinates.
            pts = np.array(
                [[int(lm.x * w), int(lm.y * h)] for lm in landmarks.landmark],
                np.int32,
            )
            if len(pts) > 0:
                cv2.fillConvexPoly(mask, cv2.convexHull(pts), 1)

    return mask
36
+
37
def expand_forehead_mask(face_mask, expand_percent=0.2):
    """Extend a binary face mask upward to also cover the forehead.

    The previous implementation *translated* the whole mask upward, which
    dropped the bottom ``expand`` rows of the face, and it also used the
    inclusive bbox maximum as an exclusive slice end, silently losing the
    last mask row. This version keeps the original mask and ORs in an
    upward-shifted copy, so the full face is preserved and only a forehead
    band is added above it.

    Args:
        face_mask: 2-D uint8 array, 1 inside the face and 0 elsewhere.
        expand_percent: fraction of the face height to extend upward.

    Returns:
        2-D uint8 mask of the same shape; the input unchanged if empty.
    """
    ys, _ = np.where(face_mask > 0)
    if len(ys) == 0:
        return face_mask  # nothing detected -> nothing to expand

    face_height = int(ys.max()) - int(ys.min())
    shift = int(face_height * expand_percent)
    if shift <= 0:
        return face_mask

    # Shift the mask up by `shift` rows (rows falling off the top are
    # naturally clipped), then union with the original mask.
    shifted = np.zeros_like(face_mask)
    shifted[:-shift, :] = face_mask[shift:, :]
    return np.maximum(face_mask, shifted)
60
+
61
def extract_hair_face_mask(image):
    """Segment hair + face + forehead from a PIL image.

    Combines a SegFormer hair segmentation (class id 2) with a MediaPipe
    face-hull mask plus an upward "forehead" extension of that hull.

    Returns:
        (combined_mask, face_mesh_mask): two 2-D uint8 arrays of 0/1 —
        the full head mask and the face-only mask.
    """
    rgb = image.convert("RGB")

    # --- hair mask from the SegFormer model -------------------------------
    model_inputs = processor(images=rgb, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**model_inputs)
    logits = outputs.logits.cpu()

    # Upsample the logits back to the original (H, W) before the argmax.
    full_res = F.interpolate(
        logits,
        size=rgb.size[::-1],
        mode="bilinear",
        align_corners=False,
    )
    labels = full_res.argmax(dim=1)[0].numpy()
    hair_mask = (labels == 2).astype(np.uint8)  # class 2 == hair

    # --- face and forehead masks from MediaPipe ---------------------------
    face_mesh_mask = get_facemesh_mask(rgb)
    expanded_face_mask = expand_forehead_mask(face_mesh_mask, expand_percent=0.2)

    # Keep only the forehead band: expanded area not already face or hair.
    forehead_only = cv2.bitwise_and(expanded_face_mask, 1 - face_mesh_mask)
    forehead_only = cv2.bitwise_and(forehead_only, 1 - hair_mask)
    combined_mask = ((hair_mask + face_mesh_mask + forehead_only) > 0).astype(np.uint8)

    # Light Gaussian smoothing followed by re-binarisation.
    blurred = cv2.GaussianBlur(combined_mask.astype(np.float32), (3, 3), 0)
    combined_mask = (blurred > 0.5).astype(np.uint8)

    return combined_mask, face_mesh_mask
94
+
95
def get_face_bbox(face_mask):
    """Return the bounding box (x_min, y_min, x_max, y_max) of the mask.

    Coordinates are inclusive indices of the non-zero region; returns
    None when the mask is empty.
    """
    rows, cols = np.where(face_mask > 0)
    if rows.size == 0:
        return None
    return cols.min(), rows.min(), cols.max(), rows.max()
101
+
102
def detect_head_region(image):
    """Return a smoothed uint8 head mask (hair + face + forehead), or None.

    None is returned when the segmentation finds nothing at all.
    """
    combined_mask, _ = extract_hair_face_mask(image)

    # Nothing segmented -> no head region.
    if not (combined_mask > 0).any():
        return None

    # Smooth the mask edges, then re-binarise.
    soft = cv2.GaussianBlur(combined_mask.astype(np.float32), (5, 5), 0)
    return (soft > 0.5).astype(np.uint8)
119
+
120
def head_replacement_swap(source_head_image, target_body_image):
    """Replace the head (hair + face) in the target image with the source head.

    Both inputs are PIL images. Returns ``(result_image_or_None, status)``;
    the status message is Vietnamese to match the rest of the UI.

    Fixes over the previous version: inputs are forced to RGB (an RGBA
    upload used to crash the 3-channel blend and surface as a generic
    error), inclusive bbox coordinates are now converted to slice ends
    with +1 (the last row/column was silently dropped), and a degenerate
    zero-size head bbox is rejected explicitly.
    """
    if source_head_image is None or target_body_image is None:
        return None, "Vui lòng upload cả 2 ảnh"

    try:
        # Force RGB so both arrays have exactly 3 channels.
        target_rgb = target_body_image.convert("RGB")
        # NOTE(review): resizing to the full target size ignores aspect
        # ratio; acceptable here because only the head bbox is reused.
        source_resized = source_head_image.convert("RGB").resize(target_rgb.size)

        # Detect head regions in both images.
        source_head_mask = detect_head_region(source_resized)
        if source_head_mask is None:
            return None, "Không phát hiện được đầu trong ảnh source"

        target_head_mask = detect_head_region(target_rgb)
        if target_head_mask is None:
            return None, "Không phát hiện được đầu trong ảnh target"

        source_np = np.array(source_resized)
        target_np = np.array(target_rgb)

        source_ys, source_xs = np.where(source_head_mask > 0)
        target_ys, target_xs = np.where(target_head_mask > 0)
        if len(source_ys) == 0 or len(target_ys) == 0:
            return None, "Không thể xác định vùng đầu"

        # Inclusive bounding boxes; +1 below converts them to slice ends.
        sx1, sy1 = source_xs.min(), source_ys.min()
        sx2, sy2 = source_xs.max(), source_ys.max()
        tx1, ty1 = target_xs.min(), target_ys.min()
        tx2, ty2 = target_xs.max(), target_ys.max()

        source_head_crop = source_np[sy1:sy2 + 1, sx1:sx2 + 1]
        source_mask_crop = source_head_mask[sy1:sy2 + 1, sx1:sx2 + 1]

        target_width = tx2 + 1 - tx1
        target_height = ty2 + 1 - ty1
        if target_width <= 0 or target_height <= 0:
            return None, "Không thể xác định vùng đầu"

        # Scale the source head (and its mask) onto the target head bbox.
        source_head_resized = cv2.resize(source_head_crop, (target_width, target_height))
        source_mask_resized = cv2.resize(
            source_mask_crop.astype(np.float32), (target_width, target_height)
        )
        source_mask_resized = (source_mask_resized > 0.5).astype(np.uint8)

        result = target_np.copy()

        # Feather the mask edges for a seamless paste.
        mask_3d = np.stack([source_mask_resized] * 3, axis=2).astype(np.float32)
        smooth_mask = np.clip(cv2.GaussianBlur(mask_3d, (7, 7), 0), 0, 1)

        region = result[ty1:ty2 + 1, tx1:tx2 + 1]
        result[ty1:ty2 + 1, tx1:tx2 + 1] = (
            source_head_resized * smooth_mask + region * (1 - smooth_mask)
        ).astype(np.uint8)

        return Image.fromarray(result), "Thành công! Thay thế đầu hoàn tất."

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"Lỗi: {str(e)}"
188
+
189
def preview_head_extraction(image):
    """Return the image as RGBA with everything but the head transparent.

    Used as a live preview in the Head Replacement tab. Returns None for
    missing input, when no head is detected, or on any processing error.

    Fix: the input is forced to RGB first — an RGBA upload previously made
    ``np.dstack`` produce a 5-channel array, so ``Image.fromarray`` raised
    and the preview silently showed nothing.
    """
    if image is None:
        return None

    try:
        rgb = image.convert("RGB")  # guarantee 3 channels before adding alpha
        head_mask = detect_head_region(rgb)
        if head_mask is None:
            return None

        np_image = np.array(rgb)
        alpha = (head_mask * 255).astype(np.uint8)
        return Image.fromarray(np.dstack([np_image, alpha]))
    except Exception:
        # Previews are best-effort; swallow errors and show nothing.
        return None
206
+
207
def simple_face_swap(source_image, target_face_image, face_mask):
    """Paste `target_face_image` over the face region of `source_image`.

    The target face is resized to the face bounding box and copied in only
    where `face_mask` is set. Returns a PIL image; if `face_mask` is empty
    the source image is returned unchanged.

    Fixes over the previous version: inclusive bbox coordinates are
    converted to slice ends with +1 (the last row/column of the face was
    dropped), inputs are forced to RGB so channel counts always match,
    and the per-channel Python loop is replaced by a broadcast blend.
    """
    bbox = get_face_bbox(face_mask)
    if bbox is None:
        return source_image

    x1, y1, x2, y2 = bbox
    # bbox coordinates are inclusive -> +1 keeps the last row/column.
    face_width = x2 + 1 - x1
    face_height = y2 + 1 - y1

    target_np = np.array(target_face_image.convert("RGB").resize((face_width, face_height)))
    source_np = np.array(source_image.convert("RGB"))

    result = source_np.copy()

    # Vectorised blend: the 0/1 mask broadcasts over the channel axis,
    # so products stay within uint8 range.
    region_mask = face_mask[y1:y2 + 1, x1:x2 + 1][..., None]
    result[y1:y2 + 1, x1:x2 + 1] = (
        target_np * region_mask
        + source_np[y1:y2 + 1, x1:x2 + 1] * (1 - region_mask)
    )

    return Image.fromarray(result.astype(np.uint8))
237
+
238
def blend_with_original(original_image, swapped_hair_face, combined_mask):
    """Composite the swapped hair/face region back onto the original image.

    Pixels where `combined_mask` is 1 come from `swapped_hair_face`, the
    rest from `original_image`. Returns a PIL image.

    Fix: a dead ``result = original_np.copy()`` assignment (immediately
    overwritten) has been removed.
    """
    original_np = np.array(original_image)
    swapped_np = np.array(swapped_hair_face)

    # Match sizes if the swap stage changed them.
    if original_np.shape[:2] != swapped_np.shape[:2]:
        swapped_np = np.array(swapped_hair_face.resize(original_image.size))
        # cv2.resize's dsize is (width, height) — exactly PIL's .size order.
        combined_mask = cv2.resize(combined_mask, original_image.size)

    # Broadcast the 2-D 0/1 mask over the channel axis and blend.
    mask_3d = np.stack([combined_mask] * 3, axis=2)
    result = swapped_np * mask_3d + original_np * (1 - mask_3d)

    return Image.fromarray(result.astype(np.uint8))
256
+
257
def face_swap_workflow(original_image, target_face_image):
    """Full pipeline: segment -> swap the face -> composite back.

    Returns ``(result_image_or_None, status_message)`` for the Gradio UI.
    """
    if original_image is None or target_face_image is None:
        return None, "Vui lòng upload cả 2 ảnh"

    try:
        # 1) hair + face + forehead mask, plus the face-only mask.
        combined_mask, face_only_mask = extract_hair_face_mask(original_image)

        # 2) paste the new face over the face-only region.
        swapped_image = simple_face_swap(original_image, target_face_image, face_only_mask)

        # 3) composite the swapped head area back onto the original.
        final_result = blend_with_original(original_image, swapped_image, combined_mask)

        return final_result, "Thành công! Face swap hoàn tất."
    except Exception as e:
        return None, f"Lỗi: {str(e)}"
276
+
277
def extract_only(image):
    """Demo helper: return an RGBA image with only hair+face+forehead opaque.

    Returns None for missing input or on any processing error.

    Fix: the input is forced to RGB first — an RGBA upload previously made
    ``np.dstack`` produce a 5-channel array, so ``Image.fromarray`` raised
    and the demo silently returned nothing.
    """
    if image is None:
        return None

    try:
        rgb = image.convert("RGB")  # guarantee 3 channels before adding alpha
        combined_mask, _ = extract_hair_face_mask(rgb)

        np_image = np.array(rgb)
        alpha = (combined_mask * 255).astype(np.uint8)
        return Image.fromarray(np.dstack([np_image, alpha]))
    except Exception:
        # Best-effort demo; show nothing on failure.
        return None
292
+
293
# Build the Gradio UI: three tabs (face swap, head replacement, extraction
# demo) that share the segmentation helpers defined above.
with gr.Blocks(title="Face Swap + Head Replacement") as demo:
    gr.Markdown("""
    # 🔄 Face Swap + Head Replacement Complete

    **3 Chức năng chính:**
    1. 🎭 **Face Swap**: Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background
    2. 🔄 **Head Replacement**: Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target
    3. ✂️ **Extract Demo**: Tách tóc + mặt + trán thành PNG
    """)

    # Tab 1: swap only the face region, leaving hair/background untouched.
    with gr.Tab("🎭 Face Swap Workflow"):
        gr.Markdown("### Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background")
        with gr.Row():
            with gr.Column():
                original_img = gr.Image(type="pil", label="📸 Ảnh Gốc")
                target_face_img = gr.Image(type="pil", label="👤 Khuôn Mặt Muốn Swap")
                swap_btn = gr.Button("🔄 Thực hiện Face Swap", variant="primary")

            with gr.Column():
                result_img = gr.Image(type="pil", label="✨ Kết Quả Face Swap")
                status_text = gr.Textbox(label="📋 Trạng thái", interactive=False)

        swap_btn.click(
            fn=face_swap_workflow,
            inputs=[original_img, target_face_img],
            outputs=[result_img, status_text]
        )

    # Tab 2: replace the entire head (hair + face) in the target image.
    with gr.Tab("🔄 Head Replacement"):
        gr.Markdown("### Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target")
        with gr.Row():
            with gr.Column():
                source_head_img = gr.Image(type="pil", label="🗣️ Ảnh Có Đầu Mới (Source)")
                target_body_img = gr.Image(type="pil", label="🎯 Ảnh Cần Thay Đầu (Target)")
                replace_btn = gr.Button("🔄 Thay Thế Đầu", variant="secondary")

            with gr.Column():
                replace_result = gr.Image(type="pil", label="✨ Kết Quả Head Replacement")
                replace_status = gr.Textbox(label="📋 Trạng thái", interactive=False)

        # Live previews of the extracted head region for both inputs.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 👁️ Preview đầu source:")
                source_preview = gr.Image(type="pil", label="🔍 Preview Đầu Source", interactive=False)
            with gr.Column():
                gr.Markdown("### 👁️ Preview đầu target (sẽ bị thay):")
                target_preview = gr.Image(type="pil", label="🔍 Preview Đầu Target", interactive=False)

        # Previews refresh automatically whenever an input image changes.
        source_head_img.change(
            fn=preview_head_extraction,
            inputs=source_head_img,
            outputs=source_preview
        )

        target_body_img.change(
            fn=preview_head_extraction,
            inputs=target_body_img,
            outputs=target_preview
        )

        replace_btn.click(
            fn=head_replacement_swap,
            inputs=[source_head_img, target_body_img],
            outputs=[replace_result, replace_status]
        )

    # Tab 3: demo of the hair+face+forehead extraction as a transparent PNG.
    with gr.Tab("✂️ Demo Tách Tóc + Mặt + Trán"):
        with gr.Row():
            demo_input = gr.Image(type="pil", label="📸 Ảnh Input")
            demo_output = gr.Image(type="pil", label="✂️ Tóc + Mặt + Trán (PNG)")

        # Processing runs automatically on upload — no button needed.
        demo_input.change(
            fn=extract_only,
            inputs=demo_input,
            outputs=demo_output
        )

    gr.Markdown("""
    ## 📝 Hướng dẫn sử dụng:

    ### 🎭 Tab "Face Swap Workflow":
    - **Mục đích**: Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background
    - **Cách dùng**: Upload ảnh gốc + ảnh khuôn mặt target → Click Face Swap

    ### 🔄 Tab "Head Replacement":
    - **Mục đích**: Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target
    - **Cách dùng**: Upload ảnh có đầu mới + ảnh cần thay đầu → Click Thay Thế Đầu
    - **Preview**: Xem trước cả 2 vùng đầu (source và target)
    - **Kết quả**: Đầu từ source sẽ thay thế hoàn toàn đầu trong target

    ### ✂️ Tab "Demo Tách Tóc + Mặt + Trán":
    - **Mục đích**: Demo chức năng tách thành file PNG với background trong suốt
    - **Tự động**: Upload ảnh sẽ tự động xử lý

    ## ✨ Tính năng:
    - 🎯 **Tách chính xác**: Tóc, mặt và trán với AI
    - 🔄 **Face swap tự nhiên**: Chỉ thay mặt, giữ tóc
    - 🔄 **Head replacement**: Thay thế hoàn toàn đầu
    - 🎨 **Smart scaling**: Tự động điều chỉnh kích thước
    - 📐 **Auto positioning**: Tự động căn chỉnh vị trí
    - 👁️ **Dual preview**: Xem trước cả source và target
    """)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link in addition to
    # the local server.
    demo.launch(share=True)