Files changed (1) hide show
  1. app.py +397 -89
app.py CHANGED
@@ -1,91 +1,399 @@
1
- # -*- coding:UTF-8 -*-
2
- # !/usr/bin/env python
3
- import spaces
4
- import numpy as np
5
  import gradio as gr
6
- import gradio.exceptions
7
- import roop.globals
8
- from roop.core import (
9
- start,
10
- decode_execution_providers,
11
- )
12
- from roop.processors.frame.core import get_frame_processors_modules
13
- from roop.utilities import normalize_output_path
14
- import os
15
- import random
16
- from PIL import Image
17
- import onnxruntime as ort
18
  import cv2
19
- from roop.face_analyser import get_one_face
20
-
21
- @spaces.GPU
22
- def swap_face(source_file, target_file, doFaceEnhancer):
23
- session_dir = "temp" # Sử dụng thư mục cố định
24
- os.makedirs(session_dir, exist_ok=True)
25
-
26
- # Tạo tên file ngẫu nhiên
27
- source_filename = f"source_{random.randint(1000, 9999)}.jpg"
28
- target_filename = f"target_{random.randint(1000, 9999)}.jpg"
29
- output_filename = f"output_{random.randint(1000, 9999)}.jpg"
30
-
31
- source_path = os.path.join(session_dir, source_filename)
32
- target_path = os.path.join(session_dir, target_filename)
33
-
34
- source_image = Image.fromarray(source_file)
35
- source_image.save(source_path)
36
- target_image = Image.fromarray(target_file)
37
- target_image.save(target_path)
38
-
39
- print("source_path: ", source_path)
40
- print("target_path: ", target_path)
41
-
42
- # Check if a face is detected in the source image
43
- source_face = get_one_face(cv2.imread(source_path))
44
- if source_face is None:
45
- raise gradio.exceptions.Error("No face in source path detected.")
46
-
47
- # Check if a face is detected in the target image
48
- target_face = get_one_face(cv2.imread(target_path))
49
- if target_face is None:
50
- raise gradio.exceptions.Error("No face in target path detected.")
51
-
52
- output_path = os.path.join(session_dir, output_filename)
53
- normalized_output_path = normalize_output_path(source_path, target_path, output_path)
54
-
55
- frame_processors = ["face_swapper", "face_enhancer"] if doFaceEnhancer else ["face_swapper"]
56
-
57
- for frame_processor in get_frame_processors_modules(frame_processors):
58
- if not frame_processor.pre_check():
59
- print(f"Pre-check failed for {frame_processor}")
60
- raise gradio.exceptions.Error(f"Pre-check failed for {frame_processor}")
61
-
62
- roop.globals.source_path = source_path
63
- roop.globals.target_path = target_path
64
- roop.globals.output_path = normalized_output_path
65
- roop.globals.frame_processors = frame_processors
66
- roop.globals.headless = True
67
- roop.globals.keep_fps = True
68
- roop.globals.keep_audio = True
69
- roop.globals.keep_frames = False
70
- roop.globals.many_faces = False
71
- roop.globals.video_encoder = "libx264"
72
- roop.globals.video_quality = 18
73
- roop.globals.execution_providers = decode_execution_providers(['cpu'])
74
- roop.globals.reference_face_position = 0
75
- roop.globals.similar_face_distance = 0.6
76
- roop.globals.max_memory = 60
77
- roop.globals.execution_threads = 8
78
-
79
- start()
80
- return normalized_output_path
81
-
82
- app = gr.Interface(
83
- fn=swap_face,
84
- inputs=[
85
- gr.Image(),
86
- gr.Image(),
87
- gr.Checkbox(label="Face Enhancer?", info="Do face enhancement?")
88
- ],
89
- outputs="image"
90
- )
91
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
3
+ from PIL import Image, ImageDraw
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ import mediapipe as mp
 
 
 
 
 
 
8
  import cv2
9
+ import io
10
+ import base64
11
+
12
# Load the SegFormer segmentation model once at import time (downloads the
# checkpoint on first run). Downstream code treats class id 2 of this
# checkpoint as "hair".
processor = SegformerImageProcessor.from_pretrained("VanNguyen1214/get_face_and_hair")
model = AutoModelForSemanticSegmentation.from_pretrained("VanNguyen1214/get_face_and_hair")
15
+
16
def get_facemesh_mask(image):
    """Build a binary face mask (uint8 of 0/1) with MediaPipe FaceMesh.

    Detects at most one face in the image and fills the convex hull of its
    landmarks. Returns an all-zero mask when no face is found.
    """
    frame = np.array(image)
    h, w, _ = frame.shape
    mask = np.zeros((h, w), dtype=np.uint8)

    mesh_module = mp.solutions.face_mesh
    with mesh_module.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as mesh:
        detection = mesh.process(frame)

    if detection.multi_face_landmarks:
        for landmarks in detection.multi_face_landmarks:
            # Landmarks are normalised [0, 1]; scale to pixel coordinates.
            pts = np.array(
                [[int(lm.x * w), int(lm.y * h)] for lm in landmarks.landmark],
                np.int32,
            )
            if len(pts) > 0:
                cv2.fillConvexPoly(mask, cv2.convexHull(pts), 1)

    return mask
36
+
37
def expand_forehead_mask(face_mask, expand_percent=0.2):
    """Extend a binary face mask upward to also cover the forehead.

    The previous implementation *translated* the whole mask upward, which
    dropped the bottom ``expand`` rows of the face, and it also used the
    inclusive bbox maximum as an exclusive slice end, silently losing the
    last mask row. This version keeps the original mask and ORs in an
    upward-shifted copy, so the full face is preserved and only a forehead
    band is added above it.

    Args:
        face_mask: 2-D uint8 array, 1 inside the face and 0 elsewhere.
        expand_percent: fraction of the face height to extend upward.

    Returns:
        2-D uint8 mask of the same shape; the input unchanged if empty.
    """
    ys, _ = np.where(face_mask > 0)
    if len(ys) == 0:
        return face_mask  # nothing detected -> nothing to expand

    face_height = int(ys.max()) - int(ys.min())
    shift = int(face_height * expand_percent)
    if shift <= 0:
        return face_mask

    # Shift the mask up by `shift` rows (rows falling off the top are
    # naturally clipped), then union with the original mask.
    shifted = np.zeros_like(face_mask)
    shifted[:-shift, :] = face_mask[shift:, :]
    return np.maximum(face_mask, shifted)
60
+
61
def extract_hair_face_mask(image):
    """Segment hair + face + forehead from a PIL image.

    Combines a SegFormer hair segmentation (class id 2) with a MediaPipe
    face-hull mask plus an upward "forehead" extension of that hull.

    Returns:
        (combined_mask, face_mesh_mask): two 2-D uint8 arrays of 0/1 —
        the full head mask and the face-only mask.
    """
    rgb = image.convert("RGB")

    # --- hair mask from the SegFormer model -------------------------------
    model_inputs = processor(images=rgb, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**model_inputs)
    logits = outputs.logits.cpu()

    # Upsample the logits back to the original (H, W) before the argmax.
    full_res = F.interpolate(
        logits,
        size=rgb.size[::-1],
        mode="bilinear",
        align_corners=False,
    )
    labels = full_res.argmax(dim=1)[0].numpy()
    hair_mask = (labels == 2).astype(np.uint8)  # class 2 == hair

    # --- face and forehead masks from MediaPipe ---------------------------
    face_mesh_mask = get_facemesh_mask(rgb)
    expanded_face_mask = expand_forehead_mask(face_mesh_mask, expand_percent=0.2)

    # Keep only the forehead band: expanded area not already face or hair.
    forehead_only = cv2.bitwise_and(expanded_face_mask, 1 - face_mesh_mask)
    forehead_only = cv2.bitwise_and(forehead_only, 1 - hair_mask)
    combined_mask = ((hair_mask + face_mesh_mask + forehead_only) > 0).astype(np.uint8)

    # Light Gaussian smoothing followed by re-binarisation.
    blurred = cv2.GaussianBlur(combined_mask.astype(np.float32), (3, 3), 0)
    combined_mask = (blurred > 0.5).astype(np.uint8)

    return combined_mask, face_mesh_mask
94
+
95
def get_face_bbox(face_mask):
    """Return the bounding box (x_min, y_min, x_max, y_max) of the mask.

    Coordinates are inclusive indices of the non-zero region; returns
    None when the mask is empty.
    """
    rows, cols = np.where(face_mask > 0)
    if rows.size == 0:
        return None
    return cols.min(), rows.min(), cols.max(), rows.max()
101
+
102
def detect_head_region(image):
    """Return a smoothed uint8 head mask (hair + face + forehead), or None.

    None is returned when the segmentation finds nothing at all.
    """
    combined_mask, _ = extract_hair_face_mask(image)

    # Nothing segmented -> no head region.
    if not (combined_mask > 0).any():
        return None

    # Smooth the mask edges, then re-binarise.
    soft = cv2.GaussianBlur(combined_mask.astype(np.float32), (5, 5), 0)
    return (soft > 0.5).astype(np.uint8)
119
+
120
def head_replacement_swap(source_head_image, target_body_image):
    """Replace the head (hair + face) in the target image with the source head.

    Both inputs are PIL images. Returns ``(result_image_or_None, status)``;
    the status message is Vietnamese to match the rest of the UI.

    Fixes over the previous version: inputs are forced to RGB (an RGBA
    upload used to crash the 3-channel blend and surface as a generic
    error), inclusive bbox coordinates are now converted to slice ends
    with +1 (the last row/column was silently dropped), and a degenerate
    zero-size head bbox is rejected explicitly.
    """
    if source_head_image is None or target_body_image is None:
        return None, "Vui lòng upload cả 2 ảnh"

    try:
        # Force RGB so both arrays have exactly 3 channels.
        target_rgb = target_body_image.convert("RGB")
        # NOTE(review): resizing to the full target size ignores aspect
        # ratio; acceptable here because only the head bbox is reused.
        source_resized = source_head_image.convert("RGB").resize(target_rgb.size)

        # Detect head regions in both images.
        source_head_mask = detect_head_region(source_resized)
        if source_head_mask is None:
            return None, "Không phát hiện được đầu trong ảnh source"

        target_head_mask = detect_head_region(target_rgb)
        if target_head_mask is None:
            return None, "Không phát hiện được đầu trong ảnh target"

        source_np = np.array(source_resized)
        target_np = np.array(target_rgb)

        source_ys, source_xs = np.where(source_head_mask > 0)
        target_ys, target_xs = np.where(target_head_mask > 0)
        if len(source_ys) == 0 or len(target_ys) == 0:
            return None, "Không thể xác định vùng đầu"

        # Inclusive bounding boxes; +1 below converts them to slice ends.
        sx1, sy1 = source_xs.min(), source_ys.min()
        sx2, sy2 = source_xs.max(), source_ys.max()
        tx1, ty1 = target_xs.min(), target_ys.min()
        tx2, ty2 = target_xs.max(), target_ys.max()

        source_head_crop = source_np[sy1:sy2 + 1, sx1:sx2 + 1]
        source_mask_crop = source_head_mask[sy1:sy2 + 1, sx1:sx2 + 1]

        target_width = tx2 + 1 - tx1
        target_height = ty2 + 1 - ty1
        if target_width <= 0 or target_height <= 0:
            return None, "Không thể xác định vùng đầu"

        # Scale the source head (and its mask) onto the target head bbox.
        source_head_resized = cv2.resize(source_head_crop, (target_width, target_height))
        source_mask_resized = cv2.resize(
            source_mask_crop.astype(np.float32), (target_width, target_height)
        )
        source_mask_resized = (source_mask_resized > 0.5).astype(np.uint8)

        result = target_np.copy()

        # Feather the mask edges for a seamless paste.
        mask_3d = np.stack([source_mask_resized] * 3, axis=2).astype(np.float32)
        smooth_mask = np.clip(cv2.GaussianBlur(mask_3d, (7, 7), 0), 0, 1)

        region = result[ty1:ty2 + 1, tx1:tx2 + 1]
        result[ty1:ty2 + 1, tx1:tx2 + 1] = (
            source_head_resized * smooth_mask + region * (1 - smooth_mask)
        ).astype(np.uint8)

        return Image.fromarray(result), "Thành công! Thay thế đầu hoàn tất."

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"Lỗi: {str(e)}"
188
+
189
def preview_head_extraction(image):
    """Return the image as RGBA with everything but the head transparent.

    Used as a live preview in the Head Replacement tab. Returns None for
    missing input, when no head is detected, or on any processing error.

    Fix: the input is forced to RGB first — an RGBA upload previously made
    ``np.dstack`` produce a 5-channel array, so ``Image.fromarray`` raised
    and the preview silently showed nothing.
    """
    if image is None:
        return None

    try:
        rgb = image.convert("RGB")  # guarantee 3 channels before adding alpha
        head_mask = detect_head_region(rgb)
        if head_mask is None:
            return None

        np_image = np.array(rgb)
        alpha = (head_mask * 255).astype(np.uint8)
        return Image.fromarray(np.dstack([np_image, alpha]))
    except Exception:
        # Previews are best-effort; swallow errors and show nothing.
        return None
206
+
207
def simple_face_swap(source_image, target_face_image, face_mask):
    """Paste `target_face_image` over the face region of `source_image`.

    The target face is resized to the face bounding box and copied in only
    where `face_mask` is set. Returns a PIL image; if `face_mask` is empty
    the source image is returned unchanged.

    Fixes over the previous version: inclusive bbox coordinates are
    converted to slice ends with +1 (the last row/column of the face was
    dropped), inputs are forced to RGB so channel counts always match,
    and the per-channel Python loop is replaced by a broadcast blend.
    """
    bbox = get_face_bbox(face_mask)
    if bbox is None:
        return source_image

    x1, y1, x2, y2 = bbox
    # bbox coordinates are inclusive -> +1 keeps the last row/column.
    face_width = x2 + 1 - x1
    face_height = y2 + 1 - y1

    target_np = np.array(target_face_image.convert("RGB").resize((face_width, face_height)))
    source_np = np.array(source_image.convert("RGB"))

    result = source_np.copy()

    # Vectorised blend: the 0/1 mask broadcasts over the channel axis,
    # so products stay within uint8 range.
    region_mask = face_mask[y1:y2 + 1, x1:x2 + 1][..., None]
    result[y1:y2 + 1, x1:x2 + 1] = (
        target_np * region_mask
        + source_np[y1:y2 + 1, x1:x2 + 1] * (1 - region_mask)
    )

    return Image.fromarray(result.astype(np.uint8))
237
+
238
def blend_with_original(original_image, swapped_hair_face, combined_mask):
    """Composite the swapped hair/face region back onto the original image.

    Pixels where `combined_mask` is 1 come from `swapped_hair_face`, the
    rest from `original_image`. Returns a PIL image.

    Fix: a dead ``result = original_np.copy()`` assignment (immediately
    overwritten) has been removed.
    """
    original_np = np.array(original_image)
    swapped_np = np.array(swapped_hair_face)

    # Match sizes if the swap stage changed them.
    if original_np.shape[:2] != swapped_np.shape[:2]:
        swapped_np = np.array(swapped_hair_face.resize(original_image.size))
        # cv2.resize's dsize is (width, height) — exactly PIL's .size order.
        combined_mask = cv2.resize(combined_mask, original_image.size)

    # Broadcast the 2-D 0/1 mask over the channel axis and blend.
    mask_3d = np.stack([combined_mask] * 3, axis=2)
    result = swapped_np * mask_3d + original_np * (1 - mask_3d)

    return Image.fromarray(result.astype(np.uint8))
256
+
257
def face_swap_workflow(original_image, target_face_image):
    """Full pipeline: segment -> swap the face -> composite back.

    Returns ``(result_image_or_None, status_message)`` for the Gradio UI.
    """
    if original_image is None or target_face_image is None:
        return None, "Vui lòng upload cả 2 ảnh"

    try:
        # 1) hair + face + forehead mask, plus the face-only mask.
        combined_mask, face_only_mask = extract_hair_face_mask(original_image)

        # 2) paste the new face over the face-only region.
        swapped_image = simple_face_swap(original_image, target_face_image, face_only_mask)

        # 3) composite the swapped head area back onto the original.
        final_result = blend_with_original(original_image, swapped_image, combined_mask)

        return final_result, "Thành công! Face swap hoàn tất."
    except Exception as e:
        return None, f"Lỗi: {str(e)}"
276
+
277
def extract_only(image):
    """Demo helper: return an RGBA image with only hair+face+forehead opaque.

    Returns None for missing input or on any processing error.

    Fix: the input is forced to RGB first — an RGBA upload previously made
    ``np.dstack`` produce a 5-channel array, so ``Image.fromarray`` raised
    and the demo silently returned nothing.
    """
    if image is None:
        return None

    try:
        rgb = image.convert("RGB")  # guarantee 3 channels before adding alpha
        combined_mask, _ = extract_hair_face_mask(rgb)

        np_image = np.array(rgb)
        alpha = (combined_mask * 255).astype(np.uint8)
        return Image.fromarray(np.dstack([np_image, alpha]))
    except Exception:
        # Best-effort demo; show nothing on failure.
        return None
292
+
293
# Build the Gradio UI: three tabs (face swap, head replacement, extraction
# demo) that share the segmentation helpers defined above.
with gr.Blocks(title="Face Swap + Head Replacement") as demo:
    gr.Markdown("""
    # 🔄 Face Swap + Head Replacement Complete

    **3 Chức năng chính:**
    1. 🎭 **Face Swap**: Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background
    2. 🔄 **Head Replacement**: Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target
    3. ✂️ **Extract Demo**: Tách tóc + mặt + trán thành PNG
    """)

    # Tab 1: swap only the face region, leaving hair/background untouched.
    with gr.Tab("🎭 Face Swap Workflow"):
        gr.Markdown("### Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background")
        with gr.Row():
            with gr.Column():
                original_img = gr.Image(type="pil", label="📸 Ảnh Gốc")
                target_face_img = gr.Image(type="pil", label="👤 Khuôn Mặt Muốn Swap")
                swap_btn = gr.Button("🔄 Thực hiện Face Swap", variant="primary")

            with gr.Column():
                result_img = gr.Image(type="pil", label="✨ Kết Quả Face Swap")
                status_text = gr.Textbox(label="📋 Trạng thái", interactive=False)

        swap_btn.click(
            fn=face_swap_workflow,
            inputs=[original_img, target_face_img],
            outputs=[result_img, status_text]
        )

    # Tab 2: replace the entire head (hair + face) in the target image.
    with gr.Tab("🔄 Head Replacement"):
        gr.Markdown("### Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target")
        with gr.Row():
            with gr.Column():
                source_head_img = gr.Image(type="pil", label="🗣️ Ảnh Có Đầu Mới (Source)")
                target_body_img = gr.Image(type="pil", label="🎯 Ảnh Cần Thay Đầu (Target)")
                replace_btn = gr.Button("🔄 Thay Thế Đầu", variant="secondary")

            with gr.Column():
                replace_result = gr.Image(type="pil", label="✨ Kết Quả Head Replacement")
                replace_status = gr.Textbox(label="📋 Trạng thái", interactive=False)

        # Live previews of the extracted head region for both inputs.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 👁️ Preview đầu source:")
                source_preview = gr.Image(type="pil", label="🔍 Preview Đầu Source", interactive=False)
            with gr.Column():
                gr.Markdown("### 👁️ Preview đầu target (sẽ bị thay):")
                target_preview = gr.Image(type="pil", label="🔍 Preview Đầu Target", interactive=False)

        # Previews refresh automatically whenever an input image changes.
        source_head_img.change(
            fn=preview_head_extraction,
            inputs=source_head_img,
            outputs=source_preview
        )

        target_body_img.change(
            fn=preview_head_extraction,
            inputs=target_body_img,
            outputs=target_preview
        )

        replace_btn.click(
            fn=head_replacement_swap,
            inputs=[source_head_img, target_body_img],
            outputs=[replace_result, replace_status]
        )

    # Tab 3: demo of the hair+face+forehead extraction as a transparent PNG.
    with gr.Tab("✂️ Demo Tách Tóc + Mặt + Trán"):
        with gr.Row():
            demo_input = gr.Image(type="pil", label="📸 Ảnh Input")
            demo_output = gr.Image(type="pil", label="✂️ Tóc + Mặt + Trán (PNG)")

        # Processing runs automatically on upload — no button needed.
        demo_input.change(
            fn=extract_only,
            inputs=demo_input,
            outputs=demo_output
        )

    gr.Markdown("""
    ## 📝 Hướng dẫn sử dụng:

    ### 🎭 Tab "Face Swap Workflow":
    - **Mục đích**: Chỉ thay đổi khuôn mặt, giữ nguyên tóc và background
    - **Cách dùng**: Upload ảnh gốc + ảnh khuôn mặt target → Click Face Swap

    ### 🔄 Tab "Head Replacement":
    - **Mục đích**: Thay thế hoàn toàn đầu (tóc + mặt) trong ảnh target
    - **Cách dùng**: Upload ảnh có đầu mới + ảnh cần thay đầu → Click Thay Thế Đầu
    - **Preview**: Xem trước cả 2 vùng đầu (source và target)
    - **Kết quả**: Đầu từ source sẽ thay thế hoàn toàn đầu trong target

    ### ✂️ Tab "Demo Tách Tóc + Mặt + Trán":
    - **Mục đích**: Demo chức năng tách thành file PNG với background trong suốt
    - **Tự động**: Upload ảnh sẽ tự động xử lý

    ## ✨ Tính năng:
    - 🎯 **Tách chính xác**: Tóc, mặt và trán với AI
    - 🔄 **Face swap tự nhiên**: Chỉ thay mặt, giữ tóc
    - 🔄 **Head replacement**: Thay thế hoàn toàn đầu
    - 🎨 **Smart scaling**: Tự động điều chỉnh kích thước
    - 📐 **Auto positioning**: Tự động căn chỉnh vị trí
    - 👁️ **Dual preview**: Xem trước cả source và target
    """)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link in addition to
    # the local server.
    demo.launch(share=True)