Update app.py

app.py CHANGED
@@ -1,44 +1,151 @@
 import gradio as gr
-import torch
-import os
-import imageio
-import tempfile
-import time
-
-        "damo-vilab/text-to-video-ms-1.7b",
-        torch_dtype=dtype,
-    ).to(device)
-
-        video_path = os.path.join(temp_dir, "genv_output.mp4")
-        imageio.mimsave(video_path, frames, fps=fps)
-
-    gr.Markdown("Generate AI-powered videos from text prompts using open-source models!")
+import cv2
+import numpy as np
+import dlib
+from PIL import Image
 
+# Load dlib models once
+detector = dlib.get_frontal_face_detector()
+predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
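+# Assumption: the 68-point landmark file is bundled with the Space next to
+# app.py; dlib does not download it automatically.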
+def extract_index_nparray(nparray):
+    # np.where returns a tuple of index arrays; take the first match, if any
+    return nparray[0][0] if nparray[0].size > 0 else None
 
+def process_frame(img1_pil, img2_pil):
+    # Preprocess images (resize + orientation fix)
+    def preprocess(img_pil):
+        if img_pil.width > img_pil.height:
+            img_pil = img_pil.transpose(Image.ROTATE_270)
+        return img_pil.resize((300, 300))
 
+    img1 = np.array(preprocess(img1_pil))
+    img2 = np.array(preprocess(img2_pil))
 
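+    # The 300x300 resize above keeps dlib detection fast on CPU hardware; the
+    # rotation is a heuristic that treats landscape inputs as sideways portraits.
+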
+    # Note: PIL arrays are RGB; the BGR flags here only change the grayscale
+    # weighting slightly, which is fine for face detection
+    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
+    mask = np.zeros_like(img1_gray)
 
+    height, width, channels = img2.shape
+    img2_new_face = np.zeros((height, width, channels), np.uint8)
 
+    # Detect faces and landmarks in img1
+    faces1 = detector(img1_gray)
+    if len(faces1) == 0:
+        return Image.fromarray(img2)  # No face found fallback
 
+    # If several faces are detected, only the last one's landmarks are kept
+    landmarks_points = []
+    for face in faces1:
+        landmarks = predictor(img1_gray, face)
+        landmarks_points = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
 
+    points = np.array(landmarks_points, np.int32)
+    convexhull = cv2.convexHull(points)
+    cv2.fillConvexPoly(mask, convexhull, 255)
+    face_image_1 = cv2.bitwise_and(img1, img1, mask=mask)
 
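+    # Delaunay triangulation splits the face into small triangles that can each
+    # be warped with a simple affine map, approximating a non-rigid deformation.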
+    # Delaunay triangulation on face1
+    rect = cv2.boundingRect(convexhull)
+    subdiv = cv2.Subdiv2D(rect)
+    subdiv.insert(landmarks_points)
+    triangles = subdiv.getTriangleList()
+    triangles = np.array(triangles, dtype=np.int32)
+
+    # Store triangles as landmark indices so the same triangulation can be
+    # re-applied to the second face's landmark set
+    indexes_triangles = []
+    for t in triangles:
+        pt1, pt2, pt3 = (t[0], t[1]), (t[2], t[3]), (t[4], t[5])
+        index_pt1 = extract_index_nparray(np.where((points == pt1).all(axis=1)))
+        index_pt2 = extract_index_nparray(np.where((points == pt2).all(axis=1)))
+        index_pt3 = extract_index_nparray(np.where((points == pt3).all(axis=1)))
+        if None not in (index_pt1, index_pt2, index_pt3):
+            indexes_triangles.append([index_pt1, index_pt2, index_pt3])
+
+    # Detect faces and landmarks in img2
+    faces2 = detector(img2_gray)
+    if len(faces2) == 0:
+        return Image.fromarray(img2)  # No face found fallback
+
+    landmarks_points2 = []
+    for face in faces2:
+        landmarks = predictor(img2_gray, face)
+        landmarks_points2 = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
+
+    points2 = np.array(landmarks_points2, np.int32)
+    convexhull2 = cv2.convexHull(points2)
+
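+    # Each triangle pair below gets its own affine transform; together these
+    # piecewise warps bend face1's texture onto face2's landmark geometry.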
+    # Warp triangles from img1 to img2
+    for triangle_index in indexes_triangles:
+        tr1_pts = [landmarks_points[i] for i in triangle_index]
+        tr2_pts = [landmarks_points2[i] for i in triangle_index]
+
+        # Crop the source triangle and build its mask
+        rect1 = cv2.boundingRect(np.array(tr1_pts))
+        x, y, w, h = rect1
+        cropped_triangle = img1[y:y+h, x:x+w]
+        cropped_tr1_mask = np.zeros((h, w), np.uint8)
+        points = np.array([[pt[0]-x, pt[1]-y] for pt in tr1_pts], np.int32)
+        cv2.fillConvexPoly(cropped_tr1_mask, points, 255)
+
+        # Build the destination triangle's mask
+        rect2 = cv2.boundingRect(np.array(tr2_pts))
+        x2, y2, w2, h2 = rect2
+        cropped_tr2_mask = np.zeros((h2, w2), np.uint8)
+        points2 = np.array([[pt[0]-x2, pt[1]-y2] for pt in tr2_pts], np.int32)
+        cv2.fillConvexPoly(cropped_tr2_mask, points2, 255)
+
+        # Affine-warp the source triangle onto the destination triangle's geometry
+        M = cv2.getAffineTransform(np.float32(points), np.float32(points2))
+        warped_triangle = cv2.warpAffine(cropped_triangle, M, (w2, h2))
+        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=cropped_tr2_mask)
+
+        # Mask out already-written pixels so overlapping triangle edges don't double up
+        img2_face_area = img2_new_face[y2:y2+h2, x2:x2+w2]
+        img2_face_area_gray = cv2.cvtColor(img2_face_area, cv2.COLOR_BGR2GRAY)
+        _, mask_triangles_inv = cv2.threshold(img2_face_area_gray, 1, 255, cv2.THRESH_BINARY_INV)
+        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=mask_triangles_inv)
+        img2_face_area = cv2.add(img2_face_area, warped_triangle)
+        img2_new_face[y2:y2+h2, x2:x2+w2] = img2_face_area
+
+    # Blank out img2's face region, then drop in the reconstructed face
+    img2_face_mask = np.zeros_like(img2_gray)
+    img2_head_mask = cv2.fillConvexPoly(img2_face_mask, convexhull2, 255)
+    img2_face_mask = cv2.bitwise_not(img2_head_mask)
+    img2_head_noface = cv2.bitwise_and(img2, img2, mask=img2_face_mask)
+    result = cv2.add(img2_head_noface, img2_new_face)
+
+    # Seamless cloning (Poisson blending) smooths color and lighting at the seam
+    x, y, w, h = cv2.boundingRect(convexhull2)
+    center_face2 = (x + w // 2, y + h // 2)
+    seamlessclone = cv2.seamlessClone(result, img2, img2_head_mask, center_face2, cv2.NORMAL_CLONE)
+
+    return Image.fromarray(seamlessclone)
+
+def swap_faces(image1, image2):
+    # If a video was uploaded, grab its first frame and convert it to PIL
+    def video_to_pil(video_file):
+        cap = cv2.VideoCapture(video_file.name)
+        ret, frame = cap.read()
+        cap.release()
+        if not ret:
+            return None
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return Image.fromarray(frame_rgb)
+
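+    # Only the first frame of a video is swapped by design; a full-video
+    # version would have to loop over every frame and re-encode the output.
+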
+    # Convert inputs to PIL Images (gr.File passes a tempfile-like object with .name)
+    if hasattr(image1, "name") and image1.name.lower().endswith(('.mp4', '.mov', '.avi')):
+        img1_pil = video_to_pil(image1)
+    else:
+        img1_pil = Image.open(image1)
+
+    if hasattr(image2, "name") and image2.name.lower().endswith(('.mp4', '.mov', '.avi')):
+        img2_pil = video_to_pil(image2)
+    else:
+        img2_pil = Image.open(image2)
+
+    if img1_pil is None or img2_pil is None:
+        return None
+
+    return process_frame(img1_pil, img2_pil)
+
+iface = gr.Interface(
+    fn=swap_faces,
+    # use the top-level gr.File component (gr.inputs was deprecated and later removed)
+    inputs=[gr.File(label="Upload Image/Video 1"), gr.File(label="Upload Image/Video 2")],
+    outputs=gr.Image(label="Face Swapped Result"),
+    title="Face Swap Image/Video (first frame only for video)",
+    description="Upload two images or videos; if videos, only the first frame will be used for swapping faces."
+)
+
+iface.launch()