Xenobd committed on
Commit
0317f8c
·
verified ·
1 Parent(s): 021bde3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -33
app.py CHANGED
@@ -1,44 +1,151 @@
1
  import gradio as gr
2
- import torch
3
- from diffusers import DiffusionPipeline
4
- import imageio
5
- import os
6
- import tempfile
7
- import time
8
 
9
- def generate_video(prompt, fps, progress=gr.Progress()):
10
- device = "cpu"
11
- dtype = torch.float32
12
 
13
- progress(0, desc="Loading pipeline...")
14
- pipe = DiffusionPipeline.from_pretrained(
15
- "damo-vilab/text-to-video-ms-1.7b",
16
- torch_dtype=dtype,
17
- ).to(device)
18
 
19
- progress(20, desc="Generating frames...")
20
- result = pipe(prompt)
21
- frames = result.frames
 
 
 
22
 
23
- progress(80, desc="Saving video...")
24
- temp_dir = tempfile.mkdtemp()
25
- video_path = os.path.join(temp_dir, "genv_output.mp4")
26
- imageio.mimsave(video_path, frames, fps=fps)
27
 
28
- progress(100, desc="Done ✅")
29
- return video_path
 
30
 
31
- with gr.Blocks(title="Gen-V: Text to Video") as demo:
32
- gr.Markdown("## 🎥 Gen-V: Text-to-Video Generator")
33
- gr.Markdown("Generate AI-powered videos from text prompts using open-source models!")
34
 
35
- with gr.Row():
36
- prompt = gr.Textbox(label="Prompt", value="cat wearing black goggles", lines=1)
37
- fps = gr.Slider(1, 30, value=8, label="FPS")
 
38
 
39
- generate_btn = gr.Button("🎬 Generate")
40
- output_video = gr.Video(label="Output Video")
 
 
41
 
42
- generate_btn.click(fn=generate_video, inputs=[prompt, fps], outputs=output_video)
 
 
 
43
 
44
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import dlib
5
+ from PIL import Image
 
 
6
 
7
+ # Load dlib models once
8
+ detector = dlib.get_frontal_face_detector()
9
+ predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
10
 
11
def extract_index_nparray(nparray):
    """Return the first index from a ``np.where(cond)`` result, or None.

    ``np.where(cond)`` returns a *tuple* of index arrays; the emptiness test
    must look at the first array's ``.size`` (the tuple itself has no
    ``.size`` attribute — the original check raised AttributeError).
    """
    return nparray[0][0] if nparray[0].size > 0 else None
 
 
 
13
 
14
+ def process_frame(img1_pil, img2_pil):
15
+ # Preprocess images (resize + orientation fix)
16
+ def preprocess(img_pil):
17
+ if img_pil.width > img_pil.height:
18
+ img_pil = img_pil.transpose(Image.ROTATE_270)
19
+ return img_pil.resize((300, 300))
20
 
21
+ img1 = np.array(preprocess(img1_pil))
22
+ img2 = np.array(preprocess(img2_pil))
 
 
23
 
24
+ img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
25
+ img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
26
+ mask = np.zeros_like(img1_gray)
27
 
28
+ height, width, channels = img2.shape
29
+ img2_new_face = np.zeros((height, width, channels), np.uint8)
 
30
 
31
+ # Detect faces and landmarks in img1
32
+ faces1 = detector(img1_gray)
33
+ if len(faces1) == 0:
34
+ return Image.fromarray(img2) # No face found fallback
35
 
36
+ landmarks_points = []
37
+ for face in faces1:
38
+ landmarks = predictor(img1_gray, face)
39
+ landmarks_points = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
40
 
41
+ points = np.array(landmarks_points, np.int32)
42
+ convexhull = cv2.convexHull(points)
43
+ cv2.fillConvexPoly(mask, convexhull, 255)
44
+ face_image_1 = cv2.bitwise_and(img1, img1, mask=mask)
45
 
46
+ # Delaunay triangulation on face1
47
+ rect = cv2.boundingRect(convexhull)
48
+ subdiv = cv2.Subdiv2D(rect)
49
+ subdiv.insert(landmarks_points)
50
+ triangles = subdiv.getTriangleList()
51
+ triangles = np.array(triangles, dtype=np.int32)
52
+
53
+ indexes_triangles = []
54
+ for t in triangles:
55
+ pt1, pt2, pt3 = (t[0], t[1]), (t[2], t[3]), (t[4], t[5])
56
+ index_pt1 = extract_index_nparray(np.where((points == pt1).all(axis=1)))
57
+ index_pt2 = extract_index_nparray(np.where((points == pt2).all(axis=1)))
58
+ index_pt3 = extract_index_nparray(np.where((points == pt3).all(axis=1)))
59
+ if None not in (index_pt1, index_pt2, index_pt3):
60
+ indexes_triangles.append([index_pt1, index_pt2, index_pt3])
61
+
62
+ # Detect faces and landmarks in img2
63
+ faces2 = detector(img2_gray)
64
+ if len(faces2) == 0:
65
+ return Image.fromarray(img2) # No face found fallback
66
+
67
+ landmarks_points2 = []
68
+ for face in faces2:
69
+ landmarks = predictor(img2_gray, face)
70
+ landmarks_points2 = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
71
+
72
+ points2 = np.array(landmarks_points2, np.int32)
73
+ convexhull2 = cv2.convexHull(points2)
74
+
75
+ # Warp triangles from img1 to img2
76
+ for triangle_index in indexes_triangles:
77
+ tr1_pts = [landmarks_points[i] for i in triangle_index]
78
+ tr2_pts = [landmarks_points2[i] for i in triangle_index]
79
+
80
+ rect1 = cv2.boundingRect(np.array(tr1_pts))
81
+ x, y, w, h = rect1
82
+ cropped_triangle = img1[y:y+h, x:x+w]
83
+ cropped_tr1_mask = np.zeros((h, w), np.uint8)
84
+ points = np.array([[pt[0]-x, pt[1]-y] for pt in tr1_pts], np.int32)
85
+ cv2.fillConvexPoly(cropped_tr1_mask, points, 255)
86
+
87
+ rect2 = cv2.boundingRect(np.array(tr2_pts))
88
+ x2, y2, w2, h2 = rect2
89
+ cropped_tr2_mask = np.zeros((h2, w2), np.uint8)
90
+ points2 = np.array([[pt[0]-x2, pt[1]-y2] for pt in tr2_pts], np.int32)
91
+ cv2.fillConvexPoly(cropped_tr2_mask, points2, 255)
92
+
93
+ M = cv2.getAffineTransform(np.float32(points), np.float32(points2))
94
+ warped_triangle = cv2.warpAffine(cropped_triangle, M, (w2, h2))
95
+ warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=cropped_tr2_mask)
96
+
97
+ img2_face_area = img2_new_face[y2:y2+h2, x2:x2+w2]
98
+ img2_face_area_gray = cv2.cvtColor(img2_face_area, cv2.COLOR_BGR2GRAY)
99
+ _, mask_triangles_inv = cv2.threshold(img2_face_area_gray, 1, 255, cv2.THRESH_BINARY_INV)
100
+ warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=mask_triangles_inv)
101
+ img2_face_area = cv2.add(img2_face_area, warped_triangle)
102
+ img2_new_face[y2:y2+h2, x2:x2+w2] = img2_face_area
103
+
104
+ img2_face_mask = np.zeros_like(img2_gray)
105
+ img2_head_mask = cv2.fillConvexPoly(img2_face_mask, convexhull2, 255)
106
+ img2_face_mask = cv2.bitwise_not(img2_head_mask)
107
+ img2_head_noface = cv2.bitwise_and(img2, img2, mask=img2_face_mask)
108
+ result = cv2.add(img2_head_noface, img2_new_face)
109
+
110
+ x, y, w, h = cv2.boundingRect(convexhull2)
111
+ center_face2 = (x + w // 2, y + h // 2)
112
+ seamlessclone = cv2.seamlessClone(result, img2, img2_head_mask, center_face2, cv2.NORMAL_CLONE)
113
+
114
+ return Image.fromarray(seamlessclone)
115
+
116
+ def swap_faces(image1, image2):
117
+ # If video uploaded, grab first frame and convert to PIL
118
+ def video_to_pil(video_file):
119
+ cap = cv2.VideoCapture(video_file.name)
120
+ ret, frame = cap.read()
121
+ cap.release()
122
+ if not ret:
123
+ return None
124
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
125
+ return Image.fromarray(frame_rgb)
126
+
127
+ # Convert inputs to PIL Images
128
+ if hasattr(image1, "name") and image1.name.lower().endswith(('.mp4', '.mov', '.avi')):
129
+ img1_pil = video_to_pil(image1)
130
+ else:
131
+ img1_pil = Image.open(image1)
132
+
133
+ if hasattr(image2, "name") and image2.name.lower().endswith(('.mp4', '.mov', '.avi')):
134
+ img2_pil = video_to_pil(image2)
135
+ else:
136
+ img2_pil = Image.open(image2)
137
+
138
+ if img1_pil is None or img2_pil is None:
139
+ return None
140
+
141
+ return process_frame(img1_pil, img2_pil)
142
+
143
+ iface = gr.Interface(
144
+ fn=swap_faces,
145
+ inputs=[gr.inputs.File(label="Upload Image/Video 1"), gr.inputs.File(label="Upload Image/Video 2")],
146
+ outputs=gr.Image(label="Face Swapped Result"),
147
+ title="Face Swap Image/Video (first frame only for video)",
148
+ description="Upload two images or videos; if videos, only first frame will be used for swapping faces."
149
+ )
150
+
151
+ iface.launch()