Update app.py

app.py CHANGED
@@ -1,44 +1,151 @@
 import gradio as gr
-import torch
-import os
-import imageio
-import tempfile
-import time
-
-        "damo-vilab/text-to-video-ms-1.7b",
-        torch_dtype=dtype,
-    ).to(device)
-
-        video_path = os.path.join(temp_dir, "genv_output.mp4")
-        imageio.mimsave(video_path, frames, fps=fps)
-
-    gr.Markdown("Generate AI-powered videos from text prompts using open-source models!")
+import cv2
+import numpy as np
+import dlib
+from PIL import Image
 
+# Load dlib models once
+detector = dlib.get_frontal_face_detector()
+predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
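+# Assumption: the 68-point landmark file is bundled with the Space next to
+# app.py; dlib does not download it automatically.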
+def extract_index_nparray(nparray):
+    # np.where returns a tuple of index arrays; take the first match, if any
+    return nparray[0][0] if nparray[0].size > 0 else None
 
+def process_frame(img1_pil, img2_pil):
+    # Preprocess images (resize + orientation fix)
+    def preprocess(img_pil):
+        if img_pil.width > img_pil.height:
+            img_pil = img_pil.transpose(Image.ROTATE_270)
+        return img_pil.resize((300, 300))
 
+    img1 = np.array(preprocess(img1_pil))
+    img2 = np.array(preprocess(img2_pil))
 
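+    # The 300x300 resize above keeps dlib detection fast on CPU hardware; the
+    # rotation is a heuristic that treats landscape inputs as sideways portraits.
+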
+    # Note: PIL arrays are RGB; the BGR flags here only change the grayscale
+    # weighting slightly, which is fine for face detection
+    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
+    mask = np.zeros_like(img1_gray)
 
+    height, width, channels = img2.shape
+    img2_new_face = np.zeros((height, width, channels), np.uint8)
 
+    # Detect faces and landmarks in img1
+    faces1 = detector(img1_gray)
+    if len(faces1) == 0:
+        return Image.fromarray(img2)  # No face found fallback
 
+    # If several faces are detected, only the last one's landmarks are kept
+    landmarks_points = []
+    for face in faces1:
+        landmarks = predictor(img1_gray, face)
+        landmarks_points = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
 
+    points = np.array(landmarks_points, np.int32)
+    convexhull = cv2.convexHull(points)
+    cv2.fillConvexPoly(mask, convexhull, 255)
+    face_image_1 = cv2.bitwise_and(img1, img1, mask=mask)
 
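+    # Delaunay triangulation splits the face into small triangles that can each
+    # be warped with a simple affine map, approximating a non-rigid deformation.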
+    # Delaunay triangulation on face1
+    rect = cv2.boundingRect(convexhull)
+    subdiv = cv2.Subdiv2D(rect)
+    subdiv.insert(landmarks_points)
+    triangles = subdiv.getTriangleList()
+    triangles = np.array(triangles, dtype=np.int32)
+
+    # Store triangles as landmark indices so the same triangulation can be
+    # re-applied to the second face's landmark set
+    indexes_triangles = []
+    for t in triangles:
+        pt1, pt2, pt3 = (t[0], t[1]), (t[2], t[3]), (t[4], t[5])
+        index_pt1 = extract_index_nparray(np.where((points == pt1).all(axis=1)))
+        index_pt2 = extract_index_nparray(np.where((points == pt2).all(axis=1)))
+        index_pt3 = extract_index_nparray(np.where((points == pt3).all(axis=1)))
+        if None not in (index_pt1, index_pt2, index_pt3):
+            indexes_triangles.append([index_pt1, index_pt2, index_pt3])
+
+    # Detect faces and landmarks in img2
+    faces2 = detector(img2_gray)
+    if len(faces2) == 0:
+        return Image.fromarray(img2)  # No face found fallback
+
+    landmarks_points2 = []
+    for face in faces2:
+        landmarks = predictor(img2_gray, face)
+        landmarks_points2 = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(68)]
+
+    points2 = np.array(landmarks_points2, np.int32)
+    convexhull2 = cv2.convexHull(points2)
+
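+    # Each triangle pair below gets its own affine transform; together these
+    # piecewise warps bend face1's texture onto face2's landmark geometry.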
+    # Warp triangles from img1 to img2
+    for triangle_index in indexes_triangles:
+        tr1_pts = [landmarks_points[i] for i in triangle_index]
+        tr2_pts = [landmarks_points2[i] for i in triangle_index]
+
+        # Crop the source triangle and build its mask
+        rect1 = cv2.boundingRect(np.array(tr1_pts))
+        x, y, w, h = rect1
+        cropped_triangle = img1[y:y+h, x:x+w]
+        cropped_tr1_mask = np.zeros((h, w), np.uint8)
+        points = np.array([[pt[0]-x, pt[1]-y] for pt in tr1_pts], np.int32)
+        cv2.fillConvexPoly(cropped_tr1_mask, points, 255)
+
+        # Build the destination triangle's mask
+        rect2 = cv2.boundingRect(np.array(tr2_pts))
+        x2, y2, w2, h2 = rect2
+        cropped_tr2_mask = np.zeros((h2, w2), np.uint8)
+        points2 = np.array([[pt[0]-x2, pt[1]-y2] for pt in tr2_pts], np.int32)
+        cv2.fillConvexPoly(cropped_tr2_mask, points2, 255)
+
+        # Affine-warp the source triangle onto the destination triangle's geometry
+        M = cv2.getAffineTransform(np.float32(points), np.float32(points2))
+        warped_triangle = cv2.warpAffine(cropped_triangle, M, (w2, h2))
+        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=cropped_tr2_mask)
+
+        # Mask out already-written pixels so overlapping triangle edges don't double up
+        img2_face_area = img2_new_face[y2:y2+h2, x2:x2+w2]
+        img2_face_area_gray = cv2.cvtColor(img2_face_area, cv2.COLOR_BGR2GRAY)
+        _, mask_triangles_inv = cv2.threshold(img2_face_area_gray, 1, 255, cv2.THRESH_BINARY_INV)
+        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=mask_triangles_inv)
+        img2_face_area = cv2.add(img2_face_area, warped_triangle)
+        img2_new_face[y2:y2+h2, x2:x2+w2] = img2_face_area
+
+    # Blank out img2's face region, then drop in the reconstructed face
+    img2_face_mask = np.zeros_like(img2_gray)
+    img2_head_mask = cv2.fillConvexPoly(img2_face_mask, convexhull2, 255)
+    img2_face_mask = cv2.bitwise_not(img2_head_mask)
+    img2_head_noface = cv2.bitwise_and(img2, img2, mask=img2_face_mask)
+    result = cv2.add(img2_head_noface, img2_new_face)
+
+    # Seamless cloning (Poisson blending) smooths color and lighting at the seam
+    x, y, w, h = cv2.boundingRect(convexhull2)
+    center_face2 = (x + w // 2, y + h // 2)
+    seamlessclone = cv2.seamlessClone(result, img2, img2_head_mask, center_face2, cv2.NORMAL_CLONE)
+
+    return Image.fromarray(seamlessclone)
+
+def swap_faces(image1, image2):
+    # If a video was uploaded, grab its first frame and convert it to PIL
+    def video_to_pil(video_file):
+        cap = cv2.VideoCapture(video_file.name)
+        ret, frame = cap.read()
+        cap.release()
+        if not ret:
+            return None
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return Image.fromarray(frame_rgb)
+
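+    # Only the first frame of a video is swapped by design; a full-video
+    # version would have to loop over every frame and re-encode the output.
+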
+    # Convert inputs to PIL Images (gr.File passes a tempfile-like object with .name)
+    if hasattr(image1, "name") and image1.name.lower().endswith(('.mp4', '.mov', '.avi')):
+        img1_pil = video_to_pil(image1)
+    else:
+        img1_pil = Image.open(image1)
+
+    if hasattr(image2, "name") and image2.name.lower().endswith(('.mp4', '.mov', '.avi')):
+        img2_pil = video_to_pil(image2)
+    else:
+        img2_pil = Image.open(image2)
+
+    if img1_pil is None or img2_pil is None:
+        return None
+
+    return process_frame(img1_pil, img2_pil)
+
+iface = gr.Interface(
+    fn=swap_faces,
+    # use the top-level gr.File component (gr.inputs was deprecated and later removed)
+    inputs=[gr.File(label="Upload Image/Video 1"), gr.File(label="Upload Image/Video 2")],
+    outputs=gr.Image(label="Face Swapped Result"),
+    title="Face Swap Image/Video (first frame only for video)",
+    description="Upload two images or videos; if videos, only the first frame will be used for swapping faces."
+)
+
+iface.launch()