Spaces:

alvinichi
/

imageToVideo

Runtime error

App Files Files Community

alvinichi committed on Apr 17, 2025

Commit

7a842d4

1 Parent(s): 7491096

update lib

Browse files

Files changed (2) hide show

app.py +126 -109
requirements.txt +4 -7

app.py CHANGED Viewed

@@ -3,106 +3,131 @@ import torch
 import numpy as np
 import imageio
 import os
-import gdown
 import cv2
-from skimage.transform import resize
-from skimage import img_as_ubyte
-from first_order_model.demo import load_checkpoints, make_animation
-# Tải và cài đặt mô hình
-def download_model():
-    model_path = 'first_order_model/vox-cpk.pth.tar'
-    if not os.path.exists(model_path):
-        os.makedirs('first_order_model', exist_ok=True)
-        url = 'https://drive.google.com/uc?id=1PyQJmkdCsAkOYwUyaj_l-l0as-iLDgeH'
-        gdown.download(url, model_path, quiet=False)
-    if not os.path.exists('first_order_model/config/vox-256.yaml'):
-        os.makedirs('first_order_model/config', exist_ok=True)
-        config_url = 'https://drive.google.com/uc?id=1PmYZrk1MLyfYFa5vgRy0HVfkWTR42NMj'
-        gdown.download(config_url, 'first_order_model/config/vox-256.yaml', quiet=False)
 # Tạo video người chuyển động
-def animate_person(source_image, driving_video=None, movement_type="Mặc định"):
-    if source_image is None:
-        return None, "Vui lòng tải lên một hình ảnh người."
     try:
-        # Tải mô hình
-        download_model()
-        # Chuẩn bị hình ảnh nguồn
-        source_image_path = "source_image.jpg"
-        cv2.imwrite(source_image_path, cv2.cvtColor(np.array(source_image), cv2.COLOR_RGB2BGR))
-        # Chuẩn bị video tham chiếu
-        if driving_video is None or movement_type != "Tùy chỉnh":
-            # Sử dụng video mẫu dựa trên loại chuyển động
-            driving_videos = {
-                "Mặc định": "driving_videos/default.mp4",
-                "Nói chuyện": "driving_videos/talking.mp4",
-                "Quay đầu": "driving_videos/head_turning.mp4",
-                "Cười": "driving_videos/smiling.mp4"
-            }
-            # Tạo thư mục cho video mẫu
-            os.makedirs("driving_videos", exist_ok=True)
-            # Tạo video mẫu đơn giản nếu chưa có
-            if not os.path.exists(driving_videos[movement_type]):
-                # Tạo một video mẫu đơn giản với OpenCV
-                # Trong thực tế, bạn sẽ tải video mẫu từ nguồn khác
-                video_path = driving_videos[movement_type]
-                out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), 25, (256, 256))
-                # Tạo 100 khung hình với chuyển động đơn giản
-                for i in range(100):
-                    frame = np.zeros((256, 256, 3), dtype=np.uint8)
-                    if movement_type == "Nói chuyện":
-                        cv2.ellipse(frame, (128, 180), (30 + i % 10, 20), 0, 0, 360, (255, 255, 255), -1)
-                    elif movement_type == "Quay đầu":
-                        cv2.ellipse(frame, (128 + int(np.sin(i/10) * 30), 128), (60, 80), 0, 0, 360, (255, 255, 255), -1)
-                    elif movement_type == "Cười":
-                        cv2.ellipse(frame, (128, 128), (60, 80), 0, 0, 360, (255, 255, 255), -1)
-                        cv2.ellipse(frame, (128, 160 - i % 15), (40, 20), 0, 0, 180, (0, 0, 0), -1)
-                    else:  # Mặc định
-                        cv2.ellipse(frame, (128, 128), (60, 80), 0, 0, 360, (255, 255, 255), -1)
-                        cv2.circle(frame, (110, 110), 10, (0, 0, 0), -1)
-                        cv2.circle(frame, (146, 110), 10, (0, 0, 0), -1)
-                    out.write(frame)
-                out.release()
-            driving_video_path = driving_videos[movement_type]
-        else:
-            # Sử dụng video do người dùng tải lên
-            driving_video_path = "driving_video.mp4"
-            # Lưu video tải lên
-            with open(driving_video_path, 'wb') as f:
-                f.write(driving_video)
-        # Tải mô hình và cấu hình
-        generator, kp_detector = load_checkpoints(
-            config_path='first_order_model/config/vox-256.yaml',
-            checkpoint_path='first_order_model/vox-cpk.pth.tar',
-            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        )
-        # Tạo animation
-        predictions = make_animation(
-            source_image=source_image_path,
-            driving_video=driving_video_path,
-            generator=generator,
-            kp_detector=kp_detector,
-            relative=True,
-            adapt_movement_scale=True,
-            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        )
-        # Lưu video kết quả
         output_path = "animated_person.mp4"
-        imageio.mimsave(output_path, [img_as_ubyte(frame) for frame in predictions], fps=25)
         return output_path, "Video được tạo thành công!"
@@ -112,23 +137,20 @@ def animate_person(source_image, driving_video=None, movement_type="Mặc địn
 # Tạo giao diện Gradio
 with gr.Blocks(title="Ứng dụng tạo chuyển động cho người trong ảnh") as demo:
     gr.Markdown("# Tạo video người chuyển động từ ảnh")
-    gr.Markdown("Tạo video trong đó người/khuôn mặt trong ảnh của bạn chuyển động tự nhiên")
     with gr.Row():
         with gr.Column():
-            source_image = gr.Image(type="numpy", label="Tải lên ảnh người")
-            with gr.Row():
-                movement_choice = gr.Radio(
-                    ["Mặc định", "Nói chuyện", "Quay đầu", "Cười", "Tùy chỉnh"],
-                    label="Chọn kiểu chuyển động",
-                    value="Mặc định"
-                )
-            driving_video = gr.Video(
-                label="Tải lên video tham chiếu (chỉ khi chọn 'Tùy chỉnh')"
             )
             submit_btn = gr.Button("Tạo video")
         with gr.Column():
@@ -137,18 +159,13 @@ with gr.Blocks(title="Ứng dụng tạo chuyển động cho người trong ả
     submit_btn.click(
         fn=animate_person,
-        inputs=[source_image, driving_video, movement_choice],
         outputs=[output_video, output_message]
     )
-    gr.Markdown("### Cách thức hoạt động")
-    gr.Markdown("1. Ứng dụng sử dụng mô hình First Order Motion Model để phân tích chuyển động")
-    gr.Markdown("2. Mô hình áp dụng chuyển động từ video tham chiếu vào đối tượng trong ảnh của bạn")
-    gr.Markdown("3. Kết quả là một video với người/đối tượng trong ảnh của bạn chuyển động tự nhiên")
     gr.Markdown("### Lưu ý")
-    gr.Markdown("- Kết quả tốt nhất với ảnh chụp chính diện, nền đơn giản")
-    gr.Markdown("- Khuôn mặt/người cần rõ ràng và không bị che khuất")
-    gr.Markdown("- Nếu bạn chọn 'Tùy chỉnh', hãy tải lên video có chuyển động bạn muốn áp dụng")
 demo.launch()

 import numpy as np
 import imageio
 import os
+from PIL import Image
 import cv2
+# Separate the subject from the background.
+def segment_person(image):
+    # Builds a rough foreground mask by thresholding brightness and returns
+    # the single-channel mask produced by cv2.threshold / cv2.morphologyEx.
+    # `image` is passed through np.array() and interpreted as RGB.
+    # In practice you would use a segmentation model such as U2Net;
+    # this is a simplified version based on colour thresholding.
+    # Convert to the HSV colour space
+    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2HSV)
+    # Build a simple mask (a real segmentation model is needed in practice).
+    # Assumes the background is brighter than the subject: with
+    # THRESH_BINARY_INV, pixels whose V channel is <= 127 become 255, the rest 0.
+    _, mask = cv2.threshold(image_cv[:, :, 2], 127, 255, cv2.THRESH_BINARY_INV)
+    # Clean up the mask: opening drops small specks, closing fills small holes
+    kernel = np.ones((5, 5), np.uint8)
+    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
+    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
+    return mask
 # Tạo video người chuyển động
+def animate_person(image, movement_type, num_frames=24):
+    # Builds a short clip in which the thresholded "person" layer is shifted
+    # and/or rotated over a static background, then writes it with imageio.
+    #   image         -- input picture; None returns (None, <message>). It is
+    #                    used via .mode / .convert / .resize, so presumably a
+    #                    PIL image (the UI declares gr.Image(type="pil")).
+    #   movement_type -- compared against the radio labels below; any other
+    #                    value falls through to the gentle-motion branch.
+    #   num_frames    -- number of frames generated; passed straight to range().
+    # Returns (output_path, status_message) on the success path.
+    # NOTE(review): num_frames is not cast; if the slider delivers a float,
+    # range() raises TypeError -- confirm and consider int(num_frames).
+    if image is None:
+        return None, "Vui lòng tải lên một hình ảnh."
     try:
+        # Make sure the image is in RGB mode
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Resize the image; the (512, 512) sizes and centres below rely on this
+        image = image.resize((512, 512))
+        image_array = np.array(image)
+        # Separate the person from the background
+        mask = segment_person(image)
+        # Create the background layer and the person layer
+        background = image_array.copy()
+        person = image_array.copy()
+        # Apply the mask: scale it to 0..1 and replicate it over 3 channels,
+        # which turns both layers into float arrays
+        person_mask = np.stack([mask, mask, mask], axis=2) / 255.0
+        person = person * person_mask
+        background = background * (1 - person_mask)
+        # Build the frames according to the movement type
+        frames = []
+        if movement_type == "Đi bộ":
+            # Simulate walking: bob up and down and shift sideways
+            for i in range(num_frames):
+                y_offset = int(np.sin(i/8 * 2 * np.pi) * 10)
+                x_offset = i % 4 - 2  # small step rhythm: cycles -2, -1, 0, 1
+                # Start the new frame from the static background
+                frame = background.copy()
+                # Add the person shifted by the offset
+                M = np.float32([[1, 0, x_offset], [0, 1, y_offset]])
+                moved_person = cv2.warpAffine(person, M, (512, 512))
+                # Combine background and person.
+                # NOTE(review): the layers are summed, not alpha-composited.
+                # Where the moved person lands on non-zero background pixels
+                # the sum can exceed 255, and astype(np.uint8) does not clamp,
+                # so those pixels get wrong values; the area the person
+                # vacated stays black. The same applies to every branch below.
+                frame = frame + moved_person
+                frames.append(frame.astype(np.uint8))
+        elif movement_type == "Vẫy tay":
+            # Simulate waving: slight rotation about a point in the upper part
+            for i in range(num_frames):
+                angle = np.sin(i/6 * 2 * np.pi) * 5  # rotate by +/-5 degrees
+                # Build the rotation matrix
+                center = (256, 200)  # assumes the pivot is in the person's upper part
+                M = cv2.getRotationMatrix2D(center, angle, 1.0)
+                # Rotate the person
+                rotated_person = cv2.warpAffine(person, M, (512, 512))
+                # Combine background and rotated person
+                frame = background.copy() + rotated_person
+                frames.append(frame.astype(np.uint8))
+        elif movement_type == "Nhảy múa":
+            # Simulate dancing: combine translation and rotation
+            for i in range(num_frames):
+                y_offset = int(np.sin(i/6 * 2 * np.pi) * 15)
+                x_offset = int(np.sin(i/4 * 2 * np.pi) * 10)
+                angle = np.sin(i/8 * 2 * np.pi) * 3
+                # Rotate the person
+                center = (256, 256)
+                M_rot = cv2.getRotationMatrix2D(center, angle, 1.0)
+                rotated_person = cv2.warpAffine(person, M_rot, (512, 512))
+                # Translate the rotated person
+                M_trans = np.float32([[1, 0, x_offset], [0, 1, y_offset]])
+                moved_person = cv2.warpAffine(rotated_person, M_trans, (512, 512))
+                # Combine background and moved person
+                frame = background.copy() + moved_person
+                frames.append(frame.astype(np.uint8))
+        else:  # gentle motion (also the fallback for any unrecognised label)
+            for i in range(num_frames):
+                angle = np.sin(i/12 * 2 * np.pi) * 2
+                y_offset = int(np.sin(i/10 * 2 * np.pi) * 5)
+                # Rotate the person
+                center = (256, 256)
+                M_rot = cv2.getRotationMatrix2D(center, angle, 1.0)
+                rotated_person = cv2.warpAffine(person, M_rot, (512, 512))
+                # Translate the rotated person (vertical only)
+                M_trans = np.float32([[1, 0, 0], [0, 1, y_offset]])
+                moved_person = cv2.warpAffine(rotated_person, M_trans, (512, 512))
+                # Combine background and moved person
+                frame = background.copy() + moved_person
+                frames.append(frame.astype(np.uint8))
+        # Save the video at 8 frames per second
         output_path = "animated_person.mp4"
+        imageio.mimsave(output_path, frames, fps=8)
         return output_path, "Video được tạo thành công!"
 # Tạo giao diện Gradio
 with gr.Blocks(title="Ứng dụng tạo chuyển động cho người trong ảnh") as demo:
     gr.Markdown("# Tạo video người chuyển động từ ảnh")
+    gr.Markdown("Tạo video trong đó chỉ người trong ảnh chuyển động, nền vẫn giữ nguyên")
     with gr.Row():
         with gr.Column():
+            image_input = gr.Image(type="pil", label="Tải lên ảnh người")
+            movement_type = gr.Radio(
+                ["Đi bộ", "Vẫy tay", "Nhảy múa", "Chuyển động nhẹ"],
+                label="Loại chuyển động",
+                value="Chuyển động nhẹ"
+            )
+            num_frames = gr.Slider(
+                minimum=12, maximum=36, value=24, step=4,
+                label="Số khung hình"
             )
             submit_btn = gr.Button("Tạo video")
         with gr.Column():
     submit_btn.click(
         fn=animate_person,
+        inputs=[image_input, movement_type, num_frames],
         outputs=[output_video, output_message]
     )
     gr.Markdown("### Lưu ý")
+    gr.Markdown("- Sử dụng ảnh có người trên nền đơn giản để có kết quả tốt nhất")
+    gr.Markdown("- Phương pháp này tách người và nền, chỉ làm chuyển động người")
+    gr.Markdown("- Đây là phiên bản đơn giản, kết quả thực tế sẽ phụ thuộc vào chất lượng hình ảnh")
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,10 +1,7 @@
 gradio==4.0.2
-torch==1.7.1
-torchvision==0.8.2
 numpy
-imageio==2.9.0
 imageio-ffmpeg
-scikit-image
-opencv-python
-gdown
-pyyaml

 gradio==4.0.2
+torch
 numpy
+Pillow
+imageio==2.31.1
 imageio-ffmpeg
+opencv-python