import os  # Used to create Gradio's temp directory at startup
import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import cv2
import tempfile
from diffusers.utils import export_to_video  # Exports generated frames to a video file

class WanAnimateApp:
    def __init__(self):
        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # CPU inference runs in float32
            variant="fp16",  # fetch the fp16 weight files; they are upcast on load
            low_cpu_mem_usage=True  # Reduce peak memory while loading
        )
        self.pipe.to("cpu")  # Explicitly place the pipeline on CPU
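        # Optional tweak (an assumption, not in the original code): attention
        # slicing trades some speed for lower peak memory, which can help on CPU.
        # self.pipe.enable_attention_slicing()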

    def predict(self, ref_img, video, model_id, model):
        if ref_img is None or video is None:
            return None, "Upload both an image and a video."

        try:
            # ref_img arrives from Gradio as a numpy array; convert to a PIL image
            # and resize to a small 16:9-ish resolution to keep CPU inference cheap
            ref_image = Image.fromarray(ref_img).convert("RGB").resize((576, 320))

            # Derive a simple motion hint from the template video (Gradio passes a filepath)
            cap = cv2.VideoCapture(video)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            motion_hint = f" with dynamic motion from {frame_count} frames"

            # Sampling parameters for the selected quality preset
            num_frames = 25 if model == "wan-pro" else 14
            num_steps = 25 if model == "wan-pro" else 15

            # Map the selected mode to a noise augmentation strength
            noise_aug_strength = 0.02
            if model_id == "wan2.2-animate-mix":
                noise_aug_strength = 0.1

            # Generate frames with a fixed seed for reproducibility
            generator = torch.Generator(device="cpu").manual_seed(42)
            output = self.pipe(
                ref_image,
                num_inference_steps=num_steps,
                num_frames=num_frames,
                generator=generator,
                decode_chunk_size=2,  # Decode a few frames at a time to limit VAE memory
                noise_aug_strength=noise_aug_strength
            ).frames[0]

            # Export the generated frames to an mp4 file
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
                export_to_video(output, temp_video.name, fps=7)

            return temp_video.name, "SUCCEEDED" + motion_hint

        except Exception as e:
            return None, f"Failed: {str(e)}"
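
# A minimal validation sketch for the input restrictions listed in the UI text
# inside start_app() below. This helper is an illustrative addition (not part
# of the original app); the SVD proxy itself does not enforce these limits.
def validate_video(video_path):
    """Return an error string if the video violates the documented limits, else None."""
    if os.path.getsize(video_path) > 200 * 1024 * 1024:
        return "Video file size must be less than 200MB."
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 1.0
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    if min(width, height) <= 200 or max(width, height) >= 2048:
        return "Shorter side must exceed 200 px and longer side stay under 2048 px."
    duration = frame_count / fps
    if not 2.0 <= duration <= 30.0:
        return "Video duration must be between 2s and 30s."
    aspect = width / height if height else 0.0
    if not (1 / 3) <= aspect <= 3:
        return "Aspect ratio must be between 1:3 and 3:1."
    return None
# Example wiring (also an assumption): call this at the top of predict(), e.g.
#   err = validate_video(video)
#   if err:
#       return None, err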

def start_app():
    # Ensure Gradio's temp directory exists
    os.makedirs("/tmp/gradio", exist_ok=True)
    
    app = WanAnimateApp()
    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
        gr.HTML("""
            Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
            Local version without API (SVD Proxy)
            Tongyi Lab, Alibaba
            📄Paper 💻GitHub 🤗HF Model
        """)

        gr.HTML("""
            ‼️ Usage: Wan-Animate supports two modes:

            * Move Mode: use the movements extracted from the input video to drive the character in the input image

            * Mix Mode: use the character in the input image to replace the character in the input video

            Currently, the following restrictions apply to inputs:

            * Video file size: Less than 200MB

            * Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048

            * Video duration: 2s to 30s

            * Video aspect ratio: 1:3 to 3:1

            * Video formats: mp4, avi, mov

            * Image file size: Less than 5MB

            * Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096

            * Image formats: jpg, png, jpeg, webp, bmp

            Currently, the inference quality has two variants. You can use our open-source code for more flexible configuration.

            * wan-pro: 25fps, 720p

            * wan-std: 15fps, 720p
        """)

        with gr.Row():
            with gr.Column():
                ref_img = gr.Image(label="Reference Image", type="numpy", sources=["upload"])  # numpy avoids a FileNotFound issue with filepath uploads
                video = gr.Video(label="Template Video", sources=["upload"])
                with gr.Row():
                    model_id = gr.Dropdown(label="Mode", choices=["wan2.2-animate-move", "wan2.2-animate-mix"], value="wan2.2-animate-move")
                    model = gr.Dropdown(label="Inference Quality", choices=["wan-pro", "wan-std"], value="wan-pro")
                run_button = gr.Button("Generate Video")

            with gr.Column():
                output_video = gr.Video(label="Output Video")
                output_status = gr.Textbox(label="Status")

        run_button.click(
            fn=app.predict,
            inputs=[ref_img, video, model_id, model],
            outputs=[output_video, output_status]
        )

    demo.queue(default_concurrency_limit=1)
    demo.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    start_app()