File size: 10,551 Bytes
6c4f4bc
 
 
0297a7a
6c4f4bc
6cd2a2e
 
4adf2e1
6cd2a2e
 
 
6c4f4bc
 
6cd2a2e
c3e485a
b6811f4
 
3bb6d5d
b6811f4
 
3bb6d5d
 
6cd2a2e
 
4adf2e1
 
6cd2a2e
f63bd5d
4adf2e1
c20db4f
6cd2a2e
 
b6811f4
 
c3e485a
 
 
b6811f4
 
 
c3e485a
 
 
b6811f4
 
 
 
 
 
 
3bb6d5d
 
b6811f4
 
 
 
 
 
3bb6d5d
 
0297a7a
 
3bb6d5d
 
0297a7a
 
 
3bb6d5d
c3e485a
0d627a0
6cd2a2e
b6811f4
3bb6d5d
 
798d816
 
 
f63bd5d
 
0d627a0
 
 
 
f63bd5d
 
4adf2e1
 
f63bd5d
 
490303c
4adf2e1
 
 
 
 
bee4d96
4adf2e1
f63bd5d
766db7a
6c4f4bc
 
 
3bb6d5d
6c4f4bc
 
 
 
 
f63bd5d
6c4f4bc
f63bd5d
6cd2a2e
6c4f4bc
6cd2a2e
6c4f4bc
 
 
f63bd5d
6c4f4bc
0d627a0
 
 
 
 
 
 
e605f7f
0d627a0
e605f7f
ed2185f
0d627a0
 
 
 
 
 
 
 
 
 
6c4f4bc
 
0d627a0
6c4f4bc
0d627a0
c3e485a
 
ed2185f
c3e485a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2185f
f63bd5d
 
 
 
 
c3e485a
 
 
 
 
 
f63bd5d
c3e485a
3bb6d5d
c3e485a
 
 
 
 
 
 
 
 
 
 
 
 
f63bd5d
 
c3e485a
f63bd5d
c3e485a
 
 
 
 
 
f63bd5d
 
 
 
b6811f4
f63bd5d
 
 
 
 
 
b6811f4
f63bd5d
 
 
 
b6811f4
f63bd5d
 
b6811f4
 
f63bd5d
 
 
 
 
 
 
 
 
 
 
 
490303c
f63bd5d
 
 
 
 
 
0f934dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
import os
import sys
import uuid
import shutil
import time
import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import numpy as np
import cv2
import subprocess
import tempfile

# Working folders (created automatically when the module is imported)
IMG_DIR, VIDEO_DIR, OUTPUT_DIR = './img', './video', './output'
for _folder in (IMG_DIR, VIDEO_DIR, OUTPUT_DIR):
    os.makedirs(_folder, exist_ok=True)

class WanAnimateApp:
    def __init__(self):
        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            variant="fp16",
            device_map="cpu"
        )

    def get_img_files(self):
        """Список файлов в папке img"""
        if os.path.exists(IMG_DIR):
            return [f for f in os.listdir(IMG_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        return []

    def get_video_files(self):
        """Список файлов в папке video"""
        if os.path.exists(VIDEO_DIR):
            return [f for f in os.listdir(VIDEO_DIR) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
        return []

    def load_img_from_dropdown(self, selected_file):
        """Загружает выбранное фото в ref_img"""
        if selected_file:
            path = os.path.join(IMG_DIR, selected_file)
            if os.path.exists(path):
                return path
        return None

    def load_video_from_dropdown(self, selected_file):
        """Загружает выбранное видео в video"""
        if selected_file:
            path = os.path.join(VIDEO_DIR, selected_file)
            if os.path.exists(path):
                return path
        return None

    def predict(
        self, 
        ref_img_path,
        video_path,
        model_id,
        model,
    ):
        if ref_img_path is None or video_path is None:
            return None, "Выберите файл из списка или загрузите новый."

        try:
            # Local processing
            ref_image = Image.open(ref_img_path).convert("RGB").resize((576, 320))
            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            motion_hint = f" with dynamic motion from {frame_count} frames"

            # Prompt based on mode
            if model_id == "wan2.2-animate-move":
                prompt = f"Animate the character in the reference image{motion_hint}, high quality, smooth movements."
            else:
                prompt = f"Replace the character in the video with the reference image{motion_hint}, seamless, detailed."

            # Parameters
            num_frames = 25 if model == "wan-pro" else 14
            num_steps = 25 if model == "wan-pro" else 15

            # Local generation
            generator = torch.Generator(device="cpu").manual_seed(42)
            output = self.pipe(
                ref_image,
                num_inference_steps=num_steps,
                num_frames=num_frames,
                generator=generator,
                decode_chunk_size=2
            ).frames[0]

            # Save MP4 with ffmpeg
            temp_dir = tempfile.mkdtemp()
            for i, frame in enumerate(output):
                frame.save(f"{temp_dir}/frame_{i:04d}.png")
            temp_video = os.path.join(OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
            subprocess.run([
                'ffmpeg', '-y', '-framerate', '7', '-i', f"{temp_dir}/frame_%04d.png",
                '-c:v', 'libx264', '-pix_fmt', 'yuv420p', temp_video
            ], check=True)
            shutil.rmtree(temp_dir)

            return temp_video, "SUCCEEDED"

        except Exception as e:
            return None, f"Failed: {str(e)}"

def start_app():
    """Build the Gradio demo around a WanAnimateApp and serve it.

    The page shows a header, a collapsible usage section, the input controls
    (image/video pickers, mode and quality dropdowns) and the output widgets.
    Requests are queued with a default concurrency limit of 1 and the server
    listens on 0.0.0.0:7860.
    """
    animate_app = WanAnimateApp()

    # Static page header with links to the paper, code and model.
    header_html = """
            <div style="padding: 2rem; text-align: center; max-width: 1200px; margin: 0 auto; font-family: Arial, sans-serif;">
                <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem; color: #333;">
                    Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
                </h1>
                <h3 style="font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem; color: #333;">
                    Local version without API (SVD Proxy)
                </h3>
                <div style="font-size: 1.25rem; margin-bottom: 1.5rem; color: #555;">
                    Tongyi Lab, Alibaba
                </div>
                <div style="display: flex; flex-wrap: wrap; justify-content: center; gap: 1rem; margin-bottom: 1.5rem;">
                    <a href="https://arxiv.org/abs/2509.14055" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
                        <span style="margin-right: 0.5rem;">📄</span>Paper
                    </a>
                    <a href="https://github.com/Wan-Video/Wan2.2" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
                        <span style="margin-right: 0.5rem;">💻</span>GitHub
                    </a>
                    <a href="https://huggingface.co/Wan-AI/Wan2.2-Animate-14B" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
                        <span style="margin-right: 0.5rem;">🤗</span>HF Model
                    </a>
                </div>
            </div>
        """

    # Collapsible usage notes shown under the header.
    usage_html = """
            <details>
                <summary>‼️Usage (использования)</summary>
                Wan-Animate supports two modes:
                <ul>
                    <li>Move Mode: animate the character in input image with movements from the input video</li>
                    <li>Mix Mode: replace the character in input video with the character in input image</li>
                </ul>
                Wan-Animate supports two modes:
                <ul>
                    <li>Move Mode: Use the movements extracted from the input video to drive the character in the input image</li>
                    <li>Mix Mode: Use the character in the input image to replace the character in the input video</li>
                </ul>
                Currently, the following restrictions apply to inputs:
                <ul>
                    <li>Video file size: Less than 200MB</li>
                    <li>Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048</li>
                    <li>Video duration: 2s to 30s</li>
                    <li>Video aspect ratio: 1:3 to 3:1</li>
                    <li>Video formats: mp4, avi, mov</li>
                    <li>Image file size: Less than 5MB</li>
                    <li>Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096</li>
                    <li>Image formats: jpg, png, jpeg, webp, bmp</li>
                </ul>
                Current, the inference quality has two variants. You can use our open-source code for more flexible configuration.
                <ul>
                    <li>wan-pro: 25fps, 720p</li>
                    <li>wan-std: 15fps, 720p</li>
                </ul>
            </details>
        """

    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
        gr.HTML(header_html)
        gr.HTML(usage_html)

        with gr.Row():
            # Left column: inputs and the run button.
            with gr.Column():
                image_picker = gr.Dropdown(
                    label="Select Image from img folder (выбери фото)",
                    choices=animate_app.get_img_files(),
                    value=None,
                )
                reference_image = gr.Image(
                    label="Reference Image (изображение)",
                    type="filepath",
                    value=None,
                )

                video_picker = gr.Dropdown(
                    label="Select Video from video folder (выбери видео)",
                    choices=animate_app.get_video_files(),
                    value=None,
                )
                template_video = gr.Video(
                    label="Template Video (шаблонное видео)",
                    sources=["upload"],
                    value=None,
                )

                with gr.Row():
                    mode_choice = gr.Dropdown(
                        label="Mode (режим)",
                        choices=["wan2.2-animate-move", "wan2.2-animate-mix"],
                        value="wan2.2-animate-move",
                        info=""
                    )
                    quality_choice = gr.Dropdown(
                        label="Inference Quality (качество)",
                        choices=["wan-pro", "wan-std"],
                        value="wan-pro",
                    )

                generate_button = gr.Button("Generate Video (генерировать)")

            # Right column: generated video and status text.
            with gr.Column():
                result_video = gr.Video(label="Output Video (результат)")
                result_status = gr.Textbox(label="Status (статус)")

        # Event wiring: picking a file fills the matching input widget.
        image_picker.change(
            fn=animate_app.load_img_from_dropdown,
            inputs=[image_picker],
            outputs=[reference_image],
        )
        video_picker.change(
            fn=animate_app.load_video_from_dropdown,
            inputs=[video_picker],
            outputs=[template_video],
        )
        generate_button.click(
            fn=animate_app.predict,
            inputs=[reference_image, template_video, mode_choice, quality_choice],
            outputs=[result_video, result_status],
        )

    demo.queue(default_concurrency_limit=1)
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)

# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    start_app()