matthewkram committed
Commit 4b86371 · verified · 1 Parent(s): 7810d59

Update app.py

Files changed (1)
  1. app.py +68 -127
app.py CHANGED
@@ -1,10 +1,6 @@
  import os
- import sys
- import uuid
- import shutil
- import time
- import gradio as gr
  import torch
  from diffusers import StableVideoDiffusionPipeline
  from PIL import Image
  import numpy as np
@@ -12,127 +8,72 @@ import cv2
  import tempfile
  from diffusers.utils import export_to_video

- class WanAnimateApp:
-     def __init__(self):
-         model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
-         dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-         self.pipe = StableVideoDiffusionPipeline.from_pretrained(
-             model_name,
-             torch_dtype=dtype,
              variant="fp16"
          )
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         self.pipe.to(device)
-         gr.Info(f"Model on {device.upper()}. If on CPU, switch to GPU in Settings!")
-
-     def predict(self, ref_img, video, model_id, model, progress=gr.Progress()):
-         if ref_img is None or video is None:
-             return None, "Upload both image and video."
-
-         progress(0, desc="Preparing...")
-         ref_image = Image.fromarray(ref_img).convert("RGB").resize((576, 320))
-
-         cap = cv2.VideoCapture(video)
-         frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-         cap.release()
-         motion_hint = f" with dynamic motion from {frame_count} frames"
-
-         num_frames = 25 if model == "wan-pro" else 14
-         num_steps = 25 if model == "wan-pro" else 15
-
-         noise_aug_strength = 0.02
-         if model_id == "wan2.2-animate-mix":
-             noise_aug_strength = 0.1
-
-         generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(42)
-         start_time = time.time()
-         output = self.pipe(
-             ref_image,
-             num_inference_steps=num_steps,
-             num_frames=num_frames,
-             generator=generator,
-             decode_chunk_size=2,
-             noise_aug_strength=noise_aug_strength,
-             callback_on_step_end=lambda step, timestep, latents: progress((step + 1) / num_steps, desc=f"Step {step + 1}/{num_steps}. Elapsed: {int(time.time() - start_time)} s")
-         ).frames[0]
-
-         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
-             export_to_video(output, temp_video.name, fps=7)
-
-         return temp_video.name, "SUCCEEDED" + motion_hint
-
- def start_app():
-     os.makedirs("/tmp/gradio", exist_ok=True)
-
-     app = WanAnimateApp()
-     with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
-         gr.HTML("""
-             Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
-             Local version without API (SVD Proxy)
-             Tongyi Lab, Alibaba
-             📄Paper 💻GitHub 🤗HF Model
-         """)
-
-         with gr.Accordion("Usage Instructions", open=False):
-             gr.HTML("""
-                 ‼️Usage: Wan-Animate supports two modes:
-
-                 * Move Mode: Use the movements extracted from the input video to drive the character in the input image
-
-                 * Mix Mode: Use the character in the input image to replace the character in the input video
-
-                 Currently, the following restrictions apply to inputs:
-
-                 * Video file size: Less than 200MB
-
-                 * Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
-
-                 * Video duration: 2s to 30s
-
-                 * Video aspect ratio: 1:3 to 3:1
-
-                 * Video formats: mp4, avi, mov
-
-                 * Image file size: Less than 5MB
-
-                 * Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
-
-                 * Image formats: jpg, png, jpeg, webp, bmp
-
-                 Currently, inference quality has two variants. You can use our open-source code for more flexible configuration.
-
-                 * wan-pro: 25fps, 720p
-
-                 * wan-std: 15fps, 720p
-             """)
-
-         with gr.Row():
-             with gr.Column():
-                 ref_img = gr.Image(label="Reference Image", type="numpy", sources=["upload"])
-                 video = gr.Video(label="Template Video", sources=["upload"])
-                 with gr.Row():
-                     model_id = gr.Dropdown(label="Mode", choices=["wan2.2-animate-move", "wan2.2-animate-mix"], value="wan2.2-animate-move")
-                     model = gr.Dropdown(label="Inference Quality", choices=["wan-pro", "wan-std"], value="wan-pro")
-                 run_button = gr.Button("Generate Video")
-
-             with gr.Column():
-                 output_video = gr.Video(label="Output Video")
-                 output_status = gr.Textbox(label="Status")
-
-         run_button.click(
-             fn=app.predict,
-             inputs=[ref_img, video, model_id, model],
-             outputs=[output_video, output_status]
-         )
-
-     demo.queue(default_concurrency_limit=1)
-     demo.launch(server_name="0.0.0.0", server_port=7860)
-
- if __name__ == "__main__":
-     start_app()
 
  import os
  import torch
+ import gradio as gr
  from diffusers import StableVideoDiffusionPipeline
  from PIL import Image
  import numpy as np
  import cv2
  import tempfile
  from diffusers.utils import export_to_video

+ pipe = None
+ def load():
+     # Lazy-load the pipeline once per process and reuse it across requests
+     global pipe
+     if pipe is None:
+         pipe = StableVideoDiffusionPipeline.from_pretrained(
+             "stabilityai/stable-video-diffusion-img2vid-xt",
+             torch_dtype=torch.float16,
              variant="fp16"
          )
+         pipe.to("cuda")
+         gr.Info("Model on GPU: generation takes 30–60 s")
+     return pipe
+
+ def run(ref_img, video, mode, quality, prog=gr.Progress()):
+     if ref_img is None or video is None:
+         return None, "Upload both image and video."
+     pipe = load()
+     prog(0, desc="Preparing...")
+     img = Image.fromarray(ref_img).convert("RGB").resize((576, 320))
+
+     cap = cv2.VideoCapture(video)
+     n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     cap.release()
+     hint = f" ({n} frames)"
+
+     steps = 25 if quality == "wan-pro" else 15
+     frames = 25 if quality == "wan-pro" else 14
+     noise = 0.1 if mode == "wan2.2-animate-mix" else 0.02
+
+     # diffusers invokes callback_on_step_end(pipe, step, timestep, callback_kwargs)
+     # and expects the callback_kwargs dict to be returned
+     def cb(_pipe, step, _t, kw):
+         prog((step + 1) / steps, desc=f"Step {step + 1}/{steps}")
+         return kw
+     prog(0.1, desc="Generating...")
+
+     out = pipe(
+         img,
+         num_inference_steps=steps,
+         num_frames=frames,
+         decode_chunk_size=2,
+         noise_aug_strength=noise,
+         callback_on_step_end=cb
+     ).frames[0]
+
+     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+     export_to_video(out, tmp.name, fps=7)
+     return tmp.name, "Done!" + hint
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Wan2.2-Animate (GPU)")
+     with gr.Accordion("Instructions", open=False):
+         gr.Markdown("Upload a photo + video → pick a mode → hit Generate")
+
+     with gr.Row():
+         with gr.Column():
+             img = gr.Image(label="Photo", type="numpy")
+             vid = gr.Video(label="Video")
+             with gr.Row():
+                 mode = gr.Dropdown(["wan2.2-animate-move", "wan2.2-animate-mix"],
+                                    label="Mode", value="wan2.2-animate-move")
+                 qual = gr.Dropdown(["wan-pro", "wan-std"], label="Quality", value="wan-pro")
+             btn = gr.Button("Generate Video")
+         with gr.Column():
+             out = gr.Video(label="Result")
+             stat = gr.Textbox(label="Status")
+
+     btn.click(run, [img, vid, mode, qual], [out, stat])
+
+ demo.queue(max_size=2).launch(
+     server_name="0.0.0.0",
+     server_port=7860,
+     share=True  # ← FIX 1
+     # enable_queue was dropped: .queue() above already enables queuing, and
+     # launch() no longer accepts this argument in recent Gradio releases
+ )
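
For reference, the running app can also be driven from a script. Below is a minimal sketch using gradio_client, assuming the server is reachable at http://127.0.0.1:7860, that Gradio exposed the click handler under its default endpoint name "/run" (derived from the function name), and that face.png and dance.mp4 are placeholder local files; none of these names come from the commit. The payload shape for the video input differs across Gradio versions, so confirm it with view_api() first.

    # Minimal client sketch (assumptions: local server, default "/run" endpoint,
    # placeholder input files). Not part of this commit.
    from gradio_client import Client, handle_file

    client = Client("http://127.0.0.1:7860/")
    print(client.view_api())  # confirm endpoint name and input payload shapes

    video_path, status = client.predict(
        handle_file("face.png"),              # ref_img: reference photo
        {"video": handle_file("dance.mp4")},  # video: template clip; shape may vary by version
        "wan2.2-animate-move",                # mode
        "wan-pro",                            # quality
        api_name="/run",
    )
    print(status, "->", video_path)

The two return values mirror run()'s outputs: the Video component comes back as a local path to the downloaded MP4, and the Textbox as the status string.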