import os
import sys
import uuid
import shutil
import time
import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import numpy as np
import cv2
import subprocess
import tempfile
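
# Overview (added note): this app is a local, API-free stand-in for Wan2.2-Animate.
# It uses Stable Video Diffusion (SVD) as a proxy model: the user picks a reference
# image and a template video, SVD animates the image locally, and the resulting frames
# are encoded to MP4 with ffmpeg (ffmpeg must be available on PATH).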
# Folders (created automatically)
IMG_DIR = './img'
VIDEO_DIR = './video'
OUTPUT_DIR = './output'
os.makedirs(IMG_DIR, exist_ok=True)
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
class WanAnimateApp:
    def __init__(self):
        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
        # Load fp16 weights; run on GPU when available, otherwise fall back to CPU/fp32.
        use_cuda = torch.cuda.is_available()
        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            variant="fp16",
        )
        self.pipe.to("cuda" if use_cuda else "cpu")
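        # Note (added): from_pretrained downloads the SVD checkpoint from the Hugging
        # Face Hub on the first launch, so the initial run needs network access and
        # several GB of disk cache.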

    def get_img_files(self):
        """List of files in the img folder."""
        if os.path.exists(IMG_DIR):
            return [f for f in os.listdir(IMG_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        return []

    def get_video_files(self):
        """List of files in the video folder."""
        if os.path.exists(VIDEO_DIR):
            return [f for f in os.listdir(VIDEO_DIR) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
        return []

    def load_img_from_dropdown(self, selected_file):
        """Load the selected photo into ref_img."""
        if selected_file:
            path = os.path.join(IMG_DIR, selected_file)
            if os.path.exists(path):
                return path
        return None

    def load_video_from_dropdown(self, selected_file):
        """Load the selected video into the video component."""
        if selected_file:
            path = os.path.join(VIDEO_DIR, selected_file)
            if os.path.exists(path):
                return path
        return None

    def predict(
        self,
        ref_img_path,
        video_path,
        model_id,
        model,
    ):
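        """Run the local SVD proxy and return (output MP4 path or None, status string).

        ref_img_path and video_path come from the Gradio components; model_id selects
        the Wan-style mode string and model selects the quality preset used below for
        the frame and step counts.
        """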
        if ref_img_path is None or video_path is None:
            return None, "Select a file from the list or upload a new one."
        try:
            # Local processing
            ref_image = Image.open(ref_img_path).convert("RGB").resize((576, 320))
            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            motion_hint = f" with dynamic motion from {frame_count} frames"
            # Prompt based on mode
            if model_id == "wan2.2-animate-move":
                prompt = f"Animate the character in the reference image{motion_hint}, high quality, smooth movements."
            else:
                prompt = f"Replace the character in the video with the reference image{motion_hint}, seamless, detailed."
            # Parameters
            num_frames = 25 if model == "wan-pro" else 14
            num_steps = 25 if model == "wan-pro" else 15
            # Local generation
            generator = torch.Generator(device="cpu").manual_seed(42)
            output = self.pipe(
                ref_image,
                num_inference_steps=num_steps,
                num_frames=num_frames,
                generator=generator,
                decode_chunk_size=2
            ).frames[0]
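            # Note (added): decode_chunk_size=2 decodes the latent frames a few at a
            # time to limit VAE memory use; the frames are then written out and encoded
            # at 7 fps, which roughly matches SVD's native frame rate.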
            # Save MP4 with ffmpeg
            temp_dir = tempfile.mkdtemp()
            for i, frame in enumerate(output):
                frame.save(f"{temp_dir}/frame_{i:04d}.png")
            temp_video = os.path.join(OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
            subprocess.run([
                'ffmpeg', '-y', '-framerate', '7', '-i', f"{temp_dir}/frame_%04d.png",
                '-c:v', 'libx264', '-pix_fmt', 'yuv420p', temp_video
            ], check=True)
            shutil.rmtree(temp_dir)
            return temp_video, "SUCCEEDED"
        except Exception as e:
            return None, f"Failed: {str(e)}"
def start_app():
    app = WanAnimateApp()
    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
gr.HTML("""
<div style="padding: 2rem; text-align: center; max-width: 1200px; margin: 0 auto; font-family: Arial, sans-serif;">
<h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem; color: #333;">
Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
</h1>
<h3 style="font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem; color: #333;">
Local version without API (SVD Proxy)
</h3>
<div style="font-size: 1.25rem; margin-bottom: 1.5rem; color: #555;">
Tongyi Lab, Alibaba
</div>
<div style="display: flex; flex-wrap: wrap; justify-content: center; gap: 1rem; margin-bottom: 1.5rem;">
<a href="https://arxiv.org/abs/2509.14055" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
<span style="margin-right: 0.5rem;">📄</span>Paper
</a>
<a href="https://github.com/Wan-Video/Wan2.2" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
<span style="margin-right: 0.5rem;">💻</span>GitHub
</a>
<a href="https://huggingface.co/Wan-AI/Wan2.2-Animate-14B" target="_blank" style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500;">
<span style="margin-right: 0.5rem;">🤗</span>HF Model
</a>
</div>
</div>
""")
gr.HTML("""
<details>
<summary>‼️Usage</summary>
Wan-Animate supports two modes:
<ul>
<li>Move Mode: Use the movements extracted from the input video to drive the character in the input image</li>
<li>Mix Mode: Use the character in the input image to replace the character in the input video</li>
</ul>
Currently, the following restrictions apply to inputs:
<ul>
<li>Video file size: Less than 200MB</li>
<li>Video resolution: The shorter side must be greater than 200 pixels, and the longer side must be less than 2048 pixels</li>
<li>Video duration: 2s to 30s</li>
<li>Video aspect ratio: 1:3 to 3:1</li>
<li>Video formats: mp4, avi, mov</li>
<li>Image file size: Less than 5MB</li>
<li>Image resolution: The shorter side must be greater than 200 pixels, and the longer side must be less than 4096 pixels</li>
<li>Image formats: jpg, png, jpeg, webp, bmp</li>
</ul>
Currently, inference quality has two variants. You can use our open-source code for more flexible configuration.
<ul>
<li>wan-pro: 25fps, 720p</li>
<li>wan-std: 15fps, 720p</li>
</ul>
</details>
""")
        with gr.Row():
            with gr.Column():
                # Dropdown for img
                img_dropdown = gr.Dropdown(
                    label="Select Image from img folder",
                    choices=app.get_img_files(),
                    value=None,
                )
                ref_img = gr.Image(
                    label="Reference Image",
                    type="filepath",
                    value=None,
                )
                img_dropdown.change(
                    fn=app.load_img_from_dropdown,
                    inputs=[img_dropdown],
                    outputs=[ref_img],
                )
                # Dropdown for video
                video_dropdown = gr.Dropdown(
                    label="Select Video from video folder",
                    choices=app.get_video_files(),
                    value=None,
                )
                video = gr.Video(
                    label="Template Video",
                    sources=["upload"],
                    value=None,
                )
                video_dropdown.change(
                    fn=app.load_video_from_dropdown,
                    inputs=[video_dropdown],
                    outputs=[video],
                )
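                # Note (added): the .change handlers above simply copy a file path from
                # ./img or ./video into the corresponding upload component, so inputs can
                # come either from those folders or from a manual upload.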
                with gr.Row():
                    model_id = gr.Dropdown(
                        label="Mode",
                        choices=["wan2.2-animate-move", "wan2.2-animate-mix"],
                        value="wan2.2-animate-move",
                    )
                    model = gr.Dropdown(
                        label="Inference Quality",
                        choices=["wan-pro", "wan-std"],
                        value="wan-pro",
                    )
                run_button = gr.Button("Generate Video")
            with gr.Column():
                output_video = gr.Video(label="Output Video")
                output_status = gr.Textbox(label="Status")
        run_button.click(
            fn=app.predict,
            inputs=[
                ref_img,
                video,
                model_id,
                model,
            ],
            outputs=[output_video, output_status],
        )
    demo.queue(default_concurrency_limit=1)
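    # Note (added): binding to 0.0.0.0 on port 7860 matches what a Hugging Face Space
    # (or a typical Docker deployment) expects for the Gradio server.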
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )
if __name__ == "__main__":
    start_app()