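"""Wan2.2-Animate (Local No API) Gradio Space.

Local version without API: runs Stable Video Diffusion (SVD) on CPU as a
proxy ("SVD Proxy") for Wan2.2-Animate character animation/replacement.
"""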
import os  # For creating the Gradio temp directory (kept just in case)
import sys
import uuid
import shutil
import time
import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import numpy as np
import cv2
import tempfile
from diffusers.utils import export_to_video  # For writing generated frames to an .mp4
class WanAnimateApp:
    def __init__(self):
        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # float32 for CPU inference
            variant="fp16",             # fetch the smaller fp16 weight files
            low_cpu_mem_usage=True      # reduce peak memory while loading (CPU optimization)
        )
        self.pipe.to("cpu")  # manually move the pipeline to CPU
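        # Note: self.pipe.enable_attention_slicing() is another standard diffusers
        # option that can further reduce peak memory on CPU (not enabled here).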
    def predict(self, ref_img, video, model_id, model):
        if ref_img is None or video is None:
            return None, "Upload both image and video."
        try:
            # Image handling (ref_img is now an np.array; convert to PIL)
            ref_image = Image.fromarray(ref_img).convert("RGB").resize((576, 320))
            # Extract motion info from the video (video is a file path)
            cap = cv2.VideoCapture(video)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            motion_hint = f" with dynamic motion from {frame_count} frames"
            # Parameters
            num_frames = 25 if model == "wan-pro" else 14
            num_steps = 25 if model == "wan-pro" else 15
            # Mode-dependent adjustment
            noise_aug_strength = 0.02
            if model_id == "wan2.2-animate-mix":
                noise_aug_strength = 0.1
            # Generation
            generator = torch.Generator(device="cpu").manual_seed(42)
            output = self.pipe(
                ref_image,
                num_inference_steps=num_steps,
                num_frames=num_frames,
                generator=generator,
                decode_chunk_size=2,  # decode latents in small chunks (VAE memory optimization)
                noise_aug_strength=noise_aug_strength
            ).frames[0]
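            # StableVideoDiffusionPipeline returns .frames as a list with one
            # list of PIL images per input image; [0] selects the single clip.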
            # Export the frames to a temporary .mp4 file
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
                export_to_video(output, temp_video.name, fps=7)
            return temp_video.name, "SUCCEEDED" + motion_hint
        except Exception as e:
            return None, f"Failed: {str(e)}"
def start_app():
    # Create the Gradio temp directory (just in case)
    os.makedirs("/tmp/gradio", exist_ok=True)
    app = WanAnimateApp()
    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
        gr.HTML("""
        Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
        Local version without API (SVD Proxy)
        Tongyi Lab, Alibaba
        📄Paper 💻GitHub 🤗HF Model
        """)
gr.HTML("""
‼️Usage (использования) Wan-Animate supports two modes:
* Move Mode: animate the character in input image with movements from the input video
* Mix Mode: replace the character in input video with the character in input image
Wan-Animate supports two modes:
* Move Mode: Use the movements extracted from the input video to drive the character in the input image
* Mix Mode: Use the character in the input image to replace the character in the input video
Currently, the following restrictions apply to inputs:
* Video file size: Less than 200MB
* Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
* Video duration: 2s to 30s
* Video aspect ratio: 1:3 to 3:1
* Video formats: mp4, avi, mov
* Image file size: Less than 5MB
* Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
* Image formats: jpg, png, jpeg, webp, bmp
Current, the inference quality has two variants. You can use our open-source code for more flexible configuration.
* wan-pro: 25fps, 720p
* wan-std: 15fps, 720p
""")
        with gr.Row():
            with gr.Column():
                ref_img = gr.Image(label="Reference Image", type="numpy", sources=["upload"])  # numpy type works around a FileNotFound error
                video = gr.Video(label="Template Video", sources=["upload"])
                with gr.Row():
                    model_id = gr.Dropdown(label="Mode", choices=["wan2.2-animate-move", "wan2.2-animate-mix"], value="wan2.2-animate-move")
                    model = gr.Dropdown(label="Inference Quality", choices=["wan-pro", "wan-std"], value="wan-pro")
                run_button = gr.Button("Generate Video")
            with gr.Column():
                output_video = gr.Video(label="Output Video")
                output_status = gr.Textbox(label="Status")
        run_button.click(
            fn=app.predict,
            inputs=[ref_img, video, model_id, model],
            outputs=[output_video, output_status]
        )
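    # Serialize requests: CPU inference is slow, so process one job at a time.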
    demo.queue(default_concurrency_limit=1)
    demo.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    start_app()