Spaces:

RosticFACE
/

video

Paused

App Files Community

RosticFACE committed on Nov 4, 2025

Commit

5b15c0f

verified ·

1 Parent(s): f23efa0

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from torchao.quantization import Int8WeightOnlyConfig
-import aoti
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
@@ -32,7 +32,7 @@ MIN_FRAMES_MODEL = 8
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 DEFAULT_DURATION = 5.0
-# Модель загружается с device_map='auto' для распределения больших трансформеров
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained(
@@ -50,6 +50,7 @@ pipe = WanImageToVideoPipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
 )
 pipe.load_lora_weights(
     "Kijai/WanVideo_comfy",
     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
@@ -67,15 +68,12 @@ pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transform
 pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
 pipe.unload_lora_weights()
-# Квантизация (происходит на CPU, так как модель еще не полностью материализована)
 quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
 quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-# 🚨 УДАЛЕНО ИСПРАВЛЕНИЕ: pipe.text_encoder.to(torch.device("cuda", 0))
-# Эта строка вызывала ошибку 'Cannot copy out of meta tensor'.
-# AOTI
 aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
 aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
@@ -88,7 +86,7 @@ default_negative_prompt = (
     "静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
 )
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
@@ -175,7 +173,9 @@ def generate_video(
     resized_image = resize_image(input_image)
     # При первом вызове pipe() здесь произойдет перемещение всех оставшихся (CPU/quantized)
-    # частей модели на GPU.
     output_frames_list = pipe(
         image=resized_image,
         prompt=prompt,
@@ -187,6 +187,7 @@ def generate_video(
         guidance_scale_2=float(guidance_scale_2),
         num_inference_steps=int(steps),
         generator=torch.Generator(device="cuda").manual_seed(current_seed),
     ).frames[0]
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
@@ -196,7 +197,7 @@ def generate_video(
     return video_path, current_seed
-# --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# 🚀 Wan 2.2 I2V (14B) — Unlimited Duration Edition 🕒")
     gr.Markdown("Generate cinematic I2V animations without duration limits. Optimized for 4x NVIDIA L40S.")

 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from torchao.quantization import Int8WeightOnlyConfig
+import aoti # Импорт aoti.py
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 DEFAULT_DURATION = 5.0
+# Модель загружается с device_map='auto' для распределения больших трансформеров
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained(
     torch_dtype=torch.bfloat16,
 )
+# Загрузка и фьюзинг LoRA
 pipe.load_lora_weights(
     "Kijai/WanVideo_comfy",
     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
 pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
 pipe.unload_lora_weights()
+# Квантизация
 quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
 quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+# AOTI
 aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
 aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
     "静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
 )
+# Функция изменения размера изображения
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
     resized_image = resize_image(input_image)
     # При первом вызове pipe() здесь произойдет перемещение всех оставшихся (CPU/quantized)
+    # частей модели на GPU.
+    # 🟢 ИСПРАВЛЕНО: Добавлен 'device="cuda"' для создания латентов на GPU,
+    # что соответствует генератору 'torch.Generator(device="cuda")'.
     output_frames_list = pipe(
         image=resized_image,
         prompt=prompt,
         guidance_scale_2=float(guidance_scale_2),
         num_inference_steps=int(steps),
         generator=torch.Generator(device="cuda").manual_seed(current_seed),
+        device="cuda", # <--- ИСПРАВЛЕНИЕ: Гарантирует, что латенты создаются на CUDA
     ).frames[0]
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
     return video_path, current_seed
+# --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# 🚀 Wan 2.2 I2V (14B) — Unlimited Duration Edition 🕒")
     gr.Markdown("Generate cinematic I2V animations without duration limits. Optimized for 4x NVIDIA L40S.")