Update app.py
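Drops the ZeroGPU/AoTI path and runs the Space directly on CUDA: `import aoti` and the `aoti.aoti_blocks_load(...)` calls are removed, both `WanTransformer3DModel` checkpoints are loaded onto the GPU up front instead of via `device_map='auto'`, the per-request `get_duration` budget estimator behind `@spaces.GPU(duration=get_duration)` is replaced by a bare `@spaces.GPU()`, a seeded generator is passed to the pipeline call, and the UI headline and slider labels are updated.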
app.py CHANGED
@@ -14,8 +14,6 @@ from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from torchao.quantization import Int8WeightOnlyConfig
 
-import aoti
-
 
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
@@ -32,61 +30,62 @@ MIN_FRAMES_MODEL = 8
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 DEFAULT_DURATION = 5.0
 
-
+print("⬇️ Loading models...")
+
+# ---- LOAD TRANSFORMERS ON CUDA ----
+transformer = WanTransformer3DModel.from_pretrained(
+    'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+    subfolder='transformer',
+    torch_dtype=torch.bfloat16,
+).to("cuda")
+
+transformer_2 = WanTransformer3DModel.from_pretrained(
+    'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+    subfolder='transformer_2',
+    torch_dtype=torch.bfloat16,
+).to("cuda")
+
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained(
-        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='auto',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained(
-        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='auto',
-    ),
+    transformer=transformer,
+    transformer_2=transformer_2,
     torch_dtype=torch.bfloat16,
 )
 
-
+pipe.to("cuda")
+
+# ---- LORA ----
 pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+    "Kijai/WanVideo_comfy",
+    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
     adapter_name="lightx2v"
 )
-
+
 pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2",
-    load_into_transformer_2=True
+    "Kijai/WanVideo_comfy",
+    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+    adapter_name="lightx2v_2",
+    load_into_transformer_2=True
 )
+
 pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
 pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
 pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
 pipe.unload_lora_weights()
 
-#
+# ---- QUANT ----
 quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
 quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 
-
-aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
-aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
+print("✔️ Loaded successfully!")
 
-# 🟢 FIX 1: Explicitly move the pipeline to the GPU.
-# This fixes the error "Cannot generate a cpu tensor from a generator of type cuda."
-pipe.to("cuda")
 
 default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
 default_negative_prompt = (
     "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, "
-    "整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, "
-    "画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, "
-    "静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
+    "整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋, 残缺的, 多余的手指, "
+    "画得不好的手部, 畸形, 毀容, 多余的肢体, 手指融合, 静止不动的画面"
 )
 
 
@@ -97,8 +96,8 @@ def resize_image(image: Image.Image) -> Image.Image:
         return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
 
     aspect_ratio = width / height
-    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
-    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
+    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
+    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
 
     target_w, target_h = width, height
     image_to_resize = image
@@ -134,28 +133,7 @@ def get_num_frames(duration_seconds: float):
     return 1 + int(round(duration_seconds * FIXED_FPS))
 
 
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
-    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
-    width, height = resize_image(input_image).size
-    frames = get_num_frames(duration_seconds)
-    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
-    step_duration = BASE_STEP_DURATION * factor ** 1.5
-    return 10 + int(steps) * step_duration
-
-
-@spaces.GPU(duration=get_duration)
+@spaces.GPU()
 def generate_video(
     input_image,
     prompt,
@@ -163,20 +141,20 @@ def generate_video(
     negative_prompt=default_negative_prompt,
     duration_seconds=DEFAULT_DURATION,
     guidance_scale=1,
-    guidance_scale_2=1,
+    guidance_scale_2=1,
     seed=42,
     randomize_seed=False,
     progress=gr.Progress(track_tqdm=True),
 ):
     if input_image is None:
         raise gr.Error("Please upload an input image.")
-
+
     num_frames = get_num_frames(duration_seconds)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
 
-
-
+    generator = torch.cuda.manual_seed(current_seed)
+
     output_frames_list = pipe(
         image=resized_image,
         prompt=prompt,
@@ -187,7 +165,7 @@ def generate_video(
         guidance_scale=float(guidance_scale),
         guidance_scale_2=float(guidance_scale_2),
         num_inference_steps=int(steps),
-        generator=
+        generator=generator,
     ).frames[0]
 
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
@@ -197,11 +175,11 @@ def generate_video(
     return video_path, current_seed
 
 
-#
+# ---- GRADIO UI ----
 with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 Wan 2.2 I2V (14B) —
-    gr.Markdown("Generate cinematic I2V animations without
-
+    gr.Markdown("# 🚀 Wan 2.2 I2V (14B) — HF CUDA Edition")
+    gr.Markdown("Generate cinematic I2V animations without ZeroGPU — fully optimized for Nvidia L40S.")
+
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image")
@@ -213,21 +191,21 @@ with gr.Blocks() as demo:
                 step=0.5,
                 value=DEFAULT_DURATION,
                 label="Duration (seconds)",
-                info=f"Each second = {FIXED_FPS} frames. Longer videos require more VRAM/time."
             )
-
+
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale
-                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale")
+                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2")
 
             generate_button = gr.Button("Generate Video", variant="primary")
+
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
-
+
     ui_inputs = [
         input_image_component, prompt_input, steps_slider,
         negative_prompt_input, duration_seconds_input,
@@ -236,5 +214,6 @@ with gr.Blocks() as demo:
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
 
+
 if __name__ == "__main__":
-    demo.queue().launch(
+    demo.queue().launch()
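For reference, the removed get_duration helper sized the ZeroGPU time budget superlinearly in the request's pixel volume; on dedicated hardware a bare @spaces.GPU() decorator suffices. A self-contained sketch of the removed estimate (the example values below are illustrative):

import torch  # not required for the estimate itself; shown for context

def estimate_gpu_seconds(steps: int, frames: int, width: int, height: int) -> float:
    # Baseline: 81 frames at 832x624 costs about 15 s per denoising step,
    # scaling with pixel volume to the power 1.5, plus ~10 s fixed overhead.
    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15
    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
    return 10 + int(steps) * BASE_STEP_DURATION * factor ** 1.5

# At the baseline size with the default 6 steps: 10 + 6 * 15 = 100 seconds.
print(estimate_gpu_seconds(6, 81, 832, 624))  # 100.0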
|
|
|
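One note on the new seeding line: torch.cuda.manual_seed() seeds the global CUDA RNG but returns None, so code that must hand a generator to the pipeline usually constructs one explicitly. A minimal sketch of that pattern (the tensor shape is illustrative, and a CUDA device is assumed):

import torch

# Build an explicit CUDA generator; manual_seed() returns the generator
# itself, so it can be created and seeded in one expression.
generator = torch.Generator(device="cuda").manual_seed(42)

# Sampling on the same device as the generator avoids the error quoted in
# the removed comment: "Cannot generate a cpu tensor from a generator of type cuda."
latents = torch.randn(1, 16, 81, 60, 104, device="cuda", generator=generator)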