Spaces:
Running on Zero
Rename app.py to _app.py
Browse files

app.py → _app.py  +84 −78
RENAMED
@@ -19,6 +19,7 @@ import random
 import base64
 import gc
 import math
+import ffmpeg
 
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
@@ -42,6 +43,10 @@ FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 80
 
+default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
+default_negative_prompt = "Vibrant colors, overexposed, static, blurry details, subtitles, style, artwork, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, deformed limbs, fingers fused together, static image, cluttered background, three legs, many people in the background, walking backwards"
+
+
 MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
 MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
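With FIXED_FPS = 16 these bounds evaluate to 0.5 and 5.0 seconds; a quick check, repeating the constants from the hunk above:

    FIXED_FPS = 16
    MIN_FRAMES_MODEL = 8
    MAX_FRAMES_MODEL = 80
    print(round(MIN_FRAMES_MODEL / FIXED_FPS, 1))  # 0.5
    print(round(MAX_FRAMES_MODEL / FIXED_FPS, 1))  # 5.0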
@@ -91,47 +96,76 @@ theme = gr.themes.Soft(
 )
 
 
-pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    torch_dtype=torch.bfloat16,
-).to('cuda')
-
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v"
-)
-kwargs_lora = {}
-kwargs_lora["load_into_transformer_2"] = True
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2", **kwargs_lora
-)
-pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
-pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
-pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
-pipe.unload_lora_weights()
-
-quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
-quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
-quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-
-
+# MARK: LOAD MODEL FUNCTION:
+
+# Global pipe variable
+pipe = None
+
+def load_model():
+    global pipe
+    if pipe is None:
+        pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+            transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer_2',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+
+        # LoRA loading without the problematic adapter_names parameters
+        pipe.load_lora_weights("Kijai/WanVideo_comfy",
+            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+            adapter_name="lightx2v")
+
+        pipe.set_adapters(["lightx2v"], adapter_weights=[1.0])
+
+        # Quantization
+        quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+        quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+        quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+
+    return pipe
+
+# pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+#     transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer_2',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     torch_dtype=torch.bfloat16,
+# ).to('cuda')
+
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v"
+# )
+# kwargs_lora = {}
+# kwargs_lora["load_into_transformer_2"] = True
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v_2", **kwargs_lora
+# )
+# pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+# pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+# pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+# pipe.unload_lora_weights()
+
+# quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+# quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+# quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 
 
 def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
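For orientation, a minimal sketch of how this lazy loader would be used from a ZeroGPU handler; the handler below and its arguments are illustrative placeholders, not part of this commit:

    # Hedged sketch — generate_video's real signature is truncated in this diff.
    @spaces.GPU(duration=280)
    def generate_video_sketch(input_image, prompt):
        pipe = load_model()  # first call builds, adapts, and quantizes; later calls reuse the global
        result = pipe(image=input_image, prompt=prompt)
        return result.frames[0]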
@@ -140,33 +174,22 @@ def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
     out_path: path to the output file (.mp4)
     fps: frame rate
     """
-    # 1️⃣ Make sure we have uint8
     if frames.dtype != torch.uint8:
         frames = (frames * 255).clamp(0, 255).to(torch.uint8)
-
-    # 2️⃣ Convert to a NumPy array (T, H, W, C)
+
     np_frames = frames.cpu().numpy()
-
-    # 3️⃣ Start the ffmpeg process
-    # We use "pipe:" mode, i.e. the raw RGB data is pushed over stdin
+
+    # Correct ffmpeg call:
     process = (
         ffmpeg
-        .input('pipe:', format='rawvideo',
-               pix_fmt='rgb24',
-               s=f'{np_frames.shape[2]}x{np_frames.shape[1]}',
-               framerate=fps)
-        .output(out_path,
-                vcodec='libx264',
-                pix_fmt='yuv420p',  # important for broad player compatibility
-                crf=23,             # quality (lower = better, 18-23 is typical)
-                preset='fast')
+        .input('pipe:', format='rawvideo', pix_fmt='rgb24',
+               s=f'{np_frames.shape[2]}x{np_frames.shape[1]}', framerate=fps)
+        .output(out_path, vcodec='libx264', pix_fmt='yuv420p', crf=23, preset='fast')
         .overwrite_output()
        .run_async(pipe_stdin=True)
     )
-
-    # 4️⃣ Write the frames one after another into the pipe stream
+
     for frame in np_frames:
-        # frame has shape (H, W, C) and dtype uint8 → raw RGB bytes
         process.stdin.write(frame.tobytes())
     process.stdin.close()
     process.wait()
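A small usage sketch for the rewritten exporter, with a synthetic float tensor of shape (T, H, W, C) in [0, 1] just to exercise the pipe (the tensor and filename are made up for illustration):

    import torch

    # 16 frames of 240x320 RGB noise; the function converts float [0,1] to uint8 itself.
    frames = torch.rand(16, 240, 320, 3)
    export_frames_to_video(frames, "out.mp4", fps=16)
    # The rawvideo input's s= string is width x height, i.e. shape[2] x shape[1],
    # which the function derives from the array, so no size argument is needed here.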
@@ -226,25 +249,8 @@ def get_num_frames(duration_seconds: float):
     ))
 
 
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
-    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
-    width, height = resize_image(input_image).size
-    frames = get_num_frames(duration_seconds)
-    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
-    step_duration = BASE_STEP_DURATION * factor ** 1.5
-    return 10 + int(steps) * step_duration
+def get_duration_simple():
+    return 280
 
 @spaces.GPU(duration=get_duration)
 def generate_video(
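The removed estimator scaled the GPU-time request with pixel volume: at its base point (81 frames at 832×624) the factor is exactly 1 and the estimate reduces to 10 + 15·steps seconds, while the new get_duration_simple pins a flat 280-second budget. A quick numeric check of the removed formula, repeating its constants:

    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15

    def old_estimate(frames, width, height, steps):
        factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
        return 10 + int(steps) * BASE_STEP_DURATION * factor ** 1.5

    print(old_estimate(81, 832, 624, 4))   # 70.0  (factor == 1 at the base point)
    print(old_estimate(81, 832, 624, 18))  # 280.0 (the new flat budget equals ~18 base-rate steps)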