Spaces:
Running on Zero
Rename app.py to _app.py
Browse files

app.py → _app.py  +84 −78
RENAMED
@@ -19,6 +19,7 @@ import random
 import base64
 import gc
 import math
+import ffmpeg
 
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
@@ -42,6 +43,10 @@ FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 80
 
+default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
+default_negative_prompt = "Vibrant colors, overexposed, static, blurry details, subtitles, style, artwork, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, deformed limbs, fingers fused together, static image, cluttered background, three legs, many people in the background, walking backwards"
+
+
 MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
 MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
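With FIXED_FPS = 16 these bounds evaluate to 0.5 and 5.0 seconds; a quick check, repeating the constants from the hunk above:

    FIXED_FPS = 16
    MIN_FRAMES_MODEL = 8
    MAX_FRAMES_MODEL = 80
    print(round(MIN_FRAMES_MODEL / FIXED_FPS, 1))  # 0.5
    print(round(MAX_FRAMES_MODEL / FIXED_FPS, 1))  # 5.0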
@@ -91,47 +96,76 @@ theme = gr.themes.Soft(
 )
 
 
-pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    torch_dtype=torch.bfloat16,
-).to('cuda')
-
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v"
-)
-kwargs_lora = {}
-kwargs_lora["load_into_transformer_2"] = True
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2", **kwargs_lora
-)
-pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
-pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
-pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
-pipe.unload_lora_weights()
-
-quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
-quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
-quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-
-
+# MARK: LOAD MODEL FUNCTION:
+
+# Global pipe variable
+pipe = None
+
+def load_model():
+    global pipe
+    if pipe is None:
+        pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+            transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer_2',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+
+        # LoRA loading without the problematic adapter_names parameters
+        pipe.load_lora_weights("Kijai/WanVideo_comfy",
+            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+            adapter_name="lightx2v")
+
+        pipe.set_adapters(["lightx2v"], adapter_weights=[1.0])
+
+        # Quantization
+        quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+        quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+        quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+
+    return pipe
+
+# pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+#     transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer_2',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     torch_dtype=torch.bfloat16,
+# ).to('cuda')
+
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v"
+# )
+# kwargs_lora = {}
+# kwargs_lora["load_into_transformer_2"] = True
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v_2", **kwargs_lora
+# )
+# pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+# pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+# pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+# pipe.unload_lora_weights()
+
+# quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+# quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+# quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 
 
 def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
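For orientation, a minimal sketch of how this lazy loader would be used from a ZeroGPU handler; the handler below and its arguments are illustrative placeholders, not part of this commit:

    # Hedged sketch — generate_video's real signature is truncated in this diff.
    @spaces.GPU(duration=280)
    def generate_video_sketch(input_image, prompt):
        pipe = load_model()  # first call builds, adapts, and quantizes; later calls reuse the global
        result = pipe(image=input_image, prompt=prompt)
        return result.frames[0]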
@@ -140,33 +174,22 @@ def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
     out_path: path to the output file (.mp4)
     fps: frame rate
     """
-    # 1️⃣ Make sure we have uint8
     if frames.dtype != torch.uint8:
         frames = (frames * 255).clamp(0, 255).to(torch.uint8)
-
-    # 2️⃣ Convert to a NumPy array (T, H, W, C)
+
     np_frames = frames.cpu().numpy()
-
-    # 3️⃣ Start the ffmpeg process
-    # We use "pipe:" mode, i.e. the raw RGB data is pushed over stdin
+
+    # Correct ffmpeg call:
     process = (
         ffmpeg
-        .input('pipe:', format='rawvideo',
-               pix_fmt='rgb24',
-               s=f'{np_frames.shape[2]}x{np_frames.shape[1]}',
-               framerate=fps)
-        .output(out_path,
-                vcodec='libx264',
-                pix_fmt='yuv420p',  # important for broad player compatibility
-                crf=23,             # quality (lower = better, 18-23 is typical)
-                preset='fast')
+        .input('pipe:', format='rawvideo', pix_fmt='rgb24',
+               s=f'{np_frames.shape[2]}x{np_frames.shape[1]}', framerate=fps)
+        .output(out_path, vcodec='libx264', pix_fmt='yuv420p', crf=23, preset='fast')
         .overwrite_output()
        .run_async(pipe_stdin=True)
     )
-
-    # 4️⃣ Write the frames one after another into the pipe stream
+
     for frame in np_frames:
-        # frame has shape (H, W, C) and dtype uint8 → raw RGB bytes
         process.stdin.write(frame.tobytes())
     process.stdin.close()
     process.wait()
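A small usage sketch for the rewritten exporter, with a synthetic float tensor of shape (T, H, W, C) in [0, 1] just to exercise the pipe (the tensor and filename are made up for illustration):

    import torch

    # 16 frames of 240x320 RGB noise; the function converts float [0,1] to uint8 itself.
    frames = torch.rand(16, 240, 320, 3)
    export_frames_to_video(frames, "out.mp4", fps=16)
    # The rawvideo input's s= string is width x height, i.e. shape[2] x shape[1],
    # which the function derives from the array, so no size argument is needed here.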
@@ -226,25 +249,8 @@ def get_num_frames(duration_seconds: float):
     ))
 
 
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
-    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
-    width, height = resize_image(input_image).size
-    frames = get_num_frames(duration_seconds)
-    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
-    step_duration = BASE_STEP_DURATION * factor ** 1.5
-    return 10 + int(steps) * step_duration
+def get_duration_simple():
+    return 280
 
 @spaces.GPU(duration=get_duration)
 def generate_video(
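The removed estimator scaled the GPU-time request with pixel volume: at its base point (81 frames at 832×624) the factor is exactly 1 and the estimate reduces to 10 + 15·steps seconds, while the new get_duration_simple pins a flat 280-second budget. A quick numeric check of the removed formula, repeating its constants:

    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15

    def old_estimate(frames, width, height, steps):
        factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
        return 10 + int(steps) * BASE_STEP_DURATION * factor ** 1.5

    print(old_estimate(81, 832, 624, 4))   # 70.0  (factor == 1 at the base point)
    print(old_estimate(81, 832, 624, 18))  # 280.0 (the new flat budget equals ~18 base-rate steps)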