Sebastiankay committed
Commit 7888181 · verified · 1 Parent(s): 550d4eb

Rename app.py to _app.py

Files changed (1)
  1. app.py → _app.py +84 -78
app.py → _app.py RENAMED
@@ -19,6 +19,7 @@ import random
 import base64
 import gc
 import math
+import ffmpeg
 
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
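Note: the fluent .input(...).output(...).run_async(...) chain used in export_frames_to_video below is the API of the ffmpeg-python package (imported as ffmpeg), which drives an external ffmpeg binary through a subprocess pipe; the bare module name is ambiguous on PyPI, so this import assumes ffmpeg-python is what is installed.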
@@ -42,6 +43,10 @@ FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 80
 
+default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
+default_negative_prompt = "Vibrant colors, overexposed, static, blurry details, subtitles, style, artwork, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, deformed limbs, fingers fused together, static image, cluttered background, three legs, many people in the background, walking backwards"
+
+
 MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
 MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
 
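With FIXED_FPS = 16 these bounds evaluate to MIN_DURATION = round(8/16, 1) = 0.5 and MAX_DURATION = round(80/16, 1) = 5.0, i.e. generated clips run between half a second and five seconds.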
@@ -91,47 +96,76 @@ theme = gr.themes.Soft(
 )
 
 
-pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    torch_dtype=torch.bfloat16,
-).to('cuda')
-
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v"
-)
-kwargs_lora = {}
-kwargs_lora["load_into_transformer_2"] = True
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2", **kwargs_lora
-)
-pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
-pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
-pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
-pipe.unload_lora_weights()
-
-quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
-quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
-quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-
-aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
-aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
-
-
-default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
-default_negative_prompt = "Vibrant colors, overexposed, static, blurry details, subtitles, style, artwork, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, deformed limbs, fingers fused together, static image, cluttered background, three legs, many people in the background, walking backwards"
+# MARK: LOAD MODEL FUNCTION
+
+# Global pipe variable
+pipe = None
+
+def load_model():
+    global pipe
+    if pipe is None:
+        pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+            transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer_2',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+
+        # LoRA loading without the problematic adapter_names parameters
+        pipe.load_lora_weights("Kijai/WanVideo_comfy",
+            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+            adapter_name="lightx2v")
+
+        pipe.set_adapters(["lightx2v"], adapter_weights=[1.0])
+
+        # Quantization
+        quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+        quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+        quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+
+    return pipe
+
+# pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+#     transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+#         subfolder='transformer_2',
+#         torch_dtype=torch.bfloat16,
+#         device_map='cuda',
+#     ),
+#     torch_dtype=torch.bfloat16,
+# ).to('cuda')
+
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v"
+# )
+# kwargs_lora = {}
+# kwargs_lora["load_into_transformer_2"] = True
+# pipe.load_lora_weights(
+#     "Kijai/WanVideo_comfy",
+#     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+#     adapter_name="lightx2v_2", **kwargs_lora
+# )
+# pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+# pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+# pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+# pipe.unload_lora_weights()
+
+# quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
+# quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+# quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 
 
 def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
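The new load_model is a lazy singleton: the multi-gigabyte pipeline is constructed on the first call and the cached global is returned on every later call, so repeated requests do not reload it. A minimal, self-contained sketch of the pattern, with DummyPipeline standing in for WanImageToVideoPipeline:

class DummyPipeline:
    def __init__(self):
        print("expensive load runs once")

pipe = None

def load_model():
    # Build on first use, then return the cached global.
    global pipe
    if pipe is None:
        pipe = DummyPipeline()
    return pipe

a = load_model()  # prints "expensive load runs once"
b = load_model()  # silent: returns the cached instance
assert a is b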
@@ -140,33 +174,22 @@ def export_frames_to_video(frames: torch.Tensor, out_path: str, fps: int = 24):
         out_path: path to the output file (.mp4)
         fps: frame rate
     """
-    # 1. Make sure we have uint8
     if frames.dtype != torch.uint8:
         frames = (frames * 255).clamp(0, 255).to(torch.uint8)
-
-    # 2. Convert to a NumPy array (T, H, W, C)
+
     np_frames = frames.cpu().numpy()
-
-    # 3. Build the ffmpeg input from the NumPy array
-    #    We use "pipe:" mode, i.e. the raw RGB data is pushed through stdin
+
+    # Correct ffmpeg call:
     process = (
         ffmpeg
-        .input('pipe:', format='rawvideo',
-               pix_fmt='rgb24',
-               s='{}x{}'.format(np_frames.shape[2], np_frames.shape[1]),
-               framerate=fps)
-        .output(out_path,
-                vcodec='libx264',
-                pix_fmt='yuv420p',  # important for broad player compatibility
-                crf=23,             # quality (lower = better, 18-23 is typical)
-                preset='fast')
+        .input('pipe:', format='rawvideo', pix_fmt='rgb24',
+               s=f'{np_frames.shape[2]}x{np_frames.shape[1]}', framerate=fps)
+        .output(out_path, vcodec='libx264', pix_fmt='yuv420p', crf=23, preset='fast')
         .overwrite_output()
         .run_async(pipe_stdin=True)
     )
-
-    # 4. Write the frames one after another into the pipe
+
     for frame in np_frames:
-        # frame has shape (H, W, C) and dtype uint8 -> raw RGB bytes
         process.stdin.write(frame.tobytes())
     process.stdin.close()
     process.wait()
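A hedged usage sketch for export_frames_to_video as rewritten above, assuming the function is in scope, ffmpeg-python is installed, and an ffmpeg binary is on PATH. The (T, H, W, C) tensor layout matches what the function expects; 320x240 is an arbitrary example size (yuv420p with libx264 needs even dimensions):

import torch

# 16 random RGB frames, shape (T, H, W, C), dtype uint8 -- one second at fps=16
frames = torch.randint(0, 256, (16, 240, 320, 3), dtype=torch.uint8)
export_frames_to_video(frames, "out.mp4", fps=16)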
@@ -226,25 +249,8 @@ def get_num_frames(duration_seconds: float):
     ))
 
 
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
-    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
-    width, height = resize_image(input_image).size
-    frames = get_num_frames(duration_seconds)
-    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
-    step_duration = BASE_STEP_DURATION * factor ** 1.5
-    return 10 + int(steps) * step_duration
+def get_duration_simple():
+    return 280
 
 @spaces.GPU(duration=get_duration)
 def generate_video(
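The removed get_duration scaled the ZeroGPU time budget with output size and step count; get_duration_simple replaces it with a flat 280 seconds. As a worked example of the old formula (steps=8 is an illustrative value, not an app default): at the calibration point of 81 frames at 832x624 the size factor is exactly 1, so the estimate reduces to 10 + steps * 15.

frames, width, height, steps = 81, 832, 624, 8       # illustrative values
factor = frames * width * height / (81 * 832 * 624)  # 1.0 at the calibration point
step_duration = 15 * factor ** 1.5                   # BASE_STEP_DURATION * factor**1.5
print(10 + int(steps) * step_duration)               # 130.0 -> old estimate in seconds

Note that the unchanged context line @spaces.GPU(duration=get_duration) still references the deleted function; unless get_duration is defined elsewhere in the file, the module will raise a NameError at import time after this commit, so the decorator presumably needs get_duration_simple (or a plain integer) instead.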