eeuuia commited on
Commit
226818a
·
verified ·
1 Parent(s): b743563

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -57
app.py CHANGED
@@ -59,20 +59,42 @@ local_repo_path = snapshot_download(
59
 
60
  # 3. Carregar cada componente da pipeline explicitamente
61
  print("=== Carregando componentes da pipeline... ===")
62
- vae = AutoencoderKLLTXVideo.from_pretrained(local_repo_path, subfolder="vae", torch_dtype=torch_dtype)
63
- text_encoder = T5EncoderModel.from_pretrained(local_repo_path, subfolder="text_encoder", torch_dtype=torch_dtype)
64
- tokenizer = T5TokenizerFast.from_pretrained(local_repo_path, subfolder="tokenizer")
65
- scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(local_repo_path, subfolder="scheduler")
66
 
67
- # Correção para o erro 'mu': desativar explicitamente o dynamic shifting
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
69
  print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
70
  scheduler.config.use_dynamic_shifting = False
 
71
 
72
- print(f"Carregando pesos do Transformer de: {checkpoint_path}")
73
- transformer = LTXVideoTransformer3DModel.from_pretrained(
74
- local_repo_path, subfolder="transformer", weight_name=checkpoint_path, torch_dtype=torch_dtype
 
75
  )
 
 
 
 
76
 
77
  # 4. Montar a pipeline principal
78
  print("Montando a LTXConditionPipeline...")
@@ -162,60 +184,41 @@ def prepare_and_generate_video(
162
  guidance_scale=guidance_scale,
163
  guidance_rescale=0.7,
164
  generator=torch.Generator().manual_seed(seed),
165
- output_type="latent",
 
166
  **pipeline_args
167
- ).frames
168
 
169
  # ETAPA 2: Upscale dos latentes
170
- upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
171
- upscaled_latents = pipe_upsample(
172
- latents=latents,
173
- output_type="latent"
174
- ).frames
175
-
176
-
177
-
178
- conditions = []
179
- if condition_image_1 is not None:
180
- condition_image_1 = ImageOps.fit(condition_image_1, (upscaled_width, upscaled_height), Image.LANCZOS)
181
- conditions.append(LTXVideoCondition(
182
- image=condition_image_1,
183
- strength=condition_strength_1,
184
- frame_index=int(condition_frame_index_1)
185
- ))
186
- if condition_image_2 is not None:
187
- condition_image_2 = ImageOps.fit(condition_image_2, (upscaled_width, upscaled_height), Image.LANCZOS)
188
- conditions.append(LTXVideoCondition(
189
- image=condition_image_2,
190
- strength=condition_strength_2,
191
- frame_index=int(condition_frame_index_2)
192
- ))
193
-
194
- pipeline_args = {}
195
- if conditions:
196
- pipeline_args["conditions"] = conditions
197
-
198
 
199
-
200
  # ETAPA 3: Denoise final em alta resolução
201
- final_video_frames_np = pipeline(
202
- prompt=prompt,
203
- negative_prompt=negative_prompt,
204
- width=upscaled_width,
205
- height=upscaled_height,
206
- num_frames=num_frames,
207
- denoise_strength=0.999,
208
- timesteps=[1000, 909, 725, 421, 0],
209
- latents=upscaled_latents,
210
- decode_timestep=0.05,
211
- decode_noise_scale=0.025,
212
- image_cond_noise_scale=0.0,
213
- guidance_scale=guidance_scale,
214
- guidance_rescale=0.7,
215
- generator=torch.Generator(device="cuda").manual_seed(seed),
216
- output_type="np",
217
- **pipeline_args
218
- ).frames[0]
 
 
 
 
219
 
220
  # Exportação para arquivo MP4
221
  video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
 
59
 
60
  # 3. Carregar cada componente da pipeline explicitamente
61
  print("=== Carregando componentes da pipeline... ===")
 
 
 
 
62
 
63
+ vae = AutoModel.from_pretrained(
64
+ "Lightricks/LTX-Video",
65
+ subfolder="vae",
66
+ torch_dtype=torch_dtype
67
+ )
68
+ text_encoder = AutoModel.from_pretrained(
69
+ "Lightricks/LTX-Video",
70
+ subfolder="text_encoder",
71
+ torch_dtype=torch_dtype
72
+ )
73
 + scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
74
+ "Lightricks/LTX-Video",
75
+ subfolder="scheduler",
76
+ torch_dtype=torch_dtype
77
+ )
78
 + tokenizer = T5TokenizerFast.from_pretrained(
79
+ "Lightricks/LTX-Video",
80
+ subfolder="tokenizer",
81
+ torch_dtype=torch_dtype
82
+ )
83
+
84
  if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
85
  print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
86
  scheduler.config.use_dynamic_shifting = False
87
+
88
 
89
+ transformer = AutoModel.from_pretrained(
90
+ "Lightricks/LTX-Video",
91
+ subfolder="transformer",
92
+ torch_dtype=torch.bfloat16
93
  )
94
+ transformer.enable_layerwise_casting(
95
+ storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
96
+ )
97
+
98
 
99
  # 4. Montar a pipeline principal
100
  print("Montando a LTXConditionPipeline...")
 
184
  guidance_scale=guidance_scale,
185
  guidance_rescale=0.7,
186
  generator=torch.Generator().manual_seed(seed),
187
+ #output_type="latent",
188
+ output_type="np",
189
  **pipeline_args
190
+ ).frames[0]
191
 
192
  # ETAPA 2: Upscale dos latentes
193
+ #upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
194
+ #upscaled_latents = pipe_upsample(
195
+ # latents=latents,
196
+ # output_type="latent"
197
+ #).frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
 
199
  # ETAPA 3: Denoise final em alta resolução
200
 + if False:
201
+ final_video_frames_np = pipeline(
202
+ prompt=prompt,
203
+ negative_prompt=negative_prompt,
204
+ width=upscaled_width,
205
+ height=upscaled_height,
206
+ num_frames=num_frames,
207
+ denoise_strength=0.999,
208
+ timesteps=[1000, 909, 725, 421, 0],
209
+ latents=upscaled_latents,
210
+ decode_timestep=0.05,
211
+ decode_noise_scale=0.025,
212
+ image_cond_noise_scale=0.0,
213
+ guidance_scale=guidance_scale,
214
+ guidance_rescale=0.7,
215
+ generator=torch.Generator(device="cuda").manual_seed(seed),
216
+ output_type="np",
217
+ **pipeline_args
218
+ ).frames[0]
219
+ else:
220
+ final_video_frames_np = latents
221
+
222
 
223
  # Exportação para arquivo MP4
224
  video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]