Spaces:
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,20 +59,42 @@ local_repo_path = snapshot_download(
|
|
| 59 |
|
| 60 |
# 3. Carregar cada componente da pipeline explicitamente
|
| 61 |
print("=== Carregando componentes da pipeline... ===")
|
| 62 |
-
vae = AutoencoderKLLTXVideo.from_pretrained(local_repo_path, subfolder="vae", torch_dtype=torch_dtype)
|
| 63 |
-
text_encoder = T5EncoderModel.from_pretrained(local_repo_path, subfolder="text_encoder", torch_dtype=torch_dtype)
|
| 64 |
-
tokenizer = T5TokenizerFast.from_pretrained(local_repo_path, subfolder="tokenizer")
|
| 65 |
-
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(local_repo_path, subfolder="scheduler")
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
|
| 69 |
print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
|
| 70 |
scheduler.config.use_dynamic_shifting = False
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
| 75 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
# 4. Montar a pipeline principal
|
| 78 |
print("Montando a LTXConditionPipeline...")
|
|
@@ -162,60 +184,41 @@ def prepare_and_generate_video(
|
|
| 162 |
guidance_scale=guidance_scale,
|
| 163 |
guidance_rescale=0.7,
|
| 164 |
generator=torch.Generator().manual_seed(seed),
|
| 165 |
-
output_type="latent",
|
|
|
|
| 166 |
**pipeline_args
|
| 167 |
-
).frames
|
| 168 |
|
| 169 |
# ETAPA 2: Upscale dos latentes
|
| 170 |
-
upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
|
| 171 |
-
upscaled_latents = pipe_upsample(
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
).frames
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
conditions = []
|
| 179 |
-
if condition_image_1 is not None:
|
| 180 |
-
condition_image_1 = ImageOps.fit(condition_image_1, (upscaled_width, upscaled_height), Image.LANCZOS)
|
| 181 |
-
conditions.append(LTXVideoCondition(
|
| 182 |
-
image=condition_image_1,
|
| 183 |
-
strength=condition_strength_1,
|
| 184 |
-
frame_index=int(condition_frame_index_1)
|
| 185 |
-
))
|
| 186 |
-
if condition_image_2 is not None:
|
| 187 |
-
condition_image_2 = ImageOps.fit(condition_image_2, (upscaled_width, upscaled_height), Image.LANCZOS)
|
| 188 |
-
conditions.append(LTXVideoCondition(
|
| 189 |
-
image=condition_image_2,
|
| 190 |
-
strength=condition_strength_2,
|
| 191 |
-
frame_index=int(condition_frame_index_2)
|
| 192 |
-
))
|
| 193 |
-
|
| 194 |
-
pipeline_args = {}
|
| 195 |
-
if conditions:
|
| 196 |
-
pipeline_args["conditions"] = conditions
|
| 197 |
-
|
| 198 |
|
| 199 |
-
|
| 200 |
# ETAPA 3: Denoise final em alta resolução
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# Exportação para arquivo MP4
|
| 221 |
video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
|
|
|
|
| 59 |
|
| 60 |
# 3. Carregar cada componente da pipeline explicitamente
|
| 61 |
print("=== Carregando componentes da pipeline... ===")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
+
vae = AutoModel.from_pretrained(
|
| 64 |
+
"Lightricks/LTX-Video",
|
| 65 |
+
subfolder="vae",
|
| 66 |
+
torch_dtype=torch_dtype
|
| 67 |
+
)
|
| 68 |
+
text_encoder = AutoModel.from_pretrained(
|
| 69 |
+
"Lightricks/LTX-Video",
|
| 70 |
+
subfolder="text_encoder",
|
| 71 |
+
torch_dtype=torch_dtype
|
| 72 |
+
)
|
| 73 |
+
scheduler = AutoModel.from_pretrained(
|
| 74 |
+
"Lightricks/LTX-Video",
|
| 75 |
+
subfolder="scheduler",
|
| 76 |
+
torch_dtype=torch_dtype
|
| 77 |
+
)
|
| 78 |
+
tokenizer = AutoModel.from_pretrained(
|
| 79 |
+
"Lightricks/LTX-Video",
|
| 80 |
+
subfolder="tokenizer",
|
| 81 |
+
torch_dtype=torch_dtype
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
|
| 85 |
print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
|
| 86 |
scheduler.config.use_dynamic_shifting = False
|
| 87 |
+
|
| 88 |
|
| 89 |
+
transformer = AutoModel.from_pretrained(
|
| 90 |
+
"Lightricks/LTX-Video",
|
| 91 |
+
subfolder="transformer",
|
| 92 |
+
torch_dtype=torch.bfloat16
|
| 93 |
)
|
| 94 |
+
transformer.enable_layerwise_casting(
|
| 95 |
+
storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
|
| 99 |
# 4. Montar a pipeline principal
|
| 100 |
print("Montando a LTXConditionPipeline...")
|
|
|
|
| 184 |
guidance_scale=guidance_scale,
|
| 185 |
guidance_rescale=0.7,
|
| 186 |
generator=torch.Generator().manual_seed(seed),
|
| 187 |
+
#output_type="latent",
|
| 188 |
+
output_type="np",
|
| 189 |
**pipeline_args
|
| 190 |
+
).frames[0]
|
| 191 |
|
| 192 |
# ETAPA 2: Upscale dos latentes
|
| 193 |
+
#upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
|
| 194 |
+
#upscaled_latents = pipe_upsample(
|
| 195 |
+
# latents=latents,
|
| 196 |
+
# output_type="latent"
|
| 197 |
+
#).frames
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
|
|
|
| 199 |
# ETAPA 3: Denoise final em alta resolução
|
| 200 |
+
if false:
|
| 201 |
+
final_video_frames_np = pipeline(
|
| 202 |
+
prompt=prompt,
|
| 203 |
+
negative_prompt=negative_prompt,
|
| 204 |
+
width=upscaled_width,
|
| 205 |
+
height=upscaled_height,
|
| 206 |
+
num_frames=num_frames,
|
| 207 |
+
denoise_strength=0.999,
|
| 208 |
+
timesteps=[1000, 909, 725, 421, 0],
|
| 209 |
+
latents=upscaled_latents,
|
| 210 |
+
decode_timestep=0.05,
|
| 211 |
+
decode_noise_scale=0.025,
|
| 212 |
+
image_cond_noise_scale=0.0,
|
| 213 |
+
guidance_scale=guidance_scale,
|
| 214 |
+
guidance_rescale=0.7,
|
| 215 |
+
generator=torch.Generator(device="cuda").manual_seed(seed),
|
| 216 |
+
output_type="np",
|
| 217 |
+
**pipeline_args
|
| 218 |
+
).frames[0]
|
| 219 |
+
else:
|
| 220 |
+
final_video_frames_np = latents
|
| 221 |
+
|
| 222 |
|
| 223 |
# Exportação para arquivo MP4
|
| 224 |
video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
|