Spaces:
Runtime error
Runtime error
end_latent = end_latent.to(dtype=torch.float32, device=cpu)
Browse files
app.py
CHANGED
|
@@ -1273,6 +1273,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
|
|
| 1273 |
end_frame, target_width=width, target_height=height, vae=vae,
|
| 1274 |
image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
|
| 1275 |
)[:2]
|
|
|
|
| 1276 |
else:
|
| 1277 |
end_latent = end_clip_embedding = None
|
| 1278 |
|
|
@@ -1307,7 +1308,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
|
|
| 1307 |
def callback(d):
|
| 1308 |
return
|
| 1309 |
|
| 1310 |
-
def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent,
|
| 1311 |
# 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
|
| 1312 |
available_frames = history_latents.shape[2] # Number of latent frames
|
| 1313 |
max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4) # Cap at available pixel frames
|
|
@@ -1321,9 +1322,9 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
|
|
| 1321 |
total_context_frames = num_4x_frames + num_2x_frames + effective_clean_frames
|
| 1322 |
total_context_frames = min(total_context_frames, available_frames) # 20250507 pftq: Edge case for <=1 sec videos
|
| 1323 |
|
| 1324 |
-
indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1) if
|
| 1325 |
clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices, blank_indices, clean_latent_indices_post = indices.split(
|
| 1326 |
-
[1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if
|
| 1327 |
)
|
| 1328 |
clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices, clean_latent_indices_post], dim=1)
|
| 1329 |
|
|
@@ -1414,7 +1415,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
|
|
| 1414 |
else:
|
| 1415 |
transformer.initialize_teacache(enable_teacache=False)
|
| 1416 |
|
| 1417 |
-
[max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent,
|
| 1418 |
|
| 1419 |
generated_latents = sample_hunyuan(
|
| 1420 |
transformer=transformer,
|
|
|
|
| 1273 |
end_frame, target_width=width, target_height=height, vae=vae,
|
| 1274 |
image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
|
| 1275 |
)[:2]
|
| 1276 |
+
end_latent = end_latent.to(dtype=torch.float32, device=cpu)
|
| 1277 |
else:
|
| 1278 |
end_latent = end_clip_embedding = None
|
| 1279 |
|
|
|
|
| 1308 |
def callback(d):
|
| 1309 |
return
|
| 1310 |
|
| 1311 |
+
def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent):
|
| 1312 |
# 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
|
| 1313 |
available_frames = history_latents.shape[2] # Number of latent frames
|
| 1314 |
max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4) # Cap at available pixel frames
|
|
|
|
| 1322 |
total_context_frames = num_4x_frames + num_2x_frames + effective_clean_frames
|
| 1323 |
total_context_frames = min(total_context_frames, available_frames) # 20250507 pftq: Edge case for <=1 sec videos
|
| 1324 |
|
| 1325 |
+
indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1) if end_latent is not None else 0)).unsqueeze(0) # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
|
| 1326 |
clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices, blank_indices, clean_latent_indices_post = indices.split(
|
| 1327 |
+
[1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if end_latent is not None else 0, 1 if end_latent is not None else 0], dim=1 # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
|
| 1328 |
)
|
| 1329 |
clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices, clean_latent_indices_post], dim=1)
|
| 1330 |
|
|
|
|
| 1415 |
else:
|
| 1416 |
transformer.initialize_teacache(enable_teacache=False)
|
| 1417 |
|
| 1418 |
+
[max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent)
|
| 1419 |
|
| 1420 |
generated_latents = sample_hunyuan(
|
| 1421 |
transformer=transformer,
|