Fabrice-TIERCELIN committed on
Commit
a0636cb
·
verified ·
1 Parent(s): 8cb4781

end_latent = end_latent.to(dtype=torch.float32, device=cpu)

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -1273,6 +1273,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
1273
  end_frame, target_width=width, target_height=height, vae=vae,
1274
  image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
1275
  )[:2]
 
1276
  else:
1277
  end_latent = end_clip_embedding = None
1278
 
@@ -1307,7 +1308,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
1307
  def callback(d):
1308
  return
1309
 
1310
- def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_frame):
1311
  # 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
1312
  available_frames = history_latents.shape[2] # Number of latent frames
1313
  max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4) # Cap at available pixel frames
@@ -1321,9 +1322,9 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
1321
  total_context_frames = num_4x_frames + num_2x_frames + effective_clean_frames
1322
  total_context_frames = min(total_context_frames, available_frames) # 20250507 pftq: Edge case for <=1 sec videos
1323
 
1324
- indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1) if end_frame is not None else 0)).unsqueeze(0) # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
1325
  clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices, blank_indices, clean_latent_indices_post = indices.split(
1326
- [1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if end_frame is not None else 0, 1 if end_frame is not None else 0], dim=1 # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
1327
  )
1328
  clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices, clean_latent_indices_post], dim=1)
1329
 
@@ -1414,7 +1415,7 @@ def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, res
1414
  else:
1415
  transformer.initialize_teacache(enable_teacache=False)
1416
 
1417
- [max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_frame)
1418
 
1419
  generated_latents = sample_hunyuan(
1420
  transformer=transformer,
 
1273
  end_frame, target_width=width, target_height=height, vae=vae,
1274
  image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
1275
  )[:2]
1276
+ end_latent = end_latent.to(dtype=torch.float32, device=cpu)
1277
  else:
1278
  end_latent = end_clip_embedding = None
1279
 
 
1308
  def callback(d):
1309
  return
1310
 
1311
+ def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent):
1312
  # 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
1313
  available_frames = history_latents.shape[2] # Number of latent frames
1314
  max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4) # Cap at available pixel frames
 
1322
  total_context_frames = num_4x_frames + num_2x_frames + effective_clean_frames
1323
  total_context_frames = min(total_context_frames, available_frames) # 20250507 pftq: Edge case for <=1 sec videos
1324
 
1325
+ indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1) if end_latent is not None else 0)).unsqueeze(0) # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
1326
  clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices, blank_indices, clean_latent_indices_post = indices.split(
1327
+ [1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if end_latent is not None else 0, 1 if end_latent is not None else 0], dim=1 # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
1328
  )
1329
  clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices, clean_latent_indices_post], dim=1)
1330
 
 
1415
  else:
1416
  transformer.initialize_teacache(enable_teacache=False)
1417
 
1418
+ [max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent)
1419
 
1420
  generated_latents = sample_hunyuan(
1421
  transformer=transformer,