Spaces:
Runtime error
Optimize memory
Browse files
app.py
CHANGED
|
@@ -468,6 +468,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
|
|
| 468 |
return [start_latent, image_encoder_last_hidden_state]
|
| 469 |
|
| 470 |
[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
|
|
|
|
|
|
| 471 |
|
| 472 |
# Dtype
|
| 473 |
|
|
@@ -752,6 +754,8 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
|
|
| 752 |
return [start_latent, end_latent, image_encoder_last_hidden_state]
|
| 753 |
|
| 754 |
[start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
|
|
|
|
|
|
|
| 755 |
|
| 756 |
# Dtype
|
| 757 |
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
|
|
@@ -766,7 +770,6 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
|
|
| 766 |
start_latent = start_latent.to(history_latents)
|
| 767 |
if has_end_image:
|
| 768 |
end_latent = end_latent.to(history_latents)
|
| 769 |
-
end_latent = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
|
| 770 |
|
| 771 |
history_pixels = None
|
| 772 |
total_generated_latent_frames = 0
|
|
@@ -859,7 +862,7 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
|
|
| 859 |
|
| 860 |
# Use end image latent for the first section if provided
|
| 861 |
if has_end_image and is_first_section:
|
| 862 |
-
clean_latents_post = end_latent
|
| 863 |
|
| 864 |
clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)
|
| 865 |
|
|
@@ -946,6 +949,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
|
|
| 946 |
|
| 947 |
# 20250506 pftq: Encode video
|
| 948 |
start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
|
|
|
|
| 949 |
start_latent = start_latent.to(dtype=torch.float32, device=cpu)
|
| 950 |
video_latents = video_latents.cpu()
|
| 951 |
|
|
@@ -994,6 +998,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
|
|
| 994 |
end_frame, target_width=width, target_height=height, vae=vae,
|
| 995 |
image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
|
| 996 |
)[:2]
|
|
|
|
| 997 |
end_latent = end_latent.to(dtype=torch.float32, device=cpu)
|
| 998 |
else:
|
| 999 |
end_latent = end_clip_embedding = None
|
|
@@ -1775,7 +1780,7 @@ with block:
|
|
| 1775 |
"./img_examples/Example5.png", # input_image
|
| 1776 |
"./img_examples/Example6.png", # end_image
|
| 1777 |
0, # image_position
|
| 1778 |
-
|
| 1779 |
"A woman jumps out of the train and arrives on the ground, viewed from the outside, photorealistic, realistic, amateur photography, midday, insanely detailed, 8k",
|
| 1780 |
"start_end", # generation_mode
|
| 1781 |
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, poorly framed, blurred, blurry, over-smooth, jumpcut, crossfader, crossfading", # n_prompt
|
|
|
|
| 468 |
return [start_latent, image_encoder_last_hidden_state]
|
| 469 |
|
| 470 |
[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
| 471 |
+
input_image = None
|
| 472 |
+
end_image = None
|
| 473 |
|
| 474 |
# Dtype
|
| 475 |
|
|
|
|
| 754 |
return [start_latent, end_latent, image_encoder_last_hidden_state]
|
| 755 |
|
| 756 |
[start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
|
| 757 |
+
input_image = None
|
| 758 |
+
end_image = None
|
| 759 |
|
| 760 |
# Dtype
|
| 761 |
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
|
|
|
|
| 770 |
start_latent = start_latent.to(history_latents)
|
| 771 |
if has_end_image:
|
| 772 |
end_latent = end_latent.to(history_latents)
|
|
|
|
| 773 |
|
| 774 |
history_pixels = None
|
| 775 |
total_generated_latent_frames = 0
|
|
|
|
| 862 |
|
| 863 |
# Use end image latent for the first section if provided
|
| 864 |
if has_end_image and is_first_section:
|
| 865 |
+
clean_latents_post = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
|
| 866 |
|
| 867 |
clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)
|
| 868 |
|
|
|
|
| 949 |
|
| 950 |
# 20250506 pftq: Encode video
|
| 951 |
start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
|
| 952 |
+
input_video = None
|
| 953 |
start_latent = start_latent.to(dtype=torch.float32, device=cpu)
|
| 954 |
video_latents = video_latents.cpu()
|
| 955 |
|
|
|
|
| 998 |
end_frame, target_width=width, target_height=height, vae=vae,
|
| 999 |
image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
|
| 1000 |
)[:2]
|
| 1001 |
+
end_frame = None
|
| 1002 |
end_latent = end_latent.to(dtype=torch.float32, device=cpu)
|
| 1003 |
else:
|
| 1004 |
end_latent = end_clip_embedding = None
|
|
|
|
| 1780 |
"./img_examples/Example5.png", # input_image
|
| 1781 |
"./img_examples/Example6.png", # end_image
|
| 1782 |
0, # image_position
|
| 1783 |
+
1, # end_stillness
|
| 1784 |
"A woman jumps out of the train and arrives on the ground, viewed from the outside, photorealistic, realistic, amateur photography, midday, insanely detailed, 8k",
|
| 1785 |
"start_end", # generation_mode
|
| 1786 |
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, poorly framed, blurred, blurry, over-smooth, jumpcut, crossfader, crossfading", # n_prompt
|