del
app.py CHANGED
@@ -468,8 +468,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
return [start_latent, image_encoder_last_hidden_state]

[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
-
-
+ del input_image
+ del end_image

# Dtype
@@ -565,7 +565,7 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
[llama_vec, clip_l_pooler, llama_vec_n, clip_l_pooler_n, llama_attention_mask, llama_attention_mask_n] = prompt_parameters[prompt_index]

if prompt_index < len(prompt_parameters) - 1 or (prompt_index == total_latent_sections - 1):
- prompt_parameters[prompt_index]
+ del prompt_parameters[prompt_index]

if not high_vram:
unload_complete_models()
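The single changed line in the hunk above replaces a bare subscript expression, which evaluates the entry and discards the result without freeing anything, with a `del` statement that actually removes the entry from the container. A minimal, self-contained sketch of that difference, assuming `prompt_parameters` is a plain Python list (the real container type in app.py is not shown in this diff):

import gc

# Hypothetical stand-in for the cached per-section prompt embeddings.
prompt_parameters = [("llama_vec_0", "pooler_0"), ("llama_vec_1", "pooler_1")]
prompt_index = 0

prompt_parameters[prompt_index]       # no-op: evaluates the entry, result is discarded
del prompt_parameters[prompt_index]   # removes the entry, dropping the list's reference to it
gc.collect()                          # optional: collect the now-unreferenced tuple sooner

print(len(prompt_parameters))         # -> 1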
@@ -613,6 +613,13 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
clean_latent_4x_indices=clean_latent_4x_indices,
callback=callback,
)
+ del clean_latents
+ del clean_latents_2x
+ del clean_latents_4x
+ del latent_indices
+ del clean_latent_indices
+ del clean_latent_2x_indices
+ del clean_latent_4x_indices

[total_generated_latent_frames, history_latents, history_pixels] = post_process(forward, generated_latents, total_generated_latent_frames, history_latents, high_vram, transformer, gpu, vae, history_pixels, latent_window_size, enable_preview, section_index, total_latent_sections, outputs_folder, mp4_crf, stream)
@@ -626,7 +633,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
zero_latents = history_latents[:, :, total_generated_latent_frames:, :, :]
history_latents = torch.cat([zero_latents, real_history_latents], dim=2)
-
+ del real_history_latents
+ del zero_latents

forward = True
section_index = first_section_index
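The seven `del` statements inserted after the sampling call, and the two added after the `history_latents` reshuffle above, follow the same idea: drop the last references to large per-section tensors before the next iteration so the allocator can reuse that memory. A minimal sketch of the pattern; the tensor names echo the diff, but the loop, shapes, and the explicit cache flush are illustrative rather than taken from app.py:

import gc
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for section_index in range(3):  # stand-in for the per-section generation loop
    # Stand-ins for the conditioning tensors rebuilt each section.
    clean_latents = torch.randn(1, 16, 2, 64, 64, device=device)
    clean_latents_2x = torch.randn(1, 16, 2, 32, 32, device=device)
    latent_indices = torch.arange(9, device=device).unsqueeze(0)

    # ... the sampler would consume these tensors here ...

    # Drop the references as soon as they are no longer needed.
    del clean_latents
    del clean_latents_2x
    del latent_indices

    gc.collect()
    if torch.cuda.is_available():
        # Return cached, currently unused blocks to the CUDA driver.
        torch.cuda.empty_cache()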
@@ -754,8 +762,8 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
return [start_latent, end_latent, image_encoder_last_hidden_state]

[start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
-
-
+ del input_image
+ del end_image

# Dtype
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -905,6 +913,13 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
clean_latent_4x_indices=clean_latent_4x_indices,
callback=callback,
)
+ del clean_latents
+ del clean_latents_2x
+ del clean_latents_4x
+ del latent_indices
+ del clean_latent_indices
+ del clean_latent_2x_indices
+ del clean_latent_4x_indices

[total_generated_latent_frames, history_latents, history_pixels] = post_process(job_id, start_latent, generated_latents, total_generated_latent_frames, history_latents, high_vram, transformer, gpu, vae, history_pixels, latent_window_size, enable_preview, outputs_folder, mp4_crf, stream, is_last_section)
@@ -949,7 +964,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,

# 20250506 pftq: Encode video
start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
-
+ del input_video
start_latent = start_latent.to(dtype=torch.float32, device=cpu)
video_latents = video_latents.cpu()
@@ -987,7 +1002,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
load_model_as_complete(image_encoder, target_device=gpu)

image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
-
+ del input_image_np

# 20250507 pftq: Process end frame if provided
if end_frame is not None:
@@ -999,7 +1014,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
end_frame, target_width=width, target_height=height, vae=vae,
image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
)[0]
-
+ del end_frame
end_latent = end_latent.to(dtype=torch.float32, device=cpu)
else:
end_latent = None
@@ -1009,7 +1024,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
unload_complete_models(image_encoder, vae)

image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
-
+ del image_encoder_output

# Dtype
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -1119,8 +1134,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
history_latents = video_latents
total_generated_latent_frames = history_latents.shape[2]
# 20250506 pftq: Initialize history_pixels to fix UnboundLocalError
- history_pixels = None
- previous_video = None
+ history_pixels = previous_video = None

# 20250509 Generate backwards with end frame for better end frame anchoring
if total_latent_sections > 4:
@@ -1181,13 +1195,13 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
clean_latent_4x_indices=clean_latent_4x_indices,
callback=callback,
)
-
-
-
-
-
-
-
+ del clean_latents
+ del clean_latents_2x
+ del clean_latents_4x
+ del latent_indices
+ del clean_latent_indices
+ del clean_latent_2x_indices
+ del clean_latent_4x_indices

total_generated_latent_frames += int(generated_latents.shape[2])
history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
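To confirm that dropping references such as `image_encoder_output` or the per-section latents actually releases VRAM, PyTorch's allocator counters can be checked before and after. A small sketch, assuming a CUDA device is available; the tensor shape is arbitrary and not taken from app.py:

import gc
import torch

if torch.cuda.is_available():
    big = torch.randn(1, 16, 128, 64, 64, device="cuda")  # arbitrary large latent-like tensor
    print("allocated:", torch.cuda.memory_allocated() // 2**20, "MiB")

    del big                   # drop the only reference to the tensor
    gc.collect()
    torch.cuda.empty_cache()  # hand cached blocks back to the driver
    print("allocated:", torch.cuda.memory_allocated() // 2**20, "MiB")  # should drop back down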