Video extension optimization
Browse files
app.py
CHANGED
|
@@ -755,15 +755,15 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
|
|
| 755 |
offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
|
| 756 |
load_model_as_complete(vae, target_device=gpu)
|
| 757 |
|
| 758 |
-
real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
|
| 759 |
-
|
| 760 |
if history_pixels is None:
|
|
|
|
| 761 |
history_pixels = vae_decode(real_history_latents, vae).cpu()
|
| 762 |
else:
|
| 763 |
-
|
| 764 |
-
|
| 765 |
|
| 766 |
-
|
|
|
|
| 767 |
|
| 768 |
if not high_vram:
|
| 769 |
unload_complete_models()
|
|
@@ -832,7 +832,7 @@ def process(input_image,
|
|
| 832 |
|
| 833 |
if torch.cuda.device_count() == 0:
|
| 834 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 835 |
-
yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 836 |
return
|
| 837 |
|
| 838 |
if randomize_seed:
|
|
@@ -846,7 +846,7 @@ def process(input_image,
|
|
| 846 |
input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
|
| 847 |
print("No input image provided. Using a blank white image.")
|
| 848 |
|
| 849 |
-
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
|
| 850 |
|
| 851 |
stream = AsyncStream()
|
| 852 |
|
|
@@ -859,11 +859,11 @@ def process(input_image,
|
|
| 859 |
|
| 860 |
if flag == 'file':
|
| 861 |
output_filename = data
|
| 862 |
-
yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
|
| 863 |
|
| 864 |
if flag == 'progress':
|
| 865 |
preview, desc, html = data
|
| 866 |
-
yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
|
| 867 |
|
| 868 |
if flag == 'end':
|
| 869 |
end = time.time()
|
|
@@ -872,15 +872,15 @@ def process(input_image,
|
|
| 872 |
secondes = secondes - (minutes * 60)
|
| 873 |
hours = math.floor(minutes / 60)
|
| 874 |
minutes = minutes - (hours * 60)
|
| 875 |
-
yield output_filename, gr.update(visible=False), gr.update(), "The
|
| 876 |
((str(hours) + " h, ") if hours != 0 else "") + \
|
| 877 |
((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
|
| 878 |
str(secondes) + " sec. " + \
|
| 879 |
-
"You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character.", gr.update(interactive=True), gr.update(interactive=False)
|
| 880 |
break
|
| 881 |
|
| 882 |
def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 883 |
-
return total_second_length * 60 * (
|
| 884 |
|
| 885 |
# 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
|
| 886 |
@spaces.GPU(duration=get_duration_video)
|
|
@@ -890,7 +890,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
|
|
| 890 |
|
| 891 |
if torch.cuda.device_count() == 0:
|
| 892 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 893 |
-
yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
|
| 894 |
return
|
| 895 |
|
| 896 |
if randomize_seed:
|
|
@@ -901,7 +901,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
|
|
| 901 |
# 20250506 pftq: Updated assertion for video input
|
| 902 |
assert input_video is not None, 'No input video!'
|
| 903 |
|
| 904 |
-
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
|
| 905 |
|
| 906 |
# 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
|
| 907 |
if high_vram and (no_resize or resolution>640):
|
|
@@ -928,12 +928,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
|
|
| 928 |
|
| 929 |
if flag == 'file':
|
| 930 |
output_filename = data
|
| 931 |
-
yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
|
| 932 |
|
| 933 |
if flag == 'progress':
|
| 934 |
preview, desc, html = data
|
| 935 |
-
|
| 936 |
-
yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True) # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
|
| 937 |
|
| 938 |
if flag == 'end':
|
| 939 |
end = time.time()
|
|
@@ -943,11 +942,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
|
|
| 943 |
hours = math.floor(minutes / 60)
|
| 944 |
minutes = minutes - (hours * 60)
|
| 945 |
yield output_filename, gr.update(visible=False), desc + \
|
| 946 |
-
" The
|
| 947 |
((str(hours) + " h, ") if hours != 0 else "") + \
|
| 948 |
((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
|
| 949 |
str(secondes) + " sec. " + \
|
| 950 |
-
"
|
| 951 |
break
|
| 952 |
|
| 953 |
def end_process():
|
|
@@ -1090,6 +1089,7 @@ with block:
|
|
| 1090 |
seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
|
| 1091 |
|
| 1092 |
with gr.Column():
|
|
|
|
| 1093 |
preview_image = gr.Image(label="Next Latents", height=200, visible=False)
|
| 1094 |
result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
|
| 1095 |
progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
|
|
@@ -1261,7 +1261,7 @@ with block:
|
|
| 1261 |
raise gr.Error("Please provide an image to extend.")
|
| 1262 |
if generation_mode == "video" and input_video is None:
|
| 1263 |
raise gr.Error("Please provide a video to extend.")
|
| 1264 |
-
return gr.update(interactive=True)
|
| 1265 |
|
| 1266 |
def handle_generation_mode_change(generation_mode_data):
|
| 1267 |
if generation_mode_data == "text":
|
|
@@ -1275,10 +1275,10 @@ with block:
|
|
| 1275 |
timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
|
| 1276 |
start_button.click(fn = check_parameters, inputs = [
|
| 1277 |
generation_mode, input_image, input_video
|
| 1278 |
-
], outputs = [end_button], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button])
|
| 1279 |
start_button_video.click(fn = check_parameters, inputs = [
|
| 1280 |
generation_mode, input_image, input_video
|
| 1281 |
-
], outputs = [end_button], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
|
| 1282 |
end_button.click(fn=end_process)
|
| 1283 |
|
| 1284 |
generation_mode.change(fn = save_preferences, inputs = [
|
|
|
|
| 755 |
offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
|
| 756 |
load_model_as_complete(vae, target_device=gpu)
|
| 757 |
|
|
|
|
|
|
|
| 758 |
if history_pixels is None:
|
| 759 |
+
real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
|
| 760 |
history_pixels = vae_decode(real_history_latents, vae).cpu()
|
| 761 |
else:
|
| 762 |
+
section_latent_frames = latent_window_size * 2
|
| 763 |
+
overlapped_frames = min(latent_window_size * 4 - 3, history_pixels.shape[2])
|
| 764 |
|
| 765 |
+
real_history_latents = history_latents[:, :, -min(total_generated_latent_frames, section_latent_frames):, :, :]
|
| 766 |
+
history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
|
| 767 |
|
| 768 |
if not high_vram:
|
| 769 |
unload_complete_models()
|
|
|
|
| 832 |
|
| 833 |
if torch.cuda.device_count() == 0:
|
| 834 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 835 |
+
yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
|
| 836 |
return
|
| 837 |
|
| 838 |
if randomize_seed:
|
|
|
|
| 846 |
input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
|
| 847 |
print("No input image provided. Using a blank white image.")
|
| 848 |
|
| 849 |
+
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
|
| 850 |
|
| 851 |
stream = AsyncStream()
|
| 852 |
|
|
|
|
| 859 |
|
| 860 |
if flag == 'file':
|
| 861 |
output_filename = data
|
| 862 |
+
yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
|
| 863 |
|
| 864 |
if flag == 'progress':
|
| 865 |
preview, desc, html = data
|
| 866 |
+
yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
|
| 867 |
|
| 868 |
if flag == 'end':
|
| 869 |
end = time.time()
|
|
|
|
| 872 |
secondes = secondes - (minutes * 60)
|
| 873 |
hours = math.floor(minutes / 60)
|
| 874 |
minutes = minutes - (hours * 60)
|
| 875 |
+
yield output_filename, gr.update(visible=False), gr.update(), "The process has lasted " + \
|
| 876 |
((str(hours) + " h, ") if hours != 0 else "") + \
|
| 877 |
((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
|
| 878 |
str(secondes) + " sec. " + \
|
| 879 |
+
"You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
|
| 880 |
break
|
| 881 |
|
| 882 |
def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
    """Estimate the duration budget for one ``process_video`` run.

    This function is handed to ``@spaces.GPU(duration=...)``, so it takes the
    same parameter list as the decorated function; only three of the
    parameters take part in the estimate.

    Args:
        total_second_length: Requested length of the generated video; the
            estimate grows linearly with it.
        steps: Number of sampling steps; 25 steps is the neutral point of
            the scaling below.
        use_teacache: Selects the smaller base factor when truthy.
        (all other parameters): Accepted for signature compatibility and
            ignored.

    Returns:
        ``total_second_length * 60 * base_factor * steps_factor`` -- a float
        whenever ``steps`` goes through the true division below.
        NOTE(review): presumably interpreted as seconds by ``spaces.GPU`` --
        confirm against the ZeroGPU documentation.
    """
    # Base multiplier: 1.5 with TeaCache enabled, 2.5 without it.
    base_factor = 1.5 if use_teacache else 2.5
    # Every step above (below) 25 adds (removes) 1 % of the budget.
    steps_factor = 1 + ((steps - 25) / 100)
    return total_second_length * 60 * base_factor * steps_factor
|
| 884 |
|
| 885 |
# 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
|
| 886 |
@spaces.GPU(duration=get_duration_video)
|
|
|
|
| 890 |
|
| 891 |
if torch.cuda.device_count() == 0:
|
| 892 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 893 |
+
yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
|
| 894 |
return
|
| 895 |
|
| 896 |
if randomize_seed:
|
|
|
|
| 901 |
# 20250506 pftq: Updated assertion for video input
|
| 902 |
assert input_video is not None, 'No input video!'
|
| 903 |
|
| 904 |
+
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
|
| 905 |
|
| 906 |
# 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
|
| 907 |
if high_vram and (no_resize or resolution>640):
|
|
|
|
| 928 |
|
| 929 |
if flag == 'file':
|
| 930 |
output_filename = data
|
| 931 |
+
yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
|
| 932 |
|
| 933 |
if flag == 'progress':
|
| 934 |
preview, desc, html = data
|
| 935 |
+
yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
|
|
|
|
| 936 |
|
| 937 |
if flag == 'end':
|
| 938 |
end = time.time()
|
|
|
|
| 942 |
hours = math.floor(minutes / 60)
|
| 943 |
minutes = minutes - (hours * 60)
|
| 944 |
yield output_filename, gr.update(visible=False), desc + \
|
| 945 |
+
" The process has lasted " + \
|
| 946 |
((str(hours) + " h, ") if hours != 0 else "") + \
|
| 947 |
((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
|
| 948 |
str(secondes) + " sec. " + \
|
| 949 |
+
" You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", '', gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
|
| 950 |
break
|
| 951 |
|
| 952 |
def end_process():
|
|
|
|
| 1089 |
seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
|
| 1090 |
|
| 1091 |
with gr.Column():
|
| 1092 |
+
warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
|
| 1093 |
preview_image = gr.Image(label="Next Latents", height=200, visible=False)
|
| 1094 |
result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
|
| 1095 |
progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
|
|
|
|
| 1261 |
raise gr.Error("Please provide an image to extend.")
|
| 1262 |
if generation_mode == "video" and input_video is None:
|
| 1263 |
raise gr.Error("Please provide a video to extend.")
|
| 1264 |
+
return [gr.update(interactive=True), gr.update(visible = True)]
|
| 1265 |
|
| 1266 |
def handle_generation_mode_change(generation_mode_data):
|
| 1267 |
if generation_mode_data == "text":
|
|
|
|
| 1275 |
timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
|
| 1276 |
start_button.click(fn = check_parameters, inputs = [
|
| 1277 |
generation_mode, input_image, input_video
|
| 1278 |
+
], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning])
|
| 1279 |
start_button_video.click(fn = check_parameters, inputs = [
|
| 1280 |
generation_mode, input_image, input_video
|
| 1281 |
+
], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning])
|
| 1282 |
end_button.click(fn=end_process)
|
| 1283 |
|
| 1284 |
generation_mode.change(fn = save_preferences, inputs = [
|