FramePack

Build error

App Files Files Community

Fabrice-TIERCELIN committed on Jun 21, 2025

Commit

981bb4b

verified ·

1 Parent(s): 2a67a85

Video extension optimization

Browse files

Files changed (1) hide show

app.py +22 -22

app.py CHANGED Viewed

@@ -755,15 +755,15 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
-                real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
                 if history_pixels is None:
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
                 else:
-                  section_latent_frames = latent_window_size * 2
-                  overlapped_frames = min(latent_window_size * 4 - 3, history_pixels.shape[2])
-                  history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu(), overlapped_frames)
                 if not high_vram:
                     unload_complete_models()
@@ -832,7 +832,7 @@ def process(input_image,
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
-        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
         return
     if randomize_seed:
@@ -846,7 +846,7 @@ def process(input_image,
         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
         print("No input image provided. Using a blank white image.")
-    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
     stream = AsyncStream()
@@ -859,11 +859,11 @@ def process(input_image,
         if flag == 'file':
             output_filename = data
-            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
         if flag == 'progress':
             preview, desc, html = data
-            yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
         if flag == 'end':
             end = time.time()
@@ -872,15 +872,15 @@ def process(input_image,
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
-            yield output_filename, gr.update(visible=False), gr.update(), "The video has been generated in " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
-            "You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character.", gr.update(interactive=True), gr.update(interactive=False)
             break
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-    return total_second_length * 60 * (0.9 if use_teacache else 2.3) * (1 + ((steps - 25) / 100))
 # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
@@ -890,7 +890,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
-        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
         return
     if randomize_seed:
@@ -901,7 +901,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
     # 20250506 pftq: Updated assertion for video input
     assert input_video is not None, 'No input video!'
-    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
     if high_vram and (no_resize or resolution>640):
@@ -928,12 +928,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
         if flag == 'file':
             output_filename = data
-            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
         if flag == 'progress':
             preview, desc, html = data
-            #yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
-            yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True) # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
         if flag == 'end':
             end = time.time()
@@ -943,11 +942,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
             yield output_filename, gr.update(visible=False), desc + \
-            " The video has been generated in " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
-            " Video complete. You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character.", '', gr.update(interactive=True), gr.update(interactive=False)
             break
 def end_process():
@@ -1090,6 +1089,7 @@ with block:
                     seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
         with gr.Column():
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
             result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
             progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
@@ -1261,7 +1261,7 @@ with block:
             raise gr.Error("Please provide an image to extend.")
         if generation_mode == "video" and input_video is None:
             raise gr.Error("Please provide a video to extend.")
-        return gr.update(interactive=True)
     def handle_generation_mode_change(generation_mode_data):
         if generation_mode_data == "text":
@@ -1275,10 +1275,10 @@ with block:
     timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
     start_button.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
-    ], outputs = [end_button], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button])
     start_button_video.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
-    ], outputs = [end_button], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
     end_button.click(fn=end_process)
     generation_mode.change(fn = save_preferences, inputs = [

                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
                 if history_pixels is None:
+                    real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
                 else:
+                    section_latent_frames = latent_window_size * 2
+                    overlapped_frames = min(latent_window_size * 4 - 3, history_pixels.shape[2])
+                    real_history_latents = history_latents[:, :, -min(total_generated_latent_frames, section_latent_frames):, :, :]
+                    history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
                 if not high_vram:
                     unload_complete_models()
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
+        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
         return
     if randomize_seed:
         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
         print("No input image provided. Using a blank white image.")
+    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     stream = AsyncStream()
         if flag == 'file':
             output_filename = data
+            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
+            yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'end':
             end = time.time()
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
+            yield output_filename, gr.update(visible=False), gr.update(), "The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
+            "You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
             break
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+    return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
 # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
+        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
         return
     if randomize_seed:
     # 20250506 pftq: Updated assertion for video input
     assert input_video is not None, 'No input video!'
+    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
     if high_vram and (no_resize or resolution>640):
         if flag == 'file':
             output_filename = data
+            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
+            yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
         if flag == 'end':
             end = time.time()
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
             yield output_filename, gr.update(visible=False), desc + \
+            " The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
+            " You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", '', gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
             break
 def end_process():
                     seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
         with gr.Column():
+            warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
             result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
             progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
             raise gr.Error("Please provide an image to extend.")
         if generation_mode == "video" and input_video is None:
             raise gr.Error("Please provide a video to extend.")
+        return [gr.update(interactive=True), gr.update(visible = True)]
     def handle_generation_mode_change(generation_mode_data):
         if generation_mode_data == "text":
     timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
     start_button.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
+    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning])
     start_button_video.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
+    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning])
     end_button.click(fn=end_process)
     generation_mode.change(fn = save_preferences, inputs = [