FramePack

Build error

App Files Files Community

Fabrice-TIERCELIN committed on Jun 4, 2025

Commit

c5f9ee1

verified ·

1 Parent(s): ca6455d

Adapt the code

Browse files

Files changed (1) hide show

app.py +8 -136

app.py CHANGED Viewed

@@ -14,7 +14,6 @@ import numpy as np
 import argparse
 import random
 import math
-import time
 # 20250506 pftq: Added for video input loading
 import decord
 # 20250506 pftq: Added for progress bars in video_encode
@@ -107,8 +106,6 @@ stream = AsyncStream()
 outputs_folder = './outputs/'
 os.makedirs(outputs_folder, exist_ok=True)
-input_image_debug_value = input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
 def check_parameters(generation_mode, input_image, input_video):
     if generation_mode == "image" and input_image is None:
         raise gr.Error("Please provide an image to extend.")
@@ -515,10 +512,6 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
     return
 def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
-    global total_second_length_debug_value
-    if total_second_length_debug_value is not None:
-        return min(total_second_length_debug_value * 60 * 10, 600)
     return total_second_length * 60
@@ -538,18 +531,12 @@ def process(input_image, prompt,
             use_teacache=False,
             mp4_crf=16
            ):
-    global stream, input_image_debug_value, prompt_debug_value, total_second_length_debug_value
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
         return None, None, None, None, None, None
-    if input_image_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
-        input_image = input_image_debug_value
-        prompt = prompt_debug_value
-        total_second_length = total_second_length_debug_value
-        input_image_debug_value = prompt_debug_value = total_second_length_debug_value = None
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
@@ -778,28 +765,15 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                     clean_latent_4x_indices=clean_latent_4x_indices,
                     callback=callback,
                 )
-                start = time.time()
                 total_generated_latent_frames += int(generated_latents.shape[2])
                 history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
-                end = time.time()
-                secondes = int(end - start)
-                print("1 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 if not high_vram:
                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
-                end = time.time()
-                secondes = int(end - start)
-                print("2 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
-                end = time.time()
-                secondes = int(end - start)
-                print("3 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 if history_pixels is None:
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
@@ -814,17 +788,9 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                   current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
                   history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
-                end = time.time()
-                secondes = int(end - start)
-                print("4 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 if not high_vram:
                     unload_complete_models()
-                end = time.time()
-                secondes = int(end - start)
-                print("5 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
@@ -834,10 +800,6 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                 # 20250508 pftq: Save prompt to mp4 metadata comments
                 set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
                 print(f"Prompt saved to mp4 metadata comments: {output_filename}")
-                end = time.time()
-                secondes = int(end - start)
-                print("6 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 # 20250506 pftq: Clean up previous partial files
                 if previous_video is not None and os.path.exists(previous_video):
@@ -847,17 +809,10 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                     except Exception as e:
                         print(f"Error deleting previous partial video {previous_video}: {e}")
                 previous_video = output_filename
-                end = time.time()
-                secondes = int(end - start)
-                print("7 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
-                start = time.time()
                 print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
                 stream.output_queue.push(('file', output_filename))
-                end = time.time()
-                secondes = int(end - start)
-                print("8 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
             seed = (seed + 1) % np.iinfo(np.int32).max
@@ -873,26 +828,17 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
     return
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-    global total_second_length_debug_value
-    if total_second_length_debug_value is not None:
-        return min(total_second_length_debug_value * 60 * 10, 600)
     return total_second_length * 60 * 2
 # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
 def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-    global stream, high_vram, input_video_debug_value, prompt_debug_value, total_second_length_debug_value
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
         return None, None, None, None, None, None
-    if input_video_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
-        input_video = input_video_debug_value
-        prompt = prompt_debug_value
-        total_second_length = total_second_length_debug_value
-        input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
@@ -971,9 +917,9 @@ with block:
     if torch.cuda.device_count() == 0:
         with gr.Row():
             gr.HTML("""
-    <p style="background-color: red;"><big><big><big><b>⚠️To use FramePack, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/FramePack?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
-    You can't use FramePack directly here because this space runs on a CPU, which is not enough for FramePack. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/FramePack/discussions/new">feedback</a> if you have issues.
     </big></big></big></p>
     """)
     gr.HTML(title_html)
@@ -983,7 +929,7 @@ with block:
             text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
             input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
             input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
-            prompt = gr.Textbox(label="Prompt", value='', info='Use ; to separate in time', placeholder="The creature starts to move, fast motion, fixed camera")
             prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Not for video extension')
             prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
@@ -1040,12 +986,6 @@ with block:
                 mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs. ")
-            with gr.Accordion("Debug", open=False):
-                input_image_debug = gr.Image(type="numpy", label="Image Debug", height=320)
-                input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
-                prompt_debug = gr.Textbox(label="Prompt Debug", value='')
-                total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=1, step=0.1)
         with gr.Column():
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
             result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
@@ -1064,8 +1004,7 @@ with block:
     ], outputs = [], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
     end_button.click(fn=end_process)
-    with gr.Row(elem_id="image_examples", visible=False):
-        gr.Examples(
         examples = [
                 [
                     "./img_examples/Example1.png", # input_image
@@ -1100,41 +1039,7 @@ with block:
                     6, # gpu_memory_preservation
                     False, # use_teacache
                     16 # mp4_crf
-                ],
-                [
-                    "./img_examples/Example1.png", # input_image
-                    "We are sinking, photorealistic, realistic, intricate details, 8k, insanely detailed",
-                    "image", # generation_mode
-                    "Missing arm, unrealistic position, blurred, blurry", # n_prompt
-                    True, # randomize_seed
-                    42, # seed
-                    1, # total_second_length
-                    9, # latent_window_size
-                    25, # steps
-                    1.0, # cfg
-                    10.0, # gs
-                    0.0, # rs
-                    6, # gpu_memory_preservation
-                    False, # use_teacache
-                    16 # mp4_crf
-                ],
-                [
-                    "./img_examples/Example1.png", # input_image
-                    "A boat is passing, photorealistic, realistic, intricate details, 8k, insanely detailed",
-                    "image", # generation_mode
-                    "Missing arm, unrealistic position, blurred, blurry", # n_prompt
-                    True, # randomize_seed
-                    42, # seed
-                    1, # total_second_length
-                    9, # latent_window_size
-                    25, # steps
-                    1.0, # cfg
-                    10.0, # gs
-                    0.0, # rs
-                    6, # gpu_memory_preservation
-                    False, # use_teacache
-                    16 # mp4_crf
-                ],
             ],
         run_on_click = True,
         fn = process,
@@ -1143,8 +1048,7 @@ with block:
         cache_examples = torch.cuda.device_count() > 0,
     )
-    with gr.Row(elem_id="video_examples", visible=False):
-        gr.Examples(
         examples = [
                 [
                     "./img_examples/Example1.mp4", # input_video
@@ -1185,42 +1089,10 @@ with block:
             return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True)]
-    def handle_field_debug_change(input_image_debug_data, input_video_debug_data, prompt_debug_data, total_second_length_debug_data):
-        global input_image_debug_value, input_video_debug_value, prompt_debug_value, total_second_length_debug_value
-        input_image_debug_value = input_image_debug_data
-        input_video_debug_value = input_video_debug_data
-        prompt_debug_value = prompt_debug_data
-        total_second_length_debug_value = total_second_length_debug_data
-        return []
     generation_mode.change(
         fn=handle_generation_mode_change,
         inputs=[generation_mode],
         outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video]
     )
-    input_image_debug.upload(
-        fn=handle_field_debug_change,
-        inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
-        outputs=[]
-    )
-    input_video_debug.upload(
-        fn=handle_field_debug_change,
-        inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
-        outputs=[]
-    )
-    prompt_debug.change(
-        fn=handle_field_debug_change,
-        inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
-        outputs=[]
-    )
-    total_second_length_debug.change(
-        fn=handle_field_debug_change,
-        inputs=[input_image_debug, input_video_debug, prompt_debug, total_second_length_debug],
-        outputs=[]
-    )
 block.launch(mcp_server=False, ssr_mode=False)

 import argparse
 import random
 import math
 # 20250506 pftq: Added for video input loading
 import decord
 # 20250506 pftq: Added for progress bars in video_encode
 outputs_folder = './outputs/'
 os.makedirs(outputs_folder, exist_ok=True)
 def check_parameters(generation_mode, input_image, input_video):
     if generation_mode == "image" and input_image is None:
         raise gr.Error("Please provide an image to extend.")
     return
 def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
     """Return ``total_second_length`` scaled by 60.

     Only ``total_second_length`` influences the result; every other
     parameter is accepted and ignored.

     NOTE(review): presumably this is a GPU time-budget estimate used the
     same way as ``get_duration_video`` -- confirm against the decorator
     on ``process``, which is not visible here.
     """
     budget_per_requested_second = 60
     estimated_budget = total_second_length * budget_per_requested_second
     return estimated_budget
             use_teacache=False,
             mp4_crf=16
            ):
+    global stream
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
         return None, None, None, None, None, None
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
                     clean_latent_4x_indices=clean_latent_4x_indices,
                     callback=callback,
                 )
                 total_generated_latent_frames += int(generated_latents.shape[2])
                 history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
                 if not high_vram:
                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
                 real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
                 if history_pixels is None:
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
                   current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
                   history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
                 if not high_vram:
                     unload_complete_models()
                 output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
                 # 20250508 pftq: Save prompt to mp4 metadata comments
                 set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
                 print(f"Prompt saved to mp4 metadata comments: {output_filename}")
                 # 20250506 pftq: Clean up previous partial files
                 if previous_video is not None and os.path.exists(previous_video):
                     except Exception as e:
                         print(f"Error deleting previous partial video {previous_video}: {e}")
                 previous_video = output_filename
                 print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
                 stream.output_queue.push(('file', output_filename))
             seed = (seed + 1) % np.iinfo(np.int32).max
     return
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     """Return ``total_second_length`` scaled by 60 and then doubled.

     This function is supplied as the ``duration`` argument of the
     ``@spaces.GPU`` decorator on ``process_video``. Only
     ``total_second_length`` influences the result; every other parameter
     is accepted and ignored.

     NOTE(review): the returned value is presumably a number of seconds --
     confirm against the ``spaces.GPU`` documentation.
     """
     base_budget = total_second_length * 60
     # Video extension gets twice the base budget.
     return base_budget * 2
 # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
 def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+    global stream
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
         return None, None, None, None, None, None
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
     if torch.cuda.device_count() == 0:
         with gr.Row():
             gr.HTML("""
+    <p style="background-color: red;"><big><big><big><b>⚠️To use FramePack, <a href="?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
+    You can't use FramePack directly here because this space runs on a CPU, which is not enough for FramePack. Please provide <a href="discussions/new">feedback</a> if you have issues.
     </big></big></big></p>
     """)
     gr.HTML(title_html)
             text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
             input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
             input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
+            prompt = gr.Textbox(label="Prompt", value='', info='Use ; to separate in time', placeholder="The creature starts to move, fast motion, focus motion, consistent arm, consistent position, fixed camera")
             prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Not for video extension')
             prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
                 mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs. ")
         with gr.Column():
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
             result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
     ], outputs = [], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
     end_button.click(fn=end_process)
+    gr.Examples(
         examples = [
                 [
                     "./img_examples/Example1.png", # input_image
                     6, # gpu_memory_preservation
                     False, # use_teacache
                     16 # mp4_crf
+                ]
             ],
         run_on_click = True,
         fn = process,
         cache_examples = torch.cuda.device_count() > 0,
     )
+    gr.Examples(
         examples = [
                 [
                     "./img_examples/Example1.mp4", # input_video
             return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True)]
     generation_mode.change(
         fn=handle_generation_mode_change,
         inputs=[generation_mode],
         outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video]
     )
 block.launch(mcp_server=False, ssr_mode=False)