Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- README.md +14 -14
- app.py +0 -0
- app_endframe.py +30 -19
- requirements.txt +21 -38
README.md
CHANGED
|
@@ -1,21 +1,21 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
|
|
|
|
|
|
|
|
|
| 3 |
sdk: gradio
|
| 4 |
-
|
| 5 |
-
sdk_version: 4.38.1
|
| 6 |
app_file: app.py
|
| 7 |
-
license:
|
| 8 |
-
|
| 9 |
-
colorTo: pink
|
| 10 |
tags:
|
| 11 |
-
-
|
| 12 |
-
-
|
| 13 |
-
-
|
| 14 |
-
-
|
| 15 |
-
- Img-to-Img
|
| 16 |
-
- Img-2-Img
|
| 17 |
- language models
|
| 18 |
- LLMs
|
| 19 |
-
short_description: Restore blurred or small images with prompt
|
| 20 |
suggested_hardware: zero-a10g
|
| 21 |
-
---
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: FramePack/HunyuanVideo
|
| 3 |
+
emoji: 🎥
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.29.1
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: Text-to-Video/Image-to-Video/Video extender (timed prompt)
|
|
|
|
| 11 |
tags:
|
| 12 |
+
- Image-to-Video
|
| 13 |
+
- Image-2-Video
|
| 14 |
+
- Img-to-Vid
|
| 15 |
+
- Img-2-Vid
|
|
|
|
|
|
|
| 16 |
- language models
|
| 17 |
- LLMs
|
|
|
|
| 18 |
suggested_hardware: zero-a10g
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app_endframe.py
CHANGED
|
@@ -108,7 +108,10 @@ stream = AsyncStream()
|
|
| 108 |
outputs_folder = './outputs/'
|
| 109 |
os.makedirs(outputs_folder, exist_ok=True)
|
| 110 |
|
| 111 |
-
input_video_debug_value =
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# 20250506 pftq: Added function to encode input video frames into latents
|
| 114 |
@torch.no_grad()
|
|
@@ -588,7 +591,8 @@ def worker(input_video, end_frame, end_frame_weight, prompt, n_prompt, seed, bat
|
|
| 588 |
)
|
| 589 |
|
| 590 |
if is_start_of_video:
|
| 591 |
-
generated_latents = torch.cat([video_latents[:, :, -1:].to(generated_latents), generated_latents], dim=2)
|
|
|
|
| 592 |
|
| 593 |
total_generated_latent_frames += int(generated_latents.shape[2])
|
| 594 |
history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
|
|
@@ -669,9 +673,8 @@ def get_duration(
|
|
| 669 |
randomize_seed,
|
| 670 |
seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
|
| 671 |
no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
return min(total_second_length_debug_value * 60 * 2, 600)
|
| 675 |
return total_second_length * 60 * 2
|
| 676 |
|
| 677 |
@spaces.GPU(duration=get_duration)
|
|
@@ -680,17 +683,18 @@ def process(
|
|
| 680 |
randomize_seed,
|
| 681 |
seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
|
| 682 |
no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 683 |
-
global stream, high_vram
|
| 684 |
|
| 685 |
if torch.cuda.device_count() == 0:
|
| 686 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 687 |
return None, None, None, None, None, None
|
| 688 |
|
| 689 |
-
if input_video_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
|
| 690 |
-
input_video = input_video_debug_value
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
|
|
|
| 694 |
|
| 695 |
if randomize_seed:
|
| 696 |
seed = random.randint(0, np.iinfo(np.int32).max)
|
|
@@ -813,6 +817,7 @@ with block:
|
|
| 813 |
|
| 814 |
with gr.Accordion("Debug", open=False):
|
| 815 |
input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
|
|
|
|
| 816 |
prompt_debug = gr.Textbox(label="Prompt Debug", value='')
|
| 817 |
total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=5, step=0.1)
|
| 818 |
|
|
@@ -885,28 +890,34 @@ with block:
|
|
| 885 |
end_button.click(fn=end_process)
|
| 886 |
|
| 887 |
|
| 888 |
-
def handle_field_debug_change(input_video_debug_data, prompt_debug_data, total_second_length_debug_data):
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
prompt_debug_value = prompt_debug_data
|
| 892 |
-
total_second_length_debug_value = total_second_length_debug_data
|
| 893 |
return []
|
| 894 |
|
| 895 |
input_video_debug.upload(
|
| 896 |
fn=handle_field_debug_change,
|
| 897 |
-
inputs=[input_video_debug, prompt_debug, total_second_length_debug],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
outputs=[]
|
| 899 |
)
|
| 900 |
|
| 901 |
prompt_debug.change(
|
| 902 |
fn=handle_field_debug_change,
|
| 903 |
-
inputs=[input_video_debug, prompt_debug, total_second_length_debug],
|
| 904 |
outputs=[]
|
| 905 |
)
|
| 906 |
|
| 907 |
total_second_length_debug.change(
|
| 908 |
fn=handle_field_debug_change,
|
| 909 |
-
inputs=[input_video_debug, prompt_debug, total_second_length_debug],
|
| 910 |
outputs=[]
|
| 911 |
)
|
| 912 |
|
|
|
|
| 108 |
outputs_folder = './outputs/'
|
| 109 |
os.makedirs(outputs_folder, exist_ok=True)
|
| 110 |
|
| 111 |
+
input_video_debug_value = [None]
|
| 112 |
+
end_frame_debug_value = [None]
|
| 113 |
+
prompt_debug_value = [None]
|
| 114 |
+
total_second_length_debug_value = [None]
|
| 115 |
|
| 116 |
# 20250506 pftq: Added function to encode input video frames into latents
|
| 117 |
@torch.no_grad()
|
|
|
|
| 591 |
)
|
| 592 |
|
| 593 |
if is_start_of_video:
|
| 594 |
+
#generated_latents = torch.cat([video_latents[:, :, -1:].to(generated_latents), generated_latents], dim=2)
|
| 595 |
+
generated_latents = generated_latents[:, :, 2:]
|
| 596 |
|
| 597 |
total_generated_latent_frames += int(generated_latents.shape[2])
|
| 598 |
history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
|
|
|
|
| 673 |
randomize_seed,
|
| 674 |
seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
|
| 675 |
no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 676 |
+
if total_second_length_debug_value[0] is not None:
|
| 677 |
+
return min(total_second_length_debug_value[0] * 60 * 2, 600)
|
|
|
|
| 678 |
return total_second_length * 60 * 2
|
| 679 |
|
| 680 |
@spaces.GPU(duration=get_duration)
|
|
|
|
| 683 |
randomize_seed,
|
| 684 |
seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
|
| 685 |
no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 686 |
+
global stream, high_vram
|
| 687 |
|
| 688 |
if torch.cuda.device_count() == 0:
|
| 689 |
gr.Warning('Set this space to GPU config to make it work.')
|
| 690 |
return None, None, None, None, None, None
|
| 691 |
|
| 692 |
+
if input_video_debug_value[0] is not None or end_frame_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
|
| 693 |
+
input_video = input_video_debug_value[0]
|
| 694 |
+
end_frame = end_frame_debug_value[0]
|
| 695 |
+
prompt = prompt_debug_value[0]
|
| 696 |
+
total_second_length = total_second_length_debug_value[0]
|
| 697 |
+
allocation_time = min(total_second_length_debug_value[0] * 60 * 100, 600)
|
| 698 |
|
| 699 |
if randomize_seed:
|
| 700 |
seed = random.randint(0, np.iinfo(np.int32).max)
|
|
|
|
| 817 |
|
| 818 |
with gr.Accordion("Debug", open=False):
|
| 819 |
input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
|
| 820 |
+
end_frame_debug = gr.Image(type="numpy", label="End Image Debug", height=320)
|
| 821 |
prompt_debug = gr.Textbox(label="Prompt Debug", value='')
|
| 822 |
total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=5, step=0.1)
|
| 823 |
|
|
|
|
| 890 |
end_button.click(fn=end_process)
|
| 891 |
|
| 892 |
|
| 893 |
+
def handle_field_debug_change(input_video_debug_data, end_frame_debug_data, prompt_debug_data, total_second_length_debug_data):
|
| 894 |
+
input_video_debug_value[0] = input_video_debug_data
|
| 895 |
+
end_frame_debug_value[0] = end_frame_debug_data
|
| 896 |
+
prompt_debug_value[0] = prompt_debug_data
|
| 897 |
+
total_second_length_debug_value[0] = total_second_length_debug_data
|
| 898 |
return []
|
| 899 |
|
| 900 |
input_video_debug.upload(
|
| 901 |
fn=handle_field_debug_change,
|
| 902 |
+
inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
|
| 903 |
+
outputs=[]
|
| 904 |
+
)
|
| 905 |
+
|
| 906 |
+
end_frame_debug.upload(
|
| 907 |
+
fn=handle_field_debug_change,
|
| 908 |
+
inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
|
| 909 |
outputs=[]
|
| 910 |
)
|
| 911 |
|
| 912 |
prompt_debug.change(
|
| 913 |
fn=handle_field_debug_change,
|
| 914 |
+
inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
|
| 915 |
outputs=[]
|
| 916 |
)
|
| 917 |
|
| 918 |
total_second_length_debug.change(
|
| 919 |
fn=handle_field_debug_change,
|
| 920 |
+
inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
|
| 921 |
outputs=[]
|
| 922 |
)
|
| 923 |
|
requirements.txt
CHANGED
|
@@ -1,41 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
gradio_client==1.10.3
|
| 5 |
-
numpy==1.26.4
|
| 6 |
-
requests==2.32.4
|
| 7 |
sentencepiece==0.2.0
|
| 8 |
-
tokenizers==0.19.1
|
| 9 |
-
torchvision==0.22.0
|
| 10 |
-
uvicorn==0.34.3
|
| 11 |
-
wandb==0.20.1
|
| 12 |
-
httpx==0.28.1
|
| 13 |
-
transformers==4.43.0
|
| 14 |
-
accelerate==1.8.0
|
| 15 |
-
scikit-learn==1.7.0
|
| 16 |
-
einops==0.8.1
|
| 17 |
-
einops-exts==0.0.4
|
| 18 |
-
timm==1.0.15
|
| 19 |
-
openai-clip==1.0.1
|
| 20 |
-
fsspec==2025.5.1
|
| 21 |
-
kornia==0.8.1
|
| 22 |
-
matplotlib==3.10.3
|
| 23 |
-
ninja==1.11.1.4
|
| 24 |
-
omegaconf==2.3.0
|
| 25 |
-
opencv-python==4.11.0.86
|
| 26 |
-
pandas==2.3.0
|
| 27 |
pillow==11.2.1
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
scipy==1.
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
pillow-heif==0.22.0
|
| 40 |
-
|
| 41 |
-
open-clip-torch==2.24.0
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
diffusers==0.33.1
|
| 3 |
+
transformers==4.52.4
|
|
|
|
|
|
|
|
|
|
| 4 |
sentencepiece==0.2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
pillow==11.2.1
|
| 6 |
+
av==12.1.0
|
| 7 |
+
numpy==1.26.2
|
| 8 |
+
scipy==1.12.0
|
| 9 |
+
requests==2.32.4
|
| 10 |
+
torchsde==0.2.6
|
| 11 |
+
torch>=2.0.0
|
| 12 |
+
torchvision
|
| 13 |
+
torchaudio
|
| 14 |
+
einops
|
| 15 |
+
opencv-contrib-python
|
| 16 |
+
safetensors
|
| 17 |
+
huggingface_hub
|
| 18 |
+
decord
|
| 19 |
+
imageio_ffmpeg==0.6.0
|
| 20 |
+
sageattention==1.0.6
|
| 21 |
+
xformers==0.0.29.post3
|
| 22 |
+
bitsandbytes==0.46.0
|
| 23 |
pillow-heif==0.22.0
|
| 24 |
+
spaces[security]
|
|
|