Fabrice-TIERCELIN committed on
Commit
ac28010
·
verified ·
1 Parent(s): 1368630

Upload 5 files

Browse files
Files changed (4) hide show
  1. README.md +14 -14
  2. app.py +0 -0
  3. app_endframe.py +30 -19
  4. requirements.txt +21 -38
README.md CHANGED
@@ -1,21 +1,21 @@
1
  ---
2
- title: SUPIR Image Upscaler
 
 
 
3
  sdk: gradio
4
- emoji: 📷
5
- sdk_version: 4.38.1
6
  app_file: app.py
7
- license: mit
8
- colorFrom: blue
9
- colorTo: pink
10
  tags:
11
- - Upscaling
12
- - Restoring
13
- - Image-to-Image
14
- - Image-2-Image
15
- - Img-to-Img
16
- - Img-2-Img
17
  - language models
18
  - LLMs
19
- short_description: Restore blurred or small images with prompt
20
  suggested_hardware: zero-a10g
21
- ---
 
 
 
1
  ---
2
+ title: FramePack/HunyuanVideo
3
+ emoji: 🎥
4
+ colorFrom: pink
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 5.29.1
 
8
  app_file: app.py
9
+ license: apache-2.0
10
+ short_description: Text-to-Video/Image-to-Video/Video extender (timed prompt)
 
11
  tags:
12
+ - Image-to-Video
13
+ - Image-2-Video
14
+ - Img-to-Vid
15
+ - Img-2-Vid
 
 
16
  - language models
17
  - LLMs
 
18
  suggested_hardware: zero-a10g
19
+ ---
20
+
21
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
app_endframe.py CHANGED
@@ -108,7 +108,10 @@ stream = AsyncStream()
108
  outputs_folder = './outputs/'
109
  os.makedirs(outputs_folder, exist_ok=True)
110
 
111
- input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
 
 
 
112
 
113
  # 20250506 pftq: Added function to encode input video frames into latents
114
  @torch.no_grad()
@@ -588,7 +591,8 @@ def worker(input_video, end_frame, end_frame_weight, prompt, n_prompt, seed, bat
588
  )
589
 
590
  if is_start_of_video:
591
- generated_latents = torch.cat([video_latents[:, :, -1:].to(generated_latents), generated_latents], dim=2)
 
592
 
593
  total_generated_latent_frames += int(generated_latents.shape[2])
594
  history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
@@ -669,9 +673,8 @@ def get_duration(
669
  randomize_seed,
670
  seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
671
  no_resize, mp4_crf, num_clean_frames, vae_batch):
672
- global total_second_length_debug_value
673
- if total_second_length_debug_value is not None:
674
- return min(total_second_length_debug_value * 60 * 2, 600)
675
  return total_second_length * 60 * 2
676
 
677
  @spaces.GPU(duration=get_duration)
@@ -680,17 +683,18 @@ def process(
680
  randomize_seed,
681
  seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
682
  no_resize, mp4_crf, num_clean_frames, vae_batch):
683
- global stream, high_vram, input_video_debug_value, prompt_debug_value, total_second_length_debug_value
684
 
685
  if torch.cuda.device_count() == 0:
686
  gr.Warning('Set this space to GPU config to make it work.')
687
  return None, None, None, None, None, None
688
 
689
- if input_video_debug_value is not None or prompt_debug_value is not None or total_second_length_debug_value is not None:
690
- input_video = input_video_debug_value
691
- prompt = prompt_debug_value
692
- total_second_length = total_second_length_debug_value
693
- input_video_debug_value = prompt_debug_value = total_second_length_debug_value = None
 
694
 
695
  if randomize_seed:
696
  seed = random.randint(0, np.iinfo(np.int32).max)
@@ -813,6 +817,7 @@ with block:
813
 
814
  with gr.Accordion("Debug", open=False):
815
  input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
 
816
  prompt_debug = gr.Textbox(label="Prompt Debug", value='')
817
  total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=5, step=0.1)
818
 
@@ -885,28 +890,34 @@ with block:
885
  end_button.click(fn=end_process)
886
 
887
 
888
- def handle_field_debug_change(input_video_debug_data, prompt_debug_data, total_second_length_debug_data):
889
- global input_video_debug_value, prompt_debug_value, total_second_length_debug_value
890
- input_video_debug_value = input_video_debug_data
891
- prompt_debug_value = prompt_debug_data
892
- total_second_length_debug_value = total_second_length_debug_data
893
  return []
894
 
895
  input_video_debug.upload(
896
  fn=handle_field_debug_change,
897
- inputs=[input_video_debug, prompt_debug, total_second_length_debug],
 
 
 
 
 
 
898
  outputs=[]
899
  )
900
 
901
  prompt_debug.change(
902
  fn=handle_field_debug_change,
903
- inputs=[input_video_debug, prompt_debug, total_second_length_debug],
904
  outputs=[]
905
  )
906
 
907
  total_second_length_debug.change(
908
  fn=handle_field_debug_change,
909
- inputs=[input_video_debug, prompt_debug, total_second_length_debug],
910
  outputs=[]
911
  )
912
 
 
108
  outputs_folder = './outputs/'
109
  os.makedirs(outputs_folder, exist_ok=True)
110
 
111
+ input_video_debug_value = [None]
112
+ end_frame_debug_value = [None]
113
+ prompt_debug_value = [None]
114
+ total_second_length_debug_value = [None]
115
 
116
  # 20250506 pftq: Added function to encode input video frames into latents
117
  @torch.no_grad()
 
591
  )
592
 
593
  if is_start_of_video:
594
+ #generated_latents = torch.cat([video_latents[:, :, -1:].to(generated_latents), generated_latents], dim=2)
595
+ generated_latents = generated_latents[:, :, 2:]
596
 
597
  total_generated_latent_frames += int(generated_latents.shape[2])
598
  history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
 
673
  randomize_seed,
674
  seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
675
  no_resize, mp4_crf, num_clean_frames, vae_batch):
676
+ if total_second_length_debug_value[0] is not None:
677
+ return min(total_second_length_debug_value[0] * 60 * 2, 600)
 
678
  return total_second_length * 60 * 2
679
 
680
  @spaces.GPU(duration=get_duration)
 
683
  randomize_seed,
684
  seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache,
685
  no_resize, mp4_crf, num_clean_frames, vae_batch):
686
+ global stream, high_vram
687
 
688
  if torch.cuda.device_count() == 0:
689
  gr.Warning('Set this space to GPU config to make it work.')
690
  return None, None, None, None, None, None
691
 
692
+ if input_video_debug_value[0] is not None or end_frame_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
693
+ input_video = input_video_debug_value[0]
694
+ end_frame = end_frame_debug_value[0]
695
+ prompt = prompt_debug_value[0]
696
+ total_second_length = total_second_length_debug_value[0]
697
+ allocation_time = min(total_second_length_debug_value[0] * 60 * 100, 600)
698
 
699
  if randomize_seed:
700
  seed = random.randint(0, np.iinfo(np.int32).max)
 
817
 
818
  with gr.Accordion("Debug", open=False):
819
  input_video_debug = gr.Video(sources='upload', label="Input Video Debug", height=320)
820
+ end_frame_debug = gr.Image(type="numpy", label="End Image Debug", height=320)
821
  prompt_debug = gr.Textbox(label="Prompt Debug", value='')
822
  total_second_length_debug = gr.Slider(label="Additional Video Length to Generate (Seconds) Debug", minimum=1, maximum=120, value=5, step=0.1)
823
 
 
890
  end_button.click(fn=end_process)
891
 
892
 
893
+ def handle_field_debug_change(input_video_debug_data, end_frame_debug_data, prompt_debug_data, total_second_length_debug_data):
894
+ input_video_debug_value[0] = input_video_debug_data
895
+ end_frame_debug_value[0] = end_frame_debug_data
896
+ prompt_debug_value[0] = prompt_debug_data
897
+ total_second_length_debug_value[0] = total_second_length_debug_data
898
  return []
899
 
900
  input_video_debug.upload(
901
  fn=handle_field_debug_change,
902
+ inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
903
+ outputs=[]
904
+ )
905
+
906
+ end_frame_debug.upload(
907
+ fn=handle_field_debug_change,
908
+ inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
909
  outputs=[]
910
  )
911
 
912
  prompt_debug.change(
913
  fn=handle_field_debug_change,
914
+ inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
915
  outputs=[]
916
  )
917
 
918
  total_second_length_debug.change(
919
  fn=handle_field_debug_change,
920
+ inputs=[input_video_debug, end_frame_debug, prompt_debug, total_second_length_debug],
921
  outputs=[]
922
  )
923
 
requirements.txt CHANGED
@@ -1,41 +1,24 @@
1
- pydantic==2.10.6
2
- fastapi==0.115.13
3
- gradio_imageslider==0.0.20
4
- gradio_client==1.10.3
5
- numpy==1.26.4
6
- requests==2.32.4
7
  sentencepiece==0.2.0
8
- tokenizers==0.19.1
9
- torchvision==0.22.0
10
- uvicorn==0.34.3
11
- wandb==0.20.1
12
- httpx==0.28.1
13
- transformers==4.43.0
14
- accelerate==1.8.0
15
- scikit-learn==1.7.0
16
- einops==0.8.1
17
- einops-exts==0.0.4
18
- timm==1.0.15
19
- openai-clip==1.0.1
20
- fsspec==2025.5.1
21
- kornia==0.8.1
22
- matplotlib==3.10.3
23
- ninja==1.11.1.4
24
- omegaconf==2.3.0
25
- opencv-python==4.11.0.86
26
- pandas==2.3.0
27
  pillow==11.2.1
28
- pytorch-lightning==2.5.1.post0
29
- PyYAML==6.0.2
30
- scipy==1.15.3
31
- tqdm==4.67.1
32
- triton==3.3.0
33
- urllib3==2.4.0
34
- webdataset==0.2.111
35
- xformers==0.0.30
36
- facexlib==0.3.0
37
- k-diffusion==0.1.1.post1
38
- diffusers==0.33.1
 
 
 
 
 
 
39
  pillow-heif==0.22.0
40
-
41
- open-clip-torch==2.24.0
 
1
+ accelerate==1.7.0
2
+ diffusers==0.33.1
3
+ transformers==4.52.4
 
 
 
4
  sentencepiece==0.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  pillow==11.2.1
6
+ av==12.1.0
7
+ numpy==1.26.2
8
+ scipy==1.12.0
9
+ requests==2.32.4
10
+ torchsde==0.2.6
11
+ torch>=2.0.0
12
+ torchvision
13
+ torchaudio
14
+ einops
15
+ opencv-contrib-python
16
+ safetensors
17
+ huggingface_hub
18
+ decord
19
+ imageio_ffmpeg==0.6.0
20
+ sageattention==1.0.6
21
+ xformers==0.0.29.post3
22
+ bitsandbytes==0.46.0
23
  pillow-heif==0.22.0
24
+ spaces[security]