smartdigitalnetworks committed on
Commit
d771711
·
verified ·
1 Parent(s): 6fd9e50

Update hf_gradio_app.py

Browse files
Files changed (1) hide show
  1. hf_gradio_app.py +6 -18
hf_gradio_app.py CHANGED
@@ -85,8 +85,8 @@ def process_audio(file_path, temp_dir):
85
  # Load the audio file
86
  audio = AudioSegment.from_file(file_path)
87
 
88
- # Check and cut the audio if longer than 4 seconds
89
- max_duration = 9 * 1000 # 4 seconds in milliseconds
90
  if len(audio) > max_duration:
91
  audio = audio[:max_duration]
92
 
@@ -99,7 +99,7 @@ def process_audio(file_path, temp_dir):
99
  return output_path
100
 
101
 
102
- @spaces.GPU(duration=540)
103
  @torch.inference_mode()
104
  def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
105
  """
@@ -116,7 +116,7 @@ def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=Tru
116
  str: File path to the generated output video (MP4 format).
117
  """
118
 
119
- gr.Info("540 seconds will be allocated from your daily ZeroGPU time credits.")
120
 
121
  pipeline.reference_net.enable_xformers_memory_efficient_attention()
122
  pipeline.diffusion_net.enable_xformers_memory_efficient_attention()
@@ -225,25 +225,13 @@ def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=Tru
225
  with gr.Blocks(analytics_enabled=False) as demo:
226
  with gr.Column():
227
  gr.Markdown("# MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation")
228
- gr.Markdown("Note: On fffiloni's shared UI, audio length is trimmed to max 4 seconds, so everyone can get a taste without to much waiting time in queue.")
229
- gr.Markdown("Duplicate the space to skip the queue and enjoy full length capacity.")
230
  gr.HTML("""
231
  <div style="display:flex;column-gap:4px;">
232
  <a href="https://github.com/memoavatar/memo">
233
  <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
234
  </a>
235
- <a href="https://memoavatar.github.io/">
236
- <img src='https://img.shields.io/badge/Project-Page-green'>
237
- </a>
238
- <a href="https://arxiv.org/abs/2412.04448">
239
- <img src='https://img.shields.io/badge/ArXiv-Paper-red'>
240
- </a>
241
- <a href="https://huggingface.co/spaces/fffiloni/MEMO?duplicate=true">
242
- <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-sm.svg" alt="Duplicate this Space">
243
- </a>
244
- <a href="https://huggingface.co/fffiloni">
245
- <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-sm-dark.svg" alt="Follow me on HF">
246
- </a>
247
  </div>
248
  """)
249
 
 
85
  # Load the audio file
86
  audio = AudioSegment.from_file(file_path)
87
 
88
+ # Check and cut the audio if longer than 9 seconds
89
+ max_duration = 9 * 1000 # 9 seconds in milliseconds
90
  if len(audio) > max_duration:
91
  audio = audio[:max_duration]
92
 
 
99
  return output_path
100
 
101
 
102
+ @spaces.GPU(duration=580)
103
  @torch.inference_mode()
104
  def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
105
  """
 
116
  str: File path to the generated output video (MP4 format).
117
  """
118
 
119
+ gr.Info("580 seconds will be allocated from your daily ZeroGPU time credits.")
120
 
121
  pipeline.reference_net.enable_xformers_memory_efficient_attention()
122
  pipeline.diffusion_net.enable_xformers_memory_efficient_attention()
 
225
  with gr.Blocks(analytics_enabled=False) as demo:
226
  with gr.Column():
227
  gr.Markdown("# MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation")
228
+ gr.Markdown("Note: Audio length is trimmed to max 9 seconds, using 580 GPU credits.")
229
+ gr.Markdown("Consider adding word at beginning of clip that is later trimmable since lipsync does not always start immediately.")
230
  gr.HTML("""
231
  <div style="display:flex;column-gap:4px;">
232
  <a href="https://github.com/memoavatar/memo">
233
  <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
234
  </a>
 
 
 
 
 
 
 
 
 
 
 
 
235
  </div>
236
  """)
237