Spaces: Running on Zero
MCP server ready
Browse files — hf_gradio_app.py (+18 −1)
hf_gradio_app.py
CHANGED
|
@@ -102,6 +102,23 @@ def process_audio(file_path, temp_dir):
|
|
| 102 |
@spaces.GPU(duration=240)
|
| 103 |
@torch.inference_mode()
|
| 104 |
def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
pipeline.reference_net.enable_xformers_memory_efficient_attention()
|
| 106 |
pipeline.diffusion_net.enable_xformers_memory_efficient_attention()
|
| 107 |
|
|
@@ -246,4 +263,4 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 246 |
outputs=[video_output],
|
| 247 |
)
|
| 248 |
|
| 249 |
-
demo.queue().launch(share=False, show_api=
|
|
|
|
| 102 |
@spaces.GPU(duration=240)
|
| 103 |
@torch.inference_mode()
|
| 104 |
def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
|
| 105 |
+
"""
|
| 106 |
+
Generates a talking-head video synchronized with the input audio using the MEMO pipeline.
|
| 107 |
+
|
| 108 |
+
This function combines an input face image and an audio clip to create a temporally coherent
|
| 109 |
+
and emotionally expressive talking video. It leverages a memory-guided diffusion model
|
| 110 |
+
conditioned on audio features, emotional cues, and visual context.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
input_video (str): Path to the input image file (used as the reference face).
|
| 114 |
+
input_audio (str): Path to the input audio file (speech or dialogue).
|
| 115 |
+
seed (int): Random seed for deterministic results. Use 0 for a randomly generated seed.
|
| 116 |
+
progress (gr.Progress, optional): Gradio progress tracker (automatically passed by Gradio).
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
str: File path to the generated output video (MP4 format).
|
| 120 |
+
"""
|
| 121 |
+
|
| 122 |
pipeline.reference_net.enable_xformers_memory_efficient_attention()
|
| 123 |
pipeline.diffusion_net.enable_xformers_memory_efficient_attention()
|
| 124 |
|
|
|
|
| 263 |
outputs=[video_output],
|
| 264 |
)
|
| 265 |
|
| 266 |
+
demo.queue().launch(share=False, show_api=True, show_error=True, mcp_server=True)
|