Update app.py
app.py (CHANGED)
@@ -2,64 +2,93 @@ import gradio as gr
 import os
 import subprocess
 
-#
-model_path = "checkpoints/checkpoint.pt"
-pads = "0,0,0,0"
+# Define the paths where the input and output files will be stored
+INPUT_AUDIO_PATH = "input_audio.wav"
+INPUT_VIDEO_PATH = "input_video.mp4"
+OUTPUT_VIDEO_PATH = "output_video.mp4"
+MODEL_PATH = "checkpoints/checkpoint.pt"
+
+# Sample mode configuration
+SAMPLE_MODE = "cross" # Options: "cross" or "reconstruction"
+PADS = "0,0,0,0"
+GENERATE_FROM_FILELIST = 0
+
+# Generate the appropriate flags based on the sample mode
+def get_sample_flags(sample_mode):
     if sample_mode == "reconstruction":
+        return "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
     elif sample_mode == "cross":
+        return "--sampling_input_type=gt --sampling_ref_type=gt"
     else:
-        return
+        return None
+
+# Function to run the model inference command
+def generate_video(audio_file, video_file):
+    # Save uploaded files to disk
+    audio_file.save(INPUT_AUDIO_PATH)
+    video_file.save(INPUT_VIDEO_PATH)
+
+    sample_input_flags = get_sample_flags(SAMPLE_MODE)
+    if not sample_input_flags:
+        return "Error: sample_mode can only be 'cross' or 'reconstruction'"
+
+    # Build the command string
+    MODEL_FLAGS = (
+        "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True "
+        "--num_channels 128 --num_head_channels 64 --num_res_blocks 2 "
+        "--resblock_updown True --use_fp16 True --use_scale_shift_norm False"
+    )
+    DIFFUSION_FLAGS = (
+        "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear "
+        "--rescale_timesteps False"
+    )
+    SAMPLE_FLAGS = (
+        f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 "
+        f"--use_ddim True --model_path={MODEL_PATH}"
+    )
     DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
-    TFG_FLAGS =
+    TFG_FLAGS = (
+        "--face_hide_percentage 0.5 --use_ref=True --use_audio=True "
+        "--audio_as_style=True"
+    )
+    GEN_FLAGS = (
+        f"--generate_from_filelist {GENERATE_FROM_FILELIST} "
+        f"--video_path={INPUT_VIDEO_PATH} --audio_path={INPUT_AUDIO_PATH} "
+        f"--out_path={OUTPUT_VIDEO_PATH} --save_orig=False "
+        f"--face_det_batch_size 16 --pads {PADS} --is_voxceleb2=False"
+    )
+
+    command = (
+        f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} "
+        f"{SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
+    )
+
+    # Run the command and wait for it to complete
+    process = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+    if process.returncode != 0:
+        return f"Error: {process.stderr}"
+
+    # Return the generated video file
+    return OUTPUT_VIDEO_PATH
+
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## Audio-Video Synthesis Model")
 
-    if os.path.exists(audio_path):
-        os.remove(audio_path)
-    if os.path.exists(video_path):
-        os.remove(video_path)
-    if os.path.exists(out_path):
-        os.remove(out_path)
+    with gr.Row():
+        audio_input = gr.Audio(label="Upload Audio", type="file")
+        video_input = gr.Video(label="Upload Video", type="file")
+
+    output_video = gr.Video(label="Generated Video")
+
+    generate_button = gr.Button("Generate")
 
-        gr.Video(label="Video File"),
-    ],
-    outputs="video",
-    description="Process Audio and Video with your Model",
-    allow_flagging=False # Disable flagging as output is a video
-)
+    generate_button.click(
+        fn=generate_video,
+        inputs=[audio_input, video_input],
+        outputs=output_video
+    )
 
+if __name__ == "__main__":
+    demo.launch()