Update app.py
app.py
CHANGED
@@ -1,88 +1,89 @@
 import gradio as gr
 import subprocess
+import os
+import cv2
+import numpy as np

+# Paths and Model Config
+sample_mode = "cross"  # "reconstruction" or "cross"
+model_path = "checkpoints/checkpoint.pt"
+pads = "0,0,0,0"
+generate_from_filelist = 0  # 0 means real-time generation

+def process_video(audio_path, video_path):
+    # Step 1: Check if input files exist
+    audio_exists = os.path.exists(audio_path)
+    video_exists = os.path.exists(video_path)
+    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
+
+    if not (audio_exists and video_exists):
+        return "Error: One or both input files do not exist."

+    # Set flags based on sample mode
     if sample_mode == "reconstruction":
+        sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
     elif sample_mode == "cross":
+        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
     else:
         return "Error: sample_mode can only be 'cross' or 'reconstruction'"

+    # Model flags and configurations
+    MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
+    DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
+    SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
     DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
+    TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"
+    GEN_FLAGS = f"--generate_from_filelist {generate_from_filelist} --video_path={video_path} --audio_path={audio_path} --out_path=output.mp4 --save_orig=False --face_det_batch_size 16 --pads {pads} --is_voxceleb2=False"

+    # Step 2: Combine all flags into one command
+    command = f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} {SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
+    print(f"Running command: {command}")

+    # Step 3: Execute the command and capture output
+    result = subprocess.run(command, shell=True, capture_output=True, text=True)
+    print("STDOUT:", result.stdout)
+    print("STDERR:", result.stderr)

+    if result.returncode != 0:
+        return f"Error during video generation: {result.stderr}"

+    # Step 4: Verify that the output video is generated correctly
+    if not os.path.exists("output.mp4"):
+        return "Error: Output video not generated."
+
+    print("Video generation successful!")
+    return "output.mp4"
+
+# Step 5: Create a test function for video writing
+def create_test_video():
+    print("Creating test video...")
+    out = cv2.VideoWriter('test_output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (128, 128))
+    frame = 255 * np.ones((128, 128, 3), dtype=np.uint8)
+    for _ in range(60):  # 2 seconds of video
+        out.write(frame)
+    out.release()
+    print("Test video created.")

+# Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("### Upload an Audio and Video file to generate an output video.")

+    audio_input = gr.Audio(label="Upload Audio", type="filepath")
+    video_input = gr.Video(label="Upload Video")
     output_video = gr.Video(label="Generated Video")

+    create_test_video()  # Run the test video function once to ensure setup is correct
+
+    def inference(audio, video):
+        result = process_video(audio, video)
+        if result.endswith(".mp4"):
+            return result  # Return path to the generated video
+        else:
+            return f"Error: {result}"  # Display any errors

+    gr.Interface(
+        fn=inference,
+        inputs=[audio_input, video_input],
         outputs=output_video
+    ).launch()
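
One note on Step 3: because `command` is a single f-string executed with `shell=True`, an `audio_path` or `video_path` containing spaces or shell metacharacters will be split or interpreted by the shell. A minimal list-based sketch that avoids the shell entirely; it assumes the same flag-group variables as above, `your_model_script.py` stays the placeholder name already used in the diff, and only the path-bearing flags are shown reworked (the remaining GEN_FLAGS entries would be appended the same way):

import subprocess

# Space-free flag groups can be split wholesale; the user-supplied paths are
# appended as single list items so they never pass through a shell.
args = ["python", "your_model_script.py"]
args += MODEL_FLAGS.split() + DIFFUSION_FLAGS.split() + SAMPLE_FLAGS.split() + DATA_FLAGS.split() + TFG_FLAGS.split()
args += [f"--video_path={video_path}", f"--audio_path={audio_path}", "--out_path=output.mp4"]
result = subprocess.run(args, capture_output=True, text=True)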
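
One note on Step 5: `cv2.VideoWriter` does not raise when the requested codec is unavailable; `out.write` simply produces an empty or unreadable file. A small read-back check for the smoke test, using the `test_output.mp4` name from the diff:

import cv2

# Re-open the test clip; a failed open or zero frame count means the
# 'mp4v' codec was unavailable and the writer silently produced nothing.
cap = cv2.VideoCapture("test_output.mp4")
ok = cap.isOpened() and cap.get(cv2.CAP_PROP_FRAME_COUNT) > 0
cap.release()
print("Test video readable:", ok)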
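
One note on the interface block: a `gr.Interface` is instantiated inside the `gr.Blocks` context and `.launch()` is called on it while `demo` itself is never launched, a nesting that is fragile across Gradio versions. A more conventional wiring sketch with the same components; `generate_btn` is a new, illustrative button, and `inference` is assumed to be defined at module level:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("### Upload an Audio and Video file to generate an output video.")
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")
    generate_btn = gr.Button("Generate")
    # Wire the click event directly instead of nesting a gr.Interface.
    generate_btn.click(fn=inference, inputs=[audio_input, video_input], outputs=output_video)

if __name__ == "__main__":
    demo.launch()

Relatedly, returning an error string to a `gr.Video` output will not render; raising `gr.Error(result)` inside `inference` would surface the message in the UI instead.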