import gradio as gr
import subprocess
import os
import cv2
import numpy as np

# Paths and Model Config
sample_mode = "cross"  # "reconstruction" or "cross"
model_path = "checkpoints/checkpoint.pt"
pads = "0,0,0,0"
generate_from_filelist = 0  # 0 means real-time generation
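# Note: model_path and pads above are illustrative defaults from this script;
# adjust them to your checkpoint location and the face-crop padding
# (commonly given as top, bottom, left, right) used by the preprocessing.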

def process_video(audio_path, video_path):
    # Step 1: Check if input files exist
    audio_exists = os.path.exists(audio_path)
    video_exists = os.path.exists(video_path)
    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
    
    if not (audio_exists and video_exists):
        return "Error: One or both input files do not exist."

    # Set flags based on sample mode
    if sample_mode == "reconstruction":
        sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
    elif sample_mode == "cross":
        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
    else:
        return "Error: sample_mode can only be 'cross' or 'reconstruction'"

    # Model flags and configurations
    MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
    DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
    SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
    DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
    TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"
    GEN_FLAGS = f"--generate_from_filelist {generate_from_filelist} --video_path={video_path} --audio_path={audio_path} --out_path=output.mp4 --save_orig=False --face_det_batch_size 16 --pads {pads} --is_voxceleb2=False"

    # Step 2: Combine all flags into one command
    command = f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} {SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
    print(f"Running command: {command}")

    # Step 3: Execute the command and capture output
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)

    if result.returncode != 0:
        return f"Error during video generation: {result.stderr}"

    # Step 4: Verify that the output video is generated correctly
    if not os.path.exists("output.mp4"):
        return "Error: Output video not generated."
    
    print("Video generation successful!")
    return "output.mp4"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### Upload an audio and a video file to generate an output video.")

    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")
    generate_button = gr.Button("Generate")

    def inference(audio, video):
        result = process_video(audio, video)
        if result.endswith(".mp4"):
            return result  # Path to the generated video
        raise gr.Error(result)  # Surface any error message in the UI

    generate_button.click(
        fn=inference,
        inputs=[audio_input, video_input],
        outputs=output_video,
    )

create_test_video()  # Write a short synthetic clip once to confirm video encoding works
demo.launch()