# (extraction artifact removed: file-size / commit-hash / line-number header)
import os
import shlex
import subprocess

import cv2
import gradio as gr
import numpy as np
# Paths and Model Config (module-level; read by process_video below)
sample_mode = "cross" # sampling mode: "reconstruction" or "cross" are the only accepted values
model_path = "checkpoints/checkpoint.pt" # diffusion checkpoint, forwarded via --model_path
pads = "0,0,0,0" # face-crop padding forwarded via --pads; presumably top,bottom,left,right — TODO confirm against the model script
generate_from_filelist = 0 # 0 means real-time generation (single pair, not a batch file list)
def process_video(audio_path, video_path):
    """Run the diffusion talking-head generation script on an audio/video pair.

    Parameters
    ----------
    audio_path : str
        Path to the driving audio file (a user upload from Gradio).
    video_path : str
        Path to the source video file (a user upload from Gradio).

    Returns
    -------
    str
        ``"output.mp4"`` on success, otherwise a human-readable error
        message starting with ``"Error:"``.
    """
    # Step 1: Check if input files exist
    audio_exists = os.path.exists(audio_path)
    video_exists = os.path.exists(video_path)
    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
    if not (audio_exists and video_exists):
        return "Error: One or both input files do not exist."
    # Set flags based on sample mode
    if sample_mode == "reconstruction":
        sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
    elif sample_mode == "cross":
        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
    else:
        return "Error: sample_mode can only be 'cross' or 'reconstruction'"
    # Model flags and configurations (static strings — no user input in these)
    MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
    DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
    SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
    DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
    TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"
    GEN_FLAGS = f"--generate_from_filelist {generate_from_filelist} --out_path=output.mp4 --save_orig=False --face_det_batch_size 16 --pads {pads} --is_voxceleb2=False"
    # Step 2: Build the command as an argv list and run WITHOUT a shell.
    # The upload paths are user-controlled, so interpolating them into a
    # shell=True string would allow command injection and break on paths
    # containing spaces. Each path is appended as a single argv entry.
    args = (
        ["python", "your_model_script.py"]
        + shlex.split(MODEL_FLAGS)
        + shlex.split(DIFFUSION_FLAGS)
        + shlex.split(SAMPLE_FLAGS)
        + shlex.split(DATA_FLAGS)
        + shlex.split(TFG_FLAGS)
        + shlex.split(GEN_FLAGS)
        + [f"--video_path={video_path}", f"--audio_path={audio_path}"]
    )
    print(f"Running command: {' '.join(args)}")
    # Remove a stale output from an earlier run so the success check below
    # cannot be satisfied by an old file when generation silently produced
    # nothing.
    if os.path.exists("output.mp4"):
        os.remove("output.mp4")
    # Step 3: Execute the command and capture output
    result = subprocess.run(args, capture_output=True, text=True)
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    if result.returncode != 0:
        return f"Error during video generation: {result.stderr}"
    # Step 4: Verify that the output video is generated correctly
    if not os.path.exists("output.mp4"):
        return "Error: Output video not generated."
    print("Video generation successful!")
    return "output.mp4"
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### Upload an Audio and Video file to generate an output video.")
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")
    # BUG FIX: the original called create_test_video() here, but no such
    # function is defined anywhere in this file, so the script crashed with
    # a NameError before the UI ever launched. The call has been removed.

    def inference(audio, video):
        """Bridge Gradio uploads to process_video and surface the result.

        Returns the generated video path on success; raises gr.Error so the
        UI shows the message on failure (the original returned a plain
        string to a gr.Video output, which cannot render it, and prefixed
        "Error: " onto messages that already started with "Error:").
        """
        result = process_video(audio, video)
        if result.endswith(".mp4"):
            return result  # Path to the generated video file
        raise gr.Error(result)

    # NOTE(review): the Interface is built and launched inside the Blocks
    # context while `demo` itself is never launched — this mirrors the
    # original structure; consider wiring a Button inside `demo` instead.
    gr.Interface(
        fn=inference,
        inputs=[audio_input, video_input],
        outputs=output_video,
    ).launch()