darshankr committed on
Commit
2ff20eb
·
verified ·
1 Parent(s): 92b1353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -18
app.py CHANGED
@@ -8,59 +8,59 @@ def process_video(audio_file, video_file):
8
  audio_path = audio_file.name
9
  video_path = video_file.name
10
  out_path = "output_video.mp4"
11
-
12
  # Save uploaded files
13
  audio_file.save(audio_path)
14
  video_file.save(video_path)
15
-
16
  # Define command flags
17
  sample_mode = "cross" # or "reconstruction"
18
  generate_from_filelist = 0
19
  model_path = "checkpoints/checkpoint.pt"
20
  pads = "0,0,0,0"
21
-
22
  if sample_mode == "reconstruction":
23
  sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
24
  elif sample_mode == "cross":
25
  sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
26
  else:
27
  return "Error: sample_mode can only be \"cross\" or \"reconstruction\""
28
-
29
  MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
30
  DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
31
  SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
32
  DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
33
  TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"
34
  GEN_FLAGS = f"--generate_from_filelist {generate_from_filelist} --video_path={video_path} --audio_path={audio_path} --out_path={out_path} --save_orig=False --face_det_batch_size 16 --pads {pads} --is_voxceleb2=False"
35
-
36
  # Combine all flags into one command
37
  command = f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} {SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
38
-
39
- # Execute the command
40
  try:
41
  subprocess.run(command, shell=True, check=True)
42
  return out_path
43
  except subprocess.CalledProcessError as e:
44
  return f"Error processing video: {e}"
45
-
46
- # Clean up the files after processing
47
- os.remove(audio_path)
48
- os.remove(video_path)
49
-
50
- # Delete output video after sending to the user
51
- os.remove(out_path)
 
52
 
53
  # Create a Gradio interface
54
  iface = gr.Interface(
55
  fn=process_video,
56
  inputs=[
57
- gr.inputs.Audio(label="Input Audio", type="file"),
58
- gr.inputs.Video(label="Input Video", type="file")
59
  ],
60
- outputs=gr.outputs.Video(label="Processed Video"),
61
  title="Audio-Video Processing",
62
  description="Upload an audio file and a video file to process the video based on the audio input."
63
  )
64
 
65
  # Launch the interface
66
- iface.launch()
 
8
  audio_path = audio_file.name
9
  video_path = video_file.name
10
  out_path = "output_video.mp4"
11
+
12
  # Save uploaded files
13
  audio_file.save(audio_path)
14
  video_file.save(video_path)
15
+
16
  # Define command flags
17
  sample_mode = "cross" # or "reconstruction"
18
  generate_from_filelist = 0
19
  model_path = "checkpoints/checkpoint.pt"
20
  pads = "0,0,0,0"
21
+
22
  if sample_mode == "reconstruction":
23
  sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
24
  elif sample_mode == "cross":
25
  sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
26
  else:
27
  return "Error: sample_mode can only be \"cross\" or \"reconstruction\""
28
+
29
  MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
30
  DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
31
  SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
32
  DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
33
  TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"
34
  GEN_FLAGS = f"--generate_from_filelist {generate_from_filelist} --video_path={video_path} --audio_path={audio_path} --out_path={out_path} --save_orig=False --face_det_batch_size 16 --pads {pads} --is_voxceleb2=False"
35
+
36
  # Combine all flags into one command
37
  command = f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} {SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
38
+
 
39
  try:
40
  subprocess.run(command, shell=True, check=True)
41
  return out_path
42
  except subprocess.CalledProcessError as e:
43
  return f"Error processing video: {e}"
44
+ finally:
45
+ # Clean up the files after processing
46
+ if os.path.exists(audio_path):
47
+ os.remove(audio_path)
48
+ if os.path.exists(video_path):
49
+ os.remove(video_path)
50
+ if os.path.exists(out_path):
51
+ os.remove(out_path)
52
 
53
  # Create a Gradio interface
54
  iface = gr.Interface(
55
  fn=process_video,
56
  inputs=[
57
+ gr.Audio(label="Input Audio"), # Updated syntax
58
+ gr.Video(label="Input Video") # Updated syntax
59
  ],
60
+ outputs=gr.Video(label="Processed Video"), # Updated syntax
61
  title="Audio-Video Processing",
62
  description="Upload an audio file and a video file to process the video based on the audio input."
63
  )
64
 
65
  # Launch the interface
66
+ iface.launch()