Spaces:
Running on Zero
Running on Zero
Commit ·
af9489c
1
Parent(s): a8e8684
Update app
Browse files
app.py
CHANGED
|
@@ -882,6 +882,16 @@ def save_video(output_tracks, input_frames, wav_file, result_folder):
|
|
| 882 |
return video_output, "success"
|
| 883 |
|
| 884 |
@spaces.GPU(duration=150)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
| 886 |
|
| 887 |
try:
|
|
@@ -974,26 +984,28 @@ def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
|
| 974 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
| 975 |
print("Successfully loaded the model")
|
| 976 |
|
|
|
|
|
|
|
| 977 |
# Process in batches
|
| 978 |
-
batch_size = 12
|
| 979 |
-
video_emb = []
|
| 980 |
-
audio_emb = []
|
| 981 |
|
| 982 |
-
for i in tqdm(range(0, len(video_sequences), batch_size)):
|
| 983 |
-
|
| 984 |
-
|
| 985 |
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
|
| 990 |
-
|
| 991 |
-
|
| 992 |
|
| 993 |
-
|
| 994 |
|
| 995 |
-
audio_emb = torch.cat(audio_emb, dim=0)
|
| 996 |
-
video_emb = torch.cat(video_emb, dim=0)
|
| 997 |
|
| 998 |
# L2 normalize embeddings
|
| 999 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
|
@@ -1027,7 +1039,6 @@ def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
|
| 1027 |
return None, f"Error: {str(e)}"
|
| 1028 |
|
| 1029 |
|
| 1030 |
-
@spaces.GPU(duration=150)
|
| 1031 |
def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
|
| 1032 |
try:
|
| 1033 |
# Extract the video filename
|
|
@@ -1079,12 +1090,15 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
|
|
| 1079 |
return None, status
|
| 1080 |
|
| 1081 |
# Pre-process and extract per-speaker tracks in each scene
|
| 1082 |
-
print("Pre-processing the input video...")
|
| 1083 |
-
status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
|
| 1084 |
-
if status != 0:
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
|
|
|
|
|
|
|
|
|
| 1088 |
# Load the tracks file saved during pre-processing
|
| 1089 |
with open('{}/metadata/tracks.pckl'.format(result_folder_input), 'rb') as file:
|
| 1090 |
tracks = pickle.load(file)
|
|
|
|
| 882 |
return video_output, "success"
|
| 883 |
|
| 884 |
@spaces.GPU(duration=150)
|
| 885 |
+
def preprocess_asd(video_path, result_folder_input):
    """Run the pre-processing script on the input video.

    Launches ``preprocess/inference_preprocess.py`` as a child process with
    ``--data_dir``, ``--sd_root`` and ``--work_root`` pointing at the
    ``temp``, ``crops`` and ``metadata`` sub-folders of
    ``result_folder_input``, and ``--data_root`` set to ``video_path``.

    Args:
        video_path: Path of the input video, forwarded as ``--data_root``.
        result_folder_input: Folder under which the ``temp``, ``crops`` and
            ``metadata`` paths passed to the script are built.

    Returns:
        ``"success"`` when the script exits with status 0, otherwise a
        human-readable error message string.
    """
    print("Pre-processing the input video...")

    # Pass the arguments as a list and do not go through a shell: paths that
    # contain spaces or shell metacharacters (";", "&&", "$()", ...) are then
    # delivered to the script verbatim instead of being split or executed.
    command = [
        "python",
        "preprocess/inference_preprocess.py",
        "--data_dir={}/temp".format(result_folder_input),
        "--sd_root={}/crops".format(result_folder_input),
        "--work_root={}/metadata".format(result_folder_input),
        "--data_root={}".format(video_path),
    ]

    try:
        status = subprocess.call(command)
    except OSError:
        # Without a shell, a missing interpreter raises instead of yielding a
        # non-zero exit status; keep reporting it through the return value.
        status = 1

    if status != 0:
        msg = "Error in pre-processing the input video, please check the input video and try again..."
        return msg

    return "success"
|
| 894 |
+
|
| 895 |
def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
| 896 |
|
| 897 |
try:
|
|
|
|
| 984 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
| 985 |
print("Successfully loaded the model")
|
| 986 |
|
| 987 |
+
video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
|
| 988 |
+
|
| 989 |
# Process in batches
|
| 990 |
+
# batch_size = 12
|
| 991 |
+
# video_emb = []
|
| 992 |
+
# audio_emb = []
|
| 993 |
|
| 994 |
+
# for i in tqdm(range(0, len(video_sequences), batch_size)):
|
| 995 |
+
# video_inp = video_sequences[i:i+batch_size, ]
|
| 996 |
+
# audio_inp = audio_sequences[i:i+batch_size, ]
|
| 997 |
|
| 998 |
+
# vid_emb = model.forward_vid(video_inp.to(device))
|
| 999 |
+
# vid_emb = torch.mean(vid_emb, axis=-1).unsqueeze(-1)
|
| 1000 |
+
# aud_emb = model.forward_aud(audio_inp.to(device))
|
| 1001 |
|
| 1002 |
+
# video_emb.append(vid_emb.detach())
|
| 1003 |
+
# audio_emb.append(aud_emb.detach())
|
| 1004 |
|
| 1005 |
+
# torch.cuda.empty_cache()
|
| 1006 |
|
| 1007 |
+
# audio_emb = torch.cat(audio_emb, dim=0)
|
| 1008 |
+
# video_emb = torch.cat(video_emb, dim=0)
|
| 1009 |
|
| 1010 |
# L2 normalize embeddings
|
| 1011 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
|
|
|
| 1039 |
return None, f"Error: {str(e)}"
|
| 1040 |
|
| 1041 |
|
|
|
|
| 1042 |
def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
|
| 1043 |
try:
|
| 1044 |
# Extract the video filename
|
|
|
|
| 1090 |
return None, status
|
| 1091 |
|
| 1092 |
# Pre-process and extract per-speaker tracks in each scene
|
| 1093 |
+
# print("Pre-processing the input video...")
|
| 1094 |
+
# status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
|
| 1095 |
+
# if status != 0:
|
| 1096 |
+
# msg = "Error in pre-processing the input video, please check the input video and try again..."
|
| 1097 |
+
# return None, msg
|
| 1098 |
+
status = preprocess_asd(video_path, result_folder_input)
|
| 1099 |
+
if status != "success":
|
| 1100 |
+
return None, status
|
| 1101 |
+
|
| 1102 |
# Load the tracks file saved during pre-processing
|
| 1103 |
with open('{}/metadata/tracks.pckl'.format(result_folder_input), 'rb') as file:
|
| 1104 |
tracks = pickle.load(file)
|