sindhuhegde committed on
Commit
e7ce0d3
·
1 Parent(s): 90e5b39

Update app

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -195,7 +195,7 @@ def inference_video(avi_dir, work_dir, padding=0):
195
  dets = []
196
  fidx = 0
197
 
198
- print("Detecting people in the video using YOLO (slowest step in the pipeline)...")
199
  def generate_detections():
200
  global dets, fidx
201
  while True:
@@ -1012,13 +1012,13 @@ def load_masked_input_frames(test_videos, spec, wav_file, scene_num, result_fold
1012
  print("Successfully loaded the video frames")
1013
 
1014
  # Extract the keypoints from the frames
1015
- kp_dict, status = get_keypoints(frames)
1016
- if status != "success":
1017
- return None, None, status
1018
- print("Successfully extracted the keypoints")
1019
 
1020
  # Mask the frames using the keypoints extracted from the frames and prepare the input to the model
1021
- masked_frames, num_frames, orig_masked_frames, status = load_rgb_masked_frames(frames, kp_dict, asd=True)
1022
  if status != "success":
1023
  return None, None, status
1024
  print("Successfully loaded the masked frames")
@@ -1087,7 +1087,7 @@ def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
1087
  audio_emb = []
1088
 
1089
  model = model.cuda()
1090
-
1091
  for i in tqdm(range(0, len(video_sequences), batch_size)):
1092
  video_inp = video_sequences[i:i+batch_size, ]
1093
  vid_emb = model.forward_vid(video_inp.to(device), return_feats=False)
 
195
  dets = []
196
  fidx = 0
197
 
198
+ print("Detecting people in the video using YOLO...")
199
  def generate_detections():
200
  global dets, fidx
201
  while True:
 
1012
  print("Successfully loaded the video frames")
1013
 
1014
  # Extract the keypoints from the frames
1015
+ # kp_dict, status = get_keypoints(frames)
1016
+ # if status != "success":
1017
+ # return None, None, status
1018
+ # print("Successfully extracted the keypoints")
1019
 
1020
  # Mask the frames using the keypoints extracted from the frames and prepare the input to the model
1021
+ masked_frames, num_frames, orig_masked_frames, status = load_rgb_masked_frames(frames, kp_dict=None, asd=True)
1022
  if status != "success":
1023
  return None, None, status
1024
  print("Successfully loaded the masked frames")
 
1087
  audio_emb = []
1088
 
1089
  model = model.cuda()
1090
+
1091
  for i in tqdm(range(0, len(video_sequences), batch_size)):
1092
  video_inp = video_sequences[i:i+batch_size, ]
1093
  vid_emb = model.forward_vid(video_inp.to(device), return_feats=False)