Spaces:

sindhuhegde
/

gestsync

Running on Zero

App Files Community

sindhuhegde committed on Aug 27, 2024

Commit

afa2bc0

1 Parent(s): 342ecda

Update app

Browse files

Files changed (1) hide show

app.py +2 -52

app.py CHANGED Viewed

@@ -462,57 +462,7 @@ def preprocess_video(path, result_folder, apply_preprocess, padding=20):
 		person_videos, person_tracks, msg = get_person_detection(all_frames, frame_count, padding)
 		if msg != "success":
-			return None, None, None, msg
-		# # Load YOLOv9 model (pre-trained on COCO dataset)
-		# yolo_model = YOLO("yolov9s.pt")
-		# print("Loaded the YOLO model")
-		# person_videos = {}
-		# person_tracks = {}
-		# print("Processing the frames...")
-		# for frame_idx in tqdm(range(frame_count)):
-		# 	frame = all_frames[frame_idx]
-		# 	# Perform person detection
-		# 	results = yolo_model(frame, verbose=False)
-		# 	detections = results[0].boxes
-		# 	for i, det in enumerate(detections):
-		# 		x1, y1, x2, y2 = det.xyxy[0]
-		# 		cls = det.cls[0]
-		# 		if int(cls) == 0:  # Class 0 is 'person' in COCO dataset
-		# 			x1 = max(0, int(x1) - padding)
-		# 			y1 = max(0, int(y1) - padding)
-		# 			x2 = min(frame.shape[1], int(x2) + padding)
-		# 			y2 = min(frame.shape[0], int(y2) + padding)
-		# 			if i not in person_videos:
-		# 				person_videos[i] = []
-		# 				person_tracks[i] = []
-		# 			person_videos[i].append(frame)
-		# 			person_tracks[i].append([x1,y1,x2,y2])
-		# num_persons = 0
-		# for i in person_videos.keys():
-		# 	if len(person_videos[i]) >= frame_count//2:
-		# 		num_persons+=1
-		# if num_persons==0:
-		# 	msg = "No person detected in the video! Please give a video with one person as input"
-		# 	return None, None, None, msg
-		# if num_persons>1:
-		# 	msg = "More than one person detected in the video! Please give a video with only one person as input"
-		# 	return None, None, None, msg
 		# For the person detected, crop the frame based on the bounding box
 		if len(person_videos[0]) > frame_count-10:
@@ -1144,7 +1094,7 @@ def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
 	video_emb = []
 	audio_emb = []
-	model = model.cuda()
 	for i in tqdm(range(0, len(video_sequences), batch_size)):
 		video_inp = video_sequences[i:i+batch_size, ]

 		person_videos, person_tracks, msg = get_person_detection(all_frames, frame_count, padding)
 		if msg != "success":
+			return None, None, None, msg
 		# For the person detected, crop the frame based on the bounding box
 		if len(person_videos[0]) > frame_count-10:
 	video_emb = []
 	audio_emb = []
+	model = model.to(device)
 	for i in tqdm(range(0, len(video_sequences), batch_size)):
 		video_inp = video_sequences[i:i+batch_size, ]