Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
afa2bc0
1
Parent(s):
342ecda
Update app
Browse files
app.py
CHANGED
|
@@ -462,57 +462,7 @@ def preprocess_video(path, result_folder, apply_preprocess, padding=20):
|
|
| 462 |
|
| 463 |
person_videos, person_tracks, msg = get_person_detection(all_frames, frame_count, padding)
|
| 464 |
if msg != "success":
|
| 465 |
-
return None, None, None, msg
|
| 466 |
-
|
| 467 |
-
# # Load YOLOv9 model (pre-trained on COCO dataset)
|
| 468 |
-
# yolo_model = YOLO("yolov9s.pt")
|
| 469 |
-
# print("Loaded the YOLO model")
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
# person_videos = {}
|
| 474 |
-
# person_tracks = {}
|
| 475 |
-
|
| 476 |
-
# print("Processing the frames...")
|
| 477 |
-
# for frame_idx in tqdm(range(frame_count)):
|
| 478 |
-
|
| 479 |
-
# frame = all_frames[frame_idx]
|
| 480 |
-
|
| 481 |
-
# # Perform person detection
|
| 482 |
-
# results = yolo_model(frame, verbose=False)
|
| 483 |
-
# detections = results[0].boxes
|
| 484 |
-
|
| 485 |
-
# for i, det in enumerate(detections):
|
| 486 |
-
# x1, y1, x2, y2 = det.xyxy[0]
|
| 487 |
-
# cls = det.cls[0]
|
| 488 |
-
# if int(cls) == 0: # Class 0 is 'person' in COCO dataset
|
| 489 |
-
|
| 490 |
-
# x1 = max(0, int(x1) - padding)
|
| 491 |
-
# y1 = max(0, int(y1) - padding)
|
| 492 |
-
# x2 = min(frame.shape[1], int(x2) + padding)
|
| 493 |
-
# y2 = min(frame.shape[0], int(y2) + padding)
|
| 494 |
-
|
| 495 |
-
# if i not in person_videos:
|
| 496 |
-
# person_videos[i] = []
|
| 497 |
-
# person_tracks[i] = []
|
| 498 |
-
|
| 499 |
-
# person_videos[i].append(frame)
|
| 500 |
-
# person_tracks[i].append([x1,y1,x2,y2])
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
# num_persons = 0
|
| 504 |
-
# for i in person_videos.keys():
|
| 505 |
-
# if len(person_videos[i]) >= frame_count//2:
|
| 506 |
-
# num_persons+=1
|
| 507 |
-
|
| 508 |
-
# if num_persons==0:
|
| 509 |
-
# msg = "No person detected in the video! Please give a video with one person as input"
|
| 510 |
-
# return None, None, None, msg
|
| 511 |
-
# if num_persons>1:
|
| 512 |
-
# msg = "More than one person detected in the video! Please give a video with only one person as input"
|
| 513 |
-
# return None, None, None, msg
|
| 514 |
-
|
| 515 |
-
|
| 516 |
|
| 517 |
# For the person detected, crop the frame based on the bounding box
|
| 518 |
if len(person_videos[0]) > frame_count-10:
|
|
@@ -1144,7 +1094,7 @@ def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
|
|
| 1144 |
video_emb = []
|
| 1145 |
audio_emb = []
|
| 1146 |
|
| 1147 |
-
model = model.
|
| 1148 |
|
| 1149 |
for i in tqdm(range(0, len(video_sequences), batch_size)):
|
| 1150 |
video_inp = video_sequences[i:i+batch_size, ]
|
|
|
|
| 462 |
|
| 463 |
person_videos, person_tracks, msg = get_person_detection(all_frames, frame_count, padding)
|
| 464 |
if msg != "success":
|
| 465 |
+
return None, None, None, msg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
# For the person detected, crop the frame based on the bounding box
|
| 468 |
if len(person_videos[0]) > frame_count-10:
|
|
|
|
| 1094 |
video_emb = []
|
| 1095 |
audio_emb = []
|
| 1096 |
|
| 1097 |
+
model = model.to(device)
|
| 1098 |
|
| 1099 |
for i in tqdm(range(0, len(video_sequences), batch_size)):
|
| 1100 |
video_inp = video_sequences[i:i+batch_size, ]
|