cvdetectors committed on
Commit 7734681 · verified · 1 Parent(s): 7468aae

Update app.py

Files changed (1)
  1. app.py +133 -16
app.py CHANGED
@@ -1,11 +1,13 @@
 import os
 import tempfile
 import cv2
+import numpy as np
 from PIL import Image, ImageDraw
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from ultralytics import YOLO
 from supervision import Detections
+from collections import defaultdict
 
 # Download and load the YOLOv8 face detection model
 def load_model():
@@ -14,6 +16,99 @@ def load_model():
 
 model = load_model()
 
+# Simple Face Tracker
+class FaceTracker:
+    def __init__(self, iou_threshold=0.5, max_frames_to_skip=30):
+        self.next_id = 0
+        self.tracks = {}  # Dictionary of tracked faces: id -> face data
+        self.iou_threshold = iou_threshold
+        self.max_frames_to_skip = max_frames_to_skip
+        self.face_features = {}  # Store face features for reidentification
+
+    def calculate_iou(self, box1, box2):
+        """Calculate IoU between two bounding boxes"""
+        # Extract coordinates
+        x1_1, y1_1, x2_1, y2_1 = box1
+        x1_2, y1_2, x2_2, y2_2 = box2
+
+        # Calculate intersection area
+        x_left = max(x1_1, x1_2)
+        y_top = max(y1_1, y1_2)
+        x_right = min(x2_1, x2_2)
+        y_bottom = min(y2_1, y2_2)
+
+        if x_right < x_left or y_bottom < y_top:
+            return 0.0
+
+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
+
+        # Calculate union area
+        box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
+        box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
+        union_area = box1_area + box2_area - intersection_area
+
+        return intersection_area / union_area if union_area > 0 else 0.0
+
+    def update(self, boxes):
+        """Update tracking with new detections"""
+        # If no tracks yet, initialize all as new tracks
+        if not self.tracks:
+            for box in boxes:
+                self.tracks[self.next_id] = {
+                    'box': box,
+                    'age': 0,
+                    'missed_frames': 0
+                }
+                self.next_id += 1
+            return self.tracks
+
+        # Match detections with existing tracks
+        matched_track_ids = set()
+        matched_detection_indices = set()
+
+        # For each detection, find the best matching track
+        for i, new_box in enumerate(boxes):
+            best_iou = self.iou_threshold
+            best_track_id = None
+
+            for track_id, track_data in self.tracks.items():
+                if track_id in matched_track_ids:
+                    continue
+
+                iou = self.calculate_iou(track_data['box'], new_box)
+                if iou > best_iou:
+                    best_iou = iou
+                    best_track_id = track_id
+
+            if best_track_id is not None:
+                # Update matched track
+                self.tracks[best_track_id]['box'] = new_box
+                self.tracks[best_track_id]['age'] += 1
+                self.tracks[best_track_id]['missed_frames'] = 0
+
+                matched_track_ids.add(best_track_id)
+                matched_detection_indices.add(i)
+
+        # Create new tracks for unmatched detections
+        for i, box in enumerate(boxes):
+            if i not in matched_detection_indices:
+                self.tracks[self.next_id] = {
+                    'box': box,
+                    'age': 0,
+                    'missed_frames': 0
+                }
+                self.next_id += 1
+
+        # Update counters for unmatched tracks
+        for track_id in list(self.tracks.keys()):
+            if track_id not in matched_track_ids:
+                self.tracks[track_id]['missed_frames'] += 1
+
+                # Remove tracks that have been missing for too long
+                if self.tracks[track_id]['missed_frames'] > self.max_frames_to_skip:
+                    del self.tracks[track_id]
+
+        return self.tracks
 
 def detect_faces(image: Image.Image):
     """
@@ -22,56 +117,78 @@ def detect_faces(image: Image.Image):
     output = model(image)
     results = Detections.from_ultralytics(output[0])
     boxes = results.xyxy
+
     annotated = image.copy()
     draw = ImageDraw.Draw(annotated)
     for x1, y1, x2, y2 in boxes:
         draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
+
     return annotated, f"Number of faces detected: {len(boxes)}"
 
-
 def detect_faces_video(video_path: str):
     """
-    Reads a video file, annotates faces on each frame, and writes out an annotated video.
+    Reads a video file, annotates faces on each frame, tracks unique faces,
+    and writes out an annotated video.
     Returns the new video path and a summary.
     """
+    # Initialize face tracker
+    tracker = FaceTracker(iou_threshold=0.4, max_frames_to_skip=20)
+
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
+
     # Prepare output
     out_file = tempfile.mktemp(suffix=".mp4")
     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
     writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
-
+
     frame_count = 0
-    total_faces = 0
+    unique_person_count = 0
+
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+
         # Convert frame BGR -> RGB and to PIL Image for model
         pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         output = model(pil_img)
         results = Detections.from_ultralytics(output[0])
         boxes = results.xyxy
-        # Draw boxes on original frame
-        for x1, y1, x2, y2 in boxes:
+
+        # Update tracker with new detections
+        tracked_faces = tracker.update(boxes)
+
+        # Update unique person count
+        unique_person_count = max(unique_person_count, len(tracker.tracks))
+
+        # Draw boxes with IDs on original frame
+        for track_id, track_data in tracked_faces.items():
+            x1, y1, x2, y2 = track_data['box']
             cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
+            # Add ID label
+            cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1)-10),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+
+        # Add current count to the frame
+        cv2.putText(frame, f"Current faces: {len(tracked_faces)}", (10, 30),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, f"Unique persons: {unique_person_count}", (10, 60),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
         writer.write(frame)
         frame_count += 1
-        total_faces += len(boxes)
-
+
     cap.release()
     writer.release()
-
-    avg_per_frame = total_faces / frame_count if frame_count else 0
+
     summary = (f"Processed {frame_count} frames. "
-               f"Total faces detected: {total_faces}. "
-               f"Average per frame: {avg_per_frame:.2f}")
+               f"Total unique persons detected: {unique_person_count}.")
+
     return out_file, summary
 
-
 # Build Gradio interfaces
 image_interface = gr.Interface(
     fn=detect_faces,
@@ -86,7 +203,7 @@ video_interface = gr.Interface(
     inputs=gr.Video(label="Upload Video"),
     outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
     title="YOLOv8 Video Face Detector",
-    description="Detect and annotate faces in videos using a YOLOv8 model."
+    description="Detect, track and count unique persons in videos using a YOLOv8 model."
 )
 
 # Combine into tabs
@@ -99,4 +216,4 @@ def main():
     demo.launch()
 
 if __name__ == "__main__":
-    main()
+    main()
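For reference, a minimal sanity check of the IoU criterion the new matching step relies on. This is a hypothetical snippet, not part of the commit; it assumes the FaceTracker class exactly as committed above:

    # Hypothetical check of FaceTracker.calculate_iou (not part of the commit)
    tracker = FaceTracker()
    box_a = (0, 0, 10, 10)   # area 100
    box_b = (5, 5, 15, 15)   # area 100, overlapping box_a in a 5x5 region
    # intersection = 25, union = 100 + 100 - 25 = 175, IoU = 25/175 ~ 0.143
    print(tracker.calculate_iou(box_a, box_b))  # ~0.1429, below the 0.5 default threshold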
 
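Likewise, a hedged two-frame walk-through of the tracker's ID assignment. The boxes are made-up values; the expected IDs follow from the update() logic in the committed class:

    # Hypothetical two-frame walk-through (not part of the commit)
    tracker = FaceTracker(iou_threshold=0.4, max_frames_to_skip=20)

    # Frame 1: two faces appear -> tracks 0 and 1 are created
    tracker.update([(100, 100, 200, 200), (400, 100, 500, 200)])
    print(sorted(tracker.tracks))  # [0, 1]

    # Frame 2: the first face shifts slightly (IoU ~ 0.75 -> keeps ID 0);
    # the second face vanishes and a new one enters -> it gets ID 2
    tracker.update([(110, 105, 210, 205), (700, 100, 800, 200)])
    print(sorted(tracker.tracks))  # [0, 1, 2]; track 1 is only dropped after
                                   # more than max_frames_to_skip missed frames

This running set of track IDs is what drives the summary: unique_person_count takes the maximum of len(tracker.tracks) over all frames, replacing the old per-frame face total.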