cvdetectors committed on
Commit
a4a99d6
·
verified ·
1 Parent(s): 7734681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -154
app.py CHANGED
@@ -1,219 +1,98 @@
1
  import os
2
  import tempfile
3
  import cv2
4
- import numpy as np
5
  from PIL import Image, ImageDraw
6
  import gradio as gr
7
- from huggingface_hub import hf_hub_download
8
  from ultralytics import YOLO
9
  from supervision import Detections
10
- from collections import defaultdict
11
 
12
  # Download and load the YOLOv8 face detection model
13
  def load_model():
14
- model_path = hf_hub_download(repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
15
- return YOLO(model_path)
 
16
 
17
  model = load_model()
18
 
19
- # Simple Face Tracker
20
class FaceTracker:
    """Greedy IoU tracker that keeps integer IDs attached to face boxes across frames.

    Each call to :meth:`update` receives the boxes detected in one frame. A
    detection is attached to the not-yet-matched track whose last box overlaps
    it most, provided the IoU is strictly above ``iou_threshold``; otherwise it
    starts a new track. Tracks that go unmatched for more than
    ``max_frames_to_skip`` consecutive updates are dropped.
    """

    def __init__(self, iou_threshold=0.5, max_frames_to_skip=30):
        """Create an empty tracker.

        Args:
            iou_threshold: IoU a detection must exceed to continue a track.
            max_frames_to_skip: Number of consecutive missed updates a track
                survives before it is deleted.
        """
        self.next_id = 0  # ID handed to the next new track; also the number of IDs issued so far
        self.tracks = {}  # track id -> {'box', 'age', 'missed_frames'}
        self.iou_threshold = iou_threshold
        self.max_frames_to_skip = max_frames_to_skip
        self.face_features = {}  # placeholder for re-identification features; not read by this class

    def calculate_iou(self, box1, box2):
        """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

        Returns 0.0 when the boxes do not overlap or when the union area is
        not positive.
        """
        x1_1, y1_1, x2_1, y2_1 = box1
        x1_2, y1_2, x2_2, y2_2 = box2

        # Corners of the intersection rectangle.
        x_left = max(x1_1, x1_2)
        y_top = max(y1_1, y1_2)
        x_right = min(x2_1, x2_2)
        y_bottom = min(y2_1, y2_2)

        if x_right < x_left or y_bottom < y_top:
            return 0.0

        intersection_area = (x_right - x_left) * (y_bottom - y_top)

        box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
        box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
        union_area = box1_area + box2_area - intersection_area

        return intersection_area / union_area if union_area > 0 else 0.0

    def _start_track(self, box):
        """Register ``box`` as a brand-new track and consume one ID."""
        self.tracks[self.next_id] = {
            'box': box,
            'age': 0,
            'missed_frames': 0
        }
        self.next_id += 1

    def update(self, boxes):
        """Update tracking with the detections of one frame.

        Args:
            boxes: Iterable of ``(x1, y1, x2, y2)`` boxes for the current frame.

        Returns:
            The live ``tracks`` dictionary (the same object stored on the
            tracker, not a copy).
        """
        # Snapshot the IDs that existed before this frame. Only these can be
        # matched or counted as missed; tracks created below were seen in this
        # very frame and must not start life with a missed frame.
        existing_ids = list(self.tracks)
        matched_track_ids = set()

        for new_box in boxes:
            best_iou = self.iou_threshold
            best_track_id = None

            for track_id in existing_ids:
                if track_id in matched_track_ids:
                    continue

                iou = self.calculate_iou(self.tracks[track_id]['box'], new_box)
                if iou > best_iou:
                    best_iou = iou
                    best_track_id = track_id

            if best_track_id is None:
                self._start_track(new_box)
                continue

            track = self.tracks[best_track_id]
            track['box'] = new_box
            track['age'] += 1
            track['missed_frames'] = 0
            matched_track_ids.add(best_track_id)

        # Age out pre-existing tracks that found no detection this frame.
        for track_id in existing_ids:
            if track_id in matched_track_ids:
                continue

            self.tracks[track_id]['missed_frames'] += 1
            if self.tracks[track_id]['missed_frames'] > self.max_frames_to_skip:
                del self.tracks[track_id]

        return self.tracks
112
 
113
  def detect_faces(image: Image.Image):
114
- """
115
- Detects faces in an image and returns annotated image and count.
116
- """
117
  output = model(image)
118
  results = Detections.from_ultralytics(output[0])
119
  boxes = results.xyxy
120
-
121
  annotated = image.copy()
122
  draw = ImageDraw.Draw(annotated)
123
  for x1, y1, x2, y2 in boxes:
124
  draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
125
-
126
  return annotated, f"Number of faces detected: {len(boxes)}"
127
 
 
128
  def detect_faces_video(video_path: str):
129
  """
130
- Reads a video file, annotates faces on each frame, tracks unique faces,
131
- and writes out an annotated video.
132
  Returns the new video path and a summary.
133
  """
134
- # Initialize face tracker
135
- tracker = FaceTracker(iou_threshold=0.4, max_frames_to_skip=20)
136
-
137
  cap = cv2.VideoCapture(video_path)
138
  fps = cap.get(cv2.CAP_PROP_FPS)
139
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
140
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
141
-
142
  # Prepare output
143
  out_file = tempfile.mktemp(suffix=".mp4")
144
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
145
  writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
146
-
147
  frame_count = 0
148
- unique_person_count = 0
149
-
150
  while True:
151
  ret, frame = cap.read()
152
  if not ret:
153
  break
154
-
155
  # Convert frame BGR -> RGB and to PIL Image for model
156
  pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
157
  output = model(pil_img)
158
  results = Detections.from_ultralytics(output[0])
159
  boxes = results.xyxy
160
-
161
- # Update tracker with new detections
162
- tracked_faces = tracker.update(boxes)
163
-
164
- # Update unique person count
165
- unique_person_count = max(unique_person_count, len(tracker.tracks))
166
-
167
- # Draw boxes with IDs on original frame
168
- for track_id, track_data in tracked_faces.items():
169
- x1, y1, x2, y2 = track_data['box']
170
  cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
171
- # Add ID label
172
- cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1)-10),
173
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
174
-
175
- # Add current count to the frame
176
- cv2.putText(frame, f"Current faces: {len(tracked_faces)}", (10, 30),
177
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
178
- cv2.putText(frame, f"Unique persons: {unique_person_count}", (10, 60),
179
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
180
-
181
  writer.write(frame)
182
  frame_count += 1
183
-
 
184
  cap.release()
185
  writer.release()
186
-
187
- summary = (f"Processed {frame_count} frames. "
188
- f"Total unique persons detected: {unique_person_count}.")
189
-
 
 
 
190
  return out_file, summary
191
 
192
- # Build Gradio interfaces
193
# Image tab: a PIL image goes in, an annotated PIL image and a count string come out.
image_interface = gr.Interface(
    title="YOLOv8 Face Detector",
    description="Detect faces in images using a YOLOv8 model.",
    fn=detect_faces,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Text(label="Face Count"),
    ],
)

# Video tab: an uploaded video goes in, an annotated video and a summary string come out.
video_interface = gr.Interface(
    title="YOLOv8 Video Face Detector",
    description="Detect, track and count unique persons in videos using a YOLOv8 model.",
    fn=detect_faces_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Video(label="Annotated Video"),
        gr.Text(label="Summary"),
    ],
)

# Combine into tabs
demo = gr.TabbedInterface(
    tab_names=["Image", "Video"],
    interface_list=[image_interface, video_interface],
)
214
 
215
  def main():
216
- demo.launch()
 
217
 
218
  if __name__ == "__main__":
219
- main()
 
1
  import os
2
  import tempfile
3
  import cv2
4
+
5
  from PIL import Image, ImageDraw
6
  import gradio as gr
 
7
  from ultralytics import YOLO
8
  from supervision import Detections
9
+
10
 
11
  # Download and load the YOLOv8 face detection model
12
  def load_model():
13
+ model = YOLO("yolov8n-face.pt") # Make sure the path or name of the model is correct
14
+ return model
15
+
16
 
17
  model = load_model()
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def detect_faces(image: Image.Image):
 
 
 
21
  output = model(image)
22
  results = Detections.from_ultralytics(output[0])
23
  boxes = results.xyxy
24
+
25
  annotated = image.copy()
26
  draw = ImageDraw.Draw(annotated)
27
  for x1, y1, x2, y2 in boxes:
28
  draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
29
+
30
  return annotated, f"Number of faces detected: {len(boxes)}"
31
 
32
+
33
  def detect_faces_video(video_path: str):
34
  """
35
+ Reads a video file, annotates faces on each frame, and writes out an annotated video.
36
+
37
  Returns the new video path and a summary.
38
  """
 
 
 
39
  cap = cv2.VideoCapture(video_path)
40
  fps = cap.get(cv2.CAP_PROP_FPS)
41
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
42
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
43
+
44
  # Prepare output
45
  out_file = tempfile.mktemp(suffix=".mp4")
46
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
47
  writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
48
+
49
  frame_count = 0
50
+ total_faces = 0
51
+
52
  while True:
53
  ret, frame = cap.read()
54
  if not ret:
55
  break
56
+
57
  # Convert frame BGR -> RGB and to PIL Image for model
58
  pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
59
  output = model(pil_img)
60
  results = Detections.from_ultralytics(output[0])
61
  boxes = results.xyxy
62
+
63
+ # Draw boxes on original frame
64
+ for x1, y1, x2, y2 in boxes:
 
 
 
 
 
 
 
65
  cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
66
+
 
 
 
 
 
 
 
 
 
67
  writer.write(frame)
68
  frame_count += 1
69
+ total_faces += len(boxes)
70
+
71
  cap.release()
72
  writer.release()
73
+
74
+ avg_per_frame = total_faces / frame_count if frame_count else 0
75
+ summary = (
76
+ f"Processed {frame_count} frames. "
77
+ f"Total faces detected: {total_faces}. "
78
+ f"Average per frame: {avg_per_frame:.2f}"
79
+ )
80
  return out_file, summary
81
 
 
 
 
 
 
 
 
 
82
 
83
+ # Build Gradio interface
84
  video_interface = gr.Interface(
85
  fn=detect_faces_video,
86
  inputs=gr.Video(label="Upload Video"),
87
  outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
88
  title="YOLOv8 Video Face Detector",
89
+ description="Detect and annotate faces in videos using a YOLOv8 model."
90
  )
91
 
 
 
 
 
 
92
 
93
  def main():
94
+ video_interface.launch()
95
+
96
 
97
  if __name__ == "__main__":
98
+ main()