Spaces:

cvdetectors
/

imagecounter

Build error

App Files Files Community

cvdetectors committed on Apr 19, 2025

Commit

7734681

verified ·

1 Parent(s): 7468aae

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -16

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import os
 import tempfile
 import cv2
 from PIL import Image, ImageDraw
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from ultralytics import YOLO
 from supervision import Detections
 # Download and load the YOLOv8 face detection model
 def load_model():
@@ -14,6 +16,99 @@ def load_model():
 model = load_model()
 def detect_faces(image: Image.Image):
     """
@@ -22,56 +117,78 @@ def detect_faces(image: Image.Image):
     output = model(image)
     results = Detections.from_ultralytics(output[0])
     boxes = results.xyxy
     annotated = image.copy()
     draw = ImageDraw.Draw(annotated)
     for x1, y1, x2, y2 in boxes:
         draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
     return annotated, f"Number of faces detected: {len(boxes)}"
 def detect_faces_video(video_path: str):
     """
     Read a video file, draw a box around every detected face on each frame,
     and write the annotated frames to a new temporary ``.mp4`` file.

     Args:
         video_path: Path of the input video, passed to ``cv2.VideoCapture``.

     Returns:
         A ``(out_file, summary)`` tuple: the path of the annotated video and
         a text summary with the frame count, the total number of detections
         and the average number of detections per frame.
     """
     cap = cv2.VideoCapture(video_path)
     writer = None
     frame_count = 0
     total_faces = 0
     try:
         fps = cap.get(cv2.CAP_PROP_FPS)
         if not fps or fps <= 0:
             # The capture reports 0 when it cannot provide a frame rate;
             # fall back to a common default so the writer gets a usable value.
             fps = 30.0
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

         # Prepare output. mktemp() only invents a name (deprecated, racy);
         # NamedTemporaryFile actually creates the file, and delete=False
         # keeps it on disk for the writer and for the caller.
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
             out_file = tmp.name
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))

         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             # Convert frame BGR -> RGB and to PIL Image for model
             pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
             output = model(pil_img)
             results = Detections.from_ultralytics(output[0])
             boxes = results.xyxy
             # Draw boxes on original frame
             for x1, y1, x2, y2 in boxes:
                 cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
             writer.write(frame)
             frame_count += 1
             total_faces += len(boxes)
     finally:
         # Release both handles even if reading or inference raised.
         cap.release()
         if writer is not None:
             writer.release()

     avg_per_frame = total_faces / frame_count if frame_count else 0
     summary = (f"Processed {frame_count} frames. "
                f"Total faces detected: {total_faces}. "
                f"Average per frame: {avg_per_frame:.2f}")
     return out_file, summary
 # Build Gradio interfaces
 image_interface = gr.Interface(
     fn=detect_faces,
@@ -86,7 +203,7 @@ video_interface = gr.Interface(
     inputs=gr.Video(label="Upload Video"),
     outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
     title="YOLOv8 Video Face Detector",
-    description="Detect and annotate faces in videos using a YOLOv8 model."
 )
 # Combine into tabs
@@ -99,4 +216,4 @@ def main():
     demo.launch()
 if __name__ == "__main__":
-    main()

 import os
 import tempfile
 import cv2
+import numpy as np
 from PIL import Image, ImageDraw
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from ultralytics import YOLO
 from supervision import Detections
+from collections import defaultdict
 # Download and load the YOLOv8 face detection model
 def load_model():
 model = load_model()
+# Simple Face Tracker
class FaceTracker:
    """Greedy IoU tracker that assigns persistent integer IDs to face boxes.

    ``tracks`` maps a track ID to a dict with three keys:

    * ``'box'`` - the most recently matched ``(x1, y1, x2, y2)`` box,
    * ``'age'`` - how many times the track has been re-matched,
    * ``'missed_frames'`` - consecutive updates without a matching detection.

    ``next_id`` is the ID the next new track will receive, so it also equals
    the number of tracks created so far.
    """

    def __init__(self, iou_threshold=0.5, max_frames_to_skip=30):
        """
        Args:
            iou_threshold: A detection matches a track only if their IoU is
                strictly greater than this value.
            max_frames_to_skip: A track is dropped once its ``missed_frames``
                exceeds this value.
        """
        self.next_id = 0
        self.tracks = {}  # Dictionary of tracked faces: id -> face data
        self.iou_threshold = iou_threshold
        self.max_frames_to_skip = max_frames_to_skip
        # Not read or written by any method of this class.
        self.face_features = {}

    def calculate_iou(self, box1, box2):
        """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

        Returns 0.0 when the boxes do not overlap or the union area is not
        positive.
        """
        x1_1, y1_1, x2_1, y2_1 = box1
        x1_2, y1_2, x2_2, y2_2 = box2

        # Corners of the overlapping rectangle (if any).
        x_left = max(x1_1, x1_2)
        y_top = max(y1_1, y1_2)
        x_right = min(x2_1, x2_2)
        y_bottom = min(y2_1, y2_2)
        if x_right < x_left or y_bottom < y_top:
            return 0.0
        intersection_area = (x_right - x_left) * (y_bottom - y_top)

        box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
        box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
        union_area = box1_area + box2_area - intersection_area
        return intersection_area / union_area if union_area > 0 else 0.0

    def _start_track(self, box):
        """Register ``box`` as a brand-new track under the next free ID."""
        self.tracks[self.next_id] = {
            'box': box,
            'age': 0,
            'missed_frames': 0
        }
        self.next_id += 1

    def update(self, boxes):
        """Update tracking with the detections of one frame.

        Each detection, in input order, is matched to the not-yet-matched
        track with the highest IoU above ``iou_threshold``. Unmatched
        detections start new tracks; unmatched existing tracks have their
        ``missed_frames`` incremented and are removed once it exceeds
        ``max_frames_to_skip``.

        Args:
            boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.

        Returns:
            ``self.tracks`` (the live dict, not a copy).
        """
        matched_track_ids = set()
        unmatched_boxes = []

        for new_box in boxes:
            best_iou = self.iou_threshold
            best_track_id = None
            for track_id, track_data in self.tracks.items():
                if track_id in matched_track_ids:
                    continue
                iou = self.calculate_iou(track_data['box'], new_box)
                if iou > best_iou:
                    best_iou = iou
                    best_track_id = track_id

            if best_track_id is None:
                unmatched_boxes.append(new_box)
                continue

            track = self.tracks[best_track_id]
            track['box'] = new_box
            track['age'] += 1
            track['missed_frames'] = 0
            matched_track_ids.add(best_track_id)

        # Age the pre-existing unmatched tracks BEFORE adding new ones, so a
        # track created in this call is not immediately counted as missed.
        for track_id in list(self.tracks):
            if track_id in matched_track_ids:
                continue
            self.tracks[track_id]['missed_frames'] += 1
            # Remove tracks that have been missing for too long
            if self.tracks[track_id]['missed_frames'] > self.max_frames_to_skip:
                del self.tracks[track_id]

        for box in unmatched_boxes:
            self._start_track(box)

        return self.tracks
 def detect_faces(image: Image.Image):
     """
     output = model(image)
     results = Detections.from_ultralytics(output[0])
     boxes = results.xyxy
     annotated = image.copy()
     draw = ImageDraw.Draw(annotated)
     for x1, y1, x2, y2 in boxes:
         draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
     return annotated, f"Number of faces detected: {len(boxes)}"
 def detect_faces_video(video_path: str):
     """
     Read a video file, detect faces on each frame, track them across frames
     with ``FaceTracker``, and write an annotated copy to a temporary ``.mp4``.

     Every live track is drawn as a red box with its track ID; the running
     track count and unique-person count are overlaid on each frame.

     Args:
         video_path: Path of the input video, passed to ``cv2.VideoCapture``.

     Returns:
         A ``(out_file, summary)`` tuple: the path of the annotated video and
         a text summary with the frame count and the number of unique
         persons (distinct track IDs issued).
     """
     # Initialize face tracker
     tracker = FaceTracker(iou_threshold=0.4, max_frames_to_skip=20)
     cap = cv2.VideoCapture(video_path)
     writer = None
     frame_count = 0
     unique_person_count = 0
     try:
         fps = cap.get(cv2.CAP_PROP_FPS)
         if not fps or fps <= 0:
             # The capture reports 0 when it cannot provide a frame rate;
             # fall back to a common default so the writer gets a usable value.
             fps = 30.0
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

         # Prepare output. mktemp() only invents a name (deprecated, racy);
         # NamedTemporaryFile actually creates the file, and delete=False
         # keeps it on disk for the writer and for the caller.
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
             out_file = tmp.name
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))

         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             # Convert frame BGR -> RGB and to PIL Image for model
             pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
             output = model(pil_img)
             results = Detections.from_ultralytics(output[0])
             boxes = results.xyxy

             # Update tracker with new detections
             tracked_faces = tracker.update(boxes)

             # Every new track takes the next ID, so next_id is the number of
             # distinct tracks seen so far. The peak number of simultaneous
             # tracks would undercount people who appear at different times
             # and would disagree with the IDs drawn below.
             unique_person_count = tracker.next_id

             # Draw boxes with IDs on original frame
             for track_id, track_data in tracked_faces.items():
                 x1, y1, x2, y2 = track_data['box']
                 cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                 # Add ID label
                 cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1)-10),
                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

             # Add current count to the frame
             cv2.putText(frame, f"Current faces: {len(tracked_faces)}", (10, 30),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
             cv2.putText(frame, f"Unique persons: {unique_person_count}", (10, 60),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
             writer.write(frame)
             frame_count += 1
     finally:
         # Release both handles even if reading or inference raised.
         cap.release()
         if writer is not None:
             writer.release()

     summary = (f"Processed {frame_count} frames. "
                f"Total unique persons detected: {unique_person_count}.")
     return out_file, summary
 # Build Gradio interfaces
 image_interface = gr.Interface(
     fn=detect_faces,
     inputs=gr.Video(label="Upload Video"),
     outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
     title="YOLOv8 Video Face Detector",
+    description="Detect, track and count unique persons in videos using a YOLOv8 model."
 )
 # Combine into tabs
     demo.launch()
 if __name__ == "__main__":
+    main()