Spaces: Update app.py (Build error)
app.py CHANGED
@@ -1,219 +1,98 @@
 import os
 import tempfile
 import cv2
-
+
 from PIL import Image, ImageDraw
 import gradio as gr
-from huggingface_hub import hf_hub_download
 from ultralytics import YOLO
 from supervision import Detections
-
+
 
 # Download and load the YOLOv8 face detection model
 def load_model():
-
-    return
+    model = YOLO("yolov8n-face.pt")  # Make sure the path or name of the model is correct
+    return model
+
 
 model = load_model()
 
-# Simple Face Tracker
-class FaceTracker:
-    def __init__(self, iou_threshold=0.5, max_frames_to_skip=30):
-        self.next_id = 0
-        self.tracks = {}  # Dictionary of tracked faces: id -> face data
-        self.iou_threshold = iou_threshold
-        self.max_frames_to_skip = max_frames_to_skip
-        self.face_features = {}  # Store face features for reidentification
-
-    def calculate_iou(self, box1, box2):
-        """Calculate IoU between two bounding boxes"""
-        # Extract coordinates
-        x1_1, y1_1, x2_1, y2_1 = box1
-        x1_2, y1_2, x2_2, y2_2 = box2
-
-        # Calculate intersection area
-        x_left = max(x1_1, x1_2)
-        y_top = max(y1_1, y1_2)
-        x_right = min(x2_1, x2_2)
-        y_bottom = min(y2_1, y2_2)
-
-        if x_right < x_left or y_bottom < y_top:
-            return 0.0
-
-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
-
-        # Calculate union area
-        box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
-        box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
-        union_area = box1_area + box2_area - intersection_area
-
-        return intersection_area / union_area if union_area > 0 else 0.0
-
-    def update(self, boxes):
-        """Update tracking with new detections"""
-        # If no tracks yet, initialize all as new tracks
-        if not self.tracks:
-            for box in boxes:
-                self.tracks[self.next_id] = {
-                    'box': box,
-                    'age': 0,
-                    'missed_frames': 0
-                }
-                self.next_id += 1
-            return self.tracks
-
-        # Match detections with existing tracks
-        matched_track_ids = set()
-        matched_detection_indices = set()
-
-        # For each detection, find the best matching track
-        for i, new_box in enumerate(boxes):
-            best_iou = self.iou_threshold
-            best_track_id = None
-
-            for track_id, track_data in self.tracks.items():
-                if track_id in matched_track_ids:
-                    continue
-
-                iou = self.calculate_iou(track_data['box'], new_box)
-                if iou > best_iou:
-                    best_iou = iou
-                    best_track_id = track_id
-
-            if best_track_id is not None:
-                # Update matched track
-                self.tracks[best_track_id]['box'] = new_box
-                self.tracks[best_track_id]['age'] += 1
-                self.tracks[best_track_id]['missed_frames'] = 0
-
-                matched_track_ids.add(best_track_id)
-                matched_detection_indices.add(i)
-
-        # Create new tracks for unmatched detections
-        for i, box in enumerate(boxes):
-            if i not in matched_detection_indices:
-                self.tracks[self.next_id] = {
-                    'box': box,
-                    'age': 0,
-                    'missed_frames': 0
-                }
-                self.next_id += 1
-
-        # Update counters for unmatched tracks
-        for track_id in list(self.tracks.keys()):
-            if track_id not in matched_track_ids:
-                self.tracks[track_id]['missed_frames'] += 1
-
-                # Remove tracks that have been missing for too long
-                if self.tracks[track_id]['missed_frames'] > self.max_frames_to_skip:
-                    del self.tracks[track_id]
-
-        return self.tracks
 
 def detect_faces(image: Image.Image):
-    """
-    Detects faces in an image and returns annotated image and count.
-    """
     output = model(image)
     results = Detections.from_ultralytics(output[0])
     boxes = results.xyxy
-
+
     annotated = image.copy()
     draw = ImageDraw.Draw(annotated)
     for x1, y1, x2, y2 in boxes:
         draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
-
+
     return annotated, f"Number of faces detected: {len(boxes)}"
 
+
 def detect_faces_video(video_path: str):
     """
-    Reads a video file, annotates faces on each frame,
-
+    Reads a video file, annotates faces on each frame, and writes out an annotated video.
+
     Returns the new video path and a summary.
     """
-    # Initialize face tracker
-    tracker = FaceTracker(iou_threshold=0.4, max_frames_to_skip=20)
-
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
+
    # Prepare output
    out_file = tempfile.mktemp(suffix=".mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
-
+
    frame_count = 0
-
-
+    total_faces = 0
+
    while True:
        ret, frame = cap.read()
        if not ret:
            break
-
+
        # Convert frame BGR -> RGB and to PIL Image for model
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        output = model(pil_img)
        results = Detections.from_ultralytics(output[0])
        boxes = results.xyxy
-
-        #
-
-
-        # Update unique person count
-        unique_person_count = max(unique_person_count, len(tracker.tracks))
-
-        # Draw boxes with IDs on original frame
-        for track_id, track_data in tracked_faces.items():
-            x1, y1, x2, y2 = track_data['box']
+
+        # Draw boxes on original frame
+        for x1, y1, x2, y2 in boxes:
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
-
-            cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1)-10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-
-        # Add current count to the frame
-        cv2.putText(frame, f"Current faces: {len(tracked_faces)}", (10, 30),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-        cv2.putText(frame, f"Unique persons: {unique_person_count}", (10, 60),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-
+
        writer.write(frame)
        frame_count += 1
-
+        total_faces += len(boxes)
+
    cap.release()
    writer.release()
-
-
-
-
+
+    avg_per_frame = total_faces / frame_count if frame_count else 0
+    summary = (
+        f"Processed {frame_count} frames. "
+        f"Total faces detected: {total_faces}. "
+        f"Average per frame: {avg_per_frame:.2f}"
+    )
    return out_file, summary
 
-# Build Gradio interfaces
-image_interface = gr.Interface(
-    fn=detect_faces,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=[gr.Image(type="pil", label="Annotated Image"), gr.Text(label="Face Count")],
-    title="YOLOv8 Face Detector",
-    description="Detect faces in images using a YOLOv8 model."
-)
 
+# Build Gradio interface
 video_interface = gr.Interface(
    fn=detect_faces_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
    title="YOLOv8 Video Face Detector",
-    description="Detect
+    description="Detect and annotate faces in videos using a YOLOv8 model."
 )
 
-# Combine into tabs
-demo = gr.TabbedInterface(
-    interface_list=[image_interface, video_interface],
-    tab_names=["Image", "Video"]
-)
 
 def main():
-
+    video_interface.launch()
+
 
 if __name__ == "__main__":
-    main()
+    main()
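One caveat in code this commit leaves untouched: tempfile.mktemp() is deprecated in the Python standard library, because it only reserves a name and another process can claim it before the file is created. A minimal sketch of a drop-in replacement using NamedTemporaryFile, keeping the same out_file variable the script already uses:

import tempfile

# Create the output file up front instead of only reserving a name.
# delete=False keeps the file after the handle closes, so
# cv2.VideoWriter can reopen the path and write to it.
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
    out_file = tmp.name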
The updated app.py in full (98 lines):

import os
import tempfile
import cv2

from PIL import Image, ImageDraw
import gradio as gr
from ultralytics import YOLO
from supervision import Detections


# Download and load the YOLOv8 face detection model
def load_model():
    model = YOLO("yolov8n-face.pt")  # Make sure the path or name of the model is correct
    return model


model = load_model()


def detect_faces(image: Image.Image):
    output = model(image)
    results = Detections.from_ultralytics(output[0])
    boxes = results.xyxy

    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    for x1, y1, x2, y2 in boxes:
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)

    return annotated, f"Number of faces detected: {len(boxes)}"


def detect_faces_video(video_path: str):
    """
    Reads a video file, annotates faces on each frame, and writes out an annotated video.

    Returns the new video path and a summary.
    """
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Prepare output
    out_file = tempfile.mktemp(suffix=".mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(out_file, fourcc, fps, (width, height))

    frame_count = 0
    total_faces = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame BGR -> RGB and to PIL Image for model
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        output = model(pil_img)
        results = Detections.from_ultralytics(output[0])
        boxes = results.xyxy

        # Draw boxes on original frame
        for x1, y1, x2, y2 in boxes:
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)

        writer.write(frame)
        frame_count += 1
        total_faces += len(boxes)

    cap.release()
    writer.release()

    avg_per_frame = total_faces / frame_count if frame_count else 0
    summary = (
        f"Processed {frame_count} frames. "
        f"Total faces detected: {total_faces}. "
        f"Average per frame: {avg_per_frame:.2f}"
    )
    return out_file, summary


# Build Gradio interface
video_interface = gr.Interface(
    fn=detect_faces_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[gr.Video(label="Annotated Video"), gr.Text(label="Summary")],
    title="YOLOv8 Video Face Detector",
    description="Detect and annotate faces in videos using a YOLOv8 model."
)


def main():
    video_interface.launch()


if __name__ == "__main__":
    main()