Spaces:

hb-setosys
/

Setosys_Objects_in_Video_Model

Build error

App Files Community

hb-setosys committed on Jan 27, 2025

Commit

6ef8707

verified ·

1 Parent(s): 5b1f18e

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -69

app.py CHANGED Viewed

@@ -1,108 +1,94 @@
-# Install required libraries
-#pip install gradio opencv-python-headless
-# Download YOLO files
-#wget -nc https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg
-#wget -nc https://pjreddie.com/media/files/yolov3.weights
-#wget -nc https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names
 import gradio as gr
 import cv2
 import numpy as np
-def count_people(video_path):
-    """Count people in each frame of a video with YOLOv3 and summarise the counts.
-
-    The YOLOv3 network ('yolov3.weights', 'yolov3.cfg') and the class list
-    ('coco.names') are loaded from relative paths on every call. Each frame is
-    run through the network, detections labelled 'person' with a score above
-    0.5 are kept, overlapping boxes are merged with non-maximum suppression,
-    and the surviving boxes are counted.
-
-    Args:
-        video_path: Path of the video file, handed to cv2.VideoCapture.
-
-    Returns:
-        dict with the keys 'Total Frames Processed', 'Total People Detected',
-        'Average People Per Frame' and 'Max People in a Single Frame'. When no
-        frame could be read, the counts, the average and the maximum are all
-        zero.
-    """
-    # Load YOLO model
-    net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
-    # Load class names
-    with open('coco.names', 'r') as f:
-        classes = [line.strip() for line in f.readlines()]
     # Open video
     cap = cv2.VideoCapture(video_path)
     frame_count = 0
-    total_people_count = 0
-    people_per_frame = []
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         height, width, _ = frame.shape
-        # Create blob from frame
-        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
         net.setInput(blob)
-        # Get output layer names
         output_layers_names = net.getUnconnectedOutLayersNames()
-        # Forward pass
         layer_outputs = net.forward(output_layers_names)
-        # Lists to store detected people
         boxes = []
         confidences = []
-        # Process detections
         for output in layer_outputs:
             for detection in output:
                 scores = detection[5:]
                 class_id = np.argmax(scores)
                 confidence = scores[class_id]
-                # Check if detected object is a person
-                if classes[class_id] == 'person' and confidence > 0.5:
-                    # Box centre and size are scaled by the frame dimensions
                     center_x = int(detection[0] * width)
                     center_y = int(detection[1] * height)
                     w = int(detection[2] * width)
                     h = int(detection[3] * height)
-                    # Rectangle coordinates
-                    x = int(center_x - w/2)
-                    y = int(center_y - h/2)
                     boxes.append([x, y, w, h])
                     confidences.append(float(confidence))
         # Apply non-maximum suppression
         indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
-        # Count people in this frame
-        people_in_frame = len(indexes)
-        people_per_frame.append(people_in_frame)
-        total_people_count += people_in_frame
-        frame_count += 1
-    # Release resources
     cap.release()
-    # Prepare analytics
-    # A video that yields no frames leaves people_per_frame empty; np.mean would
-    # then give NaN and np.max would raise ValueError, so report zeros instead.
-    average_people = round(np.mean(people_per_frame), 2) if people_per_frame else 0.0
-    max_people = int(max(people_per_frame, default=0))
     return {
-        'Total Frames Processed': frame_count,
-        'Total People Detected': total_people_count,
-        'Average People Per Frame': average_people,
-        'Max People in a Single Frame': max_people
     }
-# Gradio callback
-def analyze_video(video_file):
-    """Run count_people on the uploaded video and format its result as text.
-
-    Args:
-        video_file: Video path supplied by the Gradio video input.
-
-    Returns:
-        One "key: value" line per entry of the dict returned by count_people,
-        joined with newlines.
-    """
-    stats = count_people(video_file)
-    report_lines = []
-    for label, number in stats.items():
-        report_lines.append(f"{label}: {number}")
-    return "\n".join(report_lines)
-# Gradio UI: a video upload wired to analyze_video, with the report shown in a textbox
 interface = gr.Interface(
-    fn=analyze_video,
     inputs=gr.Video(label="Upload Video"),
-    outputs=gr.Textbox(label="People Counting Results"),
-    description="Upload a video to detect and count people using YOLOv3.",
-    title="YOLO-based People Counter",
 )
-# Start the Gradio app as soon as the module is executed
-interface.launch()

 import gradio as gr
 import cv2
 import numpy as np
+from deep_sort_realtime.deepsort_tracker import DeepSort
+# Load the YOLOv3 network once, at import time, from the weights and config
+# files given as relative paths.
+net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
+# coco.names holds one class label per line; surrounding whitespace is stripped.
+with open("coco.names", "r") as f:
+    classes = [label.strip() for label in f]
+# Module-level DeepSORT tracker shared by every call that uses `tracker`.
+tracker = DeepSort(max_age=30, n_init=3, nn_budget=20)
+def count_unique_people(video_path):
+    """Count distinct people in a video using YOLOv3 detection and DeepSORT tracking.
+
+    Every frame is run through the module-level YOLO network `net`; detections
+    labelled "person" with a score above 0.5 are kept, de-duplicated with
+    non-maximum suppression and passed to the module-level DeepSORT `tracker`.
+    The ids of all confirmed tracks are collected in a set, whose size is the
+    reported number of unique people.
+
+    Args:
+        video_path: Path of the video file, handed to cv2.VideoCapture.
+
+    Returns:
+        dict with "Total Unique People Detected" and "Total Frames Processed",
+        or a dict with a single "Error" entry when no video was supplied or
+        the file could not be opened.
+    """
+    # Nothing uploaded: report it instead of letting cv2.VideoCapture fail.
+    if not video_path:
+        return {"Error": "No video file provided."}
     # Open video
     cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        # Returned as a dict because the result feeds a gr.JSON output, which
+        # tries to parse plain strings as JSON text.
+        return {"Error": "Unable to open video file."}
+    # The tracker is shared at module level; drop tracks left over from a
+    # previously analysed video so they cannot leak into this count.
+    tracker.delete_all_tracks()
+    unique_people = set()  # To store unique IDs
     frame_count = 0
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
+        frame_count += 1
         height, width, _ = frame.shape
+        # Detect people using YOLO
+        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
         net.setInput(blob)
         output_layers_names = net.getUnconnectedOutLayersNames()
         layer_outputs = net.forward(output_layers_names)
         boxes = []
         confidences = []
         for output in layer_outputs:
             for detection in output:
                 scores = detection[5:]
                 class_id = np.argmax(scores)
                 confidence = scores[class_id]
+                # If detected class is 'person'
+                if classes[class_id] == "person" and confidence > 0.5:
                     center_x = int(detection[0] * width)
                     center_y = int(detection[1] * height)
                     w = int(detection[2] * width)
                     h = int(detection[3] * height)
+                    # Convert the box centre to its top-left corner
+                    x = int(center_x - w / 2)
+                    y = int(center_y - h / 2)
                     boxes.append([x, y, w, h])
                     confidences.append(float(confidence))
         # Apply non-maximum suppression
         indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
+        detections = []
+        if len(indexes) > 0:
+            for i in indexes.flatten():
+                # update_tracks takes ([left, top, w, h], confidence, class),
+                # so the stored box is passed through unchanged rather than
+                # converted to corner coordinates.
+                detections.append((boxes[i], confidences[i], "person"))
+        # Update tracker with detections
+        tracks = tracker.update_tracks(detections, frame=frame)
+        # Track unique IDs
+        for track in tracks:
+            if not track.is_confirmed():
+                continue
+            unique_people.add(track.track_id)
     cap.release()
     return {
+        "Total Unique People Detected": len(unique_people),
+        "Total Frames Processed": frame_count,
     }
+# Gradio UI: a video upload wired to count_unique_people, with the result shown as JSON
+description = (
+    "\n"
+    "Upload a video, and the app will count the total number of unique people "
+    "detected in the video using YOLO and DeepSORT."
+    "\n"
+)
 interface = gr.Interface(
+    fn=count_unique_people,
     inputs=gr.Video(label="Upload Video"),
+    outputs=gr.JSON(label="Unique People Count"),
+    description=description,
+    title="Unique People Counter",
 )
+# Launch only when run as a script, listening on 0.0.0.0 port 7860.
+if __name__ == "__main__":
+    interface.launch(server_port=7860, server_name="0.0.0.0")