File size: 2,972 Bytes
506c307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
object_detector.py
Runs YOLOv8 object detection on extracted frames.
YOLOv8 nano weights (~6 MB) are downloaded automatically on first run.
On Hugging Face the model is cached in ~/.cache/ultralytics.
"""

import cv2
import os
from ultralytics import YOLO

# Default YOLOv8 weights file. The nano variant is the fastest and the smallest
# download; swap to yolov8s.pt for better accuracy.
MODEL_NAME = "yolov8n.pt"


def load_detector(model_name=MODEL_NAME):
    """
    Build the YOLO detector for the given weights file.

    Args:
        model_name: weights identifier handed straight to the YOLO constructor
                    (defaults to MODEL_NAME)

    Returns:
        the constructed YOLO model object
    """
    return YOLO(model_name)


def detect_objects_in_frames(
    model,
    frame_arrays,
    frame_paths,
    confidence_threshold=0.4,
    output_dir="/tmp/annotated",
):
    """
    Run YOLOv8 detection on each frame, save annotated images, return results.

    The confidence threshold is forwarded to the model call so that the boxes
    drawn on each annotated image are the same ones reported in "objects".

    Args:
        model:                YOLO model (called per frame; its ``names``
                              mapping turns class ids into labels)
        frame_arrays:         list of NumPy BGR arrays
        frame_paths:          list of source file paths, paired with
                              frame_arrays by position and echoed back as
                              "frame_path"; if the two lists differ in length,
                              the extra items of the longer one are ignored
        confidence_threshold: float — skip detections below this score
        output_dir:           str — where to save annotated frames (use /tmp on HF);
                              created if it does not exist

    Returns:
        list of dicts:
            {
              "frame_index":    int,
              "frame_path":     str,
              "annotated_path": str,
              "objects": [
                  {"class": str, "confidence": float, "bbox": [x1,y1,x2,y2]}
              ]
            }

    Raises:
        OSError: if an annotated frame could not be written to output_dir.
    """
    os.makedirs(output_dir, exist_ok=True)
    all_detections = []

    for i, (frame, path) in enumerate(zip(frame_arrays, frame_paths)):
        # Let the model apply the threshold itself: results.plot() draws every
        # box present in the result, so filtering only afterwards would leave
        # low-confidence boxes on the saved image that are absent from "objects".
        results = model(frame, conf=confidence_threshold, verbose=False)[0]

        frame_objects = []
        for box in results.boxes:
            conf = float(box.conf[0])
            # Defensive re-check; normally a no-op once the model has filtered.
            if conf < confidence_threshold:
                continue
            cls_id = int(box.cls[0])
            frame_objects.append({
                "class":      model.names[cls_id],
                "confidence": round(conf, 3),
                "bbox":       [round(v, 1) for v in box.xyxy[0].tolist()],
            })

        # Save annotated image (bounding boxes drawn by ultralytics)
        annotated = results.plot()
        out_path = os.path.join(output_dir, f"annotated_{i:04d}.jpg")
        # cv2.imwrite signals failure through its return value; without this
        # check the result would advertise a path to a file that was never written.
        if not cv2.imwrite(out_path, annotated):
            raise OSError(f"Failed to write annotated frame to {out_path}")

        all_detections.append({
            "frame_index":    i,
            "frame_path":     path,
            "annotated_path": out_path,
            "objects":        frame_objects,
        })

    return all_detections


def summarize_detections(all_detections):
    """
    Count how many times each object class was detected over all frames.

    Args:
        all_detections: iterable of per-frame dicts, each with an "objects"
                        list whose items carry a "class" key

    Returns:
        dict ordered from most to least frequent, e.g. {"person": 14, "car": 6};
        classes with equal counts keep the order in which they were first seen
    """
    tally = {}
    for entry in all_detections:
        for label in (det["class"] for det in entry["objects"]):
            tally[label] = tally[label] + 1 if label in tally else 1

    # Stable in-place sort: ties stay in first-seen order.
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return dict(ranked)