Spaces:

AmirAziz1221
/

video-analysis-system

Sleeping

File size: 2,972 Bytes

506c307

"""
object_detector.py
Runs YOLOv8 object detection on extracted frames.
YOLOv8 nano weights (~6 MB) are downloaded automatically on first run.
On Hugging Face the model is cached in ~/.cache/ultralytics.
"""

import cv2
import os
from ultralytics import YOLO

# Default weights file used by load_detector() when no model name is given.
# nano = fastest, smallest download; swap to yolov8s.pt for better accuracy
MODEL_NAME = "yolov8n.pt"


def load_detector(model_name=MODEL_NAME):
    """
    Build the YOLO detector for the given weights file.

    Args:
        model_name: weights file name or path handed straight to YOLO();
                    defaults to MODEL_NAME.

    Returns:
        The constructed YOLO model object.
    """
    return YOLO(model_name)


def detect_objects_in_frames(
    model,
    frame_arrays,
    frame_paths,
    confidence_threshold=0.4,
    output_dir="/tmp/annotated",
):
    """
    Run YOLOv8 detection on each frame, save annotated images, return results.

    The confidence threshold is forwarded to the model call, so the boxes
    drawn on each annotated image are the same detections that are listed
    under "objects" for that frame.

    Args:
        model:                YOLO model (called as model(frame, ...))
        frame_arrays:         list of NumPy BGR arrays
        frame_paths:          list of source file paths, one per frame; only
                              echoed back as "frame_path" in the results
        confidence_threshold: float — skip detections below this score
        output_dir:           str — where to save annotated frames (use /tmp on HF);
                              created if it does not exist

    Frames and paths are paired with zip(), so if the two lists differ in
    length the surplus entries of the longer one are ignored.

    Returns:
        list of dicts (one per processed frame):
            {
              "frame_index":    int,
              "frame_path":     str,
              "annotated_path": str,
              "objects": [
                  {"class": str, "confidence": float, "bbox": [x1,y1,x2,y2]}
              ]
            }
    """
    os.makedirs(output_dir, exist_ok=True)
    all_detections = []

    for i, (frame, path) in enumerate(zip(frame_arrays, frame_paths)):
        # Pass the threshold to the predictor instead of relying on its
        # built-in default: otherwise results.plot() draws boxes that are
        # filtered out below, and thresholds lower than the predictor's
        # default can never take effect.
        results = model(frame, conf=confidence_threshold, verbose=False)[0]

        frame_objects = []
        for box in results.boxes:
            conf = float(box.conf[0])
            # Guard kept so the returned list honours the threshold even if
            # the model hands back a lower-scoring box.
            if conf < confidence_threshold:
                continue
            cls_id = int(box.cls[0])
            cls_name = model.names[cls_id]
            bbox = [round(v, 1) for v in box.xyxy[0].tolist()]
            frame_objects.append({
                "class":      cls_name,
                "confidence": round(conf, 3),
                "bbox":       bbox,
            })

        # Save annotated image (bounding boxes drawn by ultralytics).
        # Files are named by frame index, not by the source path.
        annotated = results.plot()
        out_path = os.path.join(output_dir, f"annotated_{i:04d}.jpg")
        # NOTE(review): cv2.imwrite signals failure via its return value,
        # which is not checked here — confirm whether callers need to know.
        cv2.imwrite(out_path, annotated)

        all_detections.append({
            "frame_index":    i,
            "frame_path":     path,
            "annotated_path": out_path,
            "objects":        frame_objects,
        })

    return all_detections


def summarize_detections(all_detections):
    """
    Tally how many times each object class was detected over every frame.

    Args:
        all_detections: list of per-frame dicts, each with an "objects" list
                        whose items carry a "class" key.

    Returns:
        dict ordered from most to least frequent, e.g. {"person": 14, "car": 6, ...};
        classes with equal counts keep the order in which they were first seen.
    """
    labels = (
        detection["class"]
        for entry in all_detections
        for detection in entry["objects"]
    )

    tally = {}
    for label in labels:
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    # Stable sort on the negated count == descending order with ties untouched.
    ranked = sorted(tally.items(), key=lambda pair: -pair[1])
    return dict(ranked)