""" object_detector.py Runs YOLOv8 object detection on extracted frames. YOLOv8 nano weights (~6 MB) are downloaded automatically on first run. On Hugging Face the model is cached in ~/.cache/ultralytics. """ import cv2 import os from ultralytics import YOLO MODEL_NAME = "yolov8n.pt" # nano = fastest, smallest download; swap to yolov8s.pt for better accuracy def load_detector(model_name=MODEL_NAME): """ Load (or download) the YOLOv8 model. Returns a YOLO model object. """ model = YOLO(model_name) return model def detect_objects_in_frames( model, frame_arrays, frame_paths, confidence_threshold=0.4, output_dir="/tmp/annotated", ): """ Run YOLOv8 detection on each frame, save annotated images, return results. Args: model: YOLO model frame_arrays: list of NumPy BGR arrays frame_paths: list of source file paths (used for naming only) confidence_threshold: float — skip detections below this score output_dir: str — where to save annotated frames (use /tmp on HF) Returns: list of dicts: { "frame_index": int, "frame_path": str, "annotated_path": str, "objects": [ {"class": str, "confidence": float, "bbox": [x1,y1,x2,y2]} ] } """ os.makedirs(output_dir, exist_ok=True) all_detections = [] for i, (frame, path) in enumerate(zip(frame_arrays, frame_paths)): results = model(frame, verbose=False)[0] frame_objects = [] for box in results.boxes: conf = float(box.conf[0]) if conf < confidence_threshold: continue cls_id = int(box.cls[0]) cls_name = model.names[cls_id] bbox = [round(v, 1) for v in box.xyxy[0].tolist()] frame_objects.append({ "class": cls_name, "confidence": round(conf, 3), "bbox": bbox, }) # Save annotated image (bounding boxes drawn by ultralytics) annotated = results.plot() out_path = os.path.join(output_dir, f"annotated_{i:04d}.jpg") cv2.imwrite(out_path, annotated) all_detections.append({ "frame_index": i, "frame_path": path, "annotated_path": out_path, "objects": frame_objects, }) return all_detections def summarize_detections(all_detections): """ Aggregate object counts across all frames. Returns: dict sorted by frequency: {"person": 14, "car": 6, ...} """ counts = {} for frame in all_detections: for obj in frame["objects"]: cls = obj["class"] counts[cls] = counts.get(cls, 0) + 1 return dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))