# Source: video-analysis-system / object_detector.py
# Uploaded by AmirAziz1221 ("Upload 9 files", commit 506c307, verified)
"""
object_detector.py
Runs YOLOv8 object detection on extracted frames.
YOLOv8 nano weights (~6 MB) are downloaded automatically on first run.
On Hugging Face the model is cached in ~/.cache/ultralytics.
"""
import cv2
import os
from ultralytics import YOLO
# Default weights file used by load_detector().
# "yolov8n.pt" is the nano variant: fastest, smallest download.
# Swap to "yolov8s.pt" for better accuracy.
MODEL_NAME = "yolov8n.pt"
def load_detector(model_name=MODEL_NAME):
    """
    Build the YOLOv8 detector used for frame analysis.

    Args:
        model_name: weights identifier handed straight to ``YOLO``;
            defaults to the module-level ``MODEL_NAME``. Per the module
            notes, missing weights are downloaded on first run.

    Returns:
        The ``YOLO`` model object constructed from ``model_name``.
    """
    return YOLO(model_name)
def detect_objects_in_frames(
    model,
    frame_arrays,
    frame_paths,
    confidence_threshold=0.4,
    output_dir="/tmp/annotated",
):
    """
    Run YOLOv8 detection on each frame, save annotated images, return results.

    The confidence threshold is forwarded to the model call, so the boxes
    drawn on each annotated image are the same ones reported in "objects",
    and thresholds lower than the predictor's own default take effect.

    Args:
        model: YOLO model (callable; must expose ``names``)
        frame_arrays: list of NumPy BGR arrays
        frame_paths: list of source file paths (used for naming only);
            paired with ``frame_arrays`` via ``zip``, so processing stops
            at the shorter of the two
        confidence_threshold: float — skip detections below this score
        output_dir: str — where to save annotated frames (use /tmp on HF);
            created if it does not exist

    Returns:
        list of dicts, one per processed frame:
        {
            "frame_index": int,
            "frame_path": str,
            "annotated_path": str,
            "objects": [
                {"class": str, "confidence": float, "bbox": [x1,y1,x2,y2]}
            ]
        }

    Warns:
        RuntimeWarning: if an annotated frame could not be written. The
        entry is still returned with the intended "annotated_path".
    """
    # Local import: only needed on the write-failure path.
    import warnings

    os.makedirs(output_dir, exist_ok=True)
    all_detections = []

    for i, (frame, path) in enumerate(zip(frame_arrays, frame_paths)):
        # Let the predictor apply the threshold itself so that
        # results.plot() below only draws the boxes we also report.
        results = model(frame, conf=confidence_threshold, verbose=False)[0]

        frame_objects = []
        for box in results.boxes:
            conf = float(box.conf[0])
            # Defensive re-check; normally a no-op once the predictor
            # has already filtered at the same threshold.
            if conf < confidence_threshold:
                continue
            cls_id = int(box.cls[0])
            frame_objects.append({
                "class": model.names[cls_id],
                "confidence": round(conf, 3),
                "bbox": [round(v, 1) for v in box.xyxy[0].tolist()],
            })

        # Save annotated image (bounding boxes drawn by ultralytics).
        annotated = results.plot()
        out_path = os.path.join(output_dir, f"annotated_{i:04d}.jpg")
        # cv2.imwrite reports failure through its boolean return value
        # rather than an exception; surface it instead of ignoring it.
        if not cv2.imwrite(out_path, annotated):
            warnings.warn(
                f"Failed to write annotated frame to {out_path}",
                RuntimeWarning,
                stacklevel=2,
            )

        all_detections.append({
            "frame_index": i,
            "frame_path": path,
            "annotated_path": out_path,
            "objects": frame_objects,
        })

    return all_detections
def summarize_detections(all_detections):
    """
    Tally how often each object class was detected across all frames.

    Args:
        all_detections: list of per-frame dicts, each holding an
            "objects" list whose items carry a "class" key.

    Returns:
        dict mapping class name to total count, ordered from most to
        least frequent, e.g. {"person": 14, "car": 6, ...}. Classes with
        equal counts keep their first-seen order.
    """
    tally = {}
    labels = (
        det["class"]
        for entry in all_detections
        for det in entry["objects"]
    )
    for label in labels:
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    # Sorting on the negated count yields descending order while the
    # stable sort keeps ties in insertion order.
    ranked = sorted(tally.items(), key=lambda pair: -pair[1])
    return dict(ranked)