Par-ity_Project / app /utils /video_processor.py
chenemii's picture
llm
bf56017
raw
history blame
2.6 kB
"""
Video processing and object detection module
"""
import cv2
import platform
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO
class Detection:
    """Plain record describing one detected object in one video frame.

    Every constructor argument is stored unchanged on the instance under
    the same name: ``frame_idx``, ``class_id``, ``class_name``, ``bbox``
    and ``confidence``.
    """

    def __init__(self, frame_idx, class_id, class_name, bbox, confidence):
        # Where the detection came from and what was detected.
        self.frame_idx, self.class_id, self.class_name = (
            frame_idx,
            class_id,
            class_name,
        )
        # Box corners in [x1, y1, x2, y2] order, plus the detector's score.
        self.bbox, self.confidence = bbox, confidence
def _open_video_capture(video_path):
    """Open *video_path*, preferring a platform-specific backend.

    Tries AVFoundation on macOS and FFMPEG elsewhere; if that fails, falls
    back to OpenCV's default backend.

    Raises:
        ValueError: If the video cannot be opened with any backend.
    """
    is_macos = platform.system() == "Darwin"
    # On macOS ("Darwin"), the AVFoundation backend is often more reliable.
    # For other systems, FFMPEG is a good choice.
    backend = cv2.CAP_AVFOUNDATION if is_macos else cv2.CAP_FFMPEG
    cap = cv2.VideoCapture(video_path, backend)
    if cap.isOpened():
        return cap

    backend_name = "AVFoundation" if is_macos else "FFMPEG"
    print(f"Warning: Could not open video with {backend_name} backend. Trying default.")
    # Drop the failed handle before creating the fallback one.
    cap.release()
    cap = cv2.VideoCapture(video_path)  # Fallback to default
    if not cap.isOpened():
        cap.release()
        raise ValueError("Error opening video file with any available backend.")
    return cap


def process_video(video_path, sample_rate=1):
    """
    Read a video and run YOLOv8 object detection on the sampled frames.

    Args:
        video_path (str): Path to the video file
        sample_rate (int): Process every nth frame (default: 1 for all frames).
            Must be >= 1.

    Returns:
        tuple: (frames, detections)
            - frames: List of the frames that were read successfully
            - detections: List of Detection objects, one per box reported
              by the model, tagged with the index of the frame it came from

    Raises:
        ValueError: If sample_rate is less than 1, or if the video cannot
            be opened with any available backend.
    """
    # Validate before touching any resource: 0 would otherwise surface as an
    # opaque range() error and a negative value would silently yield nothing.
    if sample_rate < 1:
        raise ValueError(f"sample_rate must be a positive integer, got {sample_rate!r}")

    # Open the video first so a bad path fails before the model is loaded.
    cap = _open_video_capture(video_path)

    frames = []
    detections = []
    try:
        model = YOLO("yolov8n.pt")
        class_names = model.names

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Index of the frame the next cap.read() will return if we do not
        # seek. Seeking is only needed when we actually skip frames (or after
        # a failed read, when the capture position is no longer known).
        next_sequential_idx = 0

        for frame_idx in tqdm(range(0, frame_count, sample_rate), desc="Processing frames"):
            if frame_idx != next_sequential_idx:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)

            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Could not read frame {frame_idx}")
                next_sequential_idx = -1  # force a seek on the next iteration
                continue
            next_sequential_idx = frame_idx + 1

            # Store original frame
            frames.append(frame)

            # Run YOLOv8 detection and flatten every reported box into a
            # Detection record.
            for result in model(frame):
                for box in result.boxes:
                    class_id = int(box.cls.item())
                    detections.append(
                        Detection(
                            frame_idx,
                            class_id,
                            class_names[class_id],
                            box.xyxy[0].tolist(),
                            box.conf.item(),
                        )
                    )
    finally:
        # Always free the capture handle, even if loading or inference raised.
        cap.release()

    return frames, detections