# Spaces: chenemii/Par-ity_Project (Paused) - File size: 2,603 Bytes

"""
Video processing and object detection module
"""

import cv2
import platform
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO


class Detection:
    """Class to store a single detection result for one video frame"""

    def __init__(self, frame_idx, class_id, class_name, bbox, confidence):
        """
        Store the fields of one detection as plain attributes.

        Args:
            frame_idx: Index of the frame the detection belongs to
            class_id: Numeric class id reported by the detector
            class_name: Human-readable name for class_id
            bbox: Bounding box as [x1, y1, x2, y2]
            confidence: Detection confidence score
        """
        self.frame_idx = frame_idx
        self.class_id = class_id
        self.class_name = class_name
        self.bbox = bbox  # [x1, y1, x2, y2]
        self.confidence = confidence

    def __repr__(self):
        """Return a readable representation listing every stored field."""
        return (
            f"{type(self).__name__}("
            f"frame_idx={self.frame_idx!r}, "
            f"class_id={self.class_id!r}, "
            f"class_name={self.class_name!r}, "
            f"bbox={self.bbox!r}, "
            f"confidence={self.confidence!r})"
        )


def process_video(video_path, sample_rate=1):
    """
    Process video and run YOLOv8 object detection on the sampled frames

    Every box reported by the model is recorded; no filtering by class
    (e.g. golfer, club, ball) is applied here.

    Args:
        video_path (str): Path to the video file
        sample_rate (int): Process every nth frame (default: 1 for all frames).
            Must be at least 1.

    Returns:
        tuple: (frames, detections)
            - frames: List of the sampled frames that were read successfully
            - detections: List of Detection objects, one per detected box

    Raises:
        ValueError: If sample_rate is less than 1, or if the video file
            cannot be opened with any available backend.
    """
    # Validate up front, before the model is loaded or the file is opened, so
    # a bad argument fails fast with a clear message instead of surfacing
    # later as a range() error (0) or a silently empty result (negative).
    if sample_rate < 1:
        raise ValueError(f"sample_rate must be >= 1, got {sample_rate}")

    model = YOLO("yolov8n.pt")
    class_names = model.names

    # On macOS ("Darwin"), the AVFoundation backend is often more reliable.
    # For other systems, FFMPEG is a good choice.
    on_macos = platform.system() == "Darwin"
    backend = cv2.CAP_AVFOUNDATION if on_macos else cv2.CAP_FFMPEG
    cap = cv2.VideoCapture(video_path, backend)

    if not cap.isOpened():
        backend_name = "AVFoundation" if on_macos else "FFMPEG"
        print(f"Warning: Could not open video with {backend_name} backend. Trying default.")
        cap.release()  # Drop the failed capture before replacing it
        cap = cv2.VideoCapture(video_path)  # Fallback to default
        if not cap.isOpened():
            cap.release()
            raise ValueError("Error opening video file with any available backend.")

    frames = []
    detections = []

    # try/finally guarantees the capture is released even if decoding or
    # inference raises part-way through the video.
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Index of the frame the decoder will return on the next read(), or
        # None when unknown. Seeking is only needed when this differs from the
        # frame we want; for sample_rate=1 reads are purely sequential.
        next_pos = None

        for frame_idx in tqdm(range(0, frame_count, sample_rate), desc="Processing frames"):
            if frame_idx != next_pos:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)

            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Could not read frame {frame_idx}")
                next_pos = None  # Position is unreliable after a failed read
                continue
            next_pos = frame_idx + 1

            # Store original frame
            frames.append(frame)

            # Run YOLOv8 detection
            results = model(frame)

            # Flatten every box of every result into a Detection record
            for result in results:
                for box in result.boxes:
                    class_id = int(box.cls.item())
                    detections.append(
                        Detection(
                            frame_idx,
                            class_id,
                            class_names[class_id],
                            box.xyxy[0].tolist(),
                            box.conf.item(),
                        )
                    )
    finally:
        cap.release()

    return frames, detections