Spaces:

chenemii
/

Par-ity_Project

Paused

App Files Files Community

chenemii committed on Jun 15, 2025

Commit

0723bf0

1 Parent(s): 3301758

frame analysis

Browse files

Files changed (4) hide show

app/models/pose_estimator.py +51 -101
app/models/swing_analyzer.py +77 -122
app/utils/comparison.py +22 -8
app/utils/video_processor.py +9 -31

app/models/pose_estimator.py CHANGED Viewed

@@ -7,12 +7,8 @@ import numpy as np
 import mediapipe as mp
 from tqdm import tqdm
 class PoseEstimator:
-    """MediaPipe-based pose estimator for golf swing analysis"""
     def __init__(self):
-        """Initialize the pose estimator"""
         self.mp_pose = mp.solutions.pose
         self.pose = self.mp_pose.Pose(static_image_mode=False,
                                       model_complexity=1,
@@ -21,40 +17,26 @@ class PoseEstimator:
                                       min_tracking_confidence=0.5)
     def process_frame(self, frame):
-        """
-        Process a single frame and extract pose landmarks
-        Args:
-            frame (numpy.ndarray): Input frame
-        Returns:
-            list: List of keypoints [x, y, visibility] or None if not detected
-        """
-        # Convert BGR to RGB
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        # Process the frame
         results = self.pose.process(frame_rgb)
-        if not results.pose_landmarks:
-            return None
-        # Extract keypoints
         keypoints = []
-        for landmark in results.pose_landmarks.landmark:
-            # Convert normalized coordinates to pixel coordinates
-            h, w, _ = frame.shape
-            x, y = int(landmark.x * w), int(landmark.y * h)
-            visibility = landmark.visibility
-            keypoints.append([x, y, visibility])
         return keypoints
     def close(self):
-        """Release resources"""
         self.pose.close()
 def analyze_pose(frames):
     """
     Analyze pose in video frames
@@ -70,87 +52,55 @@ def analyze_pose(frames):
     for i, frame in enumerate(tqdm(frames, desc="Analyzing pose")):
         keypoints = pose_estimator.process_frame(frame)
-        if keypoints:
-            pose_data[i] = keypoints
     pose_estimator.close()
     return pose_data
 def calculate_joint_angles(keypoints):
     """
-    Calculate joint angles from pose keypoints
     Args:
-        keypoints (list): List of keypoints [x, y, visibility]
     Returns:
-        dict: Dictionary of joint angles in degrees
     """
-    # Define joint connections for angle calculation
-    joint_connections = {
-        "right_shoulder": [
-            mp.solutions.pose.PoseLandmark.RIGHT_ELBOW.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_SHOULDER.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_HIP.value
-        ],
-        "left_shoulder": [
-            mp.solutions.pose.PoseLandmark.LEFT_ELBOW.value,
-            mp.solutions.pose.PoseLandmark.LEFT_SHOULDER.value,
-            mp.solutions.pose.PoseLandmark.LEFT_HIP.value
-        ],
-        "right_elbow": [
-            mp.solutions.pose.PoseLandmark.RIGHT_WRIST.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_ELBOW.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_SHOULDER.value
-        ],
-        "left_elbow": [
-            mp.solutions.pose.PoseLandmark.LEFT_WRIST.value,
-            mp.solutions.pose.PoseLandmark.LEFT_ELBOW.value,
-            mp.solutions.pose.PoseLandmark.LEFT_SHOULDER.value
-        ],
-        "right_hip": [
-            mp.solutions.pose.PoseLandmark.RIGHT_KNEE.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_HIP.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_SHOULDER.value
-        ],
-        "left_hip": [
-            mp.solutions.pose.PoseLandmark.LEFT_KNEE.value,
-            mp.solutions.pose.PoseLandmark.LEFT_HIP.value,
-            mp.solutions.pose.PoseLandmark.LEFT_SHOULDER.value
-        ],
-        "right_knee": [
-            mp.solutions.pose.PoseLandmark.RIGHT_ANKLE.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_KNEE.value,
-            mp.solutions.pose.PoseLandmark.RIGHT_HIP.value
-        ],
-        "left_knee": [
-            mp.solutions.pose.PoseLandmark.LEFT_ANKLE.value,
-            mp.solutions.pose.PoseLandmark.LEFT_KNEE.value,
-            mp.solutions.pose.PoseLandmark.LEFT_HIP.value
-        ]
-    }
     angles = {}
-    for joint_name, landmarks in joint_connections.items():
-        # Get the three points that form the angle
-        if all(landmarks[i] < len(keypoints) for i in range(3)):
-            p1 = np.array(keypoints[landmarks[0]][:2])
-            p2 = np.array(keypoints[landmarks[1]][:2])
-            p3 = np.array(keypoints[landmarks[2]][:2])
-            # Calculate vectors
-            v1 = p1 - p2
-            v2 = p3 - p2
-            # Calculate angle
-            cosine_angle = np.dot(
-                v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
-            angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
-            angle_degrees = np.degrees(angle)
-            angles[joint_name] = angle_degrees
-    return angles

 import mediapipe as mp
 from tqdm import tqdm
 class PoseEstimator:
     def __init__(self):
         self.mp_pose = mp.solutions.pose
         self.pose = self.mp_pose.Pose(static_image_mode=False,
                                       model_complexity=1,
                                       min_tracking_confidence=0.5)
     def process_frame(self, frame):
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         results = self.pose.process(frame_rgb)
         keypoints = []
+        h, w, _ = frame.shape
+        if results.pose_landmarks:
+            for landmark in results.pose_landmarks.landmark:
+                x, y = int(landmark.x * w), int(landmark.y * h)
+                visibility = landmark.visibility
+                keypoints.append([x, y, visibility])
+        else:
+            center_x, center_y = w // 2, h // 2
+            for _ in range(33):
+                keypoints.append([center_x, center_y, 0.0])
         return keypoints
     def close(self):
         self.pose.close()
 def analyze_pose(frames):
     """
     Analyze pose in video frames
     for i, frame in enumerate(tqdm(frames, desc="Analyzing pose")):
         keypoints = pose_estimator.process_frame(frame)
+        # Store all frames, even if no pose is detected
+        pose_data[i] = keypoints if keypoints is not None else []
     pose_estimator.close()
     return pose_data
 def calculate_joint_angles(keypoints):
     """
     Calculate 2D joint angles from pose keypoints.

     Each angle is measured at the MIDDLE landmark of its triplet, between the
     vectors pointing to the first and third landmarks, using only the x/y
     components of each keypoint.

     NOTE(review): in MediaPipe Pose's landmark numbering, indices 11, 13, 15,
     17 and 19 are the LEFT shoulder, elbow, wrist, pinky and index finger,
     while the result keys are named "right_*". Keys and indices are kept
     unchanged so existing callers keep working; confirm the intended side and
     vertex before relying on the names.

     Args:
         keypoints: List of [x, y, visibility] for each landmark

     Returns:
         Dictionary mapping "right_shoulder", "right_elbow" and "right_wrist"
         to an angle in degrees. A key is omitted when any landmark of its
         triplet has visibility <= 0.5 or when the triplet is degenerate
         (two landmarks coincide). Empty if fewer than 33 keypoints are given.
     """
     if not keypoints or len(keypoints) < 33:  # MediaPipe Pose has 33 landmarks
         return {}

     # (first, vertex, third) landmark indices for every reported angle.
     triplets = {
         "right_shoulder": (11, 13, 15),
         "right_elbow": (13, 15, 17),
         "right_wrist": (15, 17, 19),
     }

     angles = {}
     for joint_name, (first, vertex, third) in triplets.items():
         # Only trust landmarks the pose model is reasonably confident about.
         if not all(keypoints[i][2] > 0.5 for i in (first, vertex, third)):
             continue
         angle = _angle_at_vertex(keypoints[first], keypoints[vertex],
                                  keypoints[third])
         if angle is not None:
             angles[joint_name] = angle
     return angles


 def _angle_at_vertex(point_a, vertex, point_c):
     """Return the angle a-vertex-c in degrees, or None if it is undefined."""
     v1 = np.array(point_a[:2], dtype=float) - np.array(vertex[:2], dtype=float)
     v2 = np.array(point_c[:2], dtype=float) - np.array(vertex[:2], dtype=float)
     denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
     if denominator == 0:
         # Two landmarks share a pixel: the angle is undefined, and dividing
         # would produce NaN plus a runtime warning.
         return None
     # Rounding can push the cosine slightly outside [-1, 1]; arccos would
     # then return NaN, so clamp first.
     cosine = np.clip(np.dot(v1, v2) / denominator, -1.0, 1.0)
     return np.degrees(np.arccos(cosine))

app/models/swing_analyzer.py CHANGED Viewed

@@ -3,187 +3,142 @@ Swing analysis module for golf swing segmentation and trajectory analysis
 """
 import numpy as np
-import cv2
 from app.models.pose_estimator import calculate_joint_angles
-def segment_swing(pose_data, detections, sample_rate=5):
-    """
-    Segment the golf swing into key phases
-    Args:
-        pose_data (dict): Dictionary mapping frame indices to pose keypoints
-        detections (list): List of Detection objects
-        sample_rate (int): The frame sampling rate used during processing
-    Returns:
-        dict: Dictionary mapping phase names to lists of frame indices
-    """
-    # Initialize swing phases
-    swing_phases = {
-        "setup": [],
-        "backswing": [],
-        "downswing": [],
-        "impact": [],
-        "follow_through": []
-    }
-    # Get frame indices with pose data
     frame_indices = sorted(pose_data.keys())
     if not frame_indices:
         return swing_phases
-    # Auto-adjust sample rate based on number of frames
-    # For short videos (less than 150 frames), don't skip any frames
-    if len(frame_indices) < 150 and sample_rate > 1:
-        # Get the max frame idx to understand video length
-        max_frame_idx = max(frame_indices) if frame_indices else 0
-        # For videos with less than 150 frames, use sample_rate=1
-        if max_frame_idx < 150:
-            sample_rate = 1
-    # Calculate joint angles for each frame
     angles_by_frame = {}
     for idx in frame_indices:
         keypoints = pose_data[idx]
         angles = calculate_joint_angles(keypoints)
         angles_by_frame[idx] = angles
-    # Analyze shoulder rotation to identify swing phases
-    # This is a simplified approach - a more sophisticated algorithm would be needed for production
-    # Find the frame with the maximum right shoulder angle (top of backswing)
-    max_shoulder_angle = -1
-    top_backswing_frame = frame_indices[0]
     for idx in frame_indices:
-        angles = angles_by_frame[idx]
-        if "right_shoulder" in angles and angles[
-                "right_shoulder"] > max_shoulder_angle:
-            max_shoulder_angle = angles["right_shoulder"]
             top_backswing_frame = idx
-    # Find impact frame (when club meets ball)
-    # In a real implementation, this would use club and ball detection
     impact_frame = None
-    person_positions = {}
-    # Extract person positions from detections
-    for detection in detections:
-        if detection.class_name == "person":
-            frame_idx = detection.frame_idx // sample_rate  # Convert to processed frame index
-            if frame_idx in frame_indices:
-                person_positions[frame_idx] = detection.bbox
-    # Find the frame with the most forward position (impact)
-    if person_positions:
-        min_x = float('inf')
-        for idx, bbox in person_positions.items():
-            if idx > top_backswing_frame and bbox[0] < min_x:
-                min_x = bbox[0]
                 impact_frame = idx
-    # If impact frame not found, estimate it as 2/3 between top of backswing and end
     if impact_frame is None:
-        impact_frame = frame_indices[0] + int(
-            (frame_indices[-1] - top_backswing_frame) * 2 / 3)
-    # Assign frames to phases
     for idx in frame_indices:
-        if idx < frame_indices[len(frame_indices) // 5]:
-            # First 20% of frames are setup
             swing_phases["setup"].append(idx)
-        elif idx < top_backswing_frame:
-            # Frames before top of backswing are backswing
             swing_phases["backswing"].append(idx)
         elif idx < impact_frame:
-            # Frames between top of backswing and impact are downswing
             swing_phases["downswing"].append(idx)
-        elif idx < impact_frame + 5:
-            # Frames around impact
             swing_phases["impact"].append(idx)
         else:
-            # Remaining frames are follow-through
             swing_phases["follow_through"].append(idx)
     return swing_phases
-def analyze_trajectory(frames, detections, swing_phases, sample_rate=5):
-    """
-    Analyze club and ball trajectory and speed
-    Args:
-        frames (list): List of video frames
-        detections (list): List of Detection objects
-        swing_phases (dict): Dictionary mapping phase names to lists of frame indices
-        sample_rate (int): The frame sampling rate used during processing
-    Returns:
-        dict: Dictionary mapping frame indices to trajectory data
-    """
     trajectory_data = {}
-    # Auto-adjust sample rate based on number of frames
-    # For short videos (less than 150 frames), don't skip any frames
-    if len(frames) < 150 and sample_rate > 1:
         sample_rate = 1
-    # Extract ball detections
     ball_detections = [d for d in detections if d.class_name == "sports ball"]
-    # Get impact frame index
     impact_frames = swing_phases.get("impact", [])
     if not impact_frames:
         return trajectory_data
     impact_frame_idx = impact_frames[len(impact_frames) // 2]
-    # Track ball trajectory after impact
     ball_trajectory = []
     ball_positions = {}
     for detection in ball_detections:
-        frame_idx = detection.frame_idx // sample_rate  # Convert to processed frame index
         if frame_idx >= impact_frame_idx:
-            # Calculate ball center
             x1, y1, x2, y2 = detection.bbox
             center_x = (x1 + x2) / 2
             center_y = (y1 + y2) / 2
             ball_positions[frame_idx] = (center_x, center_y)
-    # Sort ball positions by frame index
     sorted_frames = sorted(ball_positions.keys())
     for idx in sorted_frames:
         ball_trajectory.append(ball_positions[idx])
-    # Estimate club speed at impact
-    # In a real implementation, this would use more sophisticated tracking
     club_speed = None
-    if len(swing_phases.get("downswing", [])) >= 2:
-        # Simplified club speed calculation
-        # In reality, this would require tracking the club head specifically
-        downswing_frames = swing_phases["downswing"]
-        # Account for sample rate when calculating time difference
-        actual_frames_elapsed = (downswing_frames[-1] - downswing_frames[0]) * sample_rate
-        time_diff = actual_frames_elapsed / 30  # Assuming 30 fps
         if time_diff > 0:
-            # Simplified speed calculation (just an example)
-            club_speed = 100 * (1 / time_diff)  # Arbitrary scaling
-    # Populate trajectory data
-    for idx in sorted(swing_phases.keys()):
-        frames_in_phase = swing_phases[idx]
         for frame_idx in frames_in_phase:
             trajectory_data[frame_idx] = {
-                "phase":
-                idx,
-                "club_speed":
-                club_speed if idx == "impact" else None,
-                "ball_trajectory":
-                ball_trajectory
-                if idx == "impact" or idx == "follow_through" else None
             }
-    return trajectory_data

 """
 import numpy as np
 from app.models.pose_estimator import calculate_joint_angles
def segment_swing(pose_data, detections, sample_rate=1):
    """
    Segment a golf swing into setup, backswing, downswing, impact and
    follow-through using pose keypoints only.

    Args:
        pose_data (dict): Maps frame indices to lists of [x, y, visibility]
            keypoints in image pixel coordinates (y grows downward).
        detections (list): Not used by this function; kept so existing
            callers keep working.
        sample_rate (int): Not used by this function; kept so existing
            callers keep working.

    Returns:
        dict: Maps each phase name ("setup", "backswing", "downswing",
        "impact", "follow_through") to a list of frame indices. All lists
        are empty when pose_data is empty.
    """
    swing_phases = {"setup": [], "backswing": [], "downswing": [], "impact": [], "follow_through": []}
    frame_indices = sorted(pose_data.keys())
    if not frame_indices:
        return swing_phases

    angles_by_frame = {
        idx: calculate_joint_angles(pose_data[idx]) for idx in frame_indices
    }

    # Setup ends at the first frame whose tracked angles differ from the
    # first frame's by more than 10 degrees.
    setup_end = frame_indices[0]
    initial_angles = angles_by_frame[frame_indices[0]]
    for idx in frame_indices[1:]:
        angles = angles_by_frame[idx]
        if any(
            _angle_moved(initial_angles.get(key), angles.get(key))
            for key in ("right_shoulder", "right_elbow")
        ):
            setup_end = idx
            break

    # Top of backswing: largest "right_shoulder" angle from setup_end onward.
    max_shoulder_angle = -1
    top_backswing_frame = setup_end
    for idx in frame_indices:
        if idx < setup_end:
            continue
        shoulder = angles_by_frame[idx].get("right_shoulder")
        if shoulder is not None and shoulder > max_shoulder_angle:
            max_shoulder_angle = shoulder
            top_backswing_frame = idx

    # Trace landmark 16 (the wrist used for impact detection) from the top
    # of the backswing onward.
    wrist_positions = []
    for idx in frame_indices:
        if idx < top_backswing_frame:
            continue
        keypoints = pose_data[idx]
        if len(keypoints) > 16:
            wrist = keypoints[16]
            # Frames without a detected pose carry placeholder keypoints
            # with visibility 0.0; they would inject fake direction changes.
            if len(wrist) > 2 and wrist[2] <= 0:
                continue
            wrist_positions.append((idx, wrist[1]))

    impact_frame = _find_impact_frame(wrist_positions)
    if impact_frame is None:
        impact_frame = frame_indices[-1]

    for idx in frame_indices:
        if idx <= setup_end:
            swing_phases["setup"].append(idx)
        elif idx <= top_backswing_frame:
            swing_phases["backswing"].append(idx)
        elif idx < impact_frame:
            swing_phases["downswing"].append(idx)
        elif idx == impact_frame:
            swing_phases["impact"].append(idx)
        else:
            swing_phases["follow_through"].append(idx)
    return swing_phases


def _angle_moved(initial, current, threshold=10):
    """Return True when both angles are known and differ by more than threshold."""
    # Compare against None explicitly: 0.0 is a valid angle, not a missing one.
    if initial is None or current is None:
        return False
    return abs(current - initial) > threshold


def _find_impact_frame(wrist_positions):
    """Return the frame where the wrist is lowest in the image, or None.

    wrist_positions is a list of (frame_idx, wrist_y) in ascending frame
    order. Image y grows downward, so the hands travel with positive y
    velocity during the downswing and negative y velocity once they rise
    into the follow-through; impact is the frame at that reversal.
    """
    velocities = []
    for (prev_idx, prev_y), (idx, wrist_y) in zip(wrist_positions, wrist_positions[1:]):
        velocities.append((idx, (wrist_y - prev_y) / (idx - prev_idx)))

    for (prev_idx, prev_velocity), (_, velocity) in zip(velocities, velocities[1:]):
        if prev_velocity > 0 and velocity < 0:
            # prev_idx is the frame sitting between the two intervals,
            # i.e. the turning point itself.
            return prev_idx

    # No clean reversal: fall back to the frame where the wrist is lowest in
    # the image (largest y). The first such frame wins on ties.
    impact_frame = None
    max_wrist_y = float('-inf')
    for idx, wrist_y in wrist_positions:
        if wrist_y > max_wrist_y:
            max_wrist_y = wrist_y
            impact_frame = idx
    return impact_frame
def analyze_trajectory(frames, detections, swing_phases, sample_rate=1, fps=30):
    """
    Build per-frame trajectory data (phase, club speed, ball path) for a swing.

    Args:
        frames (list): Processed video frames; only their count is used.
        detections (list): Objects exposing class_name, frame_idx and bbox
            ([x1, y1, x2, y2]). Only "sports ball" detections are used.
        swing_phases (dict): Maps phase names to lists of processed-frame
            indices.
        sample_rate (int): Frame sampling step used during processing. It
            converts detection frame indices to processed-frame indices and
            scales elapsed frames back to real video frames. Forced to 1 when
            fewer than 150 frames were processed or when the value is below 1.
        fps (float): Video frame rate used to turn frames into seconds.
            Must be positive. Defaults to 30.

    Returns:
        dict: Maps every frame index listed in swing_phases to a dict with
        "phase", "club_speed" (set on impact frames only, else None) and
        "ball_trajectory" (list of (x, y) ball centres at or after impact,
        set on impact and follow-through frames only, else None). Empty when
        swing_phases has no impact frame.
    """
    trajectory_data = {}
    if len(frames) < 150 or sample_rate < 1:
        sample_rate = 1

    impact_frames = swing_phases.get("impact", [])
    if not impact_frames:
        return trajectory_data
    impact_frame_idx = impact_frames[len(impact_frames) // 2]

    # Ball centre per processed frame, from impact onward. A later detection
    # in the same frame overwrites an earlier one.
    ball_positions = {}
    for detection in detections:
        if detection.class_name != "sports ball":
            continue
        # Detections are indexed by raw video frame; phases are indexed by
        # processed frame, so divide by the sampling step.
        frame_idx = detection.frame_idx // sample_rate
        if frame_idx >= impact_frame_idx:
            x1, y1, x2, y2 = detection.bbox
            ball_positions[frame_idx] = ((x1 + x2) / 2, (y1 + y2) / 2)
    ball_trajectory = [ball_positions[idx] for idx in sorted(ball_positions)]

    # Rough club-speed proxy: inversely proportional to downswing duration
    # (the factor 100 is an arbitrary scale, not a physical unit).
    club_speed = None
    downswing_frames = swing_phases.get("downswing", [])
    if len(downswing_frames) >= 2:
        actual_frames_elapsed = (downswing_frames[-1] - downswing_frames[0]) * sample_rate
        time_diff = actual_frames_elapsed / fps
        if time_diff > 0:
            club_speed = 100 * (1 / time_diff)

    for phase_name, frames_in_phase in swing_phases.items():
        for frame_idx in frames_in_phase:
            trajectory_data[frame_idx] = {
                "phase": phase_name,
                "club_speed": club_speed if phase_name == "impact" else None,
                "ball_trajectory": ball_trajectory if phase_name in ["impact", "follow_through"] else None
            }
    return trajectory_data

app/utils/comparison.py CHANGED Viewed

@@ -123,11 +123,16 @@ def extract_frames(video_path, max_frames=100):
 def extract_key_swing_frames(video_path, swing_phases=None):
     """
     Extract 3 key frames from a golf swing video:
-    1. Starting position (setup)
-    2. Top of backswing
-    3. Impact with ball
-    Simplified version that uses basic OpenCV and handles rotation properly.
     """
     if not os.path.exists(video_path):
         raise ValueError(f"Video file not found: {video_path}")
@@ -164,12 +169,21 @@ def extract_key_swing_frames(video_path, swing_phases=None):
         key_frames = {}
-        # Determine frame indices
         if swing_phases:
-            setup_idx = 0  # Always start from beginning
-            backswing_idx = swing_phases.get('backswing', [total_frames//3])[-1] if swing_phases.get('backswing') else total_frames//3
-            impact_idx = swing_phases.get('impact', [total_frames//2])[len(swing_phases.get('impact', [total_frames//2]))//2] if swing_phases.get('impact') else total_frames//2
         else:
             setup_idx = 0
             backswing_idx = total_frames // 3
             impact_idx = int(total_frames * 0.6)

 def extract_key_swing_frames(video_path, swing_phases=None):
     """
     Extract 3 key frames from a golf swing video:
+    1. First setup frame
+    2. Last backswing frame (top of backswing)
+    3. First impact frame
+    Args:
+        video_path (str): Path to the video file
+        swing_phases (dict): Dictionary mapping phase names to lists of frame indices
+    Returns:
+        dict: Dictionary mapping phase names to frames
     """
     if not os.path.exists(video_path):
         raise ValueError(f"Video file not found: {video_path}")
         key_frames = {}
+        # Determine frame indices based on swing phases
         if swing_phases:
+            # Get first setup frame
+            setup_frames = swing_phases.get('setup', [])
+            setup_idx = setup_frames[0] if setup_frames else 0
+            # Get last backswing frame (top of backswing)
+            backswing_frames = swing_phases.get('backswing', [])
+            backswing_idx = backswing_frames[-1] if backswing_frames else total_frames//3
+            # Get first impact frame
+            impact_frames = swing_phases.get('impact', [])
+            impact_idx = impact_frames[0] if impact_frames else total_frames//2
         else:
+            # Fallback to default indices if no swing phases provided
             setup_idx = 0
             backswing_idx = total_frames // 3
             impact_idx = int(total_frames * 0.6)

app/utils/video_processor.py CHANGED Viewed

@@ -32,40 +32,28 @@ def process_video(video_path, sample_rate=5):
             - frames: List of processed frames
             - detections: List of Detection objects
     """
-    # Load YOLOv8 model
     model = YOLO("yolov8n.pt")
-    # Custom class names for golf-specific objects
     class_names = model.names
-    # Open video file
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError("Error opening video file")
-    # Get video properties
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    # Auto-adjust sample rate based on video length
-    # For short videos (less than 150 frames), don't skip any frames
-    if frame_count < 150 and sample_rate > 1:
         print(f"Short video detected ({frame_count} frames). Processing all frames.")
         sample_rate = 1
     frames = []
     detections = []
-    # Process frames
-    for frame_idx in tqdm(range(0, frame_count, sample_rate),
-                          desc="Processing frames"):
-        # Set frame position
         cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
-        # Read frame
         ret, frame = cap.read()
         if not ret:
-            break
         # Store original frame
         frames.append(frame)
@@ -77,21 +65,11 @@ def process_video(video_path, sample_rate=5):
         for result in results:
             boxes = result.boxes
             for box in boxes:
-                # Get detection information
                 class_id = int(box.cls.item())
                 class_name = class_names[class_id]
-                # Filter for relevant objects (person, sports ball)
-                if class_name in ["person", "sports ball"]:
-                    bbox = box.xyxy[0].tolist()  # [x1, y1, x2, y2]
-                    confidence = box.conf.item()
-                    # Create Detection object
-                    detection = Detection(frame_idx, class_id, class_name,
-                                          bbox, confidence)
-                    detections.append(detection)
-    # Release video capture
     cap.release()
-    return frames, detections

             - frames: List of processed frames
             - detections: List of Detection objects
     """
     model = YOLO("yolov8n.pt")
     class_names = model.names
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError("Error opening video file")
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    if frame_count < 150:
         print(f"Short video detected ({frame_count} frames). Processing all frames.")
         sample_rate = 1
     frames = []
     detections = []
+    for frame_idx in tqdm(range(0, frame_count, sample_rate), desc="Processing frames"):
         cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
         ret, frame = cap.read()
         if not ret:
+            print(f"Warning: Could not read frame {frame_idx}")
+            continue
         # Store original frame
         frames.append(frame)
         for result in results:
             boxes = result.boxes
             for box in boxes:
                 class_id = int(box.cls.item())
                 class_name = class_names[class_id]
+                bbox = box.xyxy[0].tolist()
+                confidence = box.conf.item()
+                detections.append(Detection(frame_idx, class_id, class_name, bbox, confidence))
     cap.release()
+    return frames, detections