import os
import argparse
from ultralytics import YOLO
import cv2
from pathlib import Path

# Keypoint schema (17 body + 6 feet = 23 total)
COCO_BODY_17 = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"
]

FEET_6_LABELS = [
    "left_heel", "left_big_toe", "left_little_toe",
    "right_heel", "right_big_toe", "right_little_toe"
]

ALL_KEYPOINTS = COCO_BODY_17 + FEET_6_LABELS


def _format_pose_line(kp_np, img_width, img_height):
    """Format one detection's keypoints as a single YOLO pose label line.

    Args:
        kp_np: 2-D array for one detection; column 0 is read as x, column 1
            as y and column 2 as the confidence value.
        img_width: Divisor used to normalize x values and the box width.
        img_height: Divisor used to normalize y values and the box height.

    Returns:
        The label line (without a trailing newline), or ``None`` when no
        keypoint has a confidence greater than zero.
    """
    # Only keypoints with positive confidence contribute to the box extent.
    visible = kp_np[kp_np[:, 2] > 0]
    if len(visible) == 0:
        return None

    # Bounding box is approximated from the extent of the visible keypoints.
    x_min, y_min = visible[:, 0].min(), visible[:, 1].min()
    x_max, y_max = visible[:, 0].max(), visible[:, 1].max()

    bbox_x = (x_min + x_max) / 2 / img_width
    bbox_y = (y_min + y_max) / 2 / img_height
    bbox_w = (x_max - x_min) / img_width
    bbox_h = (y_max - y_min) / img_height

    # YOLO format: class_id, bbox (cx cy w h), then x y conf per keypoint.
    fields = [f"0 {bbox_x:.6f} {bbox_y:.6f} {bbox_w:.6f} {bbox_h:.6f}"]
    for kp_point in kp_np:
        x_norm = kp_point[0] / img_width
        y_norm = kp_point[1] / img_height
        conf = kp_point[2]
        fields.append(f"{x_norm:.6f} {y_norm:.6f} {conf:.2f}")
    return " ".join(fields)


def run_inference(frames_dir, labels_dir, model_path, img_width=1920, img_height=1080):
    """Run YOLO pose inference on all frames and save labels.

    Every file in ``frames_dir`` whose name ends with ``.jpg`` is passed to
    the model, and one ``.txt`` label file with the same stem is written to
    ``labels_dir``. A label file is created for every frame, even when no
    detection is written to it.

    NOTE: coordinates are normalized by ``img_width`` / ``img_height`` as
    supplied by the caller, not by the size of each frame; frames of a
    different size yield incorrectly normalized labels.

    Args:
        frames_dir: Directory containing the ``.jpg`` frames.
        labels_dir: Output directory for label files; created if missing.
        model_path: Path to the YOLO weights passed to ``YOLO(...)``.
        img_width: Frame width in pixels used for normalization.
        img_height: Frame height in pixels used for normalization.

    Raises:
        ValueError: If ``img_width`` or ``img_height`` is not positive.
    """
    # Guard against silently writing inf/nan coordinates into the labels.
    if img_width <= 0 or img_height <= 0:
        raise ValueError(
            f"img_width and img_height must be positive, got {img_width}x{img_height}"
        )

    model = YOLO(model_path)
    os.makedirs(labels_dir, exist_ok=True)

    frame_suffix = '.jpg'
    frame_files = sorted(
        name for name in os.listdir(frames_dir) if name.endswith(frame_suffix)
    )
    total = len(frame_files)
    print(f"Running inference on {total} frames...")

    for idx, frame_file in enumerate(frame_files):
        frame_path = os.path.join(frames_dir, frame_file)
        results = model(frame_path, verbose=False)

        # Strip only the trailing extension; str.replace would also rewrite
        # any earlier ".jpg" occurring inside the file name.
        label_name = frame_file[:-len(frame_suffix)] + '.txt'
        label_file = os.path.join(labels_dir, label_name)

        with open(label_file, 'w', encoding="utf-8") as f:
            for result in results:
                if result.keypoints is None:
                    continue
                for kp in result.keypoints.data:
                    line = _format_pose_line(
                        kp.cpu().numpy(), img_width, img_height
                    )
                    if line is not None:
                        f.write(line + "\n")

        if (idx + 1) % 50 == 0:
            print(f"Processed {idx + 1}/{total} frames")

    print(f"✅ Inference complete. Labels saved to {labels_dir}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--frames", required=True, help="Directory containing JPEG frames")
    parser.add_argument("--labels", required=True, help="Output directory for labels")
    parser.add_argument("--model", required=True, help="Path to YOLO weights (.pt)")
    parser.add_argument("--width", type=int, default=1920)
    parser.add_argument("--height", type=int, default=1080)
    args = parser.parse_args()

    run_inference(args.frames, args.labels, args.model, args.width, args.height)