import gradio as gr import cv2 import mediapipe as mp import tempfile import os # Initialize MediaPipe Pose mp_pose = mp.solutions.pose def detect_pose(video_file): """ This function takes an uploaded video file, limits it to 10 seconds, applies human pose estimation using MediaPipe, and returns a new video with the detected poses drawn on the frames. """ try: # Save uploaded video to a temporary file temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") temp_video.write(open(video_file, "rb").read()) temp_video.close() # Open video using OpenCV cap = cv2.VideoCapture(temp_video.name) if not cap.isOpened(): return "Error: Could not open video file." fps = cap.get(cv2.CAP_PROP_FPS) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) duration = total_frames / fps # Limit processing to max 10 seconds max_frames = int(min(duration, 10) * fps) output_frames = [] # Initialize MediaPipe Pose for pose detection with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose: frame_count = 0 while frame_count < max_frames: ret, frame = cap.read() if not ret: break # Convert frame to RGB for MediaPipe image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) results = pose.process(image_rgb) # Draw pose landmarks if detected if results.pose_landmarks: mp.solutions.drawing_utils.draw_landmarks( frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS ) output_frames.append(frame) frame_count += 1 cap.release() # Check if any frames were processed if len(output_frames) == 0: return "Error: No frames to process." # Save output video output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name height, width, _ = output_frames[0].shape out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)) for f in output_frames: out.write(f) out.release() return output_file except Exception as e: # Catch any exceptions and return error message return f"Error during processing: {str(e)}" # Gradio interface iface = gr.Interface( fn=detect_pose, inputs=gr.Video(label="Upload a Video (max 10s)"), outputs=gr.Video(label="Pose Detection Output"), title="Human Pose Estimation", description="Upload a short video, and this app will detect human poses (max 10 seconds)." ) iface.launch()