File size: 2,831 Bytes
015f0f2
f7c47ba
 
8a40a91
f09e80a
f7c47ba
956cfce
f7c47ba
a55051b
015f0f2
 
 
 
 
 
 
 
 
 
 
956cfce
015f0f2
 
a55051b
015f0f2
a55051b
015f0f2
 
 
956cfce
015f0f2
 
a55051b
015f0f2
956cfce
015f0f2
 
 
 
 
 
 
956cfce
015f0f2
 
 
956cfce
015f0f2
 
 
 
 
956cfce
015f0f2
 
956cfce
015f0f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
import cv2
import mediapipe as mp
import tempfile
import os

# Initialize MediaPipe Pose
# Shared handle to the MediaPipe pose-estimation solution; the actual
# Pose detector instance is created per-call inside detect_pose().
mp_pose = mp.solutions.pose

def detect_pose(video_file):
    """
    Run MediaPipe pose estimation on an uploaded video.

    Processes at most the first 10 seconds of the clip, draws the detected
    pose landmarks on each frame, and writes the annotated frames to a new
    MP4 file.

    Parameters:
        video_file: Path to the uploaded video (as provided by gr.Video).

    Returns:
        Path to the annotated output video on success, or an error-message
        string on failure (Gradio surfaces either to the user).
    """
    try:
        # Open the uploaded file directly; no need to copy it to a temp
        # file first (the old copy also leaked an open file handle).
        cap = cv2.VideoCapture(video_file)
        if not cap.isOpened():
            return "Error: Could not open video file."

        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Some containers report 0 fps; fall back to a sane default
            # instead of dividing by zero / writing an unplayable file.
            if not fps or fps <= 0:
                fps = 30.0

            # Frame size is needed up front so frames can be streamed to
            # the writer instead of being buffered in memory.
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Cap processing at 10 seconds worth of frames; shorter videos
            # simply run out of frames and stop at the `ret` check below.
            max_frames = int(10 * fps)

            output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
            out = cv2.VideoWriter(
                output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
            )

            frames_written = 0
            with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose:
                while frames_written < max_frames:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # MediaPipe expects RGB; OpenCV decodes as BGR.
                    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                    # Draw landmarks in place on the BGR frame if a person
                    # was detected in this frame.
                    if results.pose_landmarks:
                        mp.solutions.drawing_utils.draw_landmarks(
                            frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
                        )

                    # Stream each frame straight to disk rather than
                    # accumulating the whole video in a list.
                    out.write(frame)
                    frames_written += 1

            out.release()

            if frames_written == 0:
                # Don't leave an empty output file behind.
                os.remove(output_file)
                return "Error: No frames to process."

            return output_file
        finally:
            # Release the capture even if processing raised mid-loop.
            cap.release()

    except Exception as e:
        # Top-level boundary for the Gradio callback: report the failure
        # to the user instead of crashing the app.
        return f"Error during processing: {str(e)}"

# Wire the pose-detection callback into a simple Gradio web UI:
# one video upload in, one annotated video out.
iface = gr.Interface(
    fn=detect_pose,
    title="Human Pose Estimation",
    description="Upload a short video, and this app will detect human poses (max 10 seconds).",
    inputs=gr.Video(label="Upload a Video (max 10s)"),
    outputs=gr.Video(label="Pose Detection Output"),
)

# Start the local Gradio server (blocks until the app is stopped).
iface.launch()