File size: 2,878 Bytes
194244e
 
 
 
 
 
c74bf35
4f6f188
c74bf35
8cac824
c74bf35
194244e
 
8cac824
194244e
8cac824
194244e
 
 
 
 
 
 
 
 
8cac824
194244e
6734399
194244e
 
 
 
 
 
 
 
 
c74bf35
 
 
 
8cac824
c74bf35
 
 
 
 
 
8cac824
c74bf35
 
 
 
 
194244e
 
 
 
 
8cac824
 
194244e
 
 
 
 
 
 
 
8cac824
194244e
 
 
 
 
 
8cac824
194244e
8cac824
 
6734399
194244e
 
c74bf35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import cv2
import mediapipe as mp
import torch
import tempfile

# Load YOLOv5 model from torch hub
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, trust_repo=True)
yolo_model.conf = 0.4  # confidence threshold
yolo_model.classes = [0]  # only detect persons

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

def detect_pose(video_file):
    try:
        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        temp_video.write(open(video_file, "rb").read())
        temp_video.close()

        cap = cv2.VideoCapture(temp_video.name)
        if not cap.isOpened():
            return None, "Error: Could not open video."

        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        max_frames = int(min(total_frames / fps, 15) * fps)  # limit to 15s

        output_frames = []

        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose:
            for _ in range(max_frames):
                ret, frame = cap.read()
                if not ret:
                    break

                results = yolo_model(frame)
                detections = results.xyxy[0].cpu().numpy()

                for det in detections:
                    x1, y1, x2, y2 = map(int, det[:4])
                    person_crop = frame[y1:y2, x1:x2]

                    person_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
                    pose_result = pose.process(person_rgb)

                    if pose_result.pose_landmarks:
                        mp_drawing.draw_landmarks(
                            person_crop, pose_result.pose_landmarks, mp_pose.POSE_CONNECTIONS
                        )

                    # Draw bounding box
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                output_frames.append(frame)

        cap.release()

        if not output_frames:
            return None, "Error: No frames processed."

        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
        height, width, _ = output_frames[0].shape
        out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        for f in output_frames:
            out.write(f)
        out.release()

        return output_file, "Pose detection completed."

    except Exception as e:
        return None, f"Runtime Error: {str(e)}"

# Gradio Interface
iface = gr.Interface(
    fn=detect_pose,
    inputs=gr.Video(label="Upload a Video (max 10s)"),
    outputs=[gr.Video(label="Pose Detection Output"), gr.Textbox(label="Status")],
    title="Multi-Person Pose Detection",
    description="Upload a short video (max 15s). The app detects multiple people and estimates their poses."
)

iface.launch()