import gradio as gr
import cv2
import mediapipe as mp
import torch
import tempfile
# Load YOLOv5 model from torch hub
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, trust_repo=True)
yolo_model.conf = 0.4 # confidence threshold
yolo_model.classes = [0] # only detect persons
# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
def detect_pose(video_file):
    try:
        # Copy the uploaded video to a temporary file so OpenCV can read it reliably
        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        with open(video_file, "rb") as src:
            temp_video.write(src.read())
        temp_video.close()

        cap = cv2.VideoCapture(temp_video.name)
        if not cap.isOpened():
            return None, "Error: Could not open video."

        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        max_frames = int(min(total_frames / fps, 15) * fps)  # limit processing to the first 15 seconds

        output_frames = []
        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose:
            for _ in range(max_frames):
                ret, frame = cap.read()
                if not ret:
                    break

                # Detect people with YOLOv5, then estimate a pose inside each person box
                results = yolo_model(frame)
                detections = results.xyxy[0].cpu().numpy()
                for det in detections:
                    x1, y1, x2, y2 = map(int, det[:4])
                    person_crop = frame[y1:y2, x1:x2]
                    if person_crop.size == 0:
                        continue  # skip degenerate boxes
                    person_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
                    pose_result = pose.process(person_rgb)
                    if pose_result.pose_landmarks:
                        # The crop is a view into the frame, so landmarks are drawn in place
                        mp_drawing.draw_landmarks(
                            person_crop, pose_result.pose_landmarks, mp_pose.POSE_CONNECTIONS
                        )
                    # Draw bounding box
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                output_frames.append(frame)
        cap.release()

        if not output_frames:
            return None, "Error: No frames processed."

        # Write the annotated frames to a new temporary MP4 file
        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
        height, width, _ = output_frames[0].shape
        out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        for f in output_frames:
            out.write(f)
        out.release()

        return output_file, "Pose detection completed."
    except Exception as e:
        return None, f"Runtime Error: {str(e)}"
# Gradio Interface
iface = gr.Interface(
    fn=detect_pose,
    inputs=gr.Video(label="Upload a Video (max 15s)"),
    outputs=[gr.Video(label="Pose Detection Output"), gr.Textbox(label="Status")],
    title="Multi-Person Pose Detection",
    description="Upload a short video (max 15s). The app detects multiple people and estimates their poses."
)
iface.launch()