Spaces:

vu0018
/

my-pose-estimation-space

Sleeping

App Files Files Community

vu0018 committed on Sep 26

Commit

015f0f2

verified ·

1 Parent(s): ce5aa37

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -99

app.py CHANGED Viewed

@@ -1,117 +1,86 @@
 import cv2
 import mediapipe as mp
-import numpy as np
-import gradio as gr
 import tempfile
-import shutil
 import os
 # Initialize MediaPipe Pose
 mp_pose = mp.solutions.pose
-mp_drawing = mp.solutions.drawing_utils
-pose = mp_pose.Pose(static_image_mode=False,
-                    min_detection_confidence=0.5,
-                    min_tracking_confidence=0.5)
-def calculate_angle(a, b, c):
-    a = np.array(a)
-    b = np.array(b)
-    c = np.array(c)
-    ba = a - b
-    bc = c - b
-    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
-    angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
-    return np.degrees(angle)
-def detect_pose_video(video_path, max_duration=10):  # <- 10 seconds limit
-    try:
-        if not os.path.exists(video_path):
-            return None, "Error: Video file does not exist."
-        # Copy video to temp file
-        temp_input = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-        temp_input.close()
-        shutil.copy(video_path, temp_input.name)
-        cap = cv2.VideoCapture(temp_input.name)
         if not cap.isOpened():
-            return None, "Error: Cannot open video file."
-        fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 640)
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 480)
-        max_frames = int(fps * max_duration)
-        temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-        temp_output.close()
-        out_path = temp_output.name
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
-        frame_count = 0
-        while frame_count < max_frames:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            # Resize if too large
-            max_dim = 640
-            h, w, _ = frame.shape
-            if max(h, w) > max_dim:
-                scale = max_dim / max(h, w)
-                frame = cv2.resize(frame, (int(w*scale), int(h*scale)))
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            results = pose.process(frame_rgb)
-            if results.pose_landmarks:
-                mp_drawing.draw_landmarks(
-                    frame,
-                    results.pose_landmarks,
-                    mp_pose.POSE_CONNECTIONS,
-                    mp_drawing.DrawingSpec(color=(0,255,0), thickness=2, circle_radius=2),
-                    mp_drawing.DrawingSpec(color=(0,0,255), thickness=2)
-                )
-                # Left elbow angle
-                landmarks = results.pose_landmarks.landmark
-                shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x * frame.shape[1],
-                            landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y * frame.shape[0]]
-                elbow = [landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x * frame.shape[1],
-                         landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y * frame.shape[0]]
-                wrist = [landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x * frame.shape[1],
-                         landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y * frame.shape[0]]
-                angle = calculate_angle(shoulder, elbow, wrist)
-                cv2.putText(frame, f"Left Elbow: {int(angle)} deg", (20,40),
-                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
-            out.write(frame)
-            frame_count += 1
-        cap.release()
-        out.release()
-        os.remove(temp_input.name)  # Clean up input
-        return out_path, "Video processed successfully."
-    except Exception as e:
-        return None, f"Runtime Error: {str(e)}"
-# Gradio interface with button enable fix
-with gr.Blocks() as demo:
-    gr.Markdown("## Human Pose Estimation on Video\nUpload a video (max 10 seconds). Submit is enabled only after upload.")
-    video_input = gr.Video(label="Upload Video")
-    status_output = gr.Textbox(label="Status")
-    video_output = gr.Video(label="Annotated Video")
-    submit_btn = gr.Button("Submit", interactive=False)
-    # Enable submit button correctly
-    def enable_submit(video):
-        return gr.Button.update(interactive=True) if video else gr.Button.update(interactive=False)
-    video_input.change(enable_submit, inputs=[video_input], outputs=[submit_btn])
-    submit_btn.click(detect_pose_video, inputs=[video_input], outputs=[video_output, status_output])
-demo.launch()

+import gradio as gr
 import cv2
 import mediapipe as mp
 import tempfile
 import os
 # Initialize MediaPipe Pose
 mp_pose = mp.solutions.pose
+def detect_pose(video_file):
+    """
+    This function takes an uploaded video file, limits it to 10 seconds,
+    applies human pose estimation using MediaPipe, and returns a new video
+    with the detected poses drawn on the frames.
+    """
+    try:
+        # Save uploaded video to a temporary file
+        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+        temp_video.write(open(video_file, "rb").read())
+        temp_video.close()
+        # Open video using OpenCV
+        cap = cv2.VideoCapture(temp_video.name)
         if not cap.isOpened():
+            return "Error: Could not open video file."
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        duration = total_frames / fps
+        # Limit processing to max 10 seconds
+        max_frames = int(min(duration, 10) * fps)
+        output_frames = []
+        # Initialize MediaPipe Pose for pose detection
+        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5) as pose:
+            frame_count = 0
+            while frame_count < max_frames:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Convert frame to RGB for MediaPipe
+                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                results = pose.process(image_rgb)
+                # Draw pose landmarks if detected
+                if results.pose_landmarks:
+                    mp.solutions.drawing_utils.draw_landmarks(
+                        frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
+                    )
+                output_frames.append(frame)
+                frame_count += 1
+        cap.release()
+        # Check if any frames were processed
+        if len(output_frames) == 0:
+            return "Error: No frames to process."
+        # Save output video
+        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
+        height, width, _ = output_frames[0].shape
+        out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
+        for f in output_frames:
+            out.write(f)
+        out.release()
+        return output_file
+    except Exception as e:
+        # Catch any exceptions and return error message
+        return f"Error during processing: {str(e)}"
+# Gradio interface
+iface = gr.Interface(
+    fn=detect_pose,
+    inputs=gr.Video(label="Upload a Video (max 10s)"),
+    outputs=gr.Video(label="Pose Detection Output"),
+    title="Human Pose Estimation",
+    description="Upload a short video, and this app will detect human poses (max 10 seconds)."
+)
+iface.launch()