seesaw112233 committed on
Commit
391a38b
·
verified ·
1 Parent(s): 17d3dc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -85
app.py CHANGED
@@ -1,92 +1,130 @@
1
- import gradio as gr
2
  import cv2
 
 
 
 
3
  import mediapipe as mp
4
- import tempfile
5
- import os
6
 
7
- def process_video(input_video):
8
- """处理视频"""
9
-
10
- if input_video is None:
 
 
 
 
11
  return None
12
-
13
- print("📹 开始处理...")
14
-
15
- try:
16
- # 初始化 MediaPipe
17
- mp_pose = mp.solutions.pose
18
- mp_drawing = mp.solutions.drawing_utils
19
-
20
- pose = mp_pose.Pose(
21
- static_image_mode=False,
22
- model_complexity=1,
23
- min_detection_confidence=0.5,
24
- min_tracking_confidence=0.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  )
26
-
27
- # 打开视频
28
- cap = cv2.VideoCapture(input_video)
29
-
30
- if not cap.isOpened():
31
- return None
32
-
33
- # 获取视频信息
34
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
35
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
36
- fps = cap.get(cv2.CAP_PROP_FPS)
37
-
38
- print(f"视频: {width}x{height} @ {fps}fps")
39
-
40
- # 创建输出
41
- output_path = tempfile.mktemp(suffix='.mp4')
42
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
43
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
44
-
45
- frame_count = 0
46
-
47
- # 处理每一帧
48
- while cap.isOpened():
49
- ret, frame = cap.read()
50
- if not ret:
51
- break
52
-
53
- # 转 RGB
54
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
55
-
56
- # 检测姿态
57
- results = pose.process(rgb)
58
-
59
- # 绘制骨架
60
- if results.pose_landmarks:
61
- mp_drawing.draw_landmarks(
62
- frame,
63
- results.pose_landmarks,
64
- mp_pose.POSE_CONNECTIONS,
65
- mp_drawing.DrawingSpec(color=(0,255,0), thickness=2, circle_radius=2),
66
- mp_drawing.DrawingSpec(color=(0,0,255), thickness=2)
67
- )
68
-
69
- out.write(frame)
70
- frame_count += 1
71
-
72
- cap.release()
73
- out.release()
74
- pose.close()
75
-
76
- print(f"✅ 完成! {frame_count} 帧")
77
- return output_path
78
-
79
- except Exception as e:
80
- print(f"❌ 错误: {e}")
81
- return None
 
 
 
 
 
 
 
82
 
83
- # 创建界面
84
- demo = gr.Interface(
85
- fn=process_video,
86
- inputs=gr.Video(),
87
- outputs=gr.Video(),
88
- title="👶 Baby Pose Estimation",
89
- description="上传视频,自动识别姿态"
90
- )
91
 
92
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
+ import os
2
  import cv2
3
+ import numpy as np
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
  import mediapipe as mp
 
 
8
 
9
+
10
+ mp_pose = mp.solutions.pose
11
+ mp_drawing = mp.solutions.drawing_utils
12
+
13
+
14
+ def _ensure_rgb(img: np.ndarray) -> np.ndarray:
15
+ # Gradio Image returns RGB np.uint8
16
+ if img is None:
17
  return None
18
+ if img.dtype != np.uint8:
19
+ img = np.clip(img, 0, 255).astype(np.uint8)
20
+ if img.ndim == 2:
21
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
22
+ return img
23
+
24
+
25
def estimate_pose(image: np.ndarray, model_complexity: int, min_det: float, min_track: float):
    """Run single-image pose detection with MediaPipe.

    Returns:
        A tuple of (annotated RGB image, DataFrame of keypoints with
        columns id/name/x/y/z/visibility). Coordinates are normalized
        to the input image's width/height.
    """
    frame = _ensure_rgb(image)
    if frame is None:
        return None, pd.DataFrame()

    # One-shot detector: static_image_mode treats the input as an
    # independent photo rather than a frame of a video stream.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=model_complexity,
        enable_segmentation=False,
        min_detection_confidence=float(min_det),
        min_tracking_confidence=float(min_track),
    ) as detector:
        results = detector.process(frame)

    output = frame.copy()
    landmarks = results.pose_landmarks
    if not landmarks:
        # Nothing detected: return the untouched image and an empty table.
        return output, pd.DataFrame([])

    # Draw on a BGR copy (OpenCV convention), then convert back to RGB
    # for display in Gradio.
    bgr = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        bgr,
        landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(thickness=2, circle_radius=2),
        connection_drawing_spec=mp_drawing.DrawingSpec(thickness=2),
    )
    output = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    # One row per landmark, in enum order.
    table = [
        {
            "id": idx,
            "name": mp_pose.PoseLandmark(idx).name,
            "x": float(pt.x),
            "y": float(pt.y),
            "z": float(pt.z),
            "visibility": float(pt.visibility),
        }
        for idx, pt in enumerate(landmarks.landmark)
    ]
    return output, pd.DataFrame(table)
77
+
78
+
79
def build_demo():
    """Assemble and return the Gradio Blocks UI for the pose estimator."""
    with gr.Blocks(title="Pose Estimation") as demo:
        # Intro / usage notes shown at the top of the page.
        gr.Markdown(
            "## 🕺 Pose Estimation (MediaPipe)\n"
            "上传一张图片 输出骨架标注图 + 关键点表格。\n\n"
            "如果你之前遇到 `TypeError: argument of type 'bool' is not iterable`,这是 Gradio 4.x 的一个坑,"
            "本 Space 已升级到 Gradio 5.x 来避免。"
        )

        # Input image on the left, annotated result on the right.
        with gr.Row():
            image_in = gr.Image(label="Input Image", type="numpy")
            image_out = gr.Image(label="Annotated Output", type="numpy")

        # Detector tuning controls.
        with gr.Row():
            complexity = gr.Radio(
                choices=[0, 1, 2],
                value=1,
                label="Model Complexity (0=light, 2=accurate)",
            )
            det_threshold = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Min Detection Confidence")
            track_threshold = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Min Tracking Confidence")

        keypoints_out = gr.Dataframe(
            label="Keypoints (normalized coords)",
            headers=["id", "name", "x", "y", "z", "visibility"],
            interactive=False,
            wrap=True,
        )

        run_button = gr.Button("Run Pose Estimation", variant="primary")
        run_button.click(
            fn=estimate_pose,
            inputs=[image_in, complexity, det_threshold, track_threshold],
            outputs=[image_out, keypoints_out],
        )

        gr.Markdown(
            "### Notes\n"
            "- `x/y/z` 是相对坐标(0~1),相对于输入图像宽高。\n"
            "- 这是 CPU 友好版本,适合 Hugging Face Spaces。"
        )

    return demo
122
+
123
 
124
# Build the app at import time so Spaces can pick up `demo` directly.
demo = build_demo()


if __name__ == "__main__":
    # Hugging Face Spaces normally works without share=True; set
    # GRADIO_SHARE=1 as a fallback if localhost isn't reachable.
    use_share = os.getenv("GRADIO_SHARE", "0") == "1"
    demo.launch(server_name="0.0.0.0", server_port=7860, share=use_share)