Spaces:

seesaw112233
/

pose-estimation

Sleeping

App Files Community

seesaw112233 committed on Dec 25, 2025

Commit

2841010

verified ·

1 Parent(s): 71c1e8e

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -71

app.py CHANGED Viewed

@@ -120,8 +120,8 @@ FACEMESH_CONTOURS = solutions.face_mesh.FACEMESH_CONTOURS
 # Pose connections
 POSE_CONNECTIONS = solutions.pose.POSE_CONNECTIONS
-def draw_face_landmarks(image, face_landmarks, draw_full_mesh=False):
-    """Draw face landmarks on image using new API format"""
     if face_landmarks is None:
         return
@@ -132,15 +132,16 @@ def draw_face_landmarks(image, face_landmarks, draw_full_mesh=False):
         for lm in face_landmarks
     ])
-    if draw_full_mesh:
-        mp_drawing.draw_landmarks(
-            image=image,
-            landmark_list=face_landmarks_proto,
-            connections=FACEMESH_TESSELATION,
-            landmark_drawing_spec=None,
-            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
-        )
     mp_drawing.draw_landmarks(
         image=image,
         landmark_list=face_landmarks_proto,
@@ -210,11 +211,11 @@ def process_video(
     min_pose_track_conf: float = 0.5,
     ear_threshold: float = 0.21,
     blink_min_consec: int = 2,
-    draw_full_face_mesh: bool = False,
     max_frames: int = 0,
 ) -> Tuple[str, str, str, str]:
     """
     Process video using new MediaPipe API with GPU support
     """
     # Download models first
     face_model_path, pose_model_path = download_models()
@@ -364,9 +365,9 @@ def process_video(
             left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
             right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
-            # Draw overlays
             draw_pose_landmarks(frame_bgr, pose_landmarks)
-            draw_face_landmarks(frame_bgr, face_landmarks, draw_full_mesh=draw_full_face_mesh)
             # HUD text
             hud_lines = [
@@ -458,36 +459,36 @@ def process_video(
     with open(out_json, "w", encoding="utf-8") as f:
         json.dump(summary, f, ensure_ascii=False, indent=2)
-    report_md = f"""# MediaPipe 面部+姿态分析报告 (GPU加速)
-## 视频信息
-- 分辨率: {width} x {height}
 - FPS: {fps:.2f}
-- 处理帧数: {len(df)}
-- 时长: {summary["video"]["duration_s"]:.2f}秒
-## 眨眼分析 (EAR)
-- 阈值: {ear_threshold}
-- 最小连续帧: {blink_min_consec}
-- 左眼眨眼: {summary["blink"]["left_blinks"]}次 ({summary["blink"]["left_blinks_per_min"]:.2f} 次/分钟)
-- 右眼眨眼: {summary["blink"]["right_blinks"]}次 ({summary["blink"]["right_blinks_per_min"]:.2f} 次/分钟)
-- 左眼EAR: 平均={summary["blink"]["left_ear_stats"]["mean"]}  最小={summary["blink"]["left_ear_stats"]["min"]}  最大={summary["blink"]["left_ear_stats"]["max"]}
-- 右眼EAR: 平均={summary["blink"]["right_ear_stats"]["mean"]} 最小={summary["blink"]["right_ear_stats"]["min"]} 最大={summary["blink"]["right_ear_stats"]["max"]}
-## 肢体运动量 (归一化单位)
-> 基于归一化坐标(0~1)计算，适合相对比较和趋势分析
-- 累计位移 (数值越大=运动越多):
-  - 左手腕: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
-  - 右手腕: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
-  - 左脚踝: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
-  - 右脚踝: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
-## 输出文件
-- annotated.mp4: 叠加了姿态和面部mesh的视频
-- per_frame_metrics.csv: 逐帧指标
-- summary.json: 统计汇总
-**使用GPU加速处理 | 新版Face Landmarker API**
 """
     with open(out_report, "w", encoding="utf-8") as f:
         f.write(report_md)
@@ -506,7 +507,6 @@ def ui_process(
     min_pose_track_conf,
     ear_threshold,
     blink_min_consec,
-    draw_full_face_mesh,
     max_frames
 ):
     if isinstance(video, dict) and "path" in video:
@@ -523,7 +523,6 @@ def ui_process(
             min_pose_track_conf=float(min_pose_track_conf),
             ear_threshold=float(ear_threshold),
             blink_min_consec=int(blink_min_consec),
-            draw_full_face_mesh=bool(draw_full_face_mesh),
             max_frames=int(max_frames),
         )
@@ -534,51 +533,51 @@ def ui_process(
     except Exception as e:
         import traceback
-        error_msg = f"# 处理视频时出错\n\n```\n{traceback.format_exc()}\n```"
         return None, None, None, error_msg
-demo = gr.Blocks(title="视频姿态+面部分析 (GPU加速)")
 with demo:
     gr.Markdown("""
-    ## 上传视频 → MediaPipe GPU加速 → 姿态+面部mesh追踪 + 眨眼/肢体运动分析
-    **特性:**
-    - ✅ GPU加速处理
-    - ✅ 新版Face Landmarker API (更精确的面部mesh)
-    - ✅ 眨眼检测 (EAR算法)
-    - ✅ 肢体运动量化
-    - ✅ 关节角度分析
     """)
     with gr.Row():
-        video_in = gr.Video(label="上传视频")
-    with gr.Accordion("参数设置 (默认值通常就够用)", open=False):
-        gr.Markdown("### 面部检测参数")
-        min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="面部检测置信度阈值")
-        min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="面部追踪置信度阈值")
-        gr.Markdown("### 姿态检测参数")
-        min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="姿态检测置信度阈值")
-        min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="姿态追踪置信度阈值")
-        gr.Markdown("### 眨眼检测参数")
-        ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 (EAR, 越小越严格)")
-        blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数 (抗抖动)")
-        gr.Markdown("### 可视化选项")
-        draw_full_face_mesh = gr.Checkbox(value=False, label="绘制完整面部mesh (更密集，速度较慢)")
-        max_frames = gr.Number(value=0, precision=0, label="最多处理帧数 (0=全部处理，调试可设300)")
-    run_btn = gr.Button("🚀 开始分析 (GPU加速)", variant="primary", size="lg")
     with gr.Row():
-        video_out = gr.Video(label="输出: 标注后的视频")
     with gr.Row():
-        csv_out = gr.File(label="逐帧指标CSV")
-        json_out = gr.File(label="汇总JSON")
     report_out = gr.Markdown()
     run_btn.click(
@@ -591,7 +590,6 @@ with demo:
             min_pose_track_conf,
             ear_threshold,
             blink_min_consec,
-            draw_full_face_mesh,
             max_frames,
         ],
         outputs=[video_out, csv_out, json_out, report_out],

 # Pose connections
 POSE_CONNECTIONS = solutions.pose.POSE_CONNECTIONS
+def draw_face_landmarks(image, face_landmarks):
+    """Draw face landmarks on image using new API format - always draw full mesh"""
     if face_landmarks is None:
         return
         for lm in face_landmarks
     ])
+    # Always draw full tesselation mesh
+    mp_drawing.draw_landmarks(
+        image=image,
+        landmark_list=face_landmarks_proto,
+        connections=FACEMESH_TESSELATION,
+        landmark_drawing_spec=None,
+        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
+    )
+    # Also draw contours for clarity
     mp_drawing.draw_landmarks(
         image=image,
         landmark_list=face_landmarks_proto,
     min_pose_track_conf: float = 0.5,
     ear_threshold: float = 0.21,
     blink_min_consec: int = 2,
     max_frames: int = 0,
 ) -> Tuple[str, str, str, str]:
     """
     Process video using new MediaPipe API with GPU support
+    Face mesh is always drawn (not optional)
     """
     # Download models first
     face_model_path, pose_model_path = download_models()
             left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
             right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
+            # Draw overlays (face mesh is always drawn, not optional)
             draw_pose_landmarks(frame_bgr, pose_landmarks)
+            draw_face_landmarks(frame_bgr, face_landmarks)
             # HUD text
             hud_lines = [
     with open(out_json, "w", encoding="utf-8") as f:
         json.dump(summary, f, ensure_ascii=False, indent=2)
+    report_md = f"""# MediaPipe Face + Pose Analysis Report (GPU Accelerated)
+## Video Information
+- Resolution: {width} x {height}
 - FPS: {fps:.2f}
+- Frames Processed: {len(df)}
+- Duration: {summary["video"]["duration_s"]:.2f} seconds
+## Blink Analysis (EAR)
+- Threshold: {ear_threshold}
+- Minimum Consecutive Frames: {blink_min_consec}
+- Left Eye Blinks: {summary["blink"]["left_blinks"]} ({summary["blink"]["left_blinks_per_min"]:.2f} blinks/min)
+- Right Eye Blinks: {summary["blink"]["right_blinks"]} ({summary["blink"]["right_blinks_per_min"]:.2f} blinks/min)
+- Left Eye EAR: mean={summary["blink"]["left_ear_stats"]["mean"]}  min={summary["blink"]["left_ear_stats"]["min"]}  max={summary["blink"]["left_ear_stats"]["max"]}
+- Right Eye EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
+## Limb Movement (Normalized Units)
+> Displacement/speed calculated based on normalized coordinates (0~1), suitable for relative comparison and trend analysis
+- Total Displacement (higher = more movement):
+  - Left Wrist: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
+  - Right Wrist: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
+  - Left Ankle: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
+  - Right Ankle: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
+## Output Files
+- annotated.mp4: Video with pose skeleton and face mesh overlays
+- per_frame_metrics.csv: Frame-by-frame metrics
+- summary.json: Statistical summary
+**Processed with GPU acceleration | New Face Landmarker API | Full Face Mesh Always Enabled**
 """
     with open(out_report, "w", encoding="utf-8") as f:
         f.write(report_md)
     min_pose_track_conf,
     ear_threshold,
     blink_min_consec,
     max_frames
 ):
     if isinstance(video, dict) and "path" in video:
             min_pose_track_conf=float(min_pose_track_conf),
             ear_threshold=float(ear_threshold),
             blink_min_consec=int(blink_min_consec),
             max_frames=int(max_frames),
         )
     except Exception as e:
         import traceback
+        error_msg = f"# Error Processing Video\n\n```\n{traceback.format_exc()}\n```"
         return None, None, None, error_msg
+demo = gr.Blocks(title="Video Pose + Face Analysis (GPU Accelerated)")
 with demo:
     gr.Markdown("""
+    ## Upload Video → MediaPipe GPU Acceleration → Pose + Face Mesh Tracking + Blink/Limb Analysis
+    **Features:**
+    - ✅ GPU Accelerated Processing
+    - ✅ New Face Landmarker API (more accurate 478-point face mesh)
+    - ✅ Full Face Mesh Always Enabled
+    - ✅ Blink Detection (EAR Algorithm)
+    - ✅ Limb Movement Quantification
+    - ✅ Joint Angle Analysis
     """)
     with gr.Row():
+        video_in = gr.Video(label="Upload Video")
+    with gr.Accordion("Parameters (defaults work well for most cases)", open=False):
+        gr.Markdown("### Face Detection Parameters")
+        min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Detection Confidence Threshold")
+        min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Tracking Confidence Threshold")
+        gr.Markdown("### Pose Detection Parameters")
+        min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Detection Confidence Threshold")
+        min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Tracking Confidence Threshold")
+        gr.Markdown("### Blink Detection Parameters")
+        ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="Blink Threshold (EAR, lower = stricter)")
+        blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="Blink Minimum Consecutive Frames (anti-jitter)")
+        gr.Markdown("### Processing Options")
+        max_frames = gr.Number(value=0, precision=0, label="Maximum Frames to Process (0 = process all, set to 300 for debugging)")
+    run_btn = gr.Button("🚀 Start Analysis (GPU Accelerated)", variant="primary", size="lg")
     with gr.Row():
+        video_out = gr.Video(label="Output: Annotated Video")
     with gr.Row():
+        csv_out = gr.File(label="Per-Frame Metrics CSV")
+        json_out = gr.File(label="Summary JSON")
     report_out = gr.Markdown()
     run_btn.click(
             min_pose_track_conf,
             ear_threshold,
             blink_min_consec,
             max_frames,
         ],
         outputs=[video_out, csv_out, json_out, report_out],