seesaw112233 committed · Commit 2841010 · verified · 1 Parent(s): 71c1e8e

Update app.py

Files changed (1)
  1. app.py +69 -71
app.py CHANGED
@@ -120,8 +120,8 @@ FACEMESH_CONTOURS = solutions.face_mesh.FACEMESH_CONTOURS
 # Pose connections
 POSE_CONNECTIONS = solutions.pose.POSE_CONNECTIONS
 
-def draw_face_landmarks(image, face_landmarks, draw_full_mesh=False):
-    """Draw face landmarks on image using new API format"""
+def draw_face_landmarks(image, face_landmarks):
+    """Draw face landmarks on image using new API format - always draw full mesh"""
     if face_landmarks is None:
         return
 
@@ -132,15 +132,16 @@ def draw_face_landmarks(image, face_landmarks, draw_full_mesh=False):
         for lm in face_landmarks
     ])
 
-    if draw_full_mesh:
-        mp_drawing.draw_landmarks(
-            image=image,
-            landmark_list=face_landmarks_proto,
-            connections=FACEMESH_TESSELATION,
-            landmark_drawing_spec=None,
-            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
-        )
+    # Always draw full tesselation mesh
+    mp_drawing.draw_landmarks(
+        image=image,
+        landmark_list=face_landmarks_proto,
+        connections=FACEMESH_TESSELATION,
+        landmark_drawing_spec=None,
+        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
+    )
 
+    # Also draw contours for clarity
     mp_drawing.draw_landmarks(
         image=image,
         landmark_list=face_landmarks_proto,
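For readers unfamiliar with the new tasks API: the `face_landmarks_proto` used above is built by converting the plain landmark objects returned by the Face Landmarker into the protobuf list that the classic drawing utilities expect. A minimal sketch of that conversion, assuming `face_landmarks` is one entry of `FaceLandmarkerResult.face_landmarks` (the helper name `draw_full_mesh` is illustrative; app.py's own function may differ in detail):

```python
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

mp_drawing = solutions.drawing_utils
mp_drawing_styles = solutions.drawing_styles
FACEMESH_TESSELATION = solutions.face_mesh.FACEMESH_TESSELATION

def draw_full_mesh(image, face_landmarks):
    """Convert tasks-API landmarks (objects with .x/.y/.z) to a proto list and draw the tesselation."""
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
        landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
        for lm in face_landmarks
    ])
    mp_drawing.draw_landmarks(
        image=image,
        landmark_list=face_landmarks_proto,
        connections=FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style(),
    )
```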
@@ -210,11 +211,11 @@ def process_video(
     min_pose_track_conf: float = 0.5,
     ear_threshold: float = 0.21,
     blink_min_consec: int = 2,
-    draw_full_face_mesh: bool = False,
     max_frames: int = 0,
 ) -> Tuple[str, str, str, str]:
     """
     Process video using new MediaPipe API with GPU support
+    Face mesh is always drawn (not optional)
     """
     # Download models first
     face_model_path, pose_model_path = download_models()
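The docstring above refers to the new MediaPipe Tasks API with GPU support. As a rough sketch of what that setup usually looks like with the downloaded `.task` files (the helper name `create_landmarkers` and the exact option values are assumptions; app.py's own wiring is not shown in this hunk):

```python
import mediapipe as mp
from mediapipe.tasks import python as mp_tasks
from mediapipe.tasks.python import vision

def create_landmarkers(face_model_path, pose_model_path,
                       min_face_det_conf=0.5, min_face_track_conf=0.5,
                       min_pose_det_conf=0.5, min_pose_track_conf=0.5):
    """Build FaceLandmarker and PoseLandmarker in VIDEO mode with the GPU delegate."""
    gpu = mp_tasks.BaseOptions.Delegate.GPU

    face_options = vision.FaceLandmarkerOptions(
        base_options=mp_tasks.BaseOptions(model_asset_path=face_model_path, delegate=gpu),
        running_mode=vision.RunningMode.VIDEO,
        num_faces=1,
        min_face_detection_confidence=min_face_det_conf,
        min_tracking_confidence=min_face_track_conf,
    )
    pose_options = vision.PoseLandmarkerOptions(
        base_options=mp_tasks.BaseOptions(model_asset_path=pose_model_path, delegate=gpu),
        running_mode=vision.RunningMode.VIDEO,
        num_poses=1,
        min_pose_detection_confidence=min_pose_det_conf,
        min_tracking_confidence=min_pose_track_conf,
    )
    face_landmarker = vision.FaceLandmarker.create_from_options(face_options)
    pose_landmarker = vision.PoseLandmarker.create_from_options(pose_options)
    return face_landmarker, pose_landmarker

# Per frame (rgb_frame is an RGB numpy array), detection runs against a millisecond timestamp:
#   mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
#   face_result = face_landmarker.detect_for_video(mp_image, timestamp_ms)
#   face_landmarks = face_result.face_landmarks[0] if face_result.face_landmarks else None
```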
@@ -364,9 +365,9 @@ def process_video(
         left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
         right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
 
-        # Draw overlays
+        # Draw overlays (face mesh is always drawn, not optional)
         draw_pose_landmarks(frame_bgr, pose_landmarks)
-        draw_face_landmarks(frame_bgr, face_landmarks, draw_full_mesh=draw_full_face_mesh)
+        draw_face_landmarks(frame_bgr, face_landmarks)
 
         # HUD text
         hud_lines = [
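The knee angles above come from `get_angle`, which is not shown in this diff. A plausible sketch of such a helper, assuming it takes three pixel-space points (hip, knee, ankle) and returns the angle at the middle point in degrees:

```python
import numpy as np

def get_angle(a, b, c):
    """Angle at point b (degrees) formed by segments b->a and b->c; points are (x, y)."""
    a, b, c = np.asarray(a, float), np.asarray(b, float), np.asarray(c, float)
    v1, v2 = a - b, c - b
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return float("nan")
    cos_ang = np.clip(np.dot(v1, v2) / denom, -1.0, 1.0)
    return float(np.degrees(np.arccos(cos_ang)))

# A straight leg gives ~180 degrees, a right-angle bend gives ~90:
# get_angle((0, 0), (0, 1), (1, 1)) -> 90.0
```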
@@ -458,36 +459,36 @@ def process_video(
     with open(out_json, "w", encoding="utf-8") as f:
         json.dump(summary, f, ensure_ascii=False, indent=2)
 
-    report_md = f"""# MediaPipe 面部+姿态分析报告 (GPU加速)
-
-## 视频信息
-- 分辨率: {width} x {height}
+    report_md = f"""# MediaPipe Face + Pose Analysis Report (GPU Accelerated)
+
+## Video Information
+- Resolution: {width} x {height}
 - FPS: {fps:.2f}
-- 处理帧数: {len(df)}
-- 时长: {summary["video"]["duration_s"]:.2f}
-
-## 眨眼分析 (EAR)
-- 阈值: {ear_threshold}
-- 最小连续帧: {blink_min_consec}
-- 左眼眨眼: {summary["blink"]["left_blinks"]} ({summary["blink"]["left_blinks_per_min"]:.2f} /分钟)
-- 右眼眨眼: {summary["blink"]["right_blinks"]} ({summary["blink"]["right_blinks_per_min"]:.2f} /分钟)
-- 左眼EAR: 平均={summary["blink"]["left_ear_stats"]["mean"]} 最小={summary["blink"]["left_ear_stats"]["min"]} 最大={summary["blink"]["left_ear_stats"]["max"]}
-- 右眼EAR: 平均={summary["blink"]["right_ear_stats"]["mean"]} 最小={summary["blink"]["right_ear_stats"]["min"]} 最大={summary["blink"]["right_ear_stats"]["max"]}
-
-## 肢体运动量 (归一化单位)
-> 基于归一化坐标(0~1)计算,适合相对比较和趋势分析
-- 累计位移 (数值越大=运动越多):
-  - 左手腕: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
-  - 右手腕: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
-  - 左脚踝: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
-  - 右脚踝: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
-
-## 输出文件
-- annotated.mp4: 叠加了姿态和面部mesh的视频
-- per_frame_metrics.csv: 逐帧指标
-- summary.json: 统计汇总
-
-**使用GPU加速处理 | 新版Face Landmarker API**
+- Frames Processed: {len(df)}
+- Duration: {summary["video"]["duration_s"]:.2f} seconds
+
+## Blink Analysis (EAR)
+- Threshold: {ear_threshold}
+- Minimum Consecutive Frames: {blink_min_consec}
+- Left Eye Blinks: {summary["blink"]["left_blinks"]} ({summary["blink"]["left_blinks_per_min"]:.2f} blinks/min)
+- Right Eye Blinks: {summary["blink"]["right_blinks"]} ({summary["blink"]["right_blinks_per_min"]:.2f} blinks/min)
+- Left Eye EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
+- Right Eye EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
+
+## Limb Movement (Normalized Units)
+> Displacement/speed calculated based on normalized coordinates (0~1), suitable for relative comparison and trend analysis
+- Total Displacement (higher = more movement):
+  - Left Wrist: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
+  - Right Wrist: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
+  - Left Ankle: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
+  - Right Ankle: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
+
+## Output Files
+- annotated.mp4: Video with pose skeleton and face mesh overlays
+- per_frame_metrics.csv: Frame-by-frame metrics
+- summary.json: Statistical summary
+
+**Processed with GPU acceleration | New Face Landmarker API | Full Face Mesh Always Enabled**
 """
     with open(out_report, "w", encoding="utf-8") as f:
         f.write(report_md)
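For reference, the blink and limb-movement numbers written into this report follow standard formulas: the eye aspect ratio (EAR) compares vertical to horizontal eye-landmark distances, a blink is counted when EAR stays below `ear_threshold` for at least `blink_min_consec` consecutive frames, and `total_disp` is the cumulative frame-to-frame displacement of a landmark in normalized (0~1) coordinates. A minimal sketch of those computations (helper names and landmark choices are illustrative, not copied from app.py):

```python
import numpy as np

def eye_aspect_ratio(pts):
    """EAR for six eye landmarks p1..p6 as (x, y): (|p2-p6| + |p3-p5|) / (2 * |p1-p4|)."""
    p1, p2, p3, p4, p5, p6 = (np.asarray(p, float) for p in pts)
    vertical = np.linalg.norm(p2 - p6) + np.linalg.norm(p3 - p5)
    horizontal = 2.0 * np.linalg.norm(p1 - p4)
    return vertical / horizontal if horizontal > 0 else float("nan")

def count_blinks(ear_series, ear_threshold=0.21, blink_min_consec=2):
    """Count runs of at least blink_min_consec consecutive frames with EAR below the threshold."""
    blinks, run = 0, 0
    for ear in ear_series:
        if ear < ear_threshold:
            run += 1
        else:
            if run >= blink_min_consec:
                blinks += 1
            run = 0
    if run >= blink_min_consec:  # blink still in progress at the end of the clip
        blinks += 1
    return blinks

def total_displacement(xy_series):
    """Cumulative per-frame displacement of one landmark, in normalized (0~1) units."""
    xy = np.asarray(xy_series, float)
    return float(np.sum(np.linalg.norm(np.diff(xy, axis=0), axis=1)))
```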
@@ -506,7 +507,6 @@ def ui_process(
     min_pose_track_conf,
     ear_threshold,
     blink_min_consec,
-    draw_full_face_mesh,
     max_frames
 ):
     if isinstance(video, dict) and "path" in video:
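The `isinstance(video, dict)` check above exists because, depending on the Gradio version and component settings, a `gr.Video` input may arrive either as a plain filepath string or as a dict carrying a `path` key. A small sketch of the normalization this implies (variable names are illustrative):

```python
# Accept both payload shapes Gradio may deliver for a video input.
if isinstance(video, dict) and "path" in video:
    video_path = video["path"]
else:
    video_path = str(video)
```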
@@ -523,7 +523,6 @@ def ui_process(
         min_pose_track_conf=float(min_pose_track_conf),
         ear_threshold=float(ear_threshold),
         blink_min_consec=int(blink_min_consec),
-        draw_full_face_mesh=bool(draw_full_face_mesh),
         max_frames=int(max_frames),
     )
 
@@ -534,51 +533,51 @@ def ui_process(
 
     except Exception as e:
         import traceback
-        error_msg = f"# 处理视频时出错\n\n```\n{traceback.format_exc()}\n```"
+        error_msg = f"# Error Processing Video\n\n```\n{traceback.format_exc()}\n```"
        return None, None, None, error_msg
 
 
-demo = gr.Blocks(title="视频姿态+面部分析 (GPU加速)")
+demo = gr.Blocks(title="Video Pose + Face Analysis (GPU Accelerated)")
 
 with demo:
     gr.Markdown("""
-    ## 上传视频 → MediaPipe GPU加速姿态+面部mesh追踪 + 眨眼/肢体运动分析
+    ## Upload Video → MediaPipe GPU Acceleration Pose + Face Mesh Tracking + Blink/Limb Analysis
 
-    **特性:**
-    - ✅ GPU加速处理
-    - ✅ 新版Face Landmarker API (更精确的面部mesh)
-    - ✅ 眨眼检测 (EAR算法)
-    - ✅ 肢体运动量化
-    - ✅ 关节角度分析
+    **Features:**
+    - ✅ GPU Accelerated Processing
+    - ✅ New Face Landmarker API (more accurate 478-point face mesh)
+    - ✅ Full Face Mesh Always Enabled
+    - ✅ Blink Detection (EAR Algorithm)
+    - ✅ Limb Movement Quantification
+    - ✅ Joint Angle Analysis
     """)
 
     with gr.Row():
-        video_in = gr.Video(label="上传视频")
+        video_in = gr.Video(label="Upload Video")
 
-    with gr.Accordion("参数设置 (默认值通常就够用)", open=False):
-        gr.Markdown("### 面部检测参数")
-        min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="面部检测置信度阈值")
-        min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="面部追踪置信度阈值")
+    with gr.Accordion("Parameters (defaults work well for most cases)", open=False):
+        gr.Markdown("### Face Detection Parameters")
+        min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Detection Confidence Threshold")
+        min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Tracking Confidence Threshold")
 
-        gr.Markdown("### 姿态检测参数")
-        min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="姿态检测置信度阈值")
-        min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="姿态追踪置信度阈值")
+        gr.Markdown("### Pose Detection Parameters")
+        min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Detection Confidence Threshold")
+        min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Tracking Confidence Threshold")
 
-        gr.Markdown("### 眨眼检测参数")
-        ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 (EAR, 越小越严格)")
-        blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数 (抗抖动)")
+        gr.Markdown("### Blink Detection Parameters")
+        ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="Blink Threshold (EAR, lower = stricter)")
+        blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="Blink Minimum Consecutive Frames (anti-jitter)")
 
-        gr.Markdown("### 可视化选项")
-        draw_full_face_mesh = gr.Checkbox(value=False, label="绘制完整面部mesh (更密集,速度较慢)")
-        max_frames = gr.Number(value=0, precision=0, label="最多处理帧数 (0=全部处理,调试可设300)")
+        gr.Markdown("### Processing Options")
+        max_frames = gr.Number(value=0, precision=0, label="Maximum Frames to Process (0 = process all, set to 300 for debugging)")
 
-    run_btn = gr.Button("🚀 开始分析 (GPU加速)", variant="primary", size="lg")
+    run_btn = gr.Button("🚀 Start Analysis (GPU Accelerated)", variant="primary", size="lg")
 
     with gr.Row():
-        video_out = gr.Video(label="输出: 标注后的视频")
+        video_out = gr.Video(label="Output: Annotated Video")
     with gr.Row():
-        csv_out = gr.File(label="逐帧指标CSV")
-        json_out = gr.File(label="汇总JSON")
+        csv_out = gr.File(label="Per-Frame Metrics CSV")
+        json_out = gr.File(label="Summary JSON")
     report_out = gr.Markdown()
 
     run_btn.click(
@@ -591,7 +590,6 @@ with demo:
             min_pose_track_conf,
             ear_threshold,
             blink_min_consec,
-            draw_full_face_mesh,
             max_frames,
         ],
         outputs=[video_out, csv_out, json_out, report_out],
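One practical note on this last hunk: Gradio passes the values of the `inputs` components to the handler positionally, so removing `draw_full_face_mesh` from the click wiring only stays consistent because it was also dropped from `ui_process` and `process_video`. A sketch of the resulting call, with the earlier inputs inferred from the `ui_process` signature rather than shown in this hunk:

```python
run_btn.click(
    fn=ui_process,
    inputs=[
        video_in,
        min_face_det_conf, min_face_track_conf,
        min_pose_det_conf, min_pose_track_conf,
        ear_threshold, blink_min_consec,
        max_frames,
    ],
    outputs=[video_out, csv_out, json_out, report_out],
)
```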
 