Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -120,8 +120,8 @@ FACEMESH_CONTOURS = solutions.face_mesh.FACEMESH_CONTOURS
|
|
| 120 |
# Pose connections
|
| 121 |
POSE_CONNECTIONS = solutions.pose.POSE_CONNECTIONS
|
| 122 |
|
| 123 |
-
def draw_face_landmarks(image, face_landmarks
|
| 124 |
-
"""Draw face landmarks on image using new API format"""
|
| 125 |
if face_landmarks is None:
|
| 126 |
return
|
| 127 |
|
|
@@ -132,15 +132,16 @@ def draw_face_landmarks(image, face_landmarks, draw_full_mesh=False):
|
|
| 132 |
for lm in face_landmarks
|
| 133 |
])
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
|
|
|
| 144 |
mp_drawing.draw_landmarks(
|
| 145 |
image=image,
|
| 146 |
landmark_list=face_landmarks_proto,
|
|
@@ -210,11 +211,11 @@ def process_video(
|
|
| 210 |
min_pose_track_conf: float = 0.5,
|
| 211 |
ear_threshold: float = 0.21,
|
| 212 |
blink_min_consec: int = 2,
|
| 213 |
-
draw_full_face_mesh: bool = False,
|
| 214 |
max_frames: int = 0,
|
| 215 |
) -> Tuple[str, str, str, str]:
|
| 216 |
"""
|
| 217 |
Process video using new MediaPipe API with GPU support
|
|
|
|
| 218 |
"""
|
| 219 |
# Download models first
|
| 220 |
face_model_path, pose_model_path = download_models()
|
|
@@ -364,9 +365,9 @@ def process_video(
|
|
| 364 |
left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
|
| 365 |
right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
|
| 366 |
|
| 367 |
-
# Draw overlays
|
| 368 |
draw_pose_landmarks(frame_bgr, pose_landmarks)
|
| 369 |
-
draw_face_landmarks(frame_bgr, face_landmarks
|
| 370 |
|
| 371 |
# HUD text
|
| 372 |
hud_lines = [
|
|
@@ -458,36 +459,36 @@ def process_video(
|
|
| 458 |
with open(out_json, "w", encoding="utf-8") as f:
|
| 459 |
json.dump(summary, f, ensure_ascii=False, indent=2)
|
| 460 |
|
| 461 |
-
report_md = f"""# MediaPipe
|
| 462 |
|
| 463 |
-
##
|
| 464 |
-
-
|
| 465 |
- FPS: {fps:.2f}
|
| 466 |
-
-
|
| 467 |
-
-
|
| 468 |
-
|
| 469 |
-
##
|
| 470 |
-
-
|
| 471 |
-
-
|
| 472 |
-
-
|
| 473 |
-
-
|
| 474 |
-
-
|
| 475 |
-
-
|
| 476 |
-
|
| 477 |
-
##
|
| 478 |
-
>
|
| 479 |
-
-
|
| 480 |
-
-
|
| 481 |
-
-
|
| 482 |
-
-
|
| 483 |
-
-
|
| 484 |
-
|
| 485 |
-
##
|
| 486 |
-
- annotated.mp4:
|
| 487 |
-
- per_frame_metrics.csv:
|
| 488 |
-
- summary.json:
|
| 489 |
-
|
| 490 |
-
**
|
| 491 |
"""
|
| 492 |
with open(out_report, "w", encoding="utf-8") as f:
|
| 493 |
f.write(report_md)
|
|
@@ -506,7 +507,6 @@ def ui_process(
|
|
| 506 |
min_pose_track_conf,
|
| 507 |
ear_threshold,
|
| 508 |
blink_min_consec,
|
| 509 |
-
draw_full_face_mesh,
|
| 510 |
max_frames
|
| 511 |
):
|
| 512 |
if isinstance(video, dict) and "path" in video:
|
|
@@ -523,7 +523,6 @@ def ui_process(
|
|
| 523 |
min_pose_track_conf=float(min_pose_track_conf),
|
| 524 |
ear_threshold=float(ear_threshold),
|
| 525 |
blink_min_consec=int(blink_min_consec),
|
| 526 |
-
draw_full_face_mesh=bool(draw_full_face_mesh),
|
| 527 |
max_frames=int(max_frames),
|
| 528 |
)
|
| 529 |
|
|
@@ -534,51 +533,51 @@ def ui_process(
|
|
| 534 |
|
| 535 |
except Exception as e:
|
| 536 |
import traceback
|
| 537 |
-
error_msg = f"#
|
| 538 |
return None, None, None, error_msg
|
| 539 |
|
| 540 |
|
| 541 |
-
demo = gr.Blocks(title="
|
| 542 |
|
| 543 |
with demo:
|
| 544 |
gr.Markdown("""
|
| 545 |
-
##
|
| 546 |
|
| 547 |
-
**
|
| 548 |
-
- ✅ GPU
|
| 549 |
-
- ✅
|
| 550 |
-
- ✅
|
| 551 |
-
- ✅
|
| 552 |
-
- ✅
|
|
|
|
| 553 |
""")
|
| 554 |
|
| 555 |
with gr.Row():
|
| 556 |
-
video_in = gr.Video(label="
|
| 557 |
|
| 558 |
-
with gr.Accordion("
|
| 559 |
-
gr.Markdown("###
|
| 560 |
-
min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="
|
| 561 |
-
min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="
|
| 562 |
|
| 563 |
-
gr.Markdown("###
|
| 564 |
-
min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="
|
| 565 |
-
min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="
|
| 566 |
|
| 567 |
-
gr.Markdown("###
|
| 568 |
-
ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="
|
| 569 |
-
blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="
|
| 570 |
|
| 571 |
-
gr.Markdown("###
|
| 572 |
-
|
| 573 |
-
max_frames = gr.Number(value=0, precision=0, label="最多处理帧数 (0=全部处理,调试可设300)")
|
| 574 |
|
| 575 |
-
run_btn = gr.Button("🚀
|
| 576 |
|
| 577 |
with gr.Row():
|
| 578 |
-
video_out = gr.Video(label="
|
| 579 |
with gr.Row():
|
| 580 |
-
csv_out = gr.File(label="
|
| 581 |
-
json_out = gr.File(label="
|
| 582 |
report_out = gr.Markdown()
|
| 583 |
|
| 584 |
run_btn.click(
|
|
@@ -591,7 +590,6 @@ with demo:
|
|
| 591 |
min_pose_track_conf,
|
| 592 |
ear_threshold,
|
| 593 |
blink_min_consec,
|
| 594 |
-
draw_full_face_mesh,
|
| 595 |
max_frames,
|
| 596 |
],
|
| 597 |
outputs=[video_out, csv_out, json_out, report_out],
|
|
|
|
| 120 |
# Pose connections
|
| 121 |
POSE_CONNECTIONS = solutions.pose.POSE_CONNECTIONS
|
| 122 |
|
| 123 |
+
def draw_face_landmarks(image, face_landmarks):
|
| 124 |
+
"""Draw face landmarks on image using new API format - always draw full mesh"""
|
| 125 |
if face_landmarks is None:
|
| 126 |
return
|
| 127 |
|
|
|
|
| 132 |
for lm in face_landmarks
|
| 133 |
])
|
| 134 |
|
| 135 |
+
# Always draw full tesselation mesh
|
| 136 |
+
mp_drawing.draw_landmarks(
|
| 137 |
+
image=image,
|
| 138 |
+
landmark_list=face_landmarks_proto,
|
| 139 |
+
connections=FACEMESH_TESSELATION,
|
| 140 |
+
landmark_drawing_spec=None,
|
| 141 |
+
connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
|
| 142 |
+
)
|
| 143 |
|
| 144 |
+
# Also draw contours for clarity
|
| 145 |
mp_drawing.draw_landmarks(
|
| 146 |
image=image,
|
| 147 |
landmark_list=face_landmarks_proto,
|
|
|
|
| 211 |
min_pose_track_conf: float = 0.5,
|
| 212 |
ear_threshold: float = 0.21,
|
| 213 |
blink_min_consec: int = 2,
|
|
|
|
| 214 |
max_frames: int = 0,
|
| 215 |
) -> Tuple[str, str, str, str]:
|
| 216 |
"""
|
| 217 |
Process video using new MediaPipe API with GPU support
|
| 218 |
+
Face mesh is always drawn (not optional)
|
| 219 |
"""
|
| 220 |
# Download models first
|
| 221 |
face_model_path, pose_model_path = download_models()
|
|
|
|
| 365 |
left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
|
| 366 |
right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
|
| 367 |
|
| 368 |
+
# Draw overlays (face mesh is always drawn, not optional)
|
| 369 |
draw_pose_landmarks(frame_bgr, pose_landmarks)
|
| 370 |
+
draw_face_landmarks(frame_bgr, face_landmarks)
|
| 371 |
|
| 372 |
# HUD text
|
| 373 |
hud_lines = [
|
|
|
|
| 459 |
with open(out_json, "w", encoding="utf-8") as f:
|
| 460 |
json.dump(summary, f, ensure_ascii=False, indent=2)
|
| 461 |
|
| 462 |
+
report_md = f"""# MediaPipe Face + Pose Analysis Report (GPU Accelerated)
|
| 463 |
|
| 464 |
+
## Video Information
|
| 465 |
+
- Resolution: {width} x {height}
|
| 466 |
- FPS: {fps:.2f}
|
| 467 |
+
- Frames Processed: {len(df)}
|
| 468 |
+
- Duration: {summary["video"]["duration_s"]:.2f} seconds
|
| 469 |
+
|
| 470 |
+
## Blink Analysis (EAR)
|
| 471 |
+
- Threshold: {ear_threshold}
|
| 472 |
+
- Minimum Consecutive Frames: {blink_min_consec}
|
| 473 |
+
- Left Eye Blinks: {summary["blink"]["left_blinks"]} ({summary["blink"]["left_blinks_per_min"]:.2f} blinks/min)
|
| 474 |
+
- Right Eye Blinks: {summary["blink"]["right_blinks"]} ({summary["blink"]["right_blinks_per_min"]:.2f} blinks/min)
|
| 475 |
+
- Left Eye EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
|
| 476 |
+
- Right Eye EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
|
| 477 |
+
|
| 478 |
+
## Limb Movement (Normalized Units)
|
| 479 |
+
> Displacement/speed calculated based on normalized coordinates (0~1), suitable for relative comparison and trend analysis
|
| 480 |
+
- Total Displacement (higher = more movement):
|
| 481 |
+
- Left Wrist: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
|
| 482 |
+
- Right Wrist: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
|
| 483 |
+
- Left Ankle: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
|
| 484 |
+
- Right Ankle: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
|
| 485 |
+
|
| 486 |
+
## Output Files
|
| 487 |
+
- annotated.mp4: Video with pose skeleton and face mesh overlays
|
| 488 |
+
- per_frame_metrics.csv: Frame-by-frame metrics
|
| 489 |
+
- summary.json: Statistical summary
|
| 490 |
+
|
| 491 |
+
**Processed with GPU acceleration | New Face Landmarker API | Full Face Mesh Always Enabled**
|
| 492 |
"""
|
| 493 |
with open(out_report, "w", encoding="utf-8") as f:
|
| 494 |
f.write(report_md)
|
|
|
|
| 507 |
min_pose_track_conf,
|
| 508 |
ear_threshold,
|
| 509 |
blink_min_consec,
|
|
|
|
| 510 |
max_frames
|
| 511 |
):
|
| 512 |
if isinstance(video, dict) and "path" in video:
|
|
|
|
| 523 |
min_pose_track_conf=float(min_pose_track_conf),
|
| 524 |
ear_threshold=float(ear_threshold),
|
| 525 |
blink_min_consec=int(blink_min_consec),
|
|
|
|
| 526 |
max_frames=int(max_frames),
|
| 527 |
)
|
| 528 |
|
|
|
|
| 533 |
|
| 534 |
except Exception as e:
|
| 535 |
import traceback
|
| 536 |
+
error_msg = f"# Error Processing Video\n\n```\n{traceback.format_exc()}\n```"
|
| 537 |
return None, None, None, error_msg
|
| 538 |
|
| 539 |
|
| 540 |
+
demo = gr.Blocks(title="Video Pose + Face Analysis (GPU Accelerated)")
|
| 541 |
|
| 542 |
with demo:
|
| 543 |
gr.Markdown("""
|
| 544 |
+
## Upload Video → MediaPipe GPU Acceleration → Pose + Face Mesh Tracking + Blink/Limb Analysis
|
| 545 |
|
| 546 |
+
**Features:**
|
| 547 |
+
- ✅ GPU Accelerated Processing
|
| 548 |
+
- ✅ New Face Landmarker API (more accurate 478-point face mesh)
|
| 549 |
+
- ✅ Full Face Mesh Always Enabled
|
| 550 |
+
- ✅ Blink Detection (EAR Algorithm)
|
| 551 |
+
- ✅ Limb Movement Quantification
|
| 552 |
+
- ✅ Joint Angle Analysis
|
| 553 |
""")
|
| 554 |
|
| 555 |
with gr.Row():
|
| 556 |
+
video_in = gr.Video(label="Upload Video")
|
| 557 |
|
| 558 |
+
with gr.Accordion("Parameters (defaults work well for most cases)", open=False):
|
| 559 |
+
gr.Markdown("### Face Detection Parameters")
|
| 560 |
+
min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Detection Confidence Threshold")
|
| 561 |
+
min_face_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face Tracking Confidence Threshold")
|
| 562 |
|
| 563 |
+
gr.Markdown("### Pose Detection Parameters")
|
| 564 |
+
min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Detection Confidence Threshold")
|
| 565 |
+
min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose Tracking Confidence Threshold")
|
| 566 |
|
| 567 |
+
gr.Markdown("### Blink Detection Parameters")
|
| 568 |
+
ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="Blink Threshold (EAR, lower = stricter)")
|
| 569 |
+
blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="Blink Minimum Consecutive Frames (anti-jitter)")
|
| 570 |
|
| 571 |
+
gr.Markdown("### Processing Options")
|
| 572 |
+
max_frames = gr.Number(value=0, precision=0, label="Maximum Frames to Process (0 = process all, set to 300 for debugging)")
|
|
|
|
| 573 |
|
| 574 |
+
run_btn = gr.Button("🚀 Start Analysis (GPU Accelerated)", variant="primary", size="lg")
|
| 575 |
|
| 576 |
with gr.Row():
|
| 577 |
+
video_out = gr.Video(label="Output: Annotated Video")
|
| 578 |
with gr.Row():
|
| 579 |
+
csv_out = gr.File(label="Per-Frame Metrics CSV")
|
| 580 |
+
json_out = gr.File(label="Summary JSON")
|
| 581 |
report_out = gr.Markdown()
|
| 582 |
|
| 583 |
run_btn.click(
|
|
|
|
| 590 |
min_pose_track_conf,
|
| 591 |
ear_threshold,
|
| 592 |
blink_min_consec,
|
|
|
|
| 593 |
max_frames,
|
| 594 |
],
|
| 595 |
outputs=[video_out, csv_out, json_out, report_out],
|