seesaw112233 committed on
Commit
f4c0656
·
verified ·
1 Parent(s): 9b73e8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -355
app.py CHANGED
@@ -9,15 +9,11 @@ import cv2
9
  import numpy as np
10
  import pandas as pd
11
  import gradio as gr
12
-
13
- # Headless plotting (HF Spaces safe)
14
- import matplotlib
15
- matplotlib.use("Agg")
16
  import matplotlib.pyplot as plt
17
 
18
  import mediapipe as mp
19
- from mediapipe.tasks import python as mp_python
20
- from mediapipe.tasks.python import vision as mp_vision
21
  from mediapipe.framework.formats import landmark_pb2
22
 
23
 
@@ -45,9 +41,6 @@ def eye_aspect_ratio(pts: Dict[int, np.ndarray], idx: List[int]) -> Optional[flo
45
  return _safe_div((A + B), (2.0 * C))
46
 
47
  def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
48
- """
49
- angle at point b in degrees formed by a-b-c
50
- """
51
  ba = a - b
52
  bc = c - b
53
  nba = np.linalg.norm(ba)
@@ -58,15 +51,23 @@ def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
58
  cosang = max(-1.0, min(1.0, cosang))
59
  return float(np.degrees(np.arccos(cosang)))
60
 
 
 
 
 
 
 
 
 
 
61
 
62
  # -------------------------
63
- # MediaPipe indices
64
  # -------------------------
65
  LEFT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
66
  RIGHT_EYE_EAR_IDX = [362, 385, 387, 263, 373, 380]
67
- NEEDED_FACE_IDXS = sorted(set(LEFT_EYE_EAR_IDX + RIGHT_EYE_EAR_IDX))
68
 
69
- # Pose landmark enum mapping (MediaPipe Pose)
70
  POSE = mp.solutions.pose
71
  POSE_LM = POSE.PoseLandmark
72
 
@@ -89,53 +90,43 @@ JOINTS = {
89
 
90
 
91
  # -------------------------
92
- # Drawing (Tasks results)
93
  # -------------------------
94
  mp_drawing = mp.solutions.drawing_utils
95
- mp_drawing_styles = mp.solutions.drawing_styles
96
  mp_face_mesh = mp.solutions.face_mesh
97
 
98
- LIGHT_MESH = mp_drawing.DrawingSpec(color=(245, 245, 245), thickness=1, circle_radius=1)
99
-
100
- def _to_nll_from_tasks_landmarks(tasks_landmarks) -> landmark_pb2.NormalizedLandmarkList:
101
- # tasks_landmarks: list[NormalizedLandmark] (has x,y,z,visibility,presence sometimes)
102
- nll = landmark_pb2.NormalizedLandmarkList(
103
- landmark=[
104
- landmark_pb2.NormalizedLandmark(
105
- x=float(lm.x),
106
- y=float(lm.y),
107
- z=float(getattr(lm, "z", 0.0)),
108
- visibility=float(getattr(lm, "visibility", 0.0)) if hasattr(lm, "visibility") else 0.0,
109
- presence=float(getattr(lm, "presence", 0.0)) if hasattr(lm, "presence") else 0.0,
110
- )
111
- for lm in tasks_landmarks
112
- ]
113
  )
114
- return nll
115
 
116
- def draw_pose_tasks(image_bgr, pose_res):
117
- # pose_res.pose_landmarks: list[list[NormalizedLandmark]]
118
  if not pose_res.pose_landmarks:
119
  return
120
- nll = _to_nll_from_tasks_landmarks(pose_res.pose_landmarks[0])
 
121
  mp_drawing.draw_landmarks(
122
  image=image_bgr,
123
  landmark_list=nll,
124
  connections=POSE.POSE_CONNECTIONS,
125
- landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style(),
 
126
  )
127
 
128
- def draw_face_mesh_light(image_bgr, face_res):
129
- # face_res.face_landmarks: list[list[NormalizedLandmark]]
130
  if not face_res.face_landmarks:
131
  return
132
- nll = _to_nll_from_tasks_landmarks(face_res.face_landmarks[0])
 
 
 
133
  mp_drawing.draw_landmarks(
134
  image=image_bgr,
135
  landmark_list=nll,
136
  connections=mp_face_mesh.FACEMESH_TESSELATION,
137
  landmark_drawing_spec=None,
138
- connection_drawing_spec=LIGHT_MESH,
139
  )
140
 
141
 
@@ -164,69 +155,7 @@ def update_blink(state: BlinkState, ear: Optional[float], thr: float, min_consec
164
 
165
 
166
  # -------------------------
167
- # Task landmarker creation (GPU delegate with fallback)
168
- # -------------------------
169
- def create_pose_landmarker(
170
- model_path: str,
171
- min_pose_det_conf: float,
172
- min_pose_track_conf: float,
173
- use_gpu: bool = True,
174
- ):
175
- BaseOptions = mp_python.BaseOptions
176
- RunningMode = mp_vision.RunningMode
177
-
178
- def _make(delegate):
179
- opts = mp_vision.PoseLandmarkerOptions(
180
- base_options=BaseOptions(model_asset_path=model_path, delegate=delegate),
181
- running_mode=RunningMode.VIDEO,
182
- num_poses=1,
183
- min_pose_detection_confidence=float(min_pose_det_conf),
184
- min_pose_presence_confidence=float(min_pose_det_conf),
185
- min_tracking_confidence=float(min_pose_track_conf),
186
- )
187
- return mp_vision.PoseLandmarker.create_from_options(opts)
188
-
189
- if use_gpu:
190
- try:
191
- return _make(BaseOptions.Delegate.GPU), "GPU"
192
- except Exception:
193
- # Fallback to CPU
194
- return _make(BaseOptions.Delegate.CPU), "CPU(Fallback)"
195
- else:
196
- return _make(BaseOptions.Delegate.CPU), "CPU"
197
-
198
- def create_face_landmarker(
199
- model_path: str,
200
- min_face_det_conf: float,
201
- use_gpu: bool = True,
202
- ):
203
- BaseOptions = mp_python.BaseOptions
204
- RunningMode = mp_vision.RunningMode
205
-
206
- def _make(delegate):
207
- opts = mp_vision.FaceLandmarkerOptions(
208
- base_options=BaseOptions(model_asset_path=model_path, delegate=delegate),
209
- running_mode=RunningMode.VIDEO,
210
- num_faces=1,
211
- min_face_detection_confidence=float(min_face_det_conf),
212
- min_face_presence_confidence=float(min_face_det_conf),
213
- min_tracking_confidence=float(min_face_det_conf),
214
- # NOTE: FaceLandmarker has extra options (output_face_blendshapes, output_facial_transformation_matrixes)
215
- # We keep them off for speed.
216
- )
217
- return mp_vision.FaceLandmarker.create_from_options(opts)
218
-
219
- if use_gpu:
220
- try:
221
- return _make(BaseOptions.Delegate.GPU), "GPU"
222
- except Exception:
223
- return _make(BaseOptions.Delegate.CPU), "CPU(Fallback)"
224
- else:
225
- return _make(BaseOptions.Delegate.CPU), "CPU"
226
-
227
-
228
- # -------------------------
229
- # Core processing
230
  # -------------------------
231
  def process_video(
232
  video_path: str,
@@ -241,20 +170,16 @@ def process_video(
241
  ear_threshold: float = 0.21,
242
  blink_min_consec: int = 2,
243
 
244
- max_frames: int = 0, # 0 => all
 
 
 
 
245
  ) -> Tuple[str, str, str, str, str]:
246
  """
247
  Returns:
248
- annotated_video_path, csv_path, json_path, plot_png_path, report_md_path
249
  """
250
- if not os.path.exists(video_path):
251
- raise RuntimeError("Video path not found.")
252
-
253
- if not os.path.exists(pose_model_path):
254
- raise RuntimeError(f"Pose model not found: {pose_model_path} (请把 .task 模型放到这个路径)")
255
- if not os.path.exists(face_model_path):
256
- raise RuntimeError(f"Face model not found: {face_model_path} (请把 .task 模型放到这个路径)")
257
-
258
  cap = cv2.VideoCapture(video_path)
259
  if not cap.isOpened():
260
  raise RuntimeError("Cannot open video. Please upload a valid video file.")
@@ -262,10 +187,19 @@ def process_video(
262
  fps = cap.get(cv2.CAP_PROP_FPS)
263
  if fps <= 1e-6:
264
  fps = 30.0
265
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
266
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
267
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
268
 
 
 
 
 
 
 
 
 
269
  tmpdir = tempfile.mkdtemp(prefix="mp_tasks_analysis_")
270
  out_video = os.path.join(tmpdir, "annotated.mp4")
271
  out_csv = os.path.join(tmpdir, "per_frame_metrics.csv")
@@ -276,19 +210,44 @@ def process_video(
276
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
277
  writer = cv2.VideoWriter(out_video, fourcc, fps, (width, height))
278
 
279
- # Create task landmarkers
280
- pose_landmarker, pose_device = create_pose_landmarker(
281
- model_path=pose_model_path,
282
- min_pose_det_conf=min_pose_det_conf,
283
- min_pose_track_conf=min_pose_track_conf,
284
- use_gpu=use_gpu_delegate,
285
- )
286
- face_landmarker, face_device = create_face_landmarker(
287
- model_path=face_model_path,
288
- min_face_det_conf=min_face_det_conf,
289
- use_gpu=use_gpu_delegate,
290
- )
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  rows = []
293
  left_blink = BlinkState()
294
  right_blink = BlinkState()
@@ -301,177 +260,159 @@ def process_video(
301
  eye_area_diff_series = []
302
 
303
  frame_idx = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  try:
305
- while True:
306
- ok, frame_bgr = cap.read()
307
- if not ok:
308
- break
309
- frame_idx += 1
310
- if max_frames and frame_idx > max_frames:
311
- break
312
-
313
- frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
314
- mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
315
- timestamp_ms = int((frame_idx - 1) * 1000.0 / fps)
316
-
317
- pose_res = pose_landmarker.detect_for_video(mp_image, timestamp_ms)
318
- face_res = face_landmarker.detect_for_video(mp_image, timestamp_ms)
319
-
320
- # ---- Face points (ONLY needed idxs) in pixel coords
321
- face_pts: Dict[int, np.ndarray] = {}
322
- if face_res.face_landmarks:
323
- lms = face_res.face_landmarks[0]
324
- for i in NEEDED_FACE_IDXS:
325
- lm = lms[i]
326
- face_pts[i] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
327
-
328
- # EAR
329
- left_ear = eye_aspect_ratio(face_pts, LEFT_EYE_EAR_IDX)
330
- right_ear = eye_aspect_ratio(face_pts, RIGHT_EYE_EAR_IDX)
331
-
332
- left_blink = update_blink(left_blink, left_ear, ear_threshold, blink_min_consec)
333
- right_blink = update_blink(right_blink, right_ear, ear_threshold, blink_min_consec)
334
-
335
- # Eye area + area diff (pixel^2)
336
- def poly_area(idxs):
337
- pts = [face_pts.get(i) for i in idxs]
338
- if any(p is None for p in pts):
339
- return None
340
- cnt = np.array(pts, dtype=np.float32)
341
- return float(cv2.contourArea(cnt))
342
-
343
- left_eye_area = poly_area(LEFT_EYE_EAR_IDX)
344
- right_eye_area = poly_area(RIGHT_EYE_EAR_IDX)
345
-
346
- def area_diff(cur, key):
347
- prev = prev_eye_area[key]
348
- prev_eye_area[key] = cur
349
- if cur is None:
350
- return None
351
- if prev is None:
352
- return 0.0
353
- return float(abs(cur - prev))
354
-
355
- left_eye_area_diff = area_diff(left_eye_area, "L")
356
- right_eye_area_diff = area_diff(right_eye_area, "R")
357
- eye_area_diff_total = sum(v for v in [left_eye_area_diff, right_eye_area_diff] if v is not None)
358
-
359
- # ---- Pose pixel coords
360
- pose_px: Dict[str, Optional[np.ndarray]] = {}
361
- if pose_res.pose_landmarks:
362
- lms = pose_res.pose_landmarks[0]
363
- for name, idx in JOINTS.items():
364
- lm = lms[idx]
365
- pose_px[name] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
366
- else:
367
- for name in JOINTS:
368
- pose_px[name] = None
369
-
370
- def pixel_disp(key: str):
371
- cur = pose_px.get(key)
372
- if cur is None:
373
- return None
374
- prev = prev_pose_px.get(key)
375
- prev_pose_px[key] = cur
376
- if prev is None:
377
- return 0.0
378
- return float(np.linalg.norm(cur - prev))
379
-
380
- lw_pix = pixel_disp("left_wrist")
381
- rw_pix = pixel_disp("right_wrist")
382
- la_pix = pixel_disp("left_ankle")
383
- ra_pix = pixel_disp("right_ankle")
384
- limbs_pix_total = sum(v for v in [lw_pix, rw_pix, la_pix, ra_pix] if v is not None)
385
-
386
- # Joint angles (pixel coords)
387
- def get_angle(a, b, c):
388
- if a is None or b is None or c is None:
389
- return None
390
- return angle_3pts(a, b, c)
391
-
392
- left_elbow_ang = get_angle(pose_px["left_shoulder"], pose_px["left_elbow"], pose_px["left_wrist"])
393
- right_elbow_ang = get_angle(pose_px["right_shoulder"], pose_px["right_elbow"], pose_px["right_wrist"])
394
- left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
395
- right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
396
-
397
- # ---- Draw overlays
398
- draw_pose_tasks(frame_bgr, pose_res)
399
- draw_face_mesh_light(frame_bgr, face_res)
400
-
401
- # HUD
402
- hud_lines = [
403
- f"frame: {frame_idx}/{total_frames if total_frames>0 else '?'} fps:{fps:.1f}",
404
- f"Pose:{pose_device} Face:{face_device} GPU_req:{use_gpu_delegate}",
405
- f"EAR L:{left_ear:.3f}" if left_ear is not None else "EAR L:None",
406
- f"EAR R:{right_ear:.3f}" if right_ear is not None else "EAR R:None",
407
- f"Blink L:{left_blink.blink_count} R:{right_blink.blink_count}",
408
- f"Limb pix disp(sum): {limbs_pix_total:.2f}" if limbs_pix_total is not None else "Limb pix disp(sum): None",
409
- f"Eye area diff(sum): {eye_area_diff_total:.2f}" if eye_area_diff_total is not None else "Eye area diff(sum): None",
410
- ]
411
- y0 = 24
412
- for line in hud_lines:
413
- cv2.putText(frame_bgr, line, (12, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
414
- y0 += 20
415
-
416
- writer.write(frame_bgr)
417
-
418
- t = (frame_idx - 1) / fps
419
- times.append(t)
420
- limb_pix_series.append(float(limbs_pix_total) if limbs_pix_total is not None else 0.0)
421
- eye_area_diff_series.append(float(eye_area_diff_total) if eye_area_diff_total is not None else 0.0)
422
-
423
- rows.append({
424
- "frame": frame_idx,
425
- "time_s": t,
426
-
427
- "left_ear": left_ear,
428
- "right_ear": right_ear,
429
-
430
- "left_eye_area_px2": left_eye_area,
431
- "right_eye_area_px2": right_eye_area,
432
- "left_eye_area_diff_px2": left_eye_area_diff,
433
- "right_eye_area_diff_px2": right_eye_area_diff,
434
- "eye_area_diff_total_px2": eye_area_diff_total,
435
-
436
- "lw_pix_disp": lw_pix,
437
- "rw_pix_disp": rw_pix,
438
- "la_pix_disp": la_pix,
439
- "ra_pix_disp": ra_pix,
440
- "limbs_pix_disp_total": limbs_pix_total,
441
-
442
- "left_elbow_angle": left_elbow_ang,
443
- "right_elbow_angle": right_elbow_ang,
444
- "left_knee_angle": left_knee_ang,
445
- "right_knee_angle": right_knee_ang,
446
- })
447
-
448
- finally:
449
- cap.release()
450
- writer.release()
451
- # Close landmarkers
452
- try:
453
- pose_landmarker.close()
454
- except Exception:
455
- pass
456
- try:
457
- face_landmarker.close()
458
- except Exception:
459
- pass
460
 
461
  df = pd.DataFrame(rows)
462
 
463
- # Plot
464
- plt.figure()
465
- plt.plot(times, limb_pix_series, label="Limb pixel displacement (sum)")
466
- plt.plot(times, eye_area_diff_series, label="Eye area pixel diff (sum, px^2)")
467
- plt.xlabel("Time (s)")
468
- plt.ylabel("Pixel difference")
469
- plt.legend()
470
- plt.tight_layout()
471
- plt.savefig(out_plot, dpi=150)
472
- plt.close()
473
-
474
- # Summaries
475
  def _sum_series(s: pd.Series):
476
  s2 = s.dropna()
477
  if len(s2) == 0:
@@ -485,13 +426,8 @@ def process_video(
485
  "height": int(height),
486
  "frames_processed": int(len(df)),
487
  "duration_s": float(len(df) / fps) if len(df) else 0.0,
488
- },
489
- "runtime": {
490
- "use_gpu_delegate_requested": bool(use_gpu_delegate),
491
- "pose_device": str(pose_device),
492
- "face_device": str(face_device),
493
- "pose_model_path": str(pose_model_path),
494
- "face_model_path": str(face_model_path),
495
  },
496
  "blink": {
497
  "ear_threshold": float(ear_threshold),
@@ -500,48 +436,40 @@ def process_video(
500
  "right_blinks": int(right_blink.blink_count),
501
  "left_blinks_per_min": float(_safe_div(left_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
502
  "right_blinks_per_min": float(_safe_div(right_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
503
- "left_ear_stats": _sum_series(df["left_ear"]) if "left_ear" in df else {"mean": None, "min": None, "max": None},
504
- "right_ear_stats": _sum_series(df["right_ear"]) if "right_ear" in df else {"mean": None, "min": None, "max": None},
505
- "left_eye_area_diff_stats_px2": _sum_series(df["left_eye_area_diff_px2"]) if "left_eye_area_diff_px2" in df else {"mean": None, "min": None, "max": None},
506
- "right_eye_area_diff_stats_px2": _sum_series(df["right_eye_area_diff_px2"]) if "right_eye_area_diff_px2" in df else {"mean": None, "min": None, "max": None},
507
  },
508
- "limb_motion_pixel": {
509
- "total_disp_px": {
510
- "left_wrist": float(df["lw_pix_disp"].fillna(0).sum()) if "lw_pix_disp" in df else 0.0,
511
- "right_wrist": float(df["rw_pix_disp"].fillna(0).sum()) if "rw_pix_disp" in df else 0.0,
512
- "left_ankle": float(df["la_pix_disp"].fillna(0).sum()) if "la_pix_disp" in df else 0.0,
513
- "right_ankle": float(df["ra_pix_disp"].fillna(0).sum()) if "ra_pix_disp" in df else 0.0,
514
- "sum_limbs": float(df["limbs_pix_disp_total"].fillna(0).sum()) if "limbs_pix_disp_total" in df else 0.0,
515
- },
516
- "per_frame_sum_stats_px": _sum_series(df["limbs_pix_disp_total"]) if "limbs_pix_disp_total" in df else {"mean": None, "min": None, "max": None},
517
- "angle_stats_deg": {
518
- "left_elbow": _sum_series(df["left_elbow_angle"]) if "left_elbow_angle" in df else {"mean": None, "min": None, "max": None},
519
- "right_elbow": _sum_series(df["right_elbow_angle"]) if "right_elbow_angle" in df else {"mean": None, "min": None, "max": None},
520
- "left_knee": _sum_series(df["left_knee_angle"]) if "left_knee_angle" in df else {"mean": None, "min": None, "max": None},
521
- "right_knee": _sum_series(df["right_knee_angle"]) if "right_knee_angle" in df else {"mean": None, "min": None, "max": None},
522
- }
523
  }
524
  }
525
 
526
- # Save outputs
527
  df.to_csv(out_csv, index=False)
528
  with open(out_json, "w", encoding="utf-8") as f:
529
  json.dump(summary, f, ensure_ascii=False, indent=2)
530
 
531
- report_md = f"""# MediaPipe Tasks (GPU Delegate) 分析报告
 
 
 
 
 
 
 
 
 
 
 
532
 
533
  ## 视频信息
534
  - 分辨率: {width} x {height}
535
  - FPS: {fps:.2f}
536
  - 处理帧数: {len(df)}
537
  - 时长(秒): {summary["video"]["duration_s"]:.2f}
538
-
539
- ## 运行设备
540
- - 请求 GPU delegate: {use_gpu_delegate}
541
- - Pose 实际设备: {pose_device}
542
- - Face 实际设备: {face_device}
543
-
544
- > 如果这里显示 CPU(Fallback),说明 GPU delegate 初始化失败(例如环境没 GPU 或驱动/依赖不匹配)。
545
 
546
  ## 眨眼分析(EAR)
547
  - 阈值: {ear_threshold}
@@ -551,23 +479,16 @@ def process_video(
551
  - 左眼 EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
552
  - 右眼 EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
553
 
554
- ## 眼睛面积变化pixel^2
555
- - 左眼面积变化: mean={summary["blink"]["left_eye_area_diff_stats_px2"]["mean"]} min={summary["blink"]["left_eye_area_diff_stats_px2"]["min"]} max={summary["blink"]["left_eye_area_diff_stats_px2"]["max"]}
556
- - 眼面积变化: mean={summary["blink"]["right_eye_area_diff_stats_px2"]["mean"]} min={summary["blink"]["right_eye_area_diff_stats_px2"]["min"]} max={summary["blink"]["right_eye_area_diff_stats_px2"]["max"]}
557
-
558
- ## 四肢运动像素位移(pixel)
559
- - 累计位移(像素):
560
- - 左手腕: {summary["limb_motion_pixel"]["total_disp_px"]["left_wrist"]:.2f}
561
- - 右手腕: {summary["limb_motion_pixel"]["total_disp_px"]["right_wrist"]:.2f}
562
- - 左脚踝: {summary["limb_motion_pixel"]["total_disp_px"]["left_ankle"]:.2f}
563
- - 右脚踝: {summary["limb_motion_pixel"]["total_disp_px"]["right_ankle"]:.2f}
564
- - 四肢合计: {summary["limb_motion_pixel"]["total_disp_px"]["sum_limbs"]:.2f}
565
 
566
  ## 输出文件
567
- - annotated.mp4:叠加 Pose + 浅色 FaceMesh
568
- - per_frame_metrics.csv:逐帧指标(四肢像素位移眼睛面积变化等
569
  - summary.json:汇总统计
570
- - motion_eye_timeseries.png:时间序列曲线图(横轴时间)
571
  """
572
  with open(out_report, "w", encoding="utf-8") as f:
573
  f.write(report_md)
@@ -588,7 +509,10 @@ def ui_process(
588
  min_face_det_conf,
589
  ear_threshold,
590
  blink_min_consec,
591
- max_frames,
 
 
 
592
  ):
593
  if isinstance(video, dict) and "path" in video:
594
  video_path = video["path"]
@@ -600,11 +524,18 @@ def ui_process(
600
  pose_model_path=str(pose_model_path),
601
  face_model_path=str(face_model_path),
602
  use_gpu_delegate=bool(use_gpu_delegate),
 
603
  min_pose_det_conf=float(min_pose_det_conf),
604
  min_pose_track_conf=float(min_pose_track_conf),
605
  min_face_det_conf=float(min_face_det_conf),
 
606
  ear_threshold=float(ear_threshold),
607
  blink_min_consec=int(blink_min_consec),
 
 
 
 
 
608
  max_frames=int(max_frames),
609
  )
610
 
@@ -614,41 +545,41 @@ def ui_process(
614
  return out_video, out_csv, out_json, out_plot, report_text
615
 
616
 
617
- demo = gr.Blocks(title="Video Pose + FaceLandmarks + Blink/Limb Analytics (GPU Delegate)")
618
 
619
  with demo:
620
- gr.Markdown(
621
- "## 上传视频 → MediaPipe Tasks (Pose+FaceLandmarker) → 四肢像素位移 & 眼睛面积变化(时间序列)\n\n"
622
- "- 需要你把 `.task` 模型放到指定路径(默认:`models/pose_landmarker_lite.task`、`models/face_landmarker.task`)\n"
623
- "- 勾选 GPU delegate 后,若环境不支持会自动 fallback 到 CPU,并在报告里显示。"
624
- )
625
 
626
  with gr.Row():
627
  video_in = gr.Video(label="上传视频", sources=["upload"])
628
 
629
- with gr.Accordion("模型与参数", open=False):
630
  pose_model_path = gr.Textbox(value="models/pose_landmarker_lite.task", label="Pose .task 路径")
631
  face_model_path = gr.Textbox(value="models/face_landmarker.task", label="Face .task 路径")
632
- use_gpu_delegate = gr.Checkbox(value=True, label="使用 GPU delegate(不支持会自动回退 CPU)")
 
 
633
 
 
634
  min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_detection_confidence")
635
  min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_tracking_confidence")
636
  min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face min_detection_confidence")
637
-
638
  ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 EAR(越小越严格)")
639
  blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数(抗抖动)")
640
 
641
- max_frames = gr.Number(value=0, precision=0, label="最多处理帧数(0=全处理,调试设 300)")
 
 
642
 
643
  run_btn = gr.Button("开始分析")
644
 
645
  with gr.Row():
646
- video_out = gr.Video(label="输出:叠加标注视频(浅色 FaceMesh)")
647
- with gr.Row():
648
- plot_out = gr.Image(label="输出:时间序列图(四肢像素位移 & 眼睛面积变化)")
649
  with gr.Row():
650
  csv_out = gr.File(label="逐帧指标 CSV(per_frame_metrics.csv)")
651
  json_out = gr.File(label="汇总 JSON(summary.json)")
 
 
652
  report_out = gr.Markdown()
653
 
654
  run_btn.click(
@@ -663,6 +594,9 @@ with demo:
663
  min_face_det_conf,
664
  ear_threshold,
665
  blink_min_consec,
 
 
 
666
  max_frames,
667
  ],
668
  outputs=[video_out, csv_out, json_out, plot_out, report_out],
 
9
  import numpy as np
10
  import pandas as pd
11
  import gradio as gr
 
 
 
 
12
  import matplotlib.pyplot as plt
13
 
14
  import mediapipe as mp
15
+ from mediapipe.tasks import python
16
+ from mediapipe.tasks.python import vision
17
  from mediapipe.framework.formats import landmark_pb2
18
 
19
 
 
41
  return _safe_div((A + B), (2.0 * C))
42
 
43
  def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
 
 
 
44
  ba = a - b
45
  bc = c - b
46
  nba = np.linalg.norm(ba)
 
51
  cosang = max(-1.0, min(1.0, cosang))
52
  return float(np.degrees(np.arccos(cosang)))
53
 
54
def poly_area(pts: Dict[int, np.ndarray], idxs: List[int]) -> Optional[float]:
    """Return the area of the polygon whose vertices are ``pts[i]`` for ``i`` in ``idxs``.

    Args:
        pts: Mapping from landmark index to a point (callers store 2-element
            pixel-coordinate arrays).
        idxs: Landmark indices, in contour order, that make up the polygon.

    Returns:
        The value of ``cv2.contourArea`` for the selected points as a float,
        or ``None`` when ``idxs`` is empty or any requested index is missing
        from ``pts``.
    """
    # No indices means no contour: report "unavailable" instead of handing an
    # empty array to cv2.contourArea.
    if not idxs:
        return None
    # A partially detected eye cannot produce a meaningful area.
    if any(i not in pts for i in idxs):
        return None
    contour = np.array([pts[i] for i in idxs], dtype=np.float32)
    return float(cv2.contourArea(contour))
62
+
63
 
64
  # -------------------------
65
+ # Indices
66
  # -------------------------
67
  LEFT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
68
  RIGHT_EYE_EAR_IDX = [362, 385, 387, 263, 373, 380]
69
+ NEEDED_FACE_IDX = set(LEFT_EYE_EAR_IDX + RIGHT_EYE_EAR_IDX)
70
 
 
71
  POSE = mp.solutions.pose
72
  POSE_LM = POSE.PoseLandmark
73
 
 
90
 
91
 
92
  # -------------------------
93
+ # Drawing helpers (Tasks output -> draw_landmarks)
94
  # -------------------------
95
  mp_drawing = mp.solutions.drawing_utils
 
96
  mp_face_mesh = mp.solutions.face_mesh
97
 
98
def _to_normalized_landmark_list(lms) -> landmark_pb2.NormalizedLandmarkList:
    """Copy an iterable of landmarks into a ``NormalizedLandmarkList`` proto.

    Only ``x``, ``y`` and ``z`` are carried over; ``z`` falls back to 0.0 when
    a landmark object has no ``z`` attribute.
    """
    converted = []
    for point in lms:
        proto_point = landmark_pb2.NormalizedLandmark(
            x=point.x,
            y=point.y,
            z=getattr(point, "z", 0.0),
        )
        converted.append(proto_point)
    return landmark_pb2.NormalizedLandmarkList(landmark=converted)
 
102
 
103
def draw_pose_from_tasks(image_bgr, pose_res):
    """Draw the first pose in ``pose_res`` onto ``image_bgr``.

    Nothing is drawn when ``pose_res.pose_landmarks`` is empty. The call passes
    ``landmark_drawing_spec=None`` and supplies only a connection spec
    (thickness 2, circle radius 1). No value is returned.
    """
    detected = pose_res.pose_landmarks
    if detected:
        line_spec = mp_drawing.DrawingSpec(thickness=2, circle_radius=1)
        # Only the first detected pose is rendered.
        mp_drawing.draw_landmarks(
            image=image_bgr,
            landmark_list=_to_normalized_landmark_list(detected[0]),
            connections=POSE.POSE_CONNECTIONS,
            landmark_drawing_spec=None,
            connection_drawing_spec=line_spec,
        )
115
 
116
def draw_face_mesh_light(image_bgr, face_res, lightness: int = 245):
    """Draw the first face's tessellation mesh onto ``image_bgr`` in a grey tone.

    Args:
        image_bgr: Image passed to ``mp_drawing.draw_landmarks``.
        face_res: Result object exposing ``face_landmarks``; nothing is drawn
            when that attribute is empty.
        lightness: Value used for all three colour channels of the mesh lines
            (0~255, bigger => lighter).
    """
    faces = face_res.face_landmarks
    if faces:
        grey = (lightness, lightness, lightness)
        mesh_spec = mp_drawing.DrawingSpec(color=grey, thickness=1, circle_radius=1)
        # Only the first detected face is rendered; landmark points get no spec.
        mp_drawing.draw_landmarks(
            image=image_bgr,
            landmark_list=_to_normalized_landmark_list(faces[0]),
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=mesh_spec,
        )
131
 
132
 
 
155
 
156
 
157
  # -------------------------
158
+ # Core processing (Tasks + GPU delegate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # -------------------------
160
  def process_video(
161
  video_path: str,
 
170
  ear_threshold: float = 0.21,
171
  blink_min_consec: int = 2,
172
 
173
+ draw_face_mesh: bool = True,
174
+ face_mesh_lightness: int = 245,
175
+
176
+ resize_width: int = 0, # 0 => no resize; e.g. 640 to speed up
177
+ max_frames: int = 0, # 0 => all
178
  ) -> Tuple[str, str, str, str, str]:
179
  """
180
  Returns:
181
+ annotated_video_path, csv_path, json_path, plot_path, report_md_path
182
  """
 
 
 
 
 
 
 
 
183
  cap = cv2.VideoCapture(video_path)
184
  if not cap.isOpened():
185
  raise RuntimeError("Cannot open video. Please upload a valid video file.")
 
187
  fps = cap.get(cv2.CAP_PROP_FPS)
188
  if fps <= 1e-6:
189
  fps = 30.0
190
+
191
+ orig_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
192
+ orig_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
193
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
194
 
195
+ # optional resize target
196
+ if resize_width and resize_width > 0 and orig_w > 0:
197
+ scale = resize_width / float(orig_w)
198
+ width = int(orig_w * scale)
199
+ height = int(orig_h * scale)
200
+ else:
201
+ width, height = orig_w, orig_h
202
+
203
  tmpdir = tempfile.mkdtemp(prefix="mp_tasks_analysis_")
204
  out_video = os.path.join(tmpdir, "annotated.mp4")
205
  out_csv = os.path.join(tmpdir, "per_frame_metrics.csv")
 
210
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
211
  writer = cv2.VideoWriter(out_video, fourcc, fps, (width, height))
212
 
213
+ # ---- MediaPipe Tasks init ----
214
+ BaseOptions = python.BaseOptions
215
+ RunningMode = vision.RunningMode
216
+
217
+ delegate = BaseOptions.Delegate.GPU if use_gpu_delegate else BaseOptions.Delegate.CPU
 
 
 
 
 
 
 
218
 
219
+ def _create_landmarkers(delegate_to_use):
220
+ pose_options = vision.PoseLandmarkerOptions(
221
+ base_options=BaseOptions(model_asset_path=pose_model_path, delegate=delegate_to_use),
222
+ running_mode=RunningMode.VIDEO,
223
+ num_poses=1,
224
+ min_pose_detection_confidence=min_pose_det_conf,
225
+ min_pose_presence_confidence=min_pose_det_conf,
226
+ min_tracking_confidence=min_pose_track_conf,
227
+ )
228
+ face_options = vision.FaceLandmarkerOptions(
229
+ base_options=BaseOptions(model_asset_path=face_model_path, delegate=delegate_to_use),
230
+ running_mode=RunningMode.VIDEO,
231
+ num_faces=1,
232
+ min_face_detection_confidence=min_face_det_conf,
233
+ min_face_presence_confidence=min_face_det_conf,
234
+ min_tracking_confidence=min_face_det_conf,
235
+ )
236
+ pose_landmarker = vision.PoseLandmarker.create_from_options(pose_options)
237
+ face_landmarker = vision.FaceLandmarker.create_from_options(face_options)
238
+ return pose_landmarker, face_landmarker
239
+
240
+ # try GPU, fallback to CPU if GPU delegate fails (HF 有时环境/驱动不齐)
241
+ try:
242
+ pose_landmarker, face_landmarker = _create_landmarkers(delegate)
243
+ delegate_used = "GPU" if use_gpu_delegate else "CPU"
244
+ except Exception as e:
245
+ # fallback
246
+ pose_landmarker, face_landmarker = _create_landmarkers(BaseOptions.Delegate.CPU)
247
+ delegate_used = "CPU(fallback)"
248
+ print("[WARN] GPU delegate init failed, fallback to CPU. Error:", repr(e))
249
+
250
+ # ---- per-frame states ----
251
  rows = []
252
  left_blink = BlinkState()
253
  right_blink = BlinkState()
 
260
  eye_area_diff_series = []
261
 
262
  frame_idx = 0
263
+ while True:
264
+ ok, frame_bgr = cap.read()
265
+ if not ok:
266
+ break
267
+ frame_idx += 1
268
+ if max_frames and frame_idx > max_frames:
269
+ break
270
+
271
+ if (width != orig_w) or (height != orig_h):
272
+ frame_bgr = cv2.resize(frame_bgr, (width, height), interpolation=cv2.INTER_AREA)
273
+
274
+ frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
275
+ mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
276
+ timestamp_ms = int((frame_idx - 1) * 1000.0 / fps)
277
+
278
+ pose_res = pose_landmarker.detect_for_video(mp_image, timestamp_ms)
279
+ face_res = face_landmarker.detect_for_video(mp_image, timestamp_ms)
280
+
281
+ # ---- Face: extract only needed points for EAR + eye area ----
282
+ face_pts: Dict[int, np.ndarray] = {}
283
+ if face_res.face_landmarks:
284
+ lms = face_res.face_landmarks[0]
285
+ for i in NEEDED_FACE_IDX:
286
+ lm = lms[i]
287
+ face_pts[i] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
288
+
289
+ left_ear = eye_aspect_ratio(face_pts, LEFT_EYE_EAR_IDX)
290
+ right_ear = eye_aspect_ratio(face_pts, RIGHT_EYE_EAR_IDX)
291
+
292
+ left_blink = update_blink(left_blink, left_ear, ear_threshold, blink_min_consec)
293
+ right_blink = update_blink(right_blink, right_ear, ear_threshold, blink_min_consec)
294
+
295
+ left_eye_area = poly_area(face_pts, LEFT_EYE_EAR_IDX)
296
+ right_eye_area = poly_area(face_pts, RIGHT_EYE_EAR_IDX)
297
+
298
+ def area_diff(cur, key):
299
+ prev = prev_eye_area[key]
300
+ prev_eye_area[key] = cur
301
+ if cur is None:
302
+ return None
303
+ if prev is None:
304
+ return 0.0
305
+ return float(abs(cur - prev))
306
+
307
+ left_eye_area_diff = area_diff(left_eye_area, "L")
308
+ right_eye_area_diff = area_diff(right_eye_area, "R")
309
+ eye_area_diff_total = sum(v for v in [left_eye_area_diff, right_eye_area_diff] if v is not None)
310
+
311
+ # ---- Pose: pixel displacement + angles ----
312
+ pose_px: Dict[str, Optional[np.ndarray]] = {}
313
+ if pose_res.pose_landmarks:
314
+ lms = pose_res.pose_landmarks[0]
315
+ for name, idx in JOINTS.items():
316
+ lm = lms[idx]
317
+ pose_px[name] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
318
+ else:
319
+ for name in JOINTS:
320
+ pose_px[name] = None
321
+
322
+ def pixel_disp(key: str) -> Optional[float]:
323
+ cur = pose_px.get(key)
324
+ if cur is None:
325
+ return None
326
+ prev = prev_pose_px.get(key)
327
+ prev_pose_px[key] = cur
328
+ if prev is None:
329
+ return 0.0
330
+ return float(np.linalg.norm(cur - prev))
331
+
332
+ lw_pix = pixel_disp("left_wrist")
333
+ rw_pix = pixel_disp("right_wrist")
334
+ la_pix = pixel_disp("left_ankle")
335
+ ra_pix = pixel_disp("right_ankle")
336
+ limbs_pix_total = sum(v for v in [lw_pix, rw_pix, la_pix, ra_pix] if v is not None)
337
+
338
+ def get_angle(a, b, c):
339
+ if a is None or b is None or c is None:
340
+ return None
341
+ return angle_3pts(a, b, c)
342
+
343
+ left_elbow_ang = get_angle(pose_px["left_shoulder"], pose_px["left_elbow"], pose_px["left_wrist"])
344
+ right_elbow_ang = get_angle(pose_px["right_shoulder"], pose_px["right_elbow"], pose_px["right_wrist"])
345
+ left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
346
+ right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
347
+
348
+ # ---- Draw overlays ----
349
+ # pose skeleton
350
+ draw_pose_from_tasks(frame_bgr, pose_res)
351
+ # light face mesh
352
+ if draw_face_mesh:
353
+ draw_face_mesh_light(frame_bgr, face_res, lightness=int(face_mesh_lightness))
354
+
355
+ # HUD
356
+ hud_lines = [
357
+ f"frame: {frame_idx}/{total_frames if total_frames>0 else '?'} fps:{fps:.1f} delegate:{delegate_used}",
358
+ f"EAR L:{left_ear:.3f}" if left_ear is not None else "EAR L:None",
359
+ f"EAR R:{right_ear:.3f}" if right_ear is not None else "EAR R:None",
360
+ f"Blink L:{left_blink.blink_count} R:{right_blink.blink_count}",
361
+ f"LimbPix(sum): {limbs_pix_total:.2f} EyeAreaDiff(sum): {eye_area_diff_total:.2f}",
362
+ ]
363
+ y0 = 24
364
+ for line in hud_lines:
365
+ cv2.putText(frame_bgr, line, (12, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
366
+ y0 += 20
367
+
368
+ writer.write(frame_bgr)
369
+
370
+ t = (frame_idx - 1) / fps
371
+ times.append(t)
372
+ limb_pix_series.append(limbs_pix_total)
373
+ eye_area_diff_series.append(eye_area_diff_total)
374
+
375
+ rows.append({
376
+ "frame": frame_idx,
377
+ "time_s": t,
378
+
379
+ "left_ear": left_ear,
380
+ "right_ear": right_ear,
381
+
382
+ # pixel displacement per joint
383
+ "lw_pix_disp": lw_pix,
384
+ "rw_pix_disp": rw_pix,
385
+ "la_pix_disp": la_pix,
386
+ "ra_pix_disp": ra_pix,
387
+ "limbs_pix_disp_sum": limbs_pix_total,
388
+
389
+ # eye area / diffs
390
+ "left_eye_area_px2": left_eye_area,
391
+ "right_eye_area_px2": right_eye_area,
392
+ "left_eye_area_diff_px2": left_eye_area_diff,
393
+ "right_eye_area_diff_px2": right_eye_area_diff,
394
+ "eye_area_diff_sum_px2": eye_area_diff_total,
395
+
396
+ # angles
397
+ "left_elbow_angle": left_elbow_ang,
398
+ "right_elbow_angle": right_elbow_ang,
399
+ "left_knee_angle": left_knee_ang,
400
+ "right_knee_angle": right_knee_ang,
401
+ })
402
+
403
+ cap.release()
404
+ writer.release()
405
+
406
+ # close landmarker resources
407
  try:
408
+ pose_landmarker.close()
409
+ face_landmarker.close()
410
+ except Exception:
411
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
 
413
  df = pd.DataFrame(rows)
414
 
415
+ # ---- Summaries ----
 
 
 
 
 
 
 
 
 
 
 
416
  def _sum_series(s: pd.Series):
417
  s2 = s.dropna()
418
  if len(s2) == 0:
 
426
  "height": int(height),
427
  "frames_processed": int(len(df)),
428
  "duration_s": float(len(df) / fps) if len(df) else 0.0,
429
+ "delegate_used": delegate_used,
430
+ "resize_width": int(resize_width),
 
 
 
 
 
431
  },
432
  "blink": {
433
  "ear_threshold": float(ear_threshold),
 
436
  "right_blinks": int(right_blink.blink_count),
437
  "left_blinks_per_min": float(_safe_div(left_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
438
  "right_blinks_per_min": float(_safe_div(right_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
439
+ "left_ear_stats": _sum_series(df["left_ear"]) if len(df) else {"mean": None, "min": None, "max": None},
440
+ "right_ear_stats": _sum_series(df["right_ear"]) if len(df) else {"mean": None, "min": None, "max": None},
 
 
441
  },
442
+ "pixel_motion": {
443
+ "limbs_pix_disp_sum_stats": _sum_series(df["limbs_pix_disp_sum"]) if len(df) else {"mean": None, "min": None, "max": None},
444
+ "eye_area_diff_sum_px2_stats": _sum_series(df["eye_area_diff_sum_px2"]) if len(df) else {"mean": None, "min": None, "max": None},
 
 
 
 
 
 
 
 
 
 
 
 
445
  }
446
  }
447
 
448
+ # ---- Save outputs ----
449
  df.to_csv(out_csv, index=False)
450
  with open(out_json, "w", encoding="utf-8") as f:
451
  json.dump(summary, f, ensure_ascii=False, indent=2)
452
 
453
+ # ---- Plot ----
454
+ plt.figure()
455
+ plt.plot(times, limb_pix_series, label="Limb pixel displacement (sum)")
456
+ plt.plot(times, eye_area_diff_series, label="Eye area diff (sum, px^2)")
457
+ plt.xlabel("Time (s)")
458
+ plt.ylabel("Pixel difference")
459
+ plt.legend()
460
+ plt.tight_layout()
461
+ plt.savefig(out_plot, dpi=150)
462
+ plt.close()
463
+
464
+ report_md = f"""# MediaPipe Tasks (GPU delegate) 分析报告
465
 
466
  ## 视频信息
467
  - 分辨率: {width} x {height}
468
  - FPS: {fps:.2f}
469
  - 处理帧数: {len(df)}
470
  - 时长(秒): {summary["video"]["duration_s"]:.2f}
471
+ - Delegate: {delegate_used}
472
+ - Resize width: {resize_width}
 
 
 
 
 
473
 
474
  ## 眨眼分析(EAR)
475
  - 阈值: {ear_threshold}
 
479
  - 左眼 EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
480
  - 右眼 EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
481
 
482
+ ## Pixel Difference 指标横轴时间
483
+ - 四肢运动 pixel displacement:对 /右手腕 + 左/右脚踝 的逐帧像素位移求和(单位像素)
484
+ - 眼面积 pixel diff:左右眼(6点多边形)面积的逐帧差值求和(单位像素^2)
485
+ > 对应曲线图:motion_eye_timeseries.png
 
 
 
 
 
 
 
486
 
487
  ## 输出文件
488
+ - annotated.mp4:叠加 Pose + 浅色 FaceMesh 的视频
489
+ - per_frame_metrics.csv:逐帧指标(含 limbs pixel dispeye area diff
490
  - summary.json:汇总统计
491
+ - motion_eye_timeseries.png:时间序列曲线图
492
  """
493
  with open(out_report, "w", encoding="utf-8") as f:
494
  f.write(report_md)
 
509
  min_face_det_conf,
510
  ear_threshold,
511
  blink_min_consec,
512
+ draw_face_mesh,
513
+ face_mesh_lightness,
514
+ resize_width,
515
+ max_frames
516
  ):
517
  if isinstance(video, dict) and "path" in video:
518
  video_path = video["path"]
 
524
  pose_model_path=str(pose_model_path),
525
  face_model_path=str(face_model_path),
526
  use_gpu_delegate=bool(use_gpu_delegate),
527
+
528
  min_pose_det_conf=float(min_pose_det_conf),
529
  min_pose_track_conf=float(min_pose_track_conf),
530
  min_face_det_conf=float(min_face_det_conf),
531
+
532
  ear_threshold=float(ear_threshold),
533
  blink_min_consec=int(blink_min_consec),
534
+
535
+ draw_face_mesh=bool(draw_face_mesh),
536
+ face_mesh_lightness=int(face_mesh_lightness),
537
+
538
+ resize_width=int(resize_width),
539
  max_frames=int(max_frames),
540
  )
541
 
 
545
  return out_video, out_csv, out_json, out_plot, report_text
546
 
547
 
548
+ demo = gr.Blocks(title="Video Pose + FaceLandmarker (GPU) + CSV + Plot")
549
 
550
  with demo:
551
+ gr.Markdown("## 上传视频 → MediaPipe Tasks (PoseLandmarker + FaceLandmarker, GPU delegate) → CSV + 曲线图 + 标注视频")
 
 
 
 
552
 
553
  with gr.Row():
554
  video_in = gr.Video(label="上传视频", sources=["upload"])
555
 
556
+ with gr.Accordion("模型与性能参数", open=False):
557
  pose_model_path = gr.Textbox(value="models/pose_landmarker_lite.task", label="Pose .task 路径")
558
  face_model_path = gr.Textbox(value="models/face_landmarker.task", label="Face .task 路径")
559
+ use_gpu_delegate = gr.Checkbox(value=True, label="使用 GPU delegate(失败会自动 fallback CPU)")
560
+ resize_width = gr.Slider(0, 1280, value=640, step=10, label="Resize width(0=不缩放;建议 640 加速)")
561
+ max_frames = gr.Number(value=0, precision=0, label="最多处理帧数(0=全处理,调试可设 300)")
562
 
563
+ with gr.Accordion("检测阈值参数", open=False):
564
  min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_detection_confidence")
565
  min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_tracking_confidence")
566
  min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face min_detection_confidence")
 
567
  ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 EAR(越小越严格)")
568
  blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数(抗抖动)")
569
 
570
+ with gr.Accordion("可视化参数", open=False):
571
+ draw_face_mesh = gr.Checkbox(value=True, label="输出视频叠加 FaceMesh")
572
+ face_mesh_lightness = gr.Slider(200, 255, value=245, step=1, label="FaceMesh 颜色浅度(越大越浅)")
573
 
574
  run_btn = gr.Button("开始分析")
575
 
576
  with gr.Row():
577
+ video_out = gr.Video(label="输出:标注视频(浅色 FaceMesh)")
 
 
578
  with gr.Row():
579
  csv_out = gr.File(label="逐帧指标 CSV(per_frame_metrics.csv)")
580
  json_out = gr.File(label="汇总 JSON(summary.json)")
581
+ with gr.Row():
582
+ plot_out = gr.Image(label="曲线图:四肢像素位移 & 眼睛面积变化", type="filepath")
583
  report_out = gr.Markdown()
584
 
585
  run_btn.click(
 
594
  min_face_det_conf,
595
  ear_threshold,
596
  blink_min_consec,
597
+ draw_face_mesh,
598
+ face_mesh_lightness,
599
+ resize_width,
600
  max_frames,
601
  ],
602
  outputs=[video_out, csv_out, json_out, plot_out, report_out],