seesaw112233 committed on
Commit
2fc3a46
·
verified ·
1 Parent(s): 014d038

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +473 -96
app.py CHANGED
@@ -1,130 +1,507 @@
1
  import os
 
 
 
 
 
 
2
  import cv2
3
  import numpy as np
4
  import pandas as pd
5
  import gradio as gr
6
-
7
  import mediapipe as mp
8
 
9
 
10
- mp_pose = mp.solutions.pose
11
- mp_drawing = mp.solutions.drawing_utils
 
 
 
12
 
 
 
13
 
14
- def _ensure_rgb(img: np.ndarray) -> np.ndarray:
15
- # Gradio Image returns RGB np.uint8
16
- if img is None:
 
 
 
 
 
17
  return None
18
- if img.dtype != np.uint8:
19
- img = np.clip(img, 0, 255).astype(np.uint8)
20
- if img.ndim == 2:
21
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
22
- return img
23
 
24
-
25
- def estimate_pose(image: np.ndarray, model_complexity: int, min_det: float, min_track: float):
26
  """
27
- Returns:
28
- - annotated_image (RGB)
29
- - keypoints dataframe
30
  """
31
- image = _ensure_rgb(image)
32
- if image is None:
33
- return None, pd.DataFrame()
 
 
 
 
 
 
34
 
35
- # MediaPipe expects RGB, but drawing is easier in BGR sometimes; we'll keep RGB and convert when needed.
36
- rgb = image.copy()
37
 
38
- with mp_pose.Pose(
39
- static_image_mode=True,
40
- model_complexity=model_complexity,
41
- enable_segmentation=False,
42
- min_detection_confidence=float(min_det),
43
- min_tracking_confidence=float(min_track),
44
- ) as pose:
45
- results = pose.process(rgb)
 
 
46
 
47
- annotated = rgb.copy()
 
 
 
 
 
48
 
49
- rows = []
50
- if results.pose_landmarks:
51
- # Draw landmarks
52
- annotated_bgr = cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  mp_drawing.draw_landmarks(
54
- annotated_bgr,
55
- results.pose_landmarks,
56
- mp_pose.POSE_CONNECTIONS,
57
- landmark_drawing_spec=mp_drawing.DrawingSpec(thickness=2, circle_radius=2),
58
- connection_drawing_spec=mp_drawing.DrawingSpec(thickness=2),
59
  )
60
- annotated = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
61
-
62
- # Collect keypoints
63
- for i, lm in enumerate(results.pose_landmarks.landmark):
64
- rows.append(
65
- {
66
- "id": i,
67
- "name": mp_pose.PoseLandmark(i).name,
68
- "x": float(lm.x),
69
- "y": float(lm.y),
70
- "z": float(lm.z),
71
- "visibility": float(lm.visibility),
72
- }
73
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  df = pd.DataFrame(rows)
76
- return annotated, df
77
 
 
 
 
 
 
 
78
 
79
- def build_demo():
80
- with gr.Blocks(title="Pose Estimation") as demo:
81
- gr.Markdown(
82
- "## 🕺 Pose Estimation (MediaPipe)\n"
83
- "上传一张图片 → 输出骨架标注图 + 关键点表格。\n\n"
84
- "如果你之前遇到 `TypeError: argument of type 'bool' is not iterable`,这是 Gradio 4.x 的一个坑,"
85
- "本 Space 已升级到 Gradio 5.x 来避免。"
86
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- with gr.Row():
89
- inp = gr.Image(label="Input Image", type="numpy")
90
- out_img = gr.Image(label="Annotated Output", type="numpy")
 
91
 
92
- with gr.Row():
93
- model_complexity = gr.Radio(
94
- choices=[0, 1, 2],
95
- value=1,
96
- label="Model Complexity (0=light, 2=accurate)",
97
- )
98
- min_det = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Min Detection Confidence")
99
- min_track = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Min Tracking Confidence")
100
-
101
- out_df = gr.Dataframe(
102
- label="Keypoints (normalized coords)",
103
- headers=["id", "name", "x", "y", "z", "visibility"],
104
- interactive=False,
105
- wrap=True,
106
- )
107
 
108
- run_btn = gr.Button("Run Pose Estimation", variant="primary")
109
- run_btn.click(
110
- fn=estimate_pose,
111
- inputs=[inp, model_complexity, min_det, min_track],
112
- outputs=[out_img, out_df],
113
- )
114
 
115
- gr.Markdown(
116
- "### Notes\n"
117
- "- `x/y/z` 是相对坐标(0~1),相对于输入图像宽高。\n"
118
- "- 这是 CPU 友好版本,适合 Hugging Face Spaces。"
119
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- return demo
122
 
 
 
 
 
 
 
123
 
124
- demo = build_demo()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  if __name__ == "__main__":
127
- # Hugging Face Spaces 通常不需要 share=True
128
- # 如果你环境仍然报 localhost 不可访问,可把 share=True 打开兜底
129
- share = os.getenv("GRADIO_SHARE", "0") == "1"
130
- demo.launch(server_name="0.0.0.0", server_port=7860, share=share)
 
1
  import os
2
+ import math
3
+ import json
4
+ import tempfile
5
+ from dataclasses import dataclass
6
+ from typing import Dict, List, Tuple, Optional
7
+
8
  import cv2
9
  import numpy as np
10
  import pandas as pd
11
  import gradio as gr
 
12
  import mediapipe as mp
13
 
14
 
15
+ # -------------------------
16
+ # Utils: geometry
17
+ # -------------------------
18
+ def _dist(a: np.ndarray, b: np.ndarray) -> float:
19
+ return float(np.linalg.norm(a - b))
20
 
21
+ def _safe_div(a: float, b: float, eps: float = 1e-8) -> float:
22
+ return a / (b + eps)
23
 
24
+ def eye_aspect_ratio(pts: Dict[int, np.ndarray], idx: List[int]) -> Optional[float]:
25
+ """
26
+ EAR = (||p2-p6|| + ||p3-p5||) / (2*||p1-p4||)
27
+ idx: [p1, p2, p3, p4, p5, p6]
28
+ """
29
+ try:
30
+ p1, p2, p3, p4, p5, p6 = [pts[i] for i in idx]
31
+ except KeyError:
32
  return None
33
+ A = _dist(p2, p6)
34
+ B = _dist(p3, p5)
35
+ C = _dist(p1, p4)
36
+ return _safe_div((A + B), (2.0 * C))
 
37
 
38
+ def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
 
39
  """
40
+ angle at point b in degrees formed by a-b-c
 
 
41
  """
42
+ ba = a - b
43
+ bc = c - b
44
+ nba = np.linalg.norm(ba)
45
+ nbc = np.linalg.norm(bc)
46
+ if nba < 1e-8 or nbc < 1e-8:
47
+ return None
48
+ cosang = float(np.dot(ba, bc) / (nba * nbc))
49
+ cosang = max(-1.0, min(1.0, cosang))
50
+ return float(np.degrees(np.arccos(cosang)))
51
 
 
 
52
 
53
+ # -------------------------
54
+ # MediaPipe indices
55
+ # -------------------------
56
+ # FaceMesh landmarks for EAR (common set)
57
+ LEFT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
58
+ RIGHT_EYE_EAR_IDX = [362, 385, 387, 263, 373, 380]
59
+
60
+ # Pose landmark enum mapping (MediaPipe Pose)
61
+ POSE = mp.solutions.pose
62
+ POSE_LM = POSE.PoseLandmark
63
 
64
+ # Key joints for limb movement/angles
65
+ JOINTS = {
66
+ "left_wrist": POSE_LM.LEFT_WRIST.value,
67
+ "right_wrist": POSE_LM.RIGHT_WRIST.value,
68
+ "left_ankle": POSE_LM.LEFT_ANKLE.value,
69
+ "right_ankle": POSE_LM.RIGHT_ANKLE.value,
70
 
71
+ "left_shoulder": POSE_LM.LEFT_SHOULDER.value,
72
+ "right_shoulder": POSE_LM.RIGHT_SHOULDER.value,
73
+ "left_elbow": POSE_LM.LEFT_ELBOW.value,
74
+ "right_elbow": POSE_LM.RIGHT_ELBOW.value,
75
+
76
+ "left_hip": POSE_LM.LEFT_HIP.value,
77
+ "right_hip": POSE_LM.RIGHT_HIP.value,
78
+ "left_knee": POSE_LM.LEFT_KNEE.value,
79
+ "right_knee": POSE_LM.RIGHT_KNEE.value,
80
+ }
81
+
82
+
83
+ # -------------------------
84
+ # Drawing
85
+ # -------------------------
86
+ mp_drawing = mp.solutions.drawing_utils
87
+ mp_drawing_styles = mp.solutions.drawing_styles
88
+ mp_face_mesh = mp.solutions.face_mesh
89
+
90
+ def draw_pose(image_bgr, pose_results):
91
+ if pose_results.pose_landmarks:
92
  mp_drawing.draw_landmarks(
93
+ image_bgr,
94
+ pose_results.pose_landmarks,
95
+ POSE.POSE_CONNECTIONS,
96
+ landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style(),
 
97
  )
98
+
99
+ def draw_face(image_bgr, face_results, draw_full_mesh: bool = False):
100
+ if not face_results.multi_face_landmarks:
101
+ return
102
+ for face_landmarks in face_results.multi_face_landmarks:
103
+ if draw_full_mesh:
104
+ # full mesh (dense) - heavier visually
105
+ mp_drawing.draw_landmarks(
106
+ image_bgr,
107
+ face_landmarks,
108
+ mp_face_mesh.FACEMESH_TESSELATION,
109
+ landmark_drawing_spec=None,
110
+ connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style(),
111
  )
112
+ # contours are enough for most
113
+ mp_drawing.draw_landmarks(
114
+ image_bgr,
115
+ face_landmarks,
116
+ mp_face_mesh.FACEMESH_CONTOURS,
117
+ landmark_drawing_spec=None,
118
+ connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style(),
119
+ )
120
+
121
+
122
+ # -------------------------
123
+ # Blink detection
124
+ # -------------------------
125
+ @dataclass
126
+ class BlinkState:
127
+ in_blink: bool = False
128
+ blink_count: int = 0
129
+ consec_below: int = 0
130
+
131
+ def update_blink(state: BlinkState, ear: Optional[float], thr: float, min_consec: int) -> BlinkState:
132
+ """
133
+ Basic blink logic:
134
+ - ear below threshold for >= min_consec frames => blink start
135
+ - when ear goes back above => blink end (count once)
136
+ """
137
+ if ear is None:
138
+ # treat missing as no-update
139
+ return state
140
+
141
+ if ear < thr:
142
+ state.consec_below += 1
143
+ if (not state.in_blink) and state.consec_below >= min_consec:
144
+ state.in_blink = True
145
+ else:
146
+ if state.in_blink:
147
+ state.blink_count += 1
148
+ state.in_blink = False
149
+ state.consec_below = 0
150
+ return state
151
+
152
+
153
+ # -------------------------
154
+ # Core processing
155
+ # -------------------------
156
+ def process_video(
157
+ video_path: str,
158
+ pose_model_complexity: int = 1,
159
+ min_pose_det_conf: float = 0.5,
160
+ min_pose_track_conf: float = 0.5,
161
+ min_face_det_conf: float = 0.5,
162
+ ear_threshold: float = 0.21,
163
+ blink_min_consec: int = 2,
164
+ draw_full_face_mesh: bool = False,
165
+ max_frames: int = 0, # 0 => all
166
+ ) -> Tuple[str, str, str, str]:
167
+ """
168
+ Returns:
169
+ annotated_video_path, csv_path, json_path, report_md
170
+ """
171
+ cap = cv2.VideoCapture(video_path)
172
+ if not cap.isOpened():
173
+ raise RuntimeError("Cannot open video. Please upload a valid video file.")
174
+
175
+ fps = cap.get(cv2.CAP_PROP_FPS)
176
+ if fps <= 1e-6:
177
+ fps = 30.0
178
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
179
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
180
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
181
+
182
+ # output paths
183
+ tmpdir = tempfile.mkdtemp(prefix="mp_analysis_")
184
+ out_video = os.path.join(tmpdir, "annotated.mp4")
185
+ out_csv = os.path.join(tmpdir, "per_frame_metrics.csv")
186
+ out_json = os.path.join(tmpdir, "summary.json")
187
+ out_report = os.path.join(tmpdir, "report.md")
188
+
189
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
190
+ writer = cv2.VideoWriter(out_video, fourcc, fps, (width, height))
191
+
192
+ # MediaPipe init
193
+ with mp.solutions.pose.Pose(
194
+ static_image_mode=False,
195
+ model_complexity=pose_model_complexity,
196
+ enable_segmentation=False,
197
+ min_detection_confidence=min_pose_det_conf,
198
+ min_tracking_confidence=min_pose_track_conf,
199
+ ) as pose, mp_face_mesh.FaceMesh(
200
+ static_image_mode=False,
201
+ max_num_faces=1,
202
+ refine_landmarks=True, # improves eye landmarks
203
+ min_detection_confidence=min_face_det_conf,
204
+ min_tracking_confidence=min_face_det_conf,
205
+ ) as face_mesh:
206
+
207
+ rows = []
208
+ prev_pts = {} # for movement delta (normalized coordinates)
209
+ left_blink = BlinkState()
210
+ right_blink = BlinkState()
211
+
212
+ frame_idx = 0
213
+ while True:
214
+ ok, frame_bgr = cap.read()
215
+ if not ok:
216
+ break
217
+ frame_idx += 1
218
+ if max_frames and frame_idx > max_frames:
219
+ break
220
+
221
+ frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
222
+
223
+ pose_res = pose.process(frame_rgb)
224
+ face_res = face_mesh.process(frame_rgb)
225
+
226
+ # Extract face landmarks (pixel coords)
227
+ face_pts: Dict[int, np.ndarray] = {}
228
+ if face_res.multi_face_landmarks:
229
+ lm = face_res.multi_face_landmarks[0].landmark
230
+ for i in range(len(lm)):
231
+ face_pts[i] = np.array([lm[i].x * width, lm[i].y * height], dtype=np.float32)
232
+
233
+ # EAR
234
+ left_ear = eye_aspect_ratio(face_pts, LEFT_EYE_EAR_IDX)
235
+ right_ear = eye_aspect_ratio(face_pts, RIGHT_EYE_EAR_IDX)
236
+
237
+ left_blink = update_blink(left_blink, left_ear, ear_threshold, blink_min_consec)
238
+ right_blink = update_blink(right_blink, right_ear, ear_threshold, blink_min_consec)
239
+
240
+ # Extract pose landmarks (normalized coords + pixel)
241
+ pose_norm: Dict[str, Optional[np.ndarray]] = {}
242
+ pose_px: Dict[str, Optional[np.ndarray]] = {}
243
+ if pose_res.pose_landmarks:
244
+ lms = pose_res.pose_landmarks.landmark
245
+ for name, idx in JOINTS.items():
246
+ if idx < len(lms):
247
+ pose_norm[name] = np.array([lms[idx].x, lms[idx].y], dtype=np.float32)
248
+ pose_px[name] = np.array([lms[idx].x * width, lms[idx].y * height], dtype=np.float32)
249
+ else:
250
+ pose_norm[name] = None
251
+ pose_px[name] = None
252
+ else:
253
+ for name in JOINTS:
254
+ pose_norm[name] = None
255
+ pose_px[name] = None
256
+
257
+ # Limb movement: per-frame displacement & speed (in normalized units)
258
+ def movement_metrics(key: str):
259
+ cur = pose_norm.get(key)
260
+ if cur is None:
261
+ return None, None
262
+ prev = prev_pts.get(key)
263
+ if prev is None:
264
+ d = 0.0
265
+ else:
266
+ d = float(np.linalg.norm(cur - prev))
267
+ v = d * fps
268
+ prev_pts[key] = cur
269
+ return d, v
270
+
271
+ lw_d, lw_v = movement_metrics("left_wrist")
272
+ rw_d, rw_v = movement_metrics("right_wrist")
273
+ la_d, la_v = movement_metrics("left_ankle")
274
+ ra_d, ra_v = movement_metrics("right_ankle")
275
+
276
+ # Joint angles (pixel coords for stability)
277
+ def get_angle(a, b, c):
278
+ if a is None or b is None or c is None:
279
+ return None
280
+ return angle_3pts(a, b, c)
281
+
282
+ left_elbow_ang = get_angle(pose_px["left_shoulder"], pose_px["left_elbow"], pose_px["left_wrist"])
283
+ right_elbow_ang = get_angle(pose_px["right_shoulder"], pose_px["right_elbow"], pose_px["right_wrist"])
284
+ left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
285
+ right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
286
+
287
+ # Draw overlays
288
+ draw_pose(frame_bgr, pose_res)
289
+ draw_face(frame_bgr, face_res, draw_full_mesh=draw_full_face_mesh)
290
+
291
+ # HUD text
292
+ hud_lines = [
293
+ f"frame: {frame_idx}/{total_frames if total_frames>0 else '?'} fps:{fps:.1f}",
294
+ f"EAR L:{left_ear:.3f}" if left_ear is not None else "EAR L:None",
295
+ f"EAR R:{right_ear:.3f}" if right_ear is not None else "EAR R:None",
296
+ f"Blink L:{left_blink.blink_count} R:{right_blink.blink_count}",
297
+ ]
298
+ y0 = 24
299
+ for line in hud_lines:
300
+ cv2.putText(frame_bgr, line, (12, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
301
+ y0 += 22
302
+
303
+ writer.write(frame_bgr)
304
+
305
+ rows.append({
306
+ "frame": frame_idx,
307
+ "time_s": (frame_idx - 1) / fps,
308
+
309
+ "left_ear": left_ear,
310
+ "right_ear": right_ear,
311
+
312
+ "lw_disp": lw_d,
313
+ "rw_disp": rw_d,
314
+ "la_disp": la_d,
315
+ "ra_disp": ra_d,
316
+
317
+ "lw_speed": lw_v,
318
+ "rw_speed": rw_v,
319
+ "la_speed": la_v,
320
+ "ra_speed": ra_v,
321
+
322
+ "left_elbow_angle": left_elbow_ang,
323
+ "right_elbow_angle": right_elbow_ang,
324
+ "left_knee_angle": left_knee_ang,
325
+ "right_knee_angle": right_knee_ang,
326
+ })
327
+
328
+ cap.release()
329
+ writer.release()
330
 
331
  df = pd.DataFrame(rows)
 
332
 
333
+ # Summaries
334
+ def _sum_series(s: pd.Series):
335
+ s2 = s.dropna()
336
+ if len(s2) == 0:
337
+ return {"mean": None, "min": None, "max": None}
338
+ return {"mean": float(s2.mean()), "min": float(s2.min()), "max": float(s2.max())}
339
 
340
+ # movement totals in normalized units (roughly proportional)
341
+ summary = {
342
+ "video": {
343
+ "fps": float(fps),
344
+ "width": width,
345
+ "height": height,
346
+ "frames_processed": int(len(df)),
347
+ "duration_s": float(len(df) / fps),
348
+ },
349
+ "blink": {
350
+ "ear_threshold": float(ear_threshold),
351
+ "min_consecutive_frames": int(blink_min_consec),
352
+ "left_blinks": int(left_blink.blink_count),
353
+ "right_blinks": int(right_blink.blink_count),
354
+ "left_blinks_per_min": float(_safe_div(left_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
355
+ "right_blinks_per_min": float(_safe_div(right_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
356
+ "left_ear_stats": _sum_series(df["left_ear"]),
357
+ "right_ear_stats": _sum_series(df["right_ear"]),
358
+ },
359
+ "limb_movement": {
360
+ "total_disp": {
361
+ "left_wrist": float(df["lw_disp"].fillna(0).sum()),
362
+ "right_wrist": float(df["rw_disp"].fillna(0).sum()),
363
+ "left_ankle": float(df["la_disp"].fillna(0).sum()),
364
+ "right_ankle": float(df["ra_disp"].fillna(0).sum()),
365
+ },
366
+ "speed_stats": {
367
+ "left_wrist": _sum_series(df["lw_speed"]),
368
+ "right_wrist": _sum_series(df["rw_speed"]),
369
+ "left_ankle": _sum_series(df["la_speed"]),
370
+ "right_ankle": _sum_series(df["ra_speed"]),
371
+ },
372
+ "angle_stats_deg": {
373
+ "left_elbow": _sum_series(df["left_elbow_angle"]),
374
+ "right_elbow": _sum_series(df["right_elbow_angle"]),
375
+ "left_knee": _sum_series(df["left_knee_angle"]),
376
+ "right_knee": _sum_series(df["right_knee_angle"]),
377
+ }
378
+ }
379
+ }
380
 
381
+ # Save outputs
382
+ df.to_csv(out_csv, index=False)
383
+ with open(out_json, "w", encoding="utf-8") as f:
384
+ json.dump(summary, f, ensure_ascii=False, indent=2)
385
 
386
+ report_md = f"""# MediaPipe Pose + FaceLandmarks 分析报告
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ ## 视频信息
389
+ - 分辨率: {width} x {height}
390
+ - FPS: {fps:.2f}
391
+ - 处理帧数: {len(df)}
392
+ - 时长(秒): {summary["video"]["duration_s"]:.2f}
 
393
 
394
+ ## 眨眼分析(EAR)
395
+ - 阈值: {ear_threshold}
396
+ - 最小连续帧数: {blink_min_consec}
397
+ - 左眼眨眼次数: {summary["blink"]["left_blinks"]}({summary["blink"]["left_blinks_per_min"]:.2f} 次/分钟)
398
+ - 右眼眨眼次数: {summary["blink"]["right_blinks"]}({summary["blink"]["right_blinks_per_min"]:.2f} 次/分钟)
399
+ - 左眼 EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
400
+ - 右眼 EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
401
+
402
+ ## 肢体运动量(normalized units)
403
+ > 这里的位移/速度是基于归一化坐标(0~1)计算,适合“相对比较”和趋势分析。
404
+ - 累计位移(越大代表越动):
405
+ - 左手腕: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
406
+ - 右手腕: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
407
+ - 左脚踝: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
408
+ - 右脚踝: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
409
+
410
+ ## 输出文件
411
+ - annotated.mp4:叠加了 Pose 和 FaceMesh 的视频
412
+ - per_frame_metrics.csv:逐帧指标(EAR / 位移 / 速度 / 关节角)
413
+ - summary.json:汇总统计
414
+ """
415
+ with open(out_report, "w", encoding="utf-8") as f:
416
+ f.write(report_md)
417
+
418
+ return out_video, out_csv, out_json, out_report
419
+
420
+
421
+ # -------------------------
422
+ # Gradio UI
423
+ # -------------------------
424
+ def ui_process(
425
+ video,
426
+ pose_model_complexity,
427
+ min_pose_det_conf,
428
+ min_pose_track_conf,
429
+ min_face_det_conf,
430
+ ear_threshold,
431
+ blink_min_consec,
432
+ draw_full_face_mesh,
433
+ max_frames
434
+ ):
435
+ # video may be dict in some gradio versions
436
+ if isinstance(video, dict) and "path" in video:
437
+ video_path = video["path"]
438
+ else:
439
+ video_path = video
440
+
441
+ out_video, out_csv, out_json, out_report = process_video(
442
+ video_path=str(video_path),
443
+ pose_model_complexity=int(pose_model_complexity),
444
+ min_pose_det_conf=float(min_pose_det_conf),
445
+ min_pose_track_conf=float(min_pose_track_conf),
446
+ min_face_det_conf=float(min_face_det_conf),
447
+ ear_threshold=float(ear_threshold),
448
+ blink_min_consec=int(blink_min_consec),
449
+ draw_full_face_mesh=bool(draw_full_face_mesh),
450
+ max_frames=int(max_frames),
451
+ )
452
+
453
+ # Show report text + return files
454
+ with open(out_report, "r", encoding="utf-8") as f:
455
+ report_text = f.read()
456
+
457
+ return out_video, out_csv, out_json, report_text
458
+
459
+
460
+ demo = gr.Blocks(title="Video Pose + FaceLandmarks + Blink/Limb Analytics")
461
+
462
+ with demo:
463
+ gr.Markdown("## 上传视频 → MediaPipe Pose + FaceMesh → 肢体运动量 & 眨眼量化(EAR)")
464
+
465
+ with gr.Row():
466
+ video_in = gr.Video(label="上传视频", sources=["upload"])
467
+
468
+ with gr.Accordion("参数(一般默认就够用)", open=False):
469
+ pose_model_complexity = gr.Radio([0, 1, 2], value=1, label="Pose model_complexity (0快/2准)")
470
+ min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_detection_confidence")
471
+ min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_tracking_confidence")
472
+ min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face min_detection_confidence")
473
+
474
+ ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 EAR(越小越严格)")
475
+ blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数(抗抖动)")
476
+
477
+ draw_full_face_mesh = gr.Checkbox(value=False, label="叠加完整 FaceMesh(更密/更慢)")
478
+ max_frames = gr.Number(value=0, precision=0, label="最多处理帧数(0=全处理,调试可设 300)")
479
 
480
+ run_btn = gr.Button("开始分析")
481
 
482
+ with gr.Row():
483
+ video_out = gr.Video(label="输出:叠加标注视频")
484
+ with gr.Row():
485
+ csv_out = gr.File(label="逐帧指标 CSV(per_frame_metrics.csv)")
486
+ json_out = gr.File(label="汇总 JSON(summary.json)")
487
+ report_out = gr.Markdown()
488
 
489
+ run_btn.click(
490
+ fn=ui_process,
491
+ inputs=[
492
+ video_in,
493
+ pose_model_complexity,
494
+ min_pose_det_conf,
495
+ min_pose_track_conf,
496
+ min_face_det_conf,
497
+ ear_threshold,
498
+ blink_min_consec,
499
+ draw_full_face_mesh,
500
+ max_frames,
501
+ ],
502
+ outputs=[video_out, csv_out, json_out, report_out],
503
+ )
504
 
505
  if __name__ == "__main__":
506
+ # HF Spaces 不需要 share=True;也别开 share,省事
507
+ demo.launch(server_name="0.0.0.0", server_port=7860)