seesaw112233 committed on
Commit
8d2db9a
·
verified ·
1 Parent(s): dc0f3d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +290 -415
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
- import json
3
  import math
 
4
  import tempfile
5
  from dataclasses import dataclass
6
  from typing import Dict, List, Tuple, Optional
@@ -9,52 +9,7 @@ import cv2
9
  import numpy as np
10
  import pandas as pd
11
  import gradio as gr
12
- import matplotlib.pyplot as plt
13
- import requests
14
-
15
  import mediapipe as mp
16
- from mediapipe.tasks import python
17
- from mediapipe.tasks.python import vision
18
- from mediapipe.framework.formats import landmark_pb2
19
-
20
-
21
- # =========================
22
- # Official model download (Spaces-safe)
23
- # =========================
24
- POSE_URL = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_full/float16/latest/pose_landmarker_full.task"
25
- FACE_URL = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
26
-
27
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
28
- MODELS_DIR = os.path.join(BASE_DIR, "models")
29
- POSE_PATH_DEFAULT = os.path.join(MODELS_DIR, "pose_landmarker_full.task")
30
- FACE_PATH_DEFAULT = os.path.join(MODELS_DIR, "face_landmarker.task")
31
-
32
-
33
def _download_if_needed(url: str, local_path: str, min_bytes: int = 100 * 1024) -> None:
    """Download `url` to `local_path` unless a plausibly-complete copy exists.

    A file smaller than `min_bytes` is treated as a failed/partial previous
    download and is fetched again.

    Args:
        url: HTTP(S) URL of the model file.
        local_path: Destination path on disk; parent dirs are created.
        min_bytes: Minimum size for an existing file to be accepted as valid.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    if os.path.exists(local_path) and os.path.getsize(local_path) >= min_bytes:
        return
    print(f"[INFO] Downloading model: {url} -> {local_path}")
    # Stream the response to disk in chunks so large .task models are not
    # buffered entirely in memory (the original used r.content, which does).
    with requests.get(url, timeout=120, stream=True) as r:
        r.raise_for_status()
        with open(local_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                if chunk:
                    f.write(chunk)
    print(f"[INFO] Download complete. size={os.path.getsize(local_path)} bytes")
43
-
44
-
45
def ensure_models(pose_path: str, face_path: str) -> Tuple[str, str]:
    """Make sure both .task model files are available.

    Only the default paths trigger an automatic download of the official
    models; custom paths are passed through untouched (and may still fail
    later when the landmarkers are created).
    """
    for requested, default, url in (
        (pose_path, POSE_PATH_DEFAULT, POSE_URL),
        (face_path, FACE_PATH_DEFAULT, FACE_URL),
    ):
        if requested == default:
            _download_if_needed(url, requested)
    return pose_path, face_path
53
-
54
-
55
- def _read_bytes(path: str) -> bytes:
56
- with open(path, "rb") as f:
57
- return f.read()
58
 
59
 
60
  # -------------------------
@@ -63,11 +18,9 @@ def _read_bytes(path: str) -> bytes:
63
  def _dist(a: np.ndarray, b: np.ndarray) -> float:
64
  return float(np.linalg.norm(a - b))
65
 
66
-
67
  def _safe_div(a: float, b: float, eps: float = 1e-8) -> float:
68
  return a / (b + eps)
69
 
70
-
71
  def eye_aspect_ratio(pts: Dict[int, np.ndarray], idx: List[int]) -> Optional[float]:
72
  """
73
  EAR = (||p2-p6|| + ||p3-p5||) / (2*||p1-p4||)
@@ -82,8 +35,10 @@ def eye_aspect_ratio(pts: Dict[int, np.ndarray], idx: List[int]) -> Optional[flo
82
  C = _dist(p1, p4)
83
  return _safe_div((A + B), (2.0 * C))
84
 
85
-
86
  def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
 
 
 
87
  ba = a - b
88
  bc = c - b
89
  nba = np.linalg.norm(ba)
@@ -95,89 +50,73 @@ def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
95
  return float(np.degrees(np.arccos(cosang)))
96
 
97
 
98
def poly_area(pts: Dict[int, np.ndarray], idxs: List[int]) -> Optional[float]:
    """Area (in px^2) of the polygon whose vertices are `pts[i]` for i in `idxs`.

    Returns None when any requested landmark index is missing from `pts`.
    """
    if any(i not in pts for i in idxs):
        return None
    contour = np.array([pts[i] for i in idxs], dtype=np.float32)
    return float(cv2.contourArea(contour))
106
-
107
-
108
  # -------------------------
109
- # Indices
110
  # -------------------------
111
- LEFT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
 
112
  RIGHT_EYE_EAR_IDX = [362, 385, 387, 263, 373, 380]
113
- NEEDED_FACE_IDX = set(LEFT_EYE_EAR_IDX + RIGHT_EYE_EAR_IDX)
114
 
 
115
  POSE = mp.solutions.pose
116
  POSE_LM = POSE.PoseLandmark
117
 
 
118
  JOINTS = {
119
  "left_wrist": POSE_LM.LEFT_WRIST.value,
120
  "right_wrist": POSE_LM.RIGHT_WRIST.value,
121
  "left_ankle": POSE_LM.LEFT_ANKLE.value,
122
  "right_ankle": POSE_LM.RIGHT_ANKLE.value,
 
123
  "left_shoulder": POSE_LM.LEFT_SHOULDER.value,
124
  "right_shoulder": POSE_LM.RIGHT_SHOULDER.value,
125
  "left_elbow": POSE_LM.LEFT_ELBOW.value,
126
  "right_elbow": POSE_LM.RIGHT_ELBOW.value,
 
127
  "left_hip": POSE_LM.LEFT_HIP.value,
128
  "right_hip": POSE_LM.RIGHT_HIP.value,
129
  "left_knee": POSE_LM.LEFT_KNEE.value,
130
  "right_knee": POSE_LM.RIGHT_KNEE.value,
131
  }
132
 
 
133
  # -------------------------
134
- # Drawing helpers (Tasks output -> draw_landmarks)
135
  # -------------------------
136
  mp_drawing = mp.solutions.drawing_utils
 
137
  mp_face_mesh = mp.solutions.face_mesh
138
 
 
 
 
 
 
 
 
 
139
 
140
def _to_normalized_landmark_list(lms) -> landmark_pb2.NormalizedLandmarkList:
    """Convert MediaPipe Tasks landmarks into the protobuf list format
    expected by mp.solutions.drawing_utils.draw_landmarks.

    Landmarks without a z attribute are given z=0.0.
    """
    proto_landmarks = []
    for lm in lms:
        proto_landmarks.append(
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=getattr(lm, "z", 0.0))
        )
    return landmark_pb2.NormalizedLandmarkList(landmark=proto_landmarks)
149
-
150
-
151
def draw_pose_from_tasks(image_bgr, pose_res):
    """Overlay the first detected pose skeleton on `image_bgr` in place.

    No-op when the Tasks result contains no pose landmarks.
    """
    if not pose_res.pose_landmarks:
        return
    landmark_list = _to_normalized_landmark_list(pose_res.pose_landmarks[0])
    spec = mp_drawing.DrawingSpec(thickness=2, circle_radius=1)
    mp_drawing.draw_landmarks(
        image=image_bgr,
        landmark_list=landmark_list,
        connections=POSE.POSE_CONNECTIONS,
        landmark_drawing_spec=None,
        connection_drawing_spec=spec,
    )
163
-
164
-
165
def draw_face_mesh_light(image_bgr, face_res, lightness: int = 245):
    """Overlay a light-gray face-mesh tessellation on `image_bgr` in place.

    `lightness` is the gray value (0-255) used for all three BGR channels;
    larger values draw a fainter mesh. No-op when no face was detected.
    """
    if not face_res.face_landmarks:
        return
    landmark_list = _to_normalized_landmark_list(face_res.face_landmarks[0])
    gray = (lightness, lightness, lightness)
    mp_drawing.draw_landmarks(
        image=image_bgr,
        landmark_list=landmark_list,
        connections=mp_face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing.DrawingSpec(color=gray, thickness=1, circle_radius=1),
    )
 
 
 
 
 
181
 
182
 
183
  # -------------------------
@@ -189,12 +128,16 @@ class BlinkState:
189
  blink_count: int = 0
190
  consec_below: int = 0
191
 
192
-
193
- def update_blink(
194
- state: BlinkState, ear: Optional[float], thr: float, min_consec: int
195
- ) -> BlinkState:
 
 
196
  if ear is None:
 
197
  return state
 
198
  if ear < thr:
199
  state.consec_below += 1
200
  if (not state.in_blink) and state.consec_below >= min_consec:
@@ -208,30 +151,22 @@ def update_blink(
208
 
209
 
210
  # -------------------------
211
- # Core processing (Tasks CPU-only + buffer load)
212
  # -------------------------
213
  def process_video(
214
  video_path: str,
215
- pose_model_path: str = POSE_PATH_DEFAULT,
216
- face_model_path: str = FACE_PATH_DEFAULT,
217
- use_gpu_delegate: bool = False, # ignored, always CPU
218
-
219
  min_pose_det_conf: float = 0.5,
220
  min_pose_track_conf: float = 0.5,
221
  min_face_det_conf: float = 0.5,
222
-
223
  ear_threshold: float = 0.21,
224
  blink_min_consec: int = 2,
225
-
226
- draw_face_mesh: bool = True,
227
- face_mesh_lightness: int = 245,
228
-
229
- resize_width: int = 0,
230
- max_frames: int = 0,
231
- ) -> Tuple[str, str, str, str, str]:
232
  """
233
  Returns:
234
- annotated_video_path, csv_path, json_path, plot_path, report_md_path
235
  """
236
  cap = cv2.VideoCapture(video_path)
237
  if not cap.isOpened():
@@ -240,233 +175,176 @@ def process_video(
240
  fps = cap.get(cv2.CAP_PROP_FPS)
241
  if fps <= 1e-6:
242
  fps = 30.0
243
-
244
- orig_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
245
- orig_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
246
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
247
 
248
- if resize_width and resize_width > 0 and orig_w > 0:
249
- scale = resize_width / float(orig_w)
250
- width = int(orig_w * scale)
251
- height = int(orig_h * scale)
252
- else:
253
- width, height = orig_w, orig_h
254
-
255
- tmpdir = tempfile.mkdtemp(prefix="mp_tasks_analysis_")
256
  out_video = os.path.join(tmpdir, "annotated.mp4")
257
  out_csv = os.path.join(tmpdir, "per_frame_metrics.csv")
258
  out_json = os.path.join(tmpdir, "summary.json")
259
- out_plot = os.path.join(tmpdir, "motion_eye_timeseries.png")
260
  out_report = os.path.join(tmpdir, "report.md")
261
 
262
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
263
  writer = cv2.VideoWriter(out_video, fourcc, fps, (width, height))
264
 
265
- # ---- MediaPipe Tasks init ----
266
- BaseOptions = python.BaseOptions
267
- RunningMode = vision.RunningMode
268
-
269
- # Ensure official models exist (download if default paths)
270
- pose_model_path, face_model_path = ensure_models(pose_model_path, face_model_path)
271
-
272
- # Always CPU in Spaces (stable)
273
- delegate_used = "CPU"
274
-
275
- def _create_landmarkers_cpu_buffer():
276
- pose_data = _read_bytes(pose_model_path)
277
- face_data = _read_bytes(face_model_path)
278
-
279
- pose_options = vision.PoseLandmarkerOptions(
280
- base_options=BaseOptions(model_asset_buffer=pose_data, delegate=BaseOptions.Delegate.CPU),
281
- running_mode=RunningMode.VIDEO,
282
- num_poses=1,
283
- min_pose_detection_confidence=min_pose_det_conf,
284
- min_pose_presence_confidence=min_pose_det_conf,
285
- min_tracking_confidence=min_pose_track_conf,
286
- )
287
- face_options = vision.FaceLandmarkerOptions(
288
- base_options=BaseOptions(model_asset_buffer=face_data, delegate=BaseOptions.Delegate.CPU),
289
- running_mode=RunningMode.VIDEO,
290
- num_faces=1,
291
- min_face_detection_confidence=min_face_det_conf,
292
- min_face_presence_confidence=min_face_det_conf,
293
- min_tracking_confidence=min_face_det_conf,
294
- )
295
- pose_landmarker = vision.PoseLandmarker.create_from_options(pose_options)
296
- face_landmarker = vision.FaceLandmarker.create_from_options(face_options)
297
- return pose_landmarker, face_landmarker
298
-
299
- pose_landmarker, face_landmarker = _create_landmarkers_cpu_buffer()
300
-
301
- # ---- per-frame states ----
302
- rows = []
303
- left_blink = BlinkState()
304
- right_blink = BlinkState()
305
-
306
- prev_pose_px: Dict[str, np.ndarray] = {}
307
- prev_eye_area = {"L": None, "R": None}
308
-
309
- times = []
310
- limb_pix_series = []
311
- eye_area_diff_series = []
312
-
313
- frame_idx = 0
314
- while True:
315
- ok, frame_bgr = cap.read()
316
- if not ok:
317
- break
318
- frame_idx += 1
319
- if max_frames and frame_idx > max_frames:
320
- break
321
-
322
- if (width != orig_w) or (height != orig_h):
323
- frame_bgr = cv2.resize(frame_bgr, (width, height), interpolation=cv2.INTER_AREA)
324
-
325
- frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
326
- mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
327
- timestamp_ms = int((frame_idx - 1) * 1000.0 / fps)
328
-
329
- pose_res = pose_landmarker.detect_for_video(mp_image, timestamp_ms)
330
- face_res = face_landmarker.detect_for_video(mp_image, timestamp_ms)
331
-
332
- # ---- Face: EAR + eye area ----
333
- face_pts: Dict[int, np.ndarray] = {}
334
- if face_res.face_landmarks:
335
- lms = face_res.face_landmarks[0]
336
- for i in NEEDED_FACE_IDX:
337
- lm = lms[i]
338
- face_pts[i] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
339
-
340
- left_ear = eye_aspect_ratio(face_pts, LEFT_EYE_EAR_IDX)
341
- right_ear = eye_aspect_ratio(face_pts, RIGHT_EYE_EAR_IDX)
342
-
343
- left_blink = update_blink(left_blink, left_ear, ear_threshold, blink_min_consec)
344
- right_blink = update_blink(right_blink, right_ear, ear_threshold, blink_min_consec)
345
-
346
- left_eye_area = poly_area(face_pts, LEFT_EYE_EAR_IDX)
347
- right_eye_area = poly_area(face_pts, RIGHT_EYE_EAR_IDX)
348
-
349
- def area_diff(cur, key):
350
- prev = prev_eye_area[key]
351
- prev_eye_area[key] = cur
352
- if cur is None:
353
- return None
354
- if prev is None:
355
- return 0.0
356
- return float(abs(cur - prev))
357
-
358
- left_eye_area_diff = area_diff(left_eye_area, "L")
359
- right_eye_area_diff = area_diff(right_eye_area, "R")
360
- eye_area_diff_total = sum(v for v in [left_eye_area_diff, right_eye_area_diff] if v is not None)
361
-
362
- # ---- Pose: pixel displacement + angles ----
363
- pose_px: Dict[str, Optional[np.ndarray]] = {}
364
- if pose_res.pose_landmarks:
365
- lms = pose_res.pose_landmarks[0]
366
- for name, idx in JOINTS.items():
367
- lm = lms[idx]
368
- pose_px[name] = np.array([lm.x * width, lm.y * height], dtype=np.float32)
369
- else:
370
- for name in JOINTS:
371
- pose_px[name] = None
372
-
373
- def pixel_disp(key: str) -> Optional[float]:
374
- cur = pose_px.get(key)
375
- if cur is None:
376
- return None
377
- prev = prev_pose_px.get(key)
378
- prev_pose_px[key] = cur
379
- if prev is None:
380
- return 0.0
381
- return float(np.linalg.norm(cur - prev))
382
-
383
- lw_pix = pixel_disp("left_wrist")
384
- rw_pix = pixel_disp("right_wrist")
385
- la_pix = pixel_disp("left_ankle")
386
- ra_pix = pixel_disp("right_ankle")
387
- limbs_pix_total = sum(v for v in [lw_pix, rw_pix, la_pix, ra_pix] if v is not None)
388
-
389
- def get_angle(a, b, c):
390
- if a is None or b is None or c is None:
391
- return None
392
- return angle_3pts(a, b, c)
393
-
394
- left_elbow_ang = get_angle(pose_px["left_shoulder"], pose_px["left_elbow"], pose_px["left_wrist"])
395
- right_elbow_ang = get_angle(pose_px["right_shoulder"], pose_px["right_elbow"], pose_px["right_wrist"])
396
- left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
397
- right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
398
-
399
- # ---- Draw overlays ----
400
- draw_pose_from_tasks(frame_bgr, pose_res)
401
- if draw_face_mesh:
402
- draw_face_mesh_light(frame_bgr, face_res, lightness=int(face_mesh_lightness))
403
-
404
- hud_lines = [
405
- f"frame: {frame_idx}/{total_frames if total_frames>0 else '?'} fps:{fps:.1f} delegate:{delegate_used}",
406
- f"EAR L:{left_ear:.3f}" if left_ear is not None else "EAR L:None",
407
- f"EAR R:{right_ear:.3f}" if right_ear is not None else "EAR R:None",
408
- f"Blink L:{left_blink.blink_count} R:{right_blink.blink_count}",
409
- f"LimbPix(sum): {limbs_pix_total:.2f} EyeAreaDiff(sum): {eye_area_diff_total:.2f}",
410
- ]
411
- y0 = 24
412
- for line in hud_lines:
413
- cv2.putText(frame_bgr, line, (12, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
414
- y0 += 20
415
-
416
- writer.write(frame_bgr)
417
-
418
- t = (frame_idx - 1) / fps
419
- times.append(t)
420
- limb_pix_series.append(limbs_pix_total)
421
- eye_area_diff_series.append(eye_area_diff_total)
422
-
423
- rows.append({
424
- "frame": frame_idx,
425
- "time_s": t,
426
- "left_ear": left_ear,
427
- "right_ear": right_ear,
428
- "lw_pix_disp": lw_pix,
429
- "rw_pix_disp": rw_pix,
430
- "la_pix_disp": la_pix,
431
- "ra_pix_disp": ra_pix,
432
- "limbs_pix_disp_sum": limbs_pix_total,
433
- "left_eye_area_px2": left_eye_area,
434
- "right_eye_area_px2": right_eye_area,
435
- "left_eye_area_diff_px2": left_eye_area_diff,
436
- "right_eye_area_diff_px2": right_eye_area_diff,
437
- "eye_area_diff_sum_px2": eye_area_diff_total,
438
- "left_elbow_angle": left_elbow_ang,
439
- "right_elbow_angle": right_elbow_ang,
440
- "left_knee_angle": left_knee_ang,
441
- "right_knee_angle": right_knee_ang,
442
- })
443
 
444
  cap.release()
445
  writer.release()
446
 
447
- try:
448
- pose_landmarker.close()
449
- face_landmarker.close()
450
- except Exception:
451
- pass
452
-
453
  df = pd.DataFrame(rows)
454
 
 
455
  def _sum_series(s: pd.Series):
456
  s2 = s.dropna()
457
  if len(s2) == 0:
458
  return {"mean": None, "min": None, "max": None}
459
  return {"mean": float(s2.mean()), "min": float(s2.min()), "max": float(s2.max())}
460
 
 
461
  summary = {
462
  "video": {
463
  "fps": float(fps),
464
- "width": int(width),
465
- "height": int(height),
466
  "frames_processed": int(len(df)),
467
- "duration_s": float(len(df) / fps) if len(df) else 0.0,
468
- "delegate_used": delegate_used,
469
- "resize_width": int(resize_width),
470
  },
471
  "blink": {
472
  "ear_threshold": float(ear_threshold),
@@ -475,62 +353,69 @@ def process_video(
475
  "right_blinks": int(right_blink.blink_count),
476
  "left_blinks_per_min": float(_safe_div(left_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
477
  "right_blinks_per_min": float(_safe_div(right_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
478
- "left_ear_stats": _sum_series(df["left_ear"]) if len(df) else {"mean": None, "min": None, "max": None},
479
- "right_ear_stats": _sum_series(df["right_ear"]) if len(df) else {"mean": None, "min": None, "max": None},
480
  },
481
- "pixel_motion": {
482
- "limbs_pix_disp_sum_stats": _sum_series(df["limbs_pix_disp_sum"]) if len(df) else {"mean": None, "min": None, "max": None},
483
- "eye_area_diff_sum_px2_stats": _sum_series(df["eye_area_diff_sum_px2"]) if len(df) else {"mean": None, "min": None, "max": None},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  }
485
  }
486
 
 
487
  df.to_csv(out_csv, index=False)
488
  with open(out_json, "w", encoding="utf-8") as f:
489
  json.dump(summary, f, ensure_ascii=False, indent=2)
490
 
491
- plt.figure()
492
- plt.plot(times, limb_pix_series, label="Limb pixel displacement (sum)")
493
- plt.plot(times, eye_area_diff_series, label="Eye area diff (sum, px^2)")
494
- plt.xlabel("Time (s)")
495
- plt.ylabel("Pixel difference")
496
- plt.legend()
497
- plt.tight_layout()
498
- plt.savefig(out_plot, dpi=150)
499
- plt.close()
500
 
501
- report_md = f"""# MediaPipe Tasks(CPU-only)分析报告
502
-
503
- ## 视频信息
504
- - 分辨率: {width} x {height}
505
  - FPS: {fps:.2f}
506
- - 处理帧数: {len(df)}
507
- - 时长(): {summary["video"]["duration_s"]:.2f}
508
- - Delegate: {delegate_used}
509
- - Resize width: {resize_width}
510
-
511
- ## 眨眼分析(EAR)
512
- - 阈值: {ear_threshold}
513
- - 最小连续帧数: {blink_min_consec}
514
- - 左眼眨眼次数: {summary["blink"]["left_blinks"]}{summary["blink"]["left_blinks_per_min"]:.2f} 次/分钟)
515
- - 右眼眨眼次数: {summary["blink"]["right_blinks"]}{summary["blink"]["right_blinks_per_min"]:.2f} 次/分钟)
516
- - 左眼 EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
517
- - 右眼 EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
518
-
519
- ## Pixel Difference 指标(横轴时间)
520
- - 四肢运动 pixel displacement:对 左/右手腕 + 左/右脚踝 的逐帧像素位移求和(单位像素)
521
- - 眼睛面积 pixel diff:左右眼(6点多边形)面积的逐帧差值求和(单位像素^2)
522
- > 对应曲线图:motion_eye_timeseries.png
523
-
524
- ## 输出文件
525
- - annotated.mp4:叠加 Pose + 浅色 FaceMesh 的视频
526
- - per_frame_metrics.csv:逐帧指标(含 limbs pixel disp、eye area diff)
527
- - summary.json:汇总统计
528
- - motion_eye_timeseries.png:时间序列曲线图
529
  """
530
  with open(out_report, "w", encoding="utf-8") as f:
531
  f.write(report_md)
532
 
533
- return out_video, out_csv, out_json, out_plot, out_report
534
 
535
 
536
  # -------------------------
@@ -538,99 +423,89 @@ def process_video(
538
  # -------------------------
539
  def ui_process(
540
  video,
541
- pose_model_path,
542
- face_model_path,
543
  min_pose_det_conf,
544
  min_pose_track_conf,
545
  min_face_det_conf,
546
  ear_threshold,
547
  blink_min_consec,
548
- draw_face_mesh,
549
- face_mesh_lightness,
550
- resize_width,
551
  max_frames
552
  ):
 
553
  if isinstance(video, dict) and "path" in video:
554
  video_path = video["path"]
555
  else:
556
  video_path = video
557
 
558
- out_video, out_csv, out_json, out_plot, out_report = process_video(
559
- video_path=str(video_path),
560
- pose_model_path=str(pose_model_path),
561
- face_model_path=str(face_model_path),
562
- use_gpu_delegate=False, # always CPU
563
- min_pose_det_conf=float(min_pose_det_conf),
564
- min_pose_track_conf=float(min_pose_track_conf),
565
- min_face_det_conf=float(min_face_det_conf),
566
- ear_threshold=float(ear_threshold),
567
- blink_min_consec=int(blink_min_consec),
568
- draw_face_mesh=bool(draw_face_mesh),
569
- face_mesh_lightness=int(face_mesh_lightness),
570
- resize_width=int(resize_width),
571
- max_frames=int(max_frames),
572
- )
573
 
574
- with open(out_report, "r", encoding="utf-8") as f:
575
- report_text = f.read()
 
576
 
577
- return out_video, out_csv, out_json, out_plot, report_text
 
 
 
 
578
 
579
 
580
- demo = gr.Blocks(title="Video Pose + FaceLandmarker (CPU-only) + CSV + Plot")
581
 
582
  with demo:
583
- gr.Markdown("## 上传视频 → MediaPipe Tasks(PoseLandmarker + FaceLandmarker,CPU-only)→ CSV + 曲线图 + 标注视频")
584
 
585
  with gr.Row():
586
- video_in = gr.Video(label="上传视频", sources=["upload"])
587
 
588
- with gr.Accordion("模型与性能参数", open=False):
589
- pose_model_path = gr.Textbox(value=POSE_PATH_DEFAULT, label="Pose .task 路径(默认自动下载官方模型)")
590
- face_model_path = gr.Textbox(value=FACE_PATH_DEFAULT, label="Face .task 路径(默认自动下载官方模型)")
591
- resize_width = gr.Slider(0, 1280, value=640, step=10, label="Resize width(0=不缩放;建议 640 加速)")
592
- max_frames = gr.Number(value=0, precision=0, label="最多处理帧数(0=全处理,调试可设 300)")
593
-
594
- with gr.Accordion("检测阈值参数", open=False):
595
  min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_detection_confidence")
596
  min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_tracking_confidence")
597
  min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face min_detection_confidence")
598
- ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="眨眼阈值 EAR(越小越严格)")
599
- blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="眨眼最小连续帧数(抗抖动)")
600
 
601
- with gr.Accordion("可视化参数", open=False):
602
- draw_face_mesh = gr.Checkbox(value=True, label="输出视频叠加 FaceMesh")
603
- face_mesh_lightness = gr.Slider(200, 255, value=245, step=1, label="FaceMesh 颜色浅度(越大越浅)")
604
 
605
- run_btn = gr.Button("开始分析")
 
 
 
606
 
607
  with gr.Row():
608
- video_out = gr.Video(label="输出:标注视频(浅色 FaceMesh)")
609
- with gr.Row():
610
- csv_out = gr.File(label="逐帧指标 CSV(per_frame_metrics.csv)")
611
- json_out = gr.File(label="汇总 JSON(summary.json)")
612
  with gr.Row():
613
- plot_out = gr.Image(label="曲线图:四肢像素位移 & 眼睛面积变化", type="filepath")
 
614
  report_out = gr.Markdown()
615
 
616
  run_btn.click(
617
  fn=ui_process,
618
  inputs=[
619
  video_in,
620
- pose_model_path,
621
- face_model_path,
622
  min_pose_det_conf,
623
  min_pose_track_conf,
624
  min_face_det_conf,
625
  ear_threshold,
626
  blink_min_consec,
627
- draw_face_mesh,
628
- face_mesh_lightness,
629
- resize_width,
630
  max_frames,
631
  ],
632
- outputs=[video_out, csv_out, json_out, plot_out, report_out],
633
  )
634
 
635
  if __name__ == "__main__":
636
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
 
2
  import math
3
+ import json
4
  import tempfile
5
  from dataclasses import dataclass
6
  from typing import Dict, List, Tuple, Optional
 
9
  import numpy as np
10
  import pandas as pd
11
  import gradio as gr
 
 
 
12
  import mediapipe as mp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  # -------------------------
 
18
  def _dist(a: np.ndarray, b: np.ndarray) -> float:
19
  return float(np.linalg.norm(a - b))
20
 
 
21
  def _safe_div(a: float, b: float, eps: float = 1e-8) -> float:
22
  return a / (b + eps)
23
 
 
24
  def eye_aspect_ratio(pts: Dict[int, np.ndarray], idx: List[int]) -> Optional[float]:
25
  """
26
  EAR = (||p2-p6|| + ||p3-p5||) / (2*||p1-p4||)
 
35
  C = _dist(p1, p4)
36
  return _safe_div((A + B), (2.0 * C))
37
 
 
38
  def angle_3pts(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> Optional[float]:
39
+ """
40
+ angle at point b in degrees formed by a-b-c
41
+ """
42
  ba = a - b
43
  bc = c - b
44
  nba = np.linalg.norm(ba)
 
50
  return float(np.degrees(np.arccos(cosang)))
51
 
52
 
 
 
 
 
 
 
 
 
 
 
53
  # -------------------------
54
+ # MediaPipe indices
55
  # -------------------------
56
+ # FaceMesh landmarks for EAR (common set)
57
+ LEFT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
58
  RIGHT_EYE_EAR_IDX = [362, 385, 387, 263, 373, 380]
 
59
 
60
+ # Pose landmark enum mapping (MediaPipe Pose)
61
  POSE = mp.solutions.pose
62
  POSE_LM = POSE.PoseLandmark
63
 
64
+ # Key joints for limb movement/angles
65
  JOINTS = {
66
  "left_wrist": POSE_LM.LEFT_WRIST.value,
67
  "right_wrist": POSE_LM.RIGHT_WRIST.value,
68
  "left_ankle": POSE_LM.LEFT_ANKLE.value,
69
  "right_ankle": POSE_LM.RIGHT_ANKLE.value,
70
+
71
  "left_shoulder": POSE_LM.LEFT_SHOULDER.value,
72
  "right_shoulder": POSE_LM.RIGHT_SHOULDER.value,
73
  "left_elbow": POSE_LM.LEFT_ELBOW.value,
74
  "right_elbow": POSE_LM.RIGHT_ELBOW.value,
75
+
76
  "left_hip": POSE_LM.LEFT_HIP.value,
77
  "right_hip": POSE_LM.RIGHT_HIP.value,
78
  "left_knee": POSE_LM.LEFT_KNEE.value,
79
  "right_knee": POSE_LM.RIGHT_KNEE.value,
80
  }
81
 
82
+
83
  # -------------------------
84
+ # Drawing
85
  # -------------------------
86
  mp_drawing = mp.solutions.drawing_utils
87
+ mp_drawing_styles = mp.solutions.drawing_styles
88
  mp_face_mesh = mp.solutions.face_mesh
89
 
90
def draw_pose(image_bgr, pose_results):
    """Overlay the detected pose skeleton (legacy Solutions API) on
    `image_bgr` in place; no-op when no pose was detected."""
    if not pose_results.pose_landmarks:
        return
    mp_drawing.draw_landmarks(
        image_bgr,
        pose_results.pose_landmarks,
        POSE.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style(),
    )
98
 
99
def draw_face(image_bgr, face_results, draw_full_mesh: bool = False):
    """Overlay face landmarks (legacy Solutions API) on `image_bgr` in place.

    Contours are always drawn for each detected face; the dense tessellation
    is added on top only when `draw_full_mesh` is True. No-op when no face
    was detected.
    """
    detected = face_results.multi_face_landmarks
    if not detected:
        return
    for landmarks in detected:
        if draw_full_mesh:
            # Dense tessellation - visually heavier.
            mp_drawing.draw_landmarks(
                image_bgr,
                landmarks,
                mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style(),
            )
        # Contours alone are sufficient for most use cases.
        mp_drawing.draw_landmarks(
            image_bgr,
            landmarks,
            mp_face_mesh.FACEMESH_CONTOURS,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style(),
        )
 
121
 
122
  # -------------------------
 
128
  blink_count: int = 0
129
  consec_below: int = 0
130
 
131
+ def update_blink(state: BlinkState, ear: Optional[float], thr: float, min_consec: int) -> BlinkState:
132
+ """
133
+ Basic blink logic:
134
+ - ear below threshold for >= min_consec frames => blink start
135
+ - when ear goes back above => blink end (count once)
136
+ """
137
  if ear is None:
138
+ # treat missing as no-update
139
  return state
140
+
141
  if ear < thr:
142
  state.consec_below += 1
143
  if (not state.in_blink) and state.consec_below >= min_consec:
 
151
 
152
 
153
  # -------------------------
154
+ # Core processing
155
  # -------------------------
156
  def process_video(
157
  video_path: str,
158
+ pose_model_complexity: int = 1,
 
 
 
159
  min_pose_det_conf: float = 0.5,
160
  min_pose_track_conf: float = 0.5,
161
  min_face_det_conf: float = 0.5,
 
162
  ear_threshold: float = 0.21,
163
  blink_min_consec: int = 2,
164
+ draw_full_face_mesh: bool = False,
165
+ max_frames: int = 0, # 0 => all
166
+ ) -> Tuple[str, str, str, str]:
 
 
 
 
167
  """
168
  Returns:
169
+ annotated_video_path, csv_path, json_path, report_md
170
  """
171
  cap = cv2.VideoCapture(video_path)
172
  if not cap.isOpened():
 
175
  fps = cap.get(cv2.CAP_PROP_FPS)
176
  if fps <= 1e-6:
177
  fps = 30.0
178
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
179
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
180
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
181
 
182
+ # output paths
183
+ tmpdir = tempfile.mkdtemp(prefix="mp_analysis_")
 
 
 
 
 
 
184
  out_video = os.path.join(tmpdir, "annotated.mp4")
185
  out_csv = os.path.join(tmpdir, "per_frame_metrics.csv")
186
  out_json = os.path.join(tmpdir, "summary.json")
 
187
  out_report = os.path.join(tmpdir, "report.md")
188
 
189
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
190
  writer = cv2.VideoWriter(out_video, fourcc, fps, (width, height))
191
 
192
+ # MediaPipe init - using legacy API (works without model downloads)
193
+ with mp.solutions.pose.Pose(
194
+ static_image_mode=False,
195
+ model_complexity=pose_model_complexity,
196
+ enable_segmentation=False,
197
+ min_detection_confidence=min_pose_det_conf,
198
+ min_tracking_confidence=min_pose_track_conf,
199
+ ) as pose, mp_face_mesh.FaceMesh(
200
+ static_image_mode=False,
201
+ max_num_faces=1,
202
+ refine_landmarks=True, # improves eye landmarks
203
+ min_detection_confidence=min_face_det_conf,
204
+ min_tracking_confidence=min_face_det_conf,
205
+ ) as face_mesh:
206
+
207
+ rows = []
208
+ prev_pts = {} # for movement delta (normalized coordinates)
209
+ left_blink = BlinkState()
210
+ right_blink = BlinkState()
211
+
212
+ frame_idx = 0
213
+ while True:
214
+ ok, frame_bgr = cap.read()
215
+ if not ok:
216
+ break
217
+ frame_idx += 1
218
+ if max_frames and frame_idx > max_frames:
219
+ break
220
+
221
+ frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
222
+
223
+ pose_res = pose.process(frame_rgb)
224
+ face_res = face_mesh.process(frame_rgb)
225
+
226
+ # Extract face landmarks (pixel coords)
227
+ face_pts: Dict[int, np.ndarray] = {}
228
+ if face_res.multi_face_landmarks:
229
+ lm = face_res.multi_face_landmarks[0].landmark
230
+ for i in range(len(lm)):
231
+ face_pts[i] = np.array([lm[i].x * width, lm[i].y * height], dtype=np.float32)
232
+
233
+ # EAR
234
+ left_ear = eye_aspect_ratio(face_pts, LEFT_EYE_EAR_IDX)
235
+ right_ear = eye_aspect_ratio(face_pts, RIGHT_EYE_EAR_IDX)
236
+
237
+ left_blink = update_blink(left_blink, left_ear, ear_threshold, blink_min_consec)
238
+ right_blink = update_blink(right_blink, right_ear, ear_threshold, blink_min_consec)
239
+
240
+ # Extract pose landmarks (normalized coords + pixel)
241
+ pose_norm: Dict[str, Optional[np.ndarray]] = {}
242
+ pose_px: Dict[str, Optional[np.ndarray]] = {}
243
+ if pose_res.pose_landmarks:
244
+ lms = pose_res.pose_landmarks.landmark
245
+ for name, idx in JOINTS.items():
246
+ if idx < len(lms):
247
+ pose_norm[name] = np.array([lms[idx].x, lms[idx].y], dtype=np.float32)
248
+ pose_px[name] = np.array([lms[idx].x * width, lms[idx].y * height], dtype=np.float32)
249
+ else:
250
+ pose_norm[name] = None
251
+ pose_px[name] = None
252
+ else:
253
+ for name in JOINTS:
254
+ pose_norm[name] = None
255
+ pose_px[name] = None
256
+
257
+ # Limb movement: per-frame displacement & speed (in normalized units)
258
+ def movement_metrics(key: str):
259
+ cur = pose_norm.get(key)
260
+ if cur is None:
261
+ return None, None
262
+ prev = prev_pts.get(key)
263
+ if prev is None:
264
+ d = 0.0
265
+ else:
266
+ d = float(np.linalg.norm(cur - prev))
267
+ v = d * fps
268
+ prev_pts[key] = cur
269
+ return d, v
270
+
271
+ lw_d, lw_v = movement_metrics("left_wrist")
272
+ rw_d, rw_v = movement_metrics("right_wrist")
273
+ la_d, la_v = movement_metrics("left_ankle")
274
+ ra_d, ra_v = movement_metrics("right_ankle")
275
+
276
+ # Joint angles (pixel coords for stability)
277
+ def get_angle(a, b, c):
278
+ if a is None or b is None or c is None:
279
+ return None
280
+ return angle_3pts(a, b, c)
281
+
282
+ left_elbow_ang = get_angle(pose_px["left_shoulder"], pose_px["left_elbow"], pose_px["left_wrist"])
283
+ right_elbow_ang = get_angle(pose_px["right_shoulder"], pose_px["right_elbow"], pose_px["right_wrist"])
284
+ left_knee_ang = get_angle(pose_px["left_hip"], pose_px["left_knee"], pose_px["left_ankle"])
285
+ right_knee_ang = get_angle(pose_px["right_hip"], pose_px["right_knee"], pose_px["right_ankle"])
286
+
287
+ # Draw overlays
288
+ draw_pose(frame_bgr, pose_res)
289
+ draw_face(frame_bgr, face_res, draw_full_mesh=draw_full_face_mesh)
290
+
291
+ # HUD text
292
+ hud_lines = [
293
+ f"frame: {frame_idx}/{total_frames if total_frames>0 else '?'} fps:{fps:.1f}",
294
+ f"EAR L:{left_ear:.3f}" if left_ear is not None else "EAR L:None",
295
+ f"EAR R:{right_ear:.3f}" if right_ear is not None else "EAR R:None",
296
+ f"Blink L:{left_blink.blink_count} R:{right_blink.blink_count}",
297
+ ]
298
+ y0 = 24
299
+ for line in hud_lines:
300
+ cv2.putText(frame_bgr, line, (12, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
301
+ y0 += 22
302
+
303
+ writer.write(frame_bgr)
304
+
305
+ rows.append({
306
+ "frame": frame_idx,
307
+ "time_s": (frame_idx - 1) / fps,
308
+
309
+ "left_ear": left_ear,
310
+ "right_ear": right_ear,
311
+
312
+ "lw_disp": lw_d,
313
+ "rw_disp": rw_d,
314
+ "la_disp": la_d,
315
+ "ra_disp": ra_d,
316
+
317
+ "lw_speed": lw_v,
318
+ "rw_speed": rw_v,
319
+ "la_speed": la_v,
320
+ "ra_speed": ra_v,
321
+
322
+ "left_elbow_angle": left_elbow_ang,
323
+ "right_elbow_angle": right_elbow_ang,
324
+ "left_knee_angle": left_knee_ang,
325
+ "right_knee_angle": right_knee_ang,
326
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  cap.release()
329
  writer.release()
330
 
 
 
 
 
 
 
331
  df = pd.DataFrame(rows)
332
 
333
+ # Summaries
334
  def _sum_series(s: pd.Series):
335
  s2 = s.dropna()
336
  if len(s2) == 0:
337
  return {"mean": None, "min": None, "max": None}
338
  return {"mean": float(s2.mean()), "min": float(s2.min()), "max": float(s2.max())}
339
 
340
+ # movement totals in normalized units (roughly proportional)
341
  summary = {
342
  "video": {
343
  "fps": float(fps),
344
+ "width": width,
345
+ "height": height,
346
  "frames_processed": int(len(df)),
347
+ "duration_s": float(len(df) / fps),
 
 
348
  },
349
  "blink": {
350
  "ear_threshold": float(ear_threshold),
 
353
  "right_blinks": int(right_blink.blink_count),
354
  "left_blinks_per_min": float(_safe_div(left_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
355
  "right_blinks_per_min": float(_safe_div(right_blink.blink_count, (len(df)/fps)/60.0)) if len(df) else 0.0,
356
+ "left_ear_stats": _sum_series(df["left_ear"]),
357
+ "right_ear_stats": _sum_series(df["right_ear"]),
358
  },
359
+ "limb_movement": {
360
+ "total_disp": {
361
+ "left_wrist": float(df["lw_disp"].fillna(0).sum()),
362
+ "right_wrist": float(df["rw_disp"].fillna(0).sum()),
363
+ "left_ankle": float(df["la_disp"].fillna(0).sum()),
364
+ "right_ankle": float(df["ra_disp"].fillna(0).sum()),
365
+ },
366
+ "speed_stats": {
367
+ "left_wrist": _sum_series(df["lw_speed"]),
368
+ "right_wrist": _sum_series(df["rw_speed"]),
369
+ "left_ankle": _sum_series(df["la_speed"]),
370
+ "right_ankle": _sum_series(df["ra_speed"]),
371
+ },
372
+ "angle_stats_deg": {
373
+ "left_elbow": _sum_series(df["left_elbow_angle"]),
374
+ "right_elbow": _sum_series(df["right_elbow_angle"]),
375
+ "left_knee": _sum_series(df["left_knee_angle"]),
376
+ "right_knee": _sum_series(df["right_knee_angle"]),
377
+ }
378
  }
379
  }
380
 
381
+ # Save outputs
382
  df.to_csv(out_csv, index=False)
383
  with open(out_json, "w", encoding="utf-8") as f:
384
  json.dump(summary, f, ensure_ascii=False, indent=2)
385
 
386
+ report_md = f"""# MediaPipe Pose + FaceLandmarks Analysis Report
 
 
 
 
 
 
 
 
387
 
388
+ ## Video Information
389
+ - Resolution: {width} x {height}
 
 
390
  - FPS: {fps:.2f}
391
+ - Frames Processed: {len(df)}
392
+ - Duration (seconds): {summary["video"]["duration_s"]:.2f}
393
+
394
+ ## Blink Analysis (EAR)
395
+ - Threshold: {ear_threshold}
396
+ - Minimum Consecutive Frames: {blink_min_consec}
397
+ - Left Eye Blinks: {summary["blink"]["left_blinks"]} ({summary["blink"]["left_blinks_per_min"]:.2f} blinks/min)
398
+ - Right Eye Blinks: {summary["blink"]["right_blinks"]} ({summary["blink"]["right_blinks_per_min"]:.2f} blinks/min)
399
+ - Left Eye EAR: mean={summary["blink"]["left_ear_stats"]["mean"]} min={summary["blink"]["left_ear_stats"]["min"]} max={summary["blink"]["left_ear_stats"]["max"]}
400
+ - Right Eye EAR: mean={summary["blink"]["right_ear_stats"]["mean"]} min={summary["blink"]["right_ear_stats"]["min"]} max={summary["blink"]["right_ear_stats"]["max"]}
401
+
402
+ ## Limb Movement (normalized units)
403
+ > Displacement/speed based on normalized coordinates (0~1), suitable for relative comparison and trend analysis.
404
+ - Total Displacement (higher = more movement):
405
+ - Left Wrist: {summary["limb_movement"]["total_disp"]["left_wrist"]:.6f}
406
+ - Right Wrist: {summary["limb_movement"]["total_disp"]["right_wrist"]:.6f}
407
+ - Left Ankle: {summary["limb_movement"]["total_disp"]["left_ankle"]:.6f}
408
+ - Right Ankle: {summary["limb_movement"]["total_disp"]["right_ankle"]:.6f}
409
+
410
+ ## Output Files
411
+ - annotated.mp4: Video with Pose and FaceMesh overlays
412
+ - per_frame_metrics.csv: Frame-by-frame metrics (EAR / displacement / speed / joint angles)
413
+ - summary.json: Statistical summary
414
  """
415
  with open(out_report, "w", encoding="utf-8") as f:
416
  f.write(report_md)
417
 
418
+ return out_video, out_csv, out_json, out_report
419
 
420
 
421
  # -------------------------
 
423
  # -------------------------
424
def ui_process(
    video,
    pose_model_complexity,
    min_pose_det_conf,
    min_pose_track_conf,
    min_face_det_conf,
    ear_threshold,
    blink_min_consec,
    draw_full_face_mesh,
    max_frames
):
    """Gradio callback: run process_video() on the uploaded clip.

    Returns a 4-tuple ``(annotated_video, csv_path, json_path, report_markdown)``.
    On any failure the three file outputs are None and the markdown slot
    carries the error text, so the UI shows a message instead of a traceback.
    """
    # Guard: the user may click "Start Analysis" before uploading anything.
    # Without this, str(None) -> "None" would be handed to OpenCV and fail
    # with a confusing decode error.
    if video is None:
        return None, None, None, "# Error Processing Video\n\nNo video was uploaded."

    # Depending on the Gradio version, gr.Video may return either a plain
    # filepath string or a dict holding a "path" key.
    if isinstance(video, dict) and "path" in video:
        video_path = video["path"]
    else:
        video_path = video

    try:
        out_video, out_csv, out_json, out_report = process_video(
            video_path=str(video_path),
            pose_model_complexity=int(pose_model_complexity),
            min_pose_det_conf=float(min_pose_det_conf),
            min_pose_track_conf=float(min_pose_track_conf),
            min_face_det_conf=float(min_face_det_conf),
            ear_threshold=float(ear_threshold),
            blink_min_consec=int(blink_min_consec),
            draw_full_face_mesh=bool(draw_full_face_mesh),
            max_frames=int(max_frames),
        )

        # The markdown report file doubles as the on-screen report panel.
        with open(out_report, "r", encoding="utf-8") as f:
            report_text = f.read()

        return out_video, out_csv, out_json, report_text

    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        error_msg = f"# Error Processing Video\n\n{str(e)}"
        return None, None, None, error_msg
463
 
464
 
465
# Build the Gradio interface. NOTE: the statement order below defines the
# on-screen layout of gr.Blocks, so components are declared in display order.
with gr.Blocks(title="Video Pose + FaceLandmarks + Blink/Limb Analytics") as demo:
    gr.Markdown("## Upload Video → MediaPipe Pose + FaceMesh Limb Movement & Blink Quantification (EAR)")

    with gr.Row():
        video_in = gr.Video(label="Upload Video")

    # Tuning knobs live in a collapsed accordion; the defaults are sensible.
    with gr.Accordion("Parameters (defaults work well)", open=False):
        pose_model_complexity = gr.Radio([0, 1, 2], value=1, label="Pose model_complexity (0=fast / 2=accurate)")
        min_pose_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_detection_confidence")
        min_pose_track_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Pose min_tracking_confidence")
        min_face_det_conf = gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Face min_detection_confidence")

        ear_threshold = gr.Slider(0.10, 0.35, value=0.21, step=0.01, label="Blink Threshold EAR (lower = stricter)")
        blink_min_consec = gr.Slider(1, 6, value=2, step=1, label="Blink Min Consecutive Frames (anti-jitter)")

        draw_full_face_mesh = gr.Checkbox(value=False, label="Overlay Full FaceMesh (denser/slower)")
        max_frames = gr.Number(value=0, precision=0, label="Max Frames to Process (0=all, set 300 for debugging)")

    start_btn = gr.Button("Start Analysis", variant="primary")

    with gr.Row():
        annotated_out = gr.Video(label="Output: Annotated Video")

    with gr.Row():
        metrics_csv_out = gr.File(label="Per-Frame Metrics CSV")
        summary_json_out = gr.File(label="Summary JSON")
    report_out = gr.Markdown()

    # Wire the button to the processing callback; input order must match
    # the ui_process() signature.
    start_btn.click(
        fn=ui_process,
        inputs=[
            video_in,
            pose_model_complexity,
            min_pose_det_conf,
            min_pose_track_conf,
            min_face_det_conf,
            ear_threshold,
            blink_min_consec,
            draw_full_face_mesh,
            max_frames,
        ],
        outputs=[annotated_out, metrics_csv_out, summary_json_out, report_out],
    )

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for Hugging Face Spaces.
    demo.launch(server_name="0.0.0.0", server_port=7860)