# app.py: Sterile-glove posture detection system (F-segment detection), competition-final build
# Notes:
# 1) Combines Mediapipe Pose + Hands with automatic waist-line estimation, and measures the
#    fraction of frames in which both hands are "above the waist" and "fingertips pointing up".
# 2) AND rule: a video passes only if the above-waist ratio >= PASS_TH_POS and the
#    fingertip-up ratio >= PASS_TH_UP.
# 3) This build uses the streamlined "official review" UI:
#    a. video upload block
#    b. consolidated result table + CSV download
#    c. three collapsible info panels (annotation info / system limitations / future work)

import os
import cv2
import numpy as np
import pandas as pd
import tempfile
import gradio as gr

# -----------------------------------------------------------------------------
# Mediapipe setup (if the import fails, detection is disabled and the app
# degrades to returning default / error rows)
# -----------------------------------------------------------------------------
_HAS_MP = True
_HAS_POSE = True
try:
    import mediapipe as mp
    mp_hands = mp.solutions.hands
    mp_pose = mp.solutions.pose
except Exception:
    _HAS_MP = False
    _HAS_POSE = False

# -----------------------------------------------------------------------------
# Global parameters: effectively the "version configuration" of this system
# -----------------------------------------------------------------------------
MAX_LONG_SIDE = 960  # upper bound on the longer image side (avoid oversized frames)

# Automatic waist-line defaults
DEFAULT_WAIST_RATIO = 0.65
CLAMP_LOW = 0.58        # lower bound of the auto waist-line y ratio
CLAMP_HIGH = 0.72       # upper bound of the auto waist-line y ratio
WAIST_DELTA_Y = -0.05   # delta-y adjustment of the auto waist line (negative = move up)

# Pass thresholds (settings for this competition round)
PASS_TH_POS = 50  # above-waist threshold (%)
PASS_TH_UP = 40   # fingertip-up threshold (%)

# Other fixed settings
TAIL_MODE = "全片"    # this build always analyzes the whole clip
TAIL_SECONDS = 3.0    # reserved parameter (for a future tail-segment mode)

COLS = [
    "檔案名稱", "檢測結果", "整體正確率(%)",
    "腰部以上(%)", "指尖朝上(%)",
    "通過幀/有效幀", "有效幀/總幀",
    "FPS", "長度(秒)", "門檻設定", "判斷區間",
    "腰線 y", "腰線來源"
]


# -----------------------------------------------------------------------------
# Utility functions
# -----------------------------------------------------------------------------
def _to_path(f):
    """Convert a gr.File / dict / string into an actual file path."""
    if isinstance(f, str):
        return f
    if hasattr(f, "name"):
        return f.name
    if isinstance(f, dict) and "name" in f:
        return f["name"]
    return str(f)


def _resize_keep_ar(frame, max_long=MAX_LONG_SIDE):
    """Downscale proportionally by the longer side so frames do not get too large."""
    h, w = frame.shape[:2]
    long_side = max(h, w)
    if long_side <= max_long:
        return frame
    scale = max_long / long_side
    nh, nw = int(h * scale), int(w * scale)
    return cv2.resize(frame, (nw, nh), interpolation=cv2.INTER_AREA)


def _is_index_up(lms, h):
    """
    Decide whether the index finger of one hand is clearly pointing up:
    - uses the angle of the PIP→TIP vector
    - also requires the fingertip to sit well above the wrist
    Note: dx is scaled by the frame height as well, so the angle is computed in a
    height-scaled (isotropic) pixel space.
    """
    tip = lms[mp_hands.HandLandmark.INDEX_FINGER_TIP.value]
    pip = lms[mp_hands.HandLandmark.INDEX_FINGER_PIP.value]
    wrist = lms[mp_hands.HandLandmark.WRIST.value]

    dy = (pip.y - tip.y) * h
    dx = abs(pip.x - tip.x) * h
    angle_deg = np.degrees(np.arctan2(dy, dx))
    height_diff = (wrist.y - tip.y) * h

    return (angle_deg > 25) and (height_diff > 40)


# -----------------------------------------------------------------------------
# Automatic waist-line estimation: Pose hips / wrist-height percentile
# -----------------------------------------------------------------------------
def _auto_waist_ratio_pose(video_path, sample_frames=40,
                           clamp_low=CLAMP_LOW, clamp_high=CLAMP_HIGH):
    """Estimate the waist-line y ratio from the Pose hip landmarks
    (per-frame hip midpoint, median across sampled frames)."""
    if not _HAS_POSE:
        return None
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return None
    step = max(1, total // sample_frames)
    ys = []
    with mp_pose.Pose(
        model_complexity=0,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as pose:
        idx = 0
        while idx < total and len(ys) < sample_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ok, frame = cap.read()
            if not ok:
                break
            frame = _resize_keep_ar(frame)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = pose.process(rgb)
            if res.pose_landmarks:
                lms = res.pose_landmarks.landmark
                hip_y = (lms[23].y + lms[24].y) / 2.0  # left/right hip midpoint
                ys.append(hip_y)
            idx += step
    cap.release()
    if not ys:
        return None
    p = float(np.median(ys)) + WAIST_DELTA_Y
    return float(min(clamp_high, max(clamp_low, p)))
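
# Worked example of the adjust-then-clamp step above (illustrative numbers only,
# not measured from any real video): a sampled hip-y median of 0.78 becomes
# 0.78 + WAIST_DELTA_Y = 0.73 and is clamped down to CLAMP_HIGH = 0.72; a median
# of 0.55 becomes 0.50 and is raised to CLAMP_LOW = 0.58. The waist line therefore
# always stays inside [0.58, 0.72] of the frame height.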

def _auto_waist_ratio_hand(video_path, sample_frames=40, percentile=85,
                           clamp_low=CLAMP_LOW, clamp_high=CLAMP_HIGH):
    """Estimate the waist-line y ratio from a percentile of wrist heights
    (fallback when Pose is unavailable or unreliable)."""
    if not _HAS_MP:
        return DEFAULT_WAIST_RATIO
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return DEFAULT_WAIST_RATIO
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return DEFAULT_WAIST_RATIO
    step = max(1, total // sample_frames)
    ys = []
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        model_complexity=0,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as hands:
        idx = 0
        while idx < total and len(ys) < sample_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ok, frame = cap.read()
            if not ok:
                break
            frame = _resize_keep_ar(frame)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(rgb)
            if res.multi_hand_landmarks:
                wrist_ys = [
                    lm.landmark[mp_hands.HandLandmark.WRIST.value].y
                    for lm in res.multi_hand_landmarks
                ]
                if wrist_ys:
                    ys.append(max(wrist_ys))  # lowest wrist (largest y) in the frame
            idx += step
    cap.release()
    if not ys:
        return DEFAULT_WAIST_RATIO
    p = float(np.percentile(ys, percentile)) + WAIST_DELTA_Y
    return float(min(clamp_high, max(clamp_low, p)))


# -----------------------------------------------------------------------------
# Single-video analysis
# -----------------------------------------------------------------------------
def analyze_one_video(video_path):
    """Analyze one video and return a single result row (matching COLS)."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return [
            os.path.basename(video_path), "未通過",
            0, 0, 0, "0/0", "0/0", 0, 0,
            f"{PASS_TH_POS}/{PASS_TH_UP}", "-", "-", "開啟失敗"
        ]

    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    duration_s = (total_frames / fps) if total_frames > 0 else 0.0

    # This build always analyzes the whole clip
    start = 0
    span = "全片"
    cap.set(cv2.CAP_PROP_POS_FRAMES, start)

    # Decide the waist-line position
    used_y_ratio = DEFAULT_WAIST_RATIO
    waist_source = "預設值"

    # Try the Pose-based auto waist line first
    auto_pose = _auto_waist_ratio_pose(video_path)
    if auto_pose is not None:
        used_y_ratio = auto_pose
        waist_source = "Pose"
    else:
        # If Pose is unreliable, fall back to the wrist-height percentile
        used_y_ratio = _auto_waist_ratio_hand(video_path)
        waist_source = "手腕分位數"

    hands = None
    if _HAS_MP:
        hands = mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            model_complexity=0,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )

    valid = ok_pos = ok_up = ok_both = 0

    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frame = _resize_keep_ar(frame)
        h, w = frame.shape[:2]
        y_ref = int(h * used_y_ratio)

        this_pos = this_up = False
        detected = False

        if _HAS_MP and hands:
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(rgb)
            if res.multi_hand_landmarks:
                detected = True
                pos_flags = []
                up_flags = []
                for lmset in res.multi_hand_landmarks:
                    lms = lmset.landmark
                    wrist = lms[mp_hands.HandLandmark.WRIST.value]
                    pos_flags.append((wrist.y * h) <= y_ref)
                    up_flags.append(_is_index_up(lms, h))
                # Require both hands to comply (if only one hand is detected,
                # that hand decides the frame)
                if len(pos_flags) >= 2:
                    this_pos = all(pos_flags[:2])
                    this_up = all(up_flags[:2])
                else:
                    this_pos = pos_flags[0]
                    this_up = up_flags[0]

        if detected:
            valid += 1
            if this_pos:
                ok_pos += 1
            if this_up:
                ok_up += 1
            if this_pos and this_up:
                ok_both += 1

    cap.release()
    if hands:
        hands.close()

    if valid == 0:
        above_ratio = up_ratio = acc = 0.0
        passed_valid = "0/0"
    else:
        above_ratio = (ok_pos / valid) * 100.0
        up_ratio = (ok_up / valid) * 100.0
        acc = (ok_both / valid) * 100.0
        passed_valid = f"{ok_both}/{valid}"

    # AND rule: both the above-waist ratio and the fingertip-up ratio must reach
    # their thresholds for the clip to pass
    result_str = (
        "通過"
        if (above_ratio >= PASS_TH_POS and up_ratio >= PASS_TH_UP)
        else "未通過"
    )

    valid_total = f"{valid}/{total_frames}"

    return [
        os.path.basename(video_path),
        result_str,
        round(acc, 2),
        round(above_ratio, 2),
        round(up_ratio, 2),
        passed_valid,
        valid_total,
        int(round(fps)),
        round(duration_s, 2),
        f"{int(PASS_TH_POS)}/{int(PASS_TH_UP)}",
        span,
        round(float(used_y_ratio), 3),
        waist_source
    ]


# -----------------------------------------------------------------------------
# Multi-video analysis core + summary text
# -----------------------------------------------------------------------------
def run_core(files, state):
    """Run the actual analysis and return the DataFrame plus the CSV path."""
    if not files:
        df = pd.DataFrame([], columns=COLS)
        tmp_path = None
        return None, df, [], tmp_path

    rows = []
    for f in files:
        path = _to_path(f)
        if os.path.exists(path):
            rows.append(analyze_one_video(path))

    state_new = (state or []) + rows
    df = pd.DataFrame(state_new, columns=COLS)

    tmp = tempfile.NamedTemporaryFile(suffix=".csv", delete=False)
    df.to_csv(tmp.name, index=False, encoding="utf-8-sig")

    return files, df, state_new, tmp.name


def build_summary_text(df: pd.DataFrame) -> str:
    """Produce a fairly complete summary of the result table."""
    if df is None or df.empty:
        return "尚未進行分析。"

    total = len(df)
    passed = int((df["檢測結果"] == "通過").sum())
    failed = total - passed
    pass_rate = (passed / total * 100.0) if total > 0 else 0.0

    mean_acc = df["整體正確率(%)"].mean()
    mean_pos = df["腰部以上(%)"].mean()
    mean_up = df["指尖朝上(%)"].mean()

    best_row = df.loc[df["整體正確率(%)"].idxmax()]
    worst_row = df.loc[df["整體正確率(%)"].idxmin()]

    text = f"""### 📌 統整摘要

- 本次分析 **{total} 支影片**,其中 **{passed} 支通過、{failed} 支未通過**(通過率約 **{pass_rate:.1f}%**)。
- 平均整體正確率:約 **{mean_acc:.1f}%**。
- 平均腰部以上比例:約 **{mean_pos:.1f}%**;平均指尖朝上比例:約 **{mean_up:.1f}%**。
- 最佳表現影片:**{best_row['檔案名稱']}**(整體正確率 **{best_row['整體正確率(%)']:.1f}%**)。
- 需要特別留意的影片:**{worst_row['檔案名稱']}**(整體正確率 **{worst_row['整體正確率(%)']:.1f}%**)。
- 目前門檻設定:**腰部門檻 {PASS_TH_POS:.0f}%、指尖門檻 {PASS_TH_UP:.0f}%**,採 **AND 規則**(兩者皆需達標)。
- 腰線來源:優先使用 **Pose 腰點中位數自動估計**,必要時改採 **手腕高度分位數** 作為備援,並限制在安全範圍內。
"""
    return text


def run_with_status(files, state):
    """Button callback: update the status line, DataFrame, CSV download and summary."""
    # Interim status (note: this is a plain function rather than a generator, so only
    # the final status below reaches the UI; Gradio's progress indicator covers the
    # "analyzing" phase)
    status_text = "🟠 系統正在分析影片,請稍候…"
    files2, df, state2, csv_path = run_core(files, state)
    summary_text = build_summary_text(df)

    # Final status
    status_text = "🟢 分析完成,可以檢視表格並下載 CSV 報表。"

    # Reveal the download button only when a CSV was actually produced
    csv_update = gr.DownloadButton(value=csv_path, visible=csv_path is not None)
    return status_text, files2, df, state2, csv_update, summary_text


def do_clear(state):
    """Clear button: reset every displayed component."""
    df = pd.DataFrame([], columns=COLS)
    status_text = "🟢 系統就緒,請上傳影片後按下「分析」。"
    return status_text, None, df, [], gr.DownloadButton(visible=False), "尚未進行分析。"

# -----------------------------------------------------------------------------
# Gradio UI construction
# -----------------------------------------------------------------------------
with gr.Blocks(
    title="無菌手套姿勢檢測系統(F-segment 偵測)",
    css="""
    #status-text {
        color: #1565c0;  /* dark-blue status line */
        font-weight: 600;
        padding: 4px 0;
    }
    """
) as demo:

    # Titles
    gr.Markdown("# 👑 醫療技術AI王")
    gr.Markdown("## 🧤 無菌手套姿勢檢測系統(F-segment 偵測)")

    status_md = gr.Markdown(
        "🟢 系統就緒,請上傳影片後按下「分析」。",
        elem_id="status-text"
    )

    # Upload block
    files = gr.File(
        label="上傳檢測影片(mp4,可多支)",
        file_types=[".mp4"],
        file_count="multiple"
    )

    # Button row
    with gr.Row():
        btn_run = gr.Button("分析", variant="primary", scale=1)
        btn_csv = gr.DownloadButton("下載 CSV 報表", visible=False, scale=1)
        btn_clear = gr.Button("清除", variant="secondary", scale=1)

    # State: accumulated result rows across runs
    table_state = gr.State([])

    # Result table
    table = gr.Dataframe(
        headers=COLS,
        label="📊 手套穿戴通過率彙整表",
        interactive=False,
        wrap=True
    )

    # Summary text
    summary = gr.Markdown("尚未進行分析。")

    # Collapsible info panels
    with gr.Accordion("📘 本次標註資訊(可展開)", open=False):
        gr.Markdown("""
### 🔍 本次標註
- **標準片(應通過)**:FV3、FV4、FV5、FV6
- **錯片(應不通過)**:WFV6-2、WFV6

### 🟡 邊界案例說明
- **FV5**
    - 臨床判定為「動作正確」,屬標準片。
    - 目前版本採取較保守的腰線與門檻設定,在自動腰線估計偏高的情況下,會被系統判為「未通過」,屬於安全邊界上的 **false negative(誤殺)**。
    - 此設計反映系統的取捨:**寧可多擋一支可疑影片,也不輕易放過可能錯誤的動作。**
        """)

    with gr.Accordion("⚠️ 系統限制說明(可展開)", open=False):
        gr.Markdown("""
### 📌 系統目前限制
- 受拍攝角度與距離影響:腰線位置與手部高度皆為 2D 投影,視角改變可能影響判斷。
- 光線與背景:低光源、強反光或背景雜訊會影響 Mediapipe 標記的穩定度。
- 動作抖動:快速移動或 motion blur 會降低有效幀比例,使通過率偏低。
        """)

    with gr.Accordion("🚀 未來改善方向(可展開)", open=False):
        gr.Markdown("""
### 🌱 系統優化方向
- 引入 YOLO / 其他手部偵測模型,提升各種光線與遮擋情境下的穩定度。
- 加入 temporal smoothing(時序平滑),降低單一錯誤幀對結果的影響。
- 導入影像前處理(亮度 / 對比度校正)以改善暗色環境的偵測品質。
- 若資料量充足,可進一步訓練專用的 F-segment 深度學習模型。
        """)

    # Event bindings: Gradio's built-in minimal progress indicator is sufficient here
    btn_run.click(
        fn=run_with_status,
        inputs=[files, table_state],
        outputs=[status_md, files, table, table_state, btn_csv, summary],
        show_progress="minimal"
    )

    btn_clear.click(
        fn=do_clear,
        inputs=[table_state],
        outputs=[status_md, files, table, table_state, btn_csv, summary],
        show_progress="hidden"
    )

# -----------------------------------------------------------------------------
# Launch (for local testing; on Hugging Face the app is served automatically by Spaces)
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
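
# -----------------------------------------------------------------------------
# Usage note (a minimal sketch only; the file names below are hypothetical
# placeholders, not files shipped with this app). run_core can also be called
# directly, without the UI, to batch-check clips:
#
#     _, df, _, csv_path = run_core(["FV3.mp4", "WFV6.mp4"], [])
#     print(df[["檔案名稱", "檢測結果"]])
#     print("CSV written to:", csv_path)
#
# Dependency note (an assumption inferred from the imports, not a pinned list from
# the original project): gradio 4.x (for gr.DownloadButton), mediapipe,
# opencv-python, numpy, pandas.
# -----------------------------------------------------------------------------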