# glove / app.py  (Hugging Face Space header — scrape artifact, kept as comments)
# amanda-cgu's picture
# Update app.py
# 0c9453b verified
# app.py — 無菌手套姿勢檢測系統(F-segment 偵測)複賽正式版
# 說明:
# 1) 結合 Mediapipe Pose + Hands / 自動腰線偵測,並檢測雙手「腰部以上」與「指尖朝上」比例
# 2) AND 規則:腰部以上比例 >= PASS_TH_POS 且 指尖朝上比例 >= PASS_TH_UP 才算通過
# 3) 本版 UI 採「正式評圖版」精簡介面:
# a. 上傳影片區塊
# b. 統整結果表 + CSV 下載
# c. 三個可展開的說明區塊(標註資訊 / 系統限制 / 未來改善)
import os
import cv2
import numpy as np
import pandas as pd
import tempfile
import gradio as gr
# -----------------------------------------------------------------------------
# Mediapipe 相關設定(若無法匯入,則自動降級為僅顯示錯誤)
# -----------------------------------------------------------------------------
# Feature flags: flipped to False when mediapipe cannot be imported, so the
# rest of the app degrades gracefully instead of crashing at import time.
_HAS_MP = True
_HAS_POSE = True
try:
    import mediapipe as mp
    mp_hands = mp.solutions.hands
    mp_pose = mp.solutions.pose
except Exception:
    # Broad catch on purpose: mediapipe can fail in many ways on a Space
    # (missing wheels, GPU/driver issues); any failure means "no mediapipe".
    _HAS_MP = False
    _HAS_POSE = False
# -----------------------------------------------------------------------------
# 全域參數:可視為本系統的「版本設定」
# -----------------------------------------------------------------------------
MAX_LONG_SIDE = 960  # upper bound on a frame's long side when resizing (avoids oversized images)
# Auto waist-line defaults
DEFAULT_WAIST_RATIO = 0.65  # fallback waist-line y ratio when auto-detection fails
CLAMP_LOW = 0.58   # lower clamp for the auto waist-line y ratio
CLAMP_HIGH = 0.72  # upper clamp for the auto waist-line y ratio
WAIST_DELTA_Y = -0.05  # Δy fine-tune for the auto waist line (negative moves it upward)
# Pass thresholds (settings for this competition round)
PASS_TH_POS = 50  # waist threshold (%): min share of valid frames with hands above the waist
PASS_TH_UP = 40   # fingertip threshold (%): min share of valid frames with fingertips pointing up
# Other fixed settings
TAIL_MODE = "全片"  # this version always analyzes the whole clip
TAIL_SECONDS = 3.0  # reserved (for a future tail-segment-only analysis mode)
# Column headers of the results table / CSV — analyze_one_video() returns rows
# in exactly this order.
COLS = [
    "檔案名稱", "檢測結果", "整體正確率(%)", "腰部以上(%)", "指尖朝上(%)",
    "通過幀/有效幀", "有效幀/總幀", "FPS", "長度(秒)",
    "門檻設定", "判斷區間", "腰線 y", "腰線來源"
]
# -----------------------------------------------------------------------------
# 工具函式
# -----------------------------------------------------------------------------
def _to_path(f):
"""把 gr.File / dict / 字串 轉成實際檔案路徑。"""
if isinstance(f, str):
return f
if hasattr(f, "name"):
return f.name
if isinstance(f, dict) and "name" in f:
return f["name"]
return str(f)
def _resize_keep_ar(frame, max_long=MAX_LONG_SIDE):
    """Shrink *frame* so its long side is at most *max_long*, keeping aspect ratio.

    Frames already within the limit are returned unchanged (no copy).
    """
    h, w = frame.shape[:2]
    longest = max(h, w)
    if longest > max_long:
        factor = max_long / longest
        # cv2.resize takes (width, height); INTER_AREA is best for downscaling
        new_size = (int(w * factor), int(h * factor))
        return cv2.resize(frame, new_size, interpolation=cv2.INTER_AREA)
    return frame
def _is_index_up(lms, h, w=None):
    """Return True when one hand's index finger points clearly upward.

    Heuristics (pixel space):
      - the PIP→TIP vector must rise at more than 25° above horizontal, and
      - the fingertip must sit at least 40 px higher than the wrist.

    Parameters
    ----------
    lms : sequence of Mediapipe hand landmarks (normalized x/y in [0, 1]).
    h : frame height in pixels; scales the vertical components.
    w : frame width in pixels, used to scale the horizontal delta. Defaults
        to None, which falls back to the historical behavior of scaling dx
        by the height too — kept for backward compatibility, since the 25°
        threshold was tuned against that variant.
    """
    tip = lms[mp_hands.HandLandmark.INDEX_FINGER_TIP.value]
    pip = lms[mp_hands.HandLandmark.INDEX_FINGER_PIP.value]
    wrist = lms[mp_hands.HandLandmark.WRIST.value]
    dy = (pip.y - tip.y) * h
    # BUG (pre-existing): dx used to be scaled by the frame *height*, which
    # distorts the angle on non-square frames. Pass w for the geometrically
    # correct pixel-space angle; omitting it preserves the old behavior.
    dx = abs(pip.x - tip.x) * (w if w is not None else h)
    angle_deg = np.degrees(np.arctan2(dy, dx))
    # positive when the fingertip is above the wrist (image y grows downward)
    height_diff = (wrist.y - tip.y) * h
    return (angle_deg > 25) and (height_diff > 40)
# -----------------------------------------------------------------------------
# 自動腰線估計:Pose / Wrist percentile
# -----------------------------------------------------------------------------
def _auto_waist_ratio_pose(video_path, sample_frames=40,
                           clamp_low=CLAMP_LOW, clamp_high=CLAMP_HIGH):
    """Estimate the waist-line y ratio from the Mediapipe Pose hip landmarks.

    Samples up to *sample_frames* evenly spaced frames, records the mean
    normalized y of the two hip landmarks (indices 23/24) per frame, then
    returns the median shifted by WAIST_DELTA_Y and clamped to
    [clamp_low, clamp_high]. Returns None when Pose is unavailable, the
    video cannot be read, or no hips were detected.
    """
    if not _HAS_POSE:
        return None
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return None
    stride = max(1, total // sample_frames)
    hip_samples = []
    with mp_pose.Pose(
        model_complexity=0,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as pose:
        frame_no = 0
        while frame_no < total and len(hip_samples) < sample_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            grabbed, frame = cap.read()
            if not grabbed:
                break
            rgb = cv2.cvtColor(_resize_keep_ar(frame), cv2.COLOR_BGR2RGB)
            result = pose.process(rgb)
            if result.pose_landmarks:
                marks = result.pose_landmarks.landmark
                # landmarks 23/24 are the left/right hips
                hip_samples.append((marks[23].y + marks[24].y) / 2.0)
            frame_no += stride
    cap.release()
    if not hip_samples:
        return None
    estimate = float(np.median(hip_samples)) + WAIST_DELTA_Y
    return float(min(clamp_high, max(clamp_low, estimate)))
def _auto_waist_ratio_hand(video_path, sample_frames=40,
                           percentile=85,
                           clamp_low=CLAMP_LOW, clamp_high=CLAMP_HIGH):
    """Estimate the waist-line y ratio from wrist heights (fallback for Pose).

    Samples up to *sample_frames* evenly spaced frames; for each frame with
    detected hands, records the lowest wrist (largest normalized y). Returns
    the requested percentile shifted by WAIST_DELTA_Y and clamped to
    [clamp_low, clamp_high]. Falls back to DEFAULT_WAIST_RATIO whenever the
    video or mediapipe is unusable or no wrists were seen.
    """
    if not _HAS_MP:
        return DEFAULT_WAIST_RATIO
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return DEFAULT_WAIST_RATIO
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if total <= 0:
        cap.release()
        return DEFAULT_WAIST_RATIO
    stride = max(1, total // sample_frames)
    wrist_samples = []
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        model_complexity=0,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as hands:
        frame_no = 0
        while frame_no < total and len(wrist_samples) < sample_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            grabbed, frame = cap.read()
            if not grabbed:
                break
            rgb = cv2.cvtColor(_resize_keep_ar(frame), cv2.COLOR_BGR2RGB)
            result = hands.process(rgb)
            if result.multi_hand_landmarks:
                per_hand = [
                    hand.landmark[mp_hands.HandLandmark.WRIST.value].y
                    for hand in result.multi_hand_landmarks
                ]
                if per_hand:
                    # keep the lowest wrist in the frame (max y = lowest point)
                    wrist_samples.append(max(per_hand))
            frame_no += stride
    cap.release()
    if not wrist_samples:
        return DEFAULT_WAIST_RATIO
    estimate = float(np.percentile(wrist_samples, percentile)) + WAIST_DELTA_Y
    return float(min(clamp_high, max(clamp_low, estimate)))
# -----------------------------------------------------------------------------
# 單一影片分析
# -----------------------------------------------------------------------------
def analyze_one_video(video_path):
    """Analyze a single video and return one result row (matching COLS).

    Pipeline: open the clip, choose a waist line (Pose estimate first, then
    wrist-percentile fallback, then the default ratio), run Mediapipe Hands
    on every frame, and tally frames where the hand(s) are above the waist
    and the index fingertip points up. Percentages are computed over frames
    where at least one hand was detected ("valid" frames), not total frames.
    The final verdict uses the AND rule: both percentages must clear their
    thresholds (PASS_TH_POS / PASS_TH_UP).
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Could not open the file at all — emit a failure row with zeros.
        return [
            os.path.basename(video_path), "未通過",
            0, 0, 0,
            "0/0", "0/0", 0, 0,
            f"{PASS_TH_POS}/{PASS_TH_UP}", "-",
            "-", "開啟失敗"
        ]
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back to 30 when FPS reads as 0/unknown
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    duration_s = (total_frames / fps) if total_frames > 0 else 0.0
    # This version always analyzes the whole clip
    start = 0
    span = "全片"
    cap.set(cv2.CAP_PROP_POS_FRAMES, start)
    # Decide the waist-line position
    used_y_ratio = DEFAULT_WAIST_RATIO
    waist_source = "預設值"
    # Try the Pose-based automatic waist line first
    auto_pose = _auto_waist_ratio_pose(video_path)
    if auto_pose is not None:
        used_y_ratio = auto_pose
        waist_source = "Pose"
    else:
        # Pose was unreliable — fall back to the wrist-percentile estimate
        used_y_ratio = _auto_waist_ratio_hand(video_path)
        waist_source = "手腕分位數"
    hands = None
    if _HAS_MP:
        hands = mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            model_complexity=0,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
    # Per-video tallies: frames with a detection / above-waist / fingers-up / both
    valid = ok_pos = ok_up = ok_both = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frame = _resize_keep_ar(frame)
        h, w = frame.shape[:2]
        y_ref = int(h * used_y_ratio)  # waist line in pixel coordinates
        this_pos = this_up = False
        detected = False
        if _HAS_MP and hands:
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(rgb)
            if res.multi_hand_landmarks:
                detected = True
                pos_flags = []
                up_flags = []
                for lmset in res.multi_hand_landmarks:
                    lms = lmset.landmark
                    wrist = lms[mp_hands.HandLandmark.WRIST.value]
                    # smaller y = higher in the image, so <= y_ref means above the waist
                    pos_flags.append((wrist.y * h) <= y_ref)
                    up_flags.append(_is_index_up(lms, h))
                # Require BOTH hands to comply (if only one hand was detected,
                # judge by that hand alone)
                if len(pos_flags) >= 2:
                    this_pos = all(pos_flags[:2])
                    this_up = all(up_flags[:2])
                else:
                    this_pos = pos_flags[0]
                    this_up = up_flags[0]
        if detected:
            valid += 1
            if this_pos:
                ok_pos += 1
            if this_up:
                ok_up += 1
            if this_pos and this_up:
                ok_both += 1
    cap.release()
    if hands:
        hands.close()
    if valid == 0:
        # No hand was ever detected — all ratios are zero by definition.
        above_ratio = up_ratio = acc = 0.0
        passed_valid = "0/0"
    else:
        above_ratio = (ok_pos / valid) * 100.0
        up_ratio = (ok_up / valid) * 100.0
        acc = (ok_both / valid) * 100.0
        passed_valid = f"{ok_both}/{valid}"
    # AND rule: both ratios must clear their thresholds to pass
    result_str = (
        "通過"
        if (above_ratio >= PASS_TH_POS and up_ratio >= PASS_TH_UP)
        else "未通過"
    )
    valid_total = f"{valid}/{total_frames}"
    return [
        os.path.basename(video_path), result_str,
        round(acc, 2), round(above_ratio, 2), round(up_ratio, 2),
        passed_valid, valid_total, int(round(fps)), round(duration_s, 2),
        f"{int(PASS_TH_POS)}/{int(PASS_TH_UP)}",
        span, round(float(used_y_ratio), 3), waist_source
    ]
# -----------------------------------------------------------------------------
# 多支影片分析核心 + 統整摘要
# -----------------------------------------------------------------------------
def run_core(files, state):
    """Analyze the uploaded videos and return (files, df, new_state, csv_path).

    Appends the new result rows to the accumulated *state*, rebuilds the
    full DataFrame, and writes it to a temporary CSV. Uses utf-8-sig so
    Excel renders the Chinese headers correctly.

    Returns (None, empty_df, [], None) when nothing was uploaded.
    """
    if not files:
        df = pd.DataFrame([], columns=COLS)
        return None, df, [], None
    rows = []
    for f in files:
        path = _to_path(f)
        if os.path.exists(path):
            rows.append(analyze_one_video(path))
    state_new = (state or []) + rows
    df = pd.DataFrame(state_new, columns=COLS)
    # mkstemp + close avoids holding an open handle while pandas writes to the
    # same path: the previous NamedTemporaryFile(delete=False) approach leaked
    # the handle and fails on Windows, where the file can't be opened twice.
    fd, csv_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)
    df.to_csv(csv_path, index=False, encoding="utf-8-sig")
    return files, df, state_new, csv_path
def build_summary_text(df: pd.DataFrame) -> str:
    """Render the markdown summary shown beneath the results table."""
    if df is None or df.empty:
        return "尚未進行分析。"
    n_total = len(df)
    n_pass = (df["檢測結果"] == "通過").sum()
    n_fail = n_total - n_pass
    rate = (n_pass / n_total * 100.0) if n_total > 0 else 0.0
    avg_acc = df["整體正確率(%)"].mean()
    avg_pos = df["腰部以上(%)"].mean()
    avg_up = df["指尖朝上(%)"].mean()
    # Rows with the highest / lowest overall accuracy
    best = df.loc[df["整體正確率(%)"].idxmax()]
    worst = df.loc[df["整體正確率(%)"].idxmin()]
    return f"""### 📌 統整摘要
- 本次分析 **{n_total} 支影片**,其中 **{n_pass} 支通過、{n_fail} 支未通過**(通過率約 **{rate:.1f}%**)。
- 平均整體正確率:約 **{avg_acc:.1f}%**。
- 平均腰部以上比例:約 **{avg_pos:.1f}%**;平均指尖朝上比例:約 **{avg_up:.1f}%**。
- 最佳表現影片:**{best['檔案名稱']}**(整體正確率 **{best['整體正確率(%)']:.1f}%**)。
- 需要特別留意的影片:**{worst['檔案名稱']}**(整體正確率 **{worst['整體正確率(%)']:.1f}%**)。
- 目前門檻設定:**腰部門檻 {PASS_TH_POS:.0f}%、指尖門檻 {PASS_TH_UP:.0f}%**,採 **AND 規則**(兩者皆需達標)。
- 腰線來源:優先使用 **Pose 腰點中位數自動估計**,必要時改採 **手腕高度分位數** 作為備援,並限制在安全範圍內。
"""
def run_with_status(files, state):
    """Analyze-button callback: refresh status line, table, state, CSV, summary.

    Returns (status_md, files, df, state, csv_path, summary_md), matching the
    outputs list bound in btn_run.click().
    """
    # NOTE(review): a plain function cannot surface an intermediate
    # "analyzing…" message to Gradio — only the final return value is
    # rendered. The previous version assigned such a message and immediately
    # overwrote it (a dead store). Convert this to a generator with `yield`
    # if a live progress message is wanted.
    files2, df, state2, csv_path = run_core(files, state)
    summary_text = build_summary_text(df)
    status_text = "🟢 分析完成,可以檢視表格並下載 CSV 報表。"
    return status_text, files2, df, state2, csv_path, summary_text
def do_clear(state):
    """Clear-button callback: reset every output widget to its idle state."""
    empty_table = pd.DataFrame([], columns=COLS)
    ready_msg = "🟢 系統就緒,請上傳影片後按下「分析」。"
    # (status, files, table, state, csv, summary) — matches btn_clear outputs
    return ready_msg, None, empty_table, [], None, "尚未進行分析。"
# -----------------------------------------------------------------------------
# Gradio UI 建構
# -----------------------------------------------------------------------------
# Build the Gradio UI: title, status line, upload area, buttons, results
# table, summary, and three collapsible info panels, then wire the callbacks.
with gr.Blocks(
    title="無菌手套姿勢檢測系統(F-segment 偵測)",
    css="""
#status-text {
color: #1565c0; /* 深藍色狀態列 */
font-weight: 600;
padding: 4px 0;
}
"""
) as demo:
    # Titles
    gr.Markdown("# 👑 醫療技術AI王")
    gr.Markdown("## 🧤 無菌手套姿勢檢測系統(F-segment 偵測)")
    # Status line, styled via the #status-text CSS rule above
    status_md = gr.Markdown(
        "🟢 系統就緒,請上傳影片後按下「分析」。",
        elem_id="status-text"
    )
    # Upload area (multiple mp4 files)
    files = gr.File(
        label="上傳檢測影片(mp4,可多支)",
        file_types=[".mp4"],
        file_count="multiple"
    )
    # Button row
    with gr.Row():
        btn_run = gr.Button("分析", variant="primary", scale=1)
        # NOTE(review): this button starts hidden and no callback ever sets
        # visible=True again — run_with_status writes the CSV path into it,
        # but it appears to stay invisible. Confirm whether the callback
        # should return a gr.DownloadButton(value=..., visible=True) update.
        btn_csv = gr.DownloadButton("下載 CSV 報表", visible=False, scale=1)
        btn_clear = gr.Button("清除", variant="secondary", scale=1)
    # Cross-click state: accumulated result rows across multiple analyses
    table_state = gr.State([])
    # Results table
    table = gr.Dataframe(
        headers=COLS,
        label="📊 手套穿戴通過率彙整表",
        interactive=False,
        wrap=True
    )
    # Summary markdown
    summary = gr.Markdown("尚未進行分析。")
    # Extra explanation panels (collapsible)
    with gr.Accordion("📘 本次標註資訊(可展開)", open=False):
        gr.Markdown("""
### 🔍 本次標註
- **標準片(應通過)**:FV3、FV4、FV5、FV6
- **錯片(應不通過)**:WFV6-2、WFV6
### 🟡 邊界案例說明
- **FV5**
- 臨床判定為「動作正確」,屬標準片。
- 目前版本採取較保守的腰線與門檻設定,在自動腰線估計偏高的情況下,
會被系統判為「未通過」,屬於安全邊界上的 **false negative(誤殺)**。
- 此設計反映系統的取捨:**寧可多擋一支可疑影片,也不輕易放過可能錯誤的動作。**
""")
    with gr.Accordion("⚠️ 系統限制說明(可展開)", open=False):
        gr.Markdown("""
### 📌 系統目前限制
- 受拍攝角度與距離影響:腰線位置與手部高度皆為 2D 投影,視角改變可能影響判斷。
- 光線與背景:低光源、強反光或背景雜訊會影響 Mediapipe 標記的穩定度。
- 動作抖動:快速移動或 motion blur 會降低有效幀比例,使通過率偏低。
""")
    with gr.Accordion("🚀 未來改善方向(可展開)", open=False):
        gr.Markdown("""
### 🌱 系統優化方向
- 引入 YOLO / 其他手部偵測模型,提升各種光線與遮擋情境下的穩定度。
- 加入 temporal smoothing(時序平滑),降低單一錯誤幀對結果的影響。
- 導入影像前處理(亮度 / 對比度校正)以改善暗色環境的偵測品質。
- 若資料量充足,可進一步訓練專用的 F-segment 深度學習模型。
""")
    # Event wiring: Gradio's built-in minimal progress bar is sufficient here
    btn_run.click(
        fn=run_with_status,
        inputs=[files, table_state],
        outputs=[status_md, files, table, table_state, btn_csv, summary],
        show_progress="minimal"
    )
    btn_clear.click(
        fn=do_clear,
        inputs=[table_state],
        outputs=[status_md, files, table, table_state, btn_csv, summary],
        show_progress="hidden"
    )
# -----------------------------------------------------------------------------
# Launch (local testing only; on Hugging Face, Spaces serves `demo` itself)
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)