#!/usr/bin/env python3
"""ジェスチャー認識のデバッグスクリプト
カメラから手を検出し、各指の状態と判定結果をリアルタイムで表示します。
"""
import cv2
import numpy as np
from rock_paper_scissors.detection import HandDetector, GestureDetector
from rock_paper_scissors.detection.gesture_detector import (
LandmarkIndex,
FINGER_EXTENDED_ANGLE_THRESHOLD,
FINGER_CURLED_ANGLE_THRESHOLD,
)
from rock_paper_scissors.game.states import Hand
def calculate_finger_angle(landmarks: np.ndarray, mcp_idx: int, pip_idx: int, dip_idx: int) -> float:
    """Return the bend angle (degrees) at the PIP joint.

    The angle is formed by the PIP->MCP and PIP->DIP segments, using only the
    x/y coordinates of each landmark: ~180 means a straight finger, smaller
    values mean a sharper bend.
    """
    pt_mcp = landmarks[mcp_idx][:2]
    pt_pip = landmarks[pip_idx][:2]
    pt_dip = landmarks[dip_idx][:2]
    toward_mcp = pt_mcp - pt_pip
    toward_dip = pt_dip - pt_pip
    len_a = np.linalg.norm(toward_mcp)
    len_b = np.linalg.norm(toward_dip)
    # Coincident joints make the angle undefined; report "fully straight".
    if min(len_a, len_b) < 1e-10:
        return 180.0
    # Clip guards against tiny floating-point overshoot outside [-1, 1].
    cosine = np.clip(np.dot(toward_mcp, toward_dip) / (len_a * len_b), -1.0, 1.0)
    return np.degrees(np.arccos(cosine))
def get_finger_debug_info(landmarks: np.ndarray) -> dict:
    """Collect per-finger debug metrics from a set of hand landmarks.

    For each of index/middle/ring/pinky: tip/PIP y-coordinates, the
    tip-to-MCP vs PIP-to-MCP distance ratio, the PIP bend angle, and the
    intermediate and final extended/curled verdicts. The thumb entry uses a
    simpler tip/IP distance-ratio test (no angle heuristic).
    """
    finger_joints = {
        "index": (LandmarkIndex.INDEX_MCP, LandmarkIndex.INDEX_PIP, LandmarkIndex.INDEX_DIP, LandmarkIndex.INDEX_TIP),
        "middle": (LandmarkIndex.MIDDLE_MCP, LandmarkIndex.MIDDLE_PIP, LandmarkIndex.MIDDLE_DIP, LandmarkIndex.MIDDLE_TIP),
        "ring": (LandmarkIndex.RING_MCP, LandmarkIndex.RING_PIP, LandmarkIndex.RING_DIP, LandmarkIndex.RING_TIP),
        "pinky": (LandmarkIndex.PINKY_MCP, LandmarkIndex.PINKY_PIP, LandmarkIndex.PINKY_DIP, LandmarkIndex.PINKY_TIP),
    }
    info: dict = {}
    for finger, (mcp_i, pip_i, dip_i, tip_i) in finger_joints.items():
        tip_pt = landmarks[tip_i]
        pip_pt = landmarks[pip_i]
        mcp_pt = landmarks[mcp_i]
        # Distance-based features (x/y only).
        dist_tip_mcp = np.linalg.norm(tip_pt[:2] - mcp_pt[:2])
        dist_pip_mcp = np.linalg.norm(pip_pt[:2] - mcp_pt[:2])
        length_ratio = dist_tip_mcp / dist_pip_mcp if dist_pip_mcp > 0 else 0
        # Angle-based feature at the PIP joint.
        bend_angle = calculate_finger_angle(landmarks, mcp_i, pip_i, dip_i)
        # Lower y means higher on screen (image coordinates).
        tip_is_above = tip_pt[1] < pip_pt[1]
        looks_extended = tip_is_above and length_ratio > 0.9
        looks_curled = bend_angle < FINGER_CURLED_ANGLE_THRESHOLD
        info[finger] = {
            "tip_y": tip_pt[1],
            "pip_y": pip_pt[1],
            "tip_above_pip": tip_is_above,
            "distance_ratio": length_ratio,
            "angle": bend_angle,
            "distance_extended": looks_extended,
            "angle_curled": looks_curled,
            # Final verdict: the distance test must pass AND the joint must
            # not be sharply bent — a small angle overrides the distance cue.
            "is_extended": looks_extended and not looks_curled,
        }
    # Thumb: compare tip-to-MCP against IP-to-MCP; extended when the tip
    # reaches clearly (20%+) beyond the IP joint.
    thumb_tip = landmarks[LandmarkIndex.THUMB_TIP]
    thumb_ip = landmarks[LandmarkIndex.THUMB_IP]
    thumb_mcp = landmarks[LandmarkIndex.THUMB_MCP]
    dist_tip = np.linalg.norm(thumb_tip[:2] - thumb_mcp[:2])
    dist_ip = np.linalg.norm(thumb_ip[:2] - thumb_mcp[:2])
    info["thumb"] = {
        "tip_to_mcp": dist_tip,
        "ip_to_mcp": dist_ip,
        "ratio": dist_tip / dist_ip if dist_ip > 0 else 0,
        "is_extended": dist_tip > dist_ip * 1.2,
    }
    return info
def main():
    """Run the interactive gesture-recognition debug loop.

    Opens the default camera, detects one hand per frame, overlays the
    recognized gesture plus per-finger debug metrics on the mirrored video,
    draws numbered landmarks, and mirrors a summary to the console at most
    once per second. Press 'q' in the video window to quit. Camera, window,
    and detector resources are released on exit.
    """
    # Hoisted out of the frame loop: the original re-ran `import time` on
    # every iteration and stashed state on the function object
    # (`main.last_print`); a plain local timestamp is simpler.
    import time

    print("=" * 60)
    print("ジェスチャー認識デバッグツール")
    print("=" * 60)
    print(f"角度閾値: 伸びている >= {FINGER_EXTENDED_ANGLE_THRESHOLD}°, 曲がっている < {FINGER_CURLED_ANGLE_THRESHOLD}°")
    print("距離閾値: 0.9 (tip_to_mcp / pip_to_mcp)")
    print("'q' で終了")
    print("=" * 60)
    hand_detector = HandDetector()
    gesture_detector = GestureDetector()
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("カメラを開けませんでした")
        return
    last_print = 0.0  # epoch seconds of the last console summary (1/sec throttle)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Mirror the image so on-screen motion matches the user's movement.
            frame = cv2.flip(frame, 1)
            hand_data = hand_detector.detect(frame)
            if hand_data is not None:
                landmarks = hand_data.landmarks
                gesture, confidence = gesture_detector.detect(landmarks)
                debug_info = get_finger_debug_info(landmarks)
                y_offset = 30
                # Recognized gesture: green when known, red for UNKNOWN.
                gesture_text = f"Gesture: {gesture.value} (conf: {confidence:.2f})"
                color = (0, 255, 0) if gesture != Hand.UNKNOWN else (0, 0, 255)
                cv2.putText(frame, gesture_text, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
                y_offset += 35
                # Number of fingers judged extended.
                extended_count = sum(1 for name in ["thumb", "index", "middle", "ring", "pinky"]
                                     if debug_info[name]["is_extended"])
                cv2.putText(frame, f"Extended fingers: {extended_count}", (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 25
                # Per-finger detail lines (thumb has ratio only, no angle).
                for finger_name in ["thumb", "index", "middle", "ring", "pinky"]:
                    info = debug_info[finger_name]
                    if finger_name == "thumb":
                        text = f"{finger_name}: ext={info['is_extended']} (ratio={info['ratio']:.2f})"
                    else:
                        text = f"{finger_name}: ext={info['is_extended']} (ratio={info['distance_ratio']:.2f}, angle={info['angle']:.0f}°)"
                    color = (0, 255, 0) if info["is_extended"] else (0, 0, 255)
                    cv2.putText(frame, text, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
                    y_offset += 20
                # Draw each landmark as a numbered dot (normalized -> pixel coords).
                for i, lm in enumerate(landmarks):
                    x = int(lm[0] * frame.shape[1])
                    y = int(lm[1] * frame.shape[0])
                    cv2.circle(frame, (x, y), 3, (0, 255, 255), -1)
                    cv2.putText(frame, str(i), (x + 5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1)
                # Console summary, at most once per second.
                if time.time() - last_print > 1.0:
                    last_print = time.time()
                    print(f"\n--- {gesture.value} (conf: {confidence:.2f}) ---")
                    for finger_name in ["thumb", "index", "middle", "ring", "pinky"]:
                        info = debug_info[finger_name]
                        ext_str = "○" if info["is_extended"] else "×"
                        if finger_name == "thumb":
                            print(f"  {finger_name}: {ext_str} (ratio={info['ratio']:.2f})")
                        else:
                            print(f"  {finger_name}: {ext_str} (ratio={info['distance_ratio']:.2f}, angle={info['angle']:.0f}°, tip_above={info['tip_above_pip']})")
            else:
                cv2.putText(frame, "No hand detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow("Gesture Debug", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release camera/window/detector resources, even on error or 'q'.
        cap.release()
        cv2.destroyAllWindows()
        hand_detector.close()
# Entry point: run the debug loop only when executed as a script.
if __name__ == "__main__":
    main()