Update gesture_classifier.py
gesture_classifier.py  CHANGED  (+62 -19)

@@ -1,46 +1,89 @@
import cv2
import numpy as np
import json

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

-def extract_finger_states(image: np.ndarray) -> list:
-    """
-    ...
-    """
-    ...
-    _, binary = cv2.threshold(
-        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
-    )
-    ...

def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """
-    ...

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

-    return "UNKNOWN"
import cv2
import numpy as np
import json
+import mediapipe as mp
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+hands = mp_hands.Hands(
+    static_image_mode=True,
+    max_num_hands=1,
+    min_detection_confidence=0.5
+)

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

+def get_finger_states_mediapipe(image_rgb):
    """
+    Returns [thumb, index, middle, ring, pinky] states (1=open, 0=closed)
+    using MediaPipe landmarks.
    """
+    results = hands.process(image_rgb)

+    if not results.multi_hand_landmarks:
+        return [0, 0, 0, 0, 0]  # No hand detected

+    landmarks = results.multi_hand_landmarks[0].landmark
+
+    # MediaPipe landmark indices:
+    # Thumb:  1-4   (compare tip 4 vs IP 3 or MCP 2, depending on axis)
+    # Index:  5-8   (tip 8 vs PIP 6)
+    # Middle: 9-12  (tip 12 vs PIP 10)
+    # Ring:   13-16 (tip 16 vs PIP 14)
+    # Pinky:  17-20 (tip 20 vs PIP 18)

+    states = []

+    # --- THUMB LOGIC ---
+    # The thumb moves laterally rather than vertically, so compare x-coordinates:
+    # the thumb counts as open when the tip (4) lies outside the IP joint (3).
+    # This assumes a right hand facing the camera; adjust the comparison for
+    # other hand orientations.
+    if landmarks[4].x < landmarks[3].x:
+        states.append(1)
+    else:
+        states.append(0)

+    # --- FINGERS (Index, Middle, Ring, Pinky) ---
+    # A finger is open when its tip sits above its PIP joint, i.e. tip y < PIP y
+    # (y increases downwards in image coordinates).
+    states.append(1 if landmarks[8].y < landmarks[6].y else 0)    # Index  (8 vs 6)
+    states.append(1 if landmarks[12].y < landmarks[10].y else 0)  # Middle (12 vs 10)
+    states.append(1 if landmarks[16].y < landmarks[14].y else 0)  # Ring   (16 vs 14)
+    states.append(1 if landmarks[20].y < landmarks[18].y else 0)  # Pinky  (20 vs 18)

+    # Note: the thumb check above is simplistic. A more stable test for static
+    # images is the distance between the thumb tip (4) and the pinky MCP (17):
+    # a large distance means open, a small one closed. That override is not
+    # applied in this version.
+    return states

def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """
+    # MediaPipe expects RGB. Standard OpenCV frames are BGR and would need
+    # converting, but arrays coming from Streamlit/PIL are already RGB, so
+    # the input is assumed to be RGB here.
+    states = get_finger_states_mediapipe(image)

+    # Debug print to help tune the thresholds if needed
+    print(f"Detected States: {states}")

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

+    return "UNKNOWN"
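
For context, classify_gesture matches the five extracted finger states against each gesture's "pattern" list, so gesture_rules.json presumably holds a top-level "gestures" object with one five-element 0/1 pattern per gesture. A minimal sketch of that structure; the gesture names and patterns below are illustrative guesses, not the file's actual contents:

{
  "gestures": {
    "OPEN_PALM": { "pattern": [1, 1, 1, 1, 1] },
    "FIST":      { "pattern": [0, 0, 0, 0, 0] },
    "PEACE":     { "pattern": [0, 1, 1, 0, 0] }
  }
}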
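
The closing comments in get_finger_states_mediapipe describe a distance-based thumb override (thumb tip 4 vs pinky MCP 17) that the committed code never applies. A minimal sketch of that idea, assuming MediaPipe's normalized landmark coordinates; the helper name, the palm-size normalization, and the 0.4 ratio are illustrative choices, not part of the commit:

import math

def thumb_open_by_distance(landmarks, ratio=0.4):
    """Return 1 if the thumb looks open, else 0 (distance heuristic)."""
    def dist(a, b):
        return math.hypot(a.x - b.x, a.y - b.y)
    # Normalize by palm size (wrist 0 to middle MCP 9) so the check is
    # scale-invariant; the 0.4 ratio is a guess and would need tuning.
    palm_size = dist(landmarks[0], landmarks[9])
    return 1 if dist(landmarks[4], landmarks[17]) > ratio * palm_size else 0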
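
Since classify_gesture assumes RGB input while OpenCV reads frames as BGR, callers going through cv2 need an explicit conversion. A hedged usage sketch; the image path is a placeholder:

import cv2

frame_bgr = cv2.imread("hand.jpg")  # OpenCV loads images as BGR
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
print(classify_gesture(frame_rgb))  # prints a gesture name or "UNKNOWN"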