LovnishVerma committed on
Commit
defae99
·
verified ·
1 Parent(s): a4a1bb3

Update gesture_classifier.py

Browse files
Files changed (1) hide show
  1. gesture_classifier.py +70 -57
gesture_classifier.py CHANGED
@@ -2,88 +2,101 @@ import cv2
2
  import numpy as np
3
  import json
4
  import mediapipe as mp
 
5
 
6
- # Initialize MediaPipe Hands
7
  mp_hands = mp.solutions.hands
 
 
 
8
  hands = mp_hands.Hands(
9
  static_image_mode=True,
10
  max_num_hands=1,
11
- min_detection_confidence=0.5
12
  )
13
 
14
- # ---------------- LOAD GESTURE DEFINITIONS ----------------
15
  with open("gesture_rules.json", "r") as f:
16
  GESTURE_DATA = json.load(f)["gestures"]
17
 
18
- def get_finger_states_mediapipe(image_rgb):
 
 
 
 
19
  """
20
  Returns [thumb, index, middle, ring, pinky] states (1=Open, 0=Closed)
21
- using MediaPipe Landmarks.
22
  """
23
- results = hands.process(image_rgb)
24
-
25
- if not results.multi_hand_landmarks:
26
- return [0, 0, 0, 0, 0] # No hand detected
27
 
28
- landmarks = results.multi_hand_landmarks[0].landmark
 
29
 
30
- # MediaPipe Landmark Indices:
31
- # Thumb: 1-4 (Compare Tip 4 vs IP 3 or MCP 2 depending on axis)
32
- # Index: 5-8 (Tip 8 vs PIP 6)
33
- # Middle: 9-12 (Tip 12 vs PIP 10)
34
- # Ring: 13-16 (Tip 16 vs PIP 14)
35
- # Pinky: 17-20 (Tip 20 vs PIP 18)
36
-
37
- states = []
38
 
39
- # --- THUMB LOGIC ---
40
- # Thumb is open if tip (4) is to the outside of IP (3) relative to the palm
41
- # Simple check: Is tip x-coordinate further from pinky base x-coordinate?
42
- # A generic check for "Thumb Up/Open":
43
- # We compare the X coordinates for the thumb (horizontal movement)
44
- # Assuming right hand for simplicity, or checking relative distance to MCP
45
- if landmarks[4].x < landmarks[3].x: # Adjust logic if needed for specific hand orientation
46
- states.append(1)
47
- else:
48
- states.append(0)
49
 
50
- # --- FINGERS (Index, Middle, Ring, Pinky) ---
51
- # Check if Tip Y < PIP Y (Note: Y increases downwards in images)
52
- # Index (8 vs 6)
53
- states.append(1 if landmarks[8].y < landmarks[6].y else 0)
54
- # Middle (12 vs 10)
55
- states.append(1 if landmarks[12].y < landmarks[10].y else 0)
56
- # Ring (16 vs 14)
57
- states.append(1 if landmarks[16].y < landmarks[14].y else 0)
58
- # Pinky (20 vs 18)
59
- states.append(1 if landmarks[20].y < landmarks[18].y else 0)
60
 
61
- # Note: The Thumb logic above is simplistic.
62
- # For a purely static robust check, we often check if Tip is "higher" than MCP,
63
- # but thumbs move laterally.
64
- # For this specific JSON, let's treat Thumb as "Open" if the Tip is far from the Index base.
 
 
 
65
 
66
- # Overriding thumb for better stability in static images:
67
- # Calculate distance between Thumb Tip (4) and Pinky MCP (17)
68
- # If distance is large -> Open. Small -> Closed.
 
 
 
69
  return states
70
 
71
- def classify_gesture(image: np.ndarray) -> str:
72
  """
73
- Match extracted finger states against JSON-defined patterns
74
  """
75
- # Ensure Image is RGB (MediaPipe expects RGB)
76
- # If the input is BGR from OpenCV standard, convert it.
77
- # But Streamlit/PIL usually gives RGB. We assume RGB here.
 
 
 
 
 
 
 
 
78
 
79
- # If input is a numpy array from PIL, it is RGB.
80
- states = get_finger_states_mediapipe(image)
 
 
 
 
 
 
 
81
 
82
- # Debug print to help you tune thresholds if needed
83
- print(f"Detected States: {states}")
 
84
 
 
 
85
  for name, info in GESTURE_DATA.items():
86
  if states == info["pattern"]:
87
- return name
88
-
89
- return "UNKNOWN"
 
 
2
  import numpy as np
3
  import json
4
  import mediapipe as mp
5
+ import math
6
 
7
+ # Initialize MediaPipe
8
  mp_hands = mp.solutions.hands
9
+ mp_drawing = mp.solutions.drawing_utils
10
+ mp_drawing_styles = mp.solutions.drawing_styles
11
+
12
  hands = mp_hands.Hands(
13
  static_image_mode=True,
14
  max_num_hands=1,
15
+ min_detection_confidence=0.7 # Increased confidence threshold
16
  )
17
 
18
+ # Load Rules
19
  with open("gesture_rules.json", "r") as f:
20
  GESTURE_DATA = json.load(f)["gestures"]
21
 
22
+ def calculate_distance(p1, p2):
23
+ """Calculates Euclidean distance between two 3D landmarks."""
24
+ return math.sqrt((p1.x - p2.x)**2 + (p1.y - p2.y)**2 + (p1.z - p2.z)**2)
25
+
26
+ def get_finger_states_robust(landmarks):
27
  """
28
  Returns [thumb, index, middle, ring, pinky] states (1=Open, 0=Closed)
29
+ using Euclidean distance relative to the Wrist (Landmark 0).
30
  """
31
+ states = []
32
+ wrist = landmarks[0]
 
 
33
 
34
+ # --- FINGERS (Index, Middle, Ring, Pinky) ---
35
+ # Logic: If Distance(Tip, Wrist) < Distance(PIP, Wrist), the finger is curled.
36
 
37
+ # Index (Tip 8, PIP 6)
38
+ states.append(1 if calculate_distance(landmarks[8], wrist) > calculate_distance(landmarks[6], wrist) else 0)
39
+
40
+ # Middle (Tip 12, PIP 10)
41
+ states.append(1 if calculate_distance(landmarks[12], wrist) > calculate_distance(landmarks[10], wrist) else 0)
 
 
 
42
 
43
+ # Ring (Tip 16, PIP 14)
44
+ states.append(1 if calculate_distance(landmarks[16], wrist) > calculate_distance(landmarks[14], wrist) else 0)
 
 
 
 
 
 
 
 
45
 
46
+ # Pinky (Tip 20, PIP 18)
47
+ states.append(1 if calculate_distance(landmarks[20], wrist) > calculate_distance(landmarks[18], wrist) else 0)
 
 
 
 
 
 
 
 
48
 
49
+ # --- THUMB LOGIC (Complex) ---
50
+ # The thumb doesn't curl to the wrist like other fingers.
51
+ # We check if the Tip (4) is further away from the Pinky MCP (17) than the IP (3) is.
52
+ # Basically, is the thumb sticking out?
53
+
54
+ dist_tip_pinky = calculate_distance(landmarks[4], landmarks[17])
55
+ dist_ip_pinky = calculate_distance(landmarks[3], landmarks[17])
56
 
57
+ # We insert Thumb at the start of the list to match JSON format
58
+ if dist_tip_pinky > dist_ip_pinky:
59
+ states.insert(0, 1) # Open
60
+ else:
61
+ states.insert(0, 0) # Closed
62
+
63
  return states
64
 
65
+ def classify_gesture(image: np.ndarray):
66
  """
67
+ Returns: (gesture_name, annotated_image)
68
  """
69
+ # Convert image to writable for MediaPipe
70
+ image.flags.writeable = False
71
+ results = hands.process(image)
72
+ image.flags.writeable = True
73
+
74
+ # If no hand found
75
+ if not results.multi_hand_landmarks:
76
+ return None, image
77
+
78
+ # Get landmarks for the first hand
79
+ hand_landmarks = results.multi_hand_landmarks[0]
80
 
81
+ # Draw the Skeleton on the image
82
+ annotated_image = image.copy()
83
+ mp_drawing.draw_landmarks(
84
+ annotated_image,
85
+ hand_landmarks,
86
+ mp_hands.HAND_CONNECTIONS,
87
+ mp_drawing_styles.get_default_hand_landmarks_style(),
88
+ mp_drawing_styles.get_default_hand_connections_style()
89
+ )
90
 
91
+ # Get binary states
92
+ states = get_finger_states_robust(hand_landmarks.landmark)
93
+ print(f"Detected States (T,I,M,R,P): {states}")
94
 
95
+ # Match against DB
96
+ detected_gesture = "UNKNOWN"
97
  for name, info in GESTURE_DATA.items():
98
  if states == info["pattern"]:
99
+ detected_gesture = name
100
+ break
101
+
102
+ return detected_gesture, annotated_image