LovnishVerma committed
Commit e5630e9 · verified · 1 Parent(s): e586b5c

Update gesture_classifier.py

Files changed (1)
  1. gesture_classifier.py +62 -19
gesture_classifier.py CHANGED
@@ -1,46 +1,89 @@
import cv2
import numpy as np
import json

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

-
- def extract_finger_states(image: np.ndarray) -> list:
    """
-     Approximate finger open/closed states using vertical intensity analysis.
-     Returns a list of 5 values [thumb, index, middle, ring, pinky]
    """
-     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-     blur = cv2.GaussianBlur(gray, (7, 7), 0)
-
-     _, binary = cv2.threshold(
-         blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
-     )

-     h, w = binary.shape
-     zones = np.array_split(binary, 5, axis=1)

-     finger_states = []

-     for zone in zones:
-         white_ratio = np.sum(zone == 255) / zone.size
-         finger_states.append(1 if white_ratio > 0.25 else 0)

-     return finger_states


def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """

-     states = extract_finger_states(image)

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

-     return "UNKNOWN"
import cv2
import numpy as np
import json
+ import mediapipe as mp
+
+ # Initialize MediaPipe Hands
+ mp_hands = mp.solutions.hands
+ hands = mp_hands.Hands(
+     static_image_mode=True,
+     max_num_hands=1,
+     min_detection_confidence=0.5
+ )

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

+ def get_finger_states_mediapipe(image_rgb):
    """
+     Returns [thumb, index, middle, ring, pinky] states (1=Open, 0=Closed)
+     using MediaPipe landmarks.
    """
+     results = hands.process(image_rgb)
+
+     if not results.multi_hand_landmarks:
+         return [0, 0, 0, 0, 0]  # No hand detected
+
+     landmarks = results.multi_hand_landmarks[0].landmark
+
+     # MediaPipe landmark indices:
+     # Thumb:  1-4   (compare Tip 4 vs IP 3 or MCP 2, depending on axis)
+     # Index:  5-8   (Tip 8 vs PIP 6)
+     # Middle: 9-12  (Tip 12 vs PIP 10)
+     # Ring:   13-16 (Tip 16 vs PIP 14)
+     # Pinky:  17-20 (Tip 20 vs PIP 18)
+
+     states = []
+
+     # --- THUMB LOGIC ---
+     # The thumb is open if the tip (4) is to the outside of the IP (3)
+     # relative to the palm. Simple check: is the tip's x-coordinate further
+     # from the pinky base's x-coordinate? As a generic "Thumb Up/Open" check,
+     # we compare x-coordinates (horizontal movement), assuming a right hand
+     # for simplicity.
+     if landmarks[4].x < landmarks[3].x:  # Adjust logic if needed for specific hand orientation
+         states.append(1)
+     else:
+         states.append(0)
+
+     # --- FINGERS (Index, Middle, Ring, Pinky) ---
+     # A finger is open if Tip y < PIP y (note: y increases downwards in images).
+     # Index (8 vs 6)
+     states.append(1 if landmarks[8].y < landmarks[6].y else 0)
+     # Middle (12 vs 10)
+     states.append(1 if landmarks[12].y < landmarks[10].y else 0)
+     # Ring (16 vs 14)
+     states.append(1 if landmarks[16].y < landmarks[14].y else 0)
+     # Pinky (20 vs 18)
+     states.append(1 if landmarks[20].y < landmarks[18].y else 0)
+
+     # Note: the thumb logic above is simplistic. For a robust purely static
+     # check we would often test whether the tip is "higher" than the MCP,
+     # but thumbs move laterally. For this specific JSON, treat the thumb as
+     # "Open" if the tip is far from the index base.
+
+     # A more stable override for static images: compute the distance between
+     # the thumb tip (4) and the pinky MCP (17); large distance -> Open,
+     # small -> Closed.
+     return states

def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """
+     # Ensure the image is RGB (MediaPipe expects RGB). If the input is BGR
+     # from standard OpenCV, convert it first; Streamlit/PIL usually give RGB,
+     # and a NumPy array from PIL is RGB, so we assume RGB here.
+     states = get_finger_states_mediapipe(image)

+     # Debug print to help tune thresholds if needed
+     print(f"Detected States: {states}")

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

+     return "UNKNOWN"
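Both versions of the file load their match patterns from gesture_rules.json, whose contents are not part of this commit. From the way the code indexes it (json.load(f)["gestures"], then info["pattern"] compared against a 5-element state list), the file presumably has a shape like the following sketch; the gesture names and patterns here are purely illustrative:

{
  "gestures": {
    "OPEN_PALM": { "pattern": [1, 1, 1, 1, 1] },
    "FIST":      { "pattern": [0, 0, 0, 0, 0] },
    "POINTING":  { "pattern": [0, 1, 0, 0, 0] }
  }
}

Each pattern would be ordered [thumb, index, middle, ring, pinky], matching the list returned by get_finger_states_mediapipe.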
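The closing comments of get_finger_states_mediapipe describe a distance-based thumb override (thumb tip 4 vs. pinky MCP 17) but do not implement it. A minimal sketch of that idea, assuming MediaPipe's normalized [0, 1] landmark coordinates; the helper name and the 0.3 threshold are assumptions, not part of the commit:

import math

def thumb_open_by_distance(landmarks, threshold=0.3):
    """Return 1 ("Open") if the thumb tip (4) is far from the pinky MCP (17)."""
    dx = landmarks[4].x - landmarks[17].x
    dy = landmarks[4].y - landmarks[17].y
    # Coordinates are normalized to [0, 1], so the distance is too; the 0.3
    # cutoff is an assumed starting point that would need per-camera tuning.
    return 1 if math.hypot(dx, dy) > threshold else 0

Inside get_finger_states_mediapipe this would replace the x-coordinate comparison, e.g. states[0] = thumb_open_by_distance(landmarks).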
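On the input side, classify_gesture assumes an RGB array, while cv2.imread and cv2.VideoCapture return BGR. A short usage sketch for a caller reading images with OpenCV ("hand.jpg" is a hypothetical file):

frame_bgr = cv2.imread("hand.jpg")                       # OpenCV loads BGR
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)   # MediaPipe expects RGB
print(classify_gesture(frame_rgb))                       # a gesture name or "UNKNOWN"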