Update gesture_classifier.py
gesture_classifier.py  CHANGED  (+62 -19)

@@ -1,46 +1,89 @@
import cv2
import numpy as np
import json

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

-def extract_finger_states(image: np.ndarray) -> list:
-    """
-    ...
-    """
-    ...
-    _, binary = cv2.threshold(
-        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
-    )
-    ...

def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """
-    ...

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

-    return "UNKNOWN"
import cv2
import numpy as np
import json
+import mediapipe as mp
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+hands = mp_hands.Hands(
+    static_image_mode=True,
+    max_num_hands=1,
+    min_detection_confidence=0.5
+)

# ---------------- LOAD GESTURE DEFINITIONS ----------------
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

+def get_finger_states_mediapipe(image_rgb):
    """
+    Returns [thumb, index, middle, ring, pinky] states (1=open, 0=closed)
+    using MediaPipe landmarks.
    """
+    results = hands.process(image_rgb)

+    if not results.multi_hand_landmarks:
+        return [0, 0, 0, 0, 0]  # No hand detected

+    landmarks = results.multi_hand_landmarks[0].landmark
+
+    # MediaPipe landmark indices:
+    # Thumb:  1-4   (compare tip 4 vs IP 3 or MCP 2, depending on axis)
+    # Index:  5-8   (tip 8 vs PIP 6)
+    # Middle: 9-12  (tip 12 vs PIP 10)
+    # Ring:   13-16 (tip 16 vs PIP 14)
+    # Pinky:  17-20 (tip 20 vs PIP 18)

+    states = []

+    # --- THUMB LOGIC ---
+    # The thumb moves laterally rather than vertically, so compare x-coordinates:
+    # the thumb counts as open when the tip (4) lies outside the IP joint (3).
+    # This assumes a right hand facing the camera; adjust the comparison for
+    # other hand orientations.
+    if landmarks[4].x < landmarks[3].x:
+        states.append(1)
+    else:
+        states.append(0)

+    # --- FINGERS (Index, Middle, Ring, Pinky) ---
+    # A finger is open when its tip sits above its PIP joint, i.e. tip y < PIP y
+    # (y increases downwards in image coordinates).
+    states.append(1 if landmarks[8].y < landmarks[6].y else 0)    # Index  (8 vs 6)
+    states.append(1 if landmarks[12].y < landmarks[10].y else 0)  # Middle (12 vs 10)
+    states.append(1 if landmarks[16].y < landmarks[14].y else 0)  # Ring   (16 vs 14)
+    states.append(1 if landmarks[20].y < landmarks[18].y else 0)  # Pinky  (20 vs 18)

+    # Note: the thumb check above is simplistic. A more stable test for static
+    # images is the distance between the thumb tip (4) and the pinky MCP (17):
+    # a large distance means open, a small one closed. That override is not
+    # applied in this version.
+    return states

def classify_gesture(image: np.ndarray) -> str:
    """
    Match extracted finger states against JSON-defined patterns
    """
+    # MediaPipe expects RGB. Standard OpenCV frames are BGR and would need
+    # converting, but arrays coming from Streamlit/PIL are already RGB, so
+    # the input is assumed to be RGB here.
+    states = get_finger_states_mediapipe(image)

+    # Debug print to help tune the thresholds if needed
+    print(f"Detected States: {states}")

    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            return name

+    return "UNKNOWN"
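
For context, classify_gesture matches the five extracted finger states against each gesture's "pattern" list, so gesture_rules.json presumably holds a top-level "gestures" object with one five-element 0/1 pattern per gesture. A minimal sketch of that structure; the gesture names and patterns below are illustrative guesses, not the file's actual contents:

{
  "gestures": {
    "OPEN_PALM": { "pattern": [1, 1, 1, 1, 1] },
    "FIST":      { "pattern": [0, 0, 0, 0, 0] },
    "PEACE":     { "pattern": [0, 1, 1, 0, 0] }
  }
}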
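
The closing comments in get_finger_states_mediapipe describe a distance-based thumb override (thumb tip 4 vs pinky MCP 17) that the committed code never applies. A minimal sketch of that idea, assuming MediaPipe's normalized landmark coordinates; the helper name, the palm-size normalization, and the 0.4 ratio are illustrative choices, not part of the commit:

import math

def thumb_open_by_distance(landmarks, ratio=0.4):
    """Return 1 if the thumb looks open, else 0 (distance heuristic)."""
    def dist(a, b):
        return math.hypot(a.x - b.x, a.y - b.y)
    # Normalize by palm size (wrist 0 to middle MCP 9) so the check is
    # scale-invariant; the 0.4 ratio is a guess and would need tuning.
    palm_size = dist(landmarks[0], landmarks[9])
    return 1 if dist(landmarks[4], landmarks[17]) > ratio * palm_size else 0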
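
Since classify_gesture assumes RGB input while OpenCV reads frames as BGR, callers going through cv2 need an explicit conversion. A hedged usage sketch; the image path is a placeholder:

import cv2

frame_bgr = cv2.imread("hand.jpg")  # OpenCV loads images as BGR
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
print(classify_gesture(frame_rgb))  # prints a gesture name or "UNKNOWN"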