# Hand2Voice / gesture_classifier.py
import cv2
import numpy as np
import json
import mediapipe as mp
import math
# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.7  # higher threshold to reject low-confidence detections
)
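
# Note: static_image_mode=True runs full palm detection on every call, which
# suits independent snapshots; for a continuous video stream, setting it to
# False lets MediaPipe track landmarks across frames and is usually faster.
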
# Load Rules
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]
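
# The rules file is assumed to map gesture names to 5-element binary patterns
# in [thumb, index, middle, ring, pinky] order, matching the output of
# get_finger_states_robust() below. An illustrative (hypothetical)
# gesture_rules.json:
#
#   {
#     "gestures": {
#       "FIST":      {"pattern": [0, 0, 0, 0, 0]},
#       "OPEN_PALM": {"pattern": [1, 1, 1, 1, 1]},
#       "POINT":     {"pattern": [0, 1, 0, 0, 0]}
#     }
#   }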

def calculate_distance(p1, p2):
    """Calculates the Euclidean distance between two 3D landmarks."""
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2 + (p1.z - p2.z) ** 2)
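
# Note: MediaPipe hand landmarks use normalized coordinates (x and y in [0, 1]
# relative to image width/height; z is a relative depth estimate), so the
# distances above are unitless and are only ever compared against each other,
# never against absolute thresholds.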

def get_finger_states_robust(landmarks):
    """
    Returns [thumb, index, middle, ring, pinky] states (1 = open, 0 = closed),
    using Euclidean distance relative to the wrist (landmark 0).
    """
    states = []
    wrist = landmarks[0]

    # --- FINGERS (index, middle, ring, pinky) ---
    # Logic: if distance(tip, wrist) <= distance(PIP, wrist), the finger is
    # curled; otherwise the tip extends past the PIP joint and the finger is open.

    # Index (tip 8, PIP 6)
    states.append(1 if calculate_distance(landmarks[8], wrist) > calculate_distance(landmarks[6], wrist) else 0)
    # Middle (tip 12, PIP 10)
    states.append(1 if calculate_distance(landmarks[12], wrist) > calculate_distance(landmarks[10], wrist) else 0)
    # Ring (tip 16, PIP 14)
    states.append(1 if calculate_distance(landmarks[16], wrist) > calculate_distance(landmarks[14], wrist) else 0)
    # Pinky (tip 20, PIP 18)
    states.append(1 if calculate_distance(landmarks[20], wrist) > calculate_distance(landmarks[18], wrist) else 0)

    # --- THUMB ---
    # The thumb does not curl toward the wrist like the other fingers, so the
    # wrist-distance test above is unreliable for it. Instead, check whether the
    # thumb tip (4) is farther from the pinky MCP (17) than the thumb IP (3) is,
    # i.e. whether the thumb is sticking out to the side.
    dist_tip_pinky = calculate_distance(landmarks[4], landmarks[17])
    dist_ip_pinky = calculate_distance(landmarks[3], landmarks[17])

    # Insert the thumb state at the front of the list to match the JSON pattern order
    if dist_tip_pinky > dist_ip_pinky:
        states.insert(0, 1)  # open
    else:
        states.insert(0, 0)  # closed

    return states
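
# A minimal, hypothetical self-test with synthetic landmarks (SimpleNamespace
# stands in for MediaPipe's landmark objects, which only need .x/.y/.z here).
# It is illustrative only and is not called anywhere by the app.
def _demo_open_hand_states():
    """All four fingertips sit farther from the wrist than their PIP joints and
    the thumb sticks out past its IP joint, so the expected result is an open hand."""
    from types import SimpleNamespace
    pts = [SimpleNamespace(x=0.0, y=0.0, z=0.0) for _ in range(21)]  # wrist at origin
    for pip, tip in [(6, 8), (10, 12), (14, 16), (18, 20)]:
        pts[pip] = SimpleNamespace(x=0.0, y=0.2, z=0.0)
        pts[tip] = SimpleNamespace(x=0.0, y=0.4, z=0.0)
    pts[3] = SimpleNamespace(x=0.1, y=0.0, z=0.0)    # thumb IP
    pts[4] = SimpleNamespace(x=0.3, y=0.0, z=0.0)    # thumb tip
    pts[17] = SimpleNamespace(x=-0.2, y=0.2, z=0.0)  # pinky MCP
    assert get_finger_states_robust(pts) == [1, 1, 1, 1, 1]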

def classify_gesture(image: np.ndarray):
    """
    Classifies the hand gesture in an RGB image (MediaPipe expects RGB, not BGR).
    Returns: (gesture_name, annotated_image), where gesture_name is None if no
    hand is detected and "UNKNOWN" if no rule pattern matches.
    """
    # Mark the image read-only so MediaPipe can process it by reference
    # (a small performance win), then restore writeability for drawing
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    # No hand found
    if not results.multi_hand_landmarks:
        return None, image

    # Use the first (and, with max_num_hands=1, only) detected hand
    hand_landmarks = results.multi_hand_landmarks[0]

    # Draw the hand skeleton on a copy of the image
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style()
    )

    # Compute the binary finger states
    states = get_finger_states_robust(hand_landmarks.landmark)
    print(f"Detected States (T,I,M,R,P): {states}")

    # Match against the rule database
    detected_gesture = "UNKNOWN"
    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            detected_gesture = name
            break

    return detected_gesture, annotated_image
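
# Minimal usage sketch, assuming a local test image at the hypothetical path
# "sample.jpg". OpenCV loads images as BGR, so convert to RGB before calling
# classify_gesture(), and back to BGR for display.
if __name__ == "__main__":
    bgr = cv2.imread("sample.jpg")  # hypothetical test image
    if bgr is None:
        raise SystemExit("Could not read sample.jpg")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    gesture, annotated = classify_gesture(rgb)
    print(f"Gesture: {gesture if gesture else 'no hand detected'}")
    cv2.imshow("Hand2Voice", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    cv2.destroyAllWindows()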