"""Rule-based hand-gesture classifier built on MediaPipe Hands.

Detects a single hand in an image, converts its landmarks into five binary
open/closed finger flags, and matches those flags against patterns loaded
from ``gesture_rules.json``.
"""

import json
import math

import cv2
import mediapipe as mp
import numpy as np

# --- MediaPipe setup -------------------------------------------------------
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.7,  # Increased confidence threshold
)

# --- Gesture rule database -------------------------------------------------
# Maps gesture name -> {"pattern": [thumb, index, middle, ring, pinky], ...}.
# NOTE(review): loaded at import time from the working directory — a missing
# or malformed file aborts the import; confirm that is the intended behavior.
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

# (tip, pip) landmark index pairs for the index, middle, ring and pinky
# fingers, in the order they appear in the JSON pattern after the thumb.
_FINGER_TIP_PIP = ((8, 6), (12, 10), (16, 14), (20, 18))


def calculate_distance(p1, p2):
    """Return the Euclidean distance between two 3D landmarks.

    Args:
        p1, p2: objects exposing float ``x``, ``y`` and ``z`` attributes
            (e.g. MediaPipe ``NormalizedLandmark``).

    Returns:
        float: straight-line distance between the two points.
    """
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2 + (p1.z - p2.z) ** 2)


def get_finger_states_robust(landmarks):
    """Return ``[thumb, index, middle, ring, pinky]`` states (1=open, 0=closed).

    Uses Euclidean distance relative to the wrist (landmark 0): a finger is
    considered curled when its tip sits closer to the wrist than its PIP
    joint does.

    Args:
        landmarks: sequence of the 21 MediaPipe hand landmarks.

    Returns:
        list[int]: five 0/1 flags ordered to match the JSON ``pattern`` field.
    """
    wrist = landmarks[0]

    # Index / middle / ring / pinky: a tip farther from the wrist than its
    # PIP joint means the finger is extended.
    states = [
        1
        if calculate_distance(landmarks[tip], wrist) > calculate_distance(landmarks[pip], wrist)
        else 0
        for tip, pip in _FINGER_TIP_PIP
    ]

    # --- THUMB LOGIC (Complex) ---
    # The thumb doesn't curl to the wrist like other fingers, so we measure
    # against the pinky MCP (landmark 17) instead: if the tip (4) is farther
    # from the pinky MCP than the IP joint (3), the thumb is sticking out.
    dist_tip_pinky = calculate_distance(landmarks[4], landmarks[17])
    dist_ip_pinky = calculate_distance(landmarks[3], landmarks[17])
    thumb_open = 1 if dist_tip_pinky > dist_ip_pinky else 0

    # Thumb goes first to match the JSON pattern order (T, I, M, R, P).
    return [thumb_open] + states


def classify_gesture(image: np.ndarray):
    """Detect a hand in *image* and classify its gesture against the rules.

    Args:
        image: H x W x 3 uint8 frame. NOTE(review): MediaPipe expects RGB
            channel order — confirm the caller converts from OpenCV's BGR
            before calling, since no conversion happens here.

    Returns:
        tuple: ``(gesture_name, annotated_image)``. ``gesture_name`` is the
        first matching rule name, ``"UNKNOWN"`` if no pattern matches, or
        ``None`` when no hand is detected (then the unannotated input image
        is returned unchanged).
    """
    # Mark the buffer read-only while MediaPipe processes it (lets MediaPipe
    # avoid a copy), then restore writability for the caller.
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    # No hand found: nothing to classify or draw.
    if not results.multi_hand_landmarks:
        return None, image

    # max_num_hands=1, so the first detection is the only hand of interest.
    hand_landmarks = results.multi_hand_landmarks[0]

    # Draw the skeleton on a copy so the caller's frame stays untouched.
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style(),
    )

    # Binary open/closed flags in (thumb, index, middle, ring, pinky) order.
    states = get_finger_states_robust(hand_landmarks.landmark)
    print(f"Detected States (T,I,M,R,P): {states}")

    # First rule whose pattern matches the detected states wins.
    detected_gesture = "UNKNOWN"
    for name, info in GESTURE_DATA.items():
        if states == info["pattern"]:
            detected_gesture = name
            break

    return detected_gesture, annotated_image