File size: 3,332 Bytes
202e07d
 
e5b8340
e5630e9
defae99
e5630e9
defae99
e5630e9
defae99
 
 
e5630e9
 
 
defae99
e5630e9
202e07d
defae99
e5b8340
 
 
defae99
 
 
 
 
202e07d
e5630e9
defae99
202e07d
defae99
 
e5b8340
defae99
 
e5630e9
defae99
 
 
 
 
e5b8340
defae99
 
e5b8340
defae99
 
e5b8340
defae99
 
 
 
 
 
 
e5630e9
defae99
 
 
 
 
 
e5630e9
e5b8340
defae99
e5b8340
defae99
e5b8340
defae99
 
 
 
 
 
 
 
 
 
 
e5630e9
defae99
 
 
 
 
 
 
 
 
e5b8340
defae99
 
 
e5b8340
defae99
 
e5b8340
 
defae99
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import cv2
import numpy as np
import json
import mediapipe as mp
import math

# Initialize MediaPipe
# Module-level handles to the MediaPipe Hands solution and its drawing
# helpers; created once at import time and shared by classify_gesture().
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Detector configured for single still images (no cross-frame tracking)
# and at most one hand per image.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.7 # Increased confidence threshold
)

# Load Rules
# Gesture database: maps gesture name -> info dict whose "pattern" key is
# compared against the 5-element binary finger-state list.
# NOTE(review): path is relative to the process CWD, not this file — the
# script must be run from the directory containing gesture_rules.json.
with open("gesture_rules.json", "r") as f:
    GESTURE_DATA = json.load(f)["gestures"]

def calculate_distance(p1, p2):
    """Return the Euclidean distance between two 3D landmarks.

    Args:
        p1, p2: objects exposing ``x``, ``y`` and ``z`` float attributes
            (e.g. MediaPipe NormalizedLandmark).

    Returns:
        float: straight-line distance between the two points.
    """
    # math.hypot handles the sqrt-of-sum-of-squares in C and guards
    # against intermediate overflow/underflow, unlike the manual form.
    return math.hypot(p1.x - p2.x, p1.y - p2.y, p1.z - p2.z)

def get_finger_states_robust(landmarks):
    """
    Return binary finger states [thumb, index, middle, ring, pinky]
    (1 = open/extended, 0 = closed/curled), judged by Euclidean
    distances relative to the wrist landmark (index 0).
    """
    wrist = landmarks[0]

    # --- FINGERS (Index, Middle, Ring, Pinky) ---
    # A finger counts as extended when its fingertip sits farther from
    # the wrist than its PIP joint does; a curled finger pulls the tip
    # back in closer than the PIP.
    tip_pip_pairs = ((8, 6), (12, 10), (16, 14), (20, 18))
    finger_states = [
        int(calculate_distance(landmarks[tip], wrist)
            > calculate_distance(landmarks[pip], wrist))
        for tip, pip in tip_pip_pairs
    ]

    # --- THUMB LOGIC (Complex) ---
    # The thumb folds across the palm rather than toward the wrist, so
    # the wrist test above doesn't apply. Instead compare distances to
    # the pinky MCP (landmark 17): a thumb sticking out places its tip
    # (4) farther from landmark 17 than its IP joint (3).
    thumb_is_open = (
        calculate_distance(landmarks[4], landmarks[17])
        > calculate_distance(landmarks[3], landmarks[17])
    )

    # Thumb leads the list so the order matches the JSON pattern format.
    return [int(thumb_is_open)] + finger_states

def classify_gesture(image: np.ndarray):
    """
    Detect a hand in *image* and match its finger pattern against the
    gesture database.

    Returns:
        (gesture_name, annotated_image): gesture_name is the matched rule
        name, "UNKNOWN" when a hand was found but no pattern matched, or
        None when no hand was detected (the input image is returned
        unannotated in that case).
    """
    # Flag the buffer read-only while MediaPipe processes it (lets it
    # skip a defensive copy), then restore writability afterwards.
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    # Bail out early when no hand was detected.
    if not results.multi_hand_landmarks:
        return None, image

    # Only the first hand is considered (detector is max_num_hands=1).
    hand_landmarks = results.multi_hand_landmarks[0]

    # Draw the landmark skeleton onto a copy so the caller's image
    # stays untouched.
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style(),
    )

    # Binary open/closed states in (thumb, index, middle, ring, pinky) order.
    states = get_finger_states_robust(hand_landmarks.landmark)
    print(f"Detected States (T,I,M,R,P): {states}")

    # First gesture whose stored pattern equals the detected states wins;
    # fall back to "UNKNOWN" when nothing matches.
    detected_gesture = next(
        (name for name, info in GESTURE_DATA.items() if info["pattern"] == states),
        "UNKNOWN",
    )

    return detected_gesture, annotated_image