File size: 4,650 Bytes
5230777
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import time
import os

# --- Config ---
MODEL_PATH = 'hand_landmarker.task'  # MediaPipe hand-landmarker task file
THRESHOLD = 0.7  # minimum softmax confidence (70%) to accept a prediction

# Shorthand aliases for the MediaPipe Tasks API.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Load the trained classifier and its label array. Both artifacts are
# produced by the training script, so fail fast with a clear message if
# either is missing instead of crashing mid-load with a traceback.
if not os.path.exists('nsl_model_v1.h5'):
    print("Run training script first!")
    exit()
if not os.path.exists('classes.npy'):
    print("Missing 'classes.npy' - run training script first!")
    exit()

model = tf.keras.models.load_model('nsl_model_v1.h5')
# allow_pickle is needed because the label array may hold Python strings.
classes = np.load('classes.npy', allow_pickle=True)
print(f"Loaded classes: {classes}")

# Helper to draw
def draw_landmarks(image, landmarks, line_color=(200, 200, 200),
                   point_color=(0, 0, 255), line_thickness=2, point_radius=4):
    """Draw the 21 MediaPipe hand landmarks and their skeleton onto `image`.

    Args:
        image: BGR frame (numpy array of shape (h, w, 3)); drawn on in place.
        landmarks: sequence of 21 landmark objects with normalized
            .x / .y coordinates in [0, 1] (MediaPipe convention).
        line_color: BGR color of the skeleton lines (default light gray).
        point_color: BGR color of the landmark dots (default red).
        line_thickness: skeleton line thickness in pixels.
        point_radius: landmark dot radius in pixels.
    """
    h, w, _ = image.shape
    # Standard MediaPipe hand topology: each tuple is a (start, end)
    # landmark-index pair forming one bone of the skeleton.
    HAND_CONNECTIONS = [
        (0, 1), (1, 2), (2, 3), (3, 4),           # thumb
        (0, 5), (5, 6), (6, 7), (7, 8),           # index
        (5, 9), (9, 10), (10, 11), (11, 12),      # middle
        (9, 13), (13, 14), (14, 15), (15, 16),    # ring
        (13, 17), (17, 18), (18, 19), (19, 20),   # pinky
        (0, 17)                                   # wrist to pinky base
    ]

    # Bones first, dots second, so the dots render on top of the lines.
    for start_idx, end_idx in HAND_CONNECTIONS:
        start = landmarks[start_idx]
        end = landmarks[end_idx]
        # Normalized coords are scaled to pixel space before drawing.
        cv2.line(image,
                 (int(start.x * w), int(start.y * h)),
                 (int(end.x * w), int(end.y * h)),
                 line_color, line_thickness)
    for lm in landmarks:
        cv2.circle(image, (int(lm.x * w), int(lm.y * h)),
                   point_radius, point_color, -1)

# Initialize Landmarker
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=MODEL_PATH),
    running_mode=VisionRunningMode.VIDEO,  # per-frame detection with timestamps
    num_hands=1,  # classifier expects exactly one hand's landmarks
    min_hand_detection_confidence=0.5
)

with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Without this check a missing/busy webcam would just make
        # cap.read() fail and the loop exit silently on the first frame.
        raise SystemExit("Could not open webcam (device 0).")
    start_time = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror for a natural "selfie" view; MediaPipe expects RGB input.
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # VIDEO running mode requires a monotonically increasing
            # timestamp in milliseconds.
            timestamp = int((time.time() - start_time) * 1000)
            detection_result = landmarker.detect_for_video(mp_image, timestamp)

            # Defaults shown when no hand is detected this frame.
            display_text = "Waiting..."
            color = (200, 200, 200)

            if detection_result.hand_landmarks:
                hand_landmarks = detection_result.hand_landmarks[0]

                # --- Draw ---
                draw_landmarks(frame, hand_landmarks)

                # --- Preprocess (MUST MATCH COLLECTION LOGIC) ---
                # Translate so the wrist is the origin, then scale by the
                # wrist -> middle-finger-MCP distance so the feature vector
                # is invariant to hand size and distance from the camera.
                wrist = hand_landmarks[0]
                middle_mcp = hand_landmarks[9]

                scale = np.sqrt(
                    (middle_mcp.x - wrist.x) ** 2 +
                    (middle_mcp.y - wrist.y) ** 2 +
                    (middle_mcp.z - wrist.z) ** 2
                )
                if scale == 0:
                    scale = 1.0  # degenerate landmarks; avoid division by zero

                # Flatten 21 landmarks into a 63-value [x, y, z, ...] vector.
                coords = []
                for lm in hand_landmarks:
                    coords.extend([
                        (lm.x - wrist.x) / scale,
                        (lm.y - wrist.y) / scale,
                        (lm.z - wrist.z) / scale,
                    ])

                # --- Predict ---
                prediction = model.predict(np.array([coords]), verbose=0)
                class_id = int(np.argmax(prediction))
                confidence = float(np.max(prediction))
                predicted_char = classes[class_id]

                # --- Display logic ---
                if confidence > THRESHOLD:
                    display_text = f"Sign: {predicted_char}"
                    color = (0, 255, 0)

                    # Confidence bar: filled width scales with confidence,
                    # drawn inside a fixed 200-px-wide white outline.
                    bar_width = int(confidence * 200)
                    cv2.rectangle(frame, (50, 90), (50 + bar_width, 110), color, -1)
                    cv2.rectangle(frame, (50, 90), (250, 110), (255, 255, 255), 2)
                else:
                    display_text = f"Unsure ({predicted_char}?)"
                    color = (0, 165, 255)  # orange

                cv2.putText(frame, f"{confidence:.2f}", (260, 108),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

            # Black header bar with the current result text.
            cv2.rectangle(frame, (0, 0), (640, 60), (0, 0, 0), -1)
            cv2.putText(frame, display_text, (20, 45),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

            cv2.imshow('NSL Live Test', frame)
            if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Release the camera and close windows even if an exception
        # escapes the loop; otherwise the device stays locked.
        cap.release()
        cv2.destroyAllWindows()