"""Live webcam test for a trained NSL hand-sign classifier.

Captures frames from the default camera, runs MediaPipe hand-landmark
detection in VIDEO mode, normalizes the 21 landmarks exactly as the
training-data collection script did, and overlays the model's prediction
(with a confidence bar) on screen.  Press Esc to quit.
"""

import os
import sys
import time

import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# --- Config ---
MODEL_PATH = 'hand_landmarker.task'    # MediaPipe hand-landmarker asset
KERAS_MODEL_PATH = 'nsl_model_v1.h5'   # classifier produced by the training script
CLASSES_PATH = 'classes.npy'           # label array saved alongside the model
THRESHOLD = 0.7                        # confidence threshold (70%)

BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Standard MediaPipe hand skeleton: pairs of landmark indices to connect.
# Hoisted to module level so it is not rebuilt on every frame.
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),          # thumb
    (0, 5), (5, 6), (6, 7), (7, 8),          # index
    (5, 9), (9, 10), (10, 11), (11, 12),     # middle
    (9, 13), (13, 14), (14, 15), (15, 16),   # ring
    (13, 17), (17, 18), (18, 19), (19, 20),  # pinky
    (0, 17),                                 # wrist to pinky base
]


def draw_landmarks(image, landmarks):
    """Draw the 21 hand landmarks and skeleton connections onto *image*.

    Args:
        image: BGR frame; drawn on in place.
        landmarks: sequence of 21 objects with normalized .x/.y in [0, 1].
    """
    h, w, _ = image.shape
    for start_idx, end_idx in HAND_CONNECTIONS:
        start = landmarks[start_idx]
        end = landmarks[end_idx]
        cv2.line(image,
                 (int(start.x * w), int(start.y * h)),
                 (int(end.x * w), int(end.y * h)),
                 (200, 200, 200), 2)
    for lm in landmarks:
        cv2.circle(image, (int(lm.x * w), int(lm.y * h)), 4, (0, 0, 255), -1)


def normalize_landmarks(landmarks):
    """Return a flat [x0, y0, z0, x1, ...] feature list for the classifier.

    Coordinates are made wrist-relative and scale-normalized by the
    wrist -> middle-finger-MCP distance.  MUST MATCH the preprocessing
    used by the data-collection script, or predictions are meaningless.
    """
    wrist = landmarks[0]
    middle_mcp = landmarks[9]
    scale = np.sqrt(
        (middle_mcp.x - wrist.x) ** 2
        + (middle_mcp.y - wrist.y) ** 2
        + (middle_mcp.z - wrist.z) ** 2
    )
    if scale == 0:
        scale = 1.0  # degenerate detection; avoid division by zero
    coords = []
    for lm in landmarks:
        coords.extend([
            (lm.x - wrist.x) / scale,
            (lm.y - wrist.y) / scale,
            (lm.z - wrist.z) / scale,
        ])
    return coords


def main():
    """Run the live capture/predict/display loop until Esc is pressed."""
    if not os.path.exists(KERAS_MODEL_PATH):
        print("Run training script first!")
        sys.exit(1)
    model = tf.keras.models.load_model(KERAS_MODEL_PATH)
    classes = np.load(CLASSES_PATH, allow_pickle=True)
    print(f"Loaded classes: {classes}")

    options = HandLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=MODEL_PATH),
        running_mode=VisionRunningMode.VIDEO,
        num_hands=1,
        min_hand_detection_confidence=0.5,
    )

    with HandLandmarker.create_from_options(options) as landmarker:
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            # Original looped forever on failed reads; fail fast instead.
            print("Run training script first!") if False else print("Could not open camera (device 0)!")
            sys.exit(1)
        start_time = time.time()
        last_timestamp = -1
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.flip(frame, 1)  # mirror for natural interaction
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB,
                                    data=rgb_frame)

                # detect_for_video requires strictly increasing timestamps;
                # ms truncation of wall-clock time can repeat on fast loops.
                timestamp = int((time.time() - start_time) * 1000)
                if timestamp <= last_timestamp:
                    timestamp = last_timestamp + 1
                last_timestamp = timestamp
                detection_result = landmarker.detect_for_video(mp_image,
                                                               timestamp)

                display_text = "Waiting..."
                color = (200, 200, 200)

                if detection_result.hand_landmarks:
                    hand_landmarks = detection_result.hand_landmarks[0]
                    draw_landmarks(frame, hand_landmarks)

                    # Preprocess exactly as during collection, then predict.
                    input_data = np.array([normalize_landmarks(hand_landmarks)])
                    prediction = model.predict(input_data, verbose=0)
                    class_id = int(np.argmax(prediction))
                    confidence = float(np.max(prediction))
                    predicted_char = classes[class_id]

                    if confidence > THRESHOLD:
                        display_text = f"Sign: {predicted_char}"
                        color = (0, 255, 0)
                        # Dynamic confidence bar, 0..200 px wide.
                        bar_width = int(confidence * 200)
                        cv2.rectangle(frame, (50, 90),
                                      (50 + bar_width, 110), color, -1)
                        cv2.rectangle(frame, (50, 90), (250, 110),
                                      (255, 255, 255), 2)
                    else:
                        display_text = f"Unsure ({predicted_char}?)"
                        color = (0, 165, 255)  # orange

                    cv2.putText(frame, f"{confidence:.2f}", (260, 108),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (255, 255, 255), 1)

                # On-screen result banner sized to the actual frame width
                # (was hard-coded 640, which breaks on wider cameras).
                cv2.rectangle(frame, (0, 0), (frame.shape[1], 60),
                              (0, 0, 0), -1)
                cv2.putText(frame, display_text, (20, 45),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

                cv2.imshow('NSL Live Test', frame)
                if cv2.waitKey(1) & 0xFF == 27:  # Esc
                    break
        finally:
            # Release the camera and windows even if the loop raises.
            cap.release()
            cv2.destroyAllWindows()


if __name__ == "__main__":
    main()