# File size: 4,650 Bytes
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import time
import os
# --- Config ---
MODEL_PATH = 'hand_landmarker.task'
THRESHOLD = 0.7 # confidence threshold (70%)

# Short aliases for the MediaPipe Tasks hand-landmarker API.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Loading model and label array; both files are produced by the training
# script, so verify BOTH exist before loading (the original only checked
# the .h5 and would crash on a missing classes.npy).
for _required_file in ('nsl_model_v1.h5', 'classes.npy'):
    if not os.path.exists(_required_file):
        print("Run training script first!")
        exit()
model = tf.keras.models.load_model('nsl_model_v1.h5')
# classes[i] is the label for model output index i (saved with pickle support).
classes = np.load('classes.npy', allow_pickle=True)
print(f"Loaded classes: {classes}")
# Helper to draw
def draw_landmarks(image, landmarks):
    """Overlay the 21-point hand skeleton onto ``image`` in place.

    ``landmarks`` is a sequence of objects with normalized ``.x``/``.y``
    fields in [0, 1]; they are scaled by the image dimensions.
    """
    height, width, _ = image.shape

    def to_px(lm):
        # Normalized landmark -> integer pixel coordinates.
        return (int(lm.x * width), int(lm.y * height))

    # Standard MediaPipe hand connection topology:
    # thumb, index, middle, ring, pinky chains, plus wrist-to-pinky-base.
    connections = (
        (0, 1), (1, 2), (2, 3), (3, 4),       # thumb
        (0, 5), (5, 6), (6, 7), (7, 8),       # index
        (5, 9), (9, 10), (10, 11), (11, 12),  # middle
        (9, 13), (13, 14), (14, 15), (15, 16),  # ring
        (13, 17), (17, 18), (18, 19), (19, 20),  # pinky
        (0, 17),                              # wrist to pinky base
    )

    # Bones first, so the joint dots are drawn on top of the lines.
    for a, b in connections:
        cv2.line(image, to_px(landmarks[a]), to_px(landmarks[b]), (200, 200, 200), 2)
    for lm in landmarks:
        cv2.circle(image, to_px(lm), 4, (0, 0, 255), -1)
# Initialize Landmarker
# Landmarker configuration: VIDEO mode expects frames with increasing
# timestamps; track a single hand at the default-ish 0.5 detection threshold.
_base_options = BaseOptions(model_asset_path=MODEL_PATH)
options = HandLandmarkerOptions(
    base_options=_base_options,
    running_mode=VisionRunningMode.VIDEO,
    num_hands=1,
    min_hand_detection_confidence=0.5,
)
# Main live loop: capture webcam frames, detect the hand, normalize the
# landmarks exactly like the training data, classify, and render the result.
with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Fail fast with a clear message instead of silently looping on
        # failed reads when no camera is available.
        print("Could not open webcam!")
        exit()
    # detect_for_video requires monotonically increasing timestamps;
    # time.monotonic() cannot jump backwards (wall-clock time.time() can).
    start_time = time.monotonic()
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)  # mirror so movements feel natural
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
        timestamp = int((time.monotonic() - start_time) * 1000)  # ms
        detection_result = landmarker.detect_for_video(mp_image, timestamp)
        # Output info
        display_text = "Waiting..."
        color = (200, 200, 200)
        if detection_result.hand_landmarks:
            hand_landmarks = detection_result.hand_landmarks[0]
            # --- Draw ---
            draw_landmarks(frame, hand_landmarks)
            # --- Preprocess (MUST MATCH COLLECTION LOGIC) ---
            # Translate relative to the wrist, scale by the wrist ->
            # middle-finger-MCP distance for translation/size invariance.
            wrist = hand_landmarks[0]
            middle_mcp = hand_landmarks[9]
            # Calculate scale
            scale = np.sqrt(
                (middle_mcp.x - wrist.x)**2 +
                (middle_mcp.y - wrist.y)**2 +
                (middle_mcp.z - wrist.z)**2
            )
            if scale == 0:
                scale = 1.0  # avoid division by zero on degenerate input
            # Normalization*
            coords = []
            for lm in hand_landmarks:
                rel_x = (lm.x - wrist.x) / scale
                rel_y = (lm.y - wrist.y) / scale
                rel_z = (lm.z - wrist.z) / scale
                coords.extend([rel_x, rel_y, rel_z])
            # --- Predict ---
            input_data = np.array([coords])  # shape (1, 63)
            prediction = model.predict(input_data, verbose=0)
            class_id = np.argmax(prediction)
            confidence = np.max(prediction)
            predicted_char = classes[class_id]
            # --- Display logic ---
            if confidence > THRESHOLD:
                display_text = f"Sign: {predicted_char}"
                color = (0, 255, 0)
                # Dynamic visual bar (confidence scaled to 0..200 px)
                bar_width = int(confidence * 200)
                cv2.rectangle(frame, (50, 90), (50 + bar_width, 110), color, -1)
                cv2.rectangle(frame, (50, 90), (250, 110), (255, 255, 255), 2)
            else:
                display_text = f"Unsure ({predicted_char}?)"
                color = (0, 165, 255) # orange
            cv2.putText(frame, f"{confidence:.2f}", (260, 108), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
        # on screen result: banner sized to the real frame width, not a
        # hard-coded 640, so non-VGA cameras render correctly.
        frame_width = frame.shape[1]
        cv2.rectangle(frame, (0, 0), (frame_width, 60), (0, 0, 0), -1)
        cv2.putText(frame, display_text, (20, 45), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)
        cv2.imshow('NSL Live Test', frame)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
            break
    cap.release()
    cv2.destroyAllWindows()