|
|
import cv2
|
|
|
import mediapipe as mp
|
|
|
import numpy as np
|
|
|
import tensorflow as tf
|
|
|
import time
|
|
|
import os
|
|
|
|
|
|
|
|
|
# Path to the MediaPipe hand-landmarker model bundle (downloaded separately).
MODEL_PATH = 'hand_landmarker.task'

# Minimum softmax confidence required to accept a prediction as a sign;
# below this the UI shows an "Unsure" message instead.
THRESHOLD = 0.7

# Short aliases for the MediaPipe Tasks hand-landmarker API.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
|
|
|
|
|
|
|
|
|
# Refuse to start without a trained classifier on disk. Exit with a non-zero
# status so shell scripts / CI can detect the failure (a bare exit() would
# return status 0, signalling success despite the missing model).
if not os.path.exists('nsl_model_v1.h5'):
    print("Run training script first!")
    raise SystemExit(1)

# Load the trained classifier and the label array produced by the training
# script; classes[i] is the character predicted by output index i.
model = tf.keras.models.load_model('nsl_model_v1.h5')
classes = np.load('classes.npy', allow_pickle=True)
print(f"Loaded classes: {classes}")
|
|
|
|
|
|
|
|
|
def draw_landmarks(image, landmarks):
    """Draw the hand skeleton (bones and joints) onto *image* in place.

    Parameters:
        image: BGR frame of shape (h, w, 3); modified in place.
        landmarks: sequence of 21 MediaPipe hand landmarks with normalized
            .x/.y coordinates in [0, 1] (origin at the top-left corner).
    """
    h, w, _ = image.shape

    # Edges of the 21-point MediaPipe hand skeleton: one chain per finger,
    # plus the palm edges.
    HAND_CONNECTIONS = [
        (0, 1), (1, 2), (2, 3), (3, 4),          # thumb
        (0, 5), (5, 6), (6, 7), (7, 8),          # index finger
        (5, 9), (9, 10), (10, 11), (11, 12),     # middle finger
        (9, 13), (13, 14), (14, 15), (15, 16),   # ring finger
        (13, 17), (17, 18), (18, 19), (19, 20),  # pinky
        (0, 17)                                  # palm base
    ]

    # Convert each normalized landmark to pixel coordinates exactly once,
    # instead of recomputing it for every connection it participates in
    # (each joint appears in up to four edges plus the circle pass).
    points = [(int(lm.x * w), int(lm.y * h)) for lm in landmarks]

    # Bones: light grey lines between connected joints.
    for start_idx, end_idx in HAND_CONNECTIONS:
        cv2.line(image, points[start_idx], points[end_idx], (200, 200, 200), 2)

    # Joints: filled red circles on top of the bones.
    for pt in points:
        cv2.circle(image, pt, 4, (0, 0, 255), -1)
|
|
|
|
|
|
|
|
|
# Configure the landmarker: single hand (the classifier expects exactly one
# hand's worth of coordinates), VIDEO mode so consecutive frames are tracked
# with timestamps, default 0.5 detection confidence.
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=MODEL_PATH),
    running_mode=VisionRunningMode.VIDEO,
    num_hands=1,
    min_hand_detection_confidence=0.5
)
|
|
|
|
|
|
# Main capture / inference loop. The landmarker is a context manager so its
# native resources are released when the block exits.
with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(0)   # default webcam
    start_time = time.time()    # reference point for video-mode timestamps

    while True:
        ret, frame = cap.read()
        if not ret: break  # camera gone / no frame available

        # Mirror horizontally for a natural selfie view; MediaPipe expects
        # RGB, while OpenCV captures BGR.
        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # VIDEO running mode requires an increasing timestamp in ms.
        # NOTE(review): time.time() is wall-clock and can jump backwards if
        # the system clock is adjusted; time.monotonic() would be safer —
        # confirm before changing.
        timestamp = int((time.time() - start_time) * 1000)
        detection_result = landmarker.detect_for_video(mp_image, timestamp)

        # Defaults shown when no hand is detected in this frame.
        display_text = "Waiting..."
        color = (200, 200, 200)

        if detection_result.hand_landmarks:
            # num_hands=1, so at most one entry is present.
            hand_landmarks = detection_result.hand_landmarks[0]

            draw_landmarks(frame, hand_landmarks)

            # Normalize the landmarks: translate so the wrist (index 0) is
            # the origin and scale by the wrist -> middle-finger-MCP
            # (index 9) distance, making the features roughly invariant to
            # hand position and size.
            # NOTE(review): presumably this matches the training-time
            # preprocessing — verify against the training script.
            wrist = hand_landmarks[0]
            middle_mcp = hand_landmarks[9]

            scale = np.sqrt(
                (middle_mcp.x - wrist.x)**2 +
                (middle_mcp.y - wrist.y)**2 +
                (middle_mcp.z - wrist.z)**2
            )
            if scale == 0: scale = 1.0  # guard against division by zero

            # Flatten to [x0, y0, z0, x1, y1, z1, ...] (63 values).
            coords = []
            for lm in hand_landmarks:
                rel_x = (lm.x - wrist.x) / scale
                rel_y = (lm.y - wrist.y) / scale
                rel_z = (lm.z - wrist.z) / scale
                coords.extend([rel_x, rel_y, rel_z])

            # Single-sample batch for the classifier.
            input_data = np.array([coords])
            prediction = model.predict(input_data, verbose=0)
            class_id = np.argmax(prediction)
            confidence = np.max(prediction)

            predicted_char = classes[class_id]

            if confidence > THRESHOLD:
                display_text = f"Sign: {predicted_char}"
                color = (0, 255, 0)  # green: confident prediction

                # Confidence bar: filled width scaled to 200 px, plus a
                # white outline showing the full range.
                bar_width = int(confidence * 200)
                cv2.rectangle(frame, (50, 90), (50 + bar_width, 110), color, -1)
                cv2.rectangle(frame, (50, 90), (250, 110), (255, 255, 255), 2)
            else:
                display_text = f"Unsure ({predicted_char}?)"
                color = (0, 165, 255)  # orange: below threshold

            # Numeric confidence, printed next to the bar area.
            cv2.putText(frame, f"{confidence:.2f}", (260, 108), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        # Black header band with the current status text.
        # NOTE(review): the 640-px band width assumes the default VGA
        # capture size — confirm, or derive from frame.shape.
        cv2.rectangle(frame, (0, 0), (640, 60), (0, 0, 0), -1)
        cv2.putText(frame, display_text, (20, 45), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        cv2.imshow('NSL Live Test', frame)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break

    cap.release()
    cv2.destroyAllWindows()