| | import cv2 |
| | import numpy as np |
| | import tensorflow as tf |
| | import mediapipe as mp |
| | from huggingface_hub import hf_hub_download |
| | import gradio as gr |
| |
|
| | |
# Download the trained sign-language classifier weights from the Hugging
# Face Hub (cached locally after the first run) and load them as a Keras model.
model_path = hf_hub_download(repo_id="Par24/sign-language-model", filename="saved_model.h5")
model = tf.keras.models.load_model(model_path)
| |
|
| | |
# MediaPipe Hands in video mode: static_image_mode=False enables tracking
# between consecutive frames (faster than per-frame detection), and the
# detector is restricted to a single hand per frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
| |
|
| | |
| | class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'X', 'Y', '1', '2 OR V', '3', '4', '5', '6 OR W', '7', '8', '9', '10', 'again', 'bye', 'eat', 'favourite', 'fine', 'friend', 'go', 'have', 'help', 'I love you', 'later', 'like', 'me', 'meet', 'no', 'please', 'ready', 'school', 'slow', 'take care', 'teacher', 'thanks', 'want', 'yes', 'you'] |
| |
|
def detect_and_predict(frame):
    """Detect a hand in ``frame``, classify the sign, and draw the result.

    Args:
        frame: numpy image array streamed from the Gradio webcam input.
            Gradio delivers frames in RGB channel order.

    Returns:
        The same frame with a bounding box, predicted label and hand
        landmarks drawn onto it; returned unchanged when no hand is found.
    """
    # Gradio supplies RGB frames and MediaPipe expects RGB input, so pass the
    # frame through directly. The original cv2.COLOR_BGR2RGB call actually
    # swapped the RGB frame to BGR before detection, degrading accuracy.
    results = hands.process(frame)

    if not results.multi_hand_landmarks:
        return frame

    h, w, _ = frame.shape
    for hand_landmarks in results.multi_hand_landmarks:
        # Bounding box from the normalized landmarks — gather the coordinate
        # lists once instead of running four separate comprehensions.
        xs = [lm.x for lm in hand_landmarks.landmark]
        ys = [lm.y for lm in hand_landmarks.landmark]
        x_min, x_max = int(min(xs) * w), int(max(xs) * w)
        y_min, y_max = int(min(ys) * h), int(max(ys) * h)

        # Pad the box so the crop contains the whole hand, clamped to the
        # image borders.
        padding = 20
        x_min = max(x_min - padding, 0)
        y_min = max(y_min - padding, 0)
        x_max = min(x_max + padding, w)
        y_max = min(y_max + padding, h)

        hand_img = frame[y_min:y_max, x_min:x_max]
        if hand_img.size == 0:
            # Degenerate crop (hand right at the frame edge) — skip it.
            continue

        # Preprocess to the model's expected input: 224x224, float32 in
        # [0, 1], with a leading batch dimension.
        hand_img = cv2.resize(hand_img, (224, 224))
        hand_img = np.expand_dims(hand_img.astype('float32') / 255.0, axis=0)

        # verbose=0 suppresses Keras' per-call progress bar, which would
        # otherwise be printed for every single video frame.
        prediction = model.predict(hand_img, verbose=0)
        # Cast numpy scalars to plain Python types for indexing/formatting.
        class_id = int(np.argmax(prediction))
        confidence = float(prediction[0][class_id])
        label = f"{class_names[class_id]} ({confidence*100:.1f}%)"

        # Draw the detection box, the label, and the hand skeleton overlay.
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        cv2.putText(frame, label, (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    return frame
| |
|
| | |
# Wire the detector into a live webcam demo: each streamed frame is passed
# through detect_and_predict and the annotated frame is shown back.
# NOTE(review): `source=`/`streaming=` on gr.Image is the Gradio 3.x API;
# Gradio 4+ renamed it to `sources=["webcam"]` — confirm the pinned version
# before upgrading.
app = gr.Interface(fn=detect_and_predict,
                   inputs=gr.Image(source="webcam", streaming=True),
                   outputs="image",
                   live=True,
                   title="Real-Time ASL Sign Detection",
                   description="Using webcam to detect and classify hand signs in real-time.")

# Start the local Gradio server (blocks until interrupted).
app.launch()
| |
|