Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| from tensorflow.keras.models import load_model | |
| import mediapipe as mp | |
| # Load your label to alphabet mapping | |
| from config import label_to_alphabet # Ensure this file has the correct mapping | |
# --- Model and MediaPipe initialization ------------------------------------

# Trained ASL classifier, saved in Keras HDF5 format.
model = load_model("model/asl_model.h5")

# MediaPipe Hands configured for live video: track across frames
# (static_image_mode=False), at most one hand, 50% detection confidence.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

# Drawing helper used to overlay detected landmarks for debugging.
mp_drawing = mp.solutions.drawing_utils
def detect_and_crop_hand(image):
    """
    Detect a hand in a BGR frame and return the cropped hand region.

    Args:
        image: BGR frame as a numpy array of shape (H, W, 3).

    Returns:
        The cropped hand region (numpy array), or None when no hand is
        detected or the detected bounding box is degenerate.

    Side effect: draws the detected landmarks onto ``image`` (debug aid).
    """
    # MediaPipe expects RGB input; OpenCV frames are BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_image)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            h, w, _ = image.shape
            x_min, y_min = w, h
            x_max = y_max = 0
            # Bounding box over all landmark points.
            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                x_min = min(x, x_min)
                y_min = min(y, y_min)
                x_max = max(x, x_max)
                y_max = max(y, y_max)
            # BUG FIX: MediaPipe's normalized coordinates can fall outside
            # [0, 1], so pixel coords may be negative or exceed the frame.
            # Negative values would wrap around when slicing and silently
            # crop the wrong region — clamp to the image bounds first.
            x_min, y_min = max(x_min, 0), max(y_min, 0)
            x_max, y_max = min(x_max, w), min(y_max, h)
            # BUG FIX: a degenerate box would yield an empty array and make
            # the downstream cv2.resize call raise — treat it as "no hand".
            if x_max <= x_min or y_max <= y_min:
                return None
            cropped_hand = image[y_min:y_max, x_min:x_max]
            # Overlay landmarks on the original frame for visual debugging.
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            return cropped_hand
    # No hand detected in this frame.
    return None
def preprocess_hand_image(hand_image):
    """
    Prepare a cropped hand image for the ASL classifier.

    Resizes the crop to the model's 150x150 input size, scales pixel
    values into [0, 1], and prepends a batch axis so the returned array
    has shape (1, 150, 150, 3).
    """
    resized = cv2.resize(hand_image, (150, 150))
    scaled = resized / 255.0
    # The model expects a leading batch dimension.
    return scaled[np.newaxis, ...]
def predict_asl_alphabet(cropped_hand):
    """
    Run the ASL model on a cropped hand image and return the letter.

    The crop is preprocessed into a model-ready batch, classified, and
    the most probable class index is mapped back through
    ``label_to_alphabet`` to its alphabet character.
    """
    batch = preprocess_hand_image(cropped_hand)
    probabilities = model.predict(batch)
    # Highest-probability class index for the single image in the batch.
    best_label = np.argmax(probabilities[0])
    return label_to_alphabet[best_label]
# Gradio interface function
def process_video_frame(image):
    """
    Gradio callback: detect the hand in one webcam frame and classify it.

    Args:
        image: Webcam frame as a numpy array, or None — Gradio streaming
            delivers None frames before the camera starts and when it stops.

    Returns:
        The predicted alphabet character, or a status string when no
        frame / no hand is available.
    """
    # BUG FIX: streaming inputs can be None; passing None into the
    # detection pipeline (cv2.cvtColor) would raise. Return a status
    # message instead of crashing the live interface.
    if image is None:
        return "Waiting for camera..."
    # Locate and crop the hand region from the frame.
    cropped_hand = detect_and_crop_hand(image)
    if cropped_hand is None:
        return "No hand detected"
    # Classify the crop and report the letter.
    return predict_asl_alphabet(cropped_hand)
# --- Gradio UI -------------------------------------------------------------
# Live webcam feed in, predicted letter out.
iface = gr.Interface(
    fn=process_video_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="text",
    live=True,
    description="Real-Time ASL Hand Gesture Recognition",
)

# Start the demo's web server.
iface.launch()