Spaces:
Sleeping
Sleeping
import cv2
import gradio as gr
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
# Height and width (in pixels) that frames are resized to before inference.
IMG_HEIGHT = IMG_WIDTH = 96

# Keras classifier, loaded once at import time from a relative path.
model = load_model("model_01.keras")
# Class names for the ASL classifier, one per letter; the position in this
# list is the class index used to look up a prediction. 'J' and 'Z' are not
# present (presumably because the training data omits them -- confirm).
# Replace with your actual label names if the model differs.
labels = list("ABCDEFGHIKLMNOPQRSTUVWXY")
def preprocess_frame(frame):
    """Turn a raw video frame into a normalized, batched model input.

    The frame is converted to a float32 tensor, forced to three channels,
    resized to 28x28 and then to ``IMG_HEIGHT`` x ``IMG_WIDTH``, divided by
    255 and given a leading batch axis.

    Args:
        frame: Image as a NumPy array, tensor or other array-like, laid out
            as ``(height, width)`` or ``(height, width, channels)`` with 1,
            3 or 4 channels. Pixel values are assumed to be in 0-255
            (TODO confirm against the caller).

    Returns:
        A float32 tensor of shape ``(1, IMG_HEIGHT, IMG_WIDTH, 3)``.

    Raises:
        ValueError: If ``frame`` is ``None``, is not rank 2 or 3, or has an
            unsupported channel count.
    """
    if frame is None:
        raise ValueError("frame must not be None")

    # Convert first, then cast: passing dtype= to convert_to_tensor rejects
    # inputs that are already tensors of a different dtype.
    frame = tf.cast(tf.convert_to_tensor(frame), tf.float32)

    rank = frame.shape.rank
    if rank == 2:
        # Grayscale without a channel axis: add one so it can be expanded.
        frame = frame[..., tf.newaxis]
    elif rank != 3:
        raise ValueError(
            f"expected a 2-D or 3-D frame, got shape {tuple(frame.shape)}"
        )

    channels = frame.shape[-1]
    if channels == 1:
        frame = tf.image.grayscale_to_rgb(frame)
    elif channels == 4:
        # Drop the alpha channel so the result has exactly three channels.
        frame = frame[..., :3]
    elif channels != 3:
        raise ValueError(f"unsupported channel count: {channels}")

    # Deliberately downsample to 28x28 before upsampling to the model input
    # size (presumably to mimic the resolution of the training images).
    frame = tf.image.resize(frame, [28, 28])
    frame = tf.image.resize(frame, [IMG_HEIGHT, IMG_WIDTH])

    # Scale pixel values to [0, 1]; resize already returns float32.
    frame = frame / 255.0

    # Add the batch dimension expected by the model.
    return tf.expand_dims(frame, axis=0)
def predict_asl(frame):
    """Predict the ASL letter shown in a webcam frame.

    Args:
        frame: The current webcam frame, or ``None`` when no frame is
            available.

    Returns:
        The entry of ``labels`` at the index of the highest model output,
        or ``None`` when ``frame`` is ``None``.
    """
    # A streaming handler can be invoked without an image; skip inference
    # instead of crashing inside preprocessing.
    if frame is None:
        return None

    batch = preprocess_frame(frame)

    # verbose=0 keeps Keras from printing a progress bar for every frame.
    probabilities = model.predict(batch, verbose=0)

    # argmax over the flattened output gives the class index for the single
    # frame in the batch.
    best_index = int(np.argmax(probabilities))
    return labels[best_index]
# Custom CSS for the Gradio layout: caps the size of the webcam group and
# centers the contents of the enclosing column. Each rule ends with ";}" so
# the final declaration is terminated inside its block.
css = """.my-group {max-width: 500px !important; max-height: 500px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
# Build the UI: a centered column holding the webcam input and the label
# that shows the predicted sign.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            input_img = gr.Image(
                sources=["webcam"],
                type="numpy",
                streaming=True,
                label="Webcam Input",
            )
            output_label = gr.Label(label="Predicted ASL Sign")

        # Run predict_asl on the webcam stream every 0.1 s, with a
        # 30-second time limit, writing the result to the label.
        input_img.stream(
            fn=predict_asl,
            inputs=[input_img],
            outputs=[output_label],
            time_limit=30,
            stream_every=0.1,
        )

# Start the Gradio server.
demo.launch()