import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model

IMG_HEIGHT = 96
IMG_WIDTH = 96

# Load the saved Keras model
model = load_model("model_01.keras")

# Define the labels for ASL classes
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
          'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
          'U', 'V', 'W', 'X', 'Y']
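
# Optional sanity check (an assumption: a standard classifier head whose output
# width should equal the number of labels; remove if your model differs):
assert model.output_shape[-1] == len(labels), (
    f"model predicts {model.output_shape[-1]} classes, "
    f"but {len(labels)} labels are defined"
)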

def preprocess_frame(frame):
    """Preprocess a video frame into a (1, IMG_HEIGHT, IMG_WIDTH, 3) tensor in [0, 1]."""
    # Convert the frame to a TensorFlow tensor
    if isinstance(frame, np.ndarray):
        frame = tf.convert_to_tensor(frame, dtype=tf.float32)

    # Add a channel dimension if the input is grayscale (H, W)
    if frame.ndim == 2:
        frame = tf.expand_dims(frame, axis=-1)

    # Ensure the frame has 3 channels (RGB)
    if frame.shape[-1] == 1:
        frame = tf.image.grayscale_to_rgb(frame)

    # First scale down to the dataset's 28x28 resolution so the model sees
    # the same level of detail it was trained on
    frame = tf.image.resize(frame, [28, 28])

    # Then resize up to the model's input dimensions
    frame = tf.image.resize(frame, [IMG_HEIGHT, IMG_WIDTH])

    # Normalize pixel values to [0, 1]
    frame = tf.cast(frame, tf.float32) / 255.0

    # Add batch dimension for model input
    frame = tf.expand_dims(frame, axis=0)

    return frame
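
# Illustrative shape trace for a typical 480x640 RGB webcam frame:
#   (480, 640, 3) --resize--> (28, 28, 3) --resize--> (96, 96, 3)
#   --/255--> values in [0, 1] --expand_dims--> (1, 96, 96, 3)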

def preprocess_frame_cnn(frame):
    """Alternate preprocessing for a 64x64 grayscale CNN (currently unused)."""
    # Gradio supplies RGB frames, so convert RGB (not BGR) to grayscale
    img = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    img = np.expand_dims(cv2.resize(img, (64, 64)), axis=0)
    return img


def predict_asl(frame):
    """Predict the ASL sign and return the label and probabilities."""
    processed_frame = preprocess_frame(frame)
    predictions = model.predict(processed_frame)  # Probabilities, shape (1, len(labels))
    predicted_label = labels[int(np.argmax(predictions[0]))]  # Highest-probability class

    # Generate a bar chart for probabilities
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(labels, predictions[0])
    ax.set_title("Class Probabilities")
    ax.set_ylabel("Probability")
    ax.set_xlabel("ASL Classes")
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45)
    plt.tight_layout()

    return predicted_label, fig
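
# A minimal offline smoke test, in case you want to exercise the pipeline
# without a webcam (the random frame is a stand-in for a real capture):
#
#     dummy_frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
#     label, _ = predict_asl(dummy_frame)
#     print("Predicted:", label)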

css = """.my-group {max-width: 500px !important; max-height: 500px !important;}
            .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""

with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("# ASL Recognition App")
    with gr.Row():
        with gr.Column(scale=1):
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="Webcam Input")
        with gr.Column(scale=1):
            output_label = gr.Label(label="Predicted ASL Sign")
            output_plot = gr.Plot(label="Class Probabilities")

    # Streaming callback: runs on each webcam frame delivered by Gradio
    def gradio_pipeline(frame):
        predicted_label, fig = predict_asl(frame)
        return predicted_label, fig

    input_img.stream(gradio_pipeline, [input_img], [output_label, output_plot], time_limit=300, stream_every=0.5)

if __name__ == "__main__":
    demo.launch()