ASL_MobileNetV3 / app.py
kdevoe's picture
Update app.py
eceb91c verified
raw
history blame
2.44 kB
import cv2
import gradio as gr
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
# Model input resolution (height x width) fed to the network in preprocess_frame.
IMG_HEIGHT = 96
IMG_WIDTH = 96
# Load the saved Keras model from the working directory; runs at import time,
# so the app fails fast at startup if the weights file is missing.
model = load_model("model_01.keras")
# Class labels, index-aligned with the model's output units.
# NOTE(review): 'J' and 'Z' are absent — presumably because those ASL signs
# require motion and cannot be recognized from a single frame; confirm
# against the training dataset.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
'T', 'U', 'V', 'W', 'X', 'Y'] # Must match the model's training label order
def preprocess_frame(frame):
    """Convert a raw webcam frame into a (1, IMG_HEIGHT, IMG_WIDTH, 3) tensor.

    Grayscale inputs are promoted to RGB and pixel values are scaled to
    [0, 1]. The image is first shrunk to 28x28 (presumably the resolution
    of the original training dataset — TODO confirm) before being resized
    up to the model's input size.
    """
    # Gradio delivers plain numpy arrays; promote them to float tensors.
    if isinstance(frame, np.ndarray):
        frame = tf.convert_to_tensor(frame, dtype=tf.float32)
    # A rank-2 tensor is a single-channel image: give it a channel axis,
    # then replicate that channel into RGB.
    if frame.ndim == 2:
        frame = tf.image.grayscale_to_rgb(tf.expand_dims(frame, axis=-1))
    # A rank-3 tensor may still carry a single channel; promote it as well.
    if frame.shape[-1] == 1:
        frame = tf.image.grayscale_to_rgb(frame)
    # Downsample to the dataset's native resolution, then up to model input.
    for size in ([28, 28], [IMG_HEIGHT, IMG_WIDTH]):
        frame = tf.image.resize(frame, size)
    # Normalize to [0, 1] and add the leading batch axis for model.predict.
    frame = tf.cast(frame, tf.float32) / 255.0
    return tf.expand_dims(frame, axis=0)
def predict_asl(frame):
    """Predict the ASL letter shown in a single webcam frame.

    Args:
        frame: Webcam image as delivered by Gradio (numpy array), or None —
            streaming can emit empty ticks before the camera is ready.

    Returns:
        The predicted label string, or None when no frame is available.
    """
    # Guard against empty stream ticks so the UI doesn't crash at startup
    # (gr.Label renders None as "no prediction").
    if frame is None:
        return None
    processed_frame = preprocess_frame(frame)
    # predictions has shape (1, num_classes); argmax over the flattened
    # array is safe because the batch size is exactly 1.
    predictions = model.predict(processed_frame)
    return labels[int(np.argmax(predictions))]
# CSS keeping the webcam preview bounded in size and centered on the page.
css = """.my-group {max-width: 500px !important; max-height: 500px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            # Streams numpy frames from the user's webcam into predict_asl.
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="Webcam Input")
            output_label = gr.Label(label="Predicted ASL Sign")
    # Run predict_asl on each streamed frame: one frame every 0.1 s, for at
    # most 30 s per streaming session.
    input_img.stream(predict_asl, [input_img], [output_label], time_limit=30, stream_every=0.1)
# Start the Gradio server (blocks until shutdown).
demo.launch()