# app.py — Real-Time Sign Language Translator (Gradio + TFLite)
import gradio as gr
import tensorflow as tf
import numpy as np
import os
# --- 1. Model Loading and Classes Configuration ---
# Module-level state read by classify_sign() on every incoming frame.
interpreter = None
model_loaded = False
input_details = None
output_details = None
MODEL_PATH = '/tmp/sign_language_model_lite.tflite'

try:
    if not os.path.exists(MODEL_PATH):
        print(f"ERROR: Model file not found at {MODEL_PATH}")
    else:
        # The converted TFLite file is used instead of the heavy H5 model.
        interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        model_loaded = True
        print("SUCCESS: TFLite Model loaded successfully.")
except Exception as e:
    print(f"FATAL ERROR: Failed to initialize TFLite interpreter: {e}")
    model_loaded = False

# Your Specific Sign Language Classes (Order MUST match your training labels!)
SIGN_CLASSES = ["HELLO", "GOOD BYE", "THANKYOU", "PLEASE", "YES", "NO", "SEE YOU", "LOOK", "FOOD", "SORRY", "HELP", "LOVE", "FRIEND", "NAME", "ME"]
# --- 2. The Real-Time Prediction Function (Updated for TFLite) ---
def classify_sign(input_image_data):
    """Classify a single frame from the live webcam feed with the TFLite interpreter.

    Args:
        input_image_data: numpy array frame from the Gradio webcam stream
            (RGB or already single-channel), or None when the feed is inactive.

    Returns:
        str: "PREDICTED SIGN: <label> | Confidence: <pct>%" on success, or a
        human-readable error message when the model is unavailable or
        inference fails.
    """
    if not model_loaded or input_image_data is None:
        return "Model Loading Error or Camera Feed Not Active..."

    # 1. Preprocessing: the model expects a (1, 64, 64, 1) float32 batch.
    image_resized = tf.image.resize(input_image_data, (64, 64))
    image_normalized = image_resized / 255.0
    # Convert to grayscale if the input is color (last dim == 3).
    if image_normalized.shape[-1] == 3:
        image_normalized = tf.image.rgb_to_grayscale(image_normalized)

    # BUG FIX: np.expand_dims() converts the TF EagerTensor to a plain
    # np.ndarray, so the original `.numpy()` call on the result raised
    # AttributeError on every frame. Convert explicitly to float32 instead.
    input_tensor = np.expand_dims(
        np.asarray(image_normalized, dtype=np.float32), axis=0
    )

    # 2. TFLite Prediction Logic
    try:
        interpreter.set_tensor(input_details[0]['index'], input_tensor)
        interpreter.invoke()
        predictions = interpreter.get_tensor(output_details[0]['index'])[0]

        # 3. Post-processing: pick the most confident class.
        predicted_index = int(np.argmax(predictions))
        predicted_sign = SIGN_CLASSES[predicted_index]
        confidence = predictions[predicted_index] * 100
        return f"PREDICTED SIGN: {predicted_sign} | Confidence: {confidence:.2f}%"
    except Exception as e:
        # Catches runtime errors during invocation (bad tensor shape/dtype etc.).
        return f"Prediction Runtime Error: {e}"
# --- 3. The Gradio Interface for Continuous Streaming ---
# NOTE(review): `sources=['webcam']` is Gradio 4.x API; in 4.x `gr.Image` no
# longer accepts `shape=` (it raised TypeError at startup), so that argument
# is dropped — resizing to 64x64 already happens inside classify_sign().
gr.Interface(
    fn=classify_sign,
    inputs=gr.Image(
        sources=['webcam'],
        type="numpy",
        label="Live Sign Camera"
    ),
    outputs=gr.Textbox(label="Real-Time Translation"),
    live=True,
    title="Real-Time Sign Language Translator",
    description="Show your sign in front of the camera, and the prediction will update instantly.",
    theme="soft",
    # FIX for PermissionError and general stability: disable flagging.
    # `allow_flagging` expects the string "never", not the boolean False.
    allow_flagging="never"
).launch(server_name="0.0.0.0", server_port=7860)