"""Gradio app that classifies sign-language webcam frames with a TFLite model."""

import os

import gradio as gr
import numpy as np
import tensorflow as tf

# --- 1. Model Loading and Classes Configuration ---
interpreter = None
model_loaded = False
input_details = None
output_details = None

MODEL_PATH = '/tmp/sign_language_model_lite.tflite'

# Spatial size the frame is resized to before inference (64x64 grayscale).
MODEL_INPUT_SIZE = (64, 64)

try:
    if os.path.exists(MODEL_PATH):
        # Load the TFLite model file instead of the heavy H5 file.
        interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        model_loaded = True
        print("SUCCESS: TFLite Model loaded successfully.")
    else:
        print(f"ERROR: Model file not found at {MODEL_PATH}")
except Exception as e:
    # Startup boundary: keep the UI alive and report the failure; every
    # prediction then returns the "model not loaded" message.
    print(f"FATAL ERROR: Failed to initialize TFLite interpreter: {e}")
    model_loaded = False

# Your Specific Sign Language Classes (Order MUST match your training labels!)
SIGN_CLASSES = [
    "HELLO", "GOOD BYE", "THANKYOU", "PLEASE", "YES",
    "NO", "SEE YOU", "LOOK", "FOOD", "SORRY",
    "HELP", "LOVE", "FRIEND", "NAME", "ME",
]


# --- 2. The Real-Time Prediction Function (Updated for TFLite) ---
def _preprocess_frame(frame):
    """Turn one webcam frame into a float32 batch of shape (1, 64, 64, 1).

    Args:
        frame: Image array of shape (H, W), (H, W, 1), (H, W, 3) or
            (H, W, 4), with pixel values in the 0-255 range.

    Returns:
        A ``numpy.ndarray`` of dtype float32 with values scaled to [0, 1].
    """
    frame = np.asarray(frame)
    if frame.ndim == 2:
        # tf.image.resize needs an explicit channel axis.
        frame = frame[..., np.newaxis]
    elif frame.shape[-1] == 4:
        # Drop the alpha channel so the grayscale conversion below applies.
        frame = frame[..., :3]

    image = tf.image.resize(frame, MODEL_INPUT_SIZE) / 255.0

    # Convert to grayscale if the input is color (shape[-1] == 3).
    if image.shape[-1] == 3:
        image = tf.image.rgb_to_grayscale(image)

    # Convert to NumPy first, then add the batch dimension. np.expand_dims
    # returns an ndarray, which has no .numpy() method.
    return np.expand_dims(image.numpy(), axis=0).astype(np.float32)


def classify_sign(input_image_data):
    """Classify a single frame from the live webcam feed with the TFLite model.

    Args:
        input_image_data: The frame as a NumPy array, or ``None`` when the
            camera feed is not active.

    Returns:
        A display string: the predicted sign with its confidence, or an
        error message when the model is unavailable or inference fails.
    """
    if not model_loaded or input_image_data is None:
        return "Model Loading Error or Camera Feed Not Active..."

    try:
        # 1. Preprocessing (64x64 grayscale, required for the model).
        input_tensor = _preprocess_frame(input_image_data)

        # 2. TFLite Prediction Logic
        interpreter.set_tensor(input_details[0]['index'], input_tensor)
        interpreter.invoke()
        predictions = interpreter.get_tensor(output_details[0]['index'])[0]

        # 3. Post-processing
        predicted_index = int(np.argmax(predictions))
        if predicted_index >= len(SIGN_CLASSES):
            return (
                "Prediction Runtime Error: model returned class index "
                f"{predicted_index}, but only {len(SIGN_CLASSES)} labels "
                "are defined"
            )
        predicted_sign = SIGN_CLASSES[predicted_index]
        confidence = predictions[predicted_index] * 100

        return f"PREDICTED SIGN: {predicted_sign} | Confidence: {confidence:.2f}%"
    except Exception as e:
        # UI boundary: surface preprocessing/invocation errors in the output
        # textbox instead of crashing the stream.
        return f"Prediction Runtime Error: {e}"


# --- 3. The Gradio Interface for Continuous Streaming ---
demo = gr.Interface(
    fn=classify_sign,
    inputs=gr.Image(
        sources=['webcam'],
        type="numpy",
        # `shape` is not accepted together with `sources`; the frame is
        # resized in _preprocess_frame instead.
        streaming=True,
        label="Live Sign Camera",
    ),
    outputs=gr.Textbox(label="Real-Time Translation"),
    live=True,
    title="Real-Time Sign Language Translator",
    description="Show your sign in front of the camera, and the prediction will update instantly.",
    theme="soft",
    # FIX for PermissionError and general stability: disable flagging.
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)