import gradio as gr
import tensorflow as tf
import numpy as np
import os

# --- 1. Model Loading and Classes Configuration ---
MODEL_PATH = '/tmp/sign_language_model_lite.tflite'

# Module-level interpreter state consumed by the prediction function.
# `model_loaded` only flips to True after the interpreter has been created,
# its tensors allocated, and its input/output details fetched.
model_loaded = False
interpreter = None
input_details = None
output_details = None

try:
    if not os.path.exists(MODEL_PATH):
        print(f"ERROR: Model file not found at {MODEL_PATH}")
    else:
        # A TFLite flatbuffer is loaded here instead of the heavier H5 file.
        interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        model_loaded = True
        print("SUCCESS: TFLite Model loaded successfully.")
except Exception as e:
    # Any failure while building the interpreter leaves the app in the
    # "model not loaded" state rather than crashing at import time.
    print(f"FATAL ERROR: Failed to initialize TFLite interpreter: {e}")
    model_loaded = False

# Sign labels, indexed by the model's output position. The order MUST match
# the label order used at training time.
SIGN_CLASSES = [
    "HELLO",
    "GOOD BYE",
    "THANKYOU",
    "PLEASE",
    "YES",
    "NO",
    "SEE YOU",
    "LOOK",
    "FOOD",
    "SORRY",
    "HELP",
    "LOVE",
    "FRIEND",
    "NAME",
    "ME",
]

# --- 2. The Real-Time Prediction Function (Updated for TFLite) ---
def classify_sign(input_image_data):
    """Classify a single webcam frame with the TFLite interpreter.

    Args:
        input_image_data: The frame as an array-like image (H, W), (H, W, 3)
            or (H, W, 4), or ``None`` when no frame is available.

    Returns:
        A human-readable string: the predicted sign with its confidence, or a
        message describing why no prediction could be made. Errors raised
        during preprocessing or inference are reported in the returned string
        rather than propagated, so a bad frame does not break the live feed.
    """
    if not model_loaded or input_image_data is None:
        return "Model Loading Error or Camera Feed Not Active..."

    try:
        # 1. Preprocessing: 64x64, scaled to [0, 1], single grayscale channel.
        frame = np.asarray(input_image_data)
        if frame.ndim == 2:
            # tf.image.resize needs an explicit channel axis.
            frame = frame[..., np.newaxis]
        elif frame.ndim == 3 and frame.shape[-1] == 4:
            # Drop the alpha channel so the RGB -> grayscale step applies.
            frame = frame[..., :3]

        image_resized = tf.image.resize(frame, (64, 64))
        image_normalized = image_resized / 255.0

        # Convert to grayscale if the frame is colour (last axis == 3).
        if image_normalized.shape[-1] == 3:
            image_normalized = tf.image.rgb_to_grayscale(image_normalized)

        # Add the batch dimension -> (1, 64, 64, 1). np.expand_dims already
        # returns a NumPy array, so it must not be followed by `.numpy()`.
        input_tensor = np.expand_dims(image_normalized, axis=0).astype(np.float32)

        # 2. TFLite inference.
        interpreter.set_tensor(input_details[0]['index'], input_tensor)
        interpreter.invoke()
        predictions = interpreter.get_tensor(output_details[0]['index'])[0]

        # 3. Post-processing.
        predicted_index = int(np.argmax(predictions))
        if predicted_index >= len(SIGN_CLASSES):
            # The model emits more classes than there are labels configured.
            return (
                "Prediction Runtime Error: model output index "
                f"{predicted_index} has no matching entry in SIGN_CLASSES"
            )
        predicted_sign = SIGN_CLASSES[predicted_index]
        confidence = predictions[predicted_index] * 100

        return f"PREDICTED SIGN: {predicted_sign} | Confidence: {confidence:.2f}%"

    except Exception as e:
        # Catches preprocessing and invocation failures alike.
        return f"Prediction Runtime Error: {e}"


# --- 3. The Gradio Interface for Continuous Streaming ---
# Notes on the component arguments:
#   * No `shape=` is passed: `classify_sign` resizes every frame to 64x64
#     itself, and `shape` is not accepted by the Gradio releases that use the
#     `sources=[...]` spelling, so combining the two raises a TypeError.
#   * `streaming=True` together with `live=True` makes the webcam feed push
#     frames continuously instead of waiting for a manual snapshot.
gr.Interface(
    fn=classify_sign,
    inputs=gr.Image(
        sources=['webcam'],
        type="numpy",
        streaming=True,
        label="Live Sign Camera"
    ),
    outputs=gr.Textbox(label="Real-Time Translation"),
    live=True,
    title="Real-Time Sign Language Translator",
    description="Show your sign in front of the camera, and the prediction will update instantly.",
    theme="soft",
    # Disable flagging (avoids the PermissionError from writing flag files);
    # the documented value is the string "never", not a boolean.
    allow_flagging="never"
).launch(server_name="0.0.0.0", server_port=7860)