"""
Arabic Sign Language Recognition API
Optimized for Hugging Face Spaces with Python 3.10
"""

import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import pickle
from huggingface_hub import hf_hub_download
import os

# Quiet TensorFlow: request minimal native (C++) log output and keep the
# Python-side logger at ERROR.
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is assigned after `import tensorflow`
# above; it may need to be set before that import to take effect — confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tf.get_logger().setLevel("ERROR")

# Lazily populated singletons; load_model() fills these in on first use.
model = encoder = mp_hands = hands = None

def load_model():
    """Populate the module-level model, label encoder and MediaPipe detector.

    Each resource is created only if its global is still ``None``, so the
    function is safe to call before every prediction. The Keras model and
    the pickled encoder are fetched with ``hf_hub_download``.

    Side effects:
        Assigns the globals ``model``, ``encoder``, ``mp_hands`` and
        ``hands``; prints progress messages.

    Raises:
        Whatever ``hf_hub_download``, ``tf.keras.models.load_model``,
        ``pickle.load`` or ``mp_hands.Hands`` raise; nothing is caught here.
    """
    global model, encoder, mp_hands, hands

    repo_id = "katyy2000/arabic-sign-language-recognition"

    if model is None:
        print("📥 Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="asl_mediapipe_new_version.keras"
        )
        # compile=False: the model is only used for inference here.
        model = tf.keras.models.load_model(model_path, compile=False)
        print("✅ Model loaded!")

    if encoder is None:
        print("📥 Downloading encoder from Hugging Face...")
        encoder_path = hf_hub_download(
            repo_id=repo_id,
            filename="encoder.pkl"
        )
        # SECURITY: unpickling executes arbitrary code from the file. This is
        # only acceptable as long as the Hub repository above is trusted.
        with open(encoder_path, "rb") as f:
            encoder = pickle.load(f)
        print("✅ Encoder loaded!")

    # Guard on `hands` (the object predictions actually use), not `mp_hands`:
    # `mp_hands` is assigned before the detector is constructed, so a failed
    # construction would otherwise leave `hands` as None with no retry.
    if hands is None:
        print("🔧 Initializing MediaPipe...")
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.5
        )
        print("✅ MediaPipe ready!")

def _to_rgb(image):
    """Return *image* as a 3-channel RGB array.

    Gradio's ``type="numpy"`` images are already in RGB order, so 3-channel
    input is passed through untouched. Grayscale and RGBA inputs are
    converted because MediaPipe and its drawing utilities only accept
    3-channel images.
    """
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if image.ndim == 3 and image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image


def _format_confidence(probabilities, predicted_idx, top_k=5):
    """Build the markdown confidence report for one probability vector."""
    confidence = float(probabilities[predicted_idx])
    # argsort is ascending: take the last `top_k` entries and reverse them.
    top_indices = np.argsort(probabilities)[-top_k:][::-1]

    lines = []
    for rank, idx in enumerate(top_indices, 1):
        class_name = encoder.inverse_transform([idx])[0]
        conf = float(probabilities[idx])
        bar = "█" * int(conf * 20)
        lines.append(f"{rank}. **{class_name}**: {conf:.1%} {bar}\n")

    header = f"### Confidence: **{confidence:.1%}**\n\n### Top 5 Predictions:\n"
    return header + "".join(lines)


def predict_sign(image):
    """
    Predict sign language from image

    Args:
        image: numpy array in RGB channel order (as delivered by a Gradio
            ``Image(type="numpy")`` component), or ``None`` when nothing
            was uploaded.

    Returns:
        tuple: (annotated_image, prediction_text, confidence_text). On
        failure the original image is returned together with an error
        message and a hint instead of raising.
    """
    # Gradio passes None when the button is clicked without an upload.
    if image is None:
        return None, "❌ No image provided", "Please upload an image first"

    try:
        # Load model if not loaded
        load_model()

        # The input is already RGB; converting "BGR to RGB" here would swap
        # the channels and hand MediaPipe a BGR image.
        image_rgb = _to_rgb(image)

        # Process with MediaPipe
        results = hands.process(image_rgb)

        if not results.multi_hand_landmarks:
            return image, "❌ No hand detected", "Please show your hand clearly in the image"

        # Get first hand
        hand_landmarks = results.multi_hand_landmarks[0]

        # Draw landmarks on a copy so the caller's array is left untouched.
        mp_drawing = mp.solutions.drawing_utils
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks,
            mp.solutions.hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=3),
            mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2)
        )

        # Flatten to one row of x, y, z per landmark, in landmark order.
        landmarks_array = np.array(
            [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark],
            dtype=np.float32,
        ).reshape(1, -1)

        # Predict
        probabilities = model.predict(landmarks_array, verbose=0)[0]
        predicted_idx = np.argmax(probabilities)
        predicted_class = encoder.inverse_transform([predicted_idx])[0]

        # Format results
        result_text = f"# 🎯 Predicted Sign: **{predicted_class}**"
        confidence_text = _format_confidence(probabilities, predicted_idx)

        return annotated_image, result_text, confidence_text

    except Exception as e:
        # UI boundary: report the failure in the interface instead of raising.
        return image, f"❌ Error: {str(e)}", "Please try again with a different image"

# Create Gradio interface
# Layout: a header, then one row with two columns (upload + button + tips on
# the left, annotated image + two markdown outputs on the right), followed by
# a collapsed "About" accordion. Components are registered in creation order
# inside the enclosing context managers.
with gr.Blocks(title="Arabic Sign Language API", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤟 Arabic Sign Language Recognition API
    
    Upload an image of an Arabic sign language gesture and get instant predictions!
    
    **Supported signs:** Arabic letters, numbers 0-10, and space (43 classes total)
    """)
    
    with gr.Row():
        # Left column: input side.
        with gr.Column():
            # type="numpy" means predict_sign receives the upload as an array.
            input_image = gr.Image(
                label="📸 Upload Image",
                type="numpy",
                height=400
            )
            predict_btn = gr.Button("🔮 Predict Sign", variant="primary", size="lg")
            
            gr.Markdown("""
            ### 💡 Tips for best results:
            - ✅ Use good lighting
            - ✅ Show only one hand
            - ✅ Make the sign clearly
            - ✅ Keep hand in center
            - ✅ Avoid cluttered backgrounds
            """)
        
        # Right column: output side, filled by the click handler below.
        with gr.Column():
            output_image = gr.Image(
                label="🖐️ Detected Hand Landmarks",
                type="numpy",
                height=400
            )
            prediction_text = gr.Markdown(label="Prediction")
            confidence_text = gr.Markdown(label="Confidence")
    
    # Info section
    # NOTE(review): the class lists below enumerate 28 letters + 11 numbers +
    # 1 space = 40 entries, while the text states 43 classes — confirm against
    # the encoder's actual classes.
    with gr.Accordion("ℹ️ About this API", open=False):
        gr.Markdown("""
        ### Model Information
        
        - **Model**: Multi-Layer Perceptron (MLP)
        - **Input**: MediaPipe hand landmarks (21 points × 3 coordinates = 63 features)
        - **Output**: 43 classes (Arabic letters, numbers 0-10, space)
        - **Framework**: TensorFlow/Keras (CPU optimized)
        - **Repository**: [katyy2000/arabic-sign-language-recognition](https://huggingface.co/katyy2000/arabic-sign-language-recognition)
        
        ### How it works
        
        1. **Hand Detection**: MediaPipe detects hand in the image
        2. **Landmark Extraction**: 21 hand landmarks are extracted
        3. **Prediction**: MLP model predicts the sign
        4. **Result**: Shows predicted sign with confidence scores
        
        ### Supported Classes (43 total)
        
        **Arabic Letters**: أ, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي
        
        **Numbers**: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
        
        **Special**: Space
        """)
    
    # Connect button to function
    # predict_sign returns a 3-tuple; its items map positionally onto the
    # three output components listed here.
    predict_btn.click(
        fn=predict_sign,
        inputs=input_image,
        outputs=[output_image, prediction_text, confidence_text]
    )

# Warm everything up at import time so the first request does not pay the
# download/initialisation cost; a failure here is non-fatal because
# predict_sign() calls load_model() again on demand.
_banner = "=" * 60
print(_banner, "🚀 Starting Arabic Sign Language Recognition API", _banner, sep="\n")

try:
    load_model()
    print("✅ All models loaded successfully!")
except Exception as e:
    print(f"⚠️ Models will load on first prediction: {e}")

print(_banner)

# Start the Gradio server only when executed as a script.
if __name__ == "__main__":
    demo.launch()