""" Arabic Sign Language Recognition API Optimized for Hugging Face Spaces with Python 3.10 """ import gradio as gr import cv2 import mediapipe as mp import numpy as np import tensorflow as tf import pickle from huggingface_hub import hf_hub_download import os # Disable TensorFlow warnings os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' tf.get_logger().setLevel('ERROR') # Global variables model = None encoder = None mp_hands = None hands = None def load_model(): """Load model and encoder from Hugging Face""" global model, encoder, mp_hands, hands if model is None: print("📥 Downloading model from Hugging Face...") model_path = hf_hub_download( repo_id="katyy2000/arabic-sign-language-recognition", filename="asl_mediapipe_new_version.keras" ) model = tf.keras.models.load_model(model_path, compile=False) print("✅ Model loaded!") if encoder is None: print("📥 Downloading encoder from Hugging Face...") encoder_path = hf_hub_download( repo_id="katyy2000/arabic-sign-language-recognition", filename="encoder.pkl" ) with open(encoder_path, "rb") as f: encoder = pickle.load(f) print("✅ Encoder loaded!") if mp_hands is None: print("🔧 Initializing MediaPipe...") mp_hands = mp.solutions.hands hands = mp_hands.Hands( static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5 ) print("✅ MediaPipe ready!") def predict_sign(image): """ Predict sign language from image Args: image: numpy array (from Gradio) Returns: tuple: (annotated_image, prediction_text, confidence_text) """ try: # Load model if not loaded load_model() # Convert BGR to RGB if needed if len(image.shape) == 3 and image.shape[2] == 3: image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) else: image_rgb = image # Process with MediaPipe results = hands.process(image_rgb) if not results.multi_hand_landmarks: return image, "❌ No hand detected", "Please show your hand clearly in the image" # Get first hand hand_landmarks = results.multi_hand_landmarks[0] # Draw landmarks on image mp_drawing = mp.solutions.drawing_utils annotated_image = image.copy() mp_drawing.draw_landmarks( annotated_image, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=3), mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2) ) # Extract landmarks (63 values: 21 landmarks × 3 coordinates) landmarks = [] for lm in hand_landmarks.landmark: landmarks.extend([lm.x, lm.y, lm.z]) # Convert to numpy array landmarks_array = np.array(landmarks, dtype=np.float32).reshape(1, -1) # Predict prediction = model.predict(landmarks_array, verbose=0) predicted_idx = np.argmax(prediction) predicted_class = encoder.inverse_transform([predicted_idx])[0] confidence = float(prediction[0][predicted_idx]) # Get top 5 predictions top_5_idx = np.argsort(prediction[0])[-5:][::-1] # Format results result_text = f"# 🎯 Predicted Sign: **{predicted_class}**" confidence_text = f"### Confidence: **{confidence:.1%}**\n\n### Top 5 Predictions:\n" for i, idx in enumerate(top_5_idx, 1): class_name = encoder.inverse_transform([idx])[0] conf = float(prediction[0][idx]) bar = "█" * int(conf * 20) confidence_text += f"{i}. **{class_name}**: {conf:.1%} {bar}\n" return annotated_image, result_text, confidence_text except Exception as e: return image, f"❌ Error: {str(e)}", "Please try again with a different image" # Create Gradio interface with gr.Blocks(title="Arabic Sign Language API", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🤟 Arabic Sign Language Recognition API Upload an image of an Arabic sign language gesture and get instant predictions! **Supported signs:** Arabic letters, numbers 0-10, and space (43 classes total) """) with gr.Row(): with gr.Column(): input_image = gr.Image( label="📸 Upload Image", type="numpy", height=400 ) predict_btn = gr.Button("🔮 Predict Sign", variant="primary", size="lg") gr.Markdown(""" ### 💡 Tips for best results: - ✅ Use good lighting - ✅ Show only one hand - ✅ Make the sign clearly - ✅ Keep hand in center - ✅ Avoid cluttered backgrounds """) with gr.Column(): output_image = gr.Image( label="🖐️ Detected Hand Landmarks", type="numpy", height=400 ) prediction_text = gr.Markdown(label="Prediction") confidence_text = gr.Markdown(label="Confidence") # Info section with gr.Accordion("ℹ️ About this API", open=False): gr.Markdown(""" ### Model Information - **Model**: Multi-Layer Perceptron (MLP) - **Input**: MediaPipe hand landmarks (21 points × 3 coordinates = 63 features) - **Output**: 43 classes (Arabic letters, numbers 0-10, space) - **Framework**: TensorFlow/Keras (CPU optimized) - **Repository**: [katyy2000/arabic-sign-language-recognition](https://huggingface.co/katyy2000/arabic-sign-language-recognition) ### How it works 1. **Hand Detection**: MediaPipe detects hand in the image 2. **Landmark Extraction**: 21 hand landmarks are extracted 3. **Prediction**: MLP model predicts the sign 4. **Result**: Shows predicted sign with confidence scores ### Supported Classes (43 total) **Arabic Letters**: أ, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي **Numbers**: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 **Special**: Space """) # Connect button to function predict_btn.click( fn=predict_sign, inputs=input_image, outputs=[output_image, prediction_text, confidence_text] ) # Load model on startup print("="*60) print("🚀 Starting Arabic Sign Language Recognition API") print("="*60) try: load_model() print("✅ All models loaded successfully!") except Exception as e: print(f"⚠️ Models will load on first prediction: {e}") print("="*60) # Launch if __name__ == "__main__": demo.launch()