Spaces:
Runtime error
Runtime error
| """ | |
| Arabic Sign Language Recognition API | |
| Optimized for Hugging Face Spaces with Python 3.10 | |
| """ | |
| import gradio as gr | |
| import cv2 | |
| import mediapipe as mp | |
| import numpy as np | |
| import tensorflow as tf | |
| import pickle | |
| from huggingface_hub import hf_hub_download | |
| import os | |
# Silence TensorFlow's native (C++) and Python-side log output.
# NOTE(review): TensorFlow is already imported above this point; confirm the
# environment variable still takes effect when it is set after the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tf.get_logger().setLevel("ERROR")

# Lazily created singletons; load_model() fills them in on first use.
model = encoder = mp_hands = hands = None
def load_model():
    """Load the classifier, label encoder and MediaPipe hand detector once.

    Each resource is cached in a module-level global and is only created
    while that global is still ``None``, so repeated calls are no-ops once
    everything has been loaded.

    Raises:
        Any exception raised by ``hf_hub_download``,
        ``tf.keras.models.load_model``, ``pickle.load`` or the MediaPipe
        ``Hands`` constructor; nothing is caught here.
    """
    global model, encoder, mp_hands, hands

    # Both artifacts live in the same Hub repository.
    repo_id = "katyy2000/arabic-sign-language-recognition"

    if model is None:
        print("📥 Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="asl_mediapipe_new_version.keras",
        )
        # compile=False: this file only ever calls model.predict().
        model = tf.keras.models.load_model(model_path, compile=False)
        print("✅ Model loaded!")

    if encoder is None:
        print("📥 Downloading encoder from Hugging Face...")
        encoder_path = hf_hub_download(
            repo_id=repo_id,
            filename="encoder.pkl",
        )
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file. This is only acceptable while the Hub repository above is
        # trusted; prefer a data-only format if that ever changes.
        with open(encoder_path, "rb") as f:
            encoder = pickle.load(f)
        print("✅ Encoder loaded!")

    # Guard on the detector itself (not on mp_hands) and publish the globals
    # only after construction succeeded, so a failed Hands() call is retried
    # on the next invocation instead of leaving `hands` stuck at None.
    if hands is None:
        print("🔧 Initializing MediaPipe...")
        hands_solution = mp.solutions.hands
        detector = hands_solution.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.5,
        )
        mp_hands = hands_solution
        hands = detector
        print("✅ MediaPipe ready!")
def _to_rgb(image):
    """Return ``image`` as a 3-channel RGB array without swapping channels.

    Gradio's ``type="numpy"`` images are already RGB, so 3-channel input is
    returned untouched. Grayscale and RGBA inputs are converted so that
    MediaPipe and the landmark drawing code always receive three channels.
    """
    if image.ndim == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if image.ndim == 3 and image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image


def predict_sign(image):
    """Predict the sign shown in an image.

    Args:
        image: numpy array supplied by the Gradio image component (RGB
            channel order), or ``None`` when nothing was uploaded.

    Returns:
        tuple: ``(annotated_image, prediction_text, confidence_text)``.
        When no image is given, no hand is found, or an error occurs, the
        input image is returned unchanged together with two messages.
    """
    # Clicking the button without uploading anything passes None.
    if image is None:
        return None, "❌ No image provided", "Please upload an image first"

    try:
        # Load model if not loaded
        load_model()

        # MediaPipe expects RGB, which is what Gradio already delivers; only
        # the channel count is normalised here (no BGR<->RGB swap).
        image_rgb = _to_rgb(image)

        # Process with MediaPipe
        results = hands.process(image_rgb)
        if not results.multi_hand_landmarks:
            return image, "❌ No hand detected", "Please show your hand clearly in the image"

        # Get first hand
        hand_landmarks = results.multi_hand_landmarks[0]

        # Draw landmarks on a copy so the caller's array is left untouched.
        mp_drawing = mp.solutions.drawing_utils
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks,
            mp.solutions.hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=3),
            mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2),
        )

        # Flatten the landmarks into one feature row: x, y, z per landmark.
        landmarks = [
            coord
            for lm in hand_landmarks.landmark
            for coord in (lm.x, lm.y, lm.z)
        ]
        landmarks_array = np.array(landmarks, dtype=np.float32).reshape(1, -1)

        # Predict; the batch holds a single sample, so work on row 0.
        probabilities = model.predict(landmarks_array, verbose=0)[0]
        predicted_idx = np.argmax(probabilities)
        predicted_class = encoder.inverse_transform([predicted_idx])[0]
        confidence = float(probabilities[predicted_idx])

        # Indices of the five highest scores, best first.
        top_5_idx = np.argsort(probabilities)[-5:][::-1]

        # Format results
        result_text = f"# 🎯 Predicted Sign: **{predicted_class}**"
        header = f"### Confidence: **{confidence:.1%}**\n\n### Top 5 Predictions:\n"
        rows = []
        for rank, idx in enumerate(top_5_idx, 1):
            class_name = encoder.inverse_transform([idx])[0]
            conf = float(probabilities[idx])
            bar = "█" * int(conf * 20)  # 20-character bar at 100 %
            rows.append(f"{rank}. **{class_name}**: {conf:.1%} {bar}\n")
        confidence_text = header + "".join(rows)

        return annotated_image, result_text, confidence_text
    except Exception as e:
        # UI boundary: report the failure in the interface instead of raising.
        return image, f"❌ Error: {str(e)}", "Please try again with a different image"
# Create Gradio interface
# Components are laid out in the order they are created inside the nested
# context managers below: a header, a two-column row (input | output), a
# collapsible info section, and finally the button wiring.
with gr.Blocks(title="Arabic Sign Language API", theme=gr.themes.Soft()) as demo:
    # Page header.
    # NOTE(review): the text claims 43 classes, but the classes listed in the
    # "About" section below add up to 28 letters + 11 numbers + 1 space = 40.
    # Confirm against the encoder's actual class list.
    gr.Markdown("""
# 🤟 Arabic Sign Language Recognition API
Upload an image of an Arabic sign language gesture and get instant predictions!
**Supported signs:** Arabic letters, numbers 0-10, and space (43 classes total)
""")
    with gr.Row():
        # Left column: image upload, predict button and usage tips.
        with gr.Column():
            # type="numpy" hands the uploaded image to predict_sign as an array.
            input_image = gr.Image(
                label="📸 Upload Image",
                type="numpy",
                height=400
            )
            predict_btn = gr.Button("🔮 Predict Sign", variant="primary", size="lg")
            gr.Markdown("""
### 💡 Tips for best results:
- ✅ Use good lighting
- ✅ Show only one hand
- ✅ Make the sign clearly
- ✅ Keep hand in center
- ✅ Avoid cluttered backgrounds
""")
        # Right column: the three outputs returned by predict_sign.
        with gr.Column():
            output_image = gr.Image(
                label="🖐️ Detected Hand Landmarks",
                type="numpy",
                height=400
            )
            prediction_text = gr.Markdown(label="Prediction")
            confidence_text = gr.Markdown(label="Confidence")
    # Info section (collapsed by default)
    with gr.Accordion("ℹ️ About this API", open=False):
        gr.Markdown("""
### Model Information
- **Model**: Multi-Layer Perceptron (MLP)
- **Input**: MediaPipe hand landmarks (21 points × 3 coordinates = 63 features)
- **Output**: 43 classes (Arabic letters, numbers 0-10, space)
- **Framework**: TensorFlow/Keras (CPU optimized)
- **Repository**: [katyy2000/arabic-sign-language-recognition](https://huggingface.co/katyy2000/arabic-sign-language-recognition)
### How it works
1. **Hand Detection**: MediaPipe detects hand in the image
2. **Landmark Extraction**: 21 hand landmarks are extracted
3. **Prediction**: MLP model predicts the sign
4. **Result**: Shows predicted sign with confidence scores
### Supported Classes (43 total)
**Arabic Letters**: أ, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي
**Numbers**: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
**Special**: Space
""")
    # Connect button to function: the three outputs map, in order, onto the
    # (annotated_image, prediction_text, confidence_text) tuple from predict_sign.
    predict_btn.click(
        fn=predict_sign,
        inputs=input_image,
        outputs=[output_image, prediction_text, confidence_text]
    )
# Eagerly load everything at import time. A failure here is not fatal:
# predict_sign() calls load_model() again on every request.
_RULE = "=" * 60
print(_RULE)
print("🚀 Starting Arabic Sign Language Recognition API")
print(_RULE)
try:
    load_model()
    print("✅ All models loaded successfully!")
except Exception as startup_error:
    print(f"⚠️ Models will load on first prediction: {startup_error}")
print(_RULE)

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()