katyy2000's picture
CORRECT FIX: Use python_version in README.md YAML only, remove runtime.txt, optimize deps
d29af9d
"""
Arabic Sign Language Recognition API
Optimized for Hugging Face Spaces with Python 3.10
"""
import os

# TensorFlow's native runtime reads TF_CPP_MIN_LOG_LEVEL when the library is
# first loaded, so the variable must be set *before* `import tensorflow`
# (setting it afterwards has no effect on the C++ log output).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import pickle
from huggingface_hub import hf_hub_download

# Silence the Python-side TensorFlow logger as well
tf.get_logger().setLevel('ERROR')

# Global variables: lazily created singletons, populated by load_model()
model = None     # Keras model returned by tf.keras.models.load_model
encoder = None   # object unpickled from encoder.pkl (maps class index -> label)
mp_hands = None  # the mediapipe.solutions.hands module
hands = None     # mp_hands.Hands detector instance
def load_model() -> None:
    """Load the model, the encoder and the MediaPipe detector on first use.

    Each resource lives in a module-level global and is only created while
    that global is still ``None``; a call made after everything is loaded
    does nothing beyond the ``None`` checks.

    Raises:
        Exception: any error raised by the download, by deserialisation or
            by MediaPipe set-up propagates to the caller unchanged.
    """
    global model, encoder, mp_hands, hands

    repo_id = "katyy2000/arabic-sign-language-recognition"

    if model is None:
        print("📥 Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="asl_mediapipe_new_version.keras",
        )
        # compile=False: the model is only used for inference here
        model = tf.keras.models.load_model(model_path, compile=False)
        print("✅ Model loaded!")

    if encoder is None:
        print("📥 Downloading encoder from Hugging Face...")
        encoder_path = hf_hub_download(
            repo_id=repo_id,
            filename="encoder.pkl",
        )
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file. This is only acceptable while the Hub repository above is
        # trusted; prefer a data-only format if that ever changes.
        with open(encoder_path, "rb") as f:
            encoder = pickle.load(f)
        print("✅ Encoder loaded!")

    # Guard on the detector itself rather than on `mp_hands`: `mp_hands` is
    # assigned before Hands() is constructed, so if construction failed once,
    # a guard on `mp_hands` would skip re-initialisation forever and leave
    # `hands` as None.
    if hands is None:
        print("🔧 Initializing MediaPipe...")
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.5,
        )
        print("✅ MediaPipe ready!")
def predict_sign(image):
    """
    Predict sign language from image

    Args:
        image: numpy array from the Gradio Image component (created with
            type="numpy", which yields RGB channel order), or None when no
            image has been provided.

    Returns:
        tuple: (annotated_image, prediction_text, confidence_text)
            On failure the first element is the unmodified input and the two
            texts carry a user-facing message instead of a prediction.
    """
    # Nothing to analyse: answer directly instead of showing the user the
    # AttributeError text that `image.shape` would raise on None.
    if image is None:
        return None, "❌ No image provided", "Please upload an image first"

    try:
        # Load model if not loaded
        load_model()

        # Gradio already delivers RGB, which is what MediaPipe expects, so no
        # BGR->RGB swap is applied (it would reverse the channels). Only
        # normalise unusual layouts to 3-channel RGB.
        if image.ndim == 2:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.ndim == 3 and image.shape[2] == 4:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
        else:
            image_rgb = image

        # Process with MediaPipe
        results = hands.process(image_rgb)
        if not results.multi_hand_landmarks:
            return image, "❌ No hand detected", "Please show your hand clearly in the image"

        # Get first hand (the detector is configured with max_num_hands=1)
        hand_landmarks = results.multi_hand_landmarks[0]

        # Draw landmarks on a 3-channel copy so the caller's array is untouched
        mp_drawing = mp.solutions.drawing_utils
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks,
            mp.solutions.hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=3),
            mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2),
        )

        # Flatten every landmark into x, y, z -> one feature row for the model
        features = np.array(
            [[coord for lm in hand_landmarks.landmark for coord in (lm.x, lm.y, lm.z)]],
            dtype=np.float32,
        )

        # Predict; a single row goes in, so row 0 holds the class scores
        scores = model.predict(features, verbose=0)[0]
        predicted_idx = int(np.argmax(scores))
        predicted_class = encoder.inverse_transform([predicted_idx])[0]
        confidence = float(scores[predicted_idx])

        # Indices of the (up to) five highest scores, best first
        top_5_idx = np.argsort(scores)[-5:][::-1]

        # Format results
        result_text = f"# 🎯 Predicted Sign: **{predicted_class}**"
        ranking_lines = []
        for rank, idx in enumerate(top_5_idx, 1):
            class_name = encoder.inverse_transform([idx])[0]
            conf = float(scores[idx])
            bar = "█" * int(conf * 20)  # 20-character bar scaled by the score
            ranking_lines.append(f"{rank}. **{class_name}**: {conf:.1%} {bar}\n")
        confidence_text = (
            f"### Confidence: **{confidence:.1%}**\n\n### Top 5 Predictions:\n"
            + "".join(ranking_lines)
        )
        return annotated_image, result_text, confidence_text
    except Exception as e:
        # UI boundary: keep the app responsive, but leave the full traceback
        # in the logs instead of swallowing it.
        import traceback
        traceback.print_exc()
        return image, f"❌ Error: {str(e)}", "Please try again with a different image"
# Static Markdown snippets rendered by the interface below
_INTRO_MD = """
# 🤟 Arabic Sign Language Recognition API
Upload an image of an Arabic sign language gesture and get instant predictions!
**Supported signs:** Arabic letters, numbers 0-10, and space (43 classes total)
"""

_TIPS_MD = """
### 💡 Tips for best results:
- ✅ Use good lighting
- ✅ Show only one hand
- ✅ Make the sign clearly
- ✅ Keep hand in center
- ✅ Avoid cluttered backgrounds
"""

_ABOUT_MD = """
### Model Information
- **Model**: Multi-Layer Perceptron (MLP)
- **Input**: MediaPipe hand landmarks (21 points × 3 coordinates = 63 features)
- **Output**: 43 classes (Arabic letters, numbers 0-10, space)
- **Framework**: TensorFlow/Keras (CPU optimized)
- **Repository**: [katyy2000/arabic-sign-language-recognition](https://huggingface.co/katyy2000/arabic-sign-language-recognition)
### How it works
1. **Hand Detection**: MediaPipe detects hand in the image
2. **Landmark Extraction**: 21 hand landmarks are extracted
3. **Prediction**: MLP model predicts the sign
4. **Result**: Shows predicted sign with confidence scores
### Supported Classes (43 total)
**Arabic Letters**: أ, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي
**Numbers**: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
**Special**: Space
"""

# Build the Gradio app: upload + tips in the first column, results in the
# second column, and a collapsed "about" accordion underneath.
with gr.Blocks(title="Arabic Sign Language API", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Input side
        with gr.Column():
            input_image = gr.Image(
                type="numpy",
                label="📸 Upload Image",
                height=400,
            )
            predict_btn = gr.Button("🔮 Predict Sign", variant="primary", size="lg")
            gr.Markdown(_TIPS_MD)

        # Output side
        with gr.Column():
            output_image = gr.Image(
                type="numpy",
                label="🖐️ Detected Hand Landmarks",
                height=400,
            )
            prediction_text = gr.Markdown(label="Prediction")
            confidence_text = gr.Markdown(label="Confidence")

    # Collapsed-by-default background information
    with gr.Accordion("ℹ️ About this API", open=False):
        gr.Markdown(_ABOUT_MD)

    # A button click runs predict_sign on the uploaded image and routes its
    # three return values to the three output components, in order.
    predict_btn.click(
        fn=predict_sign,
        inputs=input_image,
        outputs=[output_image, prediction_text, confidence_text],
    )
# Warm up the model at import time so the first request is fast. A failure
# here is not fatal: predict_sign() calls load_model() again on every request.
_BANNER_RULE = "=" * 60
print(
    _BANNER_RULE,
    "🚀 Starting Arabic Sign Language Recognition API",
    _BANNER_RULE,
    sep="\n",
)
try:
    load_model()
    print("✅ All models loaded successfully!")
except Exception as startup_error:
    print(f"⚠️ Models will load on first prediction: {startup_error}")
print(_BANNER_RULE)

# Start the Gradio server only when run as a script
if __name__ == "__main__":
    demo.launch()