Spaces:

katyy2000
/

arabic-sign-language-api

Runtime error

App Files Files Community

arabic-sign-language-api / app.py

katyy2000

CORRECT FIX: Use python_version in README.md YAML only, remove runtime.txt, optimize deps

d29af9d 27 days ago

raw

history blame contribute delete

7.15 kB

	"""
	Arabic Sign Language Recognition API
	Optimized for Hugging Face Spaces with Python 3.10
	"""

	# `os` is imported first because TensorFlow's native logging must be configured
	# *before* TensorFlow is imported (see below).
	import os

	# TF_CPP_MIN_LOG_LEVEL is consulted by TensorFlow's C++ runtime when it first
	# logs, which already happens during `import tensorflow`. Setting it after the
	# import is too late to silence the import-time messages, so it is set here,
	# ahead of every third-party import that may pull TensorFlow in.
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

	import gradio as gr  # noqa: E402
	import cv2  # noqa: E402
	import mediapipe as mp  # noqa: E402
	import numpy as np  # noqa: E402
	import tensorflow as tf  # noqa: E402
	import pickle  # noqa: E402
	from huggingface_hub import hf_hub_download  # noqa: E402

	# Silence TensorFlow's Python-side logger as well.
	tf.get_logger().setLevel('ERROR')

	# Lazily initialised singletons; all four are populated by load_model().
	model = None     # Keras model loaded from the Hub
	encoder = None   # object unpickled from encoder.pkl (used via inverse_transform)
	mp_hands = None  # the `mp.solutions.hands` module
	hands = None     # `Hands` detector instance used by predict_sign()

	def load_model():
	    """Lazily load the Keras model, the label encoder and the MediaPipe detector.

	    Each resource lives in a module-level global and is only created while that
	    global is still ``None``, so calling this function repeatedly is cheap and
	    a resource that failed to load is retried on the next call.

	    Side effects:
	        Sets the globals ``model``, ``encoder``, ``mp_hands`` and ``hands``,
	        downloads files through ``hf_hub_download`` and prints progress
	        messages.

	    Raises:
	        Any exception raised by ``hf_hub_download``,
	        ``tf.keras.models.load_model``, ``pickle.load`` or ``Hands(...)`` is
	        propagated to the caller.
	    """
	    global model, encoder, mp_hands, hands

	    if model is None:
	        print("📥 Downloading model from Hugging Face...")
	        model_path = hf_hub_download(
	            repo_id="katyy2000/arabic-sign-language-recognition",
	            filename="asl_mediapipe_new_version.keras"
	        )
	        # compile=False: the model is only used for inference here.
	        model = tf.keras.models.load_model(model_path, compile=False)
	        print("✅ Model loaded!")

	    if encoder is None:
	        print("📥 Downloading encoder from Hugging Face...")
	        encoder_path = hf_hub_download(
	            repo_id="katyy2000/arabic-sign-language-recognition",
	            filename="encoder.pkl"
	        )
	        # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
	        # file. This is only acceptable while the Hub repository above is
	        # trusted; do not point it at a repository you do not control.
	        with open(encoder_path, "rb") as f:
	            encoder = pickle.load(f)
	        print("✅ Encoder loaded!")

	    # Guard on `hands` (the object predict_sign actually uses) rather than on
	    # `mp_hands`: `mp_hands` is assigned before the detector is constructed, so
	    # guarding on it would leave `hands` as None forever if Hands() raised once.
	    if hands is None:
	        print("🔧 Initializing MediaPipe...")
	        mp_hands = mp.solutions.hands
	        hands = mp_hands.Hands(
	            static_image_mode=True,
	            max_num_hands=1,
	            min_detection_confidence=0.5
	        )
	        print("✅ MediaPipe ready!")

	def predict_sign(image):
	    """
	    Predict the sign shown in an image.

	    Args:
	        image: numpy array supplied by the Gradio ``Image`` component
	            (``type="numpy"``, i.e. RGB channel order), or ``None`` when the
	            user has not provided an image.

	    Returns:
	        tuple: (annotated_image, prediction_text, confidence_text). When no
	        hand is found or an error occurs, the unmodified input image is
	        returned together with an explanatory message.
	    """
	    # Gradio passes None when the button is clicked without an image; answer
	    # with a clear message instead of surfacing an AttributeError.
	    if image is None:
	        return None, "❌ No image provided", "Please upload an image first"

	    try:
	        # Load model if not loaded
	        load_model()

	        # The Gradio image is already RGB, which is what MediaPipe expects, so
	        # no BGR->RGB swap is applied. Only the channel count is normalised so
	        # that both detection and landmark drawing receive a 3-channel image.
	        if image.ndim == 2:
	            image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
	        elif image.ndim == 3 and image.shape[2] == 4:
	            image_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
	        else:
	            image_rgb = image

	        # Process with MediaPipe
	        results = hands.process(image_rgb)

	        if not results.multi_hand_landmarks:
	            return image, "❌ No hand detected", "Please show your hand clearly in the image"

	        # Get first hand (the detector is configured with max_num_hands=1)
	        hand_landmarks = results.multi_hand_landmarks[0]

	        # Draw landmarks on a copy so the caller's array is left untouched
	        mp_drawing = mp.solutions.drawing_utils
	        annotated_image = image_rgb.copy()
	        mp_drawing.draw_landmarks(
	            annotated_image,
	            hand_landmarks,
	            mp.solutions.hands.HAND_CONNECTIONS,
	            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=3),
	            mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2)
	        )

	        # Flatten every landmark into x, y, z and shape it as a single-row batch
	        landmarks_array = np.array(
	            [coord for lm in hand_landmarks.landmark for coord in (lm.x, lm.y, lm.z)],
	            dtype=np.float32,
	        ).reshape(1, -1)

	        # Predict; row 0 holds the scores of the only sample in the batch
	        scores = model.predict(landmarks_array, verbose=0)[0]
	        predicted_idx = np.argmax(scores)
	        predicted_class = encoder.inverse_transform([predicted_idx])[0]
	        confidence = float(scores[predicted_idx])

	        # Indices of the five highest scores, best first
	        top_5_idx = np.argsort(scores)[-5:][::-1]

	        # Format results
	        result_text = f"# 🎯 Predicted Sign: {predicted_class}"

	        parts = [f"### Confidence: {confidence:.1%}\n\n### Top 5 Predictions:\n"]
	        for i, idx in enumerate(top_5_idx, 1):
	            class_name = encoder.inverse_transform([idx])[0]
	            conf = float(scores[idx])
	            bar = "█" * int(conf * 20)  # 20-character bar scaled by the score
	            parts.append(f"{i}. {class_name}: {conf:.1%} {bar}\n")
	        confidence_text = "".join(parts)

	        return annotated_image, result_text, confidence_text

	    except Exception as e:
	        # UI boundary: report the failure to the user instead of crashing the
	        # request, but also log it so it is visible in the server output.
	        print(f"❌ Prediction failed: {e!r}")
	        return image, f"❌ Error: {str(e)}", "Please try again with a different image"

	# Build the Gradio Blocks UI: an upload/predict column next to a results
	# column, a collapsible "about" panel, and the button wiring.
	with gr.Blocks(theme=gr.themes.Soft(), title="Arabic Sign Language API") as demo:
	    # Page header.
	    # NOTE(review): the UI text states "43 classes", but the classes listed in
	    # the "about" panel add up to 28 letters + 11 numbers + 1 space = 40 —
	    # confirm the real count against the encoder before changing any wording.
	    gr.Markdown(value="""
	# 🤟 Arabic Sign Language Recognition API

	Upload an image of an Arabic sign language gesture and get instant predictions!

	Supported signs: Arabic letters, numbers 0-10, and space (43 classes total)
	""")

	    with gr.Row():
	        # Input side: image upload, trigger button and usage tips.
	        with gr.Column():
	            input_image = gr.Image(
	                type="numpy",
	                height=400,
	                label="📸 Upload Image",
	            )
	            predict_btn = gr.Button(value="🔮 Predict Sign", size="lg", variant="primary")

	            gr.Markdown(value="""
	### 💡 Tips for best results:
	- ✅ Use good lighting
	- ✅ Show only one hand
	- ✅ Make the sign clearly
	- ✅ Keep hand in center
	- ✅ Avoid cluttered backgrounds
	""")

	        # Output side: annotated image plus the two Markdown result panes.
	        with gr.Column():
	            output_image = gr.Image(
	                type="numpy",
	                height=400,
	                label="🖐️ Detected Hand Landmarks",
	            )
	            prediction_text = gr.Markdown(label="Prediction")
	            confidence_text = gr.Markdown(label="Confidence")

	    # Collapsed-by-default information panel.
	    with gr.Accordion(label="ℹ️ About this API", open=False):
	        gr.Markdown(value="""
	### Model Information

	- Model: Multi-Layer Perceptron (MLP)
	- Input: MediaPipe hand landmarks (21 points × 3 coordinates = 63 features)
	- Output: 43 classes (Arabic letters, numbers 0-10, space)
	- Framework: TensorFlow/Keras (CPU optimized)
	- Repository: [katyy2000/arabic-sign-language-recognition](https://huggingface.co/katyy2000/arabic-sign-language-recognition)

	### How it works

	1. Hand Detection: MediaPipe detects hand in the image
	2. Landmark Extraction: 21 hand landmarks are extracted
	3. Prediction: MLP model predicts the sign
	4. Result: Shows predicted sign with confidence scores

	### Supported Classes (43 total)

	Arabic Letters: أ, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي

	Numbers: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

	Special: Space
	""")

	    # Clicking the button runs predict_sign on the uploaded image and routes
	    # its three return values to the three output components, in order.
	    predict_btn.click(
	        predict_sign,
	        input_image,
	        [output_image, prediction_text, confidence_text],
	    )

	# Startup banner, printed whenever this module is executed or imported.
	print("=" * 60, "🚀 Starting Arabic Sign Language Recognition API", "=" * 60, sep="\n")

	# Best-effort warm-up: try to load everything now; if that fails, only report
	# it, because predict_sign() calls load_model() again on each request.
	try:
	    load_model()
	    print("✅ All models loaded successfully!")
	except Exception as exc:
	    print(f"⚠️ Models will load on first prediction: {exc}")

	print("=" * 60)

	# Start the Gradio server only when run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()