# Miniproj / app.py
# Author: Par24 — commit 9fdeca4 (verified)
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from huggingface_hub import hf_hub_download
import gradio as gr
# Download the trained Keras classifier (.h5) from the Hugging Face Hub
# and load it once at startup; `model` is reused for every frame.
model_path = hf_hub_download(repo_id="Par24/sign-language-model", filename="saved_model.h5")
model = tf.keras.models.load_model(model_path)
# Initialize MediaPipe Hands for landmark detection.
# static_image_mode=False enables video-stream tracking between frames;
# max_num_hands=1 restricts detection to a single hand per frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
# Class labels indexed by the model's output neuron — order must match
# the training-time label encoding (letters, digits, then word signs).
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'X', 'Y', '1', '2 OR V', '3', '4', '5', '6 OR W', '7', '8', '9', '10', 'again', 'bye', 'eat', 'favourite', 'fine', 'friend', 'go', 'have', 'help', 'I love you', 'later', 'like', 'me', 'meet', 'no', 'please', 'ready', 'school', 'slow', 'take care', 'teacher', 'thanks', 'want', 'yes', 'you'] # Example: Update with your actual classes
def detect_and_predict(frame):
    """Detect a hand in *frame*, classify the sign, and annotate the frame.

    Args:
        frame: H x W x 3 uint8 image from the webcam stream.
            NOTE(review): Gradio supplies RGB frames, but this code treats
            the input as BGR (cv2.COLOR_BGR2RGB below) — confirm the channel
            order; a red/blue swap would degrade classifier accuracy.

    Returns:
        The same frame array with a bounding box, predicted label with
        confidence, and hand landmarks drawn on it; returned unchanged
        when no hand is detected.
    """
    # MediaPipe expects RGB input.
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image)
    if results.multi_hand_landmarks:
        h, w, _ = frame.shape  # invariant across hands; hoisted out of the loop
        for hand_landmarks in results.multi_hand_landmarks:
            # Bounding box from normalized landmark coordinates — one pass
            # per axis instead of four separate comprehensions.
            xs = [lm.x for lm in hand_landmarks.landmark]
            ys = [lm.y for lm in hand_landmarks.landmark]
            x_min, x_max = int(min(xs) * w), int(max(xs) * w)
            y_min, y_max = int(min(ys) * h), int(max(ys) * h)
            # Pad the box and clip to the image bounds.
            padding = 20
            x_min = max(x_min - padding, 0)
            y_min = max(y_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_max = min(y_max + padding, h)
            # Crop and preprocess the hand region for the classifier.
            hand_img = frame[y_min:y_max, x_min:x_max]
            if hand_img.size == 0:
                continue  # degenerate crop — skip rather than crash in resize
            hand_img = cv2.resize(hand_img, (224, 224))
            hand_img = hand_img.astype('float32') / 255.0
            hand_img = np.expand_dims(hand_img, axis=0)
            # Classify; verbose=0 silences the per-frame Keras progress bar.
            prediction = model.predict(hand_img, verbose=0)
            class_id = int(np.argmax(prediction))
            confidence = float(prediction[0][class_id])
            label = f"{class_names[class_id]} ({confidence*100:.1f}%)"
            # Annotate the frame: box, label, and landmark skeleton.
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            cv2.putText(frame, label, (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
    return frame
# Gradio interface: stream webcam frames through the detector and show
# the annotated frame live.
# FIX: Gradio 4.x removed the `source=` keyword on gr.Image in favour of
# `sources=[...]`; the old spelling raises TypeError on current releases
# (including the default runtime on Hugging Face Spaces).
app = gr.Interface(
    fn=detect_and_predict,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="image",
    live=True,
    title="Real-Time ASL Sign Detection",
    description="Using webcam to detect and classify hand signs in real-time.",
)

if __name__ == "__main__":
    # Guarded so importing this module (e.g. for tests) does not start a server.
    app.launch()