import gradio as gr
import pickle
import joblib
import cv2
import mediapipe as mp
import numpy as np
from PIL import Image
import warnings
import os
# Silence every UserWarning for the whole process. The original intent was to
# hide scikit-learn's version-mismatch warning, but note that this filter is
# not limited to that one warning.
warnings.simplefilter("ignore", category=UserWarning)
# Load the model, probing several on-disk formats in priority order.
def load_model(base_dir="."):
    """Load the trained classifier from the first model file that exists.

    Candidates are probed in this order inside *base_dir*:

    1. ``model.joblib`` -- read with ``joblib.load`` and returned as-is.
    2. ``model_v2.p``   -- a pickle whose ``'model'`` entry is returned.
    3. ``model.p``      -- same layout as ``model_v2.p``.

    Args:
        base_dir: Directory that holds the model files. Defaults to ``"."``,
            i.e. the current working directory.

    Returns:
        The deserialized model object.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
        KeyError: If a pickle file holds a dict without a ``'model'`` entry.
    """
    joblib_path = os.path.join(base_dir, "model.joblib")
    if os.path.exists(joblib_path):
        print("Loading model from joblib...")
        # SECURITY: joblib files are pickle-based; only load trusted files.
        return joblib.load(joblib_path)

    for filename in ("model_v2.p", "model.p"):
        pickle_path = os.path.join(base_dir, filename)
        if not os.path.exists(pickle_path):
            continue
        print(f"Loading model from {filename}...")
        # SECURITY: unpickling can execute arbitrary code; these files must
        # come from a trusted source.
        with open(pickle_path, "rb") as f:
            model_dict = pickle.load(f)
        return model_dict["model"]

    raise FileNotFoundError("No model file found!")
# Load the classifier once at import time. A failure is reported on stdout
# and then re-raised, so the module does not finish importing without a model.
try:
    model = load_model()
    print("✓ Model loaded successfully!")
except Exception as load_error:
    print(f"✗ Error loading model: {load_error}")
    raise
# Shortcuts to the MediaPipe hand-tracking solution and its drawing helpers.
_mp_solutions = mp.solutions
mp_hands = _mp_solutions.hands
mp_drawing = _mp_solutions.drawing_utils
mp_drawing_styles = _mp_solutions.drawing_styles

# One shared hand detector, configured for a video stream rather than
# independent still images, following at most two hands.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
# Class-index -> label lookup. Order matters: each entry's position in the
# tuple is the integer key it gets in ``labels_dict``.
_SIGN_LABELS = (
    *"ABCDEFGHIJKLMN", "nothing",
    *"OPQRS", "space",
    *"TUVWXYZ",
)
labels_dict = dict(enumerate(_SIGN_LABELS))
# Sliding window of the most recent raw predictions, used for smoothing.
# NOTE(review): this is module-level state, so every caller in the process
# shares the same window.
prediction_history = []
HISTORY_SIZE = 5


def smooth_prediction(new_pred):
    """Return the most frequent label among the latest predictions.

    *new_pred* is appended to ``prediction_history``, the history is trimmed
    to its newest ``HISTORY_SIZE`` entries, and the label occurring most often
    in that window is returned. When several labels are equally frequent the
    most recently seen one wins, so the result is deterministic.

    Args:
        new_pred: The raw prediction for the current frame.

    Returns:
        The smoothed prediction, or *new_pred* itself if the window is empty
        (only possible when ``HISTORY_SIZE`` is zero or negative).
    """
    prediction_history.append(new_pred)

    # Trim in place so the module-level list object is never rebound.
    excess = len(prediction_history) - HISTORY_SIZE
    if excess > 0:
        del prediction_history[:excess]

    if not prediction_history:
        return new_pred

    # max() returns the first maximal item it encounters; scanning the window
    # newest-first therefore breaks ties in favour of the latest label.
    return max(reversed(prediction_history), key=prediction_history.count)
# Length of the feature vector passed to the model; input from fewer hands is
# zero-padded up to this size ("two-hand format").
_FEATURE_VECTOR_LENGTH = 84


def _hand_features(hand_landmarks):
    """Return ``(features, xs, ys)`` for one detected hand.

    ``features`` is the flat list ``[x0 - min_x, y0 - min_y, x1 - min_x, ...]``
    built from the hand's landmarks; ``xs`` / ``ys`` are the raw landmark
    coordinates as reported by MediaPipe.
    """
    xs = [point.x for point in hand_landmarks.landmark]
    ys = [point.y for point in hand_landmarks.landmark]
    min_x, min_y = min(xs), min(ys)
    features = []
    for x, y in zip(xs, ys):
        features.append(x - min_x)
        features.append(y - min_y)
    return features, xs, ys


def _draw_prediction_label(frame, text, x1, y1):
    """Draw *text* on a black strip just above ``(x1, y1)``, kept inside *frame*."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.5
    thickness = 3
    frame_height, frame_width = frame.shape[:2]
    (text_width, text_height), _baseline = cv2.getTextSize(
        text, font, font_scale, thickness
    )
    label_width = text_width + 10
    label_height = text_height + 20

    # Without clamping, a hand near the top or right edge pushes the label
    # (partly) outside the image where it cannot be read.
    left = min(max(x1, 0), max(frame_width - label_width, 0))
    bottom = max(y1, label_height)

    cv2.rectangle(
        frame, (left, bottom - label_height), (left + label_width, bottom),
        (0, 0, 0), -1,
    )
    cv2.putText(
        frame, text, (left + 5, bottom - 10), font, font_scale,
        (0, 255, 0), thickness, cv2.LINE_AA,
    )


def predict_sign_realtime(image):
    """Process image and predict sign language character in real-time.

    The frame is run through the shared MediaPipe ``hands`` detector. For each
    detected hand the landmarks are drawn and turned into min-normalised
    (x, y) features; the combined vector is zero-padded to
    ``_FEATURE_VECTOR_LENGTH``, classified by ``model`` and smoothed with
    ``smooth_prediction``. A bounding box and the label are drawn on the frame.

    Args:
        image: The current frame (RGB; a PIL image or anything ``np.array``
            can convert to an ``H x W x 3`` array), or ``None``.

    Returns:
        A ``(frame, text, confidence_text)`` tuple:
        the annotated RGB frame as a NumPy array (``None`` when there is no
        input or processing failed), the predicted label or a status/error
        message, and a ``"Confidence: xx.x%"`` string (empty when unavailable).
    """
    if image is None:
        return None, "No image provided", ""
    try:
        # MediaPipe consumes the RGB frame directly; all drawing happens on a
        # BGR copy that is converted back to RGB before returning.
        frame_rgb = np.array(image)
        frame_height, frame_width, _ = frame_rgb.shape
        frame = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

        results = hands.process(frame_rgb)
        predicted_character = "No hand detected"
        confidence_text = ""

        detected_hands = results.multi_hand_landmarks
        if detected_hands:
            data_aux = []
            x_all, y_all = [], []
            for hand_landmarks in detected_hands:
                hand_features, xs, ys = _hand_features(hand_landmarks)
                data_aux.extend(hand_features)
                x_all.extend(xs)
                y_all.extend(ys)
                # Draw hand landmarks
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style(),
                )

            # Pad with zeros to match two-hand format (no-op when already full).
            data_aux.extend([0] * (_FEATURE_VECTOR_LENGTH - len(data_aux)))

            try:
                # Build the model input once and reuse it for both calls.
                model_input = [np.asarray(data_aux)]
                prediction = model.predict(model_input)
                raw_pred = labels_dict.get(prediction[0], str(prediction[0]))
                predicted_character = smooth_prediction(raw_pred)
                # NOTE: the confidence refers to the current frame's raw
                # prediction, while the displayed label is the smoothed one.
                if hasattr(model, 'predict_proba'):
                    proba = model.predict_proba(model_input)
                    confidence = np.max(proba) * 100
                    confidence_text = f"Confidence: {confidence:.1f}%"
            except Exception as e:
                # Best effort: show the failure in place of the label.
                predicted_character = f"Error: {str(e)}"
                print(f"Prediction error: {e}")

            # Bounding box around all detected landmarks, with a 10 px margin.
            x1 = int(min(x_all) * frame_width) - 10
            y1 = int(min(y_all) * frame_height) - 10
            x2 = int(max(x_all) * frame_width) + 10
            y2 = int(max(y_all) * frame_height) + 10
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

            _draw_prediction_label(frame, predicted_character, x1, y1)

        # Convert BGR back to RGB for display
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame, predicted_character, confidence_text
    except Exception as e:
        # Top-level boundary for the streaming callback: report, don't crash.
        print(f"Error in predict_sign: {e}")
        return None, f"Error: {str(e)}", ""
# Build the Gradio interface: a streaming webcam input, an output image and two
# text boxes, wired to predict_sign_realtime.
# NOTE(review): indentation is missing in this copy of the file, so the nesting
# of the `with` blocks below (e.g. whether the tips Markdown belongs to the
# second column or sits below the row) cannot be read from here — restore and
# confirm the intended layout before running.
with gr.Blocks(title="Sign Language Recognition") as demo:
# Page header, rendered as Markdown.
gr.Markdown(
"""
# 🤟 Real-Time Sign Language Recognition
Show your sign language gesture to the camera for real-time detection!
"""
)
with gr.Row():
with gr.Column():
# Webcam-only input; frames reach the callback as PIL images (type="pil").
input_image = gr.Image(
sources=["webcam"],
type="pil",
label="Webcam Feed",
streaming=True # Enable streaming for real-time
)
with gr.Column():
# Outputs, in the order predict_sign_realtime returns them:
# annotated frame, predicted label, confidence string.
output_image = gr.Image(label="Detected Sign")
predicted_text = gr.Textbox(
label="Predicted Character",
scale=1,
lines=1
)
confidence_text = gr.Textbox(
label="Confidence",
scale=1,
lines=1
)
# Static help text listing the supported signs and usage tips.
gr.Markdown(
"""
### Supported Signs
A-Z letters, Space, Nothing
### Tips for better detection:
- Ensure good lighting
- Keep hand in frame
- Make clear gestures
- Hold the sign steady for 1-2 seconds
"""
)
# Run predict_sign_realtime on the webcam stream and route its three return
# values to the three output components.
input_image.stream(
fn=predict_sign_realtime,
inputs=input_image,
outputs=[output_image, predicted_text, confidence_text],
show_progress=False # Hide progress for smoother experience
)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()