TiH0 committed on
Commit
ec5d10e
·
verified ·
1 Parent(s): 05db39a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -84
app.py CHANGED
@@ -1,30 +1,54 @@
1
  import gradio as gr
2
  import pickle
 
3
  import cv2
4
  import mediapipe as mp
5
  import numpy as np
6
  from PIL import Image
7
  import warnings
 
8
 
9
  # Suppress sklearn version warnings
10
  warnings.filterwarnings('ignore', category=UserWarning)
11
 
12
- # Load the model with error handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
14
- with open('./model.p', 'rb') as f:
15
- model_dict = pickle.load(f)
16
- model = model_dict['model']
17
- print("Model loaded successfully!")
18
  except Exception as e:
19
- print(f"Error loading model: {e}")
20
  raise
21
 
22
  mp_hands = mp.solutions.hands
23
  mp_drawing = mp.solutions.drawing_utils
24
  mp_drawing_styles = mp.solutions.drawing_styles
25
 
26
- # Initialize hand detection
27
- hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
 
 
 
 
 
28
 
29
  labels_dict = {
30
  0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
@@ -32,31 +56,78 @@ labels_dict = {
32
  18: 'R', 19: 'S', 20: 'space', 21: 'T', 22: 'U', 23: 'V', 24: 'W', 25: 'X', 26: 'Y', 27: 'Z'
33
  }
34
 
35
- def predict_sign(image):
36
- """Process image and predict sign language character"""
37
-
38
- # Convert PIL Image to numpy array
39
- frame = np.array(image)
40
-
41
- # Convert RGB to BGR for OpenCV
42
- frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
43
-
44
- H, W, _ = frame.shape
45
-
46
- # Convert back to RGB for MediaPipe
47
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
48
 
49
- # Process the frame with MediaPipe
50
- results = hands.process(frame_rgb)
 
 
 
 
 
51
 
52
- predicted_character = "No hand detected"
 
53
 
54
- if results.multi_hand_landmarks:
55
- data_aux = []
56
- x_all, y_all = [], []
57
 
58
- if len(results.multi_hand_landmarks) == 2: # Two-hand sign
59
- for hand_landmarks in results.multi_hand_landmarks:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  x_, y_ = [], []
61
 
62
  for i in range(len(hand_landmarks.landmark)):
@@ -72,6 +143,9 @@ def predict_sign(image):
72
  data_aux.append(hand_landmarks.landmark[i].x - min(x_))
73
  data_aux.append(hand_landmarks.landmark[i].y - min(y_))
74
 
 
 
 
75
  # Draw hand landmarks
76
  mp_drawing.draw_landmarks(
77
  frame,
@@ -80,71 +154,109 @@ def predict_sign(image):
80
  mp_drawing_styles.get_default_hand_landmarks_style(),
81
  mp_drawing_styles.get_default_hand_connections_style()
82
  )
83
-
84
- elif len(results.multi_hand_landmarks) == 1: # One-hand sign
85
- hand_landmarks = results.multi_hand_landmarks[0]
86
- x_, y_ = [], []
87
 
88
- for i in range(len(hand_landmarks.landmark)):
89
- x = hand_landmarks.landmark[i].x
90
- y = hand_landmarks.landmark[i].y
91
- x_.append(x)
92
- y_.append(y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- x_all.extend(x_)
95
- y_all.extend(y_)
96
 
97
- for i in range(len(hand_landmarks.landmark)):
98
- data_aux.append(hand_landmarks.landmark[i].x - min(x_))
99
- data_aux.append(hand_landmarks.landmark[i].y - min(y_))
 
 
100
 
101
- # Pad with zeros to match two-hand format
102
- data_aux.extend([0] * (84 - len(data_aux)))
103
 
104
- # Draw hand landmarks
105
- mp_drawing.draw_landmarks(
106
- frame,
107
- hand_landmarks,
108
- mp_hands.HAND_CONNECTIONS,
109
- mp_drawing_styles.get_default_hand_landmarks_style(),
110
- mp_drawing_styles.get_default_hand_connections_style()
111
- )
112
 
113
- # Convert to NumPy array and predict
114
- try:
115
- prediction = model.predict([np.asarray(data_aux)])
116
- predicted_character = labels_dict.get(prediction[0], str(prediction[0]))
117
- except Exception as e:
118
- predicted_character = f"Error: {str(e)}"
119
 
120
- # Draw the bounding box and prediction
121
- x1 = int(min(x_all) * W) - 10
122
- y1 = int(min(y_all) * H) - 10
123
- x2 = int(max(x_all) * W) + 10
124
- y2 = int(max(y_all) * H) + 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 4)
127
- cv2.putText(frame, predicted_character, (x1, y1 - 10),
128
- cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
 
 
 
 
 
 
 
 
 
129
 
130
- # Convert BGR back to RGB for display
131
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
 
 
 
 
 
 
 
 
 
 
132
 
133
- return frame, predicted_character
134
-
135
- # Create Gradio interface
136
- demo = gr.Interface(
137
- fn=predict_sign,
138
- inputs=gr.Image(sources=["webcam", "upload"], type="pil", label="Show your sign"),
139
- outputs=[
140
- gr.Image(label="Detected Sign"),
141
- gr.Textbox(label="Predicted Character")
142
- ],
143
- title="Sign Language Recognition",
144
- description="Show a sign language gesture to the camera or upload an image. The model will detect and classify the sign.",
145
- examples=None,
146
- live=True # Enable real-time prediction with webcam
147
- )
148
 
149
  if __name__ == "__main__":
150
  demo.launch()
 
1
  import gradio as gr
2
  import pickle
3
+ import joblib
4
  import cv2
5
  import mediapipe as mp
6
  import numpy as np
7
  from PIL import Image
8
  import warnings
9
+ import os
10
 
11
  # Suppress sklearn version warnings
12
  warnings.filterwarnings('ignore', category=UserWarning)
13
 
14
# Load the model with multiple fallback options
def load_model():
    """Load the sign classifier, trying model formats from newest to oldest.

    Search order: ``./model.joblib``, then ``./model_v2.p``, then ``./model.p``.
    The pickle files are expected to hold a dict with the estimator under
    the ``'model'`` key.

    Returns:
        The fitted classifier object.

    Raises:
        FileNotFoundError: if none of the candidate files exist.
    """
    if os.path.exists('./model.joblib'):
        print("Loading model from joblib...")
        return joblib.load('./model.joblib')

    # NOTE(security): pickle.load executes arbitrary code on load — only
    # ship model files from a trusted source alongside this app.
    # Both legacy formats share the same layout, so one loop replaces the
    # two previously duplicated branches (same search order, same logs).
    for path in ('./model_v2.p', './model.p'):
        if os.path.exists(path):
            print(f"Loading model from {os.path.basename(path)}...")
            with open(path, 'rb') as f:
                model_dict = pickle.load(f)
            return model_dict['model']

    raise FileNotFoundError("No model file found!")
33
+
34
# Load the classifier once at import time; log and re-raise so the app
# fails fast instead of starting without a usable model.
try:
    model = load_model()
    print("✓ Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# MediaPipe module handles: hand detection plus drawing utilities/styles
# used to annotate the output frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Initialize hand detection - optimized for speed
hands = mp_hands.Hands(
    static_image_mode=False,  # False for video/real-time: tracks landmarks across frames
    max_num_hands=2,  # two-hand signs are supported by the feature format
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
52
 
53
  labels_dict = {
54
  0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
 
56
  18: 'R', 19: 'S', 20: 'space', 21: 'T', 22: 'U', 23: 'V', 24: 'W', 25: 'X', 26: 'Y', 27: 'Z'
57
  }
58
 
59
# Store history for smoothing predictions
from collections import Counter, deque

HISTORY_SIZE = 5
# deque(maxlen=...) drops the oldest entry in O(1); the previous
# list + pop(0) bookkeeping shifted the whole list on every frame.
prediction_history = deque(maxlen=HISTORY_SIZE)

def smooth_prediction(new_pred):
    """Smooth predictions over the last HISTORY_SIZE frames to reduce jitter.

    Args:
        new_pred: the raw label predicted for the current frame.

    Returns:
        The most common label in the recent window. Ties are broken
        deterministically in favor of the label seen earliest in the
        window (the previous max(set(...), key=count) tie-break depended
        on string hash order and could flicker between runs).
    """
    prediction_history.append(new_pred)

    # Majority vote; Counter.most_common keeps first-insertion order
    # among equal counts. The history is never empty here because we
    # just appended, so no emptiness guard is needed.
    return Counter(prediction_history).most_common(1)[0][0]
74
+
75
def _hand_features(hand_landmarks):
    """Return (xs, ys, features) for one detected hand.

    Features are the landmark coordinates normalized to the hand's own
    bounding box (x - min(x), y - min(y)), matching the training format.
    """
    xs = [lm.x for lm in hand_landmarks.landmark]
    ys = [lm.y for lm in hand_landmarks.landmark]
    # Hoisted: the previous code recomputed min() once per landmark.
    min_x, min_y = min(xs), min(ys)
    features = []
    for lm in hand_landmarks.landmark:
        features.append(lm.x - min_x)
        features.append(lm.y - min_y)
    return xs, ys, features


def predict_sign_realtime(image):
    """Process one webcam frame and predict the sign language character.

    Args:
        image: PIL image from the Gradio webcam stream (may be None).

    Returns:
        Tuple of (annotated RGB frame, predicted character, confidence
        text). The frame is None when no image was provided or when an
        unexpected error occurred.
    """
    if image is None:
        return None, "No image provided", ""

    try:
        # Force 3-channel RGB first: webcam/upload frames can arrive as
        # RGBA or grayscale, which would break cvtColor and the H, W, _
        # shape unpacking below.
        frame = np.array(image.convert("RGB"))

        # Convert RGB to BGR for OpenCV drawing.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        H, W, _ = frame.shape

        # MediaPipe expects RGB input.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        predicted_character = "No hand detected"
        confidence_text = ""

        if results.multi_hand_landmarks:
            data_aux = []
            x_all, y_all = [], []

            # One loop handles both the one-hand and two-hand cases
            # (the previous code duplicated this extraction per branch).
            for hand_landmarks in results.multi_hand_landmarks:
                xs, ys, features = _hand_features(hand_landmarks)
                x_all.extend(xs)
                y_all.extend(ys)
                data_aux.extend(features)

                # Draw hand landmarks on the output frame.
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

            # Pad with zeros to the two-hand feature length
            # (2 hands * 21 landmarks * 2 coords = 84); this is a no-op
            # when two hands were detected.
            data_aux.extend([0] * (84 - len(data_aux)))

            # Classify and smooth.
            try:
                sample = np.asarray(data_aux)  # built once; previously computed twice
                prediction = model.predict([sample])
                raw_pred = labels_dict.get(prediction[0], str(prediction[0]))

                # Smooth prediction over recent frames to reduce jitter.
                predicted_character = smooth_prediction(raw_pred)

                # Report confidence when the classifier supports it.
                if hasattr(model, 'predict_proba'):
                    proba = model.predict_proba([sample])
                    confidence = np.max(proba) * 100
                    confidence_text = f"Confidence: {confidence:.1f}%"

            except Exception as e:
                predicted_character = f"Error: {str(e)}"
                print(f"Prediction error: {e}")

            # Bounding box around all detected hands; landmarks are
            # normalized [0, 1], so scale by the frame size.
            x1 = int(min(x_all) * W) - 10
            y1 = int(min(y_all) * H) - 10
            x2 = int(max(x_all) * W) + 10
            y2 = int(max(y_all) * H) + 10

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

            # Draw prediction text on a filled background for readability.
            text = predicted_character
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1.5
            thickness = 3

            (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, thickness)

            # Black background behind the label, then the label itself.
            cv2.rectangle(frame, (x1, y1 - text_height - 20), (x1 + text_width + 10, y1), (0, 0, 0), -1)
            cv2.putText(frame, text, (x1 + 5, y1 - 10), font, font_scale, (0, 255, 0), thickness, cv2.LINE_AA)

        # Convert BGR back to RGB for display
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        return frame, predicted_character, confidence_text

    except Exception as e:
        print(f"Error in predict_sign: {e}")
        return None, f"Error: {str(e)}", ""
208
+
209
# Create Gradio interface with real-time streaming
with gr.Blocks(title="Sign Language Recognition") as demo:
    # Page header.
    gr.Markdown(
        """
        # 🤟 Real-Time Sign Language Recognition
        Show your sign language gesture to the camera for real-time detection!
        """
    )

    with gr.Row():
        # Left column: live webcam input feeding the streaming handler.
        with gr.Column():
            input_image = gr.Image(
                sources=["webcam"],
                type="pil",  # the prediction function expects a PIL image
                label="Webcam Feed",
                streaming=True  # Enable streaming for real-time
            )

        # Right column: annotated frame plus the prediction/confidence text.
        with gr.Column():
            output_image = gr.Image(label="Detected Sign")
            predicted_text = gr.Textbox(
                label="Predicted Character",
                scale=1,
                lines=1
            )
            confidence_text = gr.Textbox(
                label="Confidence",
                scale=1,
                lines=1
            )

    # Usage notes shown below the controls.
    gr.Markdown(
        """
        ### Supported Signs
        A-Z letters, Space, Nothing

        ### Tips for better detection:
        - Ensure good lighting
        - Keep hand in frame
        - Make clear gestures
        - Hold the sign steady for 1-2 seconds
        """
    )

    # Set up real-time prediction: each streamed frame is pushed through
    # the handler and its three return values update the three outputs.
    input_image.stream(
        fn=predict_sign_realtime,
        inputs=input_image,
        outputs=[output_image, predicted_text, confidence_text],
        show_progress=False  # Hide progress for smoother experience
    )
 
 
 
 
 
 
 
 
260
 
261
  if __name__ == "__main__":
262
  demo.launch()