d2j666 committed on
Commit
4705494
·
1 Parent(s): d8fdc96

Integrate ML models with model selector dropdown

Browse files

- Add model selector with 4 options (B4, B7, B9, MediaPipe)

Files changed (1) hide show
  1. app.py +85 -32
app.py CHANGED
@@ -2,12 +2,34 @@ import gradio as gr
2
  import cv2
3
  import numpy as np
4
  from model import ASLDetector
 
5
 
6
- detector = ASLDetector()
 
7
 
8
- def detect_asl(image):
9
- """Process image and detect ASL gesture."""
10
- print(f"[INFO] detect_asl called - image type: {type(image)}, is None: {image is None}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  if image is None or not isinstance(image, np.ndarray):
13
  print(f"[WARN] Invalid input - rejecting image")
@@ -23,36 +45,66 @@ def detect_asl(image):
23
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
24
  print(f"[INFO] Converted RGBA to RGB")
25
 
26
- # Process image
27
- annotated_image, letter, confidence = detector.process_frame(image)
28
- print(f"[INFO] Detection result - letter: {letter}, confidence: {confidence}")
29
-
30
- # Create result message
31
- if letter and letter != "Unknown":
32
- result = f"Detected: {letter} (Confidence: {confidence:.2f})"
33
- elif letter == "Unknown":
34
- result = "Hand detected but gesture not recognized. Try: A, V, B, 1, or W"
35
- else:
36
- result = "No hand detected. Please show a clear hand gesture."
37
-
38
- print(f"[INFO] Returning result: {result}")
39
- return annotated_image, result
40
-
41
-
42
- # Create Gradio interface with tabs
 
 
 
 
 
 
 
 
 
 
 
 
43
  with gr.Blocks(title="ASL Hand Detection System") as demo:
44
  gr.Markdown("""
45
  # ASL Hand Detection System
46
- American Sign Language hand gesture detection using MediaPipe.
47
-
48
- **Supported Gestures:**
49
- - A: Closed fist
50
- - V: Peace sign (index and middle fingers extended)
51
- - B: All fingers extended, thumb tucked
52
- - 1: Index finger only extended
53
- - W: Index, middle, and ring fingers extended
 
 
54
  """)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  with gr.Tabs():
57
  with gr.Tab("Take a Picture"):
58
  with gr.Row():
@@ -71,7 +123,7 @@ with gr.Blocks(title="ASL Hand Detection System") as demo:
71
 
72
  webcam_btn.click(
73
  fn=detect_asl,
74
- inputs=webcam_input,
75
  outputs=[webcam_output, webcam_result]
76
  )
77
 
@@ -92,7 +144,7 @@ with gr.Blocks(title="ASL Hand Detection System") as demo:
92
 
93
  upload_btn.click(
94
  fn=detect_asl,
95
- inputs=upload_input,
96
  outputs=[upload_output, upload_result]
97
  )
98
 
@@ -113,13 +165,14 @@ with gr.Blocks(title="ASL Hand Detection System") as demo:
113
 
114
  stream_input.stream(
115
  fn=detect_asl,
116
- inputs=stream_input,
117
  outputs=[stream_output, stream_result]
118
  )
119
 
120
  if __name__ == "__main__":
121
  try:
122
  print("[INFO] Starting ASL Hand Detection System...")
 
123
  demo.launch()
124
  except KeyboardInterrupt:
125
  print("\n[INFO] Shutting down gracefully...")
 
2
  import cv2
3
  import numpy as np
4
  from model import ASLDetector
5
+ from model_ml import ASLDetectorML
6
 
7
+ # Global detector cache for lazy loading
8
+ _detector_cache = {}
9
 
10
+
11
def get_detector(model_choice):
    """Return a cached detector for *model_choice*, creating it on first use.

    Lazily instantiates either the rule-based MediaPipe detector or an
    ML-backed detector and memoizes it in the module-level ``_detector_cache``
    so model weights are loaded at most once per choice (important because ML
    model loading can take several seconds).

    Args:
        model_choice: Display name from the UI dropdown. The literal
            "MediaPipe (Rule-based)" selects ``ASLDetector``; any other value
            is forwarded to ``ASLDetectorML`` as ``model_name``.

    Returns:
        A detector instance exposing ``process_frame``.
    """
    # NOTE: no `global` statement needed — the cache dict is only mutated
    # in place, never rebound, so module-level lookup suffices.
    if model_choice in _detector_cache:
        return _detector_cache[model_choice]

    print(f"[INFO] Creating new detector: {model_choice}")

    # Explicit branch instead of a long conditional expression for readability.
    if model_choice == "MediaPipe (Rule-based)":
        detector = ASLDetector()
    else:
        detector = ASLDetectorML(model_name=model_choice)

    # Memoize so subsequent calls (e.g. every streamed webcam frame) are instant.
    _detector_cache[model_choice] = detector
    return detector
28
+
29
+
30
+ def detect_asl(image, model_choice):
31
+ """Process image and detect ASL gesture using selected model."""
32
+ print(f"[INFO] detect_asl called - model: {model_choice}, image type: {type(image)}, is None: {image is None}")
33
 
34
  if image is None or not isinstance(image, np.ndarray):
35
  print(f"[WARN] Invalid input - rejecting image")
 
45
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
46
  print(f"[INFO] Converted RGBA to RGB")
47
 
48
+ try:
49
+ # Get or create detector (lazy loading)
50
+ detector = get_detector(model_choice)
51
+
52
+ # Process image
53
+ annotated_image, letter, confidence = detector.process_frame(image)
54
+ print(f"[INFO] Detection result - letter: {letter}, confidence: {confidence}")
55
+
56
+ # Create result message
57
+ if letter and letter != "Unknown":
58
+ result = f"Detected: {letter} (Confidence: {confidence:.2f})\nModel: {model_choice}"
59
+ elif letter == "Unknown":
60
+ if model_choice == "MediaPipe (Rule-based)":
61
+ result = "Hand detected but gesture not recognized. Try: A, V, B, 1, or W"
62
+ else:
63
+ result = f"Hand detected but gesture not recognized.\nModel: {model_choice}"
64
+ else:
65
+ result = "No hand detected. Please show a clear hand gesture."
66
+
67
+ print(f"[INFO] Returning result: {result}")
68
+ return annotated_image, result
69
+
70
+ except Exception as e:
71
+ error_msg = f"Error loading model: {str(e)}\n\nPlease ensure models are uploaded to HuggingFace Hub.\nSee MODEL_SETUP.md for instructions."
72
+ print(f"[ERROR] {error_msg}")
73
+ return image, error_msg
74
+
75
+
76
+ # Create Gradio interface with tabs for different input methods
77
  with gr.Blocks(title="ASL Hand Detection System") as demo:
78
  gr.Markdown("""
79
  # ASL Hand Detection System
80
+ American Sign Language hand gesture detection using MediaPipe and Deep Learning.
81
+
82
+ - **EfficientNetB4**: Balanced performance and speed (recommended)
83
+ - **EfficientNetB7**: Higher accuracy, slower inference
84
+ - **EfficientNetB9**: Highest accuracy, slowest inference
85
+ - **MediaPipe (Rule-based)**: Fast, lightweight fallback (5 gestures only)
86
+
87
+ **Supported Gestures (ML Models):** A-Z, del, nothing, space (29 total)
88
+
89
+ **MediaPipe Gestures:** A, V, B, 1, W (5 total)
90
  """)
91
 
92
+ # Model selector dropdown
93
+ with gr.Row():
94
+ model_selector = gr.Dropdown(
95
+ choices=[
96
+ "EfficientNetB4",
97
+ "EfficientNetB7",
98
+ "EfficientNetB9",
99
+ "MediaPipe (Rule-based)"
100
+ ],
101
+ value="MediaPipe (Rule-based)",
102
+ label="Select Model",
103
+ info="First-time model (EfficientNet Based) loading may take 5-10 seconds"
104
+ )
105
+
106
+ gr.Markdown("**Note:** Switching between ML models (B4/B7/B9) may take 5-10 seconds on first load as the model downloads from HuggingFace Hub. Subsequent uses will be instant.")
107
+
108
  with gr.Tabs():
109
  with gr.Tab("Take a Picture"):
110
  with gr.Row():
 
123
 
124
  webcam_btn.click(
125
  fn=detect_asl,
126
+ inputs=[webcam_input, model_selector],
127
  outputs=[webcam_output, webcam_result]
128
  )
129
 
 
144
 
145
  upload_btn.click(
146
  fn=detect_asl,
147
+ inputs=[upload_input, model_selector],
148
  outputs=[upload_output, upload_result]
149
  )
150
 
 
165
 
166
  stream_input.stream(
167
  fn=detect_asl,
168
+ inputs=[stream_input, model_selector],
169
  outputs=[stream_output, stream_result]
170
  )
171
 
172
  if __name__ == "__main__":
173
  try:
174
  print("[INFO] Starting ASL Hand Detection System...")
175
+ print("[INFO] Note: First-time model loading may take 5-10 seconds")
176
  demo.launch()
177
  except KeyboardInterrupt:
178
  print("\n[INFO] Shutting down gracefully...")