import gradio as gr
import cv2
import numpy as np
from model import ASLDetector
from model_ml import ASLDetectorML
# Global detector cache for lazy loading
_detector_cache = {}
def get_detector(model_choice):
"""Get or create detector instance with lazy loading and caching."""
global _detector_cache
# Check if detector is already cached
if model_choice in _detector_cache:
return _detector_cache[model_choice]
# Create new detector instance
print(f"[INFO] Creating new detector: {model_choice}")
detector = ASLDetector() if model_choice == "MediaPipe (Rule-based)" else ASLDetectorML(model_name=model_choice)
# Cache for future use
_detector_cache[model_choice] = detector
return detector
def detect_asl(image, model_choice):
    """Process an image and detect an ASL gesture using the selected model.

    Args:
        image: Frame as a numpy array (grayscale, RGB, or RGBA), or None.
        model_choice: Model name from the dropdown; forwarded to get_detector.

    Returns:
        Tuple of (annotated image or None on invalid input, result string).
    """
    print(f"[INFO] detect_asl called - model: {model_choice}, image type: {type(image)}, is None: {image is None}")

    # Guard clause: reject missing or non-array input before any processing.
    if image is None or not isinstance(image, np.ndarray):
        print("[WARN] Invalid input - rejecting image")
        return None, "Please provide an image (use Upload or capture from Webcam)"

    print(f"[INFO] Image received - shape: {image.shape}, dtype: {image.dtype}")

    # Normalize channel layout to 3-channel RGB before detection.
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        print("[INFO] Converted grayscale to RGB")
    elif len(image.shape) == 3 and image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
        print("[INFO] Converted RGBA to RGB")

    # Stage 1: obtain the detector (lazy loading). Failures here really are
    # model-loading problems, so the setup-oriented message is appropriate.
    try:
        detector = get_detector(model_choice)
    except Exception as e:
        error_msg = f"Error loading model: {str(e)}\n\nPlease ensure models are uploaded to HuggingFace Hub.\nSee MODEL_SETUP.md for instructions."
        print(f"[ERROR] {error_msg}")
        return image, error_msg

    # Stage 2: run inference. Previously these failures were also reported as
    # "Error loading model", which was misleading; report them distinctly.
    try:
        annotated_image, letter, confidence = detector.process_frame(image)
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        print(f"[ERROR] {error_msg}")
        return image, error_msg

    print(f"[INFO] Detection result - letter: {letter}, confidence: {confidence}")

    # Build the user-facing result message.
    if letter and letter != "Unknown":
        result = f"Detected: {letter} (Confidence: {confidence:.2f})\nModel: {model_choice}"
    elif letter == "Unknown":
        if model_choice == "MediaPipe (Rule-based)":
            result = "Hand detected but gesture not recognized. Try: A, V, B, 1, or W"
        else:
            result = f"Hand detected but gesture not recognized.\nModel: {model_choice}"
    else:
        result = "No hand detected. Please show a clear hand gesture."

    print(f"[INFO] Returning result: {result}")
    return annotated_image, result
# Create Gradio interface with tabs for different input methods
with gr.Blocks(title="ASL Hand Detection System") as demo:
    # Header / usage notes shown above the controls.
    # NOTE(review): "EfficientNetB9" is not part of the official EfficientNet
    # family (B0-B7); presumably it maps to a custom checkpoint inside
    # ASLDetectorML -- confirm against model_ml.py.
    gr.Markdown("""
# ASL Hand Detection System
American Sign Language hand gesture detection using MediaPipe and Deep Learning.
- **EfficientNetB4**: Balanced performance and speed (recommended)
- **EfficientNetB7**: Higher accuracy, slower inference
- **EfficientNetB9**: Highest accuracy, slowest inference
- **MediaPipe (Rule-based)**: Fast, lightweight fallback (5 gestures only)
**Supported Gestures (ML Models):** A-Z, del, nothing, space (29 total)
**MediaPipe Gestures:** A, V, B, 1, W (5 total)
""")
    # Model selector dropdown -- shared as an input by all three tabs below.
    with gr.Row():
        model_selector = gr.Dropdown(
            choices=[
                "EfficientNetB4",
                "EfficientNetB7",
                "EfficientNetB9",
                "MediaPipe (Rule-based)"
            ],
            value="MediaPipe (Rule-based)",
            label="Select Model",
            info="First-time model (EfficientNet Based) loading may take 5-10 seconds"
        )
    gr.Markdown("**Note:** Switching between ML models (B4/B7/B9) may take 5-10 seconds on first load as the model downloads from HuggingFace Hub. Subsequent uses will be instant.")
    with gr.Tabs():
        # Tab 1: single webcam capture; detection runs on button click.
        with gr.Tab("Take a Picture"):
            with gr.Row():
                with gr.Column():
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Webcam",
                        interactive=True
                    )
                    webcam_btn = gr.Button("Detect Gesture", variant="primary")
                with gr.Column():
                    webcam_output = gr.Image(label="Detected Hand Landmarks")
                    webcam_result = gr.Textbox(label="Detection Result", lines=3)
            webcam_btn.click(
                fn=detect_asl,
                inputs=[webcam_input, model_selector],
                outputs=[webcam_output, webcam_result]
            )
        # Tab 2: uploaded image; detection runs on button click.
        with gr.Tab("Upload Image"):
            with gr.Row():
                with gr.Column():
                    upload_input = gr.Image(
                        sources=["upload"],
                        type="numpy",
                        label="Upload Image",
                        interactive=True
                    )
                    upload_btn = gr.Button("Detect Gesture", variant="primary")
                with gr.Column():
                    upload_output = gr.Image(label="Detected Hand Landmarks")
                    upload_result = gr.Textbox(label="Detection Result", lines=3)
            upload_btn.click(
                fn=detect_asl,
                inputs=[upload_input, model_selector],
                outputs=[upload_output, upload_result]
            )
        # Tab 3: live streaming -- detect_asl runs on every streamed frame,
        # so no button is needed.
        with gr.Tab("Live Streaming"):
            with gr.Row():
                with gr.Column():
                    stream_input = gr.Image(
                        sources=["webcam"],
                        type="numpy",
                        label="Live Webcam Feed",
                        interactive=True,
                        streaming=True
                    )
                with gr.Column():
                    stream_output = gr.Image(label="Detected Hand Landmarks")
                    stream_result = gr.Textbox(label="Detection Result", lines=3)
            stream_input.stream(
                fn=detect_asl,
                inputs=[stream_input, model_selector],
                outputs=[stream_output, stream_result]
            )
def _run_app():
    """Launch the Gradio demo, logging startup and shutdown events."""
    try:
        print("[INFO] Starting ASL Hand Detection System...")
        print("[INFO] Note: First-time model loading may take 5-10 seconds")
        demo.launch()
    except KeyboardInterrupt:
        # Ctrl-C from the console: exit quietly instead of a traceback.
        print("\n[INFO] Shutting down gracefully...")
    finally:
        print("[INFO] Application stopped")


if __name__ == "__main__":
    _run_app()