d2j666 commited on
Commit
d1b7d70
·
1 Parent(s): 746ada4

Create basic Gradio interface with image upload

Browse files

- Implement gr.Interface with image upload using the Gradio framework
- Connect to ASLDetector model
- Display annotated landmarks and detection results

Files changed (3) hide show
  1. README.md +2 -0
  2. app.py +57 -0
  3. model.py +104 -0
README.md CHANGED
@@ -36,6 +36,8 @@ uv sync
36
 
37
  # Run the application
38
  uv run python app.py
 
 
39
  ```
40
 
41
  ## Technical Stack
 
36
 
37
  # Run the application
38
  uv run python app.py
39
+
40
+ The application will be available at `http://localhost:7860`
41
  ```
42
 
43
  ## Technical Stack
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import cv2
import numpy as np
from model import ASLDetector

# Single module-level detector instance, created once at import time and
# shared by every call to detect_asl below.
detector = ASLDetector()
def detect_asl(image):
    """Run ASL gesture detection on one uploaded image.

    Args:
        image: numpy array from Gradio (grayscale, RGB, or RGBA), or None
            when no image was provided.

    Returns:
        Tuple of (annotated image or None, human-readable result string).
    """
    if image is None:
        return None, "Please provide an image"

    # Normalize the input to a 3-channel RGB array before detection.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.ndim == 3 and image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    annotated_image, letter, confidence = detector.process_frame(image)

    # Map the detector output onto a user-facing message:
    # None -> no hand, "Unknown" -> hand but unrecognized, else -> a match.
    if letter is None:
        result = "No hand detected. Please show a clear hand gesture."
    elif letter == "Unknown":
        result = "Hand detected but gesture not recognized. Try: A, V, B, 1, or W"
    else:
        result = f"Detected: {letter} (Confidence: {confidence:.2f})"

    return annotated_image, result
# --- Gradio UI ---------------------------------------------------------
# One image input; two outputs: the annotated landmark image and a text
# box describing the detection result.
_DESCRIPTION = """
American Sign Language hand gesture detection using MediaPipe.

**Supported Gestures:**
- A: Closed fist
- V: Peace sign (index and middle fingers extended)
- B: All fingers extended, thumb tucked
- 1: Index finger only extended
- W: Index, middle, and ring fingers extended

Upload an image to detect ASL gestures!
"""

_image_input = gr.Image(sources=["upload"], type="numpy", label="Upload Image")
_outputs = [
    gr.Image(label="Detected Hand Landmarks"),
    gr.Textbox(label="Detection Result", lines=3),
]

demo = gr.Interface(
    fn=detect_asl,
    inputs=_image_input,
    outputs=_outputs,
    title="ASL Hand Detection System",
    description=_DESCRIPTION,
    live=False,
)

if __name__ == "__main__":
    demo.launch()
model.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import mediapipe as mp
from typing import Optional, Tuple

# Module-level MediaPipe handles: the Hands solution (landmark detection
# model) and the drawing utilities used to render landmarks onto frames.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
class ASLDetector:
    """ASL hand gesture detection using MediaPipe Hands.

    Detects a hand in an RGB image, draws its landmarks, and classifies a
    small set of gestures (A, V, B, 4, 1, W) with rule-based checks on
    fingertip positions.
    """

    def __init__(
        self,
        static_image_mode: bool = True,
        max_num_hands: int = 1,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        """Create the underlying MediaPipe Hands detector.

        Args:
            static_image_mode: Treat every input as an independent image.
                True is correct for this app, which processes unrelated
                uploaded images (see app.py); the previous value of False
                enabled video-style landmark tracking across successive
                frames, which is wrong for unrelated stills.
            max_num_hands: Maximum number of hands to detect.
            min_detection_confidence: Minimum confidence for the palm
                detection to be considered successful.
            min_tracking_confidence: Minimum confidence for landmark
                tracking (only used when static_image_mode is False).
        """
        self.hands = mp_hands.Hands(
            static_image_mode=static_image_mode,
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

    def process_frame(self, image: np.ndarray) -> Tuple[Optional[np.ndarray], Optional[str], Optional[float]]:
        """
        Process a single frame for hand detection and ASL classification.

        Args:
            image: RGB image array

        Returns:
            Tuple of (annotated_image, predicted_letter, confidence).
            When no hand is found, returns (input image, None, None).
        """
        results = self.hands.process(image)

        if not results.multi_hand_landmarks:
            # No hand found: hand back the input unmodified.
            return image, None, None

        # Draw onto a copy so the caller's array is never mutated.
        annotated_image = image.copy()

        # Only the first detected hand is classified (max_num_hands
        # defaults to 1). The original loop returned on its first
        # iteration, leaving dead code after it; this is equivalent.
        hand_landmarks = results.multi_hand_landmarks[0]
        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS
        )
        letter, confidence = self._classify_gesture(hand_landmarks)
        return annotated_image, letter, confidence

    def _classify_gesture(self, landmarks) -> Tuple[str, float]:
        """
        Classify ASL gesture based on hand landmarks.

        Uses a simple per-finger "extended" test: a fingertip above (smaller
        y than) its PIP joint counts as extended; the thumb is tested on the
        x axis relative to the wrist so the check works for either hand
        orientation — TODO confirm this holds for rotated hands.

        Args:
            landmarks: MediaPipe hand landmarks (21 points)

        Returns:
            Tuple of (predicted_letter, confidence). Confidences are fixed
            per-rule heuristics, not model probabilities.
        """
        landmark_array = np.array([[lm.x, lm.y, lm.z] for lm in landmarks.landmark])

        # Fingertip landmarks (MediaPipe indices 4, 8, 12, 16, 20).
        thumb_tip = landmark_array[4]
        index_tip = landmark_array[8]
        middle_tip = landmark_array[12]
        ring_tip = landmark_array[16]
        pinky_tip = landmark_array[20]

        # Reference joints used for the extension tests.
        thumb_ip = landmark_array[3]
        index_pip = landmark_array[6]
        middle_pip = landmark_array[10]
        ring_pip = landmark_array[14]
        pinky_pip = landmark_array[18]

        wrist = landmark_array[0]

        # [thumb, index, middle, ring, pinky] extension flags.
        fingers_extended = [
            thumb_tip[0] > thumb_ip[0] if thumb_tip[0] > wrist[0] else thumb_tip[0] < thumb_ip[0],
            index_tip[1] < index_pip[1],
            middle_tip[1] < middle_pip[1],
            ring_tip[1] < ring_pip[1],
            pinky_tip[1] < pinky_pip[1]
        ]

        # Count of extended non-thumb fingers.
        num_extended = sum(fingers_extended[1:])

        if num_extended == 0 and not fingers_extended[0]:
            # Closed fist.
            return "A", 0.8
        elif fingers_extended[1] and fingers_extended[2] and not fingers_extended[3] and not fingers_extended[4]:
            # Index + middle only: peace sign.
            return "V", 0.85
        elif all(fingers_extended[1:]):
            # NOTE(review): the UI description says "B: thumb tucked", but
            # this branch returns "B" when the thumb IS extended and "4"
            # when it is tucked — confirm the intended B/4 mapping.
            if fingers_extended[0]:
                return "B", 0.8
            else:
                return "4", 0.75
        elif fingers_extended[1] and not any(fingers_extended[2:]):
            # Index finger only.
            return "1", 0.8
        elif num_extended == 3 and fingers_extended[1] and fingers_extended[2] and fingers_extended[3]:
            # Index + middle + ring.
            return "W", 0.75
        else:
            return "Unknown", 0.5

    def close(self):
        """Release MediaPipe resources."""
        self.hands.close()