arshtech committed
Commit bb6e324 · verified · 1 Parent(s): 50099c0

Update app.py

Files changed (1):
1. app.py (+38 -61)
app.py CHANGED
@@ -1,47 +1,46 @@
 import streamlit as st
 import cv2
 import numpy as np
-import mediapipe as mp
 import requests
 from gtts import gTTS
 import tempfile
 
 # ---------------- STREAMLIT CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
-st.empty()  # ensures immediate render (HF fix)
+st.title("🤟 Hand2Voice")
+st.write("Hand Gesture to Voice Conversion")
 
-# ---------------- GESTURE DATA ----------------
+# ---------------- CONSTANTS ----------------
 GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
 
+# ---------------- LOAD GESTURES ----------------
 @st.cache_data
 def load_gestures():
-    response = requests.get(GESTURE_URL)
-    return response.json()["gestures"]
-
-gesture_rules = load_gestures()
-
-# ---------------- MEDIAPIPE SETUP ----------------
-mp_hands = mp.solutions.hands
-hands = mp_hands.Hands(
-    static_image_mode=True,  # IMPORTANT for snapshots
-    max_num_hands=1,
-    min_detection_confidence=0.7
-)
+    return requests.get(GESTURE_URL).json()["gestures"]
+
+# ---------------- LAZY MEDIAPIPE ----------------
+@st.cache_resource
+def load_mediapipe():
+    import mediapipe as mp
+    mp_hands = mp.solutions.hands
+    return mp_hands.Hands(
+        static_image_mode=True,
+        max_num_hands=1,
+        min_detection_confidence=0.7
+    )
 
-# ---------------- FINGER STATE LOGIC ----------------
+# ---------------- FINGER LOGIC ----------------
 def get_finger_states(hand_landmarks):
     finger_tips = [4, 8, 12, 16, 20]
     finger_bases = [2, 6, 10, 14, 18]
 
     states = []
 
-    # Thumb logic
     states.append(
         1 if hand_landmarks.landmark[4].x >
         hand_landmarks.landmark[3].x else 0
     )
 
-    # Other fingers
     for tip, base in zip(finger_tips[1:], finger_bases[1:]):
         states.append(
             1 if hand_landmarks.landmark[tip].y <
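Two notes on the hunk above. The MediaPipe import now lives inside load_mediapipe() under @st.cache_resource, so the heavy import and the Hands construction happen once, on first use, and the detector is reused across Streamlit reruns instead of being rebuilt at import time. Also, load_gestures() assumes the JSON at GESTURE_URL carries a top-level "gestures" mapping of gesture names to five-element finger patterns. A minimal sketch of that assumed shape (the names and patterns below are invented for illustration, not taken from the repo):

    # Assumed shape of gesture_rules.json (illustrative values only).
    # Each pattern is [thumb, index, middle, ring, pinky]; 1 = finger extended.
    {
        "gestures": {
            "Hello":      {"pattern": [1, 1, 1, 1, 1]},
            "Yes":        {"pattern": [0, 0, 0, 0, 0]},
            "I Love You": {"pattern": [1, 1, 0, 0, 1]}
        }
    }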
@@ -50,78 +49,56 @@ def get_finger_states(hand_landmarks):
 
     return states
 
-# ---------------- GESTURE MATCHING ----------------
-def match_gesture(states):
-    for name, info in gesture_rules.items():
+# ---------------- MATCH GESTURE ----------------
+def match_gesture(states, rules):
+    for name, info in rules.items():
         if states == info["pattern"]:
             return name
     return "Unknown Gesture"
 
-# ---------------- GESTURE RECOGNITION ----------------
-def recognize_gesture(frame):
+# ---------------- RECOGNITION ----------------
+def recognize_gesture(frame, hands, rules):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     result = hands.process(rgb)
 
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
             states = get_finger_states(hand_landmarks)
-            return match_gesture(states)
+            return match_gesture(states, rules)
 
     return "No Hand Detected"
 
 # ---------------- TEXT TO SPEECH ----------------
 def speak_text(text):
     tts = gTTS(text=text, lang="en")
-    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-    tts.save(audio_file.name)
-    return audio_file.name
+    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(file.name)
+    return file.name
 
 # ---------------- UI ----------------
-st.markdown(
-    "<h1 style='text-align:center;'>🤟 Hand2Voice</h1>"
-    "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
-    unsafe_allow_html=True
-)
-
-st.divider()
-
 col1, col2 = st.columns(2)
 
-# -------- CAMERA --------
 with col1:
-    st.subheader("📷 Camera Input")
-    image = st.camera_input("Enable camera and capture gesture")
+    st.subheader("📷 Camera")
+    image = st.camera_input("Capture hand gesture")
 
-# -------- OUTPUT --------
 with col2:
     st.subheader("📝 Output")
 
-    if image is not None:
+    if image:
+        gestures = load_gestures()
+        hands = load_mediapipe()
+
         img_bytes = image.getvalue()
         img_array = np.frombuffer(img_bytes, np.uint8)
         frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
 
-        gesture_text = recognize_gesture(frame)
-
-        st.markdown(
-            f"""
-            <div style="
-                padding:20px;
-                background-color:#e0f2fe;
-                border-radius:12px;
-                font-size:24px;
-                font-weight:bold;
-                text-align:center;">
-                🔊 {gesture_text}
-            </div>
-            """,
-            unsafe_allow_html=True
-        )
+        gesture = recognize_gesture(frame, hands, gestures)
+
+        st.success(f"🔊 {gesture}")
 
-        # Speak only meaningful gestures
-        if gesture_text not in ["Unknown Gesture", "No Hand Detected"]:
-            audio_path = speak_text(gesture_text)
-            st.audio(audio_path)
+        if gesture not in ["Unknown Gesture", "No Hand Detected"]:
+            audio = speak_text(gesture)
+            st.audio(audio)
     else:
-        st.info("Capture an image to recognize gesture")
+        st.info("Capture an image to start")
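Because recognize_gesture() now takes hands and rules as parameters instead of reading module-level globals, the detection path can be smoke-tested outside Streamlit. A rough sketch, assuming the functions from app.py are copied into a plain script and a local test photo hand.jpg exists (both are assumptions, not part of the commit):

    # Offline smoke test for the refactored pipeline (sketch only).
    import cv2
    import mediapipe as mp

    hands = mp.solutions.hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=0.7,
    )
    rules = {"Open Palm": {"pattern": [1, 1, 1, 1, 1]}}  # made-up rule

    frame = cv2.imread("hand.jpg")  # any local photo of a hand
    print(recognize_gesture(frame, hands, rules))
    # Prints "Open Palm", "Unknown Gesture", or "No Hand Detected"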
 
app.py (new version, in full):

import streamlit as st
import cv2
import numpy as np
import requests
from gtts import gTTS
import tempfile

# ---------------- STREAMLIT CONFIG ----------------
st.set_page_config(page_title="Hand2Voice", layout="wide")
st.title("🤟 Hand2Voice")
st.write("Hand Gesture to Voice Conversion")

# ---------------- CONSTANTS ----------------
GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"

# ---------------- LOAD GESTURES ----------------
@st.cache_data
def load_gestures():
    return requests.get(GESTURE_URL).json()["gestures"]

# ---------------- LAZY MEDIAPIPE ----------------
@st.cache_resource
def load_mediapipe():
    import mediapipe as mp
    mp_hands = mp.solutions.hands
    return mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=0.7
    )

# ---------------- FINGER LOGIC ----------------
def get_finger_states(hand_landmarks):
    finger_tips = [4, 8, 12, 16, 20]
    finger_bases = [2, 6, 10, 14, 18]

    states = []

    states.append(
        1 if hand_landmarks.landmark[4].x >
        hand_landmarks.landmark[3].x else 0
    )

    for tip, base in zip(finger_tips[1:], finger_bases[1:]):
        states.append(
            1 if hand_landmarks.landmark[tip].y <
            hand_landmarks.landmark[base].y else 0
        )

    return states

# ---------------- MATCH GESTURE ----------------
def match_gesture(states, rules):
    for name, info in rules.items():
        if states == info["pattern"]:
            return name
    return "Unknown Gesture"

# ---------------- RECOGNITION ----------------
def recognize_gesture(frame, hands, rules):
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            states = get_finger_states(hand_landmarks)
            return match_gesture(states, rules)

    return "No Hand Detected"

# ---------------- TEXT TO SPEECH ----------------
def speak_text(text):
    tts = gTTS(text=text, lang="en")
    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(file.name)
    return file.name

# ---------------- UI ----------------
col1, col2 = st.columns(2)

with col1:
    st.subheader("📷 Camera")
    image = st.camera_input("Capture hand gesture")

with col2:
    st.subheader("📝 Output")

    if image:
        gestures = load_gestures()
        hands = load_mediapipe()

        img_bytes = image.getvalue()
        img_array = np.frombuffer(img_bytes, np.uint8)
        frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

        gesture = recognize_gesture(frame, hands, gestures)

        st.success(f"🔊 {gesture}")

        if gesture not in ["Unknown Gesture", "No Hand Detected"]:
            audio = speak_text(gesture)
            st.audio(audio)
    else:
        st.info("Capture an image to start")
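One behavior the commit keeps as-is: speak_text() still writes a delete=False temp MP3 and never removes it, so files accumulate across captures. Since st.audio also accepts raw bytes, an in-memory variant would sidestep that; a minimal sketch under that assumption (speak_bytes is a name invented here, not in the repo):

    import io
    from gtts import gTTS

    def speak_bytes(text):
        # Render text to MP3 bytes in memory instead of a temp file.
        buf = io.BytesIO()
        gTTS(text=text, lang="en").write_to_fp(buf)
        return buf.getvalue()

    # In the app: st.audio(speak_bytes(gesture), format="audio/mp3")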