arshtech committed on
Commit
50099c0
·
verified ·
1 Parent(s): 4afd840

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -46
app.py CHANGED
@@ -5,43 +5,37 @@ import mediapipe as mp
5
  import requests
6
  from gtts import gTTS
7
  import tempfile
8
- import time
9
 
10
- # ---------------- CONFIG ----------------
11
  st.set_page_config(page_title="Hand2Voice", layout="wide")
 
12
 
13
- GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
 
14
 
15
- # ---------------- SESSION STATE ----------------
16
- if "last_gesture" not in st.session_state:
17
- st.session_state.last_gesture = ""
18
-
19
- if "last_spoken" not in st.session_state:
20
- st.session_state.last_spoken = ""
21
-
22
- # ---------------- LOAD GESTURES ----------------
23
  @st.cache_data
24
  def load_gestures():
25
- return requests.get(GESTURE_URL).json()["gestures"]
 
26
 
27
  gesture_rules = load_gestures()
28
 
29
- # ---------------- MEDIAPIPE ----------------
30
  mp_hands = mp.solutions.hands
31
  hands = mp_hands.Hands(
32
- static_image_mode=True,
33
  max_num_hands=1,
34
  min_detection_confidence=0.7
35
  )
36
 
37
- # ---------------- FINGER LOGIC ----------------
38
  def get_finger_states(hand_landmarks):
39
  finger_tips = [4, 8, 12, 16, 20]
40
  finger_bases = [2, 6, 10, 14, 18]
41
 
42
  states = []
43
 
44
- # Thumb
45
  states.append(
46
  1 if hand_landmarks.landmark[4].x >
47
  hand_landmarks.landmark[3].x else 0
@@ -56,15 +50,15 @@ def get_finger_states(hand_landmarks):
56
 
57
  return states
58
 
59
- # ---------------- MATCH ----------------
60
  def match_gesture(states):
61
  for name, info in gesture_rules.items():
62
  if states == info["pattern"]:
63
  return name
64
- return "Unknown"
65
 
66
- # ---------------- RECOGNIZER ----------------
67
- def recognize(frame):
68
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
69
  result = hands.process(rgb)
70
 
@@ -73,19 +67,19 @@ def recognize(frame):
73
  states = get_finger_states(hand_landmarks)
74
  return match_gesture(states)
75
 
76
- return "No Hand"
77
 
78
  # ---------------- TEXT TO SPEECH ----------------
79
- def speak(text):
80
  tts = gTTS(text=text, lang="en")
81
- file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
82
- tts.save(file.name)
83
- return file.name
84
 
85
  # ---------------- UI ----------------
86
  st.markdown(
87
  "<h1 style='text-align:center;'>🀟 Hand2Voice</h1>"
88
- "<p style='text-align:center;'>Real-Time Hand Gesture to Voice</p>",
89
  unsafe_allow_html=True
90
  )
91
 
@@ -95,45 +89,39 @@ col1, col2 = st.columns(2)
95
 
96
  # -------- CAMERA --------
97
  with col1:
98
- st.subheader("πŸ“· Live Camera")
99
- image = st.camera_input("Live gesture feed")
100
 
101
  # -------- OUTPUT --------
102
  with col2:
103
- st.subheader("πŸ“ Live Output")
104
 
105
- if image:
106
  img_bytes = image.getvalue()
107
- np_img = np.frombuffer(img_bytes, np.uint8)
108
- frame = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
109
 
110
- gesture = recognize(frame)
111
- st.session_state.last_gesture = gesture
112
 
113
  st.markdown(
114
  f"""
115
  <div style="
116
  padding:20px;
117
- background-color:#dcfce7;
118
  border-radius:12px;
119
  font-size:24px;
120
  font-weight:bold;
121
  text-align:center;">
122
- πŸ”Š {gesture}
123
  </div>
124
  """,
125
  unsafe_allow_html=True
126
  )
127
 
128
- # Speak only if gesture changes
129
- if gesture not in ["Unknown", "No Hand"] and gesture != st.session_state.last_spoken:
130
- audio = speak(gesture)
131
- st.audio(audio)
132
- st.session_state.last_spoken = gesture
133
-
134
- # Auto refresh (real-time effect)
135
- time.sleep(0.5)
136
- st.experimental_rerun()
137
 
138
  else:
139
- st.info("Enable camera and show hand gestures")
 
5
  import requests
6
  from gtts import gTTS
7
  import tempfile
 
8
 
9
+ # ---------------- STREAMLIT CONFIG ----------------
10
  st.set_page_config(page_title="Hand2Voice", layout="wide")
11
+ st.empty() # ensures immediate render (HF fix)
12
 
13
+ # ---------------- GESTURE DATA ----------------
14
+ GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
15
 
 
 
 
 
 
 
 
 
16
@st.cache_data
def load_gestures():
    """Fetch the gesture rule table from the remote JSON file.

    Returns:
        dict: the mapping under the "gestures" key, i.e.
        gesture name -> {"pattern": [finger states]}.

    Raises:
        requests.RequestException: on network failure, timeout, or a
        non-2xx HTTP response.

    Cached by Streamlit (@st.cache_data) so the download happens once
    per session, not on every rerun.
    """
    # timeout: never let a Streamlit rerun hang forever on the network.
    response = requests.get(GESTURE_URL, timeout=10)
    # Fail fast on 404/500 instead of feeding an error page to .json().
    response.raise_for_status()
    return response.json()["gestures"]
20
 
21
  gesture_rules = load_gestures()
22
 
23
+ # ---------------- MEDIAPIPE SETUP ----------------
24
  mp_hands = mp.solutions.hands
25
  hands = mp_hands.Hands(
26
+ static_image_mode=True, # IMPORTANT for snapshots
27
  max_num_hands=1,
28
  min_detection_confidence=0.7
29
  )
30
 
31
+ # ---------------- FINGER STATE LOGIC ----------------
32
  def get_finger_states(hand_landmarks):
33
  finger_tips = [4, 8, 12, 16, 20]
34
  finger_bases = [2, 6, 10, 14, 18]
35
 
36
  states = []
37
 
38
+ # Thumb logic
39
  states.append(
40
  1 if hand_landmarks.landmark[4].x >
41
  hand_landmarks.landmark[3].x else 0
 
50
 
51
  return states
52
 
53
+ # ---------------- GESTURE MATCHING ----------------
54
def match_gesture(states):
    """Return the name of the first rule whose finger pattern equals *states*.

    Falls back to "Unknown Gesture" when no rule matches.
    """
    matched = next(
        (name for name, info in gesture_rules.items() if info["pattern"] == states),
        None,
    )
    return matched if matched is not None else "Unknown Gesture"
59
 
60
+ # ---------------- GESTURE RECOGNITION ----------------
61
+ def recognize_gesture(frame):
62
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
63
  result = hands.process(rgb)
64
 
 
67
  states = get_finger_states(hand_landmarks)
68
  return match_gesture(states)
69
 
70
+ return "No Hand Detected"
71
 
72
  # ---------------- TEXT TO SPEECH ----------------
73
def speak_text(text):
    """Synthesize *text* to speech and return the path of an .mp3 file.

    Args:
        text: the phrase to speak (English voice).

    Returns:
        str: filesystem path of the generated mp3. The caller owns the
        file; it is not deleted automatically.
    """
    import os

    tts = gTTS(text=text, lang="en")
    # mkstemp instead of NamedTemporaryFile(delete=False): the old code
    # left the handle open (fd leak), and on Windows gTTS cannot write
    # to a file that is still held open by another handle.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS writes by path; release our handle first.
    tts.save(audio_path)
    return audio_path
78
 
79
  # ---------------- UI ----------------
80
  st.markdown(
81
  "<h1 style='text-align:center;'>🀟 Hand2Voice</h1>"
82
+ "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
83
  unsafe_allow_html=True
84
  )
85
 
 
89
 
90
  # -------- CAMERA --------
91
  with col1:
92
+ st.subheader("πŸ“· Camera Input")
93
+ image = st.camera_input("Enable camera and capture gesture")
94
 
95
  # -------- OUTPUT --------
96
  with col2:
97
+ st.subheader("πŸ“ Output")
98
 
99
+ if image is not None:
100
  img_bytes = image.getvalue()
101
+ img_array = np.frombuffer(img_bytes, np.uint8)
102
+ frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
103
 
104
+ gesture_text = recognize_gesture(frame)
 
105
 
106
  st.markdown(
107
  f"""
108
  <div style="
109
  padding:20px;
110
+ background-color:#e0f2fe;
111
  border-radius:12px;
112
  font-size:24px;
113
  font-weight:bold;
114
  text-align:center;">
115
+ πŸ”Š {gesture_text}
116
  </div>
117
  """,
118
  unsafe_allow_html=True
119
  )
120
 
121
+ # Speak only meaningful gestures
122
+ if gesture_text not in ["Unknown Gesture", "No Hand Detected"]:
123
+ audio_path = speak_text(gesture_text)
124
+ st.audio(audio_path)
 
 
 
 
 
125
 
126
  else:
127
+ st.info("Capture an image to recognize gesture")