arshtech committed
Commit d5df74d · verified · 1 parent: 55bbccc

Update app.py

Files changed (1):
  1. app.py +47 -36
app.py CHANGED
@@ -4,23 +4,29 @@ import numpy as np
 import mediapipe as mp
 import requests
 from gtts import gTTS
-import os
 import tempfile
+import time
 
 # ---------------- CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
 
 GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
 
+# ---------------- SESSION STATE ----------------
+if "last_gesture" not in st.session_state:
+    st.session_state.last_gesture = ""
+
+if "last_spoken" not in st.session_state:
+    st.session_state.last_spoken = ""
+
 # ---------------- LOAD GESTURES ----------------
 @st.cache_data
 def load_gestures():
-    response = requests.get(GESTURE_URL)
-    return response.json()["gestures"]
+    return requests.get(GESTURE_URL).json()["gestures"]
 
 gesture_rules = load_gestures()
 
-# ---------------- MEDIAPIPE SETUP ----------------
+# ---------------- MEDIAPIPE ----------------
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(
     static_image_mode=True,
@@ -50,36 +56,36 @@ def get_finger_states(hand_landmarks):
 
     return states
 
-# ---------------- GESTURE MATCHING ----------------
-def match_gesture(finger_states):
+# ---------------- MATCH ----------------
+def match_gesture(states):
     for name, info in gesture_rules.items():
-        if finger_states == info["pattern"]:
+        if states == info["pattern"]:
             return name
-    return "Unknown Gesture"
+    return "Unknown"
 
-# ---------------- GESTURE RECOGNIZER ----------------
-def recognize_gesture(frame):
+# ---------------- RECOGNIZER ----------------
+def recognize(frame):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    results = hands.process(rgb)
+    result = hands.process(rgb)
 
-    if results.multi_hand_landmarks:
-        for hand_landmarks in results.multi_hand_landmarks:
+    if result.multi_hand_landmarks:
+        for hand_landmarks in result.multi_hand_landmarks:
             states = get_finger_states(hand_landmarks)
             return match_gesture(states)
 
-    return "No Hand Detected"
+    return "No Hand"
 
 # ---------------- TEXT TO SPEECH ----------------
-def speak_text(text):
+def speak(text):
     tts = gTTS(text=text, lang="en")
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-    tts.save(temp_file.name)
-    return temp_file.name
+    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(file.name)
+    return file.name
 
 # ---------------- UI ----------------
 st.markdown(
     "<h1 style='text-align:center;'>🤟 Hand2Voice</h1>"
-    "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
+    "<p style='text-align:center;'>Real-Time Hand Gesture to Voice</p>",
     unsafe_allow_html=True
 )
 
@@ -87,42 +93,47 @@ st.divider()
 
 col1, col2 = st.columns(2)
 
-# -------- CAMERA COLUMN --------
+# -------- CAMERA --------
 with col1:
-    st.subheader("📷 Camera Input")
-    image = st.camera_input("Capture your hand gesture")
+    st.subheader("📷 Live Camera")
+    image = st.camera_input("Live gesture feed")
 
-# -------- OUTPUT COLUMN --------
+# -------- OUTPUT --------
 with col2:
-    st.subheader("📝 Output")
+    st.subheader("📝 Live Output")
 
-    if image is not None:
-        bytes_data = image.getvalue()
-        np_img = np.frombuffer(bytes_data, np.uint8)
+    if image:
+        img_bytes = image.getvalue()
+        np_img = np.frombuffer(img_bytes, np.uint8)
         frame = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
 
-        gesture_text = recognize_gesture(frame)
+        gesture = recognize(frame)
+        st.session_state.last_gesture = gesture
 
-        # Highlight text
         st.markdown(
             f"""
             <div style="
                 padding:20px;
-                background-color:#e0f2fe;
+                background-color:#dcfce7;
                 border-radius:12px;
                 font-size:24px;
                 font-weight:bold;
                 text-align:center;">
-                🔊 {gesture_text}
+                🔊 {gesture}
             </div>
             """,
             unsafe_allow_html=True
         )
 
-        # Speak text
-        if gesture_text not in ["No Hand Detected", "Unknown Gesture"]:
-            audio_path = speak_text(gesture_text)
-            st.audio(audio_path, format="audio/mp3")
+        # Speak only if gesture changes
+        if gesture not in ["Unknown", "No Hand"] and gesture != st.session_state.last_spoken:
+            audio = speak(gesture)
+            st.audio(audio)
+            st.session_state.last_spoken = gesture
+
+        # Auto refresh (real-time effect)
+        time.sleep(0.5)
+        st.experimental_rerun()
 
     else:
-        st.info("Show your hand and capture an image")
+        st.info("Enable camera and show hand gestures")
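For reference, load_gestures() in both versions expects the file at GESTURE_URL to wrap its rules in a top-level "gestures" key, and match_gesture() compares the finger-state list directly against each rule's "pattern". A minimal sketch of that contract, with made-up gesture names and patterns (the real gestures/gesture_rules.json is not part of this commit):

# Illustrative only: assumed shape of gesture_rules.json is
# {"gestures": {"<name>": {"pattern": [<0/1 per finger, thumb..pinky>]}}}
gesture_rules = {
    "Hello": {"pattern": [1, 1, 1, 1, 1]},  # open palm
    "Yes":   {"pattern": [0, 0, 0, 0, 0]},  # closed fist
    "Peace": {"pattern": [0, 1, 1, 0, 0]},  # index + middle extended
}

def match_gesture(states):
    # Same first-match lookup as in the commit.
    for name, info in gesture_rules.items():
        if states == info["pattern"]:
            return name
    return "Unknown"

print(match_gesture([0, 1, 1, 0, 0]))  # -> Peace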
 
4
  import mediapipe as mp
5
  import requests
6
  from gtts import gTTS
 
7
  import tempfile
8
+ import time
9
 
10
  # ---------------- CONFIG ----------------
11
  st.set_page_config(page_title="Hand2Voice", layout="wide")
12
 
13
  GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
14
 
15
+ # ---------------- SESSION STATE ----------------
16
+ if "last_gesture" not in st.session_state:
17
+ st.session_state.last_gesture = ""
18
+
19
+ if "last_spoken" not in st.session_state:
20
+ st.session_state.last_spoken = ""
21
+
22
  # ---------------- LOAD GESTURES ----------------
23
  @st.cache_data
24
  def load_gestures():
25
+ return requests.get(GESTURE_URL).json()["gestures"]
 
26
 
27
  gesture_rules = load_gestures()
28
 
29
+ # ---------------- MEDIAPIPE ----------------
30
  mp_hands = mp.solutions.hands
31
  hands = mp_hands.Hands(
32
  static_image_mode=True,
 
56
 
57
  return states
58
 
59
+ # ---------------- MATCH ----------------
60
+ def match_gesture(states):
61
  for name, info in gesture_rules.items():
62
+ if states == info["pattern"]:
63
  return name
64
+ return "Unknown"
65
 
66
+ # ---------------- RECOGNIZER ----------------
67
+ def recognize(frame):
68
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
69
+ result = hands.process(rgb)
70
 
71
+ if result.multi_hand_landmarks:
72
+ for hand_landmarks in result.multi_hand_landmarks:
73
  states = get_finger_states(hand_landmarks)
74
  return match_gesture(states)
75
 
76
+ return "No Hand"
77
 
78
  # ---------------- TEXT TO SPEECH ----------------
79
+ def speak(text):
80
  tts = gTTS(text=text, lang="en")
81
+ file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
82
+ tts.save(file.name)
83
+ return file.name
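The second hunk starts inside get_finger_states(), which this commit leaves unchanged, so its body never appears in the diff. For orientation only, a common rule-based implementation over MediaPipe hand landmarks looks roughly like the sketch below; the landmark indices are MediaPipe's, but everything else is an assumption, not the file's actual code:

TIP_IDS = [4, 8, 12, 16, 20]  # MediaPipe fingertip landmarks, thumb..pinky

def get_finger_states(hand_landmarks):
    # Sketch only: returns one 0/1 flag per finger, in the same
    # [thumb, index, middle, ring, pinky] order the patterns assume.
    lm = hand_landmarks.landmark
    states = []
    # The thumb folds sideways, so compare x; assumes an unmirrored right hand.
    states.append(1 if lm[4].x < lm[3].x else 0)
    # Other fingers: fingertip above its PIP joint (smaller y) means extended.
    for tip in TIP_IDS[1:]:
        states.append(1 if lm[tip].y < lm[tip - 2].y else 0)
    return states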
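One caveat on the new auto-refresh loop: newer Streamlit releases deprecate st.experimental_rerun() in favor of st.rerun(), so the tail of the output column may need a version-tolerant call. A small sketch, assuming only that st.rerun exists on those newer releases:

import time
import streamlit as st

# Use whichever rerun API this Streamlit install provides.
time.sleep(0.5)
if hasattr(st, "rerun"):
    st.rerun()
else:
    st.experimental_rerun()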