Spaces:

arshtech
/

Hand2Voice

Runtime error

App Files Files Community

arshtech committed 8 days ago

Commit

50099c0

verified ·

1 Parent(s): 4afd840

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -46

app.py CHANGED Viewed

@@ -5,43 +5,37 @@ import mediapipe as mp
 import requests
 from gtts import gTTS
 import tempfile
-import time
-# ---------------- CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
-GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
-# ---------------- SESSION STATE ----------------
-if "last_gesture" not in st.session_state:
-    st.session_state.last_gesture = ""
-if "last_spoken" not in st.session_state:
-    st.session_state.last_spoken = ""
-# ---------------- LOAD GESTURES ----------------
 @st.cache_data
 def load_gestures():
-    return requests.get(GESTURE_URL).json()["gestures"]
 gesture_rules = load_gestures()
-# ---------------- MEDIAPIPE ----------------
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(
-    static_image_mode=True,
     max_num_hands=1,
     min_detection_confidence=0.7
 )
-# ---------------- FINGER LOGIC ----------------
 def get_finger_states(hand_landmarks):
     finger_tips = [4, 8, 12, 16, 20]
     finger_bases = [2, 6, 10, 14, 18]
     states = []
-    # Thumb
     states.append(
         1 if hand_landmarks.landmark[4].x >
              hand_landmarks.landmark[3].x else 0
@@ -56,15 +50,15 @@ def get_finger_states(hand_landmarks):
     return states
-# ---------------- MATCH ----------------
 def match_gesture(states):
     for name, info in gesture_rules.items():
         if states == info["pattern"]:
             return name
-    return "Unknown"
-# ---------------- RECOGNIZER ----------------
-def recognize(frame):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     result = hands.process(rgb)
@@ -73,19 +67,19 @@ def recognize(frame):
             states = get_finger_states(hand_landmarks)
             return match_gesture(states)
-    return "No Hand"
 # ---------------- TEXT TO SPEECH ----------------
-def speak(text):
     tts = gTTS(text=text, lang="en")
-    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-    tts.save(file.name)
-    return file.name
 # ---------------- UI ----------------
 st.markdown(
     "<h1 style='text-align:center;'>🤟 Hand2Voice</h1>"
-    "<p style='text-align:center;'>Real-Time Hand Gesture to Voice</p>",
     unsafe_allow_html=True
 )
@@ -95,45 +89,39 @@ col1, col2 = st.columns(2)
 # -------- CAMERA --------
 with col1:
-    st.subheader("📷 Live Camera")
-    image = st.camera_input("Live gesture feed")
 # -------- OUTPUT --------
 with col2:
-    st.subheader("📝 Live Output")
-    if image:
         img_bytes = image.getvalue()
-        np_img = np.frombuffer(img_bytes, np.uint8)
-        frame = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
-        gesture = recognize(frame)
-        st.session_state.last_gesture = gesture
         st.markdown(
             f"""
             <div style="
                 padding:20px;
-                background-color:#dcfce7;
                 border-radius:12px;
                 font-size:24px;
                 font-weight:bold;
                 text-align:center;">
-                🔊 {gesture}
             </div>
             """,
             unsafe_allow_html=True
         )
-        # Speak only if gesture changes
-        if gesture not in ["Unknown", "No Hand"] and gesture != st.session_state.last_spoken:
-            audio = speak(gesture)
-            st.audio(audio)
-            st.session_state.last_spoken = gesture
-        # Auto refresh (real-time effect)
-        time.sleep(0.5)
-        st.experimental_rerun()
     else:
-        st.info("Enable camera and show hand gestures")

 import requests
 from gtts import gTTS
 import tempfile
+# ---------------- STREAMLIT CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
+st.empty()  # ensures immediate render (HF fix)
+# ---------------- GESTURE DATA ----------------
+GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
 @st.cache_data
 def load_gestures():
+    response = requests.get(GESTURE_URL)
+    return response.json()["gestures"]
 gesture_rules = load_gestures()
+# ---------------- MEDIAPIPE SETUP ----------------
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(
+    static_image_mode=True,     # IMPORTANT for snapshots
     max_num_hands=1,
     min_detection_confidence=0.7
 )
+# ---------------- FINGER STATE LOGIC ----------------
 def get_finger_states(hand_landmarks):
     finger_tips = [4, 8, 12, 16, 20]
     finger_bases = [2, 6, 10, 14, 18]
     states = []
+    # Thumb logic
     states.append(
         1 if hand_landmarks.landmark[4].x >
              hand_landmarks.landmark[3].x else 0
     return states
+# ---------------- GESTURE MATCHING ----------------
 def match_gesture(states):
     for name, info in gesture_rules.items():
         if states == info["pattern"]:
             return name
+    return "Unknown Gesture"
+# ---------------- GESTURE RECOGNITION ----------------
+def recognize_gesture(frame):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     result = hands.process(rgb)
             states = get_finger_states(hand_landmarks)
             return match_gesture(states)
+    return "No Hand Detected"
 # ---------------- TEXT TO SPEECH ----------------
+def speak_text(text):
     tts = gTTS(text=text, lang="en")
+    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(audio_file.name)
+    return audio_file.name
 # ---------------- UI ----------------
 st.markdown(
     "<h1 style='text-align:center;'>🤟 Hand2Voice</h1>"
+    "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
     unsafe_allow_html=True
 )
 # -------- CAMERA --------
 with col1:
+    st.subheader("📷 Camera Input")
+    image = st.camera_input("Enable camera and capture gesture")
 # -------- OUTPUT --------
 with col2:
+    st.subheader("📝 Output")
+    if image is not None:
         img_bytes = image.getvalue()
+        img_array = np.frombuffer(img_bytes, np.uint8)
+        frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        gesture_text = recognize_gesture(frame)
         st.markdown(
             f"""
             <div style="
                 padding:20px;
+                background-color:#e0f2fe;
                 border-radius:12px;
                 font-size:24px;
                 font-weight:bold;
                 text-align:center;">
+                🔊 {gesture_text}
             </div>
             """,
             unsafe_allow_html=True
         )
+        # Speak only meaningful gestures
+        if gesture_text not in ["Unknown Gesture", "No Hand Detected"]:
+            audio_path = speak_text(gesture_text)
+            st.audio(audio_path)
     else:
+        st.info("Capture an image to recognize gesture")