arshtech committed
Commit d5df74d · verified · 1 parent: 55bbccc

Update app.py

Files changed (1):
  1. app.py +47 -36
app.py CHANGED
@@ -4,23 +4,29 @@ import numpy as np
 import mediapipe as mp
 import requests
 from gtts import gTTS
-import os
 import tempfile
+import time
 
 # ---------------- CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
 
 GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
 
+# ---------------- SESSION STATE ----------------
+if "last_gesture" not in st.session_state:
+    st.session_state.last_gesture = ""
+
+if "last_spoken" not in st.session_state:
+    st.session_state.last_spoken = ""
+
 # ---------------- LOAD GESTURES ----------------
 @st.cache_data
 def load_gestures():
-    response = requests.get(GESTURE_URL)
-    return response.json()["gestures"]
+    return requests.get(GESTURE_URL).json()["gestures"]
 
 gesture_rules = load_gestures()
 
-# ---------------- MEDIAPIPE SETUP ----------------
+# ---------------- MEDIAPIPE ----------------
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(
     static_image_mode=True,
@@ -50,36 +56,36 @@ def get_finger_states(hand_landmarks):
 
     return states
 
-# ---------------- GESTURE MATCHING ----------------
-def match_gesture(finger_states):
+# ---------------- MATCH ----------------
+def match_gesture(states):
     for name, info in gesture_rules.items():
-        if finger_states == info["pattern"]:
+        if states == info["pattern"]:
             return name
-    return "Unknown Gesture"
+    return "Unknown"
 
-# ---------------- GESTURE RECOGNIZER ----------------
-def recognize_gesture(frame):
+# ---------------- RECOGNIZER ----------------
+def recognize(frame):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    results = hands.process(rgb)
+    result = hands.process(rgb)
 
-    if results.multi_hand_landmarks:
-        for hand_landmarks in results.multi_hand_landmarks:
+    if result.multi_hand_landmarks:
+        for hand_landmarks in result.multi_hand_landmarks:
             states = get_finger_states(hand_landmarks)
             return match_gesture(states)
 
-    return "No Hand Detected"
+    return "No Hand"
 
 # ---------------- TEXT TO SPEECH ----------------
-def speak_text(text):
+def speak(text):
     tts = gTTS(text=text, lang="en")
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-    tts.save(temp_file.name)
-    return temp_file.name
+    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(file.name)
+    return file.name
 
 # ---------------- UI ----------------
 st.markdown(
     "<h1 style='text-align:center;'>🤟 Hand2Voice</h1>"
-    "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
+    "<p style='text-align:center;'>Real-Time Hand Gesture to Voice</p>",
     unsafe_allow_html=True
 )
 
@@ -87,42 +93,47 @@ st.divider()
 
 col1, col2 = st.columns(2)
 
-# -------- CAMERA COLUMN --------
+# -------- CAMERA --------
 with col1:
-    st.subheader("📷 Camera Input")
-    image = st.camera_input("Capture your hand gesture")
+    st.subheader("📷 Live Camera")
+    image = st.camera_input("Live gesture feed")
 
-# -------- OUTPUT COLUMN --------
+# -------- OUTPUT --------
 with col2:
-    st.subheader("📝 Output")
+    st.subheader("📝 Live Output")
 
-    if image is not None:
-        bytes_data = image.getvalue()
-        np_img = np.frombuffer(bytes_data, np.uint8)
+    if image:
+        img_bytes = image.getvalue()
+        np_img = np.frombuffer(img_bytes, np.uint8)
         frame = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
 
-        gesture_text = recognize_gesture(frame)
+        gesture = recognize(frame)
+        st.session_state.last_gesture = gesture
 
-        # Highlight text
         st.markdown(
             f"""
             <div style="
                 padding:20px;
-                background-color:#e0f2fe;
+                background-color:#dcfce7;
                 border-radius:12px;
                 font-size:24px;
                 font-weight:bold;
                 text-align:center;">
-                🔊 {gesture_text}
+                🔊 {gesture}
             </div>
             """,
             unsafe_allow_html=True
         )
 
-        # Speak text
-        if gesture_text not in ["No Hand Detected", "Unknown Gesture"]:
-            audio_path = speak_text(gesture_text)
-            st.audio(audio_path, format="audio/mp3")
+        # Speak only if gesture changes
+        if gesture not in ["Unknown", "No Hand"] and gesture != st.session_state.last_spoken:
+            audio = speak(gesture)
+            st.audio(audio)
+            st.session_state.last_spoken = gesture
+
+        # Auto refresh (real-time effect)
+        time.sleep(0.5)
+        st.experimental_rerun()
 
     else:
-        st.info("Show your hand and capture an image")
+        st.info("Enable camera and show hand gestures")
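For reference, load_gestures() in both versions expects the file at GESTURE_URL to wrap its rules in a top-level "gestures" key, and match_gesture() compares the finger-state list directly against each rule's "pattern". A minimal sketch of that contract, with made-up gesture names and patterns (the real gestures/gesture_rules.json is not part of this commit):

# Illustrative only: assumed shape of gesture_rules.json is
# {"gestures": {"<name>": {"pattern": [<0/1 per finger, thumb..pinky>]}}}
gesture_rules = {
    "Hello": {"pattern": [1, 1, 1, 1, 1]},  # open palm
    "Yes":   {"pattern": [0, 0, 0, 0, 0]},  # closed fist
    "Peace": {"pattern": [0, 1, 1, 0, 0]},  # index + middle extended
}

def match_gesture(states):
    # Same first-match lookup as in the commit.
    for name, info in gesture_rules.items():
        if states == info["pattern"]:
            return name
    return "Unknown"

print(match_gesture([0, 1, 1, 0, 0]))  # -> Peace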
 
4
  import mediapipe as mp
5
  import requests
6
  from gtts import gTTS
 
7
  import tempfile
8
+ import time
9
 
10
  # ---------------- CONFIG ----------------
11
  st.set_page_config(page_title="Hand2Voice", layout="wide")
12
 
13
  GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
14
 
15
+ # ---------------- SESSION STATE ----------------
16
+ if "last_gesture" not in st.session_state:
17
+ st.session_state.last_gesture = ""
18
+
19
+ if "last_spoken" not in st.session_state:
20
+ st.session_state.last_spoken = ""
21
+
22
  # ---------------- LOAD GESTURES ----------------
23
  @st.cache_data
24
  def load_gestures():
25
+ return requests.get(GESTURE_URL).json()["gestures"]
 
26
 
27
  gesture_rules = load_gestures()
28
 
29
+ # ---------------- MEDIAPIPE ----------------
30
  mp_hands = mp.solutions.hands
31
  hands = mp_hands.Hands(
32
  static_image_mode=True,
 
56
 
57
  return states
58
 
59
+ # ---------------- MATCH ----------------
60
+ def match_gesture(states):
61
  for name, info in gesture_rules.items():
62
+ if states == info["pattern"]:
63
  return name
64
+ return "Unknown"
65
 
66
+ # ---------------- RECOGNIZER ----------------
67
+ def recognize(frame):
68
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
69
+ result = hands.process(rgb)
70
 
71
+ if result.multi_hand_landmarks:
72
+ for hand_landmarks in result.multi_hand_landmarks:
73
  states = get_finger_states(hand_landmarks)
74
  return match_gesture(states)
75
 
76
+ return "No Hand"
77
 
78
  # ---------------- TEXT TO SPEECH ----------------
79
+ def speak(text):
80
  tts = gTTS(text=text, lang="en")
81
+ file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
82
+ tts.save(file.name)
83
+ return file.name
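The second hunk starts inside get_finger_states(), which this commit leaves unchanged, so its body never appears in the diff. For orientation only, a common rule-based implementation over MediaPipe hand landmarks looks roughly like the sketch below; the landmark indices are MediaPipe's, but everything else is an assumption, not the file's actual code:

TIP_IDS = [4, 8, 12, 16, 20]  # MediaPipe fingertip landmarks, thumb..pinky

def get_finger_states(hand_landmarks):
    # Sketch only: returns one 0/1 flag per finger, in the same
    # [thumb, index, middle, ring, pinky] order the patterns assume.
    lm = hand_landmarks.landmark
    states = []
    # The thumb folds sideways, so compare x; assumes an unmirrored right hand.
    states.append(1 if lm[4].x < lm[3].x else 0)
    # Other fingers: fingertip above its PIP joint (smaller y) means extended.
    for tip in TIP_IDS[1:]:
        states.append(1 if lm[tip].y < lm[tip - 2].y else 0)
    return states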
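One caveat on the new auto-refresh loop: newer Streamlit releases deprecate st.experimental_rerun() in favor of st.rerun(), so the tail of the output column may need a version-tolerant call. A small sketch, assuming only that st.rerun exists on those newer releases:

import time
import streamlit as st

# Use whichever rerun API this Streamlit install provides.
time.sleep(0.5)
if hasattr(st, "rerun"):
    st.rerun()
else:
    st.experimental_rerun()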