Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,47 +1,46 @@
 import streamlit as st
 import cv2
 import numpy as np
-import mediapipe as mp
 import requests
 from gtts import gTTS
 import tempfile
 
 # ---------------- STREAMLIT CONFIG ----------------
 st.set_page_config(page_title="Hand2Voice", layout="wide")
-st.
+st.title("π€ Hand2Voice")
+st.write("Hand Gesture to Voice Conversion")
 
-# ----------------
+# ---------------- CONSTANTS ----------------
 GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
 
+# ---------------- LOAD GESTURES ----------------
 @st.cache_data
 def load_gestures():
-
-
-
-
-
-
-    mp_hands = mp.solutions.hands
-
-
-
-
-    )
+    return requests.get(GESTURE_URL).json()["gestures"]
+
+# ---------------- LAZY MEDIAPIPE ----------------
+@st.cache_resource
+def load_mediapipe():
+    import mediapipe as mp
+    mp_hands = mp.solutions.hands
+    return mp_hands.Hands(
+        static_image_mode=True,
+        max_num_hands=1,
+        min_detection_confidence=0.7
+    )
 
-# ---------------- FINGER
+# ---------------- FINGER LOGIC ----------------
 def get_finger_states(hand_landmarks):
     finger_tips = [4, 8, 12, 16, 20]
     finger_bases = [2, 6, 10, 14, 18]
 
     states = []
 
-    # Thumb logic
     states.append(
         1 if hand_landmarks.landmark[4].x >
         hand_landmarks.landmark[3].x else 0
     )
 
-    # Other fingers
     for tip, base in zip(finger_tips[1:], finger_bases[1:]):
         states.append(
             1 if hand_landmarks.landmark[tip].y <
@@ -50,78 +49,56 @@ def get_finger_states(hand_landmarks):
 
     return states
 
-# ---------------- GESTURE
-def match_gesture(states):
-    for name, info in
+# ---------------- MATCH GESTURE ----------------
+def match_gesture(states, rules):
+    for name, info in rules.items():
         if states == info["pattern"]:
             return name
     return "Unknown Gesture"
 
-# ----------------
-def recognize_gesture(frame):
+# ---------------- RECOGNITION ----------------
+def recognize_gesture(frame, hands, rules):
     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     result = hands.process(rgb)
 
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
             states = get_finger_states(hand_landmarks)
-            return match_gesture(states)
+            return match_gesture(states, rules)
 
     return "No Hand Detected"
 
 # ---------------- TEXT TO SPEECH ----------------
 def speak_text(text):
     tts = gTTS(text=text, lang="en")
-
-    tts.save(
-    return
+    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(file.name)
+    return file.name
 
 # ---------------- UI ----------------
-st.markdown(
-    "<h1 style='text-align:center;'>π€ Hand2Voice</h1>"
-    "<p style='text-align:center;'>Hand Gesture to Voice Conversion</p>",
-    unsafe_allow_html=True
-)
-
-st.divider()
-
 col1, col2 = st.columns(2)
 
-# -------- CAMERA --------
 with col1:
-    st.subheader("π· Camera
-    image = st.camera_input("
+    st.subheader("π· Camera")
+    image = st.camera_input("Capture hand gesture")
 
-# -------- OUTPUT --------
 with col2:
     st.subheader("π Output")
 
-    if image
+    if image:
+        gestures = load_gestures()
+        hands = load_mediapipe()
+
         img_bytes = image.getvalue()
         img_array = np.frombuffer(img_bytes, np.uint8)
         frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
 
-
-
-        st.markdown(
-            f"""
-            <div style="
-                padding:20px;
-                background-color:#e0f2fe;
-                border-radius:12px;
-                font-size:24px;
-                font-weight:bold;
-                text-align:center;">
-                π {gesture_text}
-            </div>
-            """,
-            unsafe_allow_html=True
-        )
+        gesture = recognize_gesture(frame, hands, gestures)
 
-
-        if gesture_text not in ["Unknown Gesture", "No Hand Detected"]:
-            audio_path = speak_text(gesture_text)
-            st.audio(audio_path)
+        st.success(f"π {gesture}")
 
+        if gesture not in ["Unknown Gesture", "No Hand Detected"]:
+            audio = speak_text(gesture)
+            st.audio(audio)
     else:
-        st.info("Capture an image to
+        st.info("Capture an image to start")
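Note on the data contract: match_gesture is a plain equality test between the five-element state list and each rule's pattern, so the JSON served at GESTURE_URL must expose a top-level "gestures" object whose entries each carry a "pattern" of 0/1 flags ordered [thumb, index, middle, ring, pinky]. A minimal sketch of that contract follows; the gesture names and patterns are illustrative, not taken from the repo's gesture_rules.json:

import json

# Stand-in for the payload load_gestures() fetches from GESTURE_URL.
# Gesture names and patterns here are invented for illustration.
raw = '''
{
  "gestures": {
    "Open Palm": {"pattern": [1, 1, 1, 1, 1]},
    "Fist":      {"pattern": [0, 0, 0, 0, 0]},
    "Thumbs Up": {"pattern": [1, 0, 0, 0, 0]}
  }
}
'''

rules = json.loads(raw)["gestures"]   # mirrors load_gestures()

# The same exact-equality lookup match_gesture performs.
states = [1, 0, 0, 0, 0]              # as returned by get_finger_states
match = next((name for name, info in rules.items() if states == info["pattern"]),
             "Unknown Gesture")
print(match)  # Thumbs Up

JSON arrays parse to Python lists, so the == comparison against the states list works directly; any gesture whose pattern is not exactly five 0/1 values can never match.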
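The finger heuristic itself only reads the x/y of a few landmarks: the thumb counts as extended when tip 4 has a larger x than joint 3 (which assumes a particular hand orientation), and each other finger when its tip (8/12/16/20) sits above its middle joint (6/10/14/18) in image coordinates, where smaller y means higher. That makes it easy to sanity-check with a synthetic hand, no MediaPipe required. A minimal sketch: the function body is copied from app.py (with the two comparison lines the diff view cut off restored by symmetry with the thumb branch), and the coordinates are invented:

from types import SimpleNamespace

def get_finger_states(hand_landmarks):
    finger_tips = [4, 8, 12, 16, 20]
    finger_bases = [2, 6, 10, 14, 18]

    states = []

    states.append(
        1 if hand_landmarks.landmark[4].x >
        hand_landmarks.landmark[3].x else 0
    )

    for tip, base in zip(finger_tips[1:], finger_bases[1:]):
        states.append(
            1 if hand_landmarks.landmark[tip].y <
            hand_landmarks.landmark[base].y else 0
        )

    return states

# Synthetic 21-point hand: thumb and index extended, the rest curled.
# Coordinates are normalized [0, 1] with y increasing downward, as in MediaPipe.
pts = [(0.5, 0.5)] * 21
pts[3], pts[4] = (0.40, 0.60), (0.45, 0.58)    # thumb tip right of its joint -> 1
pts[6], pts[8] = (0.50, 0.50), (0.50, 0.30)    # index tip above its joint   -> 1
pts[10], pts[12] = (0.55, 0.50), (0.55, 0.55)  # middle curled               -> 0
pts[14], pts[16] = (0.60, 0.50), (0.60, 0.55)  # ring curled                 -> 0
pts[18], pts[20] = (0.65, 0.50), (0.65, 0.55)  # pinky curled                -> 0

hand = SimpleNamespace(landmark=[SimpleNamespace(x=x, y=y) for x, y in pts])
print(get_finger_states(hand))  # [1, 1, 0, 0, 0]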