# Hand2Voice — Streamlit hand-gesture-to-speech demo
# (originally hosted on Hugging Face Spaces; the Space showed "Runtime error" on startup)
import tempfile
import time

import cv2
import mediapipe as mp
import numpy as np
import requests
import streamlit as st
from gtts import gTTS
# ---------------- CONFIG ----------------
# Page config must be the first Streamlit command executed in the script.
st.set_page_config(page_title="Hand2Voice", layout="wide")
# Remote JSON mapping gesture names to 5-element finger-state patterns.
# TODO(review): "YOUR_USERNAME" is a placeholder — as committed, this URL 404s
# and the gesture fetch below fails; replace with the real GitHub account.
GESTURE_URL = "https://raw.githubusercontent.com/YOUR_USERNAME/Hand2Voice/main/gestures/gesture_rules.json"
# ---------------- SESSION STATE ----------------
# Seed per-session values exactly once; Streamlit reruns keep existing entries.
# last_gesture: most recent recognized gesture label.
# last_spoken: last gesture that was converted to audio (debounces TTS).
for _key in ("last_gesture", "last_spoken"):
    if _key not in st.session_state:
        st.session_state[_key] = ""
# ---------------- LOAD GESTURES ----------------
@st.cache_data(show_spinner=False)
def load_gestures():
    """Fetch the gesture-rule table from GESTURE_URL.

    Returns:
        dict: gesture name -> rule info (each rule carries a "pattern" list
        of five 0/1 finger states, matched in match_gesture()).

    Cached with st.cache_data so the network fetch happens once per session,
    not on every 0.5 s rerun. On any network/HTTP/JSON failure an error is
    shown and {} is returned, so the app degrades to "Unknown" gestures
    instead of crashing at startup.
    """
    try:
        resp = requests.get(GESTURE_URL, timeout=10)  # never hang the UI
        resp.raise_for_status()  # surface 404s (e.g. placeholder URL) as errors
        return resp.json()["gestures"]
    except (requests.RequestException, ValueError, KeyError) as exc:
        st.error(f"Could not load gesture rules: {exc}")
        return {}

gesture_rules = load_gestures()
# ---------------- MEDIAPIPE ----------------
# One shared MediaPipe Hands detector for the whole session.
# static_image_mode=True treats each input as an independent still image,
# which matches st.camera_input below: it delivers single snapshots, not a
# continuous video stream.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,  # the app maps exactly one hand to one phrase
    min_detection_confidence=0.7
)
# ---------------- FINGER LOGIC ----------------
def get_finger_states(hand_landmarks):
    """Return five 0/1 flags [thumb, index, middle, ring, pinky] (1 = extended).

    Uses MediaPipe's 21-landmark hand model: the thumb is judged by the
    x-axis (tip 4 right of joint 3), the other fingers by the y-axis
    (tip above its PIP joint, i.e. smaller y in image coordinates).
    NOTE(review): the thumb x-test assumes one hand orientation — left hands
    or mirrored frames may flip it; confirm against real input.
    """
    lm = hand_landmarks.landmark
    thumb_open = 1 if lm[4].x > lm[3].x else 0
    tips = (8, 12, 16, 20)
    pips = (6, 10, 14, 18)
    return [thumb_open] + [int(lm[t].y < lm[p].y) for t, p in zip(tips, pips)]
# ---------------- MATCH ----------------
def match_gesture(states):
    """Return the name of the gesture whose "pattern" equals *states*.

    Falls back to "Unknown" when no rule in the module-level gesture_rules
    table matches the given finger-state list.
    """
    hits = (name for name, info in gesture_rules.items() if info["pattern"] == states)
    return next(hits, "Unknown")
# ---------------- RECOGNIZER ----------------
def recognize(frame):
    """Detect a hand in a BGR frame and return the matched gesture name.

    Converts to RGB for MediaPipe, runs the shared `hands` detector, and
    classifies the first detected hand (max_num_hands=1). Returns "No Hand"
    when nothing is detected.
    """
    result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detected = result.multi_hand_landmarks
    if not detected:
        return "No Hand"
    return match_gesture(get_finger_states(detected[0]))
# ---------------- TEXT TO SPEECH ----------------
def speak(text):
    """Synthesize *text* to an MP3 via gTTS and return the temp-file path.

    Args:
        text: phrase to speak (English).

    Returns:
        str: path of the generated .mp3, kept on disk (delete=False) so
        st.audio can read it after this function returns.

    Fix: the original kept the NamedTemporaryFile handle open while gTTS
    wrote to the same path — a descriptor leak, and broken on Windows where
    an open temp file cannot be reopened by another writer. Close the handle
    first, then let gTTS save to the reserved path.
    """
    tts = gTTS(text=text, lang="en")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        path = tmp.name
    tts.save(path)
    return path
# ---------------- UI ----------------
# Centered title and subtitle rendered as raw HTML (emoji bytes kept as-is
# from the source — they appear mis-encoded in the original file).
st.markdown(
    "<h1 style='text-align:center;'>π€ Hand2Voice</h1>"
    "<p style='text-align:center;'>Real-Time Hand Gesture to Voice</p>",
    unsafe_allow_html=True
)
st.divider()
# Two-column layout: left = camera feed, right = recognized gesture + audio.
col1, col2 = st.columns(2)
# -------- CAMERA --------
with col1:
    st.subheader("π· Live Camera")
    # st.camera_input returns an UploadedFile holding one JPEG snapshot,
    # or None until the user enables the camera.
    image = st.camera_input("Live gesture feed")
# -------- OUTPUT --------
with col2:
    st.subheader("π Live Output")
    if image:
        # Decode the camera snapshot (JPEG bytes) into a BGR OpenCV frame.
        img_bytes = image.getvalue()
        np_img = np.frombuffer(img_bytes, np.uint8)
        frame = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
        gesture = recognize(frame)
        st.session_state.last_gesture = gesture
        # Gesture banner. The HTML lines stay at column 0 so markdown does not
        # treat 4-space-indented lines as a code block.
        st.markdown(
            f"""
<div style="
padding:20px;
background-color:#dcfce7;
border-radius:12px;
font-size:24px;
font-weight:bold;
text-align:center;">
π {gesture}
</div>
""",
            unsafe_allow_html=True
        )
        # Speak only when a *new*, real gesture appears — last_spoken debounces
        # repeated TTS of the same phrase across reruns.
        if gesture not in ["Unknown", "No Hand"] and gesture != st.session_state.last_spoken:
            audio = speak(gesture)
            st.audio(audio)
            st.session_state.last_spoken = gesture
        # Auto refresh for a pseudo-real-time loop.
        # Fix: st.experimental_rerun() was deprecated and removed in modern
        # Streamlit — use st.rerun() when available, fall back on old installs.
        time.sleep(0.5)
        if hasattr(st, "rerun"):
            st.rerun()
        else:
            st.experimental_rerun()
    else:
        st.info("Enable camera and show hand gestures")