Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import cv2 | |
| import numpy as np | |
| import requests | |
| from gtts import gTTS | |
| import tempfile | |
| # ---------------- STREAMLIT CONFIG ---------------- | |
| st.set_page_config(page_title="Hand2Voice", layout="wide") | |
| st.title("π€ Hand2Voice") | |
| st.write("Hand Gesture to Voice Conversion") | |
| # ---------------- CONSTANTS ---------------- | |
| GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json" | |
| # ---------------- LOAD GESTURES ---------------- | |
| def load_gestures(): | |
| return requests.get(GESTURE_URL).json()["gestures"] | |
| # ---------------- LAZY MEDIAPIPE ---------------- | |
| def load_mediapipe(): | |
| import mediapipe as mp | |
| mp_hands = mp.solutions.hands | |
| return mp_hands.Hands( | |
| static_image_mode=True, | |
| max_num_hands=1, | |
| min_detection_confidence=0.7 | |
| ) | |
| # ---------------- FINGER LOGIC ---------------- | |
| def get_finger_states(hand_landmarks): | |
| finger_tips = [4, 8, 12, 16, 20] | |
| finger_bases = [2, 6, 10, 14, 18] | |
| states = [] | |
| states.append( | |
| 1 if hand_landmarks.landmark[4].x > | |
| hand_landmarks.landmark[3].x else 0 | |
| ) | |
| for tip, base in zip(finger_tips[1:], finger_bases[1:]): | |
| states.append( | |
| 1 if hand_landmarks.landmark[tip].y < | |
| hand_landmarks.landmark[base].y else 0 | |
| ) | |
| return states | |
| # ---------------- MATCH GESTURE ---------------- | |
| def match_gesture(states, rules): | |
| for name, info in rules.items(): | |
| if states == info["pattern"]: | |
| return name | |
| return "Unknown Gesture" | |
| # ---------------- RECOGNITION ---------------- | |
| def recognize_gesture(frame, hands, rules): | |
| rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
| result = hands.process(rgb) | |
| if result.multi_hand_landmarks: | |
| for hand_landmarks in result.multi_hand_landmarks: | |
| states = get_finger_states(hand_landmarks) | |
| return match_gesture(states, rules) | |
| return "No Hand Detected" | |
| # ---------------- TEXT TO SPEECH ---------------- | |
| def speak_text(text): | |
| tts = gTTS(text=text, lang="en") | |
| file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") | |
| tts.save(file.name) | |
| return file.name | |
| # ---------------- UI ---------------- | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("π· Camera") | |
| image = st.camera_input("Capture hand gesture") | |
| with col2: | |
| st.subheader("π Output") | |
| if image: | |
| gestures = load_gestures() | |
| hands = load_mediapipe() | |
| img_bytes = image.getvalue() | |
| img_array = np.frombuffer(img_bytes, np.uint8) | |
| frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR) | |
| gesture = recognize_gesture(frame, hands, gestures) | |
| st.success(f"π {gesture}") | |
| if gesture not in ["Unknown Gesture", "No Hand Detected"]: | |
| audio = speak_text(gesture) | |
| st.audio(audio) | |
| else: | |
| st.info("Capture an image to start") | |