import streamlit as st
import cv2
import numpy as np
import requests
from gtts import gTTS
import tempfile
# ---------------- STREAMLIT CONFIG ----------------
st.set_page_config(page_title="Hand2Voice", layout="wide")
st.title("π€ Hand2Voice")
st.write("Hand Gesture to Voice Conversion")
# ---------------- CONSTANTS ----------------
GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"
# ---------------- LOAD GESTURES ----------------
@st.cache_data
def load_gestures():
    # Fetch gesture definitions once and cache them across Streamlit reruns
    resp = requests.get(GESTURE_URL, timeout=10)
    resp.raise_for_status()
    return resp.json()["gestures"]
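# The rules file is assumed to look roughly like the sketch below; the shape
# is inferred from match_gesture(), which compares each entry's "pattern"
# against a five-element finger-state list. Names and patterns here are
# illustrative only, not taken from the actual file:
# {
#   "gestures": {
#     "Open Palm": {"pattern": [1, 1, 1, 1, 1]},
#     "Fist":      {"pattern": [0, 0, 0, 0, 0]}
#   }
# }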
# ---------------- LAZY MEDIAPIPE ----------------
@st.cache_resource
def load_mediapipe():
    # Import lazily so the heavy dependency loads only when first needed
    import mediapipe as mp
    mp_hands = mp.solutions.hands
    # static_image_mode=True suits single captures from st.camera_input
    return mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=0.7,
    )
# ---------------- FINGER LOGIC ----------------
def get_finger_states(hand_landmarks):
    # Landmark indices for each fingertip and the joint below it
    finger_tips = [4, 8, 12, 16, 20]
    finger_bases = [2, 6, 10, 14, 18]
    states = []
    # Thumb: extension shows along the x axis rather than y
    # (this check is orientation-dependent)
    states.append(
        1 if hand_landmarks.landmark[4].x >
        hand_landmarks.landmark[3].x else 0
    )
    # Remaining fingers: a tip above its base joint (smaller y) is extended
    for tip, base in zip(finger_tips[1:], finger_bases[1:]):
        states.append(
            1 if hand_landmarks.landmark[tip].y <
            hand_landmarks.landmark[base].y else 0
        )
    return states
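# Example (thumb, index, middle, ring, pinky): index and middle extended
# ("peace") gives [_, 1, 1, 0, 0]. The four non-thumb bits follow the
# tip-vs-base rule above; the thumb bit depends on hand orientation.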
# ---------------- MATCH GESTURE ----------------
def match_gesture(states, rules):
    for name, info in rules.items():
        if states == info["pattern"]:
            return name
    return "Unknown Gesture"
# ---------------- RECOGNITION ----------------
def recognize_gesture(frame, hands, rules):
    # MediaPipe expects RGB; OpenCV decodes images as BGR
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb)
    if result.multi_hand_landmarks:
        # max_num_hands=1, so this classifies the single detected hand
        for hand_landmarks in result.multi_hand_landmarks:
            states = get_finger_states(hand_landmarks)
            return match_gesture(states, rules)
    return "No Hand Detected"
# ---------------- TEXT TO SPEECH ----------------
def speak_text(text):
    # gTTS needs network access: it synthesizes speech via Google's TTS API
    tts = gTTS(text=text, lang="en")
    # delete=False keeps the file on disk so st.audio can read it by path
    file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(file.name)
    return file.name
# ---------------- UI ----------------
col1, col2 = st.columns(2)
with col1:
    st.subheader("Camera")
    image = st.camera_input("Capture hand gesture")
with col2:
    st.subheader("Output")
    if image:
        gestures = load_gestures()
        hands = load_mediapipe()
        # Decode the captured JPEG bytes into an OpenCV BGR frame
        img_bytes = image.getvalue()
        img_array = np.frombuffer(img_bytes, np.uint8)
        frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        gesture = recognize_gesture(frame, hands, gestures)
        st.success(gesture)
        # Only speak recognized gestures, not the fallback messages
        if gesture not in ["Unknown Gesture", "No Hand Detected"]:
            audio = speak_text(gesture)
            st.audio(audio)
    else:
        st.info("Capture an image to start")