Spaces:

arshtech
/

Hand2Voice

Runtime error

App Files Files Community

Hand2Voice / app.py

arshtech

Update app.py

bb6e324 verified 8 days ago

raw

history blame contribute delete

2.96 kB

	import streamlit as st
	import cv2
	import numpy as np
	import requests
	from gtts import gTTS
	import tempfile

	# ---------------- STREAMLIT CONFIG ----------------
	# Page setup: set the page title and wide layout, then render the app
	# heading and a one-line description of what the app does.
	st.set_page_config(page_title="Hand2Voice", layout="wide")
	st.title("🤟 Hand2Voice")
	st.write("Hand Gesture to Voice Conversion")

	# ---------------- CONSTANTS ----------------
	# Remote JSON document holding the gesture rules; load_gestures() downloads
	# it and reads the value under its top-level "gestures" key.
	GESTURE_URL = "https://raw.githubusercontent.com/imarshbir/Hand2Voice/main/gestures/gesture_rules.json"

	# ---------------- LOAD GESTURES ----------------
	@st.cache_data
	def load_gestures():
	    """Download the gesture rule table and return its "gestures" entry.

	    The JSON document at GESTURE_URL is fetched over HTTP and the value
	    stored under its top-level "gestures" key is returned. The function is
	    wrapped in st.cache_data.

	    Returns:
	        The object found under the "gestures" key of the downloaded JSON.

	    Raises:
	        requests.RequestException: if the connection fails, the request
	            times out, or the server answers with an error status.
	        KeyError: if the document has no "gestures" key.
	    """
	    # Without a timeout a stalled connection would block the app forever.
	    response = requests.get(GESTURE_URL, timeout=10)
	    # Fail with a clear HTTP error instead of trying to parse an error page.
	    response.raise_for_status()
	    return response.json()["gestures"]

	# ---------------- LAZY MEDIAPIPE ----------------
	@st.cache_resource
	def load_mediapipe():
	    """Create the MediaPipe Hands detector used by the recognition step.

	    mediapipe is imported inside the function body rather than at module
	    top, so the import only runs when this function is actually executed.
	    The function is wrapped in st.cache_resource.

	    Returns:
	        A Hands instance configured for still images, at most one hand and
	        a minimum detection confidence of 0.7.
	    """
	    import mediapipe as mp

	    detector_options = {
	        "static_image_mode": True,
	        "max_num_hands": 1,
	        "min_detection_confidence": 0.7,
	    }
	    return mp.solutions.hands.Hands(**detector_options)

	# ---------------- FINGER LOGIC ----------------
	def get_finger_states(hand_landmarks):
	    """Turn a set of hand landmarks into five 0/1 finger flags.

	    Args:
	        hand_landmarks: object whose ``landmark`` attribute is indexable
	            and yields points exposing ``x`` and ``y``.

	    Returns:
	        A list of five ints: the thumb flag first, followed by the flags
	        for the tip/base landmark pairs 8/6, 12/10, 16/14 and 20/18.
	    """
	    points = hand_landmarks.landmark

	    # Thumb is judged horizontally: 1 when landmark 4 has a larger x than
	    # landmark 3.
	    # NOTE(review): this direction presumably fits only one hand/orientation
	    # - confirm against the gesture patterns in use.
	    thumb_flag = int(points[4].x > points[3].x)

	    # Other fingers are judged vertically: 1 when the tip's y is smaller
	    # than the base's y.
	    tip_base_pairs = ((8, 6), (12, 10), (16, 14), (20, 18))
	    finger_flags = [
	        int(points[tip].y < points[base].y) for tip, base in tip_base_pairs
	    ]

	    return [thumb_flag, *finger_flags]

	# ---------------- MATCH GESTURE ----------------
	def match_gesture(states, rules):
	    """Look up the gesture whose pattern equals *states*.

	    Args:
	        states: finger flags to look up.
	        rules: mapping of gesture name to a dict carrying a "pattern" entry.

	    Returns:
	        The name of the first rule (in mapping order) whose "pattern"
	        equals *states*, or "Unknown Gesture" when none does.
	    """
	    # Lazy generator: stops at the first hit, exactly like an early return.
	    candidates = (
	        name for name, info in rules.items() if states == info["pattern"]
	    )
	    return next(candidates, "Unknown Gesture")

	# ---------------- RECOGNITION ----------------
	def recognize_gesture(frame, hands, rules):
	    """Detect a hand in *frame* and return the name of the matching gesture.

	    Args:
	        frame: image array that is converted with cv2.COLOR_BGR2RGB before
	            detection; may be None or empty when decoding failed upstream.
	        hands: detector whose ``process()`` result exposes
	            ``multi_hand_landmarks``.
	        rules: gesture table forwarded unchanged to match_gesture().

	    Returns:
	        Whatever match_gesture() returns for the first detected hand, or
	        "No Hand Detected" when the frame is unusable or holds no hand.
	    """
	    # cv2.imdecode hands back None for undecodable bytes and cvtColor would
	    # raise on it; treat an unusable frame like a frame without a hand.
	    if frame is None or frame.size == 0:
	        return "No Hand Detected"

	    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    result = hands.process(rgb)

	    detected = result.multi_hand_landmarks
	    if not detected:
	        return "No Hand Detected"

	    # Only the first hand is evaluated; the former loop returned on its
	    # first iteration, so it never looked at any other hand either.
	    return match_gesture(get_finger_states(detected[0]), rules)

	# ---------------- TEXT TO SPEECH ----------------
	def speak_text(text):
	    """Synthesise *text* as English speech and return the MP3 file's path.

	    Args:
	        text: phrase handed to gTTS.

	    Returns:
	        Path of a newly created temporary ".mp3" file. It is created with
	        delete=False, so it is not removed automatically; removing it is
	        left to the caller.
	    """
	    # Reserve a unique path and close our handle straight away. Previously
	    # the handle was never closed, leaking a descriptor per call while
	    # gTTS reopened the same path to write the audio.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
	        mp3_path = handle.name

	    gTTS(text=text, lang="en").save(mp3_path)
	    return mp3_path

	# ---------------- UI ----------------
	camera_col, output_col = st.columns(2)

	# First column: the camera snapshot widget.
	with camera_col:
	    st.subheader("📷 Camera")
	    image = st.camera_input("Capture hand gesture")

	# Second column: the recognised gesture and, when applicable, its audio.
	with output_col:
	    st.subheader("📝 Output")

	    if not image:
	        st.info("Capture an image to start")
	    else:
	        gestures = load_gestures()
	        hands = load_mediapipe()

	        # Decode the captured snapshot bytes into an image array.
	        encoded = np.frombuffer(image.getvalue(), np.uint8)
	        frame = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

	        gesture = recognize_gesture(frame, hands, gestures)
	        st.success(f"🔊 {gesture}")

	        # The two status strings are shown but never spoken.
	        is_status_message = gesture in ("Unknown Gesture", "No Hand Detected")
	        if not is_status_message:
	            st.audio(speak_text(gesture))