Spaces:
Runtime error (status reported by the Hugging Face Space)
| import os | |
| os.environ["MEDIAPIPE_DISABLE_GPU"] = "1" | |
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| import json | |
| import time | |
| from gtts import gTTS | |
| import tempfile | |
| import mediapipe as mp | |
| from mediapipe import solutions | |
# ---------------- LOAD GESTURES ----------------
# Gesture definitions: each entry maps a gesture name to a rule object
# whose "pattern" is a list of per-finger up/down flags.
with open("gestures_rules.json", "r") as rules_file:
    gesture_data = json.load(rules_file)["gestures"]
# ---------------- MEDIAPIPE (CPU ONLY) ----------------
mp_hands = solutions.hands
# Track a single hand; the 0.7 thresholds trade a little recall for
# steadier detections.
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
| # ---------------- UTIL ---------------- | |
def get_finger_states(hand_landmarks):
    """Return five 1/0 flags (thumb..pinky) marking extended fingers.

    A finger counts as extended (1) when its tip landmark sits above the
    corresponding lower-joint landmark in image coordinates (smaller y
    means higher in the frame).
    """
    tip_ids = (4, 8, 12, 16, 20)
    pip_ids = (2, 6, 10, 14, 18)
    states = []
    for tip, pip in zip(tip_ids, pip_ids):
        tip_y = hand_landmarks.landmark[tip].y
        joint_y = hand_landmarks.landmark[pip].y
        states.append(1 if tip_y < joint_y else 0)
    return states
def detect_gesture(states, gestures=None):
    """Return the name of the first gesture whose pattern equals *states*.

    Parameters
    ----------
    states : list[int]
        Five 1/0 finger-extension flags, as produced by get_finger_states().
    gestures : dict | None
        Optional mapping of name -> {"pattern": [...]}. Defaults to the
        module-level gesture_data loaded from gestures_rules.json, so
        existing callers are unaffected.

    Returns
    -------
    str | None
        The matching gesture name, or None when no rule matches.
    """
    rules = gesture_data if gestures is None else gestures
    for name, rule in rules.items():
        if rule["pattern"] == states:
            return name
    return None
def speak_text(text):
    """Synthesize *text* with gTTS and return the path of the saved MP3.

    The caller is responsible for the temp file's lifetime (delete=False).
    """
    tts = gTTS(text=text)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    # Fix: close our handle before gTTS writes to the path. The original
    # leaked the handle, and on Windows an open NamedTemporaryFile cannot
    # be reopened by name, so tts.save() would fail there.
    tmp.close()
    tts.save(tmp.name)
    return tmp.name
# ---------------- FRAME PROCESS ----------------
def process_frame(frame, sentence, last_char, last_time):
    """Detect a gesture in one webcam frame and grow the sentence.

    Returns the mirrored frame together with the updated
    (sentence, last_char, last_time) state triple; all four values are
    passed straight back when no frame is available.
    """
    if frame is None:
        return frame, sentence, last_char, last_time

    # Mirror for a natural selfie view, then hand MediaPipe an RGB copy.
    frame = cv2.flip(frame, 1)
    result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    detected_hands = result.multi_hand_landmarks
    if detected_hands:
        char = detect_gesture(get_finger_states(detected_hands[0]))
        now = time.time()
        # Debounce: only accept a character that differs from the previous
        # one and arrives more than a second after the last acceptance.
        if char and char != last_char and now - last_time > 1:
            sentence += char
            last_char = char
            last_time = now

    return frame, sentence, last_char, last_time
def clear_text():
    """Reset UI state: empty sentence, no last character, zeroed timestamp."""
    empty_sentence = ""
    no_last_char = ""
    reset_time = 0.0
    return empty_sentence, no_last_char, reset_time
def speak(sentence):
    """Return an MP3 path voicing *sentence*, or None when it is empty."""
    if not sentence:
        return None
    return speak_text(sentence)
# ---------------- CSS ----------------
# Custom stylesheet shipped alongside the app.
with open("styles.css") as css_file:
    custom_css = css_file.read()
# ---------------- UI ----------------
# Fix: custom CSS is a gr.Blocks() argument; demo.launch() has no `css`
# parameter and rejects it with a TypeError (the likely cause of the
# Space's "Runtime error" status).
with gr.Blocks(title="Hand2Voice", css=custom_css) as demo:
    gr.Markdown("## π€ Hand2Voice β Gesture to Speech")

    with gr.Row():
        with gr.Column():
            # NOTE(review): gr.Image has no `live` parameter in Gradio 4.x;
            # if this targets Gradio 4 it probably needs
            # sources=["webcam"], streaming=True — confirm the pinned version.
            webcam = gr.Image(
                label="Webcam",
                type="numpy",
                live=True
            )
        with gr.Column():
            output = gr.HTML("<h3>Waiting for gestures...</h3>")
            speak_btn = gr.Button("π Speak")
            clear_btn = gr.Button("π§Ή Clear")
            audio = gr.Audio(autoplay=True)

    # Per-session state threaded through every frame callback.
    sentence = gr.State("")
    last_char = gr.State("")
    last_time = gr.State(0.0)

    # Each webcam frame updates the recognition state, then re-renders
    # the accumulated sentence.
    webcam.change(
        process_frame,
        inputs=[webcam, sentence, last_char, last_time],
        outputs=[webcam, sentence, last_char, last_time]
    ).then(
        lambda s: f"<h2>{s}</h2>",
        sentence,
        output
    )

    clear_btn.click(clear_text, outputs=[sentence, last_char, last_time])
    speak_btn.click(speak, sentence, audio)

demo.launch()