import os os.environ["MEDIAPIPE_DISABLE_GPU"] = "1" import gradio as gr import cv2 import numpy as np import json import time from gtts import gTTS import tempfile import mediapipe as mp from mediapipe import solutions # ---------------- LOAD GESTURES ---------------- with open("gestures_rules.json", "r") as f: gesture_data = json.load(f)["gestures"] # ---------------- MEDIAPIPE (CPU ONLY) ---------------- mp_hands = solutions.hands hands = mp_hands.Hands( max_num_hands=1, min_detection_confidence=0.7, min_tracking_confidence=0.7 ) # ---------------- UTIL ---------------- def get_finger_states(hand_landmarks): tips = [4, 8, 12, 16, 20] pips = [2, 6, 10, 14, 18] return [ 1 if hand_landmarks.landmark[t].y < hand_landmarks.landmark[p].y else 0 for t, p in zip(tips, pips) ] def detect_gesture(states): for name, rule in gesture_data.items(): if rule["pattern"] == states: return name return None def speak_text(text): tts = gTTS(text=text) f = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") tts.save(f.name) return f.name # ---------------- FRAME PROCESS ---------------- def process_frame(frame, sentence, last_char, last_time): if frame is None: return frame, sentence, last_char, last_time frame = cv2.flip(frame, 1) rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) result = hands.process(rgb) if result.multi_hand_landmarks: hand = result.multi_hand_landmarks[0] states = get_finger_states(hand) char = detect_gesture(states) now = time.time() if char and char != last_char and now - last_time > 1: sentence += char last_char = char last_time = now return frame, sentence, last_char, last_time def clear_text(): return "", "", 0.0 def speak(sentence): return speak_text(sentence) if sentence else None # ---------------- CSS ---------------- with open("styles.css") as f: custom_css = f.read() # ---------------- UI ---------------- with gr.Blocks(title="Hand2Voice") as demo: gr.Markdown("## ๐ค Hand2Voice โ Gesture to Speech") with gr.Row(): with gr.Column(): webcam = gr.Image( label="Webcam", type="numpy", live=True ) with gr.Column(): output = gr.HTML("