# HF rebuild trigger
"""Hand-driven input control.

Combines two modes driven by a single webcam feed:

* **ASL mode** (default): classifies single-hand ASL letters with a Keras
  model and types the recognized character.
* **Gesture mode**: moves the mouse with the index fingertip and maps simple
  pinch/finger gestures to clicks and volume keys.

A thumb-to-ring-finger touch (held for a few frames) toggles between modes.
"""
import time
import warnings
from collections import deque

import cv2
import joblib
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# `keyboard` and `pyautogui` need OS-level input access and may be missing
# (e.g. headless CI / server builds). They are optional: when absent the
# class still runs — mouse/typing side effects simply become no-ops.
try:
    import keyboard
except ImportError:
    keyboard = None
try:
    import pyautogui
except ImportError:
    pyautogui = None

warnings.filterwarnings(
    "ignore",
    message="SymbolDatabase.GetPrototype() is deprecated.*",
    category=UserWarning,
    module="google.protobuf.symbol_database",
)


class GestureControl:
    """Webcam-driven ASL typing and gesture mouse control.

    Typical usage::

        gc = GestureControl()
        gc.set_webcam(cv2.VideoCapture(0))
        gc.start()          # blocks until ESC / stop()
        text = gc.get_typed_text()
    """

    def __init__(self, model_path='asl_model.h5', encoder_path='label_encoder.pkl'):
        """Load the ASL classifier and set up MediaPipe hand tracking.

        Args:
            model_path: Path to the saved Keras ASL model (.h5).
            encoder_path: Path to the joblib-pickled sklearn LabelEncoder.
        """
        # Load ASL model and encoder
        self.model = load_model(model_path, compile=False)
        self.le = joblib.load(encoder_path)

        # Setup MediaPipe Hands
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )

        # Screen metrics for mouse mapping. pyautogui is optional (see module
        # top); fall back to a common resolution so construction never crashes
        # when it is unavailable — ASL mode works without it.
        if pyautogui is not None:
            self.screen_w, self.screen_h = pyautogui.size()
        else:
            self.screen_w, self.screen_h = 1920, 1080

        # Mode: False = ASL, True = gesture
        self.gesture_mode = False
        self.switch_cooldown = 5.0    # seconds between mode toggles
        self.last_switch_time = 0
        self.queue = deque(maxlen=4)  # recent gesture labels (toggle debounce)

        # Mouse smoothing state (prev_* are in screen pixels)
        self.prev_x = 0
        self.prev_y = 0
        self.smooth = 0.05            # exponential smoothing factor
        self.deadzone = 0.05          # normalized movement threshold
        self.last_right = 0
        self.right_cooldown = 1.0     # seconds between right clicks

        # ASL prediction throttling
        self.asl_cooldown = 2.0
        self.last_asl = 0
        self.last_label = None
        self.typed_text = ""

        # Map special ASL labels to pyautogui key names
        self.map = {'space': 'space', 'del': 'backspace', 'delete': 'backspace'}

        self.running = False
        self.cam = None

    def set_webcam(self, cam):
        """Attach a cv2.VideoCapture, releasing any previously attached one."""
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = cam

    def start(self):
        """Reset typed text and run the capture loop (blocks until stopped).

        Raises:
            RuntimeError: if no open webcam was attached via set_webcam().
        """
        if not (self.cam and self.cam.isOpened()):
            raise RuntimeError("Call set_webcam() first.")
        self.typed_text = ""
        self.running = True
        self._loop()

    def stop(self):
        """Signal the loop to end and release the webcam."""
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = None

    def get_typed_text(self):
        """Return the text accumulated by ASL recognition since start()."""
        return self.typed_text

    def _normalize(self, flat):
        """Normalize a flat [x0,y0,z0,x1,...] landmark list for the model.

        Translates all points so the wrist (landmark 0) is the origin, then
        scales by the maximum wrist distance so the hand fits a unit sphere.
        Returns a flat numpy array of the same length.
        """
        w = flat[:3]
        pts = [[flat[i] - w[0], flat[i + 1] - w[1], flat[i + 2] - w[2]]
               for i in range(0, len(flat), 3)]
        arr = np.array(pts)
        m = np.max(np.linalg.norm(arr - arr[0], axis=1))
        return (arr / m).flatten() if m > 0 else arr.flatten()

    def _finger_states(self, lm):
        """Return [thumb, index, middle, ring, pinky] extended-booleans.

        Thumb uses x (tip right of IP joint); other fingers use y
        (tip above PIP joint in image coordinates).
        """
        H = self.mp_hands
        st = [lm[H.HandLandmark.THUMB_TIP].x > lm[H.HandLandmark.THUMB_IP].x]
        for tip, pip in [(H.HandLandmark.INDEX_FINGER_TIP, H.HandLandmark.INDEX_FINGER_PIP),
                         (H.HandLandmark.MIDDLE_FINGER_TIP, H.HandLandmark.MIDDLE_FINGER_PIP),
                         (H.HandLandmark.RING_FINGER_TIP, H.HandLandmark.RING_FINGER_PIP),
                         (H.HandLandmark.PINKY_TIP, H.HandLandmark.PINKY_PIP)]:
            st.append(lm[tip].y < lm[pip].y)
        return st

    def _detect_simple(self, lm):
        """Classify one frame's landmarks into a simple gesture label.

        Returns one of 'thumb_ring', 'Pinch', 'Right Click', 'Volume Up',
        'Volume Down', or None. Thresholds are in normalized image units.
        """
        st = self._finger_states(lm)
        H = self.mp_hands
        # Thumb-ring toggle (mode switch): thumb touching ring tip while
        # index+middle extended and ring+pinky folded.
        d = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x - lm[H.HandLandmark.RING_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y - lm[H.HandLandmark.RING_FINGER_TIP].y
        ])
        if d < 0.05 and st[1] and st[2] and not st[3] and not st[4]:
            return 'thumb_ring'
        # Pinch (thumb-index) = left click
        dp = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x - lm[H.HandLandmark.INDEX_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y - lm[H.HandLandmark.INDEX_FINGER_TIP].y
        ])
        if dp < 0.05 and st[1]:
            return 'Pinch'
        # Index-middle together = right click
        dr = np.linalg.norm([
            lm[H.HandLandmark.INDEX_FINGER_TIP].x - lm[H.HandLandmark.MIDDLE_FINGER_TIP].x,
            lm[H.HandLandmark.INDEX_FINGER_TIP].y - lm[H.HandLandmark.MIDDLE_FINGER_TIP].y
        ])
        if dr < 0.06 and st[1] and st[2] and not st[3] and not st[4]:
            return 'Right Click'
        # Volume: thumb only up / pinky only up
        if st == [True, False, False, False, False]:
            return 'Volume Up'
        if st == [False, False, False, False, True]:
            return 'Volume Down'
        return None

    def _handle_gesture(self, lm, simple, now):
        """Gesture-mode actions: cursor movement plus click/volume keys.

        No-op when pyautogui is unavailable (it is an optional dependency).
        """
        if pyautogui is None:
            return
        tip = lm[self.mp_hands.HandLandmark.INDEX_FINGER_TIP]
        x = int(tip.x * self.screen_w)
        y = int(tip.y * self.screen_h)
        # Compare in normalized coords; prev_x/prev_y are stored in pixels.
        dx = abs(tip.x - (self.prev_x / self.screen_w))
        dy = abs(tip.y - (self.prev_y / self.screen_h))
        if dx > self.deadzone or dy > self.deadzone:
            nx = int(self.prev_x + (x - self.prev_x) * self.smooth)
            ny = int(self.prev_y + (y - self.prev_y) * self.smooth)
            pyautogui.moveTo(nx, ny)
            self.prev_x, self.prev_y = nx, ny
        if simple == 'Pinch':
            pyautogui.click()
        elif simple == 'Right Click' and now - self.last_right > self.right_cooldown:
            pyautogui.click(button='right')
            self.last_right = now
        elif simple == 'Volume Up':
            pyautogui.press('volumeup')
        elif simple == 'Volume Down':
            pyautogui.press('volumedown')

    def _handle_asl(self, lm, now):
        """ASL-mode action: throttled prediction, then type the letter."""
        if now - self.last_asl <= self.asl_cooldown:
            return
        flat = [c for p in lm for c in (p.x, p.y, p.z)]
        data = np.array([self._normalize(flat)])
        pred = self.model.predict(data, verbose=0)[0]
        idx = np.argmax(pred)
        label = self.le.inverse_transform([idx])[0]
        print(f"ASL: {label}")
        # Only act when the label changed, so a held sign types once.
        if label != self.last_label:
            if label in self.map:
                if pyautogui is not None:
                    pyautogui.press(self.map[label])
            elif len(label) == 1:
                if pyautogui is not None:
                    pyautogui.typewrite(label)
                self.typed_text += label
            self.last_label = label
        self.last_asl = now

    def _loop(self):
        """Main capture loop: read frames, track the hand, dispatch by mode.

        Runs until stop() is called, the camera fails, or ESC is pressed
        (ESC detection requires the optional `keyboard` package).
        """
        while self.running and self.cam.isOpened():
            ret, frame = self.cam.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror so movement feels natural
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = self.hands.process(rgb)
            now = time.time()
            simple = None
            if res.multi_hand_landmarks:
                hand = res.multi_hand_landmarks[0]
                lm = hand.landmark
                simple = self._detect_simple(lm)
                self.queue.append(simple)
                # Mode toggle: thumb_ring seen in >=3 of the last 4 frames,
                # rate-limited by switch_cooldown.
                if (now - self.last_switch_time > self.switch_cooldown
                        and self.queue.count('thumb_ring') >= 3):
                    self.gesture_mode = not self.gesture_mode
                    print(
                        f"Mode now: {'Gesture' if self.gesture_mode else 'ASL'}")
                    self.last_switch_time = now
                    self.queue.clear()
                if self.gesture_mode:
                    self._handle_gesture(lm, simple, now)
                else:
                    self._handle_asl(lm, now)
            # ESC to exit
            if keyboard is not None and keyboard.is_pressed('esc'):
                break
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        print("GestureControl stopped.")