# Hugging Face Spaces status banner captured with the source (not code):
# "Spaces: Sleeping"
# HF rebuild trigger
import time
import warnings
from collections import deque

import cv2
import joblib
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Optional desktop-automation dependencies: keep this module importable on
# headless systems (e.g. a Hugging Face Space) where they are unavailable.
# Code using these names must check for None before calling them.
try:
    import keyboard
except ImportError:
    keyboard = None
try:
    import pyautogui
except ImportError:
    pyautogui = None

# MediaPipe's protobuf layer emits this deprecation warning on every
# inference; silence just that message, not other protobuf warnings.
warnings.filterwarnings(
    "ignore",
    message="SymbolDatabase.GetPrototype() is deprecated.*",
    category=UserWarning,
    module="google.protobuf.symbol_database",
)
class GestureControl:
    """Webcam hand-gesture controller with two modes.

    Toggled by touching thumb to ring fingertip (held for a few frames):
      * ASL mode (default): classifies the hand pose with the Keras model
        and types the predicted character via pyautogui.
      * Gesture mode: moves the mouse with the index fingertip and maps
        pinch / paired-finger / single-finger poses to clicks and volume.

    pyautogui and keyboard are optional (guarded imports at module top may
    leave them None); all uses below are guarded so the class still runs
    for recognition/typed-text capture on headless systems.
    """

    def __init__(self, model_path='asl_model.h5', encoder_path='label_encoder.pkl'):
        """Load the ASL classifier and set up MediaPipe hand tracking.

        Args:
            model_path: path to the trained Keras ASL model (.h5).
            encoder_path: path to the pickled label encoder (joblib).
        """
        # Load ASL model and encoder (compile=False: inference only).
        self.model = load_model(model_path, compile=False)
        self.le = joblib.load(encoder_path)
        # Setup MediaPipe Hands.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7,
        )
        # Screen metrics for mouse control. FIX: pyautogui may be None
        # (guarded import); fall back to a nominal resolution instead of
        # crashing with AttributeError on pyautogui.size().
        if pyautogui is not None:
            self.screen_w, self.screen_h = pyautogui.size()
        else:
            self.screen_w, self.screen_h = 1920, 1080
        # Mode: False = ASL typing, True = mouse/gesture control.
        self.gesture_mode = False
        self.switch_cooldown = 5.0      # seconds between mode toggles
        self.last_switch_time = 0
        self.queue = deque(maxlen=4)    # recent simple-gesture history
        # Mouse smoothing state (pixels).
        self.prev_x = 0
        self.prev_y = 0
        self.smooth = 0.05              # fraction of remaining distance per frame
        self.deadzone = 0.05            # normalized motion below this is ignored
        self.last_right = 0
        self.right_cooldown = 1.0       # seconds between right clicks
        # ASL cooldown so one held pose is not typed every frame.
        self.asl_cooldown = 2.0
        self.last_asl = 0
        self.last_label = None
        self.typed_text = ""
        # Special labels mapped to pyautogui key names.
        self.map = {'space': 'space',
                    'del': 'backspace', 'delete': 'backspace'}
        self.running = False
        self.cam = None

    def set_webcam(self, cam):
        """Attach a cv2.VideoCapture, releasing any previously attached one."""
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = cam

    def start(self):
        """Run the capture/recognition loop until stop(), ESC, or read failure.

        Raises:
            RuntimeError: if no opened webcam has been attached.
        """
        if not (self.cam and self.cam.isOpened()):
            raise RuntimeError("Call set_webcam() first.")
        self.typed_text = ""
        self.running = True
        self._loop()

    def stop(self):
        """Stop the loop and release the webcam."""
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        self.cam = None

    def get_typed_text(self):
        """Return the text accumulated in ASL mode since the last start()."""
        return self.typed_text

    def _normalize(self, flat):
        """Wrist-center and scale a flat [x0, y0, z0, x1, ...] landmark list.

        Translates so the first landmark (wrist) is the origin, then scales
        so the farthest landmark lies at distance 1. Returns a flat numpy
        array of the same length. FIX: empty input returns an empty array
        instead of raising IndexError on arr[0].
        """
        if not flat:
            return np.zeros(0)
        w = flat[:3]
        pts = [[flat[i] - w[0], flat[i + 1] - w[1], flat[i + 2] - w[2]]
               for i in range(0, len(flat), 3)]
        arr = np.array(pts)
        m = np.max(np.linalg.norm(arr - arr[0], axis=1))
        return (arr / m).flatten() if m > 0 else arr.flatten()

    def _finger_states(self, lm):
        """Return [thumb, index, middle, ring, pinky] extended flags.

        Thumb uses tip-right-of-IP in x (frame is mirrored before
        processing); the other fingers use tip-above-PIP in y.
        """
        H = self.mp_hands
        st = [lm[H.HandLandmark.THUMB_TIP].x > lm[H.HandLandmark.THUMB_IP].x]
        for tip, pip in [(H.HandLandmark.INDEX_FINGER_TIP, H.HandLandmark.INDEX_FINGER_PIP),
                         (H.HandLandmark.MIDDLE_FINGER_TIP, H.HandLandmark.MIDDLE_FINGER_PIP),
                         (H.HandLandmark.RING_FINGER_TIP, H.HandLandmark.RING_FINGER_PIP),
                         (H.HandLandmark.PINKY_TIP, H.HandLandmark.PINKY_PIP)]:
            st.append(lm[tip].y < lm[pip].y)
        return st

    def _detect_simple(self, lm):
        """Classify one frame's landmarks into a simple gesture.

        Returns 'thumb_ring', 'Pinch', 'Right Click', 'Volume Up',
        'Volume Down', or None. Checks run in priority order; thresholds
        are in normalized image coordinates.
        """
        st = self._finger_states(lm)
        H = self.mp_hands
        # Thumb touching ring fingertip, index+middle up: mode toggle.
        d = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x - lm[H.HandLandmark.RING_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y - lm[H.HandLandmark.RING_FINGER_TIP].y,
        ])
        if d < 0.05 and st[1] and st[2] and not st[3] and not st[4]:
            return 'thumb_ring'
        # Thumb-index pinch: left click.
        dp = np.linalg.norm([
            lm[H.HandLandmark.THUMB_TIP].x - lm[H.HandLandmark.INDEX_FINGER_TIP].x,
            lm[H.HandLandmark.THUMB_TIP].y - lm[H.HandLandmark.INDEX_FINGER_TIP].y,
        ])
        if dp < 0.05 and st[1]:
            return 'Pinch'
        # Index and middle fingertips together: right click.
        dr = np.linalg.norm([
            lm[H.HandLandmark.INDEX_FINGER_TIP].x - lm[H.HandLandmark.MIDDLE_FINGER_TIP].x,
            lm[H.HandLandmark.INDEX_FINGER_TIP].y - lm[H.HandLandmark.MIDDLE_FINGER_TIP].y,
        ])
        if dr < 0.06 and st[1] and st[2] and not st[3] and not st[4]:
            return 'Right Click'
        # Thumb only / pinky only: volume keys.
        if st == [True, False, False, False, False]:
            return 'Volume Up'
        if st == [False, False, False, False, True]:
            return 'Volume Down'
        return None

    def _loop(self):
        """Main frame loop: read, detect, act, until stopped.

        FIX: the original re-imported pyautogui/keyboard here without a
        guard, which defeated the optional imports at module top and raised
        ImportError on systems without them. We now use the module-level
        (possibly None) references and skip the corresponding actions.
        """
        while self.running and self.cam.isOpened():
            ret, frame = self.cam.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror so motion feels natural
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = self.hands.process(rgb)
            now = time.time()
            simple = None
            if res.multi_hand_landmarks:
                hand = res.multi_hand_landmarks[0]
                lm = hand.landmark
                simple = self._detect_simple(lm)
                self.queue.append(simple)
                # Toggle mode when thumb_ring dominates recent frames.
                if (now - self.last_switch_time > self.switch_cooldown
                        and self.queue.count('thumb_ring') >= 3):
                    self.gesture_mode = not self.gesture_mode
                    print(
                        f"Mode now: {'Gesture' if self.gesture_mode else 'ASL'}")
                    self.last_switch_time = now
                    self.queue.clear()
                if self.gesture_mode:
                    if pyautogui is not None:
                        # Mouse behavior every frame: move toward the index
                        # fingertip, ignoring jitter below the deadzone.
                        tip = lm[self.mp_hands.HandLandmark.INDEX_FINGER_TIP]
                        x = int(tip.x * self.screen_w)
                        y = int(tip.y * self.screen_h)
                        dx = abs(tip.x - (self.prev_x / self.screen_w))
                        dy = abs(tip.y - (self.prev_y / self.screen_h))
                        if dx > self.deadzone or dy > self.deadzone:
                            nx = int(self.prev_x + (x - self.prev_x) * self.smooth)
                            ny = int(self.prev_y + (y - self.prev_y) * self.smooth)
                            pyautogui.moveTo(nx, ny)
                            self.prev_x, self.prev_y = nx, ny
                        if simple == 'Pinch':
                            pyautogui.click()
                        elif (simple == 'Right Click'
                              and now - self.last_right > self.right_cooldown):
                            pyautogui.click(button='right')
                            self.last_right = now
                        elif simple == 'Volume Up':
                            pyautogui.press('volumeup')
                        elif simple == 'Volume Down':
                            pyautogui.press('volumedown')
                else:
                    # ASL: classify only when the cooldown has passed
                    # (flattening the landmarks is also hoisted under the
                    # cooldown check -- the original did it every frame).
                    if now - self.last_asl > self.asl_cooldown:
                        flat = [c for p in lm for c in (p.x, p.y, p.z)]
                        data = np.array([self._normalize(flat)])
                        pred = self.model.predict(data, verbose=0)[0]
                        idx = np.argmax(pred)
                        label = self.le.inverse_transform([idx])[0]
                        print(f"ASL: {label}")
                        if label != self.last_label:
                            if label in self.map:
                                if pyautogui is not None:
                                    pyautogui.press(self.map[label])
                            elif len(label) == 1:
                                if pyautogui is not None:
                                    pyautogui.typewrite(label)
                                self.typed_text += label
                        self.last_label = label
                        self.last_asl = now
            # ESC to exit (keyboard hook is optional).
            if keyboard is not None and keyboard.is_pressed('esc'):
                break
        self.running = False
        if self.cam and self.cam.isOpened():
            self.cam.release()
        # FIX: clear the handle so state matches stop().
        self.cam = None
        print("GestureControl stopped.")