Spaces:
Runtime error
Runtime error
| # app.py | |
| import gradio as gr | |
| import mediapipe as mp | |
| import numpy as np | |
| import joblib | |
| import tensorflow as tf | |
| from collections import deque | |
| import cv2 | |
# --- Configuration ---------------------------------------------------------
SEQ_LEN = 30                      # number of consecutive frames fed to the LSTM per prediction
MODEL_PATH = "gesture_lstm.h5"    # trained Keras sequence classifier (loaded below)
LABELS_PATH = "labels.joblib"     # label encoder; inverse_transform() is used for class names

# Shorthand for the MediaPipe Hands solution module.
mp_hands = mp.solutions.hands

# load model and labels
# NOTE(review): both loads happen at import time; a missing file crashes app startup.
model = tf.keras.models.load_model(MODEL_PATH)
le = joblib.load(LABELS_PATH)     # presumably a fitted sklearn LabelEncoder — confirm

# buffer to hold sequence of vectors (global, per app instance)
# NOTE(review): one shared deque means concurrent viewers interleave frames —
# acceptable for a single-user demo only; verify intended usage.
buffer = deque(maxlen=SEQ_LEN)
def extract_landmarks_from_image(img):
    """Extract a flat (63,) float32 landmark vector from a single RGB image.

    Args:
        img: numpy array in RGB order (gradio webcam returns RGB).

    Returns:
        (vec, res): vec is a (21*3,) float32 array of (x, y, z) per landmark
        for the first detected hand, or all zeros when no hand is found;
        res is the raw MediaPipe result object.
    """
    # FIX: a fresh Hands instance has no tracking state, so it must run in
    # static_image_mode=True. The original passed False (video/tracking mode),
    # which makes MediaPipe treat each call as a mid-stream frame and can skip
    # detection entirely. The streaming path uses mp_hands_persistent instead.
    with mp_hands.Hands(static_image_mode=True,
                        max_num_hands=1,
                        min_detection_confidence=0.5,
                        min_tracking_confidence=0.5) as hands:
        res = hands.process(img)
    if res.multi_hand_landmarks:
        lm = res.multi_hand_landmarks[0]
        vec = []
        for p in lm.landmark:
            vec.extend([p.x, p.y, p.z])
        return np.array(vec, dtype=np.float32), res
    # No hand detected: keep the vector shape stable with zeros.
    return np.zeros(21 * 3, dtype=np.float32), res
# We will keep a persistent mediapipe Hands object across calls for speed:
# static_image_mode=False enables landmark tracking between consecutive webcam
# frames, which is both faster and smoother than re-detecting every frame.
# NOTE(review): this object is never .close()d; fine for a long-lived app process.
mp_hands_persistent = mp.solutions.hands.Hands(static_image_mode=False,
                                               max_num_hands=1,
                                               min_detection_confidence=0.5,
                                               min_tracking_confidence=0.5)
def predict_frame(frame):
    """Process one webcam frame and produce an annotated image plus a label.

    Args:
        frame: numpy array in RGB from the gradio webcam stream (may be None
            before the webcam has warmed up).

    Returns:
        (image, label_text): the frame with hand landmarks drawn on it, and a
        prediction string "label (confidence%)" once SEQ_LEN frames have been
        buffered, otherwise a buffering placeholder.
    """
    global buffer, mp_hands_persistent
    # FIX: gradio streaming can deliver None frames; the original crashed on
    # frame.copy() in that case. Skip them without touching the buffer.
    if frame is None:
        return frame, "No prediction (buffering...)"
    image = frame.copy()
    # MediaPipe expects RGB -> gradio already gives RGB
    res = mp_hands_persistent.process(image)
    if res.multi_hand_landmarks:
        lm = res.multi_hand_landmarks[0]
        # Flatten 21 landmarks into a (63,) vector of (x, y, z) coordinates.
        vec = np.array([c for p in lm.landmark for c in (p.x, p.y, p.z)],
                       dtype=np.float32)
        # draw landmarks on image (convert to BGR for cv2 drawing)
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        mp.solutions.drawing_utils.draw_landmarks(img_bgr, lm, mp_hands.HAND_CONNECTIONS)
        image = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    else:
        # No hand in frame: push zeros so the sequence length stays consistent.
        vec = np.zeros(21 * 3, dtype=np.float32)
    buffer.append(vec)

    label_text = "No prediction (buffering...)"
    confidence = 0.0
    if len(buffer) == SEQ_LEN:
        seq = np.stack(buffer, axis=0)  # (seq_len, features)
        # Per-window normalization.
        # NOTE(review): assumes the model was trained with the same per-sample
        # mean/std normalization — confirm against the training pipeline.
        mean = seq.mean(axis=0)
        std = seq.std(axis=0) + 1e-8  # epsilon guards constant (e.g. all-zero) features
        seq = (seq - mean) / std
        seq = np.expand_dims(seq, axis=0)  # (1, seq_len, features)
        probs = model.predict(seq, verbose=0)[0]
        idx = np.argmax(probs)
        label = le.inverse_transform([idx])[0]
        confidence = float(probs[idx])
        label_text = f"{label} ({confidence*100:.1f}%)"
    return image, label_text
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Air Hacking / Security Gesture Simulator\nPoint your webcam and perform a stored gesture. The model predicts after it has collected enough frames.")
    with gr.Row():
        # FIX: Gradio 4.x removed Image(source=..., tool=...) — the old
        # kwargs raise TypeError at startup (the Space's "Runtime error").
        # The modern equivalent is sources=["webcam"]; `tool` has no replacement.
        webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy")
        output_label = gr.Textbox(label="Prediction")

    def process_frame(frame):
        # Thin wrapper so the stream event has a plain module-level target.
        img, label = predict_frame(frame)
        return img, label

    # FIX: stream() needs explicit inputs; the original omitted them. The old
    # `every=0.06` kwarg is not a valid stream() parameter in Gradio 4 — the
    # client streams frames at its own rate. TODO(review): confirm against the
    # pinned gradio version; use stream_every= if throttling is needed.
    webcam.stream(fn=process_frame, inputs=webcam, outputs=[webcam, output_label])
    gr.Markdown("**Security notes:** Use this demo for demonstration only. See README for production hardening tips.")

if __name__ == "__main__":
    # 0.0.0.0 binding is required for containerized hosting (e.g. HF Spaces).
    demo.launch(server_name="0.0.0.0", server_port=7860)