# collect_data.py
"""Collect hand-gesture landmark sequences from a webcam for training.

For each label in LABELS, records EXAMPLES_PER_LABEL examples, each a
sequence of SEQUENCE_LENGTH frames.  Every frame is the 21 MediaPipe
hand landmarks flattened to a (63,) float32 vector (x, y, z per point).
Examples are saved as compressed .npz files under DATA_DIR/<label>/.
"""
import cv2
import mediapipe as mp
import numpy as np
import os
import time
from collections import deque

# configuration
DATA_DIR = "gesture_data"
SEQUENCE_LENGTH = 30  # number of frames per sample
EXAMPLES_PER_LABEL = 50
LABELS = ["air_lock", "swipe_left", "swipe_right", "circle", "hug"]  # update as needed

os.makedirs(DATA_DIR, exist_ok=True)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils


def extract_landmarks(hand_landmarks):
    """Flatten one detected hand to a (63,) float32 vector.

    Returns zeros when *hand_landmarks* is None so sequences keep a
    fixed feature width even on frames where tracking drops out.
    """
    if hand_landmarks is None:
        return np.zeros(21 * 3, dtype=np.float32)
    coords = []
    for lm in hand_landmarks.landmark:
        coords.extend([lm.x, lm.y, lm.z])
    return np.array(coords, dtype=np.float32)


def _count_examples(label_dir):
    """Count saved .npz examples only (ignore stray files like .DS_Store)."""
    return sum(1 for f in os.listdir(label_dir) if f.endswith(".npz"))


def _next_free_path(label_dir):
    """Return the first '%04d.npz' path in *label_dir* that does not exist.

    Using len(os.listdir(...)) directly as the index can collide with an
    existing file when an earlier example was deleted; probing forward
    avoids silently overwriting previously recorded data.
    """
    idx = _count_examples(label_dir)
    while True:
        fname = os.path.join(label_dir, f"{idx:04d}.npz")
        if not os.path.exists(fname):
            return fname
        idx += 1


def capture_label_sequence(label, cap, hands):
    """Interactively record one example of *label* from the webcam.

    Shows a live preview window; pressing 'r' records one
    SEQUENCE_LENGTH-frame example and saves it to disk, 'q' aborts.

    Returns:
        True after a recording attempt that produced a full sequence,
        False when the user pressed 'q'.

    Raises:
        RuntimeError: if the webcam stops delivering frames in preview.
    """
    seq = deque(maxlen=SEQUENCE_LENGTH)
    print(f"Prepare to record label: {label}. Press 'r' to start recording one example.")
    while True:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Failed reading webcam")
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res = hands.process(frame_rgb)
        # Preview: only draw the first detected hand (main() sets
        # max_num_hands=1); the landmark vector is extracted only while
        # actually recording below.
        if res.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, res.multi_hand_landmarks[0],
                                      mp_hands.HAND_CONNECTIONS)
        cv2.putText(frame, f"Label: {label} | Press 'r' start, 'q' quit",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Collect Gestures", frame)
        k = cv2.waitKey(1) & 0xFF
        if k == ord('r'):
            # record one sequence
            seq.clear()
            print("Recording...")
            t0 = time.time()
            while len(seq) < SEQUENCE_LENGTH:
                ret, f2 = cap.read()
                if not ret:
                    break  # camera hiccup: abandon this attempt, re-prompt
                f2_rgb = cv2.cvtColor(f2, cv2.COLOR_BGR2RGB)
                r = hands.process(f2_rgb)
                if r.multi_hand_landmarks:
                    vec = extract_landmarks(r.multi_hand_landmarks[0])
                    mp_drawing.draw_landmarks(f2, r.multi_hand_landmarks[0],
                                              mp_hands.HAND_CONNECTIONS)
                else:
                    # hand lost this frame: keep the slot with zeros so
                    # every saved sequence has exactly SEQUENCE_LENGTH rows
                    vec = extract_landmarks(None)
                seq.append(vec)
                cv2.putText(f2, f"Recording... {len(seq)}/{SEQUENCE_LENGTH}",
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.imshow("Collect Gestures", f2)
                cv2.waitKey(1)
            t1 = time.time()
            print(f"Finished recording (took {t1-t0:.2f}s).")
            if len(seq) == SEQUENCE_LENGTH:
                arr = np.stack(seq, axis=0)  # (seq_len, features)
                # save into disk
                label_dir = os.path.join(DATA_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                fname = _next_free_path(label_dir)
                np.savez_compressed(fname, data=arr)
                print(f"Saved {fname}")
                return True
            # short recording (camera failure): stay in the loop and re-prompt
        elif k == ord('q'):
            return False


def main():
    """Open the webcam and collect examples for every label up to quota."""
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Cannot open webcam")
    try:
        with mp_hands.Hands(static_image_mode=False,
                            max_num_hands=1,
                            min_detection_confidence=0.5,
                            min_tracking_confidence=0.5) as hands:
            for label in LABELS:
                label_dir = os.path.join(DATA_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                cur = _count_examples(label_dir)
                print(f"Label '{label}' currently has {cur} examples. Target: {EXAMPLES_PER_LABEL}")
                while cur < EXAMPLES_PER_LABEL:
                    ok = capture_label_sequence(label, cap, hands)
                    if not ok:
                        print("User requested quit.")
                        return
                    cur = _count_examples(label_dir)
                    print(f"Now {cur}/{EXAMPLES_PER_LABEL} for label '{label}'")
    finally:
        # always release the camera and close windows, even if a
        # capture call raised (the original leaked these on error)
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()