# AirHacking-Security / collect_data.py
# kuldeep0204's picture
# Create collect_data.py
# a7586d6 verified
# collect_data.py
import cv2
import mediapipe as mp
import numpy as np
import os
import time
from collections import deque
# --- Data-collection settings ---------------------------------------------
# Root output folder; each label gets its own sub-folder underneath it.
DATA_DIR = "gesture_data"
# Number of frames captured for every recorded sample.
SEQUENCE_LENGTH = 30
# Target number of samples to gather for each label.
EXAMPLES_PER_LABEL = 50
# Gesture names to record, in recording order (update as needed).
LABELS = [
    "air_lock",
    "swipe_left",
    "swipe_right",
    "circle",
    "hug",
]

# Make sure the output root exists as soon as the module is loaded.
os.makedirs(DATA_DIR, exist_ok=True)
# Short aliases for the MediaPipe sub-modules used in this file: the hand
# solution (Hands, HAND_CONNECTIONS) and the landmark-drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
def extract_landmarks(hand_landmarks):
    """Flatten one hand's landmarks into a 1-D float32 feature vector.

    Args:
        hand_landmarks: An object exposing a ``landmark`` iterable whose items
            carry ``x``, ``y`` and ``z`` attributes, or ``None`` when no hand
            was detected.

    Returns:
        A float32 array laid out as ``[x0, y0, z0, x1, y1, z1, ...]``. When
        ``hand_landmarks`` is ``None`` the result is 63 zeros (21 points * 3),
        so a missing hand yields a vector of the size expected for a
        21-point hand.
    """
    if hand_landmarks is None:
        return np.zeros(21 * 3, dtype=np.float32)

    # Walk the points in order and emit each point's three coordinates.
    flat = [
        value
        for point in hand_landmarks.landmark
        for value in (point.x, point.y, point.z)
    ]
    return np.array(flat, dtype=np.float32)
def _detect_and_draw(frame, hands):
    """Detect hands in a BGR frame, draw the first one on it, and return it (or None)."""
    result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not result.multi_hand_landmarks:
        return None
    # Only the first detected hand is used, to keep the feature vector simple.
    first_hand = result.multi_hand_landmarks[0]
    mp_drawing.draw_landmarks(frame, first_hand, mp_hands.HAND_CONNECTIONS)
    return first_hand


def _save_sequence(label, frames):
    """Write the recorded frames to the next unused ``NNNN.npz`` under the label folder."""
    label_dir = os.path.join(DATA_DIR, label)
    os.makedirs(label_dir, exist_ok=True)
    # Start from the entry count (the usual next index) but probe forward so a
    # gap left by a deleted sample or a stray file never overwrites existing data.
    idx = len(os.listdir(label_dir))
    fname = os.path.join(label_dir, f"{idx:04d}.npz")
    while os.path.exists(fname):
        idx += 1
        fname = os.path.join(label_dir, f"{idx:04d}.npz")
    # Stored array is (seq_len, features) under the key "data".
    np.savez_compressed(fname, data=np.stack(frames, axis=0))
    return fname


def capture_label_sequence(label, cap, hands):
    """Show a live preview and record one sample for ``label`` on request.

    The preview runs until the user presses 'r' (record) or 'q' (quit). A
    recording grabs ``SEQUENCE_LENGTH`` consecutive frames, converts each to a
    landmark vector with ``extract_landmarks`` (zeros when no hand is seen),
    and saves the stacked result as a compressed ``.npz`` file under
    ``DATA_DIR/<label>/``.

    Args:
        label: Name of the gesture being recorded; also the output sub-folder.
        cap: Frame source whose ``read()`` returns ``(ok, frame)``.
        hands: Hand detector whose ``process(rgb_frame)`` result exposes
            ``multi_hand_landmarks``.

    Returns:
        True once a sample has been saved, False if the user pressed 'q'.

    Raises:
        RuntimeError: If a frame cannot be read while previewing.
    """
    print(f"Prepare to record label: {label}. Press 'r' to start recording one example.")
    while True:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Failed reading webcam")

        # Preview only needs the overlay; the detected hand itself is not used.
        _detect_and_draw(frame, hands)
        cv2.putText(frame, f"Label: {label} | Press 'r' start, 'q' quit", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Collect Gestures", frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            return False
        if key != ord('r'):
            continue

        # Record one sequence.
        print("Recording...")
        t0 = time.time()
        frames = []
        while len(frames) < SEQUENCE_LENGTH:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(extract_landmarks(_detect_and_draw(frame, hands)))
            cv2.putText(frame, f"Recording... {len(frames)}/{SEQUENCE_LENGTH}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow("Collect Gestures", frame)
            # Pump the GUI event loop so the window refreshes; keys are ignored here.
            cv2.waitKey(1)
        print(f"Finished recording (took {time.time() - t0:.2f}s).")

        if len(frames) < SEQUENCE_LENGTH:
            # A read failed mid-recording: drop the partial sample and go back
            # to the preview instead of saving a short sequence.
            print("Recording incomplete; sample discarded.")
            continue

        fname = _save_sequence(label, frames)
        print(f"Saved {fname}")
        return True
def _count_examples(label_dir):
    """Return how many saved ``.npz`` samples exist in ``label_dir``."""
    # Count only sample files so stray entries do not inflate the progress.
    return sum(1 for name in os.listdir(label_dir) if name.endswith(".npz"))


def main():
    """Record samples for every label in ``LABELS`` until each reaches its target.

    Opens the default webcam (device 0) and, for each label, keeps calling
    ``capture_label_sequence`` until the label folder holds
    ``EXAMPLES_PER_LABEL`` samples or the user quits. The camera and the
    preview windows are released on every exit path, including errors.

    Raises:
        RuntimeError: If the webcam cannot be opened, or if reading a frame
            fails inside ``capture_label_sequence``.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("Cannot open webcam")
        with mp_hands.Hands(static_image_mode=False,
                            max_num_hands=1,
                            min_detection_confidence=0.5,
                            min_tracking_confidence=0.5) as hands:
            for label in LABELS:
                label_dir = os.path.join(DATA_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                cur = _count_examples(label_dir)
                print(f"Label '{label}' currently has {cur} examples. Target: {EXAMPLES_PER_LABEL}")
                while cur < EXAMPLES_PER_LABEL:
                    if not capture_label_sequence(label, cap, hands):
                        print("User requested quit.")
                        return
                    # Recount from disk rather than incrementing, so the
                    # progress always reflects what was actually saved.
                    cur = _count_examples(label_dir)
                    print(f"Now {cur}/{EXAMPLES_PER_LABEL} for label '{label}'")
    finally:
        # Always free the camera and close the preview windows.
        cap.release()
        cv2.destroyAllWindows()
# Run the collection session only when this file is executed as a script.
if __name__ == "__main__":
    main()