File size: 4,536 Bytes
a7586d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# collect_data.py
import cv2
import mediapipe as mp
import numpy as np
import os
import time
from collections import deque

# configuration
# Root directory for recorded samples; each label is stored in its own subdirectory.
DATA_DIR = "gesture_data"
SEQUENCE_LENGTH = 30  # number of frames per sample
# Number of files a label's directory must contain before collection moves on.
EXAMPLES_PER_LABEL = 50
LABELS = ["air_lock", "swipe_left", "swipe_right", "circle", "hug"]  # update as needed

# Runs at import time so the data root exists before any label directory is listed.
os.makedirs(DATA_DIR, exist_ok=True)

# Short aliases for the MediaPipe hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

def extract_landmarks(hand_landmarks):
    """Flatten a hand's landmarks into a 1-D float32 feature vector.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            have ``x``, ``y`` and ``z`` attributes, or ``None`` when no hand
            was detected.

    Returns:
        A float32 array laid out as ``[x0, y0, z0, x1, y1, z1, ...]``. When
        ``hand_landmarks`` is ``None`` the result is a zero vector of length
        ``21 * 3`` so that missing detections keep a fixed feature size.
    """
    if hand_landmarks is not None:
        # One (x, y, z) row per landmark, then collapse the rows into one vector.
        triples = [(pt.x, pt.y, pt.z) for pt in hand_landmarks.landmark]
        return np.array(triples, dtype=np.float32).reshape(-1)
    # No hand detected: placeholder of 21 landmarks * 3 coordinates.
    return np.zeros(21 * 3, dtype=np.float32)

def capture_label_sequence(label, cap, hands):
    """Show a live preview and record one landmark sequence for ``label``.

    The preview loop runs until the user presses ``r`` (record) or ``q``
    (quit). Recording grabs ``SEQUENCE_LENGTH`` consecutive frames, converts
    the first detected hand of each frame to a feature vector (zeros when no
    hand is detected) and saves the stacked array as a compressed ``.npz``
    file under ``DATA_DIR/<label>/``.

    Args:
        label: Gesture name; used for the on-screen text and the output
            subdirectory.
        cap: Opened ``cv2.VideoCapture`` to read frames from.
        hands: MediaPipe ``Hands`` instance used to detect landmarks.

    Returns:
        ``True`` after one sample has been saved, ``False`` if the user
        pressed ``q``.

    Raises:
        RuntimeError: If a frame cannot be read during the preview loop.
    """
    print(f"Prepare to record label: {label}. Press 'r' to start recording one example.")
    while True:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Failed reading webcam")
        # MediaPipe is fed RGB frames while OpenCV delivers BGR.
        res = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if res.multi_hand_landmarks:
            # Preview only: draw the first detected hand; nothing is stored yet.
            mp_drawing.draw_landmarks(frame, res.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS)
        cv2.putText(frame, f"Label: {label} | Press 'r' start, 'q' quit", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Collect Gestures", frame)
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'):
            return False
        if k != ord('r'):
            continue

        # Record one sequence of SEQUENCE_LENGTH feature vectors.
        seq = []
        print("Recording...")
        t0 = time.time()
        while len(seq) < SEQUENCE_LENGTH:
            ret, f2 = cap.read()
            if not ret:
                break
            r = hands.process(cv2.cvtColor(f2, cv2.COLOR_BGR2RGB))
            if r.multi_hand_landmarks:
                vec = extract_landmarks(r.multi_hand_landmarks[0])
                mp_drawing.draw_landmarks(f2, r.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS)
            else:
                vec = extract_landmarks(None)
            seq.append(vec)
            cv2.putText(f2, f"Recording... {len(seq)}/{SEQUENCE_LENGTH}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow("Collect Gestures", f2)
            cv2.waitKey(1)
        t1 = time.time()
        print(f"Finished recording (took {t1-t0:.2f}s).")

        if len(seq) < SEQUENCE_LENGTH:
            # A frame read failed mid-recording; report it instead of dropping
            # the partial sample silently, then return to the preview loop
            # (which raises if the camera is still failing).
            print(f"Recording incomplete ({len(seq)}/{SEQUENCE_LENGTH} frames); sample discarded.")
            continue

        arr = np.stack(seq, axis=0)  # (seq_len, features)
        label_dir = os.path.join(DATA_DIR, label)
        os.makedirs(label_dir, exist_ok=True)
        # Start from the file count, but never reuse a name that already
        # exists: after deletions the count can point at an existing sample,
        # which would otherwise be overwritten.
        idx = len(os.listdir(label_dir))
        fname = os.path.join(label_dir, f"{idx:04d}.npz")
        while os.path.exists(fname):
            idx += 1
            fname = os.path.join(label_dir, f"{idx:04d}.npz")
        np.savez_compressed(fname, data=arr)
        print(f"Saved {fname}")
        return True

def main():
    """Collect gesture samples from the default webcam for every label.

    For each entry in ``LABELS`` the function counts the files already in
    ``DATA_DIR/<label>/`` and keeps recording new samples until that count
    reaches ``EXAMPLES_PER_LABEL`` or the user quits.

    Raises:
        RuntimeError: If the webcam cannot be opened, or if reading a frame
            fails while collecting.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Cannot open webcam")
    try:
        with mp_hands.Hands(static_image_mode=False,
                            max_num_hands=1,
                            min_detection_confidence=0.5,
                            min_tracking_confidence=0.5) as hands:
            for label in LABELS:
                label_dir = os.path.join(DATA_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                cur = len(os.listdir(label_dir))
                print(f"Label '{label}' currently has {cur} examples. Target: {EXAMPLES_PER_LABEL}")
                while cur < EXAMPLES_PER_LABEL:
                    if not capture_label_sequence(label, cap, hands):
                        print("User requested quit.")
                        return
                    # Re-count from disk so progress reflects what was actually saved.
                    cur = len(os.listdir(label_dir))
                    print(f"Now {cur}/{EXAMPLES_PER_LABEL} for label '{label}'")
    finally:
        # Release the camera and close windows on every exit path, including
        # exceptions raised while capturing.
        cap.release()
        cv2.destroyAllWindows()

# Start the collection session only when run as a script, not when imported.
if __name__ == "__main__":
    main()