MatteoFasulo committed on
Commit
32e7d7d
·
1 Parent(s): 5a4a2ee

refactor: window size parameter naming and update documentation for clarity

Browse files
scripts/README.md CHANGED
@@ -8,13 +8,15 @@ Remember to add the flag `--download_data` if the dataset is not downloaded yet.
8
 
9
  Substitute the `$DATA_PATH` environment variable with your path for saving the dataset.
10
 
 
 
11
  The required libraries for running the scripts are located inside the `requirements.txt` file.
12
 
13
  ## Pretraining Datasets
14
 
15
- For the pretraining:
16
 
17
- ### emg2pose
18
 
19
  ```bash
20
  python scripts/emg2pose.py \
@@ -24,7 +26,7 @@ python scripts/emg2pose.py \
24
  --stride 500
25
  ```
26
 
27
- ### Ninapro DB6
28
 
29
  ```bash
30
  python scripts/db6.py \
@@ -34,7 +36,7 @@ python scripts/db6.py \
34
  --stride 500
35
  ```
36
 
37
- ### Ninapro DB7
38
 
39
  ```bash
40
  python scripts/db7.py \
@@ -48,9 +50,9 @@ python scripts/db7.py \
48
 
49
  ## Downstream Datasets
50
 
51
- For the downstream tasks:
52
 
53
- ### Ninapro DB5 (200 ms, 25% overlap)
54
 
55
  ```bash
56
  python scripts/db5.py \
@@ -60,7 +62,7 @@ python scripts/db5.py \
60
  --stride 50
61
  ```
62
 
63
- ### Ninapro DB5 (1000 ms, 25% overlap)
64
 
65
  ```bash
66
  python scripts/db5.py \
@@ -70,7 +72,7 @@ python scripts/db5.py \
70
  --stride 250
71
  ```
72
 
73
- ### EMG-EPN612 (200 ms)
74
 
75
  ```bash
76
  python scripts/epn.py \
@@ -81,7 +83,7 @@ python scripts/epn.py \
81
  --window_size 200
82
  ```
83
 
84
- ### EMG-EPN612 (1000 ms)
85
 
86
  ```bash
87
  python scripts/epn.py \
@@ -92,27 +94,27 @@ python scripts/epn.py \
92
  --window_size 1000
93
  ```
94
 
95
- ### UCI EMG (200 ms, 25% overlap)
96
 
97
  ```bash
98
  python scripts/uci.py \
99
  --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
100
  --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
101
- --window_size 200 \
102
  --stride 50
103
  ```
104
 
105
- ### UCI EMG (1000 ms, 25% overlap)
106
 
107
  ```bash
108
  python scripts/uci.py \
109
  --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
110
  --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
111
- --window_size 1000 \
112
  --stride 250
113
  ```
114
 
115
- ### Ninapro DB8 (200 ms, no overlap)
116
 
117
  ```bash
118
  python scripts/db8.py \
@@ -122,7 +124,7 @@ python scripts/db8.py \
122
  --stride 200
123
  ```
124
 
125
- ### Ninapro DB8 (1000 ms, no overlap)
126
 
127
  ```bash
128
  python scripts/db8.py \
 
8
 
9
  Substitute the `$DATA_PATH` environment variable with your path for saving the dataset.
10
 
11
+ The `seq_len` parameter in the scripts corresponds to the window size in samples, and the `stride` parameter corresponds to the step size between windows in samples. The sampling rate for the pretraining datasets is 2 kHz, while for the downstream datasets it is either 200 Hz or 2 kHz depending on the dataset.
12
+
13
  The required libraries for running the scripts are located inside the `requirements.txt` file.
14
 
15
  ## Pretraining Datasets
16
 
17
+ For the pretraining datasets, we use a window size of 0.5 seconds with 50% overlap at a 2 kHz sampling rate:
18
 
19
+ ### emg2pose (0.5 sec, 50% overlap)
20
 
21
  ```bash
22
  python scripts/emg2pose.py \
 
26
  --stride 500
27
  ```
28
 
29
+ ### Ninapro DB6 (0.5 sec, 50% overlap)
30
 
31
  ```bash
32
  python scripts/db6.py \
 
36
  --stride 500
37
  ```
38
 
39
+ ### Ninapro DB7 (0.5 sec, 50% overlap)
40
 
41
  ```bash
42
  python scripts/db7.py \
 
50
 
51
  ## Downstream Datasets
52
 
53
+ For the downstream tasks, gesture classification is performed on the Ninapro DB5, EMG-EPN612, and UCI EMG datasets (200 Hz), while regression is performed on Ninapro DB8 (2 kHz).
54
 
55
+ ### Ninapro DB5 (1 sec, 75% overlap)
56
 
57
  ```bash
58
  python scripts/db5.py \
 
62
  --stride 50
63
  ```
64
 
65
+ ### Ninapro DB5 (5 sec, 75% overlap)
66
 
67
  ```bash
68
  python scripts/db5.py \
 
72
  --stride 250
73
  ```
74
 
75
+ ### EMG-EPN612 (1 sec, no overlap)
76
 
77
  ```bash
78
  python scripts/epn.py \
 
83
  --seq_len 200
84
  ```
85
 
86
+ ### EMG-EPN612 (5 sec, no overlap)
87
 
88
  ```bash
89
  python scripts/epn.py \
 
94
  --seq_len 1000
95
  ```
96
 
97
+ ### UCI EMG (1 sec, 75% overlap)
98
 
99
  ```bash
100
  python scripts/uci.py \
101
  --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
102
  --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
103
+ --seq_len 200 \
104
  --stride 50
105
  ```
106
 
107
+ ### UCI EMG (5 sec, 75% overlap)
108
 
109
  ```bash
110
  python scripts/uci.py \
111
  --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
112
  --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
113
+ --seq_len 1000 \
114
  --stride 250
115
  ```
116
 
117
+ ### Ninapro DB8 (100 ms, no overlap)
118
 
119
  ```bash
120
  python scripts/db8.py \
 
124
  --stride 200
125
  ```
126
 
127
+ ### Ninapro DB8 (500 ms, no overlap)
128
 
129
  ```bash
130
  python scripts/db8.py \
scripts/db5.py CHANGED
@@ -7,6 +7,8 @@ import scipy.io
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
 
 
10
 
11
  # ==== Data augmentation functions ====
12
  def random_amplitude_scale(sig, scale_range=(0.9, 1.1)):
@@ -101,10 +103,12 @@ def main():
101
  args.add_argument("--data_dir", type=str)
102
  args.add_argument("--save_dir", type=str)
103
  args.add_argument(
104
- "--window_size", type=int, help="Size of the sliding window for segmentation."
105
  )
106
  args.add_argument(
107
- "--stride", type=int, help="Stride for the sliding window segmentation."
 
 
108
  )
109
  args = args.parse_args()
110
 
@@ -127,7 +131,11 @@ def main():
127
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
128
 
129
  fs = 200.0 # original sampling rate
130
- window_size, stride = args.window_size, args.stride
 
 
 
 
131
  train_reps = [1, 3, 4, 6]
132
  val_reps = [2]
133
  test_reps = [5]
 
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
10
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
11
+
12
 
13
  # ==== Data augmentation functions ====
14
  def random_amplitude_scale(sig, scale_range=(0.9, 1.1)):
 
103
  args.add_argument("--data_dir", type=str)
104
  args.add_argument("--save_dir", type=str)
105
  args.add_argument(
106
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
107
  )
108
  args.add_argument(
109
+ "--stride",
110
+ type=int,
111
+ help="Step size between windows in samples for segmentation.",
112
  )
113
  args = args.parse_args()
114
 
 
131
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
132
 
133
  fs = 200.0 # original sampling rate
134
+ window_size, stride = args.seq_len, args.stride
135
+
136
+ window_seconds = sequence_to_seconds(window_size, fs)
137
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
138
+
139
  train_reps = [1, 3, 4, 6]
140
  val_reps = [2]
141
  test_reps = [5]
scripts/db6.py CHANGED
@@ -7,6 +7,8 @@ import scipy.io
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
 
 
10
 
11
  # ─────────────── Filtering ──────────────────
12
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -56,10 +58,12 @@ def main():
56
  args.add_argument("--data_dir", type=str)
57
  args.add_argument("--save_dir", type=str)
58
  args.add_argument(
59
- "--window_size", type=int, help="Size of the sliding window for segmentation."
60
  )
61
  args.add_argument(
62
- "--stride", type=int, help="Stride for the sliding window segmentation."
 
 
63
  )
64
  args = args.parse_args()
65
  data_dir = args.data_dir # input folder with .mat files
@@ -86,7 +90,10 @@ def main():
86
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
87
 
88
  fs = 2000.0
89
- window_size, stride = args.window_size, args.stride
 
 
 
90
 
91
  train_reps = list(range(1, 9)) # 1–8
92
  val_reps = [9, 10] # 9–10
 
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
10
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
11
+
12
 
13
  # ─────────────── Filtering ──────────────────
14
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
 
58
  args.add_argument("--data_dir", type=str)
59
  args.add_argument("--save_dir", type=str)
60
  args.add_argument(
61
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
62
  )
63
  args.add_argument(
64
+ "--stride",
65
+ type=int,
66
+ help="Step size between windows in samples for segmentation.",
67
  )
68
  args = args.parse_args()
69
  data_dir = args.data_dir # input folder with .mat files
 
90
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
91
 
92
  fs = 2000.0
93
+ window_size, stride = args.seq_len, args.stride
94
+
95
+ window_seconds = sequence_to_seconds(window_size, fs)
96
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
97
 
98
  train_reps = list(range(1, 9)) # 1–8
99
  val_reps = [9, 10] # 9–10
scripts/db7.py CHANGED
@@ -7,6 +7,8 @@ import scipy.io
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
 
 
10
 
11
  # ─────────────── Filtering ──────────────────
12
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -56,16 +58,12 @@ def main():
56
  args.add_argument("--data_dir", type=str)
57
  args.add_argument("--save_dir", type=str)
58
  args.add_argument(
59
- "--window_size",
60
- type=int,
61
- default=256,
62
- help="Size of the sliding window for segmentation.",
63
  )
64
  args.add_argument(
65
  "--stride",
66
  type=int,
67
- default=128,
68
- help="Stride for the sliding window segmentation.",
69
  )
70
  args = args.parse_args()
71
  data_dir = args.data_dir # input folder with .mat files
@@ -87,7 +85,10 @@ def main():
87
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
88
 
89
  fs = 2000.0
90
- window_size, stride = args.window_size, args.stride
 
 
 
91
 
92
  train_reps = [1, 2, 3, 4] # 1–4
93
  val_reps = [5] # 5
 
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
10
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
11
+
12
 
13
  # ─────────────── Filtering ──────────────────
14
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
 
58
  args.add_argument("--data_dir", type=str)
59
  args.add_argument("--save_dir", type=str)
60
  args.add_argument(
61
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
 
 
 
62
  )
63
  args.add_argument(
64
  "--stride",
65
  type=int,
66
+ help="Step size between windows in samples for segmentation.",
 
67
  )
68
  args = args.parse_args()
69
  data_dir = args.data_dir # input folder with .mat files
 
85
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
86
 
87
  fs = 2000.0
88
+ window_size, stride = args.seq_len, args.stride
89
+
90
+ window_seconds = sequence_to_seconds(window_size, fs)
91
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
92
 
93
  train_reps = [1, 2, 3, 4] # 1–4
94
  val_reps = [5] # 5
scripts/db8.py CHANGED
@@ -9,6 +9,8 @@ from joblib import Parallel, delayed
9
  from scipy.signal import iirnotch
10
  from tqdm import tqdm
11
 
 
 
12
  _MATRIX_DOF2DOA_TRANSPOSED = np.array(
13
  # https://www.frontiersin.org/articles/10.3389/fnins.2019.00891/full
14
  # Open supplemental data > Data Sheet 1.PDF >
@@ -127,10 +129,12 @@ def main():
127
  args.add_argument("--data_dir", type=str, required=True)
128
  args.add_argument("--save_dir", type=str, required=True)
129
  args.add_argument(
130
- "--window_size", type=int, help="Size of the sliding window for segmentation."
131
  )
132
  args.add_argument(
133
- "--stride", type=int, help="Stride for the sliding window segmentation."
 
 
134
  )
135
  args.add_argument(
136
  "--n_jobs", type=int, default=-1, help="Number of parallel jobs to run."
@@ -158,6 +162,11 @@ def main():
158
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
159
 
160
  fs = 2000.0 # Hz
 
 
 
 
 
161
 
162
  # collect all .mat paths
163
  mat_paths = [
@@ -168,7 +177,7 @@ def main():
168
 
169
  # run in parallel
170
  results = Parallel(n_jobs=min(os.cpu_count(), args.n_jobs), verbose=5)(
171
- delayed(process_mat_file)(mp, args.window_size, args.stride, fs)
172
  for mp in mat_paths
173
  )
174
 
 
9
  from scipy.signal import iirnotch
10
  from tqdm import tqdm
11
 
12
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
13
+
14
  _MATRIX_DOF2DOA_TRANSPOSED = np.array(
15
  # https://www.frontiersin.org/articles/10.3389/fnins.2019.00891/full
16
  # Open supplemental data > Data Sheet 1.PDF >
 
129
  args.add_argument("--data_dir", type=str, required=True)
130
  args.add_argument("--save_dir", type=str, required=True)
131
  args.add_argument(
132
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
133
  )
134
  args.add_argument(
135
+ "--stride",
136
+ type=int,
137
+ help="Step size between windows in samples for segmentation.",
138
  )
139
  args.add_argument(
140
  "--n_jobs", type=int, default=-1, help="Number of parallel jobs to run."
 
162
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
163
 
164
  fs = 2000.0 # Hz
165
+ window_size, stride = args.seq_len, args.stride
166
+
167
+ window_seconds = sequence_to_seconds(window_size, fs)
168
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
169
+
170
 
171
  # collect all .mat paths
172
  mat_paths = [
 
177
 
178
  # run in parallel
179
  results = Parallel(n_jobs=min(os.cpu_count(), args.n_jobs), verbose=5)(
180
+ delayed(process_mat_file)(mp, window_size, stride, fs)
181
  for mp in mat_paths
182
  )
183
 
scripts/emg2pose.py CHANGED
@@ -4,12 +4,13 @@ from pathlib import Path
4
  import h5py
5
  import numpy as np
6
  import pandas as pd
7
- import scipy.io
8
  import scipy.signal as signal
9
  from joblib import Parallel, delayed
10
  from scipy.signal import iirnotch
11
  from tqdm import tqdm
12
 
 
 
13
 
14
  # ==== Filter functions (operate at original fs=2000) ====
15
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -78,10 +79,10 @@ def main():
78
  args.add_argument("--data_dir", type=str)
79
  args.add_argument("--save_dir", type=str)
80
  args.add_argument(
81
- "--window_size", type=int, help="Size of the sliding window for segmentation."
82
  )
83
  args.add_argument(
84
- "--stride", type=int, help="Stride for the sliding window segmentation."
85
  )
86
  args.add_argument(
87
  "--subsample", type=float, default=1.0, help="Whether to subsample the data"
@@ -102,7 +103,10 @@ def main():
102
  os.makedirs(save_dir, exist_ok=True)
103
 
104
  fs = 2000.0 # original sampling rate
105
- window_size, stride = args.window_size, args.stride
 
 
 
106
 
107
  df = pd.read_csv(os.path.join(data_dir, "metadata.csv"))
108
  df = df.groupby("split").apply(
 
4
  import h5py
5
  import numpy as np
6
  import pandas as pd
 
7
  import scipy.signal as signal
8
  from joblib import Parallel, delayed
9
  from scipy.signal import iirnotch
10
  from tqdm import tqdm
11
 
12
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
13
+
14
 
15
  # ==== Filter functions (operate at original fs=2000) ====
16
  def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
 
79
  args.add_argument("--data_dir", type=str)
80
  args.add_argument("--save_dir", type=str)
81
  args.add_argument(
82
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
83
  )
84
  args.add_argument(
85
+ "--stride", type=int, help="Step size between windows in samples for segmentation."
86
  )
87
  args.add_argument(
88
  "--subsample", type=float, default=1.0, help="Whether to subsample the data"
 
103
  os.makedirs(save_dir, exist_ok=True)
104
 
105
  fs = 2000.0 # original sampling rate
106
+ window_size, stride = args.seq_len, args.stride
107
+
108
+ window_seconds = sequence_to_seconds(window_size, fs)
109
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
110
 
111
  df = pd.read_csv(os.path.join(data_dir, "metadata.csv"))
112
  df = df.groupby("split").apply(
scripts/epn.py CHANGED
@@ -10,6 +10,8 @@ from joblib import Parallel, delayed
10
  from scipy.signal import iirnotch
11
  from tqdm.auto import tqdm
12
 
 
 
13
  # Sampling frequency and EMG channels
14
  tfs, n_ch = 200.0, 8
15
 
@@ -122,7 +124,9 @@ def main():
122
  parser.add_argument("--source_training", required=True)
123
  parser.add_argument("--source_testing", required=True)
124
  parser.add_argument("--dest_dir", required=True)
125
- parser.add_argument("--window_size", type=int, required=True)
 
 
126
  parser.add_argument("--n_jobs", type=int, default=-1)
127
  args = parser.parse_args()
128
  data_dir = args.data_dir
@@ -142,7 +146,11 @@ def main():
142
  print(f"Downloaded and unzipped dataset\n{data_dir}/EMG-EPN612_Dataset.zip")
143
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
144
 
145
- seq_len = args.window_size
 
 
 
 
146
  train_X, train_y, val_X, val_y, test_X, test_y = [], [], [], [], [], []
147
 
148
  paths = glob.glob(os.path.join(args.source_training, "user*", "user*.json"))
 
10
  from scipy.signal import iirnotch
11
  from tqdm.auto import tqdm
12
 
13
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
14
+
15
  # Sampling frequency and EMG channels
16
  tfs, n_ch = 200.0, 8
17
 
 
124
  parser.add_argument("--source_training", required=True)
125
  parser.add_argument("--source_testing", required=True)
126
  parser.add_argument("--dest_dir", required=True)
127
+ parser.add_argument(
128
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
129
+ )
130
  parser.add_argument("--n_jobs", type=int, default=-1)
131
  args = parser.parse_args()
132
  data_dir = args.data_dir
 
146
  print(f"Downloaded and unzipped dataset\n{data_dir}/EMG-EPN612_Dataset.zip")
147
  sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
148
 
149
+ seq_len = args.seq_len
150
+
151
+ window_seconds = sequence_to_seconds(seq_len, tfs)
152
+ print(f"Window size: {seq_len} samples ({window_seconds:.2f} seconds)")
153
+
154
  train_X, train_y, val_X, val_y, test_X, test_y = [], [], [], [], [], []
155
 
156
  paths = glob.glob(os.path.join(args.source_training, "user*", "user*.json"))
scripts/uci.py CHANGED
@@ -7,6 +7,8 @@ import numpy as np
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
 
 
10
 
11
  # ─────────────────────────────────────────────
12
  # Filtering utilities
@@ -152,8 +154,14 @@ if __name__ == "__main__":
152
  required=True,
153
  help="Directory to save the output h5 files",
154
  )
155
- arg.add_argument("--window_size", type=int, help="Window size for sliding window")
156
- arg.add_argument("--stride", type=int, help="Stride for sliding window")
 
 
 
 
 
 
157
  args = arg.parse_args()
158
 
159
  data_root = args.data_dir
@@ -173,7 +181,10 @@ if __name__ == "__main__":
173
  sys.exit("Rerun without --download_data.")
174
 
175
  fs = 200.0 # sampling rate of MYO bracelet
176
- window_size, stride = args.window_size, args.stride
 
 
 
177
 
178
  split_map = {
179
  "train": list(range(1, 25)), # 1–24
 
7
  import scipy.signal as signal
8
  from scipy.signal import iirnotch
9
 
10
+ sequence_to_seconds = lambda seq_len, fs: seq_len / fs
11
+
12
 
13
  # ─────────────────────────────────────────────
14
  # Filtering utilities
 
154
  required=True,
155
  help="Directory to save the output h5 files",
156
  )
157
+ arg.add_argument(
158
+ "--seq_len", type=int, help="Size of the window in samples for segmentation."
159
+ )
160
+ arg.add_argument(
161
+ "--stride",
162
+ type=int,
163
+ help="Step size between windows in samples for segmentation.",
164
+ )
165
  args = arg.parse_args()
166
 
167
  data_root = args.data_dir
 
181
  sys.exit("Rerun without --download_data.")
182
 
183
  fs = 200.0 # sampling rate of MYO bracelet
184
+ window_size, stride = args.seq_len, args.stride
185
+
186
+ window_seconds = sequence_to_seconds(window_size, fs)
187
+ print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
188
 
189
  split_map = {
190
  "train": list(range(1, 25)), # 1–24