refactor: window size parameter naming and update documentation for clarity

Browse files

Files changed (8) hide show

scripts/README.md +17 -15
scripts/db5.py +11 -3
scripts/db6.py +10 -3
scripts/db7.py +8 -7
scripts/db8.py +12 -3
scripts/emg2pose.py +8 -4
scripts/epn.py +10 -2
scripts/uci.py +14 -3

scripts/README.md CHANGED Viewed

@@ -8,13 +8,15 @@ Remember to add the flag `--download_data` if the dataset is not downloaded yet.
 Substitute the `$DATA_PATH` environment variable with your path for saving the dataset.
 The required libraries for running the scripts are located inside the `requirements.txt` file.
 ## Pretraining Datasets
-For the pretraining:
-### emg2pose
 ```bash
 python scripts/emg2pose.py \
@@ -24,7 +26,7 @@ python scripts/emg2pose.py \
     --stride 500
 ```
-### Ninapro DB6
 ```bash
 python scripts/db6.py \
@@ -34,7 +36,7 @@ python scripts/db6.py \
     --stride 500
 ```
-### Ninapro DB7
 ```bash
 python scripts/db7.py \
@@ -48,9 +50,9 @@ python scripts/db7.py \
 ## Downstream Datasets
-For the downstream tasks:
-### Ninapro DB5 (200 ms, 25% overlap)
 ```bash
 python scripts/db5.py \
@@ -60,7 +62,7 @@ python scripts/db5.py \
     --stride 50
 ```
-### Ninapro DB5 (1000 ms, 25% overlap)
 ```bash
 python scripts/db5.py \
@@ -70,7 +72,7 @@ python scripts/db5.py \
     --stride 250
 ```
-### EMG-EPN612 (200 ms)
 ```bash
 python scripts/epn.py \
@@ -81,7 +83,7 @@ python scripts/epn.py \
     --window_size 200
 ```
-### EMG-EPN612 (1000 ms)
 ```bash
 python scripts/epn.py \
@@ -92,27 +94,27 @@ python scripts/epn.py \
     --window_size 1000
 ```
-### UCI EMG (200 ms, 25% overlap)
 ```bash
 python scripts/uci.py \
     --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
     --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
-    --window_size 200 \
     --stride 50
 ```
-### UCI EMG (1000 ms, 25% overlap)
 ```bash
 python scripts/uci.py \
     --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
     --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
-    --window_size 1000 \
     --stride 250
 ```
-### Ninapro DB8 (200 ms, no overlap)
 ```bash
 python scripts/db8.py \
@@ -122,7 +124,7 @@ python scripts/db8.py \
     --stride 200
 ```
-### Ninapro DB8 (1000 ms, no overlap)
 ```bash
 python scripts/db8.py \

 Substitute the `$DATA_PATH` environment variable with your path for saving the dataset.
+The `seq_len` parameter in the scripts corresponds to the window size in samples, and the `stride` parameter corresponds to the step size between windows in samples. The sampling rate for the pretraining datasets is 2 kHz, while for the downstream datasets it is either 200 Hz or 2 kHz depending on the dataset.
 The required libraries for running the scripts are located inside the `requirements.txt` file.
 ## Pretraining Datasets
+For the pretraining datasets, we use a window size of 0.5 seconds with 50% overlap at a 2 kHz sampling rate:
+### emg2pose (0.5 sec, 50% overlap)
 ```bash
 python scripts/emg2pose.py \
     --stride 500
 ```
+### Ninapro DB6 (0.5 sec, 50% overlap)
 ```bash
 python scripts/db6.py \
     --stride 500
 ```
+### Ninapro DB7 (0.5 sec, 50% overlap)
 ```bash
 python scripts/db7.py \
 ## Downstream Datasets
+For the downstream tasks, gesture classification is performed on the Ninapro DB5, EMG-EPN612, and UCI EMG datasets (200 Hz), while regression is performed on Ninapro DB8 (2 kHz).
+### Ninapro DB5 (1 sec, 75% overlap; stride = 25% of window)
 ```bash
 python scripts/db5.py \
     --stride 50
 ```
+### Ninapro DB5 (5 sec, 75% overlap; stride = 25% of window)
 ```bash
 python scripts/db5.py \
     --stride 250
 ```
+### EMG-EPN612 (1 sec, no overlap)
 ```bash
 python scripts/epn.py \
     --seq_len 200
 ```
+### EMG-EPN612 (5 sec, no overlap)
 ```bash
 python scripts/epn.py \
     --seq_len 1000
 ```
+### UCI EMG (1 sec, 75% overlap; stride = 25% of window)
 ```bash
 python scripts/uci.py \
     --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
     --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
+    --seq_len 200 \
     --stride 50
 ```
+### UCI EMG (5 sec, 75% overlap; stride = 25% of window)
 ```bash
 python scripts/uci.py \
     --data_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/ \
     --save_dir $DATA_PATH/datasets/UCI_EMG/EMG_data_for_gestures-master/h5/ \
+    --seq_len 1000 \
     --stride 250
 ```
+### Ninapro DB8 (100 ms, no overlap)
 ```bash
 python scripts/db8.py \
     --stride 200
 ```
+### Ninapro DB8 (500 ms, no overlap)
 ```bash
 python scripts/db8.py \

scripts/db5.py CHANGED Viewed

@@ -7,6 +7,8 @@ import scipy.io
 import scipy.signal as signal
 from scipy.signal import iirnotch
 # ==== Data augmentation functions ====
 def random_amplitude_scale(sig, scale_range=(0.9, 1.1)):
@@ -101,10 +103,12 @@ def main():
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
-        "--window_size", type=int, help="Size of the sliding window for segmentation."
     )
     args.add_argument(
-        "--stride", type=int, help="Stride for the sliding window segmentation."
     )
     args = args.parse_args()
@@ -127,7 +131,11 @@ def main():
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 200.0  # original sampling rate
-    window_size, stride = args.window_size, args.stride
     train_reps = [1, 3, 4, 6]
     val_reps = [2]
     test_reps = [5]

 import scipy.signal as signal
 from scipy.signal import iirnotch
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # ==== Data augmentation functions ====
 def random_amplitude_scale(sig, scale_range=(0.9, 1.1)):
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
     )
     args.add_argument(
+        "--stride",
+        type=int,
+        help="Step size between windows in samples for segmentation.",
     )
     args = args.parse_args()
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 200.0  # original sampling rate
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     train_reps = [1, 3, 4, 6]
     val_reps = [2]
     test_reps = [5]

scripts/db6.py CHANGED Viewed

@@ -7,6 +7,8 @@ import scipy.io
 import scipy.signal as signal
 from scipy.signal import iirnotch
 # ─────────────── Filtering ──────────────────
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -56,10 +58,12 @@ def main():
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
-        "--window_size", type=int, help="Size of the sliding window for segmentation."
     )
     args.add_argument(
-        "--stride", type=int, help="Stride for the sliding window segmentation."
     )
     args = args.parse_args()
     data_dir = args.data_dir  # input folder with .mat files
@@ -86,7 +90,10 @@ def main():
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0
-    window_size, stride = args.window_size, args.stride
     train_reps = list(range(1, 9))  # 1–8
     val_reps = [9, 10]  # 9–10

 import scipy.signal as signal
 from scipy.signal import iirnotch
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # ─────────────── Filtering ──────────────────
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
     )
     args.add_argument(
+        "--stride",
+        type=int,
+        help="Step size between windows in samples for segmentation.",
     )
     args = args.parse_args()
     data_dir = args.data_dir  # input folder with .mat files
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     train_reps = list(range(1, 9))  # 1–8
     val_reps = [9, 10]  # 9–10

scripts/db7.py CHANGED Viewed

@@ -7,6 +7,8 @@ import scipy.io
 import scipy.signal as signal
 from scipy.signal import iirnotch
 # ─────────────── Filtering ──────────────────
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -56,16 +58,12 @@ def main():
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
-        "--window_size",
-        type=int,
-        default=256,
-        help="Size of the sliding window for segmentation.",
     )
     args.add_argument(
         "--stride",
         type=int,
-        default=128,
-        help="Stride for the sliding window segmentation.",
     )
     args = args.parse_args()
     data_dir = args.data_dir  # input folder with .mat files
@@ -87,7 +85,10 @@ def main():
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0
-    window_size, stride = args.window_size, args.stride
     train_reps = [1, 2, 3, 4]  # 1–4
     val_reps = [5]  # 5

 import scipy.signal as signal
 from scipy.signal import iirnotch
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # ─────────────── Filtering ──────────────────
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
     )
     args.add_argument(
         "--stride",
         type=int,
+        help="Step size between windows in samples for segmentation.",
     )
     args = args.parse_args()
     data_dir = args.data_dir  # input folder with .mat files
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     train_reps = [1, 2, 3, 4]  # 1–4
     val_reps = [5]  # 5

scripts/db8.py CHANGED Viewed

@@ -9,6 +9,8 @@ from joblib import Parallel, delayed
 from scipy.signal import iirnotch
 from tqdm import tqdm
 _MATRIX_DOF2DOA_TRANSPOSED = np.array(
     # https://www.frontiersin.org/articles/10.3389/fnins.2019.00891/full
     # Open supplemental data > Data Sheet 1.PDF >
@@ -127,10 +129,12 @@ def main():
     args.add_argument("--data_dir", type=str, required=True)
     args.add_argument("--save_dir", type=str, required=True)
     args.add_argument(
-        "--window_size", type=int, help="Size of the sliding window for segmentation."
     )
     args.add_argument(
-        "--stride", type=int, help="Stride for the sliding window segmentation."
     )
     args.add_argument(
         "--n_jobs", type=int, default=-1, help="Number of parallel jobs to run."
@@ -158,6 +162,11 @@ def main():
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0  # Hz
     # collect all .mat paths
     mat_paths = [
@@ -168,7 +177,7 @@ def main():
     # run in parallel
     results = Parallel(n_jobs=min(os.cpu_count(), args.n_jobs), verbose=5)(
-        delayed(process_mat_file)(mp, args.window_size, args.stride, fs)
         for mp in mat_paths
     )

 from scipy.signal import iirnotch
 from tqdm import tqdm
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 _MATRIX_DOF2DOA_TRANSPOSED = np.array(
     # https://www.frontiersin.org/articles/10.3389/fnins.2019.00891/full
     # Open supplemental data > Data Sheet 1.PDF >
     args.add_argument("--data_dir", type=str, required=True)
     args.add_argument("--save_dir", type=str, required=True)
     args.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
     )
     args.add_argument(
+        "--stride",
+        type=int,
+        help="Step size between windows in samples for segmentation.",
     )
     args.add_argument(
         "--n_jobs", type=int, default=-1, help="Number of parallel jobs to run."
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
     fs = 2000.0  # Hz
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     # collect all .mat paths
     mat_paths = [
     # run in parallel
     results = Parallel(n_jobs=min(os.cpu_count(), args.n_jobs), verbose=5)(
+        delayed(process_mat_file)(mp, window_size, stride, fs)
         for mp in mat_paths
     )

scripts/emg2pose.py CHANGED Viewed

@@ -4,12 +4,13 @@ from pathlib import Path
 import h5py
 import numpy as np
 import pandas as pd
-import scipy.io
 import scipy.signal as signal
 from joblib import Parallel, delayed
 from scipy.signal import iirnotch
 from tqdm import tqdm
 # ==== Filter functions (operate at original fs=2000) ====
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
@@ -78,10 +79,10 @@ def main():
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
-        "--window_size", type=int, help="Size of the sliding window for segmentation."
     )
     args.add_argument(
-        "--stride", type=int, help="Stride for the sliding window segmentation."
     )
     args.add_argument(
         "--subsample", type=float, default=1.0, help="Whether to subsample the data"
@@ -102,7 +103,10 @@ def main():
     os.makedirs(save_dir, exist_ok=True)
     fs = 2000.0  # original sampling rate
-    window_size, stride = args.window_size, args.stride
     df = pd.read_csv(os.path.join(data_dir, "metadata.csv"))
     df = df.groupby("split").apply(

 import h5py
 import numpy as np
 import pandas as pd
 import scipy.signal as signal
 from joblib import Parallel, delayed
 from scipy.signal import iirnotch
 from tqdm import tqdm
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # ==== Filter functions (operate at original fs=2000) ====
 def notch_filter(data, notch_freq=50.0, Q=30.0, fs=2000.0):
     args.add_argument("--data_dir", type=str)
     args.add_argument("--save_dir", type=str)
     args.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
     )
     args.add_argument(
+        "--stride", type=int, help="Step size between windows in samples for segmentation."
     )
     args.add_argument(
         "--subsample", type=float, default=1.0, help="Whether to subsample the data"
     os.makedirs(save_dir, exist_ok=True)
     fs = 2000.0  # original sampling rate
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     df = pd.read_csv(os.path.join(data_dir, "metadata.csv"))
     df = df.groupby("split").apply(

scripts/epn.py CHANGED Viewed

@@ -10,6 +10,8 @@ from joblib import Parallel, delayed
 from scipy.signal import iirnotch
 from tqdm.auto import tqdm
 # Sampling frequency and EMG channels
 tfs, n_ch = 200.0, 8
@@ -122,7 +124,9 @@ def main():
     parser.add_argument("--source_training", required=True)
     parser.add_argument("--source_testing", required=True)
     parser.add_argument("--dest_dir", required=True)
-    parser.add_argument("--window_size", type=int, required=True)
     parser.add_argument("--n_jobs", type=int, default=-1)
     args = parser.parse_args()
     data_dir = args.data_dir
@@ -142,7 +146,11 @@ def main():
         print(f"Downloaded and unzipped dataset\n{data_dir}/EMG-EPN612_Dataset.zip")
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
-    seq_len = args.window_size
     train_X, train_y, val_X, val_y, test_X, test_y = [], [], [], [], [], []
     paths = glob.glob(os.path.join(args.source_training, "user*", "user*.json"))

 from scipy.signal import iirnotch
 from tqdm.auto import tqdm
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # Sampling frequency and EMG channels
 tfs, n_ch = 200.0, 8
     parser.add_argument("--source_training", required=True)
     parser.add_argument("--source_testing", required=True)
     parser.add_argument("--dest_dir", required=True)
+    parser.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
+    )
     parser.add_argument("--n_jobs", type=int, default=-1)
     args = parser.parse_args()
     data_dir = args.data_dir
         print(f"Downloaded and unzipped dataset\n{data_dir}/EMG-EPN612_Dataset.zip")
         sys.exit("Data downloaded and unzipped. Rerun without --download_data.")
+    seq_len = args.seq_len
+    window_seconds = sequence_to_seconds(seq_len, tfs)
+    print(f"Window size: {seq_len} samples ({window_seconds:.2f} seconds)")
     train_X, train_y, val_X, val_y, test_X, test_y = [], [], [], [], [], []
     paths = glob.glob(os.path.join(args.source_training, "user*", "user*.json"))

scripts/uci.py CHANGED Viewed

@@ -7,6 +7,8 @@ import numpy as np
 import scipy.signal as signal
 from scipy.signal import iirnotch
 # ─────────────────────────────────────────────
 # Filtering utilities
@@ -152,8 +154,14 @@ if __name__ == "__main__":
         required=True,
         help="Directory to save the output h5 files",
     )
-    arg.add_argument("--window_size", type=int, help="Window size for sliding window")
-    arg.add_argument("--stride", type=int, help="Stride for sliding window")
     args = arg.parse_args()
     data_root = args.data_dir
@@ -173,7 +181,10 @@ if __name__ == "__main__":
         sys.exit("Rerun without --download_data.")
     fs = 200.0  # sampling rate of MYO bracelet
-    window_size, stride = args.window_size, args.stride
     split_map = {
         "train": list(range(1, 25)),  # 1–24

 import scipy.signal as signal
 from scipy.signal import iirnotch
+sequence_to_seconds = lambda seq_len, fs: seq_len / fs
 # ─────────────────────────────────────────────
 # Filtering utilities
         required=True,
         help="Directory to save the output h5 files",
     )
+    arg.add_argument(
+        "--seq_len", type=int, help="Size of the window in samples for segmentation."
+    )
+    arg.add_argument(
+        "--stride",
+        type=int,
+        help="Step size between windows in samples for segmentation.",
+    )
     args = arg.parse_args()
     data_root = args.data_dir
         sys.exit("Rerun without --download_data.")
     fs = 200.0  # sampling rate of MYO bracelet
+    window_size, stride = args.seq_len, args.stride
+    window_seconds = sequence_to_seconds(window_size, fs)
+    print(f"Window size: {window_size} samples ({window_seconds:.2f} seconds)")
     split_map = {
         "train": list(range(1, 25)),  # 1–24