Spaces:
Running
Running
File size: 8,187 Bytes
3bb804c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
import os
import re
import time
from importlib.resources import files
from os import path as op
from pathlib import Path
from ...utils import _url_to_local_path, logger, verbose
from ..utils import _do_path_update, _downloader_params, _get_path, _log_time_size
# Root URL of the PhysioNet ``eegmmidb`` dataset (version 1.0.0); used as the
# default ``base_url`` of ``load_data``.
EEGMI_URL = "https://physionet.org/files/eegmmidb/1.0.0/"
@verbose
def data_path(url, path=None, force_update=False, update_path=None, *, verbose=None):
    """Get path to local copy of EEGMMI dataset URL.

    This is a low-level function useful for getting a local copy of a remote EEGBCI
    dataset :footcite:`SchalkEtAl2004`, which is also available at PhysioNet
    :footcite:`GoldbergerEtAl2000`. Metadata, such as the meaning of event markers
    may be obtained from the
    `PhysioNet documentation page <https://physionet.org/content/eegmmidb/1.0.0/>`_.

    Parameters
    ----------
    url : str
        The dataset to use.
    path : None | path-like
        Location of where to look for the EEGBCI data. If ``None``, the environment
        variable or config parameter ``MNE_DATASETS_EEGBCI_PATH`` is used. If neither
        exists, the ``~/mne_data`` directory is used. If the EEGBCI dataset is not found
        under the given path, the data will be automatically downloaded to the specified
        folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If ``True``, set ``MNE_DATASETS_EEGBCI_PATH`` in the configuration to the given
        path. If ``None``, the user is prompted.
    %(verbose)s

    Returns
    -------
    path : list of Path
        Local path to the given data file. This path is contained inside a list of
        length one for compatibility.

    Notes
    -----
    For example, one could do:

        >>> from mne.datasets import eegbci
        >>> url = "http://www.physionet.org/physiobank/database/eegmmidb/"
        >>> eegbci.data_path(url, "~/datasets") # doctest:+SKIP

    This would download the given EEGBCI data file to the ``~/datasets`` folder and
    prompt the user to store this path in the config (if it does not already exist).

    References
    ----------
    .. footbibliography::
    """
    import pooch

    key = "MNE_DATASETS_EEGBCI_PATH"
    name = "EEGBCI"
    path = _get_path(path, key, name)
    fname = "MNE-eegbci-data"
    # local file that mirrors the remote URL underneath <path>/MNE-eegbci-data
    destination = _url_to_local_path(url, op.join(path, fname))

    # fetch the file
    downloader = pooch.HTTPDownloader(**_downloader_params())
    if force_update or not op.isfile(destination):
        if op.isfile(destination):
            os.remove(destination)
        # pooch.retrieve() saves to ``path / fname``: ``path`` must be the target
        # *folder* and ``fname`` the bare file name, so split the destination.
        dest_dir, dest_name = op.split(destination)
        os.makedirs(dest_dir, exist_ok=True)
        pooch.retrieve(
            url=url,
            # required argument; no checksum is known for an arbitrary URL, so
            # hash verification is explicitly skipped
            known_hash=None,
            fname=dest_name,
            path=dest_dir,
            downloader=downloader,
        )

    # offer to update the path
    _do_path_update(path, update_path, key, name)
    return [Path(destination)]
@verbose
def load_data(
    subjects,
    runs,
    *,
    path=None,
    force_update=False,
    update_path=None,
    base_url=EEGMI_URL,
    verbose=None,
):  # noqa: D301
    """Get paths to local copies of EEGBCI dataset files.

    This will fetch data for the EEGBCI dataset :footcite:`SchalkEtAl2004`, which is
    also available at PhysioNet :footcite:`GoldbergerEtAl2000`. Metadata, such as the
    meaning of event markers may be obtained from the
    `PhysioNet documentation page <https://physionet.org/content/eegmmidb/1.0.0/>`_.

    Parameters
    ----------
    subjects : int | list of int
        The subjects to use. Can be in the range of 1-109 (inclusive).
    runs : int | list of int
        The runs to use (see Notes for details).
    path : None | path-like
        Location of where to look for the EEGBCI data. If ``None``, the environment
        variable or config parameter ``MNE_DATASETS_EEGBCI_PATH`` is used. If neither
        exists, the ``~/mne_data`` directory is used. If the EEGBCI dataset is not found
        under the given path, the data will be automatically downloaded to the specified
        folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If ``True``, set ``MNE_DATASETS_EEGBCI_PATH`` in the configuration to the given
        path. If ``None``, the user is prompted.
    base_url : str
        The URL root for the data.
    %(verbose)s

    Returns
    -------
    paths : list of Path
        List of local data paths, one per requested subject/run combination
        (ordered by subject, then by run).

    Raises
    ------
    ValueError
        If ``base_url`` does not follow the ``.../files/eegmmidb/<version>/``
        layout.

    Notes
    -----
    The run numbers correspond to:

    =========  ===================================
    run        task
    =========  ===================================
    1          Baseline, eyes open
    2          Baseline, eyes closed
    3, 7, 11   Motor execution: left vs right hand
    4, 8, 12   Motor imagery: left vs right hand
    5, 9, 13   Motor execution: hands vs feet
    6, 10, 14  Motor imagery: hands vs feet
    =========  ===================================

    For example, one could do::

        >>> from mne.datasets import eegbci
        >>> eegbci.load_data([1, 2], [6, 10, 14], path="~/datasets") # doctest:+SKIP

    This would download runs 6, 10, and 14 (hand/foot motor imagery) from subjects
    1 and 2 in the EEGBCI dataset to "~/datasets" and prompt the user to store this path
    in the config (if it does not already exist).

    References
    ----------
    .. footbibliography::
    """
    import pooch

    t0 = time.time()

    if not hasattr(subjects, "__iter__"):
        subjects = [subjects]
    if not hasattr(runs, "__iter__"):
        runs = [runs]
    # ``runs`` is iterated once per subject; materialize it so that a one-shot
    # iterable (e.g. a generator) is not exhausted after the first subject
    runs = list(runs)

    # get local storage path
    config_key = "MNE_DATASETS_EEGBCI_PATH"
    folder = "MNE-eegbci-data"
    name = "EEGBCI"
    path = _get_path(path, config_key, name)

    # extract path parts (the local folder tree mirrors files/eegmmidb/<version>)
    pattern = r"(?:https?://.*)(files)/(eegmmidb)/(\d+\.\d+\.\d+)/?"
    match = re.match(pattern, base_url)
    if match is None:
        raise ValueError(
            "base_url does not match the expected EEGMI folder "
            "structure. Please notify MNE-Python developers."
        )
    base_path = op.join(path, folder, *match.groups())

    # create the download manager
    fetcher = pooch.create(
        path=base_path,
        base_url=base_url,
        version=None,  # data versioning is decoupled from MNE-Python version
        registry=None,  # registry is loaded from file (below)
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # load the checksum registry
    registry = files("mne").joinpath("data", "eegbci_checksums.txt")
    fetcher.load_registry(registry)

    # fetch the file(s)
    data_paths = []
    sz = 0  # total number of bytes downloaded by this call
    logged = False
    for subject in subjects:
        for run in runs:
            file_part = f"S{subject:03d}/S{subject:03d}R{run:02d}.edf"
            destination = Path(base_path, file_part)
            data_paths.append(destination)
            if destination.exists():
                if not force_update:
                    continue  # a local copy exists and is good enough
                destination.unlink()
            if not logged:  # log once, right before the first download
                logger.info("Downloading EEGBCI data")
                logged = True
            fetcher.fetch(file_part)
            sz += destination.stat().st_size

    # update path in config if desired
    _do_path_update(path, update_path, config_key, name)
    if sz > 0:
        _log_time_size(t0, sz)
    return data_paths
def standardize(raw):
    """Standardize channel positions and names.

    Every channel name is stripped of leading/trailing dots and upper-cased; a
    trailing ``Z`` is then written as ``z`` and a leading ``FP`` as ``Fp``.

    Parameters
    ----------
    raw : instance of Raw
        The raw data to standardize. Operates in-place.
    """

    def _canonical(label):
        # drop the "." padding around the name, then normalize the case
        canon = label.strip(".").upper()
        if canon.endswith("Z"):
            canon = f"{canon[:-1]}z"
        if canon.startswith("FP"):
            canon = f"Fp{canon[2:]}"
        return canon

    raw.rename_channels({label: _canonical(label) for label in raw.ch_names})
|