Spaces:
Running
Running
| # Authors: The MNE-Python contributors. | |
| # License: BSD-3-Clause | |
| # Copyright the MNE-Python contributors. | |
| from __future__ import annotations # only needed for Python ≤ 3.9 | |
| import os | |
| import os.path as op | |
| import sys | |
| import time | |
| from pathlib import Path | |
| from shutil import rmtree | |
| from .. import __version__ as mne_version | |
| from ..fixes import _compare_version | |
| from ..utils import _safe_input, logger, warn | |
| from .config import ( | |
| MISC_VERSIONED, | |
| RELEASES, | |
| TESTING_VERSIONED, | |
| _bst_license_text, | |
| ) | |
| from .utils import ( | |
| _dataset_version, | |
| _do_path_update, | |
| _downloader_params, | |
| _get_path, | |
| _log_time_size, | |
| ) | |
# Sentinel that the test suite monkeypatches to simulate a particular on-disk
# version of the "fake" dataset (consumed by fetch_dataset's version check).
_FAKE_VERSION = None  # used for monkeypatching while testing versioning
def fetch_dataset(
    dataset_params,
    processor=None,
    path=None,
    force_update=False,
    update_path=True,
    download=True,
    check_version=False,
    return_version=False,
    accept=False,
    auth=None,
    token=None,
) -> Path | tuple[Path, str]:
    """Fetch an MNE-compatible dataset using pooch.

    Parameters
    ----------
    dataset_params : list of dict | dict
        The dataset name(s) and corresponding parameters to download the
        dataset(s). The dataset parameters that contains the following keys:
        ``archive_name``, ``url``, ``folder_name``, ``hash``,
        ``config_key`` (optional). See Notes.
    processor : None | "unzip" | "untar" | instance of pooch.Unzip | instance of pooch.Untar
        What to do after downloading the file. ``"unzip"`` and ``"untar"`` will
        decompress the downloaded file in place; for custom extraction (e.g.,
        only extracting certain files from the archive) pass an instance of
        ``pooch.Unzip`` or ``pooch.Untar``. If ``None`` (the
        default), the files are left as-is.
    path : None | str
        Directory in which to put the dataset. If ``None``, the dataset
        location is determined by first checking whether
        ``dataset_params['config_key']`` is defined, and if so, whether that
        config key exists in the MNE-Python config file. If so, the configured
        path is used; if not, the location is set to the value of the
        ``MNE_DATA`` config key (if it exists), or ``~/mne_data`` otherwise.
    force_update : bool
        Force update of the dataset even if a local copy exists.
        Default is False.
    update_path : bool | None
        If True (default), set the mne-python config to the given
        path. If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet, it will not be
        downloaded and the path will be returned as ``''`` (empty string). This
        is mostly used for testing purposes and can be safely ignored by most
        users.
    check_version : bool
        Whether to check the version of the dataset or not. Each version
        of the dataset is stored in the root with a ``version.txt`` file.
    return_version : bool
        Whether or not to return the version of the dataset or not.
        Defaults to False.
    accept : bool
        Some MNE-supplied datasets require acceptance of an additional license.
        Default is ``False``.
    auth : tuple | None
        Optional authentication tuple containing the username and
        password/token, passed to ``pooch.HTTPDownloader`` (e.g.,
        ``auth=('foo', 012345)``).
    token : str | None
        Optional authentication token passed to ``pooch.HTTPDownloader``.

    Returns
    -------
    data_path : instance of Path
        The path to the fetched dataset.
    version : str
        Only returned if ``return_version`` is True.

    See Also
    --------
    mne.get_config
    mne.set_config
    mne.datasets.has_dataset

    Notes
    -----
    The ``dataset_params`` argument must contain the following keys:

    - ``archive_name``: The name of the (possibly compressed) file to download
    - ``url``: URL from which the file can be downloaded
    - ``folder_name``: the subfolder within the ``MNE_DATA`` folder in which to
      save and uncompress (if needed) the file(s)
    - ``hash``: the cryptographic hash type of the file followed by a colon and
      then the hash value (examples: "sha256:19uheid...", "md5:upodh2io...")
    - ``config_key`` (optional): key passed to :func:`mne.set_config` to store
      the on-disk location of the downloaded dataset (e.g.,
      ``"MNE_DATASETS_EEGBCI_PATH"``). This will only work for the provided
      datasets listed :ref:`here <datasets>`; do not use for user-defined
      datasets.

    An example would look like::

        {'dataset_name': 'sample',
         'archive_name': 'MNE-sample-data-processed.tar.gz',
         'hash': 'md5:e8f30c4516abdc12a0c08e6bae57409c',
         'url': 'https://osf.io/download/86qa2?version=6',
         'folder_name': 'MNE-sample-data',
         'config_key': 'MNE_DATASETS_SAMPLE_PATH'}

    For datasets where a single (possibly compressed) file must be downloaded,
    pass a single :class:`dict` as ``dataset_params``. For datasets where
    multiple files must be downloaded and (optionally) uncompressed separately,
    pass a list of dicts.
    """  # noqa E501
    import pooch

    t0 = time.time()

    if auth is not None and len(auth) != 2:
        raise RuntimeError(
            "auth should be a 2-tuple consisting of a username and password/token."
        )

    # map the convenience strings onto pooch processors that uncompress files
    # in place; any other value (None or a pooch processor) is passed through
    if processor == "untar":
        processor = pooch.Untar(extract_dir=path)
    elif processor == "unzip":
        processor = pooch.Unzip(extract_dir=path)

    # normalize to a list of per-archive parameter dicts
    if isinstance(dataset_params, dict):
        dataset_params = [dataset_params]

    # extract configuration parameters; the first entry determines the dataset
    # name, config key, and destination folder for the whole batch
    names = [params["dataset_name"] for params in dataset_params]
    name = names[0]
    dataset_dict = dataset_params[0]
    config_key = dataset_dict.get("config_key", None)
    folder_name = dataset_dict["folder_name"]

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)
    # get the actual path to each dataset folder name
    final_path = op.join(path, folder_name)

    # handle BrainStorm datasets with nested folders for datasets
    if name.startswith("bst_"):
        final_path = op.join(final_path, name)
    final_path = Path(final_path)

    # additional condition: check for version.txt and parse it
    # check if testing or misc data is outdated; if so, redownload it
    want_version = RELEASES.get(name, None)
    want_version = _FAKE_VERSION if name == "fake" else want_version

    # get the version of the dataset and then check if the version is outdated
    data_version = _dataset_version(final_path, name)
    outdated = want_version is not None and _compare_version(
        want_version, ">", data_version
    )
    if outdated:
        logger.info(
            f"Dataset {name} version {data_version} out of date, "
            f"latest version is {want_version}"
        )
    empty = Path("")

    # return empty string if outdated dataset and we don't want to download
    if (not force_update) and outdated and not download:
        logger.info(
            "Dataset out of date but force_update=False and download=False, "
            "returning empty data_path"
        )
        return (empty, data_version) if return_version else empty

    # reasons to bail early (hf_sef has separate code for this):
    if (not force_update) and (not outdated) and (not name.startswith("hf_sef_")):
        # ...if target folder exists (otherwise pooch downloads every
        # time because we don't save the archive files after unpacking, so
        # pooch can't check its checksum)
        if op.isdir(final_path):
            if config_key is not None:
                _do_path_update(path, update_path, config_key, name)
            return (final_path, data_version) if return_version else final_path
        # ...if download=False (useful for debugging)
        elif not download:
            return (empty, data_version) if return_version else empty
        # ...if user didn't accept the license
        elif name.startswith("bst_"):
            if accept or "--accept-brainstorm-license" in sys.argv:
                answer = "y"
            else:
                # If they don't have stdin, just accept the license
                # https://github.com/mne-tools/mne-python/issues/8513#issuecomment-726823724  # noqa: E501
                answer = _safe_input(f"{_bst_license_text}Agree (y/[n])? ", use="y")
            if answer.lower() != "y":
                raise RuntimeError("You must agree to the license to use this dataset")

    # downloader & processors
    download_params = _downloader_params(auth=auth, token=token)
    if name == "fake":
        download_params["progressbar"] = False
    downloader = pooch.HTTPDownloader(**download_params)

    # make mappings from archive names to urls and to checksums
    urls = dict()
    registry = dict()
    for this_dataset in dataset_params:
        archive_name = this_dataset["archive_name"]
        urls[archive_name] = this_dataset["url"]
        registry[archive_name] = this_dataset["hash"]

    # create the download manager; when a processor will unpack the archives,
    # download them next to (not inside) the final folder
    use_path = final_path if processor is None else Path(path)
    fetcher = pooch.create(
        path=str(use_path),
        base_url="",  # Full URLs are given in the `urls` dict.
        version=None,  # Data versioning is decoupled from MNE-Python version.
        urls=urls,
        registry=registry,
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # use our logger level for pooch's logger too
    pooch.get_logger().setLevel(logger.getEffectiveLevel())

    sz = 0  # total bytes downloaded, for the timing/size log message
    for this_dataset in dataset_params:
        # fetch and unpack the data
        archive_name = this_dataset["archive_name"]
        try:
            fetcher.fetch(
                fname=archive_name, downloader=downloader, processor=processor
            )
        except ValueError as err:
            msg = str(err)
            if "hash of downloaded file" in msg:
                raise ValueError(
                    f"{msg} Consider using force_update=True to force "
                    "the dataset to be downloaded again."
                ) from None
            else:
                raise
        fname = use_path / archive_name
        sz += fname.stat().st_size
        # after unpacking, remove the archive file
        if processor is not None:
            fname.unlink()

    # remove version number from "misc" and "testing" datasets folder names
    if name == "misc":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, MISC_VERSIONED), final_path)
    elif name == "testing":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, TESTING_VERSIONED), final_path)

    # maybe update the config
    if config_key is not None:
        old_name = "brainstorm" if name.startswith("bst_") else name
        _do_path_update(path, update_path, config_key, old_name)

    # compare the version of the dataset and mne
    data_version = _dataset_version(path, name)
    # 0.7 < 0.7.git should be False, so drop a trailing ".git". Use
    # removesuffix, not strip(".git"): strip removes any of the *characters*
    # ".", "g", "i", "t" from both ends and could corrupt other versions.
    if check_version and (
        _compare_version(data_version, "<", mne_version.removesuffix(".git"))
    ):
        # OK to `nosec` because it's false positive (misidentified as SQL)
        warn(
            f"The {name} dataset (version {data_version}) is older than "
            f"mne-python (version {mne_version}). If the examples fail, "
            f"you may need to update the {name} dataset by using "
            f"mne.datasets.{name}.data_path(force_update=True)"  # nosec B608
        )
    _log_time_size(t0, sz)
    return (final_path, data_version) if return_version else final_path