Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/torchaudio/_internal/__init__.py +10 -0
- .venv/lib/python3.11/site-packages/torchaudio/_internal/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/_internal/__pycache__/module_utils.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/_internal/module_utils.py +113 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/__init__.py +47 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librilight_limited.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librimix.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librispeech_biasing.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/libritts.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/cmuarctic.py +157 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/cmudict.py +186 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/commonvoice.py +86 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/dr_vctk.py +121 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/fluentcommands.py +108 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/gtzan.py +1118 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/iemocap.py +147 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/librilight_limited.py +111 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/librimix.py +133 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/librispeech.py +174 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/librispeech_biasing.py +189 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/libritts.py +168 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/ljspeech.py +107 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/musdb_hq.py +139 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/quesst14.py +136 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/snips.py +157 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/speechcommands.py +183 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/tedlium.py +218 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/utils.py +54 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/vctk.py +143 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/voxceleb1.py +309 -0
- .venv/lib/python3.11/site-packages/torchaudio/datasets/yesno.py +89 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/__init__.py +13 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/_effector.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/_playback.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/_effector.py +347 -0
- .venv/lib/python3.11/site-packages/torchaudio/io/_playback.py +72 -0
- .venv/lib/python3.11/site-packages/torchaudio/lib/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/lib/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/_hdemucs.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/conformer.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/conv_tasnet.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/deepspeech.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/emformer.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/rnnt.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/rnnt_decoder.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/tacotron2.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/wav2letter.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/wavernn.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/torchaudio/_internal/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try:
|
| 2 |
+
from .fb import download_url_to_file, load_state_dict_from_url
|
| 3 |
+
except ImportError:
|
| 4 |
+
from torch.hub import download_url_to_file, load_state_dict_from_url
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"load_state_dict_from_url",
|
| 9 |
+
"download_url_to_file",
|
| 10 |
+
]
|
.venv/lib/python3.11/site-packages/torchaudio/_internal/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (488 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/_internal/__pycache__/module_utils.cpython-311.pyc
ADDED
|
Binary file (6.24 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/_internal/module_utils.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib.util
|
| 2 |
+
import os
|
| 3 |
+
import warnings
|
| 4 |
+
from functools import wraps
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def eval_env(var, default):
|
| 9 |
+
"""Check if environment varable has True-y value"""
|
| 10 |
+
if var not in os.environ:
|
| 11 |
+
return default
|
| 12 |
+
|
| 13 |
+
val = os.environ.get(var, "0")
|
| 14 |
+
trues = ["1", "true", "TRUE", "on", "ON", "yes", "YES"]
|
| 15 |
+
falses = ["0", "false", "FALSE", "off", "OFF", "no", "NO"]
|
| 16 |
+
if val in trues:
|
| 17 |
+
return True
|
| 18 |
+
if val not in falses:
|
| 19 |
+
# fmt: off
|
| 20 |
+
raise RuntimeError(
|
| 21 |
+
f"Unexpected environment variable value `{var}={val}`. "
|
| 22 |
+
f"Expected one of {trues + falses}")
|
| 23 |
+
# fmt: on
|
| 24 |
+
return False
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def is_module_available(*modules: str) -> bool:
|
| 28 |
+
r"""Returns if a top-level module with :attr:`name` exists *without**
|
| 29 |
+
importing it. This is generally safer than try-catch block around a
|
| 30 |
+
`import X`. It avoids third party libraries breaking assumptions of some of
|
| 31 |
+
our tests, e.g., setting multiprocessing start method when imported
|
| 32 |
+
(see librosa/#747, torchvision/#544).
|
| 33 |
+
"""
|
| 34 |
+
return all(importlib.util.find_spec(m) is not None for m in modules)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def requires_module(*modules: str):
|
| 38 |
+
"""Decorate function to give error message if invoked without required optional modules.
|
| 39 |
+
|
| 40 |
+
This decorator is to give better error message to users rather
|
| 41 |
+
than raising ``NameError: name 'module' is not defined`` at random places.
|
| 42 |
+
"""
|
| 43 |
+
missing = [m for m in modules if not is_module_available(m)]
|
| 44 |
+
|
| 45 |
+
if not missing:
|
| 46 |
+
# fall through. If all the modules are available, no need to decorate
|
| 47 |
+
def decorator(func):
|
| 48 |
+
return func
|
| 49 |
+
|
| 50 |
+
else:
|
| 51 |
+
req = f"module: {missing[0]}" if len(missing) == 1 else f"modules: {missing}"
|
| 52 |
+
|
| 53 |
+
def decorator(func):
|
| 54 |
+
@wraps(func)
|
| 55 |
+
def wrapped(*args, **kwargs):
|
| 56 |
+
raise RuntimeError(f"{func.__module__}.{func.__name__} requires {req}")
|
| 57 |
+
|
| 58 |
+
return wrapped
|
| 59 |
+
|
| 60 |
+
return decorator
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def deprecated(direction: str, version: Optional[str] = None, remove: bool = False):
|
| 64 |
+
"""Decorator to add deprecation message
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
direction (str): Migration steps to be given to users.
|
| 68 |
+
version (str or int): The version when the object will be removed
|
| 69 |
+
remove (bool): If enabled, append future removal message.
|
| 70 |
+
"""
|
| 71 |
+
|
| 72 |
+
def decorator(func):
|
| 73 |
+
@wraps(func)
|
| 74 |
+
def wrapped(*args, **kwargs):
|
| 75 |
+
message = f"{func.__module__}.{func.__name__} has been deprecated. {direction}"
|
| 76 |
+
if remove:
|
| 77 |
+
message += f' It will be removed from {"future" if version is None else version} release. '
|
| 78 |
+
warnings.warn(message, stacklevel=2)
|
| 79 |
+
return func(*args, **kwargs)
|
| 80 |
+
|
| 81 |
+
message = "This function has been deprecated. "
|
| 82 |
+
if remove:
|
| 83 |
+
message += f'It will be removed from {"future" if version is None else version} release. '
|
| 84 |
+
|
| 85 |
+
wrapped.__doc__ = f"""DEPRECATED: {func.__doc__}
|
| 86 |
+
|
| 87 |
+
.. warning::
|
| 88 |
+
|
| 89 |
+
{message}
|
| 90 |
+
{direction}
|
| 91 |
+
"""
|
| 92 |
+
|
| 93 |
+
return wrapped
|
| 94 |
+
|
| 95 |
+
return decorator
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def fail_with_message(message):
|
| 99 |
+
"""Generate decorator to give users message about missing TorchAudio extension."""
|
| 100 |
+
|
| 101 |
+
def decorator(func):
|
| 102 |
+
@wraps(func)
|
| 103 |
+
def wrapped(*args, **kwargs):
|
| 104 |
+
raise RuntimeError(f"{func.__module__}.{func.__name__} {message}")
|
| 105 |
+
|
| 106 |
+
return wrapped
|
| 107 |
+
|
| 108 |
+
return decorator
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def no_op(func):
|
| 112 |
+
"""Op-op decorator. Used in place of fail_with_message when a functionality that requires extension works fine."""
|
| 113 |
+
return func
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/__init__.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .cmuarctic import CMUARCTIC
|
| 2 |
+
from .cmudict import CMUDict
|
| 3 |
+
from .commonvoice import COMMONVOICE
|
| 4 |
+
from .dr_vctk import DR_VCTK
|
| 5 |
+
from .fluentcommands import FluentSpeechCommands
|
| 6 |
+
from .gtzan import GTZAN
|
| 7 |
+
from .iemocap import IEMOCAP
|
| 8 |
+
from .librilight_limited import LibriLightLimited
|
| 9 |
+
from .librimix import LibriMix
|
| 10 |
+
from .librispeech import LIBRISPEECH
|
| 11 |
+
from .librispeech_biasing import LibriSpeechBiasing
|
| 12 |
+
from .libritts import LIBRITTS
|
| 13 |
+
from .ljspeech import LJSPEECH
|
| 14 |
+
from .musdb_hq import MUSDB_HQ
|
| 15 |
+
from .quesst14 import QUESST14
|
| 16 |
+
from .snips import Snips
|
| 17 |
+
from .speechcommands import SPEECHCOMMANDS
|
| 18 |
+
from .tedlium import TEDLIUM
|
| 19 |
+
from .vctk import VCTK_092
|
| 20 |
+
from .voxceleb1 import VoxCeleb1Identification, VoxCeleb1Verification
|
| 21 |
+
from .yesno import YESNO
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
__all__ = [
|
| 25 |
+
"COMMONVOICE",
|
| 26 |
+
"LIBRISPEECH",
|
| 27 |
+
"LibriSpeechBiasing",
|
| 28 |
+
"LibriLightLimited",
|
| 29 |
+
"SPEECHCOMMANDS",
|
| 30 |
+
"VCTK_092",
|
| 31 |
+
"DR_VCTK",
|
| 32 |
+
"YESNO",
|
| 33 |
+
"LJSPEECH",
|
| 34 |
+
"GTZAN",
|
| 35 |
+
"CMUARCTIC",
|
| 36 |
+
"CMUDict",
|
| 37 |
+
"LibriMix",
|
| 38 |
+
"LIBRITTS",
|
| 39 |
+
"TEDLIUM",
|
| 40 |
+
"QUESST14",
|
| 41 |
+
"MUSDB_HQ",
|
| 42 |
+
"FluentSpeechCommands",
|
| 43 |
+
"VoxCeleb1Identification",
|
| 44 |
+
"VoxCeleb1Verification",
|
| 45 |
+
"IEMOCAP",
|
| 46 |
+
"Snips",
|
| 47 |
+
]
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librilight_limited.cpython-311.pyc
ADDED
|
Binary file (6.97 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librimix.cpython-311.pyc
ADDED
|
Binary file (7.72 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/librispeech_biasing.cpython-311.pyc
ADDED
|
Binary file (9.78 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/__pycache__/libritts.cpython-311.pyc
ADDED
|
Binary file (8.05 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/cmuarctic.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import csv
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Tuple, Union
|
| 5 |
+
|
| 6 |
+
import torchaudio
|
| 7 |
+
from torch import Tensor
|
| 8 |
+
from torch.utils.data import Dataset
|
| 9 |
+
from torchaudio._internal import download_url_to_file
|
| 10 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 11 |
+
|
| 12 |
+
URL = "aew"
|
| 13 |
+
FOLDER_IN_ARCHIVE = "ARCTIC"
|
| 14 |
+
_CHECKSUMS = {
|
| 15 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_aew_arctic.tar.bz2": "645cb33c0f0b2ce41384fdd8d3db2c3f5fc15c1e688baeb74d2e08cab18ab406", # noqa: E501
|
| 16 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_ahw_arctic.tar.bz2": "024664adeb892809d646a3efd043625b46b5bfa3e6189b3500b2d0d59dfab06c", # noqa: E501
|
| 17 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_aup_arctic.tar.bz2": "2c55bc3050caa996758869126ad10cf42e1441212111db034b3a45189c18b6fc", # noqa: E501
|
| 18 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_awb_arctic.tar.bz2": "d74a950c9739a65f7bfc4dfa6187f2730fa03de5b8eb3f2da97a51b74df64d3c", # noqa: E501
|
| 19 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_axb_arctic.tar.bz2": "dd65c3d2907d1ee52f86e44f578319159e60f4bf722a9142be01161d84e330ff", # noqa: E501
|
| 20 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_bdl_arctic.tar.bz2": "26b91aaf48b2799b2956792b4632c2f926cd0542f402b5452d5adecb60942904", # noqa: E501
|
| 21 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_clb_arctic.tar.bz2": "3f16dc3f3b97955ea22623efb33b444341013fc660677b2e170efdcc959fa7c6", # noqa: E501
|
| 22 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_eey_arctic.tar.bz2": "8a0ee4e5acbd4b2f61a4fb947c1730ab3adcc9dc50b195981d99391d29928e8a", # noqa: E501
|
| 23 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_fem_arctic.tar.bz2": "3fcff629412b57233589cdb058f730594a62c4f3a75c20de14afe06621ef45e2", # noqa: E501
|
| 24 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_gka_arctic.tar.bz2": "dc82e7967cbd5eddbed33074b0699128dbd4482b41711916d58103707e38c67f", # noqa: E501
|
| 25 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_jmk_arctic.tar.bz2": "3a37c0e1dfc91e734fdbc88b562d9e2ebca621772402cdc693bbc9b09b211d73", # noqa: E501
|
| 26 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_ksp_arctic.tar.bz2": "8029cafce8296f9bed3022c44ef1e7953332b6bf6943c14b929f468122532717", # noqa: E501
|
| 27 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_ljm_arctic.tar.bz2": "b23993765cbf2b9e7bbc3c85b6c56eaf292ac81ee4bb887b638a24d104f921a0", # noqa: E501
|
| 28 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_lnh_arctic.tar.bz2": "4faf34d71aa7112813252fb20c5433e2fdd9a9de55a00701ffcbf05f24a5991a", # noqa: E501
|
| 29 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_rms_arctic.tar.bz2": "c6dc11235629c58441c071a7ba8a2d067903dfefbaabc4056d87da35b72ecda4", # noqa: E501
|
| 30 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_rxr_arctic.tar.bz2": "1fa4271c393e5998d200e56c102ff46fcfea169aaa2148ad9e9469616fbfdd9b", # noqa: E501
|
| 31 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_slp_arctic.tar.bz2": "54345ed55e45c23d419e9a823eef427f1cc93c83a710735ec667d068c916abf1", # noqa: E501
|
| 32 |
+
"http://festvox.org/cmu_arctic/packed/cmu_us_slt_arctic.tar.bz2": "7c173297916acf3cc7fcab2713be4c60b27312316765a90934651d367226b4ea", # noqa: E501
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def load_cmuarctic_item(line: str, path: str, folder_audio: str, ext_audio: str) -> Tuple[Tensor, int, str, str]:
|
| 37 |
+
|
| 38 |
+
utterance_id, transcript = line[0].strip().split(" ", 2)[1:]
|
| 39 |
+
|
| 40 |
+
# Remove space, double quote, and single parenthesis from transcript
|
| 41 |
+
transcript = transcript[1:-3]
|
| 42 |
+
|
| 43 |
+
file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)
|
| 44 |
+
|
| 45 |
+
# Load audio
|
| 46 |
+
waveform, sample_rate = torchaudio.load(file_audio)
|
| 47 |
+
|
| 48 |
+
return (waveform, sample_rate, transcript, utterance_id.split("_")[1])
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class CMUARCTIC(Dataset):
|
| 52 |
+
"""*CMU ARCTIC* :cite:`Kominek03cmuarctic` dataset.
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 56 |
+
url (str, optional):
|
| 57 |
+
The URL to download the dataset from or the type of the dataset to download.
|
| 58 |
+
(default: ``"aew"``)
|
| 59 |
+
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
|
| 60 |
+
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
|
| 61 |
+
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
|
| 62 |
+
folder_in_archive (str, optional):
|
| 63 |
+
The top-level directory of the dataset. (default: ``"ARCTIC"``)
|
| 64 |
+
download (bool, optional):
|
| 65 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 66 |
+
"""
|
| 67 |
+
|
| 68 |
+
_file_text = "txt.done.data"
|
| 69 |
+
_folder_text = "etc"
|
| 70 |
+
_ext_audio = ".wav"
|
| 71 |
+
_folder_audio = "wav"
|
| 72 |
+
|
| 73 |
+
def __init__(
|
| 74 |
+
self, root: Union[str, Path], url: str = URL, folder_in_archive: str = FOLDER_IN_ARCHIVE, download: bool = False
|
| 75 |
+
) -> None:
|
| 76 |
+
|
| 77 |
+
if url in [
|
| 78 |
+
"aew",
|
| 79 |
+
"ahw",
|
| 80 |
+
"aup",
|
| 81 |
+
"awb",
|
| 82 |
+
"axb",
|
| 83 |
+
"bdl",
|
| 84 |
+
"clb",
|
| 85 |
+
"eey",
|
| 86 |
+
"fem",
|
| 87 |
+
"gka",
|
| 88 |
+
"jmk",
|
| 89 |
+
"ksp",
|
| 90 |
+
"ljm",
|
| 91 |
+
"lnh",
|
| 92 |
+
"rms",
|
| 93 |
+
"rxr",
|
| 94 |
+
"slp",
|
| 95 |
+
"slt",
|
| 96 |
+
]:
|
| 97 |
+
|
| 98 |
+
url = "cmu_us_" + url + "_arctic"
|
| 99 |
+
ext_archive = ".tar.bz2"
|
| 100 |
+
base_url = "http://www.festvox.org/cmu_arctic/packed/"
|
| 101 |
+
|
| 102 |
+
url = os.path.join(base_url, url + ext_archive)
|
| 103 |
+
|
| 104 |
+
# Get string representation of 'root' in case Path object is passed
|
| 105 |
+
root = os.fspath(root)
|
| 106 |
+
|
| 107 |
+
basename = os.path.basename(url)
|
| 108 |
+
root = os.path.join(root, folder_in_archive)
|
| 109 |
+
if not os.path.isdir(root):
|
| 110 |
+
os.mkdir(root)
|
| 111 |
+
archive = os.path.join(root, basename)
|
| 112 |
+
|
| 113 |
+
basename = basename.split(".")[0]
|
| 114 |
+
|
| 115 |
+
self._path = os.path.join(root, basename)
|
| 116 |
+
|
| 117 |
+
if download:
|
| 118 |
+
if not os.path.isdir(self._path):
|
| 119 |
+
if not os.path.isfile(archive):
|
| 120 |
+
checksum = _CHECKSUMS.get(url, None)
|
| 121 |
+
download_url_to_file(url, archive, hash_prefix=checksum)
|
| 122 |
+
_extract_tar(archive)
|
| 123 |
+
else:
|
| 124 |
+
if not os.path.exists(self._path):
|
| 125 |
+
raise RuntimeError(
|
| 126 |
+
f"The path {self._path} doesn't exist. "
|
| 127 |
+
"Please check the ``root`` path or set `download=True` to download it"
|
| 128 |
+
)
|
| 129 |
+
self._text = os.path.join(self._path, self._folder_text, self._file_text)
|
| 130 |
+
|
| 131 |
+
with open(self._text, "r") as text:
|
| 132 |
+
walker = csv.reader(text, delimiter="\n")
|
| 133 |
+
self._walker = list(walker)
|
| 134 |
+
|
| 135 |
+
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
|
| 136 |
+
"""Load the n-th sample from the dataset.
|
| 137 |
+
|
| 138 |
+
Args:
|
| 139 |
+
n (int): The index of the sample to be loaded
|
| 140 |
+
|
| 141 |
+
Returns:
|
| 142 |
+
Tuple of the following items;
|
| 143 |
+
|
| 144 |
+
Tensor:
|
| 145 |
+
Waveform
|
| 146 |
+
int:
|
| 147 |
+
Sample rate
|
| 148 |
+
str:
|
| 149 |
+
Transcript
|
| 150 |
+
str:
|
| 151 |
+
Utterance ID
|
| 152 |
+
"""
|
| 153 |
+
line = self._walker[n]
|
| 154 |
+
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
|
| 155 |
+
|
| 156 |
+
def __len__(self) -> int:
|
| 157 |
+
return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/cmudict.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Iterable, List, Tuple, Union
|
| 5 |
+
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
_CHECKSUMS = {
|
| 11 |
+
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b": "209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4", # noqa: E501
|
| 12 |
+
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols": "408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027", # noqa: E501
|
| 13 |
+
}
|
| 14 |
+
_PUNCTUATIONS = {
|
| 15 |
+
"!EXCLAMATION-POINT",
|
| 16 |
+
'"CLOSE-QUOTE',
|
| 17 |
+
'"DOUBLE-QUOTE',
|
| 18 |
+
'"END-OF-QUOTE',
|
| 19 |
+
'"END-QUOTE',
|
| 20 |
+
'"IN-QUOTES',
|
| 21 |
+
'"QUOTE',
|
| 22 |
+
'"UNQUOTE',
|
| 23 |
+
"#HASH-MARK",
|
| 24 |
+
"#POUND-SIGN",
|
| 25 |
+
"#SHARP-SIGN",
|
| 26 |
+
"%PERCENT",
|
| 27 |
+
"&ERSAND",
|
| 28 |
+
"'END-INNER-QUOTE",
|
| 29 |
+
"'END-QUOTE",
|
| 30 |
+
"'INNER-QUOTE",
|
| 31 |
+
"'QUOTE",
|
| 32 |
+
"'SINGLE-QUOTE",
|
| 33 |
+
"(BEGIN-PARENS",
|
| 34 |
+
"(IN-PARENTHESES",
|
| 35 |
+
"(LEFT-PAREN",
|
| 36 |
+
"(OPEN-PARENTHESES",
|
| 37 |
+
"(PAREN",
|
| 38 |
+
"(PARENS",
|
| 39 |
+
"(PARENTHESES",
|
| 40 |
+
")CLOSE-PAREN",
|
| 41 |
+
")CLOSE-PARENTHESES",
|
| 42 |
+
")END-PAREN",
|
| 43 |
+
")END-PARENS",
|
| 44 |
+
")END-PARENTHESES",
|
| 45 |
+
")END-THE-PAREN",
|
| 46 |
+
")PAREN",
|
| 47 |
+
")PARENS",
|
| 48 |
+
")RIGHT-PAREN",
|
| 49 |
+
")UN-PARENTHESES",
|
| 50 |
+
"+PLUS",
|
| 51 |
+
",COMMA",
|
| 52 |
+
"--DASH",
|
| 53 |
+
"-DASH",
|
| 54 |
+
"-HYPHEN",
|
| 55 |
+
"...ELLIPSIS",
|
| 56 |
+
".DECIMAL",
|
| 57 |
+
".DOT",
|
| 58 |
+
".FULL-STOP",
|
| 59 |
+
".PERIOD",
|
| 60 |
+
".POINT",
|
| 61 |
+
"/SLASH",
|
| 62 |
+
":COLON",
|
| 63 |
+
";SEMI-COLON",
|
| 64 |
+
";SEMI-COLON(1)",
|
| 65 |
+
"?QUESTION-MARK",
|
| 66 |
+
"{BRACE",
|
| 67 |
+
"{LEFT-BRACE",
|
| 68 |
+
"{OPEN-BRACE",
|
| 69 |
+
"}CLOSE-BRACE",
|
| 70 |
+
"}RIGHT-BRACE",
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def _parse_dictionary(lines: Iterable[str], exclude_punctuations: bool) -> List[str]:
|
| 75 |
+
_alt_re = re.compile(r"\([0-9]+\)")
|
| 76 |
+
cmudict: List[Tuple[str, List[str]]] = []
|
| 77 |
+
for line in lines:
|
| 78 |
+
if not line or line.startswith(";;;"): # ignore comments
|
| 79 |
+
continue
|
| 80 |
+
|
| 81 |
+
word, phones = line.strip().split(" ")
|
| 82 |
+
if word in _PUNCTUATIONS:
|
| 83 |
+
if exclude_punctuations:
|
| 84 |
+
continue
|
| 85 |
+
# !EXCLAMATION-POINT -> !
|
| 86 |
+
# --DASH -> --
|
| 87 |
+
# ...ELLIPSIS -> ...
|
| 88 |
+
if word.startswith("..."):
|
| 89 |
+
word = "..."
|
| 90 |
+
elif word.startswith("--"):
|
| 91 |
+
word = "--"
|
| 92 |
+
else:
|
| 93 |
+
word = word[0]
|
| 94 |
+
|
| 95 |
+
# if a word have multiple pronunciations, there will be (number) appended to it
|
| 96 |
+
# for example, DATAPOINTS and DATAPOINTS(1),
|
| 97 |
+
# the regular expression `_alt_re` removes the '(1)' and change the word DATAPOINTS(1) to DATAPOINTS
|
| 98 |
+
word = re.sub(_alt_re, "", word)
|
| 99 |
+
phones = phones.split(" ")
|
| 100 |
+
cmudict.append((word, phones))
|
| 101 |
+
|
| 102 |
+
return cmudict
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
class CMUDict(Dataset):
|
| 106 |
+
"""*CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.
|
| 107 |
+
|
| 108 |
+
Args:
|
| 109 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 110 |
+
exclude_punctuations (bool, optional):
|
| 111 |
+
When enabled, exclude the pronounciation of punctuations, such as
|
| 112 |
+
`!EXCLAMATION-POINT` and `#HASH-MARK`.
|
| 113 |
+
download (bool, optional):
|
| 114 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 115 |
+
url (str, optional):
|
| 116 |
+
The URL to download the dictionary from.
|
| 117 |
+
(default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
|
| 118 |
+
url_symbols (str, optional):
|
| 119 |
+
The URL to download the list of symbols from.
|
| 120 |
+
(default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
|
| 121 |
+
"""
|
| 122 |
+
|
| 123 |
+
def __init__(
|
| 124 |
+
self,
|
| 125 |
+
root: Union[str, Path],
|
| 126 |
+
exclude_punctuations: bool = True,
|
| 127 |
+
*,
|
| 128 |
+
download: bool = False,
|
| 129 |
+
url: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b",
|
| 130 |
+
url_symbols: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols",
|
| 131 |
+
) -> None:
|
| 132 |
+
|
| 133 |
+
self.exclude_punctuations = exclude_punctuations
|
| 134 |
+
|
| 135 |
+
self._root_path = Path(root)
|
| 136 |
+
if not os.path.isdir(self._root_path):
|
| 137 |
+
raise RuntimeError(f"The root directory does not exist; {root}")
|
| 138 |
+
|
| 139 |
+
dict_file = self._root_path / os.path.basename(url)
|
| 140 |
+
symbol_file = self._root_path / os.path.basename(url_symbols)
|
| 141 |
+
if not os.path.exists(dict_file):
|
| 142 |
+
if not download:
|
| 143 |
+
raise RuntimeError(
|
| 144 |
+
"The dictionary file is not found in the following location. "
|
| 145 |
+
f"Set `download=True` to download it. {dict_file}"
|
| 146 |
+
)
|
| 147 |
+
checksum = _CHECKSUMS.get(url, None)
|
| 148 |
+
download_url_to_file(url, dict_file, checksum)
|
| 149 |
+
if not os.path.exists(symbol_file):
|
| 150 |
+
if not download:
|
| 151 |
+
raise RuntimeError(
|
| 152 |
+
"The symbol file is not found in the following location. "
|
| 153 |
+
f"Set `download=True` to download it. {symbol_file}"
|
| 154 |
+
)
|
| 155 |
+
checksum = _CHECKSUMS.get(url_symbols, None)
|
| 156 |
+
download_url_to_file(url_symbols, symbol_file, checksum)
|
| 157 |
+
|
| 158 |
+
with open(symbol_file, "r") as text:
|
| 159 |
+
self._symbols = [line.strip() for line in text.readlines()]
|
| 160 |
+
|
| 161 |
+
with open(dict_file, "r", encoding="latin-1") as text:
|
| 162 |
+
self._dictionary = _parse_dictionary(text.readlines(), exclude_punctuations=self.exclude_punctuations)
|
| 163 |
+
|
| 164 |
+
def __getitem__(self, n: int) -> Tuple[str, List[str]]:
|
| 165 |
+
"""Load the n-th sample from the dataset.
|
| 166 |
+
|
| 167 |
+
Args:
|
| 168 |
+
n (int): The index of the sample to be loaded.
|
| 169 |
+
|
| 170 |
+
Returns:
|
| 171 |
+
Tuple of a word and its phonemes
|
| 172 |
+
|
| 173 |
+
str:
|
| 174 |
+
Word
|
| 175 |
+
List[str]:
|
| 176 |
+
Phonemes
|
| 177 |
+
"""
|
| 178 |
+
return self._dictionary[n]
|
| 179 |
+
|
| 180 |
+
def __len__(self) -> int:
|
| 181 |
+
return len(self._dictionary)
|
| 182 |
+
|
| 183 |
+
@property
|
| 184 |
+
def symbols(self) -> List[str]:
|
| 185 |
+
"""list[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``."""
|
| 186 |
+
return self._symbols.copy()
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/commonvoice.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import csv
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, List, Tuple, Union
|
| 5 |
+
|
| 6 |
+
import torchaudio
|
| 7 |
+
from torch import Tensor
|
| 8 |
+
from torch.utils.data import Dataset
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def load_commonvoice_item(
|
| 12 |
+
line: List[str], header: List[str], path: str, folder_audio: str, ext_audio: str
|
| 13 |
+
) -> Tuple[Tensor, int, Dict[str, str]]:
|
| 14 |
+
# Each line as the following data:
|
| 15 |
+
# client_id, path, sentence, up_votes, down_votes, age, gender, accent
|
| 16 |
+
|
| 17 |
+
if header[1] != "path":
|
| 18 |
+
raise ValueError(f"expect `header[1]` to be 'path', but got {header[1]}")
|
| 19 |
+
fileid = line[1]
|
| 20 |
+
filename = os.path.join(path, folder_audio, fileid)
|
| 21 |
+
if not filename.endswith(ext_audio):
|
| 22 |
+
filename += ext_audio
|
| 23 |
+
waveform, sample_rate = torchaudio.load(filename)
|
| 24 |
+
|
| 25 |
+
dic = dict(zip(header, line))
|
| 26 |
+
|
| 27 |
+
return waveform, sample_rate, dic
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class COMMONVOICE(Dataset):
    """*CommonVoice* :cite:`ardila2020common` dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is located.
            (Where the ``tsv`` file is present.)
        tsv (str, optional):
            The name of the tsv file used to construct the metadata, such as
            ``"train.tsv"``, ``"test.tsv"``, ``"dev.tsv"``, ``"invalidated.tsv"``,
            ``"validated.tsv"`` and ``"other.tsv"``. (default: ``"train.tsv"``)
    """

    _ext_txt = ".txt"
    _ext_audio = ".mp3"
    _folder_audio = "clips"

    def __init__(self, root: Union[str, Path], tsv: str = "train.tsv") -> None:
        # Normalize Path objects to plain strings for os.path operations.
        self._path = os.fspath(root)
        self._tsv = os.path.join(self._path, tsv)

        # Materialize the TSV up front: first row is the header, the rest
        # are one sample per row.
        with open(self._tsv, "r") as tsv_file:
            rows = csv.reader(tsv_file, delimiter="\t")
            self._header = next(rows)
            self._walker = [row for row in rows]

    def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            Dict[str, str]:
                Dictionary containing the following items from the corresponding TSV file;

                * ``"client_id"``
                * ``"path"``
                * ``"sentence"``
                * ``"up_votes"``
                * ``"down_votes"``
                * ``"age"``
                * ``"gender"``
                * ``"accent"``
        """
        return load_commonvoice_item(
            self._walker[n], self._header, self._path, self._folder_audio, self._ext_audio
        )

    def __len__(self) -> int:
        """Return the number of data rows read from the TSV file."""
        return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/dr_vctk.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from typing import Dict, Tuple, Union
|
| 3 |
+
|
| 4 |
+
import torchaudio
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_zip
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
_URL = "https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip"
# Expected archive digest, passed to download_url_to_file as hash_prefix.
_CHECKSUM = "781f12f4406ed36ed27ae3bce55da47ba176e2d8bae67319e389e07b2c9bd769"
_SUPPORTED_SUBSETS = {"train", "test"}


class DR_VCTK(Dataset):
    """*Device Recorded VCTK (Small subset version)* :cite:`Sarfjoo2018DeviceRV` dataset.

    Each sample pairs a clean recording with the same utterance recorded on a
    device, together with the recording's source and channel metadata.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found.
        subset (str): The subset to use. Can be one of ``"train"`` and ``"test"``. (default: ``"train"``).
        download (bool):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str): The URL to download the dataset from.
            (default: ``"https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip"``)

    Raises:
        RuntimeError: If ``subset`` is unsupported, or the dataset is missing
            and ``download`` is ``False``.
    """

    def __init__(
        self,
        root: Union[str, Path],
        subset: str = "train",
        *,
        download: bool = False,
        url: str = _URL,
    ) -> None:
        if subset not in _SUPPORTED_SUBSETS:
            raise RuntimeError(
                f"The subset '{subset}' does not match any of the supported subsets: {_SUPPORTED_SUBSETS}"
            )

        root = Path(root).expanduser()
        archive = root / "DR-VCTK.zip"

        self._subset = subset
        self._path = root / "DR-VCTK" / "DR-VCTK"
        self._clean_audio_dir = self._path / f"clean_{self._subset}set_wav_16k"
        self._noisy_audio_dir = self._path / f"device-recorded_{self._subset}set_wav_16k"
        self._config_filepath = self._path / "configurations" / f"{self._subset}_ch_log.txt"

        # Download/extract only when the extracted tree is not already present;
        # a previously downloaded archive is reused without re-downloading.
        if not self._path.is_dir():
            if not archive.is_file():
                if not download:
                    raise RuntimeError("Dataset not found. Please use `download=True` to download it.")
                download_url_to_file(url, archive, hash_prefix=_CHECKSUM)
            _extract_zip(archive, root)

        self._config = self._load_config(self._config_filepath)
        self._filename_list = sorted(self._config)

    def _load_config(self, filepath: str) -> Dict[str, Tuple[str, int]]:
        """Parse a channel-log file into ``{filename: (source, channel_id)}``.

        Args:
            filepath (str): Path to the tab-separated ``*_ch_log.txt`` file.

        Returns:
            Dict[str, Tuple[str, int]]: Mapping from wav filename to its
            recording source and integer channel id.
        """
        # The train log carries two header rows, the test log only one.
        skip_rows = 2 if self._subset == "train" else 1

        config = {}
        with open(filepath) as f:
            for i, line in enumerate(f):
                # Skip header rows and blank lines. Fix: the previous check
                # `not line` could never fire because lines yielded by file
                # iteration keep their trailing newline ("\n" is truthy), so a
                # blank line used to crash the tab-split below with ValueError.
                if i < skip_rows or not line.strip():
                    continue
                filename, source, channel_id = line.strip().split("\t")
                config[filename] = (source, int(channel_id))
        return config

    def _load_dr_vctk_item(self, filename: str) -> Tuple[Tensor, int, Tensor, int, str, str, str, int]:
        """Load the clean/noisy waveform pair and metadata for one filename."""
        # Filenames look like "<speaker>_<utterance>.wav".
        speaker_id, utterance_id = filename.split(".")[0].split("_")
        source, channel_id = self._config[filename]
        file_clean_audio = self._clean_audio_dir / filename
        file_noisy_audio = self._noisy_audio_dir / filename
        waveform_clean, sample_rate_clean = torchaudio.load(file_clean_audio)
        waveform_noisy, sample_rate_noisy = torchaudio.load(file_noisy_audio)
        return (
            waveform_clean,
            sample_rate_clean,
            waveform_noisy,
            sample_rate_noisy,
            speaker_id,
            utterance_id,
            source,
            channel_id,
        )

    def __getitem__(self, n: int) -> Tuple[Tensor, int, Tensor, int, str, str, str, int]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Clean waveform
            int:
                Sample rate of the clean waveform
            Tensor:
                Noisy waveform
            int:
                Sample rate of the noisy waveform
            str:
                Speaker ID
            str:
                Utterance ID
            str:
                Source
            int:
                Channel ID
        """
        filename = self._filename_list[n]
        return self._load_dr_vctk_item(filename)

    def __len__(self) -> int:
        """Return the number of entries parsed from the channel-log file."""
        return len(self._filename_list)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/fluentcommands.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import csv
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Tuple, Union
|
| 5 |
+
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio.datasets.utils import _load_waveform
|
| 9 |
+
|
| 10 |
+
SAMPLE_RATE = 16000


class FluentSpeechCommands(Dataset):
    """*Fluent Speech Commands* :cite:`fluent` dataset

    Args:
        root (str of Path): Path to the directory where the dataset is found.
        subset (str, optional): subset of the dataset to use.
            Options: [``"train"``, ``"valid"``, ``"test"``].
            (Default: ``"train"``)
    """

    def __init__(self, root: Union[str, Path], subset: str = "train"):
        if subset not in ["train", "valid", "test"]:
            raise ValueError("`subset` must be one of ['train', 'valid', 'test']")

        self._path = os.path.join(os.fspath(root), "fluent_speech_commands_dataset")
        if not os.path.isdir(self._path):
            raise RuntimeError("Dataset not found.")

        # Read the whole subset CSV: row 0 is the header, the rest are samples.
        subset_path = os.path.join(self._path, "data", f"{subset}_data.csv")
        with open(subset_path) as subset_csv:
            rows = list(csv.reader(subset_csv))
        self.header, self.data = rows[0], rows[1:]

    def get_metadata(self, n: int) -> Tuple[str, int, str, int, str, str, str, str]:
        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
            int:
                Speaker ID
            str:
                Transcription
            str:
                Action
            str:
                Object
            str:
                Location
        """
        sample = self.data[n]

        # The CSV "path" column stores forward-slash paths; keep only the
        # clip's stem (name before the first dot).
        raw_path = sample[self.header.index("path")]
        file_name = raw_path.split("/")[-1].partition(".")[0]
        speaker_id, transcription, action, obj, location = sample[2:]
        file_path = os.path.join("wavs", "speakers", speaker_id, f"{file_name}.wav")

        return file_path, SAMPLE_RATE, file_name, speaker_id, transcription, action, obj, location

    def __len__(self) -> int:
        """Return the number of samples in the subset CSV."""
        return len(self.data)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, str, str, str, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
            int:
                Speaker ID
            str:
                Transcription
            str:
                Action
            str:
                Object
            str:
                Location
        """
        metadata = self.get_metadata(n)
        waveform = _load_waveform(self._path, metadata[0], metadata[1])
        return (waveform,) + metadata[1:]
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/gtzan.py
ADDED
|
@@ -0,0 +1,1118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torchaudio
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 10 |
+
|
| 11 |
+
# The following lists prefixed with `filtered_` provide a filtered split
|
| 12 |
+
# that:
|
| 13 |
+
#
|
| 14 |
+
# a. Mitigate a known issue with GTZAN (duplication)
|
| 15 |
+
#
|
| 16 |
+
# b. Provide a standard split for testing it against other
|
| 17 |
+
# methods (e.g. the one in jordipons/sklearn-audio-transfer-learning).
|
| 18 |
+
#
|
| 19 |
+
# Those are used when GTZAN is initialised with the `filtered` keyword.
|
| 20 |
+
# The split was taken from (github) jordipons/sklearn-audio-transfer-learning.
|
| 21 |
+
|
| 22 |
+
# The ten genre labels of the GTZAN corpus. Clips are named
# "<genre>.000NN", so these strings double as filename prefixes.
gtzan_genres = [
    "blues",
    "classical",
    "country",
    "disco",
    "hiphop",
    "jazz",
    "metal",
    "pop",
    "reggae",
    "rock",
]
|
| 34 |
+
|
| 35 |
+
# Test split of the "filtered" GTZAN partition (see the module comment
# above): clip stems, without the ".wav"/".au" extension, taken from
# jordipons/sklearn-audio-transfer-learning.
filtered_test = [
    "blues.00012",
    "blues.00013",
    "blues.00014",
    "blues.00015",
    "blues.00016",
    "blues.00017",
    "blues.00018",
    "blues.00019",
    "blues.00020",
    "blues.00021",
    "blues.00022",
    "blues.00023",
    "blues.00024",
    "blues.00025",
    "blues.00026",
    "blues.00027",
    "blues.00028",
    "blues.00061",
    "blues.00062",
    "blues.00063",
    "blues.00064",
    "blues.00065",
    "blues.00066",
    "blues.00067",
    "blues.00068",
    "blues.00069",
    "blues.00070",
    "blues.00071",
    "blues.00072",
    "blues.00098",
    "blues.00099",
    "classical.00011",
    "classical.00012",
    "classical.00013",
    "classical.00014",
    "classical.00015",
    "classical.00016",
    "classical.00017",
    "classical.00018",
    "classical.00019",
    "classical.00020",
    "classical.00021",
    "classical.00022",
    "classical.00023",
    "classical.00024",
    "classical.00025",
    "classical.00026",
    "classical.00027",
    "classical.00028",
    "classical.00029",
    "classical.00034",
    "classical.00035",
    "classical.00036",
    "classical.00037",
    "classical.00038",
    "classical.00039",
    "classical.00040",
    "classical.00041",
    "classical.00049",
    "classical.00077",
    "classical.00078",
    "classical.00079",
    "country.00030",
    "country.00031",
    "country.00032",
    "country.00033",
    "country.00034",
    "country.00035",
    "country.00036",
    "country.00037",
    "country.00038",
    "country.00039",
    "country.00040",
    "country.00043",
    "country.00044",
    "country.00046",
    "country.00047",
    "country.00048",
    "country.00050",
    "country.00051",
    "country.00053",
    "country.00054",
    "country.00055",
    "country.00056",
    "country.00057",
    "country.00058",
    "country.00059",
    "country.00060",
    "country.00061",
    "country.00062",
    "country.00063",
    "country.00064",
    "disco.00001",
    "disco.00021",
    "disco.00058",
    "disco.00062",
    "disco.00063",
    "disco.00064",
    "disco.00065",
    "disco.00066",
    "disco.00069",
    "disco.00076",
    "disco.00077",
    "disco.00078",
    "disco.00079",
    "disco.00080",
    "disco.00081",
    "disco.00082",
    "disco.00083",
    "disco.00084",
    "disco.00085",
    "disco.00086",
    "disco.00087",
    "disco.00088",
    "disco.00091",
    "disco.00092",
    "disco.00093",
    "disco.00094",
    "disco.00096",
    "disco.00097",
    "disco.00099",
    "hiphop.00000",
    "hiphop.00026",
    "hiphop.00027",
    "hiphop.00030",
    "hiphop.00040",
    "hiphop.00043",
    "hiphop.00044",
    "hiphop.00045",
    "hiphop.00051",
    "hiphop.00052",
    "hiphop.00053",
    "hiphop.00054",
    "hiphop.00062",
    "hiphop.00063",
    "hiphop.00064",
    "hiphop.00065",
    "hiphop.00066",
    "hiphop.00067",
    "hiphop.00068",
    "hiphop.00069",
    "hiphop.00070",
    "hiphop.00071",
    "hiphop.00072",
    "hiphop.00073",
    "hiphop.00074",
    "hiphop.00075",
    "hiphop.00099",
    "jazz.00073",
    "jazz.00074",
    "jazz.00075",
    "jazz.00076",
    "jazz.00077",
    "jazz.00078",
    "jazz.00079",
    "jazz.00080",
    "jazz.00081",
    "jazz.00082",
    "jazz.00083",
    "jazz.00084",
    "jazz.00085",
    "jazz.00086",
    "jazz.00087",
    "jazz.00088",
    "jazz.00089",
    "jazz.00090",
    "jazz.00091",
    "jazz.00092",
    "jazz.00093",
    "jazz.00094",
    "jazz.00095",
    "jazz.00096",
    "jazz.00097",
    "jazz.00098",
    "jazz.00099",
    "metal.00012",
    "metal.00013",
    "metal.00014",
    "metal.00015",
    "metal.00022",
    "metal.00023",
    "metal.00025",
    "metal.00026",
    "metal.00027",
    "metal.00028",
    "metal.00029",
    "metal.00030",
    "metal.00031",
    "metal.00032",
    "metal.00033",
    "metal.00038",
    "metal.00039",
    "metal.00067",
    "metal.00070",
    "metal.00073",
    "metal.00074",
    "metal.00075",
    "metal.00078",
    "metal.00083",
    "metal.00085",
    "metal.00087",
    "metal.00088",
    "pop.00000",
    "pop.00001",
    "pop.00013",
    "pop.00014",
    "pop.00043",
    "pop.00063",
    "pop.00064",
    "pop.00065",
    "pop.00066",
    "pop.00069",
    "pop.00070",
    "pop.00071",
    "pop.00072",
    "pop.00073",
    "pop.00074",
    "pop.00075",
    "pop.00076",
    "pop.00077",
    "pop.00078",
    "pop.00079",
    "pop.00082",
    "pop.00088",
    "pop.00089",
    "pop.00090",
    "pop.00091",
    "pop.00092",
    "pop.00093",
    "pop.00094",
    "pop.00095",
    "pop.00096",
    "reggae.00034",
    "reggae.00035",
    "reggae.00036",
    "reggae.00037",
    "reggae.00038",
    "reggae.00039",
    "reggae.00040",
    "reggae.00046",
    "reggae.00047",
    "reggae.00048",
    "reggae.00052",
    "reggae.00053",
    "reggae.00064",
    "reggae.00065",
    "reggae.00066",
    "reggae.00067",
    "reggae.00068",
    "reggae.00071",
    "reggae.00079",
    "reggae.00082",
    "reggae.00083",
    "reggae.00084",
    "reggae.00087",
    "reggae.00088",
    "reggae.00089",
    "reggae.00090",
    "rock.00010",
    "rock.00011",
    "rock.00012",
    "rock.00013",
    "rock.00014",
    "rock.00015",
    "rock.00027",
    "rock.00028",
    "rock.00029",
    "rock.00030",
    "rock.00031",
    "rock.00032",
    "rock.00033",
    "rock.00034",
    "rock.00035",
    "rock.00036",
    "rock.00037",
    "rock.00039",
    "rock.00040",
    "rock.00041",
    "rock.00042",
    "rock.00043",
    "rock.00044",
    "rock.00045",
    "rock.00046",
    "rock.00047",
    "rock.00048",
    "rock.00086",
    "rock.00087",
    "rock.00088",
    "rock.00089",
    "rock.00090",
]
|
| 327 |
+
|
| 328 |
+
filtered_train = [
|
| 329 |
+
"blues.00029",
|
| 330 |
+
"blues.00030",
|
| 331 |
+
"blues.00031",
|
| 332 |
+
"blues.00032",
|
| 333 |
+
"blues.00033",
|
| 334 |
+
"blues.00034",
|
| 335 |
+
"blues.00035",
|
| 336 |
+
"blues.00036",
|
| 337 |
+
"blues.00037",
|
| 338 |
+
"blues.00038",
|
| 339 |
+
"blues.00039",
|
| 340 |
+
"blues.00040",
|
| 341 |
+
"blues.00041",
|
| 342 |
+
"blues.00042",
|
| 343 |
+
"blues.00043",
|
| 344 |
+
"blues.00044",
|
| 345 |
+
"blues.00045",
|
| 346 |
+
"blues.00046",
|
| 347 |
+
"blues.00047",
|
| 348 |
+
"blues.00048",
|
| 349 |
+
"blues.00049",
|
| 350 |
+
"blues.00073",
|
| 351 |
+
"blues.00074",
|
| 352 |
+
"blues.00075",
|
| 353 |
+
"blues.00076",
|
| 354 |
+
"blues.00077",
|
| 355 |
+
"blues.00078",
|
| 356 |
+
"blues.00079",
|
| 357 |
+
"blues.00080",
|
| 358 |
+
"blues.00081",
|
| 359 |
+
"blues.00082",
|
| 360 |
+
"blues.00083",
|
| 361 |
+
"blues.00084",
|
| 362 |
+
"blues.00085",
|
| 363 |
+
"blues.00086",
|
| 364 |
+
"blues.00087",
|
| 365 |
+
"blues.00088",
|
| 366 |
+
"blues.00089",
|
| 367 |
+
"blues.00090",
|
| 368 |
+
"blues.00091",
|
| 369 |
+
"blues.00092",
|
| 370 |
+
"blues.00093",
|
| 371 |
+
"blues.00094",
|
| 372 |
+
"blues.00095",
|
| 373 |
+
"blues.00096",
|
| 374 |
+
"blues.00097",
|
| 375 |
+
"classical.00030",
|
| 376 |
+
"classical.00031",
|
| 377 |
+
"classical.00032",
|
| 378 |
+
"classical.00033",
|
| 379 |
+
"classical.00043",
|
| 380 |
+
"classical.00044",
|
| 381 |
+
"classical.00045",
|
| 382 |
+
"classical.00046",
|
| 383 |
+
"classical.00047",
|
| 384 |
+
"classical.00048",
|
| 385 |
+
"classical.00050",
|
| 386 |
+
"classical.00051",
|
| 387 |
+
"classical.00052",
|
| 388 |
+
"classical.00053",
|
| 389 |
+
"classical.00054",
|
| 390 |
+
"classical.00055",
|
| 391 |
+
"classical.00056",
|
| 392 |
+
"classical.00057",
|
| 393 |
+
"classical.00058",
|
| 394 |
+
"classical.00059",
|
| 395 |
+
"classical.00060",
|
| 396 |
+
"classical.00061",
|
| 397 |
+
"classical.00062",
|
| 398 |
+
"classical.00063",
|
| 399 |
+
"classical.00064",
|
| 400 |
+
"classical.00065",
|
| 401 |
+
"classical.00066",
|
| 402 |
+
"classical.00067",
|
| 403 |
+
"classical.00080",
|
| 404 |
+
"classical.00081",
|
| 405 |
+
"classical.00082",
|
| 406 |
+
"classical.00083",
|
| 407 |
+
"classical.00084",
|
| 408 |
+
"classical.00085",
|
| 409 |
+
"classical.00086",
|
| 410 |
+
"classical.00087",
|
| 411 |
+
"classical.00088",
|
| 412 |
+
"classical.00089",
|
| 413 |
+
"classical.00090",
|
| 414 |
+
"classical.00091",
|
| 415 |
+
"classical.00092",
|
| 416 |
+
"classical.00093",
|
| 417 |
+
"classical.00094",
|
| 418 |
+
"classical.00095",
|
| 419 |
+
"classical.00096",
|
| 420 |
+
"classical.00097",
|
| 421 |
+
"classical.00098",
|
| 422 |
+
"classical.00099",
|
| 423 |
+
"country.00019",
|
| 424 |
+
"country.00020",
|
| 425 |
+
"country.00021",
|
| 426 |
+
"country.00022",
|
| 427 |
+
"country.00023",
|
| 428 |
+
"country.00024",
|
| 429 |
+
"country.00025",
|
| 430 |
+
"country.00026",
|
| 431 |
+
"country.00028",
|
| 432 |
+
"country.00029",
|
| 433 |
+
"country.00065",
|
| 434 |
+
"country.00066",
|
| 435 |
+
"country.00067",
|
| 436 |
+
"country.00068",
|
| 437 |
+
"country.00069",
|
| 438 |
+
"country.00070",
|
| 439 |
+
"country.00071",
|
| 440 |
+
"country.00072",
|
| 441 |
+
"country.00073",
|
| 442 |
+
"country.00074",
|
| 443 |
+
"country.00075",
|
| 444 |
+
"country.00076",
|
| 445 |
+
"country.00077",
|
| 446 |
+
"country.00078",
|
| 447 |
+
"country.00079",
|
| 448 |
+
"country.00080",
|
| 449 |
+
"country.00081",
|
| 450 |
+
"country.00082",
|
| 451 |
+
"country.00083",
|
| 452 |
+
"country.00084",
|
| 453 |
+
"country.00085",
|
| 454 |
+
"country.00086",
|
| 455 |
+
"country.00087",
|
| 456 |
+
"country.00088",
|
| 457 |
+
"country.00089",
|
| 458 |
+
"country.00090",
|
| 459 |
+
"country.00091",
|
| 460 |
+
"country.00092",
|
| 461 |
+
"country.00093",
|
| 462 |
+
"country.00094",
|
| 463 |
+
"country.00095",
|
| 464 |
+
"country.00096",
|
| 465 |
+
"country.00097",
|
| 466 |
+
"country.00098",
|
| 467 |
+
"country.00099",
|
| 468 |
+
"disco.00005",
|
| 469 |
+
"disco.00015",
|
| 470 |
+
"disco.00016",
|
| 471 |
+
"disco.00017",
|
| 472 |
+
"disco.00018",
|
| 473 |
+
"disco.00019",
|
| 474 |
+
"disco.00020",
|
| 475 |
+
"disco.00022",
|
| 476 |
+
"disco.00023",
|
| 477 |
+
"disco.00024",
|
| 478 |
+
"disco.00025",
|
| 479 |
+
"disco.00026",
|
| 480 |
+
"disco.00027",
|
| 481 |
+
"disco.00028",
|
| 482 |
+
"disco.00029",
|
| 483 |
+
"disco.00030",
|
| 484 |
+
"disco.00031",
|
| 485 |
+
"disco.00032",
|
| 486 |
+
"disco.00033",
|
| 487 |
+
"disco.00034",
|
| 488 |
+
"disco.00035",
|
| 489 |
+
"disco.00036",
|
| 490 |
+
"disco.00037",
|
| 491 |
+
"disco.00039",
|
| 492 |
+
"disco.00040",
|
| 493 |
+
"disco.00041",
|
| 494 |
+
"disco.00042",
|
| 495 |
+
"disco.00043",
|
| 496 |
+
"disco.00044",
|
| 497 |
+
"disco.00045",
|
| 498 |
+
"disco.00047",
|
| 499 |
+
"disco.00049",
|
| 500 |
+
"disco.00053",
|
| 501 |
+
"disco.00054",
|
| 502 |
+
"disco.00056",
|
| 503 |
+
"disco.00057",
|
| 504 |
+
"disco.00059",
|
| 505 |
+
"disco.00061",
|
| 506 |
+
"disco.00070",
|
| 507 |
+
"disco.00073",
|
| 508 |
+
"disco.00074",
|
| 509 |
+
"disco.00089",
|
| 510 |
+
"hiphop.00002",
|
| 511 |
+
"hiphop.00003",
|
| 512 |
+
"hiphop.00004",
|
| 513 |
+
"hiphop.00005",
|
| 514 |
+
"hiphop.00006",
|
| 515 |
+
"hiphop.00007",
|
| 516 |
+
"hiphop.00008",
|
| 517 |
+
"hiphop.00009",
|
| 518 |
+
"hiphop.00010",
|
| 519 |
+
"hiphop.00011",
|
| 520 |
+
"hiphop.00012",
|
| 521 |
+
"hiphop.00013",
|
| 522 |
+
"hiphop.00014",
|
| 523 |
+
"hiphop.00015",
|
| 524 |
+
"hiphop.00016",
|
| 525 |
+
"hiphop.00017",
|
| 526 |
+
"hiphop.00018",
|
| 527 |
+
"hiphop.00019",
|
| 528 |
+
"hiphop.00020",
|
| 529 |
+
"hiphop.00021",
|
| 530 |
+
"hiphop.00022",
|
| 531 |
+
"hiphop.00023",
|
| 532 |
+
"hiphop.00024",
|
| 533 |
+
"hiphop.00025",
|
| 534 |
+
"hiphop.00028",
|
| 535 |
+
"hiphop.00029",
|
| 536 |
+
"hiphop.00031",
|
| 537 |
+
"hiphop.00032",
|
| 538 |
+
"hiphop.00033",
|
| 539 |
+
"hiphop.00034",
|
| 540 |
+
"hiphop.00035",
|
| 541 |
+
"hiphop.00036",
|
| 542 |
+
"hiphop.00037",
|
| 543 |
+
"hiphop.00038",
|
| 544 |
+
"hiphop.00041",
|
| 545 |
+
"hiphop.00042",
|
| 546 |
+
"hiphop.00055",
|
| 547 |
+
"hiphop.00056",
|
| 548 |
+
"hiphop.00057",
|
| 549 |
+
"hiphop.00058",
|
| 550 |
+
"hiphop.00059",
|
| 551 |
+
"hiphop.00060",
|
| 552 |
+
"hiphop.00061",
|
| 553 |
+
"hiphop.00077",
|
| 554 |
+
"hiphop.00078",
|
| 555 |
+
"hiphop.00079",
|
| 556 |
+
"hiphop.00080",
|
| 557 |
+
"jazz.00000",
|
| 558 |
+
"jazz.00001",
|
| 559 |
+
"jazz.00011",
|
| 560 |
+
"jazz.00012",
|
| 561 |
+
"jazz.00013",
|
| 562 |
+
"jazz.00014",
|
| 563 |
+
"jazz.00015",
|
| 564 |
+
"jazz.00016",
|
| 565 |
+
"jazz.00017",
|
| 566 |
+
"jazz.00018",
|
| 567 |
+
"jazz.00019",
|
| 568 |
+
"jazz.00020",
|
| 569 |
+
"jazz.00021",
|
| 570 |
+
"jazz.00022",
|
| 571 |
+
"jazz.00023",
|
| 572 |
+
"jazz.00024",
|
| 573 |
+
"jazz.00041",
|
| 574 |
+
"jazz.00047",
|
| 575 |
+
"jazz.00048",
|
| 576 |
+
"jazz.00049",
|
| 577 |
+
"jazz.00050",
|
| 578 |
+
"jazz.00051",
|
| 579 |
+
"jazz.00052",
|
| 580 |
+
"jazz.00053",
|
| 581 |
+
"jazz.00054",
|
| 582 |
+
"jazz.00055",
|
| 583 |
+
"jazz.00056",
|
| 584 |
+
"jazz.00057",
|
| 585 |
+
"jazz.00058",
|
| 586 |
+
"jazz.00059",
|
| 587 |
+
"jazz.00060",
|
| 588 |
+
"jazz.00061",
|
| 589 |
+
"jazz.00062",
|
| 590 |
+
"jazz.00063",
|
| 591 |
+
"jazz.00064",
|
| 592 |
+
"jazz.00065",
|
| 593 |
+
"jazz.00066",
|
| 594 |
+
"jazz.00067",
|
| 595 |
+
"jazz.00068",
|
| 596 |
+
"jazz.00069",
|
| 597 |
+
"jazz.00070",
|
| 598 |
+
"jazz.00071",
|
| 599 |
+
"jazz.00072",
|
| 600 |
+
"metal.00002",
|
| 601 |
+
"metal.00003",
|
| 602 |
+
"metal.00005",
|
| 603 |
+
"metal.00021",
|
| 604 |
+
"metal.00024",
|
| 605 |
+
"metal.00035",
|
| 606 |
+
"metal.00046",
|
| 607 |
+
"metal.00047",
|
| 608 |
+
"metal.00048",
|
| 609 |
+
"metal.00049",
|
| 610 |
+
"metal.00050",
|
| 611 |
+
"metal.00051",
|
| 612 |
+
"metal.00052",
|
| 613 |
+
"metal.00053",
|
| 614 |
+
"metal.00054",
|
| 615 |
+
"metal.00055",
|
| 616 |
+
"metal.00056",
|
| 617 |
+
"metal.00057",
|
| 618 |
+
"metal.00059",
|
| 619 |
+
"metal.00060",
|
| 620 |
+
"metal.00061",
|
| 621 |
+
"metal.00062",
|
| 622 |
+
"metal.00063",
|
| 623 |
+
"metal.00064",
|
| 624 |
+
"metal.00065",
|
| 625 |
+
"metal.00066",
|
| 626 |
+
"metal.00069",
|
| 627 |
+
"metal.00071",
|
| 628 |
+
"metal.00072",
|
| 629 |
+
"metal.00079",
|
| 630 |
+
"metal.00080",
|
| 631 |
+
"metal.00084",
|
| 632 |
+
"metal.00086",
|
| 633 |
+
"metal.00089",
|
| 634 |
+
"metal.00090",
|
| 635 |
+
"metal.00091",
|
| 636 |
+
"metal.00092",
|
| 637 |
+
"metal.00093",
|
| 638 |
+
"metal.00094",
|
| 639 |
+
"metal.00095",
|
| 640 |
+
"metal.00096",
|
| 641 |
+
"metal.00097",
|
| 642 |
+
"metal.00098",
|
| 643 |
+
"metal.00099",
|
| 644 |
+
"pop.00002",
|
| 645 |
+
"pop.00003",
|
| 646 |
+
"pop.00004",
|
| 647 |
+
"pop.00005",
|
| 648 |
+
"pop.00006",
|
| 649 |
+
"pop.00007",
|
| 650 |
+
"pop.00008",
|
| 651 |
+
"pop.00009",
|
| 652 |
+
"pop.00011",
|
| 653 |
+
"pop.00012",
|
| 654 |
+
"pop.00016",
|
| 655 |
+
"pop.00017",
|
| 656 |
+
"pop.00018",
|
| 657 |
+
"pop.00019",
|
| 658 |
+
"pop.00020",
|
| 659 |
+
"pop.00023",
|
| 660 |
+
"pop.00024",
|
| 661 |
+
"pop.00025",
|
| 662 |
+
"pop.00026",
|
| 663 |
+
"pop.00027",
|
| 664 |
+
"pop.00028",
|
| 665 |
+
"pop.00029",
|
| 666 |
+
"pop.00031",
|
| 667 |
+
"pop.00032",
|
| 668 |
+
"pop.00033",
|
| 669 |
+
"pop.00034",
|
| 670 |
+
"pop.00035",
|
| 671 |
+
"pop.00036",
|
| 672 |
+
"pop.00038",
|
| 673 |
+
"pop.00039",
|
| 674 |
+
"pop.00040",
|
| 675 |
+
"pop.00041",
|
| 676 |
+
"pop.00042",
|
| 677 |
+
"pop.00044",
|
| 678 |
+
"pop.00046",
|
| 679 |
+
"pop.00049",
|
| 680 |
+
"pop.00050",
|
| 681 |
+
"pop.00080",
|
| 682 |
+
"pop.00097",
|
| 683 |
+
"pop.00098",
|
| 684 |
+
"pop.00099",
|
| 685 |
+
"reggae.00000",
|
| 686 |
+
"reggae.00001",
|
| 687 |
+
"reggae.00002",
|
| 688 |
+
"reggae.00004",
|
| 689 |
+
"reggae.00006",
|
| 690 |
+
"reggae.00009",
|
| 691 |
+
"reggae.00011",
|
| 692 |
+
"reggae.00012",
|
| 693 |
+
"reggae.00014",
|
| 694 |
+
"reggae.00015",
|
| 695 |
+
"reggae.00016",
|
| 696 |
+
"reggae.00017",
|
| 697 |
+
"reggae.00018",
|
| 698 |
+
"reggae.00019",
|
| 699 |
+
"reggae.00020",
|
| 700 |
+
"reggae.00021",
|
| 701 |
+
"reggae.00022",
|
| 702 |
+
"reggae.00023",
|
| 703 |
+
"reggae.00024",
|
| 704 |
+
"reggae.00025",
|
| 705 |
+
"reggae.00026",
|
| 706 |
+
"reggae.00027",
|
| 707 |
+
"reggae.00028",
|
| 708 |
+
"reggae.00029",
|
| 709 |
+
"reggae.00030",
|
| 710 |
+
"reggae.00031",
|
| 711 |
+
"reggae.00032",
|
| 712 |
+
"reggae.00042",
|
| 713 |
+
"reggae.00043",
|
| 714 |
+
"reggae.00044",
|
| 715 |
+
"reggae.00045",
|
| 716 |
+
"reggae.00049",
|
| 717 |
+
"reggae.00050",
|
| 718 |
+
"reggae.00051",
|
| 719 |
+
"reggae.00054",
|
| 720 |
+
"reggae.00055",
|
| 721 |
+
"reggae.00056",
|
| 722 |
+
"reggae.00057",
|
| 723 |
+
"reggae.00058",
|
| 724 |
+
"reggae.00059",
|
| 725 |
+
"reggae.00060",
|
| 726 |
+
"reggae.00063",
|
| 727 |
+
"reggae.00069",
|
| 728 |
+
"rock.00000",
|
| 729 |
+
"rock.00001",
|
| 730 |
+
"rock.00002",
|
| 731 |
+
"rock.00003",
|
| 732 |
+
"rock.00004",
|
| 733 |
+
"rock.00005",
|
| 734 |
+
"rock.00006",
|
| 735 |
+
"rock.00007",
|
| 736 |
+
"rock.00008",
|
| 737 |
+
"rock.00009",
|
| 738 |
+
"rock.00016",
|
| 739 |
+
"rock.00017",
|
| 740 |
+
"rock.00018",
|
| 741 |
+
"rock.00019",
|
| 742 |
+
"rock.00020",
|
| 743 |
+
"rock.00021",
|
| 744 |
+
"rock.00022",
|
| 745 |
+
"rock.00023",
|
| 746 |
+
"rock.00024",
|
| 747 |
+
"rock.00025",
|
| 748 |
+
"rock.00026",
|
| 749 |
+
"rock.00057",
|
| 750 |
+
"rock.00058",
|
| 751 |
+
"rock.00059",
|
| 752 |
+
"rock.00060",
|
| 753 |
+
"rock.00061",
|
| 754 |
+
"rock.00062",
|
| 755 |
+
"rock.00063",
|
| 756 |
+
"rock.00064",
|
| 757 |
+
"rock.00065",
|
| 758 |
+
"rock.00066",
|
| 759 |
+
"rock.00067",
|
| 760 |
+
"rock.00068",
|
| 761 |
+
"rock.00069",
|
| 762 |
+
"rock.00070",
|
| 763 |
+
"rock.00091",
|
| 764 |
+
"rock.00092",
|
| 765 |
+
"rock.00093",
|
| 766 |
+
"rock.00094",
|
| 767 |
+
"rock.00095",
|
| 768 |
+
"rock.00096",
|
| 769 |
+
"rock.00097",
|
| 770 |
+
"rock.00098",
|
| 771 |
+
"rock.00099",
|
| 772 |
+
]
|
| 773 |
+
|
| 774 |
+
# Pre-defined validation split of GTZAN, used when ``subset="validation"``.
# Entries are file ids of the form ``<genre>.<5-digit id>`` (no audio extension),
# matching the on-disk layout ``<root>/<genre>/<genre>.<id>.wav``.
filtered_valid = [
    "blues.00000",
    "blues.00001",
    "blues.00002",
    "blues.00003",
    "blues.00004",
    "blues.00005",
    "blues.00006",
    "blues.00007",
    "blues.00008",
    "blues.00009",
    "blues.00010",
    "blues.00011",
    "blues.00050",
    "blues.00051",
    "blues.00052",
    "blues.00053",
    "blues.00054",
    "blues.00055",
    "blues.00056",
    "blues.00057",
    "blues.00058",
    "blues.00059",
    "blues.00060",
    "classical.00000",
    "classical.00001",
    "classical.00002",
    "classical.00003",
    "classical.00004",
    "classical.00005",
    "classical.00006",
    "classical.00007",
    "classical.00008",
    "classical.00009",
    "classical.00010",
    "classical.00068",
    "classical.00069",
    "classical.00070",
    "classical.00071",
    "classical.00072",
    "classical.00073",
    "classical.00074",
    "classical.00075",
    "classical.00076",
    "country.00000",
    "country.00001",
    "country.00002",
    "country.00003",
    "country.00004",
    "country.00005",
    "country.00006",
    "country.00007",
    "country.00009",
    "country.00010",
    "country.00011",
    "country.00012",
    "country.00013",
    "country.00014",
    "country.00015",
    "country.00016",
    "country.00017",
    "country.00018",
    "country.00027",
    "country.00041",
    "country.00042",
    "country.00045",
    "country.00049",
    "disco.00000",
    "disco.00002",
    "disco.00003",
    "disco.00004",
    "disco.00006",
    "disco.00007",
    "disco.00008",
    "disco.00009",
    "disco.00010",
    "disco.00011",
    "disco.00012",
    "disco.00013",
    "disco.00014",
    "disco.00046",
    "disco.00048",
    "disco.00052",
    "disco.00067",
    "disco.00068",
    "disco.00072",
    "disco.00075",
    "disco.00090",
    "disco.00095",
    "hiphop.00081",
    "hiphop.00082",
    "hiphop.00083",
    "hiphop.00084",
    "hiphop.00085",
    "hiphop.00086",
    "hiphop.00087",
    "hiphop.00088",
    "hiphop.00089",
    "hiphop.00090",
    "hiphop.00091",
    "hiphop.00092",
    "hiphop.00093",
    "hiphop.00094",
    "hiphop.00095",
    "hiphop.00096",
    "hiphop.00097",
    "hiphop.00098",
    "jazz.00002",
    "jazz.00003",
    "jazz.00004",
    "jazz.00005",
    "jazz.00006",
    "jazz.00007",
    "jazz.00008",
    "jazz.00009",
    "jazz.00010",
    "jazz.00025",
    "jazz.00026",
    "jazz.00027",
    "jazz.00028",
    "jazz.00029",
    "jazz.00030",
    "jazz.00031",
    "jazz.00032",
    "metal.00000",
    "metal.00001",
    "metal.00006",
    "metal.00007",
    "metal.00008",
    "metal.00009",
    "metal.00010",
    "metal.00011",
    "metal.00016",
    "metal.00017",
    "metal.00018",
    "metal.00019",
    "metal.00020",
    "metal.00036",
    "metal.00037",
    "metal.00068",
    "metal.00076",
    "metal.00077",
    "metal.00081",
    "metal.00082",
    "pop.00010",
    "pop.00053",
    "pop.00055",
    "pop.00058",
    "pop.00059",
    "pop.00060",
    "pop.00061",
    "pop.00062",
    "pop.00081",
    "pop.00083",
    "pop.00084",
    "pop.00085",
    "pop.00086",
    "reggae.00061",
    "reggae.00062",
    "reggae.00070",
    "reggae.00072",
    "reggae.00074",
    "reggae.00076",
    "reggae.00077",
    "reggae.00078",
    "reggae.00085",
    "reggae.00092",
    "reggae.00093",
    "reggae.00094",
    "reggae.00095",
    "reggae.00096",
    "reggae.00097",
    "reggae.00098",
    "reggae.00099",
    "rock.00038",
    "rock.00049",
    "rock.00050",
    "rock.00051",
    "rock.00052",
    "rock.00053",
    "rock.00054",
    "rock.00055",
    "rock.00056",
    "rock.00071",
    "rock.00072",
    "rock.00073",
    "rock.00074",
    "rock.00075",
    "rock.00076",
    "rock.00077",
    "rock.00078",
    "rock.00079",
    "rock.00080",
    "rock.00081",
    "rock.00082",
    "rock.00083",
    "rock.00084",
    "rock.00085",
]
|
| 973 |
+
|
| 974 |
+
|
| 975 |
+
# Default location of the GTZAN archive (see class docstring: the link has
# been unreliable since October 2022).
URL = "http://opihi.cs.uvic.ca/sound/genres.tar.gz"
# Name of the top-level directory inside the downloaded archive.
FOLDER_IN_ARCHIVE = "genres"
# Expected hash prefix for each download URL, passed to `download_url_to_file`
# to validate the archive.
_CHECKSUMS = {
    "http://opihi.cs.uvic.ca/sound/genres.tar.gz": "24347e0223d2ba798e0a558c4c172d9d4a19c00bb7963fe055d183dadb4ef2c6"
}
|
| 980 |
+
|
| 981 |
+
|
| 982 |
+
def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int, str]:
    """Load one GTZAN clip and return its waveform, sample rate, and genre.

    Note: the original annotation declared ``Tuple[Tensor, str]`` although the
    function returns three values; the annotation is corrected here.

    Args:
        fileid (str): File identifier of the form ``<genre>.<id>``, e.g. ``blues.00078``.
        path (str): Root directory of the extracted dataset; audio is stored
            one subdirectory per genre (``<path>/<genre>/``).
        ext_audio (str): Audio file extension including the leading dot (e.g. ``".wav"``).

    Returns:
        Tuple[Tensor, int, str]: Waveform, sample rate, and genre label.
    """
    # Filenames are of the form label.id, e.g. blues.00078
    label, _ = fileid.split(".")

    # Read wav: audio lives under the genre-named subdirectory.
    file_audio = os.path.join(path, label, fileid + ext_audio)
    waveform, sample_rate = torchaudio.load(file_audio)

    return waveform, sample_rate, label
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
class GTZAN(Dataset):
    """*GTZAN* :cite:`tzanetakis_essl_cook_2001` dataset.

    Note:
        Please see http://marsyas.info/downloads/datasets.html if you are planning to use
        this dataset to publish results.

    Note:
        As of October 2022, the download link is not currently working. Setting ``download=True``
        in GTZAN dataset will result in a URL connection error.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        url (str, optional): The URL to download the dataset from.
            (default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
        folder_in_archive (str, optional): The top-level directory of the dataset.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        subset (str or None, optional): Which subset of the dataset to use.
            One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
            If ``None``, the entire dataset is used. (default: ``None``).
    """

    # Audio files in GTZAN are stored as WAV.
    _ext_audio = ".wav"

    def __init__(
        self,
        root: Union[str, Path],
        url: str = URL,
        folder_in_archive: str = FOLDER_IN_ARCHIVE,
        download: bool = False,
        subset: Optional[str] = None,
    ) -> None:

        # Get string representation of 'root' in case Path object is passed
        root = os.fspath(root)

        self.root = root
        self.url = url
        self.folder_in_archive = folder_in_archive
        self.download = download
        self.subset = subset

        if subset is not None and subset not in ["training", "validation", "testing"]:
            raise ValueError("When `subset` is not None, it must be one of ['training', 'validation', 'testing'].")

        # Archive file name is derived from the last component of the URL.
        archive = os.path.basename(url)
        archive = os.path.join(root, archive)
        self._path = os.path.join(root, folder_in_archive)

        if download:
            if not os.path.isdir(self._path):
                if not os.path.isfile(archive):
                    # Validate the download against the known hash prefix, when one exists.
                    checksum = _CHECKSUMS.get(url, None)
                    download_url_to_file(url, archive, hash_prefix=checksum)
                _extract_tar(archive)

        if not os.path.isdir(self._path):
            raise RuntimeError("Dataset not found. Please use `download=True` to download it.")

        if self.subset is None:
            # Check every subdirectory under dataset root
            # which has the same name as the genres in
            # GTZAN (e.g. `root_dir'/blues/, `root_dir'/rock, etc.)
            # This lets users remove or move around song files,
            # useful when e.g. they want to use only some of the files
            # in a genre or want to label other files with a different
            # genre.
            self._walker = []

            root = os.path.expanduser(self._path)

            for directory in gtzan_genres:
                fulldir = os.path.join(root, directory)

                # Tolerate missing genre directories (e.g. a trimmed dataset copy).
                if not os.path.exists(fulldir):
                    continue

                # Sort so that sample indices are stable across runs and platforms.
                songs_in_genre = os.listdir(fulldir)
                songs_in_genre.sort()
                for fname in songs_in_genre:
                    name, ext = os.path.splitext(fname)
                    if ext.lower() == ".wav" and "." in name:
                        # Check whether the file is of the form
                        # `gtzan_genre`.`5 digit number`.wav
                        genre, num = name.split(".")
                        if genre in gtzan_genres and len(num) == 5 and num.isdigit():
                            self._walker.append(name)
        else:
            # Named subsets use the pre-defined file-id lists declared above.
            if self.subset == "training":
                self._walker = filtered_train
            elif self.subset == "validation":
                self._walker = filtered_valid
            elif self.subset == "testing":
                self._walker = filtered_test

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Label
        """
        fileid = self._walker[n]
        item = load_gtzan_item(fileid, self._path, self._ext_audio)
        waveform, sample_rate, label = item
        return waveform, sample_rate, label

    def __len__(self) -> int:
        """Return the number of samples in the selected subset."""
        return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/iemocap.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Tuple, Union
|
| 5 |
+
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio.datasets.utils import _load_waveform
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Sample rate reported for every utterance; `get_metadata` returns this
# constant rather than probing each file.
_SAMPLE_RATE = 16000
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _get_wavs_paths(data_dir):
|
| 15 |
+
wav_dir = data_dir / "sentences" / "wav"
|
| 16 |
+
wav_paths = sorted(str(p) for p in wav_dir.glob("*/*.wav"))
|
| 17 |
+
relative_paths = []
|
| 18 |
+
for wav_path in wav_paths:
|
| 19 |
+
start = wav_path.find("Session")
|
| 20 |
+
wav_path = wav_path[start:]
|
| 21 |
+
relative_paths.append(wav_path)
|
| 22 |
+
return relative_paths
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class IEMOCAP(Dataset):
    """*IEMOCAP* :cite:`iemocap` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        sessions (Tuple[int]): Tuple of sessions (1-5) to use. (Default: ``(1, 2, 3, 4, 5)``)
        utterance_type (str or None, optional): Which type(s) of utterances to include in the dataset.
            Options: ("scripted", "improvised", ``None``). If ``None``, both scripted and improvised
            data are used.
    """

    def __init__(
        self,
        root: Union[str, Path],
        sessions: Tuple[int] = (1, 2, 3, 4, 5),
        utterance_type: Optional[str] = None,
    ):
        root = Path(root)
        self._path = root / "IEMOCAP"

        if not os.path.isdir(self._path):
            raise RuntimeError("Dataset not found.")

        if utterance_type not in ["scripted", "improvised", None]:
            raise ValueError("utterance_type must be one of ['scripted', 'improvised', or None]")

        # all_data: every wav stem found on disk; self.data: stems that also
        # have an accepted emotion label; self.mapping: stem -> {label, path}.
        all_data = []
        self.data = []
        self.mapping = {}

        for session in sessions:
            session_name = f"Session{session}"
            session_dir = self._path / session_name

            # get wav paths
            wav_paths = _get_wavs_paths(session_dir)
            for wav_path in wav_paths:
                wav_stem = str(Path(wav_path).stem)
                all_data.append(wav_stem)

            # add labels; the query narrows label files to the requested
            # utterance type ("script"/"impro" appears in the file name).
            label_dir = session_dir / "dialog" / "EmoEvaluation"
            query = "*.txt"
            if utterance_type == "scripted":
                query = "*script*.txt"
            elif utterance_type == "improvised":
                query = "*impro*.txt"
            label_paths = label_dir.glob(query)

            for label_path in label_paths:
                with open(label_path, "r") as f:
                    for line in f:
                        # Only lines beginning with "[" carry per-utterance
                        # evaluations; skip everything else.
                        if not line.startswith("["):
                            continue
                        # Fields are tab-separated: [times]\t<stem>\t<label>\t...
                        line = re.split("[\t\n]", line)
                        wav_stem = line[1]
                        label = line[2]
                        if wav_stem not in all_data:
                            continue
                        # Keep only the six emotion categories this dataset exposes.
                        if label not in ["neu", "hap", "ang", "sad", "exc", "fru"]:
                            continue
                        self.mapping[wav_stem] = {}
                        self.mapping[wav_stem]["label"] = label

            # Second pass: keep only utterances that received a label above.
            for wav_path in wav_paths:
                wav_stem = str(Path(wav_path).stem)
                if wav_stem in self.mapping:
                    self.data.append(wav_stem)
                    self.mapping[wav_stem]["path"] = wav_path

    def get_metadata(self, n: int) -> Tuple[str, int, str, str, str]:
        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:meth:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
            str:
                Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
            str:
                Speaker
        """
        wav_stem = self.data[n]
        wav_path = self.mapping[wav_stem]["path"]
        label = self.mapping[wav_stem]["label"]
        # Speaker id is the leading underscore-delimited token of the stem.
        speaker = wav_stem.split("_")[0]
        return (wav_path, _SAMPLE_RATE, wav_stem, label, speaker)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
            str:
                Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
            str:
                Speaker
        """
        metadata = self.get_metadata(n)
        waveform = _load_waveform(self._path, metadata[0], metadata[1])
        return (waveform,) + metadata[1:]

    def __len__(self):
        """Return the number of labeled utterances."""
        return len(self.data)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/librilight_limited.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torchaudio
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.librispeech import _get_librispeech_metadata
|
| 10 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Top-level directory name inside the archive (and of the extracted dataset).
_ARCHIVE_NAME = "librispeech_finetuning"
# Download location of the Libri-light fine-tuning archive.
_URL = "https://dl.fbaipublicfiles.com/librilight/data/librispeech_finetuning.tgz"
# Expected hash prefix of the archive, passed to `download_url_to_file`.
_CHECKSUM = "5d1efdc777b548194d7e09ba89126e2188026df9fd57aa57eb14408d2b2342af"
# Folder globs (relative to the archive root) making up each subset; "10h"
# is the union of all "1h" splits plus the extra "9h" folder.
_SUBSET_MAP = {"10min": ["1h/0"], "1h": ["1h/*"], "10h": ["1h/*", "9h"]}
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _get_fileids_paths(path: Path, folders: List[str], _ext_audio: str) -> List[Tuple[str, str]]:
|
| 20 |
+
"""Get the file names and the corresponding file paths without `speaker_id`
|
| 21 |
+
and `chapter_id` directories.
|
| 22 |
+
The format of path is like:
|
| 23 |
+
{root}/{_ARCHIVE_NAME}/1h/[0-5]/[clean, other] or
|
| 24 |
+
{root}/{_ARCHIVE_NAME}/9h/[clean, other]
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
path (Path): Root path to the dataset.
|
| 28 |
+
folders (List[str]): Folders that contain the desired audio files.
|
| 29 |
+
_ext_audio (str): Extension of audio files.
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
List[Tuple[str, str]]:
|
| 33 |
+
List of tuples where the first element is the relative path to the audio file.
|
| 34 |
+
The format of relative path is like:
|
| 35 |
+
1h/[0-5]/[clean, other] or 9h/[clean, other]
|
| 36 |
+
The second element is the file name without audio extension.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
path = Path(path)
|
| 40 |
+
files_paths = []
|
| 41 |
+
for folder in folders:
|
| 42 |
+
paths = [p.relative_to(path) for p in path.glob(f"{folder}/*/*/*/*{_ext_audio}")]
|
| 43 |
+
files_paths += [(str(p.parent.parent.parent), str(p.stem)) for p in paths] # get subset folder and file name
|
| 44 |
+
files_paths.sort(key=lambda x: x[0] + x[1])
|
| 45 |
+
return files_paths
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class LibriLightLimited(Dataset):
    """Subset of Libri-light :cite:`librilight` dataset,
    which was used in HuBERT :cite:`hsu2021hubert` for supervised fine-tuning.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        subset (str, optional): The subset to use. Options: [``"10min"``, ``"1h"``, ``"10h"``]
            (Default: ``"10min"``).
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
    """

    # Transcript files follow the LibriSpeech naming convention.
    _ext_txt = ".trans.txt"
    # Audio files are FLAC-encoded.
    _ext_audio = ".flac"

    def __init__(
        self,
        root: Union[str, Path],
        subset: str = "10min",
        download: bool = False,
    ) -> None:
        if subset not in _SUBSET_MAP:
            raise ValueError(f"`subset` must be one of {_SUBSET_MAP.keys()}. Found: {subset}")
        folders = _SUBSET_MAP[subset]

        root = os.fspath(root)
        self._path = os.path.join(root, _ARCHIVE_NAME)
        archive = os.path.join(root, f"{_ARCHIVE_NAME}.tgz")
        if not os.path.isdir(self._path):
            if not download:
                raise RuntimeError("Dataset not found. Please use `download=True` to download")
            # Reuse an already-downloaded archive if present.
            if not os.path.isfile(archive):
                download_url_to_file(_URL, archive, hash_prefix=_CHECKSUM)
            _extract_tar(archive)
        # Index of (subset folder, file id) pairs for the chosen subset.
        self._fileids_paths = _get_fileids_paths(self._path, folders, self._ext_audio)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded
        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            int:
                Speaker ID
            int:
                Chapter ID
            int:
                Utterance ID
        """
        file_path, fileid = self._fileids_paths[n]
        # metadata[0] is the audio path relative to self._path; the remaining
        # fields match this method's return signature.
        metadata = _get_librispeech_metadata(fileid, self._path, file_path, self._ext_audio, self._ext_txt)
        waveform, _ = torchaudio.load(os.path.join(self._path, metadata[0]))
        return (waveform,) + metadata[1:]

    def __len__(self) -> int:
        """Return the number of utterances in the selected subset."""
        return len(self._fileids_paths)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/librimix.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio.datasets.utils import _load_waveform
|
| 8 |
+
|
| 9 |
+
# Maps each LibriMix task name to the mixture subdirectory it reads from.
# Note that both "enh_both" and "sep_noisy" read from "mix_both".
_TASKS_TO_MIXTURE = {
    "sep_clean": "mix_clean",
    "enh_single": "mix_single",
    "enh_both": "mix_both",
    "sep_noisy": "mix_both",
}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class LibriMix(Dataset):
|
| 18 |
+
r"""*LibriMix* :cite:`cosentino2020librimix` dataset.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
root (str or Path): The path where the directory ``Libri2Mix`` or
|
| 22 |
+
``Libri3Mix`` is stored. Not the path of those directories.
|
| 23 |
+
subset (str, optional): The subset to use. Options: [``"train-360"``, ``"train-100"``,
|
| 24 |
+
``"dev"``, and ``"test"``] (Default: ``"train-360"``).
|
| 25 |
+
num_speakers (int, optional): The number of speakers, which determines the directories
|
| 26 |
+
to traverse. The Dataset will traverse ``s1`` to ``sN`` directories to collect
|
| 27 |
+
N source audios. (Default: 2)
|
| 28 |
+
sample_rate (int, optional): Sample rate of audio files. The ``sample_rate`` determines
|
| 29 |
+
which subdirectory the audio are fetched. If any of the audio has a different sample
|
| 30 |
+
rate, raises ``ValueError``. Options: [8000, 16000] (Default: 8000)
|
| 31 |
+
task (str, optional): The task of LibriMix.
|
| 32 |
+
Options: [``"enh_single"``, ``"enh_both"``, ``"sep_clean"``, ``"sep_noisy"``]
|
| 33 |
+
(Default: ``"sep_clean"``)
|
| 34 |
+
mode (str, optional): The mode when creating the mixture. If set to ``"min"``, the lengths of mixture
|
| 35 |
+
and sources are the minimum length of all sources. If set to ``"max"``, the lengths of mixture and
|
| 36 |
+
sources are zero padded to the maximum length of all sources.
|
| 37 |
+
Options: [``"min"``, ``"max"``]
|
| 38 |
+
(Default: ``"min"``)
|
| 39 |
+
|
| 40 |
+
Note:
|
| 41 |
+
The LibriMix dataset needs to be manually generated. Please check https://github.com/JorisCos/LibriMix
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
def __init__(
|
| 45 |
+
self,
|
| 46 |
+
root: Union[str, Path],
|
| 47 |
+
subset: str = "train-360",
|
| 48 |
+
num_speakers: int = 2,
|
| 49 |
+
sample_rate: int = 8000,
|
| 50 |
+
task: str = "sep_clean",
|
| 51 |
+
mode: str = "min",
|
| 52 |
+
):
|
| 53 |
+
self.root = Path(root) / f"Libri{num_speakers}Mix"
|
| 54 |
+
if not os.path.exists(self.root):
|
| 55 |
+
raise RuntimeError(
|
| 56 |
+
f"The path {self.root} doesn't exist. "
|
| 57 |
+
"Please check the ``root`` path and ``num_speakers`` or download the dataset manually."
|
| 58 |
+
)
|
| 59 |
+
if mode not in ["max", "min"]:
|
| 60 |
+
raise ValueError(f'Expect ``mode`` to be one in ["min", "max"]. Found {mode}.')
|
| 61 |
+
if sample_rate == 8000:
|
| 62 |
+
mix_dir = self.root / "wav8k" / mode / subset
|
| 63 |
+
elif sample_rate == 16000:
|
| 64 |
+
mix_dir = self.root / "wav16k" / mode / subset
|
| 65 |
+
else:
|
| 66 |
+
raise ValueError(f"Unsupported sample rate. Found {sample_rate}.")
|
| 67 |
+
self.sample_rate = sample_rate
|
| 68 |
+
self.task = task
|
| 69 |
+
|
| 70 |
+
self.mix_dir = mix_dir / _TASKS_TO_MIXTURE[task]
|
| 71 |
+
if task == "enh_both":
|
| 72 |
+
self.src_dirs = [(mix_dir / "mix_clean")]
|
| 73 |
+
else:
|
| 74 |
+
self.src_dirs = [(mix_dir / f"s{i+1}") for i in range(num_speakers)]
|
| 75 |
+
|
| 76 |
+
self.files = [p.name for p in self.mix_dir.glob("*.wav")]
|
| 77 |
+
self.files.sort()
|
| 78 |
+
|
| 79 |
+
def _load_sample(self, key) -> Tuple[int, torch.Tensor, List[torch.Tensor]]:
|
| 80 |
+
metadata = self.get_metadata(key)
|
| 81 |
+
mixed = _load_waveform(self.root, metadata[1], metadata[0])
|
| 82 |
+
srcs = []
|
| 83 |
+
for i, path_ in enumerate(metadata[2]):
|
| 84 |
+
src = _load_waveform(self.root, path_, metadata[0])
|
| 85 |
+
if mixed.shape != src.shape:
|
| 86 |
+
raise ValueError(f"Different waveform shapes. mixed: {mixed.shape}, src[{i}]: {src.shape}")
|
| 87 |
+
srcs.append(src)
|
| 88 |
+
return self.sample_rate, mixed, srcs
|
| 89 |
+
|
| 90 |
+
def get_metadata(self, key: int) -> Tuple[int, str, List[str]]:
|
| 91 |
+
"""Get metadata for the n-th sample from the dataset.
|
| 92 |
+
|
| 93 |
+
Args:
|
| 94 |
+
key (int): The index of the sample to be loaded
|
| 95 |
+
|
| 96 |
+
Returns:
|
| 97 |
+
Tuple of the following items;
|
| 98 |
+
|
| 99 |
+
int:
|
| 100 |
+
Sample rate
|
| 101 |
+
str:
|
| 102 |
+
Path to mixed audio
|
| 103 |
+
List of str:
|
| 104 |
+
List of paths to source audios
|
| 105 |
+
"""
|
| 106 |
+
filename = self.files[key]
|
| 107 |
+
mixed_path = os.path.relpath(self.mix_dir / filename, self.root)
|
| 108 |
+
srcs_paths = []
|
| 109 |
+
for dir_ in self.src_dirs:
|
| 110 |
+
src = os.path.relpath(dir_ / filename, self.root)
|
| 111 |
+
srcs_paths.append(src)
|
| 112 |
+
return self.sample_rate, mixed_path, srcs_paths
|
| 113 |
+
|
| 114 |
+
def __len__(self) -> int:
|
| 115 |
+
return len(self.files)
|
| 116 |
+
|
| 117 |
+
def __getitem__(self, key: int) -> Tuple[int, torch.Tensor, List[torch.Tensor]]:
|
| 118 |
+
"""Load the n-th sample from the dataset.
|
| 119 |
+
|
| 120 |
+
Args:
|
| 121 |
+
key (int): The index of the sample to be loaded
|
| 122 |
+
|
| 123 |
+
Returns:
|
| 124 |
+
Tuple of the following items;
|
| 125 |
+
|
| 126 |
+
int:
|
| 127 |
+
Sample rate
|
| 128 |
+
Tensor:
|
| 129 |
+
Mixture waveform
|
| 130 |
+
List of Tensors:
|
| 131 |
+
List of source waveforms
|
| 132 |
+
"""
|
| 133 |
+
return self._load_sample(key)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/librispeech.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Tuple, Union
|
| 4 |
+
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_tar, _load_waveform
|
| 9 |
+
|
| 10 |
+
URL = "train-clean-100"
|
| 11 |
+
FOLDER_IN_ARCHIVE = "LibriSpeech"
|
| 12 |
+
SAMPLE_RATE = 16000
|
| 13 |
+
_DATA_SUBSETS = [
|
| 14 |
+
"dev-clean",
|
| 15 |
+
"dev-other",
|
| 16 |
+
"test-clean",
|
| 17 |
+
"test-other",
|
| 18 |
+
"train-clean-100",
|
| 19 |
+
"train-clean-360",
|
| 20 |
+
"train-other-500",
|
| 21 |
+
]
|
| 22 |
+
_CHECKSUMS = {
|
| 23 |
+
"http://www.openslr.org/resources/12/dev-clean.tar.gz": "76f87d090650617fca0cac8f88b9416e0ebf80350acb97b343a85fa903728ab3", # noqa: E501
|
| 24 |
+
"http://www.openslr.org/resources/12/dev-other.tar.gz": "12661c48e8c3fe1de2c1caa4c3e135193bfb1811584f11f569dd12645aa84365", # noqa: E501
|
| 25 |
+
"http://www.openslr.org/resources/12/test-clean.tar.gz": "39fde525e59672dc6d1551919b1478f724438a95aa55f874b576be21967e6c23", # noqa: E501
|
| 26 |
+
"http://www.openslr.org/resources/12/test-other.tar.gz": "d09c181bba5cf717b3dee7d4d592af11a3ee3a09e08ae025c5506f6ebe961c29", # noqa: E501
|
| 27 |
+
"http://www.openslr.org/resources/12/train-clean-100.tar.gz": "d4ddd1d5a6ab303066f14971d768ee43278a5f2a0aa43dc716b0e64ecbbbf6e2", # noqa: E501
|
| 28 |
+
"http://www.openslr.org/resources/12/train-clean-360.tar.gz": "146a56496217e96c14334a160df97fffedd6e0a04e66b9c5af0d40be3c792ecf", # noqa: E501
|
| 29 |
+
"http://www.openslr.org/resources/12/train-other-500.tar.gz": "ddb22f27f96ec163645d53215559df6aa36515f26e01dd70798188350adcb6d2", # noqa: E501
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _download_librispeech(root, url):
|
| 34 |
+
base_url = "http://www.openslr.org/resources/12/"
|
| 35 |
+
ext_archive = ".tar.gz"
|
| 36 |
+
|
| 37 |
+
filename = url + ext_archive
|
| 38 |
+
archive = os.path.join(root, filename)
|
| 39 |
+
download_url = os.path.join(base_url, filename)
|
| 40 |
+
if not os.path.isfile(archive):
|
| 41 |
+
checksum = _CHECKSUMS.get(download_url, None)
|
| 42 |
+
download_url_to_file(download_url, archive, hash_prefix=checksum)
|
| 43 |
+
_extract_tar(archive)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _get_librispeech_metadata(
|
| 47 |
+
fileid: str, root: str, folder: str, ext_audio: str, ext_txt: str
|
| 48 |
+
) -> Tuple[str, int, str, int, int, int]:
|
| 49 |
+
speaker_id, chapter_id, utterance_id = fileid.split("-")
|
| 50 |
+
|
| 51 |
+
# Get audio path and sample rate
|
| 52 |
+
fileid_audio = f"{speaker_id}-{chapter_id}-{utterance_id}"
|
| 53 |
+
filepath = os.path.join(folder, speaker_id, chapter_id, f"{fileid_audio}{ext_audio}")
|
| 54 |
+
|
| 55 |
+
# Load text
|
| 56 |
+
file_text = f"{speaker_id}-{chapter_id}{ext_txt}"
|
| 57 |
+
file_text = os.path.join(root, folder, speaker_id, chapter_id, file_text)
|
| 58 |
+
with open(file_text) as ft:
|
| 59 |
+
for line in ft:
|
| 60 |
+
fileid_text, transcript = line.strip().split(" ", 1)
|
| 61 |
+
if fileid_audio == fileid_text:
|
| 62 |
+
break
|
| 63 |
+
else:
|
| 64 |
+
# Translation not found
|
| 65 |
+
raise FileNotFoundError(f"Translation not found for {fileid_audio}")
|
| 66 |
+
|
| 67 |
+
return (
|
| 68 |
+
filepath,
|
| 69 |
+
SAMPLE_RATE,
|
| 70 |
+
transcript,
|
| 71 |
+
int(speaker_id),
|
| 72 |
+
int(chapter_id),
|
| 73 |
+
int(utterance_id),
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class LIBRISPEECH(Dataset):
|
| 78 |
+
"""*LibriSpeech* :cite:`7178964` dataset.
|
| 79 |
+
|
| 80 |
+
Args:
|
| 81 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 82 |
+
url (str, optional): The URL to download the dataset from,
|
| 83 |
+
or the type of the dataset to dowload.
|
| 84 |
+
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
|
| 85 |
+
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
|
| 86 |
+
``"train-other-500"``. (default: ``"train-clean-100"``)
|
| 87 |
+
folder_in_archive (str, optional):
|
| 88 |
+
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
|
| 89 |
+
download (bool, optional):
|
| 90 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 91 |
+
"""
|
| 92 |
+
|
| 93 |
+
_ext_txt = ".trans.txt"
|
| 94 |
+
_ext_audio = ".flac"
|
| 95 |
+
|
| 96 |
+
def __init__(
|
| 97 |
+
self,
|
| 98 |
+
root: Union[str, Path],
|
| 99 |
+
url: str = URL,
|
| 100 |
+
folder_in_archive: str = FOLDER_IN_ARCHIVE,
|
| 101 |
+
download: bool = False,
|
| 102 |
+
) -> None:
|
| 103 |
+
self._url = url
|
| 104 |
+
if url not in _DATA_SUBSETS:
|
| 105 |
+
raise ValueError(f"Invalid url '{url}' given; please provide one of {_DATA_SUBSETS}.")
|
| 106 |
+
|
| 107 |
+
root = os.fspath(root)
|
| 108 |
+
self._archive = os.path.join(root, folder_in_archive)
|
| 109 |
+
self._path = os.path.join(root, folder_in_archive, url)
|
| 110 |
+
|
| 111 |
+
if not os.path.isdir(self._path):
|
| 112 |
+
if download:
|
| 113 |
+
_download_librispeech(root, url)
|
| 114 |
+
else:
|
| 115 |
+
raise RuntimeError(
|
| 116 |
+
f"Dataset not found at {self._path}. Please set `download=True` to download the dataset."
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob("*/*/*" + self._ext_audio))
|
| 120 |
+
|
| 121 |
+
def get_metadata(self, n: int) -> Tuple[str, int, str, int, int, int]:
|
| 122 |
+
"""Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
|
| 123 |
+
but otherwise returns the same fields as :py:func:`__getitem__`.
|
| 124 |
+
|
| 125 |
+
Args:
|
| 126 |
+
n (int): The index of the sample to be loaded
|
| 127 |
+
|
| 128 |
+
Returns:
|
| 129 |
+
Tuple of the following items;
|
| 130 |
+
|
| 131 |
+
str:
|
| 132 |
+
Path to audio
|
| 133 |
+
int:
|
| 134 |
+
Sample rate
|
| 135 |
+
str:
|
| 136 |
+
Transcript
|
| 137 |
+
int:
|
| 138 |
+
Speaker ID
|
| 139 |
+
int:
|
| 140 |
+
Chapter ID
|
| 141 |
+
int:
|
| 142 |
+
Utterance ID
|
| 143 |
+
"""
|
| 144 |
+
fileid = self._walker[n]
|
| 145 |
+
return _get_librispeech_metadata(fileid, self._archive, self._url, self._ext_audio, self._ext_txt)
|
| 146 |
+
|
| 147 |
+
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
|
| 148 |
+
"""Load the n-th sample from the dataset.
|
| 149 |
+
|
| 150 |
+
Args:
|
| 151 |
+
n (int): The index of the sample to be loaded
|
| 152 |
+
|
| 153 |
+
Returns:
|
| 154 |
+
Tuple of the following items;
|
| 155 |
+
|
| 156 |
+
Tensor:
|
| 157 |
+
Waveform
|
| 158 |
+
int:
|
| 159 |
+
Sample rate
|
| 160 |
+
str:
|
| 161 |
+
Transcript
|
| 162 |
+
int:
|
| 163 |
+
Speaker ID
|
| 164 |
+
int:
|
| 165 |
+
Chapter ID
|
| 166 |
+
int:
|
| 167 |
+
Utterance ID
|
| 168 |
+
"""
|
| 169 |
+
metadata = self.get_metadata(n)
|
| 170 |
+
waveform = _load_waveform(self._archive, metadata[0], metadata[1])
|
| 171 |
+
return (waveform,) + metadata[1:]
|
| 172 |
+
|
| 173 |
+
def __len__(self) -> int:
|
| 174 |
+
return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/librispeech_biasing.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Tuple, Union
|
| 4 |
+
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_tar, _load_waveform
|
| 9 |
+
|
| 10 |
+
URL = "train-clean-100"
|
| 11 |
+
FOLDER_IN_ARCHIVE = "LibriSpeech"
|
| 12 |
+
SAMPLE_RATE = 16000
|
| 13 |
+
_DATA_SUBSETS = [
|
| 14 |
+
"dev-clean",
|
| 15 |
+
"dev-other",
|
| 16 |
+
"test-clean",
|
| 17 |
+
"test-other",
|
| 18 |
+
"train-clean-100",
|
| 19 |
+
"train-clean-360",
|
| 20 |
+
"train-other-500",
|
| 21 |
+
]
|
| 22 |
+
_CHECKSUMS = {
|
| 23 |
+
"http://www.openslr.org/resources/12/dev-clean.tar.gz": "76f87d090650617fca0cac8f88b9416e0ebf80350acb97b343a85fa903728ab3", # noqa: E501
|
| 24 |
+
"http://www.openslr.org/resources/12/dev-other.tar.gz": "12661c48e8c3fe1de2c1caa4c3e135193bfb1811584f11f569dd12645aa84365", # noqa: E501
|
| 25 |
+
"http://www.openslr.org/resources/12/test-clean.tar.gz": "39fde525e59672dc6d1551919b1478f724438a95aa55f874b576be21967e6c23", # noqa: E501
|
| 26 |
+
"http://www.openslr.org/resources/12/test-other.tar.gz": "d09c181bba5cf717b3dee7d4d592af11a3ee3a09e08ae025c5506f6ebe961c29", # noqa: E501
|
| 27 |
+
"http://www.openslr.org/resources/12/train-clean-100.tar.gz": "d4ddd1d5a6ab303066f14971d768ee43278a5f2a0aa43dc716b0e64ecbbbf6e2", # noqa: E501
|
| 28 |
+
"http://www.openslr.org/resources/12/train-clean-360.tar.gz": "146a56496217e96c14334a160df97fffedd6e0a04e66b9c5af0d40be3c792ecf", # noqa: E501
|
| 29 |
+
"http://www.openslr.org/resources/12/train-other-500.tar.gz": "ddb22f27f96ec163645d53215559df6aa36515f26e01dd70798188350adcb6d2", # noqa: E501
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _download_librispeech(root, url):
|
| 34 |
+
base_url = "http://www.openslr.org/resources/12/"
|
| 35 |
+
ext_archive = ".tar.gz"
|
| 36 |
+
|
| 37 |
+
filename = url + ext_archive
|
| 38 |
+
archive = os.path.join(root, filename)
|
| 39 |
+
download_url = os.path.join(base_url, filename)
|
| 40 |
+
if not os.path.isfile(archive):
|
| 41 |
+
checksum = _CHECKSUMS.get(download_url, None)
|
| 42 |
+
download_url_to_file(download_url, archive, hash_prefix=checksum)
|
| 43 |
+
_extract_tar(archive)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _get_librispeech_metadata(
|
| 47 |
+
fileid: str, root: str, folder: str, ext_audio: str, ext_txt: str, blist: List[str]
|
| 48 |
+
) -> Tuple[str, int, str, int, int, int]:
|
| 49 |
+
blist = blist or []
|
| 50 |
+
speaker_id, chapter_id, utterance_id = fileid.split("-")
|
| 51 |
+
|
| 52 |
+
# Get audio path and sample rate
|
| 53 |
+
fileid_audio = f"{speaker_id}-{chapter_id}-{utterance_id}"
|
| 54 |
+
filepath = os.path.join(folder, speaker_id, chapter_id, f"{fileid_audio}{ext_audio}")
|
| 55 |
+
|
| 56 |
+
# Load text
|
| 57 |
+
file_text = f"{speaker_id}-{chapter_id}{ext_txt}"
|
| 58 |
+
file_text = os.path.join(root, folder, speaker_id, chapter_id, file_text)
|
| 59 |
+
uttblist = []
|
| 60 |
+
with open(file_text) as ft:
|
| 61 |
+
for line in ft:
|
| 62 |
+
fileid_text, transcript = line.strip().split(" ", 1)
|
| 63 |
+
if fileid_audio == fileid_text:
|
| 64 |
+
# get utterance biasing list
|
| 65 |
+
for word in transcript.split():
|
| 66 |
+
if word in blist and word not in uttblist:
|
| 67 |
+
uttblist.append(word)
|
| 68 |
+
break
|
| 69 |
+
else:
|
| 70 |
+
# Translation not found
|
| 71 |
+
raise FileNotFoundError(f"Translation not found for {fileid_audio}")
|
| 72 |
+
|
| 73 |
+
return (
|
| 74 |
+
filepath,
|
| 75 |
+
SAMPLE_RATE,
|
| 76 |
+
transcript,
|
| 77 |
+
int(speaker_id),
|
| 78 |
+
int(chapter_id),
|
| 79 |
+
int(utterance_id),
|
| 80 |
+
uttblist,
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class LibriSpeechBiasing(Dataset):
|
| 85 |
+
"""*LibriSpeech* :cite:`7178964` dataset with prefix-tree construction and biasing support.
|
| 86 |
+
|
| 87 |
+
Args:
|
| 88 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 89 |
+
url (str, optional): The URL to download the dataset from,
|
| 90 |
+
or the type of the dataset to dowload.
|
| 91 |
+
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
|
| 92 |
+
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
|
| 93 |
+
``"train-other-500"``. (default: ``"train-clean-100"``)
|
| 94 |
+
folder_in_archive (str, optional):
|
| 95 |
+
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
|
| 96 |
+
download (bool, optional):
|
| 97 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 98 |
+
blist (list, optional):
|
| 99 |
+
The list of biasing words (default: ``[]``).
|
| 100 |
+
"""
|
| 101 |
+
|
| 102 |
+
_ext_txt = ".trans.txt"
|
| 103 |
+
_ext_audio = ".flac"
|
| 104 |
+
|
| 105 |
+
def __init__(
|
| 106 |
+
self,
|
| 107 |
+
root: Union[str, Path],
|
| 108 |
+
url: str = URL,
|
| 109 |
+
folder_in_archive: str = FOLDER_IN_ARCHIVE,
|
| 110 |
+
download: bool = False,
|
| 111 |
+
blist: List[str] = None,
|
| 112 |
+
) -> None:
|
| 113 |
+
self._url = url
|
| 114 |
+
if url not in _DATA_SUBSETS:
|
| 115 |
+
raise ValueError(f"Invalid url '{url}' given; please provide one of {_DATA_SUBSETS}.")
|
| 116 |
+
|
| 117 |
+
root = os.fspath(root)
|
| 118 |
+
self._archive = os.path.join(root, folder_in_archive)
|
| 119 |
+
self._path = os.path.join(root, folder_in_archive, url)
|
| 120 |
+
|
| 121 |
+
if not os.path.isdir(self._path):
|
| 122 |
+
if download:
|
| 123 |
+
_download_librispeech(root, url)
|
| 124 |
+
else:
|
| 125 |
+
raise RuntimeError(
|
| 126 |
+
f"Dataset not found at {self._path}. Please set `download=True` to download the dataset."
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob("*/*/*" + self._ext_audio))
|
| 130 |
+
self.blist = blist
|
| 131 |
+
|
| 132 |
+
def get_metadata(self, n: int) -> Tuple[str, int, str, int, int, int]:
|
| 133 |
+
"""Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
|
| 134 |
+
but otherwise returns the same fields as :py:func:`__getitem__`.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
n (int): The index of the sample to be loaded
|
| 138 |
+
|
| 139 |
+
Returns:
|
| 140 |
+
Tuple of the following items;
|
| 141 |
+
|
| 142 |
+
str:
|
| 143 |
+
Path to audio
|
| 144 |
+
int:
|
| 145 |
+
Sample rate
|
| 146 |
+
str:
|
| 147 |
+
Transcript
|
| 148 |
+
int:
|
| 149 |
+
Speaker ID
|
| 150 |
+
int:
|
| 151 |
+
Chapter ID
|
| 152 |
+
int:
|
| 153 |
+
Utterance ID
|
| 154 |
+
list:
|
| 155 |
+
List of biasing words in the utterance
|
| 156 |
+
"""
|
| 157 |
+
fileid = self._walker[n]
|
| 158 |
+
return _get_librispeech_metadata(fileid, self._archive, self._url, self._ext_audio, self._ext_txt, self.blist)
|
| 159 |
+
|
| 160 |
+
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
|
| 161 |
+
"""Load the n-th sample from the dataset.
|
| 162 |
+
|
| 163 |
+
Args:
|
| 164 |
+
n (int): The index of the sample to be loaded
|
| 165 |
+
|
| 166 |
+
Returns:
|
| 167 |
+
Tuple of the following items;
|
| 168 |
+
|
| 169 |
+
Tensor:
|
| 170 |
+
Waveform
|
| 171 |
+
int:
|
| 172 |
+
Sample rate
|
| 173 |
+
str:
|
| 174 |
+
Transcript
|
| 175 |
+
int:
|
| 176 |
+
Speaker ID
|
| 177 |
+
int:
|
| 178 |
+
Chapter ID
|
| 179 |
+
int:
|
| 180 |
+
Utterance ID
|
| 181 |
+
list:
|
| 182 |
+
List of biasing words in the utterance
|
| 183 |
+
"""
|
| 184 |
+
metadata = self.get_metadata(n)
|
| 185 |
+
waveform = _load_waveform(self._archive, metadata[0], metadata[1])
|
| 186 |
+
return (waveform,) + metadata[1:]
|
| 187 |
+
|
| 188 |
+
def __len__(self) -> int:
|
| 189 |
+
return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/libritts.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torchaudio
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 10 |
+
|
| 11 |
+
URL = "train-clean-100"
|
| 12 |
+
FOLDER_IN_ARCHIVE = "LibriTTS"
|
| 13 |
+
_CHECKSUMS = {
|
| 14 |
+
"http://www.openslr.org/resources/60/dev-clean.tar.gz": "da0864e1bd26debed35da8a869dd5c04dfc27682921936de7cff9c8a254dbe1a", # noqa: E501
|
| 15 |
+
"http://www.openslr.org/resources/60/dev-other.tar.gz": "d413eda26f3a152ac7c9cf3658ef85504dfb1b625296e5fa83727f5186cca79c", # noqa: E501
|
| 16 |
+
"http://www.openslr.org/resources/60/test-clean.tar.gz": "234ea5b25859102a87024a4b9b86641f5b5aaaf1197335c95090cde04fe9a4f5", # noqa: E501
|
| 17 |
+
"http://www.openslr.org/resources/60/test-other.tar.gz": "33a5342094f3bba7ccc2e0500b9e72d558f72eb99328ac8debe1d9080402f10d", # noqa: E501
|
| 18 |
+
"http://www.openslr.org/resources/60/train-clean-100.tar.gz": "c5608bf1ef74bb621935382b8399c5cdd51cd3ee47cec51f00f885a64c6c7f6b", # noqa: E501
|
| 19 |
+
"http://www.openslr.org/resources/60/train-clean-360.tar.gz": "ce7cff44dcac46009d18379f37ef36551123a1dc4e5c8e4eb73ae57260de4886", # noqa: E501
|
| 20 |
+
"http://www.openslr.org/resources/60/train-other-500.tar.gz": "e35f7e34deeb2e2bdfe4403d88c8fdd5fbf64865cae41f027a185a6965f0a5df", # noqa: E501
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def load_libritts_item(
|
| 25 |
+
fileid: str,
|
| 26 |
+
path: str,
|
| 27 |
+
ext_audio: str,
|
| 28 |
+
ext_original_txt: str,
|
| 29 |
+
ext_normalized_txt: str,
|
| 30 |
+
) -> Tuple[Tensor, int, str, str, int, int, str]:
|
| 31 |
+
speaker_id, chapter_id, segment_id, utterance_id = fileid.split("_")
|
| 32 |
+
utterance_id = fileid
|
| 33 |
+
|
| 34 |
+
normalized_text = utterance_id + ext_normalized_txt
|
| 35 |
+
normalized_text = os.path.join(path, speaker_id, chapter_id, normalized_text)
|
| 36 |
+
|
| 37 |
+
original_text = utterance_id + ext_original_txt
|
| 38 |
+
original_text = os.path.join(path, speaker_id, chapter_id, original_text)
|
| 39 |
+
|
| 40 |
+
file_audio = utterance_id + ext_audio
|
| 41 |
+
file_audio = os.path.join(path, speaker_id, chapter_id, file_audio)
|
| 42 |
+
|
| 43 |
+
# Load audio
|
| 44 |
+
waveform, sample_rate = torchaudio.load(file_audio)
|
| 45 |
+
|
| 46 |
+
# Load original text
|
| 47 |
+
with open(original_text) as ft:
|
| 48 |
+
original_text = ft.readline()
|
| 49 |
+
|
| 50 |
+
# Load normalized text
|
| 51 |
+
with open(normalized_text, "r") as ft:
|
| 52 |
+
normalized_text = ft.readline()
|
| 53 |
+
|
| 54 |
+
return (
|
| 55 |
+
waveform,
|
| 56 |
+
sample_rate,
|
| 57 |
+
original_text,
|
| 58 |
+
normalized_text,
|
| 59 |
+
int(speaker_id),
|
| 60 |
+
int(chapter_id),
|
| 61 |
+
utterance_id,
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class LIBRITTS(Dataset):
|
| 66 |
+
"""*LibriTTS* :cite:`Zen2019LibriTTSAC` dataset.
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 70 |
+
url (str, optional): The URL to download the dataset from,
|
| 71 |
+
or the type of the dataset to dowload.
|
| 72 |
+
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
|
| 73 |
+
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
|
| 74 |
+
``"train-other-500"``. (default: ``"train-clean-100"``)
|
| 75 |
+
folder_in_archive (str, optional):
|
| 76 |
+
The top-level directory of the dataset. (default: ``"LibriTTS"``)
|
| 77 |
+
download (bool, optional):
|
| 78 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 79 |
+
"""
|
| 80 |
+
|
| 81 |
+
_ext_original_txt = ".original.txt"
|
| 82 |
+
_ext_normalized_txt = ".normalized.txt"
|
| 83 |
+
_ext_audio = ".wav"
|
| 84 |
+
|
| 85 |
+
def __init__(
|
| 86 |
+
self,
|
| 87 |
+
root: Union[str, Path],
|
| 88 |
+
url: str = URL,
|
| 89 |
+
folder_in_archive: str = FOLDER_IN_ARCHIVE,
|
| 90 |
+
download: bool = False,
|
| 91 |
+
) -> None:
|
| 92 |
+
|
| 93 |
+
if url in [
|
| 94 |
+
"dev-clean",
|
| 95 |
+
"dev-other",
|
| 96 |
+
"test-clean",
|
| 97 |
+
"test-other",
|
| 98 |
+
"train-clean-100",
|
| 99 |
+
"train-clean-360",
|
| 100 |
+
"train-other-500",
|
| 101 |
+
]:
|
| 102 |
+
|
| 103 |
+
ext_archive = ".tar.gz"
|
| 104 |
+
base_url = "http://www.openslr.org/resources/60/"
|
| 105 |
+
|
| 106 |
+
url = os.path.join(base_url, url + ext_archive)
|
| 107 |
+
|
| 108 |
+
# Get string representation of 'root' in case Path object is passed
|
| 109 |
+
root = os.fspath(root)
|
| 110 |
+
|
| 111 |
+
basename = os.path.basename(url)
|
| 112 |
+
archive = os.path.join(root, basename)
|
| 113 |
+
|
| 114 |
+
basename = basename.split(".")[0]
|
| 115 |
+
folder_in_archive = os.path.join(folder_in_archive, basename)
|
| 116 |
+
|
| 117 |
+
self._path = os.path.join(root, folder_in_archive)
|
| 118 |
+
|
| 119 |
+
if download:
|
| 120 |
+
if not os.path.isdir(self._path):
|
| 121 |
+
if not os.path.isfile(archive):
|
| 122 |
+
checksum = _CHECKSUMS.get(url, None)
|
| 123 |
+
download_url_to_file(url, archive, hash_prefix=checksum)
|
| 124 |
+
_extract_tar(archive)
|
| 125 |
+
else:
|
| 126 |
+
if not os.path.exists(self._path):
|
| 127 |
+
raise RuntimeError(
|
| 128 |
+
f"The path {self._path} doesn't exist. "
|
| 129 |
+
"Please check the ``root`` path or set `download=True` to download it"
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob("*/*/*" + self._ext_audio))
|
| 133 |
+
|
| 134 |
+
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
|
| 135 |
+
"""Load the n-th sample from the dataset.
|
| 136 |
+
|
| 137 |
+
Args:
|
| 138 |
+
n (int): The index of the sample to be loaded
|
| 139 |
+
|
| 140 |
+
Returns:
|
| 141 |
+
Tuple of the following items;
|
| 142 |
+
|
| 143 |
+
Tensor:
|
| 144 |
+
Waveform
|
| 145 |
+
int:
|
| 146 |
+
Sample rate
|
| 147 |
+
str:
|
| 148 |
+
Original text
|
| 149 |
+
str:
|
| 150 |
+
Normalized text
|
| 151 |
+
int:
|
| 152 |
+
Speaker ID
|
| 153 |
+
int:
|
| 154 |
+
Chapter ID
|
| 155 |
+
str:
|
| 156 |
+
Utterance ID
|
| 157 |
+
"""
|
| 158 |
+
fileid = self._walker[n]
|
| 159 |
+
return load_libritts_item(
|
| 160 |
+
fileid,
|
| 161 |
+
self._path,
|
| 162 |
+
self._ext_audio,
|
| 163 |
+
self._ext_original_txt,
|
| 164 |
+
self._ext_normalized_txt,
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
def __len__(self) -> int:
|
| 168 |
+
return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/ljspeech.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import csv
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Tuple, Union
|
| 5 |
+
|
| 6 |
+
import torchaudio
|
| 7 |
+
from torch import Tensor
|
| 8 |
+
from torch.utils.data import Dataset
|
| 9 |
+
from torchaudio._internal import download_url_to_file
|
| 10 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Download/extraction metadata for each supported release of the dataset.
_RELEASE_CONFIGS = {
    "release1": {
        "folder_in_archive": "wavs",
        "url": "https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2",
        "checksum": "be1a30453f28eb8dd26af4101ae40cbf2c50413b1bb21936cbcdc6fae3de8aa5",
    }
}


class LJSPEECH(Dataset):
    """*LJSpeech-1.1* :cite:`ljspeech17` dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
        folder_in_archive (str, optional):
            The top-level directory of the dataset. (default: ``"wavs"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
    """

    def __init__(
        self,
        root: Union[str, Path],
        url: str = _RELEASE_CONFIGS["release1"]["url"],
        folder_in_archive: str = _RELEASE_CONFIGS["release1"]["folder_in_archive"],
        download: bool = False,
    ) -> None:
        self._parse_filesystem(root, url, folder_in_archive, download)

    def _parse_filesystem(self, root: str, url: str, folder_in_archive: str, download: bool) -> None:
        """Locate (optionally download and extract) the dataset, then read its metadata file."""
        root = Path(root)

        basename = os.path.basename(url)
        archive = root / basename

        # Strip the ".tar.bz2" suffix to get the name of the extracted top-level directory.
        basename = Path(basename.split(".tar.bz2")[0])
        folder_in_archive = basename / folder_in_archive

        self._path = root / folder_in_archive
        self._metadata_path = root / basename / "metadata.csv"

        if download:
            if not os.path.isdir(self._path):
                if not os.path.isfile(archive):
                    checksum = _RELEASE_CONFIGS["release1"]["checksum"]
                    download_url_to_file(url, archive, hash_prefix=checksum)
                _extract_tar(archive)
        else:
            if not os.path.exists(self._path):
                raise RuntimeError(
                    f"The path {self._path} doesn't exist. "
                    "Please check the ``root`` path or set `download=True` to download it"
                )

        # metadata.csv is pipe-delimited and contains non-ASCII characters; read it
        # explicitly as UTF-8 so parsing does not depend on the platform's default
        # locale encoding (which would misdecode on e.g. Windows/cp1252).
        with open(self._metadata_path, "r", newline="", encoding="utf-8") as metadata:
            flist = csv.reader(metadata, delimiter="|", quoting=csv.QUOTE_NONE)
            self._flist = list(flist)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Normalized Transcript
        """
        line = self._flist[n]
        fileid, transcript, normalized_transcript = line
        fileid_audio = self._path / (fileid + ".wav")

        # Load audio
        waveform, sample_rate = torchaudio.load(fileid_audio)

        return (
            waveform,
            sample_rate,
            transcript,
            normalized_transcript,
        )

    def __len__(self) -> int:
        """Return the number of samples (rows of metadata.csv)."""
        return len(self._flist)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/musdb_hq.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Optional, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torchaudio
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_zip
|
| 10 |
+
|
| 11 |
+
# Dataset location and invariants.
_URL = "https://zenodo.org/record/3338373/files/musdb18hq.zip"
_CHECKSUM = "baac80d0483c61d74b2e5f3be75fa557eec52898339e6aa45c1fa48833c5d21d"
_EXT = ".wav"
_SAMPLE_RATE = 44100
# Track names held out from "train" to form the "validation" split.
_VALIDATION_SET = [
    "Actions - One Minute Smile",
    "Clara Berry And Wooldog - Waltz For My Victims",
    "Johnny Lokke - Promises & Lies",
    "Patrick Talbot - A Reason To Leave",
    "Triviul - Angelsaint",
    "Alexander Ross - Goodbye Bolero",
    "Fergessen - Nos Palpitants",
    "Leaf - Summerghost",
    "Skelpolu - Human Mistakes",
    "Young Griffo - Pennies",
    "ANiMAL - Rockshow",
    "James May - On The Line",
    "Meaxic - Take A Step",
    "Traffic Experiment - Sirens",
]


class MUSDB_HQ(Dataset):
    """*MUSDB_HQ* :cite:`MUSDB18HQ` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        subset (str): Subset of the dataset to use. Options: [``"train"``, ``"test"``].
        sources (List[str] or None, optional): Sources extract data from.
            List can contain the following options: [``"bass"``, ``"drums"``, ``"other"``, ``"mixture"``, ``"vocals"``].
            If ``None``, dataset consists of tracks except mixture.
            (default: ``None``)
        split (str or None, optional): Whether to split training set into train and validation set.
            If ``None``, no splitting occurs. If ``train`` or ``validation``, returns respective set.
            (default: ``None``)
        download (bool, optional): Whether to download the dataset if it is not found at root path.
            (default: ``False``)
    """

    def __init__(
        self,
        root: Union[str, Path],
        subset: str,
        sources: Optional[List[str]] = None,
        split: Optional[str] = None,
        download: bool = False,
    ) -> None:
        # An empty/None `sources` falls back to every stem except the mixture.
        self.sources = sources if sources else ["bass", "drums", "other", "vocals"]
        self.split = split

        archive_name = os.path.basename(_URL)
        archive = os.path.join(root, archive_name)
        extracted_name = archive_name.rsplit(".", 2)[0]

        if subset not in ["test", "train"]:
            raise ValueError("`subset` must be one of ['test', 'train']")
        if self.split is not None and self.split not in ["train", "validation"]:
            raise ValueError("`split` must be one of ['train', 'validation']")

        base_path = os.path.join(root, extracted_name)
        self._path = os.path.join(base_path, subset)
        if not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                if not download:
                    raise RuntimeError("Dataset not found. Please use `download=True` to download")
                download_url_to_file(_URL, archive, hash_prefix=_CHECKSUM)
            os.makedirs(base_path, exist_ok=True)
            _extract_zip(archive, base_path)

        self.names = self._collect_songs()

    def _get_track(self, name, source):
        """Return the path of one source stem of the given track."""
        return Path(self._path) / name / f"{source}{_EXT}"

    def _load_sample(self, n: int) -> Tuple[torch.Tensor, int, int, str]:
        """Load and stack every requested source of the n-th track."""
        track_name = self.names[n]
        source_wavs = []
        expected_frames = None
        for src in self.sources:
            wav, sr = torchaudio.load(str(self._get_track(track_name, src)))
            if sr != _SAMPLE_RATE:
                raise ValueError(f"expected sample rate {_SAMPLE_RATE}, but got {sr}")
            # All stems of one track must be the same length so they can be stacked.
            if expected_frames is None:
                expected_frames = wav.shape[-1]
            elif wav.shape[-1] != expected_frames:
                raise ValueError("num_frames do not match across sources")
            source_wavs.append(wav)

        return torch.stack(source_wavs), _SAMPLE_RATE, expected_frames, track_name

    def _collect_songs(self):
        """Return the (sorted) track names belonging to the active subset/split."""
        if self.split == "validation":
            return _VALIDATION_SET
        subset_dir = Path(self._path)
        collected = []
        for dirpath, subdirs, _ in os.walk(subset_dir, followlinks=True):
            current = Path(dirpath)
            # Only leaf directories are tracks; skip hidden directories, any
            # directory that still has children, and the subset root itself.
            if current.name.startswith(".") or subdirs or current == subset_dir:
                continue
            track = str(current.relative_to(subset_dir))
            # When splitting, the validation tracks are removed from "train".
            if self.split and track in _VALIDATION_SET:
                continue
            collected.append(track)
        return sorted(collected)

    def __getitem__(self, n: int) -> Tuple[torch.Tensor, int, int, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded
        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            int:
                Num frames
            str:
                Track name
        """
        return self._load_sample(n)

    def __len__(self) -> int:
        """Return the number of tracks in the active subset/split."""
        return len(self.names)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/quesst14.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Tuple, Union
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_tar, _load_waveform
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Dataset location and invariants.
URL = "https://speech.fit.vutbr.cz/files/quesst14Database.tgz"
SAMPLE_RATE = 8000
_CHECKSUM = "4f869e06bc066bbe9c5dde31dbd3909a0870d70291110ebbb38878dcbc2fc5e4"
# Languages present in the corpus; used to validate the `language` argument.
_LANGUAGES = [
    "albanian",
    "basque",
    "czech",
    "nnenglish",
    "romanian",
    "slovak",
]


class QUESST14(Dataset):
    """*QUESST14* :cite:`Mir2015QUESST2014EQ` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        subset (str): Subset of the dataset to use. Options: [``"docs"``, ``"dev"``, ``"eval"``].
        language (str or None, optional): Language to get dataset for.
            Options: [``None``, ``albanian``, ``basque``, ``czech``, ``nnenglish``, ``romanian``, ``slovak``].
            If ``None``, dataset consists of all languages. (default: ``"nnenglish"``)
        download (bool, optional): Whether to download the dataset if it is not found at root path.
            (default: ``False``)
    """

    def __init__(
        self,
        root: Union[str, Path],
        subset: str,
        language: Optional[str] = "nnenglish",
        download: bool = False,
    ) -> None:
        if subset not in ["docs", "dev", "eval"]:
            raise ValueError("`subset` must be one of ['docs', 'dev', 'eval']")

        if language is not None and language not in _LANGUAGES:
            raise ValueError(f"`language` must be None or one of {str(_LANGUAGES)}")

        # Get string representation of 'root'
        root = os.fspath(root)

        archive_name = os.path.basename(URL)
        archive = os.path.join(root, archive_name)

        extracted_name = archive_name.rsplit(".", 2)[0]
        self._path = os.path.join(root, extracted_name)

        if not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                if not download:
                    raise RuntimeError("Dataset not found. Please use `download=True` to download")
                download_url_to_file(URL, archive, hash_prefix=_CHECKSUM)
            _extract_tar(archive, root)

        # Each subset is described by a different scoring list file.
        subset_lists = {
            "docs": "language_key_utterances.lst",
            "dev": "language_key_dev.lst",
            "eval": "language_key_eval.lst",
        }
        self.data = filter_audio_paths(self._path, language, subset_lists[subset])

    def get_metadata(self, n: int) -> Tuple[str, int, str]:
        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
        """
        audio_path = self.data[n]
        relpath = os.path.relpath(audio_path, self._path)
        return relpath, SAMPLE_RATE, audio_path.with_suffix("").name

    def __getitem__(self, n: int) -> Tuple[torch.Tensor, int, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
        """
        metadata = self.get_metadata(n)
        waveform = _load_waveform(self._path, metadata[0], metadata[1])
        return (waveform,) + metadata[1:]

    def __len__(self) -> int:
        """Return the number of audio files in the selected subset/language."""
        return len(self.data)


def filter_audio_paths(
    path: str,
    language: str,
    lst_name: str,
):
    """Extract audio paths for the given language."""
    dataset_dir = Path(path)
    selected = []

    with open(dataset_dir / "scoring" / lst_name) as lst:
        for entry in lst:
            rel_audio, entry_lang = entry.strip().split()
            if language is not None and entry_lang != language:
                continue
            # Drop the leading top-level directory component of the listed path
            # so it can be re-rooted under `path`.
            selected.append(dataset_dir / re.sub(r"^.*?\/", "", rel_audio))

    return selected
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/snips.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Optional, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio.datasets.utils import _load_waveform
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
_SAMPLE_RATE = 16000
|
| 11 |
+
_SPEAKERS = [
|
| 12 |
+
"Aditi",
|
| 13 |
+
"Amy",
|
| 14 |
+
"Brian",
|
| 15 |
+
"Emma",
|
| 16 |
+
"Geraint",
|
| 17 |
+
"Ivy",
|
| 18 |
+
"Joanna",
|
| 19 |
+
"Joey",
|
| 20 |
+
"Justin",
|
| 21 |
+
"Kendra",
|
| 22 |
+
"Kimberly",
|
| 23 |
+
"Matthew",
|
| 24 |
+
"Nicole",
|
| 25 |
+
"Raveena",
|
| 26 |
+
"Russell",
|
| 27 |
+
"Salli",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _load_labels(file: Path, subset: str):
|
| 32 |
+
"""Load transcirpt, iob, and intent labels for all utterances.
|
| 33 |
+
|
| 34 |
+
Args:
|
| 35 |
+
file (Path): The path to the label file.
|
| 36 |
+
subset (str): Subset of the dataset to use. Options: [``"train"``, ``"valid"``, ``"test"``].
|
| 37 |
+
|
| 38 |
+
Returns:
|
| 39 |
+
Dictionary of labels, where the key is the filename of the audio,
|
| 40 |
+
and the label is a Tuple of transcript, Inside–outside–beginning (IOB) label, and intention label.
|
| 41 |
+
"""
|
| 42 |
+
labels = {}
|
| 43 |
+
with open(file, "r") as f:
|
| 44 |
+
for line in f:
|
| 45 |
+
line = line.strip().split(" ")
|
| 46 |
+
index = line[0]
|
| 47 |
+
trans, iob_intent = " ".join(line[1:]).split("\t")
|
| 48 |
+
trans = " ".join(trans.split(" ")[1:-1])
|
| 49 |
+
iob = " ".join(iob_intent.split(" ")[1:-1])
|
| 50 |
+
intent = iob_intent.split(" ")[-1]
|
| 51 |
+
if subset in index:
|
| 52 |
+
labels[index] = (trans, iob, intent)
|
| 53 |
+
return labels
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class Snips(Dataset):
|
| 57 |
+
"""*Snips* :cite:`coucke2018snips` dataset.
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
root (str or Path): Root directory where the dataset's top level directory is found.
|
| 61 |
+
subset (str): Subset of the dataset to use. Options: [``"train"``, ``"valid"``, ``"test"``].
|
| 62 |
+
speakers (List[str] or None, optional): The speaker list to include in the dataset. If ``None``,
|
| 63 |
+
include all speakers in the subset. (Default: ``None``)
|
| 64 |
+
audio_format (str, optional): The extension of the audios. Options: [``"mp3"``, ``"wav"``].
|
| 65 |
+
(Default: ``"mp3"``)
|
| 66 |
+
"""
|
| 67 |
+
|
| 68 |
+
_trans_file = "all.iob.snips.txt"
|
| 69 |
+
|
| 70 |
+
def __init__(
|
| 71 |
+
self,
|
| 72 |
+
root: Union[str, Path],
|
| 73 |
+
subset: str,
|
| 74 |
+
speakers: Optional[List[str]] = None,
|
| 75 |
+
audio_format: str = "mp3",
|
| 76 |
+
) -> None:
|
| 77 |
+
if subset not in ["train", "valid", "test"]:
|
| 78 |
+
raise ValueError('`subset` must be one of ["train", "valid", "test"].')
|
| 79 |
+
if audio_format not in ["mp3", "wav"]:
|
| 80 |
+
raise ValueError('`audio_format` must be one of ["mp3", "wav].')
|
| 81 |
+
|
| 82 |
+
root = Path(root)
|
| 83 |
+
self._path = root / "SNIPS"
|
| 84 |
+
self.audio_path = self._path / subset
|
| 85 |
+
if speakers is None:
|
| 86 |
+
speakers = _SPEAKERS
|
| 87 |
+
|
| 88 |
+
if not os.path.isdir(self._path):
|
| 89 |
+
raise RuntimeError("Dataset not found.")
|
| 90 |
+
|
| 91 |
+
self.audio_paths = self.audio_path.glob(f"*.{audio_format}")
|
| 92 |
+
self.data = []
|
| 93 |
+
for audio_path in sorted(self.audio_paths):
|
| 94 |
+
audio_name = str(audio_path.name)
|
| 95 |
+
speaker = audio_name.split("-")[0]
|
| 96 |
+
if speaker in speakers:
|
| 97 |
+
self.data.append(audio_path)
|
| 98 |
+
transcript_path = self._path / self._trans_file
|
| 99 |
+
self.labels = _load_labels(transcript_path, subset)
|
| 100 |
+
|
| 101 |
+
def get_metadata(self, n: int) -> Tuple[str, int, str, str, str]:
|
| 102 |
+
"""Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
|
| 103 |
+
but otherwise returns the same fields as :py:func:`__getitem__`.
|
| 104 |
+
|
| 105 |
+
Args:
|
| 106 |
+
n (int): The index of the sample to be loaded.
|
| 107 |
+
|
| 108 |
+
Returns:
|
| 109 |
+
Tuple of the following items:
|
| 110 |
+
|
| 111 |
+
str:
|
| 112 |
+
Path to audio
|
| 113 |
+
int:
|
| 114 |
+
Sample rate
|
| 115 |
+
str:
|
| 116 |
+
File name
|
| 117 |
+
str:
|
| 118 |
+
Transcription of audio
|
| 119 |
+
str:
|
| 120 |
+
Inside–outside–beginning (IOB) label of transcription
|
| 121 |
+
str:
|
| 122 |
+
Intention label of the audio.
|
| 123 |
+
"""
|
| 124 |
+
audio_path = self.data[n]
|
| 125 |
+
relpath = os.path.relpath(audio_path, self._path)
|
| 126 |
+
file_name = audio_path.with_suffix("").name
|
| 127 |
+
transcript, iob, intent = self.labels[file_name]
|
| 128 |
+
return relpath, _SAMPLE_RATE, file_name, transcript, iob, intent
|
| 129 |
+
|
| 130 |
+
def __getitem__(self, n: int) -> Tuple[torch.Tensor, int, str, str, str]:
|
| 131 |
+
"""Load the n-th sample from the dataset.
|
| 132 |
+
|
| 133 |
+
Args:
|
| 134 |
+
n (int): The index of the sample to be loaded
|
| 135 |
+
|
| 136 |
+
Returns:
|
| 137 |
+
Tuple of the following items:
|
| 138 |
+
|
| 139 |
+
Tensor:
|
| 140 |
+
Waveform
|
| 141 |
+
int:
|
| 142 |
+
Sample rate
|
| 143 |
+
str:
|
| 144 |
+
File name
|
| 145 |
+
str:
|
| 146 |
+
Transcription of audio
|
| 147 |
+
str:
|
| 148 |
+
Inside–outside–beginning (IOB) label of transcription
|
| 149 |
+
str:
|
| 150 |
+
Intention label of the audio.
|
| 151 |
+
"""
|
| 152 |
+
metadata = self.get_metadata(n)
|
| 153 |
+
waveform = _load_waveform(self._path, metadata[0], metadata[1])
|
| 154 |
+
return (waveform,) + metadata[1:]
|
| 155 |
+
|
| 156 |
+
def __len__(self) -> int:
|
| 157 |
+
return len(self.data)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/speechcommands.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional, Tuple, Union
|
| 4 |
+
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_tar, _load_waveform
|
| 9 |
+
|
| 10 |
+
FOLDER_IN_ARCHIVE = "SpeechCommands"
|
| 11 |
+
URL = "speech_commands_v0.02"
|
| 12 |
+
HASH_DIVIDER = "_nohash_"
|
| 13 |
+
EXCEPT_FOLDER = "_background_noise_"
|
| 14 |
+
SAMPLE_RATE = 16000
|
| 15 |
+
_CHECKSUMS = {
|
| 16 |
+
"http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz": "743935421bb51cccdb6bdd152e04c5c70274e935c82119ad7faeec31780d811d", # noqa: E501
|
| 17 |
+
"http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz": "af14739ee7dc311471de98f5f9d2c9191b18aedfe957f4a6ff791c709868ff58", # noqa: E501
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _load_list(root, *filenames):
|
| 22 |
+
output = []
|
| 23 |
+
for filename in filenames:
|
| 24 |
+
filepath = os.path.join(root, filename)
|
| 25 |
+
with open(filepath) as fileobj:
|
| 26 |
+
output += [os.path.normpath(os.path.join(root, line.strip())) for line in fileobj]
|
| 27 |
+
return output
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _get_speechcommands_metadata(filepath: str, path: str) -> Tuple[str, int, str, str, int]:
|
| 31 |
+
relpath = os.path.relpath(filepath, path)
|
| 32 |
+
reldir, filename = os.path.split(relpath)
|
| 33 |
+
_, label = os.path.split(reldir)
|
| 34 |
+
# Besides the officially supported split method for datasets defined by "validation_list.txt"
|
| 35 |
+
# and "testing_list.txt" over "speech_commands_v0.0x.tar.gz" archives, an alternative split
|
| 36 |
+
# method referred to in paragraph 2-3 of Section 7.1, references 13 and 14 of the original
|
| 37 |
+
# paper, and the checksums file from the tensorflow_datasets package [1] is also supported.
|
| 38 |
+
# Some filenames in those "speech_commands_test_set_v0.0x.tar.gz" archives have the form
|
| 39 |
+
# "xxx.wav.wav", so file extensions twice needs to be stripped twice.
|
| 40 |
+
# [1] https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/url_checksums/speech_commands.txt
|
| 41 |
+
speaker, _ = os.path.splitext(filename)
|
| 42 |
+
speaker, _ = os.path.splitext(speaker)
|
| 43 |
+
|
| 44 |
+
speaker_id, utterance_number = speaker.split(HASH_DIVIDER)
|
| 45 |
+
utterance_number = int(utterance_number)
|
| 46 |
+
|
| 47 |
+
return relpath, SAMPLE_RATE, label, speaker_id, utterance_number
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class SPEECHCOMMANDS(Dataset):
|
| 51 |
+
"""*Speech Commands* :cite:`speechcommandsv2` dataset.
|
| 52 |
+
|
| 53 |
+
Args:
|
| 54 |
+
root (str or Path): Path to the directory where the dataset is found or downloaded.
|
| 55 |
+
url (str, optional): The URL to download the dataset from,
|
| 56 |
+
or the type of the dataset to dowload.
|
| 57 |
+
Allowed type values are ``"speech_commands_v0.01"`` and ``"speech_commands_v0.02"``
|
| 58 |
+
(default: ``"speech_commands_v0.02"``)
|
| 59 |
+
folder_in_archive (str, optional):
|
| 60 |
+
The top-level directory of the dataset. (default: ``"SpeechCommands"``)
|
| 61 |
+
download (bool, optional):
|
| 62 |
+
Whether to download the dataset if it is not found at root path. (default: ``False``).
|
| 63 |
+
subset (str or None, optional):
|
| 64 |
+
Select a subset of the dataset [None, "training", "validation", "testing"]. None means
|
| 65 |
+
the whole dataset. "validation" and "testing" are defined in "validation_list.txt" and
|
| 66 |
+
"testing_list.txt", respectively, and "training" is the rest. Details for the files
|
| 67 |
+
"validation_list.txt" and "testing_list.txt" are explained in the README of the dataset
|
| 68 |
+
and in the introduction of Section 7 of the original paper and its reference 12. The
|
| 69 |
+
original paper can be found `here <https://arxiv.org/pdf/1804.03209.pdf>`_. (Default: ``None``)
|
| 70 |
+
"""
|
| 71 |
+
|
| 72 |
+
def __init__(
|
| 73 |
+
self,
|
| 74 |
+
root: Union[str, Path],
|
| 75 |
+
url: str = URL,
|
| 76 |
+
folder_in_archive: str = FOLDER_IN_ARCHIVE,
|
| 77 |
+
download: bool = False,
|
| 78 |
+
subset: Optional[str] = None,
|
| 79 |
+
) -> None:
|
| 80 |
+
|
| 81 |
+
if subset is not None and subset not in ["training", "validation", "testing"]:
|
| 82 |
+
raise ValueError("When `subset` is not None, it must be one of ['training', 'validation', 'testing'].")
|
| 83 |
+
|
| 84 |
+
if url in [
|
| 85 |
+
"speech_commands_v0.01",
|
| 86 |
+
"speech_commands_v0.02",
|
| 87 |
+
]:
|
| 88 |
+
base_url = "http://download.tensorflow.org/data/"
|
| 89 |
+
ext_archive = ".tar.gz"
|
| 90 |
+
|
| 91 |
+
url = os.path.join(base_url, url + ext_archive)
|
| 92 |
+
|
| 93 |
+
# Get string representation of 'root' in case Path object is passed
|
| 94 |
+
root = os.fspath(root)
|
| 95 |
+
self._archive = os.path.join(root, folder_in_archive)
|
| 96 |
+
|
| 97 |
+
basename = os.path.basename(url)
|
| 98 |
+
archive = os.path.join(root, basename)
|
| 99 |
+
|
| 100 |
+
basename = basename.rsplit(".", 2)[0]
|
| 101 |
+
folder_in_archive = os.path.join(folder_in_archive, basename)
|
| 102 |
+
|
| 103 |
+
self._path = os.path.join(root, folder_in_archive)
|
| 104 |
+
|
| 105 |
+
if download:
|
| 106 |
+
if not os.path.isdir(self._path):
|
| 107 |
+
if not os.path.isfile(archive):
|
| 108 |
+
checksum = _CHECKSUMS.get(url, None)
|
| 109 |
+
download_url_to_file(url, archive, hash_prefix=checksum)
|
| 110 |
+
_extract_tar(archive, self._path)
|
| 111 |
+
else:
|
| 112 |
+
if not os.path.exists(self._path):
|
| 113 |
+
raise RuntimeError(
|
| 114 |
+
f"The path {self._path} doesn't exist. "
|
| 115 |
+
"Please check the ``root`` path or set `download=True` to download it"
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
if subset == "validation":
|
| 119 |
+
self._walker = _load_list(self._path, "validation_list.txt")
|
| 120 |
+
elif subset == "testing":
|
| 121 |
+
self._walker = _load_list(self._path, "testing_list.txt")
|
| 122 |
+
elif subset == "training":
|
| 123 |
+
excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt"))
|
| 124 |
+
walker = sorted(str(p) for p in Path(self._path).glob("*/*.wav"))
|
| 125 |
+
self._walker = [
|
| 126 |
+
w
|
| 127 |
+
for w in walker
|
| 128 |
+
if HASH_DIVIDER in w and EXCEPT_FOLDER not in w and os.path.normpath(w) not in excludes
|
| 129 |
+
]
|
| 130 |
+
else:
|
| 131 |
+
walker = sorted(str(p) for p in Path(self._path).glob("*/*.wav"))
|
| 132 |
+
self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w]
|
| 133 |
+
|
| 134 |
+
def get_metadata(self, n: int) -> Tuple[str, int, str, str, int]:
|
| 135 |
+
"""Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
|
| 136 |
+
but otherwise returns the same fields as :py:func:`__getitem__`.
|
| 137 |
+
|
| 138 |
+
Args:
|
| 139 |
+
n (int): The index of the sample to be loaded
|
| 140 |
+
|
| 141 |
+
Returns:
|
| 142 |
+
Tuple of the following items;
|
| 143 |
+
|
| 144 |
+
str:
|
| 145 |
+
Path to the audio
|
| 146 |
+
int:
|
| 147 |
+
Sample rate
|
| 148 |
+
str:
|
| 149 |
+
Label
|
| 150 |
+
str:
|
| 151 |
+
Speaker ID
|
| 152 |
+
int:
|
| 153 |
+
Utterance number
|
| 154 |
+
"""
|
| 155 |
+
fileid = self._walker[n]
|
| 156 |
+
return _get_speechcommands_metadata(fileid, self._archive)
|
| 157 |
+
|
| 158 |
+
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
|
| 159 |
+
"""Load the n-th sample from the dataset.
|
| 160 |
+
|
| 161 |
+
Args:
|
| 162 |
+
n (int): The index of the sample to be loaded
|
| 163 |
+
|
| 164 |
+
Returns:
|
| 165 |
+
Tuple of the following items;
|
| 166 |
+
|
| 167 |
+
Tensor:
|
| 168 |
+
Waveform
|
| 169 |
+
int:
|
| 170 |
+
Sample rate
|
| 171 |
+
str:
|
| 172 |
+
Label
|
| 173 |
+
str:
|
| 174 |
+
Speaker ID
|
| 175 |
+
int:
|
| 176 |
+
Utterance number
|
| 177 |
+
"""
|
| 178 |
+
metadata = self.get_metadata(n)
|
| 179 |
+
waveform = _load_waveform(self._archive, metadata[0], metadata[1])
|
| 180 |
+
return (waveform,) + metadata[1:]
|
| 181 |
+
|
| 182 |
+
def __len__(self) -> int:
|
| 183 |
+
return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/tedlium.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torchaudio
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Per-release download metadata for the TED-LIUM corpus.
# Keys: ``folder_in_archive`` — top-level directory inside the tarball;
# ``url``/``checksum`` — download location and SHA-256 prefix;
# ``data_path`` — subdirectory holding the data (release 3 only uses "data/");
# ``subset``/``supported_subsets`` — default and valid split names;
# ``dict`` — filename of the pronunciation dictionary shipped with the release.
_RELEASE_CONFIGS = {
    "release1": {
        "folder_in_archive": "TEDLIUM_release1",
        "url": "http://www.openslr.org/resources/7/TEDLIUM_release1.tar.gz",
        "checksum": "30301975fd8c5cac4040c261c0852f57cfa8adbbad2ce78e77e4986957445f27",
        "data_path": "",
        "subset": "train",
        "supported_subsets": ["train", "test", "dev"],
        "dict": "TEDLIUM.150K.dic",
    },
    "release2": {
        "folder_in_archive": "TEDLIUM_release2",
        "url": "http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz",
        "checksum": "93281b5fcaaae5c88671c9d000b443cb3c7ea3499ad12010b3934ca41a7b9c58",
        "data_path": "",
        "subset": "train",
        "supported_subsets": ["train", "test", "dev"],
        "dict": "TEDLIUM.152k.dic",
    },
    "release3": {
        "folder_in_archive": "TEDLIUM_release-3",
        "url": "http://www.openslr.org/resources/51/TEDLIUM_release-3.tgz",
        "checksum": "ad1e454d14d1ad550bc2564c462d87c7a7ec83d4dc2b9210f22ab4973b9eccdb",
        "data_path": "data/",
        "subset": "train",
        "supported_subsets": ["train", "test", "dev"],
        "dict": "TEDLIUM.152k.dic",
    },
}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class TEDLIUM(Dataset):
    """*Tedlium* :cite:`rousseau2012tedlium` dataset (releases 1, 2 and 3).

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        release (str, optional): Release version.
            Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
            (default: ``"release1"``).
        subset (str, optional): The subset of dataset to use. Valid options are ``"train"``, ``"dev"``,
            and ``"test"``. Defaults to ``"train"``.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        audio_ext (str, optional): extension for audio file (default: ``".sph"``)

    Raises:
        RuntimeError: If ``release`` or ``subset`` is not supported, or if the
            dataset is not present and ``download`` is ``False``.
    """

    def __init__(
        self,
        root: Union[str, Path],
        release: str = "release1",
        subset: str = "train",
        download: bool = False,
        audio_ext: str = ".sph",
    ) -> None:
        self._ext_audio = audio_ext
        if release in _RELEASE_CONFIGS.keys():
            folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"]
            url = _RELEASE_CONFIGS[release]["url"]
            # NOTE(review): ``subset`` already defaults to "train", so this
            # fallback only triggers when a caller passes a falsy value.
            subset = subset if subset else _RELEASE_CONFIGS[release]["subset"]
        else:
            # Raise warning
            raise RuntimeError(
                "The release {} does not match any of the supported tedlium releases{} ".format(
                    release,
                    _RELEASE_CONFIGS.keys(),
                )
            )
        if subset not in _RELEASE_CONFIGS[release]["supported_subsets"]:
            # Raise warning
            raise RuntimeError(
                "The subset {} does not match any of the supported tedlium subsets{} ".format(
                    subset,
                    _RELEASE_CONFIGS[release]["supported_subsets"],
                )
            )

        # Get string representation of 'root' in case Path object is passed
        root = os.fspath(root)

        basename = os.path.basename(url)
        archive = os.path.join(root, basename)

        basename = basename.split(".")[0]

        # Release 3 stores the train split under ``data/`` and dev/test under
        # ``legacy/<subset>``; releases 1 and 2 use ``<data_path>/<subset>``.
        if release == "release3":
            if subset == "train":
                self._path = os.path.join(root, folder_in_archive, _RELEASE_CONFIGS[release]["data_path"])
            else:
                self._path = os.path.join(root, folder_in_archive, "legacy", subset)
        else:
            self._path = os.path.join(root, folder_in_archive, _RELEASE_CONFIGS[release]["data_path"], subset)

        if download:
            if not os.path.isdir(self._path):
                # Only download the archive if it is not already on disk.
                if not os.path.isfile(archive):
                    checksum = _RELEASE_CONFIGS[release]["checksum"]
                    download_url_to_file(url, archive, hash_prefix=checksum)
                _extract_tar(archive)
        else:
            if not os.path.exists(self._path):
                raise RuntimeError(
                    f"The path {self._path} doesn't exist. "
                    "Please check the ``root`` path or set `download=True` to download it"
                )

        # Create list for all samples.
        # Each entry is ``(file id, line index)``: one sample per line of each
        # ``.stm`` transcript file under ``<path>/stm``.
        self._filelist = []
        stm_path = os.path.join(self._path, "stm")
        for file in sorted(os.listdir(stm_path)):
            if file.endswith(".stm"):
                # NOTE(review): ``stm_path`` is rebound from directory to file
                # here; safe because ``os.listdir`` above was evaluated once.
                stm_path = os.path.join(self._path, "stm", file)
                with open(stm_path) as f:
                    l = len(f.readlines())  # number of transcript lines in this talk
                    file = file.replace(".stm", "")
                    self._filelist.extend((file, line) for line in range(l))
        # Create dict path for later read (phoneme dictionary is lazy-loaded).
        self._dict_path = os.path.join(root, folder_in_archive, _RELEASE_CONFIGS[release]["dict"])
        self._phoneme_dict = None

    def _load_tedlium_item(self, fileid: str, line: int, path: str) -> Tuple[Tensor, int, str, int, int, int]:
        """Loads a TEDLIUM dataset sample given a file name and corresponding sentence name.

        Args:
            fileid (str): File id to identify both text and audio files corresponding to the sample
            line (int): Line identifier for the sample inside the text file
            path (str): Dataset root path

        Returns:
            (Tensor, int, str, int, int, int):
            ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
        """
        transcript_path = os.path.join(path, "stm", fileid)
        with open(transcript_path + ".stm") as f:
            transcript = f.readlines()[line]
            # STM line format: <talk> <channel> <speaker> <start> <end> <labels> <text...>
            # maxsplit=6 keeps the transcript text (which contains spaces) intact.
            talk_id, _, speaker_id, start_time, end_time, identifier, transcript = transcript.split(" ", 6)

        wave_path = os.path.join(path, "sph", fileid)
        waveform, sample_rate = self._load_audio(wave_path + self._ext_audio, start_time=start_time, end_time=end_time)

        return (waveform, sample_rate, transcript, talk_id, speaker_id, identifier)

    def _load_audio(self, path: str, start_time: float, end_time: float, sample_rate: int = 16000) -> Tuple[Tensor, int]:
        """Default load function used in TEDLIUM dataset, you can overwrite this function to customize functionality
        and load individual sentences from a full ted audio talk file.

        Args:
            path (str): Path to audio file
            start_time (float): Time in seconds where the sample sentence starts
            end_time (float): Time in seconds where the sample sentence finishes
            sample_rate (int, optional): Sampling rate

        Returns:
            Tuple[Tensor, int]: Audio tensor representation and sample rate
        """
        # Convert second offsets (strings from the STM file) to frame offsets.
        start_time = int(float(start_time) * sample_rate)
        end_time = int(float(end_time) * sample_rate)

        kwargs = {"frame_offset": start_time, "num_frames": end_time - start_time}

        return torchaudio.load(path, **kwargs)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            int:
                Talk ID
            int:
                Speaker ID
            int:
                Identifier
        """
        fileid, line = self._filelist[n]
        return self._load_tedlium_item(fileid, line, self._path)

    def __len__(self) -> int:
        """TEDLIUM dataset custom function overwriting len default behaviour.

        Returns:
            int: TEDLIUM dataset length
        """
        return len(self._filelist)

    @property
    def phoneme_dict(self):
        """dict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
        Note that some words have empty phonemes.
        """
        # Read phoneme dictionary lazily on first access, then cache it.
        if not self._phoneme_dict:
            self._phoneme_dict = {}
            with open(self._dict_path, "r", encoding="utf-8") as f:
                for line in f.readlines():
                    content = line.strip().split()
                    self._phoneme_dict[content[0]] = tuple(content[1:])  # content[1:] can be empty list
        # Return a copy so callers cannot mutate the cached mapping.
        return self._phoneme_dict.copy()
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/utils.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
import tarfile
|
| 4 |
+
import zipfile
|
| 5 |
+
from typing import Any, List, Optional
|
| 6 |
+
|
| 7 |
+
import torchaudio
|
| 8 |
+
|
| 9 |
+
_LG = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _extract_tar(from_path: str, to_path: Optional[str] = None, overwrite: bool = False) -> List[str]:
    """Extract a tar archive, skipping regular files that already exist.

    Args:
        from_path (str): Path to the archive.
        to_path (str, optional): Destination directory; defaults to the
            archive's own directory.
        overwrite (bool, optional): Re-extract files that already exist.

    Returns:
        List[str]: Paths of the regular files contained in the archive.
    """
    # NOTE(review): ``tarfile.extract`` performs no member-name sanitization
    # here; archives from untrusted sources could escape ``to_path``.
    dest = os.path.dirname(from_path) if to_path is None else to_path
    extracted = []
    with tarfile.open(from_path, "r") as archive:
        for member in archive:  # type: Any
            target = os.path.join(dest, member.name)
            if member.isfile():
                extracted.append(target)
                if os.path.exists(target):
                    _LG.info("%s already extracted.", target)
                    if not overwrite:
                        continue
            archive.extract(member, dest)
    return extracted
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _extract_zip(from_path: str, to_path: Optional[str] = None, overwrite: bool = False) -> List[str]:
    """Extract a zip archive, skipping entries that already exist on disk.

    Args:
        from_path (str): Path to the archive.
        to_path (str, optional): Destination directory; defaults to the
            archive's own directory.
        overwrite (bool, optional): Re-extract entries that already exist.

    Returns:
        List[str]: Names of all entries in the archive.
    """
    dest = os.path.dirname(from_path) if to_path is None else to_path

    with zipfile.ZipFile(from_path, "r") as archive:
        names = archive.namelist()
        for entry in names:
            target = os.path.join(dest, entry)
            if os.path.exists(target):
                _LG.info("%s already extracted.", target)
                if not overwrite:
                    continue
            archive.extract(entry, dest)
    return names
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _load_waveform(
    root: str,
    filename: str,
    exp_sample_rate: int,
):
    """Load ``root/filename`` and verify it has the expected sample rate.

    Args:
        root (str): Directory containing the audio file.
        filename (str): File name relative to ``root``.
        exp_sample_rate (int): Sample rate the file must have.

    Returns:
        Tensor: The decoded waveform.

    Raises:
        ValueError: If the file's sample rate differs from ``exp_sample_rate``.
    """
    waveform, sample_rate = torchaudio.load(os.path.join(root, filename))
    if sample_rate != exp_sample_rate:
        raise ValueError(f"sample rate should be {exp_sample_rate}, but got {sample_rate}")
    return waveform
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/vctk.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
import torchaudio
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_zip
|
| 9 |
+
|
| 10 |
+
# Default download location of the VCTK 0.92 corpus.
URL = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
# SHA-256 prefix used to verify the downloaded archive, keyed by URL.
_CHECKSUMS = {
    "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip": "f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359c"  # noqa: E501
}


# A dataset sample: (waveform, sample_rate, transcript, speaker_id, utterance_id).
SampleType = Tuple[Tensor, int, str, str, str]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class VCTK_092(Dataset):
    """*VCTK 0.92* :cite:`yamagishi2019vctk` dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.
        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
        audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

    Note:
        * All the speeches from speaker ``p315`` will be skipped due to the lack of the corresponding text files.
        * All the speeches from ``p280`` will be skipped for ``mic_id="mic2"`` due to the lack of the audio files.
        * Some of the speeches from speaker ``p362`` will be skipped due to the lack of the audio files.
        * See Also: https://datashare.is.ed.ac.uk/handle/10283/3443
    """

    def __init__(
        self,
        root: str,
        mic_id: str = "mic2",
        download: bool = False,
        url: str = URL,
        audio_ext=".flac",
    ):
        if mic_id not in ["mic1", "mic2"]:
            raise RuntimeError(f'`mic_id` has to be either "mic1" or "mic2". Found: {mic_id}')

        archive = os.path.join(root, "VCTK-Corpus-0.92.zip")

        self._path = os.path.join(root, "VCTK-Corpus-0.92")
        self._txt_dir = os.path.join(self._path, "txt")
        self._audio_dir = os.path.join(self._path, "wav48_silence_trimmed")
        self._mic_id = mic_id
        self._audio_ext = audio_ext

        if download:
            if not os.path.isdir(self._path):
                # Download the archive only when it is not already cached.
                if not os.path.isfile(archive):
                    checksum = _CHECKSUMS.get(url, None)
                    download_url_to_file(url, archive, hash_prefix=checksum)
                _extract_zip(archive, self._path)

        if not os.path.isdir(self._path):
            raise RuntimeError("Dataset not found. Please use `download=True` to download it.")

        # Extracting speaker IDs from the folder structure
        self._speaker_ids = sorted(os.listdir(self._txt_dir))
        self._sample_ids = []

        """
        Due to some insufficient data complexity in the 0.92 version of this dataset,
        we start traversing the audio folder structure in accordance with the text folder.
        As some of the audio files are missing of either ``mic_1`` or ``mic_2`` but the
        text is present for the same, we first check for the existence of the audio file
        before adding it to the ``sample_ids`` list.

        Once the ``audio_ids`` are loaded into memory we can quickly access the list for
        different parameters required by the user.
        """
        for speaker_id in self._speaker_ids:
            # Speaker ``p280`` has no mic2 recordings at all (see class Note).
            if speaker_id == "p280" and mic_id == "mic2":
                continue
            utterance_dir = os.path.join(self._txt_dir, speaker_id)
            for utterance_file in sorted(f for f in os.listdir(utterance_dir) if f.endswith(".txt")):
                utterance_id = os.path.splitext(utterance_file)[0]
                audio_path_mic = os.path.join(
                    self._audio_dir,
                    speaker_id,
                    f"{utterance_id}_{mic_id}{self._audio_ext}",
                )
                # Speaker ``p362`` has transcripts with no matching audio files.
                if speaker_id == "p362" and not os.path.isfile(audio_path_mic):
                    continue
                # Store as ``[speaker_id, utterance_number]``.
                self._sample_ids.append(utterance_id.split("_"))

    def _load_text(self, file_path) -> str:
        # Returns the first line of the transcript file.
        # NOTE(review): the ``as file_path`` rebinding shadows the parameter.
        with open(file_path) as file_path:
            return file_path.readlines()[0]

    def _load_audio(self, file_path) -> Tuple[Tensor, int]:
        # Decode the audio file; returns (waveform, sample_rate).
        return torchaudio.load(file_path)

    def _load_sample(self, speaker_id: str, utterance_id: str, mic_id: str) -> SampleType:
        # Build the transcript and audio paths for this (speaker, utterance, mic).
        transcript_path = os.path.join(self._txt_dir, speaker_id, f"{speaker_id}_{utterance_id}.txt")
        audio_path = os.path.join(
            self._audio_dir,
            speaker_id,
            f"{speaker_id}_{utterance_id}_{mic_id}{self._audio_ext}",
        )

        # Reading text
        transcript = self._load_text(transcript_path)

        # Reading FLAC
        waveform, sample_rate = self._load_audio(audio_path)

        return (waveform, sample_rate, transcript, speaker_id, utterance_id)

    def __getitem__(self, n: int) -> SampleType:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Speaker ID
            str:
                Utterance ID
        """
        speaker_id, utterance_id = self._sample_ids[n]
        return self._load_sample(speaker_id, utterance_id, self._mic_id)

    def __len__(self) -> int:
        # Number of (speaker, utterance) pairs collected in ``__init__``.
        return len(self._sample_ids)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/voxceleb1.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Tuple, Union
|
| 4 |
+
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
from torchaudio._internal import download_url_to_file
|
| 8 |
+
from torchaudio.datasets.utils import _extract_zip, _load_waveform
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# All VoxCeleb1 audio is distributed at 16 kHz.
SAMPLE_RATE = 16000
# Download locations and SHA-256 prefixes for the audio archives.
# The "dev" archive is shipped as four split files ("urls"/"checksums") that
# must be concatenated before extraction; "test" is a single zip ("url"/"checksum").
_ARCHIVE_CONFIGS = {
    "dev": {
        "archive_name": "vox1_dev_wav.zip",
        "urls": [
            "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa",
            "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab",
            "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partac",
            "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partad",
        ],
        "checksums": [
            "21ec6ca843659ebc2fdbe04b530baa4f191ad4b0971912672d92c158f32226a0",
            "311d21e0c8cbf33573a4fce6c80e5a279d80736274b381c394319fc557159a04",
            "92b64465f2b2a3dc0e4196ae8dd6828cbe9ddd1f089419a11e4cbfe2e1750df0",
            "00e6190c770b27f27d2a3dd26ee15596b17066b715ac111906861a7d09a211a5",
        ],
    },
    "test": {
        "archive_name": "vox1_test_wav.zip",
        "url": "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip",
        "checksum": "8de57f347fe22b2c24526e9f444f689ecf5096fc2a92018cf420ff6b5b15eaea",
    },
}
# Meta files: identification split list and verification trial pairs.
_IDEN_SPLIT_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/iden_split.txt"
_VERI_TEST_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test.txt"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _download_extract_wavs(root: str):
    """Download the VoxCeleb1 audio archives into ``root`` and extract them.

    Downloads both the "dev" and "test" archives described in
    ``_ARCHIVE_CONFIGS``, verifying each download against its SHA-256 prefix,
    then extracts the zips into ``root``.

    Args:
        root (str): Directory to download and extract into.
    """
    for archive in ["dev", "test"]:
        archive_name = _ARCHIVE_CONFIGS[archive]["archive_name"]
        archive_path = os.path.join(root, archive_name)
        # The zip file of dev data is splited to 4 chunks.
        # Download and combine them into one file before extraction.
        if archive == "dev":
            urls = _ARCHIVE_CONFIGS[archive]["urls"]
            checksums = _ARCHIVE_CONFIGS[archive]["checksums"]
            with open(archive_path, "wb") as f:
                for url, checksum in zip(urls, checksums):
                    file_path = os.path.join(root, os.path.basename(url))
                    download_url_to_file(url, file_path, hash_prefix=checksum)
                    with open(file_path, "rb") as f_split:
                        # Copy in 1 MiB chunks: each split is several GB, so
                        # reading it whole into memory can exhaust RAM.
                        for chunk in iter(lambda: f_split.read(1 << 20), b""):
                            f.write(chunk)
        else:
            url = _ARCHIVE_CONFIGS[archive]["url"]
            checksum = _ARCHIVE_CONFIGS[archive]["checksum"]
            download_url_to_file(url, archive_path, hash_prefix=checksum)
        _extract_zip(archive_path)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _get_flist(root: str, file_path: str, subset: str) -> List[str]:
    """Return the sorted relative file paths belonging to ``subset``.

    The meta file at ``file_path`` has rows of ``<split> <path>`` where split
    1/2/3 denotes train/dev/test.  (``root`` is unused but kept for interface
    compatibility.)
    """
    wanted = {"train": 1, "dev": 2}.get(subset, 3)
    paths = []
    with open(file_path, "r") as meta:
        for row in meta:
            split_id, rel_path = row.split()
            if int(split_id) == wanted:
                paths.append(rel_path)
    return sorted(paths)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _get_paired_flist(root: str, veri_test_path: str):
    """Parse the verification meta file into ``(label, path1, path2)`` tuples.

    (``root`` is unused but kept for interface compatibility.)
    """
    pairs = []
    with open(veri_test_path, "r") as meta:
        for row in meta:
            same_speaker, first, second = row.split()
            pairs.append((same_speaker, first, second))
    return pairs
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _get_file_id(file_path: str, _ext_audio: str):
    """Build ``<speaker>-<youtube>-<utterance>`` from the last three path parts."""
    parts = file_path.split("/")
    speaker, youtube, utterance = parts[-3], parts[-2], parts[-1]
    return "-".join([speaker, youtube, utterance.replace(_ext_audio, "")])
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class VoxCeleb1(Dataset):
    """*VoxCeleb1* :cite:`nagrani2017voxceleb` dataset.

    Base class shared by the identification and verification variants; the
    sample-access methods are implemented by subclasses.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (Default: ``False``).
    """

    _ext_audio = ".wav"

    def __init__(self, root: Union[str, Path], download: bool = False) -> None:
        # Accept both ``str`` and ``Path`` for ``root``.
        root = os.fspath(root)
        self._path = os.path.join(root, "wav")
        if os.path.isdir(self._path):
            return
        if not download:
            raise RuntimeError(
                f"Dataset not found at {self._path}. Please set `download=True` to download the dataset."
            )
        _download_extract_wavs(root)

    def get_metadata(self, n: int):
        raise NotImplementedError

    def __getitem__(self, n: int):
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class VoxCeleb1Identification(VoxCeleb1):
    """*VoxCeleb1* :cite:`nagrani2017voxceleb` dataset for speaker identification task.

    Each data sample contains the waveform, sample rate, speaker id, and the file id.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        subset (str, optional): Subset of the dataset to use. Options: ["train", "dev", "test"]. (Default: ``"train"``)
        meta_url (str, optional): The url of meta file that contains the list of subset labels and file paths.
            The format of each row is ``subset file_path". For example: ``1 id10006/nLEBBc9oIFs/00003.wav``.
            ``1``, ``2``, ``3`` mean ``train``, ``dev``, and ``test`` subest, respectively.
            (Default: ``"https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/iden_split.txt"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (Default: ``False``).

    Note:
        The file structure of `VoxCeleb1Identification` dataset is as follows:

        └─ root/

         └─ wav/

          └─ speaker_id folders

        Users who pre-downloaded the ``"vox1_dev_wav.zip"`` and ``"vox1_test_wav.zip"`` files need to move
        the extracted files into the same ``root`` directory.
    """

    def __init__(
        self, root: Union[str, Path], subset: str = "train", meta_url: str = _IDEN_SPLIT_URL, download: bool = False
    ) -> None:
        super().__init__(root, download)
        if subset not in ["train", "dev", "test"]:
            raise ValueError("`subset` must be one of ['train', 'dev', 'test']")
        # Fetch the split list (iden_split.txt) unless it is already cached locally.
        meta_list_path = os.path.join(root, os.path.basename(meta_url))
        if not os.path.exists(meta_list_path):
            download_url_to_file(meta_url, meta_list_path)
        self._flist = _get_flist(self._path, meta_list_path, subset)

    def get_metadata(self, n: int) -> Tuple[str, int, int, str]:
        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            int:
                Speaker ID
            str:
                File ID
        """
        rel_path = self._flist[n]
        file_id = _get_file_id(rel_path, self._ext_audio)
        # ``file_id`` looks like "id10006-<youtube>-<utt>"; drop the "id"
        # prefix of the first component to obtain the numeric speaker id.
        speaker = int(file_id.split("-")[0][3:])
        return rel_path, SAMPLE_RATE, speaker, file_id

    def __getitem__(self, n: int) -> Tuple[Tensor, int, int, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            int:
                Speaker ID
            str:
                File ID
        """
        file_path, sample_rate, speaker_id, file_id = self.get_metadata(n)
        waveform = _load_waveform(self._path, file_path, sample_rate)
        return waveform, sample_rate, speaker_id, file_id

    def __len__(self) -> int:
        return len(self._flist)
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
class VoxCeleb1Verification(VoxCeleb1):
    """*VoxCeleb1* :cite:`nagrani2017voxceleb` dataset for speaker verification task.

    Each data sample is a pair of waveforms together with the sample rate, a label
    telling whether both utterances come from the same speaker, and the two file IDs.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        meta_url (str, optional): The url of meta file that contains a list of utterance pairs
            and the corresponding labels. The format of each row is
            ``label file_path1 file_path2``.
            For example: ``1 id10270/x6uYqmx31kE/00001.wav id10270/8jEAjG6SegY/00008.wav``.
            ``1`` means the two utterances are from the same speaker, ``0`` means not.
            (Default: ``"https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test.txt"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (Default: ``False``).

    Note:
        The file structure of `VoxCeleb1Verification` dataset is as follows:

        └─ root/

         └─ wav/

          └─ speaker_id folders

        Users who pre-downloaded the ``"vox1_dev_wav.zip"`` and ``"vox1_test_wav.zip"`` files need to move
        the extracted files into the same ``root`` directory.
    """

    def __init__(self, root: Union[str, Path], meta_url: str = _VERI_TEST_URL, download: bool = False) -> None:
        super().__init__(root, download)
        # Fetch the verification trial list (utterance pairs + labels) unless cached.
        trial_list = os.path.join(root, os.path.basename(meta_url))
        if not os.path.exists(trial_list):
            download_url_to_file(meta_url, trial_list)
        self._flist = _get_paired_flist(self._path, trial_list)

    def get_metadata(self, n: int) -> Tuple[str, str, int, int, str, str]:
        """Return the metadata of the n-th trial pair without decoding audio.

        Same fields as :py:func:`__getitem__`, except the first two items are
        paths to the audio files instead of the loaded waveforms.

        Args:
            n (int): The index of the sample

        Returns:
            Tuple of the following items;

            str:
                Path to audio file of speaker 1
            str:
                Path to audio file of speaker 2
            int:
                Sample rate
            int:
                Label
            str:
                File ID of speaker 1
            str:
                File ID of speaker 2
        """
        raw_label, path1, path2 = self._flist[n]
        return (
            path1,
            path2,
            SAMPLE_RATE,
            int(raw_label),
            _get_file_id(path1, self._ext_audio),
            _get_file_id(path2, self._ext_audio),
        )

    def __getitem__(self, n: int) -> Tuple[Tensor, Tensor, int, int, str, str]:
        """Load the n-th trial pair from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform of speaker 1
            Tensor:
                Waveform of speaker 2
            int:
                Sample rate
            int:
                Label
            str:
                File ID of speaker 1
            str:
                File ID of speaker 2
        """
        path1, path2, sample_rate, label, fileid1, fileid2 = self.get_metadata(n)
        wave1 = _load_waveform(self._path, path1, sample_rate)
        wave2 = _load_waveform(self._path, path2, sample_rate)
        return wave1, wave2, sample_rate, label, fileid1, fileid2

    def __len__(self) -> int:
        """Return the number of trial pairs."""
        return len(self._flist)
|
.venv/lib/python3.11/site-packages/torchaudio/datasets/yesno.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torchaudio
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
from torchaudio._internal import download_url_to_file
|
| 9 |
+
from torchaudio.datasets.utils import _extract_tar
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Per-release download metadata for the YesNo corpus.
# - "folder_in_archive": top-level directory created when the tarball is extracted.
# - "url": location of the archive on openslr.org.
# - "checksum": hash passed as ``hash_prefix`` to ``download_url_to_file``
#   for integrity verification of the downloaded archive.
_RELEASE_CONFIGS = {
    "release1": {
        "folder_in_archive": "waves_yesno",
        "url": "http://www.openslr.org/resources/1/waves_yesno.tar.gz",
        "checksum": "c3f49e0cca421f96b75b41640749167b52118f232498667ca7a5f9416aef8e73",
    }
}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class YESNO(Dataset):
    """*YesNo* :cite:`YesNo` dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        url (str, optional): The URL to download the dataset from.
            (default: ``"http://www.openslr.org/resources/1/waves_yesno.tar.gz"``)
        folder_in_archive (str, optional):
            The top-level directory of the dataset. (default: ``"waves_yesno"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
    """

    def __init__(
        self,
        root: Union[str, Path],
        url: str = _RELEASE_CONFIGS["release1"]["url"],
        folder_in_archive: str = _RELEASE_CONFIGS["release1"]["folder_in_archive"],
        download: bool = False,
    ) -> None:
        self._parse_filesystem(root, url, folder_in_archive, download)

    def _parse_filesystem(self, root: str, url: str, folder_in_archive: str, download: bool) -> None:
        # Locate the archive file and the extracted dataset directory under root.
        root = Path(root)
        archive = root / os.path.basename(url)
        self._path = root / folder_in_archive

        # Download and/or extract only when the dataset directory is absent.
        if download and not self._path.is_dir():
            if not archive.is_file():
                checksum = _RELEASE_CONFIGS["release1"]["checksum"]
                download_url_to_file(url, archive, hash_prefix=checksum)
            _extract_tar(archive)

        if not self._path.is_dir():
            raise RuntimeError("Dataset not found. Please use `download=True` to download it.")

        # One entry per .wav file, identified by its stem, in sorted order.
        self._walker = sorted(p.stem for p in self._path.glob("*.wav"))

    def _load_item(self, fileid: str, path: str):
        # The labels are encoded in the file name, e.g. "0_1_0_..." -> [0, 1, 0, ...].
        labels = list(map(int, fileid.split("_")))
        waveform, sample_rate = torchaudio.load(os.path.join(path, fileid + ".wav"))
        return waveform, sample_rate, labels

    def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            List[int]:
                labels
        """
        return self._load_item(self._walker[n], self._path)

    def __len__(self) -> int:
        """Return the number of audio files found in the dataset directory."""
        return len(self._walker)
|
.venv/lib/python3.11/site-packages/torchaudio/io/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torio.io import CodecConfig, StreamingMediaDecoder as StreamReader, StreamingMediaEncoder as StreamWriter

from ._effector import AudioEffector
from ._playback import play_audio


# Public API of ``torchaudio.io``: torio stream reader/writer re-exported
# under their torchaudio names, plus the effector and playback helpers.
__all__ = [
    "AudioEffector",
    "StreamReader",
    "StreamWriter",
    "CodecConfig",
    "play_audio",
]
|
.venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (535 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/_effector.cpython-311.pyc
ADDED
|
Binary file (15.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/io/__pycache__/_playback.cpython-311.pyc
ADDED
|
Binary file (3.69 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/io/_effector.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
from typing import Iterator, List, Optional
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
|
| 7 |
+
from torio.io._streaming_media_decoder import _get_afilter_desc, StreamingMediaDecoder as StreamReader
|
| 8 |
+
from torio.io._streaming_media_encoder import CodecConfig, StreamingMediaEncoder as StreamWriter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class _StreamingIOBuffer:
|
| 12 |
+
"""Streaming Bytes IO buffer. Data are dropped when read."""
|
| 13 |
+
|
| 14 |
+
def __init__(self):
|
| 15 |
+
self._buffer: List(bytes) = []
|
| 16 |
+
|
| 17 |
+
def write(self, b: bytes):
|
| 18 |
+
if b:
|
| 19 |
+
self._buffer.append(b)
|
| 20 |
+
return len(b)
|
| 21 |
+
|
| 22 |
+
def pop(self, n):
|
| 23 |
+
"""Pop the oldest byte string. It does not necessary return the requested amount"""
|
| 24 |
+
if not self._buffer:
|
| 25 |
+
return b""
|
| 26 |
+
if len(self._buffer[0]) <= n:
|
| 27 |
+
return self._buffer.pop(0)
|
| 28 |
+
ret = self._buffer[0][:n]
|
| 29 |
+
self._buffer[0] = self._buffer[0][n:]
|
| 30 |
+
return ret
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _get_sample_fmt(dtype: torch.dtype):
|
| 34 |
+
types = {
|
| 35 |
+
torch.uint8: "u8",
|
| 36 |
+
torch.int16: "s16",
|
| 37 |
+
torch.int32: "s32",
|
| 38 |
+
torch.float32: "flt",
|
| 39 |
+
torch.float64: "dbl",
|
| 40 |
+
}
|
| 41 |
+
if dtype not in types:
|
| 42 |
+
raise ValueError(f"Unsupported dtype is provided {dtype}. Supported dtypes are: {types.keys()}")
|
| 43 |
+
return types[dtype]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class _AudioStreamingEncoder:
    """Given a waveform, encode on-demand and return bytes.

    File-like object backed by a ``StreamWriter`` writing into a
    ``_StreamingIOBuffer``: each ``read`` call encodes only as much of the
    source waveform as needed, so the whole encoded stream is never
    materialized at once.
    """

    def __init__(
        self,
        src: Tensor,
        sample_rate: int,
        effect: str,
        muxer: str,
        encoder: Optional[str],
        codec_config: Optional[CodecConfig],
        frames_per_chunk: int,
    ):
        # Source waveform; channel count is taken from dim 1, frames from dim 0.
        self.src = src
        # Encoded bytes accumulate here (via StreamWriter) and are drained by read().
        self.buffer = _StreamingIOBuffer()
        self.writer = StreamWriter(self.buffer, format=muxer)
        self.writer.add_audio_stream(
            num_channels=src.size(1),
            sample_rate=sample_rate,
            format=_get_sample_fmt(src.dtype),
            encoder=encoder,
            filter_desc=effect,
            codec_config=codec_config,
        )
        self.writer.open()
        # Number of source frames fed to the encoder per write_audio_chunk call.
        self.fpc = frames_per_chunk

        # index on the input tensor (along time-axis)
        # we use -1 to indicate that we finished iterating the tensor and
        # the writer is closed.
        self.i_iter = 0

    def read(self, n: int) -> bytes:
        """Return up to ``n`` bytes, encoding more source frames on demand."""
        # Keep feeding chunks until the encoder emits bytes (codecs may buffer
        # internally) or the source is exhausted (i_iter == -1).
        while not self.buffer._buffer and self.i_iter >= 0:
            self.writer.write_audio_chunk(0, self.src[self.i_iter : self.i_iter + self.fpc])
            self.i_iter += self.fpc
            if self.i_iter >= self.src.size(0):
                # Source fully consumed: flush codec buffers and close the writer.
                self.writer.flush()
                self.writer.close()
                self.i_iter = -1
        return self.buffer.pop(n)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _encode(
    src: Tensor,
    sample_rate: int,
    effect: str,
    muxer: str,
    encoder: Optional[str],
    codec_config: Optional[CodecConfig],
):
    """Encode the whole waveform in one shot and return a rewound BytesIO."""
    encoded = io.BytesIO()
    writer = StreamWriter(encoded, format=muxer)
    writer.add_audio_stream(
        sample_rate=sample_rate,
        num_channels=src.size(1),
        format=_get_sample_fmt(src.dtype),
        filter_desc=effect,
        encoder=encoder,
        codec_config=codec_config,
    )
    # Feed the entire tensor as a single chunk; closing the writer flushes it.
    with writer.open():
        writer.write_audio_chunk(0, src)
    # Rewind so the caller can read the encoded stream from the start.
    encoded.seek(0)
    return encoded
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def _get_muxer(dtype: torch.dtype):
|
| 114 |
+
# TODO: check if this works in Windows.
|
| 115 |
+
types = {
|
| 116 |
+
torch.uint8: "u8",
|
| 117 |
+
torch.int16: "s16le",
|
| 118 |
+
torch.int32: "s32le",
|
| 119 |
+
torch.float32: "f32le",
|
| 120 |
+
torch.float64: "f64le",
|
| 121 |
+
}
|
| 122 |
+
if dtype not in types:
|
| 123 |
+
raise ValueError(f"Unsupported dtype is provided {dtype}. Supported dtypes are: {types.keys()}")
|
| 124 |
+
return types[dtype]
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
class AudioEffector:
    """Apply various filters and/or codecs to waveforms.

    .. versionadded:: 2.1

    Args:
        effect (str or None, optional): Filter expressions or ``None`` to apply no filter.
            See https://ffmpeg.org/ffmpeg-filters.html#Audio-Filters for the
            details of filter syntax.

        format (str or None, optional): When provided, encode the audio into the
            corresponding format. Default: ``None``.

        encoder (str or None, optional): When provided, override the encoder used
            by the ``format``. Default: ``None``.

        codec_config (CodecConfig or None, optional): When provided, configure the encoding codec.
            Should be provided in conjunction with ``format`` option.

        pad_end (bool, optional): When enabled, and if the waveform becomes shorter after applying
            effects/codec, then pad the end with silence.

    Example - Basic usage
        To use ``AudioEffector``, first instantiate it with a set of
        ``effect`` and ``format``.

        >>> # instantiate the effector
        >>> effector = AudioEffector(effect=..., format=...)

        Then, use :py:meth:`~AudioEffector.apply` or :py:meth:`~AudioEffector.stream`
        method to apply them.

        >>> # Apply the effect to the whole waveform
        >>> applied = effector.apply(waveform, sample_rate)

        >>> # Apply the effect chunk-by-chunk
        >>> for chunk in effector.stream(waveform, sample_rate):
        >>>    ...

    Example - Applying effects
        Please refer to
        https://ffmpeg.org/ffmpeg-filters.html#Filtergraph-description
        for the overview of filter description, and
        https://ffmpeg.org/ffmpeg-filters.html#toc-Audio-Filters
        for the list of available filters.

        Tempo - https://ffmpeg.org/ffmpeg-filters.html#atempo

        >>> AudioEffector(effect="atempo=1.5")

        Echo - https://ffmpeg.org/ffmpeg-filters.html#aecho

        >>> AudioEffector(effect="aecho=0.8:0.88:60:0.4")

        Flanger - https://ffmpeg.org/ffmpeg-filters.html#flanger

        >>> AudioEffector(effect="aflanger")

        Vibrato - https://ffmpeg.org/ffmpeg-filters.html#vibrato

        >>> AudioEffector(effect="vibrato")

        Tremolo - https://ffmpeg.org/ffmpeg-filters.html#tremolo

        >>> AudioEffector(effect="tremolo")

        You can also apply multiple effects at once, separated by commas.

        >>> AudioEffector(effect="atempo=1.5,vibrato")

    Example - Applying codec
        One can apply codec using ``format`` argument. ``format`` can be
        audio format or container format. If the container format supports
        multiple encoders, you can specify it with ``encoder`` argument.

        Wav format
        (no compression is applied but samples are converted to
        16-bit signed integer)

        >>> AudioEffector(format="wav")

        Ogg format with default encoder

        >>> AudioEffector(format="ogg")

        Ogg format with vorbis

        >>> AudioEffector(format="ogg", encoder="vorbis")

        Ogg format with opus

        >>> AudioEffector(format="ogg", encoder="opus")

        Webm format with opus

        >>> AudioEffector(format="webm", encoder="opus")

    Example - Applying codec with configuration
        Reference: https://trac.ffmpeg.org/wiki/Encode/MP3

        MP3 with default config

        >>> AudioEffector(format="mp3")

        MP3 with variable bitrate

        >>> AudioEffector(format="mp3", codec_config=CodecConfig(qscale=5))

        MP3 with constant bitrate

        >>> AudioEffector(format="mp3", codec_config=CodecConfig(bit_rate=32_000))
    """

    def __init__(
        self,
        effect: Optional[str] = None,
        format: Optional[str] = None,
        *,
        encoder: Optional[str] = None,
        codec_config: Optional[CodecConfig] = None,
        pad_end: bool = True,
    ):
        if format is None:
            # encoder/codec_config only make sense when a format is given.
            # (Fixed error-message typos: "condec_config" -> "codec_config",
            # "opions" -> "options".)
            if encoder is not None or codec_config is not None:
                raise ValueError("`encoder` and/or `codec_config` options are provided without `format` option.")
        self.effect = effect
        self.format = format
        self.encoder = encoder
        self.codec_config = codec_config
        self.pad_end = pad_end

    def _get_reader(self, waveform, sample_rate, output_sample_rate, frames_per_chunk=None):
        """Build a StreamReader that decodes the (effected/encoded) waveform.

        When ``frames_per_chunk`` is None the whole waveform is encoded up
        front; otherwise encoding happens lazily, chunk by chunk.
        """
        num_frames, num_channels = waveform.shape

        if self.format is not None:
            muxer = self.format
            encoder = self.encoder
            option = {}
            # Some formats are headerless, so we need to provide this information.
            if self.format == "mulaw":
                option = {"sample_rate": f"{sample_rate}", "channels": f"{num_channels}"}

        else:  # PCM
            muxer = _get_muxer(waveform.dtype)
            encoder = None
            option = {"sample_rate": f"{sample_rate}", "channels": f"{num_channels}"}

        if frames_per_chunk is None:
            src = _encode(waveform, sample_rate, self.effect, muxer, encoder, self.codec_config)
        else:
            src = _AudioStreamingEncoder(
                waveform, sample_rate, self.effect, muxer, encoder, self.codec_config, frames_per_chunk
            )

        # Decode back to the original dtype/channels, resampling if requested.
        output_sr = sample_rate if output_sample_rate is None else output_sample_rate
        filter_desc = _get_afilter_desc(output_sr, _get_sample_fmt(waveform.dtype), num_channels)
        if self.pad_end:
            # Pad with silence up to the original length if effects shortened it.
            filter_desc = f"{filter_desc},apad=whole_len={num_frames}"

        reader = StreamReader(src, format=muxer, option=option)
        reader.add_audio_stream(frames_per_chunk or -1, -1, filter_desc=filter_desc)
        return reader

    def apply(self, waveform: Tensor, sample_rate: int, output_sample_rate: Optional[int] = None) -> Tensor:
        """Apply the effect and/or codecs to the whole tensor.

        Args:
            waveform (Tensor): The input waveform. Shape: ``(time, channel)``
            sample_rate (int): Sample rate of the input waveform.
            output_sample_rate (int or None, optional): Output sample rate.
                If provided, override the output sample rate.
                Otherwise, the resulting tensor is resampled to have
                the same sample rate as the input.
                Default: ``None``.

        Returns:
            Tensor:
                Resulting Tensor. Shape: ``(time, channel)``. The number of frames
                could be different from that of the input.
        """
        if waveform.ndim != 2:
            raise ValueError(f"Expected the input waveform to be 2D. Found: {waveform.ndim}")

        if waveform.numel() == 0:
            return waveform

        reader = self._get_reader(waveform, sample_rate, output_sample_rate)
        reader.process_all_packets()
        (applied,) = reader.pop_chunks()
        return Tensor(applied)

    def stream(
        self, waveform: Tensor, sample_rate: int, frames_per_chunk: int, output_sample_rate: Optional[int] = None
    ) -> Iterator[Tensor]:
        """Apply the effect and/or codecs to the given tensor chunk by chunk.

        Args:
            waveform (Tensor): The input waveform. Shape: ``(time, channel)``
            sample_rate (int): Sample rate of the waveform.
            frames_per_chunk (int): The number of frames to return at a time.
            output_sample_rate (int or None, optional): Output sample rate.
                If provided, override the output sample rate.
                Otherwise, the resulting tensor is resampled to have
                the same sample rate as the input.
                Default: ``None``.

        Returns:
            Iterator[Tensor]:
                Series of processed chunks. Shape: ``(time, channel)``, where the
                number of frames matches ``frames_per_chunk`` except the
                last chunk, which could be shorter.
        """
        if waveform.ndim != 2:
            raise ValueError(f"Expected the input waveform to be 2D. Found: {waveform.ndim}")

        if waveform.numel() == 0:
            # Generator: this just terminates the iteration immediately.
            return waveform

        reader = self._get_reader(waveform, sample_rate, output_sample_rate, frames_per_chunk)
        for (applied,) in reader.stream():
            yield Tensor(applied)
|
.venv/lib/python3.11/site-packages/torchaudio/io/_playback.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
from sys import platform
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torchaudio
|
| 7 |
+
|
| 8 |
+
# Mapping from torch dtype to the FFmpeg sample-format string passed to
# StreamWriter.add_audio_stream (e.g. torch.int16 -> "s16").
dict_format = {
    torch.uint8: "u8",
    torch.int16: "s16",
    torch.int32: "s32",
    torch.int64: "s64",
    torch.float32: "flt",
    torch.float64: "dbl",
}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def play_audio(
    waveform: torch.Tensor,
    sample_rate: Optional[float],
    device: Optional[str] = None,
) -> None:
    """Plays audio through specified or available output device.

    .. warning::
        This function is currently only supported on MacOS, and requires
        libavdevice (FFmpeg) with ``audiotoolbox`` output device.

    .. note::
        This function can play up to two audio channels.

    Args:
        waveform: Tensor containing the audio to play.
            Expected shape: `(time, num_channels)`.
        sample_rate: Sample rate of the audio to play.
        device: Output device to use. If None, the default device is used.
    """

    # Only the macOS "audiotoolbox" playback path is implemented.
    if platform != "darwin":
        raise ValueError(f"This function only supports MacOS, but current OS is {platform}")
    device = device or "audiotoolbox"
    path = "-"

    available_devices = list(torchaudio.utils.ffmpeg_utils.get_output_devices().keys())
    if device not in available_devices:
        raise ValueError(f"Device {device} is not available. Available devices are: {available_devices}")

    if waveform.dtype not in dict_format:
        raise ValueError(f"Unsupported type {waveform.dtype}. The list of supported types is: {dict_format.keys()}")
    sample_fmt = dict_format[waveform.dtype]

    if waveform.ndim != 2:
        raise ValueError(f"Expected 2D tensor with shape `(time, num_channels)`, got {waveform.ndim}D tensor instead")

    num_frames, num_channels = waveform.size()
    if num_channels > 2:
        warnings.warn(
            f"Expected up to 2 channels, got {num_channels} channels instead. "
            "Only the first 2 channels will be played.",
            stacklevel=2,
        )

    # Write to speaker device
    writer = torchaudio.io.StreamWriter(dst=path, format=device)
    writer.add_audio_stream(sample_rate, num_channels, format=sample_fmt)

    # Stream the audio to the device in small blocks.
    block_size = 256
    with writer.open():
        for start in range(0, num_frames, block_size):
            writer.write_audio_chunk(0, waveform[start : start + block_size, :])
|
.venv/lib/python3.11/site-packages/torchaudio/lib/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/torchaudio/lib/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (187 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (2.25 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/_hdemucs.cpython-311.pyc
ADDED
|
Binary file (51.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/conformer.cpython-311.pyc
ADDED
|
Binary file (14.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/conv_tasnet.cpython-311.pyc
ADDED
|
Binary file (15.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/deepspeech.cpython-311.pyc
ADDED
|
Binary file (4.91 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/emformer.cpython-311.pyc
ADDED
|
Binary file (49.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/rnnt.cpython-311.pyc
ADDED
|
Binary file (41.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/rnnt_decoder.cpython-311.pyc
ADDED
|
Binary file (20.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/tacotron2.cpython-311.pyc
ADDED
|
Binary file (49.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/wav2letter.cpython-311.pyc
ADDED
|
Binary file (4.41 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/torchaudio/models/__pycache__/wavernn.cpython-311.pyc
ADDED
|
Binary file (22.9 kB). View file
|
|
|