| from pathlib import Path |
| from typing import Dict, Tuple, Union |
|
|
| import torchaudio |
| from torch import Tensor |
| from torch.utils.data import Dataset |
| from torchaudio._internal import download_url_to_file |
| from torchaudio.datasets.utils import _extract_zip |
|
|
|
|
# Download location for the official DR-VCTK archive.
_URL = "https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip"
# Hash prefix passed to ``download_url_to_file`` to verify the downloaded archive.
_CHECKSUM = "781f12f4406ed36ed27ae3bce55da47ba176e2d8bae67319e389e07b2c9bd769"
# Valid values for the ``subset`` constructor argument.
_SUPPORTED_SUBSETS = {"train", "test"}


class DR_VCTK(Dataset):
    """*Device Recorded VCTK (Small subset version)* :cite:`Sarfjoo2018DeviceRV` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found.
        subset (str): The subset to use. Can be one of ``"train"`` and ``"test"``. (default: ``"train"``).
        download (bool):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str): The URL to download the dataset from.
            (default: ``"https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip"``)
    """

    def __init__(
        self,
        root: Union[str, Path],
        subset: str = "train",
        *,
        download: bool = False,
        url: str = _URL,
    ) -> None:
        if subset not in _SUPPORTED_SUBSETS:
            raise RuntimeError(
                f"The subset '{subset}' does not match any of the supported subsets: {_SUPPORTED_SUBSETS}"
            )

        root = Path(root).expanduser()
        archive = root / "DR-VCTK.zip"

        self._subset = subset
        # NOTE(review): assumes the archive extracts to a doubly-nested
        # ``DR-VCTK/DR-VCTK`` directory — confirm against the zip layout.
        self._path = root / "DR-VCTK" / "DR-VCTK"
        self._clean_audio_dir = self._path / f"clean_{self._subset}set_wav_16k"
        self._noisy_audio_dir = self._path / f"device-recorded_{self._subset}set_wav_16k"
        self._config_filepath = self._path / "configurations" / f"{self._subset}_ch_log.txt"

        # Download and/or extract only when the extracted tree is missing; reuse a
        # previously downloaded archive if it is already present at root.
        if not self._path.is_dir():
            if not archive.is_file():
                if not download:
                    raise RuntimeError("Dataset not found. Please use `download=True` to download it.")
                download_url_to_file(url, archive, hash_prefix=_CHECKSUM)
            _extract_zip(archive, root)

        self._config = self._load_config(self._config_filepath)
        # Deterministic sample order: iterate filenames in sorted order.
        self._filename_list = sorted(self._config)

    def _load_config(self, filepath: Union[str, Path]) -> Dict[str, Tuple[str, int]]:
        """Parse a tab-separated channel-log file into ``{filename: (source, channel_id)}``.

        The train log carries two header rows and the test log one; headers are skipped.
        """
        skip_rows = 2 if self._subset == "train" else 1

        config = {}
        with open(filepath) as f:
            for i, line in enumerate(f):
                # Lines yielded by file iteration keep their trailing newline, so a
                # "blank" line is "\n", which is truthy. Strip before testing so stray
                # empty lines are skipped instead of crashing the 3-way unpack below.
                if i < skip_rows or not line.strip():
                    continue
                filename, source, channel_id = line.strip().split("\t")
                config[filename] = (source, int(channel_id))
        return config

    def _load_dr_vctk_item(self, filename: str) -> Tuple[Tensor, int, Tensor, int, str, str, str, int]:
        """Load the clean/noisy waveform pair and metadata for one config filename."""
        # Filenames follow the "<speaker_id>_<utterance_id>.<ext>" pattern.
        speaker_id, utterance_id = filename.split(".")[0].split("_")
        source, channel_id = self._config[filename]
        file_clean_audio = self._clean_audio_dir / filename
        file_noisy_audio = self._noisy_audio_dir / filename
        waveform_clean, sample_rate_clean = torchaudio.load(file_clean_audio)
        waveform_noisy, sample_rate_noisy = torchaudio.load(file_noisy_audio)
        return (
            waveform_clean,
            sample_rate_clean,
            waveform_noisy,
            sample_rate_noisy,
            speaker_id,
            utterance_id,
            source,
            channel_id,
        )

    def __getitem__(self, n: int) -> Tuple[Tensor, int, Tensor, int, str, str, str, int]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Clean waveform
            int:
                Sample rate of the clean waveform
            Tensor:
                Noisy waveform
            int:
                Sample rate of the noisy waveform
            str:
                Speaker ID
            str:
                Utterance ID
            str:
                Source
            int:
                Channel ID
        """
        filename = self._filename_list[n]
        return self._load_dr_vctk_item(filename)

    def __len__(self) -> int:
        """Return the number of samples in the selected subset."""
        return len(self._filename_list)
|
|