Spaces:
Running
Running
| # ================================================================================================== | |
| # DEEPFAKE AUDIO - encoder/data_objects/speaker_verification_dataset.py (PyTorch Data Layer) | |
| # ================================================================================================== | |
| # | |
| # π DESCRIPTION | |
| # This module implements the PyTorch Dataset and DataLoader abstractions tailored | |
| # for Speaker Verification. It manages the discovery of speaker directories, | |
| # categorical sampling via RandomCycler, and high-performance batch collation. | |
| # | |
| # π€ AUTHORS | |
| # - Amey Thakur (https://github.com/Amey-Thakur) | |
| # - Mega Satish (https://github.com/msatmod) | |
| # | |
| # π€π» CREDITS | |
| # Original Real-Time Voice Cloning methodology by CorentinJ | |
| # Repository: https://github.com/CorentinJ/Real-Time-Voice-Cloning | |
| # | |
| # π PROJECT LINKS | |
| # Repository: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO | |
| # Video Demo: https://youtu.be/i3wnBcbHDbs | |
| # Research: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO/blob/main/DEEPFAKE-AUDIO.ipynb | |
| # | |
| # π LICENSE | |
| # Released under the MIT License | |
| # Release Date: 2021-02-06 | |
| # ================================================================================================== | |
| from encoder.data_objects.random_cycler import RandomCycler | |
| from encoder.data_objects.speaker_batch import SpeakerBatch | |
| from encoder.data_objects.speaker import Speaker | |
| from encoder.params_data import partials_n_frames | |
| from torch.utils.data import Dataset, DataLoader | |
| from pathlib import Path | |
| class SpeakerVerificationDataset(Dataset): | |
| """ | |
| Neural Corpus Interface: | |
| Scans a root directory for processed speaker identities and provides | |
| an infinite stochastic stream of categorical data. | |
| """ | |
| def __init__(self, datasets_root: Path): | |
| self.root = datasets_root | |
| # Identity Discovery | |
| speaker_dirs = [f for f in self.root.glob("*") if f.is_dir()] | |
| if len(speaker_dirs) == 0: | |
| raise Exception("β οΈ Technical Alert: No speakers detected in %s." % self.root) | |
| self.speakers = [Speaker(speaker_dir) for speaker_dir in speaker_dirs] | |
| self.speaker_cycler = RandomCycler(self.speakers) | |
| def __len__(self): | |
| """Returns a high constant to simulate an infinite stream for the DataLoader.""" | |
| return int(1e10) | |
| def __getitem__(self, index): | |
| """Retrieves the next stochastic categorical identity.""" | |
| return next(self.speaker_cycler) | |
| def get_logs(self): | |
| """Aggregates all preprocessing logs into a single analytical string.""" | |
| log_string = "" | |
| for log_fpath in self.root.glob("*.txt"): | |
| with log_fpath.open("r") as log_file: | |
| log_string += "".join(log_file.readlines()) | |
| return log_string | |
| class SpeakerVerificationDataLoader(DataLoader): | |
| """ | |
| High-Throughput Orchestrator: | |
| Custom DataLoader designed to yield SpeakerBatch objects containing | |
| diverse identities and utterances. | |
| """ | |
| def __init__(self, dataset, speakers_per_batch, utterances_per_speaker, sampler=None, | |
| batch_sampler=None, num_workers=0, pin_memory=False, timeout=0, | |
| worker_init_fn=None): | |
| self.utterances_per_speaker = utterances_per_speaker | |
| super().__init__( | |
| dataset=dataset, | |
| batch_size=speakers_per_batch, | |
| shuffle=False, | |
| sampler=sampler, | |
| batch_sampler=batch_sampler, | |
| num_workers=num_workers, | |
| collate_fn=self.collate, # Custom collation for GE2E loss | |
| pin_memory=pin_memory, | |
| drop_last=False, | |
| timeout=timeout, | |
| worker_init_fn=worker_init_fn | |
| ) | |
| def collate(self, speakers): | |
| """Constructs a SpeakerBatch from a set of sampled identities.""" | |
| return SpeakerBatch(speakers, self.utterances_per_speaker, partials_n_frames) | |