Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import warnings | |
| from logging import getLogger | |
| from pathlib import Path | |
| from typing import Iterable | |
| import librosa | |
| import soundfile | |
| from joblib import Parallel, delayed | |
| from tqdm_joblib import tqdm_joblib | |
| from .preprocess_utils import check_hubert_min_duration | |
| LOG = getLogger(__name__) | |
| # input_dir and output_dir exists. | |
| # write code to convert input dir audio files to output dir audio files, | |
| # without changing folder structure. Use joblib to parallelize. | |
| # Converting audio files includes: | |
| # - resampling to specified sampling rate | |
| # - trim silence | |
| # - adjust volume in a smart way | |
| # - save as 16-bit wav file | |
| def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path: | |
| """Return a unique path by appending a number to the original path.""" | |
| if path not in existing_paths: | |
| return path | |
| i = 1 | |
| while True: | |
| new_path = path.parent / f"{path.stem}_{i}{path.suffix}" | |
| if new_path not in existing_paths: | |
| return new_path | |
| i += 1 | |
| def is_relative_to(path: Path, *other): | |
| """Return True if the path is relative to another path or False. | |
| Python 3.9+ has Path.is_relative_to() method, but we need to support Python 3.8. | |
| """ | |
| try: | |
| path.relative_to(*other) | |
| return True | |
| except ValueError: | |
| return False | |
| def _preprocess_one( | |
| input_path: Path, | |
| output_path: Path, | |
| sr: int, | |
| *, | |
| top_db: int, | |
| frame_seconds: float, | |
| hop_seconds: float, | |
| ) -> None: | |
| """Preprocess one audio file.""" | |
| try: | |
| audio, sr = librosa.load(input_path, sr=sr, mono=True) | |
| # Audioread is the last backend it will attempt, so this is the exception thrown on failure | |
| except Exception as e: | |
| # Failure due to attempting to load a file that is not audio, so return early | |
| LOG.warning(f"Failed to load {input_path} due to {e}") | |
| return | |
| if not check_hubert_min_duration(audio, sr): | |
| LOG.info(f"Skip {input_path} because it is too short.") | |
| return | |
| # Adjust volume | |
| audio /= max(audio.max(), -audio.min()) | |
| # Trim silence | |
| audio, _ = librosa.effects.trim( | |
| audio, | |
| top_db=top_db, | |
| frame_length=int(frame_seconds * sr), | |
| hop_length=int(hop_seconds * sr), | |
| ) | |
| if not check_hubert_min_duration(audio, sr): | |
| LOG.info(f"Skip {input_path} because it is too short.") | |
| return | |
| soundfile.write(output_path, audio, samplerate=sr, subtype="PCM_16") | |
| def preprocess_resample( | |
| input_dir: Path | str, | |
| output_dir: Path | str, | |
| sampling_rate: int, | |
| n_jobs: int = -1, | |
| *, | |
| top_db: int = 30, | |
| frame_seconds: float = 0.1, | |
| hop_seconds: float = 0.05, | |
| ) -> None: | |
| input_dir = Path(input_dir) | |
| output_dir = Path(output_dir) | |
| """Preprocess audio files in input_dir and save them to output_dir.""" | |
| out_paths = [] | |
| in_paths = list(input_dir.rglob("*.*")) | |
| if not in_paths: | |
| raise ValueError(f"No audio files found in {input_dir}") | |
| for in_path in in_paths: | |
| in_path_relative = in_path.relative_to(input_dir) | |
| if not in_path.is_absolute() and is_relative_to( | |
| in_path, Path("dataset_raw") / "44k" | |
| ): | |
| new_in_path_relative = in_path_relative.relative_to("44k") | |
| warnings.warn( | |
| f"Recommended folder structure has changed since v1.0.0. " | |
| "Please move your dataset directly under dataset_raw folder. " | |
| f"Recoginzed {in_path_relative} as {new_in_path_relative}" | |
| ) | |
| in_path_relative = new_in_path_relative | |
| if len(in_path_relative.parts) < 2: | |
| continue | |
| speaker_name = in_path_relative.parts[0] | |
| file_name = in_path_relative.with_suffix(".wav").name | |
| out_path = output_dir / speaker_name / file_name | |
| out_path = _get_unique_filename(out_path, out_paths) | |
| out_path.parent.mkdir(parents=True, exist_ok=True) | |
| out_paths.append(out_path) | |
| in_and_out_paths = list(zip(in_paths, out_paths)) | |
| with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)): | |
| Parallel(n_jobs=n_jobs)( | |
| delayed(_preprocess_one)( | |
| *args, | |
| sr=sampling_rate, | |
| top_db=top_db, | |
| frame_seconds=frame_seconds, | |
| hop_seconds=hop_seconds, | |
| ) | |
| for args in in_and_out_paths | |
| ) | |