Spaces:
Running
Running
| import numpy as np | |
| import torch | |
| import crepe | |
| ############################################################################### | |
| # Pitch thresholding methods | |
| ############################################################################### | |
| class At: | |
| """Simple thresholding at a specified probability value""" | |
| def __init__(self, value): | |
| self.value = value | |
| def __call__(self, pitch, periodicity): | |
| # Make a copy to prevent in-place modification | |
| pitch = torch.clone(pitch) | |
| # Threshold | |
| pitch[periodicity < self.value] = crepe.UNVOICED | |
| return pitch | |
| class Hysteresis: | |
| """Hysteresis thresholding""" | |
| def __init__(self, | |
| lower_bound=.19, | |
| upper_bound=.31, | |
| width=.2, | |
| stds=1.7, | |
| return_threshold=False): | |
| self.lower_bound = lower_bound | |
| self.upper_bound = upper_bound | |
| self.width = width | |
| self.stds = stds | |
| self.return_threshold = return_threshold | |
| def __call__(self, pitch, periodicity): | |
| # Save output device | |
| device = pitch.device | |
| # Perform hysteresis in log-2 space | |
| pitch = torch.log2(pitch).detach().flatten().cpu().numpy() | |
| # Flatten periodicity | |
| periodicity = periodicity.flatten().cpu().numpy() | |
| # Ignore confidently unvoiced pitch | |
| pitch[periodicity < self.lower_bound] = crepe.UNVOICED | |
| # Whiten pitch | |
| mean, std = np.nanmean(pitch), np.nanstd(pitch) | |
| pitch = (pitch - mean) / std | |
| # Require high confidence to make predictions far from the mean | |
| parabola = self.width * pitch ** 2 - self.width * self.stds ** 2 | |
| threshold = \ | |
| self.lower_bound + np.clip(parabola, 0, 1 - self.lower_bound) | |
| threshold[np.isnan(threshold)] = self.lower_bound | |
| # Apply hysteresis to prevent short, unconfident voiced regions | |
| i = 0 | |
| while i < len(periodicity) - 1: | |
| # Detect unvoiced to voiced transition | |
| if periodicity[i] < threshold[i] and \ | |
| periodicity[i + 1] > threshold[i + 1]: | |
| # Grow region until next unvoiced or end of array | |
| start, end, keep = i + 1, i + 1, False | |
| while end < len(periodicity) and \ | |
| periodicity[end] > threshold[end]: | |
| if periodicity[end] > self.upper_bound: | |
| keep = True | |
| end += 1 | |
| # Force unvoiced if we didn't pass the confidence required by | |
| # the hysteresis | |
| if not keep: | |
| threshold[start:end] = 1 | |
| i = end | |
| else: | |
| i += 1 | |
| # Remove pitch with low periodicity | |
| pitch[periodicity < threshold] = crepe.UNVOICED | |
| # Unwhiten | |
| pitch = pitch * std + mean | |
| # Convert to Hz | |
| pitch = torch.tensor(2 ** pitch, device=device)[None, :] | |
| # Optionally return threshold | |
| if self.return_threshold: | |
| return pitch, torch.tensor(threshold, device=device) | |
| return pitch | |
| ############################################################################### | |
| # Periodicity thresholding methods | |
| ############################################################################### | |
| class Silence: | |
| """Set periodicity to zero in silent regions""" | |
| def __init__(self, value=-60): | |
| self.value = value | |
| def __call__(self, | |
| periodicity, | |
| audio, | |
| sample_rate=crepe.SAMPLE_RATE, | |
| hop_length=None, | |
| pad=True): | |
| # Don't modify in-place | |
| periodicity = torch.clone(periodicity) | |
| # Compute loudness | |
| loudness = crepe.loudness.a_weighted( | |
| audio, sample_rate, hop_length, pad) | |
| # Threshold silence | |
| periodicity[loudness < self.value] = 0. | |
| return periodicity | |