Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -227,13 +227,27 @@ def compute_mel(y, sr=SAMPLE_RATE):
|
|
| 227 |
return mel
|
| 228 |
|
| 229 |
|
| 230 |
-
def
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
| 234 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
mel_power = torch.exp(mel)
|
| 236 |
-
spec =
|
|
|
|
|
|
|
| 237 |
gl = torchaudio.transforms.GriffinLim(n_fft=1024, hop_length=256, n_iter=n_iter)
|
| 238 |
audio = gl(spec)
|
| 239 |
return audio.detach().cpu().numpy() if np is not None else audio.detach().cpu().tolist()
|
|
|
|
| 227 |
return mel
|
| 228 |
|
| 229 |
|
| 230 |
+
def _get_mel_fb_pinv(sr=SAMPLE_RATE, n_mels=N_MELS):
    """Return the matrix that maps mel bands back to linear-frequency bins.

    Builds an HTK-scale, un-normalised triangular mel filterbank covering
    0 Hz to ``sr // 2`` over 513 linear bins (i.e. ``n_fft = 1024``) and
    returns the Moore-Penrose pseudo-inverse of its transpose.

    Args:
        sr: Sample rate in Hz used to place the filterbank.
        n_mels: Number of mel bands.

    Returns:
        Tensor of shape ``(513, n_mels)``; left-multiplying a
        ``(n_mels, frames)`` mel spectrogram by it gives a least-squares
        estimate of the ``(513, frames)`` linear spectrogram.

    Note:
        Nothing is memoised here; every call recomputes the pseudo-inverse.
        NOTE(review): assumes the forward mel transform used the same
        filterbank settings (norm=None, htk, f_min=0) - confirm against
        compute_mel.
    """
    fb = torchaudio.functional.melscale_filterbanks(
        n_freqs=513, f_min=0, f_max=float(sr // 2),
        n_mels=n_mels, sample_rate=sr, norm=None, mel_scale="htk",
    )
    # fb is (513, n_mels) and the forward projection is fb.T @ spec, so the
    # inverse mapping is pinv(fb.T). pinv(fb) would be (n_mels, 513) and
    # cannot left-multiply a (n_mels, frames) mel spectrogram.
    return torch.linalg.pinv(fb.T)  # (513, n_mels)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
# Memoised mel -> linear projection matrices, keyed by (sample_rate, n_mels).
_FB_PINV_CACHE = {}


def mel_to_audio_griffinlim(mel, sr=SAMPLE_RATE, n_iter=60):
    """Reconstruct a waveform from a mel spectrogram using Griffin-Lim.

    The mel spectrogram is exponentiated, projected back to 513 linear
    frequency bins with a cached pseudo-inverse filterbank, clamped to be
    non-negative, and passed to ``torchaudio.transforms.GriffinLim``
    (``n_fft=1024``, ``hop_length=256``) for phase estimation.

    Args:
        mel: Tensor whose first dimension is the number of mel bands.
            ``torch.exp`` is applied to it, so it is presumably a
            natural-log mel spectrogram of shape (n_mels, frames) -
            TODO confirm against compute_mel.
        sr: Sample rate in Hz; selects the filterbank used for inversion.
        n_iter: Number of Griffin-Lim iterations.

    Returns:
        The audio as a NumPy array when ``np`` is available, otherwise as a
        (nested) Python list.
    """
    key = (sr, mel.shape[0])
    if key not in _FB_PINV_CACHE:
        _FB_PINV_CACHE[key] = _get_mel_fb_pinv(sr=sr, n_mels=mel.shape[0])

    mel_power = torch.exp(mel)
    # The cached matrix is built on CPU; move it next to the input so the
    # matmul does not fail with a device mismatch for non-CPU tensors.
    fb_pinv = _FB_PINV_CACHE[key].to(mel_power.device)
    spec = fb_pinv @ mel_power
    # The pseudo-inverse can produce small negative energies; Griffin-Lim
    # needs a non-negative magnitude/power spectrogram.
    spec = torch.clamp(spec, min=0)

    gl = torchaudio.transforms.GriffinLim(n_fft=1024, hop_length=256, n_iter=n_iter)
    # The transform's window buffer must live on the same device as spec.
    gl = gl.to(spec.device)
    audio = gl(spec)
    return audio.detach().cpu().numpy() if np is not None else audio.detach().cpu().tolist()
|