Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,16 +16,18 @@ vocoder = HIFIGAN.from_hparams(
|
|
| 16 |
)
|
| 17 |
|
| 18 |
def generate_wav(text):
|
| 19 |
-
# Generate mel spectrogram
|
| 20 |
mel_output, mel_length, alignment = taco.encode_text(text)
|
| 21 |
|
| 22 |
-
# Slow down speech by stretching
|
|
|
|
| 23 |
mel_output = torch.nn.functional.interpolate(
|
| 24 |
-
mel_output
|
| 25 |
-
scale_factor=1.25, # 1.1 = slightly slower, 1.25 = calm
|
| 26 |
mode="linear",
|
| 27 |
align_corners=False
|
| 28 |
-
)
|
|
|
|
| 29 |
|
| 30 |
# Smooth mel for more natural prosody
|
| 31 |
mel_output = mel_output * 0.9
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
def generate_wav(text):
|
| 19 |
+
# Generate mel spectrogram
|
| 20 |
mel_output, mel_length, alignment = taco.encode_text(text)
|
| 21 |
|
| 22 |
+
# Slow down speech by stretching ONLY the time dimension
|
| 23 |
+
mel_output = mel_output.permute(0, 2, 1) # [1, 80, T]
|
| 24 |
mel_output = torch.nn.functional.interpolate(
|
| 25 |
+
mel_output,
|
| 26 |
+
scale_factor=1.25, # 1.1 = slightly slower, 1.25 = calm
|
| 27 |
mode="linear",
|
| 28 |
align_corners=False
|
| 29 |
+
)
|
| 30 |
+
mel_output = mel_output.permute(0, 2, 1) # back to [1, T, 80]
|
| 31 |
|
| 32 |
# Smooth mel for more natural prosody
|
| 33 |
mel_output = mel_output * 0.9
|