Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,15 +25,26 @@ def synth(text):
|
|
| 25 |
if not text:
|
| 26 |
return None
|
| 27 |
|
| 28 |
-
|
| 29 |
-
seq
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
seq = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
|
| 32 |
seq_len = torch.tensor([seq_len], device=DEVICE)
|
| 33 |
-
|
| 34 |
mel, _, _ = tacotron2.infer(seq, seq_len)
|
| 35 |
-
wav = hifigan.decode_batch(mel)
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
if wav.dim() == 3:
|
| 38 |
wav = wav.squeeze(1)
|
| 39 |
wav = wav[0].cpu()
|
|
|
|
| 25 |
if not text:
|
| 26 |
return None
|
| 27 |
|
| 28 |
+
seq, seq_len = tacotron2.text_to_seq(text)
|
| 29 |
+
seq = [int(x) for x in seq]
|
| 30 |
+
|
| 31 |
+
# Tacotron2 encoder conv needs enough timesteps
|
| 32 |
+
min_tokens = 5
|
| 33 |
+
pad_id = 0
|
| 34 |
+
if len(seq) < min_tokens:
|
| 35 |
+
seq = seq + [pad_id] * (min_tokens - len(seq))
|
| 36 |
+
seq_len = len(seq)
|
| 37 |
+
|
| 38 |
seq = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
|
| 39 |
seq_len = torch.tensor([seq_len], device=DEVICE)
|
| 40 |
+
|
| 41 |
mel, _, _ = tacotron2.infer(seq, seq_len)
|
|
|
|
| 42 |
|
| 43 |
+
# Optional: still keep mel padding for vocoder safety
|
| 44 |
+
if mel.shape[-1] < 5:
|
| 45 |
+
mel = F.pad(mel, (0, 5 - mel.shape[-1]), mode="replicate")
|
| 46 |
+
|
| 47 |
+
wav = hifigan.decode_batch(mel)
|
| 48 |
if wav.dim() == 3:
|
| 49 |
wav = wav.squeeze(1)
|
| 50 |
wav = wav[0].cpu()
|