Shroukkkk committed on
Commit
e030059
·
verified ·
1 Parent(s): d2713b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -25,15 +25,26 @@ def synth(text):
25
  if not text:
26
  return None
27
 
28
- # Force integer token ids for embedding
29
- seq, seq_len = tacotron2.text_to_seq(text) # uses hparams.text_to_sequence internally :contentReference[oaicite:1]{index=1}
30
- seq = [int(x) for x in seq] # convert any float-like ids to int
 
 
 
 
 
 
 
31
  seq = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
32
  seq_len = torch.tensor([seq_len], device=DEVICE)
33
-
34
  mel, _, _ = tacotron2.infer(seq, seq_len)
35
- wav = hifigan.decode_batch(mel)
36
 
 
 
 
 
 
37
  if wav.dim() == 3:
38
  wav = wav.squeeze(1)
39
  wav = wav[0].cpu()
 
25
  if not text:
26
  return None
27
 
28
+ seq, seq_len = tacotron2.text_to_seq(text)
29
+ seq = [int(x) for x in seq]
30
+
31
+ # Tacotron2 encoder conv needs enough timesteps
32
+ min_tokens = 5
33
+ pad_id = 0
34
+ if len(seq) < min_tokens:
35
+ seq = seq + [pad_id] * (min_tokens - len(seq))
36
+ seq_len = len(seq)
37
+
38
  seq = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
39
  seq_len = torch.tensor([seq_len], device=DEVICE)
40
+
41
  mel, _, _ = tacotron2.infer(seq, seq_len)
 
42
 
43
+ # Optional: still keep mel padding for vocoder safety
44
+ if mel.shape[-1] < 5:
45
+ mel = F.pad(mel, (0, 5 - mel.shape[-1]), mode="replicate")
46
+
47
+ wav = hifigan.decode_batch(mel)
48
  if wav.dim() == 3:
49
  wav = wav.squeeze(1)
50
  wav = wav[0].cpu()