nambn0321 committed on
Commit
2dc786b
·
verified ·
1 Parent(s): af16e48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -11
app.py CHANGED
@@ -28,23 +28,21 @@ def tts_generate(text):
28
  inputs = processor(text=text, return_tensors="pt").to(device)
29
  print("✅ Text processed.")
30
 
31
- # Generate mel spectrogram
32
- print("🎤 Generating speech...")
33
  with torch.no_grad():
34
- mel = model.generate_speech(inputs["input_ids"], speaker_embedding)
35
- print("✅ Mel spectrogram generated.")
36
-
37
- # Convert mel spectrogram to waveform
38
- print("🎚️ Vocoding waveform...")
39
- waveform = vocoder(mel)
40
- waveform = waveform.cpu()
41
  print("✅ Waveform generated.")
42
 
43
  # Save waveform
44
  output_path = "output.wav"
45
  if waveform.dim() == 1:
46
- waveform = waveform.unsqueeze(0)
47
- torchaudio.save(output_path, waveform, sample_rate=16000)
48
  print(f"💾 Audio saved to {output_path}")
49
 
50
  return output_path
 
28
  inputs = processor(text=text, return_tensors="pt").to(device)
29
  print("✅ Text processed.")
30
 
31
+ # Generate waveform directly (with vocoder)
32
+ print("🎤 Generating speech waveform...")
33
  with torch.no_grad():
34
+ waveform = model.generate_speech(
35
+ inputs["input_ids"],
36
+ speaker_embedding,
37
+ vocoder=vocoder
38
+ )
 
 
39
  print("✅ Waveform generated.")
40
 
41
  # Save waveform
42
  output_path = "output.wav"
43
  if waveform.dim() == 1:
44
+ waveform = waveform.unsqueeze(0)
45
+ torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
46
  print(f"💾 Audio saved to {output_path}")
47
 
48
  return output_path