Update app.py
Browse files
app.py
CHANGED
|
@@ -28,23 +28,21 @@ def tts_generate(text):
|
|
| 28 |
inputs = processor(text=text, return_tensors="pt").to(device)
|
| 29 |
print("✅ Text processed.")
|
| 30 |
|
| 31 |
-
# Generate
|
| 32 |
-
print("🎤 Generating speech...")
|
| 33 |
with torch.no_grad():
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
waveform = vocoder(mel)
|
| 40 |
-
waveform = waveform.cpu()
|
| 41 |
print("✅ Waveform generated.")
|
| 42 |
|
| 43 |
# Save waveform
|
| 44 |
output_path = "output.wav"
|
| 45 |
if waveform.dim() == 1:
|
| 46 |
-
waveform = waveform.unsqueeze(0)
|
| 47 |
-
torchaudio.save(output_path, waveform, sample_rate=16000)
|
| 48 |
print(f"💾 Audio saved to {output_path}")
|
| 49 |
|
| 50 |
return output_path
|
|
|
|
| 28 |
inputs = processor(text=text, return_tensors="pt").to(device)
|
| 29 |
print("✅ Text processed.")
|
| 30 |
|
| 31 |
+
# Generate waveform directly (with vocoder)
|
| 32 |
+
print("🎤 Generating speech waveform...")
|
| 33 |
with torch.no_grad():
|
| 34 |
+
waveform = model.generate_speech(
|
| 35 |
+
inputs["input_ids"],
|
| 36 |
+
speaker_embedding,
|
| 37 |
+
vocoder=vocoder
|
| 38 |
+
)
|
|
|
|
|
|
|
| 39 |
print("✅ Waveform generated.")
|
| 40 |
|
| 41 |
# Save waveform
|
| 42 |
output_path = "output.wav"
|
| 43 |
if waveform.dim() == 1:
|
| 44 |
+
waveform = waveform.unsqueeze(0)
|
| 45 |
+
torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
|
| 46 |
print(f"💾 Audio saved to {output_path}")
|
| 47 |
|
| 48 |
return output_path
|