Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -116,22 +116,21 @@ def text_to_speech(text, audio_file=None):
|
|
| 116 |
# Normalize the input text
|
| 117 |
normalized_text = normalize_text(text)
|
| 118 |
|
|
|
|
| 119 |
inputs = processor(text=normalized_text, return_tensors="pt").to(device)
|
| 120 |
|
|
|
|
| 121 |
speaker_embeddings = default_embedding
|
| 122 |
|
| 123 |
# Generate speech
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
# Convert the generated speech to numpy array format
|
| 127 |
speech_np = speech.cpu().numpy()
|
| 128 |
|
| 129 |
-
# Write the output to a temporary file
|
| 130 |
-
output_file = "output.wav"
|
| 131 |
-
sf.write(output_file, speech_np, samplerate=16000)
|
| 132 |
-
|
| 133 |
# Return the numpy array and the sample rate
|
| 134 |
-
return speech_np, 16000
|
| 135 |
|
| 136 |
iface = gr.Interface(
|
| 137 |
fn=text_to_speech,
|
|
@@ -145,4 +144,4 @@ iface = gr.Interface(
|
|
| 145 |
description="Enter Turkish text, optionally upload a short audio sample of the target speaker, and listen to the generated speech using the fine-tuned SpeechT5 model."
|
| 146 |
)
|
| 147 |
|
| 148 |
-
iface.launch(share=True)
|
|
|
|
| 116 |
# Normalize the input text
|
| 117 |
normalized_text = normalize_text(text)
|
| 118 |
|
| 119 |
+
# Prepare the input for the model
|
| 120 |
inputs = processor(text=normalized_text, return_tensors="pt").to(device)
|
| 121 |
|
| 122 |
+
# Use the default speaker embedding
|
| 123 |
speaker_embeddings = default_embedding
|
| 124 |
|
| 125 |
# Generate speech
|
| 126 |
+
with torch.no_grad():
|
| 127 |
+
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
| 128 |
|
| 129 |
# Convert the generated speech to numpy array format
|
| 130 |
speech_np = speech.cpu().numpy()
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# Return the numpy array and the sample rate
|
| 133 |
+
return (speech_np, 16000)
|
| 134 |
|
| 135 |
iface = gr.Interface(
|
| 136 |
fn=text_to_speech,
|
|
|
|
| 144 |
description="Enter Turkish text, optionally upload a short audio sample of the target speaker, and listen to the generated speech using the fine-tuned SpeechT5 model."
|
| 145 |
)
|
| 146 |
|
| 147 |
+
iface.launch(share=True)
|