Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,20 +12,23 @@ model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parle
|
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
| 13 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
| 14 |
|
| 15 |
-
def generate_audio(prompt: str, description: str):
|
| 16 |
"""
|
| 17 |
-
Generate synthesized speech audio based on the input
|
| 18 |
|
| 19 |
Args:
|
| 20 |
-
|
| 21 |
-
description (str): A description to guide the voice characteristics.
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
tuple: A tuple containing the audio numpy array and the sampling rate.
|
| 25 |
"""
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Generate the audio tensor using the model
|
| 31 |
generation = model.generate(
|
|
@@ -42,13 +45,10 @@ def generate_audio(prompt: str, description: str):
|
|
| 42 |
sampling_rate = model.config.sampling_rate
|
| 43 |
return (audio_arr, sampling_rate)
|
| 44 |
|
| 45 |
-
# Build the Gradio interface
|
| 46 |
iface = gr.Interface(
|
| 47 |
fn=generate_audio,
|
| 48 |
-
inputs=[
|
| 49 |
-
gr.Textbox(label="Prompt", value="अरे, तुम आज कैसे हो?"),
|
| 50 |
-
gr.Textbox(label="Description", value="Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.")
|
| 51 |
-
],
|
| 52 |
outputs=gr.Audio(label="Generated Audio"),
|
| 53 |
title="Indic Parler TTS",
|
| 54 |
description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."
|
|
|
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
| 13 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
| 14 |
|
| 15 |
+
def generate_audio(text: str):
|
| 16 |
"""
|
| 17 |
+
Generate synthesized speech audio based on the input text.
|
| 18 |
|
| 19 |
Args:
|
| 20 |
+
text (str): The text prompt to be spoken.
|
|
|
|
| 21 |
|
| 22 |
Returns:
|
| 23 |
tuple: A tuple containing the audio numpy array and the sampling rate.
|
| 24 |
"""
|
| 25 |
+
# Set a default voice description
|
| 26 |
+
default_description = ("Divya's voice is monotone yet slightly fast in delivery, with a very close recording "
|
| 27 |
+
"that almost has no background noise.")
|
| 28 |
+
|
| 29 |
+
# Tokenize the default description and the input text
|
| 30 |
+
description_tokens = description_tokenizer(default_description, return_tensors="pt").to(device)
|
| 31 |
+
prompt_tokens = tokenizer(text, return_tensors="pt").to(device)
|
| 32 |
|
| 33 |
# Generate the audio tensor using the model
|
| 34 |
generation = model.generate(
|
|
|
|
| 45 |
sampling_rate = model.config.sampling_rate
|
| 46 |
return (audio_arr, sampling_rate)
|
| 47 |
|
| 48 |
+
# Build the Gradio interface with a single text input
|
| 49 |
iface = gr.Interface(
|
| 50 |
fn=generate_audio,
|
| 51 |
+
inputs=gr.Textbox(label="Enter Text", value="अरे, तुम आज कैसे हो?"),
|
|
|
|
|
|
|
|
|
|
| 52 |
outputs=gr.Audio(label="Generated Audio"),
|
| 53 |
title="Indic Parler TTS",
|
| 54 |
description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."
|