Spaces:

NLPV
/

Maithli_TTS

Runtime error

App Files Files Community

NLPV committed on Mar 23, 2025

Commit

3e166ec

verified ·

1 Parent(s): 8c08a70

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -12,20 +12,23 @@ model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parle
 tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
 description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
-def generate_audio(prompt: str, description: str):
     """
-    Generate synthesized speech audio based on the input prompt and description.
     Args:
-        prompt (str): The text prompt to be spoken.
-        description (str): A description to guide the voice characteristics.
     Returns:
         tuple: A tuple containing the audio numpy array and the sampling rate.
     """
-    # Tokenize inputs for the description and prompt
-    description_tokens = description_tokenizer(description, return_tensors="pt").to(device)
-    prompt_tokens = tokenizer(prompt, return_tensors="pt").to(device)
     # Generate the audio tensor using the model
     generation = model.generate(
@@ -42,13 +45,10 @@ def generate_audio(prompt: str, description: str):
     sampling_rate = model.config.sampling_rate
     return (audio_arr, sampling_rate)
-# Build the Gradio interface
 iface = gr.Interface(
     fn=generate_audio,
-    inputs=[
-        gr.Textbox(label="Prompt", value="अरे, तुम आज कैसे हो?"),
-        gr.Textbox(label="Description", value="Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.")
-    ],
     outputs=gr.Audio(label="Generated Audio"),
     title="Indic Parler TTS",
     description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."

 tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
 description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
+def generate_audio(text: str):
     """
+    Generate synthesized speech audio based on the input text.
     Args:
+        text (str): The text prompt to be spoken.
     Returns:
         tuple: A tuple containing the audio numpy array and the sampling rate.
     """
+    # Set a default voice description
+    default_description = ("Divya's voice is monotone yet slightly fast in delivery, with a very close recording "
+                           "that almost has no background noise.")
+    # Tokenize the default description and the input text
+    description_tokens = description_tokenizer(default_description, return_tensors="pt").to(device)
+    prompt_tokens = tokenizer(text, return_tensors="pt").to(device)
     # Generate the audio tensor using the model
     generation = model.generate(
     sampling_rate = model.config.sampling_rate
     return (audio_arr, sampling_rate)
+# Build the Gradio interface with a single text input
 iface = gr.Interface(
     fn=generate_audio,
+    inputs=gr.Textbox(label="Enter Text", value="अरे, तुम आज कैसे हो?"),
     outputs=gr.Audio(label="Generated Audio"),
     title="Indic Parler TTS",
     description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."