# Spaces: Runtime error
| import gradio as gr | |
| import torch | |
| from parler_tts import ParlerTTSForConditionalGeneration | |
| from transformers import AutoTokenizer | |
| import soundfile as sf | |
| import numpy as np | |
| import os | |
# Run on the first CUDA device when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Indic Parler-TTS model (moved onto `device`) and its tokenizer, both loaded
# from the same checkpoint.
_checkpoint = "ai4bharat/indic-parler-tts"
model = ParlerTTSForConditionalGeneration.from_pretrained(_checkpoint)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)

# Supported languages (Indic Parler-TTS officially supports these); used both
# as the dropdown choices and for validating requests.
languages = [
    "Assamese",
    "Bengali",
    "Bodo",
    "Dogri",
    "English",
    "Gujarati",
    "Hindi",
    "Kannada",
    "Konkani",
    "Maithili",
    "Malayalam",
    "Manipuri",
    "Marathi",
    "Nepali",
    "Odia",
    "Sanskrit",
    "Santali",
    "Sindhi",
    "Tamil",
    "Telugu",
    "Urdu",
]
# Lazily created tokenizer for voice descriptions (see _get_description_tokenizer).
_description_tokenizer = None


def _get_description_tokenizer():
    """Return the tokenizer used for voice descriptions, loading it on first use.

    The Indic Parler-TTS usage example tokenizes the description with the
    tokenizer of the model's text encoder rather than with the prompt
    tokenizer, so the two inputs must not share a tokenizer.
    """
    global _description_tokenizer
    if _description_tokenizer is None:
        _description_tokenizer = AutoTokenizer.from_pretrained(
            model.config.text_encoder._name_or_path
        )
    return _description_tokenizer


def generate_speech(text, language, voice_description):
    """Synthesize speech for ``text`` and report the outcome to the UI.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected.
        language: Language name; must be one of the entries in ``languages``.
        voice_description: Free-form description of the desired voice. May be
            empty or ``None``.

    Returns:
        A ``(audio_path, error_message)`` tuple. On success ``audio_path`` is
        the path of a newly written WAV file and ``error_message`` is ``None``.
        On failure ``audio_path`` is ``None`` and ``error_message`` is a
        human-readable explanation.
    """
    import tempfile

    if not text or not text.strip():
        return None, "Error: Text input cannot be empty."
    if language not in languages:
        return None, f"Error: Language '{language}' is not supported. Choose from: {', '.join(languages)}"

    # Combine voice description with language context (optional, for better
    # control). A missing description must not end up as the literal "None".
    voice_description = (voice_description or "").strip()
    description = f"A speaker delivering speech in {language}. {voice_description}".strip()

    # Everything that can fail at runtime lives inside the try so that the
    # user sees the message in the status box instead of a generic UI error.
    try:
        description_inputs = _get_description_tokenizer()(
            description, return_tensors="pt"
        ).to(device)
        prompt_inputs = tokenizer(text, return_tensors="pt").to(device)

        generation = model.generate(
            input_ids=description_inputs.input_ids,
            attention_mask=description_inputs.attention_mask,
            prompt_input_ids=prompt_inputs.input_ids,
            prompt_attention_mask=prompt_inputs.attention_mask,
        )
        audio_arr = generation.cpu().numpy().squeeze()

        # One file per request: a shared fixed filename would let concurrent
        # requests overwrite each other's audio. delete=False because the file
        # is read by the UI after this function has returned.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name
        sf.write(output_file, audio_arr, model.config.sampling_rate)
        return output_file, None
    except Exception as e:  # UI boundary: surface any failure as a status message
        return None, f"Error generating audio: {str(e)}"
# Gradio interface: inputs on top, a trigger button, then the audio result and
# a status line for error messages.
with gr.Blocks() as demo:
    gr.Markdown("# Indic Parler-TTS: Text-to-Speech")
    gr.Markdown(
        "Enter text, select a language, and describe the voice to generate audio. Download the audio output."
    )

    # NOTE(review): the original indentation was lost; this assumes the row
    # holds only the text and language inputs — confirm against the intended layout.
    with gr.Row():
        text_input = gr.Textbox(
            label="Input Text",
            placeholder="Enter text to convert to speech...",
        )
        language_input = gr.Dropdown(
            label="Language",
            choices=languages,
            value="English",
        )

    voice_description = gr.Textbox(
        label="Voice Description",
        placeholder="E.g., A female speaker with a clear, cheerful tone and moderate pace.",
        value="A neutral speaker with clear audio quality.",
    )
    generate_btn = gr.Button("Generate Audio")

    audio_output = gr.Audio(
        label="Generated Audio",
        type="filepath",
        interactive=False,
    )
    error_output = gr.Textbox(
        label="Status/Error",
        visible=True,
        interactive=False,
    )

    # Wire the button to the synthesis function: three inputs in, audio path
    # and status message out.
    click_inputs = [text_input, language_input, voice_description]
    click_outputs = [audio_output, error_output]
    generate_btn.click(
        fn=generate_speech,
        inputs=click_inputs,
        outputs=click_outputs,
    )

if __name__ == "__main__":
    demo.launch()