Spaces:
Runtime error
Runtime error
| import os | |
| import scipy | |
| import nltk | |
| import tempfile | |
| import numpy as np | |
| from bark.generation import preload_models, SAMPLE_RATE | |
| from bark import generate_audio | |
| from scipy.io import wavfile | |
| import gradio as gr | |
# One-time start-up work, executed when the module is imported.
#
# nltk.sent_tokenize needs the Punkt sentence-tokenizer data. Older NLTK
# releases load the "punkt" resource, while NLTK 3.9+ looks for "punkt_tab"
# and raises LookupError if only "punkt" is present, so fetch both.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# Restrict CUDA to the first GPU.
# NOTE(review): this runs after the bark import above; it presumably still
# takes effect because CUDA is initialised lazily -- confirm on the target host.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Presumably loads the Bark model weights up front so the first request does
# not pay the loading cost -- see bark.generation.preload_models.
preload_models()
# Two-letter codes as they appear in the "v2/<code>_speaker_<n>" history
# prompt names, keyed by the language labels shown in the UI.
_LANGUAGE_CODES = {
    "english": "en",
    "french": "fr",
    "german": "de",
    "hindi": "hi",
    "chinese": "zh",
    "italian": "it",
    "japanese": "ja",
}

# UI labels "speaker 1" .. "speaker 10" map to zero-based indices 0 .. 9.
_SPEAKER_INDICES = {f"speaker {number}": number - 1 for number in range(1, 11)}

# Index used when the speaker label is missing or not recognised.
_FALLBACK_SPEAKER_INDEX = 9

# Length of the pause inserted after every generated sentence.
_SENTENCE_PAUSE_SECONDS = 0.25


def _resolve_history_prompt(language_prompt, speaker_prompt):
    """Translate the UI language/speaker labels into a history prompt name."""
    language_code = None
    if isinstance(language_prompt, str):
        language_code = _LANGUAGE_CODES.get(language_prompt)
    if language_code is None:
        raise ValueError("Invalid language or gender selection")

    speaker_index = _FALLBACK_SPEAKER_INDEX
    if isinstance(speaker_prompt, str):
        speaker_index = _SPEAKER_INDICES.get(speaker_prompt, _FALLBACK_SPEAKER_INDEX)
    return f"v2/{language_code}_speaker_{speaker_index}"


def generate_audio_from_text(text, language_prompt, speaker_prompt):
    """Synthesize ``text`` sentence by sentence and save it as a WAV file.

    Args:
        text: The text to speak. It is split into sentences with
            ``nltk.sent_tokenize`` and each sentence is generated separately.
        language_prompt: One of "english", "french", "german", "hindi",
            "chinese", "italian" or "japanese".
        speaker_prompt: "speaker 1" .. "speaker 10". Any other value falls
            back to the last speaker of the selected language.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False``, so the caller owns its cleanup.

    Raises:
        ValueError: If the language is not supported, or if ``text`` is empty
            or yields no sentences.
    """
    history_prompt = _resolve_history_prompt(language_prompt, speaker_prompt)

    # Reject empty input up front; otherwise np.concatenate([]) would fail
    # later with an unhelpful "need at least one array" message.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("No text was provided to convert to speech")

    sentences = nltk.sent_tokenize(text)
    pause_samples = int(_SENTENCE_PAUSE_SECONDS * SAMPLE_RATE)

    pieces = []
    for sentence in sentences:
        audio_array = np.asarray(
            generate_audio(sentence, history_prompt=history_prompt)
        )
        # Follow each sentence with a short pause. The silence takes the dtype
        # of the generated audio so concatenation does not upcast the samples
        # and change the WAV sample format.
        pieces.append(audio_array)
        pieces.append(np.zeros(pause_samples, dtype=audio_array.dtype))

    if not pieces:
        raise ValueError("No text was provided to convert to speech")

    final_audio = np.concatenate(pieces)

    # Reserve a unique path, then write after the handle is closed: writing by
    # name while the temporary file is still open fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    wavfile.write(wav_path, SAMPLE_RATE, final_audio)

    return wav_path
# Choices offered by the language dropdown, in display order.
language_options = ["english", "french", "german", "hindi", "chinese", "italian", "japanese"]

# Choices offered by the speaker dropdown: "speaker 1" through "speaker 10".
speaker_options = [f"speaker {number}" for number in range(1, 11)]
# Build the Gradio UI: a text box plus language and speaker dropdowns feed
# generate_audio_from_text, and the resulting WAV path is offered as a file.
iface = gr.Interface(
    fn=generate_audio_from_text,
    inputs=[
        # `label` is the Textbox caption; `text` is not a Textbox parameter.
        gr.Textbox(label="Enter text to convert to speech:"),
        gr.Dropdown(choices=language_options, label="Select language:"),
        gr.Dropdown(choices=speaker_options, label="Select speaker:"),
    ],
    # gr.File replaces the deprecated (and later removed) gr.outputs.File.
    outputs=gr.File(label="Download WAV File"),
    title="Text-to-Speech App Vertical Solution",
    # NOTE: the former `timeout=300` was dropped because gr.Interface has no
    # such parameter.
)

# Enable request queuing through Blocks.queue(); the `enable_queue` argument
# to launch() is deprecated and has been removed in newer Gradio releases.
iface.queue()

# Start the app in debug mode. No public share link is requested here.
iface.launch(debug=True)