| import gradio as gr |
| import torch |
| from transformers import AutoProcessor, AutoModel |
| import numpy as np |
|
|
| |
# Hugging Face checkpoint id used for both the text processor and the model.
# NOTE(review): confirm this id resolves to a transformers-format checkpoint
# that supports per-request language selection - TODO verify.
model_id = "facebook/mms-tts"

# Loaded once at import time; text_to_speech() reuses these module globals.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)
|
|
# UI label -> language code handed to the processor's `language` argument.
# Insertion order is the order in which the dropdown lists its choices.
LANGUAGES = {
    "English": "eng",
    "French": "fra",
    "Spanish": "spa",
}
|
|
# UI label -> integer sent to the model as `speaker_id`.
# NOTE(review): which voice each id selects depends on the checkpoint - confirm.
SPEAKERS = {
    "Male": 0,
    "Female": 1,
}
|
|
def text_to_speech(text, language, speaker_gender, speed):
    """Synthesize speech for ``text`` and return it as a numpy audio tuple.

    Args:
        text: The text to speak. Blank or non-string input is rejected.
        language: A key of ``LANGUAGES`` (the label shown in the UI).
        speaker_gender: A key of ``SPEAKERS``.
        speed: Speed multiplier forwarded to the model as ``speed_ratios``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple on success, or ``None`` when the
        input is blank or synthesis fails. Failures are logged and surfaced to
        the user with ``gr.Warning`` instead of being dropped silently.
    """
    import logging  # function-scope import keeps this block self-contained

    # Reject empty input up front rather than sending it to the model.
    if not isinstance(text, str) or not text.strip():
        gr.Warning("Please enter some text to convert to speech.")
        return None

    try:
        # NOTE(review): confirm the loaded processor accepts `language=` and
        # that `model.generate` accepts `speaker_id` / `speed_ratios`.
        inputs = processor(
            text=text,
            language=LANGUAGES[language],
            return_tensors="pt",
        )

        # Inference only: no gradients needed.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                speaker_id=torch.tensor([SPEAKERS[speaker_gender]]),
                # The slider may deliver an int (e.g. 1); force a float tensor.
                speed_ratios=torch.tensor([float(speed)]),
            )

        # Presumably the first item is the waveform for the single input.
        speech = output[0].cpu().numpy()
        sampling_rate = model.config.sampling_rate
    except Exception as exc:
        # UI boundary: keep the best-effort "return None" contract, but record
        # the traceback and tell the user what went wrong.
        logging.getLogger(__name__).exception("Text-to-speech generation failed")
        gr.Warning(f"Speech generation failed: {exc}")
        return None

    return (sampling_rate, speech)
|
|
| |
def create_interface():
    """Assemble the Gradio Blocks UI for the text-to-speech demo.

    The layout is a header, a two-column row (inputs on the left, audio
    player on the right) and a feature list. Clicking the button calls
    ``text_to_speech`` with the four input values and sends the result to
    the audio component.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray",
    )

    with gr.Blocks(theme=soft_theme) as demo:
        gr.Markdown(
            """
            # 🎙️ Multilingual Text-to-Speech
            Convert text to natural-sounding speech in multiple languages.
            """
        )

        with gr.Row():
            # Left column: everything the user fills in.
            with gr.Column():
                text_box = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5,
                )
                language_picker = gr.Dropdown(
                    choices=list(LANGUAGES),
                    value="English",
                    label="Language",
                )
                speaker_picker = gr.Radio(
                    choices=list(SPEAKERS),
                    value="Male",
                    label="Speaker Gender",
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed",
                )
                generate_btn = gr.Button("Generate Speech", variant="primary")

            # Right column: playback of the synthesized audio.
            with gr.Column():
                audio_player = gr.Audio(
                    label="Generated Speech",
                    type="numpy",
                )

        # Order must match the parameter order of text_to_speech().
        handler_inputs = [text_box, language_picker, speaker_picker, speed_slider]
        generate_btn.click(
            fn=text_to_speech,
            inputs=handler_inputs,
            outputs=audio_player,
        )

        gr.Markdown(
            """
            ### Features:
            - Support for English, French, and Spanish
            - Male and Female voice options
            - Adjustable speech speed
            - High-quality, natural-sounding voices
            """
        )

    return demo
|
|
# Build the UI at module level so `demo` remains importable by name.
demo = create_interface()

# Only start the web server when this file is executed as a script, so that
# importing the module does not block on a running server.
if __name__ == "__main__":
    demo.launch()
|
|