| import gradio as gr |
| from transformers import pipeline |
| import scipy.io.wavfile |
| import numpy as np |
| import os |
| import time |
|
|
| |
# Model identifier handed to the transformers text-to-speech pipeline.
model_id = "ganga4364/mms-tts-bod-female"

# Build the pipeline once at import time; generate_audio() reuses this
# module-level object on every call instead of reloading the model.
synthesiser = pipeline(task="text-to-speech", model=model_id)

# Generated WAV files are written under this folder, so make sure it exists
# before the first request arrives (no error if it is already there).
os.makedirs(name="outputs", exist_ok=True)
|
|
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and save it as a 16-bit WAV file.

    The text is passed to the module-level ``synthesiser`` pipeline, the
    returned waveform is peak-normalised to the int16 range, and the result
    is written to a uniquely named file under ``outputs/``.

    Args:
        input_text: Text to synthesise. ``None``, empty and whitespace-only
            values are treated as "nothing to do".

    Returns:
        A ``(file_path, file_path)`` tuple (the same path twice, one entry
        per output component wired to this callback), or ``(None, None)``
        when there is no text or the model returned no samples.
    """
    if not input_text or not input_text.strip():
        return None, None

    speech = synthesiser(input_text)
    sample_rate = speech["sampling_rate"]

    # The pipeline may return the waveform with a leading batch/channel axis
    # or already flattened; only strip the first axis when it is present.
    waveform = np.asarray(speech["audio"], dtype=np.float32)
    if waveform.ndim > 1:
        waveform = waveform[0]
    if waveform.size == 0:
        # Nothing was synthesised; np.max would raise on an empty array.
        return None, None

    # Peak-normalise to full int16 scale. A silent clip has peak == 0, and
    # dividing by it would turn every sample into NaN, so leave it as zeros.
    peak = np.max(np.abs(waveform))
    if peak > 0:
        waveform = waveform / peak * 32767
    pcm = waveform.astype(np.int16)

    # The folder is normally created at import time, but recreate it here so
    # a cleanup job removing it does not break later requests.
    os.makedirs("outputs", exist_ok=True)

    # Nanosecond timestamp: second resolution lets two requests arriving in
    # the same second overwrite each other's file.
    file_path = f"outputs/tibetan_tts_{time.time_ns()}.wav"
    scipy.io.wavfile.write(file_path, rate=sample_rate, data=pcm)

    return file_path, file_path
|
|
| |
# Single-page UI: a text box, a trigger button, an audio player and a file
# download slot. Components are laid out in the order they are created.
with gr.Blocks(title="Tibetan TTS") as demo:
    gr.Markdown(value="# 🎙️ Tibetan Text-to-Speech")
    gr.Markdown(value="Enter Tibetan text to generate speech audio with downloadable output.")

    with gr.Row():
        text_input = gr.Textbox(
            lines=3,
            placeholder="བཀྲ་ཤིས་བདེ་ལེགས།",
            label="Tibetan Text",
        )

    generate_btn = gr.Button(value="🔊 Generate Audio", variant="primary")

    with gr.Row():
        # generate_audio returns a path on disk, hence type="filepath".
        audio_output = gr.Audio(type="filepath", label="🎧 Listen to Audio")

    with gr.Row():
        download_output = gr.File(label="📥 Download Audio File")

    # One click feeds the text to generate_audio; its two return values fill
    # the player and the download component, in that order.
    generate_btn.click(
        generate_audio,
        text_input,
        [audio_output, download_output],
    )
|
|
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and also request a public
    # Gradio share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)
|
|