# tts-test / app.py
# Uploaded by: ganga4364
# Commit: "Create app.py" (b5ca2f8, verified)
import gradio as gr
from transformers import pipeline
import scipy.io.wavfile
import numpy as np
import os
import time
# MMS-TTS checkpoint used for Tibetan speech synthesis
model_id = "ganga4364/mms-tts-bod-female"

# Build the text-to-speech pipeline once at import time so that every
# request handled by the app reuses the same loaded model
synthesiser = pipeline(task="text-to-speech", model=model_id)

# Generated WAV files are written here; an already-existing directory is fine
os.makedirs(name="outputs", exist_ok=True)
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and save it as a 16-bit PCM WAV.

    Runs the module-level ``synthesiser`` pipeline, peak-normalises the
    resulting waveform to the int16 range and writes it under ``outputs/``.

    Args:
        input_text: Text to synthesise. ``None``, an empty string, or a
            whitespace-only string means there is nothing to synthesise.

    Returns:
        A ``(audio_path, download_path)`` tuple containing the same file path
        twice (one entry per Gradio output component), or ``(None, None)``
        when ``input_text`` holds no text.
    """
    if not input_text or not input_text.strip():
        return None, None

    # Perform TTS inference
    speech = synthesiser(input_text)
    sample_rate = speech["sampling_rate"]

    # Accept both a batched (1, samples) array and an already-flat (samples,)
    # array: only strip the leading axis when there is one. Indexing a flat
    # array with [0] would collapse it to a single sample.
    # NOTE(review): the pipeline's output shape is not visible here - confirm.
    waveform = np.asarray(speech["audio"])
    if waveform.ndim > 1:
        waveform = waveform[0]

    # Peak-normalise to 16-bit PCM. Silent (all-zero) or empty output has no
    # usable peak; dividing by it would produce NaN/inf, so emit silence.
    peak = np.max(np.abs(waveform)) if waveform.size else 0
    if peak > 0:
        pcm = (waveform / peak * 32767).astype(np.int16)
    else:
        pcm = np.zeros(waveform.shape, dtype=np.int16)

    # Nanosecond timestamp keeps filenames distinct for requests that arrive
    # within the same second (a whole-second stamp made them overwrite each
    # other's file).
    os.makedirs("outputs", exist_ok=True)
    file_path = f"outputs/tibetan_tts_{time.time_ns()}.wav"

    # Save the audio file
    scipy.io.wavfile.write(file_path, rate=sample_rate, data=pcm)

    # Same path feeds both the audio player and the download component
    return file_path, file_path
# Assemble the Gradio page: heading, text entry with trigger button,
# an audio player, and a file component for downloading the result
with gr.Blocks(title="Tibetan TTS") as demo:
    gr.Markdown("# 🎙️ Tibetan Text-to-Speech")
    gr.Markdown("Enter Tibetan text to generate speech audio with downloadable output.")

    # NOTE(review): the original nesting was not recoverable from the
    # flattened source; the button is assumed to share the first row with
    # the text box - confirm against the deployed layout.
    with gr.Row():
        text_input = gr.Textbox(label="Tibetan Text", placeholder="བཀྲ་ཤིས་བདེ་ལེགས།", lines=3)
        generate_btn = gr.Button("🔊 Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(type="filepath", label="🎧 Listen to Audio")

    with gr.Row():
        download_output = gr.File(label="📥 Download Audio File")

    # generate_audio returns two values, one for each output component
    generate_btn.click(
        generate_audio,
        inputs=text_input,
        outputs=[audio_output, download_output],
    )
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and request a public share link
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )