Spaces:

ganga4364
/

tts-test

Sleeping

App Files Files Community

tts-test / app.py

ganga4364

Create app.py

b5ca2f8 verified 4 months ago

raw

history blame contribute delete

1.97 kB

	import gradio as gr
	from transformers import pipeline
	import scipy.io.wavfile
	import numpy as np
	import os
	import time

	# Text-to-speech pipeline backed by the Tibetan MMS-TTS checkpoint.
	# Built once at import time and reused by every request.
	model_id = "ganga4364/mms-tts-bod-female"
	synthesiser = pipeline(task="text-to-speech", model=model_id)

	# Generated WAV files are written here; tolerate the directory already existing.
	os.makedirs("outputs", exist_ok=True)

	def generate_audio(input_text):
	    """Synthesise speech for *input_text* and save it as a 16-bit PCM WAV file.

	    Args:
	        input_text: Text to speak. ``None``, an empty string, or a
	            whitespace-only string means there is nothing to synthesise.

	    Returns:
	        ``(file_path, file_path)`` -- the same path twice, one for the audio
	        player and one for the download component -- or ``(None, None)``
	        when there is no usable input.
	    """
	    if not input_text or not input_text.strip():
	        return None, None

	    # Perform TTS inference.
	    speech = synthesiser(input_text)
	    # NOTE(review): indexing with [0] assumes the pipeline returns the audio
	    # with a leading batch/channel axis -- confirm for the installed
	    # transformers version.
	    waveform = np.asarray(speech["audio"][0])
	    sample_rate = speech["sampling_rate"]

	    # Peak-normalise to the 16-bit PCM range. An empty or all-zero waveform
	    # has no usable peak: dividing by it would raise or yield NaN samples,
	    # so emit silence of the same shape instead.
	    peak = np.max(np.abs(waveform)) if waveform.size else 0
	    if peak > 0:
	        pcm = (waveform / peak * 32767).astype(np.int16)
	    else:
	        pcm = np.zeros(waveform.shape, dtype=np.int16)

	    # Nanosecond timestamp: whole-second resolution let two requests made
	    # within the same second overwrite each other's output file.
	    timestamp = time.time_ns()
	    file_path = f"outputs/tibetan_tts_{timestamp}.wav"

	    # Save the audio file.
	    scipy.io.wavfile.write(file_path, rate=sample_rate, data=pcm)

	    # Return the path for both the audio player and the download component.
	    return file_path, file_path

	# Build the Gradio UI: a text box, a trigger button, an audio player and a
	# file component offering the generated WAV for download.
	with gr.Blocks(title="Tibetan TTS") as demo:
	    gr.Markdown("# 🎙️ Tibetan Text-to-Speech")
	    gr.Markdown("Enter Tibetan text to generate speech audio with downloadable output.")

	    with gr.Row():
	        text_input = gr.Textbox(label="Tibetan Text", placeholder="བཀྲ་ཤིས་བདེ་ལེགས།", lines=3)

	    generate_btn = gr.Button("🔊 Generate Audio", variant="primary")

	    with gr.Row():
	        audio_output = gr.Audio(label="🎧 Listen to Audio", type="filepath")

	    with gr.Row():
	        download_output = gr.File(label="📥 Download Audio File")

	    # Clicking the button passes the text box content to generate_audio and
	    # routes its two return values to the player and the download component.
	    generate_btn.click(generate_audio, text_input, [audio_output, download_output])

	# Start the server only when executed as a script: listen on all interfaces
	# at port 7860 and request a share link.
	if __name__ == "__main__":
	    demo.launch(share=True, server_port=7860, server_name="0.0.0.0")