Spaces:

KIMOSSINO
/

texttoson

Runtime error

App Files Files Community

texttoson / app.py

KIMOSSINO

Create app.py

41a5749 verified over 1 year ago

raw

history blame contribute delete

3.08 kB

	import logging
	from functools import lru_cache

	import gradio as gr
	import numpy as np
	import torch
	from transformers import AutoModel, AutoProcessor, AutoTokenizer, VitsModel

	# Model setup.
	#
	# MMS-TTS is published as one single-language VITS checkpoint per ISO 639-3
	# code ("facebook/mms-tts-eng", "facebook/mms-tts-fra", ...). The bare
	# "facebook/mms-tts" repository cannot be loaded with `from_pretrained`, so
	# `model_id` is used as a prefix and each language's checkpoint is loaded
	# lazily, the first time it is requested.
	model_id = "facebook/mms-tts"

	_logger = logging.getLogger(__name__)

	# UI label -> ISO 639-3 code, used as the checkpoint suffix.
	LANGUAGES = {
	    "English": "eng",
	    "French": "fra",
	    "Spanish": "spa",
	}

	# UI label -> speaker index. The index is only forwarded to checkpoints that
	# report more than one speaker in their config.
	# NOTE(review): the published MMS-TTS checkpoints appear to be single-speaker,
	# in which case this choice has no audible effect -- confirm before
	# advertising separate male/female voices.
	SPEAKERS = {
	    "Male": 0,
	    "Female": 1,
	}


	@lru_cache(maxsize=None)  # key space is bounded by the entries of LANGUAGES
	def _load_voice(language_code):
	    """Return the ``(tokenizer, model)`` pair for one language, loading it once.

	    A failed load raises and is not cached, so the next request retries.
	    """
	    checkpoint = f"{model_id}-{language_code}"
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    voice = VitsModel.from_pretrained(checkpoint)
	    return tokenizer, voice


	def text_to_speech(text, language, speaker_gender, speed):
	    """Synthesize ``text`` and return it in the form ``gr.Audio`` accepts.

	    Args:
	        text: Text to speak. Empty or whitespace-only text yields ``None``.
	        language: A key of ``LANGUAGES``.
	        speaker_gender: A key of ``SPEAKERS``.
	        speed: Speaking-rate multiplier; larger values give faster speech.

	    Returns:
	        ``(sampling_rate, waveform)`` with ``waveform`` a 1-D NumPy array, or
	        ``None`` when there is nothing to synthesize or synthesis failed.
	        Failures are logged with a traceback and reported to the user through
	        ``gr.Warning`` instead of being raised.
	    """
	    # Nothing to say: skip the (potentially slow) model load entirely.
	    if not text or not text.strip():
	        return None

	    try:
	        tokenizer, voice = _load_voice(LANGUAGES[language])
	        speaker_id = SPEAKERS[speaker_gender]

	        inputs = tokenizer(text, return_tensors="pt")

	        # VITS reads the speed from a model attribute, not a call argument.
	        # NOTE(review): the model object is shared through the cache, so
	        # overlapping requests with different speeds could interfere.
	        voice.speaking_rate = float(speed)

	        extra = {}
	        if voice.config.num_speakers > 1:
	            extra["speaker_id"] = speaker_id

	        with torch.no_grad():
	            waveform = voice(**inputs, **extra).waveform

	        # `waveform` is (batch, samples); the single batch item is the audio.
	        return voice.config.sampling_rate, waveform[0].cpu().numpy()
	    except Exception:
	        # UI boundary: keep the "None means no audio" contract, but leave a
	        # traceback in the logs and tell the user that something went wrong.
	        _logger.exception(
	            "Speech synthesis failed (language=%r, speaker=%r, speed=%r)",
	            language,
	            speaker_gender,
	            speed,
	        )
	        gr.Warning("Speech generation failed. Please try again.")
	        return None

	# Create Gradio interface
	def create_interface():
	    """Assemble the text-to-speech Blocks UI and return it without launching.

	    The left column holds the text box, language dropdown, speaker radio,
	    speed slider and the trigger button; the right column holds the audio
	    player. Clicking the button calls ``text_to_speech`` with the four input
	    values and sends its result to the audio player.
	    """
	    soft_theme = gr.themes.Soft(
	        primary_hue="blue",
	        secondary_hue="gray",
	    )
	    language_names = list(LANGUAGES)
	    voice_names = list(SPEAKERS)

	    intro_md = """
	# 🎙️ Multilingual Text-to-Speech
	Convert text to natural-sounding speech in multiple languages.
	"""
	    features_md = """
	### Features:
	- Support for English, French, and Spanish
	- Male and Female voice options
	- Adjustable speech speed
	- High-quality, natural-sounding voices
	"""

	    with gr.Blocks(theme=soft_theme) as app:
	        gr.Markdown(intro_md)

	        with gr.Row():
	            # Input controls.
	            with gr.Column():
	                text_box = gr.Textbox(
	                    label="Enter Text",
	                    placeholder="Type your text here...",
	                    lines=5,
	                )
	                language_picker = gr.Dropdown(
	                    choices=language_names,
	                    value="English",
	                    label="Language",
	                )
	                voice_picker = gr.Radio(
	                    choices=voice_names,
	                    value="Male",
	                    label="Speaker Gender",
	                )
	                speed_slider = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    value=1.0,
	                    step=0.1,
	                    label="Speech Speed",
	                )
	                generate_btn = gr.Button("Generate Speech", variant="primary")

	            # Output player.
	            with gr.Column():
	                player = gr.Audio(
	                    label="Generated Speech",
	                    type="numpy",
	                )

	        # Input order must match the parameter order of text_to_speech.
	        generate_btn.click(
	            fn=text_to_speech,
	            inputs=[text_box, language_picker, voice_picker, speed_slider],
	            outputs=player,
	        )

	        gr.Markdown(features_md)

	    return app

	# Keep `demo` at module level so the UI object stays importable by name;
	# only start the web server when this file is executed as a script, so that
	# importing the module does not block inside `launch()`.
	demo = create_interface()

	if __name__ == "__main__":
	    demo.launch()