Spaces:

krishna195
/

IITR_Text_to_audio

Sleeping

App Files Files Community

IITR_Text_to_audio / app.py

krishna195

Update app.py

1048abe verified about 1 year ago

raw

history blame contribute delete

1.76 kB

	# Install necessary libraries (if not already installed)
	#!pip install gradio transformers soundfile torch

	import torch
	import soundfile as sf
	import gradio as gr
	from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan

	# Load the fine-tuned SpeechT5 acoustic model together with the stock
	# Microsoft HiFi-GAN vocoder and text processor. They are created once, when
	# this module is executed.
	model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
	vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
	processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")

	# Placeholder speaker embedding of shape (1, 512). This is a random vector,
	# not a real speaker embedding. It is drawn from a privately seeded
	# generator so the synthesized voice is identical after every restart
	# instead of changing with each launch, and so the global RNG is untouched.
	# NOTE(review): presumably the model expects an x-vector from the speaker it
	# was fine-tuned on -- replace this placeholder with the actual embedding.
	speaker_embeddings = torch.randn(1, 512, generator=torch.Generator().manual_seed(0))

	# Function to generate speech from text
	def text_to_speech(input_text):
	    """Synthesize speech for *input_text* and return the path of a WAV file.

	    Args:
	        input_text: The text to speak, as typed into the Gradio text box.

	    Returns:
	        Path (str) of a freshly created WAV file written at a 16000 Hz
	        sample rate, which Gradio plays back in its audio component.

	    Raises:
	        gr.Error: If ``input_text`` is empty or contains only whitespace.
	    """
	    # Local import keeps this block self-contained; tempfile is stdlib.
	    import tempfile

	    # Reject blank input up front instead of synthesizing from nothing;
	    # gr.Error surfaces the message to the user in the web UI.
	    if not input_text or not input_text.strip():
	        raise gr.Error("Please enter some text to convert to speech.")

	    # Tokenize the text into model input ids.
	    inputs = processor(text=input_text, return_tensors="pt")

	    # Run the acoustic model and vocoder to obtain the waveform tensor.
	    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

	    # Write to a unique file per call. A single fixed filename would let
	    # concurrent requests overwrite each other's audio before it is served.
	    # The handle is closed before writing so the path can be reopened on
	    # every platform; the file is left on disk for Gradio to read.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_file = tmp.name
	    sf.write(output_file, speech.numpy(), 16000)

	    # Return the path to the audio file for Gradio to play.
	    return output_file

	# Assemble the web UI: one text box as input, one audio player as output,
	# plus a few clickable example sentences.
	iface = gr.Interface(
	    title="Text to Speech Converter",
	    description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
	    fn=text_to_speech,
	    inputs="text",
	    outputs="audio",
	    # Each example is a one-element row because the interface has a single input.
	    examples=[
	        [sentence]
	        for sentence in (
	            "Hello, how are you doing today?",
	            "Speech synthesis is amazing with deep learning models.",
	            "TensorFlow and PyTorch are powerful machine learning frameworks.",
	        )
	    ],
	)

	# Start serving the app.
	iface.launch()