Spaces:
Sleeping
Sleeping
| # Install necessary libraries (if not already installed) | |
| #!pip install gradio transformers soundfile torch | |
| import torch | |
| import soundfile as sf | |
| import gradio as gr | |
| from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan | |
# Load the fine-tuned SpeechT5 text-to-speech model, the HiFi-GAN vocoder and
# the processor. Each from_pretrained call takes a model identifier string.
model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")

# Placeholder speaker embedding of shape (1, 512); replace it with a real
# speaker embedding for your target voice when one is available.
# The values come from a privately seeded generator so that the placeholder
# (and therefore the synthesized voice) is identical on every start of the
# app, and so that the global torch RNG state is left untouched.
_speaker_embedding_rng = torch.Generator().manual_seed(0)
speaker_embeddings = torch.randn(1, 512, generator=_speaker_embedding_rng)
def text_to_speech(input_text: str) -> str:
    """Synthesize speech for ``input_text`` and return the path of a WAV file.

    The text is tokenized with the module-level ``processor``, turned into a
    waveform by ``model.generate_speech`` (using the module-level
    ``speaker_embeddings`` and ``vocoder``), and written to disk at a
    16000 Hz sample rate.

    Args:
        input_text: The text to convert to speech.

    Returns:
        Path to a freshly created ``.wav`` file containing the audio. The
        file is not deleted by this function.
    """
    # Imported here so the function is self-contained with respect to the
    # file's existing import block.
    import tempfile

    # Tokenize the input text into PyTorch tensors.
    inputs = processor(text=input_text, return_tensors="pt")

    # Generate the waveform with the model and vocoder.
    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # Give every call its own output file. A single fixed filename would be
    # overwritten when several requests are served at the same time, and it
    # would require the working directory to be writable. The handle is
    # closed before writing so soundfile can reopen the path on any platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_file = handle.name
    sf.write(output_file, speech.numpy(), 16000)

    # Return the path to the audio file for Gradio to play.
    return output_file
# Sample prompts offered in the UI; each inner list holds the value for the
# single text input.
_EXAMPLE_PROMPTS = [
    ["Hello, how are you doing today?"],
    ["Speech synthesis is amazing with deep learning models."],
    ["TensorFlow and PyTorch are powerful machine learning frameworks."],
]

# Build the Gradio interface: a text input wired through text_to_speech to an
# audio output.
iface = gr.Interface(
    text_to_speech,
    "text",
    "audio",
    examples=_EXAMPLE_PROMPTS,
    title="Text to Speech Converter",
    description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
)

# Start serving the app.
iface.launch()