Spaces:
Sleeping
Sleeping
File size: 1,450 Bytes
c5e3ece d000c57 c5e3ece d000c57 c5e3ece d000c57 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import scipy
import scipy.io.wavfile
import torch

from transformers import BarkModel, AutoProcessor
def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    """
    Convert text to speech with the Bark model and save it to a WAV file.

    Args:
        text (str): Text to convert to speech.
        voice_preset (str): Bark voice preset (speaker/language) to use.

    Returns:
        str: Path of the WAV file the generated audio was written to.
    """
    # Run on GPU when available; Bark generation is very slow on CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Model and processor must come from the SAME checkpoint: the original
    # mixed "suno/bark-small" (model) with "suno/bark" (processor).
    checkpoint = "suno/bark-small"
    model = BarkModel.from_pretrained(checkpoint).to(device)
    processor = AutoProcessor.from_pretrained(checkpoint)

    # Prepare the inputs ONCE (the original ran preprocessing twice and
    # discarded the first pass) and move every tensor to the model's device.
    inputs = processor(text=text, voice_preset=voice_preset)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate the waveform and pull it back to the CPU for saving.
    speech_output = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate

    # Save the generated audio; reuse `path` instead of repeating the literal.
    path = "output_audio.wav"
    scipy.io.wavfile.write(
        path, rate=sampling_rate, data=speech_output[0].cpu().numpy()
    )
    return path
|