# lingobot/utils/tts.py
# gent
# rm proxy
# eb02aca
import azure.cognitiveservices.speech as speechsdk
import os
# Build the shared Azure Speech configuration from the environment.
# Unset SPEECH_KEY / SPEECH_REGION variables fall back to empty strings.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv('SPEECH_KEY', ''),
    region=os.getenv('SPEECH_REGION', ''),
)
# Set the desired voice here.
speech_config.speech_synthesis_voice_name = "en-GB-ElliotNeural"
# Module-level synthesizer object used by tts() below.
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
def tts(text) -> speechsdk.SpeechSynthesisResult:
    """Synthesize *text* with the module-level synthesizer and report the outcome.

    Prints a confirmation when synthesis completes. When synthesis is
    canceled, prints the cancellation reason and, for an error cancellation
    that carries details, the error details plus a configuration hint.

    Args:
        text: The text to synthesize.

    Returns:
        The result object returned by ``speech_synthesizer.speak_text``,
        regardless of whether synthesis succeeded.
    """
    outcome = speech_synthesizer.speak_text(text)
    reason = outcome.reason

    # Success path: confirm and hand the result back immediately.
    if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(text))
        return outcome

    # Any outcome other than a cancellation is returned without reporting.
    if reason != speechsdk.ResultReason.Canceled:
        return outcome

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    is_error = details.reason == speechsdk.CancellationReason.Error
    if is_error and details.error_details:
        print("Error details: {}".format(details.error_details))
        print("Did you set the speech resource key and region values?")
    return outcome
if __name__ == '__main__':
    # Demo: synthesize a few sample utterances, save each one to a WAV file
    # and print the result together with the time taken.
    import time

    output_dir = "audio_samples"
    # open() does not create missing directories, so make sure the target
    # directory exists before writing any audio into it.
    os.makedirs(output_dir, exist_ok=True)

    sample_texts = [
        "Welcome to Jade Palace. My name is Jack, I'll be your server tonight. How can I help you?",
        "We have a variety of traditional Chinese dishes. Some of our most popular items are Kung Pao Chicken, Sweet and Sour Pork, Beef with Broccoli, and Egg Foo Young. We also offer Dim Sum, fresh seafood, noodle soups, and of course classic dishes like Spring Rolls, Dumplings, and Won Ton Soup. Would you like to see our full menu?",
        "Absolutely, my apologies. Please, right this way. Here we are, I have a nice quiet table for you in the corner. Please have a seat. Would you like to start with some tea or a drink while you look over the menu? We have jasmine tea, oolong tea, Tsingtao beer or hot sake if you prefer. Just let me know when you are ready to order. Thank you.",
    ]

    for i, text in enumerate(sample_texts):
        # perf_counter() is a monotonic clock, so the elapsed time cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        result = tts(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # NOTE: the "audo" spelling is kept so output file names do not change.
            with open(os.path.join(output_dir, f"audo_{i}.wav"), "wb") as audio_file:
                audio_file.write(result.audio_data)
        else:
            # tts() has already printed the cancellation details; do not
            # leave a file with no usable audio behind.
            print(f"Skipping audio file for sample {i}: synthesis did not complete")
        print(result, " in ", time.perf_counter() - start)