File size: 2,403 Bytes
d809d42
 
 
 
eb02aca
d809d42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import azure.cognitiveservices.speech as speechsdk
import os

# Build the shared synthesizer once, when the module is loaded. The key and
# region come from the SPEECH_KEY / SPEECH_REGION environment variables and
# fall back to an empty string when a variable is not set.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('SPEECH_KEY', ''),
    region=os.environ.get('SPEECH_REGION', ''),
)
# Voice used for every request made through this config; change it here.
speech_config.speech_synthesis_voice_name = "en-GB-ElliotNeural"
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

def tts(text) -> speechsdk.SpeechSynthesisResult:
    """Synthesize ``text`` with the module-level ``speech_synthesizer``.

    Passes ``text`` to ``speech_synthesizer.speak_text`` and prints a short
    status message describing the outcome: a confirmation when synthesis
    completed, or the cancellation reason (plus error details, when present)
    when it was canceled.

    Args:
        text: The text to synthesize.

    Returns:
        The result object returned by ``speak_text``, whatever its outcome;
        callers should inspect ``reason`` before relying on the audio.
    """
    outcome = speech_synthesizer.speak_text(text)
    reason = outcome.reason

    # Success path: report and hand the result straight back.
    if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(text))
        return outcome

    # Any outcome other than a cancellation is returned without a message.
    if reason != speechsdk.ResultReason.Canceled:
        return outcome

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))

    # Extra diagnostics are printed only for error cancellations that carry
    # a non-empty error description.
    canceled_by_error = details.reason == speechsdk.CancellationReason.Error
    if canceled_by_error and details.error_details:
        print("Error details: {}".format(details.error_details))
        print("Did you set the speech resource key and region values?")

    return outcome

if __name__ == '__main__':
    # Manual smoke test: synthesize a few sample sentences, save each one as
    # a file under audio_samples/, and print the result object together with
    # the elapsed time (synthesis plus file write) in seconds.
    import time

    sample_texts = [
        "Welcome to Jade Palace. My name is Jack, I'll be your server tonight. How can I help you?",
        "We have a variety of traditional Chinese dishes. Some of our most popular items are Kung Pao Chicken, Sweet and Sour Pork, Beef with Broccoli, and Egg Foo Young. We also offer Dim Sum, fresh seafood, noodle soups, and of course classic dishes like Spring Rolls, Dumplings, and Won Ton Soup. Would you like to see our full menu?",
        "Absolutely, my apologies. Please, right this way. Here we are, I have a nice quiet table for you in the corner. Please have a seat. Would you like to start with some tea or a drink while you look over the menu? We have jasmine tea, oolong tea, Tsingtao beer or hot sake if you prefer. Just let me know when you are ready to order. Thank you.",
    ]

    # Create the output directory up front so open() below cannot fail with
    # FileNotFoundError on a fresh checkout.
    output_dir = "audio_samples"
    os.makedirs(output_dir, exist_ok=True)

    for i, text in enumerate(sample_texts):
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments.
        start = time.perf_counter()
        result = tts(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # NOTE(review): "audo_" looks like a typo for "audio_"; the name
            # is kept unchanged in case other code reads these paths.
            with open(f"{output_dir}/audo_{i}.wav", "wb") as audio_file:
                audio_file.write(result.audio_data)
        else:
            # tts() has already printed the cancellation details; do not
            # leave a bogus audio file behind for a failed synthesis.
            print(f"No audio file written for sample {i}: synthesis did not complete")
        print(result, " in ", time.perf_counter() - start)