"""Synthesizes speech from the input string of text or ssml.

Make sure to be working in a virtual environment.

Note: ssml must be well-formed according to:
    https://www.w3.org/TR/speech-synthesis/
"""
from google.cloud import texttospeech
from loguru import logger


class Speech:
    """Wrapper around the Google Cloud Text-to-Speech client producing MP3 files.

    The client and the audio configuration are created once in ``__init__``
    and reused for every ``text_to_speech`` call.
    """

    def __init__(self) -> None:
        """Create the Text-to-Speech client and the MP3 audio configuration."""
        # Instantiates a client
        self.client = texttospeech.TextToSpeechClient()

        # Select the type of audio file you want returned
        self.audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        )
        logger.info("Initialized Speech()")

    def text_to_speech(
        self, text: str, language: str, output_path: str = "output.mp3"
    ) -> None:
        """Synthesize ``text`` and write the resulting MP3 audio to a file.

        Args:
            text: Plain text to be synthesized.
            language: Language code for the voice (e.g. ``"en-US"``). When
                empty or ``None``, ``"en-US"`` is used.
            output_path: Destination file for the audio. Defaults to
                ``"output.mp3"`` in the current working directory; an
                existing file at that path is overwritten.
        """
        logger.info("Converting text to speech.")

        # Set the text input to be synthesized
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # Honor the caller's language; previously this argument was ignored and
        # "en-US" was always sent. Keep "en-US" only as the fallback.
        language_code = language or "en-US"

        # Build the voice request with the selected language code and the
        # ssml voice gender ("neutral")
        voice = texttospeech.VoiceSelectionParams(
            language_code=language_code,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        )

        # Perform the text-to-speech request on the text input with the
        # selected voice parameters and audio file type
        response = self.client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=self.audio_config
        )

        # The response's audio_content is binary, hence the "wb" mode.
        with open(output_path, "wb") as out:
            out.write(response.audio_content)
        logger.info(f'Audio content written to file "{output_path}"')