# Source: translation_game/app/speech_google.py
# Upload folder using huggingface_hub (wdeback, commit 20f080f)
"""Synthesizes speech from the input string of text or ssml.
Make sure to be working in a virtual environment.
Note: ssml must be well-formed according to:
https://www.w3.org/TR/speech-synthesis/
"""
from google.cloud import texttospeech
from loguru import logger
class Speech:
    """Thin wrapper around the Google Cloud Text-to-Speech client.

    One client and one MP3 audio configuration are created per instance and
    reused for every synthesis request.
    """

    # Used when the caller passes an empty/None language; this is the value
    # that was previously hard-coded for every request.
    DEFAULT_LANGUAGE = "en-US"

    def __init__(self) -> None:
        """Create the Text-to-Speech client and the MP3 audio configuration."""
        # Instantiates a client
        self.client = texttospeech.TextToSpeechClient()
        # Select the type of audio file you want returned
        self.audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        )
        logger.info("Initialized Speech()")

    def text_to_speech(
        self, text: str, language: str, output_path: str = "output.mp3"
    ) -> None:
        """Synthesize ``text`` and write the resulting MP3 audio to a file.

        Args:
            text: Plain text to be spoken.
            language: Language code forwarded to the voice selection
                (the API documents this as a BCP-47 tag such as "en-US" or
                "nl-NL"). An empty or ``None`` value falls back to
                ``DEFAULT_LANGUAGE``.
                NOTE(review): callers that pass something other than a
                language tag (e.g. a language name) must be adjusted --
                previously this argument was silently ignored.
            output_path: Destination file for the MP3 bytes. Defaults to
                "output.mp3" in the current working directory, matching the
                previous fixed behavior. An existing file is overwritten.
        """
        logger.info("Converting text to speech.")

        # Set the text input to be synthesized
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # Build the voice request with the requested language and a neutral
        # SSML voice gender. Honor the caller's language instead of always
        # requesting an en-US voice.
        voice = texttospeech.VoiceSelectionParams(
            language_code=language or self.DEFAULT_LANGUAGE,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        )

        # Perform the text-to-speech request on the text input with the
        # selected voice parameters and audio file type
        response = self.client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=self.audio_config
        )

        # The response's audio_content is binary, hence the "wb" mode.
        with open(output_path, "wb") as out:
            out.write(response.audio_content)
        logger.info(f'Audio content written to file "{output_path}"')