import base64
import random
import re
from io import BytesIO
from typing import List, Generator

from gtts import gTTS
from gtts.tokenizer import pre_processors


class TextToSpeech:
    """Convert text to base64-encoded MP3 audio, one sentence at a time, via gTTS."""

    # A sentence ends at '.', '!' or '?' followed by one or more spaces.
    # Compiled once here instead of on every call to `_split_corpus`.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?]) +')

    def __init__(self):
        # Text pre-processors handed to gTTS for every sentence, applied in
        # this order.
        self.preprocessing = [
            pre_processors.tone_marks,
            pre_processors.end_of_line,
            pre_processors.word_sub,
            pre_processors.abbreviations,
        ]

    def _convert_sentence(self, text: str, lang: str, tld: str) -> bytes:
        """Synthesize *text* with gTTS and return the raw MP3 bytes.

        Args:
            text: The text to speak.
            lang: Language code forwarded to gTTS as ``lang``.
            tld: Value forwarded to gTTS as ``tld``.

        Returns:
            Everything gTTS wrote to the in-memory buffer.
        """
        tts = gTTS(
            text=text,
            lang=lang,
            slow=False,
            tld=tld,
            pre_processor_funcs=self.preprocessing,
        )
        # `getvalue()` returns the whole buffer regardless of the stream
        # position, so no rewind is needed; `with` releases the buffer.
        with BytesIO() as buffer:
            tts.write_to_fp(buffer)
            return buffer.getvalue()

    def _split_corpus(self, corpus: str) -> List[str]:
        """Split *corpus* into sentences, dropping blank pieces.

        The split happens on runs of spaces that follow '.', '!' or '?'.
        Pieces that are empty or whitespace-only (produced by an empty corpus
        or by trailing spaces after the final punctuation mark) are discarded
        so they are never sent to gTTS, which refuses empty text.

        Args:
            corpus: The text to split.

        Returns:
            The non-blank sentences in their original order; an empty list
            when *corpus* contains no speakable text.
        """
        pieces = self._SENTENCE_BOUNDARY.split(corpus)
        return [piece for piece in pieces if piece.strip()]

    def sentence_audio_generator(self, paragraph: str, lang: str, tld: str) -> Generator[str, None, None]:
        """Yield base64-encoded MP3 audio for each sentence of *paragraph*.

        Sentences are synthesized lazily, one per iteration, so a consumer can
        start using the first clip before the rest have been generated.

        Args:
            paragraph: The text to speak.
            lang: Language code forwarded to gTTS as ``lang``.
            tld: Value forwarded to gTTS as ``tld``.

        Yields:
            The MP3 data of one sentence, base64-encoded and decoded to a
            UTF-8 ``str``. Nothing is yielded for a blank paragraph.
        """
        for sentence in self._split_corpus(paragraph):
            mp3 = self._convert_sentence(sentence, lang, tld)
            yield base64.b64encode(mp3).decode("utf-8")