learnable-ai / src /services /speech /text_to_speech.py
ishworrsubedii's picture
Added new features and improved code formatting:
32a0eda
import base64
import random
import re
from io import BytesIO
from typing import List, Generator
from gtts import gTTS
from gtts.tokenizer import pre_processors
class TextToSpeech:
    """Convert text to MP3 speech with gTTS, one sentence at a time.

    A paragraph is split into sentences so that each sentence can be
    synthesized and delivered independently as a base64-encoded MP3 chunk.
    """

    # A sentence boundary is a run of spaces that directly follows '.', '!'
    # or '?'. The look-behind keeps the punctuation attached to the sentence.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?]) +')

    def __init__(self):
        # Pre-processors handed to gTTS and applied to the text before it is
        # tokenized and sent for synthesis.
        self.preprocessing = [
            pre_processors.tone_marks,
            pre_processors.end_of_line,
            pre_processors.word_sub,
            pre_processors.abbreviations,
        ]

    def _convert_sentence(self, text: str, lang: str, tld: str) -> bytes:
        """Synthesize *text* and return the raw MP3 bytes.

        Args:
            text: The text to speak; must contain something speakable.
            lang: Language code forwarded to gTTS.
            tld: Top-level domain forwarded to gTTS.

        Returns:
            The MP3 data that gTTS wrote for *text*.
        """
        tts = gTTS(
            text=text,
            lang=lang,
            slow=False,
            tld=tld,
            pre_processor_funcs=self.preprocessing,
        )
        with BytesIO() as mp3_fp:
            tts.write_to_fp(mp3_fp)
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek is needed first.
            return mp3_fp.getvalue()

    def _split_corpus(self, corpus: str) -> List[str]:
        """Split *corpus* into non-empty sentences.

        Fragments are stripped of surrounding whitespace, and fragments that
        are empty after stripping are dropped (e.g. the empty tail produced
        by trailing spaces after the final punctuation mark, or an entirely
        blank input).

        Args:
            corpus: The text to split.

        Returns:
            The sentences in their original order; an empty list when
            *corpus* contains no text.
        """
        fragments = (
            fragment.strip()
            for fragment in self._SENTENCE_BOUNDARY.split(corpus)
        )
        return [fragment for fragment in fragments if fragment]

    def sentence_audio_generator(self, paragraph: str, lang: str, tld: str) -> Generator[str, None, None]:
        """Yield base64-encoded MP3 audio for each sentence of *paragraph*.

        Blank fragments are never sent to gTTS, which rejects empty text, so
        a blank paragraph simply yields nothing instead of failing.

        Args:
            paragraph: The text to speak.
            lang: Language code forwarded to gTTS.
            tld: Top-level domain forwarded to gTTS.

        Yields:
            One base64 string (UTF-8 decoded) per sentence, in order.
        """
        for sentence in self._split_corpus(paragraph):
            mp3 = self._convert_sentence(sentence, lang, tld)
            yield base64.b64encode(mp3).decode("utf-8")