First_agent_template / tts_gen.py
gbv's picture
1
9d8a49f
import io
import os
import re

import soundfile as sf
import torch
from num2words import num2words
#from config import Config
# Local path of the Silero TTS model package. TTSGenerator loads the model
# from this file and downloads it here first when the file does not exist.
SILERO_LOCAL_PATH='./models/v2_multi.pt'
def translit2rus(text):
    """Spell Latin letters, digits and '/' in *text* with Russian characters.

    Each character is translated on its own: Latin letters become a Cyrillic
    letter ('x'/'X' become two letters), digits become a space-padded Russian
    word and '/' becomes ' дробь '. Any character without an entry in the
    table is passed through unchanged.
    """
    lowercase = {
        'a': 'а', 'b': 'б', 'c': 'ц', 'd': 'д', 'e': 'е', 'f': 'ф', 'g': 'г',
        'h': 'х', 'i': 'и', 'j': 'й', 'k': 'к', 'l': 'л', 'm': 'м', 'n': 'н',
        'o': 'о', 'p': 'п', 'q': 'к', 'r': 'р', 's': 'с', 't': 'т', 'u': 'у',
        'v': 'в', 'w': 'в', 'x': 'кс', 'y': 'ы', 'z': 'з',
    }
    # Capital letters mirror the lowercase table with the first Cyrillic
    # letter capitalised ('X' -> 'Кс').
    uppercase = {
        latin.upper(): cyrillic.capitalize()
        for latin, cyrillic in lowercase.items()
    }
    # The '+' inside some words is part of the emitted text; presumably a
    # stress marker for the speech engine -- confirm before changing it.
    spoken_symbols = {
        '/': ' дробь ',
        '0': ' ноль ',
        '1': ' один ',
        '2': ' два ',
        '3': ' три ',
        '4': ' чет+ыре ',
        '5': ' пять ',
        '6': ' шесть ',
        '7': ' семь ',
        '8': ' в+осемь ',
        '9': ' д+евять ',
    }
    table = str.maketrans({**lowercase, **uppercase, **spoken_symbols})
    return text.translate(table)
# Feminine forms of the numerals that must agree with the noun "копейка".
_FEMININE_NUMERALS = {"один": "одна", "два": "две"}


def _plural_form(count, one, few, many):
    """Return the Russian noun form that agrees with the non-negative integer *count*."""
    last_digit = count % 10
    last_two = count % 100
    if last_digit == 1 and last_two != 11:
        return one
    if 2 <= last_digit <= 4 and not 10 <= last_two < 20:
        return few
    return many


def _to_feminine(words):
    """Switch a trailing masculine 'один'/'два' in *words* to its feminine form."""
    head, sep, last = words.rpartition(" ")
    return head + sep + _FEMININE_NUMERALS.get(last, last)


def float2text(number):
    """Spell a money amount in Russian words as roubles and kopecks.

    Args:
        number: Amount in roubles; the fractional part is rounded to whole
            kopecks.

    Returns:
        A string of the form "<roubles> <kopecks>". When there are no kopecks
        the second part is empty, so the result ends with a single space
        (kept for compatibility with existing callers). Negative amounts are
        prefixed with "минус".
    """
    # Round the whole amount to kopecks first, so that a fraction rounding up
    # to 100 kopecks carries into the roubles instead of being spoken as
    # "one hundred kopecks". Work on the magnitude so that negative amounts
    # are not floor-divided into the wrong rouble/kopeck pair.
    total_kopecks = int(round(abs(number) * 100))
    rubles, kopecks = divmod(total_kopecks, 100)

    # Spell the rouble part and attach the noun with the matching ending.
    rubles_text = num2words(rubles, lang='ru')
    rubles_text += " " + _plural_form(rubles, "рубль", "рубля", "рублей")
    if number < 0 and total_kopecks:
        rubles_text = "минус " + rubles_text

    if kopecks > 0:
        # "копейка" is feminine, so the numeral must read "одна"/"две".
        kopecks_text = _to_feminine(num2words(kopecks, lang='ru'))
        kopecks_text += " " + _plural_form(kopecks, "копейка", "копейки", "копеек")
    else:
        kopecks_text = ""
    return f"{rubles_text} {kopecks_text}"
def is_speak_xml(text):
    """Tell whether *text* is wrapped in a <speak>...</speak> element.

    Leading and trailing whitespace is ignored. The opening tag must be
    exactly "<speak>" and the text must end with "</speak>".
    """
    trimmed = text.strip()
    if not trimmed.startswith("<speak>"):
        return False
    return trimmed.endswith("</speak>")
class TTSGenerator():
    """Text-to-speech wrapper around the Silero model stored at SILERO_LOCAL_PATH.

    The model package is downloaded on first use, loaded with
    ``torch.package`` and run on the CPU.
    """

    _MODEL_URL = 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
    # Matches any XML-style tag, e.g. <speak>, </speak>, <break time="1s"/>.
    _TAG_RE = re.compile(r"<[^>]+>")

    def __init__(self, sample_rate):
        """Load the model, downloading it first when the local file is missing.

        Args:
            sample_rate: Sample rate passed to the model on every synthesis
                call and used when encoding the WAV output.
        """
        self.sample_rate = sample_rate
        device = torch.device('cpu')
        # NOTE: this changes the thread count for the whole process.
        torch.set_num_threads(4)
        local_file = SILERO_LOCAL_PATH
        if not os.path.isfile(local_file):
            # The download writes into the target directory, so make sure it
            # exists; otherwise the very first run fails before fetching.
            model_dir = os.path.dirname(local_file)
            if model_dir:
                os.makedirs(model_dir, exist_ok=True)
            torch.hub.download_url_to_file(self._MODEL_URL, local_file)
        self.model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")
        self.model.to(device)

    @classmethod
    def _strip_markup(cls, text):
        """Return *text* with XML-style tags removed and whitespace collapsed."""
        return " ".join(cls._TAG_RE.sub(" ", text).split())

    def get_all_voices(self):
        """Return the list of speaker names offered to callers.

        The list is hard-coded; the model's own ``speakers`` attribute is
        deliberately not consulted here.
        """
        return ['dilyara']

    def generate_voice_to_file(self, text, speaker, output_file):
        """Synthesize *text* with *speaker* and save it as a WAV at *output_file*.

        Text wrapped in <speak>...</speak> is passed as ``ssml_text``;
        anything else is passed as plain ``text``. Returns whatever the
        model's ``save_wav`` returns.
        """
        # NOTE(review): this method uses the single-utterance keyword API
        # (text/ssml_text/speaker/audio_path) while generate_voice uses the
        # batch API (texts/speakers) -- confirm the loaded model supports both.
        use_ssml = is_speak_xml(text)
        return self.model.save_wav(text=None if use_ssml else text,
                                   ssml_text=text if use_ssml else None,
                                   speaker=speaker,
                                   sample_rate=self.sample_rate,
                                   audio_path=output_file)

    def generate_voice(self, text, speaker):
        """Synthesize *text* with *speaker* and return it as an in-memory WAV.

        Returns:
            An ``io.BytesIO`` positioned at the start of the WAV data.
        """
        # The batch call below only receives plain text. SSML input used to be
        # turned into ``texts=[None]``; reduce it to its text content instead
        # so the model always gets a string.
        plain_text = self._strip_markup(text) if is_speak_xml(text) else text
        audio = self.model.apply_tts(texts=[plain_text],
                                     speakers=[speaker],
                                     sample_rate=self.sample_rate)
        buffer = io.BytesIO()
        # audio[0] is the result for the single text submitted above;
        # presumably a torch tensor, hence .numpy() -- confirm against the model.
        sf.write(buffer, audio[0].numpy(), self.sample_rate, format='WAV')
        buffer.seek(0)
        return buffer