# test_ui/src/open_llm_vtuber/audio_manager.py
# Uploaded by britto224 ("Upload 130 files", commit 5669b22, verified)
import re
import uuid
import unicodedata
class AudioManager:
def __init__(self, tts, live2d, translator, config, verbose=False):
self.tts = tts
self.live2d = live2d
self.translator = translator
self.config = config
self.verbose = verbose
self.remove_special_char = config.get("REMOVE_SPECIAL_CHAR", True)
def clean_text(self, text: str) -> str:
text = re.sub(r'[^\u4e00-\u9fffA-Za-z0-9,]', ' ', text)
if self.remove_special_char:
text = self.remove_special_characters(text)
return text
def remove_special_characters(self, text: str) -> str:
"""Filter text to remove all non-letter, non-number, and non-punctuation characters."""
normalized_text = unicodedata.normalize("NFKC", text)
def is_valid_char(char: str) -> bool:
category = unicodedata.category(char)
return (
category.startswith("L")
or category.startswith("N")
or category.startswith("P")
or char.isspace()
)
filtered_text = "".join(char for char in normalized_text if is_valid_char(char))
return filtered_text
def generate_audio_file(self, sentence: str, file_name_no_ext: str) -> str | None:
"""
Generate an audio file from a given sentence using the TTS engine.
Parameters:
- sentence (str): The sentence to generate audio for
- file_name_no_ext (str): The name of the audio file (without extension)
Returns:
- str or None: The path of the generated audio file, or None if the sentence iempty
"""
sentence = self.clean_text(sentence)
if self.verbose:
print(f">> generating {file_name_no_ext}...")
if not self.tts:
return None
if self.live2d:
sentence = self.live2d.remove_emotion_keywords(sentence)
if sentence.strip() == "":
return None
return self.tts.generate_audio(sentence, file_name_no_ext=file_name_no_ext)
def play_audio_file(self, sentence: str | None, filepath: str | None, instrument_filepath: str | None = None) -> None:
"""
Play the audio file located at the given filepath.
"""
if filepath is None:
print("No audio to be streamed. Response is empty.")
return
if sentence is None:
sentence = ""
try:
if self.verbose:
print(f">> Playing {filepath}...")
self.tts.play_audio_file_local(filepath)
self.tts.remove_file(filepath, verbose=self.verbose)
except ValueError as e:
if str(e) == "Audio is empty or all zero.":
print("No audio to be streamed. Response is empty.")
else:
raise e
except Exception as e:
print(f"Error playing the audio file {filepath}: {e}")
def play_text(self, text: str) -> None:
if not text.strip():
print("No text to play.")
return
sentences = re.split(r'(?<=[.!?。!?])\s*', text)
sentences = [s for s in sentences if s.strip()]
for sentence in sentences:
tts_target_sentence = self.live2d.remove_emotion_keywords(sentence)
if self.translator and self.config.get("TRANSLATE_AUDIO", False):
print("Translating...")
tts_target_sentence = self.translator.translate (tts_target_sentence)
print(f"Translated: {tts_target_sentence}")
audio_filepath = self.generate_audio_file(
tts_target_sentence, file_name_no_ext=f"temp_text_{uuid.uuid4()}"
)
if audio_filepath:
self.play_audio_file(sentence=sentence, filepath=audio_filepath)
else:
print("No audio generated for sentence.")