import re import uuid import unicodedata class AudioManager: def __init__(self, tts, live2d, translator, config, verbose=False): self.tts = tts self.live2d = live2d self.translator = translator self.config = config self.verbose = verbose self.remove_special_char = config.get("REMOVE_SPECIAL_CHAR", True) def clean_text(self, text: str) -> str: text = re.sub(r'[^\u4e00-\u9fffA-Za-z0-9,]', ' ', text) if self.remove_special_char: text = self.remove_special_characters(text) return text def remove_special_characters(self, text: str) -> str: """Filter text to remove all non-letter, non-number, and non-punctuation characters.""" normalized_text = unicodedata.normalize("NFKC", text) def is_valid_char(char: str) -> bool: category = unicodedata.category(char) return ( category.startswith("L") or category.startswith("N") or category.startswith("P") or char.isspace() ) filtered_text = "".join(char for char in normalized_text if is_valid_char(char)) return filtered_text def generate_audio_file(self, sentence: str, file_name_no_ext: str) -> str | None: """ Generate an audio file from a given sentence using the TTS engine. Parameters: - sentence (str): The sentence to generate audio for - file_name_no_ext (str): The name of the audio file (without extension) Returns: - str or None: The path of the generated audio file, or None if the sentence iempty """ sentence = self.clean_text(sentence) if self.verbose: print(f">> generating {file_name_no_ext}...") if not self.tts: return None if self.live2d: sentence = self.live2d.remove_emotion_keywords(sentence) if sentence.strip() == "": return None return self.tts.generate_audio(sentence, file_name_no_ext=file_name_no_ext) def play_audio_file(self, sentence: str | None, filepath: str | None, instrument_filepath: str | None = None) -> None: """ Play the audio file located at the given filepath. """ if filepath is None: print("No audio to be streamed. Response is empty.") return if sentence is None: sentence = "" try: if self.verbose: print(f">> Playing {filepath}...") self.tts.play_audio_file_local(filepath) self.tts.remove_file(filepath, verbose=self.verbose) except ValueError as e: if str(e) == "Audio is empty or all zero.": print("No audio to be streamed. Response is empty.") else: raise e except Exception as e: print(f"Error playing the audio file {filepath}: {e}") def play_text(self, text: str) -> None: if not text.strip(): print("No text to play.") return sentences = re.split(r'(?<=[.!?。!?])\s*', text) sentences = [s for s in sentences if s.strip()] for sentence in sentences: tts_target_sentence = self.live2d.remove_emotion_keywords(sentence) if self.translator and self.config.get("TRANSLATE_AUDIO", False): print("Translating...") tts_target_sentence = self.translator.translate (tts_target_sentence) print(f"Translated: {tts_target_sentence}") audio_filepath = self.generate_audio_file( tts_target_sentence, file_name_no_ext=f"temp_text_{uuid.uuid4()}" ) if audio_filepath: self.play_audio_file(sentence=sentence, filepath=audio_filepath) else: print("No audio generated for sentence.")