Spaces:
Sleeping
Sleeping
| from abc import ABC, abstractmethod | |
| from .Base_TTS_Task import Base_TTS_Task as TTS_Task | |
| import json | |
| from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple | |
| from pydantic import BaseModel, Field, model_validator | |
| import numpy as np | |
| from abc import ABC, abstractmethod | |
| from typing import Dict, List, Union, Generator, Tuple | |
| from typing_extensions import Literal | |
| import numpy as np | |
| import wave,io | |
class Base_TTS_Synthesizer(ABC):
    """
    Common base class for Text-To-Speech (TTS) synthesizer backends.

    The base class only stores two settings and declares the methods a
    backend is expected to provide. Every method other than ``__init__``
    is an empty placeholder here that returns ``None``.

    Attributes:
        ui_config (Dict[str, List]): UI configuration settings.
        debug_mode (bool): Whether debug mode is switched on.
    """

    ui_config: Dict[str, List] = {}
    debug_mode: bool = False

    def __init__(self, **kwargs):
        """
        Store the optional settings passed as keyword arguments.

        Args:
            ui_config (Dict[str, List], optional): UI configuration settings.
                A new empty dict is used when the key is not supplied.
            debug_mode (bool, optional): Debug-mode flag. ``False`` when the
                key is not supplied.

        Any other keyword arguments are accepted and ignored.
        """
        # Lay the defaults down first and let the caller's kwargs override
        # them; the dict literal is rebuilt on every call, so instances never
        # share the default ``ui_config`` dict.
        settings = {"ui_config": {}, "debug_mode": False, **kwargs}
        self.ui_config = settings["ui_config"]
        self.debug_mode = settings["debug_mode"]

    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """
        Produce speech for a TTS task (placeholder in the base class).

        Args:
            task (TTS_Task): The data and parameters for the synthesis.
            return_type (Literal["filepath", "numpy"], optional): Requested
                kind of result, a file path or audio data. Defaults to
                "numpy".
            save_path (str, optional): Where to save the audio file.

        Returns:
            The annotated contract is a file path, a generator of
            ``(int, np.ndarray)`` tuples, or another type chosen by the
            backend. The base implementation does nothing and returns
            ``None``.
        """
        return None

    def get_characters(self):
        """
        List the available characters and their emotions (placeholder).

        Returns:
            Intended to be a ``Dict[str, List[str]]`` mapping each character
            name to its emotions. The base implementation returns ``None``.
        """
        return None

    def params_parser(self, data):
        """
        Convert raw input data into a TTS_Task (placeholder).

        Args:
            data (Any): The raw input to parse.

        Returns:
            Intended to be a ``TTS_Task`` built from ``data``. The base
            implementation returns ``None``.
        """
        return None

    def ms_like_parser(self, data):
        """
        Convert Microsoft-like formatted input into a TTS_Task (placeholder).

        Args:
            data (Any): The raw input to parse.

        Returns:
            Intended to be a ``TTS_Task`` built from the Microsoft-like
            formatted ``data``. The base implementation returns ``None``.
        """
        return None
def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2):
    """
    Build a WAV file header that describes an empty audio stream.

    Args:
        sample_rate (int): Audio sample rate.
        channels (int, optional): Number of audio channels. Defaults to 1.
        sample_width (int, optional): Sample width in bytes. Defaults to 2.

    Returns:
        bytes: The header bytes, with no audio frames after them.
    """
    header_buffer = io.BytesIO()
    with wave.open(header_buffer, "wb") as writer:
        # Channels, width and rate are applied in that order; the last three
        # fields restate the writer's defaults (0 frames, uncompressed PCM).
        writer.setparams(
            (channels, sample_width, sample_rate, 0, "NONE", "not compressed")
        )
    # Leaving the ``with`` block closes the writer, which emits the header
    # even though no frames were written. The in-memory buffer stays open.
    return header_buffer.getvalue()